--- /dev/null
+%patch
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
+@@ -0,0 +1,2313 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
++ */
++
++/*
++ * Extents support for EXT3
++ *
++ * TODO:
++ * - ext3_ext_walk_space() should not use ext3_ext_find_extent()
++ * - ext3_ext_calc_credits() could take 'mergable' into account
++ * - ext3*_error() should be used in some situations
++ * - find_goal() [to be tested and improved]
++ * - smart tree reduction
++ * - arch-independence
++ * common on-disk format for big/little-endian arch
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/time.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/smp_lock.h>
++#include <linux/highuid.h>
++#include <linux/pagemap.h>
++#include <linux/quotaops.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ext3_extents.h>
++#include <asm/uaccess.h>
++
++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
++{
++ int err;
++
++ if (handle->h_buffer_credits > needed)
++ return handle;
++ if (!ext3_journal_extend(handle, needed))
++ return handle;
++ err = ext3_journal_restart(handle, needed);
++ /* NOTE(review): err from ext3_journal_restart() is discarded — confirm callers can tolerate a failed restart */
++ return handle;
++}
++
++static int inline
++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->get_write_access)
++ return tree->ops->get_write_access(h,tree->buffer);
++ else
++ return 0;
++}
++
++static int inline
++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->mark_buffer_dirty)
++ return tree->ops->mark_buffer_dirty(h,tree->buffer);
++ else
++ return 0;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ */
++static int ext3_ext_get_access(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++
++ if (path->p_bh) {
++ /* path points to block */
++ err = ext3_journal_get_write_access(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_get_access_for_root(handle, tree);
++ }
++ return err;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ * - EIO
++ */
++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++ if (path->p_bh) {
++ /* path points to block */
++ err =ext3_journal_dirty_metadata(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_mark_root_dirty(handle, tree);
++ }
++ return err;
++}
++
++static int inline
++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, struct ext3_extent *ex,
++ int *err)
++{
++ int goal, depth, newblock;
++ struct inode *inode;
++
++ EXT_ASSERT(tree);
++ if (tree->ops->new_block)
++ return tree->ops->new_block(handle, tree, path, ex, err);
++
++ inode = tree->inode;
++ depth = EXT_DEPTH(tree);
++ if (path && depth > 0) {
++ goal = path[depth-1].p_block;
++ } else {
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++
++ bg_start = (ei->i_block_group *
++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ goal = bg_start + colour;
++ }
++
++ newblock = ext3_new_block(handle, inode, goal, err);
++ return newblock;
++}
++
++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *neh;
++ neh = EXT_ROOT_HDR(tree);
++ neh->eh_generation++;
++}
++
++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 6;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 5;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len - sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 3;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 4;
++#endif
++ return size;
++}
++
++static void ext3_ext_show_path(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int k, l = path->p_depth;
++
++ ext_debug(tree, "path:");
++ for (k = 0; k <= l; k++, path++) {
++ if (path->p_idx) {
++ ext_debug(tree, " %d->%d", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++ } else if (path->p_ext) {
++ ext_debug(tree, " %d:%d:%d",
++ path->p_ext->ee_block,
++ path->p_ext->ee_len,
++ path->p_ext->ee_start);
++ } else
++ ext_debug(tree, " []");
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *eh;
++ struct ext3_extent *ex;
++ int i;
++
++ if (!path)
++ return;
++
++ eh = path[depth].p_hdr;
++ ex = EXT_FIRST_EXTENT(eh);
++
++ for (i = 0; i < eh->eh_entries; i++, ex++) {
++ ext_debug(tree, "%d:%d:%d ",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_drop_refs(struct ext3_ext_path *path)
++{
++ int depth = path->p_depth;
++ int i;
++
++ for (i = 0; i <= depth; i++, path++)
++ if (path->p_bh) {
++ brelse(path->p_bh);
++ path->p_bh = NULL;
++ }
++}
++
++/*
++ * binary search for closest index by given block
++ */
++static inline void
++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent_idx *ix;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_entries > 0);
++
++ ext_debug(tree, "binsearch for %d(idx): ", block);
++
++ path->p_idx = ix = EXT_FIRST_INDEX(eh);
++
++ r = k = eh->eh_entries;
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ix[l + k].ei_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ix += l;
++ path->p_idx = ix;
++ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf);
++
++ while (l++ < r) {
++ if (block < ix->ei_block)
++ break;
++ path->p_idx = ix++;
++ }
++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++
++#ifdef CHECK_BINSEARCH
++ {
++ struct ext3_extent_idx *chix;
++
++ chix = ix = EXT_FIRST_INDEX(eh);
++ for (k = 0; k < eh->eh_entries; k++, ix++) {
++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) {
++ printk("k=%d, ix=0x%p, first=0x%p\n", k,
++ ix, EXT_FIRST_INDEX(eh));
++ printk("%u <= %u\n",
++ ix->ei_block,ix[-1].ei_block);
++ }
++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block);
++ if (block < ix->ei_block)
++ break;
++ chix = ix;
++ }
++ EXT_ASSERT(chix == path->p_idx);
++ }
++#endif
++
++}
++
++/*
++ * binary search for closest extent by given block
++ */
++static inline void
++ext3_ext_binsearch(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent *ex;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++
++ if (eh->eh_entries == 0) {
++ /*
++ * this leaf is empty yet:
++ * we get such a leaf in split/add case
++ */
++ return;
++ }
++
++ ext_debug(tree, "binsearch for %d: ", block);
++
++ path->p_ext = ex = EXT_FIRST_EXTENT(eh);
++
++ r = k = eh->eh_entries;
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ex[l + k].ee_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ex += l;
++ path->p_ext = ex;
++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++ while (l++ < r) {
++ if (block < ex->ee_block)
++ break;
++ path->p_ext = ex++;
++ }
++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++#ifdef CHECK_BINSEARCH
++ {
++ struct ext3_extent *chex;
++
++ chex = ex = EXT_FIRST_EXTENT(eh);
++ for (k = 0; k < eh->eh_entries; k++, ex++) {
++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block);
++ if (block < ex->ee_block)
++ break;
++ chex = ex;
++ }
++ EXT_ASSERT(chex == path->p_ext);
++ }
++#endif
++
++}
++
++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *eh;
++
++ BUG_ON(tree->buffer_len == 0);
++ ext3_ext_get_access_for_root(handle, tree);
++ eh = EXT_ROOT_HDR(tree);
++ eh->eh_depth = 0;
++ eh->eh_entries = 0;
++ eh->eh_magic = EXT3_EXT_MAGIC;
++ eh->eh_max = ext3_ext_space_root(tree);
++ ext3_ext_mark_root_dirty(handle, tree);
++ ext3_ext_invalidate_cache(tree);
++ return 0;
++}
++
++struct ext3_ext_path *
++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ struct buffer_head *bh;
++ int depth, i, ppos = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ eh = EXT_ROOT_HDR(tree);
++ EXT_ASSERT(eh);
++ i = depth = EXT_DEPTH(tree);
++ EXT_ASSERT(eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(i == 0 || eh->eh_entries > 0);
++
++ /* account possible depth increase */
++ if (!path) {
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
++ GFP_NOFS);
++ if (!path)
++ return ERR_PTR(-ENOMEM);
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[0].p_hdr = eh;
++
++ /* walk through the tree */
++ while (i) {
++ ext_debug(tree, "depth %d: num %d, max %d\n",
++ ppos, eh->eh_entries, eh->eh_max);
++ ext3_ext_binsearch_idx(tree, path + ppos, block);
++ path[ppos].p_block = path[ppos].p_idx->ei_leaf;
++ path[ppos].p_depth = i;
++ path[ppos].p_ext = NULL;
++
++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block);
++ if (!bh) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ return ERR_PTR(-EIO);
++ }
++ eh = EXT_BLOCK_HDR(bh);
++ ppos++;
++ EXT_ASSERT(ppos <= depth);
++ path[ppos].p_bh = bh;
++ path[ppos].p_hdr = eh;
++ i--;
++ }
++
++ path[ppos].p_depth = i;
++ path[ppos].p_hdr = eh;
++ path[ppos].p_ext = NULL;
++
++ /* find extent */
++ ext3_ext_binsearch(tree, path + ppos, block);
++
++ ext3_ext_show_path(tree, path);
++
++ return path;
++}
++
++/*
++ * insert new index [logical;ptr] into the block at curp
++ * it checks where to insert: before curp or after curp
++ */
++static int ext3_ext_insert_index(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *curp,
++ int logical, int ptr)
++{
++ struct ext3_extent_idx *ix;
++ int len, err;
++
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ return err;
++
++ EXT_ASSERT(logical != curp->p_idx->ei_block);
++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
++ if (logical > curp->p_idx->ei_block) {
++ /* insert after */
++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
++ len = (len - 1) * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d after: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ (curp->p_idx + 1), (curp->p_idx + 2));
++ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
++ }
++ ix = curp->p_idx + 1;
++ } else {
++ /* insert before */
++ len = len * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d before: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ curp->p_idx, (curp->p_idx + 1));
++ memmove(curp->p_idx + 1, curp->p_idx, len);
++ ix = curp->p_idx;
++ }
++
++ ix->ei_block = logical;
++ ix->ei_leaf = ptr;
++ curp->p_hdr->eh_entries++;
++
++ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max);
++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr));
++
++ err = ext3_ext_dirty(handle, tree, curp);
++ ext3_std_error(tree->inode->i_sb, err);
++
++ return err;
++}
++
++/*
++ * routine inserts new subtree into the path, using free index entry
++ * at depth 'at':
++ * - allocates all needed blocks (new leaf and all intermediate index blocks)
++ * - makes decision where to split
++ * - moves remaining extents and index entries (right to the split point)
++ * into the newly allocated blocks
++ * - initializes subtree
++ */
++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext, int at)
++{
++ struct buffer_head *bh = NULL;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct ext3_extent *ex;
++ int i = at, k, m, a;
++ unsigned long newblock, oldblock, border;
++ int *ablocks = NULL; /* array of allocated blocks */
++ int err = 0;
++
++ /* make decision: where to split? */
++ /* FIXME: now decision is simplest: at current extent */
++
++ /* if current leaf will be split, then we should use
++ * border from split point */
++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr));
++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ border = path[depth].p_ext[1].ee_block;
++ ext_debug(tree, "leaf will be splitted."
++ " next leaf starts at %d\n",
++ (int)border);
++ } else {
++ border = newext->ee_block;
++ ext_debug(tree, "leaf will be added."
++ " next leaf starts at %d\n",
++ (int)border);
++ }
++
++ /*
++ * if error occurs, then we break processing
++ * and turn filesystem read-only. so, index won't
++ * be inserted and tree will be in consistent
++ * state. next mount will repair buffers too
++ */
++
++ /*
++ * get array to track all allocated blocks
++ * we need this to handle errors and free blocks
++ * upon them
++ */
++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
++ if (!ablocks)
++ return -ENOMEM;
++ memset(ablocks, 0, sizeof(unsigned long) * depth);
++
++ /* allocate all needed blocks */
++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at);
++ for (a = 0; a < depth - at; a++) {
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ goto cleanup;
++ ablocks[a] = newblock;
++ }
++
++ /* initialize new leaf */
++ newblock = ablocks[--a];
++ EXT_ASSERT(newblock);
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 0;
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_depth = 0;
++ ex = EXT_FIRST_EXTENT(neh);
++
++ /* move remain of path[depth] to the new leaf */
++ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max);
++ /* start copy from next extent */
++ /* TODO: we could do it by single memmove */
++ m = 0;
++ path[depth].p_ext++;
++ while (path[depth].p_ext <=
++ EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n",
++ path[depth].p_ext->ee_block,
++ path[depth].p_ext->ee_start,
++ path[depth].p_ext->ee_len,
++ newblock);
++ memmove(ex++, path[depth].p_ext++,
++ sizeof(struct ext3_extent));
++ neh->eh_entries++;
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old leaf */
++ if (m) {
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++ path[depth].p_hdr->eh_entries -= m;
++ if ((err = ext3_ext_dirty(handle, tree, path + depth)))
++ goto cleanup;
++
++ }
++
++ /* create intermediate indexes */
++ k = depth - at - 1;
++ EXT_ASSERT(k >= 0);
++ if (k)
++ ext_debug(tree, "create %d intermediate indices\n", k);
++ /* insert new index into current index block */
++ /* current depth stored in i var */
++ i = depth - 1;
++ while (k--) {
++ oldblock = newblock;
++ newblock = ablocks[--a];
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 1;
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ neh->eh_depth = depth - i;
++ fidx = EXT_FIRST_INDEX(neh);
++ fidx->ei_block = border;
++ fidx->ei_leaf = oldblock;
++
++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n",
++ i, newblock, border, oldblock);
++ /* copy indexes */
++ m = 0;
++ path[i].p_idx++;
++
++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx,
++ EXT_MAX_INDEX(path[i].p_hdr));
++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
++ EXT_LAST_INDEX(path[i].p_hdr));
++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
++ ext_debug(tree, "%d: move %d:%d in new index %lu\n",
++ i, path[i].p_idx->ei_block,
++ path[i].p_idx->ei_leaf, newblock);
++ memmove(++fidx, path[i].p_idx++,
++ sizeof(struct ext3_extent_idx));
++ neh->eh_entries++;
++ EXT_ASSERT(neh->eh_entries <= neh->eh_max);
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old index */
++ if (m) {
++ err = ext3_ext_get_access(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ path[i].p_hdr->eh_entries -= m;
++ err = ext3_ext_dirty(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ }
++
++ i--;
++ }
++
++ /* insert new index */
++ if (!err)
++ err = ext3_ext_insert_index(handle, tree, path + at,
++ border, newblock);
++
++cleanup:
++ if (bh) {
++ if (buffer_locked(bh))
++ unlock_buffer(bh);
++ brelse(bh);
++ }
++
++ if (err) {
++ /* free all allocated blocks in error case */
++ for (i = 0; i < depth; i++) {
++ if (!ablocks[i])
++ continue;
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ }
++ }
++ kfree(ablocks);
++
++ return err;
++}
++
++/*
++ * routine implements tree growing procedure:
++ * - allocates new block
++ * - moves top-level data (index block or leaf) into the new block
++ * - initialize new top-level, creating index that points to the
++ * just created block
++ */
++static int ext3_ext_grow_indepth(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp = path;
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct buffer_head *bh;
++ unsigned long newblock;
++ int err = 0;
++
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ return err;
++
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ ext3_std_error(tree->inode->i_sb, err);
++ return err;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh))) {
++ unlock_buffer(bh);
++ goto out;
++ }
++
++ /* move top-level index/leaf into new block */
++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len);
++
++ /* set size of new block */
++ neh = EXT_BLOCK_HDR(bh);
++ /* old root could have indexes or leaves
++ * so calculate e_max right way */
++ if (EXT_DEPTH(tree))
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ else
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto out;
++
++ /* create index in new top-level index: num,max,pointer */
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ goto out;
++
++ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC;
++ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree);
++ curp->p_hdr->eh_entries = 1;
++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++ /* FIXME: it works, but actually path[0] can be index */
++ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
++ curp->p_idx->ei_leaf = newblock;
++
++ neh = EXT_ROOT_HDR(tree);
++ fidx = EXT_FIRST_INDEX(neh);
++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n",
++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf);
++
++ neh->eh_depth = path->p_depth + 1;
++ err = ext3_ext_dirty(handle, tree, curp);
++out:
++ brelse(bh);
++
++ return err;
++}
++
++/*
++ * routine finds empty index and adds new leaf. if no free index found
++ * then it requests in-depth growing
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp;
++ int depth, i, err = 0;
++
++repeat:
++ i = depth = EXT_DEPTH(tree);
++
++ /* walk up the tree and look for a free index entry */
++ curp = path + depth;
++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++ i--;
++ curp--;
++ }
++
++ /* we use already allocated block for index block
++ * so, subsequent data blocks should be contiguous */
++ if (EXT_HAS_FREE_INDEX(curp)) {
++ /* if we found index with free entry, then use that
++ * entry: create all needed subtree and add new leaf */
++ err = ext3_ext_split(handle, tree, path, newext, i);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++ } else {
++ /* tree is full, time to grow in depth */
++ err = ext3_ext_grow_indepth(handle, tree, path, newext);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++
++ /*
++ * only first (depth 0 -> 1) produces free space
++ * in all other cases we have to split the grown tree
++ */
++ depth = EXT_DEPTH(tree);
++ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
++ /* now we need split */
++ goto repeat;
++ }
++ }
++
++ if (err)
++ return err;
++
++ return 0;
++}
++
++/*
++ * returns allocated block in subsequent extent or EXT_MAX_BLOCK
++ * NOTE: it considers block number from index entry as
++ * allocated block. thus, index entries have to be consistent
++ * with leaves
++ */
++static unsigned long
++ext3_ext_next_allocated_block(struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ if (depth == 0 && path->p_ext == NULL)
++ return EXT_MAX_BLOCK;
++
++ /* FIXME: what if index isn't full ?! */
++ while (depth >= 0) {
++ if (depth == path->p_depth) {
++ /* leaf */
++ if (path[depth].p_ext !=
++ EXT_LAST_EXTENT(path[depth].p_hdr))
++ return path[depth].p_ext[1].ee_block;
++ } else {
++ /* index */
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ }
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * returns first allocated block from next leaf or EXT_MAX_BLOCK
++ */
++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ /* zero-tree has no leaf blocks at all */
++ if (depth == 0)
++ return EXT_MAX_BLOCK;
++
++ /* go to index block */
++ depth--;
++
++ while (depth >= 0) {
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * if leaf gets modified and modified extent is first in the leaf
++ * then we have to correct all indexes above
++ * TODO: do we need to correct tree in all cases?
++ */
++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent *ex;
++ unsigned long border;
++ int k, err = 0;
++
++ eh = path[depth].p_hdr;
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(eh);
++
++ if (depth == 0) {
++ /* there is no tree at all */
++ return 0;
++ }
++
++ if (ex != EXT_FIRST_EXTENT(eh)) {
++ /* we correct tree if first leaf got modified only */
++ return 0;
++ }
++
++ /*
++ * TODO: we need correction if border is smaller than current one
++ */
++ k = depth - 1;
++ border = path[depth].p_ext->ee_block;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ return err;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ return err;
++
++ while (k--) {
++ /* change all left-side indexes */
++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
++ break;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ break;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ break;
++ }
++
++ return err;
++}
++
++static int inline
++ext3_can_extents_be_merged(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ if (ex1->ee_block + ex1->ee_len != ex2->ee_block)
++ return 0;
++
++#ifdef AGRESSIVE_TEST
++ if (ex1->ee_len >= 4)
++ return 0;
++#endif
++
++ if (!tree->ops->mergable)
++ return 1;
++
++ return tree->ops->mergable(ex1, ex2);
++}
++
++/*
++ * this routine tries to merge requested extent into the existing
++ * extent or inserts requested extent as new one into the tree,
++ * creating new leaf in no-space case
++ */
++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_extent_header * eh;
++ struct ext3_extent *ex, *fex;
++ struct ext3_extent *nearex; /* nearest extent */
++ struct ext3_ext_path *npath = NULL;
++ int depth, len, err, next;
++
++ EXT_ASSERT(newext->ee_len > 0);
++ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK);
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(path[depth].p_hdr);
++
++ /* try to insert block into found extent and return */
++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) {
++ ext_debug(tree, "append %d block to %d:%d (from %d)\n",
++ newext->ee_len, ex->ee_block, ex->ee_len,
++ ex->ee_start);
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ return err;
++ ex->ee_len += newext->ee_len;
++ eh = path[depth].p_hdr;
++ nearex = ex;
++ goto merge;
++ }
++
++repeat:
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max)
++ goto has_space;
++
++ /* probably next leaf has space for us? */
++ fex = EXT_LAST_EXTENT(eh);
++ next = ext3_ext_next_leaf_block(tree, path);
++ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) {
++ ext_debug(tree, "next leaf block - %d\n", next);
++ EXT_ASSERT(!npath);
++ npath = ext3_ext_find_extent(tree, next, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ EXT_ASSERT(npath->p_depth == path->p_depth);
++ eh = npath[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max) {
++ ext_debug(tree, "next leaf isnt full(%d)\n",
++ eh->eh_entries);
++ path = npath;
++ goto repeat;
++ }
++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n",
++ eh->eh_entries, eh->eh_max);
++ }
++
++ /*
++ * there is no free space in found leaf
++ * we're gonna add new leaf in the tree
++ */
++ err = ext3_ext_create_new_leaf(handle, tree, path, newext);
++ if (err)
++ goto cleanup;
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++
++has_space:
++ nearex = path[depth].p_ext;
++
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++
++ if (!nearex) {
++ /* there is no extent in this leaf, create first one */
++ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len);
++ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
++ } else if (newext->ee_block > nearex->ee_block) {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ if (nearex != EXT_LAST_EXTENT(eh)) {
++ len = EXT_MAX_EXTENT(eh) - nearex;
++ len = (len - 1) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 2, nearex + 1, len);
++ }
++ path[depth].p_ext = nearex + 1;
++ } else {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start, newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 1, nearex, len);
++ path[depth].p_ext = nearex;
++ }
++
++ eh->eh_entries++;
++ nearex = path[depth].p_ext;
++ nearex->ee_block = newext->ee_block;
++ nearex->ee_start = newext->ee_start;
++ nearex->ee_len = newext->ee_len;
++ /* FIXME: support for large fs */
++ nearex->ee_start_hi = 0;
++
++merge:
++ /* try to merge extents to the right */
++ while (nearex < EXT_LAST_EXTENT(eh)) {
++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1))
++ break;
++ /* merge with next extent! */
++ nearex->ee_len += nearex[1].ee_len;
++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
++ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
++ * sizeof(struct ext3_extent);
++ memmove(nearex + 1, nearex + 2, len);
++ }
++ eh->eh_entries--;
++ EXT_ASSERT(eh->eh_entries > 0);
++ }
++
++ /* try to merge extents to the left */
++
++ /* time to correct all indexes above */
++ err = ext3_ext_correct_indexes(handle, tree, path);
++ if (err)
++ goto cleanup;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++
++cleanup:
++ if (npath) {
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++ }
++ ext3_ext_tree_changed(tree);
++ ext3_ext_invalidate_cache(tree);
++ return err;
++}
++
++/*
++ * walk the extent tree over logical blocks [block, block+num) and call
++ * 'func' once per region. a region is either covered by an existing
++ * extent (exists=1) or is a hole between extents (exists=0). the
++ * callback steers the walk via EXT_REPEAT / EXT_BREAK / EXT_CONTINUE
++ * or aborts it with a negative error.
++ */
++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block,
++ unsigned long num, ext_prepare_callback func)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent *ex, cbex;
++ unsigned long next, start = 0, end = 0;
++ unsigned long last = block + num;
++ int depth, exists, err = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(func);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ while (block < last && block != EXT_MAX_BLOCK) {
++ num = last - block;
++ /* find extent for this block (path is reused across
++ * iterations when the tree depth hasn't changed) */
++ path = ext3_ext_find_extent(tree, block, path);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ break;
++ }
++
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(path[depth].p_hdr);
++ ex = path[depth].p_ext;
++ next = ext3_ext_next_allocated_block(path);
++
++ exists = 0;
++ if (!ex) {
++ /* there is no extent yet, so try to allocate
++ * all requested space */
++ start = block;
++ end = block + num;
++ } else if (ex->ee_block > block) {
++ /* need to allocate space before found extent */
++ start = block;
++ end = ex->ee_block;
++ if (block + num < end)
++ end = block + num;
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ /* need to allocate space after found extent */
++ start = block;
++ end = block + num;
++ if (end >= next)
++ end = next;
++ } else if (block >= ex->ee_block) {
++ /*
++ * some part of requested space is covered
++ * by found extent
++ */
++ start = block;
++ end = ex->ee_block + ex->ee_len;
++ if (block + num < end)
++ end = block + num;
++ exists = 1;
++ } else {
++ BUG();
++ }
++ EXT_ASSERT(end > start);
++
++ if (!exists) {
++ /* synthesize an unallocated extent describing
++ * the hole for the callback */
++ cbex.ee_block = start;
++ cbex.ee_len = end - start;
++ cbex.ee_start = 0;
++ } else
++ cbex = *ex;
++
++ EXT_ASSERT(path[depth].p_hdr);
++ err = func(tree, path, &cbex, exists);
++ ext3_ext_drop_refs(path);
++
++ if (err < 0)
++ break;
++ if (err == EXT_REPEAT)
++ continue;
++ else if (err == EXT_BREAK) {
++ err = 0;
++ break;
++ }
++
++ if (EXT_DEPTH(tree) != depth) {
++ /* depth was changed. we have to realloc path */
++ kfree(path);
++ path = NULL;
++ }
++
++ block = cbex.ee_block + cbex.ee_len;
++ }
++
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++
++ return err;
++}
++
++/*
++ * store one extent (EXT3_EXT_CACHE_EXTENT) or gap (EXT3_EXT_CACHE_GAP)
++ * in the tree's single-entry cache; a tree without cache storage is
++ * silently ignored
++ */
++static inline void
++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block,
++ __u32 len, __u32 start, int type)
++{
++ EXT_ASSERT(len > 0);
++ if (tree->cex) {
++ tree->cex->ec_type = type;
++ tree->cex->ec_block = block;
++ tree->cex->ec_len = len;
++ tree->cex->ec_start = start;
++ }
++}
++
++/*
++ * this routine calculates the boundaries of the gap the requested
++ * block falls into and caches that gap, so later lookups for blocks
++ * in the same hole can be answered without walking the tree
++ */
++static inline void
++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ unsigned long block)
++{
++ int depth = EXT_DEPTH(tree);
++ unsigned long lblock, len;
++ struct ext3_extent *ex;
++
++ if (!tree->cex)
++ return;
++
++ ex = path[depth].p_ext;
++ if (ex == NULL) {
++ /* there is no extent yet, so gap is [0;-] */
++ lblock = 0;
++ len = EXT_MAX_BLOCK;
++ ext_debug(tree, "cache gap(whole file):");
++ } else if (block < ex->ee_block) {
++ lblock = block;
++ len = ex->ee_block - block;
++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len);
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ lblock = ex->ee_block + ex->ee_len;
++ /* len temporarily holds the next allocated block;
++ * converted to a length two lines below */
++ len = ext3_ext_next_allocated_block(path);
++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu",
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) block);
++ EXT_ASSERT(len > lblock);
++ len = len - lblock;
++ } else {
++ /* block inside the extent is not a gap */
++ lblock = len = 0;
++ BUG();
++ }
++
++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len);
++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP);
++}
++
++/*
++ * look 'block' up in the single-entry cache. on a hit, *ex is filled
++ * from the cached entry and the cache type (EXT3_EXT_CACHE_GAP or
++ * EXT3_EXT_CACHE_EXTENT) is returned; otherwise EXT3_EXT_CACHE_NO.
++ */
++static inline int
++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block,
++ struct ext3_extent *ex)
++{
++ struct ext3_ext_cache *cex = tree->cex;
++
++ /* is there cache storage at all? */
++ if (!cex)
++ return EXT3_EXT_CACHE_NO;
++
++ /* has cache valid data? */
++ if (cex->ec_type == EXT3_EXT_CACHE_NO)
++ return EXT3_EXT_CACHE_NO;
++
++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP ||
++ cex->ec_type == EXT3_EXT_CACHE_EXTENT);
++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
++ ex->ee_block = cex->ec_block;
++ ex->ee_start = cex->ec_start;
++ ex->ee_len = cex->ec_len;
++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) ex->ee_start);
++ return cex->ec_type;
++ }
++
++ /* not in cache */
++ return EXT3_EXT_CACHE_NO;
++}
++
++/*
++ * routine removes index from the index block
++ * it's used in truncate case only. thus all requests are for
++ * last index in the block only
++ */
++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct buffer_head *bh;
++ int err;
++
++ /* free index block: 'path' points at the level being freed,
++ * so step up one level to the index entry referencing it */
++ path--;
++ EXT_ASSERT(path->p_hdr->eh_entries);
++ if ((err = ext3_ext_get_access(handle, tree, path)))
++ return err;
++ path->p_hdr->eh_entries--;
++ if ((err = ext3_ext_dirty(handle, tree, path)))
++ return err;
++ ext_debug(tree, "index is empty, remove it, free block %d\n",
++ path->p_idx->ei_leaf);
++ /* NOTE(review): sb_find_get_block() may return NULL; this
++ * assumes ext3_forget() copes with a NULL bh — TODO confirm */
++ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ return err;
++}
++
++/*
++ * return the worst-case number of journal credits needed to insert
++ * one extent into the tree. with a valid 'path' and free room in the
++ * leaf only one block is modified; otherwise assume a full split plus
++ * root growth.
++ */
++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth = EXT_DEPTH(tree);
++ int needed;
++
++ if (path) {
++ /* probably there is space in leaf? */
++ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max)
++ return 1;
++ }
++
++ /*
++ * the worst case we're expecting is creation of the
++ * new root (growing in depth) with index splitting
++ * for splitting we have to consider depth + 1 because
++ * previous growing could increase it
++ */
++ depth = depth + 1;
++
++ /*
++ * growing in depth:
++ * block allocation + new root + old root
++ */
++ needed = EXT3_ALLOC_NEEDED + 2;
++
++ /* index split. we may need:
++ * allocate intermediate indexes and new leaf
++ * change two blocks at each level, but root
++ * modify root block (inode)
++ */
++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
++
++ return needed;
++}
++
++/*
++ * removal of [start, end] strictly inside one extent needs the extent
++ * split in two: the head keeps the original extent slot (shortened to
++ * end at start-1) and the tail [end+1, ...] is inserted as a new
++ * extent. called by ext3_ext_rm_leaf() only.
++ */
++static int
++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, tex;
++ struct ext3_ext_path *npath;
++ int depth, creds, err;
++
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1);
++ EXT_ASSERT(ex->ee_block < start);
++
++ /* calculate tail extent */
++ tex.ee_block = end + 1;
++ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len);
++ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block;
++
++ /* make sure the handle has enough credits for the insert */
++ creds = ext3_ext_calc_credits_for_insert(tree, path);
++ handle = ext3_ext_journal_restart(handle, creds);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ /* calculate head extent. use primary extent */
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ return err;
++ ex->ee_len = start - ex->ee_block;
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ return err;
++
++ /* FIXME: some callback to free underlying resource
++ * and correct ee_start? */
++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n",
++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len);
++
++ /* re-find the (now shortened) extent: the insert below may
++ * change the tree, so a fresh path is required */
++ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block);
++ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len);
++
++ err = ext3_ext_insert_extent(handle, tree, npath, &tex);
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++
++ return err;
++
++}
++
++/*
++ * remove logical blocks [start, end] from a leaf block. extents are
++ * processed right-to-left; each may lose its head, its tail, or be
++ * removed entirely (slot marked unused via ee_start = 0 and later
++ * compacted). removal of a strictly interior range is delegated to
++ * ext3_ext_split_for_rm().
++ */
++static int
++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, *fu = NULL, *lu, *le;
++ int err = 0, correct_index = 0;
++ int depth = EXT_DEPTH(tree), credits;
++ struct ext3_extent_header *eh;
++ unsigned a, b, block, num;
++
++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end);
++ if (!path[depth].p_hdr)
++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh);
++ eh = path[depth].p_hdr;
++ EXT_ASSERT(eh);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++
++ /* find where to start removing: the last extent whose
++ * ee_block is <= end */
++ le = ex = EXT_LAST_EXTENT(eh);
++ while (ex != EXT_FIRST_EXTENT(eh)) {
++ if (ex->ee_block <= end)
++ break;
++ ex--;
++ }
++
++ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) {
++ /* removal of internal part of the extent requested
++ * tail and head must be placed in different extent
++ * so, we have to insert one more extent */
++ path[depth].p_ext = ex;
++ return ext3_ext_split_for_rm(handle, tree, path, start, end);
++ }
++
++ lu = ex;
++ while (ex >= EXT_FIRST_EXTENT(eh) &&
++ ex->ee_block + ex->ee_len > start) {
++ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len);
++ path[depth].p_ext = ex;
++
++ /* [a, b] is the part of this extent being removed */
++ a = ex->ee_block > start ? ex->ee_block : start;
++ b = ex->ee_block + ex->ee_len - 1 < end ?
++ ex->ee_block + ex->ee_len - 1 : end;
++
++ ext_debug(tree, " border %u:%u\n", a, b);
++
++ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) {
++ /* interior removal was handled above via split */
++ block = 0;
++ num = 0;
++ BUG();
++ } else if (a != ex->ee_block) {
++ /* remove tail of the extent */
++ block = ex->ee_block;
++ num = a - block;
++ } else if (b != ex->ee_block + ex->ee_len - 1) {
++ /* remove head of the extent */
++ block = a;
++ num = b - a;
++ } else {
++ /* remove whole extent: excellent! */
++ block = ex->ee_block;
++ num = 0;
++ EXT_ASSERT(a == ex->ee_block &&
++ b == ex->ee_block + ex->ee_len - 1);
++ }
++
++ if (ex == EXT_FIRST_EXTENT(eh))
++ correct_index = 1;
++
++ credits = 1;
++ if (correct_index)
++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1;
++ if (tree->ops->remove_extent_credits)
++ credits+=tree->ops->remove_extent_credits(tree,ex,a,b);
++
++ handle = ext3_ext_journal_restart(handle, credits);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out;
++ }
++
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ /* let the owner free the underlying blocks first */
++ if (tree->ops->remove_extent)
++ err = tree->ops->remove_extent(tree, ex, a, b);
++ if (err)
++ goto out;
++
++ if (num == 0) {
++ /* this extent is removed entirely mark slot unused */
++ ex->ee_start = 0;
++ eh->eh_entries--;
++ fu = ex;
++ }
++
++ ex->ee_block = block;
++ ex->ee_len = num;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ ext_debug(tree, "new extent: %u:%u:%u\n",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ ex--;
++ }
++
++ if (fu) {
++ /* reuse unused slots: compact live extents (ee_start != 0)
++ * down toward the first freed slot */
++ while (lu < le) {
++ if (lu->ee_start) {
++ *fu = *lu;
++ lu->ee_start = 0;
++ fu++;
++ }
++ lu++;
++ }
++ }
++
++ if (correct_index && eh->eh_entries)
++ err = ext3_ext_correct_indexes(handle, tree, path);
++
++ /* if this leaf is free, then we should
++ * remove it from index block above */
++ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
++ err = ext3_ext_rm_idx(handle, tree, path + depth);
++
++out:
++ return err;
++}
++
++
++/*
++ * return the last index entry whose ei_block is <= 'block', falling
++ * back to the first index when every entry starts beyond 'block'
++ */
++static struct ext3_extent_idx *
++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block)
++{
++ struct ext3_extent_idx *ix;
++
++ ix = EXT_LAST_INDEX(hdr);
++ while (ix != EXT_FIRST_INDEX(hdr)) {
++ if (ix->ei_block <= block)
++ break;
++ ix--;
++ }
++ return ix;
++}
++
++/*
++ * returns 1 if current index have to be freed (even partial)
++ */
++static int inline
++ext3_ext_more_to_rm(struct ext3_ext_path *path)
++{
++ EXT_ASSERT(path->p_idx);
++
++ /* walked past the first index entry at this level */
++ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
++ return 0;
++
++ /*
++ * if truncate on deeper level happened, it wasn't partial,
++ * so we have to consider current index for truncation
++ * (p_block caches eh_entries from the previous visit)
++ */
++ if (path->p_hdr->eh_entries == path->p_block)
++ return 0;
++ return 1;
++}
++
++/*
++ * remove all blocks in the logical range [start, end] from the tree.
++ * the tree is walked iteratively from the root, rightmost entries
++ * first: leaves have their extents removed by ext3_ext_rm_leaf(),
++ * and index blocks that become empty are freed on the way back up.
++ * when truncate empties the whole tree the root is reset to depth 0.
++ */
++int ext3_ext_remove_space(struct ext3_extents_tree *tree,
++ unsigned long start, unsigned long end)
++{
++ struct inode *inode = tree->inode;
++ struct super_block *sb = inode->i_sb;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_ext_path *path;
++ handle_t *handle;
++ int i = 0, err = 0;
++
++ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end);
++
++ /* probably first extent we're gonna free will be last in block */
++ handle = ext3_journal_start(inode, depth + 1);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ ext3_ext_invalidate_cache(tree);
++
++ /*
++ * we start scanning from right side freeing all the blocks
++ * after i_size and walking into the deep
++ */
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++ if (path == NULL) {
++ /* kmalloc() returns NULL on failure, never an ERR_PTR,
++ * so checking IS_ERR() here would miss the failure and
++ * lead to a NULL dereference below */
++ ext3_error(sb, "ext3_ext_remove_space",
++ "Can't allocate path array");
++ ext3_journal_stop(handle);
++ return -ENOMEM;
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[i].p_hdr = EXT_ROOT_HDR(tree);
++
++ while (i >= 0 && err == 0) {
++ if (i == depth) {
++ /* this is leaf block */
++ err = ext3_ext_rm_leaf(handle, tree, path, start, end);
++ /* root level have p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ continue;
++ }
++
++ /* this is index block */
++ if (!path[i].p_hdr) {
++ ext_debug(tree, "initialize header\n");
++ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh);
++ }
++
++ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max);
++ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC);
++
++ if (!path[i].p_idx) {
++ /* this level hasn't touched yet */
++ path[i].p_idx =
++ ext3_ext_last_covered(path[i].p_hdr, end);
++ path[i].p_block = path[i].p_hdr->eh_entries + 1;
++ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n",
++ path[i].p_hdr, path[i].p_hdr->eh_entries);
++ } else {
++ /* we've already was here, see at next index */
++ path[i].p_idx--;
++ }
++
++ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n",
++ i, EXT_FIRST_INDEX(path[i].p_hdr),
++ path[i].p_idx);
++ if (ext3_ext_more_to_rm(path + i)) {
++ /* go to the next level */
++ ext_debug(tree, "move to level %d (block %d)\n",
++ i + 1, path[i].p_idx->ei_leaf);
++ memset(path + i + 1, 0, sizeof(*path));
++ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf);
++ if (!path[i+1].p_bh) {
++ /* should we reset i_size? */
++ err = -EIO;
++ break;
++ }
++ /* put actual number of indexes to know is this
++ * number got changed at the next iteration */
++ path[i].p_block = path[i].p_hdr->eh_entries;
++ i++;
++ } else {
++ /* we finish processing this index, go up */
++ if (path[i].p_hdr->eh_entries == 0 && i > 0) {
++ /* index is empty, remove it
++ * handle must be already prepared by the
++ * truncatei_leaf() */
++ err = ext3_ext_rm_idx(handle, tree, path + i);
++ }
++ /* root level have p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ ext_debug(tree, "return to level %d\n", i);
++ }
++ }
++
++ /* TODO: flexible tree reduction should be here */
++ if (path->p_hdr->eh_entries == 0) {
++ /*
++ * truncate to zero freed all the tree
++ * so, we need to correct eh_depth
++ */
++ err = ext3_ext_get_access(handle, tree, path);
++ if (err == 0) {
++ EXT_ROOT_HDR(tree)->eh_depth = 0;
++ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree);
++ err = ext3_ext_dirty(handle, tree, path);
++ }
++ }
++ ext3_ext_tree_changed(tree);
++
++ kfree(path);
++ ext3_journal_stop(handle);
++
++ return err;
++}
++
++/*
++ * worst-case estimate of how many metadata blocks (leaves plus index
++ * blocks) are needed to store 'blocks' extents, assuming one extent
++ * per block; returns 0 when all extents fit in the inode's root
++ */
++int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks)
++{
++ int lcap, icap, rcap, leafs, idxs, num;
++
++ rcap = ext3_ext_space_root(tree);
++ if (blocks <= rcap) {
++ /* all extents fit to the root */
++ return 0;
++ }
++
++ /* capacities: root (as index), leaf block, index block */
++ rcap = ext3_ext_space_root_idx(tree);
++ lcap = ext3_ext_space_block(tree);
++ icap = ext3_ext_space_block_idx(tree);
++
++ num = leafs = (blocks + lcap - 1) / lcap;
++ if (leafs <= rcap) {
++ /* all pointers to leafs fit to the root */
++ return leafs;
++ }
++
++ /* ok. we need separate index block(s) to link all leaf blocks */
++ idxs = (leafs + icap - 1) / icap;
++ do {
++ num += idxs;
++ idxs = (idxs + icap - 1) / icap;
++ } while (idxs > rcap);
++
++ return num;
++}
++
++/*
++ * called at mount time; currently only announces that extents are
++ * enabled (plus any debug build options)
++ */
++void ext3_ext_init(struct super_block *sb)
++{
++ /*
++ * possible initialization would be here
++ */
++
++ if (test_opt(sb, EXTENTS)) {
++ printk("EXT3-fs: file extents enabled");
++#ifdef AGRESSIVE_TEST
++ printk(", agressive tests");
++#endif
++#ifdef CHECK_BINSEARCH
++ printk(", check binsearch");
++#endif
++ printk("\n");
++ }
++}
++
++/*
++ * called at umount time; nothing to tear down at present, kept as a
++ * hook symmetric to ext3_ext_init()
++ */
++void ext3_ext_release(struct super_block *sb)
++{
++}
++
++/************************************************************************
++ * VFS related routines
++ ************************************************************************/
++
++/* get_write_access callback for the inode-rooted tree: the root lives
++ * in in-core inode data, not in a buffer, so there is nothing to do */
++static int ext3_get_inode_write_access(handle_t *handle, void *buffer)
++{
++ /* we use in-core data, not bh */
++ return 0;
++}
++
++/* mark_buffer_dirty callback: 'buffer' is really the inode (set up by
++ * ext3_init_tree_desc), so dirty the inode instead of a bh */
++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer)
++{
++ struct inode *inode = buffer;
++ return ext3_mark_inode_dirty(handle, inode);
++}
++
++/* two extents are mergable when ex2's physical start directly follows
++ * the end of ex1 (logical adjacency is checked by the caller) */
++static int ext3_ext_mergable(struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ /* FIXME: support for large fs */
++ if (ex1->ee_start + ex1->ee_len == ex2->ee_start)
++ return 1;
++ return 0;
++}
++
++/*
++ * journal credits needed by ext3_remove_blocks() to free the blocks
++ * backing one extent range; the extent is assumed not to cross a
++ * block group, so one bitmap and one group descriptor suffice
++ */
++static int
++ext3_remove_blocks_credits(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed;
++
++ /* at present, extent can't cross block group */
++ needed = 4; /* bitmap + group desc + sb + inode */
++
++#ifdef CONFIG_QUOTA
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++ return needed;
++}
++
++/*
++ * remove_extent callback for the blockmap tree: frees the physical
++ * blocks backing logical range [from, to] of extent 'ex'. only tail
++ * removal is actually implemented; other shapes are logged and the
++ * blocks are NOT freed (NOTE(review): potential block leak there).
++ */
++static int
++ext3_remove_blocks(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
++ handle_t *handle = ext3_journal_start(tree->inode, needed);
++ struct buffer_head *bh;
++ int i;
++
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
++ /* tail removal */
++ unsigned long num, start;
++ num = ex->ee_block + ex->ee_len - from;
++ start = ex->ee_start + ex->ee_len - num;
++ ext_debug(tree, "free last %lu blocks starting %lu\n",
++ num, start);
++ /* forget any cached buffers before freeing the blocks */
++ for (i = 0; i < num; i++) {
++ bh = sb_find_get_block(tree->inode->i_sb, start + i);
++ ext3_forget(handle, 0, tree->inode, bh, start + i);
++ }
++ ext3_free_blocks(handle, tree->inode, start, num);
++ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
++ printk("strange request: removal %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ } else {
++ printk("strange request: removal(2) %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ }
++ ext3_journal_stop(handle);
++ return 0;
++}
++
++/*
++ * pick a goal physical block for allocating 'block': prefer a spot
++ * predicted from the nearest extent, then the leaf block itself, and
++ * finally fall back to the inode's block group (spread by pid colour)
++ */
++static int ext3_ext_find_goal(struct inode *inode,
++ struct ext3_ext_path *path, unsigned long block)
++{
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++ int depth;
++
++ if (path) {
++ struct ext3_extent *ex;
++ depth = path->p_depth;
++
++ /* try to predict block placement */
++ if ((ex = path[depth].p_ext))
++ return ex->ee_start + (block - ex->ee_block);
++
++ /* it looks index is empty
++ * try to find starting from index itself */
++ if (path[depth].p_bh)
++ return path[depth].p_bh->b_blocknr;
++ }
++
++ /* OK. use inode's group */
++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ return bg_start + colour + block;
++}
++
++/*
++ * new_block callback: hand out the first block of 'ex' (to keep
++ * metadata near data) and shift the extent forward by one. when the
++ * extent is exhausted, allocate a replacement block for it; on
++ * allocation failure the extent is restored and 0 is returned with
++ * *err set by ext3_new_block().
++ */
++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int *err)
++{
++ struct inode *inode = tree->inode;
++ int newblock, goal;
++
++ EXT_ASSERT(path);
++ EXT_ASSERT(ex);
++ EXT_ASSERT(ex->ee_start);
++ EXT_ASSERT(ex->ee_len);
++
++ /* reuse block from the extent to order data/metadata */
++ newblock = ex->ee_start++;
++ ex->ee_len--;
++ if (ex->ee_len == 0) {
++ ex->ee_len = 1;
++ /* allocate new block for the extent */
++ goal = ext3_ext_find_goal(inode, path, ex->ee_block);
++ ex->ee_start = ext3_new_block(handle, inode, goal, err);
++ if (ex->ee_start == 0) {
++ /* error occured: restore old extent */
++ ex->ee_start = newblock;
++ return 0;
++ }
++ }
++ return newblock;
++}
++
++/* helper ops for the standard inode blockmap tree (root in i_data) */
++static struct ext3_extents_helpers ext3_blockmap_helpers = {
++ .get_write_access = ext3_get_inode_write_access,
++ .mark_buffer_dirty = ext3_mark_buffer_dirty,
++ .mergable = ext3_ext_mergable,
++ .new_block = ext3_new_block_cb,
++ .remove_extent = ext3_remove_blocks,
++ .remove_extent_credits = ext3_remove_blocks_credits,
++};
++
++/*
++ * set up an extents-tree descriptor rooted in the inode's i_data,
++ * with the per-inode one-entry extent cache and blockmap helper ops
++ */
++void ext3_init_tree_desc(struct ext3_extents_tree *tree,
++ struct inode *inode)
++{
++ tree->inode = inode;
++ tree->root = (void *) EXT3_I(inode)->i_data;
++ tree->buffer = (void *) inode;
++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data);
++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent;
++ tree->ops = &ext3_blockmap_helpers;
++}
++
++/*
++ * map logical block 'iblock' of an extent-based inode to a physical
++ * block: consult the one-entry cache, then the tree; with create != 0
++ * allocate a new block, insert it as an extent and mark bh new.
++ * serialized by truncate_sem.
++ */
++int ext3_ext_get_block(handle_t *handle, struct inode *inode,
++ long iblock, struct buffer_head *bh_result,
++ int create, int extend_disksize)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent newex;
++ struct ext3_extent *ex;
++ int goal, newblock, err = 0, depth;
++ struct ext3_extents_tree tree;
++
++ clear_buffer_new(bh_result);
++ ext3_init_tree_desc(&tree, inode);
++ ext_debug(&tree, "block %d requested for inode %u\n",
++ (int) iblock, (unsigned) inode->i_ino);
++ down(&EXT3_I(inode)->truncate_sem);
++
++ /* check in cache */
++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) {
++ if (goal == EXT3_EXT_CACHE_GAP) {
++ if (!create) {
++ /* block isn't allocated yet and
++ * user don't want to allocate it */
++ goto out2;
++ }
++ /* we should allocate requested block */
++ } else if (goal == EXT3_EXT_CACHE_EXTENT) {
++ /* block is already allocated */
++ newblock = iblock - newex.ee_block + newex.ee_start;
++ goto out;
++ } else {
++ EXT_ASSERT(0);
++ }
++ }
++
++ /* find extent for this block */
++ path = ext3_ext_find_extent(&tree, iblock, NULL);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ goto out2;
++ }
++
++ depth = EXT_DEPTH(&tree);
++
++ /*
++ * consistent leaf must not be empty
++ * this situations is possible, though, _during_ tree modification
++ * this is why assert can't be put in ext3_ext_find_extent()
++ */
++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0);
++
++ if ((ex = path[depth].p_ext)) {
++ /* if found exent covers block, simple return it */
++ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) {
++ newblock = iblock - ex->ee_block + ex->ee_start;
++ ext_debug(&tree, "%d fit into %d:%d -> %d\n",
++ (int) iblock, ex->ee_block, ex->ee_len,
++ newblock);
++ ext3_ext_put_in_cache(&tree, ex->ee_block,
++ ex->ee_len, ex->ee_start,
++ EXT3_EXT_CACHE_EXTENT);
++ goto out;
++ }
++ }
++
++ /*
++ * requested block isn't allocated yet
++ * we couldn't try to create block if create flag is zero
++ */
++ if (!create) {
++ /* put just found gap into cache to speedup subsequent reqs */
++ ext3_ext_put_gap_in_cache(&tree, path, iblock);
++ goto out2;
++ }
++
++ /* allocate new block */
++ goal = ext3_ext_find_goal(inode, path, iblock);
++ newblock = ext3_new_block(handle, inode, goal, &err);
++ if (!newblock)
++ goto out2;
++ ext_debug(&tree, "allocate new block: goal %d, found %d\n",
++ goal, newblock);
++
++ /* try to insert new extent into found leaf and return */
++ newex.ee_block = iblock;
++ newex.ee_start = newblock;
++ newex.ee_len = 1;
++ err = ext3_ext_insert_extent(handle, &tree, path, &newex);
++ if (err)
++ goto out2;
++
++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize)
++ EXT3_I(inode)->i_disksize = inode->i_size;
++
++ /* previous routine could use block we allocated */
++ newblock = newex.ee_start;
++ set_buffer_new(bh_result);
++
++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
++ newex.ee_start, EXT3_EXT_CACHE_EXTENT);
++out:
++ ext3_ext_show_leaf(&tree, path);
++ map_bh(bh_result, inode->i_sb, newblock);
++out2:
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++ up(&EXT3_I(inode)->truncate_sem);
++
++ return err;
++}
++
++/*
++ * truncate an extent-based inode to i_size: zero the partial tail
++ * page, record the new disksize for crash recovery, then free all
++ * extents past the last block. uses the orphan list so an interrupted
++ * multi-transaction truncate can be completed after a crash.
++ */
++void ext3_ext_truncate(struct inode * inode, struct page *page)
++{
++ struct address_space *mapping = inode->i_mapping;
++ struct super_block *sb = inode->i_sb;
++ struct ext3_extents_tree tree;
++ unsigned long last_block;
++ handle_t *handle;
++ int err = 0;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ /*
++ * probably first extent we're gonna free will be last in block
++ */
++ err = ext3_writepage_trans_blocks(inode) + 3;
++ handle = ext3_journal_start(inode, err);
++ if (IS_ERR(handle)) {
++ if (page) {
++ clear_highpage(page);
++ flush_dcache_page(page);
++ unlock_page(page);
++ page_cache_release(page);
++ }
++ return;
++ }
++
++ if (page)
++ ext3_block_truncate_page(handle, page, mapping, inode->i_size);
++
++ down(&EXT3_I(inode)->truncate_sem);
++ ext3_ext_invalidate_cache(&tree);
++
++ /*
++ * TODO: optimization is possible here
++ * probably we need not scaning at all,
++ * because page truncation is enough
++ */
++ if (ext3_orphan_add(handle, inode))
++ goto out_stop;
++
++ /* we have to know where to truncate from in crash case */
++ EXT3_I(inode)->i_disksize = inode->i_size;
++ ext3_mark_inode_dirty(handle, inode);
++
++ last_block = (inode->i_size + sb->s_blocksize - 1)
++ >> EXT3_BLOCK_SIZE_BITS(sb);
++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK);
++
++ /* In a multi-transaction truncate, we only make the final
++ * transaction synchronous */
++ if (IS_SYNC(inode))
++ handle->h_sync = 1;
++
++out_stop:
++ /*
++ * If this was a simple ftruncate(), and the file will remain alive
++ * then we need to clear up the orphan record which we created above.
++ * However, if this was a real unlink then we were called by
++ * ext3_delete_inode(), and we allow that function to clean up the
++ * orphan info for us.
++ */
++ if (inode->i_nlink)
++ ext3_orphan_del(handle, inode);
++
++ up(&EXT3_I(inode)->truncate_sem);
++ ext3_journal_stop(handle);
++}
++
++/*
++ * this routine calculates the max number of blocks we could modify
++ * in order to allocate 'num' new blocks for an inode (worst-case
++ * journal credits, quota blocks included)
++ */
++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
++{
++ struct ext3_extents_tree tree;
++ int needed;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ /* NULL path: assume worst-case tree growth per insert */
++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL);
++
++ /* caller want to allocate num blocks */
++ needed *= num;
++
++#ifdef CONFIG_QUOTA
++ /*
++ * FIXME: real calculation should be here
++ * it depends on blockmap format of quota file
++ */
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++ return needed;
++}
++
++/* initialize an empty extents root in the inode's i_data; called from
++ * ialloc when a new inode gets the EXT3_EXTENTS_FL flag */
++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ ext3_extent_tree_init(handle, &tree);
++}
++
++/* convenience wrapper: metadata estimate for the inode blockmap tree */
++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ return ext3_ext_calc_metadata_amount(&tree, blocks);
++}
++
++/*
++ * walk-space callback for EXT3_IOC_GET_EXTENTS: copy each existing
++ * extent into the userspace buffer described by tree->private. while
++ * no fault has occurred buf->err counts copied extents; on fault it
++ * becomes -EFAULT and the walk stops.
++ */
++static int
++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newex, int exist)
++{
++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private;
++
++ if (!exist)
++ return EXT_CONTINUE;
++ if (buf->err < 0)
++ return EXT_BREAK;
++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen)
++ return EXT_BREAK;
++
++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) {
++ buf->err++;
++ buf->cur += sizeof(*newex);
++ } else {
++ buf->err = -EFAULT;
++ return EXT_BREAK;
++ }
++ return EXT_CONTINUE;
++}
++
++/*
++ * walk-space callback for EXT3_IOC_GET_TREE_STATS: count extents and
++ * leaf blocks (a leaf is counted when its first extent is visited)
++ */
++static int
++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int exist)
++{
++ struct ext3_extent_tree_stats *buf =
++ (struct ext3_extent_tree_stats *) tree->private;
++ int depth;
++
++ if (!exist)
++ return EXT_CONTINUE;
++
++ depth = EXT_DEPTH(tree);
++ buf->extents_num++;
++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr))
++ buf->leaf_num++;
++ return EXT_CONTINUE;
++}
++
++/*
++ * extent-specific ioctls: dump extents to userspace, collect tree
++ * statistics, or report tree depth. only valid on extent-mapped
++ * inodes (EXT3_EXTENTS_FL); all tree access is under truncate_sem.
++ */
++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
++ unsigned long arg)
++{
++ int err = 0;
++
++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
++ return -EINVAL;
++
++ if (cmd == EXT3_IOC_GET_EXTENTS) {
++ struct ext3_extent_buf buf;
++ struct ext3_extents_tree tree;
++
++ if (copy_from_user(&buf, (void *) arg, sizeof(buf)))
++ return -EFAULT;
++
++ ext3_init_tree_desc(&tree, inode);
++ buf.cur = buf.buffer;
++ buf.err = 0;
++ tree.private = &buf;
++ down(&EXT3_I(inode)->truncate_sem);
++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK,
++ ext3_ext_store_extent_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ if (err == 0)
++ err = buf.err;
++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) {
++ struct ext3_extent_tree_stats buf;
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ buf.depth = EXT_DEPTH(&tree);
++ buf.extents_num = 0;
++ buf.leaf_num = 0;
++ tree.private = &buf;
++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK,
++ ext3_ext_collect_stats_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ /* copy_to_user() returns the number of bytes NOT copied,
++ * not an errno: returning it raw would hand a positive
++ * byte count back to userspace as the ioctl result */
++ if (!err && copy_to_user((void *) arg, &buf, sizeof(buf)))
++ err = -EFAULT;
++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) {
++ struct ext3_extents_tree tree;
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ err = EXT_DEPTH(&tree);
++ up(&EXT3_I(inode)->truncate_sem);
++ }
++
++ return err;
++}
++
++/* symbols exported for external users of the extents tree API
++ * (e.g. Lustre builds its own trees on these helpers) — hedge:
++ * exact consumer not visible from this file */
++EXPORT_SYMBOL(ext3_init_tree_desc);
++EXPORT_SYMBOL(ext3_mark_inode_dirty);
++EXPORT_SYMBOL(ext3_ext_invalidate_cache);
++EXPORT_SYMBOL(ext3_ext_insert_extent);
++EXPORT_SYMBOL(ext3_ext_walk_space);
++EXPORT_SYMBOL(ext3_ext_find_goal);
++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert);
++
+Index: linux-2.6.5-sles9/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300
++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300
+@@ -647,6 +647,10 @@
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
++ if (test_opt(sb, EXTENTS)) {
++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
++ ext3_extents_initialize_blockmap(handle, inode);
++ }
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext3_std_error(sb, err);
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
+@@ -796,6 +796,17 @@
+ goto reread;
+ }
+
++static inline int
++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block,
++ struct buffer_head *bh, int create, int extend_disksize)
++{
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_get_block(handle, inode, block, bh, create,
++ extend_disksize);
++ return ext3_get_block_handle(handle, inode, block, bh, create,
++ extend_disksize);
++}
++
+ static int ext3_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+ {
+@@ -806,8 +817,8 @@
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 1);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 1);
+ return ret;
+ }
+
+@@ -833,8 +844,8 @@
+ }
+ }
+ if (ret == 0)
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 0);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 0);
+ if (ret == 0)
+ bh_result->b_size = (1 << inode->i_blkbits);
+ return ret;
+@@ -855,7 +866,7 @@
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1587,7 +1598,7 @@
+ * This required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */
+-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
++int ext3_block_truncate_page(handle_t *handle, struct page *page,
+ struct address_space *mapping, loff_t from)
+ {
+ unsigned long index = from >> PAGE_CACHE_SHIFT;
+@@ -2083,6 +2094,9 @@
+ return;
+ }
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_truncate(inode, page);
++
+ handle = start_transaction(inode);
+ if (IS_ERR(handle)) {
+ if (page) {
+@@ -2789,6 +2803,9 @@
+ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+ int ret;
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_writepage_trans_blocks(inode, bpp);
++
+ if (ext3_should_journal_data(inode))
+ ret = 3 * (bpp + indirects) + 2;
+ else
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+@@ -447,6 +448,10 @@
+ #endif
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ ei->vfs_inode.i_version = 1;
++ ei->i_cached_extent[0] = 0;
++ ei->i_cached_extent[1] = 0;
++ ei->i_cached_extent[2] = 0;
++ ei->i_cached_extent[3] = 0;
+ return &ei->vfs_inode;
+ }
+
+@@ -537,7 +542,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err,
++ Opt_err, Opt_extents, Opt_extdebug
+ };
+
+ static match_table_t tokens = {
+@@ -582,6 +587,8 @@
+ {Opt_iopen, "iopen"},
+ {Opt_noiopen, "noiopen"},
+ {Opt_iopen_nopriv, "iopen_nopriv"},
++ {Opt_extents, "extents"},
++ {Opt_extdebug, "extdebug"},
+ {Opt_err, NULL}
+ };
+
+@@ -797,6 +804,12 @@
+ break;
+ case Opt_ignore:
+ break;
++ case Opt_extents:
++ set_opt (sbi->s_mount_opt, EXTENTS);
++ break;
++ case Opt_extdebug:
++ set_opt (sbi->s_mount_opt, EXTDEBUG);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1449,6 +1462,8 @@
+ percpu_counter_mod(&sbi->s_dirs_counter,
+ ext3_count_dirs(sb));
+
++ ext3_ext_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/ioctl.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300
++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300
+@@ -124,6 +124,10 @@
+ err = ext3_change_inode_journal_flag(inode, jflag);
+ return err;
+ }
++ case EXT3_IOC_GET_EXTENTS:
++ case EXT3_IOC_GET_TREE_STATS:
++ case EXT3_IOC_GET_TREE_DEPTH:
++ return ext3_ext_ioctl(inode, filp, cmd, arg);
+ case EXT3_IOC_GETVERSION:
+ case EXT3_IOC_GETVERSION_OLD:
+ return put_user(inode->i_generation, (int *) arg);
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
+@@ -186,6 +186,7 @@
+ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+
+ #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+@@ -211,6 +212,9 @@
+ #endif
+ #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
+ #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long)
++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long)
++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long)
+
+ /*
+ * Structure of an inode on the disk
+@@ -333,6 +337,8 @@
+ #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
+ #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -729,6 +735,7 @@
+
+
+ /* inode.c */
++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t);
+ extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+@@ -802,6 +809,14 @@
+ extern struct inode_operations ext3_symlink_inode_operations;
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+
++/* extents.c */
++extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
++extern int ext3_ext_get_block(handle_t *, struct inode *, long,
++ struct buffer_head *, int, int);
++extern void ext3_ext_truncate(struct inode *, struct page *);
++extern void ext3_ext_init(struct super_block *);
++extern void ext3_ext_release(struct super_block *);
++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
+
+ #endif /* __KERNEL__ */
+
+Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300
+@@ -0,0 +1,252 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++#ifndef _LINUX_EXT3_EXTENTS
++#define _LINUX_EXT3_EXTENTS
++
++/*
++ * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
++ * becomes very small, so index splits, in-depth tree growth and
++ * other hard changes happen much more often
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if CHECK_BINSEARCH defined, then results of binary search
++ * will be checked by linear search
++ */
++#define CHECK_BINSEARCH_
++
++/*
++ * if EXT_DEBUG is defined you can use the 'extdebug' mount option
++ * to get lots of info about what's going on
++ */
++#define EXT_DEBUG_
++#ifdef EXT_DEBUG
++#define ext_debug(tree,fmt,a...) \
++do { \
++ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \
++ printk(fmt, ##a); \
++} while (0);
++#else
++#define ext_debug(tree,fmt,a...)
++#endif
++
++/*
++ * if EXT_STATS is defined then stats numbers are collected
++ * these numbers will be displayed at umount time
++ */
++#define EXT_STATS_
++
++
++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * first 4 bytes are used to store:
++ * - tree depth (0 means there is no tree yet; all extents are in the inode)
++ * - number of alive extents in the inode
++ */
++
++/*
++ * this is extent on-disk structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++ __u32 ee_block; /* first logical block extent covers */
++ __u16 ee_len; /* number of blocks covered by extent */
++ __u16 ee_start_hi; /* high 16 bits of physical block */
++ __u32 ee_start; /* low 32 bits of physical block */
++};
++
++/*
++ * this is index on-disk structure
++ * it's used at all the levels, but the bottom
++ */
++struct ext3_extent_idx {
++ __u32 ei_block; /* index covers logical blocks from 'block' */
++ __u32 ei_leaf; /* pointer to the physical block of the next *
++ * level. leaf or next index could be here */
++ __u16 ei_leaf_hi; /* high 16 bits of physical block */
++ __u16 ei_unused;
++};
++
++/*
++ * each block (leaves and indexes), even inode-stored has header
++ */
++struct ext3_extent_header {
++ __u16 eh_magic; /* probably will support different formats */
++ __u16 eh_entries; /* number of valid entries */
++ __u16 eh_max; /* capacity of store in entries */
++ __u16 eh_depth; /* does the tree have real underlying blocks? */
++ __u32 eh_generation; /* generation of the tree */
++};
++
++#define EXT3_EXT_MAGIC 0xf30a
++
++/*
++ * array of ext3_ext_path contains path to some extent
++ * creation/lookup routines use it for traversal/splitting/etc
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++ __u32 p_block;
++ __u16 p_depth;
++ struct ext3_extent *p_ext;
++ struct ext3_extent_idx *p_idx;
++ struct ext3_extent_header *p_hdr;
++ struct buffer_head *p_bh;
++};
++
++/*
++ * structure for external API
++ */
++
++/*
++ * storage for cached extent
++ */
++struct ext3_ext_cache {
++ __u32 ec_start;
++ __u32 ec_block;
++ __u32 ec_len;
++ __u32 ec_type;
++};
++
++#define EXT3_EXT_CACHE_NO 0
++#define EXT3_EXT_CACHE_GAP 1
++#define EXT3_EXT_CACHE_EXTENT 2
++
++/*
++ * ext3_extents_tree is used to pass initial information
++ * to top-level extents API
++ */
++struct ext3_extents_helpers;
++struct ext3_extents_tree {
++ struct inode *inode; /* inode which tree belongs to */
++ void *root; /* ptr to data top of tree resides at */
++ void *buffer; /* will be passed as arg to ^^ routines */
++ int buffer_len;
++ void *private;
++ struct ext3_ext_cache *cex;/* last found extent */
++ struct ext3_extents_helpers *ops;
++};
++
++struct ext3_extents_helpers {
++ int (*get_write_access)(handle_t *h, void *buffer);
++ int (*mark_buffer_dirty)(handle_t *h, void *buffer);
++ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2);
++ int (*remove_extent_credits)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*remove_extent)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*new_block)(handle_t *, struct ext3_extents_tree *,
++ struct ext3_ext_path *, struct ext3_extent *,
++ int *);
++};
++
++/*
++ * to be called by ext3_ext_walk_space()
++ * negative retcode - error
++ * positive retcode - signal for ext3_ext_walk_space(), see below
++ * callback must return valid extent (passed or newly created)
++ */
++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *,
++ struct ext3_ext_path *,
++ struct ext3_extent *, int);
++
++#define EXT_CONTINUE 0
++#define EXT_BREAK 1
++#define EXT_REPEAT 2
++
++
++#define EXT_MAX_BLOCK 0xffffffff
++#define EXT_CACHE_MARK 0xffff
++
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++ ((struct ext3_extent *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++
++#define EXT_ROOT_HDR(tree) \
++ ((struct ext3_extent_header *) (tree)->root)
++#define EXT_BLOCK_HDR(bh) \
++ ((struct ext3_extent_header *) (bh)->b_data)
++#define EXT_DEPTH(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
++#define EXT_GENERATION(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++
++
++#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
++
++
++/*
++ * this structure is used to gather extents from the tree via ioctl
++ */
++struct ext3_extent_buf {
++ unsigned long start;
++ int buflen;
++ void *buffer;
++ void *cur;
++ int err;
++};
++
++/*
++ * this structure is used to collect stats info about the tree
++ */
++struct ext3_extent_tree_stats {
++ int depth;
++ int extents_num;
++ int leaf_num;
++};
++
++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *);
++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *);
++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *);
++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback);
++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long);
++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *);
++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *);
++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int);
++
++static inline void
++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree)
++{
++ if (tree->cex)
++ tree->cex->ec_type = EXT3_EXT_CACHE_NO;
++}
++
++
++#endif /* _LINUX_EXT3_EXTENTS */
++
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300
+@@ -128,6 +128,8 @@
+ */
+ struct semaphore truncate_sem;
+ struct inode vfs_inode;
++
++ __u32 i_cached_extent[4];
+ };
+
+ #endif /* _LINUX_EXT3_FS_I */
+
+%diffstat
+ fs/ext3/Makefile | 2
+ fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/ialloc.c | 4
+ fs/ext3/inode.c | 29
+ fs/ext3/ioctl.c | 4
+ fs/ext3/super.c | 17
+ include/linux/ext3_extents.h | 252 ++++
+ include/linux/ext3_fs.h | 15
+ include/linux/ext3_fs_i.h | 2
+ 9 files changed, 2630 insertions(+), 8 deletions(-)
+
--- /dev/null
+Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
+@@ -0,0 +1,1428 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advise the allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over
++ * structures. these checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/*
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb;
++
++ /* search goals */
++ int ac_g_group;
++ int ac_g_start;
++ int ac_g_len;
++ int ac_g_flags;
++
++ /* the best found extent */
++ int ac_b_group;
++ int ac_b_start;
++ int ac_b_len;
++
++ /* number of iterations done. we have to track to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status;
++};
++
++#define AC_STATUS_CONTINUE 1
++#define AC_STATUS_FOUND 2
++
++
++struct ext3_buddy {
++ void *bd_bitmap;
++ void *bd_buddy;
++ int bd_blkbits;
++ struct buffer_head *bd_bh;
++ struct buffer_head *bd_bh2;
++ struct ext3_buddy_group_blocks *bd_bd;
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent {
++ int fe_start;
++ int fe_len;
++ unsigned char fe_orders[MB_ARR_SIZE];
++ unsigned char fe_nums;
++ unsigned char fe_back;
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++#define mb_correct_addr_and_bit(bit,addr) \
++{ \
++ if ((unsigned) addr & 1) { \
++ bit += 8; \
++ addr--; \
++ } \
++ if ((unsigned) addr & 2) { \
++ bit += 16; \
++ addr--; \
++ addr--; \
++ } \
++}
++
++static inline int mb_test_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ return test_bit(bit, addr);
++}
++
++static inline void mb_set_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ set_bit(bit, addr);
++}
++
++static inline void mb_clear_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ clear_bit(bit, addr);
++}
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
++{
++ int i = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(max != NULL);
++
++ if (order > e3b->bd_blkbits + 1)
++ return NULL;
++
++ /* at order 0 we see each particular block */
++ *max = 1 << (e3b->bd_blkbits + 3);
++ if (order == 0)
++ return e3b->bd_bitmap;
++
++ bb = e3b->bd_buddy;
++ *max = *max >> 1;
++ while (i < order) {
++ bb += 1 << (e3b->bd_blkbits - i);
++ i++;
++ *max = *max >> 1;
++ }
++ return bb;
++}
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++ struct ext3_buddy *e3b)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++
++ /* load bitmap */
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ if (e3b->bd_bh == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh);
++ wait_on_buffer(e3b->bd_bh);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++ /* load buddy */
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ if (e3b->bd_bh2 == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh2)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh2);
++ wait_on_buffer(e3b->bd_bh2);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++ e3b->bd_bitmap = e3b->bd_bh->b_data;
++ e3b->bd_buddy = e3b->bd_bh2->b_data;
++ e3b->bd_blkbits = sb->s_blocksize_bits;
++ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_sb = sb;
++
++ return 0;
++out:
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++ e3b->bd_bh = NULL;
++ e3b->bd_bh2 = NULL;
++ return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
++{
++ mark_buffer_dirty(e3b->bd_bh);
++ mark_buffer_dirty(e3b->bd_bh2);
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b)
++{
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK
++static void mb_check_buddy(struct ext3_buddy *e3b)
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!mb_test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (mb_test_bit(i << 1, buddy2))
++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
++ else if (mb_test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
++
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (mb_test_bit(i, buddy))
++ continue;
++ /* check used bits only */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!mb_test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void
++ext3_lock_group(struct super_block *sb, int group)
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static inline void
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
++{
++ int order = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++ bb = e3b->bd_buddy;
++ while (order <= e3b->bd_blkbits + 1) {
++ block = block >> 1;
++ if (mb_test_bit(block, bb)) {
++ /* this block is part of buddy of order 'order' */
++ return order;
++ }
++ bb += 1 << (e3b->bd_blkbits - order);
++ order++;
++ }
++ return 0;
++}
++
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: clear whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0;
++ cur += 32;
++ continue;
++ }
++ mb_clear_bit(cur, bm);
++ cur++;
++ }
++}
++
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: set whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0xffffffff;
++ cur += 32;
++ continue;
++ }
++ mb_set_bit(cur, bm);
++ cur++;
++ }
++}
++
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++
++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
++ mb_set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!mb_test_bit(block, buddy) ||
++ !mb_test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* for special purposes, we don't clear
++ * free bits in bitmap */
++ mb_clear_bit(block, buddy);
++ mb_clear_bit(block + 1, buddy);
++ }
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++
++ block = block >> 1;
++ order++;
++ e3b->bd_bd->bb_counters[order]++;
++
++ mb_set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * returns 1 if out extent is enough to fill needed space
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for single-chunk extent we need not back order
++ * also, if an extent doesn't fill needed space
++ * then it makes no sense to try back order because
++ * if we select this extent then it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situation fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++ int needed, struct ext3_free_extent *ex)
++{
++ int space = needed;
++ int next, max, ord;
++ void *buddy;
++
++ J_ASSERT(ex != NULL);
++
++ ex->fe_nums = 0;
++ ex->fe_len = 0;
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ J_ASSERT(block < max);
++ if (!mb_test_bit(block, buddy))
++ goto nofree;
++
++ if (order == 0) {
++ /* find actual order */
++ order = mb_find_order_for_block(e3b, block);
++ block = block >> order;
++ }
++
++ ex->fe_orders[ex->fe_nums++] = order;
++ ex->fe_len = 1 << order;
++ ex->fe_start = block << order;
++ ex->fe_back = 0;
++
++ while ((space = space - (1 << order)) > 0) {
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++
++ if (block + 1 >= max)
++ break;
++
++ next = (block + 1) * (1 << order);
++ if (!mb_test_bit(next, e3b->bd_bitmap))
++ break;
++
++ ord = mb_find_order_for_block(e3b, next);
++
++ if ((1 << ord) >= needed) {
++ /* we don't want to coalesce with self-sufficient buddies */
++ break;
++ }
++ order = ord;
++ block = next >> order;
++ ex->fe_len += 1 << order;
++
++ if (ex->fe_nums < MB_ARR_SIZE)
++ ex->fe_orders[ex->fe_nums++] = order;
++ }
++
++nofree:
++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++ return ex->fe_len;
++}
++
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ start = ex->fe_start + ex->fe_len - 1;
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++ len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start -= mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ J_ASSERT(start >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++int inline mb_mark_used(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int err;
++
++ J_ASSERT(ex);
++ if (ex->fe_back == 0)
++ err = mb_mark_used_forward(e3b, ex, len);
++ else
++ err = mb_mark_used_backward(e3b, ex, len);
++ return err;
++}
++
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b, int group)
++{
++ struct super_block *sb = ac->ac_sb;
++ int err, gorder, max, i;
++ struct ext3_free_extent curex;
++
++ /* let's know order of allocation */
++ gorder = 0;
++ while (ac->ac_g_len > (1 << gorder))
++ gorder++;
++
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++ /* someone asks for space at this specified block
++ * probably he wants to merge it into existing extent */
++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++ /* good. at least one block is free */
++ max = mb_find_extent(e3b, 0, ac->ac_g_start,
++ ac->ac_g_len, &curex);
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ err = 0;
++ goto out;
++ }
++ /* don't try to find goal anymore */
++ ac->ac_g_flags &= ~1;
++ }
++
++ i = 0;
++ while (1) {
++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++ if (i >= sb->s_blocksize * 8)
++ break;
++
++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++ if (max >= ac->ac_g_len) {
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ break;
++ }
++ i += max;
++ }
++
++ return 0;
++
++out:
++ return err;
++}
++
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++ struct ext3_group_desc *gdp;
++ int free_blocks;
++
++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
++ if (!gdp)
++ return 0;
++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++ if (free_blocks == 0)
++ return 0;
++
++ /* someone wants this block very much */
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++ return 1;
++
++ /* FIXME: I'd like to take fragmentation into account here */
++ if (cr == 0) {
++ if (free_blocks >= ac->ac_g_len >> 1)
++ return 1;
++ } else if (cr == 1) {
++ if (free_blocks >= ac->ac_g_len >> 2)
++ return 1;
++ } else if (cr == 2) {
++ return 1;
++ } else {
++ BUG();
++ }
++ return 0;
++}
++
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *len, int flags, int *errp)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_allocation_context ac;
++ int i, group, block, cr, err = 0;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ struct buffer_head *gdp_bh;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++
++ J_ASSERT(len != NULL);
++ J_ASSERT(*len > 0);
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk("ext3_mb_new_nblocks: nonexistent device");
++ return 0;
++ }
++
++ if (!test_opt(sb, MBALLOC)) {
++ static int ext3_mballoc_warning = 0;
++ if (ext3_mballoc_warning == 0) {
++ printk(KERN_ERR "EXT3-fs: multiblock request with "
++ "mballoc disabled!\n");
++ ext3_mballoc_warning++;
++ }
++ *len = 1;
++ err = ext3_new_block_old(handle, inode, goal, errp);
++ return err;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++
++ if (!(flags & 2)) {
++ /* someone asks for non-reserved blocks */
++ BUG_ON(*len > 1);
++ err = ext3_mb_reserve_blocks(sb, 1);
++ if (err) {
++ *errp = err;
++ return 0;
++ }
++ }
++
++ /*
++ * Check quota for allocation of this blocks.
++ */
++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++ *len -= 1;
++ if (*len == 0) {
++ *errp = -EDQUOT;
++ block = 0;
++ goto out;
++ }
++
++ /* start searching from the goal */
++ if (goal < le32_to_cpu(es->s_first_data_block) ||
++ goal >= le32_to_cpu(es->s_blocks_count))
++ goal = le32_to_cpu(es->s_first_data_block);
++ group = (goal - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb));
++
++ /* set up allocation goals */
++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++ ac.ac_status = 0;
++ ac.ac_groups_scanned = 0;
++ ac.ac_sb = inode->i_sb;
++ ac.ac_g_group = group;
++ ac.ac_g_start = block;
++ ac.ac_g_len = *len;
++ ac.ac_g_flags = flags;
++
++ /* loop over the groups */
++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++ if (group == EXT3_SB(sb)->s_groups_count)
++ group = 0;
++
++ /* check whether this group is good for our criteria */
++ if (!mb_good_group(&ac, group, cr))
++ continue;
++
++ err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++ if (err)
++ goto out_err;
++
++ ext3_lock_group(sb, group);
++ if (!mb_good_group(&ac, group, cr)) {
++ /* someone did allocation from this group */
++ ext3_unlock_group(sb, group);
++ ext3_mb_release_desc(&e3b);
++ continue;
++ }
++
++ err = ext3_mb_new_in_group(&ac, &e3b, group);
++ ext3_unlock_group(sb, group);
++ if (ac.ac_status == AC_STATUS_FOUND)
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ break;
++ }
++ }
++
++ if (ac.ac_status != AC_STATUS_FOUND) {
++ /* unfortunately, we can't satisfy this request */
++ J_ASSERT(ac.ac_b_len == 0);
++ DQUOT_FREE_BLOCK(inode, *len);
++ *errp = -ENOSPC;
++ block = 0;
++ goto out;
++ }
++
++ /* good news - free block(s) have been found. now it's time
++ * to mark block(s) in good old journaled bitmap */
++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
++
++ /* we made a decision, now mark the found blocks in the good old
++ * bitmap to be journaled */
++
++ ext3_debug("using block group %d(%d)\n",
++ ac.ac_b_group.group, gdp->bg_free_blocks_count);
++
++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++ if (!bitmap_bh) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err) {
++ *errp = err;
++ goto out_err;
++ }
++
++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++ if (!gdp) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(es->s_first_data_block);
++
++ if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++ block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++ in_range(block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error(sb, "ext3_new_block",
++ "Allocating block in system zone - "
++ "block = %u", block);
++#if 0
++ for (i = 0; i < ac.ac_b_len; i++)
++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++ ext3_lock_group(sb, ac.ac_b_group);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++ ac.ac_b_len);
++ ext3_unlock_group(sb, ac.ac_b_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++ if (err)
++ goto out_err;
++ err = ext3_journal_dirty_metadata(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ sb->s_dirt = 1;
++ *errp = 0;
++ brelse(bitmap_bh);
++
++ /* drop non-allocated, but dquot'ed blocks */
++ J_ASSERT(*len >= ac.ac_b_len);
++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++ *len = ac.ac_b_len;
++ J_ASSERT(block != 0);
++ goto out;
++
++out_err:
++ /* if we've already allocated something, roll it back */
++ if (ac.ac_status == AC_STATUS_FOUND) {
++ /* FIXME: free blocks here */
++ }
++
++ DQUOT_FREE_BLOCK(inode, *len);
++ brelse(bitmap_bh);
++ *errp = err;
++ block = 0;
++out:
++ if (!(flags & 2)) {
++ /* block wasn't reserved before and we reserved it
++ * at the beginning of allocation. it doesn't matter
++ * whether we allocated anything or we failed: time
++ * to release reservation. NOTE: because I expect
++ * any multiblock request from delayed allocation
++ * path only, here is single block always */
++ ext3_mb_release_blocks(sb, 1);
++ }
++ return block;
++}
++
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++ struct buffer_head *bh;
++ int i, err, count = 0;
++ struct ext3_buddy e3b;
++
++ err = ext3_mb_load_desc(sb, group, &e3b);
++ if (err)
++ goto out;
++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++
++ bh = read_block_bitmap(sb, group);
++ if (bh == NULL) {
++ err = -EIO;
++ goto out2;
++ }
++
++ /* loop over the blocks, and create buddies for free ones */
++ for (i = 0; i < sb->s_blocksize * 8; i++) {
++ if (!mb_test_bit(i, (void *) bh->b_data)) {
++ mb_free_blocks(&e3b, i, 1);
++ count++;
++ }
++ }
++ brelse(bh);
++ mb_check_buddy(&e3b);
++ ext3_mb_dirty_buddy(&e3b);
++
++out2:
++ ext3_mb_release_desc(&e3b);
++out:
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
++
++int ext3_mb_init_backend(struct super_block *sb)
++{
++ struct inode *root = sb->s_root->d_inode;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct dentry *db;
++ tid_t target;
++ int err, i;
++
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_groups_count, GFP_KERNEL);
++ if (sbi->s_buddy_blocks == NULL) {
++ printk("can't allocate mem for buddy maps\n");
++ return -ENOMEM;
++ }
++ memset(sbi->s_buddy_blocks, 0,
++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sbi->s_buddy = NULL;
++
++ down(&root->i_sem);
++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++ strlen(EXT3_BUDDY_FILE));
++ if (IS_ERR(db)) {
++ err = PTR_ERR(db);
++ printk("can't lookup buddy file: %d\n", err);
++ goto out;
++ }
++
++ if (db->d_inode != NULL) {
++ sbi->s_buddy = igrab(db->d_inode);
++ goto map;
++ }
++
++ err = ext3_create(root, db, S_IFREG, NULL);
++ if (err) {
++ printk("error while creation buddy file: %d\n", err);
++ } else {
++ sbi->s_buddy = igrab(db->d_inode);
++ }
++
++map:
++ for (i = 0; i < sbi->s_groups_count; i++) {
++ struct buffer_head *bh = NULL;
++ handle_t *handle;
++
++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out2;
++ }
++
++ /* allocate block for bitmap */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy bitmap: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ brelse(bh);
++
++ /* allocate block for buddy */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ brelse(bh);
++ ext3_journal_stop(handle);
++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i].bb_tid = 0;
++ }
++
++ if (journal_start_commit(sbi->s_journal, &target))
++ log_wait_commit(sbi->s_journal, target);
++
++out2:
++ dput(db);
++out:
++ up(&root->i_sem);
++ return err;
++}
++
++int ext3_mb_release(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* release freed, non-committed blocks */
++ spin_lock(&sbi->s_md_lock);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_committed_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ ext3_mb_free_committed_blocks(sb);
++
++ if (sbi->s_buddy_blocks)
++ kfree(sbi->s_buddy_blocks);
++ if (sbi->s_buddy)
++ iput(sbi->s_buddy);
++ if (sbi->s_blocks_reserved)
++ printk("ext3-fs: %ld blocks being reserved at umount!\n",
++ sbi->s_blocks_reserved);
++ return 0;
++}
++
++int ext3_mb_init(struct super_block *sb)
++{
++ struct ext3_super_block *es;
++ int i;
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* init file for buddy data */
++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ ext3_mb_init_backend(sb);
++
++ es = EXT3_SB(sb)->s_es;
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++ ext3_mb_generate_buddy(sb, i);
++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++ spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ printk("EXT3-fs: mballoc enabled\n");
++ return 0;
++}
++
++void ext3_mb_free_committed_blocks(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int err, i, count = 0, count2 = 0;
++ struct ext3_free_metadata *md;
++ struct ext3_buddy e3b;
++
++ if (list_empty(&sbi->s_committed_transaction))
++ return;
++
++ /* there are committed blocks still to be freed */
++ do {
++ /* get next array of blocks */
++ md = NULL;
++ spin_lock(&sbi->s_md_lock);
++ if (!list_empty(&sbi->s_committed_transaction)) {
++ md = list_entry(sbi->s_committed_transaction.next,
++ struct ext3_free_metadata, list);
++ list_del(&md->list);
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ if (md == NULL)
++ break;
++
++ mb_debug("gonna free %u blocks in group %u (0x%p):",
++ md->num, md->group, md);
++
++ err = ext3_mb_load_desc(sb, md->group, &e3b);
++ BUG_ON(err != 0);
++
++ /* there are blocks to put in buddy to make them really free */
++ count += md->num;
++ count2++;
++ ext3_lock_group(sb, md->group);
++ for (i = 0; i < md->num; i++) {
++ mb_debug(" %u", md->blocks[i]);
++ mb_free_blocks(&e3b, md->blocks[i], 1);
++ }
++ mb_debug("\n");
++ ext3_unlock_group(sb, md->group);
++
++ kfree(md);
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ } while (md);
++ mb_debug("freed %u blocks in %u structures\n", count, count2);
++}
++
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (sbi->s_last_transaction == handle->h_transaction->t_tid)
++ return;
++
++ /* new transaction! time to close the last one and free blocks
++ * for the committed transaction. we know that only one transaction
++ * can be active, so the previous transaction may still be being
++ * logged, and the transaction before the previous one is known to
++ * be already logged. this means we may now free blocks freed in
++ * all transactions before the previous one. hope I'm clear enough */
++
++ spin_lock(&sbi->s_md_lock);
++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
++ mb_debug("new transaction %lu, old %lu\n",
++ (unsigned long) handle->h_transaction->t_tid,
++ (unsigned long) sbi->s_last_transaction);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_closed_transaction);
++ sbi->s_last_transaction = handle->h_transaction->t_tid;
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ ext3_mb_free_committed_blocks(sb);
++}
++
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++ int group, int block, int count)
++{
++ struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++ struct super_block *sb = e3b->bd_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_free_metadata *md;
++ int i;
++
++ ext3_lock_group(sb, group);
++ for (i = 0; i < count; i++) {
++ md = db->bb_md_cur;
++ if (md && db->bb_tid != handle->h_transaction->t_tid) {
++ db->bb_md_cur = NULL;
++ md = NULL;
++ }
++
++ if (md == NULL) {
++ ext3_unlock_group(sb, group);
++ md = kmalloc(sizeof(*md), GFP_KERNEL);
++ if (md == NULL)
++ return -ENOMEM;
++ md->num = 0;
++ md->group = group;
++
++ ext3_lock_group(sb, group);
++ if (db->bb_md_cur == NULL) {
++ spin_lock(&sbi->s_md_lock);
++ list_add(&md->list, &sbi->s_active_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ db->bb_md_cur = md;
++ db->bb_tid = handle->h_transaction->t_tid;
++ mb_debug("new md 0x%p for group %u\n",
++ md, md->group);
++ } else {
++ kfree(md);
++ md = db->bb_md_cur;
++ }
++ }
++
++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++ md->blocks[md->num] = block + i;
++ md->num++;
++ if (md->num == EXT3_BB_MAX_BLOCKS) {
++ /* no more space, put the full container on the sb's list */
++ db->bb_md_cur = NULL;
++ }
++ }
++ ext3_unlock_group(sb, group);
++ return 0;
++}
++
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ unsigned long bit, overflow;
++ struct buffer_head *gd_bh;
++ unsigned long block_group;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++ int err = 0, ret;
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++ if (block < le32_to_cpu(es->s_first_data_block) ||
++ block + count < block ||
++ block + count > le32_to_cpu(es->s_blocks_count)) {
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks not in datazone - "
++ "block = %lu, count = %lu", block, count);
++ goto error_return;
++ }
++
++ ext3_debug("freeing block %lu\n", block);
++
++do_more:
++ overflow = 0;
++ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ bit = (block - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary.
++ */
++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++ count -= overflow;
++ }
++ brelse(bitmap_bh);
++ bitmap_bh = read_block_bitmap(sb, block_group);
++ if (!bitmap_bh)
++ goto error_return;
++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++ if (!gdp)
++ goto error_return;
++
++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++ in_range (block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group) ||
++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks in system zones - "
++ "Block = %lu, count = %lu",
++ block, count);
++
++ BUFFER_TRACE(bitmap_bh, "getting write access");
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err)
++ goto error_return;
++
++ /*
++ * We are about to modify some metadata. Call the journal APIs
++ * to unshare ->b_data if a currently-committing transaction is
++ * using it
++ */
++ BUFFER_TRACE(gd_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, gd_bh);
++ if (err)
++ goto error_return;
++
++ err = ext3_mb_load_desc(sb, block_group, &e3b);
++ if (err)
++ goto error_return;
++
++ if (metadata) {
++ /* blocks being freed are metadata. these blocks shouldn't
++ * be used until this transaction is committed */
++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
++ } else {
++ ext3_lock_group(sb, block_group);
++ mb_free_blocks(&e3b, bit, count);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++ ext3_unlock_group(sb, block_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++ }
++
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ /* FIXME: undo logic will be implemented later and another way */
++ mb_clear_bits(bitmap_bh->b_data, bit, count);
++ DQUOT_FREE_BLOCK(inode, count);
++
++ /* We dirtied the bitmap block */
++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++ /* And the group descriptor block */
++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++ ret = ext3_journal_dirty_metadata(handle, gd_bh);
++ if (!err) err = ret;
++
++ if (overflow && !err) {
++ block += count;
++ count = overflow;
++ goto do_more;
++ }
++ sb->s_dirt = 1;
++error_return:
++ brelse(bitmap_bh);
++ ext3_std_error(sb, err);
++ return;
++}
++
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int free, ret = -ENOSPC;
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++ if (blocks <= free - sbi->s_blocks_reserved) {
++ sbi->s_blocks_reserved += blocks;
++ ret = 0;
++ }
++ spin_unlock(&sbi->s_reserve_lock);
++ return ret;
++}
++
++void ext3_mb_release_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ sbi->s_blocks_reserved -= blocks;
++ WARN_ON(sbi->s_blocks_reserved < 0);
++ if (sbi->s_blocks_reserved < 0)
++ sbi->s_blocks_reserved = 0;
++ spin_unlock(&sbi->s_reserve_lock);
++}
++
++int ext3_new_block(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *errp)
++{
++ int ret, len;
++
++ if (!test_opt(inode->i_sb, MBALLOC)) {
++ ret = ext3_new_block_old(handle, inode, goal, errp);
++ goto out;
++ }
++ len = 1;
++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++out:
++ return ret;
++}
++
++
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ if (!test_opt(inode->i_sb, MBALLOC))
++ ext3_free_blocks_old(handle, inode, block, count);
++ else
++ ext3_mb_free_blocks(handle, inode, block, count, metadata);
++ return;
++}
++
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -542,7 +543,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err, Opt_extents, Opt_extdebug
++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
+ };
+
+ static match_table_t tokens = {
+@@ -589,6 +590,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -810,6 +812,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1463,7 +1468,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -274,7 +274,7 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
++void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
+ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+@@ -1142,7 +1142,7 @@
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+ unsigned long goal, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.5-sles9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300
++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300
+@@ -572,7 +572,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -673,7 +673,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1829,7 +1829,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2000,7 +2000,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1391,7 +1391,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1879,10 +1879,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1894,7 +1896,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-2.6.5-sles9/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300
++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1504,7 +1504,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
+ /*
+ * Special inodes numbers
+ */
+@@ -339,6 +341,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -698,7 +701,7 @@
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300
+@@ -23,10 +23,30 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+ #include <linux/rbtree.h>
+
++#define EXT3_BB_MAX_BLOCKS 30
++struct ext3_free_metadata {
++ unsigned short group;
++ unsigned short num;
++ unsigned short blocks[EXT3_BB_MAX_BLOCKS];
++ struct list_head list;
++};
++
++#define EXT3_BB_MAX_ORDER 14
++
++struct ext3_buddy_group_blocks {
++ sector_t bb_bitmap;
++ sector_t bb_buddy;
++ spinlock_t bb_lock;
++ unsigned bb_counters[EXT3_BB_MAX_ORDER];
++ struct ext3_free_metadata *bb_md_cur;
++ unsigned long bb_tid;
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -78,6 +98,17 @@
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
--- /dev/null
+Index: linux-2.6.7/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/namei.c 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/fs/ext3/namei.c 2004-08-20 17:48:54.000000000 -0600
+@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t *
+ static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+ {
+ inode->i_nlink++;
++ if (is_dx(inode) && inode->i_nlink > 1) {
++ /* limit is 16-bit i_links_count */
++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2)
++ inode->i_nlink = 1;
++ }
+ }
+
+ static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+ {
+- inode->i_nlink--;
++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
++ inode->i_nlink--;
+ }
+
+ static int ext3_add_nondir(handle_t *handle,
+@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir
+ struct ext3_dir_entry_2 * de;
+ int err;
+
+- if (dir->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(dir))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+- inode->i_nlink--; /* is this nlink == 0? */
++ ext3_dec_count(handle, inode); /* is this nlink == 0? */
+ ext3_mark_inode_dirty(handle, inode);
+ iput (inode);
+ goto out_stop;
+@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir
+ iput (inode);
+ goto out_stop;
+ }
+- dir->i_nlink++;
++ ext3_inc_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ d_instantiate(dentry, inode);
+@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_rmdir;
+- if (inode->i_nlink != 2)
+- ext3_warning (inode->i_sb, "ext3_rmdir",
+- "empty directory has nlink!=2 (%d)",
+- inode->i_nlink);
++ if (!EXT3_DIR_LINK_EMPTY(inode))
++ ext3_warning(inode->i_sb, "ext3_rmdir",
++ "empty directory has too many links (%d)",
++ inode->i_nlink);
+ inode->i_version++;
+ inode->i_nlink = 0;
+ /* There's no need to set i_disksize: the fact that i_nlink is
+@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+- dir->i_nlink--;
++ ext3_dec_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+
+@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+- inode->i_nlink--;
++ ext3_dec_count(handle, inode);
+ if (!inode->i_nlink)
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime;
+@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol
+ struct inode *inode = old_dentry->d_inode;
+ int err;
+
+- if (inode->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(inode))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -2230,8 +2235,8 @@ static int ext3_rename (struct inode * o
+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+- if (!new_inode && new_dir!=old_dir &&
+- new_dir->i_nlink >= EXT3_LINK_MAX)
++ if (!new_inode && new_dir != old_dir &&
++ EXT3_DIR_LINK_MAXED(new_dir))
+ goto end_rename;
+ }
+ if (!new_bh) {
+@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o
+ }
+
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+@@ -2299,11 +2304,11 @@ static int ext3_rename (struct inode * o
+ PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+- old_dir->i_nlink--;
++ ext3_dec_count(handle, old_dir);
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ } else {
+- new_dir->i_nlink++;
++ ext3_inc_count(handle, new_dir);
+ ext3_update_dx_flag(new_dir);
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+Index: linux-2.6.7/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-20 17:41:27.000000000 -0600
+@@ -41,7 +41,7 @@ struct statfs;
+ /*
+ * Always enable hashed directories
+ */
+-#define CONFIG_EXT3_INDEX
++#define CONFIG_EXT3_INDEX 1
+
+ /*
+ * Debug code
+@@ -79,7 +81,7 @@
+ /*
+ * Maximal count of links to a file
+ */
+-#define EXT3_LINK_MAX 32000
++#define EXT3_LINK_MAX 65000
+
+ /*
+ * Macro-instructions used to manage several block sizes
+@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
+ */
+
+ #ifdef CONFIG_EXT3_INDEX
+- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+- EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
++ (is_dx(dir) && (dir)->i_nlink == 1))
+ #else
+ #define is_dx(dir) 0
+-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
+ #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
+ #endif
+
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500
++++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500
++++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include "acl.h"
/*
-@@ -2472,6 +2473,9 @@
- ei->i_acl = EXT3_ACL_NOT_CACHED;
+@@ -2401,6 +2402,9 @@
ei->i_default_acl = EXT3_ACL_NOT_CACHED;
#endif
-+ if (ext3_iopen_get_inode(inode))
-+ return;
-+
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
++
++ if (ext3_iopen_get_inode(inode))
++ return;
+
if (ext3_get_inode_loc(inode, &iloc, 0))
goto bad_inode;
- bh = iloc.bh;
Index: linux-stage/fs/ext3/iopen.c
===================================================================
--- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500
@@ -0,0 +1,272 @@
+/*
+ * linux/fs/ext3/iopen.c
Index: linux-stage/fs/ext3/iopen.h
===================================================================
--- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+ struct inode *inode, int rehash);
Index: linux-stage/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500
++++ linux-stage/fs/ext3/namei.c 2004-11-03 14:41:25.127695114 -0500
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
}
-@@ -2019,10 +2021,6 @@
+@@ -2029,10 +2031,6 @@
inode->i_nlink);
inode->i_version++;
inode->i_nlink = 0;
ext3_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
ext3_mark_inode_dirty(handle, inode);
-@@ -2139,6 +2137,23 @@
+@@ -2152,6 +2150,23 @@
return err;
}
static int ext3_link (struct dentry * old_dentry,
struct inode * dir, struct dentry *dentry)
{
-@@ -2161,7 +2176,8 @@
+@@ -2175,7 +2190,8 @@
ext3_inc_count(handle, inode);
atomic_inc(&inode->i_count);
+ err = ext3_add_link(handle, dentry, inode);
+ ext3_orphan_del(handle,inode);
ext3_journal_stop(handle);
- return err;
- }
+ if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400
-@@ -536,7 +536,7 @@
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
+--- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500
++++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500
+@@ -534,7 +534,7 @@
+ Opt_reservation, Opt_noreservation, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_ignore, Opt_barrier,
{Opt_err, NULL}
};
-@@ -772,6 +775,18 @@
+@@ -778,6 +781,18 @@
else
clear_opt(sbi->s_mount_opt, BARRIER);
break;
-tbd Cluster File Systems, Inc. <info@clusterfs.com>
- * version 1.2.x
+tbd Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.4
+ * bug fixes
+ - fixes from lustre 1.2.8
+ - print NAL number in %x format (4645)
+ - the watchdog thread now runs as interruptible (5246)
+ - drop import inflight refcount on signal_completed_replay error (5255)
+ * miscellania
+ - add pid to ldlm debugging output (4922)
+
+2004-10-08 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.3
+ * bug fixes
+ - properly handle portals process identifiers in messages (4165)
+ - finish default directory EA handling (3048)
+ - fixes from lustre 1.2.7
+ - removed PTL_MD_KIOV usage under CRAY_PORTALS (4420)
+ - allow EADDRNOTAVAIL as retry for connect in liblustre tcpnal (4822)
+
+2004-09-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.2
+ * bug fixes
+ - many liblustre fixes
+ - fixes from lustre 1.2.6
+ * miscellania
+ - update to new libsysio-head-0806
+ - reorganization of lov code
+
+2004-08-30 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.1
+ * bug fixes
+ - add locking for mmapped files (2828)
+ - lmc/lconf changes to support multiple interfaces (3376)
+ - fixes from lustre 1.2.5
+
+2004-08-14 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.3.0
* bug fixes
- don't dereference NULL peer_ni in ldlm_handle_ast_error (3258)
- don't allow unlinking open directory if it isn't empty (2904)
- chose better nal ids in liblustre (3292)
- initialize liblustre with uid/group membership (2862)
- let lconf resolve symlinked-to devices (4629)
+ - balance journal closure when 2.6 filter write fails (3401)
+ - add second rpc_lock and last_rcvd info for close reqs (3462)
+ - don't hold llog sem during network request (3652)
+ - update server last transno after client disconnects (2525)
+ - replace config semaphore with spinlock (3306)
+ - ext3 exents and multi-block allocation (3024)
+ - service time statistics in /proc
+ - minor fixes to liblustre build (3317)
+ - client recovery without upcall (3262)
+ - use transno after validating reply (3892)
+ - use different name for 2nd ptlrpcd thread (3887)
+ - get a client lock in ll_inode_revalidate_it (3597)
+ - direct IO reads on OST (4048)
+ - process timed out requests if import state changes (3754)
+ - ignore -ENOENT errors in osc_destroy (3639)
+ - fixes from lustre 1.2.0-1.2.4
+ * miscellania
+ - use "CATALOGS" for the llog catalogs, not "CATLIST" (old) (b=2841)
+ - added kernel patch for /dev/sd I/O stats (4385)
+
+2004-11-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.8
+ * bug fixes
+ - fix TCP_NODELAY bug, which caused extreme perf regression (5134)
+ - allocate qswnal tx descriptors singly to avoid fragmentation (4504)
+ - don't LBUG on obdo_alloc() failure, use OBD_SLAB_ALLOC() (4800)
+ - fix NULL dereference in /proc/sys/portals/routes (4827)
+ - allow failed mdc_close() operations to be interrupted (4561)
+ - stop precreate on OST before MDS would time out on it (4778)
+ - don't free dentries not owned by NFS code, check generation (4806)
+ - fix lsm leak if mds_create_objects() fails (4801)
+ - limit debug_daemon file size, always print CERROR messages (4789)
+ - use transno after validating reply (3892)
+ - process timed out requests if import state changes (3754)
+ - update mtime on OST during writes, return in glimpse (4829)
+ - add mkfsoptions to LDAP (4679)
+ - use ->max_readahead method instead of zapping global ra (5039)
+ - don't interrupt __l_wait_event() during strace
+ * miscellania
+ - add software watchdogs to catch hung threads quickly (4941)
+ - make lustrefs init script start after nfs is mounted
+ - fix CWARN/ERROR duplication (4930)
+ - return async write errors to application if possible (2248)
+ - update barely-supported suse-2.4.21-171 series (4842)
+ - support for sles 9 %post scripts
+ - support for building 2.6 kernel-source packages
+ - support for sles km_* packages
+
+2004-10-07 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.7
+ * bug fixes
+ - ignore -ENOENT errors in osc_destroy (3639)
+ - notify osc create thread that OSC is being cleaned up (4600)
+ - add nettype argument for llmount in #5d in conf-sanity.sh (3936)
+ - reconstruct ost_handle() like mds_handle() (4657)
+ - create a new thread to do import eviction to avoid deadlock (3969)
+ - let lconf resolve symlinked-to devices (4629)
+ - don't unlink "objects" from directory with default EA (4554)
+ - hold socknal file ref over connect in case target is down (4394)
+ - allow more than 32000 subdirectories in a single directory (3244)
+ - OST returns ENOSPC from object create when no space left (4539)
+ - don't send truncate RPC if file size isn't changing (4410)
+ - limit OSC precreate to 1/2 of value OST considers bogus (4778)
+ - bind to privileged port in socknal and tcpnal (3689)
+ * miscellania
+ - rate limit CERROR/CWARN console message to avoid overload (4519)
+ - basic mmap support (3918)
+ - kernel patch series update from b1_4 (4711)
+
+2004-09-16 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.6
+ * bug fixes
+ - avoid crash during MDS cleanup with OST shut down (2775)
+ - fix loi_list_lock/oig_lock inversion on interrupted IO (4136)
+ - don't use bad inodes on the MDS (3744)
+ - dynamic object preallocation to improve recovery speed (4236)
+ - don't hold spinlock over lock dumping or change debug flags (4401)
+ - don't zero obd_dev when it is force cleaned (3651)
+ - "lctl deactivate" will stop automatic recovery attempts (3406)
+ - look for existing replayed locks to avoid duplicates (3764)
+ - don't resolve lock handle twice in recovery avoiding race (4401)
+ - revalidate should check working dir is a directory (4134)
+ * miscellania
+ - don't always mark "slow" obdfilter messages as errors (4418)
+
+2004-08-24 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.5
+ * bug fixes
+ - don't close LustreDB during write_conf until it is done (3860)
+ - fix typo in lconf for_each_profile (3821)
+ - allow dumping logs from multiple threads at one time (3820)
+ - don't allow multiple threads in OSC recovery (3812)
+ - fix debug_size parameters (3864)
+ - fix mds_postrecov to initialize import for llog ctxt (3121)
+ - replace config semaphore with spinlock (3306)
+ - be sure to send a reply for a CANCEL rpc with bad export (3863)
+ - don't allow enqueue to complete on a destroyed export (3822)
+ - down write_lock before checking llog header bitmap (3825)
+ - recover from lock replay timeout (3764)
+ - up llog sem before sending rpc (3652)
+ - reduce ns lock hold times when setting kms (3267)
+ - change a dlm LBUG to LASSERTF, to maybe learn something (4228)
+ - fix NULL deref and obd_dev leak on setup error (3312)
+ - replace some LBUG about llog ops with error handling (3841)
+ - don't match INVALID dentries from d_lookup and spin (3784)
+ - hold dcache_lock while marking dentries INVALID and hashing (4255)
+ - fix invalid assertion in ptlrpc_set_wait (3880)
+ * miscellania
+ - add libwrap support for the TCP acceptor (3996)
+ - add /proc/sys/portals/routes for non-root route listing (3994)
+ - allow setting MDS UUID in .xml (2580)
+ - print the stack of a process that LBUGs (4228)
+
+2004-07-14 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.2.4
+ * bug fixes
+ - don't cleanup request in ll_file_open() on failed MDS open (3430)
+ - make sure to unset replay flag from failed open requests (3440)
+ - if default stripe count is 0, use OST count for inode size (3636)
+ - update parent mtime/ctime on client for create/unlink (2611)
+ - drop dentry ref in ext3_add_link from open_connect_dentry (3266)
+ - free recovery state on server during a forced cleanup (3571)
+ - unregister_reply for resent reqs (3063)
+ - loop back devices mounting and status check on 2.6 (3563)
+ - fix resource-creation race that can provoke i_size == 0 (3513)
+ - don't try to use bad inodes returned from MDS/OST fs lookup (3688)
+ - more debugging for page-accounting assertion (3746)
+ - return -ENOENT instead of asserting if ost getattr+unlink race (3558)
+ - avoid deadlock after precreation failure (3758)
+ - fix race and lock order deadlock in orphan handling (3450, 3750)
+ - add validity checks when grabbing inodes from l_ast_data (3599)
* miscellania
- drop scimac NAL (unmaintained)
AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes)
AM_CONDITIONAL(GSS, test x$enable_gss = xyes)
AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
+AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes)
AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
])
<!ATTLIST ptlrpc %object.attr;>
<!ELEMENT osd (fstype | devpath | devsize | autoformat |
- target_ref | node_ref | journalsize )*>
+ target_ref | node_ref | journalsize | mkfsoptions)*>
<!ATTLIST osd %object.attr;
osdtype (obdfilter | obdecho) 'obdfilter'>
failover ( 1 | 0 ) #IMPLIED>
<!ELEMENT mdsdev (fstype | devpath | devsize | autoformat |
- target_ref | node_ref | journalsize )*>
+ target_ref | node_ref | journalsize | mkfsoptions)*>
<!ATTLIST mdsdev %object.attr;>
<!ELEMENT lov (mds_ref |(obd_ref)+)*>
<!ELEMENT ptldebug %object.content;>
<!ELEMENT subsystem %object.content;>
<!ELEMENT journalsize %object.content;>
+<!ELEMENT mkfsoptions %object.content;>
<!ELEMENT fstype %object.content;>
<!ELEMENT nid %object.content;>
<!ELEMENT port %object.content;>
<if test="journalsize">
journalsize: <value-of select="journalsize"/>
</if>
+<if test="mkfsoptions">
+mkfsoptions: <value-of select="mkfsoptions"/>
+</if>
nodeRef: <value-of select="node_ref/@uuidref"/>
targetRef: <value-of select="target_ref/@uuidref"/>
<text>
<if test="journalsize">
journalsize: <value-of select="journalsize"/>
</if>
+<if test="mkfsoptions">
+mkfsoptions: <value-of select="mkfsoptions"/>
+</if>
<text>
</text>
</template>
#define LIBLUSTRE_H__
#include <sys/mman.h>
-#ifndef __CYGWIN__
-#include <stdint.h>
-#include <asm/page.h>
-#else
-#include <sys/types.h>
-#include "ioctl.h"
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_ASM_PAGE_H
+# include <asm/page.h>
+#endif
+#ifdef HAVE_SYS_USER_H
+# include <sys/user.h>
#endif
+
+#include "ioctl.h"
+
#include <stdio.h>
#include <sys/ioctl.h>
#include <stdlib.h>
}
#define lock_kernel() do {} while (0)
+#define unlock_kernel() do {} while (0)
#define daemonize(l) do {} while (0)
#define sigfillset(l) do {} while (0)
#define recalc_sigpending(l) do {} while (0)
#include <linux/lustre_export.h>
#include <linux/lustre_net.h>
-
#endif
const char *name,
struct lprocfs_stats *stats);
-#define LPROCFS_INIT_MULTI_VARS(array, size) \
-void lprocfs_init_multi_vars(unsigned int idx, \
- struct lprocfs_static_vars *x) \
-{ \
- struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \
- LASSERT(glob != 0); \
- LASSERT(idx < (unsigned int)(size)); \
- x->module_vars = glob[idx].module_vars; \
- x->obd_vars = glob[idx].obd_vars; \
-} \
-
-#define LPROCFS_INIT_VARS(name, vclass, vinstance) \
-void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \
-{ \
- x->module_vars = vclass; \
- x->obd_vars = vinstance; \
-} \
-
-#define lprocfs_init_vars(NAME, VAR) \
-do { \
+#define LPROCFS_INIT_MULTI_VARS(array, size) \
+void lprocfs_init_multi_vars(unsigned int idx, \
+ struct lprocfs_static_vars *x) \
+{ \
+ struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \
+ LASSERT(glob != 0); \
+ LASSERT(idx < (unsigned int)(size)); \
+ x->module_vars = glob[idx].module_vars; \
+ x->obd_vars = glob[idx].obd_vars; \
+} \
+
+#define LPROCFS_INIT_VARS(name, vclass, vinstance) \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \
+{ \
+ x->module_vars = vclass; \
+ x->obd_vars = vinstance; \
+} \
+
+#define lprocfs_init_vars(NAME, VAR) \
+do { \
extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \
lprocfs_##NAME##_init_vars(VAR); \
} while (0)
uint32_t lmd_nal;
uint32_t lmd_server_ipaddr;
uint32_t lmd_port;
+ uint32_t lmd_async;
uint32_t lmd_nllu;
uint32_t lmd_nllg;
char lmd_security[16];
/* ptlrpc/recov_thread.c */
int llog_start_commit_thread(void);
+int llog_cleanup_commit_master(int force);
struct llog_canceld_ctxt *llcd_grab(void);
void llcd_send(struct llog_canceld_ctxt *llcd);
} while (0)
#define groups_sort(gi) do {} while (0)
-
#define GROUP_AT(gi, i) ((gi)->small_block[(i)])
static inline int cleanup_group_info(void)
page->private = 0; \
} while(0)
+#ifndef smp_num_cpus
+#define smp_num_cpus num_online_cpus()
+#endif
+
#define kiobuf bio
#include <linux/proc_fs.h>
static inline int mapping_mapped(struct address_space *mapping)
{
- return mapping->i_mmap_shared ? 1 : 0;
+ if (mapping->i_mmap_shared)
+ return 1;
+ if (mapping->i_mmap)
+ return 1;
+ return 0;
}
/* to find proc_dir_entry from inode. 2.6 has native one -bzzz */
#define ll_vfs_symlink(dir, dentry, path, mode) vfs_symlink(dir, dentry, path, mode)
#endif
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+#endif
+
#ifdef HAVE_I_ALLOC_SEM
#define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0)
#define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0)
OP; \
}} while(0)
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
+ CDEBUG(mask, "page %p map %p ind %lu priv %0lx: " fmt, \
+ page, page->mapping, page->index, page->private, ## arg)
+
/* lib/debug.c */
int dump_lniobuf(struct niobuf_local *lnb);
int dump_rniobuf(struct niobuf_remote *rnb);
struct list_head ns_unused_list; /* all root resources in ns */
int ns_nr_unused;
unsigned int ns_max_unused;
+ unsigned long ns_next_dump; /* next dump time */
spinlock_t ns_counter_lock;
__u64 ns_locks;
/* Server-side-only members */
struct list_head l_pending_chain; /* callbacks pending */
unsigned long l_callback_timeout;
+
+ __u32 l_pid; /* pid which created this lock */
};
#define LDLM_PLAIN 10
CDEBUG(level, "### " format \
" ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
"res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \
- LPX64" expref: %d\n" , ## a, lock, \
+ LPX64" expref: %d pid: %u\n" , ## a, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
ldlm_lockname[lock->l_granted_mode], \
ldlm_lockname[lock->l_req_mode], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_EXTENT) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s ["LPU64 \
- "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \
- LPX64" expref: %d\n" , ## a, \
+ "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \
+ LPX64" expref: %d pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
lock->l_req_extent.start, lock->l_req_extent.end, \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_FLOCK) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s pid: " \
- LPU64" ["LPU64"->"LPU64"] flags: %x remote: "LPX64 \
- " expref: %d\n" , ## a, \
+ LPU64" " "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \
+ " expref: %d pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, lock, \
lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \
lock->l_readers, lock->l_writers, \
lock->l_policy_data.l_flock.end, \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
if (lock->l_resource->lr_type == LDLM_IBITS) { \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
"res: "LPU64"/"LPU64"/"LPU64" bits "LPX64" rrc: %d " \
- "type: %s flags: %x remote: "LPX64" expref: %d\n" , ## a,\
+ "type: %s flags: %x remote: "LPX64" expref: %d " \
+ "pid %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, \
lock, lock->l_handle.h_cookie, \
atomic_read (&lock->l_refc), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
break; \
} \
{ \
CDEBUG(level, "### " format \
" ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \
- "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s "\
- "flags: %x remote: "LPX64" expref: %d\n" , ## a, \
+ "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s " \
+ "flags: %x remote: "LPX64" expref: %d " \
+ "pid: %u\n" , ## a, \
lock->l_resource->lr_namespace->ns_name, \
lock, lock->l_handle.h_cookie, \
atomic_read (&lock->l_refc), \
ldlm_typename[lock->l_resource->lr_type], \
lock->l_flags, lock->l_remote_handle.cookie, \
lock->l_export ? \
- atomic_read(&lock->l_export->exp_refcount) : -99); \
+ atomic_read(&lock->l_export->exp_refcount) : -99, \
+ lock->l_pid); \
} \
} while (0)
struct ldlm_lock *lock);
void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(void);
-void ldlm_namespace_dump(struct ldlm_namespace *);
-void ldlm_resource_dump(struct ldlm_resource *);
+void ldlm_dump_all_namespaces(int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
struct ldlm_res_id);
struct filter_client_data *fed_fcd;
loff_t fed_lr_off;
int fed_lr_idx;
- unsigned long fed_dirty; /* in bytes */
- unsigned long fed_grant; /* in bytes */
- unsigned long fed_pending; /* bytes just being written */
+ long fed_dirty; /* in bytes */
+ long fed_grant; /* in bytes */
+ long fed_pending; /* bytes just being written */
};
struct obd_export {
unsigned long exp_flags;
int exp_failed:1,
exp_replay_needed:1,
- exp_libclient:1; /* liblustre client? */
+ exp_libclient:1, /* liblustre client? */
+ exp_sync:1;
union {
struct mds_export_data eu_mds_data;
struct filter_export_data eu_filter_data;
int fso_bufcnt;
};
+/* lustre EA type (MEA, LOV, etc.) */
+enum ea_type {
+ EA_LOV = (1 << 0),
+ EA_MEA = (1 << 1),
+ EA_SID = (1 << 2),
+ EA_MID = (1 << 3)
+};
+
struct fsfilt_operations {
struct list_head fs_list;
struct module *fs_owner;
char *fs_type;
+
void *(* fs_start)(struct inode *inode, int op, void *desc_private,
int logs);
void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso,
struct iattr *iattr, int do_trunc);
int (* fs_iocontrol)(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg);
-
- /* two methods for getting lov EA and setting it back to inode xattr. */
- int (* fs_set_md)(struct inode *inode, void *handle, void *md,
- int size);
- int (* fs_get_md)(struct inode *inode, void *md, int size);
-
- /* two methods for getting MID (master id) EA and setting it back to
- * inode xattr. */
- int (* fs_set_mid)(struct inode *inode, void *handle, void *fid,
- int size);
- int (* fs_get_mid)(struct inode *inode, void *fid, int size);
- /* two methods for getting self id EA and setting it back to inode
- * xattr. */
- int (* fs_set_sid)(struct inode *inode, void *handle, void *sid,
- int size);
- int (* fs_get_sid)(struct inode *inode, void *sid, int size);
+ /* two methods for setting getting diff. kind of EAs from inode. */
+ int (* fs_set_md)(struct inode *inode, void *handle, void *md,
+ int size, enum ea_type type);
+ int (* fs_get_md)(struct inode *inode, void *md, int size,
+ enum ea_type type);
int (* fs_send_bio)(int rw, struct inode *inode, void *bio);
ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count,
loff_t *offset);
- int (* fs_add_journal_cb)(struct obd_device *obd,
+ int (* fs_add_journal_cb)(struct obd_device *obd,
struct super_block *sb,
- __u64 last_rcvd, void *handle,
- fsfilt_cb_t cb_func,
- void *cb_data);
+ __u64 last_rcvd, void *handle,
+ fsfilt_cb_t cb_func, void *cb_data);
int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs);
int (* fs_sync)(struct super_block *sb);
int (* fs_map_inode_pages)(struct inode *inode, struct page **page,
#define LMV_EA 1
#define LOV_EA 0
+#define fsfilt_check_slow(start, timeout, msg) \
+do { \
+ if (time_before(jiffies, start + 15 * HZ)) \
+ break; \
+ else if (time_before(jiffies, start + timeout / 2 * HZ)) \
+ CWARN("slow %s %lus\n", msg, (jiffies - start) / HZ); \
+ else \
+ CERROR("slow %s %lus\n", msg, (jiffies - start) / HZ); \
+} while (0)
+
static inline void *
fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode,
int op, struct obd_trans_info *oti, int logs)
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, 60, "journal start");
return handle;
}
int rc = ops->fs_commit(sb, inode, handle, force_sync);
CDEBUG(D_INFO, "committing handle %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, 60, "journal start");
return rc;
}
LBUG();
}
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return handle;
}
int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle);
CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc = obd->obd_fsops->fs_commit_wait(inode, handle);
CDEBUG(D_INFO, "waiting for completion %p\n", handle);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long journal start time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "journal start");
return rc;
}
unsigned long now = jiffies;
int rc;
rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long setattr time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "setattr");
return rc;
}
static inline int
fsfilt_set_md(struct obd_device *obd, struct inode *inode,
- void *handle, void *md, int size)
+ void *handle, void *md, int size, enum ea_type type)
{
- return obd->obd_fsops->fs_set_md(inode, handle, md, size);
+ if (!obd->obd_fsops->fs_set_md)
+ return -ENOSYS;
+
+ return obd->obd_fsops->fs_set_md(inode, handle, md,
+ size, type);
}
static inline int
fsfilt_get_md(struct obd_device *obd, struct inode *inode,
- void *md, int size)
-{
- return obd->obd_fsops->fs_get_md(inode, md, size);
-}
-
-static inline int
-fsfilt_set_mid(struct obd_device *obd, struct inode *inode,
- void *handle, void *mid, int size)
+ void *md, int size, enum ea_type type)
{
- return obd->obd_fsops->fs_set_mid(inode, handle, mid, size);
-}
-
-static inline int
-fsfilt_get_mid(struct obd_device *obd, struct inode *inode,
- void *mid, int size)
-{
- return obd->obd_fsops->fs_get_mid(inode, mid, size);
-}
-
-static inline int
-fsfilt_set_sid(struct obd_device *obd, struct inode *inode,
- void *handle, void *sid, int size)
-{
- return obd->obd_fsops->fs_set_sid(inode, handle, sid, size);
-}
-
-static inline int
-fsfilt_get_sid(struct obd_device *obd, struct inode *inode,
- void *sid, int size)
-{
- return obd->obd_fsops->fs_get_sid(inode, sid, size);
+ if (!obd->obd_fsops->fs_get_md)
+ return -ENOSYS;
+
+ return obd->obd_fsops->fs_get_md(inode, md, size,
+ type);
}
static inline int fsfilt_send_bio(int rw, struct obd_device *obd,
rc = obd->obd_fsops->fs_putpage(inode, page);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long putpage time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "putpage");
return rc;
}
page = obd->obd_fsops->fs_getpage(inode, index);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("long getpage time %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "getpage");
return page;
}
#ifndef _LUSTRE_IDL_H_
#define _LUSTRE_IDL_H_
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#else
+#include "types.h"
+#endif
+
+
#ifdef __KERNEL__
# include <linux/ioctl.h>
-# include <asm/types.h>
# include <linux/types.h>
# include <linux/list.h>
# include <linux/string.h> /* for strncpy, below */
#ifdef __CYGWIN__
# include <sys/types.h>
#else
-# include <asm/types.h>
# include <stdint.h>
#endif
# include <libcfs/list.h>
#define MSG_CONNECT_RECOVERING 0x1
#define MSG_CONNECT_RECONNECT 0x2
#define MSG_CONNECT_REPLAYABLE 0x4
-//#define MSG_CONNECT_PEER 0x8
+#define MSG_CONNECT_PEER 0x8
#define MSG_CONNECT_LIBCLIENT 0x10
#define MSG_CONNECT_INITIAL 0x20
+#define MSG_CONNECT_ASYNC 0x40
/*
* OST requests: OBDO & OBD request records
#define OBD_MD_FLDIREA (0x0000000020000000LL) /* dir's extended attribute data */
#define OBD_MD_REINT (0x0000000040000000LL) /* reintegrate oa */
#define OBD_MD_FID (0x0000000080000000LL) /* lustre_id data */
-#define OBD_MD_FLEALIST (0x0000000100000000LL) /* list extended attributes */
-#define OBD_MD_FLACL_ACCESS (0x0000000200000000LL) /*access acl*/
+#define OBD_MD_MEA (0x0000000100000000LL) /* shows we are interested in MEA */
+#define OBD_MD_FLEALIST (0x0000000200000000LL) /* list extended attributes */
+#define OBD_MD_FLACL_ACCESS (0x0000000400000000LL) /*access acl*/
#define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME | \
OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | \
#define OBD_OBJECT_EOF 0xffffffffffffffffULL
+#define OST_MIN_PRECREATE 32
+#define OST_MAX_PRECREATE 20000
+
struct obd_ioobj {
obd_id ioo_id;
obd_gr ioo_gr;
/* INODE LOCK PARTS */
#define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */
#define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */
+#define MDS_INODELOCK_OPEN 0x000004 /* for opened files */
+
+/* do not forget to increase MDS_INODELOCK_MAXSHIFT when adding new bits */
+#define MDS_INODELOCK_MAXSHIFT 2
+
+/* this FULL lock is useful to take on unlink sort of operations */
+#define MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1)
/* lustre store cookie */
struct lustre_stc {
static inline char * ptlrpc_import_state_name(enum lustre_imp_state state)
{
-
static char* import_state_names[] = {
- "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
- "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
+ "<UNKNOWN>", "CLOSED", "NEW", "DISCONN",
+ "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT",
"RECOVER", "FULL", "EVICTED",
};
int imp_invalid:1, imp_replayable:1,
imp_dlm_fake:1, imp_server_timeout:1,
imp_initial_recov:1, imp_force_verify:1,
- imp_pingable:1, imp_resend_replay:1;
+ imp_pingable:1, imp_resend_replay:1,
+ imp_deactive:1;
__u32 imp_connect_op;
__u32 imp_connect_flags;
};
#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long)
#define OBD_IOC_DUMP_LOG _IOWR('f', 185, long)
#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long)
+#define OBD_IOC_START _IOWR('f', 187, long)
#define OBD_IOC_CATLOGLIST _IOWR('f', 190, long)
#define OBD_IOC_LLOG_INFO _IOWR('f', 191, long)
int __timed_out = 0; \
unsigned long irqflags; \
sigset_t blocked; \
+ signed long timeout_remaining; \
\
init_waitqueue_entry(&__wait, current); \
if (excl) \
else \
blocked = l_w_e_set_sigs(0); \
\
+ timeout_remaining = info->lwi_timeout; \
+ \
for (;;) { \
set_current_state(TASK_INTERRUPTIBLE); \
if (condition) \
break; \
- if (signal_pending(current)) { \
- if (info->lwi_on_signal) \
- info->lwi_on_signal(info->lwi_cb_data); \
- ret = -EINTR; \
- break; \
- } \
if (info->lwi_timeout && !__timed_out) { \
- if (schedule_timeout(info->lwi_timeout) == 0) { \
+ timeout_remaining = schedule_timeout(timeout_remaining); \
+ if (timeout_remaining == 0) { \
__timed_out = 1; \
if (!info->lwi_on_timeout || \
info->lwi_on_timeout(info->lwi_cb_data)) { \
} else { \
schedule(); \
} \
+ if (condition) \
+ break; \
+ if (signal_pending(current)) { \
+ if (__timed_out) { \
+ break; \
+ } else { \
+ /* We have to do this here because some signals */ \
+ /* are not blockable - ie from strace(1). */ \
+ /* In these cases we want to schedule_timeout() */ \
+ /* again, because we don't want that to return */ \
+ /* -EINTR when the RPC actually succeeded. */ \
+ /* the RECALC_SIGPENDING below will deliver the */ \
+ /* signal properly. */ \
+ SIGNAL_MASK_LOCK(current, irqflags); \
+ CLEAR_SIGPENDING; \
+ SIGNAL_MASK_UNLOCK(current, irqflags); \
+ } \
+ } \
} \
\
SIGNAL_MASK_LOCK(current, irqflags); \
RECALC_SIGPENDING; \
SIGNAL_MASK_UNLOCK(current, irqflags); \
\
+ if (__timed_out && signal_pending(current)) { \
+ if (info->lwi_on_signal) \
+ info->lwi_on_signal(info->lwi_cb_data); \
+ ret = -EINTR; \
+ } \
+ \
current->state = TASK_RUNNING; \
remove_wait_queue(&wq, &__wait); \
} while(0)
#else /* !__KERNEL__ */
#define __l_wait_event(wq, condition, info, ret, excl) \
do { \
- int timeout = info->lwi_timeout, elapse; \
+ long timeout = info->lwi_timeout, elapse, last = 0; \
int __timed_out = 0; \
- long last; \
\
- last = time(NULL); \
+ if (info->lwi_timeout == 0) \
+ timeout = 1000000000; \
+ else \
+ last = time(NULL); \
+ \
for (;;) { \
if (condition) \
break; \
if (liblustre_wait_event(timeout)) { \
- if (timeout == 0) \
+ if (timeout == 0 || info->lwi_timeout == 0) \
continue; \
- elapse = (int) (time(NULL) - last); \
+ elapse = time(NULL) - last; \
if (elapse) { \
last += elapse; \
timeout -= elapse; \
struct lustre_id lli_id; /* full lustre_id */
char *lli_symlink_name;
struct semaphore lli_open_sem;
+ struct semaphore lli_size_sem;
__u64 lli_maxbytes;
__u64 lli_io_epoch;
unsigned long lli_flags;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
struct inode lli_vfs_inode;
#endif
+ struct semaphore lli_och_sem; /* Protects access to och pointers
+ and their usage counters */
+ /* We need all three because every inode may be opened in different
+ modes */
+ struct obd_client_handle *lli_mds_read_och;
+ __u64 lli_open_fd_read_count;
+ struct obd_client_handle *lli_mds_write_och;
+ __u64 lli_open_fd_write_count;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_exec_count;
struct posix_acl *lli_acl_access;
};
#include <linux/lustre_idl.h>
#endif /* __KERNEL__ */
+#define LLAP_FROM_COOKIE(c) \
+ (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \
+ (struct ll_async_page *)(c))
+
#include <lustre/lustre_user.h>
#endif
int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res,
struct llog_logid *logid);
+int class_config_dump_handler(struct llog_handle * handle,
+ struct llog_rec_hdr *rec, void *data);
int llog_cat_put(struct llog_handle *cathandle);
int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
struct llog_cookie *reccookie, void *buf,
size = offset + sizeof(struct rw_semaphore *) * lcl->lcl_count;
OBD_FREE(lcl, size);
}
-
#endif
__u64 mcd_last_xid; /* xid for the last transaction */
__u32 mcd_last_result; /* result from last RPC */
__u32 mcd_last_data; /* per-op data (disposition for open &c.) */
- __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 64];
+ /* for MDS_CLOSE requests */
+ __u64 mcd_last_close_transno; /* last completed transaction ID */
+ __u64 mcd_last_close_xid; /* xid for the last transaction */
+ __u32 mcd_last_close_result; /* result from last RPC */
+ __u32 mcd_last_close_data; /* per-op data (disposition for open &c.) */
+ __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 88];
};
/* simple uid/gid mapping hash table */
#define IOC_REQUEST_CLOSE _IOWR('f', 35, long)
#define IOC_REQUEST_MAX_NR 35
-#define MDS_CHECK_RESENT(req, reconstruct) \
-{ \
- if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \
- struct mds_client_data *mcd = \
- req->rq_export->exp_mds_data.med_mcd; \
- if (mcd->mcd_last_xid == req->rq_xid) { \
- reconstruct; \
- RETURN(req->rq_repmsg->status); \
- } \
- DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", \
- mcd->mcd_last_xid); \
- } \
+/*
+ * MDS_CHECK_RESENT(req, reconstruct):
+ * For a request flagged MSG_RESENT, compare its xid against the last
+ * transaction xid and the last-close transaction xid saved (little-endian,
+ * on disk) in the client's mds_client_data.  On a match, run 'reconstruct'
+ * to rebuild the reply and RETURN the saved result instead of re-executing
+ * the operation; otherwise log that no saved reply exists and fall through
+ * so the request is processed normally.
+ */
+#define MDS_CHECK_RESENT(req, reconstruct) \
+{ \
+ if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \
+ struct mds_client_data *mcd = \
+ req->rq_export->exp_mds_data.med_mcd; \
+ \
+ if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) { \
+ reconstruct; \
+ RETURN(le32_to_cpu(mcd->mcd_last_result)); \
+ } \
+ if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) { \
+ reconstruct; \
+ RETURN(le32_to_cpu(mcd->mcd_last_close_result));\
+ } \
+ DEBUG_REQ(D_HA, req, "no reply for RESENT req" \
+ "(have "LPD64", and "LPD64")", \
+ mcd->mcd_last_xid, mcd->mcd_last_close_xid); \
+ } \
}
#endif
#define MDT_MAX_THREADS 32UL
#define MDT_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \
MDT_MAX_THREADS), 2UL)
-#define MDS_NBUFS (64 * smp_num_cpus)
+#define MDS_NBUFS (64 * smp_num_cpus)
#define MDS_BUFSIZE (8 * 1024)
/* Assume file name length = FNAME_MAX = 256 (true for extN).
* path name length = PATH_MAX = 4096
#define OST_MAX_THREADS 36UL
#define OST_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \
OST_MAX_THREADS), 2UL)
-#define OST_NBUFS (64 * smp_num_cpus)
+#define OST_NBUFS (64 * smp_num_cpus)
#define OST_BUFSIZE (8 * 1024)
/* OST_MAXREQSIZE ~= 1640 bytes =
* lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote
#define PTLBD_MAXREQSIZE 1024
struct ptlrpc_peer {
-/* bugfix #4615
- */
- ptl_process_id_t peer_id;
+ ptl_process_id_t peer_id;
struct ptlrpc_ni *peer_ni;
};
wait_queue_head_t *set_wakeup_ptr;
struct list_head set_requests;
set_interpreter_func set_interpret; /* completion callback */
- union ptlrpc_async_args set_args; /* completion context */
+ void *set_arg; /* completion context */
/* locked so that any old caller can communicate requests to
* the set holder who can then fold them into the lock-free set */
spinlock_t set_new_req_lock;
struct timeval rq_arrival_time; /* request arrival time */
struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */
struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer */
+#if CRAY_PORTALS
+ ptl_uid_t rq_uid; /* peer uid, used in MDS only */
+#endif
/* client-only incoming reply */
ptl_handle_md_t rq_reply_md_h;
struct ptlrpc_cb_id rq_reply_cbid;
struct ptlrpc_peer rq_peer; /* XXX see service.c can this be factored away? */
+ char rq_peerstr[PTL_NALFMT_SIZE];
struct obd_export *rq_export;
struct obd_import *rq_import;
int srv_n_difficult_replies; /* # 'difficult' replies */
int srv_n_active_reqs; /* # reqs being served */
int srv_rqbd_timeout; /* timeout before re-posting reqs */
-
+ int srv_watchdog_timeout; /* soft watchdog timeout, in ms */
+
__u32 srv_req_portal;
__u32 srv_rep_portal;
struct proc_dir_entry *srv_procroot;
struct lprocfs_stats *srv_stats;
-
+
struct ptlrpc_srv_ni srv_interfaces[0];
};
return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str));
}
-/* For bug #4615 */
static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str)
{
LASSERT(p->peer_ni != NULL);
void ptlrpc_commit_replies (struct obd_device *obd);
void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
- int req_portal, int rep_portal,
+ int req_portal, int rep_portal,
+ int watchdog_timeout, /* in ms */
svc_handler_t, char *name,
struct proc_dir_entry *proc_entry);
void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
#ifndef __LUSTRE_SMFS_H
#define __LUSTRE_SMFS_H
+#include <linux/lustre_fsfilt.h>
#include <linux/namei.h>
+
struct snap_inode_info {
int sn_flags; /*the flags indicated inode type */
int sn_gen; /*the inode generation*/
unsigned long from, unsigned long num);
extern int smfs_rec_setattr(struct inode *dir, struct dentry *dentry,
struct iattr *attr);
-extern int smfs_rec_precreate(struct dentry *dentry, int *num, struct obdo *oa);
-extern int smfs_rec_md(struct inode *inode, void * lmm, int lmm_size);
+extern int smfs_rec_precreate(struct dentry *dentry, int *num,
+ struct obdo *oa);
+extern int smfs_rec_md(struct inode *inode, void * lmm, int lmm_size,
+ enum ea_type type);
extern int smfs_rec_unpack(struct smfs_proc_args *args, char *record,
char **pbuf, int *opcode);
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
- */
+ *
+ * Copyright (C) 2001, 2002, 2003, 2004 Cluster File Systems, Inc.
+ *
+ * Author: <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * lustre VFS/process permission interface
+ */
+
#ifndef __LVFS_H__
#define __LVFS_H__
#include <linux/namei.h>
#include <linux/lustre_compat25.h>
#include <linux/lvfs_linux.h>
-#endif
+#endif
#ifdef LIBLUSTRE
#include <lvfs_user_fs.h>
#endif
};
+struct lvfs_obd_ctxt {
+ struct vfsmount *loc_mnt;
+ atomic_t loc_refcount;
+ char *loc_name;
+ struct list_head loc_list;
+};
+
#ifdef OBD_CTXT_DEBUG
#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC
#else
int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off);
int lustre_fsync(struct file *file);
long l_readdir(struct file * file, struct list_head *dentry_list);
-
+int lvfs_mount_fs(char *name, char *fstype, char *options, int flags,
+ struct lvfs_obd_ctxt **lvfs_ctxt);
+void lvfs_umount_fs(struct lvfs_obd_ctxt *lvfs_ctxt);
static inline void l_dput(struct dentry *de)
{
if (!de || IS_ERR(de))
#else
up(&dparent->d_inode->i_sem);
#endif
+
+ if (IS_ERR(dchild) || dchild->d_inode == NULL)
+ return dchild;
+
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("bad inode returned %lu/%u\n",
+ dchild->d_inode->i_ino, dchild->d_inode->i_generation);
+ dput(dchild);
+ dchild = ERR_PTR(-ENOENT);
+ }
+
return dchild;
}
const char *fo_fstype;
struct super_block *fo_sb;
struct vfsmount *fo_vfsmnt;
+ struct lvfs_obd_ctxt *fo_lvfs_ctxt;
int fo_group_count;
struct dentry *fo_dentry_O; /* the "O"bject directory dentry */
struct obd_service_time cl_enter_stime;
struct mdc_rpc_lock *cl_rpc_lock;
- struct mdc_rpc_lock *cl_setattr_lock;
+ struct mdc_rpc_lock *cl_setattr_lock;
+ struct mdc_rpc_lock *cl_close_lock;
struct osc_creator cl_oscc;
+ int cl_async:1;
};
/* Like a client, with some hangers-on. Keep mc_client_obd first so that we
struct super_block *mds_sb;
struct vfsmount *mds_vfsmnt;
struct dentry *mds_id_de;
+ struct lvfs_obd_ctxt *mds_lvfs_ctxt;
int mds_max_mdsize;
int mds_max_cookiesize;
struct file *mds_rcvd_filp;
gid_t mds_squash_gid;
ptl_nid_t mds_nosquash_nid;
atomic_t mds_real_clients;
+ atomic_t mds_open_count;
struct dentry *mds_id_dir;
int mds_obd_type;
struct dentry *mds_unnamed_dir; /* for mdt_obd_create only */
int master_group;
struct cmobd_write_service *write_srv;
};
-
+
+struct conf_obd {
+ struct super_block *cfobd_sb;
+ struct vfsmount *cfobd_vfsmnt;
+ struct dentry *cfobd_logs_dir;
+ struct dentry *cfobd_objects_dir;
+ struct dentry *cfobd_pending_dir;
+ struct llog_handle *cfobd_cfg_llh;
+ struct lvfs_obd_ctxt *cfobd_lvfs_ctxt;
+};
+
struct lov_tgt_desc {
struct obd_uuid uuid;
__u32 ltd_gen;
struct lov_desc desc;
int bufsize;
int refcount;
- int lo_catalog_loaded:1;
+ int lo_catalog_loaded:1, async:1;
struct semaphore lov_llog_sem;
unsigned long lov_connect_flags;
struct lov_tgt_desc *tgts;
struct mds_obd mds;
struct client_obd cli;
struct ost_obd ost;
- struct echo_client_obd echo_client;
+ struct echo_client_obd echocli;
struct echo_obd echo;
struct recovd_obd recovd;
struct lov_obd lov;
struct mgmtcli_obd mgmtcli;
struct lmv_obd lmv;
struct cm_obd cm;
+ struct conf_obd conf;
} u;
/* fields used by LProcFS */
int (*o_teardown_async_page)(struct obd_export *exp,
struct lov_stripe_md *lsm,
struct lov_oinfo *loi, void *cookie);
+ int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink);
int (*o_punch)(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *ea, obd_size start,
obd_size end, struct obd_trans_info *oti);
struct obd_uuid cfg_uuid;
ptl_nid_t cfg_local_nid;
};
-int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
- struct config_llog_instance *cfg);
int class_config_process_llog(struct llog_ctxt *ctxt, char *name,
struct config_llog_instance *cfg);
int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_valid compare);
void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-static inline int obd_check_conn(struct lustre_handle *conn)
-{
- struct obd_device *obd;
- if (!conn) {
- CERROR("NULL conn\n");
- RETURN(-ENOTCONN);
- }
-
- obd = class_conn2obd(conn);
- if (!obd) {
- CERROR("NULL obd\n");
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_attached) {
- CERROR("obd %d not attached\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_set_up) {
- CERROR("obd %d not setup\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_type) {
- CERROR("obd %d not typed\n", obd->obd_minor);
- RETURN(-ENODEV);
- }
-
- if (!obd->obd_type->typ_ops) {
- CERROR("obd_check_conn: obd %d no operations\n",
- obd->obd_minor);
- RETURN(-EOPNOTSUPP);
- }
- return 0;
-}
-
-
#define OBT(dev) (dev)->obd_type
#define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op
#define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op
#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
-/* Ensure obd_setup: used for disconnect which might be called while
- an obd is stopping. */
-#define OBD_CHECK_SETUP(conn, exp) \
-do { \
- if (!(conn)) { \
- CERROR("NULL connection\n"); \
- RETURN(-EINVAL); \
- } \
- \
- exp = class_conn2export(conn); \
- if (!(exp)) { \
- CERROR("No export for conn "LPX64"\n", (conn)->cookie); \
- RETURN(-EINVAL); \
- } \
- \
- if (!(exp)->exp_obd->obd_set_up) { \
- CERROR("Device %d not setup\n", \
- (exp)->exp_obd->obd_minor); \
- class_export_put(exp); \
- RETURN(-EINVAL); \
- } \
-} while (0)
-
-/* Ensure obd_setup and !obd_stopping. */
-#define OBD_CHECK_ACTIVE(conn, exp) \
-do { \
- if (!(conn)) { \
- CERROR("NULL connection\n"); \
- RETURN(-EINVAL); \
- } \
- \
- exp = class_conn2export(conn); \
- if (!(exp)) { \
- CERROR("No export for conn "LPX64"\n", (conn)->cookie); \
- RETURN(-EINVAL); \
- } \
- \
- if (!(exp)->exp_obd->obd_set_up || (exp)->exp_obd->obd_stopping) { \
- CERROR("Device %d not setup\n", \
- (exp)->exp_obd->obd_minor); \
- class_export_put(exp); \
- RETURN(-EINVAL); \
- } \
-} while (0)
-
/* Ensure obd_setup: used for cleanup which must be called
while obd is stopping */
-#define OBD_CHECK_DEV_STOPPING(obd) \
+#define OBD_CHECK_DEV(obd) \
do { \
if (!(obd)) { \
CERROR("NULL device\n"); \
RETURN(-ENODEV); \
} \
- \
+} while (0)
+
+#define OBD_CHECK_DEV_STOPPING(obd) \
+do { \
+ OBD_CHECK_DEV(obd); \
if (!(obd)->obd_set_up) { \
CERROR("Device %d not setup\n", \
(obd)->obd_minor); \
/* ensure obd_setup and !obd_stopping */
#define OBD_CHECK_DEV_ACTIVE(obd) \
do { \
- if (!(obd)) { \
- CERROR("NULL device\n"); \
- RETURN(-ENODEV); \
- } \
- \
+ OBD_CHECK_DEV(obd); \
if (!(obd)->obd_set_up || (obd)->obd_stopping) { \
CERROR("Device %d not setup\n", \
(obd)->obd_minor); \
coffset = (unsigned int)(obd)->obd_cntr_base + \
OBD_COUNTER_OFFSET(op); \
LASSERT(coffset < (obd)->obd_stats->ls_num); \
- lprocfs_counter_incr((obd)->obd_stats, coffset); \
+ lprocfs_counter_incr((obd)->obd_stats, coffset);\
}
#define MD_COUNTER_OFFSET(op) \
do { \
if (!OBT(obd) || !MDP((obd), op)) {\
if (err) \
- CERROR("obd_md" #op ": dev %d no operation\n", \
- obd->obd_minor); \
+ CERROR("md_" #op ": dev %s/%d no operation\n", \
+ obd->obd_name, obd->obd_minor); \
RETURN(err); \
} \
} while (0)
RETURN(-EOPNOTSUPP); \
} \
if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \
- CERROR("obd_" #op ": dev %d no operation\n", \
- (exp)->exp_obd->obd_minor); \
+ CERROR("obd_" #op ": dev %s/%d no operation\n", \
+ (exp)->exp_obd->obd_name, \
+ (exp)->exp_obd->obd_minor); \
RETURN(-EOPNOTSUPP); \
} \
} while (0)
do { \
if (!OBT(obd) || !OBP((obd), op)) {\
if (err) \
- CERROR("obd_" #op ": dev %d no operation\n", \
- obd->obd_minor); \
+ CERROR("obd_" #op ": dev %s/%d no operation\n", \
+ obd->obd_name, obd->obd_minor); \
RETURN(err); \
} \
} while (0)
RETURN(-EOPNOTSUPP); \
} \
if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \
- CERROR("obd_" #op ": dev %d no operation\n", \
- (exp)->exp_obd->obd_minor); \
+ CERROR("obd_" #op ": dev %s/%d no operation\n", \
+ (exp)->exp_obd->obd_name, \
+ (exp)->exp_obd->obd_minor); \
RETURN(-EOPNOTSUPP); \
} \
} while (0)
RETURN(rc);
}
+/*
+ * Ask the obd beneath 'exp' to adjust the known size of 'lsm' to 'size'
+ * via its o_adjust_kms method ('shrink' presumably selects whether the
+ * value may be reduced -- confirm against the implementations).
+ * Returns -EOPNOTSUPP if the obd does not implement o_adjust_kms,
+ * otherwise the method's return value.
+ */
+static inline int obd_adjust_kms(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ obd_off size, int shrink)
+{
+ int rc;
+ ENTRY;
+
+ OBD_CHECK_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP);
+ OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms);
+
+ rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
+ RETURN(rc);
+}
+
static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
int len, void *karg, void *uarg)
{
struct obd_import *imp,
enum obd_import_event event)
{
+ if (!obd) {
+ CERROR("NULL device\n");
+ EXIT;
+ return;
+ }
if (obd->obd_set_up && OBP(obd, import_event)) {
OBD_COUNTER_INCREMENT(obd, import_event);
OBP(obd, import_event)(obd, imp, event);
static inline int obd_notify(struct obd_device *obd, struct obd_device *watched,
int active, void *data)
{
+ OBD_CHECK_DEV(obd);
if (!obd->obd_set_up) {
CERROR("obd %s not set up\n", obd->obd_name);
return -EINVAL;
struct obd_device *observer)
{
ENTRY;
+ OBD_CHECK_DEV(obd);
if (obd->obd_observer && observer)
RETURN(-EALREADY);
obd->obd_observer = observer;
static inline void obdo_free(struct obdo *oa)
{
- if (!oa)
- return;
OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
}
#define OBD_LOV_DEVICENAME "lov"
-struct lov_brw_async_args {
- struct lov_stripe_md *aa_lsm;
- struct obdo *aa_obdos;
- struct obdo *aa_oa;
- struct brw_page *aa_ioarr;
- obd_count aa_oa_bufs;
-};
-
-struct lov_getattr_async_args {
- struct lov_stripe_md *aa_lsm;
- struct obdo *aa_oa;
- struct obdo *aa_obdos;
- struct lov_obd *aa_lov;
-};
-
static inline int lov_stripe_md_size(int stripes)
{
return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo);
#include <linux/lustre_compat25.h>
/* global variables */
-extern atomic_t obd_memory;
extern int obd_memmax;
+extern atomic_t obd_memory;
+
extern unsigned int obd_fail_loc;
-extern unsigned int obd_dump_on_timeout;
extern unsigned int obd_timeout;
extern unsigned int ldlm_timeout;
extern char obd_lustre_upcall[128];
extern unsigned int obd_sync_filter;
+extern unsigned int obd_dump_on_timeout;
extern wait_queue_head_t obd_race_waitq;
#define OBD_FAIL_MDS 0x100
#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126
#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127
#define OBD_FAIL_MDS_ALLOC_OBDO 0x128
+#define OBD_FAIL_MDS_PAUSE_OPEN 0x129
+#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a
+#define OBD_FAIL_MDS_OPEN_CREATE 0x12b
#define OBD_FAIL_OST 0x200
#define OBD_FAIL_OST_CONNECT_NET 0x201
#define OBD_FAIL_SVCGSS_INIT_REQ 0x780
#define OBD_FAIL_SVCGSS_INIT_REP 0x781
+#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800
+
/* preparation for a more advanced failure testbed (not functional yet) */
#define OBD_FAIL_MASK_SYS 0x0000FF00
#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS)
} \
} while(0)
+/*
+ * OBD_FAIL_GOTO(id, label, ret):
+ * Fault-injection helper -- if failure point 'id' fires (once), jump to
+ * 'label' with return value 'ret' via GOTO.
+ */
+#define OBD_FAIL_GOTO(id, label, ret) \
+do { \
+ if (OBD_FAIL_CHECK_ONCE(id)) { \
+ GOTO(label, (ret)); \
+ } \
+} while(0)
+
#define OBD_FAIL_TIMEOUT(id, secs) \
do { \
if (OBD_FAIL_CHECK_ONCE(id)) { \
extern atomic_t portal_kmemory;
-#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \
-do { \
- (ptr) = kmalloc(size, (gfp_mask)); \
- if ((ptr) == NULL) { \
- CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
- (int)(size), __FILE__, __LINE__); \
- CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
- atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
- } else { \
- memset(ptr, 0, size); \
- atomic_add(size, &obd_memory); \
- if (atomic_read(&obd_memory) > obd_memmax) \
- obd_memmax = atomic_read(&obd_memory); \
- CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
- (int)(size), ptr, atomic_read(&obd_memory)); \
- } \
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+#define MEM_LOC_LEN 128
+
+/* One tracked allocation in the CONFIG_DEBUG_MEMORY accounting hash. */
+struct mtrack {
+ struct hlist_node m_hash; /* linkage in the memdbg hash table */
+ char m_loc[MEM_LOC_LEN]; /* "file:line" of the allocation site */
+ void *m_ptr; /* the allocated pointer being tracked */
+ int m_size; /* size passed at allocation time */
+};
+
+void lvfs_memdbg_insert(struct mtrack *mt);
+void lvfs_memdbg_remove(struct mtrack *mt);
+struct mtrack *lvfs_memdbg_find(void *ptr);
+
+int lvfs_memdbg_check_insert(struct mtrack *mt);
+struct mtrack *lvfs_memdbg_check_remove(void *ptr);
+
+/*
+ * Allocate and fill a tracking record for the allocation of 'size' bytes
+ * at 'ptr' made at file:line.  Returns NULL if the tracker itself cannot
+ * be allocated.
+ *
+ * NOTE(review): snprintf already NUL-terminates within the given size,
+ * so 'sizeof(...) - 1' wastes one byte but is otherwise harmless.
+ */
+static inline struct mtrack *
+__new_mtrack(void *ptr, int size,
+ char *file, int line)
+{
+ struct mtrack *mt;
+
+ mt = kmalloc(sizeof(*mt), GFP_KERNEL);
+ if (!mt)
+ return NULL;
+
+ snprintf(mt->m_loc, sizeof(mt->m_loc) - 1,
+ "%s:%d", file, line);
+
+ mt->m_size = size;
+ mt->m_ptr = ptr;
+ return mt;
+}
+
+/* Release a tracking record created by __new_mtrack(). */
+static inline void
+__free_mtrack(struct mtrack *mt)
+{
+ kfree(mt);
+}
+
+/*
+ * Register the allocation of 'size' bytes at 'ptr' (made at file:line)
+ * with the memory debug hash.  If the hash refuses the insert the new
+ * tracker is freed again.  Returns 0 only when the tracker itself could
+ * not be allocated, 1 otherwise.
+ */
+static inline int
+__get_mtrack(void *ptr, int size,
+ char *file, int line)
+{
+ struct mtrack *mt;
+
+ mt = __new_mtrack(ptr, size, file, line);
+ if (!mt) {
+ CWARN("can't allocate new memory track\n");
+ return 0;
+ }
+
+ if (!lvfs_memdbg_check_insert(mt))
+ __free_mtrack(mt);
+
+ return 1;
+}
+
+/*
+ * Unregister 'ptr' from the memory debug hash at free time.  Warns and
+ * returns 0 when 'ptr' was never tracked (freeing unallocated memory);
+ * warns (but still releases the tracker and returns 1) when the freed
+ * size differs from the size recorded at allocation.
+ */
+static inline int
+__put_mtrack(void *ptr, int size,
+ char *file, int line)
+{
+ struct mtrack *mt;
+
+ if (!(mt = lvfs_memdbg_check_remove(ptr))) {
+ CWARN("ptr 0x%p is not allocated. Attempt to free "
+ "not allocated memory at %s:%d\n", ptr,
+ file, line);
+ return 0;
+ } else {
+ if (mt->m_size != size) {
+ CWARN("freeing memory chunk of different size "
+ "than allocated (%d != %d) at %s:%d\n",
+ mt->m_size, size, file, line);
+ }
+ __free_mtrack(mt);
+ return 1;
+ }
+}
+
+#define get_mtrack(ptr, size, file, line) \
+ __get_mtrack((ptr), (size), (file), (line))
+
+#define put_mtrack(ptr, size, file, line) \
+ __put_mtrack((ptr), (size), (file), (line))
+
+#else /* !CONFIG_DEBUG_MEMORY */
+
+#define get_mtrack(ptr, size, file, line) \
+ do {} while (0)
+
+#define put_mtrack(ptr, size, file, line) \
+ do {} while (0)
+#endif /* !CONFIG_DEBUG_MEMORY */
+
+/*
+ * OBD_ALLOC_GFP(ptr, size, gfp_mask):
+ * kmalloc wrapper: on failure logs the site and current Lustre/Portals
+ * memory totals (ptr stays NULL); on success zeroes the buffer, adds
+ * 'size' to the obd_memory accounting, updates the obd_memmax high-water
+ * mark and registers the allocation with the debug tracker (get_mtrack).
+ */
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \
+do { \
+ (ptr) = kmalloc(size, (gfp_mask)); \
+ if ((ptr) == NULL) { \
+ CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
+ (int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \
+ } else { \
+ memset(ptr, 0, size); \
+ atomic_add(size, &obd_memory); \
+ if (atomic_read(&obd_memory) > obd_memmax) \
+ obd_memmax = atomic_read(&obd_memory); \
+ get_mtrack((ptr), (size), __FILE__, __LINE__); \
+ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
+ (int)(size), (ptr), atomic_read(&obd_memory)); \
+ } \
#ifndef OBD_GFP_MASK
#ifdef __arch_um__
# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
#else
-# define OBD_VMALLOC(ptr, size) \
-do { \
- (ptr) = vmalloc(size); \
- if ((ptr) == NULL) { \
- CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
- (int)(size), __FILE__, __LINE__); \
- CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
- atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
- } else { \
- memset(ptr, 0, size); \
- atomic_add(size, &obd_memory); \
- if (atomic_read(&obd_memory) > obd_memmax) \
- obd_memmax = atomic_read(&obd_memory); \
- CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
- (int)(size), ptr, atomic_read(&obd_memory)); \
- } \
+# define OBD_VMALLOC(ptr, size) \
+do { \
+ (ptr) = vmalloc(size); \
+ if ((ptr) == NULL) { \
+ CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \
+ (int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \
+ } else { \
+ memset(ptr, 0, size); \
+ atomic_add(size, &obd_memory); \
+ if (atomic_read(&obd_memory) > obd_memmax) \
+ obd_memmax = atomic_read(&obd_memory); \
+ get_mtrack((ptr), (size), __FILE__, __LINE__); \
+ CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
+ (int)(size), ptr, atomic_read(&obd_memory)); \
+ } \
} while (0)
#endif
#endif
#if POISON_BULK
-#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \
+#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \
kunmap(page); } while (0)
#else
#define POISON_PAGE(page, val) do { } while (0)
#endif
-#define OBD_FREE(ptr, size) \
-do { \
- LASSERT(ptr); \
- atomic_sub(size, &obd_memory); \
- CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
- (int)(size), ptr, atomic_read(&obd_memory)); \
- POISON(ptr, 0x5a, size); \
- kfree(ptr); \
- (ptr) = (void *)0xdeadbeef; \
+/*
+ * OBD_FREE(ptr, size):
+ * Counterpart of OBD_ALLOC: asserts 'ptr' is non-NULL, unregisters it
+ * from the debug tracker (put_mtrack), subtracts 'size' from the
+ * obd_memory accounting, poisons the buffer with 0x5a before kfree, and
+ * resets 'ptr' to 0xdeadbeef so any later use faults loudly.
+ */
+#define OBD_FREE(ptr, size) \
+do { \
+ LASSERT(ptr); \
+ put_mtrack((ptr), (size), __FILE__, __LINE__); \
+ atomic_sub(size, &obd_memory); \
+ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \
+ (int)(size), ptr, atomic_read(&obd_memory)); \
+ POISON(ptr, 0x5a, size); \
+ kfree(ptr); \
+ (ptr) = (void *)0xdeadbeef; \
+} while (0)
#ifdef __arch_um__
# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size)
#else
-# define OBD_VFREE(ptr, size) \
-do { \
- LASSERT(ptr); \
- atomic_sub(size, &obd_memory); \
- CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \
- (int)(size), ptr, atomic_read(&obd_memory)); \
- POISON(ptr, 0x5a, size); \
- vfree(ptr); \
- (ptr) = (void *)0xdeadbeef; \
+# define OBD_VFREE(ptr, size) \
+do { \
+ LASSERT(ptr); \
+ put_mtrack((ptr), (size), __FILE__, __LINE__); \
+ atomic_sub(size, &obd_memory); \
+ CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \
+ (int)(size), ptr, atomic_read(&obd_memory)); \
+ POISON(ptr, 0x5a, size); \
+ vfree(ptr); \
+ (ptr) = (void *)0xdeadbeef; \
} while (0)
#endif
-/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
- * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd
- * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
-#define OBD_SLAB_ALLOC(ptr, slab, type, size) \
-do { \
- LASSERT(!in_interrupt()); \
- (ptr) = kmem_cache_alloc(slab, (type)); \
- if ((ptr) == NULL) { \
- CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
- (int)(size), __FILE__, __LINE__); \
- CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
- atomic_read(&obd_memory), atomic_read(&portal_kmemory));\
- } else { \
- memset(ptr, 0, size); \
- atomic_add(size, &obd_memory); \
- if (atomic_read(&obd_memory) > obd_memmax) \
- obd_memmax = atomic_read(&obd_memory); \
- CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\
- (int)(size), ptr, atomic_read(&obd_memory)); \
- } \
+/*
+ * we memset() the slab object to 0 when allocation succeeds, so DO NOT HAVE A
+ * CTOR THAT DOES ANYTHING. Its work will be cleared here. We'd love to assert
+ * on that, but slab.c keeps kmem_cache_s all to itself.
+ */
+#define OBD_SLAB_ALLOC(ptr, slab, type, size) \
+do { \
+ LASSERT(!in_interrupt()); \
+ (ptr) = kmem_cache_alloc(slab, (type)); \
+ if ((ptr) == NULL) { \
+ CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
+ (int)(size), __FILE__, __LINE__); \
+ CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+ atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \
+ } else { \
+ memset(ptr, 0, size); \
+ atomic_add(size, &obd_memory); \
+ if (atomic_read(&obd_memory) > obd_memmax) \
+ obd_memmax = atomic_read(&obd_memory); \
+ get_mtrack((ptr), (size), __FILE__, __LINE__); \
+ CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n", \
+ (int)(size), ptr, atomic_read(&obd_memory)); \
+ } \
} while (0)
-#define OBD_SLAB_FREE(ptr, slab, size) \
-do { \
- LASSERT(ptr); \
- CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \
- (int)(size), ptr, atomic_read(&obd_memory)); \
- atomic_sub(size, &obd_memory); \
- POISON(ptr, 0x5a, size); \
- kmem_cache_free(slab, ptr); \
- (ptr) = (void *)0xdeadbeef; \
+#define OBD_SLAB_FREE(ptr, slab, size) \
+do { \
+ LASSERT(ptr); \
+ CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \
+ (int)(size), ptr, atomic_read(&obd_memory)); \
+ put_mtrack((ptr), (size), __FILE__, __LINE__); \
+ atomic_sub(size, &obd_memory); \
+ POISON(ptr, 0x5a, size); \
+ kmem_cache_free(slab, ptr); \
+ (ptr) = (void *)0xdeadbeef; \
} while (0)
#endif
*/
#ifndef _LUSTRE_USER_H
#define _LUSTRE_USER_H
+
+#ifdef HAVE_ASM_TYPES_H
#include <asm/types.h>
+#else
+#include "types.h"
+#endif
+
#ifdef __KERNEL__
#include <linux/string.h>
#else
/* for statfs() */
#define LL_SUPER_MAGIC 0x0BD00BD0
-
#define IOC_MDC_TYPE 'i'
#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
#define IOC_MDC_SHOWFID _IOWR(IOC_MDC_TYPE, 23, struct lustre_id *)
#ifndef EXT3_IOC_GETFLAGS
-#define EXT3_IOC_GETFLAGS _IOR('f', 1, long)
-#define EXT3_IOC_SETFLAGS _IOW('f', 2, long)
-#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
-#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
-#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
-#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
+#define EXT3_IOC_GETFLAGS _IOR('f', 1, long)
+#define EXT3_IOC_SETFLAGS _IOW('f', 2, long)
+#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
+#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
+#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
+#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
#endif
#define LL_IOC_GETFLAGS _IOR ('f', 151, long)
--- /dev/null
+#ifndef _LUSTRE_TYPES_H
+#define _LUSTRE_TYPES_H
+
+typedef unsigned short umode_t;
+
+#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
+ !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H))
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#endif
--- /dev/null
+#
+# Automatically generated make config: don't edit
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# General setup
+#
+CONFIG_IA64=y
+# CONFIG_HIGHPTE is not set
+CONFIG_HIGHMEM=y
+CONFIG_HIGHIO=y
+# CONFIG_ISA is not set
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SBUS is not set
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+CONFIG_IA64_GENERIC=y
+# CONFIG_IA64_DIG is not set
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_SGI_SN1 is not set
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+CONFIG_IA64_MCA=y
+CONFIG_PM=y
+CONFIG_KCORE_ELF=y
+CONFIG_FORCE_MAX_ZONEORDER=15
+# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set
+CONFIG_HUGETLB_PAGE_SIZE_256MB=y
+# CONFIG_HUGETLB_PAGE_SIZE_64MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_256KB is not set
+# CONFIG_IA64_PAL_IDLE is not set
+CONFIG_SMP=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_EFI_VARS=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_NET=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=m
+CONFIG_AUDIT=m
+CONFIG_ACPI=y
+CONFIG_ACPI_EFI=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_KERNEL_CONFIG=y
+
+#
+# ACPI Support
+#
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI=y
+CONFIG_ACPI_EFI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_PCI=y
+CONFIG_PCI_NAMES=y
+CONFIG_HOTPLUG=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+# CONFIG_HOTPLUG_PCI_COMPAQ is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+CONFIG_HOTPLUG_PCI_ACPI=m
+
+#
+# PCMCIA/CardBus support
+#
+# CONFIG_PCMCIA is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=y
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_TUX=m
+CONFIG_TUX_EXTCGI=y
+# CONFIG_TUX_EXTENDED_LOG is not set
+# CONFIG_TUX_DEBUG is not set
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_NAT=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_TOS=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_UNCLEAN=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_MIRROR=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_NAT_LOCAL=y
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP_NF_COMPAT_IPCHAINS=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_COMPAT_IPFWADM=m
+CONFIG_IP_NF_NAT_NEEDED=y
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=16
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=y
+# CONFIG_KHTTPD is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=m
+
+#
+#
+#
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+
+#
+# Appletalk devices
+#
+CONFIG_DEV_APPLETALK=y
+CONFIG_COPS_DAYNA=y
+CONFIG_COPS_TANGENT=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_DECNET=m
+CONFIG_DECNET_SIOCGIFCONF=y
+CONFIG_DECNET_ROUTER=y
+CONFIG_DECNET_ROUTE_FWMARK=y
+CONFIG_BRIDGE=m
+# CONFIG_X25 is not set
+CONFIG_EDP2=m
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_CSZ=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+CONFIG_BLK_CPQ_CISS_DA=m
+CONFIG_CISS_SCSI_TAPE=y
+# CONFIG_CISS_MONITOR_THREAD is not set
+CONFIG_BLK_DEV_DAC960=m
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_STATS=y
+CONFIG_DISKDUMP=m
+
+#
+# IEEE 1394 (FireWire) support (EXPERIMENTAL)
+#
+CONFIG_IEEE1394=m
+
+#
+# Device Drivers
+#
+
+#
+# Texas Instruments PCILynx requires I2C bit-banging
+#
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+CONFIG_IEEE1394_SBP2_PHYS_DMA=y
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_PCI=m
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_LAN=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_LVM=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=m
+# CONFIG_FUSION_BOOT is not set
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_ISENSE is not set
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+CONFIG_NET_FC=y
+
+#
+# ATA/IDE/MFM/RLL support
+#
+CONFIG_IDE=y
+
+#
+# IDE, ATA and ATAPI Block devices
+#
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+# CONFIG_IDEDISK_STROKE is not set
+# CONFIG_BLK_DEV_IDECS is not set
+CONFIG_BLK_DEV_IDECD=m
+CONFIG_BLK_DEV_IDETAPE=m
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+# CONFIG_BLK_DEV_CMD640 is not set
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+# CONFIG_BLK_DEV_ISAPNP is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_PCI_WIP is not set
+CONFIG_BLK_DEV_ADMA100=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+# CONFIG_WDC_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+# CONFIG_AMD74XX_OVERRIDE is not set
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+# CONFIG_PDC202XX_BURST is not set
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+# CONFIG_BLK_DEV_RZ1000 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_DMA_NONPCI is not set
+CONFIG_BLK_DEV_PDC202XX=y
+CONFIG_BLK_DEV_IDE_MODES=y
+CONFIG_BLK_DEV_ATARAID=m
+CONFIG_BLK_DEV_ATARAID_PDC=m
+CONFIG_BLK_DEV_ATARAID_HPT=m
+CONFIG_BLK_DEV_ATARAID_SII=m
+
+#
+# SCSI support
+#
+CONFIG_SCSI=m
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_SD_EXTRA_DEVS=256
+CONFIG_SD_IOSTATS=y
+CONFIG_SCSI_DUMP=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_SR_EXTRA_DEVS=4
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_DEBUG_QUEUES=y
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_PROBE_EISA_VL is not set
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC79XX=m
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC7XXX_OLD=m
+CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y
+CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_OLD_PROC_STATS=y
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+CONFIG_SCSI_MEGARAID=m
+CONFIG_SCSI_MEGARAID2=m
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_SVW=m
+CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_SATA_NV=m
+CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_SATA_SX4=m
+CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIS=m
+CONFIG_SCSI_SATA_VIA=m
+CONFIG_SCSI_SATA_VITESSE=m
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+CONFIG_SCSI_GDTH=m
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+CONFIG_SCSI_IPS=m
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_NCR53C8XX=m
+CONFIG_SCSI_SYM53C8XX=m
+CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8
+CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32
+CONFIG_SCSI_NCR53C8XX_SYNC=40
+# CONFIG_SCSI_NCR53C8XX_PROFILE is not set
+# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set
+# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+CONFIG_SCSI_QLOGIC_FAS=m
+CONFIG_SCSI_QLOGIC_ISP=m
+CONFIG_SCSI_QLOGIC_FC=m
+# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
+CONFIG_SCSI_QLOGIC_1280=m
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+CONFIG_SCSI_NSP32=m
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+# CONFIG_SUNLANCE is not set
+CONFIG_HAPPYMEAL=m
+# CONFIG_SUNBMAC is not set
+# CONFIG_SUNQE is not set
+CONFIG_SUNGEM=m
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_ELMC is not set
+# CONFIG_ELMC_II is not set
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+CONFIG_AMD8111_ETH=m
+CONFIG_ADAPTEC_STARFIRE=m
+# CONFIG_APRICOT is not set
+CONFIG_B44=m
+# CONFIG_CS89x0 is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+CONFIG_TULIP_MMIO=y
+# CONFIG_DE4X5 is not set
+# CONFIG_DGRS is not set
+CONFIG_DM9102=m
+CONFIG_EEPRO100=m
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=m
+# CONFIG_LNE390 is not set
+CONFIG_FEALNX=m
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+# CONFIG_NE3210 is not set
+# CONFIG_ES3210 is not set
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_SIS900=m
+CONFIG_EPIC100=m
+# CONFIG_SUNDANCE is not set
+# CONFIG_SUNDANCE_MMIO is not set
+CONFIG_TLAN=m
+CONFIG_TC35815=m
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+CONFIG_DL2K=m
+CONFIG_E1000=m
+CONFIG_E1000_NAPI=y
+# CONFIG_MYRI_SBUS is not set
+CONFIG_NS83820=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_R8169=m
+CONFIG_SK98LIN=m
+CONFIG_TIGON3=m
+# CONFIG_FDDI is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices
+#
+CONFIG_TR=y
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+# CONFIG_TMS380TR is not set
+CONFIG_NET_FC=y
+CONFIG_IPHASE5526=m
+# CONFIG_RCPCI is not set
+CONFIG_SHAPER=m
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input core support
+#
+CONFIG_INPUT=m
+CONFIG_INPUT_KEYBDEV=m
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=m
+
+#
+# Character devices
+#
+CONFIG_VT=y
+# CONFIG_ECC is not set
+CONFIG_VT_CONSOLE=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_CONSOLE=y
+CONFIG_SERIAL_HCDP=y
+CONFIG_SERIAL_ACPI=y
+CONFIG_HP_DIVA=y
+CONFIG_SERIAL_EXTENDED=y
+CONFIG_SERIAL_MANY_PORTS=y
+CONFIG_SERIAL_SHARE_IRQ=y
+# CONFIG_SERIAL_DETECT_IRQ is not set
+CONFIG_SERIAL_MULTIPORT=y
+# CONFIG_HUB6 is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=2048
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+CONFIG_MK712_MOUSE=m
+
+#
+# Joysticks
+#
+CONFIG_INPUT_GAMEPORT=m
+CONFIG_INPUT_NS558=m
+CONFIG_INPUT_LIGHTNING=m
+CONFIG_INPUT_PCIGAME=m
+CONFIG_INPUT_CS461X=m
+CONFIG_INPUT_EMU10K1=m
+CONFIG_INPUT_SERIO=m
+CONFIG_INPUT_SERPORT=m
+
+#
+# Joysticks
+#
+CONFIG_INPUT_ANALOG=m
+CONFIG_INPUT_A3D=m
+CONFIG_INPUT_ADI=m
+CONFIG_INPUT_COBRA=m
+CONFIG_INPUT_GF2K=m
+CONFIG_INPUT_GRIP=m
+CONFIG_INPUT_INTERACT=m
+CONFIG_INPUT_TMDC=m
+CONFIG_INPUT_SIDEWINDER=m
+CONFIG_INPUT_IFORCE_USB=m
+CONFIG_INPUT_IFORCE_232=m
+CONFIG_INPUT_WARRIOR=m
+CONFIG_INPUT_MAGELLAN=m
+CONFIG_INPUT_SPACEORB=m
+CONFIG_INPUT_SPACEBALL=m
+CONFIG_INPUT_STINGER=m
+# CONFIG_INPUT_DB9 is not set
+# CONFIG_INPUT_GAMECON is not set
+# CONFIG_INPUT_TURBOGRAFX is not set
+# CONFIG_QIC02_TAPE is not set
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_KCS=m
+CONFIG_IPMI_WATCHDOG=m
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_HANGCHECK_DELAY=m
+# CONFIG_SCx200_GPIO is not set
+CONFIG_INTEL_RNG=m
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+CONFIG_AGP=m
+CONFIG_AGP_INTEL=y
+# CONFIG_AGP_I810 is not set
+# CONFIG_AGP_VIA is not set
+# CONFIG_AGP_AMD is not set
+CONFIG_AGP_AMD_8151=y
+# CONFIG_AGP_SIS is not set
+# CONFIG_AGP_ALI is not set
+# CONFIG_AGP_SWORKS is not set
+CONFIG_AGP_I460=y
+CONFIG_AGP_HP_ZX1=y
+CONFIG_DRM=y
+# CONFIG_DRM_OLD is not set
+
+#
+# DRM 4.1 drivers
+#
+CONFIG_DRM_NEW=y
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_GAMMA=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+# CONFIG_DRM_I810 is not set
+# CONFIG_DRM_I810_XFREE_41 is not set
+# CONFIG_DRM_I830 is not set
+CONFIG_DRM_MGA=m
+# CONFIG_DRM_SIS is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+CONFIG_QUOTA=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+# CONFIG_QIFACE_COMPAT is not set
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+CONFIG_HFS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_XATTR_SHARING=y
+CONFIG_EXT3_FS_XATTR_USER=y
+CONFIG_EXT3_FS_XATTR_TRUSTED=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_JBD=m
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_UMSDOS_FS=m
+CONFIG_VFAT_FS=m
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_CRAMFS=m
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_DEBUG=y
+# CONFIG_JFS_STATISTICS is not set
+CONFIG_MINIX_FS=m
+CONFIG_VXFS_FS=m
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+CONFIG_ROMFS_FS=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_XATTR_SHARING=y
+CONFIG_EXT2_FS_XATTR_USER=y
+CONFIG_SYSV_FS=m
+CONFIG_UDF_FS=m
+CONFIG_UDF_RW=y
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_CODA_FS=m
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFS_ACL=y
+# CONFIG_ROOT_NFS is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_ACL=y
+CONFIG_NFSD_TCP=y
+CONFIG_SUNRPC=m
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_FS_MBCACHE=y
+CONFIG_FS_POSIX_ACL=y
+CONFIG_FS_MBCACHE=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_SMB_NLS=y
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Console drivers
+#
+CONFIG_VGA_CONSOLE=y
+
+#
+# Frame-buffer support
+#
+CONFIG_FB=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FB_RIVA is not set
+# CONFIG_FB_CLGEN is not set
+# CONFIG_FB_PM2 is not set
+CONFIG_FB_PM3=m
+# CONFIG_FB_CYBER2000 is not set
+CONFIG_FB_VGA16=m
+CONFIG_FB_MATROX=m
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G450=y
+CONFIG_FB_MATROX_G100=y
+# CONFIG_FB_MATROX_PROC is not set
+CONFIG_FB_MATROX_MULTIHEAD=y
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_RADEON is not set
+CONFIG_FB_ATY128=m
+# CONFIG_FB_INTEL is not set
+# CONFIG_FB_SIS is not set
+CONFIG_FB_NEOMAGIC=m
+CONFIG_FB_3DFX=m
+CONFIG_FB_VOODOO1=m
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FBCON_ADVANCED is not set
+CONFIG_FBCON_CFB8=y
+CONFIG_FBCON_CFB16=y
+CONFIG_FBCON_CFB24=y
+CONFIG_FBCON_CFB32=y
+CONFIG_FBCON_VGA_PLANES=m
+CONFIG_FBCON_HGA=m
+# CONFIG_FBCON_FONTWIDTH8_ONLY is not set
+# CONFIG_FBCON_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_PCI_CONSOLE=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+CONFIG_SOUND_ALI5455=m
+# CONFIG_SOUND_BT878 is not set
+CONFIG_SOUND_CMPCI=m
+CONFIG_SOUND_CMPCI_FM=y
+CONFIG_SOUND_CMPCI_FMIO=388
+CONFIG_SOUND_CMPCI_FMIO=388
+CONFIG_SOUND_CMPCI_MIDI=y
+CONFIG_SOUND_CMPCI_MPUIO=330
+CONFIG_SOUND_CMPCI_JOYSTICK=y
+CONFIG_SOUND_CMPCI_CM8738=y
+# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set
+CONFIG_SOUND_CMPCI_SPDIFLOOP=y
+CONFIG_SOUND_CMPCI_SPEAKERS=2
+CONFIG_SOUND_EMU10K1=m
+CONFIG_MIDI_EMU10K1=y
+CONFIG_SOUND_AUDIGY=m
+CONFIG_SOUND_FUSION=m
+CONFIG_SOUND_CS4281=m
+CONFIG_SOUND_ES1370=m
+CONFIG_SOUND_ES1371=m
+CONFIG_SOUND_ESSSOLO1=m
+CONFIG_SOUND_MAESTRO=m
+CONFIG_SOUND_MAESTRO3=m
+CONFIG_SOUND_FORTE=m
+CONFIG_SOUND_ICH=m
+CONFIG_SOUND_RME96XX=m
+CONFIG_SOUND_SONICVIBES=m
+CONFIG_SOUND_TRIDENT=m
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_VIA82CXXX=m
+CONFIG_MIDI_VIA82CXXX=y
+CONFIG_SOUND_OSS=m
+# CONFIG_SOUND_TRACEINIT is not set
+CONFIG_SOUND_DMAP=y
+# CONFIG_SOUND_AD1816 is not set
+CONFIG_SOUND_AD1889=m
+# CONFIG_SOUND_SGALAXY is not set
+# CONFIG_SOUND_ADLIB is not set
+# CONFIG_SOUND_ACI_MIXER is not set
+# CONFIG_SOUND_CS4232 is not set
+# CONFIG_SOUND_SSCAPE is not set
+# CONFIG_SOUND_GUS is not set
+CONFIG_SOUND_VMIDI=m
+# CONFIG_SOUND_TRIX is not set
+# CONFIG_SOUND_MSS is not set
+# CONFIG_SOUND_MPU401 is not set
+# CONFIG_SOUND_NM256 is not set
+# CONFIG_SOUND_MAD16 is not set
+# CONFIG_SOUND_PAS is not set
+# CONFIG_PAS_JOYSTICK is not set
+# CONFIG_SOUND_PSS is not set
+# CONFIG_SOUND_SB is not set
+# CONFIG_SOUND_AWE32_SYNTH is not set
+# CONFIG_SOUND_KAHLUA is not set
+# CONFIG_SOUND_WAVEFRONT is not set
+# CONFIG_SOUND_MAUI is not set
+# CONFIG_SOUND_YM3812 is not set
+# CONFIG_SOUND_OPL3SA1 is not set
+# CONFIG_SOUND_OPL3SA2 is not set
+CONFIG_SOUND_YMFPCI=m
+CONFIG_SOUND_YMFPCI_LEGACY=y
+# CONFIG_SOUND_UART6850 is not set
+# CONFIG_SOUND_AEDSP16 is not set
+# CONFIG_SOUND_TVMIXER is not set
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_UHCI=m
+CONFIG_USB_UHCI_ALT=m
+CONFIG_USB_OHCI=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_AUDIO=m
+# CONFIG_USB_EMI26 is not set
+
+#
+# USB Bluetooth can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+CONFIG_USB_STORAGE_HP8200e=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+
+#
+# USB Human Interface Devices (HID)
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+CONFIG_USB_HIDDEV=y
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+CONFIG_USB_AIPTEK=m
+CONFIG_USB_WACOM=m
+CONFIG_USB_KBTAB=m
+CONFIG_USB_POWERMATE=m
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_DC2XX is not set
+CONFIG_USB_MDC800=m
+CONFIG_USB_SCANNER=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USB_HPUSBSCSI=m
+
+#
+# USB Multimedia devices
+#
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network adaptors
+#
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_CATC=m
+# CONFIG_USB_AX8817X is not set
+CONFIG_USB_CDCETHER=m
+CONFIG_USB_USBNET=m
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+CONFIG_USB_SERIAL=m
+# CONFIG_USB_SERIAL_DEBUG is not set
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_WHITEHEAT=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_XIRCOM=m
+CONFIG_USB_SERIAL_OMNINET=m
+
+#
+# USB Miscellaneous drivers
+#
+CONFIG_USB_RIO500=m
+CONFIG_USB_AUERSWALD=m
+CONFIG_USB_TIGL=m
+CONFIG_USB_BRLVGER=m
+CONFIG_USB_LCD=m
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_TEST=m
+
+#
+# Library routines
+#
+CONFIG_CRC32=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_QSORT=y
+
+#
+# Bluetooth support
+#
+CONFIG_BLUEZ=m
+CONFIG_BLUEZ_L2CAP=m
+CONFIG_BLUEZ_SCO=m
+CONFIG_BLUEZ_RFCOMM=m
+CONFIG_BLUEZ_RFCOMM_TTY=y
+CONFIG_BLUEZ_BNEP=m
+CONFIG_BLUEZ_BNEP_MC_FILTER=y
+CONFIG_BLUEZ_BNEP_PROTO_FILTER=y
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BLUEZ_HCIUSB=m
+CONFIG_BLUEZ_USB_SCO=y
+CONFIG_BLUEZ_USB_ZERO_PACKET=y
+CONFIG_BLUEZ_HCIUART=m
+CONFIG_BLUEZ_HCIUART_H4=y
+CONFIG_BLUEZ_HCIUART_BCSP=y
+CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y
+# CONFIG_BLUEZ_HCIDTL1 is not set
+# CONFIG_BLUEZ_HCIBT3C is not set
+# CONFIG_BLUEZ_HCIBLUECARD is not set
+# CONFIG_BLUEZ_HCIBTUART is not set
+CONFIG_BLUEZ_HCIVHCI=m
+
+#
+# Simulated drivers
+#
+# CONFIG_HP_SIMETH is not set
+# CONFIG_HP_SIMSERIAL is not set
+# CONFIG_HP_SIMSCSI is not set
+
+#
+# Additional device driver support
+#
+CONFIG_NET_BROADCOM=m
+CONFIG_CIPE=m
+# CONFIG_CRYPTO_AEP is not set
+CONFIG_CRYPTO_BROADCOM=m
+# CONFIG_MEGARAC is not set
+CONFIG_FC_QLA2100=m
+CONFIG_FC_QLA2200=m
+CONFIG_FC_QLA2300=m
+CONFIG_SCSI_ISCSI=m
+# CONFIG_SCSI_IPR is not set
+CONFIG_SCSI_LPFC=m
+
+#
+# Kernel hacking
+#
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_IA64_PRINT_HAZARDS=y
+# CONFIG_DISABLE_VHPT is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_IA64_EARLY_PRINTK is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_KALLSYMS=y
--- /dev/null
+#
+# Automatically generated make config: don't edit
+#
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+
+#
+# General setup
+#
+CONFIG_IA64=y
+# CONFIG_HIGHPTE is not set
+CONFIG_HIGHMEM=y
+CONFIG_HIGHIO=y
+# CONFIG_ISA is not set
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+# CONFIG_SBUS is not set
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_ITANIUM is not set
+CONFIG_MCKINLEY=y
+CONFIG_IA64_GENERIC=y
+# CONFIG_IA64_DIG is not set
+# CONFIG_IA64_HP_SIM is not set
+# CONFIG_IA64_HP_ZX1 is not set
+# CONFIG_IA64_SGI_SN1 is not set
+# CONFIG_IA64_SGI_SN2 is not set
+# CONFIG_IA64_PAGE_SIZE_4KB is not set
+# CONFIG_IA64_PAGE_SIZE_8KB is not set
+CONFIG_IA64_PAGE_SIZE_16KB=y
+# CONFIG_IA64_PAGE_SIZE_64KB is not set
+CONFIG_IA64_L1_CACHE_SHIFT=7
+CONFIG_IA64_MCA=y
+CONFIG_PM=y
+CONFIG_KCORE_ELF=y
+CONFIG_FORCE_MAX_ZONEORDER=15
+# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set
+CONFIG_HUGETLB_PAGE_SIZE_256MB=y
+# CONFIG_HUGETLB_PAGE_SIZE_64MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set
+# CONFIG_HUGETLB_PAGE_SIZE_256KB is not set
+# CONFIG_IA64_PAL_IDLE is not set
+CONFIG_SMP=y
+CONFIG_IA32_SUPPORT=y
+CONFIG_COMPAT=y
+CONFIG_PERFMON=y
+CONFIG_IA64_PALINFO=y
+CONFIG_EFI_VARS=y
+CONFIG_IA64_CYCLONE=y
+CONFIG_NET=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=m
+CONFIG_AUDIT=m
+CONFIG_ACPI=y
+CONFIG_ACPI_EFI=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_KERNEL_CONFIG=y
+
+#
+# ACPI Support
+#
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI=y
+CONFIG_ACPI_EFI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_PCI=y
+CONFIG_PCI_NAMES=y
+CONFIG_HOTPLUG=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+# CONFIG_HOTPLUG_PCI_COMPAQ is not set
+# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set
+CONFIG_HOTPLUG_PCI_ACPI=m
+
+#
+# PCMCIA/CardBus support
+#
+# CONFIG_PCMCIA is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=y
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_FILTER=y
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_TUX=m
+CONFIG_TUX_EXTCGI=y
+# CONFIG_TUX_EXTENDED_LOG is not set
+# CONFIG_TUX_DEBUG is not set
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_NAT=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_TOS=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_UNCLEAN=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_TARGET_MIRROR=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_NAT_LOCAL=y
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP_NF_COMPAT_IPCHAINS=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_COMPAT_IPFWADM=m
+CONFIG_IP_NF_NAT_NEEDED=y
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=16
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_PRIVACY=y
+
+#
+# IPv6: Netfilter Configuration
+#
+# CONFIG_IP6_NF_QUEUE is not set
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=y
+# CONFIG_KHTTPD is not set
+# CONFIG_ATM is not set
+CONFIG_VLAN_8021Q=m
+
+#
+#
+#
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+
+#
+# Appletalk devices
+#
+CONFIG_DEV_APPLETALK=y
+CONFIG_COPS_DAYNA=y
+CONFIG_COPS_TANGENT=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_DECNET=m
+CONFIG_DECNET_SIOCGIFCONF=y
+CONFIG_DECNET_ROUTER=y
+CONFIG_DECNET_ROUTE_FWMARK=y
+CONFIG_BRIDGE=m
+# CONFIG_X25 is not set
+CONFIG_EDP2=m
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+CONFIG_NET_DIVERT=y
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_CSZ=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+CONFIG_BLK_CPQ_CISS_DA=m
+CONFIG_CISS_SCSI_TAPE=y
+# CONFIG_CISS_MONITOR_THREAD is not set
+CONFIG_BLK_DEV_DAC960=m
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_STATS=y
+CONFIG_DISKDUMP=m
+
+#
+# IEEE 1394 (FireWire) support (EXPERIMENTAL)
+#
+CONFIG_IEEE1394=m
+
+#
+# Device Drivers
+#
+
+#
+# Texas Instruments PCILynx requires I2C bit-banging
+#
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+CONFIG_IEEE1394_SBP2_PHYS_DMA=y
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_PCI=m
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_LAN=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_LVM=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=m
+# CONFIG_FUSION_BOOT is not set
+CONFIG_FUSION_MAX_SGE=40
+# CONFIG_FUSION_ISENSE is not set
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+CONFIG_NET_FC=y
+
+#
+# ATA/IDE/MFM/RLL support
+#
+CONFIG_IDE=y
+
+#
+# IDE, ATA and ATAPI Block devices
+#
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+# CONFIG_IDEDISK_STROKE is not set
+# CONFIG_BLK_DEV_IDECS is not set
+CONFIG_BLK_DEV_IDECD=m
+CONFIG_BLK_DEV_IDETAPE=m
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+# CONFIG_BLK_DEV_CMD640 is not set
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+# CONFIG_BLK_DEV_ISAPNP is not set
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_OFFBOARD is not set
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+# CONFIG_IDEDMA_ONLYDISK is not set
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_PCI_WIP is not set
+CONFIG_BLK_DEV_ADMA100=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+# CONFIG_WDC_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+# CONFIG_AMD74XX_OVERRIDE is not set
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_HPT34X=y
+# CONFIG_HPT34X_AUTODMA is not set
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_PIIX=y
+# CONFIG_BLK_DEV_NS87415 is not set
+# CONFIG_BLK_DEV_OPTI621 is not set
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+# CONFIG_PDC202XX_BURST is not set
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+# CONFIG_BLK_DEV_RZ1000 is not set
+# CONFIG_BLK_DEV_SC1200 is not set
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+# CONFIG_BLK_DEV_TRM290 is not set
+CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_IDE_CHIPSETS is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_IDEDMA_IVB is not set
+# CONFIG_DMA_NONPCI is not set
+CONFIG_BLK_DEV_PDC202XX=y
+CONFIG_BLK_DEV_IDE_MODES=y
+CONFIG_BLK_DEV_ATARAID=m
+CONFIG_BLK_DEV_ATARAID_PDC=m
+CONFIG_BLK_DEV_ATARAID_HPT=m
+CONFIG_BLK_DEV_ATARAID_SII=m
+
+#
+# SCSI support
+#
+CONFIG_SCSI=m
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_SD_EXTRA_DEVS=256
+CONFIG_SD_IOSTATS=y
+CONFIG_SCSI_DUMP=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_BLK_DEV_SR_VENDOR=y
+CONFIG_SR_EXTRA_DEVS=4
+CONFIG_CHR_DEV_SG=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_DEBUG_QUEUES=y
+# CONFIG_SCSI_MULTI_LUN is not set
+CONFIG_SCSI_CONSTANTS=y
+# CONFIG_SCSI_LOGGING is not set
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+# CONFIG_SCSI_7000FASST is not set
+# CONFIG_SCSI_ACARD is not set
+# CONFIG_SCSI_AHA152X is not set
+# CONFIG_SCSI_AHA1542 is not set
+# CONFIG_SCSI_AHA1740 is not set
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=15000
+# CONFIG_AIC7XXX_PROBE_EISA_VL is not set
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC79XX=m
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC7XXX_OLD=m
+CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y
+CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_OLD_PROC_STATS=y
+# CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_IN2000 is not set
+# CONFIG_SCSI_AM53C974 is not set
+CONFIG_SCSI_MEGARAID=m
+CONFIG_SCSI_MEGARAID2=m
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_SVW=m
+CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_SATA_NV=m
+CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_SATA_SX4=m
+CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIS=m
+CONFIG_SCSI_SATA_VIA=m
+CONFIG_SCSI_SATA_VITESSE=m
+# CONFIG_SCSI_BUSLOGIC is not set
+# CONFIG_SCSI_CPQFCTS is not set
+# CONFIG_SCSI_DMX3191D is not set
+# CONFIG_SCSI_DTC3280 is not set
+# CONFIG_SCSI_EATA is not set
+# CONFIG_SCSI_EATA_DMA is not set
+# CONFIG_SCSI_EATA_PIO is not set
+# CONFIG_SCSI_FUTURE_DOMAIN is not set
+CONFIG_SCSI_GDTH=m
+# CONFIG_SCSI_GENERIC_NCR5380 is not set
+CONFIG_SCSI_IPS=m
+# CONFIG_SCSI_INITIO is not set
+# CONFIG_SCSI_INIA100 is not set
+# CONFIG_SCSI_NCR53C406A is not set
+# CONFIG_SCSI_NCR53C7xx is not set
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_NCR53C8XX=m
+CONFIG_SCSI_SYM53C8XX=m
+CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8
+CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32
+CONFIG_SCSI_NCR53C8XX_SYNC=40
+# CONFIG_SCSI_NCR53C8XX_PROFILE is not set
+# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set
+# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set
+# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set
+# CONFIG_SCSI_PAS16 is not set
+# CONFIG_SCSI_PCI2000 is not set
+# CONFIG_SCSI_PCI2220I is not set
+# CONFIG_SCSI_PSI240I is not set
+CONFIG_SCSI_QLOGIC_FAS=m
+CONFIG_SCSI_QLOGIC_ISP=m
+CONFIG_SCSI_QLOGIC_FC=m
+# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
+CONFIG_SCSI_QLOGIC_1280=m
+# CONFIG_SCSI_SIM710 is not set
+# CONFIG_SCSI_SYM53C416 is not set
+# CONFIG_SCSI_DC390T is not set
+# CONFIG_SCSI_T128 is not set
+# CONFIG_SCSI_U14_34F is not set
+CONFIG_SCSI_NSP32=m
+# CONFIG_SCSI_DEBUG is not set
+
+#
+# Network device support
+#
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+# CONFIG_ARCNET is not set
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+# CONFIG_SUNLANCE is not set
+CONFIG_HAPPYMEAL=m
+# CONFIG_SUNBMAC is not set
+# CONFIG_SUNQE is not set
+CONFIG_SUNGEM=m
+CONFIG_NET_VENDOR_3COM=y
+# CONFIG_EL1 is not set
+# CONFIG_EL2 is not set
+# CONFIG_ELPLUS is not set
+# CONFIG_EL16 is not set
+# CONFIG_ELMC is not set
+# CONFIG_ELMC_II is not set
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+# CONFIG_LANCE is not set
+# CONFIG_NET_VENDOR_SMC is not set
+# CONFIG_NET_VENDOR_RACAL is not set
+# CONFIG_HP100 is not set
+# CONFIG_NET_ISA is not set
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+CONFIG_AMD8111_ETH=m
+CONFIG_ADAPTEC_STARFIRE=m
+# CONFIG_APRICOT is not set
+CONFIG_B44=m
+# CONFIG_CS89x0 is not set
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+CONFIG_TULIP_MMIO=y
+# CONFIG_DE4X5 is not set
+# CONFIG_DGRS is not set
+CONFIG_DM9102=m
+CONFIG_EEPRO100=m
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=m
+# CONFIG_LNE390 is not set
+CONFIG_FEALNX=m
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+# CONFIG_NE3210 is not set
+# CONFIG_ES3210 is not set
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_SIS900=m
+CONFIG_EPIC100=m
+# CONFIG_SUNDANCE is not set
+# CONFIG_SUNDANCE_MMIO is not set
+CONFIG_TLAN=m
+CONFIG_TC35815=m
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_NET_POCKET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+CONFIG_DL2K=m
+CONFIG_E1000=m
+CONFIG_E1000_NAPI=y
+# CONFIG_MYRI_SBUS is not set
+CONFIG_NS83820=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_R8169=m
+CONFIG_SK98LIN=m
+CONFIG_TIGON3=m
+# CONFIG_FDDI is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_HIPPI is not set
+# CONFIG_PLIP is not set
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPPOE is not set
+# CONFIG_SLIP is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices
+#
+CONFIG_TR=y
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+# CONFIG_TMS380TR is not set
+CONFIG_NET_FC=y
+CONFIG_IPHASE5526=m
+# CONFIG_RCPCI is not set
+CONFIG_SHAPER=m
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# CD-ROM drivers (not for SCSI or IDE/ATAPI drives)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input core support
+#
+CONFIG_INPUT=m
+CONFIG_INPUT_KEYBDEV=m
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_EVDEV=m
+
+#
+# Character devices
+#
+CONFIG_VT=y
+# CONFIG_ECC is not set
+CONFIG_VT_CONSOLE=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_CONSOLE=y
+CONFIG_SERIAL_HCDP=y
+CONFIG_SERIAL_ACPI=y
+CONFIG_HP_DIVA=y
+CONFIG_SERIAL_EXTENDED=y
+CONFIG_SERIAL_MANY_PORTS=y
+CONFIG_SERIAL_SHARE_IRQ=y
+# CONFIG_SERIAL_DETECT_IRQ is not set
+CONFIG_SERIAL_MULTIPORT=y
+# CONFIG_HUB6 is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=2048
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+CONFIG_MK712_MOUSE=m
+
+#
+# Joysticks
+#
+CONFIG_INPUT_GAMEPORT=m
+CONFIG_INPUT_NS558=m
+CONFIG_INPUT_LIGHTNING=m
+CONFIG_INPUT_PCIGAME=m
+CONFIG_INPUT_CS461X=m
+CONFIG_INPUT_EMU10K1=m
+CONFIG_INPUT_SERIO=m
+CONFIG_INPUT_SERPORT=m
+
+#
+# Joysticks
+#
+CONFIG_INPUT_ANALOG=m
+CONFIG_INPUT_A3D=m
+CONFIG_INPUT_ADI=m
+CONFIG_INPUT_COBRA=m
+CONFIG_INPUT_GF2K=m
+CONFIG_INPUT_GRIP=m
+CONFIG_INPUT_INTERACT=m
+CONFIG_INPUT_TMDC=m
+CONFIG_INPUT_SIDEWINDER=m
+CONFIG_INPUT_IFORCE_USB=m
+CONFIG_INPUT_IFORCE_232=m
+CONFIG_INPUT_WARRIOR=m
+CONFIG_INPUT_MAGELLAN=m
+CONFIG_INPUT_SPACEORB=m
+CONFIG_INPUT_SPACEBALL=m
+CONFIG_INPUT_STINGER=m
+# CONFIG_INPUT_DB9 is not set
+# CONFIG_INPUT_GAMECON is not set
+# CONFIG_INPUT_TURBOGRAFX is not set
+# CONFIG_QIC02_TAPE is not set
+CONFIG_IPMI_HANDLER=m
+# CONFIG_IPMI_PANIC_EVENT is not set
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_KCS=m
+CONFIG_IPMI_WATCHDOG=m
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_HANGCHECK_DELAY=m
+# CONFIG_SCx200_GPIO is not set
+CONFIG_INTEL_RNG=m
+# CONFIG_AMD_PM768 is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+CONFIG_EFI_RTC=y
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+CONFIG_AGP=m
+CONFIG_AGP_INTEL=y
+# CONFIG_AGP_I810 is not set
+# CONFIG_AGP_VIA is not set
+# CONFIG_AGP_AMD is not set
+CONFIG_AGP_AMD_8151=y
+# CONFIG_AGP_SIS is not set
+# CONFIG_AGP_ALI is not set
+# CONFIG_AGP_SWORKS is not set
+CONFIG_AGP_I460=y
+CONFIG_AGP_HP_ZX1=y
+CONFIG_DRM=y
+# CONFIG_DRM_OLD is not set
+
+#
+# DRM 4.1 drivers
+#
+CONFIG_DRM_NEW=y
+CONFIG_DRM_TDFX=m
+CONFIG_DRM_GAMMA=m
+CONFIG_DRM_R128=m
+CONFIG_DRM_RADEON=m
+# CONFIG_DRM_I810 is not set
+# CONFIG_DRM_I810_XFREE_41 is not set
+# CONFIG_DRM_I830 is not set
+CONFIG_DRM_MGA=m
+# CONFIG_DRM_SIS is not set
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+CONFIG_QUOTA=y
+# CONFIG_QFMT_V1 is not set
+CONFIG_QFMT_V2=y
+# CONFIG_QIFACE_COMPAT is not set
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+CONFIG_REISERFS_PROC_INFO=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+CONFIG_HFS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BFS_FS is not set
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_XATTR_SHARING=y
+CONFIG_EXT3_FS_XATTR_USER=y
+CONFIG_EXT3_FS_XATTR_TRUSTED=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_JBD=m
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_UMSDOS_FS=m
+CONFIG_VFAT_FS=m
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_CRAMFS=m
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_DEBUG=y
+# CONFIG_JFS_STATISTICS is not set
+CONFIG_MINIX_FS=m
+CONFIG_VXFS_FS=m
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+CONFIG_ROMFS_FS=m
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_XATTR_SHARING=y
+CONFIG_EXT2_FS_XATTR_USER=y
+CONFIG_SYSV_FS=m
+CONFIG_UDF_FS=m
+CONFIG_UDF_RW=y
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_CODA_FS=m
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_NFS_FS=m
+CONFIG_NFS_V3=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFS_ACL=y
+# CONFIG_ROOT_NFS is not set
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_ACL=y
+CONFIG_NFSD_TCP=y
+CONFIG_SUNRPC=m
+CONFIG_LOCKD=m
+CONFIG_LOCKD_V4=y
+CONFIG_SMB_FS=m
+# CONFIG_SMB_NLS_DEFAULT is not set
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_FS_MBCACHE=y
+CONFIG_FS_POSIX_ACL=y
+CONFIG_FS_MBCACHE=y
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+# CONFIG_LDM_PARTITION is not set
+CONFIG_SGI_PARTITION=y
+# CONFIG_ULTRIX_PARTITION is not set
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+CONFIG_SMB_NLS=y
+CONFIG_NLS=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+
+#
+# Console drivers
+#
+CONFIG_VGA_CONSOLE=y
+
+#
+# Frame-buffer support
+#
+CONFIG_FB=y
+CONFIG_DUMMY_CONSOLE=y
+# CONFIG_FB_RIVA is not set
+# CONFIG_FB_CLGEN is not set
+# CONFIG_FB_PM2 is not set
+CONFIG_FB_PM3=m
+# CONFIG_FB_CYBER2000 is not set
+CONFIG_FB_VGA16=m
+CONFIG_FB_MATROX=m
+CONFIG_FB_MATROX_MILLENIUM=y
+CONFIG_FB_MATROX_MYSTIQUE=y
+CONFIG_FB_MATROX_G450=y
+CONFIG_FB_MATROX_G100=y
+# CONFIG_FB_MATROX_PROC is not set
+CONFIG_FB_MATROX_MULTIHEAD=y
+# CONFIG_FB_ATY is not set
+# CONFIG_FB_RADEON is not set
+CONFIG_FB_ATY128=m
+# CONFIG_FB_INTEL is not set
+# CONFIG_FB_SIS is not set
+CONFIG_FB_NEOMAGIC=m
+CONFIG_FB_3DFX=m
+CONFIG_FB_VOODOO1=m
+# CONFIG_FB_TRIDENT is not set
+# CONFIG_FB_VIRTUAL is not set
+# CONFIG_FBCON_ADVANCED is not set
+CONFIG_FBCON_CFB8=y
+CONFIG_FBCON_CFB16=y
+CONFIG_FBCON_CFB24=y
+CONFIG_FBCON_CFB32=y
+CONFIG_FBCON_VGA_PLANES=m
+CONFIG_FBCON_HGA=m
+# CONFIG_FBCON_FONTWIDTH8_ONLY is not set
+# CONFIG_FBCON_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+CONFIG_PCI_CONSOLE=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+CONFIG_SOUND_ALI5455=m
+# CONFIG_SOUND_BT878 is not set
+CONFIG_SOUND_CMPCI=m
+CONFIG_SOUND_CMPCI_FM=y
+CONFIG_SOUND_CMPCI_FMIO=388
+CONFIG_SOUND_CMPCI_FMIO=388
+CONFIG_SOUND_CMPCI_MIDI=y
+CONFIG_SOUND_CMPCI_MPUIO=330
+CONFIG_SOUND_CMPCI_JOYSTICK=y
+CONFIG_SOUND_CMPCI_CM8738=y
+# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set
+CONFIG_SOUND_CMPCI_SPDIFLOOP=y
+CONFIG_SOUND_CMPCI_SPEAKERS=2
+CONFIG_SOUND_EMU10K1=m
+CONFIG_MIDI_EMU10K1=y
+CONFIG_SOUND_AUDIGY=m
+CONFIG_SOUND_FUSION=m
+CONFIG_SOUND_CS4281=m
+CONFIG_SOUND_ES1370=m
+CONFIG_SOUND_ES1371=m
+CONFIG_SOUND_ESSSOLO1=m
+CONFIG_SOUND_MAESTRO=m
+CONFIG_SOUND_MAESTRO3=m
+CONFIG_SOUND_FORTE=m
+CONFIG_SOUND_ICH=m
+CONFIG_SOUND_RME96XX=m
+CONFIG_SOUND_SONICVIBES=m
+CONFIG_SOUND_TRIDENT=m
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_VIA82CXXX=m
+CONFIG_MIDI_VIA82CXXX=y
+CONFIG_SOUND_OSS=m
+# CONFIG_SOUND_TRACEINIT is not set
+CONFIG_SOUND_DMAP=y
+# CONFIG_SOUND_AD1816 is not set
+CONFIG_SOUND_AD1889=m
+# CONFIG_SOUND_SGALAXY is not set
+# CONFIG_SOUND_ADLIB is not set
+# CONFIG_SOUND_ACI_MIXER is not set
+# CONFIG_SOUND_CS4232 is not set
+# CONFIG_SOUND_SSCAPE is not set
+# CONFIG_SOUND_GUS is not set
+CONFIG_SOUND_VMIDI=m
+# CONFIG_SOUND_TRIX is not set
+# CONFIG_SOUND_MSS is not set
+# CONFIG_SOUND_MPU401 is not set
+# CONFIG_SOUND_NM256 is not set
+# CONFIG_SOUND_MAD16 is not set
+# CONFIG_SOUND_PAS is not set
+# CONFIG_PAS_JOYSTICK is not set
+# CONFIG_SOUND_PSS is not set
+# CONFIG_SOUND_SB is not set
+# CONFIG_SOUND_AWE32_SYNTH is not set
+# CONFIG_SOUND_KAHLUA is not set
+# CONFIG_SOUND_WAVEFRONT is not set
+# CONFIG_SOUND_MAUI is not set
+# CONFIG_SOUND_YM3812 is not set
+# CONFIG_SOUND_OPL3SA1 is not set
+# CONFIG_SOUND_OPL3SA2 is not set
+CONFIG_SOUND_YMFPCI=m
+CONFIG_SOUND_YMFPCI_LEGACY=y
+# CONFIG_SOUND_UART6850 is not set
+# CONFIG_SOUND_AEDSP16 is not set
+# CONFIG_SOUND_TVMIXER is not set
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_UHCI=m
+CONFIG_USB_UHCI_ALT=m
+CONFIG_USB_OHCI=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_AUDIO=m
+# CONFIG_USB_EMI26 is not set
+
+#
+# USB Bluetooth can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+CONFIG_USB_STORAGE_HP8200e=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+
+#
+# USB Human Interface Devices (HID)
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+CONFIG_USB_HIDDEV=y
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+CONFIG_USB_AIPTEK=m
+CONFIG_USB_WACOM=m
+CONFIG_USB_KBTAB=m
+CONFIG_USB_POWERMATE=m
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_DC2XX is not set
+CONFIG_USB_MDC800=m
+CONFIG_USB_SCANNER=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USB_HPUSBSCSI=m
+
+#
+# USB Multimedia devices
+#
+
+#
+# Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network adaptors
+#
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_CATC=m
+# CONFIG_USB_AX8817X is not set
+CONFIG_USB_CDCETHER=m
+CONFIG_USB_USBNET=m
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+CONFIG_USB_SERIAL=m
+# CONFIG_USB_SERIAL_DEBUG is not set
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_WHITEHEAT=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_XIRCOM=m
+CONFIG_USB_SERIAL_OMNINET=m
+
+#
+# USB Miscellaneous drivers
+#
+CONFIG_USB_RIO500=m
+CONFIG_USB_AUERSWALD=m
+CONFIG_USB_TIGL=m
+CONFIG_USB_BRLVGER=m
+CONFIG_USB_LCD=m
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_DEFLATE=y
+CONFIG_CRYPTO_TEST=m
+
+#
+# Library routines
+#
+CONFIG_CRC32=m
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=y
+CONFIG_QSORT=y
+
+#
+# Bluetooth support
+#
+CONFIG_BLUEZ=m
+CONFIG_BLUEZ_L2CAP=m
+CONFIG_BLUEZ_SCO=m
+CONFIG_BLUEZ_RFCOMM=m
+CONFIG_BLUEZ_RFCOMM_TTY=y
+CONFIG_BLUEZ_BNEP=m
+CONFIG_BLUEZ_BNEP_MC_FILTER=y
+CONFIG_BLUEZ_BNEP_PROTO_FILTER=y
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BLUEZ_HCIUSB=m
+CONFIG_BLUEZ_USB_SCO=y
+CONFIG_BLUEZ_USB_ZERO_PACKET=y
+CONFIG_BLUEZ_HCIUART=m
+CONFIG_BLUEZ_HCIUART_H4=y
+CONFIG_BLUEZ_HCIUART_BCSP=y
+CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y
+# CONFIG_BLUEZ_HCIDTL1 is not set
+# CONFIG_BLUEZ_HCIBT3C is not set
+# CONFIG_BLUEZ_HCIBLUECARD is not set
+# CONFIG_BLUEZ_HCIBTUART is not set
+CONFIG_BLUEZ_HCIVHCI=m
+
+#
+# Simulated drivers
+#
+# CONFIG_HP_SIMETH is not set
+# CONFIG_HP_SIMSERIAL is not set
+# CONFIG_HP_SIMSCSI is not set
+
+#
+# Additional device driver support
+#
+CONFIG_NET_BROADCOM=m
+CONFIG_CIPE=m
+# CONFIG_CRYPTO_AEP is not set
+CONFIG_CRYPTO_BROADCOM=m
+# CONFIG_MEGARAC is not set
+CONFIG_FC_QLA2100=m
+CONFIG_FC_QLA2200=m
+CONFIG_FC_QLA2300=m
+CONFIG_SCSI_ISCSI=m
+# CONFIG_SCSI_IPR is not set
+CONFIG_SCSI_LPFC=m
+
+#
+# Kernel hacking
+#
+CONFIG_IA64_GRANULE_16MB=y
+# CONFIG_IA64_GRANULE_64MB is not set
+CONFIG_DEBUG_KERNEL=y
+CONFIG_IA64_PRINT_HAZARDS=y
+# CONFIG_DISABLE_VHPT is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_IA64_EARLY_PRINTK is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_IA64_DEBUG_CMPXCHG is not set
+# CONFIG_IA64_DEBUG_IRQ is not set
+CONFIG_KALLSYMS=y
--- /dev/null
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_GENERIC_ISA_DMA=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+# CONFIG_STANDALONE is not set
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_HOTPLUG=y
+CONFIG_EVLOG=y
+# CONFIG_EVLOG_FWPRINTK is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_EMBEDDED is not set
+
+#
+# Class Based Kernel Resource Management
+#
+CONFIG_CKRM=y
+CONFIG_RCFS_FS=m
+CONFIG_CKRM_TYPE_TASKCLASS=y
+CONFIG_CKRM_RES_NUMTASKS=m
+CONFIG_CKRM_TYPE_SOCKETCLASS=y
+CONFIG_CKRM_RBCE=m
+CONFIG_CKRM_CRBCE=m
+CONFIG_DELAY_ACCT=y
+CONFIG_KALLSYMS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+# CONFIG_X86_PC is not set
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+CONFIG_X86_GENERICARCH=y
+# CONFIG_X86_ES7000 is not set
+CONFIG_X86_CYCLONE_TIMER=y
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+# CONFIG_M686 is not set
+CONFIG_MPENTIUMII=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_GENERIC=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_HPET_EMULATE_RTC is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=128
+CONFIG_SCHED_SMT=y
+# CONFIG_PREEMPT is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_TSC=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCE_NONFATAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+CONFIG_TOSHIBA=m
+CONFIG_I8K=m
+CONFIG_MICROCODE=m
+CONFIG_X86_MSR=m
+CONFIG_X86_CPUID=m
+
+#
+# Firmware Drivers
+#
+CONFIG_EDD=m
+# CONFIG_NOHIGHMEM is not set
+# CONFIG_HIGHMEM4G is not set
+CONFIG_HIGHMEM64G=y
+CONFIG_HIGHMEM=y
+CONFIG_X86_PAE=y
+# CONFIG_NUMA is not set
+CONFIG_HIGHPTE=y
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+CONFIG_EFI=y
+CONFIG_IRQBALANCE=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_BOOT_IOREMAP=y
+CONFIG_REGPARM=y
+
+#
+# Special options
+#
+CONFIG_PROC_MM=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+# CONFIG_SOFTWARE_SUSPEND is not set
+# CONFIG_PM_DISK is not set
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+# CONFIG_ACPI_ASUS is not set
+CONFIG_ACPI_TOSHIBA=m
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_X86_PM_TIMER=y
+CONFIG_ACPI_INITRD=y
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+CONFIG_APM=y
+# CONFIG_APM_IGNORE_USER_SUSPEND is not set
+CONFIG_APM_DO_ENABLE=y
+# CONFIG_APM_CPU_IDLE is not set
+CONFIG_APM_DISPLAY_BLANK=y
+# CONFIG_APM_RTC_IS_GMT is not set
+CONFIG_APM_ALLOW_INTS=y
+# CONFIG_APM_REAL_MODE_POWER_OFF is not set
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_PROC_INTF=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_GOV_USERSPACE=m
+CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+# CONFIG_CPU_FREQ_24_API is not set
+CONFIG_CPU_FREQ_TABLE=m
+
+#
+# CPUFreq processor drivers
+#
+CONFIG_X86_ACPI_CPUFREQ=m
+# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
+CONFIG_X86_POWERNOW_K6=m
+CONFIG_X86_POWERNOW_K7=m
+CONFIG_X86_POWERNOW_K8=m
+CONFIG_X86_POWERNOW_K8_ACPI=y
+CONFIG_X86_GX_SUSPMOD=m
+CONFIG_X86_SPEEDSTEP_CENTRINO=m
+CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y
+# CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI is not set
+CONFIG_X86_SPEEDSTEP_ICH=m
+CONFIG_X86_SPEEDSTEP_SMI=m
+CONFIG_X86_P4_CLOCKMOD=m
+CONFIG_X86_SPEEDSTEP_LIB=m
+CONFIG_X86_LONGRUN=m
+CONFIG_X86_LONGHAUL=m
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCI_USE_VECTOR is not set
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_NAMES is not set
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+CONFIG_SCx200=m
+
+#
+# PCMCIA/CardBus support
+#
+CONFIG_PCMCIA=m
+# CONFIG_PCMCIA_DEBUG is not set
+CONFIG_YENTA=m
+CONFIG_CARDBUS=y
+CONFIG_I82092=m
+CONFIG_I82365=m
+CONFIG_TCIC=m
+CONFIG_PCMCIA_PROBE=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_FAKE=m
+CONFIG_HOTPLUG_PCI_COMPAQ=m
+CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y
+CONFIG_HOTPLUG_PCI_IBM=m
+CONFIG_HOTPLUG_PCI_AMD=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_HOTPLUG_PCI_CPCI=y
+CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
+CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
+CONFIG_HOTPLUG_PCI_PCIE=m
+# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=m
+CONFIG_BINFMT_MISC=m
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_FW_LOADER=m
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+CONFIG_MTD=m
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_PARTITIONS=m
+CONFIG_MTD_CONCAT=m
+CONFIG_MTD_REDBOOT_PARTS=m
+CONFIG_MTD_CMDLINE_PARTS=m
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=m
+CONFIG_MTD_BLOCK=m
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=m
+CONFIG_MTD_JEDECPROBE=m
+CONFIG_MTD_GEN_PROBE=m
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_GEOMETRY is not set
+CONFIG_MTD_CFI_INTELEXT=m
+CONFIG_MTD_CFI_AMDSTD=m
+CONFIG_MTD_CFI_STAA=m
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+CONFIG_MTD_ABSENT=m
+CONFIG_MTD_OBSOLETE_CHIPS=y
+CONFIG_MTD_AMDSTD=m
+CONFIG_MTD_SHARP=m
+CONFIG_MTD_JEDEC=m
+
+#
+# Mapping drivers for chip access
+#
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=m
+CONFIG_MTD_PHYSMAP_START=0x8000000
+CONFIG_MTD_PHYSMAP_LEN=0x4000000
+CONFIG_MTD_PHYSMAP_BUSWIDTH=2
+CONFIG_MTD_PNC2000=m
+CONFIG_MTD_SC520CDP=m
+CONFIG_MTD_NETSC520=m
+CONFIG_MTD_SBC_GXX=m
+CONFIG_MTD_ELAN_104NC=m
+CONFIG_MTD_OCTAGON=m
+CONFIG_MTD_VMAX=m
+CONFIG_MTD_SCx200_DOCFLASH=m
+CONFIG_MTD_AMD76XROM=m
+CONFIG_MTD_ICH2ROM=m
+CONFIG_MTD_SCB2_FLASH=m
+CONFIG_MTD_NETtel=m
+CONFIG_MTD_DILNETPC=m
+CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000
+CONFIG_MTD_L440GX=m
+CONFIG_MTD_PCI=m
+
+#
+# Self-contained MTD device drivers
+#
+CONFIG_MTD_PMC551=m
+CONFIG_MTD_PMC551_BUGFIX=y
+# CONFIG_MTD_PMC551_DEBUG is not set
+CONFIG_MTD_SLRAM=m
+CONFIG_MTD_MTDRAM=m
+CONFIG_MTDRAM_TOTAL_SIZE=4096
+CONFIG_MTDRAM_ERASE_SIZE=128
+CONFIG_MTD_BLKMTD=m
+
+#
+# Disk-On-Chip Device Drivers
+#
+CONFIG_MTD_DOC2000=m
+CONFIG_MTD_DOC2001=m
+CONFIG_MTD_DOC2001PLUS=m
+CONFIG_MTD_DOCPROBE=m
+CONFIG_MTD_DOCPROBE_ADVANCED=y
+CONFIG_MTD_DOCPROBE_ADDRESS=0x0000
+CONFIG_MTD_DOCPROBE_HIGH=y
+CONFIG_MTD_DOCPROBE_55AA=y
+
+#
+# NAND Flash Device Drivers
+#
+CONFIG_MTD_NAND=m
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+CONFIG_MTD_NAND_IDS=m
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_PARPORT_PC_CML1=m
+CONFIG_PARPORT_SERIAL=m
+CONFIG_PARPORT_PC_FIFO=y
+CONFIG_PARPORT_PC_SUPERIO=y
+CONFIG_PARPORT_PC_PCMCIA=m
+CONFIG_PARPORT_OTHER=y
+CONFIG_PARPORT_1284=y
+
+#
+# Plug and Play support
+#
+CONFIG_PNP=y
+# CONFIG_PNP_DEBUG is not set
+
+#
+# Protocols
+#
+CONFIG_ISAPNP=y
+CONFIG_PNPBIOS=y
+CONFIG_PNPBIOS_PROC_FS=y
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+CONFIG_BLK_DEV_XD=m
+CONFIG_PARIDE=m
+CONFIG_PARIDE_PARPORT=m
+
+#
+# Parallel IDE high-level drivers
+#
+CONFIG_PARIDE_PD=m
+CONFIG_PARIDE_PCD=m
+CONFIG_PARIDE_PF=m
+CONFIG_PARIDE_PT=m
+CONFIG_PARIDE_PG=m
+
+#
+# Parallel IDE protocol modules
+#
+CONFIG_PARIDE_ATEN=m
+CONFIG_PARIDE_BPCK=m
+CONFIG_PARIDE_BPCK6=m
+CONFIG_PARIDE_COMM=m
+CONFIG_PARIDE_DSTR=m
+CONFIG_PARIDE_FIT2=m
+CONFIG_PARIDE_FIT3=m
+CONFIG_PARIDE_EPAT=m
+CONFIG_PARIDE_EPATC8=y
+CONFIG_PARIDE_EPIA=m
+CONFIG_PARIDE_FRIQ=m
+CONFIG_PARIDE_FRPW=m
+CONFIG_PARIDE_KBIC=m
+CONFIG_PARIDE_KTTI=m
+CONFIG_PARIDE_ON20=m
+CONFIG_PARIDE_ON26=m
+CONFIG_BLK_CPQ_DA=m
+CONFIG_BLK_CPQ_CISS_DA=m
+CONFIG_CISS_SCSI_TAPE=y
+CONFIG_BLK_DEV_DAC960=m
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_CARMEL=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=64000
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LBD=y
+CONFIG_CIPHER_TWOFISH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_IDEDISK_STROKE=y
+CONFIG_BLK_DEV_IDECS=m
+CONFIG_BLK_DEV_IDECD=m
+CONFIG_BLK_DEV_IDETAPE=m
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+# CONFIG_IDE_TASKFILE_IO is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
+CONFIG_BLK_DEV_IDEPNP=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_OFFBOARD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_OPTI621=y
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+CONFIG_IDEDMA_ONLYDISK=y
+CONFIG_BLK_DEV_ADMA=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+# CONFIG_WDC_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_ATIIXP=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5520=m
+CONFIG_BLK_DEV_CS5530=m
+CONFIG_BLK_DEV_HPT34X=y
+CONFIG_HPT34X_AUTODMA=y
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_SC1200=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_NS87415=y
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+CONFIG_BLK_DEV_TRM290=y
+CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_IDE_CHIPSETS=y
+
+#
+# Note: most of these also require special kernel boot parameters
+#
+CONFIG_BLK_DEV_4DRIVES=y
+CONFIG_BLK_DEV_ALI14XX=y
+CONFIG_BLK_DEV_DTC2278=y
+CONFIG_BLK_DEV_HT6560B=y
+# CONFIG_BLK_DEV_PDC4030 is not set
+CONFIG_BLK_DEV_QD65XX=y
+CONFIG_BLK_DEV_UMC8672=y
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=m
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+CONFIG_CHR_DEV_SCH=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+CONFIG_SCSI_7000FASST=m
+CONFIG_SCSI_ACARD=m
+CONFIG_SCSI_AHA152X=m
+CONFIG_SCSI_AHA1542=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=5000
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+CONFIG_SCSI_AIC7XXX_OLD=m
+CONFIG_SCSI_AIC79XX=m
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+CONFIG_AIC79XX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_AIC79XX_NEW is not set
+CONFIG_SCSI_DPT_I2O=m
+CONFIG_SCSI_ADVANSYS=m
+CONFIG_SCSI_IN2000=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_LEGACY=m
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_SVW=m
+CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIS=m
+CONFIG_SCSI_SATA_VIA=m
+CONFIG_SCSI_SATA_VITESSE=m
+CONFIG_SCSI_BUSLOGIC=m
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+CONFIG_SCSI_DMX3191D=m
+CONFIG_SCSI_DTC3280=m
+CONFIG_SCSI_EATA=m
+CONFIG_SCSI_EATA_TAGGED_QUEUE=y
+CONFIG_SCSI_EATA_LINKED_COMMANDS=y
+CONFIG_SCSI_EATA_MAX_TAGS=16
+CONFIG_SCSI_EATA_PIO=m
+CONFIG_SCSI_FUTURE_DOMAIN=m
+CONFIG_SCSI_GDTH=m
+CONFIG_SCSI_GENERIC_NCR5380=m
+CONFIG_SCSI_GENERIC_NCR5380_MMIO=m
+CONFIG_SCSI_GENERIC_NCR53C400=y
+CONFIG_SCSI_IPS=m
+CONFIG_SCSI_INIA100=m
+CONFIG_SCSI_PPA=m
+CONFIG_SCSI_IMM=m
+# CONFIG_SCSI_IZIP_EPP16 is not set
+# CONFIG_SCSI_IZIP_SLOW_CTR is not set
+CONFIG_SCSI_NCR53C406A=m
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_IPR=m
+CONFIG_SCSI_IPR_TRACE=y
+CONFIG_SCSI_IPR_DUMP=y
+CONFIG_SCSI_PAS16=m
+CONFIG_SCSI_PSI240I=m
+CONFIG_SCSI_QLOGIC_FAS=m
+CONFIG_SCSI_QLOGIC_ISP=m
+CONFIG_SCSI_QLOGIC_FC=m
+CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
+CONFIG_SCSI_QLOGIC_1280=m
+CONFIG_SCSI_QLA2XXX=m
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA6312=m
+CONFIG_SCSI_QLA6322=m
+CONFIG_SCSI_QLA2XXX_FAILOVER=y
+CONFIG_SCSI_QLA4XXX=m
+CONFIG_SCSI_QLA4XXX_FAILOVER=y
+CONFIG_SCSI_SYM53C416=m
+CONFIG_SCSI_DC395x=m
+CONFIG_SCSI_DC390T=m
+CONFIG_SCSI_T128=m
+CONFIG_SCSI_U14_34F=m
+CONFIG_SCSI_U14_34F_TAGGED_QUEUE=y
+CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y
+CONFIG_SCSI_U14_34F_MAX_TAGS=8
+CONFIG_SCSI_ULTRASTOR=m
+CONFIG_SCSI_NSP32=m
+CONFIG_SCSI_DEBUG=m
+
+#
+# PCMCIA SCSI adapter support
+#
+CONFIG_PCMCIA_AHA152X=m
+CONFIG_PCMCIA_FDOMAIN=m
+CONFIG_PCMCIA_NINJA_SCSI=m
+CONFIG_PCMCIA_QLOGIC=m
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+CONFIG_CD_NO_IDESCSI=y
+CONFIG_AZTCD=m
+CONFIG_GSCD=m
+CONFIG_MCD=m
+CONFIG_MCD_IRQ=11
+CONFIG_MCD_BASE=0x300
+CONFIG_OPTCD=m
+CONFIG_SJCD=m
+CONFIG_ISP16_CDI=m
+CONFIG_CDU535=m
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_FLAKEY=m
+CONFIG_BLK_DEV_DM_BBR=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=m
+CONFIG_FUSION_MAX_SGE=40
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+CONFIG_IEEE1394=m
+
+#
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
+CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
+
+#
+# Device Drivers
+#
+CONFIG_IEEE1394_PCILYNX=m
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_CONFIG=m
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=m
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_NAT=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_TOS=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_ACCEPT_QUEUES is not set
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=12
+
+#
+# IPVS transport protocol load balancing support
+#
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_NDISC_NEW=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+
+#
+# MOBILE IPv6 (EXPERIMENTAL)
+#
+CONFIG_IPV6_MOBILITY=m
+CONFIG_IPV6_MOBILITY_MN=m
+CONFIG_IPV6_MOBILITY_HA=m
+# CONFIG_IPV6_MOBILITY_DEBUG is not set
+CONFIG_DECNET=m
+CONFIG_DECNET_SIOCGIFCONF=y
+# CONFIG_DECNET_ROUTER is not set
+CONFIG_BRIDGE=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_POLICY=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_PHYSDEV=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+# CONFIG_IP_NF_NAT_LOCAL is not set
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_CLASSIFY=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP_NF_COMPAT_IPCHAINS=m
+CONFIG_IP_NF_COMPAT_IPFWADM=m
+CONFIG_IP_NF_CONNTRACK_MARK=y
+CONFIG_IP_NF_TARGET_CONNMARK=m
+CONFIG_IP_NF_MATCH_CONNMARK=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+
+#
+# IPv6: Netfilter Configuration
+#
+CONFIG_IP6_NF_FTP=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_CONNTRACK=m
+CONFIG_IP6_NF_MATCH_STATE=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+
+#
+# DECnet: Netfilter Configuration
+#
+CONFIG_DECNET_NF_GRABULATOR=m
+
+#
+# Bridge: Netfilter Configuration
+#
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=m
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_MSG is not set
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_HMAC_NONE is not set
+# CONFIG_SCTP_HMAC_SHA1 is not set
+CONFIG_SCTP_HMAC_MD5=y
+CONFIG_ATM=y
+CONFIG_ATM_CLIP=y
+CONFIG_ATM_CLIP_NO_ICMP=y
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+# CONFIG_ATM_BR2684_IPFILTER is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_LLC=y
+CONFIG_LLC2=m
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=y
+CONFIG_LTPC=m
+CONFIG_COPS=m
+CONFIG_COPS_DAYNA=y
+CONFIG_COPS_TANGENT=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+# CONFIG_NET_DIVERT is not set
+CONFIG_ECONET=m
+# CONFIG_ECONET_AUNUDP is not set
+# CONFIG_ECONET_NATIVE is not set
+CONFIG_WAN_ROUTER=m
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_CSZ=m
+CONFIG_NET_SCH_ATM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_DELAY=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+CONFIG_NET_PKTGEN=m
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+CONFIG_ARCNET=m
+CONFIG_ARCNET_1201=m
+CONFIG_ARCNET_1051=m
+CONFIG_ARCNET_RAW=m
+CONFIG_ARCNET_COM90xx=m
+CONFIG_ARCNET_COM90xxIO=m
+CONFIG_ARCNET_RIM_I=m
+CONFIG_ARCNET_COM20020=m
+CONFIG_ARCNET_COM20020_ISA=m
+CONFIG_ARCNET_COM20020_PCI=m
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+CONFIG_NET_SB1000=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_EL1=m
+CONFIG_EL2=m
+CONFIG_ELPLUS=m
+CONFIG_EL16=m
+CONFIG_EL3=m
+CONFIG_3C515=m
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+CONFIG_LANCE=m
+CONFIG_NET_VENDOR_SMC=y
+CONFIG_WD80x3=m
+CONFIG_ULTRA=m
+CONFIG_SMC9194=m
+CONFIG_NET_VENDOR_RACAL=y
+CONFIG_NI52=m
+CONFIG_NI65=m
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+CONFIG_DE2104X=m
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+CONFIG_TULIP_NAPI=y
+CONFIG_TULIP_NAPI_HW_MITIGATION=y
+CONFIG_DE4X5=m
+CONFIG_WINBOND_840=m
+CONFIG_DM9102=m
+CONFIG_PCMCIA_XIRCOM=m
+CONFIG_AT1700=m
+CONFIG_DEPCA=m
+CONFIG_HP100=m
+CONFIG_NET_ISA=y
+CONFIG_E2100=m
+CONFIG_EWRK3=m
+CONFIG_EEXPRESS=m
+CONFIG_EEXPRESS_PRO=m
+CONFIG_HPLAN_PLUS=m
+CONFIG_HPLAN=m
+CONFIG_LP486E=m
+CONFIG_ETH16I=m
+CONFIG_NE2000=m
+CONFIG_ZNET=m
+CONFIG_SEEQ8005=m
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+CONFIG_AMD8111_ETH=m
+CONFIG_ADAPTEC_STARFIRE=m
+CONFIG_ADAPTEC_STARFIRE_NAPI=y
+CONFIG_AC3200=m
+CONFIG_APRICOT=m
+CONFIG_B44=m
+CONFIG_FORCEDETH=m
+CONFIG_CS89x0=m
+CONFIG_DGRS=m
+CONFIG_EEPRO100=m
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=m
+CONFIG_E100_NAPI=y
+CONFIG_FEALNX=m
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_8139_RXBUF_IDX=2
+CONFIG_SIS900=m
+CONFIG_EPIC100=m
+CONFIG_SUNDANCE=m
+# CONFIG_SUNDANCE_MMIO is not set
+CONFIG_TLAN=m
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+CONFIG_NET_POCKET=y
+CONFIG_ATP=m
+CONFIG_DE600=m
+CONFIG_DE620=m
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+CONFIG_DL2K=m
+CONFIG_E1000=m
+CONFIG_E1000_NAPI=y
+CONFIG_E1000_NEW=m
+CONFIG_E1000_NEW_NAPI=y
+CONFIG_NS83820=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_R8169=m
+CONFIG_SIS190=m
+CONFIG_SK98LIN=m
+CONFIG_TIGON3=m
+CONFIG_NET_BROADCOM=m
+CONFIG_NET_BROADCOM_NEW=m
+CONFIG_NET_BCM44=m
+CONFIG_TIGON3_NEW=m
+
+#
+# Ethernet (10000 Mbit)
+#
+CONFIG_IXGB=m
+CONFIG_IXGB_NAPI=y
+CONFIG_S2IO=m
+CONFIG_S2IO_NAPI=y
+CONFIG_FDDI=y
+# CONFIG_DEFXX is not set
+CONFIG_SKFP=m
+CONFIG_HIPPI=y
+CONFIG_ROADRUNNER=m
+CONFIG_ROADRUNNER_LARGE_RINGS=y
+CONFIG_PLIP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_SLIP_MODE_SLIP6=y
+
+#
+# Wireless LAN (non-hamradio)
+#
+CONFIG_NET_RADIO=y
+
+#
+# Obsolete Wireless cards support (pre-802.11)
+#
+CONFIG_STRIP=m
+# CONFIG_ARLAN is not set
+CONFIG_WAVELAN=m
+CONFIG_PCMCIA_WAVELAN=m
+CONFIG_PCMCIA_NETWAVE=m
+
+#
+# Wireless 802.11 Frequency Hopping cards support
+#
+CONFIG_PCMCIA_RAYCS=m
+
+#
+# Wireless 802.11b ISA/PCI cards support
+#
+CONFIG_AIRO=m
+CONFIG_HERMES=m
+CONFIG_PLX_HERMES=m
+CONFIG_TMD_HERMES=m
+CONFIG_PCI_HERMES=m
+CONFIG_ATMEL=m
+CONFIG_PCI_ATMEL=m
+
+#
+# Wireless 802.11b Pcmcia/Cardbus cards support
+#
+CONFIG_PCMCIA_HERMES=m
+CONFIG_AIRO_CS=m
+CONFIG_PCMCIA_ATMEL=m
+CONFIG_PCMCIA_WL3501=m
+
+#
+# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support
+#
+CONFIG_PRISM54=m
+CONFIG_NET_WIRELESS=y
+
+#
+# Token Ring devices
+#
+CONFIG_TR=y
+CONFIG_IBMTR=m
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+CONFIG_TMS380TR=m
+CONFIG_TMSPCI=m
+CONFIG_SKISA=m
+CONFIG_PROTEON=m
+CONFIG_ABYSS=m
+CONFIG_SMCTR=m
+CONFIG_NET_FC=y
+CONFIG_NET_LPFC=m
+CONFIG_RCPCI=m
+CONFIG_SHAPER=m
+CONFIG_NETCONSOLE=m
+
+#
+# Wan interfaces
+#
+CONFIG_WAN=y
+CONFIG_HOSTESS_SV11=m
+# CONFIG_COSA is not set
+CONFIG_DSCC4=m
+CONFIG_DSCC4_PCISYNC=y
+CONFIG_DSCC4_PCI_RST=y
+CONFIG_LANMEDIA=m
+CONFIG_SEALEVEL_4021=m
+CONFIG_SYNCLINK_SYNCPPP=m
+CONFIG_HDLC=m
+CONFIG_HDLC_RAW=y
+CONFIG_HDLC_RAW_ETH=y
+CONFIG_HDLC_CISCO=y
+CONFIG_HDLC_FR=y
+CONFIG_HDLC_PPP=y
+CONFIG_HDLC_X25=y
+CONFIG_PCI200SYN=m
+CONFIG_WANXL=m
+# CONFIG_WANXL_BUILD_FIRMWARE is not set
+CONFIG_PC300=m
+CONFIG_PC300_MLPPP=y
+CONFIG_N2=m
+CONFIG_C101=m
+CONFIG_FARSYNC=m
+CONFIG_DLCI=m
+CONFIG_DLCI_COUNT=24
+CONFIG_DLCI_MAX=8
+CONFIG_SDLA=m
+# CONFIG_WAN_ROUTER_DRIVERS is not set
+CONFIG_LAPBETHER=m
+CONFIG_X25_ASY=m
+# CONFIG_SBNI is not set
+
+#
+# PCMCIA network device support
+#
+CONFIG_NET_PCMCIA=y
+CONFIG_PCMCIA_3C589=m
+CONFIG_PCMCIA_3C574=m
+CONFIG_PCMCIA_FMVJ18X=m
+CONFIG_PCMCIA_PCNET=m
+CONFIG_PCMCIA_NMCLAN=m
+CONFIG_PCMCIA_SMC91C92=m
+CONFIG_PCMCIA_XIRC2PS=m
+CONFIG_PCMCIA_AXNET=m
+CONFIG_ARCNET_COM20020_CS=m
+CONFIG_PCMCIA_IBMTR=m
+
+#
+# ATM drivers
+#
+CONFIG_ATM_TCP=m
+CONFIG_ATM_LANAI=m
+CONFIG_ATM_ENI=m
+# CONFIG_ATM_ENI_DEBUG is not set
+# CONFIG_ATM_ENI_TUNE_BURST is not set
+CONFIG_ATM_FIRESTREAM=m
+CONFIG_ATM_ZATM=m
+# CONFIG_ATM_ZATM_DEBUG is not set
+CONFIG_ATM_NICSTAR=m
+CONFIG_ATM_NICSTAR_USE_SUNI=y
+CONFIG_ATM_NICSTAR_USE_IDT77105=y
+CONFIG_ATM_IDT77252=m
+# CONFIG_ATM_IDT77252_DEBUG is not set
+CONFIG_ATM_IDT77252_RCV_ALL=y
+CONFIG_ATM_IDT77252_USE_SUNI=y
+CONFIG_ATM_AMBASSADOR=m
+# CONFIG_ATM_AMBASSADOR_DEBUG is not set
+CONFIG_ATM_HORIZON=m
+# CONFIG_ATM_HORIZON_DEBUG is not set
+CONFIG_ATM_IA=m
+# CONFIG_ATM_IA_DEBUG is not set
+CONFIG_ATM_FORE200E_MAYBE=m
+CONFIG_ATM_FORE200E_PCA=y
+CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y
+CONFIG_ATM_FORE200E_TX_RETRY=16
+CONFIG_ATM_FORE200E_DEBUG=0
+CONFIG_ATM_FORE200E=m
+CONFIG_ATM_HE=m
+CONFIG_ATM_HE_USE_SUNI=y
+
+#
+# Amateur Radio support
+#
+CONFIG_HAMRADIO=y
+
+#
+# Packet Radio protocols
+#
+CONFIG_AX25=m
+CONFIG_AX25_DAMA_SLAVE=y
+CONFIG_NETROM=m
+CONFIG_ROSE=m
+
+#
+# AX.25 network device drivers
+#
+CONFIG_BPQETHER=m
+CONFIG_SCC=m
+CONFIG_SCC_DELAY=y
+CONFIG_SCC_TRXECHO=y
+CONFIG_BAYCOM_SER_FDX=m
+CONFIG_BAYCOM_SER_HDX=m
+CONFIG_BAYCOM_PAR=m
+CONFIG_BAYCOM_EPP=m
+CONFIG_YAM=m
+
+#
+# IrDA (infrared) support
+#
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+CONFIG_IRDA_ULTRA=y
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+# CONFIG_IRDA_FAST_RR is not set
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+
+#
+# Old Serial dongle support
+#
+
+#
+# FIR device drivers
+#
+CONFIG_USB_IRDA=m
+CONFIG_SIGMATEL_FIR=m
+CONFIG_NSC_FIR=m
+CONFIG_WINBOND_FIR=m
+CONFIG_TOSHIBA_FIR=m
+CONFIG_SMC_IRCC_FIR=m
+CONFIG_ALI_FIR=m
+CONFIG_VLSI_FIR=m
+CONFIG_VIA_FIR=m
+
+#
+# Bluetooth support
+#
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_CMTP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUSB=m
+CONFIG_BT_HCIUSB_SCO=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIBTUART=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_NETPOLL=y
+CONFIG_NETPOLL_RX=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
+
+#
+# ISDN subsystem
+#
+CONFIG_ISDN=m
+
+#
+# Old ISDN4Linux
+#
+CONFIG_ISDN_I4L=m
+CONFIG_ISDN_PPP=y
+CONFIG_ISDN_PPP_VJ=y
+CONFIG_ISDN_MPP=y
+CONFIG_IPPP_FILTER=y
+CONFIG_ISDN_PPP_BSDCOMP=m
+CONFIG_ISDN_AUDIO=y
+CONFIG_ISDN_TTY_FAX=y
+CONFIG_ISDN_X25=y
+
+#
+# ISDN feature submodules
+#
+
+#
+# ISDN4Linux hardware drivers
+#
+
+#
+# Passive cards
+#
+CONFIG_ISDN_DRV_HISAX=m
+
+#
+# D-channel protocol features
+#
+CONFIG_HISAX_EURO=y
+CONFIG_DE_AOC=y
+# CONFIG_HISAX_NO_SENDCOMPLETE is not set
+# CONFIG_HISAX_NO_LLC is not set
+# CONFIG_HISAX_NO_KEYPAD is not set
+CONFIG_HISAX_1TR6=y
+CONFIG_HISAX_NI1=y
+CONFIG_HISAX_MAX_CARDS=8
+
+#
+# HiSax supported cards
+#
+CONFIG_HISAX_16_0=y
+CONFIG_HISAX_16_3=y
+CONFIG_HISAX_TELESPCI=y
+CONFIG_HISAX_S0BOX=y
+CONFIG_HISAX_AVM_A1=y
+CONFIG_HISAX_FRITZPCI=y
+CONFIG_HISAX_AVM_A1_PCMCIA=y
+CONFIG_HISAX_ELSA=y
+CONFIG_HISAX_IX1MICROR2=y
+CONFIG_HISAX_DIEHLDIVA=y
+CONFIG_HISAX_ASUSCOM=y
+CONFIG_HISAX_TELEINT=y
+CONFIG_HISAX_HFCS=y
+CONFIG_HISAX_SEDLBAUER=y
+CONFIG_HISAX_SPORTSTER=y
+CONFIG_HISAX_MIC=y
+CONFIG_HISAX_NETJET=y
+CONFIG_HISAX_NETJET_U=y
+CONFIG_HISAX_NICCY=y
+CONFIG_HISAX_ISURF=y
+CONFIG_HISAX_HSTSAPHIR=y
+CONFIG_HISAX_BKM_A4T=y
+CONFIG_HISAX_SCT_QUADRO=y
+CONFIG_HISAX_GAZEL=y
+CONFIG_HISAX_HFC_PCI=y
+CONFIG_HISAX_W6692=y
+CONFIG_HISAX_HFC_SX=y
+CONFIG_HISAX_ENTERNOW_PCI=y
+CONFIG_HISAX_DEBUG=y
+
+#
+# HiSax PCMCIA card service modules
+#
+CONFIG_HISAX_SEDLBAUER_CS=m
+CONFIG_HISAX_ELSA_CS=m
+CONFIG_HISAX_AVM_A1_CS=m
+CONFIG_HISAX_TELES_CS=m
+
+#
+# HiSax sub driver modules
+#
+CONFIG_HISAX_ST5481=m
+CONFIG_HISAX_HFCUSB=m
+CONFIG_HISAX_FRITZ_PCIPNP=m
+CONFIG_HISAX_HDLC=y
+
+#
+# Active cards
+#
+CONFIG_ISDN_DRV_ICN=m
+CONFIG_ISDN_DRV_PCBIT=m
+CONFIG_ISDN_DRV_SC=m
+CONFIG_ISDN_DRV_ACT2000=m
+CONFIG_ISDN_DRV_TPAM=m
+
+#
+# CAPI subsystem
+#
+CONFIG_ISDN_CAPI=m
+CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y
+CONFIG_ISDN_CAPI_MIDDLEWARE=y
+CONFIG_ISDN_CAPI_CAPI20=m
+CONFIG_ISDN_CAPI_CAPIFS_BOOL=y
+CONFIG_ISDN_CAPI_CAPIFS=m
+CONFIG_ISDN_CAPI_CAPIDRV=m
+
+#
+# CAPI hardware drivers
+#
+
+#
+# Active AVM cards
+#
+CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_T1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
+
+#
+# Active Eicon DIVA Server cards
+#
+CONFIG_CAPI_EICON=y
+CONFIG_ISDN_DIVAS=m
+CONFIG_ISDN_DIVAS_BRIPCI=y
+CONFIG_ISDN_DIVAS_PRIPCI=y
+CONFIG_ISDN_DIVAS_DIVACAPI=m
+CONFIG_ISDN_DIVAS_USERIDI=m
+CONFIG_ISDN_DIVAS_MAINT=m
+
+#
+# Telephony Support
+#
+CONFIG_PHONE=m
+CONFIG_PHONE_IXJ=m
+CONFIG_PHONE_IXJ_PCMCIA=m
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_TSDEV=m
+CONFIG_INPUT_TSDEV_SCREEN_X=240
+CONFIG_INPUT_TSDEV_SCREEN_Y=320
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+CONFIG_GAMEPORT=m
+CONFIG_SOUND_GAMEPORT=m
+CONFIG_GAMEPORT_NS558=m
+CONFIG_GAMEPORT_L4=m
+CONFIG_GAMEPORT_EMU10K1=m
+CONFIG_GAMEPORT_VORTEX=m
+CONFIG_GAMEPORT_FM801=m
+CONFIG_GAMEPORT_CS461x=m
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_CT82C710=m
+CONFIG_SERIO_PARKBD=m
+CONFIG_SERIO_PCIPS2=m
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+CONFIG_KEYBOARD_SUNKBD=m
+# CONFIG_KEYBOARD_LKKBD is not set
+CONFIG_KEYBOARD_XTKBD=m
+CONFIG_KEYBOARD_NEWTON=m
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+CONFIG_MOUSE_SERIAL=m
+CONFIG_MOUSE_INPORT=m
+CONFIG_MOUSE_ATIXL=y
+CONFIG_MOUSE_LOGIBM=m
+CONFIG_MOUSE_PC110PAD=m
+# CONFIG_MOUSE_VSXXXAA is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_ANALOG=m
+CONFIG_JOYSTICK_A3D=m
+CONFIG_JOYSTICK_ADI=m
+CONFIG_JOYSTICK_COBRA=m
+CONFIG_JOYSTICK_GF2K=m
+CONFIG_JOYSTICK_GRIP=m
+CONFIG_JOYSTICK_GRIP_MP=m
+CONFIG_JOYSTICK_GUILLEMOT=m
+CONFIG_JOYSTICK_INTERACT=m
+CONFIG_JOYSTICK_SIDEWINDER=m
+CONFIG_JOYSTICK_TMDC=m
+CONFIG_JOYSTICK_IFORCE=m
+CONFIG_JOYSTICK_IFORCE_USB=y
+CONFIG_JOYSTICK_IFORCE_232=y
+CONFIG_JOYSTICK_WARRIOR=m
+CONFIG_JOYSTICK_MAGELLAN=m
+CONFIG_JOYSTICK_SPACEORB=m
+CONFIG_JOYSTICK_SPACEBALL=m
+CONFIG_JOYSTICK_STINGER=m
+CONFIG_JOYSTICK_TWIDDLER=m
+CONFIG_JOYSTICK_DB9=m
+CONFIG_JOYSTICK_GAMECON=m
+CONFIG_JOYSTICK_TURBOGRAFX=m
+# CONFIG_INPUT_JOYDUMP is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_GUNZE=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=y
+CONFIG_INPUT_UINPUT=m
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_ECC=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_ROCKETPORT=m
+CONFIG_SYNCLINK=m
+CONFIG_SYNCLINKMP=m
+CONFIG_N_HDLC=m
+CONFIG_STALDRV=y
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_CS=m
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+CONFIG_SERIAL_8250_MULTIPORT=y
+CONFIG_SERIAL_8250_RSA=y
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_ICOM is not set
+CONFIG_SERIAL_JSM=m
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=m
+# CONFIG_LP_CONSOLE is not set
+CONFIG_PPDEV=m
+CONFIG_TIPAR=m
+CONFIG_QIC02_TAPE=m
+CONFIG_QIC02_DYNCONF=y
+
+#
+# Setting runtime QIC-02 configuration is done with qic02conf
+#
+
+#
+# from the tpqic02-support package. It is available at
+#
+
+#
+# metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/
+#
+
+#
+# IPMI
+#
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_PANIC_EVENT=y
+CONFIG_IPMI_PANIC_STRING=y
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_KCS=m
+CONFIG_IPMI_WATCHDOG=m
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_ACQUIRE_WDT=m
+CONFIG_ADVANTECH_WDT=m
+CONFIG_ALIM1535_WDT=m
+CONFIG_ALIM7101_WDT=m
+CONFIG_AMD7XX_TCO=m
+CONFIG_SC520_WDT=m
+CONFIG_EUROTECH_WDT=m
+CONFIG_IB700_WDT=m
+CONFIG_WAFER_WDT=m
+CONFIG_I8XX_TCO=m
+CONFIG_SC1200_WDT=m
+CONFIG_SCx200_WDT=m
+CONFIG_60XX_WDT=m
+CONFIG_CPU5_WDT=m
+CONFIG_W83627HF_WDT=m
+CONFIG_W83877F_WDT=m
+CONFIG_MACHZ_WDT=m
+
+#
+# ISA-based Watchdog Cards
+#
+CONFIG_PCWATCHDOG=m
+CONFIG_MIXCOMWD=m
+CONFIG_WDT=m
+CONFIG_WDT_501=y
+
+#
+# PCI-based Watchdog Cards
+#
+CONFIG_PCIPCWATCHDOG=m
+CONFIG_WDTPCI=m
+CONFIG_WDT_501_PCI=y
+
+#
+# USB-based Watchdog Cards
+#
+CONFIG_USBPCWATCHDOG=m
+CONFIG_HW_RANDOM=m
+CONFIG_NVRAM=m
+CONFIG_RTC=y
+CONFIG_DTLK=m
+CONFIG_R3964=m
+CONFIG_APPLICOM=m
+CONFIG_SONYPI=m
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=m
+CONFIG_AGP_ALI=m
+CONFIG_AGP_ATI=m
+CONFIG_AGP_AMD=m
+CONFIG_AGP_AMD64=m
+CONFIG_AGP_INTEL=m
+CONFIG_AGP_INTEL_MCH=m
+CONFIG_AGP_NVIDIA=m
+CONFIG_AGP_SIS=m
+CONFIG_AGP_SWORKS=m
+CONFIG_AGP_VIA=m
+CONFIG_AGP_EFFICEON=m
+# CONFIG_DRM is not set
+
+#
+# PCMCIA character devices
+#
+CONFIG_SYNCLINK_CS=m
+# CONFIG_MWAVE is not set
+CONFIG_SCx200_GPIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_MAX_RAW_DEVS=4096
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_VTUNE=m
+
+#
+# Linux InfraRed Controller
+#
+CONFIG_LIRC_SUPPORT=m
+CONFIG_LIRC_MAX_DEV=2
+CONFIG_LIRC_BT829=m
+CONFIG_LIRC_IT87=m
+CONFIG_LIRC_ATIUSB=m
+CONFIG_LIRC_SERIAL=m
+# CONFIG_LIRC_HOMEBREW is not set
+CONFIG_LIRC_PORT_SERIAL=0x3f8
+CONFIG_LIRC_IRQ_SERIAL=4
+CONFIG_LIRC_SIR=m
+CONFIG_LIRC_PORT_SIR=0x3f8
+CONFIG_LIRC_IRQ_SIR=4
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCF=m
+
+#
+# I2C Hardware Bus support
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I810=m
+CONFIG_I2C_ISA=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_PROSAVAGE=m
+CONFIG_I2C_SAVAGE4=m
+CONFIG_SCx200_I2C=m
+CONFIG_SCx200_I2C_SCL=12
+CONFIG_SCx200_I2C_SDA=13
+CONFIG_SCx200_ACB=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_VOODOO3=m
+
+#
+# Hardware Sensors Chip support
+#
+CONFIG_I2C_SENSOR=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_DS1621=m
+CONFIG_SENSORS_FSCHER=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_VIA686A=m
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83627HF=m
+
+#
+# Other I2C Chip support
+#
+CONFIG_SENSORS_EEPROM=m
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+CONFIG_IBM_ASM=m
+
+#
+# Multimedia devices
+#
+CONFIG_VIDEO_DEV=m
+
+#
+# Video For Linux
+#
+
+#
+# Video Adapters
+#
+CONFIG_VIDEO_BT848=m
+CONFIG_VIDEO_PMS=m
+CONFIG_VIDEO_BWQCAM=m
+CONFIG_VIDEO_CQCAM=m
+CONFIG_VIDEO_W9966=m
+CONFIG_VIDEO_CPIA=m
+CONFIG_VIDEO_CPIA_PP=m
+CONFIG_VIDEO_CPIA_USB=m
+CONFIG_VIDEO_SAA5246A=m
+CONFIG_VIDEO_SAA5249=m
+CONFIG_TUNER_3036=m
+CONFIG_VIDEO_STRADIS=m
+CONFIG_VIDEO_ZORAN=m
+CONFIG_VIDEO_ZORAN_BUZ=m
+CONFIG_VIDEO_ZORAN_DC10=m
+CONFIG_VIDEO_ZORAN_DC30=m
+CONFIG_VIDEO_ZORAN_LML33=m
+CONFIG_VIDEO_ZORAN_LML33R10=m
+CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_MXB=m
+CONFIG_VIDEO_DPC=m
+CONFIG_VIDEO_HEXIUM_ORION=m
+CONFIG_VIDEO_HEXIUM_GEMINI=m
+CONFIG_VIDEO_CX88=m
+
+#
+# Radio Adapters
+#
+CONFIG_RADIO_CADET=m
+CONFIG_RADIO_RTRACK=m
+CONFIG_RADIO_RTRACK2=m
+CONFIG_RADIO_AZTECH=m
+CONFIG_RADIO_GEMTEK=m
+CONFIG_RADIO_GEMTEK_PCI=m
+CONFIG_RADIO_MAXIRADIO=m
+CONFIG_RADIO_MAESTRO=m
+CONFIG_RADIO_MIROPCM20=m
+# CONFIG_RADIO_MIROPCM20_RDS is not set
+CONFIG_RADIO_SF16FMI=m
+CONFIG_RADIO_SF16FMR2=m
+CONFIG_RADIO_TERRATEC=m
+CONFIG_RADIO_TRUST=m
+CONFIG_RADIO_TYPHOON=m
+CONFIG_RADIO_TYPHOON_PROC_FS=y
+CONFIG_RADIO_ZOLTRIX=m
+
+#
+# Digital Video Broadcasting Devices
+#
+CONFIG_DVB=y
+CONFIG_DVB_CORE=m
+
+#
+# Supported Frontend Modules
+#
+CONFIG_DVB_TWINHAN_DST=m
+CONFIG_DVB_STV0299=m
+CONFIG_DVB_SP887X=m
+CONFIG_DVB_SP887X_FIRMWARE_FILE="/etc/dvb/sc_main.mc"
+CONFIG_DVB_ALPS_TDLB7=m
+CONFIG_DVB_ALPS_TDMB7=m
+CONFIG_DVB_ATMEL_AT76C651=m
+CONFIG_DVB_CX24110=m
+CONFIG_DVB_GRUNDIG_29504_491=m
+CONFIG_DVB_GRUNDIG_29504_401=m
+CONFIG_DVB_MT312=m
+CONFIG_DVB_VES1820=m
+CONFIG_DVB_VES1X93=m
+CONFIG_DVB_TDA1004X=m
+CONFIG_DVB_TDA1004X_FIRMWARE_FILE="/usr/lib/hotplug/firmware/tda1004x.bin"
+CONFIG_DVB_NXT6000=m
+
+#
+# Supported SAA7146 based PCI Adapters
+#
+CONFIG_DVB_AV7110=m
+# CONFIG_DVB_AV7110_FIRMWARE is not set
+CONFIG_DVB_AV7110_OSD=y
+CONFIG_DVB_BUDGET=m
+CONFIG_DVB_BUDGET_CI=m
+CONFIG_DVB_BUDGET_AV=m
+CONFIG_DVB_BUDGET_PATCH=m
+
+#
+# Supported USB Adapters
+#
+CONFIG_DVB_TTUSB_BUDGET=m
+CONFIG_DVB_TTUSB_DEC=m
+
+#
+# Supported FlexCopII (B2C2) Adapters
+#
+CONFIG_DVB_B2C2_SKYSTAR=m
+
+#
+# Supported BT878 Adapters
+#
+CONFIG_DVB_BT8XX=m
+CONFIG_VIDEO_SAA7146=m
+CONFIG_VIDEO_SAA7146_VV=m
+CONFIG_VIDEO_VIDEOBUF=m
+CONFIG_VIDEO_TUNER=m
+CONFIG_VIDEO_BUF=m
+CONFIG_VIDEO_BTCX=m
+CONFIG_VIDEO_IR=m
+
+#
+# Graphics support
+#
+CONFIG_FB=y
+CONFIG_FB_PM2=m
+CONFIG_FB_PM2_FIFO_DISCONNECT=y
+CONFIG_FB_CYBER2000=m
+CONFIG_FB_IMSTT=y
+CONFIG_FB_VGA16=m
+CONFIG_FB_VESA=y
+CONFIG_VIDEO_SELECT=y
+CONFIG_FB_HGA=m
+CONFIG_FB_RIVA=m
+CONFIG_FB_I810=m
+CONFIG_FB_I810_GTF=y
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_RADEON_OLD is not set
+CONFIG_FB_RADEON=m
+CONFIG_FB_RADEON_I2C=y
+# CONFIG_FB_RADEON_DEBUG is not set
+# CONFIG_FB_ATY128 is not set
+CONFIG_FB_ATY=m
+CONFIG_FB_ATY_CT=y
+CONFIG_FB_ATY_GX=y
+CONFIG_FB_ATY_XL_INIT=y
+CONFIG_FB_SIS=m
+CONFIG_FB_SIS_300=y
+CONFIG_FB_SIS_315=y
+CONFIG_FB_NEOMAGIC=m
+CONFIG_FB_KYRO=m
+CONFIG_FB_3DFX=m
+CONFIG_FB_VOODOO1=m
+CONFIG_FB_TRIDENT=m
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_MDA_CONSOLE=m
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_PCI_CONSOLE=y
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+
+#
+# Logo configuration
+#
+# CONFIG_LOGO is not set
+
+#
+# Bootsplash configuration
+#
+CONFIG_BOOTSPLASH=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_RTCTIMER=m
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DEBUG=y
+CONFIG_SND_DEBUG_MEMORY=y
+# CONFIG_SND_DEBUG_DETECT is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_OPL4_LIB=m
+CONFIG_SND_VX_LIB=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+
+#
+# ISA devices
+#
+CONFIG_SND_AD1816A=m
+CONFIG_SND_AD1848=m
+CONFIG_SND_CS4231=m
+CONFIG_SND_CS4232=m
+CONFIG_SND_CS4236=m
+CONFIG_SND_ES968=m
+CONFIG_SND_ES1688=m
+CONFIG_SND_ES18XX=m
+CONFIG_SND_GUSCLASSIC=m
+CONFIG_SND_GUSEXTREME=m
+CONFIG_SND_GUSMAX=m
+CONFIG_SND_INTERWAVE=m
+CONFIG_SND_INTERWAVE_STB=m
+CONFIG_SND_OPTI92X_AD1848=m
+CONFIG_SND_OPTI92X_CS4231=m
+CONFIG_SND_OPTI93X=m
+CONFIG_SND_SB8=m
+CONFIG_SND_SB16=m
+CONFIG_SND_SBAWE=m
+CONFIG_SND_SB16_CSP=y
+CONFIG_SND_WAVEFRONT=m
+CONFIG_SND_ALS100=m
+CONFIG_SND_AZT2320=m
+CONFIG_SND_CMI8330=m
+CONFIG_SND_DT019X=m
+CONFIG_SND_OPL3SA2=m
+CONFIG_SND_SGALAXY=m
+CONFIG_SND_SSCAPE=m
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CS4281=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_HDSP=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_YMFPCI=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_FM801=m
+CONFIG_SND_FM801_TEA575X=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_INTEL8X0=m
+CONFIG_SND_INTEL8X0M=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VX222=m
+
+#
+# ALSA USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+
+#
+# PCMCIA devices
+#
+# CONFIG_SND_VXPOCKET is not set
+# CONFIG_SND_VXP440 is not set
+# CONFIG_SND_PDAUDIOCF is not set
+
+#
+# Open Sound System
+#
+CONFIG_SOUND_PRIME=m
+CONFIG_SOUND_BT878=m
+CONFIG_SOUND_CMPCI=m
+CONFIG_SOUND_CMPCI_FM=y
+CONFIG_SOUND_CMPCI_FMIO=0x388
+CONFIG_SOUND_CMPCI_MIDI=y
+CONFIG_SOUND_CMPCI_MPUIO=0x330
+CONFIG_SOUND_CMPCI_JOYSTICK=y
+CONFIG_SOUND_CMPCI_CM8738=y
+# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set
+CONFIG_SOUND_CMPCI_SPDIFLOOP=y
+CONFIG_SOUND_CMPCI_SPEAKERS=2
+CONFIG_SOUND_EMU10K1=m
+CONFIG_MIDI_EMU10K1=y
+# CONFIG_SOUND_FUSION is not set
+CONFIG_SOUND_CS4281=m
+CONFIG_SOUND_ES1370=m
+CONFIG_SOUND_ES1371=m
+CONFIG_SOUND_ESSSOLO1=m
+CONFIG_SOUND_MAESTRO=m
+CONFIG_SOUND_MAESTRO3=m
+CONFIG_SOUND_ICH=m
+CONFIG_SOUND_SONICVIBES=m
+CONFIG_SOUND_TRIDENT=m
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_VIA82CXXX=m
+CONFIG_MIDI_VIA82CXXX=y
+CONFIG_SOUND_OSS=m
+CONFIG_SOUND_TRACEINIT=y
+CONFIG_SOUND_DMAP=y
+# CONFIG_SOUND_AD1816 is not set
+CONFIG_SOUND_AD1889=m
+CONFIG_SOUND_SGALAXY=m
+CONFIG_SOUND_ADLIB=m
+CONFIG_SOUND_ACI_MIXER=m
+CONFIG_SOUND_CS4232=m
+CONFIG_SOUND_SSCAPE=m
+CONFIG_SOUND_GUS=m
+# CONFIG_SOUND_GUS16 is not set
+CONFIG_SOUND_GUSMAX=y
+CONFIG_SOUND_VMIDI=m
+CONFIG_SOUND_TRIX=m
+CONFIG_SOUND_MSS=m
+CONFIG_SOUND_MPU401=m
+CONFIG_SOUND_NM256=m
+CONFIG_SOUND_MAD16=m
+CONFIG_MAD16_OLDCARD=y
+CONFIG_SOUND_PAS=m
+CONFIG_SOUND_PSS=m
+CONFIG_PSS_MIXER=y
+# CONFIG_PSS_HAVE_BOOT is not set
+CONFIG_SOUND_SB=m
+# CONFIG_SOUND_AWE32_SYNTH is not set
+CONFIG_SOUND_WAVEFRONT=m
+CONFIG_SOUND_MAUI=m
+CONFIG_SOUND_YM3812=m
+CONFIG_SOUND_OPL3SA1=m
+CONFIG_SOUND_OPL3SA2=m
+CONFIG_SOUND_YMFPCI=m
+CONFIG_SOUND_YMFPCI_LEGACY=y
+CONFIG_SOUND_UART6850=m
+CONFIG_SOUND_AEDSP16=m
+CONFIG_SC6600=y
+CONFIG_SC6600_JOY=y
+CONFIG_SC6600_CDROM=4
+CONFIG_SC6600_CDROMBASE=0x0
+# CONFIG_AEDSP16_MSS is not set
+# CONFIG_AEDSP16_SBPRO is not set
+CONFIG_AEDSP16_MPU401=y
+CONFIG_SOUND_TVMIXER=m
+CONFIG_SOUND_KAHLUA=m
+CONFIG_SOUND_ALI5455=m
+CONFIG_SOUND_FORTE=m
+CONFIG_SOUND_RME96XX=m
+CONFIG_SOUND_AD1980=m
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_EHCI_SPLIT_ISO=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_AUDIO=m
+
+#
+# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+CONFIG_USB_STORAGE_HP8200e=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+
+#
+# USB Human Interface Devices (HID)
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+CONFIG_HID_FF=y
+CONFIG_HID_PID=y
+CONFIG_LOGITECH_FF=y
+CONFIG_THRUSTMASTER_FF=y
+CONFIG_USB_HIDDEV=y
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+CONFIG_USB_AIPTEK=m
+CONFIG_USB_WACOM=m
+CONFIG_USB_KBTAB=m
+CONFIG_USB_POWERMATE=m
+CONFIG_USB_MTOUCH=m
+CONFIG_USB_XPAD=m
+CONFIG_USB_ATI_REMOTE=m
+
+#
+# USB Imaging devices
+#
+CONFIG_USB_MDC800=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USB_HPUSBSCSI=m
+
+#
+# USB Multimedia devices
+#
+CONFIG_USB_DABUSB=m
+CONFIG_USB_VICAM=m
+CONFIG_USB_DSBR=m
+CONFIG_USB_IBMCAM=m
+CONFIG_USB_KONICAWC=m
+CONFIG_USB_OV511=m
+CONFIG_USB_SE401=m
+CONFIG_USB_STV680=m
+CONFIG_USB_W9968CF=m
+
+#
+# USB Network adaptors
+#
+CONFIG_USB_CATC=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_USBNET=m
+
+#
+# USB Host-to-Host Cables
+#
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_BELKIN=y
+CONFIG_USB_GENESYS=y
+CONFIG_USB_NET1080=y
+CONFIG_USB_PL2301=y
+
+#
+# Intelligent USB Devices/Gadgets
+#
+CONFIG_USB_ARMLINUX=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_ZAURUS=y
+CONFIG_USB_CDCETHER=y
+
+#
+# USB Network Adapters
+#
+CONFIG_USB_AX8817X=y
+
+#
+# USB port drivers
+#
+CONFIG_USB_USS720=m
+
+#
+# USB Serial Converter support
+#
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19=y
+CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_SAFE=m
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_XIRCOM=m
+CONFIG_USB_SERIAL_OMNINET=m
+CONFIG_USB_EZUSB=y
+
+#
+# USB Miscellaneous drivers
+#
+CONFIG_USB_EMI62=m
+CONFIG_USB_EMI26=m
+CONFIG_USB_TIGL=m
+CONFIG_USB_AUERSWALD=m
+CONFIG_USB_RIO500=m
+CONFIG_USB_LEGOTOWER=m
+CONFIG_USB_LCD=m
+CONFIG_USB_LED=m
+CONFIG_USB_CYTHERM=m
+CONFIG_USB_SPEEDTOUCH=m
+# CONFIG_USB_TEST is not set
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_SDP is not set
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_UDAPL_HELPER=m
+CONFIG_INFINIBAND_MELLANOX_HCA=m
+CONFIG_AUDIT=m
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=m
+CONFIG_JBD_DEBUG=y
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_DMAPI=y
+# CONFIG_JFS_DEBUG is not set
+CONFIG_JFS_STATISTICS=y
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_RT=y
+CONFIG_XFS_QUOTA=m
+CONFIG_XFS_DMAPI=y
+CONFIG_XFS_SECURITY=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_MINIX_FS=y
+CONFIG_ROMFS_FS=m
+CONFIG_DMAPI=m
+# CONFIG_DMAPI_DEBUG is not set
+CONFIG_QUOTA=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_QUOTACTL=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_UDF_FS=m
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_NTFS_RW is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_DEVFS_FS is not set
+CONFIG_DEVPTS_FS_XATTR=y
+CONFIG_DEVPTS_FS_SECURITY=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+CONFIG_RELAYFS_FS=m
+# CONFIG_KLOG_CHANNEL is not set
+
+#
+# Miscellaneous filesystems
+#
+CONFIG_ADFS_FS=m
+# CONFIG_ADFS_FS_RW is not set
+CONFIG_AFFS_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_JFFS_FS=m
+CONFIG_JFFS_FS_VERBOSE=0
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_DEBUG=0
+# CONFIG_JFFS2_FS_NAND is not set
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+CONFIG_HPFS_FS=m
+CONFIG_QNX4FS_FS=m
+# CONFIG_QNX4FS_RW is not set
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_ACL=y
+CONFIG_NFS_ACL_SUPPORT=y
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=y
+CONFIG_STATD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_SMB_NLS_REMOTE="cp850"
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_CODA_FS=m
+# CONFIG_CODA_FS_OLD_API is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_AFS_FS=m
+CONFIG_RXRPC=m
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+# CONFIG_MINIX_SUBPARTITION is not set
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+# CONFIG_LDM_DEBUG is not set
+CONFIG_NEC98_PARTITION=y
+CONFIG_SGI_PARTITION=y
+CONFIG_ULTRIX_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_FSHOOKS=y
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
+# Kernel hacking
+#
+CONFIG_CRASH_DUMP=m
+CONFIG_KERNTYPES=y
+CONFIG_CRASH_DUMP_BLOCKDEV=m
+CONFIG_CRASH_DUMP_NETDEV=m
+# CONFIG_CRASH_DUMP_MEMDEV is not set
+CONFIG_CRASH_DUMP_COMPRESS_RLE=m
+CONFIG_CRASH_DUMP_COMPRESS_GZIP=m
+CONFIG_DEBUG_KERNEL=y
+CONFIG_EARLY_PRINTK=y
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUGREG is not set
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_KDB is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+# CONFIG_HOOK is not set
+
+#
+# Security options
+#
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=m
+CONFIG_SECURITY_ROOTPLUG=m
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DEVELOP=y
+# CONFIG_SECURITY_SELINUX_MLS is not set
+
+#
+# IBM Crypto Hardware support
+#
+CONFIG_IBM_CRYPTO=m
+CONFIG_ICA_LEEDSLITE=m
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TEST=m
+
+#
+# Library routines
+#
+CONFIG_CRC32=y
+CONFIG_QSORT=y
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# Build options
+#
+CONFIG_SUSE_KERNEL=y
+CONFIG_CFGNAME="bigsmp"
+CONFIG_RELEASE="SLES9_SP1_BRANCH_2004110217390391"
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_PC=y
--- /dev/null
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_X86=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_GENERIC_ISA_DMA=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_CLEAN_COMPILE=y
+# CONFIG_STANDALONE is not set
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_LOG_BUF_SHIFT=17
+CONFIG_HOTPLUG=y
+CONFIG_EVLOG=y
+# CONFIG_EVLOG_FWPRINTK is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+# CONFIG_EMBEDDED is not set
+
+#
+# Class Based Kernel Resource Management
+#
+CONFIG_CKRM=y
+CONFIG_RCFS_FS=m
+CONFIG_CKRM_TYPE_TASKCLASS=y
+CONFIG_CKRM_RES_NUMTASKS=m
+CONFIG_CKRM_TYPE_SOCKETCLASS=y
+CONFIG_CKRM_RBCE=m
+CONFIG_CKRM_CRBCE=m
+CONFIG_DELAY_ACCT=y
+CONFIG_KALLSYMS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_OBSOLETE_MODPARM=y
+CONFIG_MODVERSIONS=y
+CONFIG_KMOD=y
+CONFIG_STOP_MACHINE=y
+
+#
+# Processor type and features
+#
+# CONFIG_X86_PC is not set
+# CONFIG_X86_ELAN is not set
+# CONFIG_X86_VOYAGER is not set
+# CONFIG_X86_NUMAQ is not set
+# CONFIG_X86_SUMMIT is not set
+# CONFIG_X86_BIGSMP is not set
+# CONFIG_X86_VISWS is not set
+CONFIG_X86_GENERICARCH=y
+# CONFIG_X86_ES7000 is not set
+CONFIG_X86_CYCLONE_TIMER=y
+# CONFIG_M386 is not set
+# CONFIG_M486 is not set
+# CONFIG_M586 is not set
+# CONFIG_M586TSC is not set
+# CONFIG_M586MMX is not set
+# CONFIG_M686 is not set
+CONFIG_MPENTIUMII=y
+# CONFIG_MPENTIUMIII is not set
+# CONFIG_MPENTIUMM is not set
+# CONFIG_MPENTIUM4 is not set
+# CONFIG_MK6 is not set
+# CONFIG_MK7 is not set
+# CONFIG_MK8 is not set
+# CONFIG_MCRUSOE is not set
+# CONFIG_MWINCHIPC6 is not set
+# CONFIG_MWINCHIP2 is not set
+# CONFIG_MWINCHIP3D is not set
+# CONFIG_MCYRIXIII is not set
+# CONFIG_MVIAC3_2 is not set
+CONFIG_X86_GENERIC=y
+CONFIG_X86_CMPXCHG=y
+CONFIG_X86_XADD=y
+CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_X86_WP_WORKS_OK=y
+CONFIG_X86_INVLPG=y
+CONFIG_X86_BSWAP=y
+CONFIG_X86_POPAD_OK=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_INTEL_USERCOPY=y
+CONFIG_X86_USE_PPRO_CHECKSUM=y
+# CONFIG_HPET_TIMER is not set
+# CONFIG_HPET_EMULATE_RTC is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=128
+CONFIG_SCHED_SMT=y
+# CONFIG_PREEMPT is not set
+CONFIG_X86_LOCAL_APIC=y
+CONFIG_X86_IO_APIC=y
+CONFIG_X86_TSC=y
+CONFIG_X86_MCE=y
+# CONFIG_X86_MCE_NONFATAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+CONFIG_TOSHIBA=m
+CONFIG_I8K=m
+CONFIG_MICROCODE=m
+CONFIG_X86_MSR=m
+CONFIG_X86_CPUID=m
+
+#
+# Firmware Drivers
+#
+CONFIG_EDD=m
+# CONFIG_NOHIGHMEM is not set
+# CONFIG_HIGHMEM4G is not set
+CONFIG_HIGHMEM64G=y
+CONFIG_HIGHMEM=y
+CONFIG_X86_PAE=y
+# CONFIG_NUMA is not set
+CONFIG_HIGHPTE=y
+# CONFIG_MATH_EMULATION is not set
+CONFIG_MTRR=y
+CONFIG_EFI=y
+CONFIG_IRQBALANCE=y
+CONFIG_HAVE_DEC_LOCK=y
+CONFIG_BOOT_IOREMAP=y
+CONFIG_REGPARM=y
+
+#
+# Special options
+#
+CONFIG_PROC_MM=y
+
+#
+# Power management options (ACPI, APM)
+#
+CONFIG_PM=y
+# CONFIG_SOFTWARE_SUSPEND is not set
+# CONFIG_PM_DISK is not set
+
+#
+# ACPI (Advanced Configuration and Power Interface) Support
+#
+CONFIG_ACPI=y
+CONFIG_ACPI_BOOT=y
+CONFIG_ACPI_INTERPRETER=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_AC=m
+CONFIG_ACPI_BATTERY=m
+CONFIG_ACPI_BUTTON=m
+CONFIG_ACPI_FAN=m
+CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_THERMAL=m
+# CONFIG_ACPI_ASUS is not set
+CONFIG_ACPI_TOSHIBA=m
+# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BUS=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_POWER=y
+CONFIG_ACPI_PCI=y
+CONFIG_ACPI_SYSTEM=y
+CONFIG_X86_PM_TIMER=y
+CONFIG_ACPI_INITRD=y
+
+#
+# APM (Advanced Power Management) BIOS Support
+#
+CONFIG_APM=y
+# CONFIG_APM_IGNORE_USER_SUSPEND is not set
+CONFIG_APM_DO_ENABLE=y
+# CONFIG_APM_CPU_IDLE is not set
+CONFIG_APM_DISPLAY_BLANK=y
+# CONFIG_APM_RTC_IS_GMT is not set
+CONFIG_APM_ALLOW_INTS=y
+# CONFIG_APM_REAL_MODE_POWER_OFF is not set
+
+#
+# CPU Frequency scaling
+#
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_PROC_INTF=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_GOV_USERSPACE=m
+CONFIG_CPU_FREQ_GOV_ONDEMAND=m
+# CONFIG_CPU_FREQ_24_API is not set
+CONFIG_CPU_FREQ_TABLE=m
+
+#
+# CPUFreq processor drivers
+#
+CONFIG_X86_ACPI_CPUFREQ=m
+# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set
+CONFIG_X86_POWERNOW_K6=m
+CONFIG_X86_POWERNOW_K7=m
+CONFIG_X86_POWERNOW_K8=m
+CONFIG_X86_POWERNOW_K8_ACPI=y
+CONFIG_X86_GX_SUSPMOD=m
+CONFIG_X86_SPEEDSTEP_CENTRINO=m
+CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y
+# CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI is not set
+CONFIG_X86_SPEEDSTEP_ICH=m
+CONFIG_X86_SPEEDSTEP_SMI=m
+CONFIG_X86_P4_CLOCKMOD=m
+CONFIG_X86_SPEEDSTEP_LIB=m
+CONFIG_X86_LONGRUN=m
+CONFIG_X86_LONGHAUL=m
+
+#
+# Bus options (PCI, PCMCIA, EISA, MCA, ISA)
+#
+CONFIG_PCI=y
+# CONFIG_PCI_GOBIOS is not set
+# CONFIG_PCI_GOMMCONFIG is not set
+# CONFIG_PCI_GODIRECT is not set
+CONFIG_PCI_GOANY=y
+CONFIG_PCI_BIOS=y
+CONFIG_PCI_DIRECT=y
+CONFIG_PCI_MMCONFIG=y
+# CONFIG_PCI_USE_VECTOR is not set
+# CONFIG_PCI_LEGACY_PROC is not set
+# CONFIG_PCI_NAMES is not set
+CONFIG_ISA=y
+# CONFIG_EISA is not set
+# CONFIG_MCA is not set
+CONFIG_SCx200=m
+
+#
+# PCMCIA/CardBus support
+#
+CONFIG_PCMCIA=m
+# CONFIG_PCMCIA_DEBUG is not set
+CONFIG_YENTA=m
+CONFIG_CARDBUS=y
+CONFIG_I82092=m
+CONFIG_I82365=m
+CONFIG_TCIC=m
+CONFIG_PCMCIA_PROBE=y
+
+#
+# PCI Hotplug Support
+#
+CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI_FAKE=m
+CONFIG_HOTPLUG_PCI_COMPAQ=m
+CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y
+CONFIG_HOTPLUG_PCI_IBM=m
+CONFIG_HOTPLUG_PCI_AMD=m
+CONFIG_HOTPLUG_PCI_ACPI=m
+CONFIG_HOTPLUG_PCI_CPCI=y
+CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m
+CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m
+CONFIG_HOTPLUG_PCI_PCIE=m
+# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set
+# CONFIG_HOTPLUG_PCI_SHPC is not set
+
+#
+# Executable file formats
+#
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_AOUT=m
+CONFIG_BINFMT_MISC=m
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_FW_LOADER=m
+# CONFIG_DEBUG_DRIVER is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+CONFIG_MTD=m
+# CONFIG_MTD_DEBUG is not set
+CONFIG_MTD_PARTITIONS=m
+CONFIG_MTD_CONCAT=m
+CONFIG_MTD_REDBOOT_PARTS=m
+CONFIG_MTD_CMDLINE_PARTS=m
+
+#
+# User Modules And Translation Layers
+#
+CONFIG_MTD_CHAR=m
+CONFIG_MTD_BLOCK=m
+# CONFIG_MTD_BLOCK_RO is not set
+# CONFIG_FTL is not set
+# CONFIG_NFTL is not set
+# CONFIG_INFTL is not set
+
+#
+# RAM/ROM/Flash chip drivers
+#
+CONFIG_MTD_CFI=m
+CONFIG_MTD_JEDECPROBE=m
+CONFIG_MTD_GEN_PROBE=m
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_NOSWAP=y
+# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set
+# CONFIG_MTD_CFI_GEOMETRY is not set
+CONFIG_MTD_CFI_INTELEXT=m
+CONFIG_MTD_CFI_AMDSTD=m
+CONFIG_MTD_CFI_STAA=m
+# CONFIG_MTD_RAM is not set
+# CONFIG_MTD_ROM is not set
+CONFIG_MTD_ABSENT=m
+CONFIG_MTD_OBSOLETE_CHIPS=y
+CONFIG_MTD_AMDSTD=m
+CONFIG_MTD_SHARP=m
+CONFIG_MTD_JEDEC=m
+
+#
+# Mapping drivers for chip access
+#
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=m
+CONFIG_MTD_PHYSMAP_START=0x8000000
+CONFIG_MTD_PHYSMAP_LEN=0x4000000
+CONFIG_MTD_PHYSMAP_BUSWIDTH=2
+CONFIG_MTD_PNC2000=m
+CONFIG_MTD_SC520CDP=m
+CONFIG_MTD_NETSC520=m
+CONFIG_MTD_SBC_GXX=m
+CONFIG_MTD_ELAN_104NC=m
+CONFIG_MTD_OCTAGON=m
+CONFIG_MTD_VMAX=m
+CONFIG_MTD_SCx200_DOCFLASH=m
+CONFIG_MTD_AMD76XROM=m
+CONFIG_MTD_ICH2ROM=m
+CONFIG_MTD_SCB2_FLASH=m
+CONFIG_MTD_NETtel=m
+CONFIG_MTD_DILNETPC=m
+CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000
+CONFIG_MTD_L440GX=m
+CONFIG_MTD_PCI=m
+
+#
+# Self-contained MTD device drivers
+#
+CONFIG_MTD_PMC551=m
+CONFIG_MTD_PMC551_BUGFIX=y
+# CONFIG_MTD_PMC551_DEBUG is not set
+CONFIG_MTD_SLRAM=m
+CONFIG_MTD_MTDRAM=m
+CONFIG_MTDRAM_TOTAL_SIZE=4096
+CONFIG_MTDRAM_ERASE_SIZE=128
+CONFIG_MTD_BLKMTD=m
+
+#
+# Disk-On-Chip Device Drivers
+#
+CONFIG_MTD_DOC2000=m
+CONFIG_MTD_DOC2001=m
+CONFIG_MTD_DOC2001PLUS=m
+CONFIG_MTD_DOCPROBE=m
+CONFIG_MTD_DOCPROBE_ADVANCED=y
+CONFIG_MTD_DOCPROBE_ADDRESS=0x0000
+CONFIG_MTD_DOCPROBE_HIGH=y
+CONFIG_MTD_DOCPROBE_55AA=y
+
+#
+# NAND Flash Device Drivers
+#
+CONFIG_MTD_NAND=m
+# CONFIG_MTD_NAND_VERIFY_WRITE is not set
+CONFIG_MTD_NAND_IDS=m
+
+#
+# Parallel port support
+#
+CONFIG_PARPORT=m
+CONFIG_PARPORT_PC=m
+CONFIG_PARPORT_PC_CML1=m
+CONFIG_PARPORT_SERIAL=m
+CONFIG_PARPORT_PC_FIFO=y
+CONFIG_PARPORT_PC_SUPERIO=y
+CONFIG_PARPORT_PC_PCMCIA=m
+CONFIG_PARPORT_OTHER=y
+CONFIG_PARPORT_1284=y
+
+#
+# Plug and Play support
+#
+CONFIG_PNP=y
+# CONFIG_PNP_DEBUG is not set
+
+#
+# Protocols
+#
+CONFIG_ISAPNP=y
+CONFIG_PNPBIOS=y
+CONFIG_PNPBIOS_PROC_FS=y
+
+#
+# Block devices
+#
+CONFIG_BLK_DEV_FD=y
+CONFIG_BLK_DEV_XD=m
+CONFIG_PARIDE=m
+CONFIG_PARIDE_PARPORT=m
+
+#
+# Parallel IDE high-level drivers
+#
+CONFIG_PARIDE_PD=m
+CONFIG_PARIDE_PCD=m
+CONFIG_PARIDE_PF=m
+CONFIG_PARIDE_PT=m
+CONFIG_PARIDE_PG=m
+
+#
+# Parallel IDE protocol modules
+#
+CONFIG_PARIDE_ATEN=m
+CONFIG_PARIDE_BPCK=m
+CONFIG_PARIDE_BPCK6=m
+CONFIG_PARIDE_COMM=m
+CONFIG_PARIDE_DSTR=m
+CONFIG_PARIDE_FIT2=m
+CONFIG_PARIDE_FIT3=m
+CONFIG_PARIDE_EPAT=m
+CONFIG_PARIDE_EPATC8=y
+CONFIG_PARIDE_EPIA=m
+CONFIG_PARIDE_FRIQ=m
+CONFIG_PARIDE_FRPW=m
+CONFIG_PARIDE_KBIC=m
+CONFIG_PARIDE_KTTI=m
+CONFIG_PARIDE_ON20=m
+CONFIG_PARIDE_ON26=m
+CONFIG_BLK_CPQ_DA=m
+CONFIG_BLK_CPQ_CISS_DA=m
+CONFIG_CISS_SCSI_TAPE=y
+CONFIG_BLK_DEV_DAC960=m
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_CARMEL=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=64000
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LBD=y
+CONFIG_CIPHER_TWOFISH=m
+
+#
+# ATA/ATAPI/MFM/RLL support
+#
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDEDISK_MULTI_MODE=y
+CONFIG_IDEDISK_STROKE=y
+CONFIG_BLK_DEV_IDECS=m
+CONFIG_BLK_DEV_IDECD=m
+CONFIG_BLK_DEV_IDETAPE=m
+CONFIG_BLK_DEV_IDEFLOPPY=y
+CONFIG_BLK_DEV_IDESCSI=m
+# CONFIG_IDE_TASK_IOCTL is not set
+# CONFIG_IDE_TASKFILE_IO is not set
+
+#
+# IDE chipset support/bugfixes
+#
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_CMD640=y
+CONFIG_BLK_DEV_CMD640_ENHANCED=y
+CONFIG_BLK_DEV_IDEPNP=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_IDEPCI_SHARE_IRQ=y
+CONFIG_BLK_DEV_OFFBOARD=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_OPTI621=y
+CONFIG_BLK_DEV_RZ1000=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
+CONFIG_IDEDMA_PCI_AUTO=y
+CONFIG_IDEDMA_ONLYDISK=y
+CONFIG_BLK_DEV_ADMA=y
+CONFIG_BLK_DEV_AEC62XX=y
+CONFIG_BLK_DEV_ALI15X3=y
+# CONFIG_WDC_ALI15X3 is not set
+CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_ATIIXP=y
+CONFIG_BLK_DEV_CMD64X=y
+CONFIG_BLK_DEV_TRIFLEX=y
+CONFIG_BLK_DEV_CY82C693=y
+CONFIG_BLK_DEV_CS5520=m
+CONFIG_BLK_DEV_CS5530=m
+CONFIG_BLK_DEV_HPT34X=y
+CONFIG_HPT34X_AUTODMA=y
+CONFIG_BLK_DEV_HPT366=y
+CONFIG_BLK_DEV_SC1200=y
+CONFIG_BLK_DEV_PIIX=y
+CONFIG_BLK_DEV_NS87415=y
+CONFIG_BLK_DEV_PDC202XX_OLD=y
+CONFIG_PDC202XX_BURST=y
+CONFIG_BLK_DEV_PDC202XX_NEW=y
+CONFIG_PDC202XX_FORCE=y
+CONFIG_BLK_DEV_SVWKS=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_BLK_DEV_SIS5513=y
+CONFIG_BLK_DEV_SLC90E66=y
+CONFIG_BLK_DEV_TRM290=y
+CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_IDE_CHIPSETS=y
+
+#
+# Note: most of these also require special kernel boot parameters
+#
+CONFIG_BLK_DEV_4DRIVES=y
+CONFIG_BLK_DEV_ALI14XX=y
+CONFIG_BLK_DEV_DTC2278=y
+CONFIG_BLK_DEV_HT6560B=y
+# CONFIG_BLK_DEV_PDC4030 is not set
+CONFIG_BLK_DEV_QD65XX=y
+CONFIG_BLK_DEV_UMC8672=y
+CONFIG_BLK_DEV_IDEDMA=y
+# CONFIG_IDEDMA_IVB is not set
+CONFIG_IDEDMA_AUTO=y
+# CONFIG_BLK_DEV_HD is not set
+
+#
+# SCSI device support
+#
+CONFIG_SCSI=m
+CONFIG_SCSI_PROC_FS=y
+
+#
+# SCSI support type (disk, tape, CD-ROM)
+#
+CONFIG_BLK_DEV_SD=m
+CONFIG_CHR_DEV_ST=m
+CONFIG_CHR_DEV_OSST=m
+CONFIG_BLK_DEV_SR=m
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=m
+CONFIG_CHR_DEV_SCH=m
+
+#
+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+#
+CONFIG_SCSI_MULTI_LUN=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+
+#
+# SCSI Transport Attributes
+#
+CONFIG_SCSI_SPI_ATTRS=m
+CONFIG_SCSI_FC_ATTRS=m
+
+#
+# SCSI low-level drivers
+#
+CONFIG_BLK_DEV_3W_XXXX_RAID=m
+CONFIG_SCSI_7000FASST=m
+CONFIG_SCSI_ACARD=m
+CONFIG_SCSI_AHA152X=m
+CONFIG_SCSI_AHA1542=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_SCSI_AIC7XXX=m
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=5000
+# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set
+# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
+CONFIG_SCSI_AIC7XXX_OLD=m
+CONFIG_SCSI_AIC79XX=m
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=15000
+# CONFIG_AIC79XX_BUILD_FIRMWARE is not set
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+CONFIG_AIC79XX_REG_PRETTY_PRINT=y
+# CONFIG_SCSI_AIC79XX_NEW is not set
+CONFIG_SCSI_DPT_I2O=m
+CONFIG_SCSI_ADVANSYS=m
+CONFIG_SCSI_IN2000=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_LEGACY=m
+CONFIG_SCSI_SATA=y
+CONFIG_SCSI_SATA_SVW=m
+CONFIG_SCSI_ATA_PIIX=m
+CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIS=m
+CONFIG_SCSI_SATA_VIA=m
+CONFIG_SCSI_SATA_VITESSE=m
+CONFIG_SCSI_BUSLOGIC=m
+# CONFIG_SCSI_OMIT_FLASHPOINT is not set
+# CONFIG_SCSI_CPQFCTS is not set
+CONFIG_SCSI_DMX3191D=m
+CONFIG_SCSI_DTC3280=m
+CONFIG_SCSI_EATA=m
+CONFIG_SCSI_EATA_TAGGED_QUEUE=y
+CONFIG_SCSI_EATA_LINKED_COMMANDS=y
+CONFIG_SCSI_EATA_MAX_TAGS=16
+CONFIG_SCSI_EATA_PIO=m
+CONFIG_SCSI_FUTURE_DOMAIN=m
+CONFIG_SCSI_GDTH=m
+CONFIG_SCSI_GENERIC_NCR5380=m
+CONFIG_SCSI_GENERIC_NCR5380_MMIO=m
+CONFIG_SCSI_GENERIC_NCR53C400=y
+CONFIG_SCSI_IPS=m
+CONFIG_SCSI_INIA100=m
+CONFIG_SCSI_PPA=m
+CONFIG_SCSI_IMM=m
+# CONFIG_SCSI_IZIP_EPP16 is not set
+# CONFIG_SCSI_IZIP_SLOW_CTR is not set
+CONFIG_SCSI_NCR53C406A=m
+CONFIG_SCSI_SYM53C8XX_2=m
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
+CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
+CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
+# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_IPR=m
+CONFIG_SCSI_IPR_TRACE=y
+CONFIG_SCSI_IPR_DUMP=y
+CONFIG_SCSI_PAS16=m
+CONFIG_SCSI_PSI240I=m
+CONFIG_SCSI_QLOGIC_FAS=m
+CONFIG_SCSI_QLOGIC_ISP=m
+CONFIG_SCSI_QLOGIC_FC=m
+CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
+CONFIG_SCSI_QLOGIC_1280=m
+CONFIG_SCSI_QLA2XXX=m
+CONFIG_SCSI_QLA21XX=m
+CONFIG_SCSI_QLA22XX=m
+CONFIG_SCSI_QLA2300=m
+CONFIG_SCSI_QLA2322=m
+CONFIG_SCSI_QLA6312=m
+CONFIG_SCSI_QLA6322=m
+CONFIG_SCSI_QLA2XXX_FAILOVER=y
+CONFIG_SCSI_QLA4XXX=m
+CONFIG_SCSI_QLA4XXX_FAILOVER=y
+CONFIG_SCSI_SYM53C416=m
+CONFIG_SCSI_DC395x=m
+CONFIG_SCSI_DC390T=m
+CONFIG_SCSI_T128=m
+CONFIG_SCSI_U14_34F=m
+CONFIG_SCSI_U14_34F_TAGGED_QUEUE=y
+CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y
+CONFIG_SCSI_U14_34F_MAX_TAGS=8
+CONFIG_SCSI_ULTRASTOR=m
+CONFIG_SCSI_NSP32=m
+CONFIG_SCSI_DEBUG=m
+
+#
+# PCMCIA SCSI adapter support
+#
+CONFIG_PCMCIA_AHA152X=m
+CONFIG_PCMCIA_FDOMAIN=m
+CONFIG_PCMCIA_NINJA_SCSI=m
+CONFIG_PCMCIA_QLOGIC=m
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+CONFIG_CD_NO_IDESCSI=y
+CONFIG_AZTCD=m
+CONFIG_GSCD=m
+CONFIG_MCD=m
+CONFIG_MCD_IRQ=11
+CONFIG_MCD_BASE=0x300
+CONFIG_OPTCD=m
+CONFIG_SJCD=m
+CONFIG_ISP16_CDI=m
+CONFIG_CDU535=m
+
+#
+# Multi-device support (RAID and LVM)
+#
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=y
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID5=m
+CONFIG_MD_RAID6=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_MULTIPATH=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_FLAKEY=m
+CONFIG_BLK_DEV_DM_BBR=m
+
+#
+# Fusion MPT device support
+#
+CONFIG_FUSION=m
+CONFIG_FUSION_MAX_SGE=40
+CONFIG_FUSION_CTL=m
+CONFIG_FUSION_LAN=m
+
+#
+# IEEE 1394 (FireWire) support
+#
+CONFIG_IEEE1394=m
+
+#
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y
+CONFIG_IEEE1394_CONFIG_ROM_IP1394=y
+
+#
+# Device Drivers
+#
+CONFIG_IEEE1394_PCILYNX=m
+CONFIG_IEEE1394_OHCI1394=m
+
+#
+# Protocol Drivers
+#
+CONFIG_IEEE1394_VIDEO1394=m
+CONFIG_IEEE1394_SBP2=m
+# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set
+CONFIG_IEEE1394_ETH1394=m
+CONFIG_IEEE1394_DV1394=m
+CONFIG_IEEE1394_RAWIO=m
+CONFIG_IEEE1394_CMP=m
+CONFIG_IEEE1394_AMDTP=m
+
+#
+# I2O device support
+#
+CONFIG_I2O=m
+CONFIG_I2O_CONFIG=m
+CONFIG_I2O_BLOCK=m
+CONFIG_I2O_SCSI=m
+CONFIG_I2O_PROC=m
+
+#
+# Networking support
+#
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_PACKET=m
+CONFIG_PACKET_MMAP=y
+CONFIG_NETLINK_DEV=m
+CONFIG_UNIX=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_FWMARK=y
+CONFIG_IP_ROUTE_NAT=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_TOS=y
+CONFIG_IP_ROUTE_VERBOSE=y
+# CONFIG_IP_PNP is not set
+CONFIG_NET_IPIP=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_ARPD is not set
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+# CONFIG_ACCEPT_QUEUES is not set
+
+#
+# IP: Virtual Server Configuration
+#
+CONFIG_IP_VS=m
+# CONFIG_IP_VS_DEBUG is not set
+CONFIG_IP_VS_TAB_BITS=12
+
+#
+# IPVS transport protocol load balancing support
+#
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+
+#
+# IPVS scheduler
+#
+CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
+CONFIG_IP_VS_LC=m
+CONFIG_IP_VS_WLC=m
+CONFIG_IP_VS_LBLC=m
+CONFIG_IP_VS_LBLCR=m
+CONFIG_IP_VS_DH=m
+CONFIG_IP_VS_SH=m
+CONFIG_IP_VS_SED=m
+CONFIG_IP_VS_NQ=m
+
+#
+# IPVS application helper
+#
+CONFIG_IP_VS_FTP=m
+CONFIG_IPV6=m
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IPV6_PRIVACY=y
+CONFIG_IPV6_NDISC_NEW=y
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_TUNNEL=m
+
+#
+# MOBILE IPv6 (EXPERIMENTAL)
+#
+CONFIG_IPV6_MOBILITY=m
+CONFIG_IPV6_MOBILITY_MN=m
+CONFIG_IPV6_MOBILITY_HA=m
+# CONFIG_IPV6_MOBILITY_DEBUG is not set
+CONFIG_DECNET=m
+CONFIG_DECNET_SIOCGIFCONF=y
+# CONFIG_DECNET_ROUTER is not set
+CONFIG_BRIDGE=m
+CONFIG_NETFILTER=y
+# CONFIG_NETFILTER_DEBUG is not set
+CONFIG_BRIDGE_NETFILTER=y
+
+#
+# IP: Netfilter Configuration
+#
+CONFIG_IP_NF_CONNTRACK=m
+CONFIG_IP_NF_FTP=m
+CONFIG_IP_NF_IRC=m
+CONFIG_IP_NF_TFTP=m
+CONFIG_IP_NF_AMANDA=m
+CONFIG_IP_NF_QUEUE=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_MATCH_LIMIT=m
+CONFIG_IP_NF_MATCH_IPRANGE=m
+CONFIG_IP_NF_MATCH_MAC=m
+CONFIG_IP_NF_MATCH_PKTTYPE=m
+CONFIG_IP_NF_MATCH_POLICY=m
+CONFIG_IP_NF_MATCH_MARK=m
+CONFIG_IP_NF_MATCH_MULTIPORT=m
+CONFIG_IP_NF_MATCH_TOS=m
+CONFIG_IP_NF_MATCH_RECENT=m
+CONFIG_IP_NF_MATCH_ECN=m
+CONFIG_IP_NF_MATCH_DSCP=m
+CONFIG_IP_NF_MATCH_AH_ESP=m
+CONFIG_IP_NF_MATCH_LENGTH=m
+CONFIG_IP_NF_MATCH_TTL=m
+CONFIG_IP_NF_MATCH_TCPMSS=m
+CONFIG_IP_NF_MATCH_HELPER=m
+CONFIG_IP_NF_MATCH_STATE=m
+CONFIG_IP_NF_MATCH_CONNTRACK=m
+CONFIG_IP_NF_MATCH_OWNER=m
+CONFIG_IP_NF_MATCH_PHYSDEV=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_NAT_NEEDED=y
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_TARGET_REDIRECT=m
+CONFIG_IP_NF_TARGET_NETMAP=m
+CONFIG_IP_NF_TARGET_SAME=m
+# CONFIG_IP_NF_NAT_LOCAL is not set
+CONFIG_IP_NF_NAT_SNMP_BASIC=m
+CONFIG_IP_NF_NAT_IRC=m
+CONFIG_IP_NF_NAT_FTP=m
+CONFIG_IP_NF_NAT_TFTP=m
+CONFIG_IP_NF_NAT_AMANDA=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_TOS=m
+CONFIG_IP_NF_TARGET_ECN=m
+CONFIG_IP_NF_TARGET_DSCP=m
+CONFIG_IP_NF_TARGET_MARK=m
+CONFIG_IP_NF_TARGET_CLASSIFY=m
+CONFIG_IP_NF_TARGET_LOG=m
+CONFIG_IP_NF_TARGET_ULOG=m
+CONFIG_IP_NF_TARGET_TCPMSS=m
+CONFIG_IP_NF_ARPTABLES=m
+CONFIG_IP_NF_ARPFILTER=m
+CONFIG_IP_NF_ARP_MANGLE=m
+CONFIG_IP_NF_COMPAT_IPCHAINS=m
+CONFIG_IP_NF_COMPAT_IPFWADM=m
+CONFIG_IP_NF_CONNTRACK_MARK=y
+CONFIG_IP_NF_TARGET_CONNMARK=m
+CONFIG_IP_NF_MATCH_CONNMARK=m
+CONFIG_IP_NF_TARGET_CLUSTERIP=m
+
+#
+# IPv6: Netfilter Configuration
+#
+CONFIG_IP6_NF_FTP=m
+CONFIG_IP6_NF_QUEUE=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_MATCH_LIMIT=m
+CONFIG_IP6_NF_MATCH_MAC=m
+CONFIG_IP6_NF_MATCH_RT=m
+CONFIG_IP6_NF_MATCH_OPTS=m
+CONFIG_IP6_NF_MATCH_FRAG=m
+CONFIG_IP6_NF_MATCH_HL=m
+CONFIG_IP6_NF_MATCH_MULTIPORT=m
+CONFIG_IP6_NF_MATCH_OWNER=m
+CONFIG_IP6_NF_MATCH_MARK=m
+CONFIG_IP6_NF_MATCH_IPV6HEADER=m
+CONFIG_IP6_NF_MATCH_AHESP=m
+CONFIG_IP6_NF_MATCH_LENGTH=m
+CONFIG_IP6_NF_MATCH_EUI64=m
+CONFIG_IP6_NF_CONNTRACK=m
+CONFIG_IP6_NF_MATCH_STATE=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_LOG=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_TARGET_MARK=m
+
+#
+# DECnet: Netfilter Configuration
+#
+CONFIG_DECNET_NF_GRABULATOR=m
+
+#
+# Bridge: Netfilter Configuration
+#
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_EBT_T_NAT=m
+CONFIG_BRIDGE_EBT_802_3=m
+CONFIG_BRIDGE_EBT_AMONG=m
+CONFIG_BRIDGE_EBT_ARP=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_LIMIT=m
+CONFIG_BRIDGE_EBT_MARK=m
+CONFIG_BRIDGE_EBT_PKTTYPE=m
+CONFIG_BRIDGE_EBT_STP=m
+CONFIG_BRIDGE_EBT_VLAN=m
+CONFIG_BRIDGE_EBT_ARPREPLY=m
+CONFIG_BRIDGE_EBT_DNAT=m
+CONFIG_BRIDGE_EBT_MARK_T=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_SNAT=m
+CONFIG_BRIDGE_EBT_LOG=m
+CONFIG_XFRM=y
+CONFIG_XFRM_USER=m
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IP_SCTP=m
+# CONFIG_SCTP_DBG_MSG is not set
+# CONFIG_SCTP_DBG_OBJCNT is not set
+# CONFIG_SCTP_HMAC_NONE is not set
+# CONFIG_SCTP_HMAC_SHA1 is not set
+CONFIG_SCTP_HMAC_MD5=y
+CONFIG_ATM=y
+CONFIG_ATM_CLIP=y
+CONFIG_ATM_CLIP_NO_ICMP=y
+CONFIG_ATM_LANE=m
+CONFIG_ATM_MPOA=m
+CONFIG_ATM_BR2684=m
+# CONFIG_ATM_BR2684_IPFILTER is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_LLC=y
+CONFIG_LLC2=m
+CONFIG_IPX=m
+# CONFIG_IPX_INTERN is not set
+CONFIG_ATALK=m
+CONFIG_DEV_APPLETALK=y
+CONFIG_LTPC=m
+CONFIG_COPS=m
+CONFIG_COPS_DAYNA=y
+CONFIG_COPS_TANGENT=y
+CONFIG_IPDDP=m
+CONFIG_IPDDP_ENCAP=y
+CONFIG_IPDDP_DECAP=y
+CONFIG_X25=m
+CONFIG_LAPB=m
+# CONFIG_NET_DIVERT is not set
+CONFIG_ECONET=m
+# CONFIG_ECONET_AUNUDP is not set
+# CONFIG_ECONET_NATIVE is not set
+CONFIG_WAN_ROUTER=m
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_CSZ=m
+CONFIG_NET_SCH_ATM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_TEQL=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_DSMARK=m
+CONFIG_NET_SCH_DELAY=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_QOS=y
+CONFIG_NET_ESTIMATOR=y
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_TCINDEX=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_ROUTE=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_RSVP6=m
+CONFIG_NET_CLS_POLICE=y
+
+#
+# Network testing
+#
+CONFIG_NET_PKTGEN=m
+CONFIG_NETDEVICES=y
+
+#
+# ARCnet devices
+#
+CONFIG_ARCNET=m
+CONFIG_ARCNET_1201=m
+CONFIG_ARCNET_1051=m
+CONFIG_ARCNET_RAW=m
+CONFIG_ARCNET_COM90xx=m
+CONFIG_ARCNET_COM90xxIO=m
+CONFIG_ARCNET_RIM_I=m
+CONFIG_ARCNET_COM20020=m
+CONFIG_ARCNET_COM20020_ISA=m
+CONFIG_ARCNET_COM20020_PCI=m
+CONFIG_DUMMY=m
+CONFIG_BONDING=m
+CONFIG_EQUALIZER=m
+CONFIG_TUN=m
+CONFIG_ETHERTAP=m
+CONFIG_NET_SB1000=m
+
+#
+# Ethernet (10 or 100Mbit)
+#
+CONFIG_NET_ETHERNET=y
+CONFIG_MII=m
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_EL1=m
+CONFIG_EL2=m
+CONFIG_ELPLUS=m
+CONFIG_EL16=m
+CONFIG_EL3=m
+CONFIG_3C515=m
+CONFIG_VORTEX=m
+CONFIG_TYPHOON=m
+CONFIG_LANCE=m
+CONFIG_NET_VENDOR_SMC=y
+CONFIG_WD80x3=m
+CONFIG_ULTRA=m
+CONFIG_SMC9194=m
+CONFIG_NET_VENDOR_RACAL=y
+CONFIG_NI52=m
+CONFIG_NI65=m
+
+#
+# Tulip family network device support
+#
+CONFIG_NET_TULIP=y
+CONFIG_DE2104X=m
+CONFIG_TULIP=m
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+CONFIG_TULIP_NAPI=y
+CONFIG_TULIP_NAPI_HW_MITIGATION=y
+CONFIG_DE4X5=m
+CONFIG_WINBOND_840=m
+CONFIG_DM9102=m
+CONFIG_PCMCIA_XIRCOM=m
+CONFIG_AT1700=m
+CONFIG_DEPCA=m
+CONFIG_HP100=m
+CONFIG_NET_ISA=y
+CONFIG_E2100=m
+CONFIG_EWRK3=m
+CONFIG_EEXPRESS=m
+CONFIG_EEXPRESS_PRO=m
+CONFIG_HPLAN_PLUS=m
+CONFIG_HPLAN=m
+CONFIG_LP486E=m
+CONFIG_ETH16I=m
+CONFIG_NE2000=m
+CONFIG_ZNET=m
+CONFIG_SEEQ8005=m
+CONFIG_NET_PCI=y
+CONFIG_PCNET32=m
+CONFIG_AMD8111_ETH=m
+CONFIG_ADAPTEC_STARFIRE=m
+CONFIG_ADAPTEC_STARFIRE_NAPI=y
+CONFIG_AC3200=m
+CONFIG_APRICOT=m
+CONFIG_B44=m
+CONFIG_FORCEDETH=m
+CONFIG_CS89x0=m
+CONFIG_DGRS=m
+CONFIG_EEPRO100=m
+# CONFIG_EEPRO100_PIO is not set
+CONFIG_E100=m
+CONFIG_E100_NAPI=y
+CONFIG_FEALNX=m
+CONFIG_NATSEMI=m
+CONFIG_NE2K_PCI=m
+CONFIG_8139CP=m
+CONFIG_8139TOO=m
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+CONFIG_8139TOO_8129=y
+# CONFIG_8139_OLD_RX_RESET is not set
+CONFIG_8139_RXBUF_IDX=2
+CONFIG_SIS900=m
+CONFIG_EPIC100=m
+CONFIG_SUNDANCE=m
+# CONFIG_SUNDANCE_MMIO is not set
+CONFIG_TLAN=m
+CONFIG_VIA_RHINE=m
+# CONFIG_VIA_RHINE_MMIO is not set
+CONFIG_NET_POCKET=y
+CONFIG_ATP=m
+CONFIG_DE600=m
+CONFIG_DE620=m
+
+#
+# Ethernet (1000 Mbit)
+#
+CONFIG_ACENIC=m
+# CONFIG_ACENIC_OMIT_TIGON_I is not set
+CONFIG_DL2K=m
+CONFIG_E1000=m
+CONFIG_E1000_NAPI=y
+CONFIG_E1000_NEW=m
+CONFIG_E1000_NEW_NAPI=y
+CONFIG_NS83820=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_R8169=m
+CONFIG_SIS190=m
+CONFIG_SK98LIN=m
+CONFIG_TIGON3=m
+CONFIG_NET_BROADCOM=m
+CONFIG_NET_BROADCOM_NEW=m
+CONFIG_NET_BCM44=m
+CONFIG_TIGON3_NEW=m
+
+#
+# Ethernet (10000 Mbit)
+#
+CONFIG_IXGB=m
+CONFIG_IXGB_NAPI=y
+CONFIG_S2IO=m
+CONFIG_S2IO_NAPI=y
+CONFIG_FDDI=y
+# CONFIG_DEFXX is not set
+CONFIG_SKFP=m
+CONFIG_HIPPI=y
+CONFIG_ROADRUNNER=m
+CONFIG_ROADRUNNER_LARGE_RINGS=y
+CONFIG_PLIP=m
+CONFIG_PPP=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPPOATM=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
+CONFIG_SLIP_MODE_SLIP6=y
+
+#
+# Wireless LAN (non-hamradio)
+#
+CONFIG_NET_RADIO=y
+
+#
+# Obsolete Wireless cards support (pre-802.11)
+#
+CONFIG_STRIP=m
+# CONFIG_ARLAN is not set
+CONFIG_WAVELAN=m
+CONFIG_PCMCIA_WAVELAN=m
+CONFIG_PCMCIA_NETWAVE=m
+
+#
+# Wireless 802.11 Frequency Hopping cards support
+#
+CONFIG_PCMCIA_RAYCS=m
+
+#
+# Wireless 802.11b ISA/PCI cards support
+#
+CONFIG_AIRO=m
+CONFIG_HERMES=m
+CONFIG_PLX_HERMES=m
+CONFIG_TMD_HERMES=m
+CONFIG_PCI_HERMES=m
+CONFIG_ATMEL=m
+CONFIG_PCI_ATMEL=m
+
+#
+# Wireless 802.11b Pcmcia/Cardbus cards support
+#
+CONFIG_PCMCIA_HERMES=m
+CONFIG_AIRO_CS=m
+CONFIG_PCMCIA_ATMEL=m
+CONFIG_PCMCIA_WL3501=m
+
+#
+# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support
+#
+CONFIG_PRISM54=m
+CONFIG_NET_WIRELESS=y
+
+#
+# Token Ring devices
+#
+CONFIG_TR=y
+CONFIG_IBMTR=m
+CONFIG_IBMOL=m
+CONFIG_IBMLS=m
+CONFIG_3C359=m
+CONFIG_TMS380TR=m
+CONFIG_TMSPCI=m
+CONFIG_SKISA=m
+CONFIG_PROTEON=m
+CONFIG_ABYSS=m
+CONFIG_SMCTR=m
+CONFIG_NET_FC=y
+CONFIG_NET_LPFC=m
+CONFIG_RCPCI=m
+CONFIG_SHAPER=m
+CONFIG_NETCONSOLE=m
+
+#
+# Wan interfaces
+#
+CONFIG_WAN=y
+CONFIG_HOSTESS_SV11=m
+# CONFIG_COSA is not set
+CONFIG_DSCC4=m
+CONFIG_DSCC4_PCISYNC=y
+CONFIG_DSCC4_PCI_RST=y
+CONFIG_LANMEDIA=m
+CONFIG_SEALEVEL_4021=m
+CONFIG_SYNCLINK_SYNCPPP=m
+CONFIG_HDLC=m
+CONFIG_HDLC_RAW=y
+CONFIG_HDLC_RAW_ETH=y
+CONFIG_HDLC_CISCO=y
+CONFIG_HDLC_FR=y
+CONFIG_HDLC_PPP=y
+CONFIG_HDLC_X25=y
+CONFIG_PCI200SYN=m
+CONFIG_WANXL=m
+# CONFIG_WANXL_BUILD_FIRMWARE is not set
+CONFIG_PC300=m
+CONFIG_PC300_MLPPP=y
+CONFIG_N2=m
+CONFIG_C101=m
+CONFIG_FARSYNC=m
+CONFIG_DLCI=m
+CONFIG_DLCI_COUNT=24
+CONFIG_DLCI_MAX=8
+CONFIG_SDLA=m
+# CONFIG_WAN_ROUTER_DRIVERS is not set
+CONFIG_LAPBETHER=m
+CONFIG_X25_ASY=m
+# CONFIG_SBNI is not set
+
+#
+# PCMCIA network device support
+#
+CONFIG_NET_PCMCIA=y
+CONFIG_PCMCIA_3C589=m
+CONFIG_PCMCIA_3C574=m
+CONFIG_PCMCIA_FMVJ18X=m
+CONFIG_PCMCIA_PCNET=m
+CONFIG_PCMCIA_NMCLAN=m
+CONFIG_PCMCIA_SMC91C92=m
+CONFIG_PCMCIA_XIRC2PS=m
+CONFIG_PCMCIA_AXNET=m
+CONFIG_ARCNET_COM20020_CS=m
+CONFIG_PCMCIA_IBMTR=m
+
+#
+# ATM drivers
+#
+CONFIG_ATM_TCP=m
+CONFIG_ATM_LANAI=m
+CONFIG_ATM_ENI=m
+# CONFIG_ATM_ENI_DEBUG is not set
+# CONFIG_ATM_ENI_TUNE_BURST is not set
+CONFIG_ATM_FIRESTREAM=m
+CONFIG_ATM_ZATM=m
+# CONFIG_ATM_ZATM_DEBUG is not set
+CONFIG_ATM_NICSTAR=m
+CONFIG_ATM_NICSTAR_USE_SUNI=y
+CONFIG_ATM_NICSTAR_USE_IDT77105=y
+CONFIG_ATM_IDT77252=m
+# CONFIG_ATM_IDT77252_DEBUG is not set
+CONFIG_ATM_IDT77252_RCV_ALL=y
+CONFIG_ATM_IDT77252_USE_SUNI=y
+CONFIG_ATM_AMBASSADOR=m
+# CONFIG_ATM_AMBASSADOR_DEBUG is not set
+CONFIG_ATM_HORIZON=m
+# CONFIG_ATM_HORIZON_DEBUG is not set
+CONFIG_ATM_IA=m
+# CONFIG_ATM_IA_DEBUG is not set
+CONFIG_ATM_FORE200E_MAYBE=m
+CONFIG_ATM_FORE200E_PCA=y
+CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y
+CONFIG_ATM_FORE200E_TX_RETRY=16
+CONFIG_ATM_FORE200E_DEBUG=0
+CONFIG_ATM_FORE200E=m
+CONFIG_ATM_HE=m
+CONFIG_ATM_HE_USE_SUNI=y
+
+#
+# Amateur Radio support
+#
+CONFIG_HAMRADIO=y
+
+#
+# Packet Radio protocols
+#
+CONFIG_AX25=m
+CONFIG_AX25_DAMA_SLAVE=y
+CONFIG_NETROM=m
+CONFIG_ROSE=m
+
+#
+# AX.25 network device drivers
+#
+CONFIG_BPQETHER=m
+CONFIG_SCC=m
+CONFIG_SCC_DELAY=y
+CONFIG_SCC_TRXECHO=y
+CONFIG_BAYCOM_SER_FDX=m
+CONFIG_BAYCOM_SER_HDX=m
+CONFIG_BAYCOM_PAR=m
+CONFIG_BAYCOM_EPP=m
+CONFIG_YAM=m
+
+#
+# IrDA (infrared) support
+#
+CONFIG_IRDA=m
+
+#
+# IrDA protocols
+#
+CONFIG_IRLAN=m
+CONFIG_IRNET=m
+CONFIG_IRCOMM=m
+CONFIG_IRDA_ULTRA=y
+
+#
+# IrDA options
+#
+CONFIG_IRDA_CACHE_LAST_LSAP=y
+# CONFIG_IRDA_FAST_RR is not set
+# CONFIG_IRDA_DEBUG is not set
+
+#
+# Infrared-port device drivers
+#
+
+#
+# SIR device drivers
+#
+CONFIG_IRTTY_SIR=m
+
+#
+# Dongle support
+#
+CONFIG_DONGLE=y
+CONFIG_ESI_DONGLE=m
+CONFIG_ACTISYS_DONGLE=m
+CONFIG_TEKRAM_DONGLE=m
+CONFIG_LITELINK_DONGLE=m
+CONFIG_MA600_DONGLE=m
+CONFIG_GIRBIL_DONGLE=m
+CONFIG_MCP2120_DONGLE=m
+CONFIG_OLD_BELKIN_DONGLE=m
+CONFIG_ACT200L_DONGLE=m
+
+#
+# Old SIR device drivers
+#
+
+#
+# Old Serial dongle support
+#
+
+#
+# FIR device drivers
+#
+CONFIG_USB_IRDA=m
+CONFIG_SIGMATEL_FIR=m
+CONFIG_NSC_FIR=m
+CONFIG_WINBOND_FIR=m
+CONFIG_TOSHIBA_FIR=m
+CONFIG_SMC_IRCC_FIR=m
+CONFIG_ALI_FIR=m
+CONFIG_VLSI_FIR=m
+CONFIG_VIA_FIR=m
+
+#
+# Bluetooth support
+#
+CONFIG_BT=m
+CONFIG_BT_L2CAP=m
+CONFIG_BT_SCO=m
+CONFIG_BT_RFCOMM=m
+CONFIG_BT_RFCOMM_TTY=y
+CONFIG_BT_BNEP=m
+CONFIG_BT_BNEP_MC_FILTER=y
+CONFIG_BT_BNEP_PROTO_FILTER=y
+CONFIG_BT_CMTP=m
+
+#
+# Bluetooth device drivers
+#
+CONFIG_BT_HCIUSB=m
+CONFIG_BT_HCIUSB_SCO=y
+CONFIG_BT_HCIUART=m
+CONFIG_BT_HCIUART_H4=y
+CONFIG_BT_HCIUART_BCSP=y
+CONFIG_BT_HCIUART_BCSP_TXCRC=y
+CONFIG_BT_HCIBCM203X=m
+CONFIG_BT_HCIBFUSB=m
+CONFIG_BT_HCIDTL1=m
+CONFIG_BT_HCIBT3C=m
+CONFIG_BT_HCIBLUECARD=m
+CONFIG_BT_HCIBTUART=m
+CONFIG_BT_HCIVHCI=m
+CONFIG_NETPOLL=y
+CONFIG_NETPOLL_RX=y
+CONFIG_NETPOLL_TRAP=y
+CONFIG_NET_POLL_CONTROLLER=y
+
+#
+# ISDN subsystem
+#
+CONFIG_ISDN=m
+
+#
+# Old ISDN4Linux
+#
+CONFIG_ISDN_I4L=m
+CONFIG_ISDN_PPP=y
+CONFIG_ISDN_PPP_VJ=y
+CONFIG_ISDN_MPP=y
+CONFIG_IPPP_FILTER=y
+CONFIG_ISDN_PPP_BSDCOMP=m
+CONFIG_ISDN_AUDIO=y
+CONFIG_ISDN_TTY_FAX=y
+CONFIG_ISDN_X25=y
+
+#
+# ISDN feature submodules
+#
+
+#
+# ISDN4Linux hardware drivers
+#
+
+#
+# Passive cards
+#
+CONFIG_ISDN_DRV_HISAX=m
+
+#
+# D-channel protocol features
+#
+CONFIG_HISAX_EURO=y
+CONFIG_DE_AOC=y
+# CONFIG_HISAX_NO_SENDCOMPLETE is not set
+# CONFIG_HISAX_NO_LLC is not set
+# CONFIG_HISAX_NO_KEYPAD is not set
+CONFIG_HISAX_1TR6=y
+CONFIG_HISAX_NI1=y
+CONFIG_HISAX_MAX_CARDS=8
+
+#
+# HiSax supported cards
+#
+CONFIG_HISAX_16_0=y
+CONFIG_HISAX_16_3=y
+CONFIG_HISAX_TELESPCI=y
+CONFIG_HISAX_S0BOX=y
+CONFIG_HISAX_AVM_A1=y
+CONFIG_HISAX_FRITZPCI=y
+CONFIG_HISAX_AVM_A1_PCMCIA=y
+CONFIG_HISAX_ELSA=y
+CONFIG_HISAX_IX1MICROR2=y
+CONFIG_HISAX_DIEHLDIVA=y
+CONFIG_HISAX_ASUSCOM=y
+CONFIG_HISAX_TELEINT=y
+CONFIG_HISAX_HFCS=y
+CONFIG_HISAX_SEDLBAUER=y
+CONFIG_HISAX_SPORTSTER=y
+CONFIG_HISAX_MIC=y
+CONFIG_HISAX_NETJET=y
+CONFIG_HISAX_NETJET_U=y
+CONFIG_HISAX_NICCY=y
+CONFIG_HISAX_ISURF=y
+CONFIG_HISAX_HSTSAPHIR=y
+CONFIG_HISAX_BKM_A4T=y
+CONFIG_HISAX_SCT_QUADRO=y
+CONFIG_HISAX_GAZEL=y
+CONFIG_HISAX_HFC_PCI=y
+CONFIG_HISAX_W6692=y
+CONFIG_HISAX_HFC_SX=y
+CONFIG_HISAX_ENTERNOW_PCI=y
+CONFIG_HISAX_DEBUG=y
+
+#
+# HiSax PCMCIA card service modules
+#
+CONFIG_HISAX_SEDLBAUER_CS=m
+CONFIG_HISAX_ELSA_CS=m
+CONFIG_HISAX_AVM_A1_CS=m
+CONFIG_HISAX_TELES_CS=m
+
+#
+# HiSax sub driver modules
+#
+CONFIG_HISAX_ST5481=m
+CONFIG_HISAX_HFCUSB=m
+CONFIG_HISAX_FRITZ_PCIPNP=m
+CONFIG_HISAX_HDLC=y
+
+#
+# Active cards
+#
+CONFIG_ISDN_DRV_ICN=m
+CONFIG_ISDN_DRV_PCBIT=m
+CONFIG_ISDN_DRV_SC=m
+CONFIG_ISDN_DRV_ACT2000=m
+CONFIG_ISDN_DRV_TPAM=m
+
+#
+# CAPI subsystem
+#
+CONFIG_ISDN_CAPI=m
+CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y
+CONFIG_ISDN_CAPI_MIDDLEWARE=y
+CONFIG_ISDN_CAPI_CAPI20=m
+CONFIG_ISDN_CAPI_CAPIFS_BOOL=y
+CONFIG_ISDN_CAPI_CAPIFS=m
+CONFIG_ISDN_CAPI_CAPIDRV=m
+
+#
+# CAPI hardware drivers
+#
+
+#
+# Active AVM cards
+#
+CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_T1ISA=m
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
+
+#
+# Active Eicon DIVA Server cards
+#
+CONFIG_CAPI_EICON=y
+CONFIG_ISDN_DIVAS=m
+CONFIG_ISDN_DIVAS_BRIPCI=y
+CONFIG_ISDN_DIVAS_PRIPCI=y
+CONFIG_ISDN_DIVAS_DIVACAPI=m
+CONFIG_ISDN_DIVAS_USERIDI=m
+CONFIG_ISDN_DIVAS_MAINT=m
+
+#
+# Telephony Support
+#
+CONFIG_PHONE=m
+CONFIG_PHONE_IXJ=m
+CONFIG_PHONE_IXJ_PCMCIA=m
+
+#
+# Input device support
+#
+CONFIG_INPUT=y
+
+#
+# Userland interfaces
+#
+CONFIG_INPUT_MOUSEDEV=y
+CONFIG_INPUT_MOUSEDEV_PSAUX=y
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
+CONFIG_INPUT_JOYDEV=m
+CONFIG_INPUT_TSDEV=m
+CONFIG_INPUT_TSDEV_SCREEN_X=240
+CONFIG_INPUT_TSDEV_SCREEN_Y=320
+CONFIG_INPUT_EVDEV=m
+# CONFIG_INPUT_EVBUG is not set
+
+#
+# Input I/O drivers
+#
+CONFIG_GAMEPORT=m
+CONFIG_SOUND_GAMEPORT=m
+CONFIG_GAMEPORT_NS558=m
+CONFIG_GAMEPORT_L4=m
+CONFIG_GAMEPORT_EMU10K1=m
+CONFIG_GAMEPORT_VORTEX=m
+CONFIG_GAMEPORT_FM801=m
+CONFIG_GAMEPORT_CS461x=m
+CONFIG_SERIO=y
+CONFIG_SERIO_I8042=y
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_CT82C710=m
+CONFIG_SERIO_PARKBD=m
+CONFIG_SERIO_PCIPS2=m
+
+#
+# Input Device Drivers
+#
+CONFIG_INPUT_KEYBOARD=y
+CONFIG_KEYBOARD_ATKBD=y
+CONFIG_KEYBOARD_SUNKBD=m
+# CONFIG_KEYBOARD_LKKBD is not set
+CONFIG_KEYBOARD_XTKBD=m
+CONFIG_KEYBOARD_NEWTON=m
+CONFIG_INPUT_MOUSE=y
+CONFIG_MOUSE_PS2=y
+CONFIG_MOUSE_SERIAL=m
+CONFIG_MOUSE_INPORT=m
+CONFIG_MOUSE_ATIXL=y
+CONFIG_MOUSE_LOGIBM=m
+CONFIG_MOUSE_PC110PAD=m
+# CONFIG_MOUSE_VSXXXAA is not set
+CONFIG_INPUT_JOYSTICK=y
+CONFIG_JOYSTICK_ANALOG=m
+CONFIG_JOYSTICK_A3D=m
+CONFIG_JOYSTICK_ADI=m
+CONFIG_JOYSTICK_COBRA=m
+CONFIG_JOYSTICK_GF2K=m
+CONFIG_JOYSTICK_GRIP=m
+CONFIG_JOYSTICK_GRIP_MP=m
+CONFIG_JOYSTICK_GUILLEMOT=m
+CONFIG_JOYSTICK_INTERACT=m
+CONFIG_JOYSTICK_SIDEWINDER=m
+CONFIG_JOYSTICK_TMDC=m
+CONFIG_JOYSTICK_IFORCE=m
+CONFIG_JOYSTICK_IFORCE_USB=y
+CONFIG_JOYSTICK_IFORCE_232=y
+CONFIG_JOYSTICK_WARRIOR=m
+CONFIG_JOYSTICK_MAGELLAN=m
+CONFIG_JOYSTICK_SPACEORB=m
+CONFIG_JOYSTICK_SPACEBALL=m
+CONFIG_JOYSTICK_STINGER=m
+CONFIG_JOYSTICK_TWIDDLER=m
+CONFIG_JOYSTICK_DB9=m
+CONFIG_JOYSTICK_GAMECON=m
+CONFIG_JOYSTICK_TURBOGRAFX=m
+# CONFIG_INPUT_JOYDUMP is not set
+CONFIG_INPUT_TOUCHSCREEN=y
+CONFIG_TOUCHSCREEN_GUNZE=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_PCSPKR=y
+CONFIG_INPUT_UINPUT=m
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_HW_CONSOLE=y
+CONFIG_ECC=m
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_ROCKETPORT=m
+CONFIG_SYNCLINK=m
+CONFIG_SYNCLINKMP=m
+CONFIG_N_HDLC=m
+CONFIG_STALDRV=y
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_CS=m
+# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+CONFIG_SERIAL_8250_MULTIPORT=y
+CONFIG_SERIAL_8250_RSA=y
+
+#
+# Non-8250 serial port support
+#
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+# CONFIG_SERIAL_ICOM is not set
+CONFIG_SERIAL_JSM=m
+CONFIG_UNIX98_PTYS=y
+CONFIG_LEGACY_PTYS=y
+CONFIG_LEGACY_PTY_COUNT=256
+CONFIG_PRINTER=m
+# CONFIG_LP_CONSOLE is not set
+CONFIG_PPDEV=m
+CONFIG_TIPAR=m
+CONFIG_QIC02_TAPE=m
+CONFIG_QIC02_DYNCONF=y
+
+#
+# Setting runtime QIC-02 configuration is done with qic02conf
+#
+
+#
+# from the tpqic02-support package. It is available at
+#
+
+#
+# metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/
+#
+
+#
+# IPMI
+#
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_PANIC_EVENT=y
+CONFIG_IPMI_PANIC_STRING=y
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_KCS=m
+CONFIG_IPMI_WATCHDOG=m
+
+#
+# Watchdog Cards
+#
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=m
+CONFIG_ACQUIRE_WDT=m
+CONFIG_ADVANTECH_WDT=m
+CONFIG_ALIM1535_WDT=m
+CONFIG_ALIM7101_WDT=m
+CONFIG_AMD7XX_TCO=m
+CONFIG_SC520_WDT=m
+CONFIG_EUROTECH_WDT=m
+CONFIG_IB700_WDT=m
+CONFIG_WAFER_WDT=m
+CONFIG_I8XX_TCO=m
+CONFIG_SC1200_WDT=m
+CONFIG_SCx200_WDT=m
+CONFIG_60XX_WDT=m
+CONFIG_CPU5_WDT=m
+CONFIG_W83627HF_WDT=m
+CONFIG_W83877F_WDT=m
+CONFIG_MACHZ_WDT=m
+
+#
+# ISA-based Watchdog Cards
+#
+CONFIG_PCWATCHDOG=m
+CONFIG_MIXCOMWD=m
+CONFIG_WDT=m
+CONFIG_WDT_501=y
+
+#
+# PCI-based Watchdog Cards
+#
+CONFIG_PCIPCWATCHDOG=m
+CONFIG_WDTPCI=m
+CONFIG_WDT_501_PCI=y
+
+#
+# USB-based Watchdog Cards
+#
+CONFIG_USBPCWATCHDOG=m
+CONFIG_HW_RANDOM=m
+CONFIG_NVRAM=m
+CONFIG_RTC=y
+CONFIG_DTLK=m
+CONFIG_R3964=m
+CONFIG_APPLICOM=m
+CONFIG_SONYPI=m
+
+#
+# Ftape, the floppy tape device driver
+#
+CONFIG_AGP=m
+CONFIG_AGP_ALI=m
+CONFIG_AGP_ATI=m
+CONFIG_AGP_AMD=m
+CONFIG_AGP_AMD64=m
+CONFIG_AGP_INTEL=m
+CONFIG_AGP_INTEL_MCH=m
+CONFIG_AGP_NVIDIA=m
+CONFIG_AGP_SIS=m
+CONFIG_AGP_SWORKS=m
+CONFIG_AGP_VIA=m
+CONFIG_AGP_EFFICEON=m
+# CONFIG_DRM is not set
+
+#
+# PCMCIA character devices
+#
+CONFIG_SYNCLINK_CS=m
+# CONFIG_MWAVE is not set
+CONFIG_SCx200_GPIO=m
+CONFIG_RAW_DRIVER=m
+CONFIG_MAX_RAW_DEVS=4096
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_VTUNE=m
+
+#
+# Linux InfraRed Controller
+#
+CONFIG_LIRC_SUPPORT=m
+CONFIG_LIRC_MAX_DEV=2
+CONFIG_LIRC_BT829=m
+CONFIG_LIRC_IT87=m
+CONFIG_LIRC_ATIUSB=m
+CONFIG_LIRC_SERIAL=m
+# CONFIG_LIRC_HOMEBREW is not set
+CONFIG_LIRC_PORT_SERIAL=0x3f8
+CONFIG_LIRC_IRQ_SERIAL=4
+CONFIG_LIRC_SIR=m
+CONFIG_LIRC_PORT_SIR=0x3f8
+CONFIG_LIRC_IRQ_SIR=4
+
+#
+# I2C support
+#
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+CONFIG_I2C_ALGOBIT=m
+CONFIG_I2C_ALGOPCF=m
+
+#
+# I2C Hardware Bus support
+#
+CONFIG_I2C_ALI1535=m
+CONFIG_I2C_ALI15X3=m
+CONFIG_I2C_AMD756=m
+CONFIG_I2C_AMD8111=m
+CONFIG_I2C_I801=m
+CONFIG_I2C_I810=m
+CONFIG_I2C_ISA=m
+CONFIG_I2C_NFORCE2=m
+CONFIG_I2C_PARPORT=m
+CONFIG_I2C_PARPORT_LIGHT=m
+CONFIG_I2C_PIIX4=m
+CONFIG_I2C_PROSAVAGE=m
+CONFIG_I2C_SAVAGE4=m
+CONFIG_SCx200_I2C=m
+CONFIG_SCx200_I2C_SCL=12
+CONFIG_SCx200_I2C_SDA=13
+CONFIG_SCx200_ACB=m
+CONFIG_I2C_SIS5595=m
+CONFIG_I2C_SIS630=m
+CONFIG_I2C_SIS96X=m
+CONFIG_I2C_VIA=m
+CONFIG_I2C_VIAPRO=m
+CONFIG_I2C_VOODOO3=m
+
+#
+# Hardware Sensors Chip support
+#
+CONFIG_I2C_SENSOR=m
+CONFIG_SENSORS_ADM1021=m
+CONFIG_SENSORS_ASB100=m
+CONFIG_SENSORS_DS1621=m
+CONFIG_SENSORS_FSCHER=m
+CONFIG_SENSORS_GL518SM=m
+CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_LM75=m
+CONFIG_SENSORS_LM78=m
+CONFIG_SENSORS_LM80=m
+CONFIG_SENSORS_LM83=m
+CONFIG_SENSORS_LM85=m
+CONFIG_SENSORS_LM90=m
+CONFIG_SENSORS_VIA686A=m
+CONFIG_SENSORS_W83781D=m
+CONFIG_SENSORS_W83L785TS=m
+CONFIG_SENSORS_W83627HF=m
+
+#
+# Other I2C Chip support
+#
+CONFIG_SENSORS_EEPROM=m
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
+
+#
+# Misc devices
+#
+CONFIG_IBM_ASM=m
+
+#
+# Multimedia devices
+#
+CONFIG_VIDEO_DEV=m
+
+#
+# Video For Linux
+#
+
+#
+# Video Adapters
+#
+CONFIG_VIDEO_BT848=m
+CONFIG_VIDEO_PMS=m
+CONFIG_VIDEO_BWQCAM=m
+CONFIG_VIDEO_CQCAM=m
+CONFIG_VIDEO_W9966=m
+CONFIG_VIDEO_CPIA=m
+CONFIG_VIDEO_CPIA_PP=m
+CONFIG_VIDEO_CPIA_USB=m
+CONFIG_VIDEO_SAA5246A=m
+CONFIG_VIDEO_SAA5249=m
+CONFIG_TUNER_3036=m
+CONFIG_VIDEO_STRADIS=m
+CONFIG_VIDEO_ZORAN=m
+CONFIG_VIDEO_ZORAN_BUZ=m
+CONFIG_VIDEO_ZORAN_DC10=m
+CONFIG_VIDEO_ZORAN_DC30=m
+CONFIG_VIDEO_ZORAN_LML33=m
+CONFIG_VIDEO_ZORAN_LML33R10=m
+CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_MXB=m
+CONFIG_VIDEO_DPC=m
+CONFIG_VIDEO_HEXIUM_ORION=m
+CONFIG_VIDEO_HEXIUM_GEMINI=m
+CONFIG_VIDEO_CX88=m
+
+#
+# Radio Adapters
+#
+CONFIG_RADIO_CADET=m
+CONFIG_RADIO_RTRACK=m
+CONFIG_RADIO_RTRACK2=m
+CONFIG_RADIO_AZTECH=m
+CONFIG_RADIO_GEMTEK=m
+CONFIG_RADIO_GEMTEK_PCI=m
+CONFIG_RADIO_MAXIRADIO=m
+CONFIG_RADIO_MAESTRO=m
+CONFIG_RADIO_MIROPCM20=m
+# CONFIG_RADIO_MIROPCM20_RDS is not set
+CONFIG_RADIO_SF16FMI=m
+CONFIG_RADIO_SF16FMR2=m
+CONFIG_RADIO_TERRATEC=m
+CONFIG_RADIO_TRUST=m
+CONFIG_RADIO_TYPHOON=m
+CONFIG_RADIO_TYPHOON_PROC_FS=y
+CONFIG_RADIO_ZOLTRIX=m
+
+#
+# Digital Video Broadcasting Devices
+#
+CONFIG_DVB=y
+CONFIG_DVB_CORE=m
+
+#
+# Supported Frontend Modules
+#
+CONFIG_DVB_TWINHAN_DST=m
+CONFIG_DVB_STV0299=m
+CONFIG_DVB_SP887X=m
+CONFIG_DVB_SP887X_FIRMWARE_FILE="/etc/dvb/sc_main.mc"
+CONFIG_DVB_ALPS_TDLB7=m
+CONFIG_DVB_ALPS_TDMB7=m
+CONFIG_DVB_ATMEL_AT76C651=m
+CONFIG_DVB_CX24110=m
+CONFIG_DVB_GRUNDIG_29504_491=m
+CONFIG_DVB_GRUNDIG_29504_401=m
+CONFIG_DVB_MT312=m
+CONFIG_DVB_VES1820=m
+CONFIG_DVB_VES1X93=m
+CONFIG_DVB_TDA1004X=m
+CONFIG_DVB_TDA1004X_FIRMWARE_FILE="/usr/lib/hotplug/firmware/tda1004x.bin"
+CONFIG_DVB_NXT6000=m
+
+#
+# Supported SAA7146 based PCI Adapters
+#
+CONFIG_DVB_AV7110=m
+# CONFIG_DVB_AV7110_FIRMWARE is not set
+CONFIG_DVB_AV7110_OSD=y
+CONFIG_DVB_BUDGET=m
+CONFIG_DVB_BUDGET_CI=m
+CONFIG_DVB_BUDGET_AV=m
+CONFIG_DVB_BUDGET_PATCH=m
+
+#
+# Supported USB Adapters
+#
+CONFIG_DVB_TTUSB_BUDGET=m
+CONFIG_DVB_TTUSB_DEC=m
+
+#
+# Supported FlexCopII (B2C2) Adapters
+#
+CONFIG_DVB_B2C2_SKYSTAR=m
+
+#
+# Supported BT878 Adapters
+#
+CONFIG_DVB_BT8XX=m
+CONFIG_VIDEO_SAA7146=m
+CONFIG_VIDEO_SAA7146_VV=m
+CONFIG_VIDEO_VIDEOBUF=m
+CONFIG_VIDEO_TUNER=m
+CONFIG_VIDEO_BUF=m
+CONFIG_VIDEO_BTCX=m
+CONFIG_VIDEO_IR=m
+
+#
+# Graphics support
+#
+CONFIG_FB=y
+CONFIG_FB_PM2=m
+CONFIG_FB_PM2_FIFO_DISCONNECT=y
+CONFIG_FB_CYBER2000=m
+CONFIG_FB_IMSTT=y
+CONFIG_FB_VGA16=m
+CONFIG_FB_VESA=y
+CONFIG_VIDEO_SELECT=y
+CONFIG_FB_HGA=m
+CONFIG_FB_RIVA=m
+CONFIG_FB_I810=m
+CONFIG_FB_I810_GTF=y
+# CONFIG_FB_MATROX is not set
+# CONFIG_FB_RADEON_OLD is not set
+CONFIG_FB_RADEON=m
+CONFIG_FB_RADEON_I2C=y
+# CONFIG_FB_RADEON_DEBUG is not set
+# CONFIG_FB_ATY128 is not set
+CONFIG_FB_ATY=m
+CONFIG_FB_ATY_CT=y
+CONFIG_FB_ATY_GX=y
+CONFIG_FB_ATY_XL_INIT=y
+CONFIG_FB_SIS=m
+CONFIG_FB_SIS_300=y
+CONFIG_FB_SIS_315=y
+CONFIG_FB_NEOMAGIC=m
+CONFIG_FB_KYRO=m
+CONFIG_FB_3DFX=m
+CONFIG_FB_VOODOO1=m
+CONFIG_FB_TRIDENT=m
+# CONFIG_FB_VIRTUAL is not set
+
+#
+# Console display driver support
+#
+CONFIG_VGA_CONSOLE=y
+CONFIG_MDA_CONSOLE=m
+CONFIG_DUMMY_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_PCI_CONSOLE=y
+# CONFIG_FONTS is not set
+CONFIG_FONT_8x8=y
+CONFIG_FONT_8x16=y
+
+#
+# Logo configuration
+#
+# CONFIG_LOGO is not set
+
+#
+# Bootsplash configuration
+#
+CONFIG_BOOTSPLASH=y
+
+#
+# Sound
+#
+CONFIG_SOUND=m
+
+#
+# Advanced Linux Sound Architecture
+#
+CONFIG_SND=m
+CONFIG_SND_TIMER=m
+CONFIG_SND_PCM=m
+CONFIG_SND_HWDEP=m
+CONFIG_SND_RAWMIDI=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_RTCTIMER=m
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_DEBUG=y
+CONFIG_SND_DEBUG_MEMORY=y
+# CONFIG_SND_DEBUG_DETECT is not set
+
+#
+# Generic devices
+#
+CONFIG_SND_MPU401_UART=m
+CONFIG_SND_OPL3_LIB=m
+CONFIG_SND_OPL4_LIB=m
+CONFIG_SND_VX_LIB=m
+CONFIG_SND_DUMMY=m
+CONFIG_SND_VIRMIDI=m
+CONFIG_SND_MTPAV=m
+CONFIG_SND_SERIAL_U16550=m
+CONFIG_SND_MPU401=m
+
+#
+# ISA devices
+#
+CONFIG_SND_AD1816A=m
+CONFIG_SND_AD1848=m
+CONFIG_SND_CS4231=m
+CONFIG_SND_CS4232=m
+CONFIG_SND_CS4236=m
+CONFIG_SND_ES968=m
+CONFIG_SND_ES1688=m
+CONFIG_SND_ES18XX=m
+CONFIG_SND_GUSCLASSIC=m
+CONFIG_SND_GUSEXTREME=m
+CONFIG_SND_GUSMAX=m
+CONFIG_SND_INTERWAVE=m
+CONFIG_SND_INTERWAVE_STB=m
+CONFIG_SND_OPTI92X_AD1848=m
+CONFIG_SND_OPTI92X_CS4231=m
+CONFIG_SND_OPTI93X=m
+CONFIG_SND_SB8=m
+CONFIG_SND_SB16=m
+CONFIG_SND_SBAWE=m
+CONFIG_SND_SB16_CSP=y
+CONFIG_SND_WAVEFRONT=m
+CONFIG_SND_ALS100=m
+CONFIG_SND_AZT2320=m
+CONFIG_SND_CMI8330=m
+CONFIG_SND_DT019X=m
+CONFIG_SND_OPL3SA2=m
+CONFIG_SND_SGALAXY=m
+CONFIG_SND_SSCAPE=m
+
+#
+# PCI devices
+#
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_ALI5451=m
+CONFIG_SND_ATIIXP=m
+CONFIG_SND_AU8810=m
+CONFIG_SND_AU8820=m
+CONFIG_SND_AU8830=m
+CONFIG_SND_AZT3328=m
+CONFIG_SND_BT87X=m
+CONFIG_SND_CS46XX=m
+CONFIG_SND_CS46XX_NEW_DSP=y
+CONFIG_SND_CS4281=m
+CONFIG_SND_EMU10K1=m
+CONFIG_SND_KORG1212=m
+CONFIG_SND_MIXART=m
+CONFIG_SND_NM256=m
+CONFIG_SND_RME32=m
+CONFIG_SND_RME96=m
+CONFIG_SND_RME9652=m
+CONFIG_SND_HDSP=m
+CONFIG_SND_TRIDENT=m
+CONFIG_SND_YMFPCI=m
+CONFIG_SND_ALS4000=m
+CONFIG_SND_CMIPCI=m
+CONFIG_SND_ENS1370=m
+CONFIG_SND_ENS1371=m
+CONFIG_SND_ES1938=m
+CONFIG_SND_ES1968=m
+CONFIG_SND_MAESTRO3=m
+CONFIG_SND_FM801=m
+CONFIG_SND_FM801_TEA575X=m
+CONFIG_SND_ICE1712=m
+CONFIG_SND_ICE1724=m
+CONFIG_SND_INTEL8X0=m
+CONFIG_SND_INTEL8X0M=m
+CONFIG_SND_SONICVIBES=m
+CONFIG_SND_VIA82XX=m
+CONFIG_SND_VX222=m
+
+#
+# ALSA USB devices
+#
+CONFIG_SND_USB_AUDIO=m
+
+#
+# PCMCIA devices
+#
+# CONFIG_SND_VXPOCKET is not set
+# CONFIG_SND_VXP440 is not set
+# CONFIG_SND_PDAUDIOCF is not set
+
+#
+# Open Sound System
+#
+CONFIG_SOUND_PRIME=m
+CONFIG_SOUND_BT878=m
+CONFIG_SOUND_CMPCI=m
+CONFIG_SOUND_CMPCI_FM=y
+CONFIG_SOUND_CMPCI_FMIO=0x388
+CONFIG_SOUND_CMPCI_MIDI=y
+CONFIG_SOUND_CMPCI_MPUIO=0x330
+CONFIG_SOUND_CMPCI_JOYSTICK=y
+CONFIG_SOUND_CMPCI_CM8738=y
+# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set
+CONFIG_SOUND_CMPCI_SPDIFLOOP=y
+CONFIG_SOUND_CMPCI_SPEAKERS=2
+CONFIG_SOUND_EMU10K1=m
+CONFIG_MIDI_EMU10K1=y
+# CONFIG_SOUND_FUSION is not set
+CONFIG_SOUND_CS4281=m
+CONFIG_SOUND_ES1370=m
+CONFIG_SOUND_ES1371=m
+CONFIG_SOUND_ESSSOLO1=m
+CONFIG_SOUND_MAESTRO=m
+CONFIG_SOUND_MAESTRO3=m
+CONFIG_SOUND_ICH=m
+CONFIG_SOUND_SONICVIBES=m
+CONFIG_SOUND_TRIDENT=m
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+CONFIG_SOUND_VIA82CXXX=m
+CONFIG_MIDI_VIA82CXXX=y
+CONFIG_SOUND_OSS=m
+CONFIG_SOUND_TRACEINIT=y
+CONFIG_SOUND_DMAP=y
+# CONFIG_SOUND_AD1816 is not set
+CONFIG_SOUND_AD1889=m
+CONFIG_SOUND_SGALAXY=m
+CONFIG_SOUND_ADLIB=m
+CONFIG_SOUND_ACI_MIXER=m
+CONFIG_SOUND_CS4232=m
+CONFIG_SOUND_SSCAPE=m
+CONFIG_SOUND_GUS=m
+# CONFIG_SOUND_GUS16 is not set
+CONFIG_SOUND_GUSMAX=y
+CONFIG_SOUND_VMIDI=m
+CONFIG_SOUND_TRIX=m
+CONFIG_SOUND_MSS=m
+CONFIG_SOUND_MPU401=m
+CONFIG_SOUND_NM256=m
+CONFIG_SOUND_MAD16=m
+CONFIG_MAD16_OLDCARD=y
+CONFIG_SOUND_PAS=m
+CONFIG_SOUND_PSS=m
+CONFIG_PSS_MIXER=y
+# CONFIG_PSS_HAVE_BOOT is not set
+CONFIG_SOUND_SB=m
+# CONFIG_SOUND_AWE32_SYNTH is not set
+CONFIG_SOUND_WAVEFRONT=m
+CONFIG_SOUND_MAUI=m
+CONFIG_SOUND_YM3812=m
+CONFIG_SOUND_OPL3SA1=m
+CONFIG_SOUND_OPL3SA2=m
+CONFIG_SOUND_YMFPCI=m
+CONFIG_SOUND_YMFPCI_LEGACY=y
+CONFIG_SOUND_UART6850=m
+CONFIG_SOUND_AEDSP16=m
+CONFIG_SC6600=y
+CONFIG_SC6600_JOY=y
+CONFIG_SC6600_CDROM=4
+CONFIG_SC6600_CDROMBASE=0x0
+# CONFIG_AEDSP16_MSS is not set
+# CONFIG_AEDSP16_SBPRO is not set
+CONFIG_AEDSP16_MPU401=y
+CONFIG_SOUND_TVMIXER=m
+CONFIG_SOUND_KAHLUA=m
+CONFIG_SOUND_ALI5455=m
+CONFIG_SOUND_FORTE=m
+CONFIG_SOUND_RME96XX=m
+CONFIG_SOUND_AD1980=m
+
+#
+# USB support
+#
+CONFIG_USB=m
+# CONFIG_USB_DEBUG is not set
+
+#
+# Miscellaneous USB options
+#
+CONFIG_USB_DEVICEFS=y
+# CONFIG_USB_BANDWIDTH is not set
+# CONFIG_USB_DYNAMIC_MINORS is not set
+
+#
+# USB Host Controller Drivers
+#
+CONFIG_USB_EHCI_HCD=m
+CONFIG_USB_EHCI_SPLIT_ISO=y
+CONFIG_USB_EHCI_ROOT_HUB_TT=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_UHCI_HCD=m
+
+#
+# USB Device Class drivers
+#
+CONFIG_USB_AUDIO=m
+
+#
+# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+#
+CONFIG_USB_MIDI=m
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
+CONFIG_USB_STORAGE=m
+# CONFIG_USB_STORAGE_DEBUG is not set
+CONFIG_USB_STORAGE_DATAFAB=y
+CONFIG_USB_STORAGE_FREECOM=y
+CONFIG_USB_STORAGE_ISD200=y
+CONFIG_USB_STORAGE_DPCM=y
+CONFIG_USB_STORAGE_HP8200e=y
+CONFIG_USB_STORAGE_SDDR09=y
+CONFIG_USB_STORAGE_SDDR55=y
+CONFIG_USB_STORAGE_JUMPSHOT=y
+
+#
+# USB Human Interface Devices (HID)
+#
+CONFIG_USB_HID=m
+CONFIG_USB_HIDINPUT=y
+CONFIG_HID_FF=y
+CONFIG_HID_PID=y
+CONFIG_LOGITECH_FF=y
+CONFIG_THRUSTMASTER_FF=y
+CONFIG_USB_HIDDEV=y
+
+#
+# USB HID Boot Protocol drivers
+#
+# CONFIG_USB_KBD is not set
+# CONFIG_USB_MOUSE is not set
+CONFIG_USB_AIPTEK=m
+CONFIG_USB_WACOM=m
+CONFIG_USB_KBTAB=m
+CONFIG_USB_POWERMATE=m
+CONFIG_USB_MTOUCH=m
+CONFIG_USB_XPAD=m
+CONFIG_USB_ATI_REMOTE=m
+
+#
+# USB Imaging devices
+#
+CONFIG_USB_MDC800=m
+CONFIG_USB_MICROTEK=m
+CONFIG_USB_HPUSBSCSI=m
+
+#
+# USB Multimedia devices
+#
+CONFIG_USB_DABUSB=m
+CONFIG_USB_VICAM=m
+CONFIG_USB_DSBR=m
+CONFIG_USB_IBMCAM=m
+CONFIG_USB_KONICAWC=m
+CONFIG_USB_OV511=m
+CONFIG_USB_SE401=m
+CONFIG_USB_STV680=m
+CONFIG_USB_W9968CF=m
+
+#
+# USB Network adaptors
+#
+CONFIG_USB_CATC=m
+CONFIG_USB_KAWETH=m
+CONFIG_USB_PEGASUS=m
+CONFIG_USB_RTL8150=m
+CONFIG_USB_USBNET=m
+
+#
+# USB Host-to-Host Cables
+#
+CONFIG_USB_ALI_M5632=y
+CONFIG_USB_AN2720=y
+CONFIG_USB_BELKIN=y
+CONFIG_USB_GENESYS=y
+CONFIG_USB_NET1080=y
+CONFIG_USB_PL2301=y
+
+#
+# Intelligent USB Devices/Gadgets
+#
+CONFIG_USB_ARMLINUX=y
+CONFIG_USB_EPSON2888=y
+CONFIG_USB_ZAURUS=y
+CONFIG_USB_CDCETHER=y
+
+#
+# USB Network Adapters
+#
+CONFIG_USB_AX8817X=y
+
+#
+# USB port drivers
+#
+CONFIG_USB_USS720=m
+
+#
+# USB Serial Converter support
+#
+CONFIG_USB_SERIAL=m
+CONFIG_USB_SERIAL_GENERIC=y
+CONFIG_USB_SERIAL_BELKIN=m
+CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
+CONFIG_USB_SERIAL_EMPEG=m
+CONFIG_USB_SERIAL_FTDI_SIO=m
+CONFIG_USB_SERIAL_VISOR=m
+CONFIG_USB_SERIAL_IPAQ=m
+CONFIG_USB_SERIAL_IR=m
+CONFIG_USB_SERIAL_EDGEPORT=m
+CONFIG_USB_SERIAL_EDGEPORT_TI=m
+CONFIG_USB_SERIAL_KEYSPAN_PDA=m
+CONFIG_USB_SERIAL_KEYSPAN=m
+CONFIG_USB_SERIAL_KEYSPAN_MPR=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
+CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19=y
+CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
+CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
+CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
+CONFIG_USB_SERIAL_KLSI=m
+CONFIG_USB_SERIAL_KOBIL_SCT=m
+CONFIG_USB_SERIAL_MCT_U232=m
+CONFIG_USB_SERIAL_PL2303=m
+CONFIG_USB_SERIAL_SAFE=m
+CONFIG_USB_SERIAL_SAFE_PADDED=y
+CONFIG_USB_SERIAL_CYBERJACK=m
+CONFIG_USB_SERIAL_XIRCOM=m
+CONFIG_USB_SERIAL_OMNINET=m
+CONFIG_USB_EZUSB=y
+
+#
+# USB Miscellaneous drivers
+#
+CONFIG_USB_EMI62=m
+CONFIG_USB_EMI26=m
+CONFIG_USB_TIGL=m
+CONFIG_USB_AUERSWALD=m
+CONFIG_USB_RIO500=m
+CONFIG_USB_LEGOTOWER=m
+CONFIG_USB_LCD=m
+CONFIG_USB_LED=m
+CONFIG_USB_CYTHERM=m
+CONFIG_USB_SPEEDTOUCH=m
+# CONFIG_USB_TEST is not set
+
+#
+# USB Gadget Support
+#
+# CONFIG_USB_GADGET is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_SDP is not set
+CONFIG_INFINIBAND_SRP=m
+CONFIG_INFINIBAND_UDAPL_HELPER=m
+CONFIG_INFINIBAND_MELLANOX_HCA=m
+CONFIG_AUDIT=m
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=m
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JBD=m
+CONFIG_JBD_DEBUG=y
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=m
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_DMAPI=y
+# CONFIG_JFS_DEBUG is not set
+CONFIG_JFS_STATISTICS=y
+CONFIG_FS_POSIX_ACL=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_RT=y
+CONFIG_XFS_QUOTA=m
+CONFIG_XFS_DMAPI=y
+CONFIG_XFS_SECURITY=y
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_MINIX_FS=y
+CONFIG_ROMFS_FS=m
+CONFIG_DMAPI=m
+# CONFIG_DMAPI_DEBUG is not set
+CONFIG_QUOTA=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_QUOTACTL=y
+CONFIG_AUTOFS_FS=m
+CONFIG_AUTOFS4_FS=m
+
+#
+# CD-ROM/DVD Filesystems
+#
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_ZISOFS_FS=y
+CONFIG_UDF_FS=m
+
+#
+# DOS/FAT/NT Filesystems
+#
+CONFIG_FAT_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_NTFS_FS=m
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_NTFS_RW is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_DEVFS_FS is not set
+CONFIG_DEVPTS_FS_XATTR=y
+CONFIG_DEVPTS_FS_SECURITY=y
+CONFIG_TMPFS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_RAMFS=y
+CONFIG_RELAYFS_FS=m
+# CONFIG_KLOG_CHANNEL is not set
+
+#
+# Miscellaneous filesystems
+#
+CONFIG_ADFS_FS=m
+# CONFIG_ADFS_FS_RW is not set
+CONFIG_AFFS_FS=m
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_BEFS_FS=m
+# CONFIG_BEFS_DEBUG is not set
+CONFIG_BFS_FS=m
+CONFIG_EFS_FS=m
+CONFIG_JFFS_FS=m
+CONFIG_JFFS_FS_VERBOSE=0
+CONFIG_JFFS2_FS=m
+CONFIG_JFFS2_FS_DEBUG=0
+# CONFIG_JFFS2_FS_NAND is not set
+CONFIG_CRAMFS=m
+CONFIG_VXFS_FS=m
+CONFIG_HPFS_FS=m
+CONFIG_QNX4FS_FS=m
+# CONFIG_QNX4FS_RW is not set
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+# CONFIG_UFS_FS_WRITE is not set
+
+#
+# Network File Systems
+#
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+CONFIG_NFS_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_DIRECTIO=y
+CONFIG_NFSD=m
+CONFIG_NFSD_V3=y
+CONFIG_NFSD_ACL=y
+CONFIG_NFS_ACL_SUPPORT=y
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_LOCKD=y
+CONFIG_STATD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=m
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+CONFIG_SMB_FS=m
+CONFIG_SMB_NLS_DEFAULT=y
+CONFIG_SMB_NLS_REMOTE="cp850"
+CONFIG_CIFS=m
+CONFIG_CIFS_STATS=y
+CONFIG_CIFS_XATTR=y
+CONFIG_CIFS_POSIX=y
+CONFIG_NCP_FS=m
+CONFIG_NCPFS_PACKET_SIGNING=y
+CONFIG_NCPFS_IOCTL_LOCKING=y
+CONFIG_NCPFS_STRONG=y
+CONFIG_NCPFS_NFS_NS=y
+CONFIG_NCPFS_OS2_NS=y
+CONFIG_NCPFS_SMALLDOS=y
+CONFIG_NCPFS_NLS=y
+CONFIG_NCPFS_EXTRAS=y
+CONFIG_CODA_FS=m
+# CONFIG_CODA_FS_OLD_API is not set
+# CONFIG_INTERMEZZO_FS is not set
+CONFIG_AFS_FS=m
+CONFIG_RXRPC=m
+
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_ACORN_PARTITION is not set
+CONFIG_OSF_PARTITION=y
+# CONFIG_AMIGA_PARTITION is not set
+CONFIG_ATARI_PARTITION=y
+CONFIG_MAC_PARTITION=y
+CONFIG_MSDOS_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+# CONFIG_MINIX_SUBPARTITION is not set
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_LDM_PARTITION=y
+# CONFIG_LDM_DEBUG is not set
+CONFIG_NEC98_PARTITION=y
+CONFIG_SGI_PARTITION=y
+CONFIG_ULTRIX_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_EFI_PARTITION=y
+
+#
+# Native Language Support
+#
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_FSHOOKS=y
+
+#
+# Profiling support
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
+# Kernel hacking
+#
+CONFIG_CRASH_DUMP=m
+CONFIG_KERNTYPES=y
+CONFIG_CRASH_DUMP_BLOCKDEV=m
+CONFIG_CRASH_DUMP_NETDEV=m
+# CONFIG_CRASH_DUMP_MEMDEV is not set
+CONFIG_CRASH_DUMP_COMPRESS_RLE=m
+CONFIG_CRASH_DUMP_COMPRESS_GZIP=m
+CONFIG_DEBUG_KERNEL=y
+CONFIG_EARLY_PRINTK=y
+# CONFIG_KPROBES is not set
+# CONFIG_DEBUGREG is not set
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_SLAB is not set
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_KDB is not set
+CONFIG_X86_FIND_SMP_CONFIG=y
+CONFIG_X86_MPPARSE=y
+# CONFIG_HOOK is not set
+
+#
+# Security options
+#
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_CAPABILITIES=m
+CONFIG_SECURITY_ROOTPLUG=m
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_SELINUX_DEVELOP=y
+# CONFIG_SECURITY_SELINUX_MLS is not set
+
+#
+# IBM Crypto Hardware support
+#
+CONFIG_IBM_CRYPTO=m
+CONFIG_ICA_LEEDSLITE=m
+
+#
+# Cryptographic options
+#
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_NULL=m
+CONFIG_CRYPTO_MD4=m
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_AES=m
+CONFIG_CRYPTO_CAST5=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_ARC4=m
+CONFIG_CRYPTO_DEFLATE=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_TEST=m
+
+#
+# Library routines
+#
+CONFIG_CRC32=y
+CONFIG_QSORT=y
+CONFIG_ZLIB_INFLATE=y
+CONFIG_ZLIB_DEFLATE=m
+
+#
+# Build options
+#
+CONFIG_SUSE_KERNEL=y
+CONFIG_CFGNAME="bigsmp"
+CONFIG_RELEASE="SLES9_SP1_BRANCH_2004110217390391"
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
+CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
+CONFIG_PC=y
--- /dev/null
+Index: linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c
+===================================================================
+--- linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091.orig/kernel/sched.c 2004-10-22 15:25:05.000000000 -0400
++++ linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c 2004-10-22 15:39:18.000000000 -0400
+@@ -3147,7 +3147,7 @@
+ return list_entry(p->sibling.next,struct task_struct,sibling);
+ }
+
+-static void show_task(task_t * p)
++void show_task(task_t * p)
+ {
+ task_t *relative;
+ unsigned state;
+@@ -3200,6 +3200,7 @@
+ if (state != TASK_RUNNING)
+ show_stack(p, NULL);
+ }
++EXPORT_SYMBOL(show_task);
+
+ void show_state(void)
+ {
--- /dev/null
+%patch
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
+@@ -0,0 +1,2313 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ */
++
++/*
++ * Extents support for EXT3
++ *
++ * TODO:
++ * - ext3_ext_walk_space() should not use ext3_ext_find_extent()
++ * - ext3_ext_calc_credits() could take 'mergable' into account
++ * - ext3*_error() should be used in some situations
++ * - find_goal() [to be tested and improved]
++ * - smart tree reduction
++ * - arch-independence
++ * common on-disk format for big/little-endian arch
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/time.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/smp_lock.h>
++#include <linux/highuid.h>
++#include <linux/pagemap.h>
++#include <linux/quotaops.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/ext3_extents.h>
++#include <asm/uaccess.h>
++
++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
++{
++ int err;
++
++ if (handle->h_buffer_credits > needed)
++ return handle;
++ if (!ext3_journal_extend(handle, needed))
++ return handle;
++ err = ext3_journal_restart(handle, needed);
++
++ return handle;
++}
++
++static int inline
++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->get_write_access)
++ return tree->ops->get_write_access(h,tree->buffer);
++ else
++ return 0;
++}
++
++static int inline
++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree)
++{
++ if (tree->ops->mark_buffer_dirty)
++ return tree->ops->mark_buffer_dirty(h,tree->buffer);
++ else
++ return 0;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ */
++static int ext3_ext_get_access(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++
++ if (path->p_bh) {
++ /* path points to block */
++ err = ext3_journal_get_write_access(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_get_access_for_root(handle, tree);
++ }
++ return err;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ * - EIO
++ */
++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int err;
++ if (path->p_bh) {
++ /* path points to block */
++ err =ext3_journal_dirty_metadata(handle, path->p_bh);
++ } else {
++ /* path points to leaf/index in inode body */
++ err = ext3_ext_mark_root_dirty(handle, tree);
++ }
++ return err;
++}
++
++static int inline
++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, struct ext3_extent *ex,
++ int *err)
++{
++ int goal, depth, newblock;
++ struct inode *inode;
++
++ EXT_ASSERT(tree);
++ if (tree->ops->new_block)
++ return tree->ops->new_block(handle, tree, path, ex, err);
++
++ inode = tree->inode;
++ depth = EXT_DEPTH(tree);
++ if (path && depth > 0) {
++ goal = path[depth-1].p_block;
++ } else {
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++
++ bg_start = (ei->i_block_group *
++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ goal = bg_start + colour;
++ }
++
++ newblock = ext3_new_block(handle, inode, goal, err);
++ return newblock;
++}
++
++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *neh;
++ neh = EXT_ROOT_HDR(tree);
++ neh->eh_generation++;
++}
++
++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 6;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->inode->i_sb->s_blocksize -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 5;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len - sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 3;
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree)
++{
++ int size;
++
++ size = (tree->buffer_len -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 4;
++#endif
++ return size;
++}
++
++static void ext3_ext_show_path(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int k, l = path->p_depth;
++
++ ext_debug(tree, "path:");
++ for (k = 0; k <= l; k++, path++) {
++ if (path->p_idx) {
++ ext_debug(tree, " %d->%d", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++ } else if (path->p_ext) {
++ ext_debug(tree, " %d:%d:%d",
++ path->p_ext->ee_block,
++ path->p_ext->ee_len,
++ path->p_ext->ee_start);
++ } else
++ ext_debug(tree, " []");
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++#ifdef EXT_DEBUG
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *eh;
++ struct ext3_extent *ex;
++ int i;
++
++ if (!path)
++ return;
++
++ eh = path[depth].p_hdr;
++ ex = EXT_FIRST_EXTENT(eh);
++
++ for (i = 0; i < eh->eh_entries; i++, ex++) {
++ ext_debug(tree, "%d:%d:%d ",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ }
++ ext_debug(tree, "\n");
++#endif
++}
++
++static void ext3_ext_drop_refs(struct ext3_ext_path *path)
++{
++ int depth = path->p_depth;
++ int i;
++
++ for (i = 0; i <= depth; i++, path++)
++ if (path->p_bh) {
++ brelse(path->p_bh);
++ path->p_bh = NULL;
++ }
++}
++
++/*
++ * Binary search in the index node @path->p_hdr for the last index
++ * entry with ei_block <= @block; the result is left in path->p_idx.
++ * The node must be non-empty (asserted below).
++ */
++static inline void
++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent_idx *ix;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_entries > 0);
++
++ ext_debug(tree, "binsearch for %d(idx): ", block);
++
++ path->p_idx = ix = EXT_FIRST_INDEX(eh);
++
++ /* halve the candidate window [l;r) until one step remains */
++ r = k = eh->eh_entries;
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ix[l + k].ei_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ix += l;
++ path->p_idx = ix;
++ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf);
++
++ /* finish with a short linear scan over the remaining candidates */
++ while (l++ < r) {
++ if (block < ix->ei_block)
++ break;
++ path->p_idx = ix++;
++ }
++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block,
++ path->p_idx->ei_leaf);
++
++#ifdef CHECK_BINSEARCH
++ {
++ /* cross-check the search result with a full linear scan */
++ struct ext3_extent_idx *chix;
++
++ chix = ix = EXT_FIRST_INDEX(eh);
++ for (k = 0; k < eh->eh_entries; k++, ix++) {
++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) {
++ printk("k=%d, ix=0x%p, first=0x%p\n", k,
++ ix, EXT_FIRST_INDEX(eh));
++ printk("%u <= %u\n",
++ ix->ei_block,ix[-1].ei_block);
++ }
++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block);
++ if (block < ix->ei_block)
++ break;
++ chix = ix;
++ }
++ EXT_ASSERT(chix == path->p_idx);
++ }
++#endif
++
++}
++
++/*
++ * Binary search in the leaf @path->p_hdr for the last extent with
++ * ee_block <= @block; the result is left in path->p_ext.  An empty
++ * leaf (possible right after a split/add) leaves p_ext untouched.
++ */
++static inline void
++ext3_ext_binsearch(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, int block)
++{
++ struct ext3_extent_header *eh = path->p_hdr;
++ struct ext3_extent *ex;
++ int l = 0, k, r;
++
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++
++ if (eh->eh_entries == 0) {
++ /*
++ * this leaf is empty yet:
++ * we get such a leaf in split/add case
++ */
++ return;
++ }
++
++ ext_debug(tree, "binsearch for %d: ", block);
++
++ path->p_ext = ex = EXT_FIRST_EXTENT(eh);
++
++ /* halve the candidate window [l;r) until one step remains */
++ r = k = eh->eh_entries;
++ while (k > 1) {
++ k = (r - l) / 2;
++ if (block < ex[l + k].ee_block)
++ r -= k;
++ else
++ l += k;
++ ext_debug(tree, "%d:%d:%d ", k, l, r);
++ }
++
++ ex += l;
++ path->p_ext = ex;
++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++ /* finish with a short linear scan over the remaining candidates */
++ while (l++ < r) {
++ if (block < ex->ee_block)
++ break;
++ path->p_ext = ex++;
++ }
++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block,
++ path->p_ext->ee_start, path->p_ext->ee_len);
++
++#ifdef CHECK_BINSEARCH
++ {
++ /* cross-check the search result with a full linear scan */
++ struct ext3_extent *chex;
++
++ chex = ex = EXT_FIRST_EXTENT(eh);
++ for (k = 0; k < eh->eh_entries; k++, ex++) {
++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block);
++ if (block < ex->ee_block)
++ break;
++ chex = ex;
++ }
++ EXT_ASSERT(chex == path->p_ext);
++ }
++#endif
++
++}
++
++/*
++ * Initialize an empty extent tree: write a depth-0 root header with
++ * zero entries into the root buffer, mark it dirty in the journal and
++ * invalidate the lookup cache.  Returns 0.
++ */
++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree)
++{
++ struct ext3_extent_header *eh;
++
++ BUG_ON(tree->buffer_len == 0);
++ ext3_ext_get_access_for_root(handle, tree);
++ eh = EXT_ROOT_HDR(tree);
++ eh->eh_depth = 0;
++ eh->eh_entries = 0;
++ eh->eh_magic = EXT3_EXT_MAGIC;
++ eh->eh_max = ext3_ext_space_root(tree);
++ ext3_ext_mark_root_dirty(handle, tree);
++ ext3_ext_invalidate_cache(tree);
++ return 0;
++}
++
++/*
++ * Walk the tree from the root down to the leaf covering @block,
++ * recording at each level the header, the chosen index entry and the
++ * buffer_head in @path.  If @path is NULL a new array is allocated
++ * (with one spare slot to allow for a later depth increase).
++ * Returns the path, or ERR_PTR(-ENOMEM/-EIO) on failure.
++ */
++struct ext3_ext_path *
++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ struct buffer_head *bh;
++ int depth, i, ppos = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ eh = EXT_ROOT_HDR(tree);
++ EXT_ASSERT(eh);
++ i = depth = EXT_DEPTH(tree);
++ EXT_ASSERT(eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++ EXT_ASSERT(i == 0 || eh->eh_entries > 0);
++
++ /* account possible depth increase */
++ if (!path) {
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
++ GFP_NOFS);
++ if (!path)
++ return ERR_PTR(-ENOMEM);
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[0].p_hdr = eh;
++
++ /* walk through the tree */
++ while (i) {
++ ext_debug(tree, "depth %d: num %d, max %d\n",
++ ppos, eh->eh_entries, eh->eh_max);
++ ext3_ext_binsearch_idx(tree, path + ppos, block);
++ path[ppos].p_block = path[ppos].p_idx->ei_leaf;
++ path[ppos].p_depth = i;
++ path[ppos].p_ext = NULL;
++
++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block);
++ if (!bh) {
++ /* NOTE(review): kfree() here also frees a
++ * caller-supplied @path — callers that pass a
++ * preallocated path must not reuse it after an
++ * error return; verify all call sites */
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ return ERR_PTR(-EIO);
++ }
++ eh = EXT_BLOCK_HDR(bh);
++ ppos++;
++ EXT_ASSERT(ppos <= depth);
++ path[ppos].p_bh = bh;
++ path[ppos].p_hdr = eh;
++ i--;
++ }
++
++ path[ppos].p_depth = i;
++ path[ppos].p_hdr = eh;
++ path[ppos].p_ext = NULL;
++
++ /* find extent */
++ ext3_ext_binsearch(tree, path + ppos, block);
++
++ ext3_ext_show_path(tree, path);
++
++ return path;
++}
++
++/*
++ * Insert a new index entry [@logical;@ptr] into the index block at
++ * @curp.  It checks whether to insert before or after curp->p_idx and
++ * shifts the following entries right to make room.  The journal write
++ * access is taken and the block is marked dirty here.
++ */
++static int ext3_ext_insert_index(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *curp,
++ int logical, int ptr)
++{
++ struct ext3_extent_idx *ix;
++ int len, err;
++
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ return err;
++
++ EXT_ASSERT(logical != curp->p_idx->ei_block);
++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
++ if (logical > curp->p_idx->ei_block) {
++ /* insert after */
++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
++ len = (len - 1) * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d after: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ (curp->p_idx + 1), (curp->p_idx + 2));
++ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
++ }
++ ix = curp->p_idx + 1;
++ } else {
++ /* insert before */
++ len = len * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert new index %d before: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ curp->p_idx, (curp->p_idx + 1));
++ memmove(curp->p_idx + 1, curp->p_idx, len);
++ ix = curp->p_idx;
++ }
++
++ ix->ei_block = logical;
++ ix->ei_leaf = ptr;
++ curp->p_hdr->eh_entries++;
++
++ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max);
++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr));
++
++ err = ext3_ext_dirty(handle, tree, curp);
++ ext3_std_error(tree->inode->i_sb, err);
++
++ return err;
++}
++
++/*
++ * routine inserts new subtree into the path, using free index entry
++ * at depth 'at':
++ * - allocates all needed blocks (new leaf and all intermediate index blocks)
++ * - makes decision where to split
++ * - moves remaining extents and index entries (right to the split point)
++ * into the newly allocated blocks
++ * - initialize subtree
++ */
++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext, int at)
++{
++ struct buffer_head *bh = NULL;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct ext3_extent *ex;
++ int i = at, k, m, a;
++ unsigned long newblock, oldblock, border;
++ int *ablocks = NULL; /* array of allocated blocks */
++ int err = 0;
++
++ /* make decision: where to split? */
++ /* FIXME: now decision is simplest: at current extent */
++
++ /* if current leaf will be splitted, then we should use
++ * border from split point */
++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr));
++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ border = path[depth].p_ext[1].ee_block;
++ ext_debug(tree, "leaf will be splitted."
++ " next leaf starts at %d\n",
++ (int)border);
++ } else {
++ border = newext->ee_block;
++ ext_debug(tree, "leaf will be added."
++ " next leaf starts at %d\n",
++ (int)border);
++ }
++
++ /*
++ * if error occurs, then we break processing
++ * and turn filesystem read-only. so, index won't
++ * be inserted and tree will be in consistent
++ * state. next mount will repair buffers too
++ */
++
++ /*
++ * get array to track all allocated blocks
++ * we need this to handle errors and free blocks
++ * upon them
++ */
++ /* NOTE(review): ablocks is declared int * but allocated and
++ * zeroed with sizeof(unsigned long), and unsigned long block
++ * numbers are stored into it — the element type should match;
++ * confirm and fix in a follow-up */
++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS);
++ if (!ablocks)
++ return -ENOMEM;
++ memset(ablocks, 0, sizeof(unsigned long) * depth);
++
++ /* allocate all needed blocks */
++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at);
++ for (a = 0; a < depth - at; a++) {
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ goto cleanup;
++ ablocks[a] = newblock;
++ }
++
++ /* initialize new leaf */
++ newblock = ablocks[--a];
++ EXT_ASSERT(newblock);
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 0;
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_depth = 0;
++ ex = EXT_FIRST_EXTENT(neh);
++
++ /* move remain of path[depth] to the new leaf */
++ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max);
++ /* start copy from next extent */
++ /* TODO: we could do it by single memmove */
++ m = 0;
++ path[depth].p_ext++;
++ while (path[depth].p_ext <=
++ EXT_MAX_EXTENT(path[depth].p_hdr)) {
++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n",
++ path[depth].p_ext->ee_block,
++ path[depth].p_ext->ee_start,
++ path[depth].p_ext->ee_len,
++ newblock);
++ memmove(ex++, path[depth].p_ext++,
++ sizeof(struct ext3_extent));
++ neh->eh_entries++;
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old leaf */
++ if (m) {
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++ path[depth].p_hdr->eh_entries -= m;
++ if ((err = ext3_ext_dirty(handle, tree, path + depth)))
++ goto cleanup;
++
++ }
++
++ /* create intermediate indexes */
++ k = depth - at - 1;
++ EXT_ASSERT(k >= 0);
++ if (k)
++ ext_debug(tree, "create %d intermediate indices\n", k);
++ /* insert new index into current index block */
++ /* current depth stored in i var */
++ i = depth - 1;
++ while (k--) {
++ oldblock = newblock;
++ newblock = ablocks[--a];
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh)))
++ goto cleanup;
++
++ /* new index block starts with one entry pointing at the
++ * block created in the previous iteration (or the leaf) */
++ neh = EXT_BLOCK_HDR(bh);
++ neh->eh_entries = 1;
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ neh->eh_depth = depth - i;
++ fidx = EXT_FIRST_INDEX(neh);
++ fidx->ei_block = border;
++ fidx->ei_leaf = oldblock;
++
++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n",
++ i, newblock, border, oldblock);
++ /* copy indexes */
++ m = 0;
++ path[i].p_idx++;
++
++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx,
++ EXT_MAX_INDEX(path[i].p_hdr));
++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
++ EXT_LAST_INDEX(path[i].p_hdr));
++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
++ ext_debug(tree, "%d: move %d:%d in new index %lu\n",
++ i, path[i].p_idx->ei_block,
++ path[i].p_idx->ei_leaf, newblock);
++ memmove(++fidx, path[i].p_idx++,
++ sizeof(struct ext3_extent_idx));
++ neh->eh_entries++;
++ EXT_ASSERT(neh->eh_entries <= neh->eh_max);
++ m++;
++ }
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto cleanup;
++ brelse(bh);
++ bh = NULL;
++
++ /* correct old index */
++ if (m) {
++ err = ext3_ext_get_access(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ path[i].p_hdr->eh_entries -= m;
++ err = ext3_ext_dirty(handle, tree, path + i);
++ if (err)
++ goto cleanup;
++ }
++
++ i--;
++ }
++
++ /* insert new index */
++ if (!err)
++ err = ext3_ext_insert_index(handle, tree, path + at,
++ border, newblock);
++
++cleanup:
++ if (bh) {
++ if (buffer_locked(bh))
++ unlock_buffer(bh);
++ brelse(bh);
++ }
++
++ if (err) {
++ /* free all allocated blocks in error case */
++ for (i = 0; i < depth; i++) {
++ if (!ablocks[i])
++ continue;
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ }
++ }
++ kfree(ablocks);
++
++ return err;
++}
++
++/*
++ * routine implements tree growing procedure:
++ * - allocates new block
++ * - moves top-level data (index block or leaf) into the new block
++ * - initialize new top-level, creating index that points to the
++ * just created block
++ */
++static int ext3_ext_grow_indepth(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp = path;
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ struct buffer_head *bh;
++ unsigned long newblock;
++ int err = 0;
++
++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err);
++ if (newblock == 0)
++ return err;
++
++ bh = sb_getblk(tree->inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ ext3_std_error(tree->inode->i_sb, err);
++ return err;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh))) {
++ unlock_buffer(bh);
++ goto out;
++ }
++
++ /* move top-level index/leaf into new block */
++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len);
++
++ /* set size of new block */
++ neh = EXT_BLOCK_HDR(bh);
++ /* old root could have indexes or leaves
++ * so calculate eh_max the right way */
++ if (EXT_DEPTH(tree))
++ neh->eh_max = ext3_ext_space_block_idx(tree);
++ else
++ neh->eh_max = ext3_ext_space_block(tree);
++ neh->eh_magic = EXT3_EXT_MAGIC;
++ set_buffer_uptodate(bh);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto out;
++
++ /* create index in new top-level index: num,max,pointer */
++ if ((err = ext3_ext_get_access(handle, tree, curp)))
++ goto out;
++
++ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC;
++ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree);
++ curp->p_hdr->eh_entries = 1;
++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++ /* FIXME: it works, but actually path[0] can be index */
++ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
++ curp->p_idx->ei_leaf = newblock;
++
++ neh = EXT_ROOT_HDR(tree);
++ fidx = EXT_FIRST_INDEX(neh);
++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n",
++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf);
++
++ neh->eh_depth = path->p_depth + 1;
++ err = ext3_ext_dirty(handle, tree, curp);
++out:
++ brelse(bh);
++
++ return err;
++}
++
++/*
++ * routine finds empty index and adds new leaf. if no free index found
++ * then it requests in-depth growing
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle,
++ struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_ext_path *curp;
++ int depth, i, err = 0;
++
++repeat:
++ i = depth = EXT_DEPTH(tree);
++
++ /* walk up to the tree and look for free index entry */
++ curp = path + depth;
++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++ i--;
++ curp--;
++ }
++
++ /* we use already allocated block for index block
++ * so, subsequent data blocks should be contiguous */
++ if (EXT_HAS_FREE_INDEX(curp)) {
++ /* if we found index with free entry, then use that
++ * entry: create all needed subtree and add new leaf */
++ err = ext3_ext_split(handle, tree, path, newext, i);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++ } else {
++ /* tree is full, time to grow in depth */
++ err = ext3_ext_grow_indepth(handle, tree, path, newext);
++
++ /* refill path */
++ ext3_ext_drop_refs(path);
++ path = ext3_ext_find_extent(tree, newext->ee_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++
++ /*
++ * only first (depth 0 -> 1) produces free space
++ * in all other cases we have to split the grown tree
++ */
++ depth = EXT_DEPTH(tree);
++ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
++ /* now we need split */
++ goto repeat;
++ }
++ }
++
++ if (err)
++ return err;
++
++ return 0;
++}
++
++/*
++ * returns allocated block in subsequent extent or EXT_MAX_BLOCK
++ * NOTE: it considers the block number from an index entry as an
++ * allocated block. thus, index entries have to be consistent
++ * with leafs
++ */
++static unsigned long
++ext3_ext_next_allocated_block(struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ if (depth == 0 && path->p_ext == NULL)
++ return EXT_MAX_BLOCK;
++
++ /* FIXME: what if index isn't full ?! */
++ /* walk up from the leaf towards the root looking for a
++ * right-hand sibling at any level */
++ while (depth >= 0) {
++ if (depth == path->p_depth) {
++ /* leaf */
++ if (path[depth].p_ext !=
++ EXT_LAST_EXTENT(path[depth].p_hdr))
++ return path[depth].p_ext[1].ee_block;
++ } else {
++ /* index */
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ }
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * returns first allocated block from next leaf or EXT_MAX_BLOCK
++ */
++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth;
++
++ EXT_ASSERT(path != NULL);
++ depth = path->p_depth;
++
++ /* zero-tree has no leaf blocks at all */
++ if (depth == 0)
++ return EXT_MAX_BLOCK;
++
++ /* go to index block */
++ depth--;
++
++ /* walk up the index levels looking for a right-hand sibling */
++ while (depth >= 0) {
++ if (path[depth].p_idx !=
++ EXT_LAST_INDEX(path[depth].p_hdr))
++ return path[depth].p_idx[1].ei_block;
++ depth--;
++ }
++
++ return EXT_MAX_BLOCK;
++}
++
++/*
++ * if leaf gets modified and modified extent is first in the leaf
++ * then we have to correct all indexes above
++ * TODO: do we need to correct tree in all cases?
++ */
++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct ext3_extent_header *eh;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_extent *ex;
++ unsigned long border;
++ int k, err = 0;
++
++ eh = path[depth].p_hdr;
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(eh);
++
++ if (depth == 0) {
++ /* there is no tree at all */
++ return 0;
++ }
++
++ if (ex != EXT_FIRST_EXTENT(eh)) {
++ /* we correct tree if first leaf got modified only */
++ return 0;
++ }
++
++ /*
++ * TODO: we need correction if border is smaller than current one
++ */
++ k = depth - 1;
++ border = path[depth].p_ext->ee_block;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ return err;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ return err;
++
++ /* propagate the new border upward while the child is the
++ * first entry of its parent index block */
++ while (k--) {
++ /* change all left-side indexes */
++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
++ break;
++ if ((err = ext3_ext_get_access(handle, tree, path + k)))
++ break;
++ path[k].p_idx->ei_block = border;
++ if ((err = ext3_ext_dirty(handle, tree, path + k)))
++ break;
++ }
++
++ return err;
++}
++
++/*
++ * Returns non-zero when @ex2 starts exactly where @ex1 ends (logically
++ * contiguous) and the tree's optional ->mergable callback agrees.
++ * Under AGRESSIVE_TEST, merging is refused once ex1 reaches 4 blocks
++ * to exercise the split paths.
++ */
++static int inline
++ext3_can_extents_be_merged(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ if (ex1->ee_block + ex1->ee_len != ex2->ee_block)
++ return 0;
++
++#ifdef AGRESSIVE_TEST
++ if (ex1->ee_len >= 4)
++ return 0;
++#endif
++
++ if (!tree->ops->mergable)
++ return 1;
++
++ return tree->ops->mergable(ex1, ex2);
++}
++
++/*
++ * this routine tries to merge requested extent into the existing
++ * extent or inserts requested extent as new one into the tree,
++ * creating new leaf in no-space case
++ */
++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct ext3_extent_header * eh;
++ struct ext3_extent *ex, *fex;
++ struct ext3_extent *nearex; /* nearest extent */
++ struct ext3_ext_path *npath = NULL;
++ int depth, len, err, next;
++
++ EXT_ASSERT(newext->ee_len > 0);
++ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK);
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(path[depth].p_hdr);
++
++ /* try to insert block into found extent and return */
++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) {
++ ext_debug(tree, "append %d block to %d:%d (from %d)\n",
++ newext->ee_len, ex->ee_block, ex->ee_len,
++ ex->ee_start);
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ return err;
++ ex->ee_len += newext->ee_len;
++ eh = path[depth].p_hdr;
++ nearex = ex;
++ goto merge;
++ }
++
++repeat:
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max)
++ goto has_space;
++
++ /* probably next leaf has space for us? */
++ fex = EXT_LAST_EXTENT(eh);
++ next = ext3_ext_next_leaf_block(tree, path);
++ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) {
++ ext_debug(tree, "next leaf block - %d\n", next);
++ EXT_ASSERT(!npath);
++ npath = ext3_ext_find_extent(tree, next, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ EXT_ASSERT(npath->p_depth == path->p_depth);
++ eh = npath[depth].p_hdr;
++ if (eh->eh_entries < eh->eh_max) {
++ ext_debug(tree, "next leaf isnt full(%d)\n",
++ eh->eh_entries);
++ path = npath;
++ goto repeat;
++ }
++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n",
++ eh->eh_entries, eh->eh_max);
++ }
++
++ /*
++ * there is no free space in found leaf
++ * we're gonna add new leaf in the tree
++ */
++ err = ext3_ext_create_new_leaf(handle, tree, path, newext);
++ if (err)
++ goto cleanup;
++ depth = EXT_DEPTH(tree);
++ eh = path[depth].p_hdr;
++
++has_space:
++ nearex = path[depth].p_ext;
++
++ if ((err = ext3_ext_get_access(handle, tree, path + depth)))
++ goto cleanup;
++
++ if (!nearex) {
++ /* there is no extent in this leaf, create first one */
++ ext_debug(tree, "first extent in the leaf: %d:%d:%d\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len);
++ path[depth].p_ext = EXT_FIRST_EXTENT(eh);
++ } else if (newext->ee_block > nearex->ee_block) {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ if (nearex != EXT_LAST_EXTENT(eh)) {
++ len = EXT_MAX_EXTENT(eh) - nearex;
++ len = (len - 1) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start,
++ newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 2, nearex + 1, len);
++ }
++ path[depth].p_ext = nearex + 1;
++ } else {
++ EXT_ASSERT(newext->ee_block != nearex->ee_block);
++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
++ len = len < 0 ? 0 : len;
++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, "
++ "move %d from 0x%p to 0x%p\n",
++ newext->ee_block, newext->ee_start, newext->ee_len,
++ nearex, len, nearex + 1, nearex + 2);
++ memmove(nearex + 1, nearex, len);
++ path[depth].p_ext = nearex;
++ }
++
++ eh->eh_entries++;
++ nearex = path[depth].p_ext;
++ nearex->ee_block = newext->ee_block;
++ nearex->ee_start = newext->ee_start;
++ nearex->ee_len = newext->ee_len;
++ /* FIXME: support for large fs */
++ nearex->ee_start_hi = 0;
++
++merge:
++ /* try to merge extents to the right */
++ while (nearex < EXT_LAST_EXTENT(eh)) {
++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1))
++ break;
++ /* merge with next extent! */
++ nearex->ee_len += nearex[1].ee_len;
++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) {
++ len = (EXT_LAST_EXTENT(eh) - nearex - 1)
++ * sizeof(struct ext3_extent);
++ memmove(nearex + 1, nearex + 2, len);
++ }
++ eh->eh_entries--;
++ EXT_ASSERT(eh->eh_entries > 0);
++ }
++
++ /* try to merge extents to the left */
++
++ /* time to correct all indexes above */
++ err = ext3_ext_correct_indexes(handle, tree, path);
++ if (err)
++ goto cleanup;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++
++cleanup:
++ if (npath) {
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++ }
++ ext3_ext_tree_changed(tree);
++ ext3_ext_invalidate_cache(tree);
++ return err;
++}
++
++/*
++ * Iterate over the logical range [@block; @block + @num), invoking
++ * @func once per covered span — either an existing extent (exists=1)
++ * or a gap between extents (exists=0, cbex describes the hole).
++ * The callback may return EXT_REPEAT to retry the same span,
++ * EXT_BREAK to stop, or a negative error to abort.
++ */
++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block,
++ unsigned long num, ext_prepare_callback func)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent *ex, cbex;
++ unsigned long next, start = 0, end = 0;
++ unsigned long last = block + num;
++ int depth, exists, err = 0;
++
++ EXT_ASSERT(tree);
++ EXT_ASSERT(func);
++ EXT_ASSERT(tree->inode);
++ EXT_ASSERT(tree->root);
++
++ while (block < last && block != EXT_MAX_BLOCK) {
++ num = last - block;
++ /* find extent for this block */
++ path = ext3_ext_find_extent(tree, block, path);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ break;
++ }
++
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(path[depth].p_hdr);
++ ex = path[depth].p_ext;
++ next = ext3_ext_next_allocated_block(path);
++
++ exists = 0;
++ if (!ex) {
++ /* there is no extent yet, so try to allocate
++ * all requested space */
++ start = block;
++ end = block + num;
++ } else if (ex->ee_block > block) {
++ /* need to allocate space before found extent */
++ start = block;
++ end = ex->ee_block;
++ if (block + num < end)
++ end = block + num;
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ /* need to allocate space after found extent */
++ start = block;
++ end = block + num;
++ if (end >= next)
++ end = next;
++ } else if (block >= ex->ee_block) {
++ /*
++ * some part of requested space is covered
++ * by found extent
++ */
++ start = block;
++ end = ex->ee_block + ex->ee_len;
++ if (block + num < end)
++ end = block + num;
++ exists = 1;
++ } else {
++ BUG();
++ }
++ EXT_ASSERT(end > start);
++
++ if (!exists) {
++ /* describe the hole to the callback */
++ cbex.ee_block = start;
++ cbex.ee_len = end - start;
++ cbex.ee_start = 0;
++ } else
++ cbex = *ex;
++
++ EXT_ASSERT(path[depth].p_hdr);
++ err = func(tree, path, &cbex, exists);
++ ext3_ext_drop_refs(path);
++
++ if (err < 0)
++ break;
++ if (err == EXT_REPEAT)
++ continue;
++ else if (err == EXT_BREAK) {
++ err = 0;
++ break;
++ }
++
++ if (EXT_DEPTH(tree) != depth) {
++ /* depth was changed. we have to realloc path */
++ kfree(path);
++ path = NULL;
++ }
++
++ block = cbex.ee_block + cbex.ee_len;
++ }
++
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++
++ return err;
++}
++
++/*
++ * Store one extent/gap descriptor in the tree's single-entry lookup
++ * cache (if the tree has cache storage at all).  @type is one of the
++ * EXT3_EXT_CACHE_* values.
++ */
++static inline void
++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block,
++ __u32 len, __u32 start, int type)
++{
++ EXT_ASSERT(len > 0);
++ if (tree->cex) {
++ tree->cex->ec_type = type;
++ tree->cex->ec_block = block;
++ tree->cex->ec_len = len;
++ tree->cex->ec_start = start;
++ }
++}
++
++/*
++ * this routine calculates boundaries of the gap the requested block
++ * fits into and caches this gap
++ */
++static inline void
++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ unsigned long block)
++{
++ int depth = EXT_DEPTH(tree);
++ unsigned long lblock, len;
++ struct ext3_extent *ex;
++
++ if (!tree->cex)
++ return;
++
++ ex = path[depth].p_ext;
++ if (ex == NULL) {
++ /* there is no extent yet, so gap is [0;-] */
++ lblock = 0;
++ len = EXT_MAX_BLOCK;
++ ext_debug(tree, "cache gap(whole file):");
++ } else if (block < ex->ee_block) {
++ /* gap lies before the found extent */
++ lblock = block;
++ len = ex->ee_block - block;
++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len);
++ } else if (block >= ex->ee_block + ex->ee_len) {
++ /* gap lies after the found extent, up to the next
++ * allocated block */
++ lblock = ex->ee_block + ex->ee_len;
++ len = ext3_ext_next_allocated_block(path);
++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu",
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) block);
++ EXT_ASSERT(len > lblock);
++ len = len - lblock;
++ } else {
++ lblock = len = 0;
++ BUG();
++ }
++
++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len);
++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP);
++}
++
++/*
++ * Look @block up in the single-entry cache.  On a hit, fill @ex from
++ * the cached descriptor and return its type (EXT3_EXT_CACHE_GAP or
++ * EXT3_EXT_CACHE_EXTENT); otherwise return EXT3_EXT_CACHE_NO.
++ */
++static inline int
++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block,
++ struct ext3_extent *ex)
++{
++ struct ext3_ext_cache *cex = tree->cex;
++
++ /* is there cache storage at all? */
++ if (!cex)
++ return EXT3_EXT_CACHE_NO;
++
++ /* has cache valid data? */
++ if (cex->ec_type == EXT3_EXT_CACHE_NO)
++ return EXT3_EXT_CACHE_NO;
++
++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP ||
++ cex->ec_type == EXT3_EXT_CACHE_EXTENT);
++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
++ ex->ee_block = cex->ec_block;
++ ex->ee_start = cex->ec_start;
++ ex->ee_len = cex->ec_len;
++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n",
++ (unsigned long) block,
++ (unsigned long) ex->ee_block,
++ (unsigned long) ex->ee_len,
++ (unsigned long) ex->ee_start);
++ return cex->ec_type;
++ }
++
++ /* not in cache */
++ return EXT3_EXT_CACHE_NO;
++}
++
++/*
++ * routine removes index from the index block
++ * it's used in truncate case only. thus all requests are for the
++ * last index in the block only
++ */
++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ struct buffer_head *bh;
++ int err;
++
++ /* free index block */
++ /* path points one level below the index to drop; step up */
++ path--;
++ EXT_ASSERT(path->p_hdr->eh_entries);
++ if ((err = ext3_ext_get_access(handle, tree, path)))
++ return err;
++ path->p_hdr->eh_entries--;
++ if ((err = ext3_ext_dirty(handle, tree, path)))
++ return err;
++ ext_debug(tree, "index is empty, remove it, free block %d\n",
++ path->p_idx->ei_leaf);
++ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ return err;
++}
++
++/*
++ * Estimate the number of journal credits needed to insert one extent.
++ * With a @path whose leaf still has room, a single block suffices;
++ * otherwise assume the worst case of growing in depth plus a full
++ * index split.
++ */
++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path)
++{
++ int depth = EXT_DEPTH(tree);
++ int needed;
++
++ if (path) {
++ /* probably there is space in leaf? */
++ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max)
++ return 1;
++ }
++
++ /*
++ * the worst case we're expecting is creation of the
++ * new root (growing in depth) with index splitting
++ * for splitting we have to consider depth + 1 because
++ * previous growing could increase it
++ */
++ depth = depth + 1;
++
++ /*
++ * growing in depth:
++ * block allocation + new root + old root
++ */
++ needed = EXT3_ALLOC_NEEDED + 2;
++
++ /* index split. we may need:
++ * allocate intermediate indexes and new leaf
++ * change two blocks at each level, but root
++ * modify root block (inode)
++ */
++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
++
++ return needed;
++}
++
++/*
++ * Remove the interior range [@start; @end] of the extent at
++ * path[depth].p_ext by shrinking it to the head part and inserting a
++ * new extent for the tail.  Restarts the journal handle with enough
++ * credits for the insert first.
++ */
++static int
++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, tex;
++ struct ext3_ext_path *npath;
++ int depth, creds, err;
++
++ depth = EXT_DEPTH(tree);
++ ex = path[depth].p_ext;
++ EXT_ASSERT(ex);
++ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1);
++ EXT_ASSERT(ex->ee_block < start);
++
++ /* calculate tail extent */
++ tex.ee_block = end + 1;
++ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len);
++ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block;
++
++ creds = ext3_ext_calc_credits_for_insert(tree, path);
++ handle = ext3_ext_journal_restart(handle, creds);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ /* calculate head extent. use primary extent */
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ return err;
++ ex->ee_len = start - ex->ee_block;
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ return err;
++
++ /* FIXME: some callback to free underlying resource
++ * and correct ee_start? */
++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n",
++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len);
++
++ /* re-find the (now shrunk) head extent with a fresh path and
++ * insert the tail after it */
++ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL);
++ if (IS_ERR(npath))
++ return PTR_ERR(npath);
++ depth = EXT_DEPTH(tree);
++ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block);
++ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len);
++
++ err = ext3_ext_insert_extent(handle, tree, npath, &tex);
++ ext3_ext_drop_refs(npath);
++ kfree(npath);
++
++ return err;
++
++}
++
++static int
++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path, unsigned long start,
++ unsigned long end)
++{
++ struct ext3_extent *ex, *fu = NULL, *lu, *le;
++ int err = 0, correct_index = 0;
++ int depth = EXT_DEPTH(tree), credits;
++ struct ext3_extent_header *eh;
++ unsigned a, b, block, num;
++
++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end);
++ if (!path[depth].p_hdr)
++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh);
++ eh = path[depth].p_hdr;
++ EXT_ASSERT(eh);
++ EXT_ASSERT(eh->eh_entries <= eh->eh_max);
++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC);
++
++ /* find where to start removing */
++ le = ex = EXT_LAST_EXTENT(eh);
++ while (ex != EXT_FIRST_EXTENT(eh)) {
++ if (ex->ee_block <= end)
++ break;
++ ex--;
++ }
++
++ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) {
++ /* removal of an internal part of the extent was requested;
++ * the tail and the head must be placed in different extents,
++ * so we have to insert one more extent */
++ path[depth].p_ext = ex;
++ return ext3_ext_split_for_rm(handle, tree, path, start, end);
++ }
++
++ lu = ex;
++ while (ex >= EXT_FIRST_EXTENT(eh) &&
++ ex->ee_block + ex->ee_len > start) {
++ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len);
++ path[depth].p_ext = ex;
++
++ a = ex->ee_block > start ? ex->ee_block : start;
++ b = ex->ee_block + ex->ee_len - 1 < end ?
++ ex->ee_block + ex->ee_len - 1 : end;
++
++ ext_debug(tree, " border %u:%u\n", a, b);
++
++ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) {
++ block = 0;
++ num = 0;
++ BUG();
++ } else if (a != ex->ee_block) {
++ /* remove tail of the extent */
++ block = ex->ee_block;
++ num = a - block;
++ } else if (b != ex->ee_block + ex->ee_len - 1) {
++ /* remove head of the extent */
++ block = a;
++ num = b - a;
++ } else {
++ /* remove whole extent: excellent! */
++ block = ex->ee_block;
++ num = 0;
++ EXT_ASSERT(a == ex->ee_block &&
++ b == ex->ee_block + ex->ee_len - 1);
++ }
++
++ if (ex == EXT_FIRST_EXTENT(eh))
++ correct_index = 1;
++
++ credits = 1;
++ if (correct_index)
++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1;
++ if (tree->ops->remove_extent_credits)
++ credits+=tree->ops->remove_extent_credits(tree,ex,a,b);
++
++ handle = ext3_ext_journal_restart(handle, credits);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out;
++ }
++
++ err = ext3_ext_get_access(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ if (tree->ops->remove_extent)
++ err = tree->ops->remove_extent(tree, ex, a, b);
++ if (err)
++ goto out;
++
++ if (num == 0) {
++ /* this extent is removed entirely; mark the slot unused */
++ ex->ee_start = 0;
++ eh->eh_entries--;
++ fu = ex;
++ }
++
++ ex->ee_block = block;
++ ex->ee_len = num;
++
++ err = ext3_ext_dirty(handle, tree, path + depth);
++ if (err)
++ goto out;
++
++ ext_debug(tree, "new extent: %u:%u:%u\n",
++ ex->ee_block, ex->ee_len, ex->ee_start);
++ ex--;
++ }
++
++ if (fu) {
++ /* reuse unused slots */
++ while (lu < le) {
++ if (lu->ee_start) {
++ *fu = *lu;
++ lu->ee_start = 0;
++ fu++;
++ }
++ lu++;
++ }
++ }
++
++ if (correct_index && eh->eh_entries)
++ err = ext3_ext_correct_indexes(handle, tree, path);
++
++ /* if this leaf is free, then we should
++ * remove it from index block above */
++ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
++ err = ext3_ext_rm_idx(handle, tree, path + depth);
++
++out:
++ return err;
++}
++
++
++static struct ext3_extent_idx *
++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block)
++{
++ struct ext3_extent_idx *ix;
++
++ ix = EXT_LAST_INDEX(hdr);
++ while (ix != EXT_FIRST_INDEX(hdr)) {
++ if (ix->ei_block <= block)
++ break;
++ ix--;
++ }
++ return ix;
++}
++
++/*
++ * returns 1 if the current index has to be freed (even partially)
++ */
++static int inline
++ext3_ext_more_to_rm(struct ext3_ext_path *path)
++{
++ EXT_ASSERT(path->p_idx);
++
++ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
++ return 0;
++
++ /*
++ * if truncate on a deeper level happened, it wasn't partial,
++ * so we have to consider the current index for truncation
++ */
++ if (path->p_hdr->eh_entries == path->p_block)
++ return 0;
++ return 1;
++}
++
++int ext3_ext_remove_space(struct ext3_extents_tree *tree,
++ unsigned long start, unsigned long end)
++{
++ struct inode *inode = tree->inode;
++ struct super_block *sb = inode->i_sb;
++ int depth = EXT_DEPTH(tree);
++ struct ext3_ext_path *path;
++ handle_t *handle;
++ int i = 0, err = 0;
++
++ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end);
++
++ /* probably first extent we're gonna free will be last in block */
++ handle = ext3_journal_start(inode, depth + 1);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ ext3_ext_invalidate_cache(tree);
++
++ /*
++ * we start scanning from right side freeing all the blocks
++ * after i_size and walking into the deep
++ */
++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++ if (IS_ERR(path)) {
++ ext3_error(sb, "ext3_ext_remove_space",
++ "Can't allocate path array");
++ ext3_journal_stop(handle);
++ return -ENOMEM;
++ }
++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++ path[i].p_hdr = EXT_ROOT_HDR(tree);
++
++ while (i >= 0 && err == 0) {
++ if (i == depth) {
++ /* this is leaf block */
++ err = ext3_ext_rm_leaf(handle, tree, path, start, end);
++ /* root level has p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ continue;
++ }
++
++ /* this is index block */
++ if (!path[i].p_hdr) {
++ ext_debug(tree, "initialize header\n");
++ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh);
++ }
++
++ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max);
++ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC);
++
++ if (!path[i].p_idx) {
++ /* this level hasn't been touched yet */
++ path[i].p_idx =
++ ext3_ext_last_covered(path[i].p_hdr, end);
++ path[i].p_block = path[i].p_hdr->eh_entries + 1;
++ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n",
++ path[i].p_hdr, path[i].p_hdr->eh_entries);
++ } else {
++ /* we were already here; look at the next index */
++ path[i].p_idx--;
++ }
++
++ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n",
++ i, EXT_FIRST_INDEX(path[i].p_hdr),
++ path[i].p_idx);
++ if (ext3_ext_more_to_rm(path + i)) {
++ /* go to the next level */
++ ext_debug(tree, "move to level %d (block %d)\n",
++ i + 1, path[i].p_idx->ei_leaf);
++ memset(path + i + 1, 0, sizeof(*path));
++ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf);
++ if (!path[i+1].p_bh) {
++ /* should we reset i_size? */
++ err = -EIO;
++ break;
++ }
++ /* record the actual number of indexes so we can tell
++ * whether it changes at the next iteration */
++ path[i].p_block = path[i].p_hdr->eh_entries;
++ i++;
++ } else {
++ /* we finish processing this index, go up */
++ if (path[i].p_hdr->eh_entries == 0 && i > 0) {
++ /* index is empty, remove it
++ * handle must be already prepared by the
++ * truncatei_leaf() */
++ err = ext3_ext_rm_idx(handle, tree, path + i);
++ }
++ /* root level has p_bh == NULL, brelse() eats this */
++ brelse(path[i].p_bh);
++ i--;
++ ext_debug(tree, "return to level %d\n", i);
++ }
++ }
++
++ /* TODO: flexible tree reduction should be here */
++ if (path->p_hdr->eh_entries == 0) {
++ /*
++ * truncate to zero freed all the tree
++ * so, we need to correct eh_depth
++ */
++ err = ext3_ext_get_access(handle, tree, path);
++ if (err == 0) {
++ EXT_ROOT_HDR(tree)->eh_depth = 0;
++ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree);
++ err = ext3_ext_dirty(handle, tree, path);
++ }
++ }
++ ext3_ext_tree_changed(tree);
++
++ kfree(path);
++ ext3_journal_stop(handle);
++
++ return err;
++}
++
++int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks)
++{
++ int lcap, icap, rcap, leafs, idxs, num;
++
++ rcap = ext3_ext_space_root(tree);
++ if (blocks <= rcap) {
++ /* all extents fit to the root */
++ return 0;
++ }
++
++ rcap = ext3_ext_space_root_idx(tree);
++ lcap = ext3_ext_space_block(tree);
++ icap = ext3_ext_space_block_idx(tree);
++
++ num = leafs = (blocks + lcap - 1) / lcap;
++ if (leafs <= rcap) {
++ /* all pointers to leafs fit to the root */
++ return leafs;
++ }
++
++ /* ok. we need separate index block(s) to link all leaf blocks */
++ idxs = (leafs + icap - 1) / icap;
++ do {
++ num += idxs;
++ idxs = (idxs + icap - 1) / icap;
++ } while (idxs > rcap);
++
++ return num;
++}
++
++/*
++ * called at mount time
++ */
++void ext3_ext_init(struct super_block *sb)
++{
++ /*
++ * possible initialization would be here
++ */
++
++ if (test_opt(sb, EXTENTS)) {
++ printk("EXT3-fs: file extents enabled");
++#ifdef AGRESSIVE_TEST
++ printk(", agressive tests");
++#endif
++#ifdef CHECK_BINSEARCH
++ printk(", check binsearch");
++#endif
++ printk("\n");
++ }
++}
++
++/*
++ * called at umount time
++ */
++void ext3_ext_release(struct super_block *sb)
++{
++}
++
++/************************************************************************
++ * VFS related routines
++ ************************************************************************/
++
++static int ext3_get_inode_write_access(handle_t *handle, void *buffer)
++{
++ /* we use in-core data, not bh */
++ return 0;
++}
++
++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer)
++{
++ struct inode *inode = buffer;
++ return ext3_mark_inode_dirty(handle, inode);
++}
++
++static int ext3_ext_mergable(struct ext3_extent *ex1,
++ struct ext3_extent *ex2)
++{
++ /* FIXME: support for large fs */
++ if (ex1->ee_start + ex1->ee_len == ex2->ee_start)
++ return 1;
++ return 0;
++}
++
++static int
++ext3_remove_blocks_credits(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed;
++
++ /* at present, extent can't cross block group */;
++ needed = 4; /* bitmap + group desc + sb + inode */
++
++#ifdef CONFIG_QUOTA
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++ return needed;
++}
++
++static int
++ext3_remove_blocks(struct ext3_extents_tree *tree,
++ struct ext3_extent *ex,
++ unsigned long from, unsigned long to)
++{
++ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
++ handle_t *handle = ext3_journal_start(tree->inode, needed);
++ struct buffer_head *bh;
++ int i;
++
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
++ /* tail removal */
++ unsigned long num, start;
++ num = ex->ee_block + ex->ee_len - from;
++ start = ex->ee_start + ex->ee_len - num;
++ ext_debug(tree, "free last %lu blocks starting %lu\n",
++ num, start);
++ for (i = 0; i < num; i++) {
++ bh = sb_find_get_block(tree->inode->i_sb, start + i);
++ ext3_forget(handle, 0, tree->inode, bh, start + i);
++ }
++ ext3_free_blocks(handle, tree->inode, start, num);
++ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
++ printk("strange request: removal %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ } else {
++ printk("strange request: removal(2) %lu-%lu from %u:%u\n",
++ from, to, ex->ee_block, ex->ee_len);
++ }
++ ext3_journal_stop(handle);
++ return 0;
++}
++
++static int ext3_ext_find_goal(struct inode *inode,
++ struct ext3_ext_path *path, unsigned long block)
++{
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ unsigned long bg_start;
++ unsigned long colour;
++ int depth;
++
++ if (path) {
++ struct ext3_extent *ex;
++ depth = path->p_depth;
++
++ /* try to predict block placement */
++ if ((ex = path[depth].p_ext))
++ return ex->ee_start + (block - ex->ee_block);
++
++ /* it looks like the index is empty;
++ * try to find a goal starting from the index block itself */
++ if (path[depth].p_bh)
++ return path[depth].p_bh->b_blocknr;
++ }
++
++ /* OK. use inode's group */
++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++ colour = (current->pid % 16) *
++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++ return bg_start + colour + block;
++}
++
++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int *err)
++{
++ struct inode *inode = tree->inode;
++ int newblock, goal;
++
++ EXT_ASSERT(path);
++ EXT_ASSERT(ex);
++ EXT_ASSERT(ex->ee_start);
++ EXT_ASSERT(ex->ee_len);
++
++ /* reuse block from the extent to order data/metadata */
++ newblock = ex->ee_start++;
++ ex->ee_len--;
++ if (ex->ee_len == 0) {
++ ex->ee_len = 1;
++ /* allocate new block for the extent */
++ goal = ext3_ext_find_goal(inode, path, ex->ee_block);
++ ex->ee_start = ext3_new_block(handle, inode, goal, err);
++ if (ex->ee_start == 0) {
++ /* error occurred: restore old extent */
++ ex->ee_start = newblock;
++ return 0;
++ }
++ }
++ return newblock;
++}
++
++static struct ext3_extents_helpers ext3_blockmap_helpers = {
++ .get_write_access = ext3_get_inode_write_access,
++ .mark_buffer_dirty = ext3_mark_buffer_dirty,
++ .mergable = ext3_ext_mergable,
++ .new_block = ext3_new_block_cb,
++ .remove_extent = ext3_remove_blocks,
++ .remove_extent_credits = ext3_remove_blocks_credits,
++};
++
++void ext3_init_tree_desc(struct ext3_extents_tree *tree,
++ struct inode *inode)
++{
++ tree->inode = inode;
++ tree->root = (void *) EXT3_I(inode)->i_data;
++ tree->buffer = (void *) inode;
++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data);
++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent;
++ tree->ops = &ext3_blockmap_helpers;
++}
++
++int ext3_ext_get_block(handle_t *handle, struct inode *inode,
++ long iblock, struct buffer_head *bh_result,
++ int create, int extend_disksize)
++{
++ struct ext3_ext_path *path = NULL;
++ struct ext3_extent newex;
++ struct ext3_extent *ex;
++ int goal, newblock, err = 0, depth;
++ struct ext3_extents_tree tree;
++
++ clear_buffer_new(bh_result);
++ ext3_init_tree_desc(&tree, inode);
++ ext_debug(&tree, "block %d requested for inode %u\n",
++ (int) iblock, (unsigned) inode->i_ino);
++ down(&EXT3_I(inode)->truncate_sem);
++
++ /* check in cache */
++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) {
++ if (goal == EXT3_EXT_CACHE_GAP) {
++ if (!create) {
++ /* block isn't allocated yet and
++ * user don't want to allocate it */
++ goto out2;
++ }
++ /* we should allocate requested block */
++ } else if (goal == EXT3_EXT_CACHE_EXTENT) {
++ /* block is already allocated */
++ newblock = iblock - newex.ee_block + newex.ee_start;
++ goto out;
++ } else {
++ EXT_ASSERT(0);
++ }
++ }
++
++ /* find extent for this block */
++ path = ext3_ext_find_extent(&tree, iblock, NULL);
++ if (IS_ERR(path)) {
++ err = PTR_ERR(path);
++ path = NULL;
++ goto out2;
++ }
++
++ depth = EXT_DEPTH(&tree);
++
++ /*
++ * consistent leaf must not be empty
++ * this situation is possible, though, _during_ tree modification
++ * this is why assert can't be put in ext3_ext_find_extent()
++ */
++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0);
++
++ if ((ex = path[depth].p_ext)) {
++ /* if the found extent covers the block, simply return it */
++ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) {
++ newblock = iblock - ex->ee_block + ex->ee_start;
++ ext_debug(&tree, "%d fit into %d:%d -> %d\n",
++ (int) iblock, ex->ee_block, ex->ee_len,
++ newblock);
++ ext3_ext_put_in_cache(&tree, ex->ee_block,
++ ex->ee_len, ex->ee_start,
++ EXT3_EXT_CACHE_EXTENT);
++ goto out;
++ }
++ }
++
++ /*
++ * requested block isn't allocated yet
++ * we can't try to create the block if the create flag is zero
++ */
++ if (!create) {
++ /* put the just-found gap into cache to speed up subsequent reqs */
++ ext3_ext_put_gap_in_cache(&tree, path, iblock);
++ goto out2;
++ }
++
++ /* allocate new block */
++ goal = ext3_ext_find_goal(inode, path, iblock);
++ newblock = ext3_new_block(handle, inode, goal, &err);
++ if (!newblock)
++ goto out2;
++ ext_debug(&tree, "allocate new block: goal %d, found %d\n",
++ goal, newblock);
++
++ /* try to insert new extent into found leaf and return */
++ newex.ee_block = iblock;
++ newex.ee_start = newblock;
++ newex.ee_len = 1;
++ err = ext3_ext_insert_extent(handle, &tree, path, &newex);
++ if (err)
++ goto out2;
++
++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize)
++ EXT3_I(inode)->i_disksize = inode->i_size;
++
++ /* previous routine could use block we allocated */
++ newblock = newex.ee_start;
++ set_buffer_new(bh_result);
++
++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
++ newex.ee_start, EXT3_EXT_CACHE_EXTENT);
++out:
++ ext3_ext_show_leaf(&tree, path);
++ map_bh(bh_result, inode->i_sb, newblock);
++out2:
++ if (path) {
++ ext3_ext_drop_refs(path);
++ kfree(path);
++ }
++ up(&EXT3_I(inode)->truncate_sem);
++
++ return err;
++}
++
++void ext3_ext_truncate(struct inode * inode, struct page *page)
++{
++ struct address_space *mapping = inode->i_mapping;
++ struct super_block *sb = inode->i_sb;
++ struct ext3_extents_tree tree;
++ unsigned long last_block;
++ handle_t *handle;
++ int err = 0;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ /*
++ * probably first extent we're gonna free will be last in block
++ */
++ err = ext3_writepage_trans_blocks(inode) + 3;
++ handle = ext3_journal_start(inode, err);
++ if (IS_ERR(handle)) {
++ if (page) {
++ clear_highpage(page);
++ flush_dcache_page(page);
++ unlock_page(page);
++ page_cache_release(page);
++ }
++ return;
++ }
++
++ if (page)
++ ext3_block_truncate_page(handle, page, mapping, inode->i_size);
++
++ down(&EXT3_I(inode)->truncate_sem);
++ ext3_ext_invalidate_cache(&tree);
++
++ /*
++ * TODO: optimization is possible here
++ * probably we don't need any scanning at all,
++ * because page truncation is enough
++ */
++ if (ext3_orphan_add(handle, inode))
++ goto out_stop;
++
++ /* we have to know where to truncate from in crash case */
++ EXT3_I(inode)->i_disksize = inode->i_size;
++ ext3_mark_inode_dirty(handle, inode);
++
++ last_block = (inode->i_size + sb->s_blocksize - 1)
++ >> EXT3_BLOCK_SIZE_BITS(sb);
++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK);
++
++ /* In a multi-transaction truncate, we only make the final
++ * transaction synchronous */
++ if (IS_SYNC(inode))
++ handle->h_sync = 1;
++
++out_stop:
++ /*
++ * If this was a simple ftruncate(), and the file will remain alive
++ * then we need to clear up the orphan record which we created above.
++ * However, if this was a real unlink then we were called by
++ * ext3_delete_inode(), and we allow that function to clean up the
++ * orphan info for us.
++ */
++ if (inode->i_nlink)
++ ext3_orphan_del(handle, inode);
++
++ up(&EXT3_I(inode)->truncate_sem);
++ ext3_journal_stop(handle);
++}
++
++/*
++ * this routine calculate max number of blocks we could modify
++ * in order to allocate new block for an inode
++ */
++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
++{
++ struct ext3_extents_tree tree;
++ int needed;
++
++ ext3_init_tree_desc(&tree, inode);
++
++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL);
++
++ /* caller want to allocate num blocks */
++ needed *= num;
++
++#ifdef CONFIG_QUOTA
++ /*
++ * FIXME: real calculation should be here
++ * it depends on the blockmap format of the quota file
++ */
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++ return needed;
++}
++
++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ ext3_extent_tree_init(handle, &tree);
++}
++
++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks)
++{
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ return ext3_ext_calc_metadata_amount(&tree, blocks);
++}
++
++static int
++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newex, int exist)
++{
++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private;
++
++ if (!exist)
++ return EXT_CONTINUE;
++ if (buf->err < 0)
++ return EXT_BREAK;
++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen)
++ return EXT_BREAK;
++
++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) {
++ buf->err++;
++ buf->cur += sizeof(*newex);
++ } else {
++ buf->err = -EFAULT;
++ return EXT_BREAK;
++ }
++ return EXT_CONTINUE;
++}
++
++static int
++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree,
++ struct ext3_ext_path *path,
++ struct ext3_extent *ex, int exist)
++{
++ struct ext3_extent_tree_stats *buf =
++ (struct ext3_extent_tree_stats *) tree->private;
++ int depth;
++
++ if (!exist)
++ return EXT_CONTINUE;
++
++ depth = EXT_DEPTH(tree);
++ buf->extents_num++;
++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr))
++ buf->leaf_num++;
++ return EXT_CONTINUE;
++}
++
++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
++ unsigned long arg)
++{
++ int err = 0;
++
++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
++ return -EINVAL;
++
++ if (cmd == EXT3_IOC_GET_EXTENTS) {
++ struct ext3_extent_buf buf;
++ struct ext3_extents_tree tree;
++
++ if (copy_from_user(&buf, (void *) arg, sizeof(buf)))
++ return -EFAULT;
++
++ ext3_init_tree_desc(&tree, inode);
++ buf.cur = buf.buffer;
++ buf.err = 0;
++ tree.private = &buf;
++ down(&EXT3_I(inode)->truncate_sem);
++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK,
++ ext3_ext_store_extent_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ if (err == 0)
++ err = buf.err;
++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) {
++ struct ext3_extent_tree_stats buf;
++ struct ext3_extents_tree tree;
++
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ buf.depth = EXT_DEPTH(&tree);
++ buf.extents_num = 0;
++ buf.leaf_num = 0;
++ tree.private = &buf;
++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK,
++ ext3_ext_collect_stats_cb);
++ up(&EXT3_I(inode)->truncate_sem);
++ if (!err)
++ err = copy_to_user((void *) arg, &buf, sizeof(buf));
++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) {
++ struct ext3_extents_tree tree;
++ ext3_init_tree_desc(&tree, inode);
++ down(&EXT3_I(inode)->truncate_sem);
++ err = EXT_DEPTH(&tree);
++ up(&EXT3_I(inode)->truncate_sem);
++ }
++
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_init_tree_desc);
++EXPORT_SYMBOL(ext3_mark_inode_dirty);
++EXPORT_SYMBOL(ext3_ext_invalidate_cache);
++EXPORT_SYMBOL(ext3_ext_insert_extent);
++EXPORT_SYMBOL(ext3_ext_walk_space);
++EXPORT_SYMBOL(ext3_ext_find_goal);
++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert);
++
+Index: linux-2.6.5-sles9/fs/ext3/ialloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300
++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300
+@@ -647,6 +647,10 @@
+ DQUOT_FREE_INODE(inode);
+ goto fail2;
+ }
++ if (test_opt(sb, EXTENTS)) {
++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
++ ext3_extents_initialize_blockmap(handle, inode);
++ }
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext3_std_error(sb, err);
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
+@@ -796,6 +796,17 @@
+ goto reread;
+ }
+
++static inline int
++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block,
++ struct buffer_head *bh, int create, int extend_disksize)
++{
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_get_block(handle, inode, block, bh, create,
++ extend_disksize);
++ return ext3_get_block_handle(handle, inode, block, bh, create,
++ extend_disksize);
++}
++
+ static int ext3_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+ {
+@@ -806,8 +817,8 @@
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 1);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 1);
+ return ret;
+ }
+
+@@ -833,8 +844,8 @@
+ }
+ }
+ if (ret == 0)
+- ret = ext3_get_block_handle(handle, inode, iblock,
+- bh_result, create, 0);
++ ret = ext3_get_block_wrap(handle, inode, iblock,
++ bh_result, create, 0);
+ if (ret == 0)
+ bh_result->b_size = (1 << inode->i_blkbits);
+ return ret;
+@@ -855,7 +866,7 @@
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1587,7 +1598,7 @@
+ * This required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */
+-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
++int ext3_block_truncate_page(handle_t *handle, struct page *page,
+ struct address_space *mapping, loff_t from)
+ {
+ unsigned long index = from >> PAGE_CACHE_SHIFT;
+@@ -2083,6 +2094,9 @@
+ return;
+ }
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_truncate(inode, page);
++
+ handle = start_transaction(inode);
+ if (IS_ERR(handle)) {
+ if (page) {
+@@ -2789,6 +2803,9 @@
+ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+ int ret;
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_writepage_trans_blocks(inode, bpp);
++
+ if (ext3_should_journal_data(inode))
+ ret = 3 * (bpp + indirects) + 2;
+ else
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+@@ -447,6 +448,10 @@
+ #endif
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+ ei->vfs_inode.i_version = 1;
++ ei->i_cached_extent[0] = 0;
++ ei->i_cached_extent[1] = 0;
++ ei->i_cached_extent[2] = 0;
++ ei->i_cached_extent[3] = 0;
+ return &ei->vfs_inode;
+ }
+
+@@ -537,7 +542,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err,
++ Opt_err, Opt_extents, Opt_extdebug
+ };
+
+ static match_table_t tokens = {
+@@ -582,6 +587,8 @@
+ {Opt_iopen, "iopen"},
+ {Opt_noiopen, "noiopen"},
+ {Opt_iopen_nopriv, "iopen_nopriv"},
++ {Opt_extents, "extents"},
++ {Opt_extdebug, "extdebug"},
+ {Opt_err, NULL}
+ };
+
+@@ -797,6 +804,12 @@
+ break;
+ case Opt_ignore:
+ break;
++ case Opt_extents:
++ set_opt (sbi->s_mount_opt, EXTENTS);
++ break;
++ case Opt_extdebug:
++ set_opt (sbi->s_mount_opt, EXTDEBUG);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1449,6 +1462,8 @@
+ percpu_counter_mod(&sbi->s_dirs_counter,
+ ext3_count_dirs(sb));
+
++ ext3_ext_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/ioctl.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300
++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300
+@@ -124,6 +124,10 @@
+ err = ext3_change_inode_journal_flag(inode, jflag);
+ return err;
+ }
++ case EXT3_IOC_GET_EXTENTS:
++ case EXT3_IOC_GET_TREE_STATS:
++ case EXT3_IOC_GET_TREE_DEPTH:
++ return ext3_ext_ioctl(inode, filp, cmd, arg);
+ case EXT3_IOC_GETVERSION:
+ case EXT3_IOC_GETVERSION_OLD:
+ return put_user(inode->i_generation, (int *) arg);
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
+@@ -186,6 +186,7 @@
+ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+
+ #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+@@ -211,6 +212,9 @@
+ #endif
+ #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long)
+ #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long)
++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long)
++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long)
++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long)
+
+ /*
+ * Structure of an inode on the disk
+@@ -333,6 +337,8 @@
+ #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */
+ #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -729,6 +735,7 @@
+
+
+ /* inode.c */
++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t);
+ extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+@@ -802,6 +809,14 @@
+ extern struct inode_operations ext3_symlink_inode_operations;
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+
++/* extents.c */
++extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
++extern int ext3_ext_get_block(handle_t *, struct inode *, long,
++ struct buffer_head *, int, int);
++extern void ext3_ext_truncate(struct inode *, struct page *);
++extern void ext3_ext_init(struct super_block *);
++extern void ext3_ext_release(struct super_block *);
++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *);
+
+ #endif /* __KERNEL__ */
+
+Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300
+@@ -0,0 +1,252 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++#ifndef _LINUX_EXT3_EXTENTS
++#define _LINUX_EXT3_EXTENTS
++
++/*
++ * with AGRESSIVE_TEST defined capacity of index/leaf blocks
++ * become very little, so index split, in-depth growing and
++ * other hard changes happens much more often
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if CHECK_BINSEARCH defined, then results of binary search
++ * will be checked by linear search
++ */
++#define CHECK_BINSEARCH_
++
++/*
++ * if EXT_DEBUG is defined you can use 'extdebug' mount option
++ * to get lots of info what's going on
++ */
++#define EXT_DEBUG_
++#ifdef EXT_DEBUG
++#define ext_debug(tree,fmt,a...) \
++do { \
++ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \
++ printk(fmt, ##a); \
++} while (0)
++#else
++#define ext_debug(tree,fmt,a...)
++#endif
++
++/*
++ * if EXT_STATS is defined then stats numbers are collected
++ * these number will be displayed at umount time
++ */
++#define EXT_STATS_
++
++
++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * first 4 bytes are used to store:
++ * - tree depth (0 means there is no tree yet; all extents are in the inode)
++ * - number of alive extents in the inode
++ */
++
++/*
++ * this is extent on-disk structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++ __u32 ee_block; /* first logical block extent covers */
++ __u16 ee_len; /* number of blocks covered by extent */
++ __u16 ee_start_hi; /* high 16 bits of physical block */
++ __u32 ee_start; /* low 32 bits of physical block */
++};
++
++/*
++ * this is index on-disk structure
++ * it's used at all the levels, but the bottom
++ */
++struct ext3_extent_idx {
++ __u32 ei_block; /* index covers logical blocks from 'block' */
++ __u32 ei_leaf; /* pointer to the physical block of the next *
++ * level. leaf or next index could be here */
++ __u16 ei_leaf_hi; /* high 16 bits of physical block */
++ __u16 ei_unused;
++};
++
++/*
++ * each block (leaves and indexes), even inode-stored has header
++ */
++struct ext3_extent_header {
++ __u16 eh_magic; /* probably will support different formats */
++ __u16 eh_entries; /* number of valid entries */
++ __u16 eh_max; /* capacity of store in entries */
++ __u16 eh_depth; /* has tree real underlying blocks? */
++ __u32 eh_generation; /* generation of the tree */
++};
++
++#define EXT3_EXT_MAGIC 0xf30a
++
++/*
++ * array of ext3_ext_path contains path to some extent
++ * creation/lookup routines use it for traversal/splitting/etc
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++ __u32 p_block;
++ __u16 p_depth;
++ struct ext3_extent *p_ext;
++ struct ext3_extent_idx *p_idx;
++ struct ext3_extent_header *p_hdr;
++ struct buffer_head *p_bh;
++};
++
++/*
++ * structure for external API
++ */
++
++/*
++ * storage for cached extent
++ */
++struct ext3_ext_cache {
++ __u32 ec_start;
++ __u32 ec_block;
++ __u32 ec_len;
++ __u32 ec_type;
++};
++
++#define EXT3_EXT_CACHE_NO 0
++#define EXT3_EXT_CACHE_GAP 1
++#define EXT3_EXT_CACHE_EXTENT 2
++
++/*
++ * ext3_extents_tree is used to pass initial information
++ * to top-level extents API
++ */
++struct ext3_extents_helpers;
++struct ext3_extents_tree {
++ struct inode *inode; /* inode which tree belongs to */
++ void *root; /* ptr to data top of tree resides at */
++ void *buffer; /* will be passed as arg to ^^ routines */
++ int buffer_len;
++ void *private;
++ struct ext3_ext_cache *cex;/* last found extent */
++ struct ext3_extents_helpers *ops;
++};
++
++struct ext3_extents_helpers {
++ int (*get_write_access)(handle_t *h, void *buffer);
++ int (*mark_buffer_dirty)(handle_t *h, void *buffer);
++ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2);
++ int (*remove_extent_credits)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*remove_extent)(struct ext3_extents_tree *,
++ struct ext3_extent *, unsigned long,
++ unsigned long);
++ int (*new_block)(handle_t *, struct ext3_extents_tree *,
++ struct ext3_ext_path *, struct ext3_extent *,
++ int *);
++};
++
++/*
++ * to be called by ext3_ext_walk_space()
++ * negative retcode - error
++ * positive retcode - signal for ext3_ext_walk_space(), see below
++ * callback must return valid extent (passed or newly created)
++ */
++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *,
++ struct ext3_ext_path *,
++ struct ext3_extent *, int);
++
++#define EXT_CONTINUE 0
++#define EXT_BREAK 1
++#define EXT_REPEAT 2
++
++
++#define EXT_MAX_BLOCK 0xffffffff
++#define EXT_CACHE_MARK 0xffff
++
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++ ((struct ext3_extent *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++
++#define EXT_ROOT_HDR(tree) \
++ ((struct ext3_extent_header *) (tree)->root)
++#define EXT_BLOCK_HDR(bh) \
++ ((struct ext3_extent_header *) (bh)->b_data)
++#define EXT_DEPTH(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
++#define EXT_GENERATION(_t_) \
++ (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++
++
++#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
++
++
++/*
++ * this structure is used to gather extents from the tree via ioctl
++ */
++struct ext3_extent_buf {
++ unsigned long start;
++ int buflen;
++ void *buffer;
++ void *cur;
++ int err;
++};
++
++/*
++ * this structure is used to collect stats info about the tree
++ */
++struct ext3_extent_tree_stats {
++ int depth;
++ int extents_num;
++ int leaf_num;
++};
++
++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *);
++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *);
++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *);
++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback);
++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long);
++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *);
++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *);
++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int);
++
++static inline void
++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree)
++{
++ if (tree->cex)
++ tree->cex->ec_type = EXT3_EXT_CACHE_NO;
++}
++
++
++#endif /* _LINUX_EXT3_EXTENTS */
++
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300
+@@ -128,6 +128,8 @@
+ */
+ struct semaphore truncate_sem;
+ struct inode vfs_inode;
++
++ __u32 i_cached_extent[4];
+ };
+
+ #endif /* _LINUX_EXT3_FS_I */
+
+%diffstat
+ fs/ext3/Makefile | 2
+ fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/ialloc.c | 4
+ fs/ext3/inode.c | 29
+ fs/ext3/ioctl.c | 4
+ fs/ext3/super.c | 17
+ include/linux/ext3_extents.h | 252 ++++
+ include/linux/ext3_fs.h | 15
+ include/linux/ext3_fs_i.h | 2
+ 9 files changed, 2630 insertions(+), 8 deletions(-)
+
--- /dev/null
+Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
+@@ -0,0 +1,1428 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advice allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGRESSIVE_CHECK allocator runs consistency checks over
++ * structures. this checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/*
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb;
++
++ /* search goals */
++ int ac_g_group;
++ int ac_g_start;
++ int ac_g_len;
++ int ac_g_flags;
++
++ /* the best found extent */
++ int ac_b_group;
++ int ac_b_start;
++ int ac_b_len;
++
++ /* number of iterations done. we have to track to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status;
++};
++
++#define AC_STATUS_CONTINUE 1
++#define AC_STATUS_FOUND 2
++
++
++struct ext3_buddy {
++ void *bd_bitmap;
++ void *bd_buddy;
++ int bd_blkbits;
++ struct buffer_head *bd_bh;
++ struct buffer_head *bd_bh2;
++ struct ext3_buddy_group_blocks *bd_bd;
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent {
++ int fe_start;
++ int fe_len;
++ unsigned char fe_orders[MB_ARR_SIZE];
++ unsigned char fe_nums;
++ unsigned char fe_back;
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++#define mb_correct_addr_and_bit(bit,addr) \
++do { \
++ if ((unsigned long) addr & 1) { \
++ bit += 8; \
++ addr--; \
++ } \
++ if ((unsigned long) addr & 2) { \
++ bit += 16; \
++ addr--; \
++ addr--; \
++ } \
++} while (0)
++
++static inline int mb_test_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ return test_bit(bit, addr);
++}
++
++static inline void mb_set_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ set_bit(bit, addr);
++}
++
++static inline void mb_clear_bit(int bit, void *addr)
++{
++ mb_correct_addr_and_bit(bit,addr);
++ clear_bit(bit, addr);
++}
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
++{
++ int i = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(max != NULL);
++
++ if (order > e3b->bd_blkbits + 1)
++ return NULL;
++
++ /* at order 0 we see each particular block */
++ *max = 1 << (e3b->bd_blkbits + 3);
++ if (order == 0)
++ return e3b->bd_bitmap;
++
++ bb = e3b->bd_buddy;
++ *max = *max >> 1;
++ while (i < order) {
++ bb += 1 << (e3b->bd_blkbits - i);
++ i++;
++ *max = *max >> 1;
++ }
++ return bb;
++}
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++ struct ext3_buddy *e3b)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++
++ /* load bitmap */
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ if (e3b->bd_bh == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh);
++ wait_on_buffer(e3b->bd_bh);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++ /* load buddy */
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ if (e3b->bd_bh2 == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh2)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh2);
++ wait_on_buffer(e3b->bd_bh2);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++ e3b->bd_bitmap = e3b->bd_bh->b_data;
++ e3b->bd_buddy = e3b->bd_bh2->b_data;
++ e3b->bd_blkbits = sb->s_blocksize_bits;
++ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_sb = sb;
++
++ return 0;
++out:
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++ e3b->bd_bh = NULL;
++ e3b->bd_bh2 = NULL;
++ return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
++{
++ mark_buffer_dirty(e3b->bd_bh);
++ mark_buffer_dirty(e3b->bd_bh2);
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b)
++{
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK
++static void mb_check_buddy(struct ext3_buddy *e3b)
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!mb_test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (mb_test_bit(i << 1, buddy2))
++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
++ else if (mb_test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
++
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (mb_test_bit(i, buddy))
++ continue;
++ /* check used bits only */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!mb_test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void
++ext3_lock_group(struct super_block *sb, int group)
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static inline void
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
++{
++ int order = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++ bb = e3b->bd_buddy;
++ while (order <= e3b->bd_blkbits + 1) {
++ block = block >> 1;
++ if (mb_test_bit(block, bb)) {
++ /* this block is part of buddy of order 'order' */
++ return order;
++ }
++ bb += 1 << (e3b->bd_blkbits - order);
++ order++;
++ }
++ return 0;
++}
++
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: clear whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0;
++ cur += 32;
++ continue;
++ }
++ mb_clear_bit(cur, bm);
++ cur++;
++ }
++}
++
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: set whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0xffffffff;
++ cur += 32;
++ continue;
++ }
++ mb_set_bit(cur, bm);
++ cur++;
++ }
++}
++
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++
++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
++ mb_set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!mb_test_bit(block, buddy) ||
++ !mb_test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* for special purposes, we don't clear
++ * free bits in bitmap */
++ mb_clear_bit(block, buddy);
++ mb_clear_bit(block + 1, buddy);
++ }
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++
++ block = block >> 1;
++ order++;
++ e3b->bd_bd->bb_counters[order]++;
++
++ mb_set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * returns 1 if out extent is enough to fill needed space
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for single-chunk extent we need not back order
++ * also, if an extent doesn't fill needed space
++ * then it makes no sense to try back order because
++ * if we select this extent then it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situation fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++ int needed, struct ext3_free_extent *ex)
++{
++ int space = needed;
++ int next, max, ord;
++ void *buddy;
++
++ J_ASSERT(ex != NULL);
++
++ ex->fe_nums = 0;
++ ex->fe_len = 0;
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ J_ASSERT(block < max);
++ if (!mb_test_bit(block, buddy))
++ goto nofree;
++
++ if (order == 0) {
++ /* find actual order */
++ order = mb_find_order_for_block(e3b, block);
++ block = block >> order;
++ }
++
++ ex->fe_orders[ex->fe_nums++] = order;
++ ex->fe_len = 1 << order;
++ ex->fe_start = block << order;
++ ex->fe_back = 0;
++
++ while ((space = space - (1 << order)) > 0) {
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++
++ if (block + 1 >= max)
++ break;
++
++ next = (block + 1) * (1 << order);
++ if (!mb_test_bit(next, e3b->bd_bitmap))
++ break;
++
++ ord = mb_find_order_for_block(e3b, next);
++
++ if ((1 << ord) >= needed) {
++ /* we don't want to coalesce with self-enough buddies */
++ break;
++ }
++ order = ord;
++ block = next >> order;
++ ex->fe_len += 1 << order;
++
++ if (ex->fe_nums < MB_ARR_SIZE)
++ ex->fe_orders[ex->fe_nums++] = order;
++ }
++
++nofree:
++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++ return ex->fe_len;
++}
++
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ start = ex->fe_start + ex->fe_len - 1;
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++ len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start -= mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ J_ASSERT(start >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++int inline mb_mark_used(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int err;
++
++ J_ASSERT(ex);
++ if (ex->fe_back == 0)
++ err = mb_mark_used_forward(e3b, ex, len);
++ else
++ err = mb_mark_used_backward(e3b, ex, len);
++ return err;
++}
++
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b, int group)
++{
++ struct super_block *sb = ac->ac_sb;
++ int err, gorder, max, i;
++ struct ext3_free_extent curex;
++
++ /* let's know order of allocation */
++ gorder = 0;
++ while (ac->ac_g_len > (1 << gorder))
++ gorder++;
++
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++ /* someone asks for space at this specified block
++ * probably he wants to merge it into existing extent */
++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++ /* good. at least one block is free */
++ max = mb_find_extent(e3b, 0, ac->ac_g_start,
++ ac->ac_g_len, &curex);
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ err = 0;
++ goto out;
++ }
++ /* don't try to find goal anymore */
++ ac->ac_g_flags &= ~1;
++ }
++
++ i = 0;
++ while (1) {
++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++ if (i >= sb->s_blocksize * 8)
++ break;
++
++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++ if (max >= ac->ac_g_len) {
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ break;
++ }
++ i += max;
++ }
++
++ return 0;
++
++out:
++ return err;
++}
++
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++ struct ext3_group_desc *gdp;
++ int free_blocks;
++
++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
++ if (!gdp)
++ return 0;
++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++ if (free_blocks == 0)
++ return 0;
++
++ /* someone wants this block very much */
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++ return 1;
++
++ /* FIXME: I'd like to take fragmentation into account here */
++ if (cr == 0) {
++ if (free_blocks >= ac->ac_g_len >> 1)
++ return 1;
++ } else if (cr == 1) {
++ if (free_blocks >= ac->ac_g_len >> 2)
++ return 1;
++ } else if (cr == 2) {
++ return 1;
++ } else {
++ BUG();
++ }
++ return 0;
++}
++
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *len, int flags, int *errp)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_allocation_context ac;
++ int i, group, block, cr, err = 0;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ struct buffer_head *gdp_bh;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++
++ J_ASSERT(len != NULL);
++ J_ASSERT(*len > 0);
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk("ext3_mb_new_nblocks: nonexistent device");
++ return 0;
++ }
++
++ if (!test_opt(sb, MBALLOC)) {
++ static int ext3_mballoc_warning = 0;
++ if (ext3_mballoc_warning == 0) {
++ printk(KERN_ERR "EXT3-fs: multiblock request with "
++ "mballoc disabled!\n");
++ ext3_mballoc_warning++;
++ }
++ *len = 1;
++ err = ext3_new_block_old(handle, inode, goal, errp);
++ return err;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++
++ if (!(flags & 2)) {
++ /* someone asks for non-reserved blocks */
++ BUG_ON(*len > 1);
++ err = ext3_mb_reserve_blocks(sb, 1);
++ if (err) {
++ *errp = err;
++ return 0;
++ }
++ }
++
++ /*
++ * Check quota for allocation of this blocks.
++ */
++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++ *len -= 1;
++ if (*len == 0) {
++ *errp = -EDQUOT;
++ block = 0;
++ goto out;
++ }
++
++ /* start searching from the goal */
++ if (goal < le32_to_cpu(es->s_first_data_block) ||
++ goal >= le32_to_cpu(es->s_blocks_count))
++ goal = le32_to_cpu(es->s_first_data_block);
++ group = (goal - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb));
++
++ /* set up allocation goals */
++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++ ac.ac_status = 0;
++ ac.ac_groups_scanned = 0;
++ ac.ac_sb = inode->i_sb;
++ ac.ac_g_group = group;
++ ac.ac_g_start = block;
++ ac.ac_g_len = *len;
++ ac.ac_g_flags = flags;
++
++ /* loop over the groups */
++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++ if (group == EXT3_SB(sb)->s_groups_count)
++ group = 0;
++
++ /* check whether this group is good for our criteria */
++ if (!mb_good_group(&ac, group, cr))
++ continue;
++
++ err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++ if (err)
++ goto out_err;
++
++ ext3_lock_group(sb, group);
++ if (!mb_good_group(&ac, group, cr)) {
++ /* someone did allocation from this group */
++ ext3_unlock_group(sb, group);
++ ext3_mb_release_desc(&e3b);
++ continue;
++ }
++
++ err = ext3_mb_new_in_group(&ac, &e3b, group);
++ ext3_unlock_group(sb, group);
++ if (ac.ac_status == AC_STATUS_FOUND)
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ break;
++ }
++ }
++
++ if (ac.ac_status != AC_STATUS_FOUND) {
++ /* unfortunately, we can't satisfy this request */
++ J_ASSERT(ac.ac_b_len == 0);
++ DQUOT_FREE_BLOCK(inode, *len);
++ *errp = -ENOSPC;
++ block = 0;
++ goto out;
++ }
++
++ /* good news - free block(s) have been found. now it's time
++ * to mark block(s) in good old journaled bitmap */
++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
++
++ /* we made a decision, now mark found blocks in good old
++ * bitmap to be journaled */
++
++ ext3_debug("using block group %d(%d)\n",
++ ac.ac_b_group, ac.ac_b_len);
++
++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++ if (!bitmap_bh) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err) {
++ *errp = err;
++ goto out_err;
++ }
++
++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++ if (!gdp) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(es->s_first_data_block);
++
++ if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++ block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++ in_range(block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error(sb, "ext3_new_block",
++ "Allocating block in system zone - "
++ "block = %u", block);
++#if 0
++ for (i = 0; i < ac.ac_b_len; i++)
++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++ ext3_lock_group(sb, ac.ac_b_group);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++ ac.ac_b_len);
++ ext3_unlock_group(sb, ac.ac_b_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++ if (err)
++ goto out_err;
++ err = ext3_journal_dirty_metadata(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ sb->s_dirt = 1;
++ *errp = 0;
++ brelse(bitmap_bh);
++
++ /* drop non-allocated, but dquote'd blocks */
++ J_ASSERT(*len >= ac.ac_b_len);
++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++ *len = ac.ac_b_len;
++ J_ASSERT(block != 0);
++ goto out;
++
++out_err:
++ /* if we've already allocated something, roll it back */
++ if (ac.ac_status == AC_STATUS_FOUND) {
++ /* FIXME: free blocks here */
++ }
++
++ DQUOT_FREE_BLOCK(inode, *len);
++ brelse(bitmap_bh);
++ *errp = err;
++ block = 0;
++out:
++ if (!(flags & 2)) {
++ /* block wasn't reserved before and we reserved it
++ * at the beginning of allocation. it doesn't matter
++ * whether we allocated anything or we failed: time
++ * to release reservation. NOTE: because I expect
++ * any multiblock request from delayed allocation
++ * path only, here is single block always */
++ ext3_mb_release_blocks(sb, 1);
++ }
++ return block;
++}
++
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++ struct buffer_head *bh;
++ int i, err, count = 0;
++ struct ext3_buddy e3b;
++
++ err = ext3_mb_load_desc(sb, group, &e3b);
++ if (err)
++ goto out;
++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++
++ bh = read_block_bitmap(sb, group);
++ if (bh == NULL) {
++ err = -EIO;
++ goto out2;
++ }
++
++ /* loop over the blocks, and create buddies for free ones */
++ for (i = 0; i < sb->s_blocksize * 8; i++) {
++ if (!mb_test_bit(i, (void *) bh->b_data)) {
++ mb_free_blocks(&e3b, i, 1);
++ count++;
++ }
++ }
++ brelse(bh);
++ mb_check_buddy(&e3b);
++ ext3_mb_dirty_buddy(&e3b);
++
++out2:
++ ext3_mb_release_desc(&e3b);
++out:
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS /* journal credits reserved for one buddy-file update */ \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ 2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
++
++int ext3_mb_init_backend(struct super_block *sb) /* create/open the ".buddy" file and record per-group bitmap/buddy block numbers */
++{
++ struct inode *root = sb->s_root->d_inode;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct dentry *db;
++ tid_t target;
++ int err, i;
++
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_groups_count, GFP_KERNEL);
++ if (sbi->s_buddy_blocks == NULL) {
++ printk("can't allocate mem for buddy maps\n");
++ return -ENOMEM;
++ }
++ memset(sbi->s_buddy_blocks, 0,
++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sbi->s_buddy = NULL;
++
++ down(&root->i_sem);
++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++ strlen(EXT3_BUDDY_FILE));
++ if (IS_ERR(db)) {
++ err = PTR_ERR(db);
++ printk("can't lookup buddy file: %d\n", err);
++ goto out;
++ }
++
++ if (db->d_inode != NULL) { /* buddy file survived a previous mount; reuse it */
++ sbi->s_buddy = igrab(db->d_inode);
++ goto map;
++ }
++
++ err = ext3_create(root, db, S_IFREG, NULL);
++ if (err) {
++ printk("error while creation buddy file: %d\n", err);
++ } else {
++ sbi->s_buddy = igrab(db->d_inode);
++ }
++
++map: /* NOTE(review): reached even when ext3_create failed above, leaving sbi->s_buddy NULL — ext3_journal_start below would then oops; verify error handling */
++ for (i = 0; i < sbi->s_groups_count; i++) {
++ struct buffer_head *bh = NULL;
++ handle_t *handle;
++
++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out2;
++ }
++
++ /* allocate block for bitmap */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy bitmap: %d\n", err); /* NOTE(review): handle is not stopped on this path — possible journal handle leak; confirm */
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ brelse(bh);
++
++ /* allocate block for buddy */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ brelse(bh);
++ ext3_journal_stop(handle);
++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i].bb_tid = 0;
++ }
++
++ if (journal_start_commit(sbi->s_journal, &target)) /* push the buddy-file allocations to disk before use */
++ log_wait_commit(sbi->s_journal, target);
++
++out2:
++ dput(db);
++out:
++ up(&root->i_sem);
++ return err; /* NOTE(review): on the fully-successful path err holds whatever the last ext3_getblk left (and is uninitialized if s_groups_count == 0) — confirm */
++}
++
++int ext3_mb_release(struct super_block *sb) /* umount-time teardown: flush pending frees, drop buddy state */
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* release freed, non-committed blocks */
++ spin_lock(&sbi->s_md_lock);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_committed_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ ext3_mb_free_committed_blocks(sb);
++
++ if (sbi->s_buddy_blocks)
++ kfree(sbi->s_buddy_blocks);
++ if (sbi->s_buddy)
++ iput(sbi->s_buddy);
++ if (sbi->s_blocks_reserved) /* leftover reservations indicate an unbalanced reserve/release */
++ printk("ext3-fs: %ld blocks being reserved at umount!\n",
++ sbi->s_blocks_reserved);
++ return 0;
++}
++
++int ext3_mb_init(struct super_block *sb) /* mount-time setup of the multiblock allocator */
++{
++ struct ext3_super_block *es;
++ int i;
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* init file for buddy data; MBALLOC is cleared so setup itself uses the old allocator */
++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ ext3_mb_init_backend(sb); /* NOTE(review): return value ignored — a backend failure is not propagated; confirm */
++
++ es = EXT3_SB(sb)->s_es;
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++ ext3_mb_generate_buddy(sb, i); /* NOTE(review): errors ignored here as well */
++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++ spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ printk("EXT3-fs: mballoc enabled\n");
++ return 0;
++}
++
++void ext3_mb_free_committed_blocks(struct super_block *sb) /* return blocks of fully-committed transactions to the buddy maps */
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int err, i, count = 0, count2 = 0;
++ struct ext3_free_metadata *md;
++ struct ext3_buddy e3b;
++
++ if (list_empty(&sbi->s_committed_transaction))
++ return;
++
++ /* there are committed blocks to be freed yet */
++ do {
++ /* get next array of blocks */
++ md = NULL;
++ spin_lock(&sbi->s_md_lock);
++ if (!list_empty(&sbi->s_committed_transaction)) {
++ md = list_entry(sbi->s_committed_transaction.next,
++ struct ext3_free_metadata, list);
++ list_del(&md->list);
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ if (md == NULL)
++ break;
++
++ mb_debug("gonna free %u blocks in group %u (0x%p):",
++ md->num, md->group, md);
++
++ err = ext3_mb_load_desc(sb, md->group, &e3b);
++ BUG_ON(err != 0);
++
++ /* there are blocks to put in buddy to make them really free */
++ count += md->num;
++ count2++;
++ ext3_lock_group(sb, md->group);
++ for (i = 0; i < md->num; i++) {
++ mb_debug(" %u", md->blocks[i]);
++ mb_free_blocks(&e3b, md->blocks[i], 1);
++ }
++ mb_debug("\n");
++ ext3_unlock_group(sb, md->group);
++
++ kfree(md);
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ } while (md);
++ mb_debug("freed %u blocks in %u structures\n", count, count2);
++}
++
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (sbi->s_last_transaction == handle->h_transaction->t_tid)
++ return;
++
++ /* new transaction! time to close the last one and free blocks of
++ * the committed transaction. we know that only one transaction
++ * can be active, so the previous transaction may still be being
++ * logged, while the transaction before the previous one is known
++ * to be already logged. this means we may now free blocks freed
++ * in all transactions before the previous one. */
++
++ spin_lock(&sbi->s_md_lock);
++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { /* re-check under the lock */
++ mb_debug("new transaction %lu, old %lu\n",
++ (unsigned long) handle->h_transaction->t_tid,
++ (unsigned long) sbi->s_last_transaction);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_closed_transaction);
++ sbi->s_last_transaction = handle->h_transaction->t_tid;
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ ext3_mb_free_committed_blocks(sb);
++}
++
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++ int group, int block, int count) /* queue metadata blocks; they become reusable only after the current transaction commits */
++{
++ struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++ struct super_block *sb = e3b->bd_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_free_metadata *md;
++ int i;
++
++ ext3_lock_group(sb, group);
++ for (i = 0; i < count; i++) {
++ md = db->bb_md_cur;
++ if (md && db->bb_tid != handle->h_transaction->t_tid) {
++ db->bb_md_cur = NULL; /* container belongs to an older transaction; start a fresh one */
++ md = NULL;
++ }
++
++ if (md == NULL) {
++ ext3_unlock_group(sb, group); /* drop the lock to kmalloc; bb_md_cur is re-checked after re-taking it */
++ md = kmalloc(sizeof(*md), GFP_KERNEL);
++ if (md == NULL)
++ return -ENOMEM;
++ md->num = 0;
++ md->group = group;
++
++ ext3_lock_group(sb, group);
++ if (db->bb_md_cur == NULL) {
++ spin_lock(&sbi->s_md_lock);
++ list_add(&md->list, &sbi->s_active_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ db->bb_md_cur = md;
++ db->bb_tid = handle->h_transaction->t_tid;
++ mb_debug("new md 0x%p for group %u\n",
++ md, md->group);
++ } else {
++ kfree(md); /* lost the race; someone else installed a container meanwhile */
++ md = db->bb_md_cur;
++ }
++ }
++
++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++ md->blocks[md->num] = block + i;
++ md->num++;
++ if (md->num == EXT3_BB_MAX_BLOCKS) {
++ /* no more space, put full container on a sb's list */
++ db->bb_md_cur = NULL;
++ }
++ }
++ ext3_unlock_group(sb, group);
++ return 0;
++}
++
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ unsigned long bit, overflow;
++ struct buffer_head *gd_bh;
++ unsigned long block_group;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++ int err = 0, ret;
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle); /* rotate per-transaction free lists before freeing more */
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++ if (block < le32_to_cpu(es->s_first_data_block) ||
++ block + count < block ||
++ block + count > le32_to_cpu(es->s_blocks_count)) {
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks not in datazone - "
++ "block = %lu, count = %lu", block, count);
++ goto error_return;
++ }
++
++ ext3_debug("freeing block %lu\n", block);
++
++do_more:
++ overflow = 0;
++ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ bit = (block - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary.
++ */
++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++ count -= overflow;
++ }
++ brelse(bitmap_bh);
++ bitmap_bh = read_block_bitmap(sb, block_group);
++ if (!bitmap_bh)
++ goto error_return;
++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++ if (!gdp)
++ goto error_return;
++
++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++ in_range (block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group) ||
++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks in system zones - "
++ "Block = %lu, count = %lu",
++ block, count);
++
++ BUFFER_TRACE(bitmap_bh, "getting write access");
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err)
++ goto error_return;
++
++ /*
++ * We are about to modify some metadata. Call the journal APIs
++ * to unshare ->b_data if a currently-committing transaction is
++ * using it
++ */
++ BUFFER_TRACE(gd_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, gd_bh);
++ if (err)
++ goto error_return;
++
++ err = ext3_mb_load_desc(sb, block_group, &e3b);
++ if (err)
++ goto error_return;
++
++ if (metadata) {
++ /* blocks being freed are metadata. these blocks shouldn't
++ * be used until this transaction is committed */
++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); /* NOTE(review): -ENOMEM return ignored; group free-count is not adjusted on this deferred path — verify where the accounting happens */
++ } else {
++ ext3_lock_group(sb, block_group);
++ mb_free_blocks(&e3b, bit, count);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++ ext3_unlock_group(sb, block_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++ }
++
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ /* FIXME: undo logic will be implemented later and another way */
++ mb_clear_bits(bitmap_bh->b_data, bit, count);
++ DQUOT_FREE_BLOCK(inode, count);
++
++ /* We dirtied the bitmap block */
++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++ /* And the group descriptor block */
++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++ ret = ext3_journal_dirty_metadata(handle, gd_bh);
++ if (!err) err = ret;
++
++ if (overflow && !err) {
++ block += count;
++ count = overflow;
++ goto do_more;
++ }
++ sb->s_dirt = 1;
++error_return:
++ brelse(bitmap_bh);
++ ext3_std_error(sb, err);
++ return;
++}
++
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) /* reserve 'blocks' from the free pool; -ENOSPC if not available */
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int free, ret = -ENOSPC;
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++ if (blocks <= free - sbi->s_blocks_reserved) {
++ sbi->s_blocks_reserved += blocks;
++ ret = 0;
++ }
++ spin_unlock(&sbi->s_reserve_lock);
++ return ret;
++}
++
++void ext3_mb_release_blocks(struct super_block *sb, int blocks) /* undo a prior ext3_mb_reserve_blocks() */
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ BUG_ON(blocks < 0);
++ spin_lock(&sbi->s_reserve_lock);
++ sbi->s_blocks_reserved -= blocks;
++ WARN_ON(sbi->s_blocks_reserved < 0);
++ if (sbi->s_blocks_reserved < 0) /* clamp; going negative indicates unbalanced callers (see WARN_ON) */
++ sbi->s_blocks_reserved = 0;
++ spin_unlock(&sbi->s_reserve_lock);
++}
++
++int ext3_new_block(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *errp) /* single-block front end: dispatch to old allocator or mballoc per mount option */
++{
++ int ret, len;
++
++ if (!test_opt(inode->i_sb, MBALLOC)) {
++ ret = ext3_new_block_old(handle, inode, goal, errp);
++ goto out;
++ }
++ len = 1;
++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++out:
++ return ret;
++}
++
++
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ if (!test_opt(inode->i_sb, MBALLOC))
++ ext3_free_blocks_old(handle, inode, block, count); /* old path has no deferred-metadata handling; 'metadata' is ignored */
++ else
++ ext3_mb_free_blocks(handle, inode, block, count, metadata);
++ return;
++}
++
+Index: linux-2.6.5-sles9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300
++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300
+@@ -389,6 +389,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -542,7 +543,7 @@
+ Opt_commit, Opt_journal_update, Opt_journal_inum,
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_err, Opt_extents, Opt_extdebug
++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
+ };
+
+ static match_table_t tokens = {
+@@ -589,6 +590,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -810,6 +812,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1463,7 +1468,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.5-sles9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300
++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.5-sles9/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300
++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -274,7 +274,7 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
++void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
+ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+@@ -1142,7 +1142,7 @@
+ * bitmap, and then for any free bit if that fails.
+ * This function also updates quota and i_blocks field.
+ */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+ unsigned long goal, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.5-sles9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300
++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-2.6.5-sles9/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300
++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300
+@@ -572,7 +572,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -673,7 +673,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1829,7 +1829,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2000,7 +2000,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-2.6.5-sles9/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300
++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1391,7 +1391,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1879,10 +1879,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1894,7 +1896,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-2.6.5-sles9/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300
++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1504,7 +1504,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
+ /*
+ * Special inodes numbers
+ */
+@@ -339,6 +341,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -698,7 +701,7 @@
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300
++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300
+@@ -23,10 +23,30 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+ #include <linux/rbtree.h>
+
++#define EXT3_BB_MAX_BLOCKS 30
++struct ext3_free_metadata {
++ unsigned short group;
++ unsigned short num;
++ unsigned short blocks[EXT3_BB_MAX_BLOCKS];
++ struct list_head list;
++};
++
++#define EXT3_BB_MAX_ORDER 14
++
++struct ext3_buddy_group_blocks {
++ sector_t bb_bitmap;
++ sector_t bb_buddy;
++ spinlock_t bb_lock;
++ unsigned bb_counters[EXT3_BB_MAX_ORDER];
++ struct ext3_free_metadata *bb_md_cur;
++ unsigned long bb_tid;
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -78,6 +98,17 @@
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
--- /dev/null
+Index: linux-2.6.7/fs/ext3/mballoc.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.7/fs/ext3/mballoc.c 2004-09-06 12:51:42.000000000 +0400
+@@ -0,0 +1,1428 @@
++/*
++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
++ * Written by Alex Tomas <alex@clusterfs.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public Licens
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
++ */
++
++
++/*
++ * mballoc.c contains the multiblocks allocation routines
++ */
++
++#include <linux/config.h>
++#include <linux/time.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/module.h>
++
++/*
++ * TODO:
++ * - do not scan from the beginning, try to remember first free block
++ * - mb_mark_used_* may allocate chunk right after splitting buddy
++ * - special flag to advice allocator to look for requested + N blocks
++ * this may improve interaction between extents and mballoc
++ */
++
++/*
++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over
++ * structures. these checks slow things down a lot
++ */
++#define AGGRESSIVE_CHECK__
++
++/*
++ */
++#define MB_DEBUG__
++#ifdef MB_DEBUG
++#define mb_debug(fmt,a...) printk(fmt, ##a)
++#else
++#define mb_debug(fmt,a...)
++#endif
++
++/*
++ * where to save buddy structures between umount/mount (clean case only)
++ */
++#define EXT3_BUDDY_FILE ".buddy"
++
++/*
++ * max. number of chunks to be tracked in ext3_free_extent struct
++ */
++#define MB_ARR_SIZE 32
++
++struct ext3_allocation_context {
++ struct super_block *ac_sb;
++
++ /* search goals */
++ int ac_g_group;
++ int ac_g_start;
++ int ac_g_len;
++ int ac_g_flags;
++
++ /* the best found extent */
++ int ac_b_group;
++ int ac_b_start;
++ int ac_b_len;
++
++ /* number of iterations done. we have to track to limit searching */
++ int ac_repeats;
++ int ac_groups_scanned;
++ int ac_status; /* AC_STATUS_CONTINUE or AC_STATUS_FOUND */
++};
++
++#define AC_STATUS_CONTINUE 1
++#define AC_STATUS_FOUND 2
++
++
++struct ext3_buddy { /* in-memory view of one group's buddy-file blocks (see ext3_mb_load_desc) */
++ void *bd_bitmap; /* bd_bh->b_data: per-block allocation bitmap */
++ void *bd_buddy; /* bd_bh2->b_data: packed higher-order buddy bitmaps */
++ int bd_blkbits; /* sb->s_blocksize_bits */
++ struct buffer_head *bd_bh; /* buffer backing bd_bitmap */
++ struct buffer_head *bd_bh2; /* buffer backing bd_buddy */
++ struct ext3_buddy_group_blocks *bd_bd; /* per-group bookkeeping: lock, counters, md list */
++ struct super_block *bd_sb;
++};
++
++struct ext3_free_extent { /* NOTE(review): not referenced in this chunk — field semantics unverified here */
++ int fe_start;
++ int fe_len;
++ unsigned char fe_orders[MB_ARR_SIZE];
++ unsigned char fe_nums;
++ unsigned char fe_back;
++};
++
++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
++
++
++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
++int ext3_mb_reserve_blocks(struct super_block *, int);
++void ext3_mb_release_blocks(struct super_block *, int);
++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
++void ext3_mb_free_committed_blocks(struct super_block *);
++
++#define mb_correct_addr_and_bit(bit,addr) /* align addr down to a 4-byte boundary, folding the byte offset into bit; NOTE(review): casts a pointer to unsigned — 32-bit assumption, see arch-independence TODO */ \
++{ \
++ if ((unsigned) addr & 1) { \
++ bit += 8; \
++ addr--; \
++ } \
++ if ((unsigned) addr & 2) { \
++ bit += 16; \
++ addr--; \
++ addr--; \
++ } \
++}
++
++static inline int mb_test_bit(int bit, void *addr) /* test_bit() with word-alignment fixup (mb_correct_addr_and_bit) */
++{
++ mb_correct_addr_and_bit(bit,addr);
++ return test_bit(bit, addr);
++}
++
++static inline void mb_set_bit(int bit, void *addr) /* set_bit() with word-alignment fixup */
++{
++ mb_correct_addr_and_bit(bit,addr);
++ set_bit(bit, addr);
++}
++
++static inline void mb_clear_bit(int bit, void *addr) /* clear_bit() with word-alignment fixup */
++{
++ mb_correct_addr_and_bit(bit,addr);
++ clear_bit(bit, addr);
++}
++
++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) /* return the bitmap for 'order' and, via *max, the number of bits in it */
++{
++ int i = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(max != NULL);
++
++ if (order > e3b->bd_blkbits + 1)
++ return NULL;
++
++ /* at order 0 we see each particular block */
++ *max = 1 << (e3b->bd_blkbits + 3);
++ if (order == 0)
++ return e3b->bd_bitmap;
++
++ bb = e3b->bd_buddy;
++ *max = *max >> 1;
++ while (i < order) {
++ bb += 1 << (e3b->bd_blkbits - i); /* each order's bitmap is half the size of the previous one */
++ i++;
++ *max = *max >> 1;
++ }
++ return bb;
++}
++
++static int ext3_mb_load_desc(struct super_block *sb, int group,
++ struct ext3_buddy *e3b) /* read the group's bitmap+buddy blocks from the buddy file and fill *e3b */
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
++
++ /* load bitmap */
++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
++ if (e3b->bd_bh == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n");
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh);
++ wait_on_buffer(e3b->bd_bh);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh));
++
++ /* load buddy */
++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
++ if (e3b->bd_bh2 == NULL) {
++ ext3_error(sb, "ext3_mb_load_desc",
++ "can't get block for buddy bitmap\n"); /* NOTE(review): message says "bitmap" but this is the buddy block */
++ goto out;
++ }
++ if (!buffer_uptodate(e3b->bd_bh2)) {
++ ll_rw_block(READ, 1, &e3b->bd_bh2);
++ wait_on_buffer(e3b->bd_bh2);
++ }
++ J_ASSERT(buffer_uptodate(e3b->bd_bh2));
++
++ e3b->bd_bitmap = e3b->bd_bh->b_data;
++ e3b->bd_buddy = e3b->bd_bh2->b_data;
++ e3b->bd_blkbits = sb->s_blocksize_bits;
++ e3b->bd_bd = sbi->s_buddy_blocks + group;
++ e3b->bd_sb = sb;
++
++ return 0;
++out:
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2); /* NOTE(review): bd_bh2 is uninitialized when the first sb_getblk fails — callers pass a stack *e3b; verify */
++ e3b->bd_bh = NULL;
++ e3b->bd_bh2 = NULL;
++ return -EIO;
++}
++
++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b) /* mark both buddy-file buffers dirty for writeback */
++{
++ mark_buffer_dirty(e3b->bd_bh);
++ mark_buffer_dirty(e3b->bd_bh2);
++}
++
++static void ext3_mb_release_desc(struct ext3_buddy *e3b) /* drop the buffer references taken by ext3_mb_load_desc() */
++{
++ brelse(e3b->bd_bh);
++ brelse(e3b->bd_bh2);
++}
++
++#ifdef AGGRESSIVE_CHECK /* disabled by default: the file defines AGGRESSIVE_CHECK__ above */
++static void mb_check_buddy(struct ext3_buddy *e3b) /* debug-only: verify bitmap, buddy maps and counters are mutually consistent */
++{
++ int order = e3b->bd_blkbits + 1;
++ int max, max2, i, j, k, count;
++ void *buddy, *buddy2;
++
++ if (!test_opt(e3b->bd_sb, MBALLOC))
++ return;
++
++ while (order > 1) {
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ buddy2 = mb_find_buddy(e3b, order - 1, &max2);
++ J_ASSERT(buddy2);
++ J_ASSERT(buddy != buddy2);
++ J_ASSERT(max * 2 == max2);
++
++ count = 0;
++ for (i = 0; i < max; i++) {
++
++ if (!mb_test_bit(i, buddy)) {
++ /* only single bit in buddy2 may be 1 */
++ if (mb_test_bit(i << 1, buddy2))
++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
++ else if (mb_test_bit((i << 1) + 1, buddy2))
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ continue;
++ }
++
++ /* both bits in buddy2 must be 0 */
++ J_ASSERT(!mb_test_bit(i << 1, buddy2));
++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
++
++ for (j = 0; j < (1 << order); j++) {
++ k = (i * (1 << order)) + j;
++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
++ }
++ count++;
++ }
++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
++ order--;
++ }
++
++ buddy = mb_find_buddy(e3b, 0, &max);
++ for (i = 0; i < max; i++) {
++ if (mb_test_bit(i, buddy))
++ continue;
++ /* check used bits only */
++ for (j = 0; j < e3b->bd_blkbits + 1; j++) {
++ buddy2 = mb_find_buddy(e3b, j, &max2);
++ k = i >> j;
++ J_ASSERT(k < max2);
++ J_ASSERT(!mb_test_bit(k, buddy2));
++ }
++ }
++}
++#else
++#define mb_check_buddy(e3b)
++#endif
++
++static inline void
++ext3_lock_group(struct super_block *sb, int group) /* serialize bitmap/buddy updates for one group */
++{
++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static inline void
++ext3_unlock_group(struct super_block *sb, int group)
++{
++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
++}
++
++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) /* order of the free buddy containing 'block', or 0 when no buddy bit is set */
++{
++ int order = 1;
++ void *bb;
++
++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
++
++ bb = e3b->bd_buddy;
++ while (order <= e3b->bd_blkbits + 1) {
++ block = block >> 1;
++ if (mb_test_bit(block, bb)) {
++ /* this block is part of buddy of order 'order' */
++ return order;
++ }
++ bb += 1 << (e3b->bd_blkbits - order); /* advance to the next order's bitmap */
++ order++;
++ }
++ return 0;
++}
++
++/*
++ * Clear @len bits in bitmap @bm starting at bit @cur.  Word-aligned
++ * runs of 32 bits are cleared with a single store.
++ * NOTE(review): the fast path assumes mb_clear_bit()'s bit numbering
++ * matches the in-memory layout of __u32 words — confirm on big-endian
++ * before relying on it there.
++ */
++static inline void mb_clear_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: clear whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0;
++ cur += 32;
++ continue;
++ }
++ mb_clear_bit(cur, bm);
++ cur++;
++ }
++}
++
++/*
++ * Set @len bits in bitmap @bm starting at bit @cur; mirror image of
++ * mb_clear_bits() above, with the same word-at-a-time fast path
++ * (and the same endianness caveat).
++ */
++static inline void mb_set_bits(void *bm, int cur, int len)
++{
++ __u32 *addr;
++
++ len = cur + len;
++ while (cur < len) {
++ if ((cur & 31) == 0 && (len - cur) >= 32) {
++ /* fast path: set whole word at once */
++ addr = bm + (cur >> 3);
++ *addr = 0xffffffff;
++ cur += 32;
++ continue;
++ }
++ mb_set_bit(cur, bm);
++ cur++;
++ }
++}
++
++/*
++ * Return @count blocks starting at @first to the buddy system of
++ * @e3b's group: each block is marked free in the bitmap copy, then
++ * merged with its buddy up the orders for as long as both halves of
++ * a pair are free.
++ */
++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
++{
++ int block, max, order;
++ void *buddy, *buddy2;
++
++ mb_check_buddy(e3b);
++ while (count-- > 0) {
++ block = first++;
++ order = 0;
++
++ /* freeing an already-free block would corrupt counters */
++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
++ mb_set_bit(block, e3b->bd_bitmap);
++ e3b->bd_bd->bb_counters[order]++;
++
++ /* start of the buddy */
++ buddy = mb_find_buddy(e3b, order, &max);
++
++ do {
++ block &= ~1UL;
++ if (!mb_test_bit(block, buddy) ||
++ !mb_test_bit(block + 1, buddy))
++ break;
++
++ /* both the buddies are free, try to coalesce them */
++ buddy2 = mb_find_buddy(e3b, order + 1, &max);
++
++ if (!buddy2)
++ break;
++
++ if (order > 0) {
++ /* for special purposes, we don't clear
++ * free bits in bitmap */
++ mb_clear_bit(block, buddy);
++ mb_clear_bit(block + 1, buddy);
++ }
++ /* the two order-'order' chunks disappear ... */
++ e3b->bd_bd->bb_counters[order]--;
++ e3b->bd_bd->bb_counters[order]--;
++
++ block = block >> 1;
++ order++;
++ /* ... and one chunk of the next order appears */
++ e3b->bd_bd->bb_counters[order]++;
++
++ mb_set_bit(block, buddy2);
++ buddy = buddy2;
++ } while (1);
++ }
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * Build from @in a "backward" extent @out covering the tail of @in.
++ * Returns 1 if @out alone is enough to fill @needed blocks, 0
++ * otherwise (in which case @out is not usable).
++ */
++int mb_make_backward_extent(struct ext3_free_extent *in,
++ struct ext3_free_extent *out, int needed)
++{
++ int i;
++
++ J_ASSERT(in);
++ J_ASSERT(out);
++ J_ASSERT(in->fe_nums < MB_ARR_SIZE);
++
++ /* out starts as the empty extent just past the end of 'in'
++ * and grows backwards, chunk by chunk */
++ out->fe_len = 0;
++ out->fe_start = in->fe_start + in->fe_len;
++ out->fe_nums = 0;
++
++ /* for single-chunk extent we need not back order
++ * also, if an extent doesn't fill needed space
++ * then it makes no sense to try back order because
++ * if we select this extent then it'll be used as is */
++ if (in->fe_nums < 2 || in->fe_len < needed)
++ return 0;
++
++ /* take chunks from the tail until 'needed' is covered */
++ i = in->fe_nums - 1;
++ while (i >= 0 && out->fe_len < needed) {
++ out->fe_len += (1 << in->fe_orders[i]);
++ out->fe_start -= (1 << in->fe_orders[i]);
++ i--;
++ }
++ /* FIXME: in some situation fe_orders may be too small to hold
++ * all the buddies */
++ J_ASSERT(out->fe_len >= needed);
++
++ /* record the orders of the chunks actually taken */
++ for (i++; i < in->fe_nums; i++)
++ out->fe_orders[out->fe_nums++] = in->fe_orders[i];
++ J_ASSERT(out->fe_nums < MB_ARR_SIZE);
++ out->fe_back = 1;
++
++ return 1;
++}
++
++/*
++ * Starting at @block of the order-@order buddy bitmap, collect a free
++ * extent of up to @needed blocks into @ex, coalescing forward with
++ * neighbouring free chunks.  Returns the extent length found (0 when
++ * the starting chunk is not free).
++ */
++int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
++ int needed, struct ext3_free_extent *ex)
++{
++ int space = needed;
++ int next, max, ord;
++ void *buddy;
++
++ J_ASSERT(ex != NULL);
++
++ ex->fe_nums = 0;
++ ex->fe_len = 0;
++ /* fixed: fe_start used to be left uninitialised on the 'nofree'
++ * path, so the final range assert read garbage */
++ ex->fe_start = 0;
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++ J_ASSERT(block < max);
++ if (!mb_test_bit(block, buddy))
++ goto nofree;
++
++ if (order == 0) {
++ /* find actual order */
++ order = mb_find_order_for_block(e3b, block);
++ block = block >> order;
++ }
++
++ ex->fe_orders[ex->fe_nums++] = order;
++ ex->fe_len = 1 << order;
++ ex->fe_start = block << order;
++ ex->fe_back = 0;
++
++ /* keep appending adjacent free chunks until we have enough */
++ while ((space = space - (1 << order)) > 0) {
++
++ buddy = mb_find_buddy(e3b, order, &max);
++ J_ASSERT(buddy);
++
++ if (block + 1 >= max)
++ break;
++
++ next = (block + 1) * (1 << order);
++ if (!mb_test_bit(next, e3b->bd_bitmap))
++ break;
++
++ ord = mb_find_order_for_block(e3b, next);
++
++ if ((1 << ord) >= needed) {
++ /* we dont want to coalesce with self-enough buddies */
++ break;
++ }
++ order = ord;
++ block = next >> order;
++ ex->fe_len += 1 << order;
++
++ /* record the chunk's order if the array has room */
++ if (ex->fe_nums < MB_ARR_SIZE)
++ ex->fe_orders[ex->fe_nums++] = order;
++ }
++
++nofree:
++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
++ return ex->fe_len;
++}
++
++/*
++ * Mark @len blocks at the tail of free extent @ex as in use, walking
++ * from the extent's last block backwards.  Covering buddy chunks are
++ * consumed whole when aligned, otherwise split down one order and
++ * retried; finally the range is cleared in the bitmap copy.
++ */
++static int mb_mark_used_backward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int ord, mlen, max, cur;
++ int start, len0 = len;
++ void *buddy;
++
++ /* fixed: 'start' was initialised from ex->fe_start and then
++ * immediately overwritten — a dead store */
++ start = ex->fe_start + ex->fe_len - 1;
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
++ len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start -= mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ J_ASSERT(start >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ /* mark both halves free one order down and retry; the
++ * loop will then consume the aligned half */
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * Mark @len blocks at the head of free extent @ex as in use, walking
++ * forward from ex->fe_start.  Covering buddy chunks are consumed
++ * whole when aligned, otherwise split down one order and retried;
++ * finally the range is cleared in the bitmap copy.
++ */
++static int mb_mark_used_forward(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ int start = ex->fe_start, len0 = len;
++ int ord, mlen, max, cur;
++ void *buddy;
++
++ while (len) {
++ ord = mb_find_order_for_block(e3b, start);
++
++ if (((start >> ord) << ord) == start && len >= (1 << ord)) {
++ /* the whole chunk may be allocated at once! */
++ mlen = 1 << ord;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ J_ASSERT((start >> ord) < max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++ start += mlen;
++ len -= mlen;
++ J_ASSERT(len >= 0);
++ continue;
++ }
++
++ /* we have to split large buddy */
++ J_ASSERT(ord > 0);
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_clear_bit(start >> ord, buddy);
++ e3b->bd_bd->bb_counters[ord]--;
++
++ /* mark both halves free one order down and retry; the
++ * loop will then consume the aligned half */
++ ord--;
++ cur = (start >> ord) & ~1U;
++ buddy = mb_find_buddy(e3b, ord, &max);
++ mb_set_bit(cur, buddy);
++ mb_set_bit(cur + 1, buddy);
++ e3b->bd_bd->bb_counters[ord]++;
++ e3b->bd_bd->bb_counters[ord]++;
++ }
++
++ /* now drop all the bits in bitmap */
++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
++
++ mb_check_buddy(e3b);
++
++ return 0;
++}
++
++/*
++ * Mark @len blocks of @ex in use, dispatching on the direction the
++ * extent was built in (fe_back set by mb_make_backward_extent()).
++ */
++int inline mb_mark_used(struct ext3_buddy *e3b,
++ struct ext3_free_extent *ex, int len)
++{
++ J_ASSERT(ex);
++
++ return ex->fe_back ?
++ mb_mark_used_backward(e3b, ex, len) :
++ mb_mark_used_forward(e3b, ex, len);
++}
++
++/*
++ * Try to satisfy the allocation described by @ac from group @group.
++ * A goal request (flag bit 0) is honoured first; otherwise the
++ * group's free bits are scanned for the first extent long enough.
++ * On success the ac_b_* fields are filled and ac_status is set to
++ * AC_STATUS_FOUND.  Caller holds the group lock.
++ */
++int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
++ struct ext3_buddy *e3b, int group)
++{
++ struct super_block *sb = ac->ac_sb;
++ struct ext3_free_extent curex;
++ int err, max, i;
++
++ /* (fixed: the old 'gorder' computation was dead code — the
++ * value was never used) */
++
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
++ /* someone asks for space at this specified block
++ * probably he wants to merge it into existing extent */
++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
++ /* good. at least one block is free */
++ mb_find_extent(e3b, 0, ac->ac_g_start,
++ ac->ac_g_len, &curex);
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ err = 0;
++ goto out;
++ }
++ /* don't try to find goal anymore */
++ ac->ac_g_flags &= ~1;
++ }
++
++ /* linear scan over the group's free bits */
++ i = 0;
++ while (1) {
++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
++ if (i >= sb->s_blocksize * 8)
++ break;
++
++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
++ if (max >= ac->ac_g_len) {
++ max = min(curex.fe_len, ac->ac_g_len);
++ mb_mark_used(e3b, &curex, max);
++
++ ac->ac_b_group = group;
++ ac->ac_b_start = curex.fe_start;
++ ac->ac_b_len = max;
++ ac->ac_status = AC_STATUS_FOUND;
++ break;
++ }
++ /* skip past the too-short extent */
++ i += max;
++ }
++
++ return 0;
++
++out:
++ return err;
++}
++
++/*
++ * Decide whether group @group is worth scanning for the request in
++ * @ac under criterion @cr (0 = strictest, 2 = anything with free
++ * blocks goes).
++ */
++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
++{
++ struct ext3_group_desc *gdp;
++ int free_blocks;
++
++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
++ if (gdp == NULL)
++ return 0;
++
++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++ if (!free_blocks)
++ return 0;
++
++ /* the goal group is always worth a look */
++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
++ return 1;
++
++ /* FIXME: I'd like to take fragmentation into account here */
++ switch (cr) {
++ case 0:
++ return free_blocks >= ac->ac_g_len >> 1 ? 1 : 0;
++ case 1:
++ return free_blocks >= ac->ac_g_len >> 2 ? 1 : 0;
++ case 2:
++ return 1;
++ default:
++ BUG();
++ }
++ return 0;
++}
++
++/*
++ * Multi-block allocator entry point.  Allocates up to *len blocks
++ * near @goal; returns the first allocated block number (0 on failure)
++ * and the actual count via *len, with the error code in *errp.
++ * flags bit 0: caller insists on the goal block; flags bit 1: the
++ * blocks were reserved beforehand via ext3_mb_reserve_blocks().
++ */
++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++ unsigned long goal, int *len, int flags, int *errp)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_allocation_context ac;
++ int i, group, block, cr, err = 0;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ struct buffer_head *gdp_bh;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++
++ J_ASSERT(len != NULL);
++ J_ASSERT(*len > 0);
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk("ext3_mb_new_nblocks: nonexistent device");
++ return 0;
++ }
++
++ if (!test_opt(sb, MBALLOC)) {
++ static int ext3_mballoc_warning = 0;
++ if (ext3_mballoc_warning == 0) {
++ printk(KERN_ERR "EXT3-fs: multiblock request with "
++ "mballoc disabled!\n");
++ ext3_mballoc_warning++;
++ }
++ /* fall back to the legacy single-block allocator */
++ *len = 1;
++ err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp);
++ return err;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++
++ if (!(flags & 2)) {
++ /* someone asks for non-reserved blocks */
++ BUG_ON(*len > 1);
++ err = ext3_mb_reserve_blocks(sb, 1);
++ if (err) {
++ *errp = err;
++ return 0;
++ }
++ }
++
++ /*
++ * Check quota for allocation of this blocks.
++ */
++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
++ *len -= 1;
++ if (*len == 0) {
++ *errp = -EDQUOT;
++ block = 0;
++ goto out;
++ }
++
++ /* start searching from the goal */
++ if (goal < le32_to_cpu(es->s_first_data_block) ||
++ goal >= le32_to_cpu(es->s_blocks_count))
++ goal = le32_to_cpu(es->s_first_data_block);
++ group = (goal - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ block = ((goal - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb));
++
++ /* set up allocation goals */
++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
++ ac.ac_status = 0;
++ ac.ac_groups_scanned = 0;
++ ac.ac_sb = inode->i_sb;
++ ac.ac_g_group = group;
++ ac.ac_g_start = block;
++ ac.ac_g_len = *len;
++ ac.ac_g_flags = flags;
++
++ /* loop over the groups, relaxing the criterion each pass */
++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
++ if (group == EXT3_SB(sb)->s_groups_count)
++ group = 0;
++
++ /* check is group good for our criteries */
++ if (!mb_good_group(&ac, group, cr))
++ continue;
++
++ err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
++ if (err)
++ goto out_err;
++
++ ext3_lock_group(sb, group);
++ if (!mb_good_group(&ac, group, cr)) {
++ /* someone did allocation from this group */
++ ext3_unlock_group(sb, group);
++ ext3_mb_release_desc(&e3b);
++ continue;
++ }
++
++ err = ext3_mb_new_in_group(&ac, &e3b, group);
++ ext3_unlock_group(sb, group);
++ if (ac.ac_status == AC_STATUS_FOUND)
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++ if (err)
++ goto out_err;
++ if (ac.ac_status == AC_STATUS_FOUND)
++ break;
++ }
++ }
++
++ if (ac.ac_status != AC_STATUS_FOUND) {
++ /* unfortunately, we can't satisfy this request */
++ J_ASSERT(ac.ac_b_len == 0);
++ DQUOT_FREE_BLOCK(inode, *len);
++ *errp = -ENOSPC;
++ block = 0;
++ goto out;
++ }
++
++ /* good news - free block(s) have been found. now it's time
++ * to mark block(s) in good old journaled bitmap */
++
++ /* (fixed: the old debug printout dereferenced 'gdp' before it
++ * was read and accessed '.group' on the int ac_b_group) */
++ ext3_debug("using block group %d\n", ac.ac_b_group);
++
++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
++ if (!bitmap_bh) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err) {
++ *errp = err;
++ goto out_err;
++ }
++
++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
++ if (!gdp) {
++ *errp = -EIO;
++ goto out_err;
++ }
++
++ err = ext3_journal_get_write_access(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ /* global number of the first allocated block (this used to be
++ * computed twice, identically) */
++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
++ + le32_to_cpu(es->s_first_data_block);
++
++ if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
++ block == le32_to_cpu(gdp->bg_inode_bitmap) ||
++ in_range(block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error(sb, "ext3_new_block",
++ "Allocating block in system zone - "
++ "block = %u", block);
++#if 0
++ for (i = 0; i < ac.ac_b_len; i++)
++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
++#endif
++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
++
++ ext3_lock_group(sb, ac.ac_b_group);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) -
++ ac.ac_b_len);
++ ext3_unlock_group(sb, ac.ac_b_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
++
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++ if (err)
++ goto out_err;
++ err = ext3_journal_dirty_metadata(handle, gdp_bh);
++ if (err)
++ goto out_err;
++
++ sb->s_dirt = 1;
++ *errp = 0;
++ brelse(bitmap_bh);
++
++ /* drop non-allocated, but dquote'd blocks */
++ J_ASSERT(*len >= ac.ac_b_len);
++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
++
++ *len = ac.ac_b_len;
++ J_ASSERT(block != 0);
++ goto out;
++
++out_err:
++ /* if we've already allocated something, roll it back */
++ if (ac.ac_status == AC_STATUS_FOUND) {
++ /* FIXME: free blocks here */
++ }
++
++ DQUOT_FREE_BLOCK(inode, *len);
++ brelse(bitmap_bh);
++ *errp = err;
++ block = 0;
++out:
++ if (!(flags & 2)) {
++ /* block wasn't reserved before and we reserved it
++ * at the beginning of allocation. it doesn't matter
++ * whether we allocated anything or we failed: time
++ * to release reservation. NOTE: because I expect
++ * any multiblock request from delayed allocation
++ * path only, here is single block always */
++ ext3_mb_release_blocks(sb, 1);
++ }
++ return block;
++}
++
++/*
++ * (Re)build the buddy data of @group from its on-disk block bitmap:
++ * both backing blocks are zeroed, then every free block is fed
++ * through mb_free_blocks() so the orders and counters are rebuilt.
++ * NOTE(review): the scan covers all s_blocksize*8 bits — assumes the
++ * padding bits of the last group are marked in-use on disk; confirm.
++ */
++int ext3_mb_generate_buddy(struct super_block *sb, int group)
++{
++ struct buffer_head *bh;
++ int i, err, count = 0;
++ struct ext3_buddy e3b;
++
++ err = ext3_mb_load_desc(sb, group, &e3b);
++ if (err)
++ goto out;
++ /* start from a clean slate for both bitmap copy and buddy */
++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
++
++ bh = read_block_bitmap(sb, group);
++ if (bh == NULL) {
++ err = -EIO;
++ goto out2;
++ }
++
++ /* loop over the blocks, and create buddies for free ones
++ * (on-disk convention: clear bit == free) */
++ for (i = 0; i < sb->s_blocksize * 8; i++) {
++ if (!mb_test_bit(i, (void *) bh->b_data)) {
++ mb_free_blocks(&e3b, i, 1);
++ count++;
++ }
++ }
++ brelse(bh);
++ mb_check_buddy(&e3b);
++ ext3_mb_dirty_buddy(&e3b);
++
++out2:
++ ext3_mb_release_desc(&e3b);
++out:
++ return err;
++}
++
++EXPORT_SYMBOL(ext3_mb_new_blocks);
++
++#define MB_CREDITS \
++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \
++ 2 * EXT3_QUOTA_INIT_BLOCKS)
++
++/*
++ * Create (or open) the hidden buddy backing file and allocate, for
++ * each group, one block for the bitmap copy and one for the buddy
++ * data, recording their physical block numbers in
++ * sbi->s_buddy_blocks[].  Returns 0 on success, negative errno on
++ * failure.
++ */
++int ext3_mb_init_backend(struct super_block *sb)
++{
++ struct inode *root = sb->s_root->d_inode;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct dentry *db;
++ tid_t target;
++ int err, i;
++
++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
++ sbi->s_groups_count, GFP_KERNEL);
++ if (sbi->s_buddy_blocks == NULL) {
++ printk("can't allocate mem for buddy maps\n");
++ return -ENOMEM;
++ }
++ memset(sbi->s_buddy_blocks, 0,
++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
++ sbi->s_buddy = NULL;
++
++ down(&root->i_sem);
++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
++ strlen(EXT3_BUDDY_FILE));
++ if (IS_ERR(db)) {
++ err = PTR_ERR(db);
++ printk("can't lookup buddy file: %d\n", err);
++ goto out;
++ }
++
++ if (db->d_inode != NULL) {
++ sbi->s_buddy = igrab(db->d_inode);
++ goto map;
++ }
++
++ err = ext3_create(root, db, S_IFREG, NULL);
++ if (err) {
++ /* fixed: bail out here — falling through to 'map' with
++ * a NULL s_buddy oopsed in ext3_journal_start() */
++ printk("error while creation buddy file: %d\n", err);
++ goto out2;
++ }
++ sbi->s_buddy = igrab(db->d_inode);
++
++map:
++ for (i = 0; i < sbi->s_groups_count; i++) {
++ struct buffer_head *bh = NULL;
++ handle_t *handle;
++
++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
++ if (IS_ERR(handle)) {
++ err = PTR_ERR(handle);
++ goto out2;
++ }
++
++ /* allocate block for bitmap */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy bitmap: %d\n", err);
++ /* fixed: the handle used to leak on this path */
++ ext3_journal_stop(handle);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
++ brelse(bh);
++
++ /* allocate block for buddy */
++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
++ if (bh == NULL) {
++ printk("can't get block for buddy: %d\n", err);
++ /* fixed: the handle used to leak on this path */
++ ext3_journal_stop(handle);
++ goto out2;
++ }
++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
++ brelse(bh);
++ ext3_journal_stop(handle);
++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
++ sbi->s_buddy_blocks[i].bb_md_cur = NULL;
++ sbi->s_buddy_blocks[i].bb_tid = 0;
++ }
++
++ /* success: make the return value well-defined and push the
++ * buddy file blocks out to disk before first use */
++ err = 0;
++ if (journal_start_commit(sbi->s_journal, &target))
++ log_wait_commit(sbi->s_journal, target);
++
++out2:
++ dput(db);
++out:
++ up(&root->i_sem);
++ return err;
++}
++
++/*
++ * Tear down mballoc state at umount: flush every pending free-block
++ * record (whatever list it sits on) and release the buddy backend.
++ */
++int ext3_mb_release(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* fold every pending list into the committed one, then drain */
++ spin_lock(&sbi->s_md_lock);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_committed_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ ext3_mb_free_committed_blocks(sb);
++
++ if (sbi->s_buddy_blocks != NULL)
++ kfree(sbi->s_buddy_blocks);
++ if (sbi->s_buddy != NULL)
++ iput(sbi->s_buddy);
++ if (sbi->s_blocks_reserved != 0)
++ printk("ext3-fs: %ld blocks being reserved at umount!\n",
++ sbi->s_blocks_reserved);
++ return 0;
++}
++
++/*
++ * Set up mballoc at mount time: open/create the buddy backend file,
++ * regenerate the buddy data of every group and initialise the
++ * transaction bookkeeping.  MBALLOC stays cleared if the backend
++ * cannot be initialised.
++ */
++int ext3_mb_init(struct super_block *sb)
++{
++ int i, err;
++
++ if (!test_opt(sb, MBALLOC))
++ return 0;
++
++ /* init file for buddy data */
++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ err = ext3_mb_init_backend(sb);
++ if (err) {
++ /* fixed: the backend result used to be ignored, letting
++ * the buddy generation below run on a broken setup */
++ return err;
++ }
++
++ /* (fixed: the unused 'es' local was removed) */
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
++ ext3_mb_generate_buddy(sb, i);
++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
++ spin_lock_init(&EXT3_SB(sb)->s_md_lock);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
++ printk("EXT3-fs: mballoc enabled\n");
++ return 0;
++}
++
++/*
++ * Drain sbi->s_committed_transaction: give every queued block back
++ * to the buddy system.  Records are detached one at a time under
++ * s_md_lock so concurrent freers may keep queueing while we work.
++ */
++void ext3_mb_free_committed_blocks(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int err, i, count = 0, count2 = 0;
++ struct ext3_free_metadata *md;
++ struct ext3_buddy e3b;
++
++ /* unlocked peek; the list is re-checked under the lock below */
++ if (list_empty(&sbi->s_committed_transaction))
++ return;
++
++ /* there is committed blocks to be freed yet */
++ do {
++ /* get next array of blocks */
++ md = NULL;
++ spin_lock(&sbi->s_md_lock);
++ if (!list_empty(&sbi->s_committed_transaction)) {
++ md = list_entry(sbi->s_committed_transaction.next,
++ struct ext3_free_metadata, list);
++ list_del(&md->list);
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ if (md == NULL)
++ break;
++
++ mb_debug("gonna free %u blocks in group %u (0x%p):",
++ md->num, md->group, md);
++
++ err = ext3_mb_load_desc(sb, md->group, &e3b);
++ BUG_ON(err != 0);
++
++ /* there are blocks to put in buddy to make them really free */
++ count += md->num;
++ count2++;
++ ext3_lock_group(sb, md->group);
++ for (i = 0; i < md->num; i++) {
++ mb_debug(" %u", md->blocks[i]);
++ mb_free_blocks(&e3b, md->blocks[i], 1);
++ }
++ mb_debug("\n");
++ ext3_unlock_group(sb, md->group);
++
++ kfree(md);
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ } while (md);
++ mb_debug("freed %u blocks in %u structures\n", count, count2);
++}
++
++/*
++ * Detect the switch to a new transaction and rotate the free-block
++ * lists (active -> closed -> committed), then release everything on
++ * the committed list.  Uses a cheap unlocked check first, re-checked
++ * under s_md_lock.
++ */
++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ if (sbi->s_last_transaction == handle->h_transaction->t_tid)
++ return;
++
++ /* new transaction! time to close last one and free blocks for
++ * committed transaction. we know that only one transaction can
++ * be active, so the previous transaction can be being logged and
++ * we know that the transaction before the previous is already
++ * logged. this means that now we may free blocks freed in all
++ * transactions before the previous one. hope I'm clear enough ... */
++
++ spin_lock(&sbi->s_md_lock);
++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
++ mb_debug("new transaction %lu, old %lu\n",
++ (unsigned long) handle->h_transaction->t_tid,
++ (unsigned long) sbi->s_last_transaction);
++ list_splice_init(&sbi->s_closed_transaction,
++ &sbi->s_committed_transaction);
++ list_splice_init(&sbi->s_active_transaction,
++ &sbi->s_closed_transaction);
++ sbi->s_last_transaction = handle->h_transaction->t_tid;
++ }
++ spin_unlock(&sbi->s_md_lock);
++
++ ext3_mb_free_committed_blocks(sb);
++}
++
++/*
++ * Defer freeing of @count metadata blocks starting at @block within
++ * @group: they are queued on a per-group ext3_free_metadata record
++ * tied to the current transaction and only really freed once that
++ * transaction is known committed.  Takes the group lock internally
++ * and drops it around the GFP_KERNEL allocation.
++ */
++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
++ int group, int block, int count)
++{
++ struct ext3_buddy_group_blocks *db = e3b->bd_bd;
++ struct super_block *sb = e3b->bd_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_free_metadata *md;
++ int i;
++
++ ext3_lock_group(sb, group);
++ for (i = 0; i < count; i++) {
++ md = db->bb_md_cur;
++ /* a record from an older transaction must not be reused */
++ if (md && db->bb_tid != handle->h_transaction->t_tid) {
++ db->bb_md_cur = NULL;
++ md = NULL;
++ }
++
++ if (md == NULL) {
++ /* must not sleep while holding the group lock */
++ ext3_unlock_group(sb, group);
++ md = kmalloc(sizeof(*md), GFP_KERNEL);
++ if (md == NULL)
++ return -ENOMEM;
++ md->num = 0;
++ md->group = group;
++
++ ext3_lock_group(sb, group);
++ if (db->bb_md_cur == NULL) {
++ spin_lock(&sbi->s_md_lock);
++ list_add(&md->list, &sbi->s_active_transaction);
++ spin_unlock(&sbi->s_md_lock);
++ db->bb_md_cur = md;
++ db->bb_tid = handle->h_transaction->t_tid;
++ mb_debug("new md 0x%p for group %u\n",
++ md, md->group);
++ } else {
++ /* lost the race while unlocked: someone else
++ * installed a record — use theirs */
++ kfree(md);
++ md = db->bb_md_cur;
++ }
++ }
++
++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
++ md->blocks[md->num] = block + i;
++ md->num++;
++ if (md->num == EXT3_BB_MAX_BLOCKS) {
++ /* no more space, put full container on a sb's list */
++ db->bb_md_cur = NULL;
++ }
++ }
++ ext3_unlock_group(sb, group);
++ return 0;
++}
++
++/*
++ * mballoc counterpart of ext3_free_blocks(): return @count blocks
++ * starting at @block to the filesystem.  Data blocks go back to the
++ * buddy immediately; with @metadata set they are deferred via
++ * ext3_mb_free_metadata() until the owning transaction commits.
++ * Runs crossing a group boundary are handled by looping (do_more).
++ */
++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ struct buffer_head *bitmap_bh = NULL;
++ struct ext3_group_desc *gdp;
++ struct ext3_super_block *es;
++ unsigned long bit, overflow;
++ struct buffer_head *gd_bh;
++ unsigned long block_group;
++ struct ext3_sb_info *sbi;
++ struct super_block *sb;
++ struct ext3_buddy e3b;
++ int err = 0, ret;
++
++ sb = inode->i_sb;
++ if (!sb) {
++ printk ("ext3_free_blocks: nonexistent device");
++ return;
++ }
++
++ ext3_mb_poll_new_transaction(sb, handle);
++
++ sbi = EXT3_SB(sb);
++ es = EXT3_SB(sb)->s_es;
++ if (block < le32_to_cpu(es->s_first_data_block) ||
++ block + count < block ||
++ block + count > le32_to_cpu(es->s_blocks_count)) {
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks not in datazone - "
++ "block = %lu, count = %lu", block, count);
++ goto error_return;
++ }
++
++ ext3_debug("freeing block %lu\n", block);
++
++do_more:
++ overflow = 0;
++ block_group = (block - le32_to_cpu(es->s_first_data_block)) /
++ EXT3_BLOCKS_PER_GROUP(sb);
++ bit = (block - le32_to_cpu(es->s_first_data_block)) %
++ EXT3_BLOCKS_PER_GROUP(sb);
++ /*
++ * Check to see if we are freeing blocks across a group
++ * boundary.
++ */
++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
++ count -= overflow;
++ }
++ /* release the bitmap of a previous do_more pass, if any */
++ brelse(bitmap_bh);
++ bitmap_bh = read_block_bitmap(sb, block_group);
++ if (!bitmap_bh)
++ goto error_return;
++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
++ if (!gdp)
++ goto error_return;
++
++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
++ in_range (block, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group) ||
++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
++ EXT3_SB(sb)->s_itb_per_group))
++ ext3_error (sb, "ext3_free_blocks",
++ "Freeing blocks in system zones - "
++ "Block = %lu, count = %lu",
++ block, count);
++
++ BUFFER_TRACE(bitmap_bh, "getting write access");
++ err = ext3_journal_get_write_access(handle, bitmap_bh);
++ if (err)
++ goto error_return;
++
++ /*
++ * We are about to modify some metadata. Call the journal APIs
++ * to unshare ->b_data if a currently-committing transaction is
++ * using it
++ */
++ BUFFER_TRACE(gd_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, gd_bh);
++ if (err)
++ goto error_return;
++
++ err = ext3_mb_load_desc(sb, block_group, &e3b);
++ if (err)
++ goto error_return;
++
++ if (metadata) {
++ /* blocks being freed are metadata. these blocks shouldn't
++ * be used until this transaction is committed */
++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
++ } else {
++ /* data blocks: free in buddy and bump the counters now */
++ ext3_lock_group(sb, block_group);
++ mb_free_blocks(&e3b, bit, count);
++ gdp->bg_free_blocks_count =
++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++ ext3_unlock_group(sb, block_group);
++ percpu_counter_mod(&sbi->s_freeblocks_counter, count);
++ }
++
++ ext3_mb_dirty_buddy(&e3b);
++ ext3_mb_release_desc(&e3b);
++
++ /* FIXME: undo logic will be implemented later and another way */
++ mb_clear_bits(bitmap_bh->b_data, bit, count);
++ DQUOT_FREE_BLOCK(inode, count);
++
++ /* We dirtied the bitmap block */
++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
++ err = ext3_journal_dirty_metadata(handle, bitmap_bh);
++
++ /* And the group descriptor block */
++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
++ ret = ext3_journal_dirty_metadata(handle, gd_bh);
++ if (!err) err = ret;
++
++ if (overflow && !err) {
++ /* the run continued into the next group */
++ block += count;
++ count = overflow;
++ goto do_more;
++ }
++ sb->s_dirt = 1;
++error_return:
++ brelse(bitmap_bh);
++ ext3_std_error(sb, err);
++ return;
++}
++
++/*
++ * Reserve @blocks against the free-block counter.  Returns 0 on
++ * success, -ENOSPC when not enough unreserved free blocks remain.
++ */
++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int free, ret;
++
++ BUG_ON(blocks < 0);
++
++ spin_lock(&sbi->s_reserve_lock);
++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
++ if (free - sbi->s_blocks_reserved >= blocks) {
++ sbi->s_blocks_reserved += blocks;
++ ret = 0;
++ } else {
++ ret = -ENOSPC;
++ }
++ spin_unlock(&sbi->s_reserve_lock);
++ return ret;
++}
++
++/*
++ * Give back @blocks previously taken via ext3_mb_reserve_blocks().
++ * A negative balance is reported and clamped to zero.
++ */
++void ext3_mb_release_blocks(struct super_block *sb, int blocks)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ BUG_ON(blocks < 0);
++
++ spin_lock(&sbi->s_reserve_lock);
++ sbi->s_blocks_reserved -= blocks;
++ WARN_ON(sbi->s_blocks_reserved < 0);
++ if (sbi->s_blocks_reserved < 0)
++ sbi->s_blocks_reserved = 0;
++ spin_unlock(&sbi->s_reserve_lock);
++}
++
++/*
++ * Single-block allocation front end: route through mballoc when it
++ * is enabled, otherwise through the legacy allocator.
++ */
++int ext3_new_block(handle_t *handle, struct inode *inode,
++ unsigned long goal, u32 *pc, u32 *pb, int *errp)
++{
++ int len = 1;
++
++ if (!test_opt(inode->i_sb, MBALLOC))
++ return ext3_new_block_old(handle, inode, goal, pc, pb, errp);
++
++ return ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
++}
++
++
++/*
++ * Block-freeing front end: route through mballoc when it is enabled,
++ * otherwise through the legacy free path (which ignores @metadata).
++ */
++void ext3_free_blocks(handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count, int metadata)
++{
++ if (test_opt(inode->i_sb, MBALLOC))
++ ext3_mb_free_blocks(handle, inode, block, count, metadata);
++ else
++ ext3_free_blocks_old(handle, inode, block, count);
++}
++
+Index: linux-2.6.7/fs/ext3/super.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400
+@@ -392,6 +392,7 @@
+ struct ext3_super_block *es = sbi->s_es;
+ int i;
+
++ ext3_mb_release(sb);
+ ext3_ext_release(sb);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -594,7 +595,7 @@
+ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+- Opt_ignore, Opt_err, Opt_extents, Opt_extdebug
++ Opt_ignore, Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
+ };
+
+ static match_table_t tokens = {
+@@ -644,6 +645,7 @@
+ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_extents, "extents"},
+ {Opt_extdebug, "extdebug"},
++ {Opt_mballoc, "mballoc"},
+ {Opt_err, NULL}
+ };
+
+@@ -929,6 +931,9 @@
+ case Opt_extdebug:
+ set_opt (sbi->s_mount_opt, EXTDEBUG);
+ break;
++ case Opt_mballoc:
++ set_opt (sbi->s_mount_opt, MBALLOC);
++ break;
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1602,7 +1607,8 @@
+ ext3_count_dirs(sb));
+
+ ext3_ext_init(sb);
+-
++ ext3_mb_init(sb);
++
+ return 0;
+
+ failed_mount3:
+Index: linux-2.6.7/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+- ioctl.o namei.o super.o symlink.o hash.o extents.o
++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.7/fs/ext3/balloc.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/balloc.c 2004-08-26 17:11:16.000000000 +0400
++++ linux-2.6.7/fs/ext3/balloc.c 2004-09-03 08:46:59.000000000 +0400
+@@ -78,7 +78,7 @@
+ *
+ * Return buffer_head on success or NULL in case of failure.
+ */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+ struct ext3_group_desc * desc;
+@@ -98,8 +98,8 @@
+ }
+
+ /* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks (handle_t *handle, struct inode * inode,
+- unsigned long block, unsigned long count)
++void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
++ unsigned long block, unsigned long count)
+ {
+ struct buffer_head *bitmap_bh = NULL;
+ struct buffer_head *gd_bh;
+@@ -474,8 +474,8 @@
+ * This function also updates quota and i_blocks field.
+ */
+ int
+-ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
+- u32 *prealloc_count, u32 *prealloc_block, int *errp)
++ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal,
++ u32 *prealloc_count, u32 *prealloc_block, int *errp)
+ {
+ struct buffer_head *bitmap_bh = NULL; /* bh */
+ struct buffer_head *gdp_bh; /* bh2 */
+Index: linux-2.6.7/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400
+@@ -1640,7 +1640,7 @@
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ */
+-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
++int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+ handle_t *handle;
+Index: linux-2.6.7/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400
+@@ -254,7 +254,7 @@
+ ei->i_prealloc_count = 0;
+ ei->i_prealloc_block = 0;
+ /* Writer: end */
+- ext3_free_blocks (inode, block, total);
++ ext3_free_blocks (inode, block, total, 1);
+ }
+ #endif
+ }
+@@ -633,7 +633,7 @@
+ ext3_journal_forget(handle, branch[i].bh);
+ }
+ for (i = 0; i < keys; i++)
+- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+ return err;
+ }
+
+@@ -734,7 +734,7 @@
+ if (err == -EAGAIN)
+ for (i = 0; i < num; i++)
+ ext3_free_blocks(handle, inode,
+- le32_to_cpu(where[i].key), 1);
++ le32_to_cpu(where[i].key), 1, 1);
+ return err;
+ }
+
+@@ -1911,7 +1911,7 @@
+ }
+ }
+
+- ext3_free_blocks(handle, inode, block_to_free, count);
++ ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+
+ /**
+@@ -2082,7 +2082,7 @@
+ ext3_journal_test_restart(handle, inode);
+ }
+
+- ext3_free_blocks(handle, inode, nr, 1);
++ ext3_free_blocks(handle, inode, nr, 1, 1);
+
+ if (parent_bh) {
+ /*
+Index: linux-2.6.7/fs/ext3/extents.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400
+@@ -740,7 +740,7 @@
+ for (i = 0; i < depth; i++) {
+ if (!ablocks[i])
+ continue;
+- ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+ }
+ }
+ kfree(ablocks);
+@@ -1388,7 +1388,7 @@
+ path->p_idx->ei_leaf);
+ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+ return err;
+ }
+
+@@ -1876,10 +1876,12 @@
+ int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+ handle_t *handle = ext3_journal_start(tree->inode, needed);
+ struct buffer_head *bh;
+- int i;
++ int i, metadata = 0;
+
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
++ if (S_ISDIR(tree->inode->i_mode))
++ metadata = 1;
+ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+ /* tail removal */
+ unsigned long num, start;
+@@ -1891,7 +1893,7 @@
+ bh = sb_find_get_block(tree->inode->i_sb, start + i);
+ ext3_forget(handle, 0, tree->inode, bh, start + i);
+ }
+- ext3_free_blocks(handle, tree->inode, start, num);
++ ext3_free_blocks(handle, tree->inode, start, num, metadata);
+ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+ printk("strange request: removal %lu-%lu from %u:%u\n",
+ from, to, ex->ee_block, ex->ee_len);
+Index: linux-2.6.7/fs/ext3/xattr.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400
+@@ -1366,7 +1366,7 @@
+ new_bh = sb_getblk(sb, block);
+ if (!new_bh) {
+ getblk_failed:
+- ext3_free_blocks(handle, inode, block, 1);
++ ext3_free_blocks(handle, inode, block, 1, 1);
+ error = -EIO;
+ goto cleanup;
+ }
+@@ -1408,7 +1408,7 @@
+ if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ /* Free the old block. */
+ ea_bdebug(old_bh, "freeing");
+- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
+
+ /* ext3_forget() calls bforget() for us, but we
+ let our caller release old_bh, so we need to
+@@ -1497,7 +1497,7 @@
+ lock_buffer(bh);
+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
+ ext3_xattr_cache_remove(bh);
+- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
+ get_bh(bh);
+ ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
+ } else {
+Index: linux-2.6.7/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-03 08:47:35.000000000 +0400
+@@ -57,6 +57,8 @@
+ #define ext3_debug(f, a...) do {} while (0)
+ #endif
+
++#define EXT3_MULTIBLOCK_ALLOCATOR 1
++
+ /*
+ * Special inodes numbers
+ */
+@@ -335,6 +337,7 @@
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */
++#define EXT3_MOUNT_MBALLOC 0x100000 /* Buddy allocation support */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -695,7 +698,7 @@
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
+ __u32 *, __u32 *, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+- unsigned long);
++ unsigned long, int);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+ extern void ext3_check_blocks_bitmap (struct super_block *);
+ extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
+Index: linux-2.6.7/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400
++++ linux-2.6.7/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400
+@@ -23,9 +23,29 @@
+ #define EXT_INCLUDE
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #endif
+
++#define EXT3_BB_MAX_BLOCKS 30
++struct ext3_free_metadata {
++ unsigned short group;
++ unsigned short num;
++ unsigned short blocks[EXT3_BB_MAX_BLOCKS];
++ struct list_head list;
++};
++
++#define EXT3_BB_MAX_ORDER 14
++
++struct ext3_buddy_group_blocks {
++ sector_t bb_bitmap;
++ sector_t bb_buddy;
++ spinlock_t bb_lock;
++ unsigned bb_counters[EXT3_BB_MAX_ORDER];
++ struct ext3_free_metadata *bb_md_cur;
++ unsigned long bb_tid;
++};
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -76,6 +96,17 @@
+ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
+ int s_jquota_fmt; /* Format of quota to use */
+ #endif
++
++ /* for buddy allocator */
++ struct ext3_buddy_group_blocks *s_buddy_blocks;
++ struct inode *s_buddy;
++ long s_blocks_reserved;
++ spinlock_t s_reserve_lock;
++ struct list_head s_active_transaction;
++ struct list_head s_closed_transaction;
++ struct list_head s_committed_transaction;
++ spinlock_t s_md_lock;
++ tid_t s_last_transaction;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
--- /dev/null
+Index: linux-2.6.7/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.7.orig/fs/ext3/namei.c 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/fs/ext3/namei.c 2004-08-20 17:48:54.000000000 -0600
+@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t *
+ static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
+ {
+ inode->i_nlink++;
++ if (is_dx(inode) && inode->i_nlink > 1) {
++ /* limit is 16-bit i_links_count */
++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2)
++ inode->i_nlink = 1;
++ }
+ }
+
+ static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
+ {
+- inode->i_nlink--;
++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
++ inode->i_nlink--;
+ }
+
+ static int ext3_add_nondir(handle_t *handle,
+@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir
+ struct ext3_dir_entry_2 * de;
+ int err;
+
+- if (dir->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(dir))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+- inode->i_nlink--; /* is this nlink == 0? */
++ ext3_dec_count(handle, inode); /* is this nlink == 0? */
+ ext3_mark_inode_dirty(handle, inode);
+ iput (inode);
+ goto out_stop;
+@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir
+ iput (inode);
+ goto out_stop;
+ }
+- dir->i_nlink++;
++ ext3_inc_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+ d_instantiate(dentry, inode);
+@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di
+ retval = ext3_delete_entry(handle, dir, de, bh);
+ if (retval)
+ goto end_rmdir;
+- if (inode->i_nlink != 2)
+- ext3_warning (inode->i_sb, "ext3_rmdir",
+- "empty directory has nlink!=2 (%d)",
+- inode->i_nlink);
++ if (!EXT3_DIR_LINK_EMPTY(inode))
++ ext3_warning(inode->i_sb, "ext3_rmdir",
++ "empty directory has too many links (%d)",
++ inode->i_nlink);
+ inode->i_version++;
+ inode->i_nlink = 0;
+ /* There's no need to set i_disksize: the fact that i_nlink is
+@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+- dir->i_nlink--;
++ ext3_dec_count(handle, dir);
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+
+@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+ ext3_update_dx_flag(dir);
+ ext3_mark_inode_dirty(handle, dir);
+- inode->i_nlink--;
++ ext3_dec_count(handle, inode);
+ if (!inode->i_nlink)
+ ext3_orphan_add(handle, inode);
+ inode->i_ctime = dir->i_ctime;
+@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol
+ struct inode *inode = old_dentry->d_inode;
+ int err;
+
+- if (inode->i_nlink >= EXT3_LINK_MAX)
++ if (EXT3_DIR_LINK_MAXED(inode))
+ return -EMLINK;
+
+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+@@ -2230,8 +2235,8 @@ static int ext3_rename (struct inode * o
+ if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino)
+ goto end_rename;
+ retval = -EMLINK;
+- if (!new_inode && new_dir!=old_dir &&
+- new_dir->i_nlink >= EXT3_LINK_MAX)
++ if (!new_inode && new_dir != old_dir &&
++ EXT3_DIR_LINK_MAXED(new_dir))
+ goto end_rename;
+ }
+ if (!new_bh) {
+@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o
+ }
+
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ new_inode->i_ctime = CURRENT_TIME;
+ }
+ old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
+@@ -2299,11 +2304,11 @@ static int ext3_rename (struct inode * o
+ PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino);
+ BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_bh);
+- old_dir->i_nlink--;
++ ext3_dec_count(handle, old_dir);
+ if (new_inode) {
+- new_inode->i_nlink--;
++ ext3_dec_count(handle, new_inode);
+ } else {
+- new_dir->i_nlink++;
++ ext3_inc_count(handle, new_dir);
+ ext3_update_dx_flag(new_dir);
+ ext3_mark_inode_dirty(handle, new_dir);
+ }
+Index: linux-2.6.7/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-06-15 23:19:36.000000000 -0600
++++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-20 17:41:27.000000000 -0600
+@@ -41,7 +41,7 @@ struct statfs;
+ /*
+ * Always enable hashed directories
+ */
+-#define CONFIG_EXT3_INDEX
++#define CONFIG_EXT3_INDEX 1
+
+ /*
+ * Debug code
+@@ -79,7 +81,7 @@
+ /*
+ * Maximal count of links to a file
+ */
+-#define EXT3_LINK_MAX 32000
++#define EXT3_LINK_MAX 65000
+
+ /*
+ * Macro-instructions used to manage several block sizes
+@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 {
+ */
+
+ #ifdef CONFIG_EXT3_INDEX
+- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
+- EXT3_FEATURE_COMPAT_DIR_INDEX) && \
++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \
++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL))
+-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
+-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \
++ (is_dx(dir) && (dir)->i_nlink == 1))
+ #else
+ #define is_dx(dir) 0
+-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX)
+ #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2)
+ #endif
+
+++ /dev/null
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/acl.c linux-2.6.4-51.0/fs/ext3/acl.c
---- linux-2.6.4-51.0.orig/fs/ext3/acl.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/acl.c 2004-04-07 11:06:33.000000000 +0300
-@@ -10,7 +10,7 @@
- #include <linux/fs.h>
- #include <linux/ext3_jbd.h>
- #include <linux/ext3_fs.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "acl.h"
-
- /*
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/file.c linux-2.6.4-51.0/fs/ext3/file.c
---- linux-2.6.4-51.0.orig/fs/ext3/file.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/file.c 2004-04-07 11:06:39.000000000 +0300
-@@ -23,7 +23,7 @@
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
- #include <linux/ext3_jbd.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "acl.h"
-
- /*
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/ialloc.c linux-2.6.4-51.0/fs/ext3/ialloc.c
---- linux-2.6.4-51.0.orig/fs/ext3/ialloc.c 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/ialloc.c 2004-04-07 11:06:46.000000000 +0300
-@@ -26,7 +26,7 @@
- #include <asm/bitops.h>
- #include <asm/byteorder.h>
-
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "acl.h"
-
- /*
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/inode.c linux-2.6.4-51.0/fs/ext3/inode.c
---- linux-2.6.4-51.0.orig/fs/ext3/inode.c 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/inode.c 2004-04-07 11:25:05.000000000 +0300
-@@ -36,7 +36,7 @@
- #include <linux/writeback.h>
- #include <linux/mpage.h>
- #include <linux/uio.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "iopen.h"
- #include "acl.h"
-
-@@ -2340,7 +2340,7 @@ static unsigned long ext3_get_inode_bloc
- * performed.
- */
- int ext3_get_inode_loc(struct inode *inode,
-- struct ext3_iloc *iloc, int in_mem)
-+ struct ext3_iloc *iloc, int in_mem)
- {
- unsigned long block;
- struct buffer_head *bh;
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/namei.c linux-2.6.4-51.0/fs/ext3/namei.c
---- linux-2.6.4-51.0.orig/fs/ext3/namei.c 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/namei.c 2004-04-07 11:06:57.000000000 +0300
-@@ -36,7 +36,7 @@
- #include <linux/quotaops.h>
- #include <linux/buffer_head.h>
- #include <linux/smp_lock.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "iopen.h"
- #include "acl.h"
-
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/super.c linux-2.6.4-51.0/fs/ext3/super.c
---- linux-2.6.4-51.0.orig/fs/ext3/super.c 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/super.c 2004-04-07 11:07:05.000000000 +0300
-@@ -33,7 +33,7 @@
- #include <linux/vfs.h>
- #include <linux/random.h>
- #include <asm/uaccess.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "acl.h"
-
- static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/symlink.c linux-2.6.4-51.0/fs/ext3/symlink.c
---- linux-2.6.4-51.0.orig/fs/ext3/symlink.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/symlink.c 2004-04-07 11:07:16.000000000 +0300
-@@ -20,7 +20,7 @@
- #include <linux/fs.h>
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
-
- static int
- ext3_readlink(struct dentry *dentry, char __user *buffer, int buflen)
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr.c linux-2.6.4-51.0/fs/ext3/xattr.c
---- linux-2.6.4-51.0.orig/fs/ext3/xattr.c 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/xattr.c 2004-04-07 11:22:34.000000000 +0300
-@@ -59,7 +59,7 @@
- #include <linux/mbcache.h>
- #include <linux/quotaops.h>
- #include <linux/rwsem.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
- #include "acl.h"
-
- #define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
-@@ -348,7 +348,7 @@ cleanup:
- */
- int
- ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
-- void *buffer, size_t buffer_size)
-+ void *buffer, size_t buffer_size)
- {
- int size, name_len = strlen(name), storage_size;
- struct ext3_xattr_entry *last;
-@@ -360,7 +360,7 @@ ext3_xattr_ibody_get(struct inode *inode
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return -ENOENT;
-
-- ret = ext3_get_inode_loc(inode, &iloc);
-+ ret = ext3_get_inode_loc(inode, &iloc, 1);
- if (ret)
- return ret;
- raw_inode = ext3_raw_inode(&iloc);
-@@ -542,7 +542,7 @@ ext3_xattr_ibody_list(struct inode *inod
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return 0;
-
-- ret = ext3_get_inode_loc(inode, &iloc);
-+ ret = ext3_get_inode_loc(inode, &iloc, 1);
- if (ret)
- return ret;
- raw_inode = ext3_raw_inode(&iloc);
-@@ -693,7 +693,7 @@ ext3_xattr_ibody_find(struct inode *inod
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return ret;
-
-- err = ext3_get_inode_loc(inode, &iloc);
-+ err = ext3_get_inode_loc(inode, &iloc, 1);
- if (err)
- return -EIO;
- raw_inode = ext3_raw_inode(&iloc);
-@@ -824,7 +824,7 @@ ext3_xattr_ibody_set(handle_t *handle, s
- if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
- return -ENOSPC;
-
-- err = ext3_get_inode_loc(inode, &iloc);
-+ err = ext3_get_inode_loc(inode, &iloc, 1);
- if (err)
- return err;
- raw_inode = ext3_raw_inode(&iloc);
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr.h linux-2.6.4-51.0/fs/ext3/xattr.h
---- linux-2.6.4-51.0.orig/fs/ext3/xattr.h 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/xattr.h 1970-01-01 03:00:00.000000000 +0300
-@@ -1,147 +0,0 @@
--/*
-- File: fs/ext3/xattr.h
--
-- On-disk format of extended attributes for the ext3 filesystem.
--
-- (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
--*/
--
--#include <linux/config.h>
--#include <linux/xattr.h>
--
--/* Magic value in attribute blocks */
--#define EXT3_XATTR_MAGIC 0xEA020000
--
--/* Maximum number of references to one attribute block */
--#define EXT3_XATTR_REFCOUNT_MAX 1024
--
--/* Name indexes */
--#define EXT3_XATTR_INDEX_MAX 10
--#define EXT3_XATTR_INDEX_USER 1
--#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
--#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
--#define EXT3_XATTR_INDEX_TRUSTED 4
--#define EXT3_XATTR_INDEX_LUSTRE 5
--#define EXT3_XATTR_INDEX_SECURITY 6
--
--struct ext3_xattr_header {
-- __u32 h_magic; /* magic number for identification */
-- __u32 h_refcount; /* reference count */
-- __u32 h_blocks; /* number of disk blocks used */
-- __u32 h_hash; /* hash value of all attributes */
-- __u32 h_reserved[4]; /* zero right now */
--};
--
--struct ext3_xattr_entry {
-- __u8 e_name_len; /* length of name */
-- __u8 e_name_index; /* attribute name index */
-- __u16 e_value_offs; /* offset in disk block of value */
-- __u32 e_value_block; /* disk block attribute is stored on (n/i) */
-- __u32 e_value_size; /* size of attribute value */
-- __u32 e_hash; /* hash value of name and value */
-- char e_name[0]; /* attribute name */
--};
--
--#define EXT3_XATTR_PAD_BITS 2
--#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
--#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
--#define EXT3_XATTR_LEN(name_len) \
-- (((name_len) + EXT3_XATTR_ROUND + \
-- sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
--#define EXT3_XATTR_NEXT(entry) \
-- ( (struct ext3_xattr_entry *)( \
-- (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
--#define EXT3_XATTR_SIZE(size) \
-- (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
--
--# ifdef CONFIG_EXT3_FS_XATTR
--
--struct ext3_xattr_handler {
-- char *prefix;
-- size_t (*list)(char *list, struct inode *inode, const char *name,
-- int name_len);
-- int (*get)(struct inode *inode, const char *name, void *buffer,
-- size_t size);
-- int (*set)(struct inode *inode, const char *name, const void *buffer,
-- size_t size, int flags);
--};
--
--extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
--extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
--
--extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int);
--extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
--extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
--extern int ext3_removexattr(struct dentry *, const char *);
--
--extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
--extern int ext3_xattr_list(struct inode *, char *, size_t);
--extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
--extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
--extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
--
--extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
--extern void ext3_xattr_put_super(struct super_block *);
--
--extern int init_ext3_xattr(void);
--extern void exit_ext3_xattr(void);
--
--# else /* CONFIG_EXT3_FS_XATTR */
--# define ext3_setxattr NULL
--# define ext3_getxattr NULL
--# define ext3_listxattr NULL
--# define ext3_removexattr NULL
--
--static inline int
--ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-- void *buffer, size_t size, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--static inline int
--ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
--{
-- return -EOPNOTSUPP;
--}
--
--static inline int
--ext3_xattr_set(struct inode *inode, int name_index, const char *name,
-- const void *value, size_t size, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--static inline int
--ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-- const char *name, const void *value, size_t size, int flags)
--{
-- return -EOPNOTSUPP;
--}
--
--static inline void
--ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
--{
--}
--
--static inline void
--ext3_xattr_put_super(struct super_block *sb)
--{
--}
--
--static inline int
--init_ext3_xattr(void)
--{
-- return 0;
--}
--
--static inline void
--exit_ext3_xattr(void)
--{
--}
--
--# endif /* CONFIG_EXT3_FS_XATTR */
--
--extern struct ext3_xattr_handler ext3_xattr_user_handler;
--extern struct ext3_xattr_handler ext3_xattr_trusted_handler;
--extern struct ext3_xattr_handler ext3_xattr_security_handler;
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_security.c linux-2.6.4-51.0/fs/ext3/xattr_security.c
---- linux-2.6.4-51.0.orig/fs/ext3/xattr_security.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/xattr_security.c 2004-04-07 11:06:22.000000000 +0300
-@@ -9,7 +9,7 @@
- #include <linux/smp_lock.h>
- #include <linux/ext3_jbd.h>
- #include <linux/ext3_fs.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
-
- static size_t
- ext3_xattr_security_list(char *list, struct inode *inode,
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_trusted.c linux-2.6.4-51.0/fs/ext3/xattr_trusted.c
---- linux-2.6.4-51.0.orig/fs/ext3/xattr_trusted.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/xattr_trusted.c 2004-04-07 11:07:41.000000000 +0300
-@@ -11,7 +11,7 @@
- #include <linux/smp_lock.h>
- #include <linux/ext3_jbd.h>
- #include <linux/ext3_fs.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
-
- #define XATTR_TRUSTED_PREFIX "trusted."
-
-diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_user.c linux-2.6.4-51.0/fs/ext3/xattr_user.c
---- linux-2.6.4-51.0.orig/fs/ext3/xattr_user.c 2004-04-05 19:41:59.000000000 +0300
-+++ linux-2.6.4-51.0/fs/ext3/xattr_user.c 2004-04-07 11:07:47.000000000 +0300
-@@ -11,7 +11,7 @@
- #include <linux/smp_lock.h>
- #include <linux/ext3_jbd.h>
- #include <linux/ext3_fs.h>
--#include "xattr.h"
-+#include <linux/ext3_xattr.h>
-
- #define XATTR_USER_PREFIX "user."
-
-diff -rupN linux-2.6.4-51.0.orig/include/linux/ext3_fs.h linux-2.6.4-51.0/include/linux/ext3_fs.h
---- linux-2.6.4-51.0.orig/include/linux/ext3_fs.h 2004-04-06 22:17:15.000000000 +0300
-+++ linux-2.6.4-51.0/include/linux/ext3_fs.h 2004-04-07 11:13:26.000000000 +0300
-@@ -741,6 +741,9 @@ extern void ext3_truncate (struct inode
- extern void ext3_set_inode_flags(struct inode *);
- extern void ext3_set_aops(struct inode *inode);
-
-+extern int ext3_get_inode_loc(struct inode *inode,
-+ struct ext3_iloc *iloc, int in_mem);
-+
- /* ioctl.c */
- extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
- unsigned long);
-diff -rupN linux-2.6.4-51.0.orig/include/linux/ext3_xattr.h linux-2.6.4-51.0/include/linux/ext3_xattr.h
---- linux-2.6.4-51.0.orig/include/linux/ext3_xattr.h 1970-01-01 03:00:00.000000000 +0300
-+++ linux-2.6.4-51.0/include/linux/ext3_xattr.h 2004-04-07 11:08:34.000000000 +0300
-@@ -0,0 +1,152 @@
-+/*
-+ File: linux/include/linux/ext3_xattr.h
-+
-+ On-disk format of extended attributes for the ext3 filesystem.
-+
-+ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
-+*/
-+
-+#ifndef _LINUX_EXT3_XATTR_H
-+#define _LINUX_EXT3_XATTR_H
-+
-+#include <linux/config.h>
-+#include <linux/xattr.h>
-+
-+/* Magic value in attribute blocks */
-+#define EXT3_XATTR_MAGIC 0xEA020000
-+
-+/* Maximum number of references to one attribute block */
-+#define EXT3_XATTR_REFCOUNT_MAX 1024
-+
-+/* Name indexes */
-+#define EXT3_XATTR_INDEX_MAX 10
-+#define EXT3_XATTR_INDEX_USER 1
-+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2
-+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3
-+#define EXT3_XATTR_INDEX_TRUSTED 4
-+#define EXT3_XATTR_INDEX_LUSTRE 5
-+#define EXT3_XATTR_INDEX_SECURITY 6
-+
-+struct ext3_xattr_header {
-+ __u32 h_magic; /* magic number for identification */
-+ __u32 h_refcount; /* reference count */
-+ __u32 h_blocks; /* number of disk blocks used */
-+ __u32 h_hash; /* hash value of all attributes */
-+ __u32 h_reserved[4]; /* zero right now */
-+};
-+
-+struct ext3_xattr_entry {
-+ __u8 e_name_len; /* length of name */
-+ __u8 e_name_index; /* attribute name index */
-+ __u16 e_value_offs; /* offset in disk block of value */
-+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */
-+ __u32 e_value_size; /* size of attribute value */
-+ __u32 e_hash; /* hash value of name and value */
-+ char e_name[0]; /* attribute name */
-+};
-+
-+#define EXT3_XATTR_PAD_BITS 2
-+#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
-+#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
-+#define EXT3_XATTR_LEN(name_len) \
-+ (((name_len) + EXT3_XATTR_ROUND + \
-+ sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
-+#define EXT3_XATTR_NEXT(entry) \
-+ ( (struct ext3_xattr_entry *)( \
-+ (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
-+#define EXT3_XATTR_SIZE(size) \
-+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
-+
-+# ifdef CONFIG_EXT3_FS_XATTR
-+
-+struct ext3_xattr_handler {
-+ char *prefix;
-+ size_t (*list)(char *list, struct inode *inode, const char *name,
-+ int name_len);
-+ int (*get)(struct inode *inode, const char *name, void *buffer,
-+ size_t size);
-+ int (*set)(struct inode *inode, const char *name, const void *buffer,
-+ size_t size, int flags);
-+};
-+
-+extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
-+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
-+
-+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int);
-+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
-+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
-+extern int ext3_removexattr(struct dentry *, const char *);
-+
-+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
-+extern int ext3_xattr_list(struct inode *, char *, size_t);
-+extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
-+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
-+extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int);
-+
-+extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
-+extern void ext3_xattr_put_super(struct super_block *);
-+
-+extern int init_ext3_xattr(void);
-+extern void exit_ext3_xattr(void);
-+
-+# else /* CONFIG_EXT3_FS_XATTR */
-+# define ext3_setxattr NULL
-+# define ext3_getxattr NULL
-+# define ext3_listxattr NULL
-+# define ext3_removexattr NULL
-+
-+static inline int
-+ext3_xattr_get(struct inode *inode, int name_index, const char *name,
-+ void *buffer, size_t size, int flags)
-+{
-+ return -EOPNOTSUPP;
-+}
-+
-+static inline int
-+ext3_xattr_list(struct inode *inode, void *buffer, size_t size)
-+{
-+ return -EOPNOTSUPP;
-+}
-+
-+static inline int
-+ext3_xattr_set(struct inode *inode, int name_index, const char *name,
-+ const void *value, size_t size, int flags)
-+{
-+ return -EOPNOTSUPP;
-+}
-+
-+static inline int
-+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
-+ const char *name, const void *value, size_t size, int flags)
-+{
-+ return -EOPNOTSUPP;
-+}
-+
-+static inline void
-+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
-+{
-+}
-+
-+static inline void
-+ext3_xattr_put_super(struct super_block *sb)
-+{
-+}
-+
-+static inline int
-+init_ext3_xattr(void)
-+{
-+ return 0;
-+}
-+
-+static inline void
-+exit_ext3_xattr(void)
-+{
-+}
-+
-+# endif /* CONFIG_EXT3_FS_XATTR */
-+
-+extern struct ext3_xattr_handler ext3_xattr_user_handler;
-+extern struct ext3_xattr_handler ext3_xattr_trusted_handler;
-+extern struct ext3_xattr_handler ext3_xattr_security_handler;
-+
-+#endif
Index: linux-stage/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500
++++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
Index: linux-stage/fs/ext3/inode.c
===================================================================
---- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500
++++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500
@@ -37,6 +37,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include "acl.h"
/*
-@@ -2472,6 +2473,9 @@
- ei->i_acl = EXT3_ACL_NOT_CACHED;
+@@ -2401,6 +2402,9 @@
ei->i_default_acl = EXT3_ACL_NOT_CACHED;
#endif
-+ if (ext3_iopen_get_inode(inode))
-+ return;
-+
+ ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
++
++ if (ext3_iopen_get_inode(inode))
++ return;
+
if (ext3_get_inode_loc(inode, &iloc, 0))
goto bad_inode;
- bh = iloc.bh;
Index: linux-stage/fs/ext3/iopen.c
===================================================================
--- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500
@@ -0,0 +1,272 @@
+/*
+ * linux/fs/ext3/iopen.c
Index: linux-stage/fs/ext3/iopen.h
===================================================================
--- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500
-+++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400
++++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500
@@ -0,0 +1,15 @@
+/*
+ * iopen.h
+ struct inode *inode, int rehash);
Index: linux-stage/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400
-+++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400
+--- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500
++++ linux-stage/fs/ext3/namei.c 2004-11-03 14:41:25.127695114 -0500
@@ -37,6 +37,7 @@
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
}
-@@ -2019,10 +2021,6 @@
+@@ -2029,10 +2031,6 @@
inode->i_nlink);
inode->i_version++;
inode->i_nlink = 0;
ext3_orphan_add(handle, inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
ext3_mark_inode_dirty(handle, inode);
-@@ -2139,6 +2137,23 @@
+@@ -2152,6 +2150,23 @@
return err;
}
static int ext3_link (struct dentry * old_dentry,
struct inode * dir, struct dentry *dentry)
{
-@@ -2161,7 +2176,8 @@
+@@ -2175,7 +2190,8 @@
ext3_inc_count(handle, inode);
atomic_inc(&inode->i_count);
+ err = ext3_add_link(handle, dentry, inode);
+ ext3_orphan_del(handle,inode);
ext3_journal_stop(handle);
- return err;
- }
+ if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
+ goto retry;
Index: linux-stage/fs/ext3/super.c
===================================================================
---- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400
-+++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400
-@@ -536,7 +536,7 @@
- Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload,
+--- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500
++++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500
+@@ -534,7 +534,7 @@
+ Opt_reservation, Opt_noreservation, Opt_noload,
Opt_commit, Opt_journal_update, Opt_journal_inum,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_ignore, Opt_barrier,
{Opt_err, NULL}
};
-@@ -772,6 +775,18 @@
+@@ -778,6 +781,18 @@
else
clear_opt(sbi->s_mount_opt, BARRIER);
break;
-.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
-.new.........fs/nfs/dir.c
-.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
-.new.........fs/nfs/dir.c
-Index: linux-2.6.4-51.0/fs/nfs/dir.c
+Index: linux-2.6.5-7.108/fs/nfs/dir.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 17:09:16.000000000 -0400
-+++ linux-2.6.4-51.0/fs/nfs/dir.c 2004-04-05 17:09:23.000000000 -0400
+--- linux-2.6.5-7.108.orig/fs/nfs/dir.c 2004-09-15 19:26:43.012732408 +0300
++++ linux-2.6.5-7.108/fs/nfs/dir.c 2004-09-15 20:03:32.882781096 +0300
@@ -782,7 +782,7 @@
if (nd->flags & LOOKUP_DIRECTORY)
return 0;
if (openflags & O_CREAT) {
/* If this is a negative dentry, just drop it */
if (!inode)
-Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c
+Index: linux-2.6.5-7.108/fs/nfs/nfs4proc.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/nfs/nfs4proc.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/nfs/nfs4proc.c 2004-04-05 17:09:23.000000000 -0400
+--- linux-2.6.5-7.108.orig/fs/nfs/nfs4proc.c 2004-04-04 06:37:39.000000000 +0300
++++ linux-2.6.5-7.108/fs/nfs/nfs4proc.c 2004-09-15 20:03:32.885780640 +0300
@@ -792,17 +792,17 @@
struct nfs4_state *state;
put_rpccred(cred);
if (IS_ERR(state))
return (struct inode *)state;
-Index: linux-2.6.4-51.0/fs/cifs/dir.c
+Index: linux-2.6.5-7.108/fs/cifs/dir.c
===================================================================
---- linux-2.6.4-51.0.orig/fs/cifs/dir.c 2004-04-05 12:41:59.000000000 -0400
-+++ linux-2.6.4-51.0/fs/cifs/dir.c 2004-04-05 17:13:47.000000000 -0400
-@@ -146,22 +146,22 @@
- if(nd) {
- cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path));
+--- linux-2.6.5-7.108.orig/fs/cifs/dir.c 2004-09-04 13:28:22.000000000 +0300
++++ linux-2.6.5-7.108/fs/cifs/dir.c 2004-09-15 20:03:40.065689128 +0300
+@@ -173,23 +173,23 @@
+ }
+ if(nd) {
- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
+ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
desiredAccess = GENERIC_READ;
-- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
-+ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY)
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) {
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) {
desiredAccess = GENERIC_WRITE;
-- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
-+ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) {
+ write_only = TRUE;
+- } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) {
++ } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) {
/* GENERIC_ALL is too much permission to request */
/* can cause unnecessary access denied on create */
/* desiredAccess = GENERIC_ALL; */
disposition = FILE_OPEN_IF;
else {
cFYI(1,("Create flag not set in create function"));
-@@ -319,7 +319,7 @@
+@@ -359,7 +359,7 @@
parent_dir_inode, direntry->d_name.name, direntry));
if(nd) { /* BB removeme */
+++ /dev/null
- include/linux/fs.h | 1 +
- mm/filemap.c | 3 +++
- 2 files changed, 4 insertions(+)
-
-Index: linux-2.6.4-30.1/include/linux/fs.h
-===================================================================
---- linux-2.6.4-30.1.orig/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500
-+++ linux-2.6.4-30.1/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500
-@@ -320,6 +320,7 @@
- int (*releasepage) (struct page *, int);
- int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs);
-+ void (*removepage)(struct page *); /* called when page gets removed from the inode */
- };
-
- struct backing_dev_info;
-Index: linux-2.6.4-30.1/mm/filemap.c
-===================================================================
---- linux-2.6.4-30.1.orig/mm/filemap.c 2004-04-02 03:19:42.000000000 -0500
-+++ linux-2.6.4-30.1/mm/filemap.c 2004-04-02 03:23:10.000000000 -0500
-@@ -102,6 +102,9 @@
- {
- struct address_space *mapping = page->mapping;
-
-+ if (mapping->a_ops->removepage)
-+ mapping->a_ops->removepage(page);
-+
- radix_tree_delete(&mapping->page_tree, page->index);
- list_del(&page->list);
- page->mapping = NULL;
-
===================================================================
--- uml-2.6.7.orig/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:23.631218720 +0300
+++ uml-2.6.7/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:24.263122656 +0300
-@@ -0,0 +1,1024 @@
+@@ -0,0 +1,1022 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ struct dentry *dentry = nd->dentry;
+ int err, counter = 0;
+
++ revalidate_again:
+ if (!dentry->d_op || !dentry->d_op->d_revalidate)
+ return 0;
-+ revalidate_again:
+ if (!dentry->d_op->d_revalidate(dentry, nd)) {
+ struct dentry *new;
-+ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd)))
++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd)))
+ return err;
+ new = real_lookup(dentry->d_parent, &dentry->d_name, nd);
+ if (IS_ERR(new))
spinlock_t f_ep_lock;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
-+ struct lookup_intent *f_it;
++ struct lookup_intent *f_it;
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
--- /dev/null
+--- linux-2.6.7.orig/fs/namei.c 2005-04-01 11:14:26.000000000 +0300
++++ linux-2.6.7/fs/namei.c 2005-04-01 11:23:01.748305104 +0300
+@@ -762,6 +762,13 @@ last_component:
+ inode = nd->dentry->d_inode;
+ /* fallthrough */
+ case 1:
++ if (lookup_flags & LOOKUP_DIRECTORY) {
++ err = -ENOTDIR;
++ if (!nd->dentry->d_inode->i_op ||
++ !nd->dentry->d_inode->i_op->lookup) {
++ goto return_err;
++ }
++ }
+ goto return_reval;
+ }
+ if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
path_release(&nd);
out:
putname(tmp);
-@@ -1626,10 +1637,18 @@
+@@ -1626,10 +1637,20 @@
struct dentry *dentry;
struct nameidata nd;
-+ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent, IT_LOOKUP);
error = path_lookup(tmp, LOOKUP_PARENT, &nd);
if (error)
goto out;
-+ if (nd.dentry->d_inode->i_op->mkdir_raw) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->mkdir_raw(&nd, mode);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto out2;
-+ }
++
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir_raw(&nd, mode);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
dentry = lookup_create(&nd, 1);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
char * name;
struct dentry *dentry;
struct nameidata nd;
-+ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent, IT_LOOKUP);
name = getname(pathname);
if(IS_ERR(name))
-@@ -1744,6 +1765,14 @@
+@@ -1744,6 +1765,16 @@
error = -EBUSY;
goto exit1;
}
-+ if (nd.dentry->d_inode->i_op->rmdir_raw) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
+
-+ error = op->rmdir_raw(&nd);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
++
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
struct dentry *dentry;
struct nameidata nd;
struct inode *inode = NULL;
-+ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent, IT_LOOKUP);
name = getname(pathname);
if(IS_ERR(name))
error = -EISDIR;
if (nd.last_type != LAST_NORM)
goto exit1;
-+ if (nd.dentry->d_inode->i_op->unlink_raw) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->unlink_raw(&nd);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit1;
-+ }
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
down(&nd.dentry->d_inode->i_sem);
dentry = lookup_hash(&nd.last, nd.dentry);
error = PTR_ERR(dentry);
struct dentry *dentry;
struct nameidata nd;
-+ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent, IT_LOOKUP);
error = path_lookup(to, LOOKUP_PARENT, &nd);
if (error)
struct nameidata nd, old_nd;
int error;
char * to;
-+ intent_init(&nd.intent, IT_LOOKUP);
-+ intent_init(&old_nd.intent, IT_LOOKUP);
++ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&old_nd.intent, IT_LOOKUP);
to = getname(newname);
if (IS_ERR(to))
error = -EXDEV;
if (old_nd.mnt != nd.mnt)
goto out_release;
-+ if (nd.dentry->d_inode->i_op->link_raw) {
-+ struct inode_operations *op = nd.dentry->d_inode->i_op;
-+ error = op->link_raw(&old_nd, &nd);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto out_release;
-+ }
++ if (nd.dentry->d_inode->i_op->link_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link_raw(&old_nd, &nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
new_dentry = lookup_create(&nd, 0);
error = PTR_ERR(new_dentry);
if (!IS_ERR(new_dentry)) {
-@@ -2038,7 +2093,7 @@
- * locking].
- */
- int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
-- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry)
- {
- int error = 0;
- struct inode *target;
-@@ -2083,7 +2138,7 @@
- }
-
- int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-- struct inode *new_dir, struct dentry *new_dentry)
-+ struct inode *new_dir, struct dentry *new_dentry)
- {
- struct inode *target;
- int error;
@@ -2160,6 +2215,8 @@
struct dentry * old_dentry, *new_dentry;
struct dentry * trap;
struct nameidata oldnd, newnd;
-+ intent_init(&oldnd.intent, IT_LOOKUP);
-+ intent_init(&newnd.intent, IT_LOOKUP);
++ intent_init(&oldnd.intent, IT_LOOKUP);
++ intent_init(&newnd.intent, IT_LOOKUP);
error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
if (error)
if (newnd.last_type != LAST_NORM)
goto exit2;
-+ if (old_dir->d_inode->i_op->rename_raw) {
-+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
-+ /* the file system wants to use normal vfs path now */
-+ if (error != -EOPNOTSUPP)
-+ goto exit2;
-+ }
++ if (old_dir->d_inode->i_op->rename_raw) {
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
+
trap = lock_rename(new_dir, old_dir);
+ if (error != -EOPNOTSUPP)
+ goto dput_and_out;
+ } else {
-+ down(&inode->i_sem);
-+ error = notify_change(nd.dentry, &newattrs);
-+ up(&inode->i_sem);
-+ }
++ down(&inode->i_sem);
++ error = notify_change(nd.dentry, &newattrs);
++ up(&inode->i_sem);
++ }
dput_and_out:
path_release(&nd);
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
goto out;
newattrs.ia_valid = ATTR_CTIME;
-@@ -723,6 +749,7 @@
- }
- if (!S_ISDIR(inode->i_mode))
- newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
-+
- down(&inode->i_sem);
- error = notify_change(dentry, &newattrs);
- up(&inode->i_sem);
Index: linux-2.6.5-12.1/fs/exec.c
===================================================================
--- linux-2.6.5-12.1.orig/fs/exec.c 2004-05-11 15:41:54.000000000 -0400
dev_read_only-2.6-suse.patch
vfs_gns-2.6-vanilla.patch
linux-2.6.7-CITI_NFS4_ALL-7-lsec.patch
+vfs_lookup_in_file-2.6.patch
--- /dev/null
+lnxmaj="2.6.5"
+lnxrel="SLES9_SP1_BRANCH_2004111114454891"
+
+KERNEL=linux-$lnxmaj-$lnxrel.tar.gz
+# they include our patches
+SERIES=
+VERSION=$lnxmaj
+EXTRA_VERSION="${lnxrel}_lustre.@VERSION@"
+RHBUILD=0
+LINUX26=1
+SUSEBUILD=1
+
+BASE_ARCHS="i686 ppc"
+BIGMEM_ARCHS=""
+BOOT_ARCHS=""
+JENSEN_ARCHS=""
+SMP_ARCHS=""
+BIGSMP_ARCHS="i686 ppc"
+UP_ARCHS=""
+SRC_ARCHS=""
+
+for cc in gcc33 ; do
+ if which $cc >/dev/null 2>/dev/null ; then
+ export CC=$cc
+ break
+ fi
+done
-KERNEL=linux-2.6.5-12.1.tar.gz
-SERIES=2.6-suse
-VERSION=2.6.5
-EXTRA_VERSION=12.1_lustre
+lnxmaj=2.6.6
+
+KERNEL=linux-$lnxmaj.tar.gz
+SERIES=2.6-vanilla
+VERSION=$lnxmaj
+EXTRA_VERSION=lustre.@VERSION@
RHBUILD=0
BASE_ARCHS=""
-KERNEL=linux-2.4.20-hp_pnnl.tar.gz
+lnxmaj=2.4.20
+lnxrel=hp_pnnl
+
+KERNEL=linux-$lnxmaj-$lnxrel.tar.gz
SERIES=hp-pnnl-2.4.20
-VERSION=2.4.20
-EXTRA_VERSION=hp_pnnl_lustre
+VERSION=$lnxmaj
+EXTRA_VERSION=$lnxrel_lustre.@VERSION@
RHBUILD=0
BASE_ARCHS="ia64"
+++ /dev/null
-KERNEL=linux-2.4.20-31.9.tar.gz
-SERIES=rh-2.4.20
-VERSION=2.4.20
-EXTRA_VERSION=31.9_lustre
-RHBUILD=1
-
-BASE_ARCHS="i686"
-BIGMEM_ARCHS=""
-BOOT_ARCHS=""
-JENSEN_ARCHS=""
-SMP_ARCHS="i686"
-UP_ARCHS=""
-SRC_ARCHS="i686"
--- /dev/null
+lnxmaj="2.4.20"
+lnxrel="31.9"
+
+KERNEL=linux-${lnxmaj}-${lnxrel}.tar.gz
+SERIES=rh-2.4.20
+VERSION=$lnxmaj
+EXTRA_VERSION=${lnxrel}_lustre.@VERSION@
+RHBUILD=1
+
+BASE_ARCHS="i686"
+BIGMEM_ARCHS=""
+BOOT_ARCHS=""
+JENSEN_ARCHS=""
+SMP_ARCHS="i686"
+UP_ARCHS=""
+
+# the modules in this kernel do not build with gcc 3
+for cc in i386-redhat-linux-gcc-2.96 gcc296 gcc ; do
+ if which $cc >/dev/null 2>/dev/null ; then
+ CC=$cc
+ break
+ fi
+done
--- /dev/null
+lnxmaj="2.4.21"
+lnxrel="20.EL"
+
+KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2
+SERIES=rhel-2.4.21
+VERSION=${lnxmaj}
+EXTRA_VERSION=${lnxrel}_lustre.@VERSION@
+RHBUILD=1
+
+BASE_ARCHS="i686 x86_64 ia64"
+BIGMEM_ARCHS=""
+BOOT_ARCHS=""
+JENSEN_ARCHS=""
+SMP_ARCHS="i686 x86_64 ia64"
+UP_ARCHS=""
+
+# the modules in this kernel do not build with gcc 3.3 or 2.96
+for cc in gcc33 ; do
+ if which $cc >/dev/null 2>/dev/null ; then
+ export CC=$cc
+ break
+ fi
+done
KERNEL=linux-2.4.21-x86_64.tar.gz
SERIES=suse-2.4.21-2
VERSION=2.4.21
-EXTRA_VERSION=lustre.1.2.1
+EXTRA_VERSION=lustre.@VERSION@
RHBUILD=0
BASE_ARCHS="x86_64"
SERIES MNEMONIC COMMENT ARCH
-chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386
+SUPPORTED KERNELS:
+rhel-2.4.21 linux-2.4.21-20.3EL same as chaos-2.4.21 all
+2.6-suse linux-2.6 SLES9 SP1 kernel all
+
+UNSUPPORTED KERNELS; BEING PHASED OUT; MAY BE MISSING CRITICAL BUG FIXES:
hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64
vanilla-2.4.20 linux-2.4.20 patch with uml-2.4.20-6 um
chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 i386
kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386
bproc-2.4.20-hp-pnnl linux-2.4.20-hp4_pnnl9 hp-pnnl + bproc i386
suse-2.4.19 SUSE ES 8
+vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-6 um
+chaos-2.4.21 linux-chaos-2.4.21 same as rh-2.4.21-20.EL i386
+suse-2.4.21-jvn linux-2.4.21-241 sles8 2.4 kernel i386
if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) {
CERROR("namespace %s lock held illegally; tell phil\n",
ns->ns_name);
+ portals_debug_dumpstack(NULL);
next_msg = jiffies + 60 * HZ;
}
}
mode, flags);
LASSERT(list_empty(&lock->l_flock_waitq));
-
list_del_init(&lock->l_res_link);
if (flags == LDLM_FL_WAIT_NOREPROC) {
/* client side - set a flag to prevent sending a CANCEL */
int overlaps = 0;
ENTRY;
- CDEBUG(D_DLMTRACE, "flags %#x pid "LPU64" mode %u start "LPU64" end "
- LPU64"\n", *flags, new->l_policy_data.l_flock.pid, mode,
- req->l_policy_data.l_flock.start,
+ CDEBUG(D_DLMTRACE, "flags %#x pid %u mode %u start "LPU64" end "
+ LPU64"\n", *flags, (unsigned int)new->l_policy_data.l_flock.pid,
+ mode, req->l_policy_data.l_flock.start,
req->l_policy_data.l_flock.end);
*err = ELDLM_OK;
if (added)
ldlm_flock_destroy(req, mode, *flags);
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_OTHER, res);
RETURN(LDLM_ITER_CONTINUE);
}
{
struct ldlm_lock *lock;
struct lustre_handle lockh;
- int rc;
ENTRY;
lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
/* take lock off the deadlock detection waitq. */
list_del_init(&lock->l_flock_waitq);
+ /* client side - set flag to prevent lock from being put on lru list */
+ lock->l_flags |= LDLM_FL_CBPENDING;
+
ldlm_lock_decref_internal(lock, lock->l_req_mode);
ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh);
+ ldlm_cli_cancel(&lockh);
EXIT;
}
LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
- if (flags == 0) {
- wake_up(&lock->l_waitq);
- RETURN(0);
- }
-
if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
LDLM_FL_BLOCK_CONV)))
goto granted;
"sleeping");
ldlm_lock_dump(D_DLMTRACE, lock, 0);
-
fwd.fwd_lock = lock;
obd = class_exp2obd(lock->l_conn_export);
((lock->l_req_mode == lock->l_granted_mode) ||
lock->l_destroyed), &lwi);
- if (rc) {
- LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
- rc);
- RETURN(rc);
- }
-
- LASSERT(!(lock->l_destroyed));
-
+ LDLM_DEBUG(lock, "client-side enqueue waking up: rc = %d", rc);
+ RETURN(rc);
+
granted:
- LDLM_DEBUG(lock, "client-side enqueue waking up");
+ LDLM_DEBUG(lock, "client-side enqueue granted");
ns = lock->l_resource->lr_namespace;
l_lock(&ns->ns_lock);
getlk->fl_start = lock->l_policy_data.l_flock.start;
getlk->fl_end = lock->l_policy_data.l_flock.end;
} else {
+ int noreproc = LDLM_FL_WAIT_NOREPROC;
+
/* We need to reprocess the lock to do merges or splits
* with existing locks owned by this process. */
- flags = LDLM_FL_WAIT_NOREPROC;
- ldlm_process_flock_lock(lock, &flags, 1, &err);
+ ldlm_process_flock_lock(lock, &noreproc, 1, &err);
+ if (flags == 0)
+ wake_up(&lock->l_waitq);
}
l_unlock(&ns->ns_lock);
RETURN(0);
{
struct ldlm_namespace *ns;
ENTRY;
-
+
LASSERT(lock);
LASSERT(flag == LDLM_CB_CANCELING);
-
+
ns = lock->l_resource->lr_namespace;
-
+
/* take lock off the deadlock detection waitq. */
l_lock(&ns->ns_lock);
list_del_init(&lock->l_flock_waitq);
cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT;
}
-
rc = ldlm_get_ref();
if (rc) {
CERROR("ldlm_get_ref failed: %d\n", rc);
RETURN(0);
}
-static char nidstr[PTL_NALFMT_SIZE];
+static inline int ptlrpc_peer_is_local(struct ptlrpc_peer *peer)
+{
+ ptl_process_id_t myid;
+
+ PtlGetId(peer->peer_ni->pni_ni_h, &myid);
+ return (memcmp(&peer->peer_id, &myid, sizeof(myid)) == 0);
+}
+
int target_handle_connect(struct ptlrpc_request *req)
{
unsigned long connect_flags = 0, *cfp;
obd_str2uuid (&tgtuuid, str);
target = class_uuid2obd(&tgtuuid);
- if (!target) {
+ if (!target)
target = class_name2obd(str);
- }
if (!target || target->obd_stopping || !target->obd_set_up) {
- CERROR("UUID '%s' is not available for connect from NID %s\n",
- str, ptlrpc_peernid2str(&req->rq_peer, nidstr));
+ CERROR("UUID '%s' is not available for connect from %s\n",
+ str, req->rq_peerstr);
GOTO(out, rc = -ENODEV);
}
ptlrpc_peernid2str(&req->rq_peer, peer_str),
export, atomic_read(&export->exp_rpc_count));
GOTO(out, rc = -EBUSY);
- }
- else if (req->rq_reqmsg->conn_cnt == 1 && !initial_conn) {
+ } else if (req->rq_reqmsg->conn_cnt == 1 && !initial_conn) {
CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n",
cluuid.uuid);
GOTO(out, rc = -EALREADY);
CWARN("%s: connection from %s@%s/%lu %s\n", target->obd_name, cluuid.uuid,
ptlrpc_peernid2str(&req->rq_peer, peer_str), *cfp,
target->obd_recovering ? "(recovering)" : "");
+
if (target->obd_recovering) {
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING);
target_start_recovery_timer(target);
}
+
#if 0
/* Tell the client if we support replayable requests */
if (target->obd_replayable)
lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE);
#endif
+
if (export == NULL) {
if (target->obd_recovering) {
CERROR("%s denying connection for new client %s@%s: "
if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT)
export->exp_libclient = 1;
+ if (!(lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_ASYNC) &&
+ ptlrpc_peer_is_local(&req->rq_peer)) {
+ CWARN("%s: exp %p set sync\n", target->obd_name, export);
+ export->exp_sync = 1;
+ } else {
+ CDEBUG(D_HA, "%s: exp %p set async\n",target->obd_name,export);
+ export->exp_sync = 0;
+ }
+
if (export->exp_connection != NULL)
ptlrpc_put_connection(export->exp_connection);
export->exp_connection = ptlrpc_get_connection(&req->rq_peer,
GOTO(out, rc = 0);
}
- if (target->obd_recovering) {
+ if (target->obd_recovering)
target->obd_connected_clients++;
- }
- memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn),
- sizeof conn);
+ memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof(conn)),
+ sizeof(conn));
if (export->exp_imp_reverse != NULL) {
/* same logic as client_obd_cleanup */
return copy_req;
}
+
void ptlrpc_free_clone( struct ptlrpc_request *req)
{
if (req->rq_svcsec)
OBD_FREE(req, sizeof *req);
}
-
-
static void target_release_saved_req(struct ptlrpc_request *req)
{
if (req->rq_svcsec)
OBD_FREE(req, sizeof *req);
}
-#ifdef __KERNEL__
static void target_finish_recovery(struct obd_device *obd)
{
struct list_head *tmp, *n;
target_release_saved_req(req);
}
}
-#endif
/* Called from a cleanup function if the device is being cleaned up
forcefully. The exports should all have been disconnected already,
list_del(&req->rq_list);
LASSERT (req->rq_reply_state == 0);
target_release_saved_req(req);
- }
+ }
}
-#ifdef __KERNEL__
static void target_abort_recovery(void *data)
{
struct obd_device *obd = data;
-
+
LASSERT(!obd->obd_recovering);
class_disconnect_stale_exports(obd, 0);
target_finish_recovery(obd);
ptlrpc_run_recovery_over_upcall(obd);
}
-#endif
static void target_recovery_expired(unsigned long castmeharder)
{
}
}
- if (!inserted) {
+ if (!inserted)
list_add_tail(&req->rq_list, &obd->obd_recovery_queue);
- }
obd->obd_requests_queued_for_recovery++;
wake_up(&obd->obd_next_transno_waitq);
spin_unlock_bh(&obd->obd_processing_task_lock);
-
return 0;
}
ENTRY;
l_lock(&lock->l_resource->lr_namespace->ns_lock);
if (!list_empty(&lock->l_lru)) {
+ LASSERT(lock->l_resource->lr_type != LDLM_FLOCK);
list_del_init(&lock->l_lru);
lock->l_resource->lr_namespace->ns_nr_unused--;
LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0);
if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP))
lock->l_writers++;
lock->l_last_used = jiffies;
- l_unlock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_LOCK_GET(lock);
LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
}
void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
struct ldlm_namespace *ns;
ENTRY;
- LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
ns = lock->l_resource->lr_namespace;
+
l_lock(&ns->ns_lock);
+ LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
LASSERT(lock->l_readers > 0);
lock->l_readers--;
policy->l_inodebits.bits))
continue;
- if (lock->l_destroyed)
+ if (lock->l_destroyed || (lock->l_flags & LDLM_FL_FAILED))
continue;
if ((flags & LDLM_FL_LOCAL_ONLY) &&
ldlm_lock2handle(lock, lockh);
if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) {
struct l_wait_info lwi;
- if (lock->l_completion_ast)
- lock->l_completion_ast(lock,
- LDLM_FL_WAIT_NOREPROC,
- NULL);
+ if (lock->l_completion_ast) {
+ int err = lock->l_completion_ast(lock,
+ LDLM_FL_WAIT_NOREPROC,
+ NULL);
+ if (err) {
+ rc = 0;
+ goto out2;
+ }
+ }
lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);
(lock->l_flags & LDLM_FL_CAN_MATCH), &lwi);
}
}
- if (rc)
+
+out2:
+ if (rc) {
+ l_lock(&ns->ns_lock);
LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")",
type == LDLM_PLAIN ? res_id->name[2] :
policy->l_extent.start,
type == LDLM_PLAIN ? res_id->name[3] :
- policy->l_extent.end);
- else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */
+ policy->l_extent.end);
+ l_unlock(&ns->ns_lock);
+ } else if (!(flags & LDLM_FL_TEST_LOCK)) {/* less verbose for test-only */
LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res "
LPU64"/"LPU64" ("LPU64" "LPU64")", ns,
type, mode, res_id->name[0], res_id->name[1],
type == LDLM_PLAIN ? res_id->name[2] :
- policy->l_extent.start,
+ policy->l_extent.start,
type == LDLM_PLAIN ? res_id->name[3] :
- policy->l_extent.end);
+ policy->l_extent.end);
+ }
if (old_lock)
LDLM_LOCK_PUT(old_lock);
lock->l_blocking_ast = blocking;
lock->l_completion_ast = completion;
lock->l_glimpse_ast = glimpse;
+ lock->l_pid = current->pid;
if (lvb_len) {
lock->l_lvb_len = lvb_len;
struct ldlm_namespace *ns;
ENTRY;
- /* There's no race between calling this and taking the ns lock below;
- * a lock can only be put on the waiting list once, because it can only
- * issue a blocking AST once. */
- ldlm_del_waiting_lock(lock);
-
res = lock->l_resource;
ns = res->lr_namespace;
l_lock(&ns->ns_lock);
+ ldlm_del_waiting_lock(lock);
+
/* Please do not, no matter how tempting, remove this LBUG without
* talking to me first. -phik */
if (lock->l_readers || lock->l_writers) {
*flags |= LDLM_FL_BLOCK_GRANTED;
RETURN(lock->l_resource);
}
-
+
LASSERTF(new_mode == LCK_PW && lock->l_granted_mode == LCK_PR,
"new_mode %u, granted %u\n", new_mode, lock->l_granted_mode);
return;
}
- CDEBUG(level, " -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n",
+ CDEBUG(level, " -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d) (pid: %d)\n",
lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
- pos);
+ pos, lock->l_pid);
if (lock->l_conn_export != NULL)
obd = lock->l_conn_export->exp_obd;
if (lock->l_export && lock->l_export->exp_connection) {
struct ldlm_lock *lock;
char str[PTL_NALFMT_SIZE];
+ if (obd_dump_on_timeout)
+ portals_debug_dumplog();
+
spin_lock_bh(&waiting_locks_spinlock);
while (!list_empty(&waiting_locks_list)) {
lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
" (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid,
conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid,
ptlrpc_peernid2str(&conn->c_peer, str));
+
+ if (obd_dump_on_timeout)
+ portals_debug_dumplog();
ptlrpc_fail_export(lock->l_export);
}
ldlm_lock_cancel(lock);
rc = -ERESTART;
} else {
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
ldlm_del_waiting_lock(lock);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
ldlm_failed_ast(lock, rc, ast_type);
}
} else if (rc) {
if (lock->l_granted_mode != lock->l_req_mode) {
/* this blocking AST will be communicated as part of the
* completion AST instead */
+ LDLM_DEBUG(lock, "lock not granted, not sending blocking AST");
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- LDLM_DEBUG(lock, "lock not granted, not sending blocking AST");
RETURN(0);
}
sizeof(body->lock_handle1));
ldlm_lock2desc(lock, &body->lock_desc);
+ down(&lock->l_resource->lr_lvb_sem);
size = lock->l_resource->lr_lvb_len;
+ up(&lock->l_resource->lr_lvb_sem);
req->rq_replen = lustre_msg_size(1, &size);
req->rq_send_state = LUSTRE_IMP_FULL;
RETURN(rc);
}
+static struct ldlm_lock *
+find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct list_head *iter;
+
+ l_lock(&obd->obd_namespace->ns_lock);
+ list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) {
+ struct ldlm_lock *lock;
+ lock = list_entry(iter, struct ldlm_lock, l_export_chain);
+ if (lock->l_remote_handle.cookie == remote_hdl->cookie) {
+ LDLM_LOCK_GET(lock);
+ l_unlock(&obd->obd_namespace->ns_lock);
+ return lock;
+ }
+ }
+ l_unlock(&obd->obd_namespace->ns_lock);
+ return NULL;
+}
+
+
int ldlm_handle_enqueue(struct ptlrpc_request *req,
ldlm_completion_callback completion_callback,
ldlm_blocking_callback blocking_callback,
flags = dlm_req->lock_flags;
+ LASSERT(req->rq_export);
+
+ if (flags & LDLM_FL_REPLAY) {
+ lock = find_existing_lock(req->rq_export,
+ &dlm_req->lock_handle1);
+ if (lock != NULL) {
+ DEBUG_REQ(D_HA, req, "found existing lock cookie "LPX64,
+ lock->l_handle.h_cookie);
+ GOTO(existing_lock, rc = 0);
+ }
+ }
+
/* The lock's callback data might be set in the policy function */
lock = ldlm_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2,
dlm_req->lock_desc.l_resource.lr_name,
&lock->l_export->exp_ldlm_data.led_held_locks);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+existing_lock:
+
if (flags & LDLM_FL_HAS_INTENT) {
/* In this case, the reply buffer is allocated deep in
* local_lock_enqueue by the policy function. */
/* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this
* ldlm_reprocess_all. If this moves, revisit that code. -phil */
if (lock) {
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"
"(err=%d, rc=%d)", err, rc);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
if (rc == 0) {
down(&lock->l_resource->lr_lvb_sem);
up(&lock->l_resource->lr_lvb_sem);
} else {
ldlm_lock_destroy(lock);
-
}
if (!err && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK)
req->rq_status = EINVAL;
} else {
void *res = NULL;
-
+
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_DEBUG(lock, "server-side convert handler START");
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+
res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode,
&dlm_rep->lock_flags);
-
if (res) {
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
if (ldlm_del_waiting_lock(lock))
CDEBUG(D_DLMTRACE, "converted waiting lock %p\n", lock);
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
req->rq_status = 0;
} else {
req->rq_status = EDEADLOCK;
}
if (lock) {
- ldlm_reprocess_all(lock->l_resource);
+ if (!req->rq_status)
+ ldlm_reprocess_all(lock->l_resource);
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_DEBUG(lock, "server-side convert handler END");
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_LOCK_PUT(lock);
} else
LDLM_DEBUG_NOLOCK("server-side convert handler END");
struct ldlm_request *dlm_req;
struct ldlm_lock *lock;
struct ldlm_resource *res;
- char str[PTL_NALFMT_SIZE];
int rc;
ENTRY;
lock = ldlm_handle2lock(&dlm_req->lock_handle1);
if (!lock) {
CERROR("received cancel for unknown lock cookie "LPX64
- " from client %s nid %s\n",
+ " from client %s id %s\n",
dlm_req->lock_handle1.cookie,
req->rq_export->exp_client_uuid.uuid,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock "
"(cookie "LPU64")",
dlm_req->lock_handle1.cookie);
//(res, req->rq_reqmsg, 1);
}
+ l_lock(&res->lr_namespace->ns_lock);
ldlm_lock_cancel(lock);
if (ldlm_del_waiting_lock(lock))
CDEBUG(D_DLMTRACE, "cancelled waiting lock %p\n", lock);
+ l_unlock(&res->lr_namespace->ns_lock);
req->rq_status = rc;
}
if (lock) {
ldlm_reprocess_all(lock->l_resource);
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_DEBUG(lock, "server-side cancel handler END");
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
LDLM_LOCK_PUT(lock);
}
struct ldlm_namespace *ns;
struct ldlm_request *dlm_req;
struct ldlm_lock *lock;
- char str[PTL_NALFMT_SIZE];
int rc;
ENTRY;
if (req->rq_export == NULL) {
struct ldlm_request *dlm_req;
- CDEBUG(D_RPCTRACE, "operation %d from nid %s with bad "
+ CDEBUG(D_RPCTRACE, "operation %d from %s with bad "
"export cookie "LPX64"; this is "
"normal if this node rebooted with a lock held\n",
req->rq_reqmsg->opc,
- ptlrpc_peernid2str(&req->rq_peer, str),
+ req->rq_peerstr,
req->rq_reqmsg->handle.cookie);
dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req),
lustre_swab_ldlm_request);
if (req->rq_export == NULL) {
struct ldlm_request *dlm_req;
- char str[PTL_NALFMT_SIZE];
- CERROR("operation %d with bad export from NID %s\n",
+ CERROR("operation %d with bad export from %s\n",
req->rq_reqmsg->opc,
- ptlrpc_peernid2str(&req->rq_peer, str));
-
+ req->rq_peerstr);
CERROR("--> export cookie: "LPX64"\n",
req->rq_reqmsg->handle.cookie);
dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req),
ldlm_state->ldlm_cb_service =
ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE,
LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
- ldlm_callback_handler, "ldlm_cbd",
+ 1500, ldlm_callback_handler, "ldlm_cbd",
ldlm_svc_proc_dir);
if (!ldlm_state->ldlm_cb_service) {
ldlm_state->ldlm_cancel_service =
ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE,
LDLM_CANCEL_REQUEST_PORTAL,
- LDLM_CANCEL_REPLY_PORTAL,
+ LDLM_CANCEL_REPLY_PORTAL, 30000,
ldlm_cancel_handler, "ldlm_canceld",
ldlm_svc_proc_dir);
if (!list_empty(&ldlm_namespace_list)) {
CERROR("ldlm still has namespaces; clean these up first.\n");
- ldlm_dump_all_namespaces();
+ ldlm_dump_all_namespaces(D_DLMTRACE);
RETURN(-EBUSY);
}
struct obd_device *obd;
if (lock->l_conn_export == NULL) {
- static unsigned long next_dump = 0;
+ static unsigned long next_dump = 0, last_dump = 0;
LDLM_ERROR(lock, "lock timed out; not entering recovery in "
"server code, just going back to sleep");
if (time_after(jiffies, next_dump)) {
- ldlm_namespace_dump(lock->l_resource->lr_namespace);
- if (next_dump == 0)
- portals_debug_dumplog();
+ last_dump = next_dump;
next_dump = jiffies + 300 * HZ;
+ ldlm_namespace_dump(D_DLMTRACE,
+ lock->l_resource->lr_namespace);
+ if (last_dump == 0)
+ portals_debug_dumplog();
}
RETURN(0);
}
l_unlock(&ns->ns_lock);
ldlm_lock_decref_and_cancel(lockh, mode);
+
+ /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
+ * from llite/file.c/ll_file_flock(). */
+ if (lock->l_resource->lr_type == LDLM_FLOCK) {
+ ldlm_lock_destroy(lock);
+ }
}
int ldlm_cli_enqueue(struct obd_export *exp,
GOTO(cleanup, rc = -EPROTO);
}
- /* XXX - Phil, wasn't sure if this should go before or after the
- * lustre_swab_repbuf() ? If we can't unpack the reply then we
- * don't know what occurred on the server so I think the safest
- * bet is to cleanup the lock as if it didn't make it ? */
-
/* lock enqueued on the server */
cleanup_phase = 1;
rc = ldlm_lock_enqueue(ns, &lock, NULL, flags);
if (lock->l_completion_ast != NULL) {
int err = lock->l_completion_ast(lock, *flags, NULL);
- if (!rc) {
- cleanup_phase = 2;
+ if (!rc)
rc = err;
- }
+ if (rc)
+ cleanup_phase = 2;
}
}
static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode,
int *flags)
{
+ struct ldlm_resource *res;
+ int rc;
ENTRY;
if (lock->l_resource->lr_namespace->ns_client) {
CERROR("Trying to cancel local lock\n");
}
LDLM_DEBUG(lock, "client-side local convert");
- ldlm_lock_convert(lock, new_mode, flags);
- ldlm_reprocess_all(lock->l_resource);
-
+ res = ldlm_lock_convert(lock, new_mode, flags);
+ if (res) {
+ ldlm_reprocess_all(res);
+ rc = 0;
+ } else {
+ rc = EDEADLOCK;
+ }
LDLM_DEBUG(lock, "client-side local convert handler END");
LDLM_LOCK_PUT(lock);
- RETURN(0);
+ RETURN(rc);
}
/* FIXME: one of ldlm_cli_convert or the server side should reject attempted
* conversion of locks which are on the waiting or converting queue */
+/* Caller of this code is supposed to take care of lock readers/writers
+ accounting */
int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
{
struct ldlm_request *body;
struct ldlm_reply *reply;
struct ldlm_lock *lock;
struct ldlm_resource *res;
- struct ptlrpc_request *req;
+ struct ptlrpc_request *req = NULL;
int rc, size = sizeof(*body);
ENTRY;
GOTO (out, rc = -EPROTO);
}
+ if (req->rq_status)
+ GOTO(out, rc = req->rq_status);
+
res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags);
- if (res != NULL)
+ if (res != NULL) {
ldlm_reprocess_all(res);
- /* Go to sleep until the lock is granted. */
- /* FIXME: or cancelled. */
- if (lock->l_completion_ast)
- lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL);
+ /* Go to sleep until the lock is granted. */
+ /* FIXME: or cancelled. */
+ if (lock->l_completion_ast) {
+ rc = lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC,
+ NULL);
+ if (rc)
+ GOTO(out, rc);
+ }
+ } else {
+ rc = EDEADLOCK;
+ }
EXIT;
out:
LDLM_LOCK_PUT(lock);
/* Set this flag to prevent others from getting new references*/
l_lock(&lock->l_resource->lr_namespace->ns_lock);
lock->l_flags |= LDLM_FL_CBPENDING;
- local_only = (lock->l_flags & LDLM_FL_LOCAL_ONLY);
+ local_only = lock->l_flags & LDLM_FL_LOCAL_ONLY;
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
ldlm_cancel_callback(lock);
ENTRY;
if (ns == NULL) {
- CERROR("must pass in namespace");
+ CERROR("must pass in namespace\n");
LBUG();
}
kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
-spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED;
+DECLARE_MUTEX(ldlm_namespace_lock);
struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
struct proc_dir_entry *ldlm_type_proc_dir = NULL;
struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
unsigned long count, void *data)
{
- ldlm_dump_all_namespaces();
+ ldlm_dump_all_namespaces(D_DLMTRACE);
RETURN(count);
}
ns->ns_nr_unused = 0;
ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_add(&ns->ns_list_chain, &ldlm_namespace_list);
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
#ifdef __KERNEL__
ldlm_proc_namespace(ns);
#endif
CERROR("Resource refcount nonzero (%d) after "
"lock cleanup; forcing cleanup.\n",
atomic_read(&res->lr_refcount));
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
atomic_set(&res->lr_refcount, 1);
ldlm_resource_putref(res);
}
if (!ns)
RETURN(ELDLM_OK);
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_del(&ns->ns_list_chain);
-
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
/* At shutdown time, don't call the cancellation callback */
ldlm_namespace_cleanup(ns, 0);
struct ldlm_resource *res;
OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res);
- if (res == NULL) {
- LBUG();
+ if (res == NULL)
return NULL;
- }
+
memset(res, 0, sizeof(*res));
INIT_LIST_HEAD(&res->lr_children);
"type: %d", type);
res = ldlm_resource_new();
- if (!res) {
- LBUG();
+ if (!res)
RETURN(NULL);
- }
spin_lock(&ns->ns_counter_lock);
ns->ns_resources++;
}
}
- if (create)
+ if (create) {
res = ldlm_resource_add(ns, parent, name, type);
- else
+ if (res == NULL)
+ GOTO(out, NULL);
+ } else {
res = NULL;
-
+ }
if (create && ns->ns_lvbo && ns->ns_lvbo->lvbo_init) {
int rc;
CERROR("lvbo_init failed for resource "LPU64": rc %d\n",
name.name[0], rc);
} else {
+out:
l_unlock(&ns->ns_lock);
}
}
if (!list_empty(&res->lr_granted)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_converting)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_waiting)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
if (!list_empty(&res->lr_children)) {
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_ERROR, res);
LBUG();
}
{
l_lock(&res->lr_namespace->ns_lock);
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_OTHER, res);
CDEBUG(D_OTHER, "About to add this lock:\n");
ldlm_lock_dump(D_OTHER, lock, 0);
l_lock(&res->lr_namespace->ns_lock);
- ldlm_resource_dump(res);
+ ldlm_resource_dump(D_OTHER, res);
CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original);
ldlm_lock_dump(D_OTHER, new, 0);
memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name));
}
-void ldlm_dump_all_namespaces(void)
+void ldlm_dump_all_namespaces(int level)
{
struct list_head *tmp;
- spin_lock(&ldlm_namespace_lock);
+ down(&ldlm_namespace_lock);
list_for_each(tmp, &ldlm_namespace_list) {
struct ldlm_namespace *ns;
ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain);
- ldlm_namespace_dump(ns);
+ ldlm_namespace_dump(level, ns);
}
- spin_unlock(&ldlm_namespace_lock);
+ up(&ldlm_namespace_lock);
}
-void ldlm_namespace_dump(struct ldlm_namespace *ns)
+void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
{
struct list_head *tmp;
- unsigned int debug_save = portal_debug;
- portal_debug |= D_OTHER;
- l_lock(&ns->ns_lock);
- CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name,
- ns->ns_refcount, ns->ns_client);
+ CDEBUG(level, "--- Namespace: %s (rc: %d, client: %d)\n",
+ ns->ns_name, ns->ns_refcount, ns->ns_client);
- list_for_each(tmp, &ns->ns_root_list) {
- struct ldlm_resource *res;
- res = list_entry(tmp, struct ldlm_resource, lr_childof);
+ l_lock(&ns->ns_lock);
+ if (time_after(jiffies, ns->ns_next_dump)) {
+ list_for_each(tmp, &ns->ns_root_list) {
+ struct ldlm_resource *res;
+ res = list_entry(tmp, struct ldlm_resource, lr_childof);
- /* Once we have resources with children, this should really dump
- * them recursively. */
- ldlm_resource_dump(res);
+ /* Once we have resources with children, this should
+ * really dump them recursively. */
+ ldlm_resource_dump(level, res);
+ }
+ ns->ns_next_dump = jiffies + 10 * HZ;
}
l_unlock(&ns->ns_lock);
- portal_debug = debug_save;
}
-void ldlm_resource_dump(struct ldlm_resource *res)
+void ldlm_resource_dump(int level, struct ldlm_resource *res)
{
struct list_head *tmp;
int pos;
if (RES_NAME_SIZE != 4)
LBUG();
- CDEBUG(D_OTHER, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
+ CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64
") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1],
res->lr_name.name[2], res->lr_name.name[3],
atomic_read(&res->lr_refcount));
if (!list_empty(&res->lr_granted)) {
pos = 0;
- CDEBUG(D_OTHER, "Granted locks:\n");
+ CDEBUG(level, "Granted locks:\n");
list_for_each(tmp, &res->lr_granted) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
if (!list_empty(&res->lr_converting)) {
pos = 0;
- CDEBUG(D_OTHER, "Converting locks:\n");
+ CDEBUG(level, "Converting locks:\n");
list_for_each(tmp, &res->lr_converting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
if (!list_empty(&res->lr_waiting)) {
pos = 0;
- CDEBUG(D_OTHER, "Waiting locks:\n");
+ CDEBUG(level, "Waiting locks:\n");
list_for_each(tmp, &res->lr_waiting) {
struct ldlm_lock *lock;
lock = list_entry(tmp, struct ldlm_lock, l_res_link);
- ldlm_lock_dump(D_OTHER, lock, ++pos);
+ ldlm_lock_dump(level, lock, ++pos);
}
}
}
LBUG();
flags = 0;
- lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,
- 0);
+ lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0);
if (lock1 == NULL)
LBUG();
err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL,
LBUG();
flags = 0;
- lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR,
- NULL, 0);
+ lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0);
err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL,
NULL);
if (err != ELDLM_OK)
$(LLCPPFLAGS) -I$(top_srcdir)/portals/unals
AM_CFLAGS = $(LLCFLAGS)
-LIBS = $(LIBEFENCE)
+AM_LIBS = $(LIBEFENCE)
-LUSTRE_LIBS = liblutils.a libllite.a \
+LUSTRE_LIBS = libllite.a \
$(top_builddir)/lustre/lov/liblov.a \
$(top_builddir)/lustre/obdecho/libobdecho.a \
$(top_builddir)/lustre/osc/libosc.a \
$(top_builddir)/portals/portals/libportals.a
else
PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \
- $(CRAY_PORTALS_PATH)/lib_TV/snos64/libportals.a
+ $(CRAY_PORTALS_LIBS)/libportals.a
endif
SYSIO_LIBS = $(SYSIO)/lib/libsysio.a
if LIBLUSTRE
lib_LIBRARIES = liblustre.a
-noinst_LIBRARIES = liblutils.a libllite.a
+noinst_LIBRARIES = libllite.a
install-exec-hook: liblustre.so
@$(NORMAL_INSTALL)
install-exec-hook:
endif
-liblutils_a_SOURCES = lutil.c lutil.h
-
libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \
- llite_lib.h
+ lutil.c lutil.h llite_lib.h
# for make rpms -- need cleanup
liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c
liblustre.a : $(LUSTRE_LIBS) $(PTL_LIBS) $(SYSIO_LIBS)
- sh $(srcdir)/genlib.sh $(SYSIO) $(CRAY_PORTALS_PATH)
+ sh $(srcdir)/genlib.sh "$(SYSIO)" "$(CRAY_PORTALS_LIBS)" "$(LIBS)"
EXTRA_DIST = genlib.sh
#include <sys/fcntl.h>
#include <sys/queue.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <fs.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
#undef LIST_HEAD
+#ifdef HAVE_LINUX_TYPES_H
#include <linux/types.h>
+#elif defined(HAVE_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_LINUX_UNISTD_H
#include <linux/unistd.h>
+#elif defined(HAVE_UNISTD_H)
+#include <unistd.h>
+#endif
+
#include <dirent.h>
#include "llite_lib.h"
#include <sys/queue.h>
#include <fcntl.h>
+#ifdef HAVE_XTIO_H
#include <xtio.h>
+#endif
#include <sysio.h>
#include <fs.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
#undef LIST_HEAD
CWD=`pwd`
SYSIO=$1
-CRAY_PORTALS_PATH=$2
+CRAY_PORTALS_LIBS=$2
+LIBS=$3
if [ ! -f $SYSIO/lib/libsysio.a ]; then
echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist"
_objs=`$AR -t $1`
mkdir -p $sysio_tmp
cd $sysio_tmp
- $AR -x $1
+ $AR -x ../$1
cd ..
for _lib in $_objs; do
ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib ";
}
# lustre components libs
-build_obj_list . liblutils.a
+build_obj_list . libllite.a
build_obj_list ../lov liblov.a
build_obj_list ../obdecho libobdecho.a
build_obj_list ../osc libosc.a
$AR -cru $CWD/liblsupport.a $ALL_OBJS
$RANLIB $CWD/liblsupport.a
-# libllite should be at the beginning of obj list
-prepend_obj_list . libllite.a
-
-# libsysio
-build_sysio_obj_list $SYSIO/lib/libsysio.a
+# if libsysio is already in our LIBS we don't need to link against it here
+if $(echo "$LIBS" | grep -v -- "-lsysio" >/dev/null) ; then
+ build_sysio_obj_list $SYSIO/lib/libsysio.a
+fi
# create static lib lustre
rm -f $CWD/liblustre.a
#include <sys/types.h>
#include <sys/queue.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <fs.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
+
+/* env variables */
+#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT"
+#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET"
+#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT"
+#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE"
+#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK"
+#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS"
+#define ENV_LUSTRE_NAL_NAME "LIBLUSTRE_NAL_NAME"
#ifdef REDSTORM
#define CSTART_INIT
struct llog_ctxt *ctxt;
ptl_nid_t nid = 0;
int nal, err, rc = 0;
+ char *nal_name;
ENTRY;
generate_random_uuid(uuid);
RETURN(-EINVAL);
}
- nal = ptl_name2nal(LIBLUSTRE_NAL_NAME);
+ nal_name = getenv(ENV_LUSTRE_NAL_NAME);
+ if (!nal_name)
+ nal_name = "tcp";
+ nal = ptl_name2nal(nal_name);
if (nal <= 0) {
- CERROR("Can't parse NAL %s\n", LIBLUSTRE_NAL_NAME);
+ CERROR("Can't parse NAL %s\n", nal_name);
RETURN(-EINVAL);
}
LCFG_INIT(lcfg, LCFG_ADD_UUID, name);
err = class_process_config(&lcfg);
if (err < 0)
GOTO(out_detach, err);
-
+
obd = class_name2obd(name);
if (obd == NULL)
GOTO(out_cleanup, err = -EINVAL);
g_zconf_mdsname, err);
GOTO(out_cleanup, err);
}
-
+
exp = class_conn2export(&mdc_conn);
-
+
ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT];
rc = class_config_process_llog(ctxt, g_zconf_profile, cfg);
if (rc)
out:
if (rc == 0)
rc = err;
-
+
RETURN(rc);
}
if ((s = strchr(buf, ':'))) {
*mdsnid = buf;
*s = '\0';
-
+
while (*++s == '/')
;
*mdsname = s;
* or in the apps themselves. The NAMESPACE_STRING specifying
* the initial set of fs ops (creates, mounts, etc.) is passed
* as an environment variable.
- *
+ *
* _sysio_init();
* _sysio_incore_init();
* _sysio_native_init();
err = lllib_init();
if (err) {
perror("init llite driver");
- }
+ }
return err;
}
-/* env variables */
-#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT"
-#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET"
-#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT"
-#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE"
-#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK"
-#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS"
-
extern int _sysio_native_init();
extern unsigned int obd_timeout;
-static char *lustre_path = NULL;
+char *lustre_path = NULL;
/* global variables */
char *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */
portal_subsystem_debug =
(unsigned int) strtol(debug_subsys, NULL, 0);
-#ifndef CSTART_INIT
+
+#ifdef INIT_SYSIO
/* initialize libsysio & mount rootfs */
- if (_sysio_init()) {
- perror("init sysio");
- exit(1);
- }
+ if (_sysio_init()) {
+ perror("init sysio");
+ exit(1);
+ }
_sysio_native_init();
- err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
- if (err) {
- perror(root_driver);
- exit(1);
- }
+ err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL);
+ if (err) {
+ perror(root_driver);
+ exit(1);
+ }
if (_sysio_lustre_init())
- exit(1);
-#endif
+ exit(1);
+#endif /* INIT_SYSIO */
err = mount("/", lustre_path, lustre_driver, mntflgs, NULL);
- if (err) {
- errno = -err;
- perror(lustre_driver);
- exit(1);
- }
+ if (err) {
+ errno = -err;
+ perror(lustre_driver);
+ exit(1);
+ }
}
void __liblustre_cleanup_(void)
* liblutre. this delima lead to another hack in
* libsysio/src/file_hack.c FIXME
*/
+#ifdef INIT_SYSIO
_sysio_shutdown();
cleanup_lib_portals();
PtlFini();
+#else
+ /*
+ * don't do any libsysio or low level portals cleanups
+ * platform framework does it
+ */
+ cleanup_lib_portals();
+#endif
}
unsigned long fd_gid;
};
-struct llu_sb_info
-{
+struct llu_sb_info {
struct obd_uuid ll_sb_uuid;
struct obd_export *ll_md_exp;
struct obd_export *ll_dt_exp;
unsigned long lli_st_generation;
};
-static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs)
-{
- return (struct llu_sb_info*)(fs->fs_private);
-}
+#define llu_fs2sbi(fs) (struct llu_sb_info *)(fs)->fs_private
static inline struct llu_inode_info *llu_i2info(struct inode *inode)
{
- return (struct llu_inode_info*)(inode->i_private);
+ return (struct llu_inode_info *)inode->i_private;
}
static inline struct llu_sb_info *llu_i2sbi(struct inode *inode)
static inline int llu_is_root_inode(struct inode *inode)
{
- return (llu_i2info(inode)->lli_id.li_stc.u.e3s.l3s_ino ==
+ return (id_ino(&llu_i2info(inode)->lli_id) ==
llu_i2info(inode)->lli_sbi->ll_rootino);
}
}
typedef int (*intent_finish_cb)(struct ptlrpc_request *,
- struct inode *parent, struct pnode *pnode,
- struct lookup_intent *, int offset, obd_id ino);
+ struct inode *parent,
+ struct pnode *pnode,
+ struct lookup_intent *,
+ int offset, obd_id ino);
+
int llu_intent_lock(struct inode *parent, struct pnode *pnode,
- struct lookup_intent *, int flags, intent_finish_cb);
+ struct lookup_intent *, int flags,
+ intent_finish_cb);
static inline __u64 ll_file_maxbytes(struct inode *inode)
{
char *osc_uuid;
};
-#define IS_BAD_PTR(ptr) \
+#define IS_BAD_PTR(ptr) \
((unsigned long)(ptr) == 0 || (unsigned long)(ptr) > -1000UL)
/* llite_lib.c */
void generate_random_uuid(unsigned char uuid_out[16]);
-int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov);
+
+int liblustre_process_log(struct config_llog_instance *cfg,
+ int allow_recov);
+
int ll_parse_mount_target(const char *target, char **mdsnid,
char **mdsname, char **profile);
-extern char *g_zconf_mdsnid;
-extern char *g_zconf_mdsname;
-extern char *g_zconf_profile;
+extern char *g_zconf_mdsnid;
+extern char *g_zconf_mdsname;
+extern char *g_zconf_profile;
extern struct mount_option_s mount_option;
/* super.c */
#include <sys/fcntl.h>
#include <sys/queue.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <fs.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
#undef LIST_HEAD
int rc;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%x\n",
- pb->pb_name.name, it ? it->it_op : 0);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,intent=%x\n",
+ (int)pb->pb_name.len, pb->pb_name.name, it ? it->it_op : 0);
/* We don't want to cache negative dentries, so return 0 immediately.
* We believe that this is safe, that negative dentries cannot be
#include <fcntl.h>
#include <sys/uio.h>
-#include <fs.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <mount.h>
#include <inode.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
#undef LIST_HEAD
struct inode *llap_inode;
};
-static struct ll_async_page *llap_from_cookie(void *cookie)
-{
- struct ll_async_page *llap = cookie;
- if (llap->llap_magic != LLAP_MAGIC)
- return ERR_PTR(-EINVAL);
- return llap;
-};
-
static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
{
struct ll_async_page *llap;
obd_valid valid_flags;
ENTRY;
- llap = llap_from_cookie(data);
- if (IS_ERR(llap)) {
- EXIT;
- return;
- }
-
+ llap = LLAP_FROM_COOKIE(data);
inode = llap->llap_inode;
lsm = llu_i2info(inode)->lli_smd;
struct ll_async_page *llap;
struct page *page;
- llap = llap_from_cookie(data);
- if (IS_ERR(llap)) {
- EXIT;
- return;
- }
-
+ llap = LLAP_FROM_COOKIE(data);
llap->llap_queued = 0;
page = llap->llap_page;
OBD_FREE(group, LLU_IO_GROUP_SIZE(group->lig_maxpages));
}
-void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
- obd_off size);
-
static
ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen,
_SYSIO_OFF_T pos, ssize_t len,
pos += ret;
if (!is_read) {
LASSERT(ret == count);
- lov_increase_kms(exp, lsm, pos);
+ obd_adjust_kms(exp, lsm, pos, 0);
/* file size grow immediately */
if (pos > lli->lli_st_size)
lli->lli_st_size = pos;
# include <sys/statfs.h>
#endif
-#include <fs.h>
+#ifdef HAVE_XTIO_H
+#include <xtio.h>
+#endif
#include <sysio.h>
#include <mount.h>
#include <inode.h>
+#include <fs.h>
+#ifdef HAVE_FILE_H
#include <file.h>
+#endif
#undef LIST_HEAD
#include "llite_lib.h"
static void llu_fsop_gone(struct filesys *fs)
{
- struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
+ struct llu_sb_info *sbi = (struct llu_sb_info *)fs->fs_private;
struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
struct lustre_cfg lcfg;
int next = 0;
valid &= src->o_valid;
if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE, "valid %llx, cur time %lu/%lu, new %lu/%lu\n",
- (unsigned long long)src->o_valid,
+ CDEBUG(D_INODE, "valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
+ src->o_valid,
LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime),
(long)src->o_mtime, (long)src->o_ctime);
obd_valid newvalid = 0;
if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
- CDEBUG(D_INODE, "valid %llx, new time %lu/%lu\n",
- (unsigned long long)valid, LTIME_S(lli->lli_st_mtime),
+ CDEBUG(D_INODE, "valid "LPX64", new time %lu/%lu\n",
+ valid, LTIME_S(lli->lli_st_mtime),
LTIME_S(lli->lli_st_ctime));
if (valid & OBD_MD_FLATIME) {
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n",
- pno->p_base->pb_name.name, llu_i2info(dir)->lli_st_ino);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu\n",
+ (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
+ llu_i2info(dir)->lli_st_ino);
if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX)
RETURN(err);
struct mdc_op_data op_data;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
- name, lli->lli_st_ino, lli->lli_st_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%lu(%p)\n",
+ len, name, lli->lli_st_ino, lli->lli_st_generation, dir);
if (lli->lli_st_nlink >= EXT2_LINK_MAX)
RETURN(err);
struct llu_inode_info *lli = llu_i2info(dir);
int rc;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
- name, lli->lli_st_ino, lli->lli_st_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%lu(%p)\n",
+ len, name, lli->lli_st_ino, lli->lli_st_generation, dir);
llu_prepare_mdc_data(&op_data, dir, NULL, name, len, S_IFDIR);
rc = mdc_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
flags = va_arg(ap, long);
flags &= FCNTL_FLMASK;
if (flags & FCNTL_FLMASK_INVALID) {
- CERROR("liblustre does not support O_NONBLOCK, O_ASYNC, "
+ CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, "
"and O_DIRECT on file descriptor\n");
*rtn = -1;
return EINVAL;
if ((md->body->valid &
(OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
(OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
- CERROR("bad md body valid mask 0x%llx\n",
- (unsigned long long)md->body->valid);
+ CERROR("bad md body valid mask 0x"LPX64"\n",
+ md->body->valid);
LBUG();
return ERR_PTR(-EPERM);
}
}
obd_set_info(obd->obd_self_export, strlen("async"), "async",
sizeof(async), &async);
-#warning "FIXME ASAP!"
#if 0
- if (mdc_init_ea_size(obd, lov))
+ if (mdc_init_ea_size(obd, osc))
GOTO(out_free, err = -EINVAL);
#endif
/* setup mdc */
AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals
AM_CFLAGS = $(LLCFLAGS)
-LIBS = $(LIBEFENCE) $(LIBREADLINE)
+AM_LIBS = $(LIBEFENCE) $(LIBREADLINE)
LLIB_EXEC= $(top_builddir)/lustre/liblustre/liblustre.a -lcap -lpthread
if LIBLUSTRE
noinst_LIBRARIES = libtestcommon.a
-def_tests = echo_test sanity recovery_small replay_single replay_ost_single
+
+if LIBLUSTRE_TESTS
+noinst_PROGRAMS = sanity recovery_small replay_single replay_ost_single
+
+if TESTS
+noinst_PROGRAMS += echo_test
+endif # TESTS
if MPITESTS
-noinst_PROGRAMS = $(def_tests) test_lock_cancel
-else
-noinst_PROGRAMS = $(def_tests)
-endif
+noinst_PROGRAMS += test_lock_cancel
+endif # MPITESTS
+
+endif # LIBLUSTRE_TESTS
endif # LIBLUSTRE
libtestcommon_a_SOURCES = test_common.c test_common.h
int count;
int size;
};
+
/* bug #4615 */
#if 0
char *portals_id2str(int nal, ptl_process_id_t id, char *str)
void t0()
{
+ char *path="/mnt/lustre/f0";
ENTRY("empty replay");
replay_barrier();
mds_failover();
+ t_check_stat_fail("/mnt/lustre/f0");
LEAVE();
}
#include <signal.h>
#include <errno.h>
#include <dirent.h>
+#include <sys/uio.h>
+#include <sys/time.h>
#include "test_common.h"
+extern char *lustre_path;
+
#define ENTRY(str) \
do { \
char buf[100]; \
void t1()
{
- char *path="/mnt/lustre/test_t1";
+ char path[MAX_PATH_LENGTH] = "";
+
ENTRY("create/delete");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t1", lustre_path);
t_touch(path);
t_unlink(path);
void t2()
{
- char *path="/mnt/lustre/test_t2";
+ char path[MAX_PATH_LENGTH] = "";
+
ENTRY("mkdir/rmdir");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t2", lustre_path);
t_mkdir(path);
t_rmdir(path);
void t3()
{
- char *path="/mnt/lustre/test_t3";
+ char path[MAX_PATH_LENGTH] = "";
+
ENTRY("regular stat");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t3", lustre_path);
t_touch(path);
t_check_stat(path, NULL);
void t4()
{
- char *path="/mnt/lustre/test_t4";
+ char path[MAX_PATH_LENGTH] = "";
+
ENTRY("dir stat");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t4", lustre_path);
t_mkdir(path);
t_check_stat(path, NULL);
void t6()
{
- char *path="/mnt/lustre/test_t6";
- char *path2="/mnt/lustre/test_t6_link";
+ char path[MAX_PATH_LENGTH] = "";
+ char path2[MAX_PATH_LENGTH] = "";
+
ENTRY("symlink");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t6", lustre_path);
+ snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6_link", lustre_path);
t_touch(path);
t_symlink(path, path2);
void t7()
{
- char *path="/mnt/lustre/test_t7";
+ char path[MAX_PATH_LENGTH] = "";
int rc;
+
ENTRY("mknod");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t7", lustre_path);
if (geteuid() != 0) {
rc = mknod(path, S_IFCHR | 0644, (5<<8 | 4));
void t8()
{
- char *path="/mnt/lustre/test_t8";
+ char path[MAX_PATH_LENGTH] = "";
+
ENTRY("chmod");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t8", lustre_path);
t_touch(path);
t_chmod_raw(path, 0700);
void t9()
{
- char *path="/mnt/lustre/test_t9";
- char *path2="/mnt/lustre/test_t9_link";
+ char path[MAX_PATH_LENGTH] = "";
+ char path2[MAX_PATH_LENGTH] = "";
+
ENTRY("hard link");
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t9", lustre_path);
+ snprintf(path2, MAX_PATH_LENGTH, "%s/test_t9_link", lustre_path);
t_touch(path);
t_link(path, path2);
void t10()
{
- char *dir1="/mnt/lustre/test_t10_dir1";
- char *dir2="/mnt/lustre/test_t10_dir2";
- char *path1="/mnt/lustre/test_t10_reg1";
- char *path2="/mnt/lustre/test_t10_reg2";
- char *rename1="/mnt/lustre/test_t10_dir1/rename1";
- char *rename2="/mnt/lustre/test_t10_dir2/rename2";
- char *rename3="/mnt/lustre/test_t10_dir2/rename3";
+ char dir1[MAX_PATH_LENGTH] = "";
+ char dir2[MAX_PATH_LENGTH] = "";
+ char path1[MAX_PATH_LENGTH] = "";
+ char path2[MAX_PATH_LENGTH] = "";
+ char rename1[MAX_PATH_LENGTH] = "";
+ char rename2[MAX_PATH_LENGTH] = "";
+ char rename3[MAX_PATH_LENGTH] = "";
+
ENTRY("rename");
+ snprintf(dir1, MAX_PATH_LENGTH, "%s/test_t10_dir1", lustre_path);
+ snprintf(dir2, MAX_PATH_LENGTH, "%s/test_t10_dir2", lustre_path);
+ snprintf(path1, MAX_PATH_LENGTH, "%s/test_t10_reg1", lustre_path);
+ snprintf(path2, MAX_PATH_LENGTH, "%s/test_t10_reg2", lustre_path);
+ snprintf(rename1, MAX_PATH_LENGTH, "%s/test_t10_dir1/rename1", lustre_path);
+ snprintf(rename2, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename2", lustre_path);
+ snprintf(rename3, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename3", lustre_path);
t_mkdir(dir1);
t_mkdir(dir2);
void t11()
{
- char *base="/mnt/lustre";
+ char *base=lustre_path;
char path[MAX_PATH_LENGTH], path2[MAX_PATH_LENGTH];
int i, j, level = 5, nreg = 5;
ENTRY("deep tree");
void t12()
{
- char *dir="/mnt/lustre/test_t12_dir";
+ char dir[MAX_PATH_LENGTH] = "";
char buf[1024*128];
int fd;
ENTRY("empty directory readdir");
+ snprintf(dir, MAX_PATH_LENGTH, "%s/test_t12_dir", lustre_path);
t_mkdir(dir);
fd = t_opendir(dir);
void t13()
{
- char *dir="/mnt/lustre/test_t13_dir/";
+ char dir[MAX_PATH_LENGTH] = "";
char name[1024];
char buf[1024];
const int nfiles = 20;
char *prefix = "test13_filename_prefix_";
int fd, i;
ENTRY("multiple entries directory readdir");
+ snprintf(dir, MAX_PATH_LENGTH, "%s/test_t13_dir/", lustre_path);
t_mkdir(dir);
printf("Creating %d files...\n", nfiles);
void t14()
{
- char *dir="/mnt/lustre/test_t14_dir/";
+ char dir[MAX_PATH_LENGTH] = "";
char name[1024];
char buf[1024];
const int nfiles = 256;
int fd, i, rc, pos, index;
loff_t base = 0;
ENTRY(">1 block(4k) directory readdir");
+ snprintf(dir, MAX_PATH_LENGTH, "%s/test_t14_dir/", lustre_path);
t_mkdir(dir);
printf("Creating %d files...\n", nfiles);
void t15()
{
- char *file = "/mnt/lustre/test_t15_file";
+ char file[MAX_PATH_LENGTH] = "";
int fd;
ENTRY("open-stat-close");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t15_file", lustre_path);
t_touch(file);
fd = t_open(file);
void t16()
{
- char *file = "/mnt/lustre/test_t16_file";
- int fd;
+ char file[MAX_PATH_LENGTH] = "";
ENTRY("small-write-read");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t16_file", lustre_path);
t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaa");
t_grep(file, "aaaaaaaaaaaaaaaaaaaaaa");
void t17()
{
- char *file = "/mnt/lustre/test_t17_file";
+ char file[MAX_PATH_LENGTH] = "";
int fd;
ENTRY("open-unlink without close");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t17_file", lustre_path);
fd = open(file, O_WRONLY | O_CREAT, 0666);
if (fd < 0) {
void t18()
{
- char *file = "/mnt/lustre/test_t18_file";
+ char file[MAX_PATH_LENGTH] = "";
char buf[128];
int fd, i;
struct stat statbuf[3];
ENTRY("write should change mtime/atime");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t18_file", lustre_path);
for (i = 0; i < 3; i++) {
fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666);
printf("Error stat\n");
exit(1);
}
- printf("mtime %ld, ctime %d\n",
+ printf("mtime %lu, ctime %lu\n",
statbuf[i].st_atime, statbuf[i].st_mtime);
sleep(2);
}
}
}
t_unlink(file);
+ LEAVE();
}
+void t19()
+{
+ char file[MAX_PATH_LENGTH] = "";
+ int fd;
+ struct stat statbuf;
+ ENTRY("open(O_TRUNC) should trancate file to 0-length");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t19_file", lustre_path);
+
+ t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+ fd = open(file, O_RDWR|O_CREAT|O_TRUNC, (mode_t)0666);
+ if (fd < 0) {
+ printf("error open file: %s\n", strerror(errno));
+ exit(-1);
+ }
+ close(fd);
+ if(stat(file, &statbuf) != 0) {
+ printf("Error stat\n");
+ exit(1);
+ }
+ if (statbuf.st_size != 0) {
+ printf("size %ld is not zero\n", statbuf.st_size);
+ exit(-1);
+ }
+ t_unlink(file);
+ LEAVE();
+}
+
+void t20()
+{
+ char file[MAX_PATH_LENGTH] = "";
+ int fd;
+ struct iovec iov[2];
+ char buf[100];
+ ssize_t ret;
+ ENTRY("trap app's general bad pointer for file i/o");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t20_file", lustre_path);
+
+ fd = open(file, O_RDWR|O_CREAT, (mode_t)0666);
+ if (fd < 0) {
+ printf("error open file: %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ ret = write(fd, NULL, 20);
+ if (ret != -1 || errno != EFAULT) {
+ printf("write 1: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ ret = write(fd, (void *)-1, 20);
+ if (ret != -1 || errno != EFAULT) {
+ printf("write 2: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 10;
+ iov[1].iov_base = (void *)-1;
+ iov[1].iov_len = 10;
+ ret = writev(fd, iov, 2);
+ if (ret != -1 || errno != EFAULT) {
+ printf("writev 1: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf;
+ iov[1].iov_len = sizeof(buf);
+ ret = writev(fd, iov, 2);
+ if (ret != sizeof(buf)) {
+ printf("write 3 ret %ld, error %d\n", ret, errno);
+ exit(1);
+ }
+ lseek(fd, 0, SEEK_SET);
+
+ ret = read(fd, NULL, 20);
+ if (ret != -1 || errno != EFAULT) {
+ printf("read 1: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ ret = read(fd, (void *)-1, 20);
+ if (ret != -1 || errno != EFAULT) {
+ printf("read 2: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 10;
+ iov[1].iov_base = (void *)-1;
+ iov[1].iov_len = 10;
+ ret = readv(fd, iov, 2);
+ if (ret != -1 || errno != EFAULT) {
+ printf("readv 1: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+ iov[0].iov_base = NULL;
+ iov[0].iov_len = 0;
+ iov[1].iov_base = buf;
+ iov[1].iov_len = sizeof(buf);
+ ret = readv(fd, iov, 2);
+ if (ret != sizeof(buf)) {
+ printf("read 3 ret %ld, error %d\n", ret, errno);
+ exit(1);
+ }
+
+ close(fd);
+ t_unlink(file);
+ LEAVE();
+}
+
+void t21()
+{
+ char file[MAX_PATH_LENGTH] = "";
+ int fd, ret;
+ ENTRY("basic fcntl support");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t21_file", lustre_path);
+
+ fd = open(file, O_RDWR|O_CREAT, (mode_t)0666);
+ if (fd < 0) {
+ printf("error open file: %s\n", strerror(errno));
+ exit(-1);
+ }
+ if (fcntl(fd, F_SETFL, O_APPEND)) {
+ printf("error set flag: %s\n", strerror(errno));
+ exit(-1);
+ }
+ if ((ret = fcntl(fd, F_GETFL)) != O_APPEND) {
+ printf("error get flag: ret %x\n", ret);
+ exit(-1);
+ }
+
+ close(fd);
+ t_unlink(file);
+ LEAVE();
+}
+
+void t22()
+{
+ char file[MAX_PATH_LENGTH] = "";
+ int fd;
+ char *str = "1234567890";
+ char buf[100];
+ ssize_t ret;
+ ENTRY("make sure O_APPEND take effect");
+ snprintf(file, MAX_PATH_LENGTH, "%s/test_t22_file", lustre_path);
+
+ fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666);
+ if (fd < 0) {
+ printf("error open file: %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ lseek(fd, 100, SEEK_SET);
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ printf("write 1: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret != strlen(str)) {
+ printf("read 1 got %ld\n", ret);
+ exit(1);
+ }
+
+ if (memcmp(buf, str, strlen(str))) {
+ printf("read 1 data err\n");
+ exit(1);
+ }
+
+ if (fcntl(fd, F_SETFL, 0)) {
+ printf("fcntl err: %s\n", strerror(errno));
+ exit(1);
+ }
+
+ lseek(fd, 100, SEEK_SET);
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ printf("write 2: ret %ld, errno %d\n", ret, errno);
+ exit(1);
+ }
+
+ lseek(fd, 100, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret != strlen(str)) {
+ printf("read 2 got %ld\n", ret);
+ exit(1);
+ }
+
+ if (memcmp(buf, str, strlen(str))) {
+ printf("read 2 data err\n");
+ exit(1);
+ }
+
+ close(fd);
+ t_unlink(file);
+ LEAVE();
+}
+
+
#define PAGE_SIZE (4096)
#define _npages (2048)
*/
static void pages_io(int xfer, loff_t pos)
{
- char *path="/mnt/lustre/test_t50";
+ char path[MAX_PATH_LENGTH] = "";
+
int check_sum[_npages] = {0,};
- int fd, rc, i, j;
+ int fd, rc, i, j, data_error = 0;
struct timeval tw1, tw2, tr1, tr2;
double tw, tr;
+ snprintf(path, MAX_PATH_LENGTH, "%s/test_t50", lustre_path);
memset(_buffer, 0, sizeof(_buffer));
/* create sample data */
sum += _buffer[i][j];
}
if (sum != check_sum[i]) {
+ data_error = 1;
printf("chunk %d checksum error: expected 0x%x, get 0x%x\n",
i, check_sum[i], sum);
}
printf(" (R:%.3fM/s, W:%.3fM/s)\n",
(_npages * PAGE_SIZE) / (tw / 1000000.0) / (1024 * 1024),
(_npages * PAGE_SIZE) / (tr / 1000000.0) / (1024 * 1024));
+
+ if (data_error)
+ exit(1);
}
void t50()
{
- char text[256];
- loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191, 1024*1024*1024*1024ULL};
+ loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191,
+ 1024*1024*1024*1024ULL};
int np = 1, i;
loff_t offset = 0;
t16();
t17();
t18();
+ t19();
+ t20();
+ t21();
+ t22();
t50();
printf("liblustre is about shutdown\n");
EXIT;
}
+/* Compare if two dentries are the same. Don't match if the existing dentry
+ * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
+ *
+ * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
+ * an AST before calling d_revalidate_it(). The dentry still exists (marked
+ * INVALID) so d_lookup() matches it, but we have no lock on it (so
+ * lock_match() fails) and we spin around real_lookup(). */
+static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
+ struct qstr *name){
+ struct dentry *dchild;
+ ENTRY;
+
+ if (d_name->len != name->len)
+ RETURN(1);
+
+ if (memcmp(d_name->name, name->name, name->len))
+ RETURN(1);
+
+ dchild = container_of(d_name, struct dentry, d_name); /* ugh */
+ if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
+ CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
+ dchild);
+ RETURN(1);
+ }
+
+ RETURN(0);
+}
+
/* should NOT be called with the dcache lock, see fs/dcache.c */
static int ll_ddelete(struct dentry *de)
{
ENTRY;
LASSERT(de != NULL);
- CDEBUG(D_DENTRY, "ldd on dentry %*s (%p) parent %p inode %p refc %d\n",
+ CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
atomic_read(&de->d_count));
lock_kernel();
if (inode == NULL) {
CERROR("unexpected NULL inode, tell phil\n");
+ EXIT;
return;
}
while ((tmp = tmp->next) != head) {
struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
if (atomic_read(&dentry->d_count) == 0) {
- CDEBUG(D_DENTRY, "deleting dentry %*s (%p) parent %p "
+ CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
"inode %p\n", dentry->d_name.len,
dentry->d_name.name, dentry, dentry->d_parent,
dentry->d_inode);
dput(dentry);
goto restart;
} else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
- CDEBUG(D_DENTRY, "unhashing dentry %*s (%p) parent %p "
+ CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
"inode %p refc %d\n", dentry->d_name.len,
dentry->d_name.name, dentry, dentry->d_parent,
dentry->d_inode, atomic_read(&dentry->d_count));
{
struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
struct ptlrpc_request *req = NULL;
- struct it_cb_data icbd;
struct obd_export *exp;
+ struct it_cb_data icbd;
struct lustre_id pid;
struct lustre_id cid;
int orig_it, rc = 0;
if (nd != NULL)
nd->mnt->mnt_last_used = jiffies;
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
orig_it = it ? it->it_op : IT_OPEN;
ll_frob_intent(&it, &lookup_it);
LASSERT(it != NULL);
ll_intent_release(&lookup_it);
}
+#if 1
+ if ((it->it_op == IT_OPEN) && de->d_inode) {
+ struct inode *inode = de->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
+ struct obd_device *obddev;
+ struct lustre_handle lockh;
+ int flags = LDLM_FL_BLOCK_GRANTED;
+ ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
+ struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
+ id_group(&lli->lli_id)}};
+ int lockmode;
+
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ lockmode = LCK_CW;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ lockmode = LCK_PR;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ lockmode = LCK_CR;
+ }
+
+ /* Check for the proper lock */
+ obddev = md_get_real_obd(exp, &lli->lli_id);
+ if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
+ LDLM_IBITS, &policy, lockmode, &lockh))
+ goto do_lock;
+ down(&lli->lli_och_sem);
+ if (*och_p) { /* Everything is open already, do nothing */
+ /*(*och_usecount)++; Do not let them steal our open
+ handle from under us */
+ /* XXX The code above was my original idea, but in case
+ we have the handle, but we cannot use it due to later
+ checks (e.g. O_CREAT|O_EXCL flags set), nobody
+ would decrement counter increased here. So we just
+ hope the lock won't be invalidated in between. But
+ if it would be, we'll reopen the open request to
+ MDS later during file open path */
+ up(&lli->lli_och_sem);
+ memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
+ sizeof(lockh));
+ LUSTRE_IT(it)->it_lock_mode = lockmode;
+ RETURN(1);
+ } else {
+ /* Hm, interesting. Lock is present, but no open
+ handle? */
+ up(&lli->lli_och_sem);
+ ldlm_lock_decref(&lockh, lockmode);
+ }
+ }
+#endif
+
+do_lock:
rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len,
- NULL, 0, &cid, it, flags, &req,
- ll_mdc_blocking_ast);
-
- /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
+ NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
+ /* If req is NULL, then md_intent_lock() only tried to do a lock match;
* if all was well, it will return 1 if it found locks, 0 otherwise. */
if (req == NULL && rc >= 0) {
if (!rc)
ptlrpc_req_finished(req);
}
ll_unhash_aliases(de->d_inode);
- return rc;
+ return 0;
}
CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
"inode %p refc %d\n", de->d_name.len,
de->d_name.name, de, de->d_parent, de->d_inode,
atomic_read(&de->d_count));
+
ll_lookup_finish_locks(it, de);
de->d_flags &= ~DCACHE_LUSTRE_INVALID;
if (it == &lookup_it)
.d_release = ll_release,
.d_iput = ll_dentry_iput,
.d_delete = ll_ddelete,
+ .d_compare = ll_dcompare,
#if 0
.d_pin = ll_pin,
.d_unpin = ll_unpin,
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
- RETURN(ERR_PTR(-ENOMEM));
+ return ERR_PTR(-ENOMEM);
ll_prepare_mdc_data(op_data, dir, NULL, NULL, 0, 0);
#include "llite_internal.h"
#include <linux/obd_lov.h>
-#define XATTR_NAME_MAX 255
-int ll_md_close(struct obd_export *md_exp, struct inode *inode,
- struct file *file)
+int ll_md_och_close(struct obd_export *md_exp, struct inode *inode,
+ struct obd_client_handle *och)
{
- struct ll_file_data *fd = file->private_data;
struct ptlrpc_request *req = NULL;
- struct obd_client_handle *och = &fd->fd_mds_och;
struct obdo *obdo = NULL;
+ struct obd_device *obd;
int rc;
ENTRY;
- /* clear group lock, if present */
- if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
- struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
- fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
- rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
- &fd->fd_cwlockh);
+ obd = class_exp2obd(md_exp);
+ if (obd == NULL) {
+ CERROR("Invalid MDC connection handle "LPX64"\n",
+ md_exp->exp_handle.h_cookie);
+ EXIT;
+ return 0;
}
+ /*
+ * here we check if this is a forced umount. If so, this is called on
+ * canceling the "open lock" and we do not call md_close() in this case,
+ * as it will not be successful since the import is already deactivated.
+ */
+ if (obd->obd_no_recov)
+ GOTO(out, rc = 0);
+
+ /* closing opened file */
obdo = obdo_alloc();
if (obdo == NULL)
RETURN(-ENOMEM);
obdo->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES;
obdo->o_valid |= OBD_MD_FLFLAGS;
}
+ obdo->o_fid = id_fid(&ll_i2info(inode)->lli_id);
obdo->o_mds = id_group(&ll_i2info(inode)->lli_id);
rc = md_close(md_exp, obdo, och, &req);
obdo_free(obdo);
if (rc == EAGAIN) {
- /* We are the last writer, so the MDS has instructed us to get
- * the file size and any write cookies, then close again. */
+ /*
+ * we are the last writer, so the MDS has instructed us to get
+ * the file size and any write cookies, then close again.
+ */
+
//ll_queue_done_writing(inode);
rc = 0;
} else if (rc) {
CERROR("inode %lu mdc close failed: rc = %d\n",
- inode->i_ino, rc);
+ (unsigned long)inode->i_ino, rc);
}
+
+ /* objects are destroyed on OST only if metadata close was
+ * successful. */
if (rc == 0) {
- rc = ll_objects_destroy(req, file->f_dentry->d_inode, 1);
+ rc = ll_objects_destroy(req, inode, 1);
if (rc)
CERROR("inode %lu ll_objects destroy: rc = %d\n",
inode->i_ino, rc);
}
- mdc_clear_open_replay_data(md_exp, och);
ptlrpc_req_finished(req);
+ EXIT;
+out:
+ mdc_clear_open_replay_data(md_exp, och);
och->och_fh.cookie = DEAD_HANDLE_MAGIC;
+ OBD_FREE(och, sizeof *och);
+ return rc;
+}
+
+int ll_md_real_close(struct obd_export *md_exp,
+ struct inode *inode, int flags)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct obd_client_handle **och_p;
+ struct obd_client_handle *och;
+ __u64 *och_usecount;
+ int rc = 0;
+ ENTRY;
+
+ if (flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
+ down(&lli->lli_och_sem);
+ if (*och_usecount) { /* There are still users of this handle, so
+ skip freeing it. */
+ up(&lli->lli_och_sem);
+ RETURN(0);
+ }
+ och = *och_p;
+
+ *och_p = NULL;
+ up(&lli->lli_och_sem);
+
+ /*
+ * there might be a race and somebody may have freed this och
+ * already. Another way for this to be called twice is if file closing
+ * fails due to network problems and on umount the lock is canceled,
+ * so this gets called from the blocking AST callback.
+ */
+ if (och && och->och_fh.cookie != DEAD_HANDLE_MAGIC)
+ rc = ll_md_och_close(md_exp, inode, och);
+
+ RETURN(rc);
+}
+
+int ll_md_close(struct obd_export *md_exp, struct inode *inode,
+ struct file *file)
+{
+ struct ll_file_data *fd = file->private_data;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc = 0;
+ ENTRY;
+
+ /* clear group lock, if present */
+ if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
+ struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
+ rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
+ &fd->fd_cwlockh);
+ }
+
+ /* Let's see if we have good enough OPEN lock on the file and if
+ we can skip talking to MDS */
+ if (file->f_dentry->d_inode) {
+ int lockmode;
+ struct obd_device *obddev;
+ struct lustre_handle lockh;
+ int flags = LDLM_FL_BLOCK_GRANTED;
+ struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
+ id_group(&lli->lli_id)}};
+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}};
+
+ down(&lli->lli_och_sem);
+ if (fd->fd_omode & FMODE_WRITE) {
+ lockmode = LCK_CW;
+ LASSERT(lli->lli_open_fd_write_count);
+ lli->lli_open_fd_write_count--;
+ } else if (fd->fd_omode & FMODE_EXEC) {
+ lockmode = LCK_PR;
+ LASSERT(lli->lli_open_fd_exec_count);
+ lli->lli_open_fd_exec_count--;
+ } else {
+ lockmode = LCK_CR;
+ LASSERT(lli->lli_open_fd_read_count);
+ lli->lli_open_fd_read_count--;
+ }
+ up(&lli->lli_och_sem);
+
+ obddev = md_get_real_obd(md_exp, &lli->lli_id);
+ if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
+ LDLM_IBITS, &policy, lockmode, &lockh))
+ {
+ rc = ll_md_real_close(md_exp, file->f_dentry->d_inode,
+ fd->fd_omode);
+ } else {
+ ldlm_lock_decref(&lockh, lockmode);
+ }
+ }
+
file->private_data = NULL;
OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof(*fd));
RETURN(rc);
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, parent->d_inode, NULL, name, len, O_RDWR);
+
+ ll_prepare_mdc_data(op_data, parent->d_inode, NULL,
+ name, len, O_RDWR);
rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PR, op_data,
&lockh, lmm, lmmsize, ldlm_completion_ast,
} else if (rc < 0) {
CERROR("lock enqueue: err: %d\n", rc);
}
-
RETURN(rc);
}
-int ll_local_open(struct file *file, struct lookup_intent *it)
+void ll_och_fill(struct inode *inode, struct lookup_intent *it,
+ struct obd_client_handle *och)
{
struct ptlrpc_request *req = LUSTRE_IT(it)->it_data;
- struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode);
- struct obd_export *md_exp = ll_i2mdexp(file->f_dentry->d_inode);
- struct ll_file_data *fd;
+ struct ll_inode_info *lli = ll_i2info(inode);
struct mds_body *body;
- ENTRY;
+ LASSERT(och);
body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body));
- LASSERT (body != NULL); /* reply already checked out */
- LASSERT_REPSWABBED (req, 1); /* and swabbed down */
+ LASSERT (body != NULL); /* reply already checked out */
+ LASSERT_REPSWABBED (req, 1); /* and swabbed down */
+
+ memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
+ och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+ lli->lli_io_epoch = body->io_epoch;
+ mdc_set_open_replay_data(ll_i2mdexp(inode), och,
+ LUSTRE_IT(it)->it_data);
+}
+
+int ll_local_open(struct file *file, struct lookup_intent *it,
+ struct obd_client_handle *och)
+{
+ struct ll_file_data *fd;
+ ENTRY;
- LASSERTF(file->private_data == NULL, "file %*s/%*s ino %lu/%u (%o)\n",
+ if (och)
+ ll_och_fill(file->f_dentry->d_inode, it, och);
+
+ LASSERTF(file->private_data == NULL, "file %.*s/%.*s ino %lu/%u (%o)\n",
file->f_dentry->d_name.len, file->f_dentry->d_name.name,
file->f_dentry->d_parent->d_name.len,
file->f_dentry->d_parent->d_name.name,
file->f_dentry->d_inode->i_generation,
file->f_dentry->d_inode->i_mode);
-
OBD_SLAB_ALLOC(fd, ll_file_data_slab, SLAB_KERNEL, sizeof *fd);
/* We can't handle this well without reorganizing ll_file_open and
* ll_md_close(), so don't even try right now. */
LASSERT(fd != NULL);
- memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle));
- fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC;
file->private_data = fd;
ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras);
-
- lli->lli_io_epoch = body->io_epoch;
-
- mdc_set_open_replay_data(md_exp, &fd->fd_mds_och, LUSTRE_IT(it)->it_data);
-
+ fd->fd_omode = it->it_flags;
RETURN(0);
}
struct lov_stripe_md *lsm;
struct ptlrpc_request *req;
int rc = 0;
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino,
- inode->i_generation, inode, file->f_flags);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n",
+ inode->i_ino, inode->i_generation, inode, file->f_flags);
/* don't do anything for / */
if (inode->i_sb->s_root == file->f_dentry)
RETURN(0);
+ if ((file->f_flags+1) & O_ACCMODE)
+ oit.it_flags++;
+ if (file->f_flags & O_TRUNC)
+ oit.it_flags |= 2;
+
it = file->f_it;
- if (!it || !LUSTRE_IT(it) || !LUSTRE_IT(it)->it_disposition) {
+ /*
+ * sometimes LUSTRE_IT(it) may not be allocated like opening file by
+ * dentry_open() from GNS stuff.
+ */
+ if (!it || !LUSTRE_IT(it)) {
it = &oit;
rc = ll_intent_alloc(it);
if (rc)
GOTO(out, rc);
- rc = ll_intent_file_open(file, NULL, 0, it);
- if (rc)
- GOTO(out, rc);
}
-
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
- /* mdc_intent_lock() didn't get a request ref if there was an open
- * error, so don't do cleanup on the request here (bug 3430) */
- rc = it_open_error(DISP_OPEN_OPEN, it);
- if (rc)
- RETURN(rc);
+
+ /*
+ * mdc_intent_lock() didn't get a request ref if there was an open
+ * error, so don't do cleanup on the request here (bug 3430)
+ */
+ if (LUSTRE_IT(it)->it_disposition) {
+ rc = it_open_error(DISP_OPEN_OPEN, it);
+ if (rc)
+ RETURN(rc);
+ }
+
+ /* Let's see if we have file open on MDS already. */
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
+ down(&lli->lli_och_sem);
+ if (*och_p) { /* Open handle is present */
+ if (LUSTRE_IT(it)->it_disposition) {
+ struct obd_client_handle *och;
+ /* Well, there's an extra open request that we do not
+ need, so let's close it somehow */
+ OBD_ALLOC(och, sizeof (struct obd_client_handle));
+ if (!och) {
+ up(&lli->lli_och_sem);
+ RETURN(-ENOMEM);
+ }
- rc = ll_local_open(file, it);
+ ll_och_fill(inode, it, och);
+ /* ll_md_och_close() will free och */
+ ll_md_och_close(ll_i2mdexp(inode), inode, och);
+ }
+ (*och_usecount)++;
+
+ rc = ll_local_open(file, it, NULL);
+ if (rc)
+ LBUG();
+ } else {
+ LASSERT(*och_usecount == 0);
+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
+ if (!*och_p)
+ GOTO(out, rc = -ENOMEM);
+ (*och_usecount)++;
+
+ if (!it || !LUSTRE_IT(it) || !LUSTRE_IT(it)->it_disposition) {
+ /* We are going to replace intent here, and that may
+ possibly change access mode (FMODE_EXEC can only be
+ set in intent), but I hope it never happens (I was
+ not able to trigger it yet at least) -- green */
+ /* FIXME: FMODE_EXEC is not covered by O_ACCMODE! */
+ LASSERT(!(it->it_flags & FMODE_EXEC));
+ LASSERTF((it->it_flags & O_ACCMODE) ==
+ (oit.it_flags & O_ACCMODE), "Changing intent "
+ "flags %x to incompatible %x\n",it->it_flags,
+ oit.it_flags);
+ it = &oit;
+ rc = ll_intent_file_open(file, NULL, 0, it);
+ if (rc)
+ GOTO(out, rc);
+ rc = it_open_error(DISP_OPEN_OPEN, it);
+ if (rc)
+ GOTO(out_och_free, rc);
- LASSERTF(rc == 0, "rc = %d\n", rc);
+ mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle,
+ file->f_dentry->d_inode);
+ }
+ lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
+ rc = ll_local_open(file, it, *och_p);
+ LASSERTF(rc == 0, "rc = %d\n", rc);
+ }
+ up(&lli->lli_och_sem);
+ /* Must do this outside lli_och_sem lock to prevent deadlock where
+ different kind of OPEN lock for this same inode gets cancelled
+ by ldlm_cancel_lru */
if (!S_ISREG(inode->i_mode))
GOTO(out, rc);
GOTO(out, rc);
out:
req = LUSTRE_IT(it)->it_data;
+ ll_intent_drop_lock(it);
ll_intent_release(it);
-
ptlrpc_req_finished(req);
- if (rc == 0)
+ if (rc == 0) {
ll_open_complete(inode);
+ } else {
+out_och_free:
+ if (*och_p) {
+ OBD_FREE(*och_p, sizeof (struct obd_client_handle));
+ *och_p = NULL; /* OBD_FREE writes some magic there */
+ (*och_usecount)--;
+ }
+ up(&lli->lli_och_sem);
+ }
+
return rc;
}
for (i = start; i <= end; i += (j + skip)) {
j = min(count - (i % count), end - i + 1);
+ LASSERT(j > 0);
LASSERT(inode->i_mapping);
if (ll_teardown_mmaps(inode->i_mapping, i << PAGE_CACHE_SHIFT,
((i+j) << PAGE_CACHE_SHIFT) - 1) )
goto iput;
ll_pgcache_remove_extent(inode, lsm, lock, stripe);
- /* grabbing the i_sem will wait for write() to complete. ns
- * lock hold times should be very short as ast processing
- * requires them and has a short timeout. so, i_sem before ns
- * lock.*/
-
- down(&inode->i_sem);
l_lock(&lock->l_resource->lr_namespace->ns_lock);
+ down(&lli->lli_size_sem);
kms = ldlm_extent_shift_kms(lock,
lsm->lsm_oinfo[stripe].loi_kms);
LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64,
lsm->lsm_oinfo[stripe].loi_kms, kms);
lsm->lsm_oinfo[stripe].loi_kms = kms;
+ up(&lli->lli_size_sem);
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
- up(&inode->i_sem);
//ll_try_done_writing(inode);
iput:
iput(inode);
lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb));
lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms;
+ lvb->lvb_mtime = LTIME_S(inode->i_mtime);
+ lvb->lvb_atime = LTIME_S(inode->i_atime);
+ lvb->lvb_ctime = LTIME_S(inode->i_ctime);
LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64,
inode->i_size, stripe, lvb->lvb_size);
RETURN(rc > 0 ? -EIO : rc);
}
+ down(&lli->lli_size_sem);
inode->i_size = lov_merge_size(lli->lli_smd, 0);
inode->i_blocks = lov_merge_blocks(lli->lli_smd);
- //inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime);
+ up(&lli->lli_size_sem);
+
+ LTIME_S(inode->i_mtime) = lov_merge_mtime(lli->lli_smd,
+ LTIME_S(inode->i_mtime));
CDEBUG(D_DLMTRACE, "glimpse: size: "LPU64", blocks: "LPU64"\n",
(__u64)inode->i_size, (__u64)inode->i_blocks);
+
obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR, &lockh);
RETURN(rc);
}
ldlm_policy_data_t *policy, struct lustre_handle *lockh,
int ast_flags, struct obd_service_time *stime)
{
+ struct ll_inode_info *lli = ll_i2info(inode);
struct ll_sb_info *sbi = ll_i2sbi(inode);
struct timeval start;
int rc;
* when doing appending writes and effectively cancel the
* result of the truncate. Getting the i_sem after the enqueue
* maintains the DLM -> i_sem acquiry order. */
- down(&inode->i_sem);
+ down(&lli->lli_size_sem);
inode->i_size = lov_merge_size(lsm, 1);
- up(&inode->i_sem);
+ up(&lli->lli_size_sem);
+ }
+
+ if (rc == 0) {
+ LTIME_S(inode->i_mtime) =
+ lov_merge_mtime(lsm, LTIME_S(inode->i_mtime));
}
- //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime);
RETURN(rc);
}
if (rc != 0)
RETURN(rc);
+ down(&lli->lli_size_sem);
kms = lov_merge_size(lsm, 1);
if (*ppos + count - 1 > kms) {
/* A glimpse is necessary to determine whether we return a short
* read or some zeroes at the end of the buffer */
+ up(&lli->lli_size_sem);
retval = ll_glimpse_size(inode);
if (retval)
goto out;
} else {
inode->i_size = kms;
+ up(&lli->lli_size_sem);
}
CDEBUG(D_INFO, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n",
struct ptlrpc_request *req = NULL;
int rc = 0;
struct lustre_md md;
+ struct obd_client_handle *och;
ENTRY;
+
+ if ((file->f_flags+1) & O_ACCMODE)
+ oit.it_flags++;
+ if (file->f_flags & O_TRUNC)
+ oit.it_flags |= 2;
+
down(&lli->lli_open_sem);
lsm = lli->lli_smd;
if (lsm) {
f->f_dentry = file->f_dentry;
f->f_vfsmnt = file->f_vfsmnt;
+ f->f_flags = flags;
rc = ll_intent_alloc(&oit);
if (rc)
GOTO(out, rc);
ll_update_inode(f->f_dentry->d_inode, &md);
- rc = ll_local_open(f, &oit);
- if (rc)
+ OBD_ALLOC(och, sizeof(struct obd_client_handle));
+ rc = ll_local_open(f, &oit, och);
+ if (rc) { /* Actually ll_local_open cannot fail! */
GOTO(out, rc);
+ }
+ if (LUSTRE_IT(&oit)->it_lock_mode) {
+ ldlm_lock_decref_and_cancel((struct lustre_handle *)
+ &LUSTRE_IT(&oit)->it_lock_handle,
+ LUSTRE_IT(&oit)->it_lock_mode);
+ LUSTRE_IT(&oit)->it_lock_mode = 0;
+ }
+
ll_intent_release(&oit);
+ /* ll_file_release will decrease the count, but won't free anything
+ because we have at least one more reference coming from actual open
+ */
+ down(&lli->lli_och_sem);
+ lli->lli_open_fd_write_count++;
+ up(&lli->lli_och_sem);
rc = ll_file_release(f->f_dentry->d_inode, f);
+
+ /* Now also destroy our supplemental och */
+ ll_md_och_close(ll_i2mdexp(inode), f->f_dentry->d_inode, och);
EXIT;
out:
ll_intent_release(&oit);
RETURN(-ENODATA);
return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm,
- (void *)arg);
+ (void *)arg);
}
static int ll_get_grouplock(struct inode *inode, struct file *file,
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK);
if (origin == 2) { /* SEEK_END */
ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF }};
+ struct ll_inode_info *lli = ll_i2info(inode);
int nonblock = 0, rc;
if (file->f_flags & O_NONBLOCK)
if (rc != 0)
RETURN(rc);
+ down(&lli->lli_size_sem);
offset += inode->i_size;
+ up(&lli->lli_size_sem);
} else if (origin == 1) { /* SEEK_CUR */
offset += file->f_pos;
}
{
int res = 0;
struct inode *inode = de->d_inode;
+ struct ll_inode_info *lli = ll_i2info(inode);
res = ll_inode_revalidate_it(de);
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR);
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
- stat->size = inode->i_size;
stat->blksize = inode->i_blksize;
+
+ down(&lli->lli_size_sem);
+ stat->size = inode->i_size;
stat->blocks = inode->i_blocks;
+ up(&lli->lli_size_sem);
+
stat->rdev = kdev_t_to_nr(inode->i_rdev);
stat->dev = id_group(&ll_i2info(inode)->lli_id);
return 0;
#ifndef LLITE_INTERNAL_H
#define LLITE_INTERNAL_H
+#include <linux/lustre_debug.h>
+
/* default to about 40meg of readahead on a given system. That much tied
* up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
#define SBI_DEFAULT_RA_MAX ((40 << 20) >> PAGE_CACHE_SHIFT)
extern kmem_cache_t *ll_intent_slab;
struct lustre_handle;
struct ll_file_data {
- struct obd_client_handle fd_mds_och;
struct ll_readahead_state fd_ras;
__u32 fd_flags;
+ int fd_omode;
struct lustre_handle fd_cwlockh;
unsigned long fd_gid;
};
/* only trust these if the page lock is providing exclusion */
unsigned llap_write_queued:1,
llap_defer_uptodate:1,
+ llap_origin:3,
llap_ra_used:1;
struct list_head llap_proc_item;
};
-#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \
- CDEBUG(mask, "page %p map %p ind %lu priv %0lx: " fmt, \
- page, page->mapping, page->index, page->private, ## arg)
+enum {
+ LLAP_ORIGIN_UNKNOWN = 0,
+ LLAP_ORIGIN_READPAGE,
+ LLAP_ORIGIN_READAHEAD,
+ LLAP_ORIGIN_COMMIT_WRITE,
+ LLAP_ORIGIN_WRITEPAGE,
+ LLAP__ORIGIN_MAX,
+};
/* llite/lproc_llite.c */
int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
void ll_removepage(struct page *page);
int ll_readpage(struct file *file, struct page *page);
struct ll_async_page *llap_from_cookie(void *cookie);
-struct ll_async_page *llap_from_page(struct page *page);
+struct ll_async_page *llap_from_page(struct page *page, unsigned origin);
struct ll_async_page *llap_cast_private(struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
/* llite/file.c */
extern struct file_operations ll_file_operations;
extern struct inode_operations ll_file_inode_operations;
+int ll_md_real_close(struct obd_export *md_exp,
+ struct inode *inode, int flags);
extern int ll_inode_revalidate_it(struct dentry *);
extern int ll_setxattr(struct dentry *, const char *, const void *,
size_t, int);
int ll_file_release(struct inode *inode, struct file *file);
int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
int ll_glimpse_size(struct inode *inode);
-int ll_local_open(struct file *file, struct lookup_intent *it);
+int ll_local_open(struct file *file, struct lookup_intent *it,
+ struct obd_client_handle *och);
int ll_md_close(struct obd_export *md_exp, struct inode *inode,
- struct file *file);
+ struct file *file);
+int ll_md_och_close(struct obd_export *md_exp, struct inode *inode,
+ struct obd_client_handle *och);
+void ll_och_fill(struct inode *inode, struct lookup_intent *it,
+ struct obd_client_handle *och);
+
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
#endif
char *ll_read_opt(const char *opt, char *data);
int ll_set_opt(const char *opt, char *data, int fl);
-void ll_options(char *options, char **ost, char **mds, char **sec, int *flags);
+void ll_options(char *options, char **ost, char **mds, char **sec,
+ int *async, int *flags);
void ll_lli_init(struct ll_inode_info *lli);
int ll_fill_super(struct super_block *sb, void *data, int silent);
int lustre_fill_super(struct super_block *sb, void *data, int silent);
extern struct dentry_operations ll_d_ops;
int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov,
- char *security, __u32 *nllu)
+ char *security, __u32 *nllu, int async)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
struct ptlrpc_request *request = NULL;
CERROR("MDC %s: not setup or attached\n", lmv);
RETURN(-EINVAL);
}
+ obd_set_info(obd->obd_self_export, strlen("async"), "async",
+ sizeof(async), &async);
if (security == NULL)
security = "null";
err = obd_connect(&md_conn, obd, &sbi->ll_sb_uuid, OBD_OPT_REAL_CLIENT);
if (err == -EBUSY) {
- CERROR("An MDS (mdc %s) is performing recovery, of which this"
+ CERROR("An MDS (lmv %s) is performing recovery, of which this"
" client is not a part. Please wait for recovery to "
"complete, abort, or time out.\n", lmv);
GOTO(out, err);
CERROR("OSC %s: not setup or attached\n", lov);
GOTO(out_lmv, err);
}
+ obd_set_info(obd->obd_self_export, strlen("async"), "async",
+ sizeof(async), &async);
err = obd_connect(&dt_conn, obd, &sbi->ll_sb_uuid, OBD_OPT_REAL_CLIENT);
if (err == -EBUSY) {
- CERROR("An OST (osc %s) is performing recovery, of which this"
+ CERROR("An OST (lov %s) is performing recovery, of which this"
" client is not a part. Please wait for recovery to "
"complete, abort, or time out.\n", lov);
GOTO(out, err);
ll_gns_add_timer(sbi);
- /* making vm readahead 0 for 2.4.x. In the case of 2.6.x, backing dev
- info assigned to inode mapping is used for determining maximal
- readahead. */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+ /* making vm readahead 0 for 2.4.x. In the case of 2.6.x,
+ backing dev info assigned to inode mapping is used for
+ determining maximal readahead. */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
+ !defined(KERNEL_HAS_AS_MAX_READAHEAD)
/* bug 2805 - set VM readahead to zero */
vm_max_readahead = vm_min_readahead = 0;
#endif
obd_disconnect(sbi->ll_md_exp, 0);
// We do this to get rid of orphaned dentries. That is not really trw.
- spin_lock(&dcache_lock);
hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) {
struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash);
- CWARN("orphan dentry %*s (%p) at unmount\n",
- dentry->d_name.len, dentry->d_name.name, dentry);
+ CWARN("orphan dentry %.*s (%p->%p) at unmount\n",
+ dentry->d_name.len, dentry->d_name.name, dentry, next);
shrink_dcache_parent(dentry);
}
- spin_unlock(&dcache_lock);
EXIT;
}
RETURN(fl);
}
-void ll_options(char *options, char **lov, char **lmv, char **sec, int *flags)
+void ll_options(char *options, char **lov, char **lmv, char **sec,
+ int *async, int *flags)
{
char *this_char;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
return;
}
+ *async = 0;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
for (this_char = strtok (options, ",");
this_char != NULL;
continue;
if (!*lmv && (*lmv = ll_read_opt("mdc", this_char)))
continue;
+ if (!strncmp(this_char, "lasync", strlen("lasync"))) {
+ *async = 1;
+ continue;
+ }
if (!*sec && (*sec = ll_read_opt("sec", this_char)))
continue;
if (!(*flags & LL_SBI_NOLCK) &&
void ll_lli_init(struct ll_inode_info *lli)
{
sema_init(&lli->lli_open_sem, 1);
+ sema_init(&lli->lli_size_sem, 1);
lli->lli_flags = 0;
lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
spin_lock_init(&lli->lli_lock);
INIT_LIST_HEAD(&lli->lli_pending_write_llaps);
lli->lli_inode_magic = LLI_INODE_MAGIC;
memset(&lli->lli_id, 0, sizeof(lli->lli_id));
+ sema_init(&lli->lli_och_sem, 1);
+ lli->lli_mds_read_och = lli->lli_mds_write_och = NULL;
+ lli->lli_mds_exec_och = NULL;
+ lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0;
+ lli->lli_open_fd_exec_count = 0;
}
int ll_fill_super(struct super_block *sb, void *data, int silent)
struct ll_sb_info *sbi;
char *lov = NULL;
char *lmv = NULL;
+ int async, err;
char *sec = NULL;
__u32 nllu[2] = { 99, 99 };
- int err;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb);
RETURN(-ENOMEM);
sbi->ll_flags |= LL_SBI_READAHEAD;
- ll_options(data, &lov, &lmv, &sec, &sbi->ll_flags);
+ ll_options(data, &lov, &lmv, &sec, &async, &sbi->ll_flags);
if (!lov) {
CERROR("no osc\n");
GOTO(out, err = -EINVAL);
}
- err = lustre_common_fill_super(sb, lmv, lov, sec, nllu);
+ err = lustre_common_fill_super(sb, lmv, lov, sec, nllu, async);
EXIT;
out:
if (err)
return rc;
}
+static void lustre_manual_cleanup(struct ll_sb_info *sbi)
+{
+ struct lustre_cfg lcfg;
+ struct obd_device *obd;
+ int next = 0;
+
+ while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
+ {
+ int err;
+
+ LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name);
+ err = class_process_config(&lcfg);
+ if (err) {
+ CERROR("cleanup failed: %s\n", obd->obd_name);
+ //continue;
+ }
+
+ LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name);
+ err = class_process_config(&lcfg);
+ if (err) {
+ CERROR("detach failed: %s\n", obd->obd_name);
+ //continue;
+ }
+ }
+
+ if (sbi->ll_lmd != NULL)
+ class_del_profile(sbi->ll_lmd->lmd_profile);
+}
+
int lustre_fill_super(struct super_block *sb, void *data, int silent)
{
struct lustre_mount_data * lmd = data;
}
err = lustre_common_fill_super(sb, lmv, lov, lmd->lmd_security,
- &lmd->lmd_nllu);
+ &lmd->lmd_nllu, lmd->lmd_async);
if (err)
GOTO(out_free, err);
if (sbi->ll_instance != NULL) {
struct lustre_mount_data *lmd = sbi->ll_lmd;
- char * cln_prof;
struct config_llog_instance cfg;
+ char *cl_prof;
cfg.cfg_instance = sbi->ll_instance;
cfg.cfg_uuid = sbi->ll_sb_uuid;
- OBD_ALLOC(cln_prof, len);
- sprintf(cln_prof, "%s-clean", lmd->lmd_profile);
-
- err = lustre_process_log(lmd, cln_prof, &cfg, 0);
- if (err < 0)
- CERROR("Unable to process log: %s\n", cln_prof);
- OBD_FREE(cln_prof, len);
- OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1);
+ OBD_ALLOC(cl_prof, len);
+ sprintf(cl_prof, "%s-clean", lmd->lmd_profile);
+ err = lustre_process_log(lmd, cl_prof, &cfg, 0);
+ if (err < 0) {
+ CERROR("Unable to process log: %s\n", cl_prof);
+ lustre_manual_cleanup(sbi);
+ }
+ OBD_FREE(cl_prof, len);
+ OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1);
}
OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
}
lustre_free_sbi(sb);
-
goto out_dev;
} /* lustre_fill_super */
-static void lustre_manual_cleanup(struct ll_sb_info *sbi)
-{
- struct lustre_cfg lcfg;
- struct obd_device *obd;
- int next = 0;
-
- while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
- {
- int err;
-
- LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name);
- err = class_process_config(&lcfg);
- if (err) {
- CERROR("cleanup failed: %s\n", obd->obd_name);
- //continue;
- }
-
- LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name);
- err = class_process_config(&lcfg);
- if (err) {
- CERROR("detach failed: %s\n", obd->obd_name);
- //continue;
- }
- }
-
- if (sbi->ll_lmd != NULL)
- class_del_profile(sbi->ll_lmd->lmd_profile);
-}
-
void lustre_put_super(struct super_block *sb)
{
struct obd_device *obd;
lustre_common_put_super(sb);
if (sbi->ll_lmd != NULL) {
- char * cln_prof;
+ char *cl_prof;
int len = strlen(sbi->ll_lmd->lmd_profile) + sizeof("-clean")+1;
int err;
struct config_llog_instance cfg;
cfg.cfg_instance = sbi->ll_instance;
cfg.cfg_uuid = sbi->ll_sb_uuid;
- OBD_ALLOC(cln_prof, len);
- sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
-
- err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0);
+ OBD_ALLOC(cl_prof, len);
+ sprintf(cl_prof, "%s-clean", sbi->ll_lmd->lmd_profile);
+ err = lustre_process_log(sbi->ll_lmd, cl_prof, &cfg, 0);
if (err < 0) {
CERROR("Unable to process log: %s, doing manual cleanup"
- "\n", cln_prof);
+ "\n", cl_prof);
lustre_manual_cleanup(sbi);
}
- OBD_FREE(cln_prof, len);
+ OBD_FREE(cl_prof, len);
free_lmd:
OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd));
OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1);
if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
inode = igrab(lock->l_ast_data);
} else {
- CERROR("DEBUG: l_ast_data %p is bogus: magic %x\n",
+ inode = lock->l_ast_data;
+ CDEBUG(inode->i_state & I_FREEING ? D_INFO : D_WARNING,
+ "l_ast_data %p is bogus: magic %08x\n",
lock->l_ast_data, lli->lli_inode_magic);
+ inode = NULL;
}
}
l_unlock(&lock->l_resource->lr_namespace->ns_lock);
clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags));
md_change_cbdata(sbi->ll_md_exp, &id, null_if_equal, inode);
+ LASSERT(!lli->lli_open_fd_write_count);
+ LASSERT(!lli->lli_open_fd_read_count);
+ LASSERT(!lli->lli_open_fd_exec_count);
+
+ if (lli->lli_mds_write_och)
+ ll_md_real_close(sbi->ll_md_exp, inode, FMODE_WRITE);
+ if (lli->lli_mds_exec_och)
+ ll_md_real_close(sbi->ll_md_exp, inode, FMODE_EXEC);
+ if (lli->lli_mds_read_och)
+ ll_md_real_close(sbi->ll_md_exp, inode, FMODE_READ);
+
if (lli->lli_smd)
obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd,
null_if_equal, inode);
strlen(lli->lli_symlink_name) + 1);
lli->lli_symlink_name = NULL;
}
+ lli->lli_inode_magic = LLI_INODE_DEAD;
EXIT;
}
* inode ourselves so we can call obdo_from_inode() always. */
if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN /*| ATTR_RAW*/) : ~0)) {
struct lustre_md md;
+ int save_valid;
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(rc);
}
- /* Won't invoke vmtruncate as we already cleared ATTR_SIZE,
- * but needed to set timestamps backwards on utime. */
+ /* We call inode_setattr to adjust timestamps, but we first
+ * clear ATTR_SIZE to avoid invoking vmtruncate.
+ *
+ * NB: ATTR_SIZE will only be set at this point if the size
+ * resides on the MDS, ie, this file has no objects. */
+ save_valid = attr->ia_valid;
+ attr->ia_valid &= ~ATTR_SIZE;
inode_setattr(inode, attr);
+ attr->ia_valid = save_valid;
+
ll_update_inode(inode, &md);
ptlrpc_req_finished(request);
ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
OBD_OBJECT_EOF } };
struct lustre_handle lockh = { 0 };
+ struct ll_inode_info *lli = ll_i2info(inode);
int err, ast_flags = 0;
/* XXX when we fix the AST intents to pass the discard-range
* XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
if (attr->ia_size == 0)
ast_flags = LDLM_AST_DISCARD_DATA;
- /* bug 1639: avoid write/truncate i_sem/DLM deadlock */
- LASSERT(atomic_read(&inode->i_sem.count) <= 0);
- up(&inode->i_sem);
- UP_WRITE_I_ALLOC_SEM(inode);
rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh,
ast_flags, &ll_i2sbi(inode)->ll_seek_stime);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- DOWN_WRITE_I_ALLOC_SEM(inode);
- down(&inode->i_sem);
-#else
- down(&inode->i_sem);
- DOWN_WRITE_I_ALLOC_SEM(inode);
-#endif
+
if (rc != 0)
RETURN(rc);
+ down(&lli->lli_size_sem);
rc = vmtruncate(inode, attr->ia_size);
+ if (rc != 0) {
+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
+ up(&lli->lli_size_sem);
+ }
- /* We need to drop the semaphore here, because this unlock may
- * result in a cancellation, which will need the i_sem */
- up(&inode->i_sem);
- UP_WRITE_I_ALLOC_SEM(inode);
- /* unlock now as we don't mind others file lockers racing with
- * the mds updates below? */
err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- DOWN_WRITE_I_ALLOC_SEM(inode);
- down(&inode->i_sem);
-#else
- down(&inode->i_sem);
- DOWN_WRITE_I_ALLOC_SEM(inode);
-#endif
if (err) {
CERROR("ll_extent_unlock failed: %d\n", err);
if (!rc)
rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age);
if (rc) {
- CERROR("mdc_statfs fails: rc = %d\n", rc);
+ CERROR("obd_statfs fails: rc = %d\n", rc);
RETURN(rc);
}
ENTRY;
LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
- LASSERT((mea != NULL) == ((body->valid & OBD_MD_FLDIREA) != 0));
+ if (md->lsm && md->lsm->lsm_magic != LOV_MAGIC) {
+ /* check for default striping info for dir. */
+ LASSERT((mea != NULL) == ((body->valid & OBD_MD_FLDIREA) != 0));
+ }
+
if (lsm != NULL) {
LASSERT(lsm->lsm_object_gr > 0);
if (lli->lli_smd == NULL) {
void ll_umount_begin(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct obd_device *obd;
struct obd_ioctl_data ioc_data = { 0 };
+ struct obd_device *obd;
ENTRY;
CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two)
{
+ /* XXX remove this assert when we really want to use this function
+ * to compare different file's region */
+ LASSERT(one->lt_oid == two->lt_oid);
+
if ( one->lt_oid < two->lt_oid)
return -1;
if ( one->lt_oid > two->lt_oid)
if (first_node != NULL)
lt_insert(tree, first_node);
+ /* order locking. what we have to concern about is ONLY double lock:
+ * the buffer is mapped to exactly this file. */
if (mapping_mapped(inode->i_mapping)) {
rc = lt_get_mmap_locks(tree, inode, (unsigned long)buf, count);
if (rc)
policy->l_extent.end = (policy->l_extent.start + count - 1) |
(PAGE_CACHE_SIZE - 1);
}
-static struct vm_area_struct * our_vma(unsigned long addr, size_t count)
+
+static struct vm_area_struct *our_vma(unsigned long addr, size_t count,
+ struct inode *inode)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *ret = NULL;
spin_lock(&mm->page_table_lock);
for(vma = find_vma(mm, addr);
vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) {
- if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage) {
+ if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage &&
+ vma->vm_file && vma->vm_file->f_dentry->d_inode == inode) {
ret = vma;
break;
}
count += addr & (PAGE_SIZE - 1);
addr -= addr & (PAGE_SIZE - 1);
- while ((vma = our_vma(addr, count)) != NULL) {
+ while ((vma = our_vma(addr, count, inode)) != NULL) {
policy_from_vma(&policy, vma, addr, count);
node = ll_node_from_inode(inode, policy.l_extent.start,
mode = mode_from_vma(vma);
stime = (mode & LCK_PW) ? &ll_i2sbi(inode)->ll_write_stime :
&ll_i2sbi(inode)->ll_read_stime;
-
+
rc = ll_extent_lock(fd, inode, ll_i2info(inode)->lli_smd, mode, &policy,
&lockh, LDLM_FL_CBPENDING, stime);
if (rc != 0)
{
return vma->vm_start +
(byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT));
-
}
#define VMA_DEBUG(vma, fmt, arg...) \
- CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p): " \
- fmt, vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, \
- vma->vm_file->f_dentry->d_inode, ## arg);
+ CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p) " \
+ "ino(%lu) iname(%s): " fmt, vma, vma->vm_start, vma->vm_end, \
+ vma->vm_pgoff, vma->vm_file->f_dentry->d_inode, \
+ vma->vm_file->f_dentry->d_inode->i_ino, \
+ vma->vm_file->f_dentry->d_iname, ## arg);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/* [first, last] are the byte offsets affected.
{
unsigned long address, len;
for (; vma ; vma = vma->vm_next_share) {
- if (last >> PAGE_CACHE_SHIFT < vma->vm_pgoff)
+ if (last >> PAGE_SHIFT < vma->vm_pgoff)
continue;
if (first >> PAGE_CACHE_SHIFT > (vma->vm_pgoff +
((vma->vm_end - vma->vm_start) >> PAGE_CACHE_SHIFT)))
continue;
- address = max((unsigned long)vma->vm_start,
+ /* XXX in case of unmap the cow pages of a running file,
+ * don't unmap these private writeable mapping here!
+ * though that will break private mappping a little.
+ *
+ * the clean way is to check the mapping of every page
+ * and just unmap the non-cow pages, just like
+ * unmap_mapping_range() with even_cow=0 in kernel 2.6.
+ */
+ if (!(vma->vm_flags & VM_SHARED) &&
+ (vma->vm_flags & VM_WRITE))
+ continue;
+
+ address = max((unsigned long)vma->vm_start,
file_to_user(vma, first));
len = min((unsigned long)vma->vm_end,
file_to_user(vma, last) + 1) - address;
- VMA_DEBUG(vma, "zapping vma [address=%ld len=%ld]\n",
- address, len);
- LASSERT(vma->vm_mm);
+ VMA_DEBUG(vma, "zapping vma [first="LPU64" last="LPU64" "
+ "address=%ld len=%ld]\n", first, last, address, len);
+ LASSERT(len > 0);
ll_zap_page_range(vma, address, len);
}
}
int rc = -ENOENT;
ENTRY;
+ LASSERT(last > first);
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
if (mapping_mapped(mapping)) {
rc = 0;
unmap_mapping_range(mapping, first + PAGE_SIZE - 1,
- last - first + 1, 1);
+ last - first + 1, 0);
}
#else
spin_lock(&mapping->i_shared_lock);
return (key0 << 1);
}
-static struct inode *search_inode_for_lustre(struct super_block *sb,
- unsigned long ino,
- unsigned long generation,
- int mode)
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+static int ll_nfs_test_inode(struct inode *inode, unsigned long ino,
+ void *opaque)
+#else
+static int ll_nfs_test_inode(struct inode *inode, void *opaque)
+#endif
+{
+ struct lustre_id *iid = opaque;
+
+ if (inode->i_ino == id_ino(iid) &&
+ inode->i_generation == id_gen(iid))
+ return 1;
+
+ return 0;
+}
+static struct inode * search_inode_for_lustre(struct super_block *sb,
+ unsigned long ino,
+ unsigned long generation,
+ int mode)
{
struct ptlrpc_request *req = NULL;
struct ll_sb_info *sbi = ll_s2sbi(sb);
__u64 valid = 0;
int eadatalen = 0, rc;
struct inode *inode = NULL;
-
- inode = ILOOKUP(sb, ino, NULL, NULL);
+ struct lustre_id iid;
+
+ id_ino(&iid) = (__u64)ino;
+ id_gen(&iid) = generation;
+ inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid);
if (inode)
return inode;
if (IS_ERR(inode)) {
return ERR_PTR(PTR_ERR(inode));
}
- if (is_bad_inode(inode)
- || (generation && inode->i_generation != generation)
- ){
+ if (is_bad_inode(inode) ||
+ (generation && inode->i_generation != generation)){
/* we didn't find the right inode.. */
- CERROR(" Inode %lu, Bad count: %lu %d or version %u %u\n",
- inode->i_ino,
- (unsigned long)inode->i_nlink,
- atomic_read(&inode->i_count),
- inode->i_generation,
- generation);
+ CERROR(" Inode %lu, Bad count: %lu %d or version %u %u\n",
+ inode->i_ino, (unsigned long)inode->i_nlink,
+ atomic_read(&inode->i_count), inode->i_generation,
+ generation);
iput(inode);
return ERR_PTR(-ESTALE);
}
-
+
/* now to find a dentry.
* If possible, get a well-connected one
*/
#include <linux/lprocfs_status.h>
#include <linux/seq_file.h>
#include <linux/obd_support.h>
+#ifdef HAVE_MM_INLINE
+#include <linux/mm_inline.h>
+#endif
#include "llite_internal.h"
/* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement
* it in our own state */
if (dummy_llap->llap_magic == 0) {
- seq_printf(seq, "generation | llap .cookie | page ");
- seq_printf(seq, "inode .index [ page flags ]\n");
+ seq_printf(seq, "generation | llap cookie origin | page ");
+ seq_printf(seq, "inode index count [ page flags ]\n");
return 0;
}
if (llap != NULL) {
int has_flags = 0;
struct page *page = llap->llap_page;
-
- seq_printf(seq, "%lu | %p %p | %p %p %lu [",
+ static char *origins[] = {
+ [LLAP_ORIGIN_UNKNOWN] = "--",
+ [LLAP_ORIGIN_READPAGE] = "rp",
+ [LLAP_ORIGIN_READAHEAD] = "ra",
+ [LLAP_ORIGIN_COMMIT_WRITE] = "cw",
+ [LLAP_ORIGIN_WRITEPAGE] = "wp",
+ };
+
+ LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n",
+ llap->llap_origin);
+
+ seq_printf(seq, "%lu | %p %p %s | %p %p %lu %u [",
sbi->ll_pglist_gen,
llap, llap->llap_cookie,
- page, page->mapping->host, page->index);
+ origins[llap->llap_origin],
+ page, page->mapping->host, page->index,
+ page_count(page));
seq_page_flag(seq, page, locked, has_flags);
seq_page_flag(seq, page, error, has_flags);
seq_page_flag(seq, page, referenced, has_flags);
spin_lock(&sbi->ll_lock);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "pending issued pages: %lu\n",
ra->ra_cur_pages);
spin_lock(&sbi->ll_lock);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "lock wait times: (num, average ms)\n");
(unsigned long)id_group(&li->lli_id));
}
+ if (bits & MDS_INODELOCK_OPEN) {
+ int flags = 0;
+ switch (lock->l_req_mode) {
+ case LCK_CW:
+ flags = FMODE_WRITE;
+ break;
+ case LCK_PR:
+ flags = FMODE_EXEC;
+ break;
+ case LCK_CR:
+ flags = FMODE_READ;
+ break;
+ default:
+ CERROR("Unexpected lock mode for OPEN lock "
+ "%d, inode %ld\n", lock->l_req_mode,
+ inode->i_ino);
+ }
+ ll_md_real_close(ll_i2mdexp(inode), inode, flags);
+ }
+
+ if (bits & MDS_INODELOCK_UPDATE)
+ clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
+ &(ll_i2info(inode)->lli_flags));
+
+
/* If lookup lock is cancelled, we just drop the dentry and
this will cause us to reget data from MDS when we'd want to
access this dentry/inode again. If this is lock on
int rc, orig_it;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, parent->i_ino, parent->i_generation,
- parent, LL_IT2STR(it));
+ if (dentry->d_name.len > EXT3_NAME_LEN)
+ RETURN(ERR_PTR(-ENAMETOOLONG));
+
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, parent->i_ino,
+ parent->i_generation, parent, LL_IT2STR(it));
if (d_mountpoint(dentry))
CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
int rc = 0;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
- dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
- LL_IT2STR(it));
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
+ dentry->d_name.len, dentry->d_name.name, dir->i_ino,
+ dir->i_generation, dir, LL_IT2STR(it));
rc = it_open_error(DISP_OPEN_CREATE, it);
if (rc)
{
struct ptlrpc_request *request = NULL;
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data *op_data;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
mode &= ~current->fs->umask;
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
current->fsuid, current->fsgid, rdev,
&request);
RETURN(err);
}
-static int ll_mknod(struct inode *dir, struct dentry *child,
+static int ll_mknod(struct inode *dir, struct dentry *dchild,
int mode, ll_dev_t rdev)
{
struct ptlrpc_request *request = NULL;
struct inode *inode = NULL;
- const char *name = child->d_name.name;
- int len = child->d_name.len;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data *op_data;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ dchild->d_name.len, dchild->d_name.name,
+ dir->i_ino, dir->i_generation, dir);
mode &= ~current->fs->umask;
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_data(op_data, dir, NULL, dchild->d_name.name,
+ dchild->d_name.len, 0);
err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
current->fsuid, current->fsgid, rdev,
&request);
GOTO(out_err, err);
ll_update_times(request, 0, dir);
-
err = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
- &inode, request, 0, child->d_sb);
+ &inode, request, 0, dchild->d_sb);
if (err)
GOTO(out_err, err);
break;
RETURN(-EINVAL);
}
- d_instantiate(child, inode);
+ d_instantiate(dchild, inode);
EXIT;
out_err:
ptlrpc_req_finished(request);
static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dir);
+ const char *name = nd->last.name;
struct mdc_op_data *op_data;
+ int len = nd->last.len;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n",
- name, dir->i_ino, dir->i_generation, dir, tgt);
-
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),target=%s\n",
+ nd->last.len, nd->last.name, dir->i_ino, dir->i_generation,
+ dir, tgt);
+
+ if (dir->i_nlink >= EXT3_LINK_MAX)
+ RETURN(err);
+
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
{
struct inode *src = srcnd->dentry->d_inode;
struct inode *dir = tgtnd->dentry->d_inode;
- const char *name = tgtnd->last.name;
- int len = tgtnd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data *op_data;
int err;
struct ll_sb_info *sbi = ll_i2sbi(dir);
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n",
- src->i_ino, src->i_generation, src, dir->i_ino, dir->i_generation,
- dir, name);
+ CDEBUG(D_VFSTRACE,
+ "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
+ src->i_ino, src->i_generation, src, dir->i_ino,
+ dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name);
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, src, dir, name, len, 0);
+ ll_prepare_mdc_data(op_data, src, dir, tgtnd->last.name,
+ tgtnd->last.len, 0);
err = md_link(sbi->ll_md_exp, op_data, &request);
OBD_FREE(op_data, sizeof(*op_data));
if (err == 0)
static int ll_mkdir_raw(struct nameidata *nd, int mode)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(dir);
struct mdc_op_data *op_data;
int err = -EMLINK;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
+ nd->last.len, 0);
err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode,
current->fsuid, current->fsgid, 0, &request);
OBD_FREE(op_data, sizeof(*op_data));
static int ll_rmdir_raw(struct nameidata *nd)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data *op_data;
int rc;
+
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, dir, NULL, name, len, S_IFDIR);
+ ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name,
+ nd->last.len, S_IFDIR);
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
OBD_FREE(op_data, sizeof(*op_data));
if (rc == 0)
GOTO(out, rc = -EPROTO);
}
- /* The MDS sent back the EA because we unlinked the last reference
- * to this file. Use this EA to unlink the objects on the OST.
- * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
- * check it is complete and sensible. */
+ /*
+ * the MDS sent back the EA because we unlinked the last reference to
+ * this file. Use this EA to unlink the objects on the OST. It's opaque
+ * so we don't swab here; we leave it to obd_unpackmd() to check it is
+ * complete and sensible.
+ */
eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL);
LASSERT(eadata != NULL);
if (eadata == NULL) {
static int ll_unlink_raw(struct nameidata *nd)
{
struct inode *dir = nd->dentry->d_inode;
- const char *name = nd->last.name;
- int len = nd->last.len;
struct ptlrpc_request *request = NULL;
struct mdc_op_data *op_data;
int rc;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
- name, dir->i_ino, dir->i_generation, dir);
+ CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
+ nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
- ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0);
+ ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, nd->last.len, 0);
rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
OBD_FREE(op_data, sizeof(*op_data));
if (rc)
return rc;
}
-static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd)
+static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
{
- struct inode *src = oldnd->dentry->d_inode;
- struct inode *tgt = newnd->dentry->d_inode;
- const char *oldname = oldnd->last.name;
- int oldlen = oldnd->last.len;
- const char *newname = newnd->last.name;
- int newlen = newnd->last.len;
+ struct inode *src = srcnd->dentry->d_inode;
+ struct inode *tgt = tgtnd->dentry->d_inode;
struct ptlrpc_request *request = NULL;
struct ll_sb_info *sbi = ll_i2sbi(src);
struct mdc_op_data *op_data;
int err;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s, src_dir=%lu/%u(%p), newname=%s, "
- "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation,
- src, newname, tgt->i_ino, tgt->i_generation, tgt);
+
+ CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
+ "tgt_dir=%lu/%u(%p)\n", srcnd->last.len, srcnd->last.name,
+ src->i_ino, src->i_generation, src, tgtnd->last.len,
+ tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt);
OBD_ALLOC(op_data, sizeof(*op_data));
if (op_data == NULL)
RETURN(-ENOMEM);
ll_prepare_mdc_data(op_data, src, tgt, NULL, 0, 0);
- err = md_rename(sbi->ll_md_exp, op_data, oldname, oldlen,
- newname, newlen, &request);
+ err = md_rename(sbi->ll_md_exp, op_data, srcnd->last.name,
+ srcnd->last.len, tgtnd->last.name, tgtnd->last.len,
+ &request);
OBD_FREE(op_data, sizeof(*op_data));
if (!err) {
ll_update_times(request, 0, src);
__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
-/* this isn't where truncate starts. roughly:
+/*
+ * this isn't where truncate starts. roughly:
* sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate
- * we grab the lock back in setattr_raw to avoid races. */
+ * we grab the lock back in setattr_raw to avoid races.
+ *
+ * must be called with lli_size_sem held.
+ */
void ll_truncate(struct inode *inode)
{
struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+ struct ll_inode_info *lli = ll_i2info(inode);
struct obdo *oa = NULL;
int rc;
ENTRY;
- CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino,
- inode->i_generation, inode);
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %llu\n", inode->i_ino,
+ inode->i_generation, inode, inode->i_size);
if (!lsm) {
CDEBUG(D_INODE, "truncate on inode %lu with no objects\n",
inode->i_ino);
- EXIT;
- return;
+ GOTO(out_unlock, 0);
}
if (lov_merge_size(lsm, 0) == inode->i_size) {
CDEBUG(D_VFSTRACE, "skipping punch for "LPX64" (size = %llu)\n",
lsm->lsm_object_id, inode->i_size);
- } else {
- CDEBUG(D_INFO, "calling punch for "LPX64" (new size %llu)\n",
- lsm->lsm_object_id, inode->i_size);
+ GOTO(out_unlock, 0);
+ }
+
+ CDEBUG(D_INFO, "calling punch for "LPX64" (new size %llu)\n",
+ lsm->lsm_object_id, inode->i_size);
- oa = obdo_alloc();
- if (oa == NULL) {
- CERROR("cannot alloc oa, error %d\n",
- -ENOMEM);
- EXIT;
- return;
- }
-
- oa->o_id = lsm->lsm_object_id;
- oa->o_gr = lsm->lsm_object_gr;
- oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
- obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
- OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
- /* truncate == punch from new size to absolute end of file */
- /* NB: obd_punch must be called with i_sem held! It updates the kms! */
- rc = obd_punch(ll_i2dtexp(inode), oa, lsm, inode->i_size,
- OBD_OBJECT_EOF, NULL);
- if (rc)
- CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
- else
- obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
- OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
- obdo_free(oa);
- }
+ oa = obdo_alloc();
+ if (oa == NULL) {
+ CERROR("cannot alloc oa, error %d\n",
+ -ENOMEM);
+ /* every exit path must drop lli_size_sem (the function is
+ * entered with it held); failing to do so here would leave
+ * the semaphore held forever -- go through out_unlock. */
+ GOTO(out_unlock, 0);
+ }
+
+ oa->o_id = lsm->lsm_object_id;
+ oa->o_gr = lsm->lsm_object_gr;
+ oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
+ obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+ obd_adjust_kms(ll_i2dtexp(inode), lsm, inode->i_size, 1);
+
+ /* drop the size semaphore before the (blocking) punch RPC */
+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
+ up(&lli->lli_size_sem);
+
+ rc = obd_punch(ll_i2dtexp(inode), oa, lsm, inode->i_size,
+ OBD_OBJECT_EOF, NULL);
+ if (rc)
+ CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino);
+ else
+ obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
+ OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+
+ obdo_free(oa);
EXIT;
return;
+
+out_unlock:
+ LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0);
+ up(&lli->lli_size_sem);
} /* ll_truncate */
int ll_prepare_write(struct file *file, struct page *page, unsigned from,
/* If are writing to a new page, no need to read old data. The extent
* locking will have updated the KMS, and for our purposes here we can
* treat it like i_size. */
+ down(&lli->lli_size_sem);
kms = lov_merge_size(lsm, 1);
+ up(&lli->lli_size_sem);
if (kms <= offset) {
memset(kmap(page), 0, PAGE_SIZE);
kunmap(page);
return rc;
}
-struct ll_async_page *llap_from_cookie(void *cookie)
-{
- struct ll_async_page *llap = cookie;
- if (llap->llap_magic != LLAP_MAGIC)
- return ERR_PTR(-EINVAL);
- return llap;
-};
-
static int ll_ap_make_ready(void *data, int cmd)
{
struct ll_async_page *llap;
struct page *page;
ENTRY;
- llap = llap_from_cookie(data);
- if (IS_ERR(llap))
- RETURN(-EINVAL);
-
+ llap = LLAP_FROM_COOKIE(data);
page = llap->llap_page;
LASSERT(cmd != OBD_BRW_READ);
/* readpage queues with _COUNT_STABLE, shouldn't get here. */
LASSERT(cmd != OBD_BRW_READ);
- llap = llap_from_cookie(data);
- if (IS_ERR(llap))
- RETURN(PTR_ERR(llap));
-
+ llap = LLAP_FROM_COOKIE(data);
page = llap->llap_page;
lsm = ll_i2info(page->mapping->host)->lli_smd;
kms = lov_merge_size(lsm, 1);
struct ll_async_page *llap;
ENTRY;
- llap = llap_from_cookie(data);
- if (IS_ERR(llap)) {
- EXIT;
- return;
- }
-
+ llap = LLAP_FROM_COOKIE(data);
ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa);
EXIT;
}
.ap_completion = ll_ap_completion,
};
+
struct ll_async_page *llap_cast_private(struct page *page)
{
struct ll_async_page *llap = (struct ll_async_page *)page->private;
}
/* XXX have the exp be an argument? */
-struct ll_async_page *llap_from_page(struct page *page)
+struct ll_async_page *llap_from_page(struct page *page, unsigned origin)
{
struct ll_async_page *llap;
struct obd_export *exp;
int rc;
ENTRY;
+ LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin);
+
llap = llap_cast_private(page);
if (llap != NULL)
- RETURN(llap);
+ GOTO(out, llap);
exp = ll_i2dtexp(page->mapping->host);
if (exp == NULL)
list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist);
spin_unlock(&sbi->ll_lock);
+out:
+ llap->llap_origin = origin;
RETURN(llap);
}
return rc;
}
-void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
- obd_off size);
-
/* be careful not to return success without setting the page Uptodate or
* the next pass through prepare_write will read in stale data from disk. */
int ll_commit_write(struct file *file, struct page *page, unsigned from,
CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n",
inode, page, from, to, page->index);
- llap = llap_from_page(page);
+ llap = llap_from_page(page, LLAP_ORIGIN_COMMIT_WRITE);
if (IS_ERR(llap))
RETURN(PTR_ERR(llap));
+ exp = ll_i2dtexp(inode);
+ if (exp == NULL)
+ RETURN(-EINVAL);
+
/* queue a write for some time in the future the first time we
* dirty the page */
if (!PageDirty(page)) {
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats,
LPROC_LL_DIRTY_MISSES);
- exp = ll_i2dtexp(inode);
- if (exp == NULL)
- RETURN(-EINVAL);
-
- rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap,
- to, 0);
+ rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd,
+ llap, to, 0);
if (rc)
GOTO(out, rc);
} else {
set_page_dirty(page);
EXIT;
out:
+ size = (((obd_off)page->index) << PAGE_SHIFT) + to;
+ down(&lli->lli_size_sem);
if (rc == 0) {
- size = (((obd_off)page->index) << PAGE_SHIFT) + to;
- lov_increase_kms(ll_i2dtexp(inode), lsm, size);
+ obd_adjust_kms(exp, lsm, size, 0);
if (size > inode->i_size)
inode->i_size = size;
SetPageUptodate(page);
+ } else if (size > inode->i_size) {
+ /* this page beyond the pales of i_size, so it can't be
+ * truncated in ll_p_r_e during lock revoking. we must
+ * teardown our book-keeping here. */
+ ll_removepage(page);
}
+ up(&lli->lli_size_sem);
return rc;
}
-
+
+/*
+ * Reserve up to @len pages from the per-superblock readahead budget
+ * (ra_max_pages).  Returns how many pages were actually granted, which
+ * may be fewer than @len (possibly zero) when the budget is nearly
+ * exhausted.  Accounting is serialized by sbi->ll_lock; the grant is
+ * presumably returned later via ll_ra_count_put() -- verify callers pair
+ * the two.
+ */
+static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
+{
+ struct ll_ra_info *ra = &sbi->ll_ra_info;
+ unsigned long ret;
+ ENTRY;
+
+ spin_lock(&sbi->ll_lock);
+ /* NOTE(review): assumes ra_cur_pages <= ra_max_pages; if that
+ * invariant ever broke, this unsigned subtraction would wrap and
+ * grant a huge count -- confirm the invariant holds under ll_lock. */
+ ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
+ ra->ra_cur_pages += ret;
+ spin_unlock(&sbi->ll_lock);
+
+ RETURN(ret);
+}
+
+/*
+ * Return @len previously-reserved pages to the readahead budget taken by
+ * ll_ra_count_get().  The LASSERTF guards against returning more pages
+ * than are currently accounted (i.e. unbalanced get/put).
+ */
+static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
+{
+ struct ll_ra_info *ra = &sbi->ll_ra_info;
+ spin_lock(&sbi->ll_lock);
+ LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
+ ra->ra_cur_pages, len);
+ ra->ra_cur_pages -= len;
+ spin_unlock(&sbi->ll_lock);
+}
+
int ll_writepage(struct page *page)
{
struct inode *inode = page->mapping->host;
if (exp == NULL)
GOTO(out, rc = -EINVAL);
- llap = llap_from_page(page);
+ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
if (IS_ERR(llap))
GOTO(out, rc = PTR_ERR(llap));
return rc;
}
-static unsigned long
-ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- unsigned long ret;
- ENTRY;
-
- spin_lock(&sbi->ll_lock);
- ret = min(ra->ra_max_pages - ra->ra_cur_pages, len);
- ra->ra_cur_pages += ret;
- spin_unlock(&sbi->ll_lock);
-
- RETURN(ret);
-}
-
-static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len)
-{
- struct ll_ra_info *ra = &sbi->ll_ra_info;
- spin_lock(&sbi->ll_lock);
- LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n",
- ra->ra_cur_pages, len);
- ra->ra_cur_pages -= len;
- spin_unlock(&sbi->ll_lock);
-}
-
/* called for each page in a completed rpc.*/
void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
struct page *page;
ENTRY;
- llap = llap_from_cookie(data);
- if (IS_ERR(llap)) {
- EXIT;
- return;
- }
-
+ llap = LLAP_FROM_COOKIE(data);
page = llap->llap_page;
LASSERT(PageLocked(page));
return;
}
- llap = llap_from_page(page);
+ llap = llap_from_page(page, 0);
if (IS_ERR(llap)) {
CERROR("page %p ind %lu couldn't find llap: %ld\n", page,
page->index, PTR_ERR(llap));
{
struct ll_async_page *llap;
- llap = llap_from_page(page);
+ llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE);
if (IS_ERR(llap))
return;
/* we do this first so that we can see the page in the /proc
* accounting */
- llap = llap_from_page(page);
+ llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD);
if (IS_ERR(llap) || llap->llap_defer_uptodate)
goto next_page;
if (exp == NULL)
GOTO(out, rc = -EINVAL);
- llap = llap_from_page(page);
+ llap = llap_from_page(page, LLAP_ORIGIN_READPAGE);
if (IS_ERR(llap))
GOTO(out, rc = PTR_ERR(llap));
CERROR("error from callback: rc = %d\n", rc);
}
ptlrpc_set_destroy(set);
- if (rc == 0 && rw == WRITE) {
- void lov_increase_kms(struct obd_export *,
- struct lov_stripe_md *, obd_off size);
- obd_off size = offset + length;
- lov_increase_kms(ll_i2dtexp(inode), lsm, size);
- if (size > inode->i_size)
- inode->i_size = size;
- }
if (rc == 0) {
rc = iobuf->length;
- obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS);
+ if (rw == WRITE)
+ obd_adjust_kms(ll_i2dtexp(inode), lsm, offset, 0);
}
obdo_free(oa);
EXIT;
return rc;
}
+#ifdef KERNEL_HAS_AS_MAX_READAHEAD
+/* address_space max_readahead method.  Returning 0 presumably disables
+ * the kernel's generic readahead so Lustre can drive its own (see the
+ * ll_ra_count_* budget above) -- confirm against the patched kernel's
+ * a_ops->max_readahead semantics. */
+static int ll_max_readahead(struct inode *inode)
+{
+ return 0;
+}
+#endif
+
struct address_space_operations ll_aops = {
.readpage = ll_readpage,
.direct_IO = ll_direct_IO_24,
.commit_write = ll_commit_write,
.removepage = ll_removepage,
.sync_page = NULL,
- .bmap = NULL
+ .bmap = NULL,
+#ifdef KERNEL_HAS_AS_MAX_READAHEAD
+ .max_readahead = ll_max_readahead,
+#endif
};
{
struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
struct file_operations *sfops = filp->f_op;
+ struct ll_inode_info *lli = ll_i2info(inode);
struct ptlrpc_request *req;
struct lookup_intent *it;
int rc = -EINVAL, err;
+ struct obd_client_handle **och_p;
+ __u64 *och_usecount;
ENTRY;
+ it = filp->f_it;
+
+ if (LUSTRE_IT(it)->it_disposition) {
+ err = it_open_error(DISP_OPEN_OPEN, it);
+ if (err)
+ RETURN(err);
+ }
+
if (pfop && *pfop) {
/* mostly we will have @def_blk_fops here and it is not in a
* module but we do this just to be sure. */
}
}
+ /* Let's see if we have file open on MDS already. */
+ if (it->it_flags & FMODE_WRITE) {
+ och_p = &lli->lli_mds_write_och;
+ och_usecount = &lli->lli_open_fd_write_count;
+ } else if (it->it_flags & FMODE_EXEC) {
+ och_p = &lli->lli_mds_exec_och;
+ och_usecount = &lli->lli_open_fd_exec_count;
+ } else {
+ och_p = &lli->lli_mds_read_och;
+ och_usecount = &lli->lli_open_fd_read_count;
+ }
+
lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
- it = filp->f_it;
+ down(&lli->lli_och_sem);
+ if (*och_p) { /* Open handle is present */
+ if (LUSTRE_IT(it)->it_disposition) {
+ struct obd_client_handle *och;
+ /* Well, there's extra open request that we do not need,
+ let's close it somehow*/
+ OBD_ALLOC(och, sizeof (struct obd_client_handle));
+ if (!och) {
+ /* XXX We leak open fd and open OPEN connectioni
+ to server here */
+ up(&lli->lli_och_sem);
+ RETURN(-ENOMEM);
+ }
+ ll_och_fill(inode, it, och);
+ /* ll_md_och_close() will free och */
+ ll_md_och_close(ll_i2mdexp(inode), inode, och);
+ }
+ (*och_usecount)++;
+
+ err = ll_local_open(filp, it, NULL);
+ } else {
+ LASSERT(*och_usecount == 0);
+ OBD_ALLOC(*och_p, sizeof (struct obd_client_handle));
+ if (!*och_p) {
+ // XXX Same as above
+ up(&lli->lli_och_sem);
+ RETURN(-ENOMEM);
+ }
+ (*och_usecount)++;
+
+ err = ll_local_open(filp, it, *och_p);
+ }
+ up(&lli->lli_och_sem);
- err = ll_local_open(filp, it);
if (rc != 0) {
CERROR("error opening special file: rc %d\n", rc);
ll_md_close(ll_i2sbi(inode)->ll_md_exp, inode, filp);
if (rc) {
if (rc != -ENOENT)
CERROR("inode %lu: rc = %d\n", inode->i_ino, rc);
- RETURN(rc);
+ GOTO(failed, rc);
}
body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body));
failed:
ptlrpc_req_finished (*request);
- RETURN (-EPROTO);
+ RETURN(rc);
}
static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
down(&lli->lli_open_sem);
rc = ll_readlink_internal(inode, &request, &symname);
up(&lli->lli_open_sem);
- if (rc)
+ if (rc) {
+ path_release(nd); /* Kernel assumes that ->follow_link()
+ releases nameidata on error */
GOTO(out, rc);
+ }
rc = vfs_follow_link(nd, symname);
ptlrpc_req_finished(request);
#include <linux/lustre_lite.h>
#include "lmv_internal.h"
+/* not defined for liblustre building */
+#if !defined(ATOMIC_INIT)
+#define ATOMIC_INIT(val) { (val) }
+#endif
+
/* object cache. */
kmem_cache_t *obj_cache;
atomic_t obj_cache_count = ATOMIC_INIT(0);
md.mea = NULL;
mealen = MEA_SIZE_LMV(lmv);
- valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
+ valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA;
/* time to update mea of parent id */
rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
lsmp = (struct mea *)lsm;
meap = (struct mea *)*lmmp;
+ if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR &&
+ lsmp->mea_magic != MEA_MAGIC_ALL_CHARS)
+ RETURN(-EINVAL);
+
meap->mea_magic = cpu_to_le32(lsmp->mea_magic);
meap->mea_count = cpu_to_le32(lsmp->mea_count);
meap->mea_master = cpu_to_le32(lsmp->mea_master);
RETURN(mea_size);
}
-int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt,
- struct lov_mds_md *disk_src, int mdsize)
+/*
+ * Unpack an on-disk MEA (new- or old-format) into an in-memory struct mea.
+ * With lsmp == NULL only the in-memory size is returned; with lmm == NULL
+ * an existing *lsmp is freed.  Returns the in-memory mea size on success
+ * or a negative errno.
+ */
+int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_size)
{
struct obd_device *obd = class_exp2obd(exp);
- struct mea **tmea = (struct mea **)mem_tgt;
- struct mea *mea = (struct mea *)disk_src;
+ struct mea **tmea = (struct mea **)lsmp;
+ struct mea *mea = (struct mea *)lmm;
struct lmv_obd *lmv = &obd->u.lmv;
- int mea_size, i;
+ int mea_size, old_size, i, rc = 0;
+ __u32 magic;
ENTRY;
- mea_size = sizeof(struct lustre_id) *
+ /* mea_size is the in-memory allocation size; it must stay intact
+ * for the whole function so OBD_FREE() sizes always match it. */
+ mea_size = sizeof(struct lustre_id) *
lmv->desc.ld_tgt_count + sizeof(struct mea);
- if (mem_tgt == NULL)
+
+ if (lsmp == NULL)
return mea_size;
- if (*mem_tgt != NULL && disk_src == NULL) {
+ if (*lsmp != NULL && lmm == NULL) {
OBD_FREE(*tmea, mea_size);
RETURN(0);
}
- LASSERT(mea_size == mdsize);
+ LASSERT(mea_size == lmm_size);
OBD_ALLOC(*tmea, mea_size);
if (*tmea == NULL)
RETURN(-ENOMEM);
- if (!disk_src)
+ if (!lmm)
RETURN(mea_size);
- (*tmea)->mea_magic = le32_to_cpu(mea->mea_magic);
+ /* NOTE(review): the magic is compared without byte-swapping, so on a
+ * big-endian host a new-format MEA would fall into the old-format
+ * path -- verify this matches the on-disk format assumptions. */
+ if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
+ mea->mea_magic == MEA_MAGIC_ALL_CHARS)
+ {
+ magic = le32_to_cpu(mea->mea_magic);
+ } else {
+ struct mea_old *old = (struct mea_old *)lmm;
+
+ /* validate against a separate old_size: reusing mea_size
+ * here would make the OBD_FREE at out_free_mea (and the
+ * final RETURN) disagree with the allocation size above */
+ old_size = sizeof(struct lustre_id) * old->mea_count +
+ sizeof(struct mea_old);
+
+ if (old->mea_count > 256 || old->mea_master > 256 ||
+ lmm_size < old_size || old->mea_master > old->mea_count) {
+ CWARN("bad MEA: count %u, master %u, size %u\n",
+ old->mea_count, old->mea_master, old_size);
+ GOTO(out_free_mea, rc = -EINVAL);
+ }
+ magic = MEA_MAGIC_LAST_CHAR;
+ }
+
+ (*tmea)->mea_magic = magic;
(*tmea)->mea_count = le32_to_cpu(mea->mea_count);
(*tmea)->mea_master = le32_to_cpu(mea->mea_master);
- for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ for (i = 0; i < (*tmea)->mea_count; i++) {
(*tmea)->mea_ids[i] = mea->mea_ids[i];
id_le_to_cpu(&(*tmea)->mea_ids[i]);
}
-
RETURN(mea_size);
+
+out_free_mea:
+ OBD_FREE(*tmea, mea_size);
+ return rc;
}
int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa,
/* time to update mea of parent id */
md.mea = NULL;
- valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA;
+ valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA;
rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp,
id, valid, NULL, 0, mealen, &req);
MODULES := lov
-lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o
+lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o
@INCLUDE_RULES@
if LIBLUSTRE
noinst_LIBRARIES = liblov.a
-liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_internal.h
+liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_internal.h
liblov_a_CPPFLAGS = $(LLCPPFLAGS)
liblov_a_CFLAGS = $(LLCFLAGS)
endif
#include <lustre/lustre_user.h>
+/* One hashed handle-set covering the per-stripe DLM lock handles of a
+ * striped file.  Refcounted; freed by lov_llh_put() below. */
+struct lov_lock_handles {
+ struct portals_handle llh_handle; /* entry in the class handle hash */
+ atomic_t llh_refcount;
+ int llh_stripe_count; /* number of entries below */
+ struct lustre_handle llh_handles[0]; /* one handle per stripe */
+};
+
+/* One per-stripe sub-request of a lov_request_set. */
+struct lov_request {
+ struct list_head rq_link; /* linked on set->set_list */
+ struct ldlm_extent rq_extent;
+ int rq_idx; /* index in lov->tgts array */
+ int rq_stripe; /* stripe number */
+ int rq_complete;
+ int rq_rc;
+ int rq_buflen; /* length of sub_md */
+ struct obdo *rq_oa;
+ struct lov_stripe_md *rq_md;
+ obd_count rq_oabufs;
+ obd_count rq_pgaidx;
+};
+
+/* A set of per-stripe requests fanned out for one LOV operation
+ * (create/brw/getattr/...; see the lov_prep_*_set functions). */
+struct lov_request_set {
+ atomic_t set_refcount;
+ struct obd_export *set_exp;
+ int set_count; /* requests in set_list */
+ int set_completes; /* finished so far */
+ int set_success; /* of those, successful */
+ struct llog_cookie *set_cookies;
+ int set_cookie_sent;
+ struct lov_stripe_md *set_md;
+ struct obdo *set_oa;
+ struct obd_trans_info *set_oti;
+ obd_count set_oabufs;
+ struct brw_page *set_pga;
+ struct lov_lock_handles *set_lockh;
+ struct list_head set_list; /* the lov_requests */
+};
+
#define LAP_MAGIC 8200
#define LOV_MAX_TGT_COUNT 1024
obd_off lap_sub_offset;
void *lap_sub_cookie;
struct obd_async_page_ops *lap_caller_ops;
- struct obd_async_page_ops *lap_caller_data;
+ void *lap_caller_data;
obd_id lap_loi_id;
};
+#define LAP_FROM_COOKIE(c) \
+ (LASSERT(((struct lov_async_page *)(c))->lap_magic == LAP_MAGIC), \
+ (struct lov_async_page *)(c))
+
+/* portals handle addref callback: take another reference on the
+ * lock-handle set (paired with lov_llh_put()). */
+static inline void lov_llh_addref(void *llhp)
+{
+ struct lov_lock_handles *llh = llhp;
+ atomic_inc(&llh->llh_refcount);
+ CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
+ atomic_read(&llh->llh_refcount));
+}
+
+/* Allocate and hash a lock-handle set with one lustre_handle per stripe
+ * of @lsm.  Returns NULL on allocation failure.  The refcount starts at
+ * 2 -- presumably one reference for the caller and one held by the
+ * handle hash; verify against how callers drop it. */
+static inline struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
+{
+ struct lov_lock_handles *llh;
+
+ OBD_ALLOC(llh, sizeof *llh +
+ sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
+ if (llh == NULL)
+ return NULL;
+ atomic_set(&llh->llh_refcount, 2);
+ llh->llh_stripe_count = lsm->lsm_stripe_count;
+ INIT_LIST_HEAD(&llh->llh_handle.h_link);
+ class_handle_hash(&llh->llh_handle, lov_llh_addref);
+ return llh;
+}
+
+/* Look up a hashed lock-handle set by its wire cookie; returns NULL if
+ * the cookie no longer resolves.  The lookup takes a reference via the
+ * lov_llh_addref callback registered in lov_llh_new(). */
+static inline struct lov_lock_handles *
+lov_handle2llh(struct lustre_handle *handle)
+{
+ LASSERT(handle != NULL);
+ return(class_handle2object(handle->cookie));
+}
+
+/* Drop a reference on a lock-handle set; on the final put, unhash it
+ * and free the structure (including the per-stripe handle array).
+ * The 0x5a5a upper bound catches use of OBD-poisoned (freed) memory. */
+static inline void lov_llh_put(struct lov_lock_handles *llh)
+{
+ CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh,
+ atomic_read(&llh->llh_refcount) - 1);
+ LASSERT(atomic_read(&llh->llh_refcount) > 0 &&
+ atomic_read(&llh->llh_refcount) < 0x5a5a);
+ if (atomic_dec_and_test(&llh->llh_refcount)) {
+ class_handle_unhash(&llh->llh_handle);
+ LASSERT(list_empty(&llh->llh_handle.h_link));
+ OBD_FREE(llh, sizeof *llh +
+ sizeof(*llh->llh_handles) * llh->llh_stripe_count);
+ }
+}
+
+/* lov_merge.c */
+void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flags valid,
+ struct lov_stripe_md *lsm, int stripeno, int *set);
+
+int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink);
+/* lov_offset.c */
+obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
+ int stripeno);
+int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+ int stripeno, obd_off *obd_off);
+obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size,
+ int stripeno);
+int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
+ obd_off start, obd_off end,
+ obd_off *obd_start, obd_off *obd_end);
+int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off);
+
+/* lov_qos.c */
+void qos_shrink_lsm(struct lov_request_set *set);
+int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set,
+ int newea);
+
+/* lov_request.c */
+void lov_set_add_req(struct lov_request *req, struct lov_request_set *set);
+int lov_update_common_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **ea,
+ struct obdo *src_oa, struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_update_create_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea);
+int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_count oa_bufs,
+ struct brw_page *pga, struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_fini_brw_set(struct lov_request_set *set);
+int lov_prep_getattr_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm,
+ struct lov_request_set **reqset);
+int lov_fini_getattr_set(struct lov_request_set *set);
+int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_update_destroy_set(struct lov_request_set *set,
+ struct lov_request *req, int rc);
+int lov_fini_destroy_set(struct lov_request_set *set);
+int lov_prep_setattr_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_fini_setattr_set(struct lov_request_set *set);
+int lov_prep_punch_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_off start,
+ obd_off end, struct obd_trans_info *oti,
+ struct lov_request_set **reqset);
+int lov_update_punch_set(struct lov_request_set *set, struct lov_request *req,
+ int rc);
+int lov_fini_punch_set(struct lov_request_set *set);
+int lov_prep_sync_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_off start,
+ obd_off end, struct lov_request_set **reqset);
+int lov_fini_sync_set(struct lov_request_set *set);
+int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_policy_data_t *policy, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset);
+int lov_update_enqueue_set(struct lov_request_set *set,
+ struct lov_request *req, int rc, int flags);
+int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode);
+int lov_prep_match_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_policy_data_t *policy, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset);
+int lov_update_match_set(struct lov_request_set *set, struct lov_request *req,
+ int rc);
+int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags);
+int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 mode, struct lustre_handle *lockh,
+ struct lov_request_set **reqset);
+int lov_fini_cancel_set(struct lov_request_set *set);
+
/* lov_obd.c */
int lov_get_stripecnt(struct lov_obd *lov, int stripe_count);
int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#include <asm/div64.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/obd_lov.h>
+
+#include "lov_internal.h"
+
+/* Compute the merged file size over all stripes.  With @kms != 0 only
+ * the known-minimum-size of each stripe is used; with @kms == 0 the RSS
+ * is merged as well, but the KMS value still wins if it is larger --
+ * this prevents getattr from stomping on dirty cached pages which
+ * extend the file size. */
+__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms)
+{
+ struct lov_oinfo *loi;
+ __u64 size = 0;
+ int i;
+
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+ i++, loi++) {
+ obd_size lov_size, tmpsize;
+
+ tmpsize = loi->loi_kms;
+ if (kms == 0 && loi->loi_rss > tmpsize)
+ tmpsize = loi->loi_rss;
+
+ /* map the per-stripe size back to a file offset; the file
+ * size is the maximum over all stripes */
+ lov_size = lov_stripe_size(lsm, tmpsize, i);
+ if (lov_size > size)
+ size = lov_size;
+ }
+
+ return size;
+}
+EXPORT_SYMBOL(lov_merge_size);
+
+/* Merged block count of a striped object: simply the sum of the
+ * cached per-stripe block counts. */
+__u64 lov_merge_blocks(struct lov_stripe_md *lsm)
+{
+ struct lov_oinfo *loi;
+ __u64 blocks = 0;
+ int i;
+
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++)
+ blocks += loi->loi_blocks;
+ return blocks;
+}
+EXPORT_SYMBOL(lov_merge_blocks);
+
+/* Merged mtime of a striped object: the newest per-stripe mtime, but
+ * never earlier than @current_time (the caller's current value). */
+__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time)
+{
+ struct lov_oinfo *loi;
+ int i;
+
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++)
+ if (loi->loi_mtime > current_time)
+ current_time = loi->loi_mtime;
+ return current_time;
+}
+EXPORT_SYMBOL(lov_merge_mtime);
+
+/*
+ * Adjust the per-stripe known-minimum-size (KMS) after a file size
+ * change to @size.  With @shrink set, every stripe's kms and rss are
+ * clamped down to the size that stripe would have at file size @size
+ * (truncate).  Otherwise only the stripe holding the last byte may have
+ * its kms raised; kms is never lowered on the grow path.  Returns 0.
+ */
+int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
+ obd_off size, int shrink)
+{
+ struct lov_oinfo *loi;
+ int stripe = 0;
+ __u64 kms;
+ ENTRY;
+
+ if (shrink) {
+ /* (the original re-declared loi here, shadowing the outer
+ * variable; one declaration is enough) */
+ int i;
+ for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
+ i++, loi++) {
+ kms = lov_size_to_stripe(lsm, size, i);
+ loi->loi_kms = loi->loi_rss = kms;
+ }
+ RETURN(0);
+ }
+
+ /* grow: find the stripe containing the last byte of the file */
+ if (size > 0)
+ stripe = lov_stripe_number(lsm, size - 1);
+ kms = lov_size_to_stripe(lsm, size, stripe);
+ loi = &(lsm->lsm_oinfo[stripe]);
+
+ CDEBUG(D_INODE, "stripe %d KMS %sincreasing "LPU64"->"LPU64"\n",
+ stripe, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms);
+ if (kms > loi->loi_kms)
+ loi->loi_kms = kms;
+
+ RETURN(0);
+}
+EXPORT_SYMBOL(lov_adjust_kms);
+
+/*
+ * Fold one stripe's attributes (@src, from stripe @stripeno) into the
+ * merged result @tgt.  Only fields marked valid in both @valid and
+ * src->o_valid are merged.  *set tracks whether @tgt has been
+ * initialized yet: the first call copies, later calls merge.
+ */
+void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flags valid,
+ struct lov_stripe_md *lsm, int stripeno, int *set)
+{
+ valid &= src->o_valid;
+
+ if (*set) {
+ if (valid & OBD_MD_FLSIZE) {
+ /* this handles sparse files properly */
+ obd_size lov_size;
+
+ lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
+ if (lov_size > tgt->o_size)
+ tgt->o_size = lov_size;
+ }
+ if (valid & OBD_MD_FLBLOCKS)
+ tgt->o_blocks += src->o_blocks;
+ /* NOTE(review): summing o_blksize (rather than taking the
+ * max) looks odd but matches the code this replaces --
+ * confirm it is intentional. */
+ if (valid & OBD_MD_FLBLKSZ)
+ tgt->o_blksize += src->o_blksize;
+ if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
+ tgt->o_ctime = src->o_ctime;
+ if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
+ tgt->o_mtime = src->o_mtime;
+ } else {
+ memcpy(tgt, src, sizeof(*tgt));
+ tgt->o_id = lsm->lsm_object_id;
+ if (valid & OBD_MD_FLSIZE)
+ tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
+ *set = 1;
+ }
+}
#include "lov_internal.h"
-static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
- int stripeno, obd_off *obd_off);
-
-struct lov_lock_handles {
- struct portals_handle llh_handle;
- atomic_t llh_refcount;
- int llh_stripe_count;
- struct lustre_handle llh_handles[0];
-};
-
-static void lov_llh_addref(void *llhp)
-{
- struct lov_lock_handles *llh = llhp;
-
- atomic_inc(&llh->llh_refcount);
- CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh,
- atomic_read(&llh->llh_refcount));
-}
-
-static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
-{
- struct lov_lock_handles *llh;
-
- OBD_ALLOC(llh, sizeof *llh +
- sizeof(*llh->llh_handles) * lsm->lsm_stripe_count);
- if (llh == NULL) {
- CERROR("out of memory\n");
- return NULL;
- }
- atomic_set(&llh->llh_refcount, 2);
- llh->llh_stripe_count = lsm->lsm_stripe_count;
- INIT_LIST_HEAD(&llh->llh_handle.h_link);
- class_handle_hash(&llh->llh_handle, lov_llh_addref);
- return llh;
-}
-
-static struct lov_lock_handles *lov_handle2llh(struct lustre_handle *handle)
-{
- ENTRY;
- LASSERT(handle != NULL);
- RETURN(class_handle2object(handle->cookie));
-}
-
-static void lov_llh_put(struct lov_lock_handles *llh)
-{
- CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh,
- atomic_read(&llh->llh_refcount) - 1);
- LASSERT(atomic_read(&llh->llh_refcount) > 0 &&
- atomic_read(&llh->llh_refcount) < 0x5a5a);
- if (atomic_dec_and_test(&llh->llh_refcount)) {
- LASSERT(list_empty(&llh->llh_handle.h_link));
- OBD_FREE(llh, sizeof *llh +
- sizeof(*llh->llh_handles) * llh->llh_stripe_count);
- }
-}
-
-static void lov_llh_destroy(struct lov_lock_handles *llh)
-{
- class_handle_unhash(&llh->llh_handle);
- lov_llh_put(llh);
-}
-
/* obd methods */
#define MAX_STRING_SIZE 128
static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt,
}
}
#endif
-
if (obd->obd_no_recov) {
/* Pass it on to our clients.
* XXX This should be an argument to disconnect,
tgt->active = 0;
lov->desc.ld_active_tgt_count--;
}
-
tgt->ltd_exp = NULL;
RETURN(0);
}
obd->obd_type->typ_name, obd->obd_name);
}
#endif
-
+
out_local:
rc = class_disconnect(exp, 0);
RETURN(rc);
spin_lock(&lov->lov_lock);
for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) {
+ if (tgt->ltd_exp == NULL)
+ continue;
+
CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n",
i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie);
if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0)
RETURN(rc);
}
-/* compute object size given "stripeno" and the ost size */
-static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
- int stripeno)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- unsigned long swidth = ssize * lsm->lsm_stripe_count;
- unsigned long stripe_size;
- obd_size lov_size;
-
- if (ost_size == 0)
- return 0;
-
- /* do_div(a, b) returns a % b, and a = a / b */
- stripe_size = do_div(ost_size, ssize);
- if (stripe_size)
- lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
- else
- lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
-
- return lov_size;
-}
-
-static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid,
- struct lov_stripe_md *lsm, int stripeno, int *set)
-{
- valid &= src->o_valid;
-
- if (*set) {
- if (valid & OBD_MD_FLSIZE) {
- /* this handles sparse files properly */
- obd_size lov_size;
-
- lov_size = lov_stripe_size(lsm, src->o_size, stripeno);
- if (lov_size > tgt->o_size)
- tgt->o_size = lov_size;
- }
- if (valid & OBD_MD_FLBLOCKS)
- tgt->o_blocks += src->o_blocks;
- if (valid & OBD_MD_FLBLKSZ)
- tgt->o_blksize += src->o_blksize;
- if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime)
- tgt->o_ctime = src->o_ctime;
- if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime)
- tgt->o_mtime = src->o_mtime;
- } else {
- memcpy(tgt, src, sizeof(*tgt));
- tgt->o_id = lsm->lsm_object_id;
- if (valid & OBD_MD_FLSIZE)
- tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno);
- *set = 1;
- }
-}
-
#ifndef log2
#define log2(n) ffz(~(n))
#endif
RETURN(rc);
}
-#define LOV_CREATE_RESEED_INTERVAL 1000
+/*
+ * Re-create a specific missing object (id src_oa->o_id) on the OST whose
+ * index is passed in src_oa->o_nlink.  The object must already appear in
+ * the file's stripe metadata *ea; otherwise -EINVAL.  Requires
+ * OBD_FL_RECREATE_OBJS in src_oa->o_flags.
+ */
+static int lov_recreate(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md **ea, struct obd_trans_info *oti)
+{
+ struct lov_stripe_md *obj_mdp, *lsm;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ unsigned ost_idx;
+ int rc, i;
+ ENTRY;
+
+ LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS &&
+ src_oa->o_flags & OBD_FL_RECREATE_OBJS);
+
+ OBD_ALLOC(obj_mdp, sizeof(*obj_mdp));
+ if (obj_mdp == NULL)
+ RETURN(-ENOMEM);
+
+ /* o_nlink is (ab)used to carry the target OST index */
+ ost_idx = src_oa->o_nlink;
+ lsm = *ea;
+ if (lsm == NULL)
+ GOTO(out, rc = -EINVAL);
+ if (ost_idx >= lov->desc.ld_tgt_count)
+ GOTO(out, rc = -EINVAL);
+
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) {
+ /* NOTE(review): the inline code this helper replaces
+ * also rejected a mismatched loi_gr (object group);
+ * that check is dropped here -- confirm intentional. */
+ if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id)
+ GOTO(out, rc = -EINVAL);
+ break;
+ }
+ }
+ /* the requested OST is not one of this file's stripes */
+ if (i == lsm->lsm_stripe_count)
+ GOTO(out, rc = -EINVAL);
+
+ rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti);
+out:
+ OBD_FREE(obj_mdp, sizeof(*obj_mdp));
+ RETURN(rc);
+}
/* the LOV expects oa->o_id to be set to the LOV object id */
static int lov_create(struct obd_export *exp, struct obdo *src_oa,
struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
- static int ost_start_idx, ost_start_count;
+ struct lov_request_set *set = NULL;
+ struct list_head *pos;
struct lov_obd *lov;
- struct lov_stripe_md *lsm;
- struct lov_oinfo *loi = NULL;
- struct obdo *tmp_oa, *ret_oa;
- struct llog_cookie *cookies = NULL;
- unsigned ost_count, ost_idx;
- int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i;
+ int rc = 0;
ENTRY;
LASSERT(ea != NULL);
+ if (exp == NULL)
+ RETURN(-EINVAL);
if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
src_oa->o_flags == OBD_FL_DELORPHAN) {
RETURN(rc);
}
- if (exp == NULL)
- RETURN(-EINVAL);
-
lov = &exp->exp_obd->u.lov;
-
if (!lov->desc.ld_active_tgt_count)
RETURN(-EIO);
/* Recreate a specific object id at the given OST index */
if ((src_oa->o_valid & OBD_MD_FLFLAGS) &&
(src_oa->o_flags & OBD_FL_RECREATE_OBJS)) {
- struct lov_stripe_md obj_md;
- struct lov_stripe_md *obj_mdp = &obj_md;
-
- ost_idx = src_oa->o_nlink;
- lsm = *ea;
- if (lsm == NULL)
- RETURN(-EINVAL);
- if (ost_idx >= lov->desc.ld_tgt_count)
- RETURN(-EINVAL);
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) {
- if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id ||
- lsm->lsm_oinfo[i].loi_gr != src_oa->o_gr) {
- RETURN(-EINVAL);
- }
- break;
- }
- }
- if (i == lsm->lsm_stripe_count)
- RETURN(-EINVAL);
-
- rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa,
- &obj_mdp, oti);
+ rc = lov_recreate(exp, src_oa, ea, oti);
RETURN(rc);
}
- ret_oa = obdo_alloc();
- if (!ret_oa)
- RETURN(-ENOMEM);
-
- tmp_oa = obdo_alloc();
- if (!tmp_oa)
- GOTO(out_oa, rc = -ENOMEM);
-
- lsm = *ea;
- if (lsm == NULL) {
- int stripes;
- ost_count = lov_get_stripecnt(lov, 0);
-
- /* If the MDS file was truncated up to some size, stripe over
- * enough OSTs to allow the file to be created at that size. */
- if (src_oa->o_valid & OBD_MD_FLSIZE) {
- stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
- do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
-
- if (stripes > lov->desc.ld_active_tgt_count)
- RETURN(-EFBIG);
- if (stripes < ost_count)
- stripes = ost_count;
- } else {
- stripes = ost_count;
- }
-
- rc = lov_alloc_memmd(&lsm, stripes, lov->desc.ld_pattern ?
- lov->desc.ld_pattern : LOV_PATTERN_RAID0);
- if (rc < 0)
- GOTO(out_tmp, rc);
-
- rc = 0;
- }
-
- ost_count = lov->desc.ld_tgt_count;
-
- LASSERT(src_oa->o_gr > 0);
- LASSERT(src_oa->o_valid & OBD_MD_FLID);
- lsm->lsm_object_id = src_oa->o_id;
- lsm->lsm_object_gr = src_oa->o_gr;
- if (!lsm->lsm_stripe_size)
- lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
- if (!lsm->lsm_pattern) {
- lsm->lsm_pattern = lov->desc.ld_pattern ?
- lov->desc.ld_pattern : LOV_PATTERN_RAID0;
- }
-
- if (*ea == NULL || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
- if (--ost_start_count <= 0) {
- ost_start_idx = ll_insecure_random_int();
- ost_start_count = LOV_CREATE_RESEED_INTERVAL;
- } else if (lsm->lsm_stripe_count >=
- lov->desc.ld_active_tgt_count) {
- /* If we allocate from all of the stripes, make the
- * next file start on the next OST. */
- ++ost_start_idx;
- }
- ost_idx = ost_start_idx % ost_count;
- } else {
- ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
- }
-
- CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
- lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
-
- /* XXX LOV STACKING: need to figure out how many real OSCs */
- if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
- oti_alloc_cookies(oti, lsm->lsm_stripe_count);
- if (!oti->oti_logcookies)
- GOTO(out_cleanup, rc = -ENOMEM);
- cookies = oti->oti_logcookies;
- }
-
- loi = lsm->lsm_oinfo;
- for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
- struct lov_stripe_md obj_md;
- struct lov_stripe_md *obj_mdp = &obj_md;
- int err;
-
- ++ost_start_idx;
- if (lov->tgts[ost_idx].active == 0) {
- if (!obd_uuid_empty(&lov->tgts[ost_idx].uuid))
- CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
- continue;
- }
-
- /* create data objects with "parent" OA */
- memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
-
- /* XXX When we start creating objects on demand, we need to
- * make sure that we always create the object on the
- * stripe which holds the existing file size.
- */
- if (src_oa->o_valid & OBD_MD_FLSIZE) {
- if (lov_stripe_offset(lsm, src_oa->o_size, i,
- &tmp_oa->o_size) < 0 &&
- tmp_oa->o_size)
- tmp_oa->o_size--;
-
- CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
- i, tmp_oa->o_size, src_oa->o_size);
- }
+ rc = lov_prep_create_set(exp, ea, src_oa, oti, &set);
+ if (rc)
+ RETURN(rc);
+ list_for_each (pos, &set->set_list) {
+ struct lov_request *req =
+ list_entry(pos, struct lov_request, rq_link);
/* XXX: LOV STACKING: use real "obj_mdp" sub-data */
- err = obd_create(lov->tgts[ost_idx].ltd_exp, tmp_oa, &obj_mdp,
- oti);
- if (err) {
- if (lov->tgts[ost_idx].active) {
- CERROR("error creating objid "LPX64" sub-object"
- " on OST idx %d/%d: rc = %d\n",
- src_oa->o_id, ost_idx,
- lsm->lsm_stripe_count, err);
- if (err > 0) {
- CERROR("obd_create returned invalid "
- "err %d\n", err);
- err = -EIO;
- }
- }
- if (!rc)
- rc = err;
- continue;
- }
- if (oti->oti_objid)
- oti->oti_objid[ost_idx] = tmp_oa->o_id;
- loi->loi_id = tmp_oa->o_id;
- loi->loi_gr = tmp_oa->o_gr;
- loi->loi_ost_idx = ost_idx;
- loi->loi_ost_gen = lov->tgts[ost_idx].ltd_gen;
- CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at "
- "idx %d gen %d\n", lsm->lsm_object_id, loi->loi_id,
- ost_idx, loi->loi_ost_gen);
-
- lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
- obj_alloc, &set);
- loi_init(loi);
-
- if (cookies)
- ++oti->oti_logcookies;
- if (tmp_oa->o_valid & OBD_MD_FLCOOKIE)
- ++cookie_sent;
- ++obj_alloc;
- ++loi;
-
- /* If we have allocated enough objects, we are OK */
- if (obj_alloc == lsm->lsm_stripe_count)
- GOTO(out_done, rc = 0);
- }
-
- if (obj_alloc == 0) {
- if (rc == 0)
- rc = -EIO;
- GOTO(out_cleanup, rc);
- }
-
- /* If we were passed specific striping params, then a failure to
- * meet those requirements is an error, since we can't reallocate
- * that memory (it might be part of a larger array or something).
- *
- * We can only get here if lsm_stripe_count was originally > 1.
- */
- if (*ea != NULL) {
- CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
- lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc);
- if (rc == 0)
- rc = -EFBIG;
- GOTO(out_cleanup, rc);
- } else {
- struct lov_stripe_md *lsm_new;
- /* XXX LOV STACKING call into osc for sizes */
- unsigned oldsize, newsize;
-
- if (oti && cookies && cookie_sent) {
- oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
- newsize = obj_alloc * sizeof(*cookies);
-
- oti_alloc_cookies(oti, obj_alloc);
- if (oti->oti_logcookies) {
- memcpy(oti->oti_logcookies, cookies, newsize);
- OBD_FREE(cookies, oldsize);
- cookies = oti->oti_logcookies;
- } else {
- CWARN("'leaking' %d bytes\n", oldsize-newsize);
- }
- }
-
- CWARN("using fewer stripes for object "LPX64": old %u new %u\n",
- lsm->lsm_object_id, lsm->lsm_stripe_count, obj_alloc);
- oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
- newsize = lov_stripe_md_size(obj_alloc);
- OBD_ALLOC(lsm_new, newsize);
- if (lsm_new != NULL) {
- memcpy(lsm_new, lsm, newsize);
- lsm_new->lsm_stripe_count = obj_alloc;
- OBD_FREE(lsm, oldsize);
- lsm = lsm_new;
- } else {
- CWARN("'leaking' %d bytes\n", oldsize - newsize);
- }
- rc = 0;
- }
- EXIT;
- out_done:
- *ea = lsm;
- if (src_oa->o_valid & OBD_MD_FLSIZE &&
- ret_oa->o_size != src_oa->o_size) {
- CERROR("original size "LPU64" isn't new object size "LPU64"\n",
- src_oa->o_size, ret_oa->o_size);
- LBUG();
- }
- ret_oa->o_id = src_oa->o_id;
- ret_oa->o_gr = src_oa->o_gr;
- ret_oa->o_valid |= OBD_MD_FLGROUP;
- memcpy(src_oa, ret_oa, sizeof(*src_oa));
-
- out_tmp:
- obdo_free(tmp_oa);
- out_oa:
- obdo_free(ret_oa);
- if (oti && cookies) {
- oti->oti_logcookies = cookies;
- if (!cookie_sent) {
- oti_free_cookies(oti);
- src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
- } else {
- src_oa->o_valid |= OBD_MD_FLCOOKIE;
- }
+ rc = obd_create(lov->tgts[req->rq_idx].ltd_exp,
+ req->rq_oa, &req->rq_md, oti);
+ lov_update_create_set(set, req, rc);
}
+ rc = lov_fini_create_set(set, ea);
RETURN(rc);
-
- out_cleanup:
- while (obj_alloc-- > 0) {
- struct obd_export *sub_exp;
- int err;
-
- --loi;
- sub_exp = lov->tgts[loi->loi_ost_idx].ltd_exp;
- /* destroy already created objects here */
- memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
- tmp_oa->o_id = loi->loi_id;
-
- err = obd_destroy(sub_exp, tmp_oa, NULL, oti);
- if (err)
- CERROR("Failed to uncreate objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n", src_oa->o_id,
- loi->loi_id, loi->loi_ost_idx, err);
- }
- if (*ea == NULL)
- obd_free_memmd(exp, &lsm);
- goto out_tmp;
-}
-
-static int lov_revalidate_policy(struct lov_obd *lov, struct lov_stripe_md *lsm)
-{
- static int next_idx = 0;
- struct lov_tgt_desc *tgt;
- int i, count;
-
- /* XXX - we should do something clever and take lsm
- * into account but just do round robin for now. */
-
- /* last_idx must always be less that count because
- * ld_tgt_count currently cannot shrink. */
- count = lov->desc.ld_tgt_count;
-
- for (i = next_idx, tgt = lov->tgts + i; i < count; i++, tgt++) {
- if (tgt->active) {
- next_idx = (i + 1) % count;
- RETURN(i);
- }
- }
-
- for (i = 0, tgt = lov->tgts; i < next_idx; i++, tgt++) {
- if (tgt->active) {
- next_idx = (i + 1) % count;
- RETURN(i);
- }
- }
-
- RETURN(-EIO);
}
#define lsm_bad_magic(LSMP) \
static int lov_destroy(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
- struct obdo *tmp = NULL;
- struct lov_oinfo *loi;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
struct lov_obd *lov;
- int rc = 0, i;
+ int rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
RETURN(-ENODEV);
lov = &exp->exp_obd->u.lov;
- loi = lsm->lsm_oinfo;
- for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
- int err;
+ rc = lov_prep_destroy_set(exp, oa, lsm, oti, &set);
+ if (rc)
+ RETURN(rc);
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- /* Orphan clean up will (someday) fix this up. */
- if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE)
- oti->oti_logcookies++;
- continue;
- }
+ list_for_each (pos, &set->set_list) {
+ int err;
+ req = list_entry(pos, struct lov_request, rq_link);
- tmp = obdo_alloc();
- if (tmp == NULL)
- RETURN(-ENOMEM);
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
- err = obd_destroy(lov->tgts[loi->loi_ost_idx].ltd_exp,
- tmp, NULL, oti);
- obdo_free(tmp);
- if (err && lov->tgts[loi->loi_ost_idx].active) {
- CDEBUG(D_INODE, "error: destroying objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx, err);
+ /* XXX update the cookie position */
+ oti->oti_logcookies = set->set_cookies + req->rq_stripe;
+ rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
+ NULL, oti);
+ err = lov_update_common_set(set, req, rc);
+ if (rc) {
+ CERROR("error: destroying objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ set->set_oa->o_id, req->rq_oa->o_id,
+ req->rq_idx, rc);
if (!rc)
rc = err;
}
}
+ lov_fini_destroy_set(set);
RETURN(rc);
}
static int lov_getattr(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm)
{
- struct obdo *tmp = NULL;
- int i, rc = 0, set = 0;
- struct lov_oinfo *loi;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
struct lov_obd *lov;
+ int err = 0, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
RETURN(-ENODEV);
lov = &exp->exp_obd->u.lov;
+
+ rc = lov_prep_getattr_set(exp, oa, lsm, &set);
+ if (rc)
+ RETURN(rc);
- CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n",
- lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size);
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
- "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx);
- /* create data objects with "parent" OA */
- tmp = obdo_alloc();
- if (tmp == NULL)
- RETURN(-ENOMEM);
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
-
- err = obd_getattr(lov->tgts[loi->loi_ost_idx].ltd_exp,
- tmp, NULL);
+ "%u\n", oa->o_id, req->rq_stripe, req->rq_oa->o_id,
+ req->rq_idx);
+
+ rc = obd_getattr(lov->tgts[req->rq_idx].ltd_exp,
+ req->rq_oa, NULL);
+ err = lov_update_common_set(set, req, rc);
if (err) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: getattr objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx,
- err);
- obdo_free(tmp);
- RETURN(err);
- }
- } else {
- lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set);
+ CERROR("error: getattr objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ set->set_oa->o_id, req->rq_oa->o_id,
+ req->rq_idx, err);
+ break;
}
- obdo_free(tmp);
}
- if (!set)
- rc = -EIO;
+
+ rc = lov_fini_getattr_set(set);
+ if (err)
+ rc = err;
RETURN(rc);
}
static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data,
int rc)
{
- struct lov_getattr_async_args *aa = data;
- struct lov_stripe_md *lsm = aa->aa_lsm;
- struct obdo *oa = aa->aa_oa;
- struct obdo *obdos = aa->aa_obdos;
- struct lov_oinfo *loi;
- int i;
- int set = 0;
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
ENTRY;
- if (rc == 0) {
- /* NB all stripe requests succeeded to get here */
-
- loi = lsm->lsm_oinfo;
- for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
- if (obdos[i].o_valid == 0) /* inactive stripe */
- continue;
-
- lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm,
- i, &set);
- }
-
- if (!set) {
- CERROR ("No stripes had valid attrs\n");
- rc = -EIO;
- }
+ /* don't do attribute merge if this aysnc op failed */
+ if (rc) {
+ lovset->set_completes = 0;
+ lov_fini_getattr_set(lovset);
+ } else {
+ rc = lov_fini_getattr_set(lovset);
}
-
- OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos));
RETURN (rc);
}
struct lov_stripe_md *lsm,
struct ptlrpc_request_set *rqset)
{
- struct obdo *obdos;
+ struct lov_request_set *lovset;
struct lov_obd *lov;
- struct lov_oinfo *loi;
- struct lov_getattr_async_args *aa;
- int i, rc = 0, set = 0;
+ struct list_head *pos;
+ struct lov_request *req;
+ int rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
lov = &exp->exp_obd->u.lov;
- OBD_ALLOC (obdos, lsm->lsm_stripe_count * sizeof (*obdos));
- if (obdos == NULL)
- RETURN(-ENOMEM);
+ rc = lov_prep_getattr_set(exp, oa, lsm, &lovset);
+ if (rc)
+ RETURN(rc);
CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n",
lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size);
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- /* leaves obdos[i].obd_valid unset */
- continue;
- }
- CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at "
- "idx %u gen %d\n", oa->o_id, i, loi->loi_id,
- loi->loi_ost_idx, loi->loi_ost_gen);
-
- /* create data objects with "parent" OA */
- memcpy(&obdos[i], oa, sizeof(obdos[i]));
- obdos[i].o_id = loi->loi_id;
-
- err = obd_getattr_async(lov->tgts[loi->loi_ost_idx].ltd_exp,
- &obdos[i], NULL, rqset);
- if (err) {
+ list_for_each (pos, &lovset->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx "
+ "%u\n", oa->o_id, req->rq_stripe, req->rq_oa->o_id,
+ req->rq_idx);
+ rc = obd_getattr_async(lov->tgts[req->rq_idx].ltd_exp,
+ req->rq_oa, NULL, rqset);
+ if (rc) {
CERROR("error: getattr objid "LPX64" subobj "
LPX64" on OST idx %d: rc = %d\n",
- oa->o_id, loi->loi_id, loi->loi_ost_idx,
- err);
- GOTO(out_obdos, rc = err);
+ lovset->set_oa->o_id, req->rq_oa->o_id,
+ req->rq_idx, rc);
+ GOTO(out, rc);
}
- set = 1;
+ lov_update_common_set(lovset, req, rc);
}
- if (!set)
- GOTO (out_obdos, rc = -EIO);
-
+
+ LASSERT(rc == 0);
LASSERT (rqset->set_interpret == NULL);
rqset->set_interpret = lov_getattr_interpret;
- LASSERT (sizeof (rqset->set_args) >= sizeof (*aa));
- aa = (struct lov_getattr_async_args *)&rqset->set_args;
- aa->aa_lsm = lsm;
- aa->aa_oa = oa;
- aa->aa_obdos = obdos;
- aa->aa_lov = lov;
- GOTO(out, rc = 0);
-
-out_obdos:
- OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos));
+ rqset->set_arg = (void *)lovset;
+ RETURN(rc);
out:
+ LASSERT(rc);
+ lov_fini_getattr_set(lovset);
RETURN(rc);
}
-
static int lov_setattr(struct obd_export *exp, struct obdo *src_oa,
struct lov_stripe_md *lsm, struct obd_trans_info *oti)
{
- struct obdo *tmp_oa, *ret_oa;
+ struct lov_request_set *set;
struct lov_obd *lov;
- struct lov_oinfo *loi;
- int rc = 0, i, set = 0;
+ struct list_head *pos;
+ struct lov_request *req;
+ int err = 0, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
LASSERT(!(src_oa->o_valid & OBD_MD_FLGROUP) || src_oa->o_gr > 0);
- ret_oa = obdo_alloc();
- if (!ret_oa)
- RETURN(-ENOMEM);
-
- tmp_oa = obdo_alloc();
- if (!tmp_oa)
- GOTO(out_oa, rc = -ENOMEM);
-
lov = &exp->exp_obd->u.lov;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
- memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
- tmp_oa->o_id = loi->loi_id;
- LASSERT(!(tmp_oa->o_valid & OBD_MD_FLGROUP) || tmp_oa->o_gr>0);
-
- if (src_oa->o_valid & OBD_MD_FLSIZE) {
- if (lov_stripe_offset(lsm, src_oa->o_size, i,
- &tmp_oa->o_size) < 0 &&
- tmp_oa->o_size)
- tmp_oa->o_size--;
-
- CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
- i, tmp_oa->o_size, src_oa->o_size);
- }
+ rc = lov_prep_setattr_set(exp, src_oa, lsm, NULL, &set);
+ if (rc)
+ RETURN(rc);
- err = obd_setattr(lov->tgts[loi->loi_ost_idx].ltd_exp, tmp_oa,
- NULL, NULL);
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ rc = obd_setattr(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
+ NULL, NULL);
+ err = lov_update_common_set(set, req, rc);
if (err) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: setattr objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- src_oa->o_id, loi->loi_id,
- loi->loi_ost_idx, err);
- if (!rc)
- rc = err;
- }
- continue;
+ CERROR("error: setattr objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ set->set_oa->o_id, req->rq_oa->o_id,
+ req->rq_idx, err);
+ if (!rc)
+ rc = err;
}
- lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set);
}
- if (!set && !rc)
- rc = -EIO;
-
- ret_oa->o_id = src_oa->o_id;
- memcpy(src_oa, ret_oa, sizeof(*src_oa));
- GOTO(out_tmp, rc);
-out_tmp:
- obdo_free(tmp_oa);
-out_oa:
- obdo_free(ret_oa);
- return rc;
+ err = lov_fini_setattr_set(set);
+ if (!rc)
+ rc = err;
+ RETURN(rc);
}
-/* we have an offset in file backed by an lov and want to find out where
- * that offset lands in our given stripe of the file. for the easy
- * case where the offset is within the stripe, we just have to scale the
- * offset down to make it relative to the stripe instead of the lov.
- *
- * the harder case is what to do when the offset doesn't intersect the
- * stripe. callers will want start offsets clamped ahead to the start
- * of the nearest stripe in the file. end offsets similarly clamped to the
- * nearest ending byte of a stripe in the file:
- *
- * all this function does is move offsets to the nearest region of the
- * stripe, and it does its work "mod" the full length of all the stripes.
- * consider a file with 3 stripes:
- *
- * S E
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- *
- * to find stripe 1's offsets for S and E, it divides by the full stripe
- * width and does its math in the context of a single set of stripes:
- *
- * S E
- * -----------------------------------
- * | 0 | 1 | 2 |
- * -----------------------------------
- *
- * it'll notice that E is outside stripe 1 and clamp it to the end of the
- * stripe, then multiply it back out by lov_off to give the real offsets in
- * the stripe:
- *
- * S E
- * ---------------------------------------------------------------------
- * | 1 | 1 | 1 | 1 | 1 | 1 |
- * ---------------------------------------------------------------------
- *
- * it would have done similarly and pulled S forward to the start of a 1
- * stripe if, say, S had landed in a 0 stripe.
- *
- * this rounding isn't always correct. consider an E lov offset that lands
- * on a 0 stripe, the "mod stripe width" math will pull it forward to the
- * start of a 1 stripe, when in fact it wanted to be rounded back to the end
- * of a previous 1 stripe. this logic is handled by callers and this is why:
- *
- * this function returns < 0 when the offset was "before" the stripe and
- * was moved forward to the start of the stripe in question; 0 when it
- * falls in the stripe and no shifting was done; > 0 when the offset
- * was outside the stripe and was pulled back to its final byte. */
-static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
- int stripeno, obd_off *obd_off)
+static int lov_revalidate_policy(struct lov_obd *lov, struct lov_stripe_md *lsm)
{
- unsigned long ssize = lsm->lsm_stripe_size;
- unsigned long swidth = ssize * lsm->lsm_stripe_count;
- unsigned long stripe_off, this_stripe;
- int ret = 0;
-
- if (lov_off == OBD_OBJECT_EOF) {
- *obd_off = OBD_OBJECT_EOF;
- return 0;
- }
+ static int next_idx = 0;
+ struct lov_tgt_desc *tgt;
+ int i, count;
- /* do_div(a, b) returns a % b, and a = a / b */
- stripe_off = do_div(lov_off, swidth);
+ /* XXX - we should do something clever and take lsm
+ * into account but just do round robin for now. */
- this_stripe = stripeno * ssize;
- if (stripe_off < this_stripe) {
- stripe_off = 0;
- ret = -1;
- } else {
- stripe_off -= this_stripe;
+ /* last_idx must always be less that count because
+ * ld_tgt_count currently cannot shrink. */
+ count = lov->desc.ld_tgt_count;
- if (stripe_off >= ssize) {
- stripe_off = ssize;
- ret = 1;
+ for (i = next_idx, tgt = lov->tgts + i; i < count; i++, tgt++) {
+ if (tgt->active) {
+ next_idx = (i + 1) % count;
+ RETURN(i);
}
}
- *obd_off = lov_off * ssize + stripe_off;
- return ret;
-}
-
-/* Given a whole-file size and a stripe number, give the file size which
- * corresponds to the individual object of that stripe.
- *
- * This behaves basically in the same was as lov_stripe_offset, except that
- * file sizes falling before the beginning of a stripe are clamped to the end
- * of the previous stripe, not the beginning of the next:
- *
- * S
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- *
- * if clamped to stripe 2 becomes:
- *
- * S
- * ---------------------------------------------------------------------
- * | 0 | 1 | 2 | 0 | 1 | 2 |
- * ---------------------------------------------------------------------
- */
-static obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size,
- int stripeno)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- unsigned long swidth = ssize * lsm->lsm_stripe_count;
- unsigned long stripe_off, this_stripe;
-
- if (file_size == OBD_OBJECT_EOF)
- return OBD_OBJECT_EOF;
-
- /* do_div(a, b) returns a % b, and a = a / b */
- stripe_off = do_div(file_size, swidth);
-
- this_stripe = stripeno * ssize;
- if (stripe_off < this_stripe) {
- /* Move to end of previous stripe, or zero */
- if (file_size > 0) {
- file_size--;
- stripe_off = ssize;
- } else {
- stripe_off = 0;
- }
- } else {
- stripe_off -= this_stripe;
-
- if (stripe_off >= ssize) {
- /* Clamp to end of this stripe */
- stripe_off = ssize;
+ for (i = 0, tgt = lov->tgts; i < next_idx; i++, tgt++) {
+ if (tgt->active) {
+ next_idx = (i + 1) % count;
+ RETURN(i);
}
}
- return (file_size * ssize + stripe_off);
-}
-
-/* given an extent in an lov and a stripe, calculate the extent of the stripe
- * that is contained within the lov extent. this returns true if the given
- * stripe does intersect with the lov extent. */
-static int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
- obd_off start, obd_off end,
- obd_off *obd_start, obd_off *obd_end)
-{
- int start_side = 0, end_side = 0;
-
- switch (lsm->lsm_pattern) {
- case LOV_PATTERN_RAID0:
- start_side = lov_stripe_offset(lsm, start, stripeno, obd_start);
- end_side = lov_stripe_offset(lsm, end, stripeno, obd_end);
- break;
- case LOV_PATTERN_CMOBD:
- *obd_start = start;
- *obd_end = end;
- start_side = end_side = 0;
- break;
- default:
- LBUG();
- }
-
- CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n",
- start, end, start_side, *obd_start, *obd_end, end_side);
-
- /* this stripe doesn't intersect the file extent when neither
- * start or the end intersected the stripe and obd_start and
- * obd_end got rounded up to the save value. */
- if (start_side != 0 && end_side != 0 && *obd_start == *obd_end)
- return 0;
-
- /* as mentioned in the lov_stripe_offset commentary, end
- * might have been shifted in the wrong direction. This
- * happens when an end offset is before the stripe when viewed
- * through the "mod stripe size" math. we detect it being shifted
- * in the wrong direction and touch it up.
- * interestingly, this can't underflow since end must be > start
- * if we passed through the previous check.
- * (should we assert for that somewhere?) */
- if (end_side != 0)
- (*obd_end)--;
-
- return 1;
-}
-
-/* compute which stripe number "lov_off" will be written into */
-static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
-{
- unsigned long ssize = lsm->lsm_stripe_size;
- unsigned long swidth = ssize * lsm->lsm_stripe_count;
- unsigned long stripe_off;
-
- if (lsm->lsm_pattern == LOV_PATTERN_CMOBD)
- return 0;
-
- stripe_off = do_div(lov_off, swidth);
-
- return stripe_off / ssize;
+ RETURN(-EIO);
}
static int lov_revalidate_md(struct obd_export *exp, struct obdo *src_oa,
if (oti->oti_objid)
oti->oti_objid[ost_idx] = tmp_oa->o_id;
loi->loi_id = tmp_oa->o_id;
+ loi->loi_gr = tmp_oa->o_gr;
loi->loi_ost_idx = ost_idx;
loi->loi_ost_gen = lov->tgts[ost_idx].ltd_gen;
CDEBUG(D_INODE, "replacing objid "LPX64" subobj "LPX64
struct lov_stripe_md *lsm,
obd_off start, obd_off end, struct obd_trans_info *oti)
{
- struct obdo *tmp = NULL;
- struct lov_oinfo *loi;
+ struct lov_request_set *set;
struct lov_obd *lov;
- int rc = 0, i;
+ struct list_head *pos;
+ struct lov_request *req;
+ int err = 0, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
RETURN(-ENODEV);
lov = &exp->exp_obd->u.lov;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- obd_off starti, endi;
- int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
- if (!lov_stripe_intersects(lsm, i, start, end, &starti, &endi))
- continue;
+ rc = lov_prep_punch_set(exp, oa, lsm, start, end, oti, &set);
+ if (rc)
+ RETURN(rc);
- /* create data objects with "parent" OA */
- tmp = obdo_alloc();
- if (tmp == NULL)
- RETURN(-ENOMEM);
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
- err = obd_punch(lov->tgts[loi->loi_ost_idx].ltd_exp,
- tmp, NULL, starti, endi, NULL);
- obdo_free(tmp);
+ rc = obd_punch(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
+ NULL, req->rq_extent.start,
+ req->rq_extent.end, NULL);
+ err = lov_update_punch_set(set, req, rc);
if (err) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: punch objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n", oa->o_id,
- loi->loi_id, loi->loi_ost_idx, err);
- }
+ CERROR("error: punch objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n", set->set_oa->o_id,
+ req->rq_oa->o_id, req->rq_idx, rc);
if (!rc)
rc = err;
- } else {
- loi->loi_kms = loi->loi_rss = starti;
}
}
+ err = lov_fini_punch_set(set);
+ if (!rc)
+ rc = err;
RETURN(rc);
}
static int lov_sync(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md *lsm, obd_off start, obd_off end)
{
- struct obdo *tmp;
+ struct lov_request_set *set;
struct lov_obd *lov;
- struct lov_oinfo *loi;
- int rc = 0, i;
+ struct list_head *pos;
+ struct lov_request *req;
+ int err = 0, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
if (!exp->exp_obd)
RETURN(-ENODEV);
- tmp = obdo_alloc();
- if (!tmp)
- RETURN(-ENOMEM);
-
lov = &exp->exp_obd->u.lov;
- for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) {
- obd_off starti, endi;
- int err;
-
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
-
- if (!lov_stripe_intersects(lsm, i, start, end, &starti, &endi))
- continue;
+ rc = lov_prep_sync_set(exp, oa, lsm, start, end, &set);
+ if (rc)
+ RETURN(rc);
- memcpy(tmp, oa, sizeof(*tmp));
- tmp->o_id = loi->loi_id;
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
- err = obd_sync(lov->tgts[loi->loi_ost_idx].ltd_exp, tmp, NULL,
- starti, endi);
+ rc = obd_sync(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa,
+ NULL, req->rq_extent.start, req->rq_extent.end);
+ err = lov_update_common_set(set, req, rc);
if (err) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: fsync objid "LPX64" subobj "LPX64
- " on OST idx %d: rc = %d\n", oa->o_id,
- loi->loi_id, loi->loi_ost_idx, err);
- }
+ CERROR("error: fsync objid "LPX64" subobj "LPX64
+ " on OST idx %d: rc = %d\n", set->set_oa->o_id,
+ req->rq_oa->o_id, req->rq_idx, rc);
if (!rc)
rc = err;
}
}
-
- obdo_free(tmp);
+ err = lov_fini_sync_set(set);
+ if (!rc)
+ rc = err;
RETURN(rc);
}
struct lov_stripe_md *lsm, obd_count oa_bufs,
struct brw_page *pga, struct obd_trans_info *oti)
{
- struct {
- int bufct;
- int index;
- int subcount;
- struct lov_stripe_md lsm;
- int ost_idx;
- } *stripeinfo, *si, *si_last;
- struct obdo *ret_oa = NULL, *tmp_oa = NULL;
- struct lov_obd *lov;
- struct brw_page *ioarr;
- struct lov_oinfo *loi;
- int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int err, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lov = &exp->exp_obd->u.lov;
-
if (cmd == OBD_BRW_CHECK) {
rc = lov_brw_check(lov, src_oa, lsm, oa_bufs, pga);
RETURN(rc);
}
- OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo));
- if (!stripeinfo)
- RETURN(-ENOMEM);
-
- OBD_ALLOC(where, sizeof(*where) * oa_bufs);
- if (!where)
- GOTO(out_sinfo, rc = -ENOMEM);
-
- OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
- if (!ioarr)
- GOTO(out_where, rc = -ENOMEM);
-
- if (src_oa) {
- ret_oa = obdo_alloc();
- if (!ret_oa)
- GOTO(out_ioarr, rc = -ENOMEM);
-
- tmp_oa = obdo_alloc();
- if (!tmp_oa)
- GOTO(out_oa, rc = -ENOMEM);
- }
-
- for (i = 0; i < oa_bufs; i++) {
- where[i] = lov_stripe_number(lsm, pga[i].disk_offset);
- stripeinfo[where[i]].bufct++;
- }
-
- for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
- i < stripe_count; i++, loi++, si_last = si, si++) {
- if (i > 0)
- si->index = si_last->index + si_last->bufct;
- si->lsm.lsm_object_id = loi->loi_id;
- si->lsm.lsm_object_gr = lsm->lsm_object_gr;
- si->ost_idx = loi->loi_ost_idx;
- }
-
- for (i = 0; i < oa_bufs; i++) {
- int which = where[i];
- int shift;
-
- shift = stripeinfo[which].index + stripeinfo[which].subcount;
- LASSERT(shift < oa_bufs);
- ioarr[shift] = pga[i];
- lov_stripe_offset(lsm, pga[i].disk_offset, which,
- &ioarr[shift].disk_offset);
- stripeinfo[which].subcount++;
- }
-
- for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) {
- int shift = si->index;
-
- if (lov->tgts[si->ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
- GOTO(out_oa, rc = -EIO);
- }
-
- if (si->bufct) {
- LASSERT(shift < oa_bufs);
- if (src_oa)
- memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
-
- tmp_oa->o_id = si->lsm.lsm_object_id;
- rc = obd_brw(cmd, lov->tgts[si->ost_idx].ltd_exp,
- tmp_oa, &si->lsm, si->bufct,
- &ioarr[shift], oti);
- if (rc)
- GOTO(out_oa, rc);
+ rc = lov_prep_brw_set(exp, src_oa, lsm, oa_bufs, pga, oti, &set);
+ if (rc)
+ RETURN(rc);
- lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm,
- i, &set);
- }
+ list_for_each (pos, &set->set_list) {
+ struct obd_export *sub_exp;
+ struct brw_page *sub_pga;
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ sub_exp = lov->tgts[req->rq_idx].ltd_exp;
+ sub_pga = set->set_pga + req->rq_pgaidx;
+ rc = obd_brw(cmd, sub_exp, req->rq_oa, req->rq_md,
+ req->rq_oabufs, sub_pga, oti);
+ if (rc)
+ break;
+ lov_update_common_set(set, req, rc);
}
- ret_oa->o_id = src_oa->o_id;
- memcpy(src_oa, ret_oa, sizeof(*src_oa));
-
- GOTO(out_oa, rc);
- out_oa:
- if (tmp_oa)
- obdo_free(tmp_oa);
- if (ret_oa)
- obdo_free(ret_oa);
- out_ioarr:
- OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
- out_where:
- OBD_FREE(where, sizeof(*where) * oa_bufs);
- out_sinfo:
- OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
- return rc;
+ err = lov_fini_brw_set(set);
+ if (!rc)
+ rc = err;
+ RETURN(rc);
}
static int lov_brw_interpret(struct ptlrpc_request_set *reqset, void *data,
int rc)
{
- struct lov_brw_async_args *aa = data;
- struct lov_stripe_md *lsm = aa->aa_lsm;
- obd_count oa_bufs = aa->aa_oa_bufs;
- struct obdo *oa = aa->aa_oa;
- struct obdo *obdos = aa->aa_obdos;
- struct brw_page *ioarr = aa->aa_ioarr;
- struct lov_oinfo *loi;
- int i, set = 0;
+ struct lov_request_set *lovset = (struct lov_request_set *)data;
ENTRY;
-
- if (rc == 0) {
- /* NB all stripe requests succeeded to get here */
-
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
- if (obdos[i].o_valid == 0) /* inactive stripe */
- continue;
-
- lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm,
- i, &set);
- }
-
- if (!set) {
- CERROR("No stripes had valid attrs\n");
- rc = -EIO;
- }
+
+ if (rc) {
+ lovset->set_completes = 0;
+ lov_fini_brw_set(lovset);
+ } else {
+ rc = lov_fini_brw_set(lovset);
}
- oa->o_id = lsm->lsm_object_id;
-
- OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos));
- OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
+
RETURN(rc);
}
struct brw_page *pga, struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
{
- struct {
- int bufct;
- int index;
- int subcount;
- struct lov_stripe_md lsm;
- int ost_idx;
- } *stripeinfo, *si, *si_last;
- struct lov_obd *lov;
- struct brw_page *ioarr;
- struct obdo *obdos = NULL;
- struct lov_oinfo *loi;
- struct lov_brw_async_args *aa;
- int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count;
+ struct lov_request_set *lovset;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lov = &exp->exp_obd->u.lov;
-
if (cmd == OBD_BRW_CHECK) {
rc = lov_brw_check(lov, oa, lsm, oa_bufs, pga);
RETURN(rc);
}
- OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo));
- if (!stripeinfo)
- RETURN(-ENOMEM);
-
- OBD_ALLOC(where, sizeof(*where) * oa_bufs);
- if (!where)
- GOTO(out_sinfo, rc = -ENOMEM);
-
- if (oa) {
- OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count);
- if (!obdos)
- GOTO(out_where, rc = -ENOMEM);
- }
-
- OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs);
- if (!ioarr)
- GOTO(out_obdos, rc = -ENOMEM);
-
- for (i = 0; i < oa_bufs; i++) {
- where[i] = lov_stripe_number(lsm, pga[i].disk_offset);
- stripeinfo[where[i]].bufct++;
- }
-
- for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo;
- i < stripe_count; i++, loi++, si_last = si, si++) {
- if (i > 0)
- si->index = si_last->index + si_last->bufct;
- si->lsm.lsm_object_id = loi->loi_id;
- si->ost_idx = loi->loi_ost_idx;
-
- if (oa) {
- memcpy(&obdos[i], oa, sizeof(*obdos));
- obdos[i].o_id = si->lsm.lsm_object_id;
- }
- }
-
- for (i = 0; i < oa_bufs; i++) {
- int which = where[i];
- int shift;
-
- shift = stripeinfo[which].index + stripeinfo[which].subcount;
- LASSERT(shift < oa_bufs);
- ioarr[shift] = pga[i];
- lov_stripe_offset(lsm, pga[i].disk_offset, which,
- &ioarr[shift].disk_offset);
- stripeinfo[which].subcount++;
- }
-
- for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) {
- int shift = si->index;
-
- if (si->bufct == 0)
- continue;
-
- if (lov->tgts[si->ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx);
- GOTO(out_ioarr, rc = -EIO);
- }
-
- LASSERT(shift < oa_bufs);
+ rc = lov_prep_brw_set(exp, oa, lsm, oa_bufs, pga, oti, &lovset);
+ if (rc)
+ RETURN(rc);
- rc = obd_brw_async(cmd, lov->tgts[si->ost_idx].ltd_exp,
- &obdos[i], &si->lsm, si->bufct,
- &ioarr[shift], set, oti);
+ list_for_each (pos, &lovset->set_list) {
+ struct obd_export *sub_exp;
+ struct brw_page *sub_pga;
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ sub_exp = lov->tgts[req->rq_idx].ltd_exp;
+ sub_pga = lovset->set_pga + req->rq_pgaidx;
+ rc = obd_brw_async(cmd, sub_exp, req->rq_oa, req->rq_md,
+ req->rq_oabufs, sub_pga, set, oti);
if (rc)
- GOTO(out_ioarr, rc);
+ GOTO(out, rc);
+ lov_update_common_set(lovset, req, rc);
}
LASSERT(rc == 0);
LASSERT(set->set_interpret == NULL);
set->set_interpret = (set_interpreter_func)lov_brw_interpret;
- LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args));
- aa = (struct lov_brw_async_args *)&set->set_args;
- aa->aa_lsm = lsm;
- aa->aa_obdos = obdos;
- aa->aa_oa = oa;
- aa->aa_ioarr = ioarr;
- aa->aa_oa_bufs = oa_bufs;
-
- /* Don't free ioarr or obdos - that's done in lov_brw_interpret */
- GOTO(out_where, rc);
-
- out_ioarr:
- OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs);
- out_obdos:
- OBD_FREE(obdos, stripe_count * sizeof(*obdos));
- out_where:
- OBD_FREE(where, sizeof(*where) * oa_bufs);
- out_sinfo:
- OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo));
- return rc;
+ set->set_arg = (void *)lovset;
+
+ RETURN(rc);
+out:
+ lov_fini_brw_set(lovset);
+ RETURN(rc);
}
-struct lov_async_page *lap_from_cookie(void *cookie)
-{
- struct lov_async_page *lap = cookie;
- if (lap->lap_magic != LAP_MAGIC)
- return ERR_PTR(-EINVAL);
- return lap;
-};
-
static int lov_ap_make_ready(void *data, int cmd)
{
- struct lov_async_page *lap = lap_from_cookie(data);
- /* XXX should these assert? */
- if (IS_ERR(lap))
- return -EINVAL;
+ struct lov_async_page *lap = LAP_FROM_COOKIE(data);
return lap->lap_caller_ops->ap_make_ready(lap->lap_caller_data, cmd);
}
static int lov_ap_refresh_count(void *data, int cmd)
{
- struct lov_async_page *lap = lap_from_cookie(data);
- if (IS_ERR(lap))
- return -EINVAL;
+ struct lov_async_page *lap = LAP_FROM_COOKIE(data);
return lap->lap_caller_ops->ap_refresh_count(lap->lap_caller_data,
cmd);
}
static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
{
- struct lov_async_page *lap = lap_from_cookie(data);
- /* XXX should these assert? */
- if (IS_ERR(lap))
- return;
+ struct lov_async_page *lap = LAP_FROM_COOKIE(data);
lap->lap_caller_ops->ap_fill_obdo(lap->lap_caller_data, cmd, oa);
/* XXX woah, shouldn't we be altering more here? size? */
static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
- struct lov_async_page *lap = lap_from_cookie(data);
- if (IS_ERR(lap))
- return;
+ struct lov_async_page *lap = LAP_FROM_COOKIE(data);
/* in a raid1 regime this would down a count of many ios
* in flight, onl calling the caller_ops completion when all
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lap = lap_from_cookie(cookie);
- if (IS_ERR(lap))
- RETURN(PTR_ERR(lap));
+ lap = LAP_FROM_COOKIE(cookie);
loi = &lsm->lsm_oinfo[lap->lap_stripe];
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lap = lap_from_cookie(cookie);
- if (IS_ERR(lap))
- RETURN(PTR_ERR(lap));
+ lap = LAP_FROM_COOKIE(cookie);
loi = &lsm->lsm_oinfo[lap->lap_stripe];
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lap = lap_from_cookie(cookie);
- if (IS_ERR(lap))
- RETURN(PTR_ERR(lap));
+ lap = LAP_FROM_COOKIE(cookie);
loi = &lsm->lsm_oinfo[lap->lap_stripe];
if (lsm_bad_magic(lsm))
RETURN(-EINVAL);
- lap = lap_from_cookie(cookie);
- if (IS_ERR(lap))
- RETURN(PTR_ERR(lap));
+ lap = LAP_FROM_COOKIE(cookie);
loi = &lsm->lsm_oinfo[lap->lap_stripe];
void *data,__u32 lvb_len, void *lvb_swabber,
struct lustre_handle *lockh)
{
- struct lov_lock_handles *lov_lockh = NULL;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
struct lustre_handle *lov_lockhp;
struct lov_obd *lov;
- struct lov_oinfo *loi;
- char submd_buf[sizeof(struct lov_stripe_md) + sizeof(struct lov_oinfo)];
- struct lov_stripe_md *submd = (void *)submd_buf;
ldlm_error_t rc;
- int i, save_flags = *flags;
+ int save_flags = *flags;
ENTRY;
if (lsm_bad_magic(lsm))
if (!exp || !exp->exp_obd)
RETURN(-ENODEV);
- if (lsm->lsm_stripe_count > 1) {
- lov_lockh = lov_llh_new(lsm);
- if (lov_lockh == NULL)
- RETURN(-ENOMEM);
-
- lockh->cookie = lov_lockh->llh_handle.h_cookie;
- lov_lockhp = lov_lockh->llh_handles;
- } else {
- lov_lockhp = lockh;
- }
-
lov = &exp->exp_obd->u.lov;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++, lov_lockhp++) {
- ldlm_policy_data_t sub_ext;
- obd_off start, end;
-
- if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
- policy->l_extent.end, &start,
- &end))
- continue;
-
- sub_ext.l_extent.start = start;
- sub_ext.l_extent.end = end;
- sub_ext.l_extent.gid = policy->l_extent.gid;
+ rc = lov_prep_enqueue_set(exp, lsm, policy, mode, lockh, &set);
+ if (rc)
+ RETURN(rc);
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- continue;
- }
+ list_for_each (pos, &set->set_list) {
+ ldlm_policy_data_t sub_policy;
+ req = list_entry(pos, struct lov_request, rq_link);
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ LASSERT(lov_lockhp);
- /* XXX LOV STACKING: submd should be from the subobj */
- submd->lsm_object_id = loi->loi_id;
- submd->lsm_object_gr = lsm->lsm_object_gr;
- submd->lsm_stripe_count = 0;
- submd->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid;
- submd->lsm_oinfo->loi_rss = loi->loi_rss;
- submd->lsm_oinfo->loi_kms = loi->loi_kms;
- submd->lsm_oinfo->loi_blocks = loi->loi_blocks;
- loi->loi_mtime = submd->lsm_oinfo->loi_mtime;
- /* XXX submd is not fully initialized here */
*flags = save_flags;
- rc = obd_enqueue(lov->tgts[loi->loi_ost_idx].ltd_exp, submd,
- type, &sub_ext, mode, flags, bl_cb, cp_cb,
- gl_cb, data, lvb_len, lvb_swabber, lov_lockhp);
-
- /* XXX FIXME: This unpleasantness doesn't belong here at *all*.
- * It belongs in the OSC, except that the OSC doesn't have
- * access to the real LOI -- it gets a copy, that we created
- * above, and that copy can be arbitrarily out of date.
- *
- * The LOV API is due for a serious rewriting anyways, and this
- * can be addressed then. */
- if (rc == ELDLM_OK) {
- struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
- __u64 tmp = submd->lsm_oinfo->loi_rss;
-
- LASSERT(lock != NULL);
- loi->loi_rss = tmp;
- loi->loi_blocks = submd->lsm_oinfo->loi_blocks;
- /* Extend KMS up to the end of this lock and no further
- * A lock on [x,y] means a KMS of up to y + 1 bytes! */
- if (tmp > lock->l_policy_data.l_extent.end)
- tmp = lock->l_policy_data.l_extent.end + 1;
- if (tmp >= loi->loi_kms) {
- CDEBUG(D_INODE, "lock acquired, setting rss="
- LPU64", kms="LPU64"\n", loi->loi_rss,
- tmp);
- loi->loi_kms = tmp;
- loi->loi_kms_valid = 1;
- } else {
- CDEBUG(D_INODE, "lock acquired, setting rss="
- LPU64"; leaving kms="LPU64", end="LPU64
- "\n", loi->loi_rss, loi->loi_kms,
- lock->l_policy_data.l_extent.end);
- }
- ldlm_lock_allow_match(lock);
- LDLM_LOCK_PUT(lock);
- } else if (rc == ELDLM_LOCK_ABORTED &&
- save_flags & LDLM_FL_HAS_INTENT) {
- memset(lov_lockhp, 0, sizeof(*lov_lockhp));
- loi->loi_rss = submd->lsm_oinfo->loi_rss;
- loi->loi_blocks = submd->lsm_oinfo->loi_blocks;
- CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
- " kms="LPU64"\n", loi->loi_rss, loi->loi_kms);
- } else {
- memset(lov_lockhp, 0, sizeof(*lov_lockhp));
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: enqueue objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- lsm->lsm_object_id, loi->loi_id,
- loi->loi_ost_idx, rc);
- GOTO(out_locks, rc);
- }
- }
- }
- if (lsm->lsm_stripe_count > 1)
- lov_llh_put(lov_lockh);
- RETURN(ELDLM_OK);
-
- out_locks:
- while (loi--, lov_lockhp--, i-- > 0) {
- struct lov_stripe_md submd;
- int err;
-
- if (lov_lockhp->cookie == 0)
- continue;
-
- /* XXX LOV STACKING: submd should be from the subobj */
- submd.lsm_object_id = loi->loi_id;
- submd.lsm_object_gr = lsm->lsm_object_gr;
- submd.lsm_stripe_count = 0;
- err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd,
- mode, lov_lockhp);
- if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: cancelling objid "LPX64" on OST "
- "idx %d after enqueue error: rc = %d\n",
- loi->loi_id, loi->loi_ost_idx, err);
- }
+ sub_policy.l_extent.start = req->rq_extent.start;
+ sub_policy.l_extent.end = req->rq_extent.end;
+
+ rc = obd_enqueue(lov->tgts[req->rq_idx].ltd_exp, req->rq_md,
+ type, &sub_policy, mode, flags, bl_cb,
+ cp_cb, gl_cb, data, lvb_len, lvb_swabber,
+ lov_lockhp);
+ rc = lov_update_enqueue_set(set, req, rc, save_flags);
+ if (rc != ELDLM_OK)
+ break;
}
- if (lsm->lsm_stripe_count > 1) {
- lov_llh_destroy(lov_lockh);
- lov_llh_put(lov_lockh);
- }
- return rc;
+ lov_fini_enqueue_set(set, mode);
+ RETURN(rc);
}
static int lov_match(struct obd_export *exp, struct lov_stripe_md *lsm,
__u32 type, ldlm_policy_data_t *policy, __u32 mode,
int *flags, void *data, struct lustre_handle *lockh)
{
- struct lov_lock_handles *lov_lockh = NULL;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
struct lustre_handle *lov_lockhp;
- struct lov_obd *lov;
- struct lov_oinfo *loi;
- struct lov_stripe_md submd;
- ldlm_error_t rc = 0;
- int i;
+ int lov_flags, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
if (!exp || !exp->exp_obd)
RETURN(-ENODEV);
- if (lsm->lsm_stripe_count > 1) {
- lov_lockh = lov_llh_new(lsm);
- if (lov_lockh == NULL)
- RETURN(-ENOMEM);
-
- lockh->cookie = lov_lockh->llh_handle.h_cookie;
- lov_lockhp = lov_lockh->llh_handles;
- } else {
- lov_lockhp = lockh;
- }
-
lov = &exp->exp_obd->u.lov;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++, lov_lockhp++) {
- ldlm_policy_data_t sub_ext;
- obd_off start, end;
- int lov_flags;
-
- if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
- policy->l_extent.end, &start, &end))
- continue;
-
- sub_ext.l_extent.start = start;
- sub_ext.l_extent.end = end;
+ rc = lov_prep_match_set(exp, lsm, policy, mode, lockh, &set);
+ if (rc)
+ RETURN(rc);
- if (obd_uuid_empty(&lov->tgts[loi->loi_ost_idx].uuid)) {
- CDEBUG(D_HA, "lov idx %d deleted\n", loi->loi_ost_idx);
- continue;
- }
- if (lov->tgts[loi->loi_ost_idx].active == 0) {
- CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
- rc = -EIO;
- break;
- }
+ list_for_each (pos, &set->set_list) {
+ ldlm_policy_data_t sub_policy;
+ req = list_entry(pos, struct lov_request, rq_link);
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ LASSERT(lov_lockhp);
- /* XXX LOV STACKING: submd should be from the subobj */
- submd.lsm_object_id = loi->loi_id;
- submd.lsm_object_gr = lsm->lsm_object_gr;
- submd.lsm_stripe_count = 0;
+ sub_policy.l_extent.start = req->rq_extent.start;
+ sub_policy.l_extent.end = req->rq_extent.end;
lov_flags = *flags;
- /* XXX submd is not fully initialized here */
- rc = obd_match(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd,
- type, &sub_ext, mode, &lov_flags, data,
+
+ rc = obd_match(lov->tgts[req->rq_idx].ltd_exp, req->rq_md,
+ type, &sub_policy, mode, &lov_flags, data,
lov_lockhp);
+ rc = lov_update_match_set(set, req, rc);
if (rc != 1)
break;
}
- if (rc == 1) {
- if (lsm->lsm_stripe_count > 1) {
- if (*flags & LDLM_FL_TEST_LOCK)
- lov_llh_destroy(lov_lockh);
- lov_llh_put(lov_lockh);
- }
- RETURN(1);
- }
-
- while (loi--, lov_lockhp--, i-- > 0) {
- struct lov_stripe_md submd;
- int err;
-
- if (lov_lockhp->cookie == 0)
- continue;
-
- /* XXX LOV STACKING: submd should be from the subobj */
- submd.lsm_object_id = loi->loi_id;
- submd.lsm_object_gr = lsm->lsm_object_gr;
- submd.lsm_stripe_count = 0;
- err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd,
- mode, lov_lockhp);
- if (err && lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: cancelling objid "LPX64" on OST "
- "idx %d after match failure: rc = %d\n",
- loi->loi_id, loi->loi_ost_idx, err);
- }
- }
-
- if (lsm->lsm_stripe_count > 1) {
- lov_llh_destroy(lov_lockh);
- lov_llh_put(lov_lockh);
- }
+ lov_fini_match_set(set, mode, *flags);
RETURN(rc);
}
static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm,
__u32 mode, struct lustre_handle *lockh)
{
- struct lov_lock_handles *lov_lockh = NULL;
+ struct lov_request_set *set;
+ struct lov_request *req;
+ struct list_head *pos;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
struct lustre_handle *lov_lockhp;
- struct lov_obd *lov;
- struct lov_oinfo *loi;
- int rc = 0, i;
+ int err = 0, rc = 0;
ENTRY;
if (lsm_bad_magic(lsm))
LASSERT(lsm->lsm_object_gr > 0);
LASSERT(lockh);
- if (lsm->lsm_stripe_count > 1) {
- lov_lockh = lov_handle2llh(lockh);
- if (!lov_lockh) {
- CERROR("LOV: invalid lov lock handle %p\n", lockh);
- RETURN(-EINVAL);
- }
-
- lov_lockhp = lov_lockh->llh_handles;
- } else {
- lov_lockhp = lockh;
- }
-
lov = &exp->exp_obd->u.lov;
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++, lov_lockhp++) {
- struct lov_stripe_md submd;
- int err;
+ rc = lov_prep_cancel_set(exp, lsm, mode, lockh, &set);
+ if (rc)
+ RETURN(rc);
- if (lov_lockhp->cookie == 0) {
- CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n",
- loi->loi_ost_idx, loi->loi_id);
- continue;
- }
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
- /* XXX LOV STACKING: submd should be from the subobj */
- submd.lsm_object_id = loi->loi_id;
- submd.lsm_object_gr = lsm->lsm_object_gr;
- submd.lsm_stripe_count = 0;
- err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd,
- mode, lov_lockhp);
- if (err) {
- if (lov->tgts[loi->loi_ost_idx].active) {
- CERROR("error: cancel objid "LPX64" subobj "
- LPX64" on OST idx %d: rc = %d\n",
- lsm->lsm_object_id,
- loi->loi_id, loi->loi_ost_idx, err);
- if (!rc)
- rc = err;
- }
+ rc = obd_cancel(lov->tgts[req->rq_idx].ltd_exp, req->rq_md,
+ mode, lov_lockhp);
+ rc = lov_update_common_set(set, req, rc);
+ if (rc) {
+ CERROR("error: cancel objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ lsm->lsm_object_id,
+ req->rq_md->lsm_object_id, req->rq_idx, rc);
+ err = rc;
}
+
}
-
- if (lsm->lsm_stripe_count > 1)
- lov_llh_destroy(lov_lockh);
- if (lov_lockh != NULL)
- lov_llh_put(lov_lockh);
- RETURN(rc);
+ lov_fini_cancel_set(set);
+ RETURN(err);
}
static int lov_cancel_unused(struct obd_export *exp,
- struct lov_stripe_md *lsm, int flags, void *opaque)
+ struct lov_stripe_md *lsm,
+ int flags, void *opaque)
{
struct lov_obd *lov;
struct lov_oinfo *loi;
len, karg, uarg);
if (err) {
if (lov->tgts[i].active) {
- CERROR("error: iocontrol OSC %s on OST"
- "idx %d: cmd %x err = %d\n",
+ CERROR("error: iocontrol OSC %s on OST "
+ "idx %d cmd %x: err = %d\n",
lov->tgts[i].uuid.uuid, i,
cmd, err);
if (!rc)
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
if (!lov->tgts[i].active)
continue;
- rc = obd_get_info(lov->tgts[i].ltd_exp, keylen, key,
- &size, &(ids[i]));
+ rc = obd_get_info(lov->tgts[i].ltd_exp,
+ keylen, key, &size, &(ids[i]));
if (rc != 0)
RETURN(rc);
}
{
struct obd_device *obddev = class_exp2obd(exp);
struct lov_obd *lov = &obddev->u.lov;
- int i, rc = 0;
+ int i, rc = 0, err;
ENTRY;
#define KEY_IS(str) \
if (vallen != lov->desc.ld_tgt_count)
RETURN(-EINVAL);
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- int er;
-
- /* OST was deleted */
- if (obd_uuid_empty(&lov->tgts[i].uuid))
- continue;
-
/* initialize all OSCs, even inactive ones */
- er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
- sizeof(obd_id), ((obd_id*)val) + i);
+ err = obd_set_info(lov->tgts[i].ltd_exp,
+ keylen, key, sizeof(obd_id),
+ ((obd_id*)val) + i);
if (!rc)
- rc = er;
+ rc = err;
+ }
+ RETURN(rc);
+ }
+ if (KEY_IS("async")) {
+ struct lov_desc *desc = &lov->desc;
+ struct lov_tgt_desc *tgts = lov->tgts;
+
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ lov->async = *((int*) val);
+
+ for (i = 0; i < desc->ld_tgt_count; i++, tgts++) {
+ struct obd_uuid *tgt_uuid = &tgts->uuid;
+ struct obd_device *tgt_obd;
+
+ tgt_obd = class_find_client_obd(tgt_uuid,
+ LUSTRE_OSC_NAME,
+ &obddev->obd_uuid);
+ if (!tgt_obd) {
+ CERROR("Target %s not attached\n",
+ tgt_uuid->uuid);
+ if (!rc)
+ rc = -EINVAL;
+ continue;
+ }
+
+ err = obd_set_info(tgt_obd->obd_self_export,
+ keylen, key, vallen, val);
+ if (err) {
+ CERROR("Failed to set async on target %s\n",
+ tgt_obd->obd_name);
+ if (!rc)
+ rc = err;
+ }
}
RETURN(rc);
}
+
if (KEY_IS("growth_count")) {
if (vallen != sizeof(int))
RETURN(-EINVAL);
}
for (i = 0; i < lov->desc.ld_tgt_count; i++) {
- int er;
-
- /* OST was deleted */
- if (obd_uuid_empty(&lov->tgts[i].uuid))
+ if (val && !obd_uuid_equals(val, &lov->tgts[i].uuid))
continue;
if (!val && !lov->tgts[i].active)
continue;
- er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key, vallen,
- val);
+ err = obd_set_info(lov->tgts[i].ltd_exp,
+ keylen, key, vallen, val);
if (!rc)
- rc = er;
+ rc = err;
}
RETURN(rc);
#undef KEY_IS
-
-}
-
-/* Merge rss if @kms_only == 0
- *
- * Even when merging RSS, we will take the KMS value if it's larger.
- * This prevents getattr from stomping on dirty cached pages which
- * extend the file size. */
-__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms_only)
-{
- struct lov_oinfo *loi;
- __u64 size = 0;
- int i;
-
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
- obd_size lov_size, tmpsize;
-
- tmpsize = loi->loi_kms;
- if (kms_only == 0 && loi->loi_rss > tmpsize)
- tmpsize = loi->loi_rss;
-
- lov_size = lov_stripe_size(lsm, tmpsize, i);
- if (lov_size > size)
- size = lov_size;
- }
-
- return size;
-}
-EXPORT_SYMBOL(lov_merge_size);
-
-/* Merge blocks */
-__u64 lov_merge_blocks(struct lov_stripe_md *lsm)
-{
- struct lov_oinfo *loi;
- __u64 blocks = 0;
- int i;
-
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
- blocks += loi->loi_blocks;
- }
- return blocks;
}
-EXPORT_SYMBOL(lov_merge_blocks);
-
-__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time)
-{
- struct lov_oinfo *loi;
- int i;
-
- for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
- i++, loi++) {
- if (loi->loi_mtime > current_time)
- current_time = loi->loi_mtime;
- }
- return current_time;
-}
-EXPORT_SYMBOL(lov_merge_mtime);
#if 0
struct lov_multi_wait {
}
#endif
-void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
- obd_off size)
-{
- struct lov_oinfo *loi;
- int stripe = 0;
- __u64 kms;
- ENTRY;
-
- if (size > 0)
- stripe = lov_stripe_number(lsm, size - 1);
- kms = lov_size_to_stripe(lsm, size, stripe);
- loi = &(lsm->lsm_oinfo[stripe]);
-
- CDEBUG(D_INODE, "stripe %d KMS %sincreasing "LPU64"->"LPU64"\n",
- stripe, kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms);
- if (kms > loi->loi_kms)
- loi->loi_kms = kms;
- EXIT;
-}
-EXPORT_SYMBOL(lov_increase_kms);
-
struct obd_ops lov_obd_ops = {
.o_owner = THIS_MODULE,
.o_attach = lov_attach,
.o_queue_group_io = lov_queue_group_io,
.o_trigger_group_io = lov_trigger_group_io,
.o_teardown_async_page = lov_teardown_async_page,
+ .o_adjust_kms = lov_adjust_kms,
.o_punch = lov_punch,
.o_sync = lov_sync,
.o_enqueue = lov_enqueue,
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#include <asm/div64.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/obd_lov.h>
+
+#include "lov_internal.h"
+
+/* compute object size given "stripeno" and the ost size */
+obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size,
+ int stripeno)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ unsigned long swidth = ssize * lsm->lsm_stripe_count;
+ unsigned long stripe_size;
+ obd_size lov_size;
+ ENTRY;
+
+ if (ost_size == 0)
+ RETURN(0);
+
+ /* do_div(a, b) returns a % b, and a = a / b */
+ stripe_size = do_div(ost_size, ssize);
+ if (stripe_size)
+ lov_size = ost_size * swidth + stripeno * ssize + stripe_size;
+ else
+ lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize;
+
+ RETURN(lov_size);
+}
+
+/* we have an offset in file backed by an lov and want to find out where
+ * that offset lands in our given stripe of the file. for the easy
+ * case where the offset is within the stripe, we just have to scale the
+ * offset down to make it relative to the stripe instead of the lov.
+ *
+ * the harder case is what to do when the offset doesn't intersect the
+ * stripe. callers will want start offsets clamped ahead to the start
+ * of the nearest stripe in the file. end offsets similarly clamped to the
+ * nearest ending byte of a stripe in the file:
+ *
+ * all this function does is move offsets to the nearest region of the
+ * stripe, and it does its work "mod" the full length of all the stripes.
+ * consider a file with 3 stripes:
+ *
+ * S E
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ *
+ * to find stripe 1's offsets for S and E, it divides by the full stripe
+ * width and does its math in the context of a single set of stripes:
+ *
+ * S E
+ * -----------------------------------
+ * | 0 | 1 | 2 |
+ * -----------------------------------
+ *
+ * it'll notice that E is outside stripe 1 and clamp it to the end of the
+ * stripe, then multiply it back out by lov_off to give the real offsets in
+ * the stripe:
+ *
+ * S E
+ * ---------------------------------------------------------------------
+ * | 1 | 1 | 1 | 1 | 1 | 1 |
+ * ---------------------------------------------------------------------
+ *
+ * it would have done similarly and pulled S forward to the start of a 1
+ * stripe if, say, S had landed in a 0 stripe.
+ *
+ * this rounding isn't always correct. consider an E lov offset that lands
+ * on a 0 stripe, the "mod stripe width" math will pull it forward to the
+ * start of a 1 stripe, when in fact it wanted to be rounded back to the end
+ * of a previous 1 stripe. this logic is handled by callers and this is why:
+ *
+ * this function returns < 0 when the offset was "before" the stripe and
+ * was moved forward to the start of the stripe in question; 0 when it
+ * falls in the stripe and no shifting was done; > 0 when the offset
+ * was outside the stripe and was pulled back to its final byte. */
+int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off,
+ int stripeno, obd_off *obd_off)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ unsigned long swidth = ssize * lsm->lsm_stripe_count;
+ unsigned long stripe_off, this_stripe;
+ int ret = 0;
+
+ if (lov_off == OBD_OBJECT_EOF) {
+ *obd_off = OBD_OBJECT_EOF;
+ return 0;
+ }
+
+ /* do_div(a, b) returns a % b, and a = a / b */
+ stripe_off = do_div(lov_off, swidth);
+
+ this_stripe = stripeno * ssize;
+ if (stripe_off < this_stripe) {
+ stripe_off = 0;
+ ret = -1;
+ } else {
+ stripe_off -= this_stripe;
+
+ if (stripe_off >= ssize) {
+ stripe_off = ssize;
+ ret = 1;
+ }
+ }
+
+ *obd_off = lov_off * ssize + stripe_off;
+ return ret;
+}
+
+/* Given a whole-file size and a stripe number, give the file size which
+ * corresponds to the individual object of that stripe.
+ *
+ * This behaves basically in the same was as lov_stripe_offset, except that
+ * file sizes falling before the beginning of a stripe are clamped to the end
+ * of the previous stripe, not the beginning of the next:
+ *
+ * S
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ *
+ * if clamped to stripe 2 becomes:
+ *
+ * S
+ * ---------------------------------------------------------------------
+ * | 0 | 1 | 2 | 0 | 1 | 2 |
+ * ---------------------------------------------------------------------
+ */
+obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size,
+ int stripeno)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ unsigned long swidth = ssize * lsm->lsm_stripe_count;
+ unsigned long stripe_off, this_stripe;
+
+ if (file_size == OBD_OBJECT_EOF)
+ return OBD_OBJECT_EOF;
+
+ /* do_div(a, b) returns a % b, and a = a / b */
+ stripe_off = do_div(file_size, swidth);
+
+ this_stripe = stripeno * ssize;
+ if (stripe_off < this_stripe) {
+ /* Move to end of previous stripe, or zero */
+ if (file_size > 0) {
+ file_size--;
+ stripe_off = ssize;
+ } else {
+ stripe_off = 0;
+ }
+ } else {
+ stripe_off -= this_stripe;
+
+ if (stripe_off >= ssize) {
+ /* Clamp to end of this stripe */
+ stripe_off = ssize;
+ }
+ }
+
+ return (file_size * ssize + stripe_off);
+}
+
+/* given an extent in an lov and a stripe, calculate the extent of the stripe
+ * that is contained within the lov extent. this returns true if the given
+ * stripe does intersect with the lov extent. */
+int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno,
+ obd_off start, obd_off end,
+ obd_off *obd_start, obd_off *obd_end)
+{
+ int start_side, end_side;
+
+ start_side = lov_stripe_offset(lsm, start, stripeno, obd_start);
+ end_side = lov_stripe_offset(lsm, end, stripeno, obd_end);
+
+ CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n",
+ start, end, start_side, *obd_start, *obd_end, end_side);
+
+ /* this stripe doesn't intersect the file extent when neither
+ * start or the end intersected the stripe and obd_start and
+ * obd_end got rounded up to the save value. */
+ if (start_side != 0 && end_side != 0 && *obd_start == *obd_end)
+ return 0;
+
+ /* as mentioned in the lov_stripe_offset commentary, end
+ * might have been shifted in the wrong direction. This
+ * happens when an end offset is before the stripe when viewed
+ * through the "mod stripe size" math. we detect it being shifted
+ * in the wrong direction and touch it up.
+ * interestingly, this can't underflow since end must be > start
+ * if we passed through the previous check.
+ * (should we assert for that somewhere?) */
+ if (end_side != 0)
+ (*obd_end)--;
+
+ return 1;
+}
+
+/* compute which stripe number "lov_off" will be written into */
+int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off)
+{
+ unsigned long ssize = lsm->lsm_stripe_size;
+ unsigned long swidth = ssize * lsm->lsm_stripe_count;
+ unsigned long stripe_off;
+
+ stripe_off = do_div(lov_off, swidth);
+
+ return stripe_off / ssize;
+}
if (lsm) {
if (lsm->lsm_magic != LOV_MAGIC) {
- CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X\n",
+ CWARN("bad LOV MAGIC: 0x%08X != 0x%08X\n",
lsm->lsm_magic, LOV_MAGIC);
RETURN(-EINVAL);
}
return lsm_size;
}
EXPORT_SYMBOL(lov_alloc_memmd);
+
void lov_free_memmd(struct lov_stripe_md **lsmp)
{
OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count));
*lsmp = NULL;
}
EXPORT_SYMBOL(lov_free_memmd);
+
int lov_unpackmd_v0(struct lov_obd *lov, struct lov_stripe_md *lsm,
struct lov_mds_md_v0 *lmm)
{
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/obd_lov.h>
+
+#include "lov_internal.h"
+
+/* Shrink set->set_md (and, when present, the llog cookie array) down
+ * to set->set_count stripes after object creation succeeded on fewer
+ * OSTs than originally requested.  On allocation failure the old,
+ * larger buffers are kept and the wasted tail is merely reported. */
+void qos_shrink_lsm(struct lov_request_set *set)
+{
+ struct lov_stripe_md *lsm = set->set_md;
+ struct lov_stripe_md *lsm_new;
+ /* XXX LOV STACKING call into osc for sizes */
+ unsigned oldsize, newsize;
+
+ if (set->set_oti && set->set_cookies && set->set_cookie_sent) {
+ struct llog_cookie *cookies;
+ oldsize = lsm->lsm_stripe_count * sizeof(*cookies);
+ newsize = set->set_count * sizeof(*cookies);
+
+ cookies = set->set_cookies;
+ oti_alloc_cookies(set->set_oti, set->set_count);
+ if (set->set_oti->oti_logcookies) {
+ /* copy only the cookies for the stripes we kept */
+ memcpy(set->set_oti->oti_logcookies, cookies, newsize);
+ OBD_FREE(cookies, oldsize);
+ set->set_cookies = set->set_oti->oti_logcookies;
+ } else {
+ CWARN("'leaking' %d bytes\n", oldsize - newsize);
+ }
+ }
+
+ CWARN("using fewer stripes for object "LPX64": old %u new %u\n",
+ lsm->lsm_object_id, lsm->lsm_stripe_count, set->set_count);
+
+ oldsize = lov_stripe_md_size(lsm->lsm_stripe_count);
+ newsize = lov_stripe_md_size(set->set_count);
+ OBD_ALLOC(lsm_new, newsize);
+ if (lsm_new != NULL) {
+ /* newsize covers the header plus the first set_count
+ * lov_oinfo entries, which are the ones that succeeded */
+ memcpy(lsm_new, lsm, newsize);
+ lsm_new->lsm_stripe_count = set->set_count;
+ OBD_FREE(lsm, oldsize);
+ set->set_md = lsm_new;
+ } else {
+ CWARN("'leaking' %d bytes\n", oldsize - newsize);
+ }
+}
+
+#define LOV_CREATE_RESEED_INTERVAL 1000
+/* FIXME use real qos data to prepare the lov create request */
+/* Build one lov_request per OST for an object create, starting at a
+ * (periodically reseeded) rotating OST index and skipping inactive
+ * targets.  Fails with -EIO if no request could be built, and with
+ * -EFBIG/-EIO if specific striping was requested (!newea) but could
+ * not be satisfied; with newea the lsm is shrunk instead.
+ *
+ * Fix vs. original: a partially built request was leaked when a later
+ * allocation (rq_md or rq_oa) failed before lov_set_add_req(). */
+int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
+{
+ static int ost_start_idx, ost_start_count;
+ unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+ struct lov_stripe_md *lsm = set->set_md;
+ struct obdo *src_oa = set->set_oa;
+ int i, rc = 0;
+ ENTRY;
+
+ LASSERT(src_oa->o_valid & OBD_MD_FLID);
+
+ lsm->lsm_object_id = src_oa->o_id;
+ lsm->lsm_object_gr = src_oa->o_gr;
+ if (!lsm->lsm_stripe_size)
+ lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
+ if (!lsm->lsm_pattern) {
+ lsm->lsm_pattern = lov->desc.ld_pattern ?
+ lov->desc.ld_pattern : LOV_PATTERN_RAID0;
+ }
+
+ if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
+ if (--ost_start_count <= 0) {
+ /* reseed periodically so all creates don't march in
+ * lock-step around the OST ring */
+ ost_start_idx = ll_insecure_random_int();
+ ost_start_count = LOV_CREATE_RESEED_INTERVAL;
+ } else if (lsm->lsm_stripe_count >=
+ lov->desc.ld_active_tgt_count) {
+ /* If we allocate from all of the stripes, make the
+ * next file start on the next OST. */
+ ++ost_start_idx;
+ }
+ ost_idx = ost_start_idx % ost_count;
+ } else {
+ ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
+ }
+
+ CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
+ lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
+
+ for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+ struct lov_request *req;
+
+ ++ost_start_idx;
+ if (lov->tgts[ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_md);
+ OBD_ALLOC(req->rq_md, req->rq_buflen);
+ if (req->rq_md == NULL) {
+ /* free the partial request; only requests linked via
+ * lov_set_add_req() are cleaned up by the caller */
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL) {
+ OBD_FREE(req->rq_md, req->rq_buflen);
+ OBD_FREE(req, sizeof(*req));
+ GOTO(out, rc = -ENOMEM);
+ }
+
+ req->rq_idx = ost_idx;
+ req->rq_stripe = i;
+ /* create data objects with "parent" OA */
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+
+ /* XXX When we start creating objects on demand, we need to
+ * make sure that we always create the object on the
+ * stripe which holds the existing file size.
+ */
+ if (src_oa->o_valid & OBD_MD_FLSIZE) {
+ if (lov_stripe_offset(lsm, src_oa->o_size, i,
+ &req->rq_oa->o_size) < 0 &&
+ req->rq_oa->o_size)
+ req->rq_oa->o_size--;
+
+ CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+ i, req->rq_oa->o_size, src_oa->o_size);
+ }
+
+ lov_set_add_req(req, set);
+
+ /* If we have allocated enough objects, we are OK */
+ if (set->set_count == lsm->lsm_stripe_count)
+ GOTO(out, rc = 0);
+ }
+
+ if (set->set_count == 0)
+ GOTO(out, rc = -EIO);
+
+ /* If we were passed specific striping params, then a failure to
+ * meet those requirements is an error, since we can't reallocate
+ * that memory (it might be part of a larger array or something).
+ *
+ * We can only get here if lsm_stripe_count was originally > 1.
+ */
+ if (!newea) {
+ CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
+ lsm->lsm_object_id, set->set_count,
+ lsm->lsm_stripe_count, rc);
+ rc = rc ? rc : -EFBIG;
+ } else {
+ qos_shrink_lsm(set);
+ rc = 0;
+ }
+out:
+ RETURN(rc);
+}
+
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LOV
+
+#ifdef __KERNEL__
+#include <asm/div64.h>
+#else
+#include <liblustre.h>
+#endif
+
+#include <linux/obd_class.h>
+#include <linux/obd_lov.h>
+#include <linux/lustre_idl.h>
+
+#include "lov_internal.h"
+
+/* Initialize a freshly allocated request set: empty request list,
+ * zeroed progress counters, and one reference held by the caller. */
+static void lov_init_set(struct lov_request_set *set)
+{
+ set->set_count = 0;
+ set->set_completes = 0;
+ set->set_success = 0;
+ INIT_LIST_HEAD(&set->set_list);
+ atomic_set(&set->set_refcount, 1);
+}
+
+/* Release the last reference to a request set: free every queued
+ * sub-request (with its obdo and submd buffer), the brw page array,
+ * the lock-handle container, and finally the set itself.  Called only
+ * from the lov_fini_*_set() paths once the refcount drops to zero. */
+static void lov_finish_set(struct lov_request_set *set)
+{
+ struct list_head *pos, *n;
+ ENTRY;
+
+ LASSERT(set);
+ list_for_each_safe(pos, n, &set->set_list) {
+ struct lov_request *req = list_entry(pos, struct lov_request,
+ rq_link);
+ list_del_init(&req->rq_link);
+
+ if (req->rq_oa)
+ obdo_free(req->rq_oa);
+ if (req->rq_md)
+ OBD_FREE(req->rq_md, req->rq_buflen);
+ OBD_FREE(req, sizeof(*req));
+ }
+
+ if (set->set_pga) {
+ int len = set->set_oabufs * sizeof(*set->set_pga);
+ OBD_FREE(set->set_pga, len);
+ }
+ if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+
+ OBD_FREE(set, sizeof(*set));
+ EXIT;
+}
+
+/* Record the completion of one sub-request: stash its result code and
+ * bump the set's completion (and, on success, success) counters. */
+static void lov_update_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ req->rq_complete = 1;
+ req->rq_rc = rc;
+
+ set->set_completes++;
+ if (rc == 0)
+ set->set_success++;
+}
+
+/* Common per-request completion handler: record the result in the set
+ * and forgive errors coming from OSTs that are currently inactive. */
+int lov_update_common_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ ENTRY;
+
+ lov_update_set(set, req, rc);
+
+ /* grace error on inactive ost */
+ if (rc && !lov->tgts[req->rq_idx].active)
+ rc = 0;
+
+ /* FIXME in raid1 regime, should return 0 */
+ RETURN(rc);
+}
+
+/* Append a sub-request to the set's list; the set takes ownership and
+ * will free the request in lov_finish_set(). */
+void lov_set_add_req(struct lov_request *req, struct lov_request_set *set)
+{
+ list_add_tail(&req->rq_link, &set->set_list);
+ set->set_count++;
+}
+
+/* Completion handler for one per-stripe lock enqueue.  Copies the
+ * attributes returned in the per-request submd back into the real
+ * per-stripe lov_oinfo and maintains the known-minimum-size (KMS)
+ * under the protection of the just-acquired extent lock.
+ * rc is the enqueue result (ELDLM_OK, ELDLM_LOCK_ABORTED, or error). */
+int lov_update_enqueue_set(struct lov_request_set *set,
+ struct lov_request *req, int rc, int flags)
+{
+ struct lustre_handle *lov_lockhp;
+ struct lov_oinfo *loi;
+ ENTRY;
+
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ loi = &set->set_md->lsm_oinfo[req->rq_stripe];
+
+ /* XXX FIXME: This unpleasantness doesn't belong here at *all*.
+ * It belongs in the OSC, except that the OSC doesn't have
+ * access to the real LOI -- it gets a copy, that we created
+ * above, and that copy can be arbitrarily out of date.
+ *
+ * The LOV API is due for a serious rewriting anyways, and this
+ * can be addressed then. */
+ if (rc == ELDLM_OK) {
+ struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp);
+ __u64 tmp = req->rq_md->lsm_oinfo->loi_rss;
+
+ LASSERT(lock != NULL);
+ loi->loi_rss = tmp;
+ loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime;
+ loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks;
+ /* Extend KMS up to the end of this lock and no further
+ * A lock on [x,y] means a KMS of up to y + 1 bytes! */
+ if (tmp > lock->l_policy_data.l_extent.end)
+ tmp = lock->l_policy_data.l_extent.end + 1;
+ if (tmp >= loi->loi_kms) {
+ CDEBUG(D_INODE, "lock acquired, setting rss="
+ LPU64", kms="LPU64"\n", loi->loi_rss, tmp);
+ loi->loi_kms = tmp;
+ loi->loi_kms_valid = 1;
+ } else {
+ CDEBUG(D_INODE, "lock acquired, setting rss="
+ LPU64"; leaving kms="LPU64", end="LPU64
+ "\n", loi->loi_rss, loi->loi_kms,
+ lock->l_policy_data.l_extent.end);
+ }
+ ldlm_lock_allow_match(lock);
+ LDLM_LOCK_PUT(lock);
+ } else if (rc == ELDLM_LOCK_ABORTED && flags & LDLM_FL_HAS_INTENT) {
+ /* glimpse: no lock was granted, but the intent reply still
+ * carries fresh attributes; KMS is left untouched */
+ memset(lov_lockhp, 0, sizeof(*lov_lockhp));
+ loi->loi_rss = req->rq_md->lsm_oinfo->loi_rss;
+ loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime;
+ loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks;
+ CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving"
+ " kms="LPU64"\n", loi->loi_rss, loi->loi_kms);
+ rc = ELDLM_OK;
+ } else {
+ struct obd_export *exp = set->set_exp;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+
+ /* real failure: clear the handle; forgive inactive OSTs */
+ memset(lov_lockhp, 0, sizeof(*lov_lockhp));
+ if (lov->tgts[req->rq_idx].active) {
+ CERROR("error: enqueue objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ set->set_md->lsm_object_id, loi->loi_id,
+ loi->loi_ost_idx, rc);
+ } else {
+ rc = ELDLM_OK;
+ }
+ }
+ lov_update_set(set, req, rc);
+ RETURN(rc);
+}
+
+/* Finish an enqueue/match set.  On full success this is a no-op; on
+ * partial success it cancels every lock that *was* granted so the
+ * caller is left holding nothing, then drops the handle container. */
+static int enqueue_done(struct lov_request_set *set, __u32 mode)
+{
+ struct list_head *pos;
+ struct lov_request *req;
+ struct lustre_handle *lov_lockhp = NULL;
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ int rc = 0;
+ ENTRY;
+
+ LASSERT(set->set_completes);
+ /* enqueue/match success, just return */
+ if (set->set_completes == set->set_success)
+ RETURN(0);
+
+ /* cancel enqueued/matched locks */
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ /* only requests that completed successfully hold a lock */
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+
+ lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe;
+ LASSERT(lov_lockhp);
+ if (lov_lockhp->cookie == 0)
+ continue;
+
+ rc = obd_cancel(lov->tgts[req->rq_idx].ltd_exp, req->rq_md,
+ mode, lov_lockhp);
+ if (rc && lov->tgts[req->rq_idx].active)
+ CERROR("cancelling obdjid "LPX64" on OST "
+ "idx %d error: rc = %d\n",
+ req->rq_md->lsm_object_id, req->rq_idx, rc);
+ }
+ lov_llh_put(set->set_lockh);
+ RETURN(rc);
+}
+
+/* Finish an enqueue set: clean up partially granted locks via
+ * enqueue_done() (or just drop the handle container if nothing
+ * completed) and release the caller's reference on the set.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes)
+ rc = enqueue_done(set, mode);
+ else
+ lov_llh_put(set->set_lockh);
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Build a request set that enqueues one extent lock per stripe that
+ * intersects [policy->l_extent.start, policy->l_extent.end], skipping
+ * inactive OSTs.  The per-stripe lock handles live in a new llh
+ * container whose cookie is returned through @lockh.
+ *
+ * Fix vs. original: the last seeding assignment copied loi_mtime FROM
+ * the freshly zero-allocated submd INTO the real loi, wiping it; it
+ * now seeds the submd from the loi like the four lines above it. */
+int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_policy_data_t *policy, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct lov_request_set *set;
+ int i, rc = 0;
+ struct lov_oinfo *loi;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_lockh = lov_llh_new(lsm);
+ if (set->set_lockh == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+ obd_off start, end;
+
+ /* skip stripes outside the requested file extent */
+ if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
+ policy->l_extent.end, &start, &end))
+ continue;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_md) +
+ sizeof(struct lov_oinfo);
+ OBD_ALLOC(req->rq_md, req->rq_buflen);
+ if (req->rq_md == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_extent.start = start;
+ req->rq_extent.end = end;
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ /* seed the per-request submd with the current per-stripe
+ * attributes; the OSC updates them during enqueue */
+ req->rq_md->lsm_object_id = loi->loi_id;
+ req->rq_md->lsm_object_gr = lsm->lsm_object_gr;
+ req->rq_md->lsm_stripe_count = 0;
+ req->rq_md->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid;
+ req->rq_md->lsm_oinfo->loi_rss = loi->loi_rss;
+ req->rq_md->lsm_oinfo->loi_kms = loi->loi_kms;
+ req->rq_md->lsm_oinfo->loi_blocks = loi->loi_blocks;
+ req->rq_md->lsm_oinfo->loi_mtime = loi->loi_mtime;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(0);
+out_set:
+ lov_fini_enqueue_set(set, mode);
+ RETURN(rc);
+}
+
+/* Completion handler for one per-stripe lock match.  obd_match()
+ * returns 1 on a successful match; translate that to 0 for the set's
+ * success accounting, but propagate the original rc to the caller. */
+int lov_update_match_set(struct lov_request_set *set, struct lov_request *req,
+ int rc)
+{
+ int ret = rc;
+ ENTRY;
+
+ if (rc == 1)
+ ret = 0;
+ lov_update_set(set, req, ret);
+ RETURN(rc);
+}
+
+/* Finish a match set: on completion, drop the handle container for a
+ * fully successful LDLM_FL_TEST_LOCK match and let enqueue_done()
+ * cancel partial matches; then release the caller's reference.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes) {
+ if (set->set_count == set->set_success &&
+ flags & LDLM_FL_TEST_LOCK)
+ lov_llh_put(set->set_lockh);
+ rc = enqueue_done(set, mode);
+ } else {
+ lov_llh_put(set->set_lockh);
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Build a request set that matches an existing extent lock on every
+ * stripe intersecting the requested extent.  Unlike enqueue, an
+ * inactive OST here is a hard failure (-EIO) since a partial match is
+ * useless.  The lock handles live in a new llh container whose cookie
+ * is returned through @lockh. */
+int lov_prep_match_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_policy_data_t *policy, __u32 mode,
+ struct lustre_handle *lockh,
+ struct lov_request_set **reqset)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct lov_request_set *set;
+ int i, rc = 0;
+ struct lov_oinfo *loi;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_lockh = lov_llh_new(lsm);
+ if (set->set_lockh == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+ obd_off start, end;
+
+ /* skip stripes outside the requested file extent */
+ if (!lov_stripe_intersects(lsm, i, policy->l_extent.start,
+ policy->l_extent.end, &start, &end))
+ continue;
+
+ /* FIXME raid1 should grace this error */
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ GOTO(out_set, rc = -EIO);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_md);
+ OBD_ALLOC(req->rq_md, req->rq_buflen);
+ if (req->rq_md == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_extent.start = start;
+ req->rq_extent.end = end;
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ req->rq_md->lsm_object_id = loi->loi_id;
+ req->rq_md->lsm_object_gr = lsm->lsm_object_gr;
+ req->rq_md->lsm_stripe_count = 0;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_match_set(set, mode, 0);
+ RETURN(rc);
+}
+
+/* Finish a cancel set: drop the lock-handle container reference taken
+ * by lov_handle2llh() in prep, then release the caller's reference.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_cancel_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+
+ if (set->set_lockh)
+ lov_llh_put(set->set_lockh);
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Build a request set that cancels the per-stripe locks recorded in
+ * the llh container looked up from @lockh.  Stripes whose handle
+ * cookie is zero (never granted, or already cleared) are skipped. */
+int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm,
+ __u32 mode, struct lustre_handle *lockh,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ int i, rc = 0;
+ struct lov_oinfo *loi;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_lockh = lov_handle2llh(lockh);
+ if (set->set_lockh == NULL) {
+ CERROR("LOV: invalid lov lock handle %p\n", lockh);
+ GOTO(out_set, rc = -EINVAL);
+ }
+ lockh->cookie = set->set_lockh->llh_handle.h_cookie;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+ struct lustre_handle *lov_lockhp;
+
+ lov_lockhp = set->set_lockh->llh_handles + i;
+ if (lov_lockhp->cookie == 0) {
+ CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n",
+ loi->loi_ost_idx, loi->loi_id);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_buflen = sizeof(*req->rq_md);
+ OBD_ALLOC(req->rq_md, req->rq_buflen);
+ if (req->rq_md == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING: submd should be from the subobj */
+ req->rq_md->lsm_object_id = loi->loi_id;
+ req->rq_md->lsm_object_gr = lsm->lsm_object_gr;
+ req->rq_md->lsm_stripe_count = 0;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_cancel_set(set);
+ RETURN(rc);
+}
+
+/* Finish a create set.  On success, merge per-stripe attributes into
+ * the caller's obdo and hand back the (possibly shrunk) lsm through
+ * @ea.  On failure, destroy every sub-object that *was* created and
+ * free the lsm if we allocated it ourselves.  Either way, restore the
+ * llog cookie bookkeeping in @oti.
+ *
+ * Fixes vs. original: use a statement separator instead of the comma
+ * operator in the uncreate loop, and report err (the obd_destroy
+ * result) rather than the unrelated rc in its CERROR. */
+static int create_done(struct obd_export *exp, struct lov_request_set *set,
+ struct lov_stripe_md **ea)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct obd_trans_info *oti = set->set_oti;
+ struct obdo *src_oa = set->set_oa;
+ struct list_head *pos;
+ struct lov_request *req;
+ struct obdo *ret_oa = NULL;
+ int attrset = 0, rc = 0;
+ ENTRY;
+
+ LASSERT(set->set_completes);
+
+ if (!set->set_success)
+ GOTO(cleanup, rc = -EIO);
+
+ /* we allocated the lsm ourselves: shrink it to the stripes that
+ * were actually created */
+ if (*ea == NULL && set->set_count != set->set_success) {
+ set->set_count = set->set_success;
+ qos_shrink_lsm(set);
+ }
+
+ ret_oa = obdo_alloc();
+ if (ret_oa == NULL)
+ GOTO(cleanup, rc = -ENOMEM);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+ lov_merge_attrs(ret_oa, req->rq_oa, req->rq_oa->o_valid,
+ set->set_md, req->rq_stripe, &attrset);
+ }
+ if (src_oa->o_valid & OBD_MD_FLSIZE &&
+ ret_oa->o_size != src_oa->o_size) {
+ CERROR("original size "LPU64" isn't new object size "LPU64"\n",
+ src_oa->o_size, ret_oa->o_size);
+ LBUG();
+ }
+ ret_oa->o_id = src_oa->o_id;
+ ret_oa->o_gr = src_oa->o_gr;
+ ret_oa->o_valid |= OBD_MD_FLGROUP;
+ memcpy(src_oa, ret_oa, sizeof(*src_oa));
+ obdo_free(ret_oa);
+
+ *ea = set->set_md;
+ GOTO(done, rc = 0);
+
+ EXIT;
+cleanup:
+ /* undo the sub-objects that were successfully created */
+ list_for_each (pos, &set->set_list) {
+ struct obd_export *sub_exp;
+ int err = 0;
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+
+ sub_exp = lov->tgts[req->rq_idx].ltd_exp;
+ err = obd_destroy(sub_exp, req->rq_oa, NULL, oti);
+ if (err)
+ CERROR("Failed to uncreate objid "LPX64" subobj "
+ LPX64" on OST idx %d: rc = %d\n",
+ set->set_oa->o_id, req->rq_oa->o_id,
+ req->rq_idx, err);
+ }
+ if (*ea == NULL)
+ obd_free_memmd(exp, &set->set_md);
+done:
+ if (oti && set->set_cookies) {
+ oti->oti_logcookies = set->set_cookies;
+ if (!set->set_cookie_sent) {
+ oti_free_cookies(oti);
+ src_oa->o_valid &= ~OBD_MD_FLCOOKIE;
+ } else {
+ src_oa->o_valid |= OBD_MD_FLCOOKIE;
+ }
+ }
+ return rc;
+}
+
+/* Finish a create set: run create_done() if any sub-request completed
+ * and release the caller's reference on the set.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes) {
+ rc = create_done(set->set_exp, set, ea);
+ /* FIXME update qos data here */
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Completion handler for one sub-object create.  Successful creates
+ * are packed densely: the request's stripe slot is reassigned to the
+ * next free slot (set_success), and the new object id/group/OST index
+ * are recorded in that stripe's lov_oinfo. */
+int lov_update_create_set(struct lov_request_set *set,
+ struct lov_request *req, int rc)
+{
+ struct obd_trans_info *oti = set->set_oti;
+ struct lov_stripe_md *lsm = set->set_md;
+ struct lov_oinfo *loi;
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ ENTRY;
+
+ req->rq_stripe = set->set_success;
+ loi = &lsm->lsm_oinfo[req->rq_stripe];
+
+ if (rc && lov->tgts[req->rq_idx].active) {
+ CERROR("error creating objid "LPX64" sub-object"
+ " on OST idx %d/%d: rc = %d\n",
+ set->set_oa->o_id, req->rq_idx,
+ lsm->lsm_stripe_count, rc);
+ if (rc > 0) {
+ CERROR("obd_create returned invalid err %d\n", rc);
+ rc = -EIO;
+ }
+ }
+ lov_update_set(set, req, rc);
+ if (rc)
+ RETURN(rc);
+
+ if (oti && oti->oti_objid)
+ oti->oti_objid[req->rq_idx] = req->rq_oa->o_id;
+
+ loi->loi_id = req->rq_oa->o_id;
+ loi->loi_gr = req->rq_oa->o_gr;
+ loi->loi_ost_idx = req->rq_idx;
+ CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPX64" at idx %d\n",
+ lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx);
+ loi_init(loi);
+
+ /* NOTE(review): set_cookies implies oti was non-NULL at prep
+ * time, so this deref should be safe -- confirm against callers */
+ if (set->set_cookies)
+ ++oti->oti_logcookies;
+ if (req->rq_oa->o_valid & OBD_MD_FLCOOKIE)
+ set->set_cookie_sent++;
+
+ RETURN(0);
+}
+
+/* Build a request set for creating the sub-objects of a file.  If the
+ * caller supplied no lsm (*ea == NULL) one is allocated here, sized
+ * either by the default stripe count or by the stripe count needed to
+ * hold a pre-declared file size.  The actual per-OST requests are
+ * produced by qos_prep_create().
+ *
+ * Fix vs. original: when oti_alloc_cookies() failed, the error path
+ * was taken with rc still 0, so the set was torn down yet the caller
+ * saw success with *reqset never assigned; now returns -ENOMEM. */
+int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **ea,
+ struct obdo *src_oa, struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ struct lov_request_set *set;
+ int rc = 0, newea = 0;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = *ea;
+ set->set_oa = src_oa;
+ set->set_oti = oti;
+
+ if (set->set_md == NULL) {
+ int stripes, stripe_cnt;
+ stripe_cnt = lov_get_stripecnt(lov, 0);
+
+ /* If the MDS file was truncated up to some size, stripe over
+ * enough OSTs to allow the file to be created at that size. */
+ if (src_oa->o_valid & OBD_MD_FLSIZE) {
+ stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
+ do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
+
+ if (stripes > lov->desc.ld_active_tgt_count)
+ GOTO(out_set, rc = -EFBIG);
+ if (stripes < stripe_cnt)
+ stripes = stripe_cnt;
+ } else {
+ stripes = stripe_cnt;
+ }
+
+ rc = lov_alloc_memmd(&set->set_md, stripes,
+ lov->desc.ld_pattern ?
+ lov->desc.ld_pattern : LOV_PATTERN_RAID0);
+ if (rc < 0)
+ goto out_set;
+ newea = 1;
+ }
+
+ rc = qos_prep_create(lov, set, newea);
+ if (rc)
+ goto out_lsm;
+
+ if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+ oti_alloc_cookies(oti, set->set_count);
+ if (!oti->oti_logcookies)
+ GOTO(out_lsm, rc = -ENOMEM);
+ set->set_cookies = oti->oti_logcookies;
+ }
+ *reqset = set;
+ RETURN(rc);
+
+out_lsm:
+ /* only free the lsm if we allocated it above */
+ if (*ea == NULL)
+ obd_free_memmd(exp, &set->set_md);
+out_set:
+ lov_fini_create_set(set, ea);
+ RETURN(rc);
+}
+
+/* Merge the per-stripe attributes of all successful sub-requests into
+ * set->set_oa, preserving the original object id.  Shared by the
+ * getattr and setattr completion paths.  Returns -EIO if nothing
+ * succeeded or no stripe contributed valid attributes.
+ * NOTE(review): when no stripe had valid attrs, set_oa is still
+ * overwritten with the (mostly zero) merged obdo -- confirm intended. */
+static int common_attr_done(struct lov_request_set *set)
+{
+ struct list_head *pos;
+ struct lov_request *req;
+ struct obdo *tmp_oa;
+ int rc = 0, attrset = 0;
+ ENTRY;
+
+ if (set->set_oa == NULL)
+ RETURN(0);
+
+ if (!set->set_success)
+ RETURN(-EIO);
+
+ tmp_oa = obdo_alloc();
+ if (tmp_oa == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+ if (req->rq_oa->o_valid == 0) /* inactive stripe */
+ continue;
+ lov_merge_attrs(tmp_oa, req->rq_oa, req->rq_oa->o_valid,
+ set->set_md, req->rq_stripe, &attrset);
+ }
+ if (!attrset) {
+ CERROR("No stripes had valid attrs\n");
+ rc = -EIO;
+ }
+ tmp_oa->o_id = set->set_oa->o_id;
+ memcpy(set->set_oa, tmp_oa, sizeof(*set->set_oa));
+out:
+ if (tmp_oa)
+ obdo_free(tmp_oa);
+ RETURN(rc);
+
+}
+
+/* After a bulk read/write completes, propagate each successful
+ * sub-request's block count back into the matching per-stripe
+ * lov_oinfo. */
+static int brw_done(struct lov_request_set *set)
+{
+ struct lov_stripe_md *lsm = set->set_md;
+ struct lov_oinfo *loi = NULL;
+ struct list_head *pos;
+ struct lov_request *req;
+ ENTRY;
+
+ list_for_each (pos, &set->set_list) {
+ req = list_entry(pos, struct lov_request, rq_link);
+
+ if (!req->rq_complete || req->rq_rc)
+ continue;
+
+ loi = &lsm->lsm_oinfo[req->rq_stripe];
+
+ if (req->rq_oa->o_valid & OBD_MD_FLBLOCKS)
+ loi->loi_blocks = req->rq_oa->o_blocks;
+ }
+
+ RETURN(0);
+}
+
+/* Finish a bulk read/write set: fold per-stripe block counts back via
+ * brw_done() and release the caller's reference on the set.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_brw_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes) {
+ rc = brw_done(set);
+ /* FIXME update qos data here */
+ }
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Build a request set for a bulk read/write of @oa_bufs pages.  The
+ * caller's brw_page array is re-sorted into set->set_pga so that each
+ * stripe's pages are contiguous (one sub-request per stripe that owns
+ * at least one page), and every page's disk offset is translated from
+ * file space into per-object space.  Fails with -EIO if any involved
+ * OST is inactive or no stripe owns any page. */
+int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_count oa_bufs,
+ struct brw_page *pga, struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ /* per-stripe scratch: page count, start index in the sorted
+ * array, and a running fill cursor */
+ struct {
+ obd_count index;
+ obd_count count;
+ obd_count off;
+ } *info = NULL;
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i, shift;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+ set->set_oti = oti;
+ set->set_oabufs = oa_bufs;
+ OBD_ALLOC(set->set_pga, oa_bufs * sizeof(*set->set_pga));
+ if (!set->set_pga)
+ GOTO(out, rc = -ENOMEM);
+
+ OBD_ALLOC(info, sizeof(*info) * lsm->lsm_stripe_count);
+ if (!info)
+ GOTO(out, rc = -ENOMEM);
+
+ /* calculate the page count for each stripe */
+ for (i = 0; i < oa_bufs; i++) {
+ int stripe = lov_stripe_number(lsm, pga[i].disk_offset);
+ info[stripe].count++;
+ }
+
+ /* alloc and initialize lov request */
+ loi = lsm->lsm_oinfo;
+ shift = 0;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+
+ if (info[i].count == 0)
+ continue;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ GOTO(out, rc = -EIO);
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ if (src_oa)
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+ req->rq_buflen = sizeof(*req->rq_md);
+ OBD_ALLOC(req->rq_md, req->rq_buflen);
+ if (req->rq_md == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ req->rq_idx = loi->loi_ost_idx;
+ req->rq_stripe = i;
+
+ /* XXX LOV STACKING */
+ req->rq_md->lsm_object_id = loi->loi_id;
+ req->rq_md->lsm_object_gr = lsm->lsm_object_gr;
+ req->rq_oabufs = info[i].count;
+ req->rq_pgaidx = shift;
+ shift += req->rq_oabufs;
+
+ /* remember the index for sort brw_page array */
+ info[i].index = req->rq_pgaidx;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out, rc = -EIO);
+
+ /* rotate & sort the brw_page array */
+ for (i = 0; i < oa_bufs; i++) {
+ int stripe = lov_stripe_number(lsm, pga[i].disk_offset);
+
+ shift = info[stripe].index + info[stripe].off;
+ LASSERT(shift < oa_bufs);
+ set->set_pga[shift] = pga[i];
+ /* translate file offset to per-object offset */
+ lov_stripe_offset(lsm, pga[i].disk_offset, stripe,
+ &set->set_pga[shift].disk_offset);
+ info[stripe].off++;
+ }
+out:
+ if (info)
+ OBD_FREE(info, sizeof(*info) * lsm->lsm_stripe_count);
+
+ if (rc == 0)
+ *reqset = set;
+ else
+ lov_fini_brw_set(set);
+
+ RETURN(rc);
+}
+
+/* Getattr completion: merge per-stripe attributes into set->set_oa. */
+static int getattr_done(struct lov_request_set *set)
+{
+ return common_attr_done(set);
+}
+
+/* Finish a getattr set: merge the collected attributes and release the
+ * caller's reference on the set.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_getattr_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes)
+ rc = getattr_done(set);
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+/* Build a request set with one getattr per stripe, skipping inactive
+ * OSTs.  Each sub-request gets a copy of src_oa with o_id replaced by
+ * the sub-object id.  Fails with -EIO if no stripe is usable. */
+int lov_prep_getattr_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_getattr_set(set);
+ RETURN(rc);
+}
+
+/* Finish a destroy set: release the caller's reference.  Always
+ * returns 0.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_destroy_set(struct lov_request_set *set)
+{
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes) {
+ /* FIXME update qos data here */
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(0);
+}
+
+/* Build a request set with one destroy per stripe, skipping inactive
+ * OSTs.  When the caller passed unlink llog cookies in @oti, the
+ * cookie cursor is positioned at the first usable stripe.  Fails with
+ * -EIO if no stripe is usable. */
+int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm,
+ struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, cookie_set = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+ set->set_oti = oti;
+ if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE)
+ set->set_cookies = oti->oti_logcookies;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+
+ /* Setup the first request's cookie position */
+ if (!cookie_set && set->set_cookies) {
+ oti->oti_logcookies = set->set_cookies + i;
+ cookie_set = 1;
+ }
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_destroy_set(set);
+ RETURN(rc);
+}
+
+/* Setattr completion: merge per-stripe attributes into set->set_oa. */
+static int setattr_done(struct lov_request_set *set)
+{
+ return common_attr_done(set);
+}
+
+/* Finish a setattr set: merge the resulting attributes and release the
+ * caller's reference on the set.
+ *
+ * Fix vs. original: the NULL check must precede LASSERT(set->set_exp),
+ * which dereferences set. */
+int lov_fini_setattr_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp);
+ if (set->set_completes) {
+ rc = setattr_done(set);
+ /* FIXME update qos data here */
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+ RETURN(rc);
+}
+
+/* Build a request set with one setattr per stripe, skipping inactive
+ * OSTs.  A file-size attribute is translated into each stripe's
+ * per-object size via lov_stripe_offset().  Fails with -EIO if no
+ * stripe is usable. */
+int lov_prep_setattr_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+ LASSERT(!(req->rq_oa->o_valid & OBD_MD_FLGROUP) || req->rq_oa->o_gr>0);
+
+ if (src_oa->o_valid & OBD_MD_FLSIZE) {
+ /* map the file size into this stripe's object space;
+ * a negative side with nonzero size means the size
+ * fell before the stripe and must be nudged back */
+ if (lov_stripe_offset(lsm, src_oa->o_size, i,
+ &req->rq_oa->o_size) < 0 &&
+ req->rq_oa->o_size)
+ req->rq_oa->o_size--;
+ CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
+ i, req->rq_oa->o_size, src_oa->o_size);
+ }
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_setattr_set(set);
+ RETURN(rc);
+}
+
+int lov_update_punch_set(struct lov_request_set *set, struct lov_request *req,
+ int rc)
+{
+ struct lov_obd *lov = &set->set_exp->exp_obd->u.lov;
+ ENTRY;
+
+ lov_update_set(set, req, rc);
+ if (rc && !lov->tgts[req->rq_idx].active)
+ rc = 0;
+ /* FIXME in raid1 regime, should return 0 */
+ RETURN(rc);
+}
+
+int lov_fini_punch_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp); /* check only after the NULL guard */
+ if (set->set_completes) {
+ if (!set->set_success)
+ rc = -EIO;
+ /* FIXME update qos data here */
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+int lov_prep_punch_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_off start,
+ obd_off end, struct obd_trans_info *oti,
+ struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+ obd_off rs, re;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ if (!lov_stripe_intersects(lsm, i, start, end, &rs, &re))
+ continue;
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+ req->rq_oa->o_gr = loi->loi_gr;
+ req->rq_oa->o_valid |= OBD_MD_FLGROUP;
+
+ req->rq_extent.start = rs;
+ req->rq_extent.end = re;
+
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_punch_set(set);
+ RETURN(rc);
+}
+
+int lov_fini_sync_set(struct lov_request_set *set)
+{
+ int rc = 0;
+ ENTRY;
+
+ if (set == NULL)
+ RETURN(0);
+ LASSERT(set->set_exp); /* check only after the NULL guard */
+ if (set->set_completes) {
+ if (!set->set_success)
+ rc = -EIO;
+ /* FIXME update qos data here */
+ }
+
+ if (atomic_dec_and_test(&set->set_refcount))
+ lov_finish_set(set);
+
+ RETURN(rc);
+}
+
+int lov_prep_sync_set(struct obd_export *exp, struct obdo *src_oa,
+ struct lov_stripe_md *lsm, obd_off start,
+ obd_off end, struct lov_request_set **reqset)
+{
+ struct lov_request_set *set;
+ struct lov_oinfo *loi = NULL;
+ struct lov_obd *lov = &exp->exp_obd->u.lov;
+ int rc = 0, i;
+ ENTRY;
+
+ OBD_ALLOC(set, sizeof(*set));
+ if (set == NULL)
+ RETURN(-ENOMEM);
+ lov_init_set(set);
+
+ set->set_exp = exp;
+ set->set_md = lsm;
+ set->set_oa = src_oa;
+
+ loi = lsm->lsm_oinfo;
+ for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) {
+ struct lov_request *req;
+ obd_off rs, re;
+
+ if (lov->tgts[loi->loi_ost_idx].active == 0) {
+ CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx);
+ continue;
+ }
+
+ if (!lov_stripe_intersects(lsm, i, start, end, &rs, &re))
+ continue;
+
+ OBD_ALLOC(req, sizeof(*req));
+ if (req == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ req->rq_stripe = i;
+ req->rq_idx = loi->loi_ost_idx;
+
+ req->rq_oa = obdo_alloc();
+ if (req->rq_oa == NULL)
+ GOTO(out_set, rc = -ENOMEM);
+ memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa));
+ req->rq_oa->o_id = loi->loi_id;
+ req->rq_extent.start = rs;
+ req->rq_extent.end = re;
+ lov_set_add_req(req, set);
+ }
+ if (!set->set_count)
+ GOTO(out_set, rc = -EIO);
+ *reqset = set;
+ RETURN(rc);
+out_set:
+ lov_fini_sync_set(set);
+ RETURN(rc);
+}
@SNAPFS_TRUE@MODULES += fsfilt_snap_@BACKINGFS@ fsfilt_snap_smfs
lvfs-objs := fsfilt.o lvfs_common.o llog_lvfs.o lvfs_linux.o
-lvfs-objs += llog.o llog_cat.o
-
+lvfs-objs += llog.o llog_cat.o
+
ifeq ($(PATCHLEVEL),6)
fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o
-e "s/dx_hash_info/ext3_dx_hash_info/g" \
-e "s/dir_private_info/ext3_dir_private_info/g" \
-e "s/DX_HASH/EXT3_DX_HASH/g" \
+ -e "s/reserve_window/ext3_reserve_window/g" \
+ -e "s/rsv_window_add/ext3_rsv_window_add/g" \
-e "s/EXT3/LDISKFS/g" -e "s/ext3/ldiskfs/g"
fsfilt_ldiskfs.c: fsfilt_ext3.c
#endif
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7))
+# define lock_24kernel() lock_kernel()
+# define unlock_24kernel() unlock_kernel()
+#else
+# define lock_24kernel() do {} while (0)
+# define unlock_24kernel() do {} while (0)
+#endif
+
static kmem_cache_t *fcb_cache;
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
#endif
#define XATTR_LUSTRE_MDS_LOV_EA "lov"
+#define XATTR_LUSTRE_MDS_MEA_EA "mea"
#define XATTR_LUSTRE_MDS_MID_EA "mid"
#define XATTR_LUSTRE_MDS_SID_EA "sid"
}
journal_start:
- lock_kernel();
+ LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks);
+ lock_24kernel();
handle = journal_start(EXT3_JOURNAL(inode), nblocks);
- unlock_kernel();
+ unlock_24kernel();
if (!IS_ERR(handle))
LASSERT(current->journal_info == handle);
needed = journal->j_max_transaction_buffers;
}
- lock_kernel();
+ LASSERTF(needed > 0, "can't start %d credit transaction\n", needed);
+ lock_24kernel();
handle = journal_start(journal, needed);
- unlock_kernel();
+ unlock_24kernel();
if (IS_ERR(handle)) {
CERROR("can't get handle for %d credits: rc = %ld\n", needed,
PTR_ERR(handle));
if (force_sync)
handle->h_sync = 1; /* recovery likes this */
- lock_kernel();
+ lock_24kernel();
rc = journal_stop(handle);
- unlock_kernel();
+ unlock_24kernel();
return rc;
}
}
static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
- void *lmm, int lmm_size)
-{
- int rc;
-
- LASSERT(down_trylock(&inode->i_sem) != 0);
-
- /* keep this when we get rid of OLD_EA (too noisy during conversion) */
- if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */)
- CWARN("setting EA on %lu/%u again... interesting\n",
- inode->i_ino, inode->i_generation);
-
- rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_LOV_EA,
- lmm, lmm_size);
- return rc;
-}
-
-/* Must be called with i_sem held */
-static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size)
-{
- int rc;
-
- rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_LOV_EA,
- lmm, lmm_size);
- return rc;
-}
-
-static int fsfilt_ext3_set_mid(struct inode *inode, void *handle,
- void *mid, int mid_size)
-{
- int rc;
-
- rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_MID_EA,
- mid, mid_size);
- return rc;
-}
-
-/* Must be called with i_sem held */
-static int fsfilt_ext3_get_mid(struct inode *inode, void *mid, int mid_size)
-{
- int rc;
-
- rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_MID_EA,
- mid, mid_size);
- return rc;
-}
-
-static int fsfilt_ext3_set_sid(struct inode *inode, void *handle,
- void *sid, int sid_size)
+ void *lmm, int lmm_size,
+ enum ea_type type)
{
int rc;
+
+ switch(type) {
+ case EA_LOV:
+ rc = fsfilt_ext3_set_xattr(inode, handle,
+ XATTR_LUSTRE_MDS_LOV_EA,
+ lmm, lmm_size);
+ break;
+ case EA_MEA:
+ rc = fsfilt_ext3_set_xattr(inode, handle,
+ XATTR_LUSTRE_MDS_MEA_EA,
+ lmm, lmm_size);
+ break;
+ case EA_SID:
+ rc = fsfilt_ext3_set_xattr(inode, handle,
+ XATTR_LUSTRE_MDS_SID_EA,
+ lmm, lmm_size);
+ break;
+ case EA_MID:
+ rc = fsfilt_ext3_set_xattr(inode, handle,
+ XATTR_LUSTRE_MDS_MID_EA,
+ lmm, lmm_size);
+ break;
+ default:
+ return -EINVAL;
+ }
- rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_SID_EA,
- sid, sid_size);
return rc;
}
-/* Must be called with i_sem held */
-static int fsfilt_ext3_get_sid(struct inode *inode, void *sid, int sid_size)
+static int fsfilt_ext3_get_md(struct inode *inode, void *lmm,
+ int lmm_size, enum ea_type type)
{
int rc;
-
- rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_SID_EA,
- sid, sid_size);
+
+ switch (type) {
+ case EA_LOV:
+ rc = fsfilt_ext3_get_xattr(inode,
+ XATTR_LUSTRE_MDS_LOV_EA,
+ lmm, lmm_size);
+ break;
+ case EA_MEA:
+ rc = fsfilt_ext3_get_xattr(inode,
+ XATTR_LUSTRE_MDS_MEA_EA,
+ lmm, lmm_size);
+ break;
+ case EA_SID:
+ rc = fsfilt_ext3_get_xattr(inode,
+ XATTR_LUSTRE_MDS_SID_EA,
+ lmm, lmm_size);
+ break;
+ case EA_MID:
+ rc = fsfilt_ext3_get_xattr(inode,
+ XATTR_LUSTRE_MDS_MID_EA,
+ lmm, lmm_size);
+ break;
+ default:
+ return -EINVAL;
+ }
+
return rc;
}
fcb->cb_data = cb_data;
CDEBUG(D_EXT2, "set callback for last_num: "LPD64"\n", last_num);
-
lock_kernel();
journal_callback_set(handle, fsfilt_ext3_cb_func,
(struct journal_callback *)fcb);
unlock_kernel();
-
return 0;
}
#define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem);
#define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem);
#endif
-
+
#include <linux/lustre_version.h>
#if EXT3_EXT_MAGIC == 0xf301
#define ee_start e_start
loff_t new_i_size;
handle_t *handle;
int i, aflags = 0;
-
+
i = EXT_DEPTH(tree);
EXT_ASSERT(i == path->p_depth);
EXT_ASSERT(path[i].p_hdr);
-
+
if (exist) {
err = EXT_CONTINUE;
goto map;
}
-
+
if (bp->create == 0) {
i = 0;
if (newex->ee_block < bp->start)
tgen = EXT_GENERATION(tree);
count = ext3_ext_calc_credits_for_insert(tree, path);
ext3_up_truncate_sem(inode);
-
lock_kernel();
handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1);
unlock_kernel();
ext3_down_truncate_sem(inode);
return PTR_ERR(handle);
}
-
+
if (tgen != EXT_GENERATION(tree)) {
/* the tree has changed. so path can be invalid at moment */
lock_kernel();
ext3_down_truncate_sem(inode);
return EXT_REPEAT;
}
-
ext3_down_truncate_sem(inode);
count = newex->ee_len;
goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags);
}
}
out:
- lock_kernel();
+ lock_24kernel();
journal_stop(handle);
- unlock_kernel();
+ unlock_24kernel();
map:
if (err >= 0) {
/* map blocks */
bp.start = block;
bp.init_num = bp.num = num;
bp.create = create;
-
+
ext3_down_truncate_sem(inode);
err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb);
ext3_ext_invalidate_cache(&tree);
ext3_up_truncate_sem(inode);
-
return err;
}
block_count = (block_count + blocksize - 1) >> inode->i_blkbits;
journal = EXT3_SB(inode->i_sb)->s_journal;
- lock_kernel();
+ lock_24kernel();
handle = journal_start(journal,
block_count * EXT3_DATA_TRANS_BLOCKS + 2);
- unlock_kernel();
+ unlock_24kernel();
if (IS_ERR(handle)) {
CERROR("can't start transaction\n");
return PTR_ERR(handle);
unlock_kernel();
}
- lock_kernel();
+ lock_24kernel();
journal_stop(handle);
- unlock_kernel();
+ unlock_24kernel();
if (err == 0)
*offs = offset;
.fs_iocontrol = fsfilt_ext3_iocontrol,
.fs_set_md = fsfilt_ext3_set_md,
.fs_get_md = fsfilt_ext3_get_md,
- .fs_set_mid = fsfilt_ext3_set_mid,
- .fs_get_mid = fsfilt_ext3_get_mid,
- .fs_set_sid = fsfilt_ext3_set_sid,
- .fs_get_sid = fsfilt_ext3_get_sid,
.fs_readpage = fsfilt_ext3_readpage,
.fs_add_journal_cb = fsfilt_ext3_add_journal_cb,
.fs_statfs = fsfilt_ext3_statfs,
RETURN(rc);
}
-typedef int (*set_ea_func_t) (struct inode *, void *, void *, int);
-typedef int (*get_ea_func_t) (struct inode *, void *, int);
+typedef int (*set_ea_func_t) (struct inode *, void *, void *,
+ int, enum ea_type);
+
+typedef int (*get_ea_func_t) (struct inode *, void *, int,
+ enum ea_type);
static int fsfilt_smfs_set_ea(struct inode *inode, void *handle,
- void *ea, int ea_size,
+ void *ea, int ea_size, enum ea_type type,
set_ea_func_t set_ea_func)
{
struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
pre_smfs_inode(inode, cache_inode);
down(&cache_inode->i_sem);
- rc = set_ea_func(cache_inode, handle, ea, ea_size);
+ rc = set_ea_func(cache_inode, handle, ea,
+ ea_size, type);
up(&cache_inode->i_sem);
post_smfs_inode(inode, cache_inode);
}
static int fsfilt_smfs_get_ea(struct inode *inode, void *ea,
- int ea_size, get_ea_func_t get_ea_func)
+ int ea_size, enum ea_type type,
+ get_ea_func_t get_ea_func)
{
struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
struct inode *cache_inode = NULL;
pre_smfs_inode(inode, cache_inode);
down(&cache_inode->i_sem);
- rc = get_ea_func(cache_inode, ea, ea_size);
+ rc = get_ea_func(cache_inode, ea, ea_size, type);
up(&cache_inode->i_sem);
post_smfs_inode(inode, cache_inode);
}
static int fsfilt_smfs_set_md(struct inode *inode, void *handle,
- void *lmm, int lmm_size)
+ void *lmm, int lmm_size, enum ea_type type)
{
- int rc = 0;
struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
-
- rc = fsfilt_smfs_set_ea(inode, handle, lmm, lmm_size,
- cache_fsfilt->fs_set_md);
+ int rc = fsfilt_smfs_set_ea(inode, handle, lmm, lmm_size,
+ type, cache_fsfilt->fs_set_md);
if (rc)
return rc;
- smfs_rec_md(inode, lmm, lmm_size);
+ smfs_rec_md(inode, lmm, lmm_size, type);
return rc;
}
-static int fsfilt_smfs_get_md(struct inode *inode, void *lmm, int
- lmm_size)
+static int fsfilt_smfs_get_md(struct inode *inode, void *lmm,
+ int lmm_size, enum ea_type type)
{
struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
- return fsfilt_smfs_get_ea(inode, lmm, lmm_size,
+ return fsfilt_smfs_get_ea(inode, lmm, lmm_size, type,
cache_fsfilt->fs_get_md);
}
-static int fsfilt_smfs_set_mid(struct inode *inode, void *handle,
- void *mid, int mid_size)
-{
- struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
- return fsfilt_smfs_set_ea(inode, handle, mid, mid_size,
- cache_fsfilt->fs_set_mid);
-}
-
-static int fsfilt_smfs_get_mid(struct inode *inode, void *mid,
- int mid_size)
-{
- struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
- return fsfilt_smfs_get_ea(inode, mid, mid_size,
- cache_fsfilt->fs_get_mid);
-}
-
-static int fsfilt_smfs_set_sid(struct inode *inode, void *handle,
- void *sid, int sid_size)
-{
- struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
- return fsfilt_smfs_set_ea(inode, handle, sid, sid_size,
- cache_fsfilt->fs_set_sid);
-}
-
-static int fsfilt_smfs_get_sid(struct inode *inode, void *sid,
- int sid_size)
-{
- struct fsfilt_operations *cache_fsfilt = I2FOPS(inode);
- return fsfilt_smfs_get_ea(inode, sid, sid_size,
- cache_fsfilt->fs_get_sid);
-}
-
static int fsfilt_smfs_send_bio(int rw, struct inode *inode, void *bio)
{
struct inode *cache_inode;
struct fsfilt_operations *cache_fsfilt;
-
ENTRY;
cache_fsfilt = I2FOPS(inode);
.fs_iocontrol = fsfilt_smfs_iocontrol,
.fs_set_md = fsfilt_smfs_set_md,
.fs_get_md = fsfilt_smfs_get_md,
- .fs_set_mid = fsfilt_smfs_set_mid,
- .fs_get_mid = fsfilt_smfs_get_mid,
- .fs_set_sid = fsfilt_smfs_set_sid,
- .fs_get_sid = fsfilt_smfs_get_sid,
.fs_readpage = fsfilt_smfs_readpage,
.fs_getpage = fsfilt_smfs_getpage,
.fs_add_journal_cb = fsfilt_smfs_add_journal_cb,
#define DEBUG_SUBSYSTEM S_FILTER
+#include <linux/obd.h>
#include <linux/lvfs.h>
struct dentry *lvfs_id2dentry(struct lvfs_run_ctxt *ctxt, __u64 id,
return ctxt->cb_ops.l_id2dentry(id, gen, gr, data);
}
EXPORT_SYMBOL(lvfs_id2dentry);
+
+static struct list_head lvfs_context_list;
+
+void lvfs_mount_list_init(void)
+{
+ INIT_LIST_HEAD(&lvfs_context_list);
+}
+
+void lvfs_mount_list_cleanup(void)
+{
+ struct list_head *tmp;
+
+ if (list_empty(&lvfs_context_list))
+ return;
+
+ list_for_each(tmp, &lvfs_context_list) {
+ struct lvfs_obd_ctxt *data =
+ list_entry(tmp, struct lvfs_obd_ctxt, loc_list);
+ CERROR("device %s still mounted with refcount %d\n",
+ data->loc_name, atomic_read(&data->loc_refcount));
+ }
+}
+
+static inline
+struct lvfs_obd_ctxt *get_lvfs_mount(struct lvfs_obd_ctxt *lvfs_ctxt)
+{
+ atomic_inc(&lvfs_ctxt->loc_refcount);
+ return lvfs_ctxt;
+}
+
+static struct lvfs_obd_ctxt *add_lvfs_mount(struct vfsmount *mnt, char *name)
+{
+ struct lvfs_obd_ctxt *lvfs_ctxt;
+ ENTRY;
+
+ OBD_ALLOC(lvfs_ctxt, sizeof(*lvfs_ctxt));
+ if (!lvfs_ctxt) {
+ CERROR("No Memory\n");
+ RETURN(NULL);
+ }
+
+ if (name) {
+ int length = strlen(name) + 1;
+
+ OBD_ALLOC(lvfs_ctxt->loc_name, length);
+ if (!lvfs_ctxt->loc_name) {
+ CERROR("No Memory\n");
+ OBD_FREE(lvfs_ctxt, sizeof(*lvfs_ctxt));
+ RETURN(NULL);
+ }
+ memcpy(lvfs_ctxt->loc_name, name, length);
+ }
+ lvfs_ctxt->loc_mnt = mnt;
+ list_add(&lvfs_ctxt->loc_list, &lvfs_context_list);
+ atomic_set(&lvfs_ctxt->loc_refcount, 1);
+ RETURN(lvfs_ctxt);
+}
+
+void lvfs_umount_fs(struct lvfs_obd_ctxt *lvfs_ctxt)
+{
+ if (lvfs_ctxt && atomic_dec_and_test(&lvfs_ctxt->loc_refcount)) {
+ struct vfsmount *mnt = lvfs_ctxt->loc_mnt;
+
+ list_del(&lvfs_ctxt->loc_list);
+ if (atomic_read(&mnt->mnt_count) > 2)
+ CERROR("mount busy, mnt %p mnt_count %d != 2\n", mnt,
+ atomic_read(&mnt->mnt_count));
+
+ mntput(mnt);
+
+ if (lvfs_ctxt->loc_name)
+ OBD_FREE(lvfs_ctxt->loc_name,
+ strlen(lvfs_ctxt->loc_name) + 1);
+ OBD_FREE(lvfs_ctxt, sizeof(*lvfs_ctxt));
+ dev_clear_rdonly(2);
+ }
+}
+EXPORT_SYMBOL(lvfs_umount_fs);
+
+int lvfs_mount_fs(char *name, char *fstype, char *options, int flags,
+ struct lvfs_obd_ctxt **lvfs_ctxt)
+{
+ struct vfsmount *mnt = NULL;
+ struct list_head *tmp;
+ int rc = 0;
+ ENTRY;
+
+ list_for_each(tmp, &lvfs_context_list) {
+ struct lvfs_obd_ctxt *data =
+ list_entry(tmp, struct lvfs_obd_ctxt, loc_list);
+ if (strcmp(data->loc_name, name) == 0) {
+ *lvfs_ctxt = get_lvfs_mount(data);
+ RETURN(0);
+ }
+ }
+ mnt = do_kern_mount(fstype, flags, name, options);
+
+ if (IS_ERR(mnt)) {
+ rc = PTR_ERR(mnt);
+ CERROR("do_kern_mount failed: rc = %d\n", rc);
+ GOTO(out, rc);
+ }
+ CDEBUG(D_SUPER, "%s: mnt = %p\n", name, mnt);
+ /*add this lvfs context to the lvfs_mount_list*/
+ *lvfs_ctxt = add_lvfs_mount(mnt, name);
+ if (!*lvfs_ctxt) {
+ mntput(mnt);
+ CERROR("add_lvfs_mount failed\n");
+ GOTO(out, rc = -EINVAL);
+ }
+out:
+ RETURN(rc);
+}
+EXPORT_SYMBOL(lvfs_mount_fs);
int fsfilt_ldiskfs_init(void);
void fsfilt_ldiskfs_exit(void);
-
int fsfilt_reiser_init(void);
void fsfilt_reiser_exit(void);
+void lvfs_mount_list_init(void);
+void lvfs_mount_list_cleanup(void);
+
int lookup_by_path(char *path, int flags, struct nameidata *nd);
atomic_t obd_memory;
int obd_memmax;
-
/* Debugging check only needed during development */
#ifdef OBD_CTXT_DEBUG
# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC)
/*
CDEBUG(D_INFO,
- "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+ "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
save, current, current->fs, current->fs->pwd,
atomic_read(¤t->fs->pwd->d_count),
atomic_read(¤t->fs->pwd->d_inode->i_count),
/*
CDEBUG(D_INFO,
- "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+ "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
new_ctx, current, current->fs, current->fs->pwd,
atomic_read(¤t->fs->pwd->d_count),
atomic_read(¤t->fs->pwd->d_inode->i_count),
/*
CDEBUG(D_INFO,
- " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+ " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
new_ctx, current, current->fs, current->fs->pwd,
atomic_read(¤t->fs->pwd->d_count),
atomic_read(¤t->fs->pwd->d_inode->i_count),
/*
CDEBUG(D_INFO,
- "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n",
+ "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n",
saved, current, current->fs, current->fs->pwd,
atomic_read(¤t->fs->pwd->d_count),
atomic_read(¤t->fs->pwd->d_inode->i_count),
ENTRY;
ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n");
- CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name);
+ CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name);
dchild = ll_lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
ENTRY;
ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n");
- CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name);
+ CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name);
dchild = ll_lookup_one_len(name, dir, strlen(name));
if (IS_ERR(dchild))
GOTO(out_up, dchild);
if (dchild->d_inode) {
int old_mode = dchild->d_inode->i_mode;
- if (!S_ISDIR(old_mode))
+ if (!S_ISDIR(old_mode)) {
+ CERROR("found %s (%lu/%u) is mode %o\n", name,
+ dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation, old_mode);
GOTO(out_err, err = -ENOTDIR);
+ }
/* Fixup directory permissions if necessary */
if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
EXPORT_SYMBOL(obd_memory);
EXPORT_SYMBOL(obd_memmax);
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED;
+static struct hlist_head *obd_memtable;
+static unsigned long obd_memtable_size;
+
+static int lvfs_memdbg_init(int size)
+{
+ struct hlist_head *head;
+ int i;
+
+ LASSERT(size > sizeof(struct hlist_head)); /* was sizeof(sizeof(...)), i.e. sizeof(size_t) */
+ obd_memtable_size = size / sizeof(struct hlist_head);
+
+ CWARN("Allocating %lu malloc entries...\n",
+ (unsigned long)obd_memtable_size);
+
+ obd_memtable = kmalloc(size, GFP_KERNEL);
+ if (!obd_memtable)
+ return -ENOMEM;
+
+ i = obd_memtable_size;
+ head = obd_memtable;
+ do {
+ INIT_HLIST_HEAD(head);
+ head++;
+ i--;
+ } while(i);
+
+ return 0;
+}
+
+static int lvfs_memdbg_cleanup(void)
+{
+ struct hlist_node *node = NULL, *tmp = NULL;
+ struct hlist_head *head;
+ struct mtrack *mt;
+ int i;
+
+ spin_lock(&obd_memlist_lock);
+ for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) {
+ hlist_for_each_safe(node, tmp, head) {
+ mt = hlist_entry(node, struct mtrack, m_hash);
+ hlist_del_init(&mt->m_hash);
+ kfree(mt);
+ }
+ }
+ spin_unlock(&obd_memlist_lock);
+ kfree(obd_memtable);
+ return 0;
+}
+
+static inline unsigned long const hashfn(void *ptr)
+{
+ return (unsigned long)ptr &
+ (obd_memtable_size - 1);
+}
+
+static void __lvfs_memdbg_insert(struct mtrack *mt)
+{
+ struct hlist_head *head = obd_memtable +
+ hashfn(mt->m_ptr);
+ hlist_add_head(&mt->m_hash, head);
+}
+
+void lvfs_memdbg_insert(struct mtrack *mt)
+{
+ spin_lock(&obd_memlist_lock);
+ __lvfs_memdbg_insert(mt);
+ spin_unlock(&obd_memlist_lock);
+}
+EXPORT_SYMBOL(lvfs_memdbg_insert);
+
+static void __lvfs_memdbg_remove(struct mtrack *mt)
+{
+ hlist_del_init(&mt->m_hash);
+}
+
+void lvfs_memdbg_remove(struct mtrack *mt)
+{
+ spin_lock(&obd_memlist_lock);
+ __lvfs_memdbg_remove(mt);
+ spin_unlock(&obd_memlist_lock);
+}
+EXPORT_SYMBOL(lvfs_memdbg_remove);
+
+static struct mtrack *__lvfs_memdbg_find(void *ptr)
+{
+ struct hlist_node *node = NULL;
+ struct mtrack *mt = NULL;
+ struct hlist_head *head;
+
+ head = obd_memtable + hashfn(ptr);
+
+ hlist_for_each(node, head) {
+ mt = hlist_entry(node, struct mtrack, m_hash);
+ if ((unsigned long)mt->m_ptr == (unsigned long)ptr)
+ break;
+ mt = NULL;
+ }
+ return mt;
+}
+
+struct mtrack *lvfs_memdbg_find(void *ptr)
+{
+ struct mtrack *mt;
+
+ spin_lock(&obd_memlist_lock);
+ mt = __lvfs_memdbg_find(ptr);
+ spin_unlock(&obd_memlist_lock);
+
+ return mt;
+}
+EXPORT_SYMBOL(lvfs_memdbg_find);
+
+int lvfs_memdbg_check_insert(struct mtrack *mt)
+{
+ spin_lock(&obd_memlist_lock);
+ if (!__lvfs_memdbg_find(mt->m_ptr)) {
+ __lvfs_memdbg_insert(mt);
+ spin_unlock(&obd_memlist_lock);
+ return 1;
+ }
+ spin_unlock(&obd_memlist_lock);
+ return 0;
+}
+EXPORT_SYMBOL(lvfs_memdbg_check_insert);
+
+struct mtrack *
+lvfs_memdbg_check_remove(void *ptr)
+{
+ struct mtrack *mt;
+
+ spin_lock(&obd_memlist_lock);
+ mt = __lvfs_memdbg_find(ptr);
+ if (mt) {
+ __lvfs_memdbg_remove(mt);
+ spin_unlock(&obd_memlist_lock);
+ return mt;
+ }
+ spin_unlock(&obd_memlist_lock);
+ return NULL;
+}
+EXPORT_SYMBOL(lvfs_memdbg_check_remove);
+
+static void lvfs_memdbg_show(void)
+{
+ struct hlist_node *node = NULL;
+ struct hlist_head *head;
+ struct mtrack *mt;
+ int leaked, i;
+
+ leaked = atomic_read(&obd_memory);
+
+ if (leaked > 0) {
+ CWARN("Memory leaks detected (max %d, leaked %d):\n",
+ obd_memmax, leaked);
+
+ spin_lock(&obd_memlist_lock);
+ for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) {
+ hlist_for_each(node, head) {
+ mt = hlist_entry(node, struct mtrack, m_hash);
+ CWARN(" ptr: 0x%p, size: %d, src at \"%s\"\n",
+ mt->m_ptr, mt->m_size, mt->m_loc);
+ }
+ }
+ spin_unlock(&obd_memlist_lock);
+ }
+}
+#endif
+
static int __init lvfs_linux_init(void)
{
+ ENTRY;
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+ lvfs_memdbg_init(PAGE_SIZE);
+#endif
+ lvfs_mount_list_init();
RETURN(0);
}
static void __exit lvfs_linux_exit(void)
{
- int leaked;
ENTRY;
- leaked = atomic_read(&obd_memory);
- CDEBUG(leaked ? D_ERROR : D_INFO,
- "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
+ lvfs_mount_list_cleanup();
+#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__)
+ lvfs_memdbg_show();
+ lvfs_memdbg_cleanup();
+#endif
+ EXIT;
return;
}
#ifndef MDC_INTERNAL_H
#define MDC_INTERNAL_H
+int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm);
+
+int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_size);
+
void mdc_getattr_pack(struct lustre_msg *msg, int offset,
__u64 valid, int flags, struct mdc_op_data *data);
void mdc_open_pack(struct lustre_msg *msg, int offset,
body->valid |= OBD_MD_FLFLAGS;
}
}
+
+/*
+ * these methods needed for saying higher levels that MDC does not pack/unpack
+ * any EAs. This is needed to have real abstraction and do not try to recognize
+ * what OBD type is to avoid calling these methods on it, as they may not be
+ * implemented.
+ *
+ * Sometimes pack/unpack calls happen to MDC too. This is for instance default
+ * striping info for directories and our goal here is to skip them with no
+ * errors or any complains.
+ */
+int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
+ struct lov_stripe_md *lsm)
+{
+ ENTRY;
+ RETURN(0);
+}
+
+int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp,
+ struct lov_mds_md *lmm, int lmm_size)
+{
+ ENTRY;
+ RETURN(0);
+}
+
return 0;
}
+static inline void
+mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
+{
+ /* Don't hold error requests for replay. */
+ if (req->rq_replay) {
+ unsigned long irqflags;
+ spin_lock_irqsave(&req->rq_lock, irqflags);
+ req->rq_replay = 0;
+ spin_unlock_irqrestore(&req->rq_lock, irqflags);
+ }
+ if (rc && req->rq_transno != 0) {
+ DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc);
+ LBUG();
+ }
+}
+
/* We always reserve enough space in the reply packet for a stripe MD, because
* we don't know in advance the file type. */
int mdc_enqueue(struct obd_export *exp,
ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } };
struct ldlm_intent *lit;
struct ldlm_request *lockreq;
- struct ldlm_reply *dlm_rep;
int reqsize[6] = {[MDS_REQ_SECDESC_OFF] = 0,
[MDS_REQ_INTENT_LOCKREQ_OFF] = sizeof(*lockreq),
[MDS_REQ_INTENT_IT_OFF] = sizeof(*lit)};
obddev->u.cli.cl_max_mds_easize};
int req_buffers = 3, reply_buffers = 0;
int rc, flags = LDLM_FL_HAS_INTENT;
+ struct ldlm_reply *dlm_rep = NULL;
void *eadata;
unsigned long irqflags;
ENTRY;
reqsize[req_buffers++] = sizeof(struct mds_rec_create);
reqsize[req_buffers++] = data->namelen + 1;
reqsize[req_buffers++] = obddev->u.cli.cl_max_mds_easize;
+
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION,
LDLM_ENQUEUE, req_buffers, reqsize, NULL);
if (!req)
/* pack the intended request */
mdc_open_pack(req->rq_reqmsg, MDS_REQ_INTENT_REC_OFF, data,
- it->it_create_mode, 0, it->it_flags,
- lmm, lmmsize);
+ it->it_create_mode, 0, it->it_flags, lmm, lmmsize);
/* get ready for the reply */
repsize[3] = 4;
repsize[4] = xattr_acl_size(LL_ACL_MAX_ENTRIES);
/* This can go when we're sure that this can never happen */
LASSERT(rc != -ENOENT);
+ /* We need dlm_rep to be assigned this early, to check lock mode of
+ returned lock from request to avoid possible race with lock
+ conversion */
+ if (rc == ELDLM_LOCK_ABORTED || !rc) {
+ dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
+ LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
+ }
if (rc == ELDLM_LOCK_ABORTED) {
lock_mode = 0;
memset(lockh, 0, sizeof(*lockh));
rc = 0;
} else if (rc != 0) {
CERROR("ldlm_cli_enqueue: %d\n", rc);
- LASSERT (rc < 0);
+ LASSERTF(rc < 0, "rc = %d\n", rc);
+ mdc_clear_replay_flag(req, rc);
ptlrpc_req_finished(req);
RETURN(rc);
} else { /* rc = 0 */
/* If the server gave us back a different lock mode, we should
* fix up our variables. */
- if (lock->l_req_mode != lock_mode) {
- ldlm_lock_addref(lockh, lock->l_req_mode);
+ if (dlm_rep->lock_desc.l_req_mode != lock_mode) {
+ ldlm_lock_addref(lockh, dlm_rep->lock_desc.l_req_mode);
ldlm_lock_decref(lockh, lock_mode);
- lock_mode = lock->l_req_mode;
+ lock_mode = dlm_rep->lock_desc.l_req_mode;
}
ldlm_lock_allow_match(lock);
LDLM_LOCK_PUT(lock);
}
- dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
- LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
LASSERT_REPSWABBED(req, 0); /* swabbed by ldlm_cli_enqueue() */
LUSTRE_IT(it)->it_disposition = (int) dlm_rep->lock_policy_res1;
LUSTRE_IT(it)->it_lock_mode = lock_mode;
LUSTRE_IT(it)->it_data = req;
- if (LUSTRE_IT(it)->it_status < 0 && req->rq_replay) {
- LASSERT(req->rq_transno == 0);
- /* Don't hold error requests for replay. */
- spin_lock(&req->rq_lock);
- req->rq_replay = 0;
- spin_unlock(&req->rq_lock);
- }
+ if (LUSTRE_IT(it)->it_status < 0 && req->rq_replay)
+ mdc_clear_replay_flag(req, LUSTRE_IT(it)->it_status);
DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d",
LUSTRE_IT(it)->it_disposition, LUSTRE_IT(it)->it_status);
id_group(cid)}};
struct lustre_handle lockh;
ldlm_policy_data_t policy;
- int mode = LCK_PR;
+ int mode;
/* For the GETATTR case, ll_revalidate_it issues two separate
queries - for LOOKUP and for UPDATE lock because it cannot
check them together - we might have those two bits to be
present in two separate granted locks */
policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ?
- MDS_INODELOCK_UPDATE: MDS_INODELOCK_LOOKUP;
+ MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP;
mode = LCK_PR;
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
- LDLM_IBITS, &policy, LCK_PR, &lockh);
+ LDLM_IBITS, &policy, mode,
+ &lockh);
+
+ if (!rc) {
+ mode = LCK_CR;
+ rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED, &res_id,
+ LDLM_IBITS, &policy, mode,
+ &lockh);
+ }
if (!rc) {
mode = LCK_PW;
rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
LDLM_FL_BLOCK_GRANTED, &res_id,
- LDLM_IBITS, &policy, LCK_PW,
+ LDLM_IBITS, &policy, mode,
+ &lockh);
+ }
+ if (!rc) {
+ mode = LCK_CW;
+ rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
+ LDLM_FL_BLOCK_GRANTED, &res_id,
+ LDLM_IBITS, &policy, mode,
&lockh);
}
if (rc) {
* It's important that we do this first! Otherwise we might exit the
* function without doing so, and try to replay a failed create (bug
* 3440) */
- if (it->it_op & IT_OPEN) {
- if (!it_disposition(it, DISP_OPEN_OPEN) ||
- LUSTRE_IT(it)->it_status != 0) {
- unsigned long irqflags;
-
- spin_lock_irqsave(&request->rq_lock, irqflags);
- request->rq_replay = 0;
- spin_unlock_irqrestore(&request->rq_lock, irqflags);
- }
- }
+ if (it->it_op & IT_OPEN && request->rq_replay &&
+ (!it_disposition(it, DISP_OPEN_OPEN) || LUSTRE_IT(it)->it_status != 0))
+ mdc_clear_replay_flag(request, LUSTRE_IT(it)->it_status);
+
if (!it_disposition(it, DISP_IT_EXECD)) {
/* The server failed before it even started executing the
* intent, i.e. because it couldn't unpack the request. */
struct lustre_md *md)
{
void *buf;
+ int rc = 0;
int size, acl_off;
struct posix_acl *acl;
- int rc = 0;
+ struct lov_mds_md *lmm;
ENTRY;
- LASSERT(md);
+ LASSERT(md != NULL);
memset(md, 0, sizeof(*md));
- md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body));
- LASSERT (md->body != NULL);
- LASSERT_REPSWABBED (req, offset);
+ md->body = lustre_msg_buf(req->rq_repmsg, offset,
+ sizeof(*md->body));
+ if (!md->body)
+ RETURN(-ENOMEM);
+
+ LASSERT_REPSWABBED(req, offset);
if (!(md->body->valid & OBD_MD_FLEASIZE) &&
!(md->body->valid & OBD_MD_FLDIREA))
RETURN(0);
- /* ea is presented in reply, parse it */
if (S_ISREG(md->body->mode)) {
- int lmmsize;
- struct lov_mds_md *lmm;
-
if (md->body->eadatasize == 0) {
- CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n");
+ CERROR("invalid EA size (0) is detected\n");
RETURN(-EPROTO);
}
- lmmsize = md->body->eadatasize;
- lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize);
- LASSERT (lmm != NULL);
- LASSERT_REPSWABBED (req, offset + 1);
-
- rc = obd_unpackmd(exp_lov, &md->lsm, lmm, lmmsize);
- if (rc >= 0) {
- LASSERT (rc >= sizeof (*md->lsm));
+
+ lmm = lustre_msg_buf(req->rq_repmsg, offset + 1,
+ md->body->eadatasize);
+ if (!lmm)
+ RETURN(-EINVAL);
+
+ LASSERT(exp_lov != NULL);
+
+ rc = obd_unpackmd(exp_lov, &md->lsm, lmm,
+ md->body->eadatasize);
+ if (rc > 0) {
+ LASSERT(rc >= sizeof(*md->lsm));
rc = 0;
}
} else if (S_ISDIR(md->body->mode)) {
- struct mea *mea;
- int mdsize;
- LASSERT(exp_lmv != NULL);
-
/* dir can be non-splitted */
if (md->body->eadatasize == 0)
RETURN(0);
- mdsize = md->body->eadatasize;
- mea = lustre_msg_buf(req->rq_repmsg, offset + 1, mdsize);
- LASSERT(mea != NULL);
-
- /*
- * check mea for validness, as there is possible that old tests
- * will try to set lov EA to dir object and thus confuse this
- * stuff.
- */
- if (mea->mea_magic != MEA_MAGIC_LAST_CHAR &&
- mea->mea_magic != MEA_MAGIC_ALL_CHARS)
- GOTO(out_invalid_mea, rc = -EINVAL);
-
- if (mea->mea_count > 256 || mea->mea_master > 256 ||
- mea->mea_master > mea->mea_count)
- GOTO(out_invalid_mea, rc = -EINVAL);
-
- LASSERT(id_fid(&mea->mea_ids[0]));
-
- rc = obd_unpackmd(exp_lmv, (void *)&md->mea,
- (void *)mea, mdsize);
- if (rc >= 0) {
- LASSERT (rc >= sizeof (*md->mea));
- rc = 0;
- }
-
- RETURN(rc);
+ lmm = lustre_msg_buf(req->rq_repmsg, offset + 1,
+ md->body->eadatasize);
+ if (!lmm)
+ RETURN(-EINVAL);
- out_invalid_mea:
- CERROR("Detected invalid mea, which does not "
- "support neither old either new format.\n");
+ if (md->body->valid & OBD_MD_MEA) {
+ LASSERT(exp_lmv != NULL);
+
+ rc = obd_unpackmd(exp_lmv, (void *)&md->mea,
+ lmm, md->body->eadatasize);
+ if (rc > 0) {
+ LASSERT(rc >= sizeof(*md->mea));
+ rc = 0;
+ }
+ }
} else {
LASSERT(S_ISCHR(md->body->mode) ||
S_ISBLK(md->body->mode) ||
md->acl_access = acl;
}
-
RETURN(rc);
}
EXIT;
}
-int mdc_set_open_replay_data(struct obd_export *exp,
- struct obd_client_handle *och,
- struct ptlrpc_request *open_req)
+int mdc_set_open_replay_data(struct obd_export *exp,
+ struct obd_client_handle *och,
+ struct ptlrpc_request *open_req)
{
struct mdc_open_data *mod;
struct mds_rec_create *rec;
spin_unlock(&open_req->rq_lock);
}
-static int mdc_close_interpret(struct ptlrpc_request *req, void *data, int rc)
+static int mdc_close_interpret(struct ptlrpc_request *req,
+ void *data, int rc)
{
union ptlrpc_async_args *aa = data;
- struct mdc_rpc_lock *rpc_lock;
+ struct mdc_rpc_lock *close_lock;
struct obd_device *obd = aa->pointer_arg[1];
unsigned long flags;
spin_lock_irqsave(&req->rq_lock, flags);
- rpc_lock = aa->pointer_arg[0];
+ close_lock = aa->pointer_arg[0];
aa->pointer_arg[0] = NULL;
spin_unlock_irqrestore (&req->rq_lock, flags);
- if (rpc_lock == NULL) {
- CERROR("called with NULL rpc_lock\n");
+ if (close_lock == NULL) {
+ CERROR("called with NULL close_lock\n");
} else {
- mdc_put_rpc_lock(rpc_lock, NULL);
- LASSERTF(rpc_lock == obd->u.cli.cl_rpc_lock, "%p != %p\n",
- rpc_lock, obd->u.cli.cl_rpc_lock);
+ mdc_put_rpc_lock(close_lock, NULL);
+ LASSERTF(close_lock == obd->u.cli.cl_close_lock, "%p != %p\n",
+ close_lock, obd->u.cli.cl_close_lock);
}
wake_up(&req->rq_reply_waitq);
RETURN(rc);
return rc;
}
-static int go_back_to_sleep(void *unused)
-{
- return 0;
-}
-
int mdc_close(struct obd_export *exp, struct obdo *oa,
- struct obd_client_handle *och, struct ptlrpc_request **request)
+ struct obd_client_handle *och,
+ struct ptlrpc_request **request)
{
struct obd_device *obd = class_exp2obd(exp);
+ struct obd_import *imp = class_exp2cliimp(exp);
int reqsize[3] = {0, sizeof(struct mds_body),
obd->u.cli.cl_max_mds_cookiesize};
int rc, repsize[3] = {sizeof(struct mds_body),
struct l_wait_info lwi;
ENTRY;
+ if (imp->imp_connection == NULL) {
+ CERROR("request on not connected import %s\n",
+ imp->imp_obd->obd_name);
+ RETURN(-EIO);
+ }
+
//reqsize[0] = mdc_get_secdesc_size();
+ //mdc_pack_secdesc(req, reqsize[0]);
req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION,
MDS_CLOSE, 3, reqsize, NULL);
if (req == NULL)
GOTO(out, rc = -ENOMEM);
- //mdc_pack_secdesc(req, reqsize[0]);
-
/* Ensure that this close's handle is fixed up during replay. */
LASSERT(och != NULL);
mod = och->och_mod;
if (likely(mod != NULL)) {
mod->mod_close_req = req;
LASSERT(mod->mod_open_req->rq_type != LI_POISON);
- DEBUG_REQ(D_HA, mod->mod_open_req, "matched open req %p",
- mod->mod_open_req);
+ DEBUG_REQ(D_HA, mod->mod_open_req, "matched open");
} else {
- CDEBUG(D_HA, "couldn't find open req; expecting close error\n");
+ CDEBUG(D_HA, "couldn't find open req; "
+ "expecting close error\n");
}
mdc_close_pack(req, 1, oa, oa->o_valid, och);
/* We hand a ref to the rpcd here, so we need another one of our own. */
ptlrpc_request_addref(req);
- mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL);
+ mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL);
req->rq_interpret_reply = mdc_close_interpret;
- req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock;
+ req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_close_lock;
req->rq_async_args.pointer_arg[1] = obd;
ptlrpcd_add_req(req);
- lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), go_back_to_sleep,
- NULL, NULL);
+
+ lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), NULL, NULL, NULL);
rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req),
&lwi);
if (req->rq_repmsg == NULL) {
} else if (rc == 0) {
rc = req->rq_repmsg->status;
if (req->rq_repmsg->type == PTL_RPC_MSG_ERR) {
- DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err "
- "= %d", rc);
+ DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, "
+ "err = %d", rc);
if (rc > 0)
rc = -rc;
- } else if (mod == NULL) {
- CERROR("Unexpected: can't find mdc_open_data, but the "
- "close succeeded. Please tell CFS.\n");
- }
- if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body),
- lustre_swab_mds_body)) {
- CERROR("Error unpacking mds_body\n");
- rc = -EPROTO;
+ } else {
+ if (mod == NULL)
+ CERROR("Unexpected: can't find mdc_open_data, but "
+ "close succeeded. Please tell CFS.\n");
+ if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body),
+ lustre_swab_mds_body))
+ {
+ CERROR("Error unpacking mds_body\n");
+ rc = -EPROTO;
+ }
}
}
if (req->rq_async_args.pointer_arg[0] != NULL) {
- CERROR("returned without dropping rpc_lock: rc %d\n", rc);
+ CERROR("returned without dropping close lock: rc %d, "
+ "dropping it now\n", rc);
mdc_close_interpret(req, &req->rq_async_args, rc);
}
cli->cl_nllu = ((__u32 *) val)[0];
cli->cl_nllg = ((__u32 *) val)[1];
RETURN(0);
+ } else if (keylen == strlen("async") && memcmp(key, "async", keylen) == 0) {
+ struct client_obd *cl = &exp->exp_obd->u.cli;
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ cl->cl_async = *(int *)val;
+ CDEBUG(D_HA, "%s: set async = %d\n",
+ exp->exp_obd->obd_name, cl->cl_async);
+ RETURN(0);
}
RETURN(rc);
GOTO(err_rpc_lock, rc = -ENOMEM);
mdc_init_rpc_lock(cli->cl_setattr_lock);
+ OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
+ if (!cli->cl_close_lock)
+ GOTO(err_setattr_lock, rc = -ENOMEM);
+ mdc_init_rpc_lock(cli->cl_close_lock);
+
rc = client_obd_setup(obd, len, buf);
if (rc)
- GOTO(err_setattr_lock, rc);
+ GOTO(err_close_lock, rc);
rc = obd_llog_init(obd, &obd->obd_llogs, obd, 0, NULL);
if (rc) {
RETURN(rc);
+err_close_lock:
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
err_setattr_lock:
OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
err_rpc_lock:
OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock));
OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock));
+ OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock));
ptlrpcd_decref();
.o_connect = client_connect_import,
.o_disconnect = client_disconnect_export,
.o_iocontrol = mdc_iocontrol,
+ .o_packmd = mdc_packmd,
+ .o_unpackmd = mdc_unpackmd,
.o_statfs = mdc_statfs,
.o_pin = mdc_pin,
.o_unpin = mdc_unpin,
return rc;
}
+extern char *ldlm_lockname[];
+
int mds_lock_mode_for_dir(struct obd_device *obd,
struct dentry *dentry, int mode)
{
}
}
}
- return ret_mode;
+
+ return ret_mode;
}
/* only valid locked dentries or errors should be returned */
#define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED
#endif
-
/* Look up an entry by inode number. This function ONLY returns valid dget'd
* dentries with an initialized inode or errors */
struct dentry *mds_id2dentry(struct obd_device *obd, struct lustre_id *id,
static int mds_destroy_export(struct obd_export *export)
{
- struct mds_export_data *med;
struct obd_device *obd = export->exp_obd;
+ struct mds_export_data *med;
struct lvfs_run_ctxt saved;
int rc = 0;
ENTRY;
struct list_head *tmp = med->med_open_head.next;
struct mds_file_data *mfd =
list_entry(tmp, struct mds_file_data, mfd_list);
+ struct lustre_id sid;
+
BDEVNAME_DECLARE_STORAGE(btmp);
/* bug 1579: fix force-closing for 2.5 */
list_del(&mfd->mfd_list);
spin_unlock(&med->med_open_lock);
+ down(&dentry->d_inode->i_sem);
+ rc = mds_read_inode_sid(obd, dentry->d_inode, &sid);
+ up(&dentry->d_inode->i_sem);
+ if (rc) {
+ CERROR("Can't read inode self id, inode %lu, "
+ "rc %d\n", dentry->d_inode->i_ino, rc);
+ memset(&sid, 0, sizeof(sid));
+ }
+
/* If you change this message, be sure to update
* replay_single:test_46 */
- CERROR("force closing client file handle for %*s (%s:%lu)\n",
- dentry->d_name.len, dentry->d_name.name,
+ CERROR("force closing client file handle for %.*s (%s:"
+ DLID4")\n", dentry->d_name.len, dentry->d_name.name,
ll_bdevname(dentry->d_inode->i_sb, btmp),
- dentry->d_inode->i_ino);
+ OLID4(&sid));
+
/* child inode->i_alloc_sem protects orphan_dec_test and
* is_orphan race, mds_mfd_close drops it */
DOWN_WRITE_I_ALLOC_SEM(dentry->d_inode);
RETURN(0);
}
-int mds_get_md(struct obd_device *obd, struct inode *inode, void *md,
- int *size, int lock)
+static int mds_convert_md(struct obd_device *obd, struct inode *inode,
+ void *md, int size, int mea)
+{
+ int rc = size;
+
+ if (S_ISREG(inode->i_mode)) {
+ rc = mds_convert_lov_ea(obd, inode, md, size);
+ } else if (S_ISDIR(inode->i_mode)) {
+ if (mea) {
+ rc = mds_convert_mea_ea(obd, inode, md, size);
+ } else {
+ rc = mds_convert_lov_ea(obd, inode, md, size);
+ }
+ if (rc == -EINVAL) {
+ CERROR("Invalid EA format (nor LOV or MEA) "
+ "is detected. Inode %lu/%u\n",
+ inode->i_ino, inode->i_generation);
+ }
+ }
+ return rc;
+}
+
+int mds_get_md(struct obd_device *obd, struct inode *inode,
+ void *md, int *size, int lock, int mea)
{
int lmm_size;
int rc = 0;
if (lock)
down(&inode->i_sem);
- rc = fsfilt_get_md(obd, inode, md, *size);
- if (lock)
- up(&inode->i_sem);
+ rc = fsfilt_get_md(obd, inode, md, *size,
+ (mea ? EA_MEA : EA_LOV));
if (rc < 0) {
CERROR("Error %d reading eadata for ino %lu\n",
rc, inode->i_ino);
} else if (rc > 0) {
lmm_size = rc;
-
- if (S_ISREG(inode->i_mode))
- rc = mds_convert_lov_ea(obd, inode, md, lmm_size);
- if (S_ISDIR(inode->i_mode))
- rc = mds_convert_mea_ea(obd, inode, md, lmm_size);
-
+ rc = mds_convert_md(obd, inode, md,
+ lmm_size, mea);
if (rc == 0) {
*size = lmm_size;
rc = lmm_size;
*size = rc;
}
}
+ if (lock)
+ up(&inode->i_sem);
- RETURN (rc);
+ RETURN(rc);
}
/* Call with lock=1 if you want mds_pack_md to take the i_sem.
* Call with lock=0 if the caller has already taken the i_sem. */
int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset,
- struct mds_body *body, struct inode *inode, int lock)
+ struct mds_body *body, struct inode *inode, int lock, int mea)
{
struct mds_obd *mds = &obd->u.mds;
void *lmm;
// RETURN(-EINVAL);
}
- rc = mds_get_md(obd, inode, lmm, &lmm_size, lock);
+ rc = mds_get_md(obd, inode, lmm, &lmm_size, lock, mea);
if (rc > 0) {
if (S_ISDIR(inode->i_mode))
body->valid |= OBD_MD_FLDIREA;
else
body->valid |= OBD_MD_FLEASIZE;
+
+ if (mea)
+ body->valid |= OBD_MD_MEA;
+
body->eadatasize = lmm_size;
rc = 0;
}
struct mds_body *body, struct inode *inode)
{
struct dentry de = { .d_inode = inode };
+ __u32 buflen, *sizep;
void *buf;
- __u32 buflen, *sizep, size;
+ int size;
ENTRY;
if (!inode->i_op->getxattr)
buf = lustre_msg_buf(repmsg, offset + 1, buflen);
size = inode->i_op->getxattr(&de, XATTR_NAME_ACL_ACCESS, buf, buflen);
- if (size == -ENODATA)
+ if (size == -ENODATA || size == -EOPNOTSUPP)
RETURN(0);
if (size < 0)
RETURN(size);
if ((S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE)) ||
(S_ISDIR(inode->i_mode) && (reqbody->valid & OBD_MD_FLDIREA))) {
rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, body,
- inode, 1);
+ inode, 1, (reqbody->valid & OBD_MD_MEA) ? 1 : 0);
/* if we have LOV EA data, the OST holds size, atime, mtime. */
if (!(body->valid & OBD_MD_FLEASIZE) &&
int rc = 0, size[4] = {sizeof(*body)}, bufcount = 1;
ENTRY;
- body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
+ body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
LASSERT(body != NULL); /* checked by caller */
LASSERT_REQSWABBED(req, offset); /* swabbed by caller */
if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) ||
(S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) {
int rc;
+
down(&inode->i_sem);
- rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0);
+ rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0,
+ ((body->valid & OBD_MD_MEA) ? EA_MEA : EA_LOV));
up(&inode->i_sem);
if (rc < 0) {
- if (rc != -ENODATA)
+ if (rc != -ENODATA && rc != -EOPNOTSUPP)
CERROR("error getting inode %lu MD: rc = %d\n",
inode->i_ino, rc);
size[bufcount] = 0;
rc = inode->i_op->getxattr(de, ea_name, NULL, 0);
if (rc < 0) {
- if (rc != -ENODATA)
+ if (rc != -ENODATA && rc != -EOPNOTSUPP)
CERROR("error getting inode %lu EA: rc = %d\n",
inode->i_ino, rc);
size[bufcount] = 0;
rc = inode->i_op->listxattr(de, NULL, 0);
if (rc < 0) {
- if (rc != -ENODATA)
+ if (rc != -ENODATA && rc != -EOPNOTSUPP)
CERROR("error getting inode %lu EA: rc = %d\n",
inode->i_ino, rc);
size[bufcount] = 0;
cleanup_phase = 2; /* dchild, dparent, locks */
- /*
+ /*
* let's make sure this name should leave on this mds
* node.
*/
int rc = 0;
ENTRY;
+ MD_COUNTER_INCREMENT(obd, getattr);
+
rsd = lustre_swab_mds_secdesc(req, MDS_REQ_SECDESC_OFF);
if (!rsd) {
CERROR("Can't unpack security desc\n");
RETURN (-EFAULT);
}
- MD_COUNTER_INCREMENT(obd, getattr);
-
rc = mds_init_ucred(&uc, req, rsd);
if (rc) {
mds_exit_ucred(&uc);
int rc, size = sizeof(struct obd_statfs);
ENTRY;
+ /* This will trigger a watchdog timeout */
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP,
+ (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1);
+
rc = lustre_pack_reply(req, 1, &size, NULL);
if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) {
CERROR("mds: statfs lustre_pack_reply failed: rc = %d\n", rc);
int rc, size = sizeof(*body);
ENTRY;
- body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body));
+ body = lustre_swab_reqbuf(req, offset, sizeof(*body),
+ lustre_swab_mds_body);
if (body == NULL)
GOTO(out, rc = -EPROTO);
mea->mea_count = 0;
down(&new->d_inode->i_sem);
- rc = fsfilt_set_md(obd, new->d_inode, handle, mea, mealen);
+ rc = fsfilt_set_md(obd, new->d_inode, handle,
+ mea, mealen, EA_MEA);
up(&new->d_inode->i_sem);
if (rc)
- CERROR("fsfilt_set_md() failed, rc = %d\n", rc);
+ CERROR("fsfilt_set_md() failed, "
+ "rc = %d\n", rc);
OBD_FREE(mea, mealen);
+
CDEBUG(D_OTHER, "%s: mark non-splittable %lu/%u - %d\n",
obd->obd_name, new->d_inode->i_ino,
new->d_inode->i_generation, flags);
return rc;
}
-static char str[PTL_NALFMT_SIZE];
-
int mds_handle(struct ptlrpc_request *req)
{
int should_process, fail = OBD_FAIL_MDS_ALL_REPLY_NET;
int recovering;
if (req->rq_export == NULL) {
- CERROR("operation %d on unconnected MDS from NID %s\n",
+ CERROR("operation %d on unconnected MDS from %s\n",
req->rq_reqmsg->opc,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
req->rq_status = -ENOTCONN;
GOTO(out, rc = -ENOTCONN);
}
/* sanity check: if the xid matches, the request must
* be marked as a resent or replayed */
- if (req->rq_xid == med->med_mcd->mcd_last_xid) {
+ if (req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_xid) ||
+ req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_close_xid)) {
LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) &
(MSG_RESENT | MSG_REPLAY),
"rq_xid "LPU64" matches last_xid, "
LASSERT(obd != NULL);
LASSERT(inode != NULL);
- rc = fsfilt_get_sid(obd, inode, &id->li_fid,
- sizeof(id->li_fid));
+ rc = fsfilt_get_md(obd, inode, &id->li_fid,
+ sizeof(id->li_fid), EA_SID);
if (rc < 0) {
- CERROR("fsfilt_get_sid() failed, "
+ CERROR("fsfilt_get_md() failed, "
"rc = %d\n", rc);
RETURN(rc);
} else if (!rc) {
LASSERT(obd != NULL);
LASSERT(inode != NULL);
- rc = fsfilt_set_sid(obd, inode, handle, &id->li_fid,
- sizeof(id->li_fid));
+ rc = fsfilt_set_md(obd, inode, handle, &id->li_fid,
+ sizeof(id->li_fid), EA_SID);
if (rc) {
- CERROR("fsfilt_set_sid() failed, rc = %d\n", rc);
+ CERROR("fsfilt_set_md() failed, rc = %d\n", rc);
RETURN(rc);
}
LASSERT(obd != NULL);
LASSERT(inode != NULL);
- rc = fsfilt_get_mid(obd, inode, id, sizeof(*id));
+ rc = fsfilt_get_md(obd, inode, id, sizeof(*id), EA_MID);
if (rc < 0) {
- CERROR("fsfilt_get_mid() failed, "
- "rc = %d\n", rc);
+ CERROR("fsfilt_get_md() failed, rc = %d\n", rc);
RETURN(rc);
} else if (!rc) {
rc = -ENODATA;
LASSERT(obd != NULL);
LASSERT(inode != NULL);
- rc = fsfilt_set_mid(obd, inode, handle, id, sizeof(*id));
+ rc = fsfilt_set_md(obd, inode, handle, id,
+ sizeof(*id), EA_MID);
if (rc) {
- CERROR("fsfilt_set_mid() failed, rc = %d\n", rc);
+ CERROR("fsfilt_set_md() failed, "
+ "rc = %d\n", rc);
RETURN(rc);
}
{
struct lustre_cfg* lcfg = buf;
struct mds_obd *mds = &obd->u.mds;
+ struct lvfs_obd_ctxt *lvfs_ctxt = NULL;
char *options = NULL;
struct vfsmount *mnt;
char ns_name[48];
lcfg->lcfg_inlbuf4);
/* we have to know mdsnum before touching underlying fs -bzzz */
+ atomic_set(&mds->mds_open_count, 0);
sema_init(&mds->mds_md_sem, 1);
mds->mds_md_connected = 0;
mds->mds_md_name = NULL;
}
}
- mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, lcfg->lcfg_inlbuf1, options);
+ rc = lvfs_mount_fs(lcfg->lcfg_inlbuf1, lcfg->lcfg_inlbuf2,
+ options, 0, &lvfs_ctxt);
+
free_page(page);
- if (IS_ERR(mnt)) {
- rc = PTR_ERR(mnt);
- CERROR("do_kern_mount failed: rc = %d\n", rc);
+ if (rc || !lvfs_ctxt) {
+ CERROR("lvfs_mount_fs failed: rc = %d\n", rc);
GOTO(err_ops, rc);
}
+ mnt = lvfs_ctxt->loc_mnt;
+ mds->mds_lvfs_ctxt = lvfs_ctxt;
+
CDEBUG(D_SUPER, "%s: mnt = %p\n", lcfg->lcfg_inlbuf1, mnt);
sema_init(&mds->mds_epoch_sem, 1);
obd->obd_namespace = NULL;
err_put:
unlock_kernel();
- mntput(mds->mds_vfsmnt);
+ lvfs_umount_fs(mds->mds_lvfs_ctxt);
mds->mds_sb = 0;
lock_kernel();
err_ops:
unlock_kernel();
- /*
- * 2 seems normal on mds, (may_umount() also expects 2 fwiw), but we
- * only see 1 at this point in obdfilter.
- */
- if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2)
- CERROR("%s: mount busy, mnt_count %d != 2\n", obd->obd_name,
- atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count));
+ /* 2 seems normal on mds, (may_umount() also expects 2
+ fwiw), but we only see 1 at this point in obdfilter. */
+ lvfs_umount_fs(mds->mds_lvfs_ctxt);
- mntput(mds->mds_vfsmnt);
mds->mds_sb = 0;
ldlm_namespace_free(obd->obd_namespace, flags & OBD_OPT_FORCE);
spin_unlock_bh(&obd->obd_processing_task_lock);
lock_kernel();
- dev_clear_rdonly(2);
fsfilt_put_ops(obd->obd_fsops);
#ifdef ENABLE_GSS
/* If the xid matches, then we know this is a resent request,
* and allow it. (It's probably an OPEN, for which we don't
* send a lock */
- if (req->rq_xid == exp->exp_mds_data.med_mcd->mcd_last_xid)
+ if (req->rq_xid ==
+ le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid))
+ return;
+
+ if (req->rq_xid ==
+ le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_close_xid))
return;
/* This remote handle isn't enqueued, so we never received or
switch ((long)it->opc) {
case IT_OPEN:
case IT_CREAT|IT_OPEN:
+ fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF,
+ lock, NULL, lockh);
/* XXX swab here to assert that an mds_open reint
* packet is following */
fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF,
lock, NULL, lockh);
rep->lock_policy_res2 = mds_reint(req, offset, lockh);
-#if 0
- /* We abort the lock if the lookup was negative and
- * we did not make it to the OPEN portion */
- if (!intent_disposition(rep, DISP_LOOKUP_EXECD))
+
+ if (rep->lock_policy_res2) {
+ /*
+ * mds_open() returns ENOLCK where it should return
+ * zero, but it has no lock to return.
+ */
+ if (rep->lock_policy_res2 == ENOLCK)
+ rep->lock_policy_res2 = 0;
+
RETURN(ELDLM_LOCK_ABORTED);
- if (intent_disposition(rep, DISP_LOOKUP_NEG) &&
- !intent_disposition(rep, DISP_OPEN_OPEN))
-#endif
- /* IT_OPEN may return lock on cross-node dentry
- * that we want to hold during attr retrival -bzzz */
- if (rc != 0 || lockh[0].cookie == 0)
+ }
+
+ /*
+ * IT_OPEN may return lock on cross-node dentry that we want to
+ * hold during attr retrieval -bzzz
+ */
+ if (lockh[0].cookie == 0)
RETURN(ELDLM_LOCK_ABORTED);
+
break;
case IT_LOOKUP:
getattr_part = MDS_INODELOCK_LOOKUP;
LASSERTF(new_lock != NULL, "op "LPX64" lockh "LPX64"\n",
it->opc, lockh[0].cookie);
-
+
/* If we've already given this lock to a client once, then we should
* have no readers or writers. Otherwise, we should have one reader
* _or_ writer ref (which will be zeroed below) before returning the
mds->mds_service =
ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL,
+ MDS_SERVICE_WATCHDOG_TIMEOUT,
mds_handle, "mds", obd->obd_proc_entry);
if (!mds->mds_service) {
mds->mds_setattr_service =
ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL,
+ MDS_SERVICE_WATCHDOG_TIMEOUT,
mds_handle, "mds_setattr",
obd->obd_proc_entry);
if (!mds->mds_setattr_service) {
mds->mds_readpage_service =
ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL,
+ MDS_SERVICE_WATCHDOG_TIMEOUT,
mds_handle, "mds_readpage",
obd->obd_proc_entry);
if (!mds->mds_readpage_service) {
RETURN(mds_dt_update_config(obd, 0));
}
+static int lprocfs_rd_filesopen(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct obd_device *obd = data;
+ LASSERT(obd != NULL);
+ *eof = 1;
+
+ return snprintf(page, count, "%d\n",
+ atomic_read(&obd->u.mds.mds_open_count));
+}
+
static int lprocfs_rd_last_fid(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
{ "fstype", lprocfs_rd_fstype, 0, 0 },
{ "filestotal", lprocfs_rd_filestotal, 0, 0 },
{ "filesfree", lprocfs_rd_filesfree, 0, 0 },
+ { "filesopen", lprocfs_rd_filesopen, 0, 0 },
{ "mntdev", lprocfs_mds_rd_mntdev, 0, 0 },
{ "last_fid", lprocfs_rd_last_fid, 0, 0 },
{ "group", lprocfs_rd_group, 0, 0 },
/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
#define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
-#define MDS_MAX_CLIENT_WORDS (MDS_MAX_CLIENTS / sizeof(unsigned long))
#define LAST_RCVD "last_rcvd"
#define LOV_OBJID "lov_objid"
free_and_out:
OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
+ med->med_mcd = NULL;
return 0;
}
static int mds_server_free_data(struct mds_obd *mds)
{
- OBD_FREE(mds->mds_client_bitmap,
- MDS_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ OBD_FREE(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8);
OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data));
mds->mds_server_data = NULL;
if (!msd)
RETURN(-ENOMEM);
- OBD_ALLOC_WAIT(mds->mds_client_bitmap,
- MDS_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ OBD_ALLOC_WAIT(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8);
if (!mds->mds_client_bitmap) {
OBD_FREE(msd, sizeof(*msd));
RETURN(-ENOMEM);
continue;
}
- last_transno = le64_to_cpu(mcd->mcd_last_transno);
+ last_transno = le64_to_cpu(mcd->mcd_last_transno) >
+ le64_to_cpu(mcd->mcd_last_close_transno) ?
+ le64_to_cpu(mcd->mcd_last_transno) :
+ le64_to_cpu(mcd->mcd_last_close_transno);
/* These exports are cleaned up by mds_disconnect(), so they
* need to be set up like real exports as mds_connect() does.
GOTO(err_pop, rc);
}
mds->mds_id_de = dentry;
-
- if (!dentry->d_inode) {
+ if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) {
rc = -ENOENT;
CERROR("__iopen__ directory has no inode? rc = %d\n", rc);
GOTO(err_id_de, rc);
oa->o_id, oa->o_generation, rc);
err = fsfilt_commit(obd, mds->mds_sb, mds->mds_objects_dir->d_inode,
- handle, 0);
+ handle, exp->exp_sync);
if (err && !rc)
rc = err;
out_dput:
#include <linux/lustre_mds.h>
+#define MDS_SERVICE_WATCHDOG_TIMEOUT 30000
+
#define MAX_ATIME_DIFF 60
struct mds_filter_data {
#ifdef __KERNEL__
int mds_get_md(struct obd_device *, struct inode *, void *md,
- int *size, int lock);
+ int *size, int lock, int mea);
int mds_pack_md(struct obd_device *, struct lustre_msg *, int offset,
- struct mds_body *, struct inode *, int lock);
+ struct mds_body *, struct inode *, int lock, int mea);
int mds_pack_link(struct dentry *dentry, struct ptlrpc_request *req,
struct mds_body *repbody, int reply_off);
int mds_pack_ea(struct dentry *dentry, struct ptlrpc_request *req,
rec = lustre_swab_reqbuf (req, offset, sizeof (*rec),
lustre_swab_mds_rec_create);
if (rec == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
r->ur_id1 = &rec->cr_id;
r->ur_id2 = &rec->cr_replayid;
if (req->rq_reqmsg->bufcount > offset + 2) {
r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0);
if (r->ur_eadata == NULL)
- RETURN (-EFAULT);
+ RETURN(-EFAULT);
r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2];
}
RETURN(0);
rec->ur_opcode = opcode;
rc = mds_unpackers[opcode](req, offset, rec);
+
+#if CRAY_PORTALS
+ rec->ur_fsuid = req->rq_uid;
+#endif
RETURN(rc);
}
ucred->luc_ginfo = NULL;
ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid);
+#if CRAY_PORTALS
+ ucred->luc_fsuid = req->rq_uid;
+#else
+ ucred->luc_fsuid = rsd->rsd_fsuid;
+#endif
if (lsd) {
if (req->rq_remote) {
/* record the gid mapping here */
rsd->rsd_cap &= ~CAP_FS_MASK;
/* by now every fields in rsd have been granted */
- ucred->luc_fsuid = rsd->rsd_fsuid;
ucred->luc_fsgid = rsd->rsd_fsgid;
ucred->luc_cap = rsd->rsd_cap;
ucred->luc_uid = rsd->rsd_uid;
if (*mea_size < 0 || *mea == NULL)
return *mea_size < 0 ? *mea_size : -EINVAL;
- rc = mds_get_md(obd, inode, *mea, mea_size, 1);
+ rc = mds_get_md(obd, inode, *mea, mea_size, 1, 1);
if (rc <= 0) {
OBD_FREE(*mea, *mea_size);
GOTO(err_oa, rc = PTR_ERR(handle));
}
- rc = fsfilt_set_md(obd, dir, handle, *mea, mea_size);
+ rc = fsfilt_set_md(obd, dir, handle, *mea, mea_size, EA_MEA);
if (rc) {
up(&dir->i_sem);
CERROR("fsfilt_set_md() failed, error %d.\n", rc);
}
cleanup_phase = 1;
- /*
- * handling the case when remote MDS checks if dir is empty before
- * rename. But it also does it for all entries, because inode is stored
- * here and remote MDS does not know if rename point to dir or to reg
- * file. So we check it here.
- */
+ /*
+ * handling the case when remote MDS checks if dir is empty
+ * before rename. But it also does it for all entries, because
+ * inode is stored here and remote MDS does not know if rename
+ * point to dir or to reg file. So we check it here.
+ */
if (!S_ISDIR(dentry->d_inode->i_mode))
GOTO(cleanup, rc = 0);
}
int mds_convert_mea_ea(struct obd_device *obd, struct inode *inode,
- struct lov_mds_md *lmm, int lmmsize)
+ struct lov_mds_md *lmm, int lmm_size)
{
- int i, rc, err, size;
+ struct lov_stripe_md *lsm = NULL;
struct mea_old *old;
struct mea *mea;
- struct mea *new;
void *handle;
+ int rc, err;
ENTRY;
- mea = (struct mea *) lmm;
+ mea = (struct mea *)lmm;
+ old = (struct mea_old *)lmm;
+
if (mea->mea_magic == MEA_MAGIC_LAST_CHAR ||
- mea->mea_magic == MEA_MAGIC_ALL_CHARS)
+ mea->mea_magic == MEA_MAGIC_ALL_CHARS)
RETURN(0);
- old = (struct mea_old *) lmm;
-
- rc = sizeof(struct lustre_id) * old->mea_count +
- sizeof(struct mea_old);
-
- if (old->mea_count > 256 || old->mea_master > 256 || lmmsize < rc
- || old->mea_master > old->mea_count) {
- CWARN("unknown MEA format, dont convert it\n");
- CWARN(" count %u, master %u, size %u\n",
- old->mea_count, old->mea_master, rc);
- RETURN(0);
- }
-
- CWARN("converting MEA EA on %lu/%u from V0 to V1 (%u/%u)\n",
- inode->i_ino, inode->i_generation, old->mea_count,
- old->mea_master);
+ /*
+ * making MDS try LOV EA conversion in the non-LMV configuration
+ * cases.
+ */
+ if (!obd->u.mds.mds_md_exp)
+ RETURN(-EINVAL);
- size = sizeof(struct lustre_id) * old->mea_count +
- sizeof(struct mea);
-
- OBD_ALLOC(new, size);
- if (new == NULL)
- RETURN(-ENOMEM);
+ CDEBUG(D_INODE, "converting MEA EA on %lu/%u from V0 to V1 (%u/%u)\n",
+ inode->i_ino, inode->i_generation, old->mea_count,
+ old->mea_master);
- new->mea_magic = MEA_MAGIC_LAST_CHAR;
- new->mea_count = old->mea_count;
- new->mea_master = old->mea_master;
- for (i = 0; i < new->mea_count; i++)
- new->mea_ids[i] = old->mea_ids[i];
+ rc = obd_unpackmd(obd->u.mds.mds_md_exp, &lsm, lmm, lmm_size);
+ if (rc < 0)
+ GOTO(conv_end, rc);
+
+ rc = obd_packmd(obd->u.mds.mds_md_exp, &lmm, lsm);
+ if (rc < 0)
+ GOTO(conv_free, rc);
+ lmm_size = rc;
handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
if (IS_ERR(handle)) {
GOTO(conv_free, rc);
}
- rc = fsfilt_set_md(obd, inode, handle, (struct lov_mds_md *) new, size);
- if (rc > lmmsize)
- size = lmmsize;
- memcpy(lmm, new, size);
-
+ rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_MEA);
err = fsfilt_commit(obd, obd->u.mds.mds_sb, inode, handle, 0);
if (!rc)
- rc = err ? err : size;
- EXIT;
+ rc = err ? err : lmm_size;
+ GOTO(conv_free, rc);
conv_free:
- OBD_FREE(new, size);
+ obd_free_memmd(obd->u.mds.mds_md_exp, &lsm);
+conv_end:
return rc;
}
int rc = 0;
ENTRY;
+ CDEBUG(D_INFO, "ioctl cmd %x\n", cmd);
switch (cmd) {
case OBD_IOC_RECORD: {
char *name = data->ioc_inlbuf1;
RETURN(0);
}
default:
+ CDEBUG(D_INFO, "unknown command %x\n", cmd);
RETURN(-EINVAL);
}
RETURN(0);
}
rc = 0;
+ EXIT;
cleanup:
up(&mds->mds_orphan_recovery_sem);
- RETURN(rc);
+ return rc;
}
int mds_dt_start_synchronize(struct obd_device *obd,
GOTO(conv_free, rc);
}
- rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size);
-
+ rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_LOV);
err = fsfilt_commit(obd, obd->u.mds.mds_sb, inode, handle, 0);
if (!rc)
rc = err ? err : lmm_size;
GOTO(out_oa, rc);
}
- rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size);
+ rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_LOV);
err = fsfilt_commit(obd, inode->i_sb, inode, handle, 0);
if (!rc)
rc = err;
LASSERT(lmm_buf);
LASSERT(lmm_bufsize >= lmm_size);
memcpy(lmm_buf, lmm, lmm_size);
- rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size);
+ rc = fsfilt_set_md(obd, inode, *handle, lmm,
+ lmm_size, EA_LOV);
if (rc)
CERROR("open replay failed to set md:%d\n", rc);
RETURN(0);
if (rc)
GOTO(out_oa, rc);
} else {
- /* Per-directory striping default code removed, because
- * it uses the same unnamed EA storage as the directory
- * striping for CMD. -p */
+ OBD_ALLOC(lmm, mds->mds_max_mdsize);
+ if (lmm == NULL)
+ GOTO(out_oa, rc = -ENOMEM);
+
+ lmm_size = mds->mds_max_mdsize;
+ rc = mds_get_md(obd, dchild->d_parent->d_inode,
+ lmm, &lmm_size, 1, 0);
+ if (rc > 0)
+ rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE,
+ mds->mds_dt_exp,
+ 0, &lsm, lmm);
+ OBD_FREE(lmm, mds->mds_max_mdsize);
+ if (rc)
+ GOTO(out_oa, rc);
}
LASSERT(oa->o_gr >= FILTER_GROUP_FIRST_MDS);
rc = obd_create(mds->mds_dt_exp, oa, &lsm, &oti);
GOTO(out_ids, rc);
}
- rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size);
+ rc = fsfilt_set_md(obd, inode, *handle, lmm,
+ lmm_size, EA_LOV);
+
lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0);
lmm_bufsize = req->rq_repmsg->buflens[offset];
LASSERT(lmm_buf);
OBD_FREE(*ids, mds->mds_dt_desc.ld_tgt_count * sizeof(**ids));
*ids = NULL;
}
+ if(lsm)
+ obd_free_memmd(mds->mds_dt_exp, &lsm);
RETURN(rc);
}
mds_pack_inode2body(obd, body, dchild->d_inode, 1);
if (S_ISREG(dchild->d_inode->i_mode)) {
rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
- dchild->d_inode, 1);
+ dchild->d_inode, 1, 0);
if (rc)
LASSERT(rc == req->rq_status);
/* Handles object creation, actual opening, and I/O epoch */
static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
struct mds_body *body, int flags, void **handle,
- struct mds_update_record *rec,struct ldlm_reply *rep)
+ struct mds_update_record *rec, struct ldlm_reply *rep)
{
struct mds_obd *mds = mds_req2mds(req);
struct obd_device *obd = req->rq_export->exp_obd;
if ((S_ISREG(mode) && !(body->valid & OBD_MD_FLEASIZE)) ||
(S_ISDIR(mode) && !(body->valid & OBD_MD_FLDIREA))) {
rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
- dchild->d_inode, 0);
+ dchild->d_inode, 0, 0);
if (rc) {
up(&dchild->d_inode->i_sem);
RETURN(rc);
req->rq_repmsg, 2);
if (!rc)
rc = mds_pack_md(obd, req->rq_repmsg, 2, body,
- dchild->d_inode, 0);
+ dchild->d_inode, 0, 0);
if (rc) {
up(&dchild->d_inode->i_sem);
RETURN(rc);
ENTRY;
down(&pending_dir->i_sem);
-
idlen = ll_id2str(idname, id_ino(id), id_gen(id));
-
dchild = lookup_one_len(idname, mds->mds_pending_dir,
idlen);
if (IS_ERR(dchild)) {
}
if (dchild->d_inode != NULL) {
- up(&pending_dir->i_sem);
mds_inode_set_orphan(dchild->d_inode);
mds_pack_inode2body(req2obd(req), body,
dchild->d_inode, 1);
idname);
goto open;
}
- dput(dchild);
- up(&pending_dir->i_sem);
+ l_dput(dchild);
/*
* we didn't find it in PENDING so it isn't an orphan. See if it was a
rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
req, rc, rep ? rep->lock_policy_res1 : 0);
/* XXX what do we do here if mds_finish_transno itself failed? */
-
l_dput(dchild);
RETURN(rc);
}
struct lustre_handle lockh;
int lock_flags = 0;
int rc;
+ ENTRY;
if (child_lockh == NULL)
child_lockh = &lockh;
struct dentry_params dp;
struct mea *mea = NULL;
int mea_size, update_mode;
+ int child_mode = LCK_PR;
+ /* Always returning LOOKUP lock if open successful to guard
+ dentry on client. */
+ ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}};
+ struct ldlm_res_id child_res_id = { .name = {0}};
+ int lock_flags = 0;
ENTRY;
DEBUG_REQ(D_INODE, req, "parent "DLID4" name %*s mode %o",
OLID4(rec->ur_id1), rec->ur_namelen - 1, rec->ur_name,
rec->ur_mode);
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE,
+ (obd_timeout + 1) / 4);
+
if (offset == 3) { /* intent */
rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep));
body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body));
acc_mode = accmode(rec->ur_flags);
/* Step 1: Find and lock the parent */
- if (rec->ur_flags & O_CREAT) {
+ if (rec->ur_flags & MDS_OPEN_CREAT) {
/* XXX Well, in fact we only need this lock mode change if
in addition to O_CREAT, the file does not exist.
But we do not know if it exists or not yet */
if (!(rec->ur_flags & O_EXCL)) { /* bug 3313 */
rc = fsfilt_commit(obd, dchild->d_inode->i_sb,
- dchild->d_inode, handle, 0);
+ dchild->d_inode, handle,
+ req->rq_export->exp_sync);
handle = NULL;
}
mds_pack_inode2body(obd, body, dchild->d_inode, 1);
LASSERTF(!mds_inode_is_orphan(dchild->d_inode),
- "dchild %*s (%p) inode %p\n", dchild->d_name.len,
+ "dchild %.*s (%p) inode %p\n", dchild->d_name.len,
dchild->d_name.name, dchild, dchild->d_inode);
if (S_ISREG(dchild->d_inode->i_mode)) {
GOTO(cleanup, rc = -EEXIST); // returns a lock to the client
}
- /* if we are following a symlink, don't open */
- if (S_ISLNK(dchild->d_inode->i_mode))
- GOTO(cleanup, rc = 0);
-
- if ((rec->ur_flags & MDS_OPEN_DIRECTORY) &&
- !S_ISDIR(dchild->d_inode->i_mode))
- GOTO(cleanup, rc = -ENOTDIR);
-
if (S_ISDIR(dchild->d_inode->i_mode)) {
if (rec->ur_flags & MDS_OPEN_CREAT ||
rec->ur_flags & FMODE_WRITE) {
}
}
+ /* if we are following a symlink, don't open */
+ if (S_ISLNK(dchild->d_inode->i_mode))
+ GOTO(cleanup_no_trans, rc = 0);
+
+ if ((rec->ur_flags & MDS_OPEN_DIRECTORY) &&
+ !S_ISDIR(dchild->d_inode->i_mode))
+ GOTO(cleanup, rc = -ENOTDIR);
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_CREATE)) {
+ obd_fail_loc = OBD_FAIL_LDLM_REPLY | OBD_FAIL_ONCE;
+ GOTO(cleanup, rc = -EAGAIN);
+ }
+
+ /* Obtain OPEN lock as well */
+ policy.l_inodebits.bits |= MDS_INODELOCK_OPEN;
+
+ /* We cannot use acc_mode here, because it is zeroed in case of
+ creating a file, so we get wrong lockmode */
+ if (accmode(rec->ur_flags) & MAY_WRITE)
+ child_mode = LCK_CW;
+ else if (accmode(rec->ur_flags) & MAY_EXEC)
+ child_mode = LCK_PR;
+ else
+ child_mode = LCK_CR;
+
+ if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) {
+ struct lustre_id sid;
+
+ down(&dchild->d_inode->i_sem);
+ rc = mds_read_inode_sid(obd, dchild->d_inode, &sid);
+ up(&dchild->d_inode->i_sem);
+ if (rc) {
+ CERROR("Can't read inode self id, "
+ "inode %lu, rc %d\n",
+ dchild->d_inode->i_ino, rc);
+ GOTO(cleanup, rc);
+ }
+
+ /* In case of replay we do not get a lock assuming that the
+ caller has it already */
+ child_res_id.name[0] = id_fid(&sid);
+ child_res_id.name[1] = id_group(&sid);
+
+ rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace,
+ child_res_id, LDLM_IBITS, &policy,
+ child_mode, &lock_flags,
+ mds_blocking_ast, ldlm_completion_ast,
+ NULL, NULL, NULL, 0, NULL, child_lockh);
+ if (rc != ELDLM_OK)
+ GOTO(cleanup, rc);
+
+ cleanup_phase = 3;
+ }
+
/* Step 5: mds_open it */
rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle,
rec, rep);
GOTO(cleanup, rc);
- cleanup:
+cleanup:
rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle,
req, rc, rep ? rep->lock_policy_res1 : 0);
+cleanup_no_trans:
switch (cleanup_phase) {
+ case 3:
+ if (rc) {
+ ldlm_lock_decref(child_lockh, child_mode);
+ child_lockh->cookie = 0;
+ }
case 2:
if (rc && created) {
int err = vfs_unlink(dparent->d_inode, dchild);
if (err) {
- CERROR("unlink(%*s) in error path: %d\n",
+ CERROR("unlink(%.*s) in error path: %d\n",
dchild->d_name.len, dchild->d_name.name,
err);
}
}
if (mea)
OBD_FREE(mea, mea_size);
+ if (rc == 0)
+ atomic_inc(&mds->mds_open_count);
+
+ /*
+ * If we have not taken the "open" lock, we may not return 0 here,
+ * because caller expects 0 to mean "lock is taken", and it needs
+ * nonzero return here for caller to return ELDLM_LOCK_ABORTED to
+ * client. Later the caller should rewrite the return value back to
+ * zero if it is to be used any further.
+ */
+ if ((cleanup_phase != 3) && !rc)
+ rc = ENOLCK;
RETURN(rc);
}
LASSERT(pending_child->d_inode != NULL);
cleanup_phase = 2; /* dput(pending_child) when finished */
+ if (S_ISDIR(pending_child->d_inode->i_mode)) {
+ rc = vfs_rmdir(pending_dir, pending_child);
+ if (rc)
+ CERROR("error unlinking orphan dir %s: rc %d\n",
+ idname, rc);
+ goto out;
+ }
+
if (req != NULL && req->rq_repmsg != NULL) {
lmm = lustre_msg_buf(req->rq_repmsg, 1, 0);
stripe_count = le32_to_cpu(lmm->lmm_stripe_count);
pending_child->d_fsdata = (void *) &dp;
dp.p_inum = 0;
dp.p_ptr = req;
- if (S_ISDIR(pending_child->d_inode->i_mode))
- rc = vfs_rmdir(pending_dir, pending_child);
- else
- rc = vfs_unlink(pending_dir, pending_child);
+ rc = vfs_unlink(pending_dir, pending_child);
if (rc)
CERROR("error unlinking orphan %s: rc %d\n",
idname, rc);
mds_mfd_destroy(mfd);
cleanup:
+ atomic_dec(&mds->mds_open_count);
if (req != NULL && reply_body != NULL) {
rc = mds_finish_transno(mds, pending_dir, handle, req, rc, 0);
} else if (handle) {
- int err = fsfilt_commit(obd, mds->mds_sb, pending_dir, handle, 0);
+ int err, force_sync = 0;
+
+ if (req && req->rq_export)
+ force_sync = req->rq_export->exp_sync;
+
+ err = fsfilt_commit(obd, mds->mds_sb, pending_dir, handle,
+ force_sync);
if (err) {
CERROR("error committing close: %d\n", err);
if (!rc)
req->rq_repmsg->buflens[2]);
}
-
body = lustre_swab_reqbuf(req, offset, sizeof(*body),
lustre_swab_mds_body);
if (body == NULL) {
(body->valid & OBD_MD_FID) ? 1 : 0);
mds_pack_md(obd, req->rq_repmsg, 1, rep_body,
- inode, MDS_PACK_MD_LOCK);
+ inode, MDS_PACK_MD_LOCK, 0);
}
spin_lock(&med->med_open_lock);
list_del(&mfd->mfd_list);
spin_unlock(&mds->mds_transno_lock);
}
req->rq_repmsg->transno = req->rq_transno = transno;
- mcd->mcd_last_transno = cpu_to_le64(transno);
- mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
- mcd->mcd_last_result = cpu_to_le32(rc);
- mcd->mcd_last_data = cpu_to_le32(op_data);
+ if (req->rq_reqmsg->opc == MDS_CLOSE) {
+ mcd->mcd_last_close_transno = cpu_to_le64(transno);
+ mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid);
+ mcd->mcd_last_close_result = cpu_to_le32(rc);
+ mcd->mcd_last_close_data = cpu_to_le32(op_data);
+ } else {
+ mcd->mcd_last_transno = cpu_to_le64(transno);
+ mcd->mcd_last_xid = cpu_to_le64(req->rq_xid);
+ mcd->mcd_last_result = cpu_to_le32(rc);
+ mcd->mcd_last_data = cpu_to_le32(op_data);
+ }
fsfilt_add_journal_cb(obd, mds->mds_sb, transno, handle,
mds_commit_last_transno_cb, NULL);
EXIT;
out_commit:
- err = fsfilt_commit(obd, mds->mds_sb, inode, handle, 0);
+ err = fsfilt_commit(obd, mds->mds_sb, inode, handle,
+ req->rq_export->exp_sync);
if (err) {
CERROR("error committing transaction: %d\n", err);
if (!rc)
void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd)
{
- DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d",
- mcd->mcd_last_transno, mcd->mcd_last_result);
- req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_transno;
- req->rq_repmsg->status = req->rq_status = mcd->mcd_last_result;
+ if (req->rq_reqmsg->opc == MDS_CLOSE) {
+ DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d",
+ mcd->mcd_last_close_transno, mcd->mcd_last_close_result);
+ req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_close_transno;
+ req->rq_repmsg->status = req->rq_status = mcd->mcd_last_close_result;
+ } else {
+ DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d",
+ mcd->mcd_last_transno, mcd->mcd_last_result);
+ req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_transno;
+ req->rq_repmsg->status = req->rq_status = mcd->mcd_last_result;
+ }
mds_steal_ack_locks(req);
}
rc = inode->i_op->removexattr(de,
rec->ur_eadata);
} else if ((S_ISREG(inode->i_mode) ||
- S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) {
- struct lov_stripe_md *lsm = NULL;
-
+ S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) {
+ struct lov_stripe_md *lsm = NULL;
+ struct lov_user_md *lum = NULL;
+
rc = ll_permission(inode, MAY_WRITE, NULL);
if (rc < 0)
GOTO(cleanup, rc);
- rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, mds->mds_dt_exp,
- 0, &lsm, rec->ur_eadata);
- if (rc)
- GOTO(cleanup, rc);
-
- obd_free_memmd(mds->mds_dt_exp, &lsm);
-
- rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata,
- rec->ur_eadatalen);
- if (rc)
- GOTO(cleanup, rc);
+ lum = rec->ur_eadata;
+ /* if lmm_stripe_size is -1 delete default stripe from dir */
+ if (S_ISDIR(inode->i_mode) &&
+ lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1)){
+ rc = fsfilt_set_md(obd, inode, handle, NULL, 0, EA_LOV);
+ if (rc)
+ GOTO(cleanup, rc);
+ } else {
+ rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, mds->mds_dt_exp,
+ 0, &lsm, rec->ur_eadata);
+ if (rc)
+ GOTO(cleanup, rc);
+
+ obd_free_memmd(mds->mds_dt_exp, &lsm);
+ rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata,
+ rec->ur_eadatalen, EA_LOV);
+ if (rc)
+ GOTO(cleanup, rc);
+ }
}
}
dp.p_ptr = req;
switch (type) {
- case S_IFREG:{
+ case S_IFREG: {
handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL);
if (IS_ERR(handle))
GOTO(cleanup, rc = PTR_ERR(handle));
EXIT;
break;
}
- case S_IFDIR:{
+ case S_IFDIR: {
int i, nstripes = 0;
/*
oa->o_fid = id_fid(rec->ur_id2);
oa->o_generation = id_gen(rec->ur_id2);
oa->o_flags |= OBD_FL_RECREATE_OBJS;
-
- /*
- * fid should be defined here. It should be
- * passedfrom client.
- */
LASSERT(oa->o_fid != 0);
}
inode->i_generation = id_gen(rec->ur_id2);
if (type != S_IFDIR) {
- /*
- * updating inode self id, as inode already
- * exists and we should make sure, its sid will
- * be the same as we reveived.
- */
down(&inode->i_sem);
rc = mds_update_inode_sid(obd, inode,
handle, rec->ur_id2);
else
MD_COUNTER_INCREMENT(obd, create);
+ /* take care of default stripe inheritance */
+ if (type == S_IFDIR) {
+ struct lov_mds_md lmm;
+ int lmm_size = sizeof(lmm);
+
+ rc = mds_get_md(obd, dir, &lmm, &lmm_size, 1, 0);
+ if (rc > 0) {
+ down(&inode->i_sem);
+ rc = fsfilt_set_md(obd, inode, handle,
+ &lmm, lmm_size, EA_LOV);
+ up(&inode->i_sem);
+ }
+ if (rc) {
+ CERROR("error on copy stripe info: rc = %d\n",
+ rc);
+ rc = 0;
+ }
+ }
+
body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body));
mds_pack_inode2body(obd, body, inode, 1);
}
} else if (S_ISREG(child_inode->i_mode)) {
mds_pack_inode2body(obd, body, child_inode, 0);
mds_pack_md(obd, req->rq_repmsg, offset + 1,
- body, child_inode, MDS_PACK_MD_LOCK);
+ body, child_inode, MDS_PACK_MD_LOCK, 0);
}
}
EXIT;
cleanup:
rc = mds_finish_transno(mds, de_src ? de_src->d_inode : NULL,
- handle, req, rc, 0);
+ handle, req, rc, 0);
switch (cleanup_phase) {
case 2:
if (rc)
op_data->id1 = *(rec->ur_id1);
rc = md_link(mds->mds_md_exp, op_data, &request);
OBD_FREE(op_data, sizeof(*op_data));
+
+ if (request)
+ ptlrpc_req_finished(request);
if (rc)
GOTO(cleanup, rc);
cleanup_phase = 2;
- if (request)
- ptlrpc_req_finished(request);
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, de_tgt_dir->d_inode->i_sb);
struct ldlm_res_id c1_res_id = { .name = {0} };
struct ldlm_res_id c2_res_id = { .name = {0} };
ldlm_policy_data_t p_policy = {.l_inodebits = {MDS_INODELOCK_UPDATE}};
- /* Only dentry should change, but the inode itself would be
- intact otherwise */
+ /* Only dentry should disappear, but the inode itself would be
+ intact otherwise. */
ldlm_policy_data_t c1_policy = {.l_inodebits = {MDS_INODELOCK_LOOKUP}};
/* If something is going to be replaced, both dentry and inode locks are
needed */
- ldlm_policy_data_t c2_policy = {.l_inodebits = {MDS_INODELOCK_LOOKUP|
- MDS_INODELOCK_UPDATE}};
+ ldlm_policy_data_t c2_policy = {.l_inodebits = {MDS_INODELOCK_FULL}};
struct ldlm_res_id *maxres_src, *maxres_tgt;
struct inode *inode;
int rc = 0, cleanup_phase = 0;
old_len - 1);
if (IS_ERR(*de_oldp)) {
rc = PTR_ERR(*de_oldp);
- CERROR("old child lookup error (%*s): %d\n",
+ CERROR("old child lookup error (%.*s): %d\n",
old_len - 1, old_name, rc);
GOTO(cleanup, rc);
}
new_len - 1);
if (IS_ERR(*de_newp)) {
rc = PTR_ERR(*de_newp);
- CERROR("new child lookup error (%*s): %d\n",
+ CERROR("new child lookup error (%.*s): %d\n",
old_len - 1, old_name, rc);
GOTO(cleanup, rc);
}
&update_mode, rec->ur_name,
rec->ur_namelen, &child_lockh, &de_old,
LCK_EX, MDS_INODELOCK_LOOKUP);
- LASSERT(rc == 0);
+ if (rc) {
+ OBD_FREE(op_data, sizeof(*op_data));
+ RETURN(rc);
+ }
+
LASSERT(de_srcdir);
LASSERT(de_srcdir->d_inode);
LASSERT(de_old);
}
- /* check if inodes point to each other. */
+ /*
+ * check if inodes point to each other. This should be checked before
+ * is_subdir() check, as for the same entries it will think that they
+ * are subdirs.
+ */
if (!(de_old->d_flags & DCACHE_CROSS_REF) &&
!(de_new->d_flags & DCACHE_CROSS_REF) &&
old_inode == new_inode)
} else if (S_ISREG(new_inode->i_mode)) {
mds_pack_inode2body(obd, body, new_inode, 0);
mds_pack_md(obd, req->rq_repmsg, 1, body,
- new_inode, MDS_PACK_MD_LOCK);
+ new_inode, MDS_PACK_MD_LOCK, 0);
}
}
struct mds_obd *mds = &obd->u.mds;
struct lov_mds_md *lmm = NULL;
struct llog_cookie *logcookies = NULL;
- int lmm_size = 0, log_unlink = 0;
+ int lmm_size, log_unlink = 0;
void *handle = NULL;
int rc, err;
ENTRY;
LASSERT(mds->mds_dt_obd != NULL);
- OBD_ALLOC(lmm, mds->mds_max_mdsize);
+ /* We don't need to do any of these other things for orphan dirs,
+ * especially not mds_get_md (may get a default LOV EA, bug 4554) */
+ if (S_ISDIR(inode->i_mode)) {
+ rc = vfs_rmdir(pending_dir, dchild);
+ if (rc)
+ CERROR("error %d unlinking dir %.*s from PENDING\n",
+ rc, dchild->d_name.len, dchild->d_name.name);
+ RETURN(rc);
+ }
+
+ lmm_size = mds->mds_max_mdsize;
+ OBD_ALLOC(lmm, lmm_size);
if (lmm == NULL)
RETURN(-ENOMEM);
- down(&inode->i_sem);
- rc = fsfilt_get_md(obd, inode, lmm, mds->mds_max_mdsize);
- up(&inode->i_sem);
-
- if (rc < 0) {
- CERROR("Error %d reading eadata for ino %lu\n",
- rc, inode->i_ino);
+ rc = mds_get_md(obd, inode, lmm, &lmm_size, 1, 0);
+ if (rc < 0)
GOTO(out_free_lmm, rc);
- } else if (rc > 0) {
- lmm_size = rc;
- rc = mds_convert_lov_ea(obd, inode, lmm, lmm_size);
- if (rc > 0)
- lmm_size = rc;
- rc = 0;
- }
handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, NULL,
le32_to_cpu(lmm->lmm_stripe_count));
GOTO(out_free_lmm, rc);
}
- if (S_ISDIR(inode->i_mode))
- rc = vfs_rmdir(pending_dir, dchild);
- else
- rc = vfs_unlink(pending_dir, dchild);
-
- if (rc)
- CERROR("error %d unlinking orphan %*s from PENDING directory\n",
+ rc = vfs_unlink(pending_dir, dchild);
+ if (rc) {
+ CERROR("error %d unlinking orphan %.*s from PENDING\n",
rc, dchild->d_name.len, dchild->d_name.name);
-
- if (!rc && lmm_size) {
+ } else if (lmm_size) {
OBD_ALLOC(logcookies, mds->mds_max_cookiesize);
if (logcookies == NULL)
rc = -ENOMEM;
CERROR("error committing orphan unlink: %d\n", err);
if (!rc)
rc = err;
- }
- if (!rc) {
+ } else if (!rc) {
rc = mds_osc_destroy_orphan(mds, inode, lmm, lmm_size,
logcookies, log_unlink);
}
struct l_linux_dirent *dirent, *n;
struct list_head dentry_list;
char d_name[LL_ID_NAMELEN];
+ unsigned long inum;
__u64 i = 0;
int rc = 0, item = 0, namlen;
ENTRY;
GOTO(err_out, rc);
list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) {
- i ++;
+ i++;
list_del(&dirent->lld_list);
namlen = strlen(dirent->lld_name);
LASSERT(sizeof(d_name) >= namlen + 1);
strcpy(d_name, dirent->lld_name);
+ inum = dirent->lld_ino;
OBD_FREE(dirent, sizeof(*dirent));
CDEBUG(D_INODE, "entry "LPU64" of PENDING DIR: %s\n",
i, d_name);
if (((namlen == 1) && !strcmp(d_name, ".")) ||
- ((namlen == 2) && !strcmp(d_name, ".."))) {
+ ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0)
continue;
- }
down(&pending_dir->i_sem);
dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen);
GOTO(next, rc = 0);
}
+ if (is_bad_inode(dchild->d_inode)) {
+ CERROR("bad orphan inode found %lu/%u\n",
+ dchild->d_inode->i_ino,
+ dchild->d_inode->i_generation);
+ GOTO(next, rc = -ENOENT);
+ }
+
child_inode = dchild->d_inode;
DOWN_READ_I_ALLOC_SEM(child_inode);
if (mds_inode_is_orphan(child_inode) &&
RETURN(-EALREADY);
mgmt_service =
- ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE,
- MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL,
- mgmt_handler, "mgmt",
- obd->obd_proc_entry);
+ ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE,
+ MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL, 30000,
+ mgmt_handler, "mgmt", obd->obd_proc_entry);
if (!mgmt_service) {
CERROR("Failed to start mgmt service\n");
RETURN(-ENOMEM);
-MODULES := obdclass llog_test
+MODULES := obdclass llog_test confobd
obdclass-objs := llog_obd.o class_obd.o
obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o
obdclass-objs += statfs_pack.o obdo.o obd_config.o mea.o
ifeq ($(PATCHLEVEL),6)
+
+confobd-objs := conf_obd.o
+
+$(obj)/conf_obd.c: $(obj)/confobd.c
+ ln -sf $< $@
+
llog_test-objs := llog-test.o
$(obj)/llog-test.c: $(obj)/llog_test.c
endif
if MODULES
-modulefs_DATA = obdclass$(KMODEXT)
+modulefs_DATA = obdclass$(KMODEXT) confobd$(KMODEXT)
noinst_DATA = llog_test$(KMODEXT)
endif # MODULES
-MOSTLYCLEANFILES = *.o *.ko *.mod.c llog-test.c
-DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c
+MOSTLYCLEANFILES = *.o *.ko *.mod.c llog-test.c
+DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) confobd.c llog_test.c
static void cleanup_obdclass(void)
{
int i;
- int leaked;
ENTRY;
misc_deregister(&obd_psdev);
class_handle_cleanup();
class_exit_uuidlist();
-
- leaked = atomic_read(&obd_memory);
- CDEBUG(leaked ? D_ERROR : D_INFO,
- "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
-
EXIT;
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_CONFOBD
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/obd_support.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_net.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_log.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/obd_class.h>
+#include <linux/lprocfs_status.h>
+
+#define LUSTRE_CONFOBD_NAME "confobd"
+
+static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
+static struct lprocfs_vars lprocfs_obd_vars[] = { {0} };
+
+LPROCFS_INIT_VARS(confobd, lprocfs_module_vars, lprocfs_obd_vars)
+
+static int confobd_fs_setup(struct obd_device *obd,
+ struct lvfs_obd_ctxt *lvfs_ctxt)
+{
+ struct conf_obd *confobd = &obd->u.conf;
+ struct lvfs_run_ctxt saved;
+ struct dentry *dentry;
+ int rc = 0;
+ ENTRY;
+
+ OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
+ obd->obd_lvfs_ctxt.pwdmnt = lvfs_ctxt->loc_mnt;
+ obd->obd_lvfs_ctxt.pwd = lvfs_ctxt->loc_mnt->mnt_root;
+ obd->obd_lvfs_ctxt.fs = get_ds();
+ /* Note: cb_ops of CONFOBD are not set yet; FIXME later. */
+
+ /*setup llog ctxt*/
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create LOGS directory: rc = %d\n", rc);
+ GOTO(err_out, rc);
+ }
+ confobd->cfobd_logs_dir = dentry;
+
+ dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create OBJECTS directory: rc = %d\n", rc);
+ GOTO(err_logs, rc);
+ }
+ confobd->cfobd_objects_dir = dentry;
+
+ dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1);
+ if (IS_ERR(dentry)) {
+ rc = PTR_ERR(dentry);
+ CERROR("cannot create PENDING directory: rc = %d\n", rc);
+ GOTO(err_logs, rc);
+ }
+ confobd->cfobd_pending_dir = dentry;
+
+err_logs:
+ if (rc)
+ l_dput(confobd->cfobd_logs_dir);
+err_out:
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ RETURN(rc);
+}
+
+static int confobd_fs_cleanup(struct obd_device *obd, int flags)
+{
+ struct conf_obd *confobd = &obd->u.conf;
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+ ENTRY;
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (confobd->cfobd_logs_dir != NULL) {
+ l_dput(confobd->cfobd_logs_dir);
+ confobd->cfobd_logs_dir = NULL;
+ }
+ if (confobd->cfobd_objects_dir != NULL) {
+ l_dput(confobd->cfobd_objects_dir);
+ confobd->cfobd_objects_dir = NULL;
+ }
+ if (confobd->cfobd_pending_dir != NULL) {
+ l_dput(confobd->cfobd_pending_dir);
+ confobd->cfobd_pending_dir = NULL;
+ }
+
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ RETURN(rc);
+}
+
+int confobd_attach(struct obd_device *dev, obd_count len, void *data)
+{
+ struct lprocfs_static_vars lvars;
+ int rc = 0;
+ ENTRY;
+
+ lprocfs_init_vars(confobd, &lvars);
+ rc = lprocfs_obd_attach(dev, lvars.obd_vars);
+ if (rc)
+ RETURN(rc);
+
+ rc = lprocfs_alloc_md_stats(dev, 0);
+ RETURN(rc);
+}
+
+int confobd_detach(struct obd_device *dev)
+{
+ int rc;
+ ENTRY;
+
+ lprocfs_free_md_stats(dev);
+ rc = lprocfs_obd_detach(dev);
+ RETURN(rc);
+}
+
+static int confobd_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+ struct conf_obd *confobd = &obd->u.conf;
+ struct lustre_cfg* lcfg = buf;
+ struct lvfs_obd_ctxt *lvfs_ctxt = NULL;
+ char *name = NULL;
+ char *fstype = NULL;
+ char *mountoption = NULL;
+ int rc = 0;
+ ENTRY;
+
+ if (lcfg->lcfg_inllen1 < 1 || !lcfg->lcfg_inlbuf1) {
+ CERROR("CONFOBD setup requires device name\n");
+ RETURN(-EINVAL);
+ }
+ if (lcfg->lcfg_inllen2 < 1 || !lcfg->lcfg_inlbuf2) {
+ CERROR("CONFOBD setup requires fstype\n");
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC(name, lcfg->lcfg_inllen1 + 1);
+ if (!name) {
+ CERROR("No Memory\n");
+ GOTO(out, rc = -ENOMEM);
+ }
+ memcpy(name, lcfg->lcfg_inlbuf1, lcfg->lcfg_inllen1);
+
+ OBD_ALLOC(fstype, lcfg->lcfg_inllen2 + 1);
+ if (!fstype) {
+ CERROR("No Memory\n");
+ GOTO(out, rc = -ENOMEM);
+ }
+ memcpy(fstype, lcfg->lcfg_inlbuf2, lcfg->lcfg_inllen2);
+
+ obd->obd_fsops = fsfilt_get_ops(fstype);
+ if (IS_ERR(obd->obd_fsops)) {
+ CERROR("No fstype %s rc=%ld\n", fstype, PTR_ERR(obd->obd_fsops));
+ GOTO(err_ops, rc = PTR_ERR(obd->obd_fsops));
+ }
+
+ if (lcfg->lcfg_inllen3 >= 1 && lcfg->lcfg_inlbuf3) {
+ OBD_ALLOC(mountoption, lcfg->lcfg_inllen3 + 1);
+ if (!mountoption) {
+ CERROR("No Memory\n");
+ GOTO(err_ops, rc = -ENOMEM);
+ }
+ memcpy(mountoption, lcfg->lcfg_inlbuf3, lcfg->lcfg_inllen3);
+ }
+ rc = lvfs_mount_fs(name, fstype, mountoption, 0, &lvfs_ctxt);
+ if (rc)
+ GOTO(err_ops, rc);
+ LASSERT(lvfs_ctxt);
+
+ confobd->cfobd_lvfs_ctxt = lvfs_ctxt;
+
+ rc = confobd_fs_setup(obd, lvfs_ctxt);
+ if (rc)
+ GOTO(err_ops, rc);
+
+ rc = obd_llog_setup(obd, &obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT,
+ obd, 0, NULL, &llog_lvfs_ops);
+ if (rc)
+ GOTO(err_ops, rc);
+
+ EXIT;
+out:
+ if (rc && lvfs_ctxt)
+ lvfs_umount_fs(lvfs_ctxt);
+ if (name)
+ OBD_FREE(name, lcfg->lcfg_inllen1 + 1);
+ if (fstype)
+ OBD_FREE(fstype, lcfg->lcfg_inllen2 + 1);
+ if (mountoption)
+ OBD_FREE(mountoption, lcfg->lcfg_inllen3 + 1);
+
+ return rc;
+err_ops:
+ fsfilt_put_ops(obd->obd_fsops);
+ goto out;
+}
+
+static int confobd_cleanup(struct obd_device *obd, int flags)
+{
+ struct conf_obd *confobd = &obd->u.conf;
+ ENTRY;
+
+ /* stop recording any log in case lconf didn't do that for us */
+ if (confobd->cfobd_cfg_llh) {
+ struct lvfs_run_ctxt saved;
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ llog_close(confobd->cfobd_cfg_llh);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ }
+
+ obd_llog_cleanup(llog_get_context(&obd->obd_llogs,
+ LLOG_CONFIG_ORIG_CTXT));
+ confobd_fs_cleanup(obd, flags);
+ if (confobd->cfobd_lvfs_ctxt)
+ lvfs_umount_fs(confobd->cfobd_lvfs_ctxt);
+
+ if (!list_empty(&obd->obd_exports))
+ return (-EBUSY);
+ fsfilt_put_ops(obd->obd_fsops);
+ RETURN(0);
+}
+
+static int confobd_iocontrol(unsigned int cmd, struct obd_export *exp,
+ int len, void *karg, void *uarg)
+{
+ static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" };
+ struct obd_device *obd = exp->exp_obd;
+ struct conf_obd *confobd = &obd->u.conf;
+ struct obd_ioctl_data *data = karg;
+ struct lvfs_run_ctxt saved;
+ int rc = 0;
+ ENTRY;
+
+ CDEBUG(D_INFO, "ioctl cmd %x\n", cmd);
+ switch (cmd) {
+ case OBD_IOC_CLEAR_LOG: {
+ char *name = data->ioc_inlbuf1;
+ if (confobd->cfobd_cfg_llh)
+ RETURN(-EBUSY);
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_open(llog_get_context(&obd->obd_llogs,
+ LLOG_CONFIG_ORIG_CTXT),
+ &confobd->cfobd_cfg_llh, NULL, name,
+ OBD_LLOG_FL_CREATE);
+ if (rc == 0) {
+ llog_init_handle(confobd->cfobd_cfg_llh,
+ LLOG_F_IS_PLAIN, NULL);
+
+ rc = llog_destroy(confobd->cfobd_cfg_llh);
+ llog_free_handle(confobd->cfobd_cfg_llh);
+ }
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ confobd->cfobd_cfg_llh = NULL;
+ RETURN(rc);
+ }
+ case OBD_IOC_RECORD: {
+ char *name = data->ioc_inlbuf1;
+ if (confobd->cfobd_cfg_llh)
+ RETURN(-EBUSY);
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_open(llog_get_context(&obd->obd_llogs,
+ LLOG_CONFIG_ORIG_CTXT),
+ &confobd->cfobd_cfg_llh, NULL, name,
+ OBD_LLOG_FL_CREATE);
+ if (rc == 0)
+ llog_init_handle(confobd->cfobd_cfg_llh,
+ LLOG_F_IS_PLAIN, &cfg_uuid);
+ else
+ confobd->cfobd_cfg_llh = NULL;
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ RETURN(rc);
+ }
+ case OBD_IOC_ENDRECORD: {
+ if (!confobd->cfobd_cfg_llh)
+ RETURN(-EBADF);
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_close(confobd->cfobd_cfg_llh);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ confobd->cfobd_cfg_llh = NULL;
+ RETURN(rc);
+ }
+ case OBD_IOC_DORECORD: {
+ char *cfg_buf;
+ struct llog_rec_hdr rec;
+ if (!confobd->cfobd_cfg_llh)
+ RETURN(-EBADF);
+
+ rec.lrh_len = llog_data_len(data->ioc_plen1);
+
+ switch(data->ioc_type) {
+ case LUSTRE_CFG_TYPE:
+ rec.lrh_type = OBD_CFG_REC;
+ break;
+ case PORTALS_CFG_TYPE:
+ rec.lrh_type = PTL_CFG_REC;
+ break;
+ default:
+ CERROR("unknown cfg record type:%d \n", data->ioc_type);
+ RETURN(-EINVAL);
+ }
+
+ OBD_ALLOC(cfg_buf, data->ioc_plen1);
+ if (cfg_buf == NULL) {
+ CERROR("No Memory\n");
+ RETURN(-ENOMEM);
+ }
+ if (copy_from_user(cfg_buf, data->ioc_pbuf1, data->ioc_plen1)) {
+ OBD_FREE(cfg_buf, data->ioc_plen1);
+ RETURN(-EFAULT);
+ }
+
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = llog_write_rec(confobd->cfobd_cfg_llh, &rec, NULL, 0,
+ cfg_buf, -1);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+ OBD_FREE(cfg_buf, data->ioc_plen1);
+ RETURN(rc);
+ }
+ case OBD_IOC_DUMP_LOG: {
+ struct llog_ctxt *ctxt =
+ llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT);
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL);
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ if (rc)
+ RETURN(rc);
+
+ RETURN(rc);
+ }
+
+ case OBD_IOC_START: {
+ struct llog_ctxt *ctxt;
+ char *conf_prof;
+ char *name = data->ioc_inlbuf1;
+ int len = strlen(name) + sizeof("-conf");
+
+ OBD_ALLOC(conf_prof, len);
+ if (!conf_prof) {
+ CERROR("no memory\n");
+ RETURN(-ENOMEM);
+ }
+ sprintf(conf_prof, "%s-conf", name);
+
+ ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT);
+ rc = class_config_process_llog(ctxt, conf_prof, NULL);
+ if (rc < 0)
+ CERROR("Unable to process log: %s\n", conf_prof);
+ OBD_FREE(conf_prof, len);
+
+ RETURN(rc);
+ }
+
+ default:
+ CDEBUG(D_INFO, "unknown command %x\n", cmd);
+ RETURN(-EINVAL);
+ }
+ RETURN(0);
+}
+
+static struct obd_ops conf_obd_ops = {
+ .o_owner = THIS_MODULE,
+ .o_setup = confobd_setup,
+ .o_cleanup = confobd_cleanup,
+ .o_attach = confobd_attach,
+ .o_detach = confobd_detach,
+ .o_iocontrol = confobd_iocontrol,
+};
+
+static int __init confobd_init(void)
+{
+ struct lprocfs_static_vars lvars;
+ ENTRY;
+
+ lprocfs_init_vars(confobd, &lvars);
+ RETURN(class_register_type(&conf_obd_ops, NULL, lvars.module_vars,
+ LUSTRE_CONFOBD_NAME));
+}
+
+static void __exit confobd_exit(void)
+{
+ class_unregister_type(LUSTRE_CONFOBD_NAME);
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre Config OBD driver");
+MODULE_LICENSE("GPL");
+
+module_init(confobd_init);
+module_exit(confobd_exit);
int minor = obd->obd_minor;
spin_lock(&obd_dev_lock);
- memset(obd, 0, sizeof(*obd));
+ obd->obd_type = NULL;
+ //memset(obd, 0, sizeof(*obd));
obd->obd_minor = minor;
spin_unlock(&obd_dev_lock);
}
{
atomic_inc(&oig->oig_refcount);
}
+
void oig_release(struct obd_io_group *oig)
{
if (atomic_dec_and_test(&oig->oig_refcount))
case OBD_IOC_LLOG_PRINT: {
LASSERT(data->ioc_inllen1);
- err = llog_process(handle, llog_print_cb, data, NULL);
+ err = llog_process(handle, class_config_dump_handler,data,NULL);
if (err == -LLOG_EEMPTY)
err = 0;
+ else
+ err = llog_process(handle, llog_print_cb, data, NULL);
GOTO(out_close, err);
}
#include <libcfs/list.h>
/* helper functions for calling the llog obd methods */
-
int obd_llog_setup(struct obd_device *obd, struct obd_llogs *llogs,
int index, struct obd_device *disk_obd, int count,
struct llog_logid *logid, struct llog_operations *op)
{
- int rc = 0;
struct llog_ctxt *ctxt;
+ int rc = 0;
ENTRY;
LASSERT(llogs);
struct mds_obd *mds = &disk_obd->u.mds;
ctxt->loc_objects_dir = mds->mds_objects_dir;
ctxt->loc_logs_dir = mds->mds_logs_dir;
+ } else if (!strcmp(disk_obd->obd_type->typ_name, "confobd")) {
+ struct conf_obd *confobd = &disk_obd->u.conf;
+ ctxt->loc_objects_dir = confobd->cfobd_objects_dir;
+ ctxt->loc_logs_dir = confobd->cfobd_logs_dir;
}
}
LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate);
kernbuf[count] = '\0';
- *val = simple_strtoull(kernbuf, &end, 0);
+ if (kernbuf[0] == '-')
+ *val = -simple_strtoull(kernbuf + 1, &end, 0);
+ else
+ *val = simple_strtoull(kernbuf, &end, 0);
if (kernbuf == end)
return -EINVAL;
return -ENOMEM;
}
- CDEBUG(D_INFO, "add uuid %s "LPX64" %u\n", uuid, nid, nal);
+ CDEBUG(D_INFO, "add uuid %s "LPX64" %x\n", uuid, nid, nal);
memcpy(data->uuid, uuid, nob);
data->nid = nid;
data->nal = nal;
spin_unlock (&g_uuid_lock);
- if (list_empty (&deathrow))
+ if (list_empty (&deathrow)) {
+ if (uuid)
+ CERROR("del non-existed uuid %s\n", uuid);
return -EINVAL;
+ }
do {
data = list_entry(deathrow.next, struct uuid_nid_data, head);
class_release_dev(obd);
case 1:
class_put_type(type);
- obd->obd_type = NULL;
}
return rc;
}
}
case LCFG_ADD_UUID: {
CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64
- " (%s), nal %d\n", lcfg->lcfg_inlbuf1, lcfg->lcfg_nid,
+ " (%s), nal %x\n", lcfg->lcfg_inlbuf1, lcfg->lcfg_nid,
portals_nid2str(lcfg->lcfg_nal, lcfg->lcfg_nid, str),
lcfg->lcfg_nal);
}
}
out:
- RETURN(err);
+ return err;
}
static int class_config_parse_handler(struct llog_handle * handle,
RETURN(rc);
}
-static int class_config_dump_handler(struct llog_handle * handle,
- struct llog_rec_hdr *rec, void *data)
+int class_config_dump_handler(struct llog_handle * handle,
+ struct llog_rec_hdr *rec, void *data)
{
int cfg_len = rec->lrh_len;
char *cfg_buf = (char*) (rec + 1);
CDEBUG(D_INFO, "pcfg command: 0x%x\n", pcfg->pcfg_command);
if (pcfg->pcfg_nal)
- CDEBUG(D_INFO, " nal: %d\n",
+ CDEBUG(D_INFO, " nal: %x\n",
pcfg->pcfg_nal);
if (pcfg->pcfg_gw_nal)
- CDEBUG(D_INFO, " gw_nal: %d\n",
+ CDEBUG(D_INFO, " gw_nal: %x\n",
pcfg->pcfg_gw_nal);
if (pcfg->pcfg_nid)
CDEBUG(D_INFO, " nid: "LPX64"\n",
#include <linux/iobuf.h>
#endif
#include <asm/div64.h>
+#include <linux/smp_lock.h>
#else
#include <liblustre.h>
#endif
static struct ec_object *
echo_find_object_locked (struct obd_device *obd, obd_id id)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct ec_object *eco = NULL;
struct list_head *el;
echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm,
void *ulsm, int ulsm_nob)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
int nob;
if (ulsm_nob < sizeof (*lsm))
static struct ec_object *
echo_allocate_object (struct obd_device *obd)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct ec_object *eco;
int rc;
echo_free_object (struct ec_object *eco)
{
struct obd_device *obd = eco->eco_device;
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
LASSERT (eco->eco_refcount == 0);
obd_free_memmd(ec->ec_exp, &eco->eco_lsm);
struct obdo *oa, void *ulsm, int ulsm_nob,
struct obd_trans_info *oti)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct ec_object *eco2;
struct ec_object *eco;
struct lov_stripe_md *lsm;
echo_get_object (struct ec_object **ecop, struct obd_device *obd,
struct obdo *oa)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct ec_object *eco;
struct ec_object *eco2;
int rc;
echo_put_object (struct ec_object *eco)
{
struct obd_device *obd = eco->eco_device;
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
/* Release caller's ref on the object.
* delete => mark for deletion when last ref goes
struct lov_stripe_md *lsm, obd_off offset,
obd_size count, struct obd_trans_info *oti)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
obd_count npages;
struct brw_page *pga;
struct brw_page *pgp;
obd_off offset, obd_size count, char *buffer,
struct obd_trans_info *oti)
{
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
obd_count npages;
struct brw_page *pga;
struct brw_page *pgp;
struct list_head eap_item;
};
+#define EAP_FROM_COOKIE(c) \
+ (LASSERT(((struct echo_async_page *)(c))->eap_magic == EAP_MAGIC), \
+ (struct echo_async_page *)(c))
+
struct echo_async_state {
spinlock_t eas_lock;
obd_off eas_next_offset;
return rc;
};
-struct echo_async_page *eap_from_cookie(void *cookie)
-{
- struct echo_async_page *eap = cookie;
- if (eap->eap_magic != EAP_MAGIC)
- return ERR_PTR(-EINVAL);
- return eap;
-};
-
static int ec_ap_make_ready(void *data, int cmd)
{
/* our pages are issued ready */
}
static void ec_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
{
- struct echo_async_page *eap;
- eap = eap_from_cookie(data);
- if (IS_ERR(eap))
- return;
+ struct echo_async_page *eap = EAP_FROM_COOKIE(data);
memcpy(oa, &eap->eap_eas->eas_oa, sizeof(*oa));
}
static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
{
- struct echo_async_page *eap = eap_from_cookie(data);
+ struct echo_async_page *eap = EAP_FROM_COOKIE(data);
struct echo_async_state *eas;
unsigned long flags;
- if (IS_ERR(eap))
- return;
eas = eap->eap_eas;
if (cmd == OBD_BRW_READ &&
struct obd_ioctl_data *data)
{
struct obd_device *obd = class_exp2obd(exp);
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct obd_trans_info dummy_oti;
struct ec_object *eco;
int rc;
void *data, int flag)
{
struct ec_object *eco = (struct ec_object *)data;
- struct echo_client_obd *ec = &(eco->eco_device->u.echo_client);
+ struct echo_client_obd *ec = &(eco->eco_device->u.echocli);
struct lustre_handle lockh;
struct list_head *el;
int found = 0;
int mode, obd_off offset, obd_size nob)
{
struct obd_device *obd = exp->exp_obd;
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct lustre_handle *ulh = obdo_handle (oa);
struct ec_object *eco;
struct ec_lock *ecl;
echo_client_cancel(struct obd_export *exp, struct obdo *oa)
{
struct obd_device *obd = exp->exp_obd;
- struct echo_client_obd *ec = &obd->u.echo_client;
+ struct echo_client_obd *ec = &obd->u.echocli;
struct lustre_handle *ulh = obdo_handle (oa);
struct ec_lock *ecl = NULL;
int found = 0;
int i;
ENTRY;
+ unlock_kernel();
+
memset(&dummy_oti, 0, sizeof(dummy_oti));
obd = exp->exp_obd;
- ec = &obd->u.echo_client;
+ ec = &obd->u.echocli;
switch (cmd) {
case OBD_IOC_CREATE: /* may create echo object */
ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
}
+ lock_kernel();
+
return rc;
}
echo_client_setup(struct obd_device *obddev, obd_count len, void *buf)
{
struct lustre_cfg* lcfg = buf;
- struct echo_client_obd *ec = &obddev->u.echo_client;
+ struct echo_client_obd *ec = &obddev->u.echocli;
struct obd_device *tgt;
struct lustre_handle conn = {0, };
struct obd_uuid echo_uuid = { "ECHO_UUID" };
INIT_LIST_HEAD (&ec->ec_objects);
ec->ec_unique = 0;
- rc = obd_connect(&conn, tgt, &echo_uuid, 0);
+ rc = obd_connect(&conn, tgt, &echo_uuid, FILTER_GROUP_ECHO);
if (rc) {
CERROR("fail to connect to device %s\n", lcfg->lcfg_inlbuf1);
return (rc);
{
struct list_head *el;
struct ec_object *eco;
- struct echo_client_obd *ec = &obddev->u.echo_client;
+ struct echo_client_obd *ec = &obddev->u.echocli;
int rc;
ENTRY;
GOTO(out, rc = -EINVAL);
obd = exp->exp_obd;
- ec = &obd->u.echo_client;
+ ec = &obd->u.echocli;
/* no more contention on export's lock list */
while (!list_empty (&exp->exp_ec_data.eced_locks)) {
{
OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd));
filter->fo_fsd = NULL;
- OBD_FREE(filter->fo_last_rcvd_slots,
- FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ OBD_FREE(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8);
filter->fo_last_rcvd_slots = NULL;
return 0;
}
RETURN(-ENOMEM);
filter->fo_fsd = fsd;
- OBD_ALLOC(filter->fo_last_rcvd_slots,
- FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long));
+ OBD_ALLOC(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8);
if (filter->fo_last_rcvd_slots == NULL) {
OBD_FREE(fsd, sizeof(*fsd));
RETURN(-ENOMEM);
LASSERT(dparent->d_inode);
*lock = filter_lock_dentry(obd, dparent, objid);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow parent lock %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "parent lock");
return dparent;
}
len = sprintf(name, LPU64, id);
if (dir_dentry == NULL) {
dparent = filter_parent_lock(obd, group, id, &lock);
- if (IS_ERR(dparent))
+ if (IS_ERR(dparent)) {
+ CERROR("%s: error getting object "LPU64":"LPU64
+ " parent: rc %ld\n", obd->obd_name,
+ id, group, PTR_ERR(dparent));
RETURN(dparent);
+ }
}
- CDEBUG(D_INODE, "looking up object O/%*s/%s\n",
+ CDEBUG(D_INODE, "looking up object O/%.*s/%s\n",
dparent->d_name.len, dparent->d_name.name, name);
dchild = /*ll_*/lookup_one_len(name, dparent, len);
if (dir_dentry == NULL)
ENTRY;
if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) {
- CERROR("destroying objid %*s nlink = %lu, count = %d\n",
+ CERROR("destroying objid %.*s nlink = %lu, count = %d\n",
dchild->d_name.len, dchild->d_name.name,
(unsigned long)inode->i_nlink,
atomic_read(&inode->i_count));
rc = vfs_unlink(dparent->d_inode, dchild);
if (rc)
- CERROR("error unlinking objid %*s: rc %d\n",
+ CERROR("error unlinking objid %.*s: rc %d\n",
dchild->d_name.len, dchild->d_name.name, rc);
RETURN(rc);
{
struct lustre_cfg* lcfg = buf;
struct filter_obd *filter = &obd->u.filter;
+ struct lvfs_obd_ctxt *lvfs_ctxt = NULL;
struct vfsmount *mnt;
char ns_name[48];
int rc = 0, i;
if (IS_ERR(obd->obd_fsops))
RETURN(PTR_ERR(obd->obd_fsops));
- mnt = do_kern_mount(lcfg->lcfg_inlbuf2, MS_NOATIME | MS_NODIRATIME,
- lcfg->lcfg_inlbuf1, option);
- rc = PTR_ERR(mnt);
- if (IS_ERR(mnt))
+ rc = lvfs_mount_fs(lcfg->lcfg_inlbuf1, lcfg->lcfg_inlbuf2,
+ option, MS_NOATIME | MS_NODIRATIME, &lvfs_ctxt);
+ if (rc) {
+ CERROR("lvfs_mount_fs failed: rc = %d\n", rc);
GOTO(err_ops, rc);
+ }
+ LASSERT(lvfs_ctxt);
+
+ mnt = lvfs_ctxt->loc_mnt;
+ filter->fo_lvfs_ctxt = lvfs_ctxt;
if (lcfg->lcfg_inllen3 > 0 && lcfg->lcfg_inlbuf3) {
if (*lcfg->lcfg_inlbuf3 == 'f') {
filter_post(obd);
err_mntput:
unlock_kernel();
- mntput(mnt);
+ lvfs_umount_fs(filter->fo_lvfs_ctxt);
filter->fo_sb = 0;
lock_kernel();
err_ops:
shrink_dcache_parent(filter->fo_sb->s_root);
filter->fo_sb = 0;
- if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1)
- CERROR("%s: mount point %p busy, mnt_count: %d\n",
- obd->obd_name, filter->fo_vfsmnt,
- atomic_read(&filter->fo_vfsmnt->mnt_count));
-
unlock_kernel();
- mntput(filter->fo_vfsmnt);
+ lvfs_umount_fs(filter->fo_lvfs_ctxt);
//destroy_buffers(filter->fo_sb->s_dev);
filter->fo_sb = NULL;
fsfilt_put_ops(obd->obd_fsops);
obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize;
obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0;
obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted;
+ int level = D_CACHE;
if (list_empty(&obd->obd_exports))
return;
spin_lock(&obd->obd_dev_lock);
list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) {
fed = &exp->exp_filter_data;
- LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
- "cli %s/%p %lu+%lu > "LPU64"\n",
- exp->exp_client_uuid.uuid, exp,
- fed->fed_grant, fed->fed_pending, maxsize);
- LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64"\n",
- exp->exp_client_uuid.uuid, exp,fed->fed_dirty,maxsize);
- CDEBUG(D_CACHE,"%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
+ if (fed->fed_grant < 0 || fed->fed_pending < 0 ||
+ fed->fed_dirty < 0)
+ level = D_ERROR;
+ if (maxsize > 0) { /* we may not have done a statfs yet */
+ LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize,
+ "cli %s/%p %ld+%ld > "LPU64"\n",
+ exp->exp_client_uuid.uuid, exp,
+ fed->fed_grant, fed->fed_pending, maxsize);
+ LASSERTF(fed->fed_dirty <= maxsize,
+ "cli %s/%p %ld > "LPU64"\n",
+ exp->exp_client_uuid.uuid, exp,
+ fed->fed_dirty, maxsize);
+ }
+ CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
tot_granted += fed->fed_grant + fed->fed_pending;
struct obd_device *obd = exp->exp_obd;
struct filter_obd *filter = &obd->u.filter;
struct filter_export_data *fed = &exp->exp_filter_data;
+ int level = D_CACHE;
spin_lock(&obd->obd_osfs_lock);
spin_lock(&exp->exp_obd->obd_dev_lock);
list_del_init(&exp->exp_obd_chain);
spin_unlock(&exp->exp_obd->obd_dev_lock);
- CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n",
+ if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0)
+ level = D_ERROR;
+ CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n",
obd->obd_name, exp->exp_client_uuid.uuid, exp,
fed->fed_dirty, fed->fed_pending, fed->fed_grant);
LASSERTF(filter->fo_tot_granted >= fed->fed_grant,
- "%s: tot_granted "LPU64" cli %s/%p fed_grant %lu\n",
+ "%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n",
obd->obd_name, filter->fo_tot_granted,
exp->exp_client_uuid.uuid, exp, fed->fed_grant);
filter->fo_tot_granted -= fed->fed_grant;
- LASSERTF(exp->exp_obd->u.filter.fo_tot_pending >= fed->fed_pending,
- "%s: tot_pending "LPU64" cli %s/%p fed_pending %lu\n",
+ LASSERTF(filter->fo_tot_pending >= fed->fed_pending,
+ "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n",
obd->obd_name, filter->fo_tot_pending,
exp->exp_client_uuid.uuid, exp, fed->fed_pending);
LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty,
- "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %lu\n",
+ "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n",
obd->obd_name, filter->fo_tot_dirty,
exp->exp_client_uuid.uuid, exp, fed->fed_dirty);
filter->fo_tot_dirty -= fed->fed_dirty;
/* flush any remaining cancel messages out to the target */
filter_sync_llogs(obd, exp);
-
class_export_put(exp);
RETURN(rc);
}
dchild = filter_id2dentry(obd, NULL, group, oa->o_id);
if (IS_ERR(dchild)) {
- CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id);
+ CERROR("%s error looking up object: "LPU64"\n",
+ what, oa->o_id);
RETURN(dchild);
}
else
rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
rc = filter_finish_transno(exp, oti, rc);
- rc2 = fsfilt_commit(exp->exp_obd, filter->fo_sb, dentry->d_inode, handle, 0);
+ rc2 = fsfilt_commit(exp->exp_obd, filter->fo_sb, dentry->d_inode,
+ handle, exp->exp_sync);
if (rc2) {
CERROR("error on commit, err = %d\n", rc2);
if (!rc)
0, 0);
ldlm_resource_putref(res);
} else if (iattr.ia_valid & ATTR_SIZE) {
- CERROR("!!! resource_get failed for object "LPU64" -- "
- "filter_setattr with no lock?\n", oa->o_id);
+ /* called from MDS. */
}
oa->o_valid = OBD_MD_FLID;
(oa->o_flags & OBD_FL_DELORPHAN)) {
if (diff >= 0)
RETURN(diff);
- if (-diff > 10000) { /* XXX make this smarter */
+ if (-diff > OST_MAX_PRECREATE) {
CERROR("ignoring bogus orphan destroy request: obdid "
LPU64" last_id "LPU64"\n",
oa->o_id, filter_last_id(filter, oa->o_gr));
(/*group != 0 ||*/ oa->o_id == 0))
RETURN(1);
- LASSERT(diff >= 0);
+ LASSERTF(diff >= 0, LPU64" - "LPU64" = %d\n", oa->o_id,
+ filter_last_id(filter, oa->o_gr), diff);
RETURN(diff);
}
}
RETURN(rc);
}
+static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+ unsigned long max_age)
+{
+ struct filter_obd *filter = &obd->u.filter;
+ int blockbits = filter->fo_sb->s_blocksize_bits;
+ int rc;
+ ENTRY;
+
+ /* at least try to account for cached pages. its still racey and
+ * might be under-reporting if clients haven't announced their
+ * caches with brw recently */
+ spin_lock(&obd->obd_osfs_lock);
+ rc = fsfilt_statfs(obd, filter->fo_sb, max_age);
+ memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+ spin_unlock(&obd->obd_osfs_lock);
+
+ CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64
+ " pending "LPU64" free "LPU64" avail "LPU64"\n",
+ filter->fo_tot_dirty, filter->fo_tot_granted,
+ filter->fo_tot_pending,
+ osfs->os_bfree << blockbits, osfs->os_bavail << blockbits);
+
+ filter_grant_sanity_check(obd, __FUNCTION__);
+
+ osfs->os_bavail -= min(osfs->os_bavail,
+ (filter->fo_tot_dirty + filter->fo_tot_pending +
+ osfs->os_bsize -1) >> blockbits);
+
+ RETURN(rc);
+}
+
/* We rely on the fact that only one thread will be creating files in a given
* group at a time, which is why we don't need an atomic filter_get_new_id.
* Even if we had that atomic function, the following race would exist:
struct filter_obd *filter;
void *handle = NULL;
void *lock = NULL;
+ struct obd_statfs *osfs;
+ unsigned long enough_time = jiffies + (obd_timeout * HZ) / 3;
__u64 next_id;
ENTRY;
if ((oa->o_valid & OBD_MD_FLFLAGS) &&
(oa->o_flags & OBD_FL_RECREATE_OBJS)) {
recreate_obj = 1;
+ } else {
+ OBD_ALLOC(osfs, sizeof(*osfs));
+ if (osfs == NULL)
+ RETURN(-ENOMEM);
+ rc = filter_statfs(obd, osfs, jiffies-HZ);
+ if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
+ CDEBUG(D_HA, "OST out of space! avail "LPU64"\n",
+ osfs->os_bavail<<filter->fo_sb->s_blocksize_bits);
+ *num = 0;
+ rc = -ENOSPC;
+ }
+ OBD_FREE(osfs, sizeof(*osfs));
+ if (rc) {
+ RETURN(rc);
+ }
}
CDEBUG(D_HA, "%s: precreating %d objects\n", obd->obd_name, *num);
* already exists
*/
if (recreate_obj) {
- CERROR("%s: Serious error: recreating obj %*s "
- "but obj already exists \n",
+ CERROR("%s: recreating existing object %.*s?\n",
obd->obd_name, dchild->d_name.len,
dchild->d_name.name);
- LBUG();
} else {
- CERROR("%s: Serious error: objid %*s already "
+ CERROR("%s: Serious error: objid %.*s already "
"exists; is this filesystem corrupt?\n",
obd->obd_name, dchild->d_name.len,
dchild->d_name.name);
if (rc)
break;
+ if (time_after(jiffies, enough_time)) {
+ CDEBUG(D_INODE,"%s: precreate slow - want %d got %d \n",
+ obd->obd_name, *num, i);
+ break;
+ }
}
*num = i;
}
if ((oa->o_valid & OBD_MD_FLFLAGS) &&
- (oa->o_flags & OBD_FL_RECREATE_OBJS)) {
+ (oa->o_flags & OBD_FL_RECREATE_OBJS))
recreate_objs = 1;
- }
obd = exp->exp_obd;
fed = &exp->exp_filter_data;
if (diff > 0) {
oa->o_id = filter_last_id(&obd->u.filter, group);
rc = filter_precreate(obd, oa, group, &diff);
- oa->o_id += diff;
+ oa->o_id = filter_last_id(&obd->u.filter, oa->o_gr);
oa->o_valid = OBD_MD_FLID;
}
}
dchild = filter_id2dentry(obd, dparent, oa->o_gr, oa->o_id);
if (IS_ERR(dchild))
- GOTO(cleanup, rc = -ENOENT);
+ GOTO(cleanup, rc = PTR_ERR(dchild));
cleanup_phase = 2;
if (dchild->d_inode == NULL) {
}
rc = filter_finish_transno(exp, oti, rc);
rc2 = fsfilt_commit(obd, filter->fo_sb, dparent->d_inode,
- handle, 0);
+ handle, exp->exp_sync);
if (rc2) {
CERROR("error on commit, err = %d\n", rc2);
if (!rc)
RETURN(rc);
}
-static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
- unsigned long max_age)
-{
- struct filter_obd *filter = &obd->u.filter;
- int blockbits = filter->fo_sb->s_blocksize_bits;
- int rc;
- ENTRY;
-
- /* at least try to account for cached pages. its still racey and
- * might be under-reporting if clients haven't announced their
- * caches with brw recently */
- spin_lock(&obd->obd_osfs_lock);
- rc = fsfilt_statfs(obd, filter->fo_sb, max_age);
- memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
- spin_unlock(&obd->obd_osfs_lock);
-
- CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64
- " pending "LPU64" free "LPU64" avail "LPU64"\n",
- filter->fo_tot_dirty, filter->fo_tot_granted,
- filter->fo_tot_pending,
- osfs->os_bfree << blockbits, osfs->os_bavail << blockbits);
-
- filter_grant_sanity_check(obd, __FUNCTION__);
-
- osfs->os_bavail -= min(osfs->os_bavail,
- (filter->fo_tot_dirty + filter->fo_tot_pending +
- osfs->os_bsize -1) >> blockbits);
-
- RETURN(rc);
-}
-
static int filter_get_info(struct obd_export *exp, __u32 keylen,
void *key, __u32 *vallen, void *val)
{
lprocfs_init_vars(filter, &lvars);
+ OBD_ALLOC(obdfilter_created_scratchpad,
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
+ sizeof(*obdfilter_created_scratchpad));
+ if (obdfilter_created_scratchpad == NULL) {
+ CERROR ("Can't allocate scratchpad\n");
+ return -ENOMEM;
+ }
+
rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars,
OBD_FILTER_DEVICENAME);
- if (rc)
+ if (rc) {
+ OBD_FREE(obdfilter_created_scratchpad,
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
+ sizeof(*obdfilter_created_scratchpad));
return rc;
+ }
rc = class_register_type(&filter_sanobd_ops, NULL, lvars.module_vars,
OBD_FILTER_SAN_DEVICENAME);
- if (rc)
+ if (rc) {
class_unregister_type(OBD_FILTER_DEVICENAME);
+ OBD_FREE(obdfilter_created_scratchpad,
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
+ sizeof(*obdfilter_created_scratchpad));
+ }
return rc;
}
{
class_unregister_type(OBD_FILTER_SAN_DEVICENAME);
class_unregister_type(OBD_FILTER_DEVICENAME);
+ OBD_FREE(obdfilter_created_scratchpad,
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
+ sizeof(*obdfilter_created_scratchpad));
}
MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8)
-#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long))
#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */
+/* We have to pass a 'created' array to fsfilt_map_inode_pages() which we
+ * then ignore. So we pre-allocate one that everyone can use... */
+#define OBDFILTER_CREATED_SCRATCHPAD_ENTRIES 1024
+extern int *obdfilter_created_scratchpad;
+
/* filter.c */
void f_dput(struct dentry *);
struct dentry *filter_id2dentry(struct obd_device *, struct dentry *dir,
#include <linux/lustre_snap.h>
#include "filter_internal.h"
+int *obdfilter_created_scratchpad;
+
static int filter_alloc_dio_page(struct obd_device *obd, struct inode *inode,
struct niobuf_local *lnb)
return grant;
}
-
static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa,
int objcount, struct obd_ioobj *obj,
int niocount, struct niobuf_remote *nb,
struct niobuf_local *lnb;
struct dentry *dentry = NULL;
struct inode *inode;
- void *iobuf = NULL;
+ void *iobuf = NULL;
int rc = 0, i, tot_bytes = 0;
unsigned long now = jiffies;
ENTRY;
filter_grant_incoming(exp, oa);
oa->o_grant = 0;
-
spin_unlock(&obd->obd_osfs_lock);
}
inode = dentry->d_inode;
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow preprw_read setup %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "preprw_read setup: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "preprw_read setup");
for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt;
i++, rnb++, lnb++) {
filter_iobuf_add_page(obd, iobuf, inode, lnb->page);
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow start_page_read %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "start_page_read: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "start_page_read");
rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp,
NULL, NULL, NULL);
fso.fso_dentry = dentry;
fso.fso_bufcnt = obj->ioo_bufcnt;
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow preprw_write setup %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "preprw_write setup: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "preprw_write setup");
spin_lock(&exp->exp_obd->obd_osfs_lock);
if (oa)
if (oa && oa->o_valid & OBD_MD_FLGRANT)
oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left);
+ /* We're finishing using body->oa as an input variable, so reset
+ * o_valid here. */
+ oa->o_valid = 0;
+
spin_unlock(&exp->exp_obd->obd_osfs_lock);
if (rc)
rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp,
NULL, NULL, NULL);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow start_page_write %lus\n", (jiffies - now) / HZ);
- else
- CDEBUG(D_INFO, "start_page_write: %lu jiffies\n",
- (jiffies - now));
+ fsfilt_check_slow(now, obd_timeout, "start_page_write");
lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES,
tot_bytes);
{
struct obd_device *obd = exp->exp_obd;
struct inode *inode = dchild->d_inode;
- struct kiobuf *iobuf = buf;
+ struct kiobuf *iobuf = buf;
int rc, create = (rw == OBD_BRW_WRITE), *created = NULL, committed = 0;
int blocks_per_page = PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0;
struct semaphore *sem = NULL;
if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
GOTO(cleanup, rc = -EINVAL);
- OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
- if (created == NULL)
- GOTO(cleanup, rc = -ENOMEM);
+ if (iobuf->nr_pages * blocks_per_page >
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES)
+ GOTO(cleanup, rc = -EINVAL);
+
cleanup_phase = 1;
rc = lock_kiovec(1, &iobuf, 1);
}
rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist,
- iobuf->nr_pages, iobuf->blocks, created,
- create, sem);
+ iobuf->nr_pages, iobuf->blocks,
+ obdfilter_created_scratchpad, create, sem);
if (rc)
GOTO(cleanup, rc);
case 2:
unlock_kiovec(1, &iobuf);
case 1:
- OBD_FREE(created, sizeof(*created) *
- iobuf->nr_pages*blocks_per_page);
case 0:
if (cleanup_phase != 3 && rw == OBD_BRW_WRITE)
up(&inode->i_sem);
return 1;
}
-
/* some kernels require alloc_kiovec callers to zero members through the use of
* map_user_kiobuf and unmap_.. we don't use those, so we have a little helper
* that makes sure we don't break the rules. */
/* If overwriting an existing block, we don't need a grant */
if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
- filter_range_is_mapped(inode, lnb->offset, lnb->len))
+ filter_range_is_mapped(inode, lnb->offset, lnb->len))
lnb->rc = 0;
if (lnb->rc) /* ENOSPC, network RPC error */
continue;
filter_iobuf_add_page(obd, iobuf, inode, lnb->page);
+
/* We expect these pages to be in offset order, but we'll
* be forgiving */
this_size = lnb->offset + lnb->len;
GOTO(cleanup, rc);
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "brw_start");
iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
/* filter_direct_io drops i_sem */
if (rc == 0)
obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "direct_io");
err = fsfilt_commit_wait(obd, inode, wait_handle);
if (err)
rc = err;
- if (obd_sync_filter)
+ if (obd_sync_filter && !err)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "commitrw commit");
cleanup:
filter_grant_commit(exp, niocount, res);
/* 512byte block min */
#define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
struct dio_request {
- atomic_t numreqs; /* number of reqs being processed */
- struct bio *bio_current;/* bio currently being constructed */
- struct bio *bio_list; /* list of completed bios */
+ atomic_t dr_numreqs; /* number of reqs being processed */
+ struct bio *dr_bios; /* list of completed bios */
wait_queue_head_t dr_wait;
- int dr_num_pages;
- int dr_rw;
- int dr_error;
- int dr_created[MAX_BLOCKS_PER_PAGE];
- unsigned long dr_blocks[MAX_BLOCKS_PER_PAGE];
- spinlock_t dr_lock;
-
+ int dr_max_pages;
+ int dr_npages;
+ int dr_error;
+ struct page **dr_pages;
+ unsigned long *dr_blocks;
+ spinlock_t dr_lock;
};
static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
unsigned long flags;
spin_lock_irqsave(&dreq->dr_lock, flags);
- bio->bi_private = dreq->bio_list;
- dreq->bio_list = bio;
- spin_unlock_irqrestore(&dreq->dr_lock, flags);
- if (atomic_dec_and_test(&dreq->numreqs))
- wake_up(&dreq->dr_wait);
+ bio->bi_private = dreq->dr_bios;
+ dreq->dr_bios = bio;
if (dreq->dr_error == 0)
dreq->dr_error = error;
+ spin_unlock_irqrestore(&dreq->dr_lock, flags);
+
+ if (atomic_dec_and_test(&dreq->dr_numreqs))
+ wake_up(&dreq->dr_wait);
+
return 0;
}
size = bio->bi_size >> 9;
return bio->bi_sector + size == sector ? 1 : 0;
}
+
+
int filter_alloc_iobuf(int rw, int num_pages, void **ret)
{
struct dio_request *dreq;
OBD_ALLOC(dreq, sizeof(*dreq));
if (dreq == NULL)
- RETURN(-ENOMEM);
-
- dreq->bio_list = NULL;
+ goto failed_0;
+
+ OBD_ALLOC(dreq->dr_pages, num_pages * sizeof(*dreq->dr_pages));
+ if (dreq->dr_pages == NULL)
+ goto failed_1;
+
+ OBD_ALLOC(dreq->dr_blocks,
+ MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks));
+ if (dreq->dr_blocks == NULL)
+ goto failed_2;
+
+ dreq->dr_bios = NULL;
init_waitqueue_head(&dreq->dr_wait);
- atomic_set(&dreq->numreqs, 0);
+ atomic_set(&dreq->dr_numreqs, 0);
spin_lock_init(&dreq->dr_lock);
- dreq->dr_num_pages = num_pages;
- dreq->dr_rw = rw;
+ dreq->dr_max_pages = num_pages;
+ dreq->dr_npages = 0;
*ret = dreq;
RETURN(0);
+
+ failed_2:
+ OBD_FREE(dreq->dr_pages,
+ num_pages * sizeof(*dreq->dr_pages));
+ failed_1:
+ OBD_FREE(dreq, sizeof(*dreq));
+ failed_0:
+ RETURN(-ENOMEM);
}
void filter_free_iobuf(void *iobuf)
{
struct dio_request *dreq = iobuf;
+ int num_pages = dreq->dr_max_pages;
/* free all bios */
- while (dreq->bio_list) {
- struct bio *bio = dreq->bio_list;
- dreq->bio_list = bio->bi_private;
+ while (dreq->dr_bios) {
+ struct bio *bio = dreq->dr_bios;
+ dreq->dr_bios = bio->bi_private;
bio_put(bio);
}
+ OBD_FREE(dreq->dr_blocks,
+ MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks));
+ OBD_FREE(dreq->dr_pages,
+ num_pages * sizeof(*dreq->dr_pages));
OBD_FREE(dreq, sizeof(*dreq));
}
struct inode *inode, struct page *page)
{
struct dio_request *dreq = iobuf;
- int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
- unsigned int len = inode->i_sb->s_blocksize, offs;
- struct bio *bio = dreq->bio_current;
- sector_t sector;
- int k, rc;
+
+ LASSERT (dreq->dr_npages < dreq->dr_max_pages);
+ dreq->dr_pages[dreq->dr_npages++] = page;
+
+ return 0;
+}
+
+int filter_do_bio(struct obd_device *obd, struct inode *inode,
+ struct dio_request *dreq, int rw)
+{
+ int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+ struct page **pages = dreq->dr_pages;
+ int npages = dreq->dr_npages;
+ unsigned long *blocks = dreq->dr_blocks;
+ int total_blocks = npages * blocks_per_page;
+ int sector_bits = inode->i_sb->s_blocksize_bits - 9;
+ unsigned int blocksize = inode->i_sb->s_blocksize;
+ struct bio *bio = NULL;
+ struct page *page;
+ unsigned int page_offset;
+ sector_t sector;
+ int nblocks;
+ int block_idx;
+ int page_idx;
+ int i;
+ int rc = 0;
ENTRY;
- /* get block number for next page */
- rc = fsfilt_map_inode_pages(obd, inode, &page, 1, dreq->dr_blocks,
- dreq->dr_created,
- dreq->dr_rw == OBD_BRW_WRITE, NULL);
- if (rc)
- RETURN(rc);
+ LASSERT(dreq->dr_npages == npages);
+ LASSERT(total_blocks <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES);
- for (k = 0, offs = 0; k < blocks_per_page; k++, offs += len) {
- if (dreq->dr_created[k] == -1) {
- memset(kmap(page) + offs, 0, len);
- kunmap(page);
- continue;
- }
+ for (page_idx = 0, block_idx = 0;
+ page_idx < npages;
+ page_idx++, block_idx += blocks_per_page) {
+
+ page = pages[page_idx];
+ LASSERT (block_idx + blocks_per_page <= total_blocks);
+
+ for (i = 0, page_offset = 0;
+ i < blocks_per_page;
+ i += nblocks, page_offset += blocksize * nblocks) {
+
+ nblocks = 1;
- sector = dreq->dr_blocks[k] <<(inode->i_sb->s_blocksize_bits-9);
-
- if (!bio || !can_be_merged(bio, sector) ||
- !bio_add_page(bio, page, len, offs)) {
- if (bio) {
- atomic_inc(&dreq->numreqs);
- /* FIXME
- filter_tally_write(&obd->u.filter,dreq->maplist,
- dreq->nr_pages,dreq->blocks,
- blocks_per_page);
- */
- fsfilt_send_bio(dreq->dr_rw, obd, inode, bio);
- dreq->bio_current = bio = NULL;
+ if (blocks[block_idx + i] == 0) { /* hole */
+ LASSERT(rw == OBD_BRW_READ);
+ memset(kmap(page) + page_offset, 0, blocksize);
+ kunmap(page);
+ continue;
}
+
+ sector = blocks[block_idx + i] << sector_bits;
+
+ /* Additional contiguous file blocks? */
+ while (i + nblocks < blocks_per_page &&
+ (sector + nblocks*(blocksize>>9)) ==
+ (blocks[block_idx + i + nblocks] << sector_bits))
+ nblocks++;
+
+ if (bio != NULL &&
+ can_be_merged(bio, sector) &&
+ bio_add_page(bio, page,
+ blocksize * nblocks, page_offset) != 0)
+ continue; /* added this frag OK */
+
+ if (bio != NULL) {
+ request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+ /* Dang! I have to fragment this I/O */
+ CDEBUG(D_INODE, "bio++ sz %d vcnt %d(%d) "
+ "sectors %d(%d) psg %d(%d) hsg %d(%d)\n",
+ bio->bi_size,
+ bio->bi_vcnt, bio->bi_max_vecs,
+ bio->bi_size >> 9, q->max_sectors,
+ bio_phys_segments(q, bio),
+ q->max_phys_segments,
+ bio_hw_segments(q, bio),
+ q->max_hw_segments);
+
+ atomic_inc(&dreq->dr_numreqs);
+ rc = fsfilt_send_bio(rw, obd, inode, bio);
+ if (rc < 0) {
+ CERROR("Can't send bio: %d\n", rc);
+ /* OK do dec; we do the waiting */
+ atomic_dec(&dreq->dr_numreqs);
+ goto out;
+ }
+ rc = 0;
+
+ bio = NULL;
+ }
+
/* allocate new bio */
- dreq->bio_current = bio =
- bio_alloc(GFP_NOIO, dreq->dr_num_pages *
- blocks_per_page);
+ bio = bio_alloc(GFP_NOIO,
+ (npages - page_idx) * blocks_per_page);
+ if (bio == NULL) {
+ CERROR ("Can't allocate bio\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_sector = sector;
bio->bi_end_io = dio_complete_routine;
bio->bi_private = dreq;
- if (!bio_add_page(bio, page, len, offs))
- LBUG();
+ rc = bio_add_page(bio, page,
+ blocksize * nblocks, page_offset);
+ LASSERT (rc != 0);
}
}
- dreq->dr_num_pages--;
- RETURN(0);
+ if (bio != NULL) {
+ atomic_inc(&dreq->dr_numreqs);
+ rc = fsfilt_send_bio(rw, obd, inode, bio);
+ if (rc >= 0) {
+ rc = 0;
+ } else {
+ CERROR("Can't send bio: %d\n", rc);
+ /* OK do dec; we do the waiting */
+ atomic_dec(&dreq->dr_numreqs);
+ }
+ }
+
+ out:
+ wait_event(dreq->dr_wait, atomic_read(&dreq->dr_numreqs) == 0);
+
+ if (rc == 0)
+ rc = dreq->dr_error;
+ RETURN(rc);
}
static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
struct obd_export *exp, struct iattr *attr,
struct obd_trans_info *oti, void **wait_handle)
{
- struct dio_request *dreq = iobuf;
+ struct obd_device *obd = exp->exp_obd;
struct inode *inode = dchild->d_inode;
- int rc;
+ struct dio_request *dreq = iobuf;
+ int rc, rc2;
ENTRY;
LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw);
+ LASSERTF(dreq->dr_npages <= dreq->dr_max_pages, "%d,%d\n",
+ dreq->dr_npages, dreq->dr_max_pages);
+
+ if (dreq->dr_npages == 0)
+ RETURN(0);
+
+ if (dreq->dr_npages > OBDFILTER_CREATED_SCRATCHPAD_ENTRIES)
+ RETURN(-EINVAL);
+
+ rc = fsfilt_map_inode_pages(obd, inode,
+ dreq->dr_pages, dreq->dr_npages,
+ dreq->dr_blocks,
+ obdfilter_created_scratchpad,
+ rw == OBD_BRW_WRITE, NULL);
+
+ if (rw == OBD_BRW_WRITE) {
+ if (rc == 0) {
+#if 0
+ filter_tally_write(&obd->u.filter,
+ dreq->dr_pages,
+ dreq->dr_page_idx,
+ dreq->dr_blocks,
+ blocks_per_page);
+#endif
+ if (attr->ia_size > inode->i_size)
+ attr->ia_valid |= ATTR_SIZE;
+ rc = fsfilt_setattr(obd, dchild,
+ oti->oti_handle, attr, 0);
+ }
+
+ up(&inode->i_sem);
+
+ rc2 = filter_finish_transno(exp, oti, 0);
+ if (rc2 != 0)
+ CERROR("can't close transaction: %d\n", rc);
+
+ if (rc == 0)
+ rc = rc2;
+ if (rc != 0)
+ RETURN(rc);
+ }
/* This is nearly osync_inode, without the waiting
rc = generic_osync_inode(inode, inode->i_mapping,
OSYNC_DATA|OSYNC_METADATA); */
rc = filemap_fdatawrite(inode->i_mapping);
+ rc2 = sync_mapping_buffers(inode->i_mapping);
if (rc == 0)
- rc = sync_mapping_buffers(inode->i_mapping);
+ rc = rc2;
+ rc2 = filemap_fdatawait(inode->i_mapping);
if (rc == 0)
- rc = filemap_fdatawait(inode->i_mapping);
- if (rc < 0)
- GOTO(cleanup, rc);
+ rc = rc2;
- if (rw == OBD_BRW_WRITE)
- up(&inode->i_sem);
+ if (rc != 0)
+ RETURN(rc);
/* be careful to call this after fsync_inode_data_buffers has waited
* for IO to complete before we evict it from the cache */
filter_clear_page_cache(inode, iobuf);
- if (dreq->bio_current != NULL) {
- atomic_inc(&dreq->numreqs);
- fsfilt_send_bio(rw, exp->exp_obd, inode, dreq->bio_current);
- dreq->bio_current = NULL;
- }
-
- /* time to wait for I/O completion */
- wait_event(dreq->dr_wait, atomic_read(&dreq->numreqs) == 0);
-
- rc = dreq->dr_error;
- if (rw == OBD_BRW_WRITE && rc == 0) {
- /* FIXME:
- filter_tally_write(&obd->u.filter, dreq->maplist,
- dreq->nr_pages, dreq->blocks,
- blocks_per_page);
- */
-
- if (attr->ia_size > inode->i_size) {
- CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
- attr->ia_size);
-
- attr->ia_valid |= ATTR_SIZE;
- down(&inode->i_sem);
- fsfilt_setattr(exp->exp_obd, dchild, oti->oti_handle,
- attr, 0);
- up(&inode->i_sem);
- }
- }
-
-cleanup:
- RETURN(rc);
+ RETURN(filter_do_bio(obd, inode, dreq, rw));
}
/* See if there are unallocated parts in given file region */
unsigned long now = jiffies;
int i, err, cleanup_phase = 0;
struct obd_device *obd = exp->exp_obd;
-
+ int total_size = 0;
ENTRY;
LASSERT(oti != NULL);
if (rc != 0)
GOTO(cleanup, rc);
-
- inode = res->dentry->d_inode;
-
+
rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, (void **)&dreq);
if (rc)
GOTO(cleanup, rc);
-
cleanup_phase = 1;
+
fso.fso_dentry = res->dentry;
fso.fso_bufcnt = obj->ioo_bufcnt;
+ inode = res->dentry->d_inode;
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- cleanup_phase = 2;
-
- generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA);
-
- oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res,
- oti);
- if (IS_ERR(oti->oti_handle)) {
- rc = PTR_ERR(oti->oti_handle);
- CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
- "error starting transaction: rc = %d\n", rc);
- oti->oti_handle = NULL;
- GOTO(cleanup, rc);
- }
-
- /* have to call fsfilt_commit() from this point on */
-
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
-
- down(&inode->i_sem);
for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
loff_t this_size;
filter_range_is_mapped(inode, lnb->offset, lnb->len))
lnb->rc = 0;
- if (lnb->rc) /* ENOSPC, network RPC error, etc. */
+ if (lnb->rc) { /* ENOSPC, network RPC error, etc. */
+ CDEBUG(D_INODE, "Skipping [%d] == %d\n", i, lnb->rc);
continue;
+ }
err = filter_iobuf_add_page(obd, dreq, inode, lnb->page);
- if (err != 0) {
- lnb->rc = err;
- continue;
- }
+ LASSERT (err == 0);
+
+ total_size += lnb->len;
/* we expect these pages to be in offset order, but we'll
* be forgiving */
if (this_size > iattr.ia_size)
iattr.ia_size = this_size;
}
+#if 0
+ /* I use this when I'm checking our lovely 1M I/Os reach the disk -eeb */
+ if (total_size != (1<<20))
+ CWARN("total size %d (%d pages)\n",
+ total_size, total_size/PAGE_SIZE);
+#endif
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ cleanup_phase = 2;
+
+ down(&inode->i_sem);
+ oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res,
+ oti);
+ if (IS_ERR(oti->oti_handle)) {
+ up(&inode->i_sem);
+ rc = PTR_ERR(oti->oti_handle);
+ CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+ "error starting transaction: rc = %d\n", rc);
+ oti->oti_handle = NULL;
+ GOTO(cleanup, rc);
+ }
+ /* have to call fsfilt_commit() from this point on */
+
+ fsfilt_check_slow(now, obd_timeout, "brw_start");
iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
+ /* filter_direct_io drops i_sem */
rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, dreq, exp, &iattr,
oti, NULL);
- rc = filter_finish_transno(exp, oti, rc);
-
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+ if (rc == 0)
+ obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
+ fsfilt_check_slow(now, obd_timeout, "direct_io");
err = fsfilt_commit(obd, obd->u.filter.fo_sb, inode, oti->oti_handle,
obd_sync_filter);
if (err)
rc = err;
- if (obd_sync_filter)
+ if (obd_sync_filter && !err)
LASSERT(oti->oti_transno <= obd->obd_last_committed);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "commitrw commit");
cleanup:
filter_grant_commit(exp, niocount, res);
lvb->lvb_mtime, new->lvb_mtime);
lvb->lvb_mtime = new->lvb_mtime;
}
- if (new->lvb_blocks > lvb->lvb_blocks || !increase) {
- CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb blocks: "
+ if (new->lvb_atime > lvb->lvb_atime || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime: "
LPU64" -> "LPU64"\n", res->lr_name.name[0],
- lvb->lvb_blocks, new->lvb_blocks);
- lvb->lvb_blocks = new->lvb_blocks;
+ lvb->lvb_atime, new->lvb_atime);
+ lvb->lvb_atime = new->lvb_atime;
+ }
+ if (new->lvb_ctime > lvb->lvb_ctime || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime: "
+ LPU64" -> "LPU64"\n", res->lr_name.name[0],
+ lvb->lvb_ctime, new->lvb_ctime);
+ lvb->lvb_ctime = new->lvb_ctime;
}
- GOTO(out, rc = 0);
}
/* Update the LVB from the disk inode */
lvb->lvb_mtime, LTIME_S(dentry->d_inode->i_mtime));
lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime);
}
+ if (LTIME_S(dentry->d_inode->i_atime) > lvb->lvb_atime || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime from disk: "
+ LPU64" -> %lu\n", res->lr_name.name[0],
+ lvb->lvb_atime, LTIME_S(dentry->d_inode->i_atime));
+ lvb->lvb_atime = LTIME_S(dentry->d_inode->i_atime);
+ }
+ if (LTIME_S(dentry->d_inode->i_ctime) > lvb->lvb_ctime || !increase) {
+ CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime from disk: "
+ LPU64" -> %lu\n", res->lr_name.name[0],
+ lvb->lvb_ctime, LTIME_S(dentry->d_inode->i_ctime));
+ lvb->lvb_ctime = LTIME_S(dentry->d_inode->i_ctime);
+ }
CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb blocks from disk: "
LPU64" -> %lu\n", res->lr_name.name[0],
lvb->lvb_blocks, dentry->d_inode->i_blocks);
/* this sampling races with updates */
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "\n\t\t\tread\t\t\twrite\n");
return rc;
}
-int osc_rd_create_low_wm(char *page, char **start, off_t off, int count,
- int *eof, void *data)
-{
- struct obd_device *obd = data;
-
- if (obd == NULL)
- return 0;
-
- return snprintf(page, count, "%d\n",
- obd->u.cli.cl_oscc.oscc_kick_barrier);
-}
-
-int osc_wr_create_low_wm(struct file *file, const char *buffer,
- unsigned long count, void *data)
-{
- struct obd_device *obd = data;
- int val, rc;
-
- if (obd == NULL)
- return 0;
-
- rc = lprocfs_write_helper(buffer, count, &val);
- if (rc)
- return rc;
-
- if (val < 0)
- return -ERANGE;
-
- spin_lock(&obd->obd_dev_lock);
- obd->u.cli.cl_oscc.oscc_kick_barrier = val;
- spin_unlock(&obd->obd_dev_lock);
-
- return count;
-}
-
int osc_rd_create_count(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
{ "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 },
{ "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 },
{ "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 },
- {"create_low_watermark", osc_rd_create_low_wm, osc_wr_create_low_wm, 0},
{ "create_count", osc_rd_create_count, osc_wr_create_count, 0 },
{ "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 },
{ "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 },
spin_lock_irqsave(&cli->cl_loi_list_lock, flags);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "read RPCs in flight: %d\n",
#include <linux/obd_class.h>
#include "osc_internal.h"
-static int osc_interpret_create(struct ptlrpc_request *req, void *data,
- int rc)
+static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc)
{
struct osc_creator *oscc;
struct ost_body *body = NULL;
oscc = req->rq_async_args.pointer_arg[0];
spin_lock(&oscc->oscc_lock);
oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
- if (body)
- oscc->oscc_last_id = body->oa.o_id;
- if (rc == -ENOSPC) {
+ if (rc == -ENOSPC || rc == -EROFS) {
oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+ if (body && rc == -ENOSPC) {
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
+ oscc->oscc_last_id = body->oa.o_id;
+ }
spin_unlock(&oscc->oscc_lock);
DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
} else if (rc != 0 && rc != -EIO) {
oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
spin_unlock(&oscc->oscc_lock);
DEBUG_REQ(D_ERROR, req,
"unknown rc %d from async create: failing oscc", rc);
ptlrpc_fail_import(req->rq_import, req->rq_import_generation);
} else {
+ if (rc == 0) {
+ oscc->oscc_flags &= ~OSCC_FLAG_LOW;
+ if (body) {
+ int diff = body->oa.o_id - oscc->oscc_last_id;
+ if (diff != oscc->oscc_grow_count)
+ oscc->oscc_grow_count =
+ max(diff/3, OST_MIN_PRECREATE);
+ oscc->oscc_last_id = body->oa.o_id;
+ }
+ }
spin_unlock(&oscc->oscc_lock);
}
ENTRY;
spin_lock(&oscc->oscc_lock);
+ if (oscc->oscc_grow_count < OST_MAX_PRECREATE &&
+ !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) &&
+ (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <=
+ (oscc->oscc_grow_count / 4 + 1)) {
+ oscc->oscc_flags |= OSCC_FLAG_LOW;
+ oscc->oscc_grow_count *= 2;
+ }
+
+ if (oscc->oscc_grow_count > OST_MAX_PRECREATE / 2)
+ oscc->oscc_grow_count = OST_MAX_PRECREATE / 2;
+
if (oscc->oscc_flags & OSCC_FLAG_CREATING ||
oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
spin_unlock(&oscc->oscc_lock);
int rc = 0;
ENTRY;
- if (oscc_has_objects(oscc, oscc->oscc_kick_barrier))
+ if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2))
RETURN(0);
if (!wait)
RETURN(rc);
}
-int oscc_recovering(struct osc_creator *oscc)
+int oscc_recovering(struct osc_creator *oscc)
{
int recov = 0;
oa->o_valid |= OBD_MD_FLID;
oa->o_id = oscc->oscc_next_id - 1;
- CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n",
- exp->exp_obd->obd_name, oa->o_id);
+ CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n",
+ oscc->oscc_obd->obd_name, oa->o_id);
rc = osc_real_create(exp, oa, ea, NULL);
if (oscc->oscc_obd == NULL) {
oscc->oscc_flags |= OSCC_FLAG_NOSPC;
oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
oscc->oscc_last_id = oa->o_id;
-
- CDEBUG(D_HA, "%s: oscc recovery finished: %d\n",
- exp->exp_obd->obd_name, rc);
+
+ CDEBUG(D_HA, "%s: oscc recovery finished: %d\n",
+ oscc->oscc_obd->obd_name, rc);
wake_up(&oscc->oscc_waitq);
} else {
- CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n",
- exp->exp_obd->obd_name, rc);
+ CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n",
+ oscc->oscc_obd->obd_name, rc);
}
spin_unlock(&oscc->oscc_lock);
}
spin_lock(&oscc->oscc_lock);
+ if (oscc->oscc_flags & OSCC_FLAG_EXITING) {
+ spin_unlock(&oscc->oscc_lock);
+ break;
+ }
+
if (oscc->oscc_last_id >= oscc->oscc_next_id) {
memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
oa->o_id = oscc->oscc_next_id;
}
spin_unlock(&oscc->oscc_lock);
rc = oscc_precreate(oscc, try_again);
- if (rc == -EIO)
+ if (rc)
break;
}
spin_lock_init(&oscc->oscc_lock);
oscc->oscc_obd = obd;
oscc->oscc_kick_barrier = 100;
- oscc->oscc_grow_count = 36;
oscc->oscc_max_grow_count = 2000;
+ oscc->oscc_grow_count = OST_MIN_PRECREATE;
oscc->oscc_next_id = 2;
oscc->oscc_last_id = 1;
void *oap_caller_data;
};
+#define OAP_FROM_COOKIE(c) \
+ (LASSERT(((struct osc_async_page *)(c))->oap_magic == OAP_MAGIC), \
+ (struct osc_async_page *)(c))
+
struct osc_cache_waiter {
struct list_head ocw_entry;
wait_queue_head_t ocw_waitq;
int ocw_rc;
};
-#define OSCC_FLAG_RECOVERING 1
-#define OSCC_FLAG_CREATING 2
-#define OSCC_FLAG_NOSPC 4 /* can't create more objects on this OST */
-#define OSCC_FLAG_SYNC_IN_PROGRESS 8 /* only allow one thread to sync */
+#define OSCC_FLAG_RECOVERING 0x01
+#define OSCC_FLAG_CREATING 0x02
+#define OSCC_FLAG_NOSPC 0x04 /* can't create more objects on OST */
+#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */
+#define OSCC_FLAG_LOW 0x10
+#define OSCC_FLAG_EXITING 0x20
int osc_create(struct obd_export *exp, struct obdo *oa,
struct lov_stripe_md **ea, struct obd_trans_info *oti);
}
static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *md, obd_count page_count,
+ struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page *pga, struct obd_trans_info *oti)
{
ENTRY;
sort_brw_pages(pga, pages_per_brw);
pages_per_brw = check_elan_limit(pga, pages_per_brw);
- rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga);
+ rc = osc_brw_internal(cmd, exp, oa, lsm, pages_per_brw, pga);
if (rc != 0)
RETURN(rc);
}
static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *md, obd_count page_count,
+ struct lov_stripe_md *lsm, obd_count page_count,
struct brw_page *pga, struct ptlrpc_request_set *set,
struct obd_trans_info *oti)
{
sort_brw_pages(pga, pages_per_brw);
pages_per_brw = check_elan_limit(pga, pages_per_brw);
- rc = async_internal(cmd, exp, oa, md, pages_per_brw, pga, set);
+ rc = async_internal(cmd, exp, oa, lsm, pages_per_brw, pga, set);
if (rc != 0)
RETURN(rc);
RETURN(0);
}
-struct osc_async_page *oap_from_cookie(void *cookie)
-{
- struct osc_async_page *oap = cookie;
- if (oap->oap_magic != OAP_MAGIC)
- return ERR_PTR(-EINVAL);
- return oap;
-};
-
static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
struct lov_oinfo *loi, void *cookie,
int cmd, obd_off off, int count,
int rc;
ENTRY;
- oap = oap_from_cookie(cookie);
- if (IS_ERR(oap))
- RETURN(PTR_ERR(oap));
+ oap = OAP_FROM_COOKIE(cookie);
if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
RETURN(-EIO);
int rc = 0;
ENTRY;
- oap = oap_from_cookie(cookie);
- if (IS_ERR(oap))
- RETURN(PTR_ERR(oap));
+ oap = OAP_FROM_COOKIE(cookie);
if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
RETURN(-EIO);
struct loi_oap_pages *lop;
ENTRY;
- oap = oap_from_cookie(cookie);
- if (IS_ERR(oap))
- RETURN(PTR_ERR(oap));
+ oap = OAP_FROM_COOKIE(cookie);
if (cli->cl_import == NULL || cli->cl_import->imp_invalid)
RETURN(-EIO);
int rc = 0;
ENTRY;
- oap = oap_from_cookie(cookie);
- if (IS_ERR(oap))
- RETURN(PTR_ERR(oap));
+ oap = OAP_FROM_COOKIE(cookie);
if (loi == NULL)
loi = &lsm->lsm_oinfo[0];
if (lock->l_ast_data && lock->l_ast_data != data) {
struct inode *new_inode = data;
struct inode *old_inode = lock->l_ast_data;
+ if (!(old_inode->i_state & I_FREEING))
+ LDLM_ERROR(lock, "inconsistent l_ast_data found");
LASSERTF(old_inode->i_state & I_FREEING,
"Found existing inode %p/%lu/%u state %lu in lock: "
"setting data to %p/%lu/%u\n", old_inode,
}
}
+ if (mode == LCK_PW) {
+ rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type,
+ policy, LCK_PR, lockh);
+ if (rc == 1) {
+ rc = ldlm_cli_convert(lockh, mode, flags);
+ if (!rc) {
+ /* Update readers/writers accounting */
+ ldlm_lock_addref(lockh, LCK_PW);
+ ldlm_lock_decref(lockh, LCK_PR);
+ osc_set_data_with_check(lockh, data);
+ RETURN(ELDLM_OK);
+ }
+ /* If the conversion failed, we need to drop refcount
+ on matched lock before we get new one */
+ /* XXX Won't it save us some efforts if we cancel PR
+ lock here? We are going to take PW lock anyway and it
+ will invalidate PR lock */
+ ldlm_lock_decref(lockh, LCK_PR);
+ if (rc != EDEADLOCK) {
+ RETURN(rc);
+ }
+ }
+ }
+
no_match:
if (*flags & LDLM_FL_HAS_INTENT) {
int size[2] = {0, sizeof(struct ldlm_request)};
RETURN(0);
}
+ if (keylen == strlen("async") && memcmp(key, "async", keylen) == 0) {
+ struct client_obd *cl = &obd->u.cli;
+ if (vallen != sizeof(int))
+ RETURN(-EINVAL);
+ cl->cl_async = *(int *)val;
+ CDEBUG(D_HA, "%s: set async = %d\n",
+ obd->obd_name, cl->cl_async);
+ RETURN(0);
+ }
+
if (keylen == strlen("sec") && memcmp(key, "sec", keylen) == 0) {
struct client_obd *cli = &exp->exp_obd->u.cli;
RETURN(-EINVAL);
}
- if (keylen < strlen("mds_conn") ||
- memcmp(key, "mds_conn", strlen("mds_conn")) != 0)
+ if (keylen < strlen("mds_conn") || memcmp(key, "mds_conn", keylen) != 0)
RETURN(-EINVAL);
ctxt = llog_get_context(&exp->exp_obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT);
static int osc_cleanup(struct obd_device *obd, int flags)
{
+ struct osc_creator *oscc = &obd->u.cli.cl_oscc;
int rc;
rc = ldlm_cli_cancel_unused(obd->obd_namespace, NULL,
if (rc)
RETURN(rc);
+ spin_lock(&oscc->oscc_lock);
+ oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
+ oscc->oscc_flags |= OSCC_FLAG_EXITING;
+ spin_unlock(&oscc->oscc_lock);
+
rc = client_obd_cleanup(obd, flags);
ptlrpcd_decref();
RETURN(rc);
spin_lock(&ost->ost_lock);
- seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n",
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
now.tv_sec, now.tv_usec);
seq_printf(seq, "\nread rpc service time: (rpcs, average ms)\n");
}
}
-static char str[PTL_NALFMT_SIZE];
-
-
static int ost_brw_read(struct ptlrpc_request *req)
{
struct ptlrpc_bulk_desc *desc;
if (rc != 0)
GOTO(out_bulk, rc);
+ /* We're finishing using body->oa as an input variable */
+ body->oa.o_valid = 0;
+
nob = 0;
for (i = 0; i < npages; i++) {
int page_rc = local_nb[i].rc;
}
if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
CERROR("bulk IO comms error: "
- "evicting %s@%s nid %s\n",
+ "evicting %s@%s id %s\n",
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
ptlrpc_fail_export(req->rq_export);
} else {
CERROR("ignoring bulk IO comms error: "
- "client reconnected %s@%s nid %s\n",
+ "client reconnected %s@%s id %s\n",
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
}
}
obd_count cksum = ost_checksum_bulk(desc);
if (client_cksum != cksum) {
- CERROR("Bad checksum: client %x, server %x NID %s\n",
+ CERROR("Bad checksum: client %x, server %x id %s\n",
client_cksum, cksum,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
cksum_counter = 1;
repbody->oa.o_cksum = cksum;
} else {
cksum_counter++;
if ((cksum_counter & (-cksum_counter)) == cksum_counter)
CWARN("Checksum %u from NID %s: %x OK\n",
- cksum_counter,
- ptlrpc_peernid2str(&req->rq_peer, str),
- cksum);
+ cksum_counter, req->rq_peerstr, cksum);
}
}
#endif
lustre_free_reply_state (req->rq_reply_state);
}
if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) {
- CERROR("%s: bulk IO comm error evicting %s@%s NID %s\n",
+ CERROR("%s: bulk IO comm error evicting %s@%s id %s\n",
req->rq_export->exp_obd->obd_name,
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
ptlrpc_fail_export(req->rq_export);
} else {
CERROR("ignoring bulk IO comms error: "
- "client reconnected %s@%s nid %s\n",
+ "client reconnected %s@%s id %s\n",
req->rq_export->exp_client_uuid.uuid,
req->rq_export->exp_connection->c_remote_uuid.uuid,
- ptlrpc_peernid2str(&req->rq_peer, str));
- }
+ req->rq_peerstr);
+ }
}
RETURN(rc);
}
}
static int ost_llog_handle_connect(struct obd_export *exp,
- struct ptlrpc_request *req)
+ struct ptlrpc_request *req)
{
struct llogd_conn_body *body;
int rc;
int ost_handle(struct ptlrpc_request *req)
{
- struct obd_trans_info trans_info = { 0, };
- struct obd_trans_info *oti = &trans_info;
int should_process, fail = OBD_FAIL_OST_ALL_REPLY_NET, rc = 0;
- struct obd_export *exp = NULL;
+ struct obd_trans_info *oti = NULL;
+ struct obd_device *obd = NULL;
ENTRY;
LASSERT(current->journal_info == NULL);
if (req->rq_reqmsg->opc == SEC_INIT ||
req->rq_reqmsg->opc == SEC_INIT_CONTINUE ||
req->rq_reqmsg->opc == SEC_FINI) {
- GOTO(out, rc = 0);
+ RETURN(0);
}
/* XXX identical to MDS */
if (req->rq_reqmsg->opc != OST_CONNECT) {
- struct obd_device *obd;
int recovering;
- exp = req->rq_export;
-
- if (exp == NULL) {
+ if (req->rq_export == NULL) {
CDEBUG(D_HA,"operation %d on unconnected OST from %s\n",
req->rq_reqmsg->opc,
- ptlrpc_peernid2str(&req->rq_peer, str));
+ req->rq_peerstr);
req->rq_status = -ENOTCONN;
- GOTO(out, rc = -ENOTCONN);
+ GOTO(out_check_req, rc = -ENOTCONN);
}
- obd = exp->exp_obd;
+ obd = req->rq_export->exp_obd;
/* Check for aborted recovery. */
spin_lock_bh(&obd->obd_processing_task_lock);
recovering = obd->obd_recovering;
spin_unlock_bh(&obd->obd_processing_task_lock);
- if (recovering) {
+ if (recovering) {
rc = ost_filter_recovery_request(req, obd,
&should_process);
if (rc || !should_process)
}
}
+ OBD_ALLOC(oti, sizeof(*oti));
+ if (oti == NULL)
+ RETURN(-ENOMEM);
+
oti_init(oti, req);
switch (req->rq_reqmsg->opc) {
case OST_CONNECT: {
CDEBUG(D_INODE, "connect\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_CONNECT_NET, out_free_oti, rc = 0);
rc = target_handle_connect(req);
+ if (!rc)
+ obd = req->rq_export->exp_obd;
break;
}
case OST_DISCONNECT:
CDEBUG(D_INODE, "disconnect\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_DISCONNECT_NET, out_free_oti, rc = 0);
rc = target_handle_disconnect(req);
break;
case OST_CREATE:
CDEBUG(D_INODE, "create\n");
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC))
- GOTO(out, rc = -ENOSPC);
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS))
- GOTO(out, rc = -EROFS);
- OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0);
- rc = ost_create(exp, req, oti);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_ENOSPC, out_check_req, rc = -ENOSPC);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_CREATE_NET, out_free_oti, rc = 0);
+ rc = ost_create(req->rq_export, req, oti);
break;
case OST_DESTROY:
CDEBUG(D_INODE, "destroy\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0);
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS))
- GOTO(out, rc = -EROFS);
- rc = ost_destroy(exp, req, oti);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_DESTROY_NET, out_free_oti, rc = 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS);
+ rc = ost_destroy(req->rq_export, req, oti);
break;
case OST_GETATTR:
CDEBUG(D_INODE, "getattr\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0);
- rc = ost_getattr(exp, req);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_GETATTR_NET, out_free_oti, rc = 0);
+ rc = ost_getattr(req->rq_export, req);
break;
case OST_SETATTR:
CDEBUG(D_INODE, "setattr\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0);
- rc = ost_setattr(exp, req, oti);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_SETATTR_NET, out_free_oti, rc = 0);
+ rc = ost_setattr(req->rq_export, req, oti);
break;
case OST_WRITE:
CDEBUG(D_INODE, "write\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC))
- GOTO(out, rc = -ENOSPC);
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS))
- GOTO(out, rc = -EROFS);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_ENOSPC, out_check_req, rc = -ENOSPC);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS);
rc = ost_brw_write(req, oti);
LASSERT(current->journal_info == NULL);
/* ost_brw sends its own replies */
- RETURN(rc);
+ GOTO(out_free_oti, rc);
case OST_READ:
CDEBUG(D_INODE, "read\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0);
rc = ost_brw_read(req);
LASSERT(current->journal_info == NULL);
/* ost_brw sends its own replies */
- RETURN(rc);
+ GOTO(out_free_oti, rc);
case OST_SAN_READ:
CDEBUG(D_INODE, "san read\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0);
rc = ost_san_brw(req, OBD_BRW_READ);
/* ost_san_brw sends its own replies */
- RETURN(rc);
+ GOTO(out_free_oti, rc);
case OST_SAN_WRITE:
CDEBUG(D_INODE, "san write\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0);
rc = ost_san_brw(req, OBD_BRW_WRITE);
/* ost_san_brw sends its own replies */
- RETURN(rc);
+ GOTO(out_free_oti, rc);
case OST_PUNCH:
CDEBUG(D_INODE, "punch\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0);
- if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS))
- GOTO(out, rc = -EROFS);
- rc = ost_punch(exp, req, oti);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_PUNCH_NET, out_free_oti, rc = 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS);
+ rc = ost_punch(req->rq_export, req, oti);
break;
case OST_STATFS:
CDEBUG(D_INODE, "statfs\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_STATFS_NET, out_free_oti, rc = 0);
rc = ost_statfs(req);
break;
case OST_SYNC:
CDEBUG(D_INODE, "sync\n");
- OBD_FAIL_RETURN(OBD_FAIL_OST_SYNC_NET, 0);
- rc = ost_sync(exp, req);
+ OBD_FAIL_GOTO(OBD_FAIL_OST_SYNC_NET, out_free_oti, rc = 0);
+ rc = ost_sync(req->rq_export, req);
break;
case OST_SET_INFO:
DEBUG_REQ(D_INODE, req, "set_info");
- rc = ost_set_info(exp, req);
+ rc = ost_set_info(req->rq_export, req);
break;
case OST_GET_INFO:
DEBUG_REQ(D_INODE, req, "get_info");
- rc = ost_get_info(exp, req);
+ rc = ost_get_info(req->rq_export, req);
break;
case OBD_PING:
DEBUG_REQ(D_INODE, req, "ping");
/* FIXME - just reply status */
case LLOG_ORIGIN_CONNECT:
DEBUG_REQ(D_INODE, req, "log connect\n");
- rc = ost_llog_handle_connect(exp, req);
+ rc = ost_llog_handle_connect(req->rq_export, req);
req->rq_status = rc;
rc = lustre_pack_reply(req, 0, NULL, NULL);
if (rc)
- RETURN(rc);
- RETURN(ptlrpc_reply(req));
+ GOTO(out_free_oti, rc);
+ GOTO(out_free_oti, rc = ptlrpc_reply(req));
case OBD_LOG_CANCEL:
CDEBUG(D_INODE, "log cancel\n");
- OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_OBD_LOG_CANCEL_NET, out_free_oti, rc = 0);
rc = llog_origin_handle_cancel(req);
req->rq_status = rc;
rc = lustre_pack_reply(req, 0, NULL, NULL);
if (rc)
- RETURN(rc);
- RETURN(ptlrpc_reply(req));
+ GOTO(out_free_oti, rc);
+ GOTO(out_free_oti, rc = ptlrpc_reply(req));
case LDLM_ENQUEUE:
CDEBUG(D_INODE, "enqueue\n");
- OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_LDLM_ENQUEUE, out_free_oti, rc = 0);
rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
ldlm_server_blocking_ast,
ldlm_server_glimpse_ast);
break;
case LDLM_CONVERT:
CDEBUG(D_INODE, "convert\n");
- OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_LDLM_CONVERT, out_free_oti, rc = 0);
rc = ldlm_handle_convert(req);
break;
case LDLM_CANCEL:
CDEBUG(D_INODE, "cancel\n");
- OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0);
+ OBD_FAIL_GOTO(OBD_FAIL_LDLM_CANCEL, out_free_oti, rc = 0);
rc = ldlm_handle_cancel(req);
break;
case LDLM_BL_CALLBACK:
CERROR("Unexpected opcode %d\n", req->rq_reqmsg->opc);
req->rq_status = -ENOTSUPP;
rc = ptlrpc_error(req);
- RETURN(rc);
+ GOTO(out_free_oti, rc);
}
LASSERT(current->journal_info == NULL);
EXIT;
/* If we're DISCONNECTing, the export_data is already freed */
if (!rc && req->rq_reqmsg->opc != OST_DISCONNECT) {
- struct obd_device *obd = req->rq_export->exp_obd;
if (!obd->obd_no_transno) {
req->rq_repmsg->last_committed =
obd->obd_last_committed;
obd->obd_last_committed, req->rq_xid);
}
-out:
+out_check_req:
if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) {
- struct obd_device *obd = req->rq_export->exp_obd;
-
if (obd && obd->obd_recovering) {
DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply");
- return target_queue_final_reply(req, rc);
+ rc = target_queue_final_reply(req, rc);
+ GOTO(out_free_oti, rc);
}
/* Lost a race with recovery; let the error path DTRT. */
rc = req->rq_status = -ENOTCONN;
if (!rc)
oti_to_request(oti, req);
-
target_send_reply(req, rc, fail);
- return 0;
+ rc = 0;
+
+out_free_oti:
+ if (oti)
+ OBD_FREE(oti, sizeof(*oti));
+ return rc;
}
EXPORT_SYMBOL(ost_handle);
ost->ost_service =
ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE,
- OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
+ OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, 30000,
ost_handle, "ost",
obd->obd_proc_entry);
if (ost->ost_service == NULL) {
ost->ost_create_service =
ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE,
- OST_CREATE_PORTAL, OSC_REPLY_PORTAL,
+ OST_CREATE_PORTAL, OSC_REPLY_PORTAL, 30000,
ost_handle, "ost_create",
obd->obd_proc_entry);
if (ost->ost_create_service == NULL) {
ptlbd->ptlbd_service =
ptlrpc_init_svc(PTLBD_NBUFS, PTLBD_BUFSIZE, PTLBD_MAXREQSIZE,
- PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL,
+ PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, 30000,
ptlbd_handle, "ptlbd_sv",
obd->obd_proc_entry);
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \
+LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \
$(top_srcdir)/lustre/ldlm/ldlm_lock.c \
$(top_srcdir)/lustre/ldlm/ldlm_resource.c \
$(top_srcdir)/lustre/ldlm/ldlm_lib.c \
$(top_srcdir)/lustre/ldlm/ldlm_plain.c \
$(top_srcdir)/lustre/ldlm/ldlm_extent.c \
- $(top_srcdir)/lustre/ldlm/ldlm_request.c \
+ $(top_srcdir)/lustre/ldlm/ldlm_request.c \
$(top_srcdir)/lustre/ldlm/ldlm_lockd.c \
$(top_srcdir)/lustre/ldlm/ldlm_internal.h \
$(top_srcdir)/lustre/ldlm/ldlm_inodebits.c
endif # MODULES
MOSTLYCLEANFILES = *.o *.ko *.mod.c ldlm_*.c l_lock.c
-
DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h
if (req->rq_import->imp_replayable) {
spin_lock_irqsave(&imp->imp_lock, flags);
- if (req->rq_replay || req->rq_transno != 0)
+ if (req->rq_transno != 0)
ptlrpc_retain_replayable_request(req, imp);
else if (req->rq_commit_cb != NULL) {
- spin_unlock_irqrestore(&imp->imp_lock, flags);
+ spin_unlock_irqrestore(&imp->imp_lock, flags);
req->rq_commit_cb(req);
- spin_lock_irqsave(&imp->imp_lock, flags);
+ spin_lock_irqsave(&imp->imp_lock, flags);
}
if (req->rq_transno > imp->imp_max_transno)
}
if (req->rq_phase == RQ_PHASE_RPC) {
- if (req->rq_waiting || req->rq_resend) {
+ if (req->rq_timedout||req->rq_waiting||req->rq_resend) {
int status;
ptlrpc_unregister_reply(req);
spin_unlock_irqrestore(&imp->imp_lock,
flags);
continue;
- }
+ }
list_del_init(&req->rq_list);
if (status != 0) {
int replied = 0;
ENTRY;
+ DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)",
+ (long)req->rq_sent, LTIME_S(CURRENT_TIME) - req->rq_sent);
+
spin_lock_irqsave (&req->rq_lock, flags);
replied = req->rq_replied;
if (!replied)
ptlrpc_unregister_reply (req);
+ if (obd_dump_on_timeout)
+ portals_debug_dumplog();
+
if (req->rq_bulk != NULL)
ptlrpc_unregister_bulk (req);
/* If this request is for recovery or other primordial tasks,
* then error it out here. */
- if (req->rq_send_state != LUSTRE_IMP_FULL ||
+ if (req->rq_send_state != LUSTRE_IMP_FULL ||
imp->imp_obd->obd_no_recov) {
spin_lock_irqsave (&req->rq_lock, flags);
req->rq_status = -ETIMEDOUT;
{
struct ptlrpc_request_set *set = data;
struct list_head *tmp;
- time_t now = LTIME_S (CURRENT_TIME);
+ time_t now = LTIME_S(CURRENT_TIME);
ENTRY;
LASSERT(set != NULL);
CDEBUG(D_HA, "set %p going to sleep for %d seconds\n",
set, timeout);
lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ,
- ptlrpc_expired_set,
+ ptlrpc_expired_set,
ptlrpc_interrupted_set, set);
rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
if (set->set_interpret != NULL) {
int (*interpreter)(struct ptlrpc_request_set *set,void *,int) =
set->set_interpret;
- rc = interpreter (set, &set->set_args, rc);
+ rc = interpreter (set, set->set_arg, rc);
}
RETURN(rc);
spin_lock(&conn_lock);
list_for_each(tmp, &conn_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) &&
+ if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
peer->peer_ni == c->c_peer.peer_ni) {
ptlrpc_connection_addref(c);
GOTO(out, c);
list_for_each_safe(tmp, pos, &conn_unused_list) {
c = list_entry(tmp, struct ptlrpc_connection, c_link);
- if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) &&
+ if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 &&
peer->peer_ni == c->c_peer.peer_ni) {
ptlrpc_connection_addref(c);
list_del(&c->c_link);
#include <linux/lustre_net.h>
#include "ptlrpc_internal.h"
-#if !defined(__KERNEL__) && defined(CRAY_PORTALS)
+#if !defined(__KERNEL__) && CRAY_PORTALS
/* forward ref in events.c */
static void cray_portals_callback(ptl_event_t *ev);
#endif
"Dropping %s RPC from %s\n",
service->srv_name,
portals_id2str(srv_ni->sni_ni->pni_number,
- ev->initiator, str));
+ ev->initiator, str));
return;
}
}
do_gettimeofday(&req->rq_arrival_time);
req->rq_peer.peer_id = ev->initiator;
req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni;
+ ptlrpc_id2str(&req->rq_peer, req->rq_peerstr);
req->rq_rqbd = rqbd;
-
+#if CRAY_PORTALS
+ req->rq_uid = ev->uid;
+#endif
+
spin_lock_irqsave (&service->srv_lock, flags);
if (ev->unlinked) {
for (i = 0; i < ptlrpc_ninterfaces; i++) {
pni = &ptlrpc_interfaces[i];
+#ifndef CRAY_PORTALS
if (pni->pni_number == peer_nal) {
+#else
+ /* compatible nals but may be from different bridges */
+ if (NALID_FROM_IFACE(pni->pni_number) ==
+ NALID_FROM_IFACE(peer_nal)) {
+#endif
peer->peer_id.nid = peer_nid;
- peer->peer_id.pid = LUSTRE_SRV_PTL_PID; //#4165:only client will call this func.
+ peer->peer_id.pid = LUSTRE_SRV_PTL_PID;
peer->peer_ni = pni;
return (0);
}
}
- CERROR("Can't find ptlrpc interface for NAL %d, NID %s\n",
+ CERROR("Can't find ptlrpc interface for NAL %x, NID %s\n",
peer_nal, portals_nid2str(peer_nal, peer_nid, str));
return (-ENOENT);
}
#ifndef __KERNEL__
pid = getpid();
+#ifdef CRAY_PORTALS
+ /* hack to keep pid in range accepted by ernal */
+ pid &= 0xFF;
+ if (pid == LUSTRE_SRV_PTL_PID)
+ pid++;
+#endif
#else
pid = LUSTRE_SRV_PTL_PID;
#endif
CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid());
PtlSnprintHandle(str, sizeof(str), nih);
- CDEBUG (D_NET, "init %d %s: %s\n", number, name, str);
+ CDEBUG (D_NET, "init %x %s: %s\n", number, name, str);
pni->pni_name = name;
pni->pni_number = number;
return found_something;
}
-#ifdef CRAY_PORTALS
+#if CRAY_PORTALS
static void cray_portals_callback(ptl_event_t *ev)
{
/* We get a callback from the client Cray portals implementation
{LONAL, "lonal"},
{RANAL, "ranal"},
#else
- {CRAY_KB_ERNAL, "cray_kb_ernal"},
+ {CRAY_KERN_NAL, "cray_kern_nal"},
+ {CRAY_QK_NAL, "cray_qk_nal"},
+ {CRAY_USER_NAL, "cray_user_nal"},
#endif
};
int rc;
*/
#define DEBUG_SUBSYSTEM S_RPC
-#ifdef __KERNEL__
-# include <linux/config.h>
-# include <linux/module.h>
-# include <linux/kmod.h>
-#else
+#ifndef __KERNEL__
# include <liblustre.h>
#endif
spin_lock_irqsave(&imp->imp_lock, flags);
if (imp->imp_state == LUSTRE_IMP_FULL) {
- CERROR("%s: connection lost to %s@%s\n",
- imp->imp_obd->obd_name,
- imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid);
+ CWARN("%s: connection lost to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
spin_unlock_irqrestore(&imp->imp_lock, flags);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
if (rc)
CERROR("%s: rc = %d waiting for callback (%d != %d)\n",
imp->imp_target_uuid.uuid, rc,
- atomic_read(&imp->imp_inflight), inflight);
+ atomic_read(&imp->imp_inflight), !!in_rpc);
obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE);
}
#ifndef __KERNEL__
lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_LIBCLIENT);
#endif
+ if (obd->u.cli.cl_async) {
+ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_ASYNC);
+ }
request->rq_send_state = LUSTRE_IMP_CONNECTING;
request->rq_replen = lustre_msg_size(0, NULL);
atomic_inc(&imp->imp_replay_inflight);
req = ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL);
- if (!req)
+ if (!req) {
+ atomic_dec(&imp->imp_replay_inflight);
RETURN(-ENOMEM);
+ }
req->rq_replen = lustre_msg_size(0, NULL);
req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT;
RETURN(0);
}
+#ifdef __KERNEL__
+static int ptlrpc_invalidate_import_thread(void *data)
+{
+ struct obd_import *imp = data;
+ unsigned long flags;
+
+ ENTRY;
+
+ lock_kernel();
+ ptlrpc_daemonize();
+
+ SIGNAL_MASK_LOCK(current, flags);
+ sigfillset(¤t->blocked);
+ RECALC_SIGPENDING;
+ SIGNAL_MASK_UNLOCK(current, flags);
+ THREAD_NAME(current->comm, sizeof(current->comm), "ll_imp_inval");
+ unlock_kernel();
+
+ CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
+ imp->imp_obd->obd_name, imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
+
+ ptlrpc_invalidate_import(imp, 0);
+ IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+
+ ptlrpc_import_recovery_state_machine(imp);
+
+ RETURN(0);
+}
+#endif
+
int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
{
int rc = 0;
imp->imp_target_uuid.uuid,
imp->imp_connection->c_remote_uuid.uuid);
+#ifdef __KERNEL__
+ rc = kernel_thread(ptlrpc_invalidate_import_thread, imp,
+ CLONE_VM | CLONE_FILES);
+ if (rc < 0)
+ CERROR("error starting invalidate thread: %d\n", rc);
+ RETURN(rc);
+#else
ptlrpc_invalidate_import(imp, 1);
IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER);
+#endif
}
if (imp->imp_state == LUSTRE_IMP_REPLAY) {
GOTO(out, rc);
IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL);
ptlrpc_activate_import(imp);
- CERROR("%s: connection restored to %s@%s\n",
- imp->imp_obd->obd_name,
- imp->imp_target_uuid.uuid,
- imp->imp_connection->c_remote_uuid.uuid);
+ CWARN("%s: connection restored to %s@%s\n",
+ imp->imp_obd->obd_name,
+ imp->imp_target_uuid.uuid,
+ imp->imp_connection->c_remote_uuid.uuid);
}
if (imp->imp_state == LUSTRE_IMP_FULL) {
else
rc = PtlGet (desc->bd_md_h, peer->peer_id,
desc->bd_portal, 0, xid, 0);
-
+
if (rc != PTL_OK) {
/* Can't send, so we unlink the MD bound above. The UNLINK
* event this creates will signal completion with failure,
LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid);
desc->bd_registered = 1;
desc->bd_last_xid = req->rq_xid;
-
- rc = PtlMEAttach(peer->peer_ni->pni_ni_h,
- desc->bd_portal, desc->bd_import->imp_connection->c_peer.peer_id,
+
+ rc = PtlMEAttach(peer->peer_ni->pni_ni_h, desc->bd_portal,
+ desc->bd_import->imp_connection->c_peer.peer_id,
req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h);
if (rc != PTL_OK) {
CERROR("PtlMEAttach failed: %d\n", rc);
rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h,
request->rq_reply_portal, /* XXX FIXME bug 249 */
- connection->c_peer.peer_id, request->rq_xid, 0, PTL_UNLINK,
- PTL_INS_AFTER, &reply_me_h);
+ connection->c_peer.peer_id, request->rq_xid, 0,
+ PTL_UNLINK, PTL_INS_AFTER, &reply_me_h);
if (rc != PTL_OK) {
CERROR("PtlMEAttach failed: %d\n", rc);
LASSERT (rc == PTL_NO_SPACE);
#include "ptlrpc_internal.h"
#ifdef __KERNEL__
-#ifndef CRAY_PORTALS
+#if !CRAY_PORTALS
+
void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc)
{
LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
desc->bd_iov_count++;
}
-#else
+
+#else /* CRAY_PORTALS */
+#ifdef PTL_MD_KIOV
+#error "Conflicting compilation directives"
+#endif
+
void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc)
{
LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES);
- LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS)));
+ LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS)));
md->options |= (PTL_MD_IOVEC | PTL_MD_PHYS);
md->start = &desc->bd_iov[0];
desc->bd_iov_count++;
}
-#endif
+#endif /* CRAY_PORTALS */
#else /* !__KERNEL__ */
+
void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc)
{
+#if CRAY_PORTALS
+ LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS)));
+ LASSERT (desc->bd_iov_count == 1);
+#else
LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS)));
-
+#endif
if (desc->bd_iov_count == 1) {
md->start = desc->bd_iov[0].iov_base;
md->length = desc->bd_iov[0].iov_len;
return;
}
-#if CRAY_PORTALS
- LBUG();
-#endif
md->options |= PTL_MD_IOVEC;
md->start = &desc->bd_iov[0];
md->length = desc->bd_iov_count;
{
if (existing->iov_base + existing->iov_len == candidate->iov_base)
return 1;
- /* XXX it's good to have an warning here, but user-level echo_client
- * will hit this. reenable it when we fixed echo_client.
- */
#if 0
+ /* Enable this section to provide earlier evidence of fragmented bulk */
CERROR("Can't merge iovs %p for %x, %p for %x\n",
existing->iov_base, existing->iov_len,
candidate->iov_base, candidate->iov_len);
desc->bd_iov_count++;
}
}
-#endif
+
+#endif /* !__KERNEL__ */
spin_unlock_irqrestore(&imp->imp_lock, flags);
if (imp->imp_next_ping <= this_ping || force) {
- if (level == LUSTRE_IMP_DISCON) {
- /* wait at least a timeout before
+ if (level == LUSTRE_IMP_DISCON &&
+ !imp->imp_deactive) {
+ /* wait at least a timeout before
trying recovery again. */
imp->imp_next_ping =
ptlrpc_next_ping(imp);
imp->imp_obd->obd_no_recov) {
CDEBUG(D_HA,
"not pinging %s (in recovery "
- " or recovery disabled: %s)\n",
+ "or recovery disabled: %s)\n",
imp->imp_target_uuid.uuid,
ptlrpc_import_state_name(level));
} else if (imp->imp_pingable || force) {
atomic_read(&lcm->lcm_thread_total) == 0);
return 0;
}
+EXPORT_SYMBOL(llog_cleanup_commit_master);
static int log_process_thread(void *args)
{
argv[0], argv[1], argv[2], rc);
} else {
- CERROR("Invoked upcall %s %s %s\n",
- argv[0], argv[1], argv[2]);
+ CWARN("Invoked upcall %s %s %s\n",
+ argv[0], argv[1], argv[2]);
}
}
argv[0], argv[1], argv[2], argv[3], argv[4],rc);
} else {
- CERROR("Invoked upcall %s %s %s %s %s\n",
- argv[0], argv[1], argv[2], argv[3], argv[4]);
+ CWARN("Invoked upcall %s %s %s %s %s\n",
+ argv[0], argv[1], argv[2], argv[3], argv[4]);
}
#else
if (imp->imp_state == LUSTRE_IMP_CLOSED) {
rc = ptlrpc_connect_import(imp, NULL);
}
-
/* Wait for recovery to complete and resend. If evicted, then
this request will be errored out later.*/
spin_lock_irqsave(&failed_req->rq_lock, flags);
failed_req->rq_resend = 1;
spin_unlock_irqrestore(&failed_req->rq_lock, flags);
-
+
EXIT;
}
* requests. */
if (!active) {
ptlrpc_invalidate_import(imp, 0);
- }
+ imp->imp_deactive = 1;
+ }
/* When activating, mark import valid, and attempt recovery */
if (active) {
+ imp->imp_deactive = 0;
CDEBUG(D_HA, "setting import %s VALID\n",
imp->imp_target_uuid.uuid);
rc = ptlrpc_recover_import(imp, NULL);
{
int rc;
ENTRY;
-
+
/* force import to be disconnected. */
ptlrpc_set_import_discon(imp);
-
+
rc = ptlrpc_recover_import_no_retry(imp, new_uuid);
RETURN(rc);
OBD_FREE(req, sizeof(*req));
}
-
+
static char *
ptlrpc_alloc_request_buffer (int size)
{
char *ptr;
-
+
if (size > SVC_BUF_VMALLOC_THRESHOLD)
OBD_VMALLOC(ptr, size);
else
OBD_ALLOC(ptr, size);
-
+
return (ptr);
}
struct ptlrpc_request_buffer_desc *rqbd;
int i;
+ CDEBUG(D_RPCTRACE, "%s: allocate %d new %d-byte reqbufs (%d/%d left)\n",
+ svc->srv_name, svc->srv_nbuf_per_group, svc->srv_buf_size,
+ srv_ni->sni_nrqbd_receiving, svc->srv_nbufs);
for (i = 0; i < svc->srv_nbuf_per_group; i++) {
rqbd = ptlrpc_alloc_rqbd(srv_ni);
struct ptlrpc_service *
ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
- int req_portal, int rep_portal,
+ int req_portal, int rep_portal, int watchdog_timeout,
svc_handler_t handler, char *name,
struct proc_dir_entry *proc_entry)
{
service->srv_buf_size = bufsize;
service->srv_rep_portal = rep_portal;
service->srv_req_portal = req_portal;
+ service->srv_watchdog_timeout = watchdog_timeout;
service->srv_handler = handler;
INIT_LIST_HEAD(&service->srv_request_queue);
ptlrpc_free_server_req(req);
}
-static char str[PTL_NALFMT_SIZE];
static int
ptlrpc_server_handle_request (struct ptlrpc_service *svc)
{
if (rc != 0) {
CERROR ("error unpacking request: ptl %d from %s"
" xid "LPU64"\n", svc->srv_req_portal,
- ptlrpc_peernid2str(&request->rq_peer, str),
- request->rq_xid);
+ request->rq_peerstr, request->rq_xid);
goto out;
}
rc = -EINVAL;
if (request->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) {
CERROR("wrong packet type received (type=%u) from %s\n",
- request->rq_reqmsg->type,
- ptlrpc_peernid2str(&request->rq_peer, str));
+ request->rq_reqmsg->type, request->rq_peerstr);
goto out;
}
if (timediff / 1000000 > (long)obd_timeout) {
CERROR("Dropping timed-out opc %d request from %s"
": %ld seconds old\n", request->rq_reqmsg->opc,
- ptlrpc_peernid2str(&request->rq_peer, str),
+ request->rq_peerstr,
timediff / 1000000);
goto out;
}
atomic_read(&request->rq_export->exp_refcount) : -99),
request->rq_reqmsg->status, request->rq_xid,
request->rq_peer.peer_ni->pni_name,
- ptlrpc_peernid2str(&request->rq_peer, str),
+ request->rq_peerstr,
request->rq_reqmsg->opc);
+
request->rq_svc = svc;
rc = svc->srv_handler(request);
request->rq_svc = NULL;
+
CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:ni:nid:opc "
"%s:%s+%d:%d:"LPU64":%s:%s:%d\n", current->comm,
(request->rq_export ?
atomic_read(&request->rq_export->exp_refcount) : -99),
request->rq_reqmsg->status, request->rq_xid,
request->rq_peer.peer_ni->pni_name,
- ptlrpc_peernid2str(&request->rq_peer, str),
+ request->rq_peerstr,
request->rq_reqmsg->opc);
if (export != NULL)
CDEBUG((timediff / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA,
"request "LPU64" opc %u from NID %s processed in %ldus "
- "(%ldus total)\n", request->rq_xid,
+ "(%ldus total)\n", request->rq_xid,
request->rq_reqmsg ? request->rq_reqmsg->opc : 0,
- ptlrpc_peernid2str(&request->rq_peer, str),
+ request->rq_peerstr,
timediff, timeval_sub(&work_end, &request->rq_arrival_time));
if (svc->srv_stats != NULL && request->rq_reqmsg != NULL) {
struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
struct ptlrpc_service *svc = data->svc;
struct ptlrpc_thread *thread = data->thread;
+ struct lc_watchdog *watchdog;
unsigned long flags;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+ struct group_info *ginfo = NULL;
+#endif
ENTRY;
lock_kernel();
unlock_kernel();
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+ ginfo = groups_alloc(0);
+ if (!ginfo) {
+ thread->t_flags = SVC_RUNNING;
+ wake_up(&thread->t_ctl_waitq);
+ return (-ENOMEM);
+ }
+ set_current_groups(ginfo);
+ put_group_info(ginfo);
+#endif
+
/* Record that the thread is running */
thread->t_flags = SVC_RUNNING;
wake_up(&thread->t_ctl_waitq);
+ watchdog = lc_watchdog_add(svc->srv_watchdog_timeout,
+ LC_WATCHDOG_DEFAULT_CB, NULL);
+
spin_lock_irqsave(&svc->srv_lock, flags);
svc->srv_nthreads++;
spin_unlock_irqrestore(&svc->srv_lock, flags);
struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout,
ptlrpc_retry_rqbds, svc);
+ lc_watchdog_disable(watchdog);
+
l_wait_event_exclusive (svc->srv_waitq,
((thread->t_flags & SVC_STOPPING) != 0 &&
svc->srv_n_difficult_replies == 0) ||
svc->srv_n_active_reqs <
(svc->srv_nthreads - 1))),
&lwi);
-
+
+ lc_watchdog_touch(watchdog);
ptlrpc_check_rqbd_pools(svc);
if (!list_empty (&svc->srv_reply_queue))
spin_unlock_irqrestore(&svc->srv_lock, flags);
+ lc_watchdog_delete(watchdog);
+
CDEBUG(D_NET, "service thread exiting, process %d\n", current->pid);
return 0;
}
: ${LCONF:=/usr/sbin/lconf}
: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"}
: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"}
+: ${LCTL:=/usr/sbin/lctl}
# Source function library.
if [ -f /etc/init.d/functions ] ; then
# Check that networking is up.
[ "${NETWORKING}" = "no" ] && exit 0
-[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0
+[ -x ${LCONF} -a -x ${LCTL} ] || exit 0
+
+[ -f ${LUSTRE_CONFIG_XML} ] || ( echo "unconfigured" && exit 0 )
# Create /var/lustre directory
# This is used by snmp agent for checking lustre services \
start
}
+status() {
+ ${LCTL} dl 2>/dev/null | while read INDEX STAT MODULE NAME; do
+ case $MODULE in
+ ost|mds|osc|mdc)
+ [ "`grep -v FULL /proc/fs/lustre/*c/*/*_server_uuid`" ] \
+ && echo "recovery" || echo "running"
+ return
+ ;;
+ esac
+ done
+ echo "stopped"
+}
+
# See how we were called.
case "$1" in
start)
# Authors: Bill Nottingham <notting@redhat.com>
# Miquel van Smoorenburg, <miquels@drinkel.nl.mugnet.org>
#
-# chkconfig: 345 25 75
+# chkconfig: 345 26 74
# description: Mounts and unmounts all Lustre mount points.
#
### BEGIN INIT INFO
--- /dev/null
+# Readlink is not present on some older distributions: emulate it.
+readlink() {
+ local path=$1 ll
+
+ if [ -L "$path" ]; then
+ ll="$(LC_ALL=C ls -l "$path" 2> /dev/null)" &&
+ echo "${ll/* -> }"
+ else
+ return 1
+ fi
+}
+relink() {
+ if [ -h "$2" ]; then
+ local old=$(readlink "$2")
+ [ "$old" = "$1" ] && return 0
+ echo "Changing symlink $2 from $old to $1"
+ elif [ -e "$2" ]; then
+ echo "Replacing file $2 with symlink to $1"
+ fi
+ rm -f "$2" \
+ && ln -s "$1" "$2"
+}
--- /dev/null
+if [ -f /boot/vmlinuz-%ver_str ]; then
+ image=vmlinuz
+elif [ -f /boot/image-%ver_str ]; then
+ image=image
+elif [ -f /boot/vmlinux-%ver_str ]; then
+ image=vmlinux
+else
+ # nothing to do (UML kernels for example).
+ exit 0
+fi
+
+# If we have old symlinks, rename them to *.previous
+if [ -L /boot/$image -a -L /boot/initrd -a \
+ "$(readlink /boot/$image)" != $image-%ver_str -a \
+ "$(readlink /boot/initrd)" != initrd-%ver_str ]; then
+ mv /boot/$image /boot/$image.previous
+ mv /boot/initrd /boot/initrd.previous
+fi
+
+# update /boot/vmlinuz symlink
+relink $image-%ver_str /boot/$image
+
+if test "$YAST_IS_RUNNING" != instsys ; then
+ if [ -f /etc/fstab ]; then
+ echo Setting up /lib/modules/%ver_str
+ /sbin/update-modules.dep -v %ver_str
+ cd /boot
+ /sbin/mkinitrd -k $image-%ver_str -i initrd-%ver_str
+
+ if [ -e /boot/initrd-%ver_str ]; then
+ relink initrd-%ver_str /boot/initrd
+ else
+ rm -f /boot/initrd
+ fi
+ else
+ echo "please run mkinitrd as soon as your system is complete"
+ fi
+fi
+
+if [ "$YAST_IS_RUNNING" != instsys -a -x /sbin/new-kernel-pkg ]; then
+ # Notify boot loader that a new kernel image has been installed.
+ # (during initial installation the boot loader configuration does not
+ # yet exist when the kernel is installed, but yast kicks the boot
+ # loader itself later.)
+ /sbin/new-kernel-pkg %ver_str
+fi
--- /dev/null
+if [ -L /boot/vmlinux ]; then
+ image=vmlinux
+elif [ -L /boot/vmlinuz ]; then
+ image=vmlinuz
+elif [ -L /boot/image ]; then
+ image=image
+else
+ # nothing to do (UML kernels for example).
+ exit 0
+fi
+
+if [ "$(readlink /boot/$image)" = $image-%ver_str ]; then
+ # This may be the last kernel RPM on the system, or it may
+ # be an update. In both of those cases the symlinks will
+ # eventually be correct. Only if this kernel
+ # is removed and other kernel rpms remain installed,
+ # find the most recent of the remaining kernels, and make
+ # the symlinks point to it. This makes sure that the boot
+ # manager will always have a kernel to boot in its default
+ # configuration.
+ shopt -s nullglob
+ for image in $(cd /boot ; ls -dt $image-*); do
+ initrd=initrd-${image#*-}
+ if [ -f /boot/$image -a -f /boot/$initrd ]; then
+ relink $image /boot/${image%%%%-*}
+ relink $initrd /boot/${initrd%%%%-*}
+ break
+ fi
+ done
+ shopt -u nullglob
+fi
+
+# Created in the other kernel's %post
+case "$(readlink /boot/$image.previous)" in
+$image-%ver_str|$(readlink /boot/$image))
+ rm -f /boot/$image.previous ;;
+esac
+case "$(readlink /boot/initrd.previous)" in
+initrd-%ver_str|$(readlink /boot/initrd))
+ rm -f /boot/initrd.previous ;;
+esac
+# created in %post
+rm -f /boot/initrd-%ver_str
--- /dev/null
+old_shopt=$(shopt -p nullglob || :)
+shopt -s nullglob
+for script in /lib/modules/scripts/* ; do
+ if [ -f "$script" -a -x "$script" ] \
+ && ! "$script" --@when@ %ver_str $1 ; then
+ echo "$script failed."
+ fi
+done
+eval $old_shopt
* Bruce Fields <bfields@umich.edu>
* Copyright (c) 2000 The Regents of the University of Michigan
*
- * $Id: gss_api.h,v 1.2 2005/03/31 22:18:24 ericm Exp $
+ * $Id: gss_api.h,v 1.3 2005/04/04 13:12:39 yury Exp $
*/
#ifndef __SEC_GSS_GSS_API_H_
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
- * $Id: sec_gss.c,v 1.2 2005/03/31 22:18:24 ericm Exp $
+ * $Id: sec_gss.c,v 1.3 2005/04/04 13:12:39 yury Exp $
*/
#ifndef EXPORT_SYMTAB
static void ptlrpcs_sec_destroy(struct ptlrpc_sec *sec)
{
struct ptlrpc_sec_type *type = sec->ps_type;
- struct ptlrpc_import *imp = sec->ps_import;
+ struct obd_import *imp = sec->ps_import;
LASSERT(type && type->pst_ops);
LASSERT(type->pst_ops->destroy_sec);
!smfs_snap_test_inode(inode, opaque))
return 0;
#endif
-
return 1;
}
sargs.s_inode = dir;
sargs.s_index = index;
CDEBUG(D_VFSTRACE, "get_inode: %lu\n", hash);
-
inode = smfs_iget(sb, hash, &sargs);
-
RETURN(inode);
}
}
EXPORT_SYMBOL(smfs_rec_setattr);
-int smfs_rec_md(struct inode *inode, void *lmm, int lmm_size)
+int smfs_rec_md(struct inode *inode, void *lmm, int lmm_size,
+ enum ea_type type)
{
char *set_lmm = NULL;
- int rc = 0;
+ int rc = 0;
ENTRY;
if (!SMFS_DO_REC(S2SMI(inode->i_sb)))
RETURN(0);
if (lmm) {
- OBD_ALLOC(set_lmm, lmm_size + sizeof(lmm_size));
+ int size = lmm_size + sizeof(lmm_size) +
+ sizeof(type);
+
+ OBD_ALLOC(set_lmm, size);
if (!set_lmm)
RETURN(-ENOMEM);
+
memcpy(set_lmm, &lmm_size, sizeof(lmm_size));
- memcpy(set_lmm + sizeof(lmm_size), lmm, lmm_size);
+ memcpy(set_lmm + sizeof(lmm_size), &type, sizeof(type));
+ memcpy(set_lmm + sizeof(lmm_size) + sizeof(type), lmm, lmm_size);
+
rc = smfs_post_rec_setattr(inode, NULL, NULL, set_lmm);
if (rc) {
- CERROR("Error: Record md for inode %lu rc=%d\n",
+ CERROR("Error: Record md for inode %lu rc = %d\n",
inode->i_ino, rc);
}
+ OBD_FREE(set_lmm, size);
}
- if (set_lmm)
- OBD_FREE(set_lmm, lmm_size + sizeof(lmm_size));
RETURN(rc);
}
EXPORT_SYMBOL(smfs_rec_md);
}
int smfs_post_rec_setattr(struct inode *inode, struct dentry *dentry,
- void *data1, void *data2)
+ void *data1, void *data2)
{
- struct smfs_super_info *sinfo;
struct iattr *attr = (struct iattr *)data1;
- char *buffer = NULL, *pbuf;
int rc = 0, length = 0, buf_len = 0;
+ struct smfs_super_info *sinfo;
+ char *buffer = NULL, *pbuf;
ENTRY;
sinfo = S2SMI(inode->i_sb);
}
extern char* smfs_options(char*, char**, char**, char*, int *);
+extern void cleanup_option(void);
int smfs_fill_super(struct super_block *sb, void *data, int silent)
{
rename_many
mmap_sanity
memhog
-
+rmdirmany
# Lustre test Makefile
-AM_CPPFLAGS = $(LLCPPFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+AM_CPPFLAGS = $(LLCPPFLAGS) -I/opt/lam/include -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
AM_CFLAGS = $(LLCFLAGS)
# LDADD = -lldap
# LDADD := -lreadline -ltermcap # -lefence
noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany
noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2
noinst_PROGRAMS += mmap_sanity
+if MPITESTS
+noinst_PROGRAMS += parallel_grouplock write_append_truncate createmany_mpi
+endif
# noinst_PROGRAMS += ldaptest copy_attr
bin_PROGRAMS = mcreate munlink
endif # TESTS
stat_SOURCES = stat.c stat_fs.h
mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE)
-#write_append_truncate_CC=mpicc
-#createmany_mpi_CC=mpicc
-#parallel_grouplock_SOURCES=parallel_grouplock.c lp_utils.c
-#parallel_grouplock_CC=mpicc
-
-#copy_attr_LDADD= -lattr
mmap_sanity_SOURCES= mmap_sanity.c
+if MPITESTS
+LAM_LD_FLAGS=-L/opt/lam/lib -lmpi -llam -lpthread
+write_append_truncate_SOURCES=write_append_truncate.c
+write_append_truncate_LDADD=$(LAM_LD_FLAGS)
+createmany_mpi_SOURCES=createmany-mpi.c
+createmany_mpi_LDADD=$(LAM_LD_FLAGS)
+parallel_grouplock_SOURCES=parallel_grouplock.c lp_utils.c
+parallel_grouplock_LDADD=$(LAM_LD_FLAGS)
+endif
start mds1 --reformat $MDSLCONFARGS || return 94
start_lsvcgssd || return 501
}
+
stop_mds() {
echo "stop mds1 service on `facet_active_host mds1`"
- stop mds1 $@ || return 97
+ stop mds1 $@ || return 97
stop_lsvcgssd
}
stop_ost() {
echo "stop ost service on `facet_active_host ost`"
- stop ost $@ || return 98
+ stop ost $@ || return 98
}
mount_client() {
# cleanup may return an error from the failed
# disconnects; for now I'll consider this successful
# if all the modules have unloaded.
- umount $MOUNT &
+
+ # as MDS is down, umount without -f may cause blocking
+ # and this test will never finish. Blocking is possible
+ # as umount may want to cancel locks with RPC's and these
+ # RPC's will wait forever, as pinger thread will try to
+ # recover failed import endlessly.
+ #
+ # Thus, main point is: nobody should expect umount finish
+ # quickly and cleanly without -f flag when MDS or OST is
+ # down for sure. --umka
+ umount -f $MOUNT &
UMOUNT_PID=$!
sleep 2
echo "killing umount"
stop_lgssd
# cleanup client modules
- $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
+ $LCONF --force --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
# stop_mds is a no-op here, and should not fail
- stop_mds || return 4
- stop_ost || return 5
+ stop_mds || return 4
+
+ # this should have --force flag specified, as umount -f
+ # will skip disconnect phase and thus OST will have one
+ # extra refcount what will cause class_cleanup() failure
+ # if --force is not specified. --umka
+ stop_ost --force || return 5
lsmod | grep -q portals && return 6
return 0
}
-run_test 5 "force cleanup mds, then cleanup"
+run_test 5 "force cleanup mds, then cleanup --force"
test_5b() {
start_ost
[ -d $MOUNT ] || mkdir -p $MOUNT
$LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
start_lgssd || return 1
- llmount $mds_HOST://mds1_svc/client_facet $MOUNT && exit 1
+ llmount -o nettype=$NETTYPE $mds_HOST://mds_svc/client_facet $MOUNT && exit 2
# cleanup client modules
$LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
stop_lgssd
# stop_mds is a no-op here, and should not fail
- stop_mds || return 2
- stop_ost || return 3
+ stop_mds || return 3
+ stop_ost || return 4
- lsmod | grep -q portals && return 4
+ lsmod | grep -q portals && return 5
return 0
}
[ -d $MOUNT ] || mkdir -p $MOUNT
$LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null
start_lgssd || return 1
- llmount $mds_HOST://wrong_mds1_svc/client_facet $MOUNT && return 2
+ llmount -o nettype=$NETTYPE $mds_HOST://wrong_mds_svc/client_facet $MOUNT && return 2
# cleanup client modules
$LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null
mount_client $MOUNT
check_mount || return 41
cleanup || return $?
- fi
+ fi
echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
[ -d $TMPMTPT ] || mkdir -p $TMPMTPT
void usage(char *prog)
{
- printf("usage: %s {-o|-m|-l<tgt>} filenamefmt count\n", prog);
- printf(" %s {-o|-m|-l<tgt>} filenamefmt -seconds\n", prog);
- printf(" %s {-o|-m|-l<tgt>} filenamefmt start count\n", prog);
+ printf("usage: %s {-o|-m|-d|-l<tgt>} filenamefmt count\n", prog);
+ printf(" %s {-o|-m|-d|-l<tgt>} filenamefmt -seconds\n", prog);
+ printf(" %s {-o|-m|-d|-l<tgt>} filenamefmt start count\n", prog);
}
int main(int argc, char ** argv)
{
- int i, rc = 0, do_open = 0, do_link = 0;
+ int i, rc = 0, do_open = 0, do_link = 0, do_mkdir = 0;
char format[4096], *fmt, *tgt = NULL;
char filename[4096];
long start, last, end;
return 1;
}
- if (strcmp(argv[1], "-o") == 0) {
+ if (strcmp(argv[1], "-d") == 0) {
+ do_mkdir = 1;
+ } else if (strcmp(argv[1], "-o") == 0) {
do_open = 1;
} else if (strncmp(argv[1], "-l", 2) == 0 && argv[1][2]) {
tgt = argv[1] + 2;
rc = link(tgt, filename);
if (rc) {
printf("link(%s, %s) error: %s\n",
- tgt, filename, strerror(errno));
+ tgt, filename, strerror(errno));
+ rc = errno;
+ break;
+ }
+ } else if (do_mkdir) {
+ rc = mkdir(filename, 0755);
+ if (rc) {
+ printf("mkdir(%s) error: %s\n",
+ filename, strerror(errno));
rc = errno;
break;
}
h2elan () {
echo $1 | sed 's/[^0-9]*//g'
}
+
+h2iib () {
+ case $1 in
+ client) echo '\*' ;;
+ *) echo $1 | sed "s/[^0-9]*//" ;;
+ esac
+}
# FIXME: make LMC not require MDS for obdecho LOV
MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
(monitorend == -1 || offset <= monitorend)))))))
return;
- prt("%06lu %lu.%06lu %*s%-10s %#08x %s %#08x\t(0x%x bytes)\n",
+ prt("%06lu %lu.%06lu %.*s%-10s %#08x %s %#08x\t(0x%x bytes)\n",
testcalls, tv->tv_sec, tv->tv_usec, max_tf_len,
tf_num, ops[op],
offset, op == OP_TRUNCATE ? " to " : "thru",
done
# create client config
-${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40
+${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40
#${LMC} --add mtpt --node localhost --path $MOUNT2 --mds mds1 --lov lov1 || exit 40
#!/bin/bash
set -vx
-set -e
+#set -e
. ./lfscktest_config.sh
+sh llmount.sh || exit 1
+
#Create mount points on target OST and MDS
#Create test directory
-
mkdir -p $OST_MOUNTPT
mkdir -p $MDS_MOUNTPT
mkdir -p $TEST_DIR
-export PATH=$LFSCK_PATH/e2fsck:`dirname $0`:`dirname $0`/../utils:$PATH
-
-sh llmount.sh || exit 1
+export PATH=$LFSCK_PATH:`dirname $0`:`dirname $0`/../utils:$PATH
# Create some files on the filesystem
for i in `seq 0 3`; do
done
done
done
-# Create Files to be modified
+# Create Files to be modified
file_name=${TESTNAME}
-
for FILE in `seq -f ${TEST_DIR}/${file_name}.%g 0 40`; do
- dd if=/dev/zero count=1 bs=64k of=$FILE || exit 1
+ dd if=/dev/zero count=1 bs=64K of=$FILE || exit 1
done
#Create some more files
-
for i in `seq 21 23`; do
mkdir -p ${MOUNT}/d$i
for j in `seq 0 5`; do
done
#Create EAs on files so objects are referenced twice from different mds files
-for i in `seq 40 59`; do
+for i in `seq 0 19`; do
touch $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.bad.$i
copy_attr $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.$i $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.bad.$i || (umount $MDS_MOUNTPT && exit 1)
i=`expr $i + 1`
done
- umount $MDS_MOUNTPT
- rmdir $MDS_MOUNTPT
- rmdir $OST_MOUNTPT
+umount $MDS_MOUNTPT
+rmdir $MDS_MOUNTPT
+rmdir $OST_MOUNTPT
# Run e2fsck to get mds and ost info
# a return status of 1 indicates e2fsck successfuly fixed problems found
-e2fsck -d -f -y --mdsdb $GPATH/mdsdb $MDSDEV
+e2fsck -d -f -y --mdsdb $GPATH/mdsdb $MDSDEV
RET=$?
[ $RET -ne 0 -a $RET -ne 1 ] && exit 1
i=0
OSTDB_LIST=""
while [ $i -lt $NUM_OSTS ]; do
- e2fsck -d -f -y --mdsdb $GPATH/mdsdb --ostdb $GPATH/ostdb-$i $TMP/ost$i-`hostname`
+ e2fsck -d -f -y --mdsdb $GPATH/mdsdb --ostdb $GPATH/ostdb-$i $TMP/ost`expr $i + 1`-`hostname`
RET=$?
[ $RET -ne 0 -a $RET -ne 1 ] && exit 1
if [ -z "${OSTDB_LIST}" ]; then
export TESTNAME="lfscktest"
export TESTDESC="Test of lfsck functionality"
-export LFSCK_PATH=${E2FSCK_PATH:-"/usr/src/e2fsprogs-1.34"}
+export LUSTRE=${LUSTRE:-".."}
+export LCONF=${LCONF:-"$LUSTRE/utils/lconf"}
+export LMC=${LMC:-"$LUSTRE/utils/lmc"}
+export LCTL=${LCTL:-"$LUSTRE/utils/lctl"}
+export LFIND=${LFIND:-"$LUSTRE/utils/lfind"}
+
+export LFSCK_PATH=${E2FSCK_PATH:-"/home/yangjun/e2fsprogs-1.35.lfsck2/build/e2fsck"}
export TMP=${TMP:-"/tmp"}
export LOG=${LOG:-"${TMP}/lfscktest.log"}
export LUSTRE_TAG=${LUSTRE_TAG:="HEAD"}
export OST_MOUNTPT="/mnt/ost_${TESTNAME}"
export MOUNT="/mnt/lustre"
export TEST_DIR="${MOUNT}/${TESTNAME}"
+export MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`}
+export NUM_OSTS=${NUM_OSTS:-1}
stripe_count = (int)lum_dir->lmm_stripe_count;
if (stripe_count == 0) {
- fd = open("/proc/fs/lustre/lov/lov1/stripecount", O_RDONLY);
+ fd = open("/proc/fs/lustre/llite/fs0/lov/stripecount", O_RDONLY);
if (fd == -1) {
fprintf(stderr, "open proc file error: %s\n",
strerror(errno));
stripe_size = (int)lum_dir->lmm_stripe_size;
if (stripe_size == 0) {
- fd = open("/proc/fs/lustre/lov/lov1/stripesize", O_RDONLY);
+ fd = open("/proc/fs/lustre/llite/fs0/lov/stripesize", O_RDONLY);
if (fd == -1) {
fprintf(stderr, "open proc file error: %s\n",
strerror(errno));
close(fd);
}
- fd = open("/proc/fs/lustre/lov/lov1/numobd", O_RDONLY);
+ fd = open("/proc/fs/lustre/llite/fs0/lov/numobd", O_RDONLY);
if(fd == -1) {
fprintf(stderr, "open proc file error: %s\n",
strerror(errno));
if ((lum_file1->lmm_stripe_count != stripe_count) ||
(lum_file1->lmm_stripe_size != stripe_size))
+ {
return -1;
+ }
stripe_offset = (short int)lum_dir->lmm_stripe_offset;
if (stripe_offset != -1) {
LLMOUNT=${LLMOUNT:-llmount}
SECURITY=${SECURITY:-"null"}
-config=$NAME.xml
-mkconfig=$NAME.sh
+config=$(dirname $0)/$NAME.xml
+mkconfig=$(dirname $0)/$NAME.sh
. krb5_env.sh
start_krb5_kdc || exit 1
fi
[ "$NODE" ] && node_opt="--node $NODE"
+[ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG"
# We'd better start lsvcgssd after gss modules loaded.
# remove this if we don't depend on lsvcgssd in the future
--backdev $OST_BACKDEV $OST_MOUNT_OPTS --size $OSTSIZE $JARG || exit 30
# create client config
-${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40
-${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41
+${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40
+${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --clientoptions async --lov lov1 || exit 41
if [ -z "$ECHO_CLIENT" ]; then
# create client config
- ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40
- ${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41
+ ${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40
+ ${LMC} --add mtpt --node client --path $MOUNT2 --clientoptions async --mds mds1 --lov lov1 || exit 41
else
${LMC} --add echo_client --node localhost --ost lov1 || exit 42
fi
$LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2tcp $GW_NODE` --lo $OST
# mount
-$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov ost_$OST
+$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --clientoptions async --mds mds_$ACTIVEMDS --lov ost_$OST
# Client node
#${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1
${LMC} --add net --node client --nid '*' --nettype elan || exit 1
-${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1
+${LMC} --add mtpt --node client --path /mnt/lustre --clientoptions async --mds mds1 --lov lov1
# this is crude, but effective
let server_per_gw=($SERVER_CNT / $GW_CNT )
${LMC} --add mds --node $MDS --mds mds1 --dev $TMP/mds1 --size 100000 || exit 1
${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 1048576 --stripe_cnt 0 --stripe_pattern 0 || exit 1
-${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1
+${LMC} --add mtpt --node client --path /mnt/lustre --clientoptions async --mds mds1 --lov lov1
for s in $SERVERS
do
${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22
# create client config
-${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT1} --mds mds1 --lov lov1 || exit 30
-${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT2} --mds mds2 --lov lov2 || exit 30
+${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT1} --clientoptions async --mds mds1 --lov lov1 || exit 30
+${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT2} --clientoptions async --mds mds2 --lov lov2 || exit 30
" Y fdatasync\n"
" z seek to zero\n";
-void null_handler(int unused) { }
+static int usr1_received;
+void usr1_handler(int unused)
+{
+ usr1_received = 1;
+}
static const char *
pop_arg(int argc, char *argv[])
exit(1);
}
- signal(SIGUSR1, null_handler);
+ signal(SIGUSR1, usr1_handler);
fname = argv[1];
for (commands = argv[2]; *commands; commands++) {
switch (*commands) {
case '_':
- pause();
+ if (usr1_received == 0)
+ pause();
+ usr1_received = 0;
+ signal(SIGUSR1, usr1_handler);
break;
case 'c':
if (close(fd) == -1) {
--stripe_cnt 0 --stripe_pattern 0 || exit 6
lmc -m $CONFIG --add ost --nspath /mnt/ost_ns --node $OSTNODE \
--lov lov1 --dev $OSTDEV --size $OSTSIZE --fstype $FSTYPE || exit 7
- lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \
+ lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --clientoptions async --mds mds1 \
--lov lov1 || exit 8
}
set -e
-# bug 2986
-ALWAYS_EXCEPT="20b"
+# 20b: bug 2986
+ALWAYS_EXCEPT=" 20b"
LUSTRE=${LUSTRE:-`dirname $0`/..}
REFORMAT=--reformat $SETUP
unset REFORMAT
+[ "$ONLY" == "setup" ] && exit
+
test_1() {
drop_request "mcreate $MOUNT/1" || return 1
drop_reint_reply "mcreate $MOUNT/2" || return 2
#define OBD_FAIL_MDS_CLOSE_NET 0x115
sleep 2
kill -USR1 $PID
+ cancel_lru_locks MDC # force the close
echo "waiting for multiop $PID"
wait $PID || return 2
do_facet client munlink $MOUNT/$tfile || return 3
# Bug 113, check that readdir lost recv timeout works.
test_13() {
- mkdir /mnt/lustre/readdir
- touch /mnt/lustre/readdir/newentry
+ mkdir /mnt/lustre/readdir || return 1
+ touch /mnt/lustre/readdir/newentry || return
# OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE
do_facet mds "sysctl -w lustre.fail_loc=0x80000104"
- ls /mnt/lustre/readdir || return 1
+ ls /mnt/lustre/readdir || return 3
do_facet mds "sysctl -w lustre.fail_loc=0"
- rm -rf /mnt/lustre/readdir
+ rm -rf /mnt/lustre/readdir || return 4
}
run_test 13 "mdc_readpage restart test (bug 1138)"
done
}
+# recovery timeout. This actually should be taken from
+# obd_timeout
+RECOV_TIMEOUT=30
+
test_16() {
do_facet client cp /etc/termcap $MOUNT
sync
do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1
sysctl -w lustre.fail_loc=0
# give recovery a chance to finish (shouldn't take long)
- sleep $TIMEOUT
+ sleep $RECOV_TIMEOUT
do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2
start_read_ahead
}
sysctl -w lustre.fail_loc=0x80000503
do_facet client cp /etc/termcap $DIR/$tfile
- sleep $TIMEOUT
+ sleep $RECOV_TIMEOUT
sysctl -w lustre.fail_loc=0
do_facet client "df $DIR"
# expect cmp to fail
}
run_test 20b "ldlm_handle_enqueue error (should return error)"
+test_21a() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+ multiop $DIR/$tdir-2/f Oc &
+ open_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 2
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+
+ rm -rf $DIR/$tdir-*
+}
+run_test 21a "drop close request while close and open are both in flight"
+
+test_21b() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ mcreate $DIR/$tdir-2/f &
+ open_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 3
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 4
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 5
+ rm -rf $DIR/$tdir-*
+}
+run_test 21b "drop open request while close and open are both in flight"
+
+test_21c() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ close_pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ mcreate $DIR/$tdir-2/f &
+ open_pid=$!
+ sleep 3
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $close_pid
+ cancel_lru_locks MDC # force the close
+ wait $close_pid || return 1
+ wait $open_pid || return 2
+
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21c "drop both request while close and open are both in flight"
+
+test_21d() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000129"
+ multiop $DIR/$tdir-2/f Oc &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+
+ rm -rf $DIR/$tdir-*
+}
+run_test 21d "drop close reply while close and open are both in flight"
+
+test_21e() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+
+ sleep $TIMEOUT
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21e "drop open reply while close and open are both in flight"
+
+test_21f() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21f "drop both reply while close and open are both in flight"
+
+test_21g() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/$tdir-2/f &
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $pid
+ cancel_lru_locks MDC # force the close
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 2
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 3
+ rm -rf $DIR/$tdir-*
+}
+run_test 21g "drop open reply and close request while close and open are both in flight"
+
+test_21h() {
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ touch $DIR/$tdir-2/f &
+ touch_pid=$!
+ sleep 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ cancel_lru_locks MDC # force the close
+ kill -USR1 $pid
+ wait $pid || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ wait $touch_pid || return 2
+
+ $CHECKSTAT -t file $DIR/$tdir-1/f || return 3
+ $CHECKSTAT -t file $DIR/$tdir-2/f || return 4
+ rm -rf $DIR/$tdir-*
+}
+run_test 21h "drop open request and close reply while close and open are both in flight"
+
+# bug 3462 - multiple MDC requests
+test_22() {
+ f1=$DIR/${tfile}-1
+ f2=$DIR/${tfile}-2
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ multiop $f2 Oc &
+ close_pid=$!
+
+ sleep 1
+ multiop $f1 msu || return 1
+
+ cancel_lru_locks MDC # force the close
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ wait $close_pid || return 2
+ rm -rf $f2 || return 4
+}
+run_test 22 "drop close request and do mknod"
+
+test_23() { #b=4561
+ multiop $DIR/$tfile O_c &
+ pid=$!
+ # give a chance for open
+ sleep 5
+
+ # try the close
+ drop_request "kill -USR1 $pid"
+
+ fail mds
+ wait $pid || return 1
+ return 0
+}
+#run_test 23 "client hang when close a file after mds crash"
+
+
$CLEANUP
umount $MOUNT2 || true
umount $MOUNT || true
rmmod llite
+
+ # b=3941
+ # During MDS recovery, the MDS clears orphans on the OST via
+ # mds_lov_clear_orphan, which sends a request to the OST and waits for
+ # the reply.  If we stop the MDS at that point we get obd_refcount > 1
+ # errors, because mds_lov_clear_orphan grabs an export of the MDS,
+ # so the obd_refcount of the MDS will not drop to zero.  So, wait a
+ # while before stopping the MDS.  This bug needs further work.
for mds in `mds_list`; do
+ sleep 5
stop $mds ${FORCE} $MDSLCONFARGS
done
stop_lgssd
facet_failover mds1
# expect failover to fail
df $MOUNT && return 1
+ sleep 1
# first 25 files shouuld have been
# replayed
facet_failover mds1
df $MOUNT || return 1
+ sleep 1
unlinkmany $MOUNT1/$tfile- 25 || return 2
sleep $TIMEOUT
facet_failover mds1
df $MOUNT || return 1
+ sleep 1
unlinkmany $MOUNT1/$tfile- 25 || return 2
sleep $TIMEOUT
facet_failover ost
df $MOUNT || return 1
+ sleep 1
unlinkmany $MOUNT1/$tfile- 25 || return 2
}
run_test 18 "replay open, Abort recovery, don't assert (3892)"
-
# cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for
# itexport NOW=0
run_test 4 "Fail OST during read, with verification"
test_5() {
- IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r 64 -s 1g"
+ FREE=`df -P -h $DIR | tail -n 1 | awk '{ print $3 }'`
+ case $FREE in
+ *T|*G) FREE=1G;;
+ esac
+ IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r 4 -s $FREE"
iozone $IOZONE_OPTS -f $DIR/$tfile &
PID=$!
- sleep 10
+ sleep 8
fail ost
wait $PID || return 1
rm -f $DIR/$tfile
assert_env MDSCOUNT
# Skip these tests
-ALWAYS_EXCEPT=""
+# 46 - The MDS will always have to force close the cached opens
+ALWAYS_EXCEPT="46"
if [ `using_krb5_sec $SECURITY` == 'n' ] ; then
ALWAYS_EXCEPT="0c $ALWAYS_EXCEPT"
wait $pid || return 1
$LCTL --device $mdcdev activate
+ sleep 1
$CHECKSTAT -t file $DIR/$tfile || return 2
return 0
}
run_test 50 "Double OSC recovery, don't LASSERT (3812)"
+# bug 3462 - simultaneous MDC requests
+test_51a() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ touch $DIR/${tdir}-2/f
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 1
+
+ fail mds
+
+ wait $pid || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51a "|X| close request while two MDC requests in flight"
+
+test_51b() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ touch $DIR/${tdir}-2/f &
+ usleep 500
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ kill -USR1 $pid
+ wait $pid || return 1
+
+ fail mds
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51b "|X| open request while two MDC requests in flight"
+
+test_51c() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ touch $DIR/${tdir}-2/f &
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ fail mds
+
+ wait $pid || return 1
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51c "|X| open request and close request while two MDC requests in flight"
+
+test_51d() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ touch $DIR/${tdir}-2/f
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+ #$CHECKSTAT -t file $DIR/${tdir}-2/f || return 1
+
+ fail mds
+
+ wait $pid || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51d "|X| close reply while two MDC requests in flight"
+
+test_51e() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/$tdir-1
+ mkdir -p $DIR/$tdir-2
+ multiop $DIR/$tdir-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/${tdir}-2/f &
+ usleep 500
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ kill -USR1 $pid
+ wait $pid || return 1
+
+ fail mds
+
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51e "|X| open reply while two MDC requests in flight"
+
+test_51f() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/${tdir}-2/f &
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ fail mds
+
+ wait $pid || return 1
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51f "|X| open reply and close reply while two MDC requests in flight"
+
+test_51g() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000119"
+ touch $DIR/${tdir}-2/f &
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000115"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ fail mds
+
+ wait $pid || return 1
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51g "|X| open reply and close request while two MDC requests in flight"
+
+test_51h() {
+ replay_barrier_nodf mds
+ mkdir -p $DIR/${tdir}-1
+ mkdir -p $DIR/${tdir}-2
+ multiop $DIR/${tdir}-1/f O_c &
+ pid=$!
+ # give multiop a chance to open
+ sleep 1
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ touch $DIR/${tdir}-2/f &
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000122"
+ kill -USR1 $pid
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ fail mds
+
+ wait $pid || return 1
+ $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2
+ $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3
+ rm -rf $DIR/${tdir}-*
+}
+run_test 51h "|X| open request and close reply while two MDC requests in flight"
+
# b3764 timed out lock replay
test_52() {
touch $DIR/$tfile
}
run_test 52 "time out lock replay (3764)"
+test_53() {
+ replay_barrier_nodf mds
+ f1=$DIR/${tfile}-1
+ cat <<EOF > $f1
+#!/bin/sh
+true
+EOF
+ chmod +x $f1
+ do_facet mds "sysctl -w lustre.fail_loc=0x80000107"
+ $f1 || return 1
+ do_facet mds "sysctl -w lustre.fail_loc=0"
+
+ fail mds
+ rm -f $f1
+}
+run_test 53 "|X| open request and close reply while two MDC requests in flight"
+
+test_54() {
+ replay_barrier mds
+ createmany -o $DIR/$tfile 20
+ unlinkmany $DIR/$tfile 20
+ fail mds
+}
+run_test 54 "|X| createmany then unlinkmany, replay through MDS failover"
+
+#b3440 ASSERTION(rec->ur_fid2->id) failed
+test_55() {
+ sysctl -w portals.debug=-1 portals.debug_mb=25
+ ln -s foo $DIR/$tfile
+ replay_barrier mds
+ #drop_reply "cat $DIR/$tfile"
+ fail mds
+ sleep 10
+ lctl dk /r/tmp/debug
+}
+run_test 55 "don't replay a symlink open request (3440)"
+
+#b3761 ASSERTION(hash != 0) failed
+test_56() {
+# OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE
+ do_facet mds "sysctl -w lustre.fail_loc=0x8000012b"
+ touch $DIR/$tfile
+ pid=$!
+ # give a chance for touch to run
+ sleep 5
+ do_facet mds "sysctl -w lustre.fail_loc=0x0"
+ wait $pid || return 1
+ rm $DIR/$tfile
+ return 0
+}
+run_test 56 "let MDS_CHECK_RESENT return the original return code instead of 0"
+
equals_msg test complete, cleaning up
$CLEANUP
SRC=/etc
[ "$COUNT" ] || COUNT=1000
-[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf
+[ "$LCONF" ] || LCONF=lconf
-[ "$MCREATE" ] || MCREATE=$SRCDIR/../tests/mcreate
+[ "$MCREATE" ] || MCREATE=mcreate
[ "$MKDIRMANY" ] || MKDIRMANY=$SRCDIR/../tests/mkdirmany
fi
# mkdirmany test (bug 589)
-log "running mkdirmany $MOUNT/base$$ 100"
+log "running $MKDIRMANY $MOUNT/base$$ 100"
$MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed"
log "removing mkdirmany directories"
rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed"
SRCDIR=`dirname $0`
export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
+export SECURITY=${SECURITY:-"null"}
TMP=${TMP:-/tmp}
FSTYPE=${FSTYPE:-ext3}
PTLDEBUG=${PTLDEBUG:-0}
MODE=${MODE:mds}
+. krb5_env.sh
+
if [ $UID -ne 0 ]; then
RUNAS_ID="$UID"
RUNAS=""
RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
fi
+if [ `using_krb5_sec $SECURITY` == 'y' ] ; then
+ start_krb5_kdc || exit 1
+ if [ $RUNAS_ID -ne $UID ]; then
+ $RUNAS ./krb5_refresh_cache.sh || exit 2
+ fi
+fi
+
export NAME=${NAME:-local}
SAVE_PWD=$PWD
SRCDIR=`dirname $0`
export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
+export SECURITY=${SECURITY:-"null"}
TMP=${TMP:-/tmp}
FSTYPE=${FSTYPE:-ext3}
IOPENTEST2=${IOPENTEST2:-iopentest2}
PTLDEBUG=${PTLDEBUG:-0}
+. krb5_env.sh
+
if [ $UID -ne 0 ]; then
RUNAS_ID="$UID"
RUNAS=""
RUNAS=${RUNAS:-"runas -u $RUNAS_ID"}
fi
+if [ `using_krb5_sec $SECURITY` == 'y' ] ; then
+ start_krb5_kdc || exit 1
+ if [ $RUNAS_ID -ne $UID ]; then
+ $RUNAS ./krb5_refresh_cache.sh || exit 2
+ fi
+fi
+
export NAME=${NAME:-local}
SAVE_PWD=$PWD
set -e
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 2739
-# 51b and 51c depend on kernel
-# 65* fixes in b_hd_cray_merge3
-# the new kernel api make 48 not valid anymore
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"48 51b 51c 65a 65b 65c 65d 65e 65f"}
+# bug number for skipped tests:
+# skipped test:
+# - 51b 51c depend on used kernel
+# more than only LOV EAs
+# - 65h (default stripe inheritance) is not implemented for LMV
+# configurations. Will be done in second phase of collibri.
+
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"51b 51c 65h"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
if ! mount | grep -q $DIR; then
$START
fi
- echo -1 >/proc/sys/portals/debug
- log "== test $1: $2= `date +%H:%M:%S`"
+ BEFORE=`date +%s`
+ log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)"
export TESTNAME=test_$1
test_$1 || error "test_$1: exit with rc=$?"
unset TESTNAME
- pass
+ pass "($((`date +%s` - $BEFORE))s)"
cd $SAVE_PWD
$CLEAN
}
}
_basetest() {
- echo $*
+ echo $*
}
basetest() {
- IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+ IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
}
run_test() {
}
pass() {
- echo PASS
+ echo PASS $@
}
MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`"
OSTCOUNT=`cat /proc/fs/lustre/llite/fs0/lov/numobd`
STRIPECOUNT=`cat /proc/fs/lustre/llite/fs0/lov/stripecount`
STRIPESIZE=`cat /proc/fs/lustre/llite/fs0/lov/stripesize`
+ORIGFREE=`cat /proc/fs/lustre/llite/fs0/lov/kbytesavail`
+MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))}
[ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo
[ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo
run_test 16 "touch .../d16/f; rm -rf .../d16/f ================="
test_17a() {
- mkdir $DIR/d17
+ mkdir -p $DIR/d17
touch $DIR/d17/f
ln -s $DIR/d17/f $DIR/d17/l-exist
ls -l $DIR/d17
run_test 17a "symlinks: create, remove (real) =================="
test_17b() {
- if [ ! -d $DIR/d17 ]; then
- mkdir $DIR/d17
- fi
+ mkdir -p $DIR/d17
ln -s no-such-file $DIR/d17/l-dangle
ls -l $DIR/d17
$CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error
}
run_test 17b "symlinks: create, remove (dangling) =============="
+test_17c() { # bug 3440 - don't save failed open RPC for replay
+ mkdir -p $DIR/d17
+ ln -s foo $DIR/d17/f17c
+ cat $DIR/d17/f17c && error "opened non-existent symlink" || true
+}
+run_test 17c "symlinks: open dangling (should return error) ===="
+
+test_17d() {
+ mkdir -p $DIR/d17
+ ln -s foo $DIR/d17/f17d
+ touch $DIR/d17/f17d || error "creating to new symlink"
+}
+run_test 17d "symlinks: create dangling ========================"
+
test_18() {
touch $DIR/f
ls $DIR || error
}
run_test 27l "check setstripe permissions (should return error)"
+test_27m() {
+ [ "$OSTCOUNT" -lt "2" ] && echo "skipping out-of-space test on OST0" && return
+ if [ $ORIGFREE -gt $MAXFREE ]; then
+ echo "skipping out-of-space test on OST0"
+ return
+ fi
+ mkdir -p $DIR/d27
+ $LSTRIPE $DIR/d27/f27m_1 0 0 1
+ dd if=/dev/zero of=$DIR/d27/f27m_1 bs=1024 count=$MAXFREE && \
+ error "dd should fill OST0"
+ i=2
+ while $LSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do
+ i=`expr $i + 1`
+ [ $i -gt 256 ] && break
+ done
+ i=`expr $i + 1`
+ touch $DIR/d27/f27m_$i
+ [ `$LFIND $DIR/d27/f27m_$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "0"` ] && \
+ error "OST0 was full but new created file still use it"
+ i=`expr $i + 1`
+ touch $DIR/d27/f27m_$i
+ [ `$LFIND $DIR/d27/f27m_$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "0"` ] && \
+ error "OST0 was full but new created file still use it"
+ rm $DIR/d27/f27m_1
+}
+run_test 27m "create file while OST0 was full =================="
+
test_28() {
mkdir $DIR/d28
$CREATETEST $DIR/d28/ct || error
}
run_test 31e "remove of open non-empty directory ==============="
+test_31f() { # bug 4554
+ set -vx
+ mkdir $DIR/d31f
+ lfs setstripe $DIR/d31f 1048576 -1 1
+ cp /etc/hosts $DIR/d31f
+ ls -l $DIR/d31f
+ lfs getstripe $DIR/d31f/hosts
+ multiop $DIR/d31f D_c &
+ MULTIPID=$!
+
+ sleep 1
+
+ rm -rv $DIR/d31f || error "first of $DIR/d31f"
+ mkdir $DIR/d31f
+ lfs setstripe $DIR/d31f 1048576 -1 1
+ cp /etc/hosts $DIR/d31f
+ ls -l $DIR/d31f
+ lfs getstripe $DIR/d31f/hosts
+ multiop $DIR/d31f D_c &
+ MULTIPID2=$!
+
+ sleep 6
+
+ kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running"
+ wait $MULTIPID || error "first opendir $MULTIPID failed"
+
+ sleep 6
+
+ kill -USR1 $MULTIPID2 || error "second opendir $MULTIPID not running"
+ wait $MULTIPID2 || error "second opendir $MULTIPID2 failed"
+ set +vx
+}
+run_test 31f "remove of open directory with open-unlink file ==="
+
test_32a() {
echo "== more mountpoints and symlinks ================="
[ -e $DIR/d32a ] && rm -fr $DIR/d32a
}
run_test 48d "Access removed parent subdir (should return errors)"
+test_48e() { # bug 4134
+ check_kernel_version 41 || return 0
+ #sysctl -w portals.debug=-1
+ #set -vx
+ mkdir -p $DIR/d48e/dir
+ # On a buggy kernel addition of "; touch file" after cd .. will
+ # produce kernel oops in lookup_hash_it
+
+ cd $DIR/d48e/dir
+ ( sleep 2 && cd -P .. ) &
+ cdpid=$!
+ $TRACE rmdir $DIR/d48e/dir || error "remove cwd $DIR/d48e/dir failed"
+ $TRACE rmdir $DIR/d48e || error "remove parent $DIR/d48e failed"
+ $TRACE touch $DIR/d48e || error "'touch $DIR/d48e' failed"
+ $TRACE chmod +x $DIR/d48e || error "'chmod +x $DIR/d48e' failed"
+ $TRACE wait $cdpid && error "'cd ..' worked after recreate parent"
+ $TRACE rm $DIR/d48e || error "'$DIR/d48e' failed"
+}
+run_test 48e "Access to recreated parent (should return errors) "
+
test_50() {
# bug 1485
mkdir $DIR/d50
}
run_test 51 "special situations: split htree with empty entry =="
+export NUMTEST=70000
test_51b() {
- NUMTEST=70000
- check_kernel_version 40 || NUMTEST=31000
- NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'`
- [ $NUMFREE -lt $NUMTEST ] && \
- echo "skipping test 51b, not enough free inodes($NUMFREE)" && \
- return
- mkdir -p $DIR/d51b
- (cd $DIR/d51b; mkdirmany t $NUMTEST)
+ NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'`
+ [ $NUMFREE -lt 21000 ] && \
+ echo "skipping test 51b, not enough free inodes($NUMFREE)" && \
+ return
+
+ check_kernel_version 40 || NUMTEST=31000
+ [ $NUMFREE -lt $NUMTEST ] && NUMTEST=$(($NUMFREE - 50))
+
+ mkdir -p $DIR/d51b
+ (cd $DIR/d51b; mkdirmany t $NUMTEST)
}
run_test 51b "mkdir .../t-0 --- .../t-$NUMTEST ===================="
# bug 1414 - set/get directories' stripe info
test_65a() {
- mkdir -p $DIR/d65
- touch $DIR/d65/f1
- $LVERIFY $DIR/d65 $DIR/d65/f1 || error "lverify failed"
+ mkdir -p $DIR/d65a
+ touch $DIR/d65a/f1
+ $LVERIFY $DIR/d65a $DIR/d65a/f1 || error "lverify failed"
}
run_test 65a "directory with no stripe info ===================="
test_65b() {
- mkdir -p $DIR/d65
- $LSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe"
- touch $DIR/d65/f2
- $LVERIFY $DIR/d65 $DIR/d65/f2 || error "lverify failed"
+ mkdir -p $DIR/d65b
+ $LSTRIPE $DIR/d65b $(($STRIPESIZE * 2)) 0 1 || error "setstripe"
+ touch $DIR/d65b/f2
+ $LVERIFY $DIR/d65b $DIR/d65b/f2 || error "lverify failed"
}
run_test 65b "directory setstripe $(($STRIPESIZE * 2)) 0 1 ==============="
test_65c() {
if [ $OSTCOUNT -gt 1 ]; then
- mkdir -p $DIR/d65
- $LSTRIPE $DIR/d65 $(($STRIPESIZE * 4)) 1 \
+ mkdir -p $DIR/d65c
+ $LSTRIPE $DIR/d65c $(($STRIPESIZE * 4)) 1 \
$(($OSTCOUNT - 1)) || error "setstripe"
- touch $DIR/d65/f3
- $LVERIFY $DIR/d65 $DIR/d65/f3 || error "lverify failed"
+ touch $DIR/d65c/f3
+ $LVERIFY $DIR/d65c $DIR/d65c/f3 || error "lverify failed"
fi
}
run_test 65c "directory setstripe $(($STRIPESIZE * 4)) 1 $(($OSTCOUNT - 1))"
[ $STRIPECOUNT -eq 0 ] && sc=1 || sc=$(($STRIPECOUNT - 1))
test_65d() {
- mkdir -p $DIR/d65
- $LSTRIPE $DIR/d65 $STRIPESIZE -1 $sc || error "setstripe"
- touch $DIR/d65/f4 $DIR/d65/f5
- $LVERIFY $DIR/d65 $DIR/d65/f4 $DIR/d65/f5 || error "lverify failed"
+ mkdir -p $DIR/d65d
+ $LSTRIPE $DIR/d65d $STRIPESIZE -1 $sc || error "setstripe"
+ touch $DIR/d65d/f4 $DIR/d65d/f5
+ $LVERIFY $DIR/d65d $DIR/d65d/f4 $DIR/d65d/f5 || error "lverify failed"
}
run_test 65d "directory setstripe $STRIPESIZE -1 $sc ======================"
test_65e() {
- mkdir -p $DIR/d65
+ mkdir -p $DIR/d65e
- $LSTRIPE $DIR/d65 0 -1 0 || error "setstripe"
- touch $DIR/d65/f6
- $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed"
+ $LSTRIPE $DIR/d65e 0 -1 0 || error "setstripe"
+ $LFS find -v $DIR/d65e | grep "$DIR/d65e/ has no stripe info" || error "no stripe info failed"
+ touch $DIR/d65e/f6
+ $LVERIFY $DIR/d65e $DIR/d65e/f6 || error "lverify failed"
}
run_test 65e "directory setstripe 0 -1 0 (default) ============="
}
run_test 65f "dir setstripe permission (should return error) ==="
+test_65g() {
+ mkdir -p $DIR/d65g
+ $LSTRIPE $DIR/d65g $(($STRIPESIZE * 2)) 0 1 || error "setstripe"
+ $LSTRIPE -d $DIR/d65g || error "deleting stripe info failed"
+ $LFS find -v $DIR/d65g | grep "$DIR/d65g/ has no stripe info" || error "no stripe info failed"
+}
+run_test 65g "directory setstripe -d ========"
+
+test_65h() {
+ mkdir -p $DIR/d65h
+ $LSTRIPE $DIR/d65h $(($STRIPESIZE * 2)) 0 1 || error "setstripe"
+ mkdir -p $DIR/d65h/dd1
+ [ "`$LFS find -v $DIR/d65h | grep "^count"`" == \
+ "`$LFS find -v $DIR/d65h/dd1 | grep "^count"`" ] || error "stripe info inherit failed"
+}
+run_test 65h "directory stripe info inherit ======"
+
# bug 2543 - update blocks count on client
test_66() {
COUNT=${COUNT:-8}
}
run_test 68 "support swapping to Lustre ========================"
+# bug 3462 - multiple simultaneous MDC requests
+test_69() {
+ mkdir $DIR/D68-1
+ mkdir $DIR/D68-2
+ multiop $DIR/D68-1/f68-1 O_c &
+ pid1=$!
+ #give multiop a chance to open
+ usleep 500
+
+ echo 0x80000129 > /proc/sys/lustre/fail_loc
+ multiop $DIR/D68-1/f68-2 Oc &
+ sleep 1
+ echo 0 > /proc/sys/lustre/fail_loc
+
+ multiop $DIR/D68-2/f68-3 Oc &
+ pid3=$!
+
+ kill -USR1 $pid1
+ wait $pid1 || return 1
+
+ sleep 25
+
+ $CHECKSTAT -t file $DIR/D68-1/f68-1 || return 4
+ $CHECKSTAT -t file $DIR/D68-1/f68-2 || return 5
+ $CHECKSTAT -t file $DIR/D68-2/f68-3 || return 6
+
+ rm -rf $DIR/D68-*
+}
+run_test 69 "multiple MDC requests (should not deadlock)"
+
+
+test_70() {
+ STAT="/proc/fs/lustre/osc/OSC*MNT*/stats"
+ mkdir $DIR/d70
+ dd if=/dev/zero of=$DIR/d70/file bs=512 count=5
+ cancel_lru_locks OSC
+ cat $DIR/d70/file >/dev/null
+ # Hopefully there is only one.
+ ENQ=`cat $STAT|awk -vnum=0 '/ldlm_enq/ {num += $2} END {print num;}'`
+ CONV=`cat $STAT|awk -vnum=0 '/ldlm_conv/ {num += $2} END {print num;}'`
+ CNCL=`cat $STAT|awk -vnum=0 '/ldlm_canc/ {num += $2} END {print num;}'`
+ dd if=/dev/zero of=$DIR/d70/file bs=512 count=5
+ ENQ1=`cat $STAT|awk -vnum=0 '/ldlm_enq/ {num += $2} END {print num;}'`
+ CONV1=`cat $STAT|awk -vnum=0 '/ldlm_conv/ {num += $2} END {print num;}'`
+ CNCL1=`cat $STAT|awk -vnum=0 '/ldlm_canc/ {num += $2} END {print num;}'`
+
+ if [ $CONV1 -le $CONV ] ; then
+ error "No conversion happened. Before: enq $ENQ, conv $CONV, cancel $CNCL ; After: enq $ENQ1, conv $CONV1, cancel $CNCL1"
+ else
+ echo "OK"
+ true
+ fi
+
+}
+run_test 70 "Test that PR->PW conversion takes place ==========="
+
+test_71() {
+ cp `which dbench` $DIR
+
+ [ ! -f $DIR/dbench ] && echo "dbench not installed, skip this test" && return 0
+
+ TGT=$DIR/client.txt
+ SRC=${SRC:-/usr/lib/dbench/client.txt}
+ [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
+ SRC=/usr/lib/dbench/client_plain.txt
+ [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
+
+ echo "copying /lib to $DIR"
+ cp -r /lib $DIR/lib
+
+ echo "chroot $DIR /dbench -c client.txt 2"
+ chroot $DIR /dbench -c client.txt 2
+ RC=$?
+
+ rm -f $DIR/dbench
+ rm -f $TGT
+ rm -fr $DIR/lib
+
+ return $RC
+}
+run_test 71 "Running dbench on lustre (don't segment fault) ===="
+
# on the LLNL clusters, runas will still pick up root's $TMP settings,
# which will not be writable for the runas user, and then you get a CVS
# error message with a corrupt path string (CVS bug) and panic.
}
run_test 99f "cvs commit ======================================="
+test_100() {
+ netstat -ta | while read PROT SND RCV LOCAL REMOTE STAT; do
+ LPORT=`echo $LOCAL | cut -d: -f2`
+ RPORT=`echo $REMOTE | cut -d: -f2`
+ if [ "$PROT" = "tcp" ] && [ "$LPORT" != "*" ] && [ "$RPORT" != "*" ] && [ $RPORT -eq 988 ] && [ $LPORT -gt 1024 ]; then
+ echo "local port: $LPORT > 1024"
+ error
+ fi
+ done
+}
+run_test 100 "check local port using privileged port ==========="
+
+
TMPDIR=$OLDTMPDIR
TMP=$OLDTMP
HOME=$OLDHOME
set -e
ONLY=${ONLY:-"$*"}
-# bug number for skipped test: 1768 3192
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"}
+# bug number for skipped test: 1768 3192 3192
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT"
OPENUNLINK=${OPENUNLINK:-openunlink}
TOEXCL=${TOEXCL:-toexcl}
TRUNCATE=${TRUNCATE:-truncate}
+export TMP=${TMP:-/tmp}
if [ $UID -ne 0 ]; then
RUNAS_ID="$UID"
lctl mark "$*" 2> /dev/null || true
}
+trace() {
+ log "STARTING: $*"
+ strace -o $TMP/$1.strace -ttt $*
+ RC=$?
+ log "FINISHED: $*: rc $RC"
+ return 1
+}
+TRACE=${TRACE:-""}
+
run_one() {
if ! mount | grep -q $DIR1; then
$START
fi
- log "== test $1: $2"
+ BEFORE=`date +%s`
+ log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)"
export TESTNAME=test_$1
test_$1 || error "test_$1: exit with rc=$?"
unset TESTNAME
- pass
+ pass "($((`date +%s` - $BEFORE))s)"
cd $SAVE_PWD
$CLEAN
}
}
pass() {
- echo PASS
+ echo PASS $@
}
export MOUNT1=`mount| awk '/ lustre/ { print $3 }'| head -n 1`
run_test 17 "resource creation/LVB creation race ==============="
test_18() {
- ./mmap_sanity -d $MOUNT1 -m $MOUNT2
+ ./mmap_sanity -d $MOUNT1 -m $MOUNT2
+ sync; sleep 1; sync
}
run_test 18 "mmap sanity check ================================="
$LCTL mark "REPLAY BARRIER"
}
+replay_barrier_nodf() {
+ local facet=$1
+ do_facet $facet sync
+ do_facet $facet $LCTL --device %${facet}_svc readonly
+ do_facet $facet $LCTL --device %${facet}_svc notransno
+ do_facet $facet $LCTL mark "REPLAY BARRIER"
+ $LCTL mark "REPLAY BARRIER"
+}
+
mds_evict_client() {
UUID=`cat /proc/fs/lustre/mdc/*_MNT_*/uuid`
do_facet mds "echo $UUID > /proc/fs/lustre/mds/mds1_svc/evict_client"
mds=$2
shift; shift
add_facet $facet --lustre_upcall $UPCALL
- do_lmc --add mtpt --node ${facet}_facet --mds ${mds}_svc $*
+ do_lmc --add mtpt --node ${facet}_facet --clientoptions async --mds ${mds}_svc $*
}
config_commit() {
}
build_test_filter() {
+ [ "$ONLY" ] && log "only running $ONLY"
for O in $ONLY; do
eval ONLY_${O}=true
done
+ [ "$EXCEPT$ALWAYS_EXCEPT" ] && log "skipping $EXCEPT $ALWAYS_EXCEPT"
for E in $EXCEPT $ALWAYS_EXCEPT; do
eval EXCEPT_${E}=true
done
lctl mark "$*" 2> /dev/null || true
}
+pass() {
+ echo PASS $@
+}
+
run_one() {
testnum=$1
message=$2
# Pretty tests run faster.
equals_msg $testnum: $message
- log "== test $1: $2"
+ BEFORE=`date +%s`
+ log "== test $testnum: $message ============ `date +%H:%M:%S` ($BEFORE)"
test_${testnum} || error "test_$testnum failed with $?"
+ pass "($((`date +%s` - $BEFORE))s)"
}
canonical_path() {
echo `gmnalnid -n$1`
}
+h2iib () {
+ case $1 in
+ client) echo '\*' ;;
+ *) echo $1 | sed "s/[^0-9]*//" ;;
+ esac
+}
+
# create nodes
echo -n "adding NET for:"
for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do
echo; echo -n "adding CLIENT on:"
for NODE in $CLIENTS; do
echo -n " $NODE"
- ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --mds mds1 --lov lov1 || exit 30
+ ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 30
done
echo
int main(int argc, char ** argv)
{
- int i, rc = 0;
+ int i, rc = 0, do_rmdir = 0;
char format[4096], *fmt;
char filename[4096];
long start, last;
long begin = 0, count;
- if (argc < 3 || argc > 4) {
+ if (argc < 3 || argc > 5) {
usage(argv[0]);
return 1;
}
+ if (strcmp(argv[1], "-d") == 0) {
+ do_rmdir = 1;
+ argv++;
+ argc--;
+ }
+
if (strlen(argv[1]) > 4080) {
printf("name too long\n");
return 1;
}
for (i = 0; i < count; i++, begin++) {
sprintf(filename, fmt, begin);
- rc = unlink(filename);
+ if (do_rmdir)
+ rc = rmdir(filename);
+ else
+ rc = unlink(filename);
if (rc) {
- printf("unlink(%s) error: %s\n",
+ printf("%s(%s) error: %s\n",
+ do_rmdir ? "rmdir" : "unlink",
filename, strerror(errno));
rc = errno;
break;
ret.append((net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi))
return ret
+ def get_hostaddr(self):
+ ret = []
+ list = self.dom_node.getElementsByTagName('hostaddr')
+ for node in list:
+ ret.append(node.firstChild.data)
+ return ret
+
def _update_active(self, tgt, new):
raise Lustre.LconfError("updates not implemented for XML")
quit""" % (type, name, uuid)
self.run(cmds)
+ def detach(self, name):
+ cmds = """
+ cfg_device %s
+ detach
+ quit""" % (name)
+ self.run(cmds)
+
def set_security(self, name, key, value):
cmds = """
cfg_device %s
quit""" % (name, conn_uuid)
self.run(cmds)
+ def start(self, name, conf_name):
+ cmds = """
+ device $%s
+ start %s
+ quit""" % (name, conf_name)
+ self.run(cmds)
# create a new device with lctl
def newdev(self, type, name, uuid, setup = ""):
self.cleanup(name, uuid, 0)
raise e
-
# cleanup a device
def cleanup(self, name, uuid, force, failover = 0):
if failover: force = 1
def add_module(self, manager):
manager.add_portals_module("libcfs", 'libcfs')
manager.add_portals_module("portals", 'portals')
- if node_needs_router():
+
+ if node_needs_router():
manager.add_portals_module("router", 'kptlrouter')
if self.net_type == 'tcp':
manager.add_portals_module("knals/socknal", 'ksocknal')
def correct_level(self, level, op=None):
return level
-class MDSDEV(Module):
- def __init__(self,db):
- Module.__init__(self, 'MDSDEV', db)
+class CONFDEV(Module):
+ def __init__(self, db, name, target_uuid, uuid):
+ Module.__init__(self, 'CONFDEV', db)
self.devpath = self.db.get_val('devpath','')
self.backdevpath = self.db.get_val('backdevpath','')
self.size = self.db.get_val_int('devsize', 0)
self.journal_size = self.db.get_val_int('journalsize', 0)
self.fstype = self.db.get_val('fstype', '')
self.backfstype = self.db.get_val('backfstype', '')
- self.nspath = self.db.get_val('nspath', '')
self.mkfsoptions = self.db.get_val('mkfsoptions', '')
self.mountfsoptions = self.db.get_val('mountfsoptions', '')
+ self.target = self.db.lookup(target_uuid)
+ self.name = "conf_%s" % self.target.getName()
+ self.client_uuids = self.target.get_refs('client')
self.obdtype = self.db.get_val('obdtype', '')
- self.root_squash = self.db.get_val('root_squash', '')
- self.no_root_squash = self.db.get_val('no_root_squash', '')
- # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid
- target_uuid = self.db.get_first_ref('target')
- self.mds = self.db.lookup(target_uuid)
- self.name = self.mds.getName()
- self.client_uuids = self.mds.get_refs('client')
-
+
+ if self.obdtype == None:
+ self.obdtype = 'dumb'
+
+ self.conf_name = name
+ self.conf_uuid = uuid
+ self.realdev = self.devpath
+
self.lmv = None
self.master = None
if self.lmv != None:
self.client_uuids = self.lmv.get_refs('client')
- # FIXME: if fstype not set, then determine based on kernel version
- self.format = self.db.get_val('autoformat', "no")
- if self.mds.get_val('failover', 0):
- self.failover_mds = 'f'
+ if self.target.get_class() == 'mds':
+ if self.target.get_val('failover', 0):
+ self.failover_mds = 'f'
+ else:
+ self.failover_mds = 'n'
+ self.format = self.db.get_val('autoformat', "no")
else:
- self.failover_mds = 'n'
- active_uuid = get_active_target(self.mds)
- if not active_uuid:
- panic("No target device found:", target_uuid)
- if active_uuid == self.uuid:
- self.active = 1
- else:
- self.active = 0
- if self.active and config.group and config.group != self.mds.get_val('group'):
- self.active = 0
+ self.format = self.db.get_val('autoformat', "yes")
+ self.osdtype = self.db.get_val('osdtype')
+ ost = self.db.lookup(target_uuid)
+ if ost.get_val('failover', 0):
+ self.failover_ost = 'f'
+ else:
+ self.failover_ost = 'n'
- # default inode inode for case when neither LOV either
- # LMV is accessible.
- self.inode_size = 256
-
+ self.inode_size = self.get_inode_size()
+
+ if self.lmv != None:
+ client_uuid = self.name + "_lmv_UUID"
+ self.master = LMV(self.lmv, client_uuid,
+ self.conf_name, self.conf_name)
+
+ def get_inode_size(self):
inode_size = self.db.get_val_int('inodesize', 0)
- if not inode_size == 0:
- self.inode_size = inode_size
- else:
+ if inode_size == 0 and self.target.get_class() == 'mds':
+
+ # default inode size for case when neither LOV either
+ # LMV is accessible.
+ self.inode_size = 256
+
# find the LOV for this MDS
- lovconfig_uuid = self.mds.get_first_ref('lovconfig')
+ lovconfig_uuid = self.target.get_first_ref('lovconfig')
if lovconfig_uuid or self.lmv != None:
if self.lmv != None:
lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
lovconfig = self.lmv.lookup(lovconfig_uuid)
lov_uuid = lovconfig.get_first_ref('lov')
if lov_uuid == None:
- panic(self.mds.getName() + ": No LOV found for lovconfig ",
+ panic(self.target.getName() + ": No LOV found for lovconfig ",
lovconfig.name)
else:
- lovconfig = self.mds.lookup(lovconfig_uuid)
+ lovconfig = self.target.lookup(lovconfig_uuid)
lov_uuid = lovconfig.get_first_ref('lov')
if lov_uuid == None:
- panic(self.mds.getName() + ": No LOV found for lovconfig ",
+ panic(self.target.getName() + ": No LOV found for lovconfig ",
lovconfig.name)
-
if self.lmv != None:
lovconfig_uuid = self.lmv.get_first_ref('lovconfig')
lovconfig = self.lmv.lookup(lovconfig_uuid)
config_only = 1)
# default stripe count controls default inode_size
- stripe_count = lov.stripe_cnt
+ if lov.stripe_cnt > 0:
+ stripe_count = lov.stripe_cnt
+ else:
+ stripe_count = len(lov.devlist)
if stripe_count > 77:
- self.inode_size = 4096
+ inode_size = 4096
elif stripe_count > 35:
- self.inode_size = 2048
+ inode_size = 2048
elif stripe_count > 13:
- self.inode_size = 1024
+ inode_size = 1024
elif stripe_count > 3:
- self.inode_size = 512
+ inode_size = 512
else:
- self.inode_size = 256
-
- self.target_dev_uuid = self.uuid
- self.uuid = target_uuid
-
- # setup LMV
- if self.lmv != None:
- client_uuid = self.name + "_lmv_UUID"
- self.master = LMV(self.lmv, client_uuid,
- self.name, self.name)
-
- def add_module(self, manager):
- if self.active:
- manager.add_lustre_module('mdc', 'mdc')
- manager.add_lustre_module('osc', 'osc')
- manager.add_lustre_module('ost', 'ost')
- manager.add_lustre_module('lov', 'lov')
- manager.add_lustre_module('mds', 'mds')
-
- if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
- manager.add_lustre_module(self.fstype, self.fstype)
-
- if self.fstype:
- manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
-
- # if fstype is smfs, then we should also take care about backing
- # store fs.
- if self.fstype == 'smfs':
- manager.add_lustre_module(self.backfstype, self.backfstype)
- manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
-
- for option in string.split(self.mountfsoptions, ','):
- if option == 'snap':
- if not self.fstype == 'smfs':
- panic("mountoptions has 'snap', but fstype is not smfs.")
- manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
- manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
-
- # add LMV modules
- if self.master != None:
- self.master.add_module(manager)
+ inode_size = 256
+
+ return inode_size
def get_mount_options(self, blkdev):
- options = def_mount_options(self.fstype, 'mds')
+ options = def_mount_options(self.fstype,
+ self.target.get_class())
if config.mountfsoptions:
if options:
if self.fstype == 'smfs':
if options:
- options = "%s,type=%s,dev=%s" %(options,
- self.backfstype, blkdev)
+ options = "%s,type=%s,dev=%s" %(options, self.backfstype,
+ blkdev)
else:
- options = "type=%s,dev=%s" %(self.backfstype, blkdev)
+ options = "type=%s,dev=%s" %(self.backfstype,
+ blkdev)
+
+ if self.target.get_class() == 'mds':
+ if options:
+ options = "%s,iopen_nopriv" %(options)
+ else:
+ options = "iopen_nopriv"
+
return options
-
+
def prepare(self):
- if not config.record and is_prepared(self.name):
- return
- if not self.active:
- debug(self.uuid, "not active")
+ if is_prepared(self.name):
return
- if config.reformat:
- # run write_conf automatically, if --reformat used
- self.write_conf()
- run_acceptors()
- # prepare LMV
- if self.master != None:
- self.master.prepare()
-
- # never reformat here
- blkdev = block_dev(self.devpath, self.size, self.fstype, 0,
- self.format, self.journal_size, self.inode_size,
- self.mkfsoptions, self.backfstype, self.backdevpath)
-
- if not is_prepared('MDT'):
- lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
- try:
- if self.fstype == 'smfs':
- realdev = self.fstype
- else:
- realdev = blkdev
-
- if self.obdtype == None:
- self.obdtype = 'dumb'
+ blkdev = block_dev(self.devpath, self.size, self.fstype,
+ config.reformat, self.format, self.journal_size,
+ self.inode_size, self.mkfsoptions, self.backfstype,
+ self.backdevpath)
+
+ if self.fstype == 'smfs':
+ realdev = self.fstype
+ else:
+ realdev = blkdev
- if self.master == None:
- master_name = 'dumb'
- else:
- master_name = self.master.name
-
- if self.client_uuids == None:
- profile_name = 'dumb'
- else:
- profile_name = self.name
-
- mountfsoptions = self.get_mount_options(blkdev)
+ mountfsoptions = self.get_mount_options(blkdev)
- self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
- self.format, master_name, profile_name, self.obdtype)
-
- lctl.attach("mds", self.name, self.uuid)
- if config.mds_mds_sec:
- lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec)
- if config.mds_ost_sec:
- lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec)
-
- lctl.setup(self.name, setup = "%s %s %s %s %s %s" %(realdev,
- self.fstype, profile_name, mountfsoptions,
- master_name, self.obdtype))
-
- if development_mode():
- procentry = "/proc/fs/lustre/mds/lsd_upcall"
- upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
- if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
- print "MDS Warning: failed to set lsd cache upcall"
- else:
- run("echo ", upcall, " > ", procentry)
+ self.info(self.target.get_class(), realdev, mountfsoptions,
+ self.fstype, self.size, self.format)
- except CommandError, e:
- if e.rc == 2:
- panic("MDS is missing the config log. Need to run " +
- "lconf --write_conf.")
- else:
- raise e
-
- if config.root_squash == None:
- config.root_squash = self.root_squash
- if config.no_root_squash == None:
- config.no_root_squash = self.no_root_squash
- if config.root_squash:
- if config.no_root_squash:
- nsnid = config.no_root_squash
- else:
- nsnid = "0"
- lctl.root_squash(self.name, config.root_squash, nsnid)
+ lctl.newdev("confobd", self.name, self.uuid,
+ setup ="%s %s %s" %(realdev, self.fstype,
+ mountfsoptions))
+
+ self.mountfsoptions = mountfsoptions
+ self.realdev = realdev
+
+ def add_module(self, manager):
+ manager.add_lustre_module('obdclass', 'confobd')
def write_conf(self):
- if not self.client_uuids:
- return 0
-
- do_cleanup = 0
- if not is_prepared(self.name):
- blkdev = block_dev(self.devpath, self.size, self.fstype,
- config.reformat, self.format, self.journal_size,
- self.inode_size, self.mkfsoptions,
- self.backfstype, self.backdevpath)
+ if self.target.get_class() == 'ost':
+ config.record = 1
+ lctl.clear_log(self.name, self.target.getName() + '-conf')
+ lctl.record(self.name, self.target.getName() + '-conf')
+ lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid,
+ setup ="%s %s %s %s" %(self.realdev, self.fstype,
+ self.failover_ost,
+ self.mountfsoptions))
+ lctl.end_record()
+ lctl.clear_log(self.name, 'OSS-conf')
+ lctl.record(self.name, 'OSS-conf')
+ lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
+ lctl.end_record()
+ config.record = 0
+ return
- if self.fstype == 'smfs':
- realdev = self.fstype
- else:
- realdev = blkdev
-
- # Even for writing logs we mount mds with supplied mount options
- # because it will not mount smfs (if used) otherwise.
- mountfsoptions = self.get_mount_options(blkdev)
+ if self.target.get_class() == 'mds':
+ if self.master != None:
+ master_name = self.master.name
+ else:
+ master_name = 'dumb'
- if self.obdtype == None:
- self.obdtype = 'dumb'
-
- self.info("mds", realdev, mountfsoptions, self.fstype, self.size,
- self.format, "dumb", "dumb", self.obdtype)
-
- lctl.newdev("mds", self.name, self.uuid,
- setup ="%s %s %s %s %s %s" %(realdev, self.fstype,
- 'dumb', mountfsoptions,
- 'dumb', self.obdtype))
- do_cleanup = 1
+ config.record = 1
+ lctl.clear_log(self.name, self.target.getName() + '-conf')
+ lctl.record(self.name, self.target.getName() + '-conf')
+ lctl.newdev("mds", self.conf_name, self.conf_uuid,
+ setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype,
+ self.conf_name, self.mountfsoptions,
+ master_name, self.obdtype))
+ lctl.end_record()
+ config.record = 0
- # record logs for all MDS clients
- for obd_uuid in self.client_uuids:
- log("recording client:", obd_uuid)
+ if not self.client_uuids:
+ return 0
+ for uuid in self.client_uuids:
+ log("recording client:", uuid)
client_uuid = generate_client_uuid(self.name)
- client = VOSC(self.db.lookup(obd_uuid), client_uuid,
- self.name, self.name)
+ client = VOSC(self.db.lookup(uuid), client_uuid,
+ self.target.getName(), self.name)
config.record = 1
- lctl.clear_log(self.name, self.name)
- lctl.record(self.name, self.name)
+ lctl.clear_log(self.name, self.target.getName())
+ lctl.record(self.name, self.target.getName())
client.prepare()
- lctl.mount_option(self.name, client.get_name(), "")
+ lctl.mount_option(self.target.getName(), client.get_name(), "")
lctl.end_record()
- process_updates(self.db, self.name, self.name, client)
config.cleanup = 1
- lctl.clear_log(self.name, self.name + '-clean')
- lctl.record(self.name, self.name + '-clean')
+ lctl.clear_log(self.name, self.target.getName() + '-clean')
+ lctl.record(self.name, self.target.getName() + '-clean')
client.cleanup()
- lctl.del_mount_option(self.name)
+ lctl.del_mount_option(self.target.getName())
lctl.end_record()
- process_updates(self.db, self.name, self.name + '-clean', client)
config.cleanup = 0
config.record = 0
+ if config.record:
+ return
+
# record logs for each client
- if config.noexec:
- noexec_opt = '-n'
- else:
- noexec_opt = ''
if config.ldapurl:
config_options = "--ldapurl " + config.ldapurl + " --config " + config.config
else:
debug("recording", client_name)
old_noexec = config.noexec
config.noexec = 0
- ret, out = run (sys.argv[0], noexec_opt,
+ noexec_opt = ('', '-n')
+ ret, out = run (sys.argv[0],
+ noexec_opt[old_noexec == 1],
" -v --record --nomod",
"--record_log", client_name,
"--record_device", self.name,
config_options)
if config.verbose:
for s in out: log("record> ", string.strip(s))
- ret, out = run (sys.argv[0], noexec_opt,
+ ret, out = run (sys.argv[0],
+ noexec_opt[old_noexec == 1],
"--cleanup -v --record --nomod",
"--record_log", client_name + "-clean",
"--record_device", self.name,
if config.verbose:
for s in out: log("record> ", string.strip(s))
config.noexec = old_noexec
- if do_cleanup:
+
+ def start(self):
+ try:
+ lctl.start(self.name, self.conf_name)
+ except CommandError, e:
+ raise e
+ if self.target.get_class() == 'ost':
+ if not is_prepared('OSS'):
+ try:
+ lctl.start(self.name, 'OSS')
+ except CommandError, e:
+ raise e
+
+ def cleanup(self):
+ if is_prepared(self.name):
try:
lctl.cleanup(self.name, self.uuid, 0, 0)
+ clean_dev(self.devpath, self.fstype,
+ self.backfstype, self.backdevpath)
except CommandError, e:
log(self.module_name, "cleanup failed: ", self.name)
e.dump()
cleanup_error(e.rc)
Module.cleanup(self)
- clean_dev(self.devpath, self.fstype, self.backfstype,
- self.backdevpath)
+class MDSDEV(Module):
+ def __init__(self,db):
+ Module.__init__(self, 'MDSDEV', db)
+ self.devpath = self.db.get_val('devpath','')
+ self.backdevpath = self.db.get_val('backdevpath','')
+ self.size = self.db.get_val_int('devsize', 0)
+ self.journal_size = self.db.get_val_int('journalsize', 0)
+ self.fstype = self.db.get_val('fstype', '')
+ self.backfstype = self.db.get_val('backfstype', '')
+ self.nspath = self.db.get_val('nspath', '')
+ self.mkfsoptions = self.db.get_val('mkfsoptions', '')
+ self.mountfsoptions = self.db.get_val('mountfsoptions', '')
+ self.obdtype = self.db.get_val('obdtype', '')
+ self.root_squash = self.db.get_val('root_squash', '')
+ self.no_root_squash = self.db.get_val('no_root_squash', '')
+
+ target_uuid = self.db.get_first_ref('target')
+ self.target = self.db.lookup(target_uuid)
+ self.name = self.target.getName()
+ self.master = None
+ self.lmv = None
+
+ lmv_uuid = self.db.get_first_ref('lmv')
+ if lmv_uuid != None:
+ self.lmv = self.db.lookup(lmv_uuid)
+
+ active_uuid = get_active_target(self.target)
+ if not active_uuid:
+ panic("No target device found:", target_uuid)
+ if active_uuid == self.uuid:
+ self.active = 1
+ group = self.target.get_val('group')
+ if config.group and config.group != group:
+ self.active = 0
+ else:
+ self.active = 0
+
+ self.uuid = target_uuid
+
+ # setup LMV
+ if self.lmv != None:
+ client_uuid = self.name + "_lmv_UUID"
+ self.master = LMV(self.lmv, client_uuid,
+ self.name, self.name)
+
+ self.confobd = CONFDEV(self.db, self.name,
+ target_uuid, self.uuid)
+
+ def add_module(self, manager):
+ if self.active:
+ manager.add_lustre_module('mdc', 'mdc')
+ manager.add_lustre_module('osc', 'osc')
+ manager.add_lustre_module('ost', 'ost')
+ manager.add_lustre_module('lov', 'lov')
+ manager.add_lustre_module('mds', 'mds')
+
+ if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
+ manager.add_lustre_module(self.fstype, self.fstype)
+
+ if self.fstype:
+ manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype))
+
+ # if fstype is smfs, then we should also take care about backing
+ # store fs.
+ if self.fstype == 'smfs':
+ manager.add_lustre_module(self.backfstype, self.backfstype)
+ manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype))
+
+ for option in string.split(self.mountfsoptions, ','):
+ if option == 'snap':
+ if not self.fstype == 'smfs':
+ panic("mountoptions has 'snap', but fstype is not smfs.")
+ manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
+ manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
+
+ # add LMV modules
+ if self.master != None:
+ self.master.add_module(manager)
+
+ # add CONFOBD modules
+ if self.confobd != None:
+ self.confobd.add_module(manager)
+
+ def write_conf(self):
+ if is_prepared(self.name):
+ return
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ run_acceptors()
+ self.confobd.prepare()
+ self.confobd.write_conf()
+ self.confobd.cleanup()
+
+ def prepare(self):
+ if is_prepared(self.name):
+ return
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+ run_acceptors()
+
+ self.confobd.prepare()
+ if config.reformat:
+ self.confobd.write_conf()
+
+ # prepare LMV
+ if self.master != None:
+ self.master.prepare()
+
+ lctl.attach("mds", self.name, self.uuid)
+ if config.mds_mds_sec:
+ lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec)
+ if config.mds_ost_sec:
+ lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec)
+ lctl.detach(self.name)
+
+ if not config.record:
+ self.confobd.start()
+
+ if not is_prepared('MDT'):
+ lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="")
+
+ if development_mode():
+ procentry = "/proc/fs/lustre/mds/lsd_upcall"
+ upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall")
+ if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)):
+ print "MDS Warning: failed to set lsd cache upcall"
+ else:
+ run("echo ", upcall, " > ", procentry)
+
+ if config.root_squash == None:
+ config.root_squash = self.root_squash
+ if config.no_root_squash == None:
+ config.no_root_squash = self.no_root_squash
+ if config.root_squash:
+ if config.no_root_squash:
+ nsnid = config.no_root_squash
+ else:
+ nsnid = "0"
+ lctl.root_squash(self.name, config.root_squash, nsnid)
def msd_remaining(self):
out = lctl.device_list()
e.dump()
cleanup_error(e.rc)
- clean_dev(self.devpath, self.fstype, self.backfstype,
- self.backdevpath)
+ if self.confobd:
+ self.confobd.cleanup()
def correct_level(self, level, op=None):
#if self.master != None:
# level = level + 2
return level
-
+
class OSD(Module):
def __init__(self, db):
Module.__init__(self, 'OSD', db)
panic("No target device found:", target_uuid)
if active_uuid == self.uuid:
self.active = 1
+ group = ost.get_val('group')
+ if config.group and config.group != group:
+ self.active = 0
else:
self.active = 0
- if self.active and config.group and config.group != ost.get_val('group'):
- self.active = 0
- self.target_dev_uuid = self.uuid
self.uuid = target_uuid
+ self.confobd = CONFDEV(self.db, self.name,
+ target_uuid, self.uuid)
def add_module(self, manager):
- if self.active:
- manager.add_lustre_module('ost', 'ost')
+ if not self.active:
+ return
+ manager.add_lustre_module('ost', 'ost')
- if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
- manager.add_lustre_module(self.fstype, self.fstype)
+ if self.fstype == 'smfs' or self.fstype == 'ldiskfs':
+ manager.add_lustre_module(self.fstype, self.fstype)
- if self.fstype:
- manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
-
- if self.fstype == 'smfs':
- manager.add_lustre_module(self.backfstype, self.backfstype)
- manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
+ if self.fstype:
+ manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype))
- for option in self.mountfsoptions:
- if option == 'snap':
- if not self.fstype == 'smfs':
- panic("mountoptions with snap, but fstype is not smfs\n")
- manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
- manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
+ if self.fstype == 'smfs':
+ manager.add_lustre_module(self.backfstype, self.backfstype)
+ manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype))
- manager.add_lustre_module(self.osdtype, self.osdtype)
+ for option in self.mountfsoptions:
+ if option == 'snap':
+ if not self.fstype == 'smfs':
+ panic("mountoptions with snap, but fstype is not smfs\n")
+ manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype))
+ manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype))
- def get_mount_options(self, blkdev):
- options = def_mount_options(self.fstype, 'ost')
-
- if config.mountfsoptions:
- if options:
- options = "%s,%s" %(options, config.mountfsoptions)
- else:
- options = config.mountfsoptions
- if self.mountfsoptions:
- options = "%s,%s" %(options, self.mountfsoptions)
- else:
- if self.mountfsoptions:
- if options:
- options = "%s,%s" %(options, self.mountfsoptions)
- else:
- options = self.mountfsoptions
-
- if self.fstype == 'smfs':
- if options:
- options = "%s,type=%s,dev=%s" %(options,
- self.backfstype, blkdev)
- else:
- options = "type=%s,dev=%s" %(self.backfstype,
- blkdev)
- return options
+ manager.add_lustre_module(self.osdtype, self.osdtype)
- # need to check /proc/mounts and /etc/mtab before
- # formatting anything.
- # FIXME: check if device is already formatted.
+ # add CONFOBD modules
+ if self.confobd != None:
+ self.confobd.add_module(manager)
+
def prepare(self):
if is_prepared(self.name):
return
if not self.active:
debug(self.uuid, "not active")
return
+
run_acceptors()
if self.osdtype == 'obdecho':
- blkdev = ''
- else:
- blkdev = block_dev(self.devpath, self.size, self.fstype,
- config.reformat, self.format, self.journal_size,
- self.inode_size, self.mkfsoptions, self.backfstype,
- self.backdevpath)
-
- if self.fstype == 'smfs':
- realdev = self.fstype
- else:
- realdev = blkdev
+ self.info(self.osdtype)
+ lctl.newdev("obdecho", self.name, self.uuid)
+ if not is_prepared('OSS'):
+ lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="")
+ else:
+ self.confobd.prepare()
+ if config.reformat:
+ self.confobd.write_conf()
+ if not config.record:
+ self.confobd.start()
- mountfsoptions = self.get_mount_options(blkdev)
-
- self.info(self.osdtype, realdev, mountfsoptions, self.fstype,
- self.size, self.format, self.journal_size, self.inode_size)
-
- lctl.newdev(self.osdtype, self.name, self.uuid,
- setup ="%s %s %s %s" %(realdev, self.fstype,
- self.failover_ost,
- mountfsoptions))
- if not is_prepared('OSS'):
- lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="")
+ def write_conf(self):
+ if is_prepared(self.name):
+ return
+ if not self.active:
+ debug(self.uuid, "not active")
+ return
+
+ run_acceptors()
+ if self.osdtype != 'obdecho':
+ self.confobd.prepare()
+ self.confobd.write_conf()
+ if not config.write_conf:
+ self.confobd.start()
+ self.confobd.cleanup()
def osd_remaining(self):
out = lctl.device_list()
if not self.active:
debug(self.uuid, "not active")
return
+
if is_prepared(self.name):
self.info()
try:
print "cleanup failed: ", self.name
e.dump()
cleanup_error(e.rc)
- if not self.osdtype == 'obdecho':
- clean_dev(self.devpath, self.fstype, self.backfstype,
- self.backdevpath)
+
+ if self.osdtype != 'obdecho':
+ if self.confobd:
+ self.confobd.cleanup()
def correct_level(self, level, op=None):
return level
-def mgmt_uuid_for_fs(mtpt_name):
- if not mtpt_name:
- return ''
- mtpt_db = toplustreDB.lookup_name(mtpt_name)
- fs_uuid = mtpt_db.get_first_ref('filesystem')
- fs = toplustreDB.lookup(fs_uuid)
- if not fs:
- return ''
- return fs.get_first_ref('mgmt')
-
# Generic client module, used by OSC and MDC
class Client(Module):
- def __init__(self, tgtdb, uuid, module, fs_name, self_name=None,
- module_dir=None):
+ def __init__(self, tgtdb, uuid, module, fs_name,
+ self_name=None, module_dir=None):
self.target_name = tgtdb.getName()
self.target_uuid = tgtdb.getUUID()
self.module_dir = module_dir
+ self.backup_targets = []
self.module = module
self.db = tgtdb
- self.active = 1
self.tgt_dev_uuid = get_active_target(tgtdb)
if not self.tgt_dev_uuid:
self.name = self_name
self.uuid = uuid
self.lookup_server(self.tgt_dev_uuid)
- mgmt_uuid = mgmt_uuid_for_fs(fs_name)
- if mgmt_uuid:
- self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid)
- else:
- self.mgmt_name = ''
+ self.lookup_backup_targets()
self.fs_name = fs_name
if not self.module_dir:
self.module_dir = module
def get_servers(self):
return self._server_nets
+ def lookup_backup_targets(self):
+ """ Lookup alternative network information """
+ prof_list = toplustreDB.get_refs('profile')
+ for prof_uuid in prof_list:
+ prof_db = toplustreDB.lookup(prof_uuid)
+ if not prof_db:
+ panic("profile:", prof_uuid, "not found.")
+ for ref_class, ref_uuid in prof_db.get_all_refs():
+ if ref_class in ('osd', 'mdsdev'):
+ devdb = toplustreDB.lookup(ref_uuid)
+ uuid = devdb.get_first_ref('target')
+ if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid:
+ self.backup_targets.append(ref_uuid)
+
def prepare(self, ignore_connect_failure = 0):
self.info(self.target_uuid)
if not config.record and is_prepared(self.name):
except CommandError, e:
if not ignore_connect_failure:
raise e
+
if srv:
- if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0):
+ if self.target_uuid in config.inactive and self.permits_inactive():
debug("%s inactive" % self.target_uuid)
inactive_p = "inactive"
else:
debug("%s active" % self.target_uuid)
inactive_p = ""
lctl.newdev(self.module, self.name, self.uuid,
- setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid,
- inactive_p, self.mgmt_name))
+ setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid,
+ inactive_p))
+ for tgt_dev_uuid in self.backup_targets:
+ this_nets = get_ost_net(toplustreDB, tgt_dev_uuid)
+ if len(this_nets) == 0:
+ panic ("Unable to find a server for:", tgt_dev_uuid)
+ srv = choose_local_server(this_nets)
+ if srv:
+ lctl.connect(srv)
+ else:
+ routes = find_route(this_nets);
+ if len(routes) == 0:
+ panic("no route to", tgt_dev_uuid)
+ for (srv, r) in routes:
+ lctl.add_route_host(r[0], srv.nid_uuid, r[1], r[3])
+ if srv:
+ lctl.add_conn(self.name, srv.nid_uuid);
def cleanup(self):
if is_prepared(self.name):
e.dump()
cleanup_error(e.rc)
+ for tgt_dev_uuid in self.backup_targets:
+ this_net = get_ost_net(toplustreDB, tgt_dev_uuid)
+ srv = choose_local_server(this_net)
+ if srv:
+ lctl.disconnect(srv)
+ else:
+ for (srv, r) in find_route(this_net):
+ lctl.del_route_host(r[0], srv.nid_uuid, r[1], r[3])
+
def correct_level(self, level, op=None):
return level
def permits_inactive(self):
return 1
-def mgmtcli_name_for_uuid(uuid):
- return 'MGMTCLI_%s' % uuid
-
-class ManagementClient(Client):
- def __init__(self, db, uuid):
- Client.__init__(self, db, uuid, 'mgmt_cli', '',
- self_name = mgmtcli_name_for_uuid(db.getUUID()),
- module_dir = 'mgmt')
-
class CMOBD(Module):
def __init__(self, db):
Module.__init__(self, 'CMOBD', db)
def __init__(self,db):
Module.__init__(self, 'MTPT', db)
self.path = self.db.get_val('path')
- self.clientoptions = self.db.get_val('clientoptions', '')
+ self.clientoptions = self.db.get_val('clientoptions', '')
self.fs_uuid = self.db.get_first_ref('filesystem')
fs = self.db.lookup(self.fs_uuid)
self.mds_uuid = fs.get_first_ref('lmv')
if not self.mds_uuid:
self.mds_uuid = fs.get_first_ref('mds')
self.obd_uuid = fs.get_first_ref('obd')
- self.mgmt_uuid = fs.get_first_ref('mgmt')
client_uuid = generate_client_uuid(self.name)
ost = self.db.lookup(self.obd_uuid)
self.vosc = VOSC(ost, client_uuid, self.name, self.name)
self.vmdc = VMDC(mds, client_uuid, self.name, self.name)
- if self.mgmt_uuid:
- self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid),
- client_uuid)
- else:
- self.mgmtcli = None
-
def prepare(self):
if not config.record and fs_is_mounted(self.path):
log(self.path, "already mounted.")
return
run_acceptors()
- if self.mgmtcli:
- self.mgmtcli.prepare()
- self.vosc.prepare()
+
+ self.vosc.prepare()
self.vmdc.prepare()
- vmdc_name = self.vmdc.get_name()
self.info(self.path, self.mds_uuid, self.obd_uuid)
if config.record or config.lctl_dump:
- lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name)
+ lctl.mount_option(local_node_name, self.vosc.get_name(),
+ self.vmdc.get_name())
return
if config.clientoptions:
if self.clientoptions:
- self.clientoptions = self.clientoptions + ',' + \
- config.clientoptions
+ self.clientoptions = self.clientoptions + ',' + config.clientoptions
else:
self.clientoptions = config.clientoptions
if self.clientoptions:
self.clientoptions = ',' + self.clientoptions
# Linux kernel will deal with async and not pass it to ll_fill_super,
# so replace it with Lustre async
- self.clientoptions = string.replace(self.clientoptions, "async",
- "lasync")
+ self.clientoptions = string.replace(self.clientoptions, "async", "lasync")
if not config.sec:
config.sec = "null"
cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,sec=%s%s %s %s" % \
- (self.vosc.get_name(), vmdc_name, config.sec, self.clientoptions,
- config.config, self.path)
+ (self.vosc.get_name(), self.vmdc.get_name(), config.sec,
+ self.clientoptions, config.config, self.path)
run("mkdir", self.path)
ret, val = run(cmd)
if ret:
self.vmdc.cleanup()
self.vosc.cleanup()
- if self.mgmtcli:
- self.mgmtcli.cleanup()
def add_module(self, manager):
- manager.add_lustre_module('mdc', 'mdc')
-
- if self.mgmtcli:
- self.mgmtcli.add_module(manager)
-
self.vosc.add_module(manager)
self.vmdc.add_module(manager)
-
manager.add_lustre_module('llite', 'llite')
def correct_level(self, level, op=None):
srv_list.append(Network(db))
return srv_list
-
# the order of iniitailization is based on level.
def getServiceLevel(self):
type = self.get_class()
debug("add_local", netuuid)
local_clusters.append((srv.net_type, srv.cluster_id, srv.nid))
if srv.port > 0:
- if acceptors.has_key(srv.port):
- panic("duplicate port:", srv.port)
- acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
+ if not acceptors.has_key(srv.port):
+ acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type)
# This node is a gateway.
is_router = 0
#if config.nosetup:
# return
for s in services:
- if s[1].get_class() == 'mdsdev':
+ if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd':
n = newService(s[1])
n.write_conf()
+ n.cleanup()
def doSetup(services):
if config.nosetup:
def doLoadModules(services):
if config.nomod:
return
-
+
# adding all needed modules from all services
for s in services:
n = newService(s[1])
fp.write('%d\n' %(max))
fp.close()
-
def sys_make_devices():
if not os.access('/dev/portals', os.R_OK):
run('mknod /dev/portals c 10 240')
if not os.access('/dev/obd', os.R_OK):
run('mknod /dev/obd c 10 241')
-
# Add dir to the global PATH, if not already there.
def add_to_path(new_dir):
syspath = string.split(os.environ['PATH'], ':')
else:
return script
-
DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin')
# ensure basic elements are in the system path
def sanitise_path():
"usage: --net <tcp/elan/myrinet> <command>"},
{"network", jt_ptl_network, 0, "commands that follow apply to net\n"
"usage: network <tcp/elan/myrinet>"},
-
{"interface_list", jt_ptl_print_interfaces, 0, "print interface entries\n"
"usage: interface_list"},
{"add_interface", jt_ptl_add_interface, 0, "add interface entry\n"
"usage: connect <host> <port> [iIOC]"},
{"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid\n"
"usage: disconnect [<nid>]"},
- {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)\n"
+ {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits\n"
"usage: active_tx"},
{"mynid", jt_ptl_mynid, 0, "inform the socknal of the local nid. "
"The nid defaults to hostname for tcp networks and is automatically "
"add an entry to the portals routing table\n"
"usage: add_route <gateway> <target> [<target>]"},
{"del_route", jt_ptl_del_route, 0,
- "delete the route via the given gateway to the given targets from the portals routing table\n"
+ "delete route via gateway to targets from the portals routing table\n"
"usage: del_route <gateway> [<target>] [<target>]"},
{"set_route", jt_ptl_notify_router, 0,
- "enable/disable routes via the given gateway in the portals routing table\n"
+ "enable/disable routes via gateway in the portals routing table\n"
"usage: set_route <gateway> <up/down> [<time>]"},
- {"route_list", jt_ptl_print_routes, 0, "print the portals routing table\n"
+ {"route_list", jt_ptl_print_routes, 0,
+ "print the portals routing table, same as show_route\n"
"usage: route_list"},
+ {"show_route", jt_ptl_print_routes, 0,
+ "print the portals routing table, same as route_list\n"
+ "usage: show_route"},
{"fail", jt_ptl_fail_nid, 0, "fail/restore communications.\n"
"Omitting the count means indefinitely, 0 means restore, "
"otherwise fail 'count' messages.\n"
{"root_squash", jt_obd_root_squash, 0,
"squash root to 'uid:gid' except client 'nid'\n"
"usage: root_squash [uid:gid [nid]]\n"},
+ {"start", jt_obd_start, 0,
+ "setup mds/ost from the llog file\n"
+ "usage: start <profilename>"},
{"mount_option", jt_lcfg_mount_option, 0,
"usage: mount_option profile osc_name [mdc_name] \n"},
{"del_mount_option", jt_lcfg_del_mount_option, 0,
{"======== debug =========", jt_noop, 0, "debug"},
{"debug_daemon", jt_dbg_debug_daemon, 0,
"debug daemon control and dump to a file\n"
- "usage: debug_daemon [start file <#MB>|stop|pause|continue]"},
+ "usage: debug_daemon {start file [#MB]|stop}"},
{"debug_kernel", jt_dbg_debug_kernel, 0,
- "get debug buffer and dump to a file\n"
+ "get debug buffer and dump to a file, same as dk\n"
"usage: debug_kernel [file] [raw]"},
{"dk", jt_dbg_debug_kernel, 0,
- "get debug buffer and dump to a file\n"
+ "get debug buffer and dump to a file, same as debug_kernel\n"
"usage: dk [file] [raw]"},
{"debug_file", jt_dbg_debug_file, 0,
- "read debug buffer from input and dump to output\n"
+ "read debug buffer from input and dump to output, same as df\n"
"usage: debug_file <input> [output] [raw]"},
+ {"df", jt_dbg_debug_file, 0,
+ "read debug buffer from input and dump to output, same as debug_file\n"
+ "usage: df <input> [output] [raw]"},
{"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer\n"
"usage: clear"},
{"mark", jt_dbg_mark_debug_buf, 0,"insert marker text in kernel debug buffer\n"
#!/bin/bash
PATH=`dirname $0`:$PATH
+
+if [ $# -eq 0 ] || [ "$1" == "-h" -o "$1" == "--help" ]; then
+ lfs help find 2>&1 | sed "s/find/lfind/g"
+ exit 0
+fi
+
lfs find "$@"
command_t cmdlist[] = {
{"setstripe", lfs_setstripe, 0,
"Create a new file with a specific striping pattern or\n"
- "Set the default striping pattern on an existing directory\n"
+ "set the default striping pattern on an existing directory or\n"
+ "delete the default striping pattern from an existing directory\n"
"usage: setstripe <filename|dirname> <stripe size> <stripe start> <stripe count>\n"
+ " or \n"
+ " setstripe -d <dirname>\n"
"\tstripe size: Number of bytes in each stripe (0 default)\n"
"\tstripe start: OST index of first stripe (-1 default)\n"
"\tstripe count: Number of OSTs to stripe over (0 default)"},
/* functions */
static int lfs_setstripe(int argc, char **argv)
{
+ char *fname;
int result;
- long st_size;
- int st_offset, st_count;
+ long st_size = 0;
+ int st_offset = -1, st_count = 0;
char *end;
- if (argc != 5)
+ if (argc != 5 && argc != 3)
return CMD_HELP;
- // get the stripe size
- st_size = strtoul(argv[2], &end, 0);
- if (*end != '\0') {
- fprintf(stderr, "error: %s: bad stripe size '%s'\n",
- argv[0], argv[2]);
- return CMD_HELP;
- }
- // get the stripe offset
- st_offset = strtoul(argv[3], &end, 0);
- if (*end != '\0') {
- fprintf(stderr, "error: %s: bad stripe offset '%s'\n",
- argv[0], argv[3]);
- return CMD_HELP;
- }
- // get the stripe count
- st_count = strtoul(argv[4], &end, 0);
- if (*end != '\0') {
- fprintf(stderr, "error: %s: bad stripe count '%s'\n",
- argv[0], argv[4]);
- return CMD_HELP;
+ if (argc == 3) {
+ if (strcmp(argv[1], "-d") != 0)
+ return CMD_HELP;
+
+ fname = argv[2];
+ st_size = -1;
+ } else {
+ fname = argv[1];
+
+ // get the stripe size
+ st_size = strtoul(argv[2], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "error: %s: bad stripe size '%s'\n",
+ argv[0], argv[2]);
+ return CMD_HELP;
+ }
+ // get the stripe offset
+ st_offset = strtoul(argv[3], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "error: %s: bad stripe offset '%s'\n",
+ argv[0], argv[3]);
+ return CMD_HELP;
+ }
+ // get the stripe count
+ st_count = strtoul(argv[4], &end, 0);
+ if (*end != '\0') {
+ fprintf(stderr, "error: %s: bad stripe count '%s'\n",
+ argv[0], argv[4]);
+ return CMD_HELP;
+ }
}
- result = llapi_file_create(argv[1], st_size, st_offset, st_count, 0);
+ result = llapi_file_create(fname, st_size, st_offset, st_count, 0);
if (result)
fprintf(stderr, "error: %s: create stripe file failed\n",
argv[0]);
static int lfs_getstripe(int argc, char **argv)
{
+ struct option long_opts[] = {
+ {"quiet", 0, 0, 'q'},
+ {"verbose", 0, 0, 'v'},
+ {0, 0, 0, 0}
+ };
+ char short_opts[] = "o:qv";
+ int quiet, verbose, recursive, c, rc;
struct obd_uuid *obduuid = NULL;
- int rc;
- if (argc != 2)
- return CMD_HELP;
+ optind = 0;
+ quiet = verbose = recursive = 0;
+ while ((c = getopt_long(argc, argv, short_opts,
+ long_opts, NULL)) != -1) {
+ switch (c) {
+ case 'o':
+ if (obduuid) {
+ fprintf(stderr,
+ "error: %s: only one obduuid allowed",
+ argv[0]);
+ return CMD_HELP;
+ }
+ obduuid = (struct obd_uuid *)optarg;
+ break;
+ case 'q':
+ quiet++;
+ verbose = 0;
+ break;
+ case 'v':
+ verbose++;
+ quiet = 0;
+ break;
+ case '?':
+ return CMD_HELP;
+ break;
+ default:
+ fprintf(stderr, "error: %s: option '%s' unrecognized\n",
+ argv[0], argv[optind - 1]);
+ return CMD_HELP;
+ break;
+ }
+ }
- optind = 1;
+ if (optind >= argc)
+ return CMD_HELP;
do {
- rc = llapi_find(argv[optind], obduuid, 0, 0, 0, 0);
+ rc = llapi_find(argv[optind], obduuid, recursive, verbose, quiet, 0);
} while (++optind < argc && !rc);
if (rc)
if (argc != 2)
return CMD_HELP;
- obd_types[1] = obd_type1;
- obd_types[2] = obd_type2;
+ obd_types[0] = obd_type1;
+ obd_types[1] = obd_type2;
if (strcmp(argv[1], "osts") == 0) {
strcpy(obd_types[0], "osc");
#include <stdarg.h>
#include <sys/stat.h>
#include <sys/types.h>
+#ifdef HAVE_LINUX_TYPES_H
#include <linux/types.h>
+#else
+#include "types.h"
+#endif
+#ifdef HAVE_LINUX_UNISTD_H
#include <linux/unistd.h>
-
+#else
+#include <unistd.h>
+#endif
#include <liblustre.h>
#include <linux/obd.h>
#include <linux/lustre_lib.h>
{
struct lov_user_md lum = { 0 };
int fd, rc = 0;
+ int isdir = 0;
/* Initialize IOCTL striping pattern structure */
lum.lmm_magic = LOV_USER_MAGIC;
lum.lmm_stripe_offset = stripe_offset;
fd = open(name, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, 0644);
- if (errno == EISDIR)
+ if (errno == EISDIR) {
fd = open(name, O_DIRECTORY | O_RDONLY);
+ isdir++;
+ }
if (fd < 0) {
err_msg("unable to open '%s'",name);
return rc;
}
+ /* setting stripe pattern 0 -1 0 to a dir means to delete it */
+ if (isdir) {
+ if (stripe_size == 0 && stripe_count == 0 &&
+ stripe_offset == -1)
+ lum.lmm_stripe_size = -1;
+ } else {
+ if (stripe_size == -1) {
+ err_msg("deleting file stripe info is not allowed\n");
+ rc = -EPERM;
+ goto out;
+ }
+ }
+
if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum)) {
char *errmsg = "stripe already set";
if (errno != EEXIST && errno != EALREADY)
name, fd, errmsg);
rc = -errno;
}
+out:
if (close(fd) < 0) {
err_msg("error on close for '%s' (%d)", name, fd);
if (rc == 0)
out:
free(buf);
- return 0;
+ return rc;
}
static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
char rawbuf[OBD_MAX_IOCTL_BUFFER];
char *bufl = rawbuf;
char *bufp = buf;
- int max = sizeof(rawbuf);
- struct obd_ioctl_data datal;
+ struct obd_ioctl_data datal = { 0, };
struct obd_statfs osfs_buffer;
while(bufp[0] == ' ')
datal.ioc_inlbuf1 = obd_name;
datal.ioc_inllen1 = strlen(obd_name) + 1;
- obd_ioctl_pack(&datal,&bufl,max);
+ rc = obd_ioctl_pack(&datal, &bufl, OBD_MAX_IOCTL_BUFFER);
+ if (rc) {
+ fprintf(stderr, "internal buffer error packing\n");
+ break;
+ }
rc = ioctl(dirfd(opendir(dir)), OBD_IOC_PING,
bufl);
lmd->lmd_local_nid = PTL_NID_ANY;
lmd->lmd_port = 988; /* XXX define LUSTRE_DEFAULT_PORT */
lmd->lmd_nal = SOCKNAL;
+ lmd->lmd_async = 0;
lmd->lmd_nllu = 99;
lmd->lmd_nllg = 99;
strncpy(lmd->lmd_security, "null", sizeof(lmd->lmd_security));
printf("profile: %s\n", lmd->lmd_profile);
printf("sec_flavor: %s\n", lmd->lmd_security);
printf("server_nid: "LPX64"\n", lmd->lmd_server_nid);
- printf("local_nid: "LPX64"\n", lmd->lmd_local_nid);
- printf("nal: %d\n", lmd->lmd_nal);
- printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr);
- printf("port: %d\n", lmd->lmd_port);
+#ifdef CRAY_PORTALS
+ if (lmd->lmd_nal != CRAY_KB_SSNAL) {
+#endif
+ printf("local_nid: "LPX64"\n", lmd->lmd_local_nid);
+#ifdef CRAY_PORTALS
+ }
+#endif
+ printf("nal: %x\n", lmd->lmd_nal);
+#ifdef CRAY_PORTALS
+ if (lmd->lmd_nal != CRAY_KB_SSNAL) {
+#endif
+ printf("server_ipaddr: 0x%x\n", lmd->lmd_server_ipaddr);
+ printf("port: %d\n", lmd->lmd_port);
+#ifdef CRAY_PORTALS
+ }
+#endif
for (i = 0; i < route_index; i++)
printf("route: "LPX64" : "LPX64" - "LPX64"\n",
}
if (!strcmp(opt, "debug")) {
debug = val;
+ } else if (!strcmp(opt, "async")) {
+ lmd->lmd_async = 1;
}
}
}
memset(buf, 0, sizeof(buf));
+#ifdef CRAY_PORTALS
+ if (lmd->lmd_nal == CRAY_KB_ERNAL) {
+#else
if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL ||
- lmd->lmd_nal == OPENIBNAL) {
+ lmd->lmd_nal == OPENIBNAL || lmd->lmd_nal == IIBNAL) {
+#endif
struct utsname uts;
rc = gethostname(buf, sizeof(buf) - 1);
progname, strerror(rc));
return rc;
}
+
rc = uname(&uts);
/* for 2.6 kernels, reserve at least 8MB free, or we will
* go OOM during heavy read load */
write(f, val, strlen(val));
close(f);
}
- }
+ }
+#ifndef CRAY_PORTALS
} else if (lmd->lmd_nal == QSWNAL) {
char *pfiles[] = {"/proc/qsnet/elan3/device0/position",
"/proc/qsnet/elan4/device0/position",
return -1;
}
+#else
+ } else if (lmd->lmd_nal == CRAY_KB_SSNAL) {
+ return 0;
+#endif
}
if (ptl_parse_nid (&nid, buf) != 0) {
{
ptl_nid_t nid = 0;
int rc;
-
+
+#ifdef CRAY_PORTALS
+ if (lmd->lmd_nal == CRAY_KB_ERNAL) {
+#else
if (lmd->lmd_nal == SOCKNAL || lmd->lmd_nal == TCPNAL ||
- lmd->lmd_nal == OPENIBNAL) {
+ lmd->lmd_nal == OPENIBNAL || lmd->lmd_nal == IIBNAL) {
+#endif
if (lmd->lmd_server_nid == PTL_NID_ANY) {
if (ptl_parse_nid (&nid, hostname) != 0) {
fprintf (stderr, "%s: can't parse NID %s\n",
progname, hostname);
return (-1);
}
- } else if (lmd->lmd_nal == QSWNAL) {
+#ifndef CRAY_PORTALS
+ } else if (lmd->lmd_nal == QSWNAL && lmd->lmd_server_nid == PTL_NID_ANY) {
char buf[64];
rc = sscanf(hostname, "%*[^0-9]%63[0-9]", buf);
if (rc != 1) {
return (-1);
}
lmd->lmd_server_nid = nid;
+#else
+ } else if (lmd->lmd_nal == CRAY_KB_SSNAL) {
+ lmd->lmd_server_nid = strtoll(hostname,0,0);
+#endif
}
exit(0);
}
+ rc = access(target, F_OK);
+ if (rc) {
+ rc = errno;
+ fprintf(stderr, "%s: %s inaccessible: %s\n", progname, target,
+ strerror(errno));
+ return rc;
+ }
+
rc = mount(source, target, "lustre", 0, (void *)&lmd);
if (rc) {
rc = errno;
perror(argv[0]);
+ fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
+ source, target, strerror(errno));
if (rc == ENODEV)
fprintf(stderr, "Are the lustre modules loaded?\n"
"Check /etc/modules.conf and /proc/filesystems\n");
if net_type in ('tcp','openib','ra'):
port = get_option_int(options, 'port')
- elif net_type in ('elan','gm','iib','vib','lo'):
+ elif net_type in ('elan', 'gm', 'iib', 'vib', 'lo', 'cray_kern_nal'):
port = 0
else:
print "Unknown net_type: ", net_type
if not mds_uuid:
mds_uuid = name2uuid(lustre, mds_name, tag='lmv', fatal=0)
if not mds_uuid:
- mds_uuid = name2uuid(lustre, mds_name, tag='cobd', fatal=1)
+ mds_uuid = name2uuid(lustre, mds_name, tag='cobd', fatal=0)
+ if not mds_uuid:
+ error("mds '" + mds_name + "' is not found")
obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=0)
if not obd_uuid:
obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0)
if not obd_uuid:
- obd_uuid = name2uuid(lustre, obd_name, tag='cobd', fatal=1)
+ obd_uuid = name2uuid(lustre, obd_name, tag='cobd', fatal=0)
+ if not obd_uuid:
+ error("ost '" + obd_name + "' is not found")
+
if mgmt_name:
mgmt_uuid = name2uuid(lustre, mgmt_name, tag='mgmt', fatal=1)
else:
LIBLUSTRE_MOUNT_TARGET=${LIBLUSTRE_MOUNT_TARGET:-"TARGET_NOT_SET"}
LIBLUSTRE_SECURITY=${LIBLUSTRE_SECURITY:-"null"}
LIBLUSTRE_DUMPFILE=${LIBLUSTRE_DUMPFILE:-"/tmp/DUMP_FILE"}
+LIBLUSTRE_DEBUG_MASK=${LIBLUSTRE_DEBUG_MASK:-"0"}
+LIBLUSTRE_DEBUG_SUBSYS=${LIBLUSTRE_DEBUG_SUBSYS:-"0"}
LD_PRELOAD=${LD_PRELOAD:-"/usr/lib/liblustre.so"}
export LIBLUSTRE_MOUNT_POINT
export LIBLUSTRE_MOUNT_TARGET
export LIBLUSTRE_SECURITY
export LIBLUSTRE_DUMPFILE
+export LIBLUSTRE_DEBUG_MASK
+export LIBLUSTRE_DEBUG_SUBSYS
export LD_PRELOAD
exec $@
#!/bin/bash
PATH=`dirname $0`:$PATH
+
+if [ $# -eq 0 ] || [ "$1" == "-h" -o "$1" == "--help" ]; then
+ lfs help setstripe 2>&1 | sed "s/setstripe/lstripe/g"
+ exit 0
+fi
+
lfs setstripe "$@"
--node "$DEVICE_NAME" \
--mds "$DEVICE_MDS" \
--lov "$DEVICE_LOV" \
- --path "$DEVICE"
+ --path "$DEVICE" \
+ --clientoptions "async"
;;
esac
done
#include <errno.h>
#include <string.h>
+#ifdef HAVE_ASM_PAGE_H
#include <asm/page.h> /* needed for PAGE_SIZE - rread */
+#endif
#include <linux/obd_class.h>
#include <portals/ptlctl.h>
char *end;
int rc = 0;
- if (argc < 2 && argc > 4)
+ if (argc < 2 || argc > 4)
return CMD_HELP;
IOC_INIT(data);
return 0;
}
+int jt_obd_start(int argc, char **argv)
+{
+ int rc;
+ struct obd_ioctl_data data;
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <logfile>\n", argv[0]);
+ return 0;
+ }
+
+ IOC_INIT(data);
+ data.ioc_inllen1 = strlen(argv[1]) + 1;
+ data.ioc_inlbuf1 = argv[1];
+
+ IOC_PACK(argv[0], data);
+ rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_START, buf);
+ if (rc) {
+ fprintf(stderr, "error: %s: ioctl error: %s\n",
+ jt_cmdname(argv[0]), strerror(errno));
+ return -1;
+ }
+ return 0;
+}
int jt_cfg_record(int argc, char **argv)
{
int jt_obd_mdc_lookup(int argc, char **argv);
int jt_get_version(int argc, char **argv);
int jt_obd_close_uuid(int argc, char **argv);
+int jt_obd_start(int argc, char **argv);
int jt_cfg_record(int argc, char **argv);
int jt_cfg_endrecord(int argc, char **argv);
int jt_cfg_parse(int argc, char **argv);
#define CHECK_VALUE(a) \
do { \
printf(" LASSERTF("#a \
- " == %d, \" found %%lld\\n\",\n "\
- "(long long)"#a");\n",a);\
+ " == %llu, \" found %%llu\\n\",\n "\
+ "(unsigned long long)"#a");\n",(unsigned long long)a);\
} while(0)
#define CHECK_MEMBER_OFFSET(s,m) \