From fbb7ead129258897f5a5d5c9ce28d31fbbe5bca2 Mon Sep 17 00:00:00 2001 From: yury Date: Mon, 4 Apr 2005 13:13:25 +0000 Subject: [PATCH] - landed b_hd_cray_merge3 --- .../patches/ext3-extents-2.6.5.patch | 2844 +++++++++++++++++++ .../patches/ext3-mballoc2-2.6-suse.patch | 1738 ++++++++++++ .../kernel_patches/patches/ext3-nlinks-2.6.7.patch | 170 ++ .../kernel_patches/patches/iopen-2.6-suse.patch | 48 +- lustre/ChangeLog | 170 +- lustre/autoconf/lustre-core.m4 | 1 + lustre/conf/lustre.dtd | 5 +- lustre/conf/lustre2ldif.xsl | 6 + lustre/include/liblustre.h | 19 +- lustre/include/linux/lprocfs_status.h | 40 +- lustre/include/linux/lustre_cfg.h | 1 + lustre/include/linux/lustre_commit_confd.h | 1 + lustre/include/linux/lustre_compat25.h | 17 +- lustre/include/linux/lustre_debug.h | 4 + lustre/include/linux/lustre_dlm.h | 42 +- lustre/include/linux/lustre_export.h | 9 +- lustre/include/linux/lustre_fsfilt.h | 115 +- lustre/include/linux/lustre_idl.h | 27 +- lustre/include/linux/lustre_import.h | 8 +- lustre/include/linux/lustre_lib.h | 50 +- lustre/include/linux/lustre_lite.h | 15 + lustre/include/linux/lustre_log.h | 3 +- lustre/include/linux/lustre_mds.h | 37 +- lustre/include/linux/lustre_net.h | 23 +- lustre/include/linux/lustre_smfs.h | 8 +- lustre/include/linux/lvfs.h | 48 +- lustre/include/linux/obd.h | 26 +- lustre/include/linux/obd_class.h | 142 +- lustre/include/linux/obd_lov.h | 15 - lustre/include/linux/obd_support.h | 281 +- lustre/include/lustre/lustre_user.h | 19 +- lustre/include/types.h | 27 + .../kernel-2.4.21-rhel-2.4-ia64-smp.config | 1424 ++++++++++ .../kernel-2.4.21-rhel-2.4-ia64.config | 1424 ++++++++++ .../kernel-2.6.5-2.6-suse-i686-bigsmp.config | 2845 ++++++++++++++++++++ .../kernel-2.6.5-2.6-suse-i686.config | 2845 ++++++++++++++++++++ .../patches/export-show_task-2.6-vanilla.patch | 21 + .../patches/ext3-extents-2.6.5.patch | 2844 +++++++++++++++++++ .../patches/ext3-mballoc2-2.6-suse.patch | 1738 ++++++++++++ 
.../patches/ext3-mballoc2-2.6.7.patch | 1750 ++++++++++++ .../kernel_patches/patches/ext3-nlinks-2.6.7.patch | 170 ++ .../patches/ext3-xattr-header-move-2.6.suse.patch | 506 ---- lustre/kernel_patches/patches/iopen-2.6-suse.patch | 48 +- ...atic-wbuf-2.6.7 => jbd-static-wbuf-2.6.7.patch} | 0 .../patches/nfs-cifs-intent-2.6-suse.patch | 39 +- .../patches/removepage-vanilla-2.6.5.patch | 31 - .../kernel_patches/patches/uml-2.6.7-01-bb2.patch | 2 +- .../patches/vfs_intent-2.6-suse.patch | 6 +- .../patches/vfs_lookup_in_file-2.6.patch | 16 + .../patches/vfs_nointent-2.6-suse.patch | 126 +- lustre/kernel_patches/series/2.6-vanilla.series | 1 + lustre/kernel_patches/targets/.cvsignore | 1 + lustre/kernel_patches/targets/2.6-suse.target.in | 27 + .../{2.6-suse.target => 2.6-vanilla.target.in} | 10 +- .../{hp_pnnl-2.4.target => hp_pnnl-2.4.target.in} | 9 +- lustre/kernel_patches/targets/rh-2.4.target | 13 - lustre/kernel_patches/targets/rh-2.4.target.in | 23 + lustre/kernel_patches/targets/rhel-2.4.target.in | 23 + ...use-2.4.21-2.target => suse-2.4.21-2.target.in} | 2 +- lustre/kernel_patches/which_patch | 9 +- lustre/ldlm/l_lock.c | 1 + lustre/ldlm/ldlm_flock.c | 47 +- lustre/ldlm/ldlm_lib.c | 54 +- lustre/ldlm/ldlm_lock.c | 50 +- lustre/ldlm/ldlm_lockd.c | 89 +- lustre/ldlm/ldlm_request.c | 69 +- lustre/ldlm/ldlm_resource.c | 95 +- lustre/ldlm/ldlm_test.c | 6 +- lustre/liblustre/Makefile.am | 14 +- lustre/liblustre/dir.c | 15 + lustre/liblustre/file.c | 4 + lustre/liblustre/genlib.sh | 16 +- lustre/liblustre/llite_lib.c | 89 +- lustre/liblustre/llite_lib.h | 35 +- lustre/liblustre/namei.c | 9 +- lustre/liblustre/rw.c | 33 +- lustre/liblustre/super.c | 39 +- lustre/liblustre/tests/Makefile.am | 18 +- lustre/liblustre/tests/echo_test.c | 1 + lustre/liblustre/tests/replay_single.c | 2 + lustre/liblustre/tests/sanity.c | 308 ++- lustre/llite/dcache.c | 107 +- lustre/llite/dir.c | 2 +- lustre/llite/file.c | 392 ++- lustre/llite/llite_internal.h | 33 +- 
lustre/llite/llite_lib.c | 208 +- lustre/llite/llite_mmap.c | 51 +- lustre/llite/llite_nfs.c | 47 +- lustre/llite/lproc_llite.c | 29 +- lustre/llite/namei.c | 152 +- lustre/llite/rw.c | 214 +- lustre/llite/rw24.c | 23 +- lustre/llite/special.c | 58 +- lustre/llite/symlink.c | 9 +- lustre/lmv/lmv_obd.c | 61 +- lustre/lmv/lmv_objmgr.c | 2 +- lustre/lov/Makefile.in | 2 +- lustre/lov/autoMakefile.am | 2 +- lustre/lov/lov_internal.h | 171 +- lustre/lov/lov_merge.c | 153 ++ lustre/lov/lov_obd.c | 1868 +++---------- lustre/lov/lov_offset.c | 240 ++ lustre/lov/lov_pack.c | 4 +- lustre/lov/lov_qos.c | 187 ++ lustre/lov/lov_request.c | 1295 +++++++++ lustre/lvfs/Makefile.in | 4 +- lustre/lvfs/autoMakefile.am | 2 + lustre/lvfs/fsfilt_ext3.c | 166 +- lustre/lvfs/fsfilt_smfs.c | 70 +- lustre/lvfs/lvfs_common.c | 115 + lustre/lvfs/lvfs_internal.h | 4 +- lustre/lvfs/lvfs_linux.c | 204 +- lustre/mdc/mdc_internal.h | 6 + lustre/mdc/mdc_lib.c | 25 + lustre/mdc/mdc_locks.c | 87 +- lustre/mdc/mdc_request.c | 189 +- lustre/mds/handler.c | 217 +- lustre/mds/lproc_mds.c | 12 + lustre/mds/mds_fs.c | 18 +- lustre/mds/mds_internal.h | 6 +- lustre/mds/mds_lib.c | 14 +- lustre/mds/mds_lmv.c | 85 +- lustre/mds/mds_lov.c | 10 +- lustre/mds/mds_open.c | 167 +- lustre/mds/mds_reint.c | 137 +- lustre/mds/mds_unlink_open.c | 61 +- lustre/mgmt/mgmt_svc.c | 7 +- lustre/obdclass/Makefile.in | 8 +- lustre/obdclass/autoMakefile.am | 6 +- lustre/obdclass/class_obd.c | 6 - lustre/obdclass/confobd.c | 423 +++ lustre/obdclass/genops.c | 4 +- lustre/obdclass/llog_ioctl.c | 4 +- lustre/obdclass/llog_obd.c | 7 +- lustre/obdclass/lprocfs_status.c | 6 +- lustre/obdclass/lustre_peer.c | 7 +- lustre/obdclass/obd_config.c | 13 +- lustre/obdecho/echo_client.c | 62 +- lustre/obdfilter/filter.c | 216 +- lustre/obdfilter/filter_internal.h | 6 +- lustre/obdfilter/filter_io.c | 34 +- lustre/obdfilter/filter_io_24.c | 30 +- lustre/obdfilter/filter_io_26.c | 392 ++- lustre/obdfilter/filter_lvb.c | 27 +- 
lustre/obdfilter/lproc_obdfilter.c | 2 +- lustre/osc/lproc_osc.c | 38 +- lustre/osc/osc_create.c | 60 +- lustre/osc/osc_internal.h | 14 +- lustre/osc/osc_request.c | 77 +- lustre/ost/lproc_ost.c | 2 +- lustre/ost/ost_handler.c | 168 +- lustre/ptlbd/server.c | 2 +- lustre/ptlrpc/autoMakefile.am | 5 +- lustre/ptlrpc/client.c | 24 +- lustre/ptlrpc/connection.c | 4 +- lustre/ptlrpc/events.c | 34 +- lustre/ptlrpc/import.c | 70 +- lustre/ptlrpc/niobuf.c | 12 +- lustre/ptlrpc/pers.c | 31 +- lustre/ptlrpc/pinger.c | 7 +- lustre/ptlrpc/recov_thread.c | 1 + lustre/ptlrpc/recover.c | 19 +- lustre/ptlrpc/service.c | 56 +- lustre/scripts/lustre | 18 +- lustre/scripts/lustrefs | 2 +- lustre/scripts/suse-functions.sh | 22 + lustre/scripts/suse-post.sh | 46 + lustre/scripts/suse-postun.sh | 43 + lustre/scripts/suse-trigger-script.sh.in | 9 + lustre/sec/gss/gss_api.h | 2 +- lustre/sec/gss/sec_gss.c | 2 +- lustre/sec/sec.c | 2 +- lustre/smfs/inode.c | 3 - lustre/smfs/kml.c | 26 +- lustre/smfs/smfs_lib.c | 1 + lustre/tests/.cvsignore | 2 +- lustre/tests/Makefile.am | 20 +- lustre/tests/conf-sanity.sh | 42 +- lustre/tests/createmany.c | 22 +- lustre/tests/echo.sh | 7 + lustre/tests/fsx.c | 2 +- lustre/tests/lfsck_config.sh | 2 +- lustre/tests/lfscktest.sh | 27 +- lustre/tests/lfscktest_config.sh | 10 +- lustre/tests/ll_dirstripe_verify.c | 8 +- lustre/tests/llmount.sh | 5 +- lustre/tests/local.sh | 4 +- lustre/tests/lov.sh | 4 +- lustre/tests/mcr-mds-failover-config.sh | 2 +- lustre/tests/mcr-routed-config.sh | 2 +- lustre/tests/mcrlov.sh | 2 +- lustre/tests/mount2fs.sh | 4 +- lustre/tests/multiop.c | 13 +- lustre/tests/recovery-cleanup.sh | 2 +- lustre/tests/recovery-small.sh | 252 +- lustre/tests/replay-dual.sh | 14 +- lustre/tests/replay-ost-single.sh | 8 +- lustre/tests/replay-single.sh | 257 +- lustre/tests/runtests | 6 +- lustre/tests/sanity-fid.sh | 10 + lustre/tests/sanity-gns.sh | 10 + lustre/tests/sanity.sh | 298 +- lustre/tests/sanityN.sh | 24 +- 
lustre/tests/test-framework.sh | 21 +- lustre/tests/uml.sh | 9 +- lustre/tests/unlinkmany.c | 18 +- lustre/utils/Lustre/lustredb.py | 7 + lustre/utils/lconf | 791 +++--- lustre/utils/lctl.c | 27 +- lustre/utils/lfind | 6 + lustre/utils/lfs.c | 115 +- lustre/utils/liblustreapi.c | 39 +- lustre/utils/llmount.c | 62 +- lustre/utils/lmc | 11 +- lustre/utils/lrun | 4 + lustre/utils/lstripe | 6 + lustre/utils/lwizard | 3 +- lustre/utils/obd.c | 27 +- lustre/utils/obdctl.h | 1 + lustre/utils/wirecheck.c | 4 +- 220 files changed, 29123 insertions(+), 4962 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch create mode 100644 ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch create mode 100644 ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.7.patch create mode 100644 lustre/include/types.h create mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config create mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config create mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-bigsmp.config create mode 100644 lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686.config create mode 100644 lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch create mode 100644 lustre/kernel_patches/patches/ext3-extents-2.6.5.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch create mode 100644 lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch create mode 100644 lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch delete mode 100644 lustre/kernel_patches/patches/ext3-xattr-header-move-2.6.suse.patch rename lustre/kernel_patches/patches/{jbd-static-wbuf-2.6.7 => jbd-static-wbuf-2.6.7.patch} (100%) delete mode 100644 lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch create mode 100644 lustre/kernel_patches/patches/vfs_lookup_in_file-2.6.patch create mode 100644 lustre/kernel_patches/targets/.cvsignore 
create mode 100644 lustre/kernel_patches/targets/2.6-suse.target.in rename lustre/kernel_patches/targets/{2.6-suse.target => 2.6-vanilla.target.in} (50%) rename lustre/kernel_patches/targets/{hp_pnnl-2.4.target => hp_pnnl-2.4.target.in} (54%) delete mode 100644 lustre/kernel_patches/targets/rh-2.4.target create mode 100644 lustre/kernel_patches/targets/rh-2.4.target.in create mode 100644 lustre/kernel_patches/targets/rhel-2.4.target.in rename lustre/kernel_patches/targets/{suse-2.4.21-2.target => suse-2.4.21-2.target.in} (86%) create mode 100644 lustre/lov/lov_merge.c create mode 100644 lustre/lov/lov_offset.c create mode 100644 lustre/lov/lov_qos.c create mode 100644 lustre/lov/lov_request.c create mode 100644 lustre/obdclass/confobd.c create mode 100644 lustre/scripts/suse-functions.sh create mode 100644 lustre/scripts/suse-post.sh create mode 100644 lustre/scripts/suse-postun.sh create mode 100644 lustre/scripts/suse-trigger-script.sh.in diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch new file mode 100644 index 0000000..cad7b54 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -0,0 +1,2844 @@ +%patch +Index: linux-2.6.5-sles9/fs/ext3/extents.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 +@@ -0,0 +1,2313 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++/* ++ * Extents support for EXT3 ++ * ++ * TODO: ++ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() ++ * - ext3_ext_calc_credits() could take 'mergable' into account ++ * - ext3*_error() should be used in some situations ++ * - find_goal() [to be tested and improved] ++ * - smart tree reduction ++ * - arch-independence ++ * common on-disk format for big/little-endian arch ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) ++{ ++ int err; ++ ++ if (handle->h_buffer_credits > needed) ++ return handle; ++ if (!ext3_journal_extend(handle, needed)) ++ return handle; ++ err = ext3_journal_restart(handle, needed); ++ ++ return handle; ++} ++ ++static int inline ++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->get_write_access) ++ return tree->ops->get_write_access(h,tree->buffer); ++ else ++ return 0; ++} ++ ++static int inline ++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->mark_buffer_dirty) ++ return tree->ops->mark_buffer_dirty(h,tree->buffer); ++ else ++ return 0; ++} ++ ++/* ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ */ ++static int ext3_ext_get_access(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; ++ ++ if (path->p_bh) { ++ /* path points to block */ ++ 
err = ext3_journal_get_write_access(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_get_access_for_root(handle, tree); ++ } ++ return err; ++} ++ ++/* ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ * - EIO ++ */ ++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; ++ if (path->p_bh) { ++ /* path points to block */ ++ err =ext3_journal_dirty_metadata(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_mark_root_dirty(handle, tree); ++ } ++ return err; ++} ++ ++static int inline ++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, struct ext3_extent *ex, ++ int *err) ++{ ++ int goal, depth, newblock; ++ struct inode *inode; ++ ++ EXT_ASSERT(tree); ++ if (tree->ops->new_block) ++ return tree->ops->new_block(handle, tree, path, ex, err); ++ ++ inode = tree->inode; ++ depth = EXT_DEPTH(tree); ++ if (path && depth > 0) { ++ goal = path[depth-1].p_block; ++ } else { ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ ++ bg_start = (ei->i_block_group * ++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ goal = bg_start + colour; ++ } ++ ++ newblock = ext3_new_block(handle, inode, goal, err); ++ return newblock; ++} ++ ++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *neh; ++ neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation++; ++} ++ ++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 6; ++#endif ++ return size; ++} ++ ++static 
inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 5; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 3; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 4; ++#endif ++ return size; ++} ++ ++static void ext3_ext_show_path(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int k, l = path->p_depth; ++ ++ ext_debug(tree, "path:"); ++ for (k = 0; k <= l; k++, path++) { ++ if (path->p_idx) { ++ ext_debug(tree, " %d->%d", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); ++ } else if (path->p_ext) { ++ ext_debug(tree, " %d:%d:%d", ++ path->p_ext->ee_block, ++ path->p_ext->ee_len, ++ path->p_ext->ee_start); ++ } else ++ ext_debug(tree, " []"); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *eh; ++ struct ext3_extent *ex; ++ int i; ++ ++ if (!path) ++ return; ++ ++ eh = path[depth].p_hdr; ++ ex = EXT_FIRST_EXTENT(eh); ++ ++ for (i = 0; i < eh->eh_entries; i++, ex++) { ++ ext_debug(tree, "%d:%d:%d ", ++ ex->ee_block, ex->ee_len, ex->ee_start); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_drop_refs(struct ext3_ext_path *path) ++{ ++ int depth = path->p_depth; ++ int i; ++ ++ for (i = 0; i <= depth; i++, path++) ++ if (path->p_bh) { 
++ brelse(path->p_bh); ++ path->p_bh = NULL; ++ } ++} ++ ++/* ++ * binary search for closest index by given block ++ */ ++static inline void ++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent_idx *ix; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ EXT_ASSERT(eh->eh_entries > 0); ++ ++ ext_debug(tree, "binsearch for %d(idx): ", block); ++ ++ path->p_idx = ix = EXT_FIRST_INDEX(eh); ++ ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ix[l + k].ei_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ix += l; ++ path->p_idx = ix; ++ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf); ++ ++ while (l++ < r) { ++ if (block < ix->ei_block) ++ break; ++ path->p_idx = ix++; ++ } ++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent_idx *chix; ++ ++ chix = ix = EXT_FIRST_INDEX(eh); ++ for (k = 0; k < eh->eh_entries; k++, ix++) { ++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->ei_block,ix[-1].ei_block); ++ } ++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); ++ if (block < ix->ei_block) ++ break; ++ chix = ix; ++ } ++ EXT_ASSERT(chix == path->p_idx); ++ } ++#endif ++ ++} ++ ++/* ++ * binary search for closest extent by given block ++ */ ++static inline void ++ext3_ext_binsearch(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent *ex; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ ++ if (eh->eh_entries == 0) { ++ /* ++ * this leaf is 
empty yet: ++ * we get such a leaf in split/add case ++ */ ++ return; ++ } ++ ++ ext_debug(tree, "binsearch for %d: ", block); ++ ++ path->p_ext = ex = EXT_FIRST_EXTENT(eh); ++ ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ex[l + k].ee_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ex += l; ++ path->p_ext = ex; ++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); ++ ++ while (l++ < r) { ++ if (block < ex->ee_block) ++ break; ++ path->p_ext = ex++; ++ } ++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent *chex; ++ ++ chex = ex = EXT_FIRST_EXTENT(eh); ++ for (k = 0; k < eh->eh_entries; k++, ex++) { ++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); ++ if (block < ex->ee_block) ++ break; ++ chex = ex; ++ } ++ EXT_ASSERT(chex == path->p_ext); ++ } ++#endif ++ ++} ++ ++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *eh; ++ ++ BUG_ON(tree->buffer_len == 0); ++ ext3_ext_get_access_for_root(handle, tree); ++ eh = EXT_ROOT_HDR(tree); ++ eh->eh_depth = 0; ++ eh->eh_entries = 0; ++ eh->eh_magic = EXT3_EXT_MAGIC; ++ eh->eh_max = ext3_ext_space_root(tree); ++ ext3_ext_mark_root_dirty(handle, tree); ++ ext3_ext_invalidate_cache(tree); ++ return 0; ++} ++ ++struct ext3_ext_path * ++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ struct buffer_head *bh; ++ int depth, i, ppos = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ eh = EXT_ROOT_HDR(tree); ++ EXT_ASSERT(eh); ++ i = depth = EXT_DEPTH(tree); ++ EXT_ASSERT(eh->eh_max); ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(i == 0 || eh->eh_entries > 0); ++ ++ /* account possible depth 
increase */ ++ if (!path) { ++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), ++ GFP_NOFS); ++ if (!path) ++ return ERR_PTR(-ENOMEM); ++ } ++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); ++ path[0].p_hdr = eh; ++ ++ /* walk through the tree */ ++ while (i) { ++ ext_debug(tree, "depth %d: num %d, max %d\n", ++ ppos, eh->eh_entries, eh->eh_max); ++ ext3_ext_binsearch_idx(tree, path + ppos, block); ++ path[ppos].p_block = path[ppos].p_idx->ei_leaf; ++ path[ppos].p_depth = i; ++ path[ppos].p_ext = NULL; ++ ++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); ++ if (!bh) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ return ERR_PTR(-EIO); ++ } ++ eh = EXT_BLOCK_HDR(bh); ++ ppos++; ++ EXT_ASSERT(ppos <= depth); ++ path[ppos].p_bh = bh; ++ path[ppos].p_hdr = eh; ++ i--; ++ } ++ ++ path[ppos].p_depth = i; ++ path[ppos].p_hdr = eh; ++ path[ppos].p_ext = NULL; ++ ++ /* find extent */ ++ ext3_ext_binsearch(tree, path + ppos, block); ++ ++ ext3_ext_show_path(tree, path); ++ ++ return path; ++} ++ ++/* ++ * insert new index [logical;ptr] into the block at cupr ++ * it check where to insert: before curp or after curp ++ */ ++static int ext3_ext_insert_index(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *curp, ++ int logical, int ptr) ++{ ++ struct ext3_extent_idx *ix; ++ int len, err; ++ ++ if ((err = ext3_ext_get_access(handle, tree, curp))) ++ return err; ++ ++ EXT_ASSERT(logical != curp->p_idx->ei_block); ++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; ++ if (logical > curp->p_idx->ei_block) { ++ /* insert after */ ++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { ++ len = (len - 1) * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d after: %d. 
" ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ (curp->p_idx + 1), (curp->p_idx + 2)); ++ memmove(curp->p_idx + 2, curp->p_idx + 1, len); ++ } ++ ix = curp->p_idx + 1; ++ } else { ++ /* insert before */ ++ len = len * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d before: %d. " ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ curp->p_idx, (curp->p_idx + 1)); ++ memmove(curp->p_idx + 1, curp->p_idx, len); ++ ix = curp->p_idx; ++ } ++ ++ ix->ei_block = logical; ++ ix->ei_leaf = ptr; ++ curp->p_hdr->eh_entries++; ++ ++ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); ++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); ++ ++ err = ext3_ext_dirty(handle, tree, curp); ++ ext3_std_error(tree->inode->i_sb, err); ++ ++ return err; ++} ++ ++/* ++ * routine inserts new subtree into the path, using free index entry ++ * at depth 'at: ++ * - allocates all needed blocks (new leaf and all intermediate index blocks) ++ * - makes decision where to split ++ * - moves remaining extens and index entries (right to the split point) ++ * into the newly allocated blocks ++ * - initialize subtree ++ */ ++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext, int at) ++{ ++ struct buffer_head *bh = NULL; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct ext3_extent *ex; ++ int i = at, k, m, a; ++ unsigned long newblock, oldblock, border; ++ int *ablocks = NULL; /* array of allocated blocks */ ++ int err = 0; ++ ++ /* make decision: where to split? 
*/ ++ /* FIXME: now desicion is simplest: at current extent */ ++ ++ /* if current leaf will be splitted, then we should use ++ * border from split point */ ++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); ++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ border = path[depth].p_ext[1].ee_block; ++ ext_debug(tree, "leaf will be splitted." ++ " next leaf starts at %d\n", ++ (int)border); ++ } else { ++ border = newext->ee_block; ++ ext_debug(tree, "leaf will be added." ++ " next leaf starts at %d\n", ++ (int)border); ++ } ++ ++ /* ++ * if error occurs, then we break processing ++ * and turn filesystem read-only. so, index won't ++ * be inserted and tree will be in consistent ++ * state. next mount will repair buffers too ++ */ ++ ++ /* ++ * get array to track all allocated blocks ++ * we need this to handle errors and free blocks ++ * upon them ++ */ ++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); ++ if (!ablocks) ++ return -ENOMEM; ++ memset(ablocks, 0, sizeof(unsigned long) * depth); ++ ++ /* allocate all needed blocks */ ++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); ++ for (a = 0; a < depth - at; a++) { ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ goto cleanup; ++ ablocks[a] = newblock; ++ } ++ ++ /* initialize new leaf */ ++ newblock = ablocks[--a]; ++ EXT_ASSERT(newblock); ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->eh_entries = 0; ++ neh->eh_max = ext3_ext_space_block(tree); ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ neh->eh_depth = 0; ++ ex = EXT_FIRST_EXTENT(neh); ++ ++ /* move remain of path[depth] to the new leaf */ ++ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); ++ /* start copy from next extent */ ++ /* TODO: we 
could do it by single memmove */ ++ m = 0; ++ path[depth].p_ext++; ++ while (path[depth].p_ext <= ++ EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", ++ path[depth].p_ext->ee_block, ++ path[depth].p_ext->ee_start, ++ path[depth].p_ext->ee_len, ++ newblock); ++ memmove(ex++, path[depth].p_ext++, ++ sizeof(struct ext3_extent)); ++ neh->eh_entries++; ++ m++; ++ } ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old leaf */ ++ if (m) { ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ goto cleanup; ++ path[depth].p_hdr->eh_entries -= m; ++ if ((err = ext3_ext_dirty(handle, tree, path + depth))) ++ goto cleanup; ++ ++ } ++ ++ /* create intermediate indexes */ ++ k = depth - at - 1; ++ EXT_ASSERT(k >= 0); ++ if (k) ++ ext_debug(tree, "create %d intermediate indices\n", k); ++ /* insert new index into current index block */ ++ /* current depth stored in i var */ ++ i = depth - 1; ++ while (k--) { ++ oldblock = newblock; ++ newblock = ablocks[--a]; ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->eh_entries = 1; ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ neh->eh_max = ext3_ext_space_block_idx(tree); ++ neh->eh_depth = depth - i; ++ fidx = EXT_FIRST_INDEX(neh); ++ fidx->ei_block = border; ++ fidx->ei_leaf = oldblock; ++ ++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", ++ i, newblock, border, oldblock); ++ /* copy indexes */ ++ m = 0; ++ path[i].p_idx++; ++ ++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, ++ EXT_MAX_INDEX(path[i].p_hdr)); ++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == ++ EXT_LAST_INDEX(path[i].p_hdr)); ++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ++ 
ext_debug(tree, "%d: move %d:%d in new index %lu\n", ++ i, path[i].p_idx->ei_block, ++ path[i].p_idx->ei_leaf, newblock); ++ memmove(++fidx, path[i].p_idx++, ++ sizeof(struct ext3_extent_idx)); ++ neh->eh_entries++; ++ EXT_ASSERT(neh->eh_entries <= neh->eh_max); ++ m++; ++ } ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old index */ ++ if (m) { ++ err = ext3_ext_get_access(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ path[i].p_hdr->eh_entries -= m; ++ err = ext3_ext_dirty(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ } ++ ++ i--; ++ } ++ ++ /* insert new index */ ++ if (!err) ++ err = ext3_ext_insert_index(handle, tree, path + at, ++ border, newblock); ++ ++cleanup: ++ if (bh) { ++ if (buffer_locked(bh)) ++ unlock_buffer(bh); ++ brelse(bh); ++ } ++ ++ if (err) { ++ /* free all allocated blocks in error case */ ++ for (i = 0; i < depth; i++) { ++ if (!ablocks[i]) ++ continue; ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ } ++ } ++ kfree(ablocks); ++ ++ return err; ++} ++ ++/* ++ * routine implements tree growing procedure: ++ * - allocates new block ++ * - moves top-level data (index block or leaf) into the new block ++ * - initialize new top-level, creating index that points to the ++ * just created block ++ */ ++static int ext3_ext_grow_indepth(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_ext_path *curp = path; ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct buffer_head *bh; ++ unsigned long newblock; ++ int err = 0; ++ ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ return err; ++ ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ ext3_std_error(tree->inode->i_sb, err); ++ return err; ++ } ++ lock_buffer(bh); ++ 
++ if ((err = ext3_journal_get_create_access(handle, bh))) { ++ unlock_buffer(bh); ++ goto out; ++ } ++ ++ /* move top-level index/leaf into new block */ ++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); ++ ++ /* set size of new block */ ++ neh = EXT_BLOCK_HDR(bh); ++ /* old root could have indexes or leaves ++ * so calculate e_max right way */ ++ if (EXT_DEPTH(tree)) ++ neh->eh_max = ext3_ext_space_block_idx(tree); ++ else ++ neh->eh_max = ext3_ext_space_block(tree); ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto out; ++ ++ /* create index in new top-level index: num,max,pointer */ ++ if ((err = ext3_ext_get_access(handle, tree, curp))) ++ goto out; ++ ++ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; ++ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); ++ curp->p_hdr->eh_entries = 1; ++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); ++ /* FIXME: it works, but actually path[0] can be index */ ++ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; ++ curp->p_idx->ei_leaf = newblock; ++ ++ neh = EXT_ROOT_HDR(tree); ++ fidx = EXT_FIRST_INDEX(neh); ++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", ++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); ++ ++ neh->eh_depth = path->p_depth + 1; ++ err = ext3_ext_dirty(handle, tree, curp); ++out: ++ brelse(bh); ++ ++ return err; ++} ++ ++/* ++ * routine finds empty index and adds new leaf. 
if no free index found
++ * then it requests in-depth growing
++ *
++ * every step that can fail is checked before the path is touched again:
++ * a failed split/grow, or a failed re-lookup of the path, is returned
++ * right away instead of falling through to code that indexes path[]
++ *
++ * returns 0 on success or the negative error code of the failing step
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle,
++					struct ext3_extents_tree *tree,
++					struct ext3_ext_path *path,
++					struct ext3_extent *newext)
++{
++	struct ext3_ext_path *curp;
++	int depth, i, err = 0;
++
++repeat:
++	i = depth = EXT_DEPTH(tree);
++
++	/* walk up to the tree and look for free index entry */
++	curp = path + depth;
++	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++		i--;
++		curp--;
++	}
++
++	/* we use already allocated block for index block
++	 * so, subsequent data blocks should be contiguous */
++	if (EXT_HAS_FREE_INDEX(curp)) {
++		/* if we found index with free entry, then use that
++		 * entry: create all needed subtree and add new leaf */
++		err = ext3_ext_split(handle, tree, path, newext, i);
++		if (err)
++			goto out;
++
++		/* refill path */
++		ext3_ext_drop_refs(path);
++		path = ext3_ext_find_extent(tree, newext->ee_block, path);
++		if (IS_ERR(path))
++			err = PTR_ERR(path);
++	} else {
++		/* tree is full, time to grow in depth */
++		err = ext3_ext_grow_indepth(handle, tree, path, newext);
++		if (err)
++			goto out;
++
++		/* refill path */
++		ext3_ext_drop_refs(path);
++		path = ext3_ext_find_extent(tree, newext->ee_block, path);
++		if (IS_ERR(path)) {
++			/* path is an error cookie now, it must not be
++			 * dereferenced by the fullness check below */
++			err = PTR_ERR(path);
++			goto out;
++		}
++
++		/*
++		 * only first (depth 0 -> 1) produces free space
++		 * in all other cases we have to split grown tree
++		 */
++		depth = EXT_DEPTH(tree);
++		if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
++			/* now we need split */
++			goto repeat;
++		}
++	}
++
++out:
++	return err;
++}
++
++/*
++ * returns allocated block in subsequent extent or EXT_MAX_BLOCK
++ * NOTE: it consider block number from index entry as
++ * allocated block. 
thus, index entries have to be consistent
++ * with leafs
++ */
++static unsigned long
++ext3_ext_next_allocated_block(struct ext3_ext_path *path)
++{
++	struct ext3_ext_path *cur;
++	int leaf_level, level;
++
++	EXT_ASSERT(path != NULL);
++	leaf_level = path->p_depth;
++
++	/* root-only tree with no extent located: nothing follows */
++	if (leaf_level == 0 && path->p_ext == NULL)
++		return EXT_MAX_BLOCK;
++
++	/* FIXME: what if index isn't full ?! */
++	for (level = leaf_level; level >= 0; level--) {
++		cur = path + level;
++
++		if (level == leaf_level) {
++			/* leaf: is there an extent right after ours? */
++			if (cur->p_ext != EXT_LAST_EXTENT(cur->p_hdr))
++				return cur->p_ext[1].ee_block;
++			continue;
++		}
++
++		/* index: is there an entry right after ours? */
++		if (cur->p_idx != EXT_LAST_INDEX(cur->p_hdr))
++			return cur->p_idx[1].ei_block;
++	}
++
++	return EXT_MAX_BLOCK;
++}
++
++/*
++ * walks the index levels of the path from the one just above the leaf
++ * up to the root and returns ei_block of the first index entry found
++ * to the right of the path, or EXT_MAX_BLOCK when there is none
++ */
++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree,
++					struct ext3_ext_path *path)
++{
++	struct ext3_ext_path *cur;
++	int level;
++
++	EXT_ASSERT(path != NULL);
++
++	/* a zero-depth tree has no index levels: the loop body never
++	 * runs and EXT_MAX_BLOCK is returned */
++	for (level = path->p_depth - 1; level >= 0; level--) {
++		cur = path + level;
++		if (cur->p_idx != EXT_LAST_INDEX(cur->p_hdr))
++			return cur->p_idx[1].ei_block;
++	}
++
++	return EXT_MAX_BLOCK;
++}
++
++/*
++ * if leaf gets modified and modified extent is first in the leaf
++ * then we have to correct all indexes above
++ * TODO: do we need to correct tree in all cases? 
++ */ ++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent *ex; ++ unsigned long border; ++ int k, err = 0; ++ ++ eh = path[depth].p_hdr; ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(eh); ++ ++ if (depth == 0) { ++ /* there is no tree at all */ ++ return 0; ++ } ++ ++ if (ex != EXT_FIRST_EXTENT(eh)) { ++ /* we correct tree if first leaf got modified only */ ++ return 0; ++ } ++ ++ /* ++ * TODO: we need correction if border is smaller then current one ++ */ ++ k = depth - 1; ++ border = path[depth].p_ext->ee_block; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ return err; ++ path[k].p_idx->ei_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ return err; ++ ++ while (k--) { ++ /* change all left-side indexes */ ++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) ++ break; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ break; ++ path[k].p_idx->ei_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ break; ++ } ++ ++ return err; ++} ++ ++static int inline ++ext3_can_extents_be_merged(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) ++ return 0; ++ ++#ifdef AGRESSIVE_TEST ++ if (ex1->ee_len >= 4) ++ return 0; ++#endif ++ ++ if (!tree->ops->mergable) ++ return 1; ++ ++ return tree->ops->mergable(ex1, ex2); ++} ++ ++/* ++ * this routine tries to merge requsted extent into the existing ++ * extent or inserts requested extent as new one into the tree, ++ * creating new leaf in no-space case ++ */ ++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_extent_header * eh; ++ struct ext3_extent *ex, *fex; ++ struct ext3_extent 
*nearex; /* nearest extent */ ++ struct ext3_ext_path *npath = NULL; ++ int depth, len, err, next; ++ ++ EXT_ASSERT(newext->ee_len > 0); ++ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK); ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ EXT_ASSERT(path[depth].p_hdr); ++ ++ /* try to insert block into found extent and return */ ++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { ++ ext_debug(tree, "append %d block to %d:%d (from %d)\n", ++ newext->ee_len, ex->ee_block, ex->ee_len, ++ ex->ee_start); ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ return err; ++ ex->ee_len += newext->ee_len; ++ eh = path[depth].p_hdr; ++ nearex = ex; ++ goto merge; ++ } ++ ++repeat: ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ if (eh->eh_entries < eh->eh_max) ++ goto has_space; ++ ++ /* probably next leaf has space for us? */ ++ fex = EXT_LAST_EXTENT(eh); ++ next = ext3_ext_next_leaf_block(tree, path); ++ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { ++ ext_debug(tree, "next leaf block - %d\n", next); ++ EXT_ASSERT(!npath); ++ npath = ext3_ext_find_extent(tree, next, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ EXT_ASSERT(npath->p_depth == path->p_depth); ++ eh = npath[depth].p_hdr; ++ if (eh->eh_entries < eh->eh_max) { ++ ext_debug(tree, "next leaf isnt full(%d)\n", ++ eh->eh_entries); ++ path = npath; ++ goto repeat; ++ } ++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", ++ eh->eh_entries, eh->eh_max); ++ } ++ ++ /* ++ * there is no free space in found leaf ++ * we're gonna add new leaf in the tree ++ */ ++ err = ext3_ext_create_new_leaf(handle, tree, path, newext); ++ if (err) ++ goto cleanup; ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ ++has_space: ++ nearex = path[depth].p_ext; ++ ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ goto cleanup; ++ ++ if (!nearex) { ++ /* there is no extent in this leaf, create first one */ ++ ext_debug(tree, "first extent in the 
leaf: %d:%d:%d\n", ++ newext->ee_block, newext->ee_start, ++ newext->ee_len); ++ path[depth].p_ext = EXT_FIRST_EXTENT(eh); ++ } else if (newext->ee_block > nearex->ee_block) { ++ EXT_ASSERT(newext->ee_block != nearex->ee_block); ++ if (nearex != EXT_LAST_EXTENT(eh)) { ++ len = EXT_MAX_EXTENT(eh) - nearex; ++ len = (len - 1) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, ++ newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 2, nearex + 1, len); ++ } ++ path[depth].p_ext = nearex + 1; ++ } else { ++ EXT_ASSERT(newext->ee_block != nearex->ee_block); ++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 1, nearex, len); ++ path[depth].p_ext = nearex; ++ } ++ ++ eh->eh_entries++; ++ nearex = path[depth].p_ext; ++ nearex->ee_block = newext->ee_block; ++ nearex->ee_start = newext->ee_start; ++ nearex->ee_len = newext->ee_len; ++ /* FIXME: support for large fs */ ++ nearex->ee_start_hi = 0; ++ ++merge: ++ /* try to merge extents to the right */ ++ while (nearex < EXT_LAST_EXTENT(eh)) { ++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) ++ break; ++ /* merge with next extent! 
*/ ++ nearex->ee_len += nearex[1].ee_len; ++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { ++ len = (EXT_LAST_EXTENT(eh) - nearex - 1) ++ * sizeof(struct ext3_extent); ++ memmove(nearex + 1, nearex + 2, len); ++ } ++ eh->eh_entries--; ++ EXT_ASSERT(eh->eh_entries > 0); ++ } ++ ++ /* try to merge extents to the left */ ++ ++ /* time to correct all indexes above */ ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ if (err) ++ goto cleanup; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ ++cleanup: ++ if (npath) { ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ } ++ ext3_ext_tree_changed(tree); ++ ext3_ext_invalidate_cache(tree); ++ return err; ++} ++ ++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, ++ unsigned long num, ext_prepare_callback func) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent *ex, cbex; ++ unsigned long next, start = 0, end = 0; ++ unsigned long last = block + num; ++ int depth, exists, err = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(func); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ while (block < last && block != EXT_MAX_BLOCK) { ++ num = last - block; ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(tree, block, path); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ path = NULL; ++ break; ++ } ++ ++ depth = EXT_DEPTH(tree); ++ EXT_ASSERT(path[depth].p_hdr); ++ ex = path[depth].p_ext; ++ next = ext3_ext_next_allocated_block(path); ++ ++ exists = 0; ++ if (!ex) { ++ /* there is no extent yet, so try to allocate ++ * all requested space */ ++ start = block; ++ end = block + num; ++ } else if (ex->ee_block > block) { ++ /* need to allocate space before found extent */ ++ start = block; ++ end = ex->ee_block; ++ if (block + num < end) ++ end = block + num; ++ } else if (block >= ex->ee_block + ex->ee_len) { ++ /* need to allocate space after found extent */ ++ start = block; ++ end = block + num; ++ if (end >= next) ++ end = next; ++ } else if (block >= 
ex->ee_block) { ++ /* ++ * some part of requested space is covered ++ * by found extent ++ */ ++ start = block; ++ end = ex->ee_block + ex->ee_len; ++ if (block + num < end) ++ end = block + num; ++ exists = 1; ++ } else { ++ BUG(); ++ } ++ EXT_ASSERT(end > start); ++ ++ if (!exists) { ++ cbex.ee_block = start; ++ cbex.ee_len = end - start; ++ cbex.ee_start = 0; ++ } else ++ cbex = *ex; ++ ++ EXT_ASSERT(path[depth].p_hdr); ++ err = func(tree, path, &cbex, exists); ++ ext3_ext_drop_refs(path); ++ ++ if (err < 0) ++ break; ++ if (err == EXT_REPEAT) ++ continue; ++ else if (err == EXT_BREAK) { ++ err = 0; ++ break; ++ } ++ ++ if (EXT_DEPTH(tree) != depth) { ++ /* depth was changed. we have to realloc path */ ++ kfree(path); ++ path = NULL; ++ } ++ ++ block = cbex.ee_block + cbex.ee_len; ++ } ++ ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ ++ return err; ++} ++ ++static inline void ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) ++{ ++ EXT_ASSERT(len > 0); ++ if (tree->cex) { ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; ++ } ++} ++ ++/* ++ * this routine calculate boundaries of the gap requested block fits into ++ * and cache this gap ++ */ ++static inline void ++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ unsigned long block) ++{ ++ int depth = EXT_DEPTH(tree); ++ unsigned long lblock, len; ++ struct ext3_extent *ex; ++ ++ if (!tree->cex) ++ return; ++ ++ ex = path[depth].p_ext; ++ if (ex == NULL) { ++ /* there is no extent yet, so gap is [0;-] */ ++ lblock = 0; ++ len = EXT_MAX_BLOCK; ++ ext_debug(tree, "cache gap(whole file):"); ++ } else if (block < ex->ee_block) { ++ lblock = block; ++ len = ex->ee_block - block; ++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len); ++ } 
else if (block >= ex->ee_block + ex->ee_len) { ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); ++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) block); ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; ++ } else { ++ lblock = len = 0; ++ BUG(); ++ } ++ ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); ++} ++ ++static inline int ++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, ++ struct ext3_extent *ex) ++{ ++ struct ext3_ext_cache *cex = tree->cex; ++ ++ /* is there cache storage at all? */ ++ if (!cex) ++ return EXT3_EXT_CACHE_NO; ++ ++ /* has cache valid data? */ ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; ++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) ex->ee_start); ++ return cex->ec_type; ++ } ++ ++ /* not in cache */ ++ return EXT3_EXT_CACHE_NO; ++} ++ ++/* ++ * routine removes index from the index block ++ * it's used in truncate case only. 
thus all requests are for ++ * last index in the block only ++ */ ++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct buffer_head *bh; ++ int err; ++ ++ /* free index block */ ++ path--; ++ EXT_ASSERT(path->p_hdr->eh_entries); ++ if ((err = ext3_ext_get_access(handle, tree, path))) ++ return err; ++ path->p_hdr->eh_entries--; ++ if ((err = ext3_ext_dirty(handle, tree, path))) ++ return err; ++ ext_debug(tree, "index is empty, remove it, free block %d\n", ++ path->p_idx->ei_leaf); ++ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ return err; ++} ++ ++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int depth = EXT_DEPTH(tree); ++ int needed; ++ ++ if (path) { ++ /* probably there is space in leaf? */ ++ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) ++ return 1; ++ } ++ ++ /* ++ * the worste case we're expecting is creation of the ++ * new root (growing in depth) with index splitting ++ * for splitting we have to consider depth + 1 because ++ * previous growing could increase it ++ */ ++ depth = depth + 1; ++ ++ /* ++ * growing in depth: ++ * block allocation + new root + old root ++ */ ++ needed = EXT3_ALLOC_NEEDED + 2; ++ ++ /* index split. 
we may need: ++ * allocate intermediate indexes and new leaf ++ * change two blocks at each level, but root ++ * modify root block (inode) ++ */ ++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; ++ ++ return needed; ++} ++ ++static int ++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, tex; ++ struct ext3_ext_path *npath; ++ int depth, creds, err; ++ ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); ++ EXT_ASSERT(ex->ee_block < start); ++ ++ /* calculate tail extent */ ++ tex.ee_block = end + 1; ++ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); ++ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; ++ ++ creds = ext3_ext_calc_credits_for_insert(tree, path); ++ handle = ext3_ext_journal_restart(handle, creds); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ /* calculate head extent. use primary extent */ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ return err; ++ ex->ee_len = start - ex->ee_block; ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ return err; ++ ++ /* FIXME: some callback to free underlying resource ++ * and correct ee_start? 
*/ ++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", ++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); ++ ++ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ depth = EXT_DEPTH(tree); ++ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); ++ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); ++ ++ err = ext3_ext_insert_extent(handle, tree, npath, &tex); ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ ++ return err; ++ ++} ++ ++static int ++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, *fu = NULL, *lu, *le; ++ int err = 0, correct_index = 0; ++ int depth = EXT_DEPTH(tree), credits; ++ struct ext3_extent_header *eh; ++ unsigned a, b, block, num; ++ ++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); ++ if (!path[depth].p_hdr) ++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); ++ eh = path[depth].p_hdr; ++ EXT_ASSERT(eh); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ ++ /* find where to start removing */ ++ le = ex = EXT_LAST_EXTENT(eh); ++ while (ex != EXT_FIRST_EXTENT(eh)) { ++ if (ex->ee_block <= end) ++ break; ++ ex--; ++ } ++ ++ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { ++ /* removal of internal part of the extent requested ++ * tail and head must be placed in different extent ++ * so, we have to insert one more extent */ ++ path[depth].p_ext = ex; ++ return ext3_ext_split_for_rm(handle, tree, path, start, end); ++ } ++ ++ lu = ex; ++ while (ex >= EXT_FIRST_EXTENT(eh) && ++ ex->ee_block + ex->ee_len > start) { ++ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); ++ path[depth].p_ext = ex; ++ ++ a = ex->ee_block > start ? ex->ee_block : start; ++ b = ex->ee_block + ex->ee_len - 1 < end ? 
++ ex->ee_block + ex->ee_len - 1 : end; ++ ++ ext_debug(tree, " border %u:%u\n", a, b); ++ ++ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { ++ block = 0; ++ num = 0; ++ BUG(); ++ } else if (a != ex->ee_block) { ++ /* remove tail of the extent */ ++ block = ex->ee_block; ++ num = a - block; ++ } else if (b != ex->ee_block + ex->ee_len - 1) { ++ /* remove head of the extent */ ++ block = a; ++ num = b - a; ++ } else { ++ /* remove whole extent: excelent! */ ++ block = ex->ee_block; ++ num = 0; ++ EXT_ASSERT(a == ex->ee_block && ++ b == ex->ee_block + ex->ee_len - 1); ++ } ++ ++ if (ex == EXT_FIRST_EXTENT(eh)) ++ correct_index = 1; ++ ++ credits = 1; ++ if (correct_index) ++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; ++ if (tree->ops->remove_extent_credits) ++ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); ++ ++ handle = ext3_ext_journal_restart(handle, credits); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out; ++ } ++ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ if (tree->ops->remove_extent) ++ err = tree->ops->remove_extent(tree, ex, a, b); ++ if (err) ++ goto out; ++ ++ if (num == 0) { ++ /* this extent is removed entirely mark slot unused */ ++ ex->ee_start = 0; ++ eh->eh_entries--; ++ fu = ex; ++ } ++ ++ ex->ee_block = block; ++ ex->ee_len = num; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ ext_debug(tree, "new extent: %u:%u:%u\n", ++ ex->ee_block, ex->ee_len, ex->ee_start); ++ ex--; ++ } ++ ++ if (fu) { ++ /* reuse unused slots */ ++ while (lu < le) { ++ if (lu->ee_start) { ++ *fu = *lu; ++ lu->ee_start = 0; ++ fu++; ++ } ++ lu++; ++ } ++ } ++ ++ if (correct_index && eh->eh_entries) ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ ++ /* if this leaf is free, then we should ++ * remove it from index block above */ ++ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) ++ err = ext3_ext_rm_idx(handle, tree, 
path + depth);
++
++out:
++	return err;
++}
++
++
++static struct ext3_extent_idx *
++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block)
++{
++	struct ext3_extent_idx *ix;
++
++	ix = EXT_LAST_INDEX(hdr);
++	while (ix != EXT_FIRST_INDEX(hdr)) {
++		if (ix->ei_block <= block)
++			break;
++		ix--;
++	}
++	return ix;
++}
++
++/*
++ * returns 1 if current index have to be freed (even partial)
++ */
++static int inline
++ext3_ext_more_to_rm(struct ext3_ext_path *path)
++{
++	EXT_ASSERT(path->p_idx);
++
++	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
++		return 0;
++
++	/*
++	 * if truncate on deeper level happened it it wasn't partial
++	 * so we have to consider current index for truncation
++	 */
++	if (path->p_hdr->eh_entries == path->p_block)
++		return 0;
++	return 1;
++}
++
++/*
++ * removes the block range start..end from the tree
++ *
++ * the tree is walked from the right side: index blocks are read with
++ * sb_bread() while descending, every leaf reached is handed to
++ * ext3_ext_rm_leaf() and an index block left without entries is
++ * dropped via ext3_ext_rm_idx() on the way back up
++ *
++ * the journal handle is started and stopped inside this routine
++ * returns 0 on success or a negative error code
++ */
++int ext3_ext_remove_space(struct ext3_extents_tree *tree,
++				unsigned long start, unsigned long end)
++{
++	struct inode *inode = tree->inode;
++	struct super_block *sb = inode->i_sb;
++	int depth = EXT_DEPTH(tree);
++	struct ext3_ext_path *path;
++	handle_t *handle;
++	int i = 0, err = 0;
++
++	ext_debug(tree, "space to be removed: %lu:%lu\n", start, end);
++
++	/* probably first extent we're gonna free will be last in block */
++	handle = ext3_journal_start(inode, depth + 1);
++	if (IS_ERR(handle))
++		return PTR_ERR(handle);
++
++	ext3_ext_invalidate_cache(tree);
++
++	/*
++	 * we start scanning from right side freeing all the blocks
++	 * after i_size and walking into the deep
++	 */
++	path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++	/* kmalloc() reports failure with NULL, not with an ERR_PTR value */
++	if (path == NULL) {
++		ext3_error(sb, "ext3_ext_remove_space",
++			   "Can't allocate path array");
++		ext3_journal_stop(handle);
++		return -ENOMEM;
++	}
++	memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++	path[i].p_hdr = EXT_ROOT_HDR(tree);
++
++	while (i >= 0 && err == 0) {
++		if (i == depth) {
++			/* this is leaf block */
++			err = ext3_ext_rm_leaf(handle, tree, path, start, end);
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			continue;
++		}
++
++		/* this is index block */
++		if (!path[i].p_hdr) {
++			ext_debug(tree, "initialize header\n");
++			path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh);
++		}
++
++		EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max);
++		EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC);
++
++		if (!path[i].p_idx) {
++			/* this level hasn't touched yet */
++			path[i].p_idx =
++				ext3_ext_last_covered(path[i].p_hdr, end);
++			path[i].p_block = path[i].p_hdr->eh_entries + 1;
++			ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n",
++				  path[i].p_hdr, path[i].p_hdr->eh_entries);
++		} else {
++			/* we've already was here, see at next index */
++			path[i].p_idx--;
++		}
++
++		ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n",
++			  i, EXT_FIRST_INDEX(path[i].p_hdr),
++			  path[i].p_idx);
++		if (ext3_ext_more_to_rm(path + i)) {
++			/* go to the next level */
++			ext_debug(tree, "move to level %d (block %d)\n",
++				  i + 1, path[i].p_idx->ei_leaf);
++			memset(path + i + 1, 0, sizeof(*path));
++			path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf);
++			if (!path[i+1].p_bh) {
++				/* should we reset i_size? */
++				err = -EIO;
++				break;
++			}
++			/* put actual number of indexes to know is this
++			 * number got changed at the next iteration */
++			path[i].p_block = path[i].p_hdr->eh_entries;
++			i++;
++		} else {
++			/* we finish processing this index, go up */
++			if (path[i].p_hdr->eh_entries == 0 && i > 0) {
++				/* index is empty, remove it
++				 * handle must be already prepared by the
++				 * truncatei_leaf() */
++				err = ext3_ext_rm_idx(handle, tree, path + i);
++			}
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			ext_debug(tree, "return to level %d\n", i);
++		}
++	}
++
++	/*
++	 * when the walk stops on an error, levels 1..i still hold the
++	 * buffers read while descending; release them here. after a
++	 * complete walk i is -1 and nothing is left to release
++	 */
++	while (i > 0) {
++		brelse(path[i].p_bh);
++		i--;
++	}
++
++	/* TODO: flexible tree reduction should be here */
++	if (path->p_hdr->eh_entries == 0) {
++		/*
++		 * truncate to zero freed all the tree
++		 * so, we need to correct eh_depth
++		 */
++		err = ext3_ext_get_access(handle, tree, path);
++		if (err == 0) {
++			EXT_ROOT_HDR(tree)->eh_depth = 0;
++			EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree);
++			err = ext3_ext_dirty(handle, tree, path);
++		}
++	}
++	ext3_ext_tree_changed(tree);
++
++	kfree(path);
++	ext3_journal_stop(handle);
++
++	return err;
++}
++
++int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks)
++{
++	int lcap, icap, rcap, leafs, idxs, num;
++
++	rcap = ext3_ext_space_root(tree);
++	if (blocks <= rcap) {
++		/* all extents fit to the root */
++		return 0;
++	}
++
++	rcap = ext3_ext_space_root_idx(tree);
++	lcap = ext3_ext_space_block(tree);
++	icap = ext3_ext_space_block_idx(tree);
++
++	num = leafs = (blocks + lcap - 1) / lcap;
++	if (leafs <= rcap) {
++		/* all pointers to leafs fit to the root */
++		return leafs;
++	}
++
++	/* ok. 
we need separate index block(s) to link all leaf blocks */ ++ idxs = (leafs + icap - 1) / icap; ++ do { ++ num += idxs; ++ idxs = (idxs + icap - 1) / icap; ++ } while (idxs > rcap); ++ ++ return num; ++} ++ ++/* ++ * called at mount time ++ */ ++void ext3_ext_init(struct super_block *sb) ++{ ++ /* ++ * possible initialization would be here ++ */ ++ ++ if (test_opt(sb, EXTENTS)) { ++ printk("EXT3-fs: file extents enabled"); ++#ifdef AGRESSIVE_TEST ++ printk(", agressive tests"); ++#endif ++#ifdef CHECK_BINSEARCH ++ printk(", check binsearch"); ++#endif ++ printk("\n"); ++ } ++} ++ ++/* ++ * called at umount time ++ */ ++void ext3_ext_release(struct super_block *sb) ++{ ++} ++ ++/************************************************************************ ++ * VFS related routines ++ ************************************************************************/ ++ ++static int ext3_get_inode_write_access(handle_t *handle, void *buffer) ++{ ++ /* we use in-core data, not bh */ ++ return 0; ++} ++ ++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) ++{ ++ struct inode *inode = buffer; ++ return ext3_mark_inode_dirty(handle, inode); ++} ++ ++static int ext3_ext_mergable(struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ /* FIXME: support for large fs */ ++ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) ++ return 1; ++ return 0; ++} ++ ++static int ++ext3_remove_blocks_credits(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed; ++ ++ /* at present, extent can't cross block group */; ++ needed = 4; /* bitmap + group desc + sb + inode */ ++ ++#ifdef CONFIG_QUOTA ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ return needed; ++} ++ ++static int ++ext3_remove_blocks(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed = ext3_remove_blocks_credits(tree, ex, from, to); ++ handle_t *handle = 
ext3_journal_start(tree->inode, needed); ++ struct buffer_head *bh; ++ int i; ++ ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { ++ /* tail removal */ ++ unsigned long num, start; ++ num = ex->ee_block + ex->ee_len - from; ++ start = ex->ee_start + ex->ee_len - num; ++ ext_debug(tree, "free last %lu blocks starting %lu\n", ++ num, start); ++ for (i = 0; i < num; i++) { ++ bh = sb_find_get_block(tree->inode->i_sb, start + i); ++ ext3_forget(handle, 0, tree->inode, bh, start + i); ++ } ++ ext3_free_blocks(handle, tree->inode, start, num); ++ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { ++ printk("strange request: removal %lu-%lu from %u:%u\n", ++ from, to, ex->ee_block, ex->ee_len); ++ } else { ++ printk("strange request: removal(2) %lu-%lu from %u:%u\n", ++ from, to, ex->ee_block, ex->ee_len); ++ } ++ ext3_journal_stop(handle); ++ return 0; ++} ++ ++static int ext3_ext_find_goal(struct inode *inode, ++ struct ext3_ext_path *path, unsigned long block) ++{ ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ int depth; ++ ++ if (path) { ++ struct ext3_extent *ex; ++ depth = path->p_depth; ++ ++ /* try to predict block placement */ ++ if ((ex = path[depth].p_ext)) ++ return ex->ee_start + (block - ex->ee_block); ++ ++ /* it looks index is empty ++ * try to find starting from index itself */ ++ if (path[depth].p_bh) ++ return path[depth].p_bh->b_blocknr; ++ } ++ ++ /* OK. 
use inode's group */ ++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ return bg_start + colour + block; ++} ++ ++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int *err) ++{ ++ struct inode *inode = tree->inode; ++ int newblock, goal; ++ ++ EXT_ASSERT(path); ++ EXT_ASSERT(ex); ++ EXT_ASSERT(ex->ee_start); ++ EXT_ASSERT(ex->ee_len); ++ ++ /* reuse block from the extent to order data/metadata */ ++ newblock = ex->ee_start++; ++ ex->ee_len--; ++ if (ex->ee_len == 0) { ++ ex->ee_len = 1; ++ /* allocate new block for the extent */ ++ goal = ext3_ext_find_goal(inode, path, ex->ee_block); ++ ex->ee_start = ext3_new_block(handle, inode, goal, err); ++ if (ex->ee_start == 0) { ++ /* error occured: restore old extent */ ++ ex->ee_start = newblock; ++ return 0; ++ } ++ } ++ return newblock; ++} ++ ++static struct ext3_extents_helpers ext3_blockmap_helpers = { ++ .get_write_access = ext3_get_inode_write_access, ++ .mark_buffer_dirty = ext3_mark_buffer_dirty, ++ .mergable = ext3_ext_mergable, ++ .new_block = ext3_new_block_cb, ++ .remove_extent = ext3_remove_blocks, ++ .remove_extent_credits = ext3_remove_blocks_credits, ++}; ++ ++void ext3_init_tree_desc(struct ext3_extents_tree *tree, ++ struct inode *inode) ++{ ++ tree->inode = inode; ++ tree->root = (void *) EXT3_I(inode)->i_data; ++ tree->buffer = (void *) inode; ++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; ++ tree->ops = &ext3_blockmap_helpers; ++} ++ ++int ext3_ext_get_block(handle_t *handle, struct inode *inode, ++ long iblock, struct buffer_head *bh_result, ++ int create, int extend_disksize) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent newex; ++ struct 
ext3_extent *ex; ++ int goal, newblock, err = 0, depth; ++ struct ext3_extents_tree tree; ++ ++ clear_buffer_new(bh_result); ++ ext3_init_tree_desc(&tree, inode); ++ ext_debug(&tree, "block %d requested for inode %u\n", ++ (int) iblock, (unsigned) inode->i_ino); ++ down(&EXT3_I(inode)->truncate_sem); ++ ++ /* check in cache */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { ++ if (!create) { ++ /* block isn't allocated yet and ++ * user don't want to allocate it */ ++ goto out2; ++ } ++ /* we should allocate requested block */ ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { ++ /* block is already allocated */ ++ newblock = iblock - newex.ee_block + newex.ee_start; ++ goto out; ++ } else { ++ EXT_ASSERT(0); ++ } ++ } ++ ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(&tree, iblock, NULL); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ path = NULL; ++ goto out2; ++ } ++ ++ depth = EXT_DEPTH(&tree); ++ ++ /* ++ * consistent leaf must not be empty ++ * this situations is possible, though, _during_ tree modification ++ * this is why assert can't be put in ext3_ext_find_extent() ++ */ ++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); ++ ++ if ((ex = path[depth].p_ext)) { ++ /* if found exent covers block, simple return it */ ++ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { ++ newblock = iblock - ex->ee_block + ex->ee_start; ++ ext_debug(&tree, "%d fit into %d:%d -> %d\n", ++ (int) iblock, ex->ee_block, ex->ee_len, ++ newblock); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); ++ goto out; ++ } ++ } ++ ++ /* ++ * requested block isn't allocated yet ++ * we couldn't try to create block if create flag is zero ++ */ ++ if (!create) { ++ /* put just found gap into cache to speedup subsequest reqs */ ++ ext3_ext_put_gap_in_cache(&tree, path, iblock); ++ goto out2; ++ } ++ ++ /* allocate new block */ ++ goal = 
ext3_ext_find_goal(inode, path, iblock); ++ newblock = ext3_new_block(handle, inode, goal, &err); ++ if (!newblock) ++ goto out2; ++ ext_debug(&tree, "allocate new block: goal %d, found %d\n", ++ goal, newblock); ++ ++ /* try to insert new extent into found leaf and return */ ++ newex.ee_block = iblock; ++ newex.ee_start = newblock; ++ newex.ee_len = 1; ++ err = ext3_ext_insert_extent(handle, &tree, path, &newex); ++ if (err) ++ goto out2; ++ ++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ++ /* previous routine could use block we allocated */ ++ newblock = newex.ee_start; ++ set_buffer_new(bh_result); ++ ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); ++out: ++ ext3_ext_show_leaf(&tree, path); ++ map_bh(bh_result, inode->i_sb, newblock); ++out2: ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ up(&EXT3_I(inode)->truncate_sem); ++ ++ return err; ++} ++ ++void ext3_ext_truncate(struct inode * inode, struct page *page) ++{ ++ struct address_space *mapping = inode->i_mapping; ++ struct super_block *sb = inode->i_sb; ++ struct ext3_extents_tree tree; ++ unsigned long last_block; ++ handle_t *handle; ++ int err = 0; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ /* ++ * probably first extent we're gonna free will be last in block ++ */ ++ err = ext3_writepage_trans_blocks(inode) + 3; ++ handle = ext3_journal_start(inode, err); ++ if (IS_ERR(handle)) { ++ if (page) { ++ clear_highpage(page); ++ flush_dcache_page(page); ++ unlock_page(page); ++ page_cache_release(page); ++ } ++ return; ++ } ++ ++ if (page) ++ ext3_block_truncate_page(handle, page, mapping, inode->i_size); ++ ++ down(&EXT3_I(inode)->truncate_sem); ++ ext3_ext_invalidate_cache(&tree); ++ ++ /* ++ * TODO: optimization is possible here ++ * probably we need not scaning at all, ++ * because page truncation is enough ++ */ ++ if (ext3_orphan_add(handle, inode)) ++ 
goto out_stop; ++ ++ /* we have to know where to truncate from in crash case */ ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ext3_mark_inode_dirty(handle, inode); ++ ++ last_block = (inode->i_size + sb->s_blocksize - 1) ++ >> EXT3_BLOCK_SIZE_BITS(sb); ++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); ++ ++ /* In a multi-transaction truncate, we only make the final ++ * transaction synchronous */ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++out_stop: ++ /* ++ * If this was a simple ftruncate(), and the file will remain alive ++ * then we need to clear up the orphan record which we created above. ++ * However, if this was a real unlink then we were called by ++ * ext3_delete_inode(), and we allow that function to clean up the ++ * orphan info for us. ++ */ ++ if (inode->i_nlink) ++ ext3_orphan_del(handle, inode); ++ ++ up(&EXT3_I(inode)->truncate_sem); ++ ext3_journal_stop(handle); ++} ++ ++/* ++ * this routine calculate max number of blocks we could modify ++ * in order to allocate new block for an inode ++ */ ++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) ++{ ++ struct ext3_extents_tree tree; ++ int needed; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); ++ ++ /* caller want to allocate num blocks */ ++ needed *= num; ++ ++#ifdef CONFIG_QUOTA ++ /* ++ * FIXME: real calculation should be here ++ * it depends on blockmap format of qouta file ++ */ ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ ++ return needed; ++} ++ ++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ext3_extent_tree_init(handle, &tree); ++} ++ ++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ return ext3_ext_calc_metadata_amount(&tree, blocks); ++} ++ ++static int 
++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newex, int exist) ++{ ++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ if (buf->err < 0) ++ return EXT_BREAK; ++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) ++ return EXT_BREAK; ++ ++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { ++ buf->err++; ++ buf->cur += sizeof(*newex); ++ } else { ++ buf->err = -EFAULT; ++ return EXT_BREAK; ++ } ++ return EXT_CONTINUE; ++} ++ ++static int ++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int exist) ++{ ++ struct ext3_extent_tree_stats *buf = ++ (struct ext3_extent_tree_stats *) tree->private; ++ int depth; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ ++ depth = EXT_DEPTH(tree); ++ buf->extents_num++; ++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) ++ buf->leaf_num++; ++ return EXT_CONTINUE; ++} ++ ++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; ++ ++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) ++ return -EINVAL; ++ ++ if (cmd == EXT3_IOC_GET_EXTENTS) { ++ struct ext3_extent_buf buf; ++ struct ext3_extents_tree tree; ++ ++ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) ++ return -EFAULT; ++ ++ ext3_init_tree_desc(&tree, inode); ++ buf.cur = buf.buffer; ++ buf.err = 0; ++ tree.private = &buf; ++ down(&EXT3_I(inode)->truncate_sem); ++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, ++ ext3_ext_store_extent_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (err == 0) ++ err = buf.err; ++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { ++ struct ext3_extent_tree_stats buf; ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ buf.depth = EXT_DEPTH(&tree); ++ buf.extents_num = 0; ++ buf.leaf_num 
= 0; ++ tree.private = &buf; ++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, ++ ext3_ext_collect_stats_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (!err) ++ err = copy_to_user((void *) arg, &buf, sizeof(buf)); ++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { ++ struct ext3_extents_tree tree; ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ err = EXT_DEPTH(&tree); ++ up(&EXT3_I(inode)->truncate_sem); ++ } ++ ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_init_tree_desc); ++EXPORT_SYMBOL(ext3_mark_inode_dirty); ++EXPORT_SYMBOL(ext3_ext_invalidate_cache); ++EXPORT_SYMBOL(ext3_ext_insert_extent); ++EXPORT_SYMBOL(ext3_ext_walk_space); ++EXPORT_SYMBOL(ext3_ext_find_goal); ++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); ++ +Index: linux-2.6.5-sles9/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300 +@@ -647,6 +647,10 @@ + DQUOT_FREE_INODE(inode); + goto fail2; + } ++ if (test_opt(sb, EXTENTS)) { ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ ext3_extents_initialize_blockmap(handle, inode); ++ } + err = ext3_mark_inode_dirty(handle, inode); + if (err) { + ext3_std_error(sb, err); +Index: linux-2.6.5-sles9/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 +@@ -796,6 +796,17 @@ + goto reread; + } + ++static inline int ++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, ++ struct buffer_head *bh, int create, int extend_disksize) ++{ ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_get_block(handle, inode, block, bh, create, ++ extend_disksize); ++ return ext3_get_block_handle(handle, inode, block, bh, create, ++ 
extend_disksize); ++} ++ + static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) + { +@@ -806,8 +817,8 @@ + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 1); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 1); + return ret; + } + +@@ -833,8 +844,8 @@ + } + } + if (ret == 0) +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 0); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 0); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +@@ -855,7 +866,7 @@ + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); +@@ -1587,7 +1598,7 @@ + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. + */ +-static int ext3_block_truncate_page(handle_t *handle, struct page *page, ++int ext3_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from) + { + unsigned long index = from >> PAGE_CACHE_SHIFT; +@@ -2083,6 +2094,9 @@ + return; + } + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_truncate(inode, page); ++ + handle = start_transaction(inode); + if (IS_ERR(handle)) { + if (page) { +@@ -2789,6 +2803,9 @@ + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; + int ret; + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_writepage_trans_blocks(inode, bpp); ++ + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else +Index: linux-2.6.5-sles9/fs/ext3/Makefile +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ ioctl.o namei.o super.o symlink.o hash.o extents.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-sles9/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 +@@ -389,6 +389,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { +@@ -447,6 +448,10 @@ + #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + ei->vfs_inode.i_version = 1; ++ ei->i_cached_extent[0] = 0; ++ ei->i_cached_extent[1] = 0; ++ ei->i_cached_extent[2] = 0; ++ ei->i_cached_extent[3] = 0; + return &ei->vfs_inode; + } + +@@ -537,7 +542,7 @@ + Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, +- Opt_err, ++ Opt_err, Opt_extents, Opt_extdebug + }; + + static match_table_t tokens = { +@@ -582,6 +587,8 @@ + {Opt_iopen, "iopen"}, + {Opt_noiopen, "noiopen"}, + {Opt_iopen_nopriv, "iopen_nopriv"}, ++ {Opt_extents, 
"extents"}, ++ {Opt_extdebug, "extdebug"}, + {Opt_err, NULL} + }; + +@@ -797,6 +804,12 @@ + break; + case Opt_ignore: + break; ++ case Opt_extents: ++ set_opt (sbi->s_mount_opt, EXTENTS); ++ break; ++ case Opt_extdebug: ++ set_opt (sbi->s_mount_opt, EXTDEBUG); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1449,6 +1462,8 @@ + percpu_counter_mod(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + ++ ext3_ext_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.5-sles9/fs/ext3/ioctl.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300 +@@ -124,6 +124,10 @@ + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } ++ case EXT3_IOC_GET_EXTENTS: ++ case EXT3_IOC_GET_TREE_STATS: ++ case EXT3_IOC_GET_TREE_DEPTH: ++ return ext3_ext_ioctl(inode, filp, cmd, arg); + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 +@@ -186,6 +186,7 @@ + #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ + #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ ++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ + + #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ + #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +@@ -211,6 +212,9 @@ + #endif + #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) + #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) ++#define EXT3_IOC_GET_EXTENTS 
_IOR('f', 7, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) + + /* + * Structure of an inode on the disk +@@ -333,6 +337,8 @@ + #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ + #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ ++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ ++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -729,6 +735,7 @@ + + + /* inode.c */ ++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); + extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +@@ -802,6 +809,14 @@ + extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + ++/* extents.c */ ++extern int ext3_ext_writepage_trans_blocks(struct inode *, int); ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++ struct buffer_head *, int, int); ++extern void ext3_ext_truncate(struct inode *, struct page *); ++extern void ext3_ext_init(struct super_block *); ++extern void ext3_ext_release(struct super_block *); ++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); + + #endif /* __KERNEL__ */ + +Index: linux-2.6.5-sles9/include/linux/ext3_extents.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, Cluster File 
Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++#ifndef _LINUX_EXT3_EXTENTS ++#define _LINUX_EXT3_EXTENTS ++ ++/* ++ * with AGRESSIVE_TEST defined capacity of index/leaf blocks ++ * become very little, so index split, in-depth growing and ++ * other hard changes happens much more often ++ * this is for debug purposes only ++ */ ++#define AGRESSIVE_TEST_ ++ ++/* ++ * if CHECK_BINSEARCH defined, then results of binary search ++ * will be checked by linear search ++ */ ++#define CHECK_BINSEARCH_ ++ ++/* ++ * if EXT_DEBUG is defined you can use 'extdebug' mount option ++ * to get lots of info what's going on ++ */ ++#define EXT_DEBUG_ ++#ifdef EXT_DEBUG ++#define ext_debug(tree,fmt,a...) \ ++do { \ ++ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ ++ printk(fmt, ##a); \ ++} while (0); ++#else ++#define ext_debug(tree,fmt,a...) ++#endif ++ ++/* ++ * if EXT_STATS is defined then stats numbers are collected ++ * these number will be displayed at umount time ++ */ ++#define EXT_STATS_ ++ ++ ++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ ++ ++/* ++ * ext3_inode has i_block array (total 60 bytes) ++ * first 4 bytes are used to store: ++ * - tree depth (0 mean there is no tree yet. 
all extents in the inode) ++ * - number of alive extents in the inode ++ */ ++ ++/* ++ * this is extent on-disk structure ++ * it's used at the bottom of the tree ++ */ ++struct ext3_extent { ++ __u32 ee_block; /* first logical block extent covers */ ++ __u16 ee_len; /* number of blocks covered by extent */ ++ __u16 ee_start_hi; /* high 16 bits of physical block */ ++ __u32 ee_start; /* low 32 bigs of physical block */ ++}; ++ ++/* ++ * this is index on-disk structure ++ * it's used at all the levels, but the bottom ++ */ ++struct ext3_extent_idx { ++ __u32 ei_block; /* index covers logical blocks from 'block' */ ++ __u32 ei_leaf; /* pointer to the physical block of the next * ++ * level. leaf or next index could bet here */ ++ __u16 ei_leaf_hi; /* high 16 bits of physical block */ ++ __u16 ei_unused; ++}; ++ ++/* ++ * each block (leaves and indexes), even inode-stored has header ++ */ ++struct ext3_extent_header { ++ __u16 eh_magic; /* probably will support different formats */ ++ __u16 eh_entries; /* number of valid entries */ ++ __u16 eh_max; /* capacity of store in entries */ ++ __u16 eh_depth; /* has tree real underlaying blocks? 
*/ ++ __u32 eh_generation; /* generation of the tree */ ++}; ++ ++#define EXT3_EXT_MAGIC 0xf30a ++ ++/* ++ * array of ext3_ext_path contains path to some extent ++ * creation/lookup routines use it for traversal/splitting/etc ++ * truncate uses it to simulate recursive walking ++ */ ++struct ext3_ext_path { ++ __u32 p_block; ++ __u16 p_depth; ++ struct ext3_extent *p_ext; ++ struct ext3_extent_idx *p_idx; ++ struct ext3_extent_header *p_hdr; ++ struct buffer_head *p_bh; ++}; ++ ++/* ++ * structure for external API ++ */ ++ ++/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* ++ * ext3_extents_tree is used to pass initial information ++ * to top-level extents API ++ */ ++struct ext3_extents_helpers; ++struct ext3_extents_tree { ++ struct inode *inode; /* inode which tree belongs to */ ++ void *root; /* ptr to data top of tree resides at */ ++ void *buffer; /* will be passed as arg to ^^ routines */ ++ int buffer_len; ++ void *private; ++ struct ext3_ext_cache *cex;/* last found extent */ ++ struct ext3_extents_helpers *ops; ++}; ++ ++struct ext3_extents_helpers { ++ int (*get_write_access)(handle_t *h, void *buffer); ++ int (*mark_buffer_dirty)(handle_t *h, void *buffer); ++ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); ++ int (*remove_extent_credits)(struct ext3_extents_tree *, ++ struct ext3_extent *, unsigned long, ++ unsigned long); ++ int (*remove_extent)(struct ext3_extents_tree *, ++ struct ext3_extent *, unsigned long, ++ unsigned long); ++ int (*new_block)(handle_t *, struct ext3_extents_tree *, ++ struct ext3_ext_path *, struct ext3_extent *, ++ int *); ++}; ++ ++/* ++ * to be called by ext3_ext_walk_space() ++ * negative retcode - error ++ * positive retcode - signal for ext3_ext_walk_space(), see below ++ * callback must return valid 
extent (passed or newly created) ++ */ ++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, ++ struct ext3_ext_path *, ++ struct ext3_extent *, int); ++ ++#define EXT_CONTINUE 0 ++#define EXT_BREAK 1 ++#define EXT_REPEAT 2 ++ ++ ++#define EXT_MAX_BLOCK 0xffffffff ++#define EXT_CACHE_MARK 0xffff ++ ++ ++#define EXT_FIRST_EXTENT(__hdr__) \ ++ ((struct ext3_extent *) (((char *) (__hdr__)) + \ ++ sizeof(struct ext3_extent_header))) ++#define EXT_FIRST_INDEX(__hdr__) \ ++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ ++ sizeof(struct ext3_extent_header))) ++#define EXT_HAS_FREE_INDEX(__path__) \ ++ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) ++#define EXT_LAST_EXTENT(__hdr__) \ ++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) ++#define EXT_LAST_INDEX(__hdr__) \ ++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) ++#define EXT_MAX_EXTENT(__hdr__) \ ++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_MAX_INDEX(__hdr__) \ ++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++ ++#define EXT_ROOT_HDR(tree) \ ++ ((struct ext3_extent_header *) (tree)->root) ++#define EXT_BLOCK_HDR(bh) \ ++ ((struct ext3_extent_header *) (bh)->b_data) ++#define EXT_DEPTH(_t_) \ ++ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) ++#define EXT_GENERATION(_t_) \ ++ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++ ++ ++#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); ++ ++ ++/* ++ * this structure is used to gather extents from the tree via ioctl ++ */ ++struct ext3_extent_buf { ++ unsigned long start; ++ int buflen; ++ void *buffer; ++ void *cur; ++ int err; ++}; ++ ++/* ++ * this structure is used to collect stats info about the tree ++ */ ++struct ext3_extent_tree_stats { ++ int depth; ++ int extents_num; ++ int leaf_num; ++}; ++ ++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); ++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct 
ext3_ext_path *); ++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); ++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); ++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); ++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); ++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); ++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); ++ ++static inline void ++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) ++{ ++ if (tree->cex) ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; ++} ++ ++ ++#endif /* _LINUX_EXT3_EXTENTS */ ++ +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 +@@ -128,6 +128,8 @@ + */ + struct semaphore truncate_sem; + struct inode vfs_inode; ++ ++ __u32 i_cached_extent[4]; + }; + + #endif /* _LINUX_EXT3_FS_I */ + +%diffstat + fs/ext3/Makefile | 2 + fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/ialloc.c | 4 + fs/ext3/inode.c | 29 + fs/ext3/ioctl.c | 4 + fs/ext3/super.c | 17 + include/linux/ext3_extents.h | 252 ++++ + include/linux/ext3_fs.h | 15 + include/linux/ext3_fs_i.h | 2 + 9 files changed, 2630 insertions(+), 8 deletions(-) + diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch new file mode 100644 index 0000000..2408cc7 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -0,0 +1,1738 @@ +Index: linux-2.6.5-sles9/fs/ext3/mballoc.c 
+=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300 +@@ -0,0 +1,1428 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++ ++/* ++ * mballoc.c contains the multiblocks allocation routines ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * TODO: ++ * - do not scan from the beginning, try to remember first free block ++ * - mb_mark_used_* may allocate chunk right after splitting buddy ++ * - special flag to advice allocator to look for requested + N blocks ++ * this may improve interaction between extents and mballoc ++ */ ++ ++/* ++ * with AGRESSIVE_CHECK allocator runs consistency checks over ++ * structures. this checks slow things down a lot ++ */ ++#define AGGRESSIVE_CHECK__ ++ ++/* ++ */ ++#define MB_DEBUG__ ++#ifdef MB_DEBUG ++#define mb_debug(fmt,a...) printk(fmt, ##a) ++#else ++#define mb_debug(fmt,a...) ++#endif ++ ++/* ++ * where to save buddies structures beetween umount/mount (clean case only) ++ */ ++#define EXT3_BUDDY_FILE ".buddy" ++ ++/* ++ * max. 
number of chunks to be tracked in ext3_free_extent struct ++ */ ++#define MB_ARR_SIZE 32 ++ ++struct ext3_allocation_context { ++ struct super_block *ac_sb; ++ ++ /* search goals */ ++ int ac_g_group; ++ int ac_g_start; ++ int ac_g_len; ++ int ac_g_flags; ++ ++ /* the best found extent */ ++ int ac_b_group; ++ int ac_b_start; ++ int ac_b_len; ++ ++ /* number of iterations done. we have to track to limit searching */ ++ int ac_repeats; ++ int ac_groups_scanned; ++ int ac_status; ++}; ++ ++#define AC_STATUS_CONTINUE 1 ++#define AC_STATUS_FOUND 2 ++ ++ ++struct ext3_buddy { ++ void *bd_bitmap; ++ void *bd_buddy; ++ int bd_blkbits; ++ struct buffer_head *bd_bh; ++ struct buffer_head *bd_bh2; ++ struct ext3_buddy_group_blocks *bd_bd; ++ struct super_block *bd_sb; ++}; ++ ++struct ext3_free_extent { ++ int fe_start; ++ int fe_len; ++ unsigned char fe_orders[MB_ARR_SIZE]; ++ unsigned char fe_nums; ++ unsigned char fe_back; ++}; ++ ++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) ++ ++ ++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); ++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); ++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long); ++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); ++int ext3_mb_reserve_blocks(struct super_block *, int); ++void ext3_mb_release_blocks(struct super_block *, int); ++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); ++void ext3_mb_free_committed_blocks(struct super_block *); ++ ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ if ((unsigned) addr & 1) { \ ++ bit += 8; \ ++ addr--; \ ++ } \ ++ if ((unsigned) addr & 2) { \ ++ bit += 16; \ ++ addr--; \ ++ addr--; \ ++ } \ ++} ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ 
mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++/* return the bitmap of the given order and store its size in bits in *max; NULL if order is too large */ ++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) ++{ ++ int i = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(max != NULL); ++ ++ if (order > e3b->bd_blkbits + 1) ++ return NULL; ++ ++ /* at order 0 we see each particular block */ ++ *max = 1 << (e3b->bd_blkbits + 3); ++ if (order == 0) ++ return e3b->bd_bitmap; ++ ++ bb = e3b->bd_buddy; ++ *max = *max >> 1; ++ while (i < order) { ++ bb += 1 << (e3b->bd_blkbits - i); ++ i++; ++ *max = *max >> 1; ++ } ++ return bb; ++} ++/* get and read the bitmap and buddy blocks of the group and fill in *e3b; returns 0 or -EIO */ ++static int ext3_mb_load_desc(struct super_block *sb, int group, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap); ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy); ++ ++ /* load bitmap; bd_bh2 is not assigned yet, so a failure here must not reach the brelse() calls under "out" */ ++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap); ++ if (e3b->bd_bh == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ return -EIO; ++ } ++ if (!buffer_uptodate(e3b->bd_bh)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh); ++ wait_on_buffer(e3b->bd_bh); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh)); ++ ++ /* load buddy */ ++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy); ++ if (e3b->bd_bh2 == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ goto out; ++ } ++ if (!buffer_uptodate(e3b->bd_bh2)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh2); ++ wait_on_buffer(e3b->bd_bh2); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh2)); ++ ++ e3b->bd_bitmap = e3b->bd_bh->b_data; ++ e3b->bd_buddy = e3b->bd_bh2->b_data; ++ e3b->bd_blkbits = sb->s_blocksize_bits; ++ e3b->bd_bd = sbi->s_buddy_blocks + group; ++ e3b->bd_sb = sb; ++ ++ return 0; ++out: ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++
e3b->bd_bh = NULL; ++ e3b->bd_bh2 = NULL; ++ return -EIO; ++} ++ ++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b) ++{ ++ mark_buffer_dirty(e3b->bd_bh); ++ mark_buffer_dirty(e3b->bd_bh2); ++} ++ ++static void ext3_mb_release_desc(struct ext3_buddy *e3b) ++{ ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++} ++ ++#ifdef AGGRESSIVE_CHECK ++static void mb_check_buddy(struct ext3_buddy *e3b) ++{ ++ int order = e3b->bd_blkbits + 1; ++ int max, max2, i, j, k, count; ++ void *buddy, *buddy2; ++ ++ if (!test_opt(e3b->bd_sb, MBALLOC)) ++ return; ++ ++ while (order > 1) { ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ buddy2 = mb_find_buddy(e3b, order - 1, &max2); ++ J_ASSERT(buddy2); ++ J_ASSERT(buddy != buddy2); ++ J_ASSERT(max * 2 == max2); ++ ++ count = 0; ++ for (i = 0; i < max; i++) { ++ ++ if (!mb_test_bit(i, buddy)) { ++ /* only single bit in buddy2 may be 1 */ ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ continue; ++ } ++ ++ /* both bits in buddy2 must be 0 */ ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2)); ++ ++ for (j = 0; j < (1 << order); j++) { ++ k = (i * (1 << order)) + j; ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); ++ } ++ count++; ++ } ++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count); ++ order--; ++ } ++ ++ buddy = mb_find_buddy(e3b, 0, &max); ++ for (i = 0; i < max; i++) { ++ if (mb_test_bit(i, buddy)) ++ continue; ++ /* check used bits only */ ++ for (j = 0; j < e3b->bd_blkbits + 1; j++) { ++ buddy2 = mb_find_buddy(e3b, j, &max2); ++ k = i >> j; ++ J_ASSERT(k < max2); ++ J_ASSERT(!mb_test_bit(k, buddy2)); ++ } ++ } ++} ++#else ++#define mb_check_buddy(e3b) ++#endif ++ ++static inline void ++ext3_lock_group(struct super_block *sb, int group) ++{ ++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++ ++static inline void 
++ext3_unlock_group(struct super_block *sb, int group) ++{ ++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++/* return the order (>= 1) at which the chunk covering 'block' is marked in the buddy bitmaps, or 0 if no such bit is set */ ++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) ++{ ++ int order = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); ++ ++ bb = e3b->bd_buddy; ++ while (order <= e3b->bd_blkbits + 1) { ++ block = block >> 1; ++ if (mb_test_bit(block, bb)) { ++ /* this block is part of buddy of order 'order' */ ++ return order; ++ } ++ bb += 1 << (e3b->bd_blkbits - order); ++ order++; ++ } ++ return 0; ++} ++/* clear 'len' bits of bitmap 'bm' starting at bit 'cur' */ ++static inline void mb_clear_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0; ++ cur += 32; ++ continue; ++ } ++ mb_clear_bit(cur, bm); ++ cur++; ++ } ++} ++/* set 'len' bits of bitmap 'bm' starting at bit 'cur' */ ++static inline void mb_set_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: set whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0xffffffff; ++ cur += 32; ++ continue; ++ } ++ mb_set_bit(cur, bm); ++ cur++; ++ } ++} ++ ++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) ++{ ++ int block, max, order; ++ void *buddy, *buddy2; ++ ++ mb_check_buddy(e3b); ++ while (count-- > 0) { ++ block = first++; ++ order = 0; ++ ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); ++ e3b->bd_bd->bb_counters[order]++; ++ ++ /* start of the buddy */ ++ buddy = mb_find_buddy(e3b, order, &max); ++ ++ do { ++ block &= ~1UL; ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) ++ break; ++ ++ /* both the buddies are free, try to coalesce them */ ++ buddy2 = mb_find_buddy(e3b, order + 1, &max); ++ ++ if (!buddy2) ++ break; ++ ++ if (order > 0) { ++ /*
for special purposes, we don't clear ++ * free bits in bitmap */ ++ mb_clear_bit(block, buddy); ++ mb_clear_bit(block + 1, buddy); ++ } ++ e3b->bd_bd->bb_counters[order]--; ++ e3b->bd_bd->bb_counters[order]--; ++ ++ block = block >> 1; ++ order++; ++ e3b->bd_bd->bb_counters[order]++; ++ ++ mb_set_bit(block, buddy2); ++ buddy = buddy2; ++ } while (1); ++ } ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++/* ++ * returns 1 if out extent is enough to fill needed space ++ */ ++int mb_make_backward_extent(struct ext3_free_extent *in, ++ struct ext3_free_extent *out, int needed) ++{ ++ int i; ++ ++ J_ASSERT(in); ++ J_ASSERT(out); ++ J_ASSERT(in->fe_nums < MB_ARR_SIZE); ++ ++ out->fe_len = 0; ++ out->fe_start = in->fe_start + in->fe_len; ++ out->fe_nums = 0; ++ ++ /* for single-chunk extent we need not back order ++ * also, if an extent doesn't fill needed space ++ * then it makes no sense to try back order becase ++ * if we select this extent then it'll be use as is */ ++ if (in->fe_nums < 2 || in->fe_len < needed) ++ return 0; ++ ++ i = in->fe_nums - 1; ++ while (i >= 0 && out->fe_len < needed) { ++ out->fe_len += (1 << in->fe_orders[i]); ++ out->fe_start -= (1 << in->fe_orders[i]); ++ i--; ++ } ++ /* FIXME: in some situation fe_orders may be too small to hold ++ * all the buddies */ ++ J_ASSERT(out->fe_len >= needed); ++ ++ for (i++; i < in->fe_nums; i++) ++ out->fe_orders[out->fe_nums++] = in->fe_orders[i]; ++ J_ASSERT(out->fe_nums < MB_ARR_SIZE); ++ out->fe_back = 1; ++ ++ return 1; ++} ++ ++int mb_find_extent(struct ext3_buddy *e3b, int order, int block, ++ int needed, struct ext3_free_extent *ex) ++{ ++ int space = needed; ++ int next, max, ord; ++ void *buddy; ++ ++ J_ASSERT(ex != NULL); ++ ++ ex->fe_nums = 0; ++ ex->fe_len = 0; ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ J_ASSERT(block < max); ++ if (!mb_test_bit(block, buddy)) ++ goto nofree; ++ ++ if (order == 0) { ++ /* find actual order */ ++ order = mb_find_order_for_block(e3b, 
block); ++ block = block >> order; ++ } ++ ++ ex->fe_orders[ex->fe_nums++] = order; ++ ex->fe_len = 1 << order; ++ ex->fe_start = block << order; ++ ex->fe_back = 0; ++ ++ while ((space = space - (1 << order)) > 0) { ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ ++ if (block + 1 >= max) ++ break; ++ ++ next = (block + 1) * (1 << order); ++ if (!mb_test_bit(next, e3b->bd_bitmap)) ++ break; ++ ++ ord = mb_find_order_for_block(e3b, next); ++ ++ if ((1 << ord) >= needed) { ++ /* we dont want to coalesce with self-enough buddies */ ++ break; ++ } ++ order = ord; ++ block = next >> order; ++ ex->fe_len += 1 << order; ++ ++ if (ex->fe_nums < MB_ARR_SIZE) ++ ex->fe_orders[ex->fe_nums++] = order; ++ } ++ ++nofree: ++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); ++ return ex->fe_len; ++} ++ ++static int mb_mark_used_backward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ start = ex->fe_start + ex->fe_len - 1; ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) && ++ len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! 
*/ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start -= mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ J_ASSERT(start >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++static int mb_mark_used_forward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ ++ if (((start >> ord) << ord) == start && len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! 
*/ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start += mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++int inline mb_mark_used(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int err; ++ ++ J_ASSERT(ex); ++ if (ex->fe_back == 0) ++ err = mb_mark_used_forward(e3b, ex, len); ++ else ++ err = mb_mark_used_backward(e3b, ex, len); ++ return err; ++} ++ ++int ext3_mb_new_in_group(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b, int group) ++{ ++ struct super_block *sb = ac->ac_sb; ++ int err, gorder, max, i; ++ struct ext3_free_extent curex; ++ ++ /* let's know order of allocation */ ++ gorder = 0; ++ while (ac->ac_g_len > (1 << gorder)) ++ gorder++; ++ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { ++ /* someone asks for space at this specified block ++ * probably he wants to merge it into existing extent */ ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ /* good. 
at least one block is free */ ++ max = mb_find_extent(e3b, 0, ac->ac_g_start, ++ ac->ac_g_len, &curex); ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ err = 0; ++ goto out; ++ } ++ /* don't try to find goal anymore */ ++ ac->ac_g_flags &= ~1; ++ } ++ ++ i = 0; ++ while (1) { ++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i); ++ if (i >= sb->s_blocksize * 8) ++ break; ++ ++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex); ++ if (max >= ac->ac_g_len) { ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ break; ++ } ++ i += max; ++ } ++ ++ return 0; ++ ++out: ++ return err; ++} ++ ++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr) ++{ ++ struct ext3_group_desc *gdp; ++ int free_blocks; ++ ++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL); ++ if (!gdp) ++ return 0; ++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); ++ if (free_blocks == 0) ++ return 0; ++ ++ /* someone wants this block very much */ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) ++ return 1; ++ ++ /* FIXME: I'd like to take fragmentation into account here */ ++ if (cr == 0) { ++ if (free_blocks >= ac->ac_g_len >> 1) ++ return 1; ++ } else if (cr == 1) { ++ if (free_blocks >= ac->ac_g_len >> 2) ++ return 1; ++ } else if (cr == 2) { ++ return 1; ++ } else { ++ BUG(); ++ } ++ return 0; ++} ++ ++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, ++ unsigned long goal, int *len, int flags, int *errp) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_allocation_context ac; ++ int i, group, block, cr, err = 0; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ struct buffer_head *gdp_bh; ++ struct ext3_sb_info 
*sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ ++ J_ASSERT(len != NULL); ++ J_ASSERT(*len > 0); ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk("ext3_mb_new_nblocks: nonexistent device"); ++ return 0; ++ } ++ ++ if (!test_opt(sb, MBALLOC)) { ++ static int ext3_mballoc_warning = 0; ++ if (ext3_mballoc_warning == 0) { ++ printk(KERN_ERR "EXT3-fs: multiblock request with " ++ "mballoc disabled!\n"); ++ ext3_mballoc_warning++; ++ } ++ *len = 1; ++ err = ext3_new_block_old(handle, inode, goal, errp); ++ return err; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ ++ if (!(flags & 2)) { ++ /* someone asks for non-reserved blocks */ ++ BUG_ON(*len > 1); ++ err = ext3_mb_reserve_blocks(sb, 1); ++ if (err) { ++ *errp = err; ++ return 0; ++ } ++ } ++ ++ /* ++ * Check quota for allocation of this blocks. ++ */ ++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) ++ *len -= 1; ++ if (*len == 0) { ++ *errp = -EDQUOT; ++ block = 0; ++ goto out; ++ } ++ ++ /* start searching from the goal */ ++ if (goal < le32_to_cpu(es->s_first_data_block) || ++ goal >= le32_to_cpu(es->s_blocks_count)) ++ goal = le32_to_cpu(es->s_first_data_block); ++ group = (goal - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ block = ((goal - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb)); ++ ++ /* set up allocation goals */ ++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0; ++ ac.ac_status = 0; ++ ac.ac_groups_scanned = 0; ++ ac.ac_sb = inode->i_sb; ++ ac.ac_g_group = group; ++ ac.ac_g_start = block; ++ ac.ac_g_len = *len; ++ ac.ac_g_flags = flags; ++ ++ /* loop over the groups */ ++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) { ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { ++ if (group == EXT3_SB(sb)->s_groups_count) ++ group = 0; ++ ++ /* check is group good for our criteries */ ++ if (!mb_good_group(&ac, group, cr)) ++ continue; ++ ++ err = 
ext3_mb_load_desc(ac.ac_sb, group, &e3b); ++ if (err) ++ goto out_err; ++ ++ ext3_lock_group(sb, group); ++ if (!mb_good_group(&ac, group, cr)) { ++ /* someone did allocation from this group */ ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ continue; ++ } ++ ++ err = ext3_mb_new_in_group(&ac, &e3b, group); ++ ext3_unlock_group(sb, group); ++ if (ac.ac_status == AC_STATUS_FOUND) ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ if (err) ++ goto out_err; ++ if (ac.ac_status == AC_STATUS_FOUND) ++ break; ++ } ++ } ++ ++ if (ac.ac_status != AC_STATUS_FOUND) { ++ /* unfortunately, we can't satisfy this request */ ++ J_ASSERT(ac.ac_b_len == 0); ++ DQUOT_FREE_BLOCK(inode, *len); ++ *errp = -ENOSPC; ++ block = 0; ++ goto out; ++ } ++ ++ /* good news - free block(s) have been found. now it's time ++ * to mark block(s) in good old journaled bitmap */ ++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block); ++ ++ /* we made a decision, now mark found blocks in good old ++ * bitmap to be journaled */ ++ /* gdp is not looked up yet, so report the chosen group and the found length */ ++ ext3_debug("using block group %d(%d)\n", ++ ac.ac_b_group, ac.ac_b_len); ++ ++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group); ++ if (!bitmap_bh) { ++ err = -EIO; /* out_err stores err into *errp */ ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) { ++ *errp = err; ++ goto out_err; ++ } ++ ++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh); ++ if (!gdp) { ++ err = -EIO; /* out_err stores err into *errp */ ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + le32_to_cpu(es->s_first_data_block); ++ ++ if (block == le32_to_cpu(gdp->bg_block_bitmap) || ++ block == le32_to_cpu(gdp->bg_inode_bitmap) || ++ in_range(block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error(sb, "ext3_new_block", ++ "Allocating block in system
zone - " ++ "block = %u", block); ++#if 0 ++ for (i = 0; i < ac.ac_b_len; i++) ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++#endif ++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); ++ ++ ext3_lock_group(sb, ac.ac_b_group); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - ++ ac.ac_b_len); ++ ext3_unlock_group(sb, ac.ac_b_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len); ++ ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) ++ goto out_err; ++ err = ext3_journal_dirty_metadata(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ sb->s_dirt = 1; ++ *errp = 0; ++ brelse(bitmap_bh); ++ ++ /* drop non-allocated, but dquote'd blocks */ ++ J_ASSERT(*len >= ac.ac_b_len); ++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len); ++ ++ *len = ac.ac_b_len; ++ J_ASSERT(block != 0); ++ goto out; ++ ++out_err: ++ /* if we've already allocated something, roll it back */ ++ if (ac.ac_status == AC_STATUS_FOUND) { ++ /* FIXME: free blocks here */ ++ } ++ ++ DQUOT_FREE_BLOCK(inode, *len); ++ brelse(bitmap_bh); ++ *errp = err; ++ block = 0; ++out: ++ if (!(flags & 2)) { ++ /* block wasn't reserved before and we reserved it ++ * at the beginning of allocation. it doesn't matter ++ * whether we allocated anything or we failed: time ++ * to release reservation. 
NOTE: because I expect ++ * any multiblock request from delayed allocation ++ * path only, here is single block always */ ++ ext3_mb_release_blocks(sb, 1); ++ } ++ return block; ++} ++ ++int ext3_mb_generate_buddy(struct super_block *sb, int group) ++{ ++ struct buffer_head *bh; ++ int i, err, count = 0; ++ struct ext3_buddy e3b; ++ ++ err = ext3_mb_load_desc(sb, group, &e3b); ++ if (err) ++ goto out; ++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize); ++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize); ++ ++ bh = read_block_bitmap(sb, group); ++ if (bh == NULL) { ++ err = -EIO; ++ goto out2; ++ } ++ ++ /* loop over the blocks, nad create buddies for free ones */ ++ for (i = 0; i < sb->s_blocksize * 8; i++) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { ++ mb_free_blocks(&e3b, i, 1); ++ count++; ++ } ++ } ++ brelse(bh); ++ mb_check_buddy(&e3b); ++ ext3_mb_dirty_buddy(&e3b); ++ ++out2: ++ ext3_mb_release_desc(&e3b); ++out: ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_mb_new_blocks); ++ ++#define MB_CREDITS \ ++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ ++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS) ++ ++int ext3_mb_init_backend(struct super_block *sb) ++{ ++ struct inode *root = sb->s_root->d_inode; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct dentry *db; ++ tid_t target; ++ int err, i; ++ ++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) * ++ sbi->s_groups_count, GFP_KERNEL); ++ if (sbi->s_buddy_blocks == NULL) { ++ printk("can't allocate mem for buddy maps\n"); ++ return -ENOMEM; ++ } ++ memset(sbi->s_buddy_blocks, 0, ++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count); ++ sbi->s_buddy = NULL; ++ ++ down(&root->i_sem); ++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root, ++ strlen(EXT3_BUDDY_FILE)); ++ if (IS_ERR(db)) { ++ err = PTR_ERR(db); ++ printk("can't lookup buddy file: %d\n", err); ++ goto out; ++ } ++ ++ if (db->d_inode != NULL) { ++ sbi->s_buddy = igrab(db->d_inode); ++ goto map; ++ } ++ ++ err = 
ext3_create(root, db, S_IFREG, NULL); ++ if (err) { ++ printk("error while creation buddy file: %d\n", err); ++ } else { ++ sbi->s_buddy = igrab(db->d_inode); ++ } ++ ++map: ++ for (i = 0; i < sbi->s_groups_count; i++) { ++ struct buffer_head *bh = NULL; ++ handle_t *handle; ++ ++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out2; ++ } ++ ++ /* allocate block for bitmap */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy bitmap: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr; ++ brelse(bh); ++ ++ /* allocate block for buddy */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr; ++ brelse(bh); ++ ext3_journal_stop(handle); ++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock); ++ sbi->s_buddy_blocks[i].bb_md_cur = NULL; ++ sbi->s_buddy_blocks[i].bb_tid = 0; ++ } ++ ++ if (journal_start_commit(sbi->s_journal, &target)) ++ log_wait_commit(sbi->s_journal, target); ++ ++out2: ++ dput(db); ++out: ++ up(&root->i_sem); ++ return err; ++} ++ ++int ext3_mb_release(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* release freed, non-committed blocks */ ++ spin_lock(&sbi->s_md_lock); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_committed_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ ext3_mb_free_committed_blocks(sb); ++ ++ if (sbi->s_buddy_blocks) ++ kfree(sbi->s_buddy_blocks); ++ if (sbi->s_buddy) ++ iput(sbi->s_buddy); ++ if (sbi->s_blocks_reserved) ++ printk("ext3-fs: %ld blocks being reserved at umount!\n", ++ sbi->s_blocks_reserved); ++ return 0; ++} ++ ++int ext3_mb_init(struct 
super_block *sb) ++{ ++ struct ext3_super_block *es; ++ int i; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* init file for buddy data */ ++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ ext3_mb_init_backend(sb); ++ ++ es = EXT3_SB(sb)->s_es; ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) ++ ext3_mb_generate_buddy(sb, i); ++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock); ++ spin_lock_init(&EXT3_SB(sb)->s_md_lock); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction); ++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ printk("EXT3-fs: mballoc enabled\n"); ++ return 0; ++} ++ ++void ext3_mb_free_committed_blocks(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int err, i, count = 0, count2 = 0; ++ struct ext3_free_metadata *md; ++ struct ext3_buddy e3b; ++ ++ if (list_empty(&sbi->s_committed_transaction)) ++ return; ++ ++ /* there is committed blocks to be freed yet */ ++ do { ++ /* get next array of blocks */ ++ md = NULL; ++ spin_lock(&sbi->s_md_lock); ++ if (!list_empty(&sbi->s_committed_transaction)) { ++ md = list_entry(sbi->s_committed_transaction.next, ++ struct ext3_free_metadata, list); ++ list_del(&md->list); ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ if (md == NULL) ++ break; ++ ++ mb_debug("gonna free %u blocks in group %u (0x%p):", ++ md->num, md->group, md); ++ ++ err = ext3_mb_load_desc(sb, md->group, &e3b); ++ BUG_ON(err != 0); ++ ++ /* there are blocks to put in buddy to make them really free */ ++ count += md->num; ++ count2++; ++ ext3_lock_group(sb, md->group); ++ for (i = 0; i < md->num; i++) { ++ mb_debug(" %u", md->blocks[i]); ++ mb_free_blocks(&e3b, md->blocks[i], 1); ++ } ++ mb_debug("\n"); ++ ext3_unlock_group(sb, md->group); ++ ++ kfree(md); ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ } while (md); ++ mb_debug("freed %u blocks in %u structures\n", count, count2); 
++} ++/* when the handle belongs to a new transaction, rotate the closed/active lists and free the blocks on the committed list */ ++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (sbi->s_last_transaction == handle->h_transaction->t_tid) ++ return; ++ ++ /* new transaction! time to close last one and free blocks for ++ * committed transaction. we know that only one transaction can be ++ * active, so previous transaction can be being logged and we ++ * know that transaction before previous is known to be already ++ * logged. this means that now we may free blocks freed in all ++ * transactions before previous one. hope I'm clear enough ... */ ++ ++ spin_lock(&sbi->s_md_lock); ++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { ++ mb_debug("new transaction %lu, old %lu\n", ++ (unsigned long) handle->h_transaction->t_tid, ++ (unsigned long) sbi->s_last_transaction); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_closed_transaction); ++ sbi->s_last_transaction = handle->h_transaction->t_tid; ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ ext3_mb_free_committed_blocks(sb); ++} ++ ++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, ++ int group, int block, int count) ++{ ++ struct ext3_buddy_group_blocks *db = e3b->bd_bd; ++ struct super_block *sb = e3b->bd_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_free_metadata *md; ++ int i; ++ ++ ext3_lock_group(sb, group); ++ for (i = 0; i < count; i++) { ++ md = db->bb_md_cur; ++ if (md && db->bb_tid != handle->h_transaction->t_tid) { ++ db->bb_md_cur = NULL; ++ md = NULL; ++ } ++ ++ if (md == NULL) { ++ ext3_unlock_group(sb, group); ++ md = kmalloc(sizeof(*md), GFP_KERNEL); ++ if (md == NULL) ++ return -ENOMEM; ++ md->num = 0; ++ md->group = group; ++ ++ ext3_lock_group(sb, group); ++ if (db->bb_md_cur == NULL) { ++ spin_lock(&sbi->s_md_lock); ++ list_add(&md->list, &sbi->s_active_transaction); ++ spin_unlock(&sbi->s_md_lock); ++
db->bb_md_cur = md; ++ db->bb_tid = handle->h_transaction->t_tid; ++ mb_debug("new md 0x%p for group %u\n", ++ md, md->group); ++ } else { ++ kfree(md); ++ md = db->bb_md_cur; ++ } ++ } ++ ++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); ++ md->blocks[md->num] = block + i; ++ md->num++; ++ if (md->num == EXT3_BB_MAX_BLOCKS) { ++ /* no more space, put full container on a sb's list */ ++ db->bb_md_cur = NULL; ++ } ++ } ++ ext3_unlock_group(sb, group); ++ return 0; ++} ++ ++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ unsigned long bit, overflow; ++ struct buffer_head *gd_bh; ++ unsigned long block_group; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ int err = 0, ret; ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk ("ext3_free_blocks: nonexistent device"); ++ return; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ if (block < le32_to_cpu(es->s_first_data_block) || ++ block + count < block || ++ block + count > le32_to_cpu(es->s_blocks_count)) { ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks not in datazone - " ++ "block = %lu, count = %lu", block, count); ++ goto error_return; ++ } ++ ++ ext3_debug("freeing block %lu\n", block); ++ ++do_more: ++ overflow = 0; ++ block_group = (block - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ bit = (block - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb); ++ /* ++ * Check to see if we are freeing blocks across a group ++ * boundary. 
++ */ ++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { ++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); ++ count -= overflow; ++ } ++ brelse(bitmap_bh); ++ bitmap_bh = read_block_bitmap(sb, block_group); ++ if (!bitmap_bh) ++ goto error_return; ++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); ++ if (!gdp) ++ goto error_return; ++ ++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || ++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || ++ in_range (block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group) || ++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks in system zones - " ++ "Block = %lu, count = %lu", ++ block, count); ++ ++ BUFFER_TRACE(bitmap_bh, "getting write access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) ++ goto error_return; ++ ++ /* ++ * We are about to modify some metadata. Call the journal APIs ++ * to unshare ->b_data if a currently-committing transaction is ++ * using it ++ */ ++ BUFFER_TRACE(gd_bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, gd_bh); ++ if (err) ++ goto error_return; ++ ++ err = ext3_mb_load_desc(sb, block_group, &e3b); ++ if (err) ++ goto error_return; ++ ++ if (metadata) { ++ /* blocks being freed are metadata. 
these blocks shouldn't ++ * be used until this transaction is committed */ ++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); ++ } else { ++ ext3_lock_group(sb, block_group); ++ mb_free_blocks(&e3b, bit, count); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ ext3_unlock_group(sb, block_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, count); ++ } ++ ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ /* FIXME: undo logic will be implemented later and another way */ ++ mb_clear_bits(bitmap_bh->b_data, bit, count); ++ DQUOT_FREE_BLOCK(inode, count); ++ ++ /* We dirtied the bitmap block */ ++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ ++ /* And the group descriptor block */ ++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); ++ ret = ext3_journal_dirty_metadata(handle, gd_bh); ++ if (!err) err = ret; ++ ++ if (overflow && !err) { ++ block += count; ++ count = overflow; ++ goto do_more; ++ } ++ sb->s_dirt = 1; ++error_return: ++ brelse(bitmap_bh); ++ ext3_std_error(sb, err); ++ return; ++} ++ ++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int free, ret = -ENOSPC; ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); ++ if (blocks <= free - sbi->s_blocks_reserved) { ++ sbi->s_blocks_reserved += blocks; ++ ret = 0; ++ } ++ spin_unlock(&sbi->s_reserve_lock); ++ return ret; ++} ++ ++void ext3_mb_release_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ sbi->s_blocks_reserved -= blocks; ++ WARN_ON(sbi->s_blocks_reserved < 0); ++ if (sbi->s_blocks_reserved < 0) ++ sbi->s_blocks_reserved = 0; ++ spin_unlock(&sbi->s_reserve_lock); ++} ++ ++int ext3_new_block(handle_t 
*handle, struct inode *inode, ++ unsigned long goal, int *errp) ++{ ++ int ret, len; ++ ++ if (!test_opt(inode->i_sb, MBALLOC)) { ++ ret = ext3_new_block_old(handle, inode, goal, errp); ++ goto out; ++ } ++ len = 1; ++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); ++out: ++ return ret; ++} ++ ++ ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ if (!test_opt(inode->i_sb, MBALLOC)) ++ ext3_free_blocks_old(handle, inode, block, count); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata); ++ return; ++} ++ +Index: linux-2.6.5-sles9/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300 +@@ -389,6 +389,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_mb_release(sb); + ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); +@@ -542,7 +543,7 @@ + Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, +- Opt_err, Opt_extents, Opt_extdebug ++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, + }; + + static match_table_t tokens = { +@@ -589,6 +590,7 @@ + {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_extents, "extents"}, + {Opt_extdebug, "extdebug"}, ++ {Opt_mballoc, "mballoc"}, + {Opt_err, NULL} + }; + +@@ -810,6 +812,9 @@ + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt (sbi->s_mount_opt, MBALLOC); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1463,7 +1468,8 @@ + ext3_count_dirs(sb)); + + ext3_ext_init(sb); +- ++ ext3_mb_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.5-sles9/fs/ext3/Makefile 
+=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o extents.o ++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-sles9/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 +@@ -78,7 +78,7 @@ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -274,7 +274,7 @@ + } + + /* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks(handle_t *handle, struct inode *inode, ++void ext3_free_blocks_old(handle_t *handle, struct inode *inode, + unsigned long block, unsigned long count) + { + struct buffer_head *bitmap_bh = NULL; +@@ -1142,7 +1142,7 @@ + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. 
+ */ +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.5-sles9/fs/ext3/namei.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300 +@@ -1640,7 +1640,7 @@ + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, ++int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) + { + handle_t *handle; +Index: linux-2.6.5-sles9/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300 +@@ -572,7 +572,7 @@ + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); + return err; + } + +@@ -673,7 +673,7 @@ + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, +- le32_to_cpu(where[i].key), 1); ++ le32_to_cpu(where[i].key), 1, 1); + return err; + } + +@@ -1829,7 +1829,7 @@ + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 1); + } + + /** +@@ -2000,7 +2000,7 @@ + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* +Index: linux-2.6.5-sles9/fs/ext3/extents.c +=================================================================== +--- 
linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300 +@@ -740,7 +740,7 @@ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1391,7 +1391,7 @@ + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -1879,10 +1879,12 @@ + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -1894,7 +1896,7 @@ + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +Index: linux-2.6.5-sles9/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300 +@@ -1366,7 +1366,7 @@ + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ 
ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +@@ -1408,7 +1408,7 @@ + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + /* Free the old block. */ + ea_bdebug(old_bh, "freeing"); +- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); + + /* ext3_forget() calls bforget() for us, but we + let our caller release old_bh, so we need to +@@ -1504,7 +1504,7 @@ + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + ext3_xattr_cache_remove(bh); +- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); + } else { +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300 +@@ -57,6 +57,8 @@ + #define ext3_debug(f, a...) 
do {} while (0) + #endif + ++#define EXT3_MULTIBLOCK_ALLOCATOR 1 ++ + /* + * Special inodes numbers + */ +@@ -339,6 +341,7 @@ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -698,7 +701,7 @@ + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern unsigned long ext3_count_free_blocks (struct super_block *); + extern void ext3_check_blocks_bitmap (struct super_block *); + extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 +@@ -23,10 +23,30 @@ + #define EXT_INCLUDE + #include + #include ++#include + #endif + #endif + #include + ++#define EXT3_BB_MAX_BLOCKS 30 ++struct ext3_free_metadata { ++ unsigned short group; ++ unsigned short num; ++ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; ++ struct list_head list; ++}; ++ ++#define EXT3_BB_MAX_ORDER 14 ++ ++struct ext3_buddy_group_blocks { ++ sector_t bb_bitmap; ++ sector_t bb_buddy; ++ spinlock_t bb_lock; ++ unsigned bb_counters[EXT3_BB_MAX_ORDER]; ++ struct ext3_free_metadata *bb_md_cur; ++ unsigned long bb_tid; ++}; ++ + /* + * third extended-fs super-block data in memory + */ +@@ -78,6 +98,17 @@ + struct timer_list turn_ro_timer; /* For turning read-only (crash 
simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ + #endif ++ ++ /* for buddy allocator */ ++ struct ext3_buddy_group_blocks *s_buddy_blocks; ++ struct inode *s_buddy; ++ long s_blocks_reserved; ++ spinlock_t s_reserve_lock; ++ struct list_head s_active_transaction; ++ struct list_head s_closed_transaction; ++ struct list_head s_committed_transaction; ++ spinlock_t s_md_lock; ++ tid_t s_last_transaction; + }; + + #endif /* _LINUX_EXT3_FS_SB */ diff --git a/ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.7.patch b/ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.7.patch new file mode 100644 index 0000000..b20be23 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-nlinks-2.6.7.patch @@ -0,0 +1,170 @@ +Index: linux-2.6.7/fs/ext3/namei.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/namei.c 2004-06-15 23:19:36.000000000 -0600 ++++ linux-2.6.7/fs/ext3/namei.c 2004-08-20 17:48:54.000000000 -0600 +@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t * + static inline void ext3_inc_count(handle_t *handle, struct inode *inode) + { + inode->i_nlink++; ++ if (is_dx(inode) && inode->i_nlink > 1) { ++ /* limit is 16-bit i_links_count */ ++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) ++ inode->i_nlink = 1; ++ } + } + + static inline void ext3_dec_count(handle_t *handle, struct inode *inode) + { +- inode->i_nlink--; ++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) ++ inode->i_nlink--; + } + + static int ext3_add_nondir(handle_t *handle, +@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir + struct ext3_dir_entry_2 * de; + int err; + +- if (dir->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(dir)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = 
ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { +- inode->i_nlink--; /* is this nlink == 0? */ ++ ext3_dec_count(handle, inode); /* is this nlink == 0? */ + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; +@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir + iput (inode); + goto out_stop; + } +- dir->i_nlink++; ++ ext3_inc_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; +- if (inode->i_nlink != 2) +- ext3_warning (inode->i_sb, "ext3_rmdir", +- "empty directory has nlink!=2 (%d)", +- inode->i_nlink); ++ if (!EXT3_DIR_LINK_EMPTY(inode)) ++ ext3_warning(inode->i_sb, "ext3_rmdir", ++ "empty directory has too many links (%d)", ++ inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; + /* There's no need to set i_disksize: the fact that i_nlink is +@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +- dir->i_nlink--; ++ ext3_dec_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + +@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); +- inode->i_nlink--; ++ ext3_dec_count(handle, inode); + if (!inode->i_nlink) + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime; +@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol + struct inode *inode = old_dentry->d_inode; + int err; + +- if (inode->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(inode)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -2230,8 +2235,8 @@ static int ext3_rename (struct 
inode * o + if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; +- if (!new_inode && new_dir!=old_dir && +- new_dir->i_nlink >= EXT3_LINK_MAX) ++ if (!new_inode && new_dir != old_dir && ++ EXT3_DIR_LINK_MAXED(new_dir)) + goto end_rename; + } + if (!new_bh) { +@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o + } + + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + new_inode->i_ctime = CURRENT_TIME; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; +@@ -2299,11 +2304,11 @@ static int ext3_rename (struct inode * o + PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); +- old_dir->i_nlink--; ++ ext3_dec_count(handle, old_dir); + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + } else { +- new_dir->i_nlink++; ++ ext3_inc_count(handle, new_dir); + ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } +Index: linux-2.6.7/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-06-15 23:19:36.000000000 -0600 ++++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-20 17:41:27.000000000 -0600 +@@ -41,7 +41,7 @@ struct statfs; + /* + * Always enable hashed directories + */ +-#define CONFIG_EXT3_INDEX ++#define CONFIG_EXT3_INDEX 1 + + /* + * Debug code +@@ -79,7 +81,7 @@ + /* + * Maximal count of links to a file + */ +-#define EXT3_LINK_MAX 32000 ++#define EXT3_LINK_MAX 65000 + + /* + * Macro-instructions used to manage several block sizes +@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 { + */ + + #ifdef CONFIG_EXT3_INDEX +- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ +- EXT3_FEATURE_COMPAT_DIR_INDEX) && \ ++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ ++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ + 
(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) +-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) +-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) ++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \ ++ (is_dx(dir) && (dir)->i_nlink == 1)) + #else + #define is_dx(dir) 0 +-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) + #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) + #endif + diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch index 8a8d115..4a51eb8 100644 --- a/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch @@ -8,8 +8,8 @@ Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400 -+++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500 ++++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500 @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -21,8 +21,8 @@ Index: linux-stage/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400 -+++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500 ++++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500 @@ -37,6 +37,7 @@ #include #include @@ -31,20 +31,20 @@ Index: linux-stage/fs/ext3/inode.c #include "acl.h" /* -@@ -2472,6 +2473,9 @@ - 
ei->i_acl = EXT3_ACL_NOT_CACHED; +@@ -2401,6 +2402,9 @@ ei->i_default_acl = EXT3_ACL_NOT_CACHED; #endif -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ++ ++ if (ext3_iopen_get_inode(inode)) ++ return; + if (ext3_get_inode_loc(inode, &iloc, 0)) goto bad_inode; - bh = iloc.bh; Index: linux-stage/fs/ext3/iopen.c =================================================================== --- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500 @@ -0,0 +1,272 @@ +/* + * linux/fs/ext3/iopen.c @@ -321,7 +321,7 @@ Index: linux-stage/fs/ext3/iopen.c Index: linux-stage/fs/ext3/iopen.h =================================================================== --- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500 @@ -0,0 +1,15 @@ +/* + * iopen.h @@ -340,8 +340,8 @@ Index: linux-stage/fs/ext3/iopen.h + struct inode *inode, int rehash); Index: linux-stage/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400 -+++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500 ++++ linux-stage/fs/ext3/namei.c 2004-11-03 14:41:25.127695114 -0500 @@ -37,6 +37,7 @@ #include #include @@ -373,7 +373,7 @@ Index: linux-stage/fs/ext3/namei.c } -@@ -2019,10 +2021,6 @@ +@@ -2029,10 +2031,6 @@ inode->i_nlink); inode->i_version++; inode->i_nlink = 0; @@ -384,7 +384,7 @@ Index: linux-stage/fs/ext3/namei.c ext3_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; ext3_mark_inode_dirty(handle, inode); -@@ -2139,6 +2137,23 @@ +@@ 
-2152,6 +2150,23 @@ return err; } @@ -408,7 +408,7 @@ Index: linux-stage/fs/ext3/namei.c static int ext3_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { -@@ -2161,7 +2176,8 @@ +@@ -2175,7 +2190,8 @@ ext3_inc_count(handle, inode); atomic_inc(&inode->i_count); @@ -416,14 +416,14 @@ Index: linux-stage/fs/ext3/namei.c + err = ext3_add_link(handle, dentry, inode); + ext3_orphan_del(handle,inode); ext3_journal_stop(handle); - return err; - } + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400 -@@ -536,7 +536,7 @@ - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, +--- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500 ++++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500 +@@ -534,7 +534,7 @@ + Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_ignore, Opt_barrier, @@ -441,7 +441,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_err, NULL} }; -@@ -772,6 +775,18 @@ +@@ -778,6 +781,18 @@ else clear_opt(sbi->s_mount_opt, BARRIER); break; diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 083b5b0..be83e1e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,5 +1,40 @@ -tbd Cluster File Systems, Inc. - * version 1.2.x +tbd Cluster File Systems, Inc. + * version 1.3.4 + * bug fixes + - fixes from lustre 1.2.8 + - print NAL number in %x format (4645) + - the watchdog thread now runs as interruptible (5246) + - drop import inflight refcount on signal_completed_replay error (5255) + * miscellania + - add pid to ldlm debugging output (4922) + +2004-10-08 Cluster File Systems, Inc. 
+ * version 1.3.3 + * bug fixes + - properly handle portals process identifiers in messages (4165) + - finish default directory EA handling (3048) + - fixes from lustre 1.2.7 + - removed PTL_MD_KIOV usage under CRAY_PORTALS (4420) + - allow EADDRNOTAVAIL as retry for connect in liblustre tcpnal (4822) + +2004-09-16 Cluster File Systems, Inc. + * version 1.3.2 + * bug fixes + - many liblustre fixes + - fixes from lustre 1.2.6 + * miscellania + - update to new libsysio-head-0806 + - reorganization of lov code + +2004-08-30 Cluster File Systems, Inc. + * version 1.3.1 + * bug fixes + - add locking for mmapped files (2828) + - lmc/lconf changes to support multiple interfaces (3376) + - fixes from lustre 1.2.5 + +2004-08-14 Cluster File Systems, Inc. + * version 1.3.0 * bug fixes - don't dereference NULL peer_ni in ldlm_handle_ast_error (3258) - don't allow unlinking open directory if it isn't empty (2904) @@ -9,6 +44,137 @@ tbd Cluster File Systems, Inc. - chose better nal ids in liblustre (3292) - initialize liblustre with uid/group membership (2862) - let lconf resolve symlinked-to devices (4629) + - balance journal closure when 2.6 filter write fails (3401) + - add second rpc_lock and last_rcvd info for close reqs (3462) + - don't hold llog sem during network request (3652) + - update server last transno after client disconnects (2525) + - replace config semaphore with spinlock (3306) + - ext3 exents and multi-block allocation (3024) + - service time statistics in /proc + - minor fixes to liblustre build (3317) + - client recovery without upcall (3262) + - use transno after validating reply (3892) + - use different name for 2nd ptlrpcd thread (3887) + - get a client lock in ll_inode_revalidate_it (3597) + - direct IO reads on OST (4048) + - process timed out requests if import state changes (3754) + - ignore -ENOENT errors in osc_destroy (3639) + - fixes from lustre 1.2.0-1.2.4 + * miscellania + - use "CATALOGS" for the llog catalogs, not "CATLIST" (old) (b=2841) + 
- added kernel patch for /dev/sd I/O stats (4385) + +2004-11-16 Cluster File Systems, Inc. + * version 1.2.8 + * bug fixes + - fix TCP_NODELAY bug, which caused extreme perf regression (5134) + - allocate qswnal tx descriptors singly to avoid fragmentation (4504) + - don't LBUG on obdo_alloc() failure, use OBD_SLAB_ALLOC() (4800) + - fix NULL dereference in /proc/sys/portals/routes (4827) + - allow failed mdc_close() operations to be interrupted (4561) + - stop precreate on OST before MDS would time out on it (4778) + - don't free dentries not owned by NFS code, check generation (4806) + - fix lsm leak if mds_create_objects() fails (4801) + - limit debug_daemon file size, always print CERROR messages (4789) + - use transno after validating reply (3892) + - process timed out requests if import state changes (3754) + - update mtime on OST during writes, return in glimpse (4829) + - add mkfsoptions to LDAP (4679) + - use ->max_readahead method instead of zapping global ra (5039) + - don't interrupt __l_wait_event() during strace + * miscellania + - add software watchdogs to catch hung threads quickly (4941) + - make lustrefs init script start after nfs is mounted + - fix CWARN/ERROR duplication (4930) + - return async write errors to application if possible (2248) + - update barely-supported suse-2.4.21-171 series (4842) + - support for sles 9 %post scripts + - support for building 2.6 kernel-source packages + - support for sles km_* packages + +2004-10-07 Cluster File Systems, Inc. 
+ * version 1.2.7 + * bug fixes + - ignore -ENOENT errors in osc_destroy (3639) + - notify osc create thread that OSC is being cleaned up (4600) + - add nettype argument for llmount in #5d in conf-sanity.sh (3936) + - reconstruct ost_handle() like mds_handle() (4657) + - create a new thread to do import eviction to avoid deadlock (3969) + - let lconf resolve symlinked-to devices (4629) + - don't unlink "objects" from directory with default EA (4554) + - hold socknal file ref over connect in case target is down (4394) + - allow more than 32000 subdirectories in a single directory (3244) + - OST returns ENOSPC from object create when no space left (4539) + - don't send truncate RPC if file size isn't changing (4410) + - limit OSC precreate to 1/2 of value OST considers bogus (4778) + - bind to privileged port in socknal and tcpnal (3689) + * miscellania + - rate limit CERROR/CWARN console message to avoid overload (4519) + - basic mmap support (3918) + - kernel patch series update from b1_4 (4711) + +2004-09-16 Cluster File Systems, Inc. + * version 1.2.6 + * bug fixes + - avoid crash during MDS cleanup with OST shut down (2775) + - fix loi_list_lock/oig_lock inversion on interrupted IO (4136) + - don't use bad inodes on the MDS (3744) + - dynamic object preallocation to improve recovery speed (4236) + - don't hold spinlock over lock dumping or change debug flags (4401) + - don't zero obd_dev when it is force cleaned (3651) + - "lctl deactivate" will stop automatic recovery attempts (3406) + - look for existing replayed locks to avoid duplicates (3764) + - don't resolve lock handle twice in recovery avoiding race (4401) + - revalidate should check working dir is a directory (4134) + * miscellania + - don't always mark "slow" obdfilter messages as errors (4418) + +2004-08-24 Cluster File Systems, Inc. 
+ * version 1.2.5 + * bug fixes + - don't close LustreDB during write_conf until it is done (3860) + - fix typo in lconf for_each_profile (3821) + - allow dumping logs from multiple threads at one time (3820) + - don't allow multiple threads in OSC recovery (3812) + - fix debug_size parameters (3864) + - fix mds_postrecov to initialize import for llog ctxt (3121) + - replace config semaphore with spinlock (3306) + - be sure to send a reply for a CANCEL rpc with bad export (3863) + - don't allow enqueue to complete on a destroyed export (3822) + - down write_lock before checking llog header bitmap (3825) + - recover from lock replay timeout (3764) + - up llog sem before sending rpc (3652) + - reduce ns lock hold times when setting kms (3267) + - change a dlm LBUG to LASSERTF, to maybe learn something (4228) + - fix NULL deref and obd_dev leak on setup error (3312) + - replace some LBUG about llog ops with error handling (3841) + - don't match INVALID dentries from d_lookup and spin (3784) + - hold dcache_lock while marking dentries INVALID and hashing (4255) + - fix invalid assertion in ptlrpc_set_wait (3880) + * miscellania + - add libwrap support for the TCP acceptor (3996) + - add /proc/sys/portals/routes for non-root route listing (3994) + - allow setting MDS UUID in .xml (2580) + - print the stack of a process that LBUGs (4228) + +2004-07-14 Cluster File Systems, Inc. 
+ * version 1.2.4 + * bug fixes + - don't cleanup request in ll_file_open() on failed MDS open (3430) + - make sure to unset replay flag from failed open requests (3440) + - if default stripe count is 0, use OST count for inode size (3636) + - update parent mtime/ctime on client for create/unlink (2611) + - drop dentry ref in ext3_add_link from open_connect_dentry (3266) + - free recovery state on server during a forced cleanup (3571) + - unregister_reply for resent reqs (3063) + - loop back devices mounting and status check on 2.6 (3563) + - fix resource-creation race that can provoke i_size == 0 (3513) + - don't try to use bad inodes returned from MDS/OST fs lookup (3688) + - more debugging for page-accounting assertion (3746) + - return -ENOENT instead of asserting if ost getattr+unlink race (3558) + - avoid deadlock after precreation failure (3758) + - fix race and lock order deadlock in orphan handling (3450, 3750) + - add validity checks when grabbing inodes from l_ast_data (3599) * miscellania - drop scimac NAL (unmaintained) diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index dae4f44..61c5ab2 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -443,6 +443,7 @@ AM_CONDITIONAL(SNAPFS, test x$enable_snapfs = xyes) AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes) AM_CONDITIONAL(GSS, test x$enable_gss = xyes) AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes) +AM_CONDITIONAL(LIBLUSTRE_TESTS, test x$enable_liblustre_tests = xyes) AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests) ]) diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd index 8fd57d8..0e8ad33 100644 --- a/lustre/conf/lustre.dtd +++ b/lustre/conf/lustre.dtd @@ -53,7 +53,7 @@ + target_ref | node_ref | journalsize | mkfsoptions)*> @@ -71,7 +71,7 @@ failover ( 1 | 0 ) #IMPLIED> + target_ref | node_ref | journalsize | mkfsoptions)*> @@ -90,6 +90,7 @@ + diff --git a/lustre/conf/lustre2ldif.xsl 
b/lustre/conf/lustre2ldif.xsl index 3713ec8..8c3c24a 100644 --- a/lustre/conf/lustre2ldif.xsl +++ b/lustre/conf/lustre2ldif.xsl @@ -122,6 +122,9 @@ devsize: journalsize: + +mkfsoptions: + nodeRef: targetRef: @@ -173,6 +176,9 @@ devsize: journalsize: + +mkfsoptions: + diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index c99e6a5..72fb1aa 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -25,13 +25,18 @@ #define LIBLUSTRE_H__ #include -#ifndef __CYGWIN__ -#include -#include -#else -#include -#include "ioctl.h" +#ifdef HAVE_STDINT_H +# include +#endif +#ifdef HAVE_ASM_PAGE_H +# include +#endif +#ifdef HAVE_SYS_USER_H +# include #endif + +#include "ioctl.h" + #include #include #include @@ -625,6 +630,7 @@ static inline int schedule_timeout(signed long t) } #define lock_kernel() do {} while (0) +#define unlock_kernel() do {} while (0) #define daemonize(l) do {} while (0) #define sigfillset(l) do {} while (0) #define recalc_sigpending(l) do {} while (0) @@ -786,5 +792,4 @@ int liblustre_wait_event(int timeout); #include #include - #endif diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index 17ad969..2e61e82 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -186,26 +186,26 @@ extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, struct lprocfs_stats *stats); -#define LPROCFS_INIT_MULTI_VARS(array, size) \ -void lprocfs_init_multi_vars(unsigned int idx, \ - struct lprocfs_static_vars *x) \ -{ \ - struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \ - LASSERT(glob != 0); \ - LASSERT(idx < (unsigned int)(size)); \ - x->module_vars = glob[idx].module_vars; \ - x->obd_vars = glob[idx].obd_vars; \ -} \ - -#define LPROCFS_INIT_VARS(name, vclass, vinstance) \ -void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \ -{ \ - x->module_vars = vclass; \ - x->obd_vars = vinstance; \ -} \ - 
-#define lprocfs_init_vars(NAME, VAR) \ -do { \ +#define LPROCFS_INIT_MULTI_VARS(array, size) \ +void lprocfs_init_multi_vars(unsigned int idx, \ + struct lprocfs_static_vars *x) \ +{ \ + struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \ + LASSERT(glob != 0); \ + LASSERT(idx < (unsigned int)(size)); \ + x->module_vars = glob[idx].module_vars; \ + x->obd_vars = glob[idx].obd_vars; \ +} \ + +#define LPROCFS_INIT_VARS(name, vclass, vinstance) \ +void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \ +{ \ + x->module_vars = vclass; \ + x->obd_vars = vinstance; \ +} \ + +#define lprocfs_init_vars(NAME, VAR) \ +do { \ extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \ lprocfs_##NAME##_init_vars(VAR); \ } while (0) diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/linux/lustre_cfg.h index 3f2038f..e2b3cd3 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/linux/lustre_cfg.h @@ -280,6 +280,7 @@ struct lustre_mount_data { uint32_t lmd_nal; uint32_t lmd_server_ipaddr; uint32_t lmd_port; + uint32_t lmd_async; uint32_t lmd_nllu; uint32_t lmd_nllg; char lmd_security[16]; diff --git a/lustre/include/linux/lustre_commit_confd.h b/lustre/include/linux/lustre_commit_confd.h index 6183596..94f72b3 100644 --- a/lustre/include/linux/lustre_commit_confd.h +++ b/lustre/include/linux/lustre_commit_confd.h @@ -67,6 +67,7 @@ struct llog_commit_daemon { /* ptlrpc/recov_thread.c */ int llog_start_commit_thread(void); +int llog_cleanup_commit_master(int force); struct llog_canceld_ctxt *llcd_grab(void); void llcd_send(struct llog_canceld_ctxt *llcd); diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 03a88a4..2adb2bd 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -63,7 +63,6 @@ int groups_search(struct group_info *ginfo, gid_t grp); } while (0) #define groups_sort(gi) do {} while (0) - #define GROUP_AT(gi, i) 
((gi)->small_block[(i)]) static inline int cleanup_group_info(void) @@ -174,6 +173,10 @@ static inline void lustre_daemonize_helper(void) page->private = 0; \ } while(0) +#ifndef smp_num_cpus +#define smp_num_cpus num_online_cpus() +#endif + #define kiobuf bio #include @@ -257,7 +260,11 @@ static inline void cond_resched(void) static inline int mapping_mapped(struct address_space *mapping) { - return mapping->i_mmap_shared ? 1 : 0; + if (mapping->i_mmap_shared) + return 1; + if (mapping->i_mmap) + return 1; + return 0; } /* to find proc_dir_entry from inode. 2.6 has native one -bzzz */ @@ -327,6 +334,12 @@ static inline int mapping_has_pages(struct address_space *mapping) #define ll_vfs_symlink(dir, dentry, path, mode) vfs_symlink(dir, dentry, path, mode) #endif +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) +#endif + #ifdef HAVE_I_ALLOC_SEM #define UP_WRITE_I_ALLOC_SEM(i) do { up_write(&(i)->i_alloc_sem); } while (0) #define DOWN_WRITE_I_ALLOC_SEM(i) do { down_write(&(i)->i_alloc_sem); } while(0) diff --git a/lustre/include/linux/lustre_debug.h b/lustre/include/linux/lustre_debug.h index 669c0e8..95ff69f 100644 --- a/lustre/include/linux/lustre_debug.h +++ b/lustre/include/linux/lustre_debug.h @@ -42,6 +42,10 @@ do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \ OP; \ }} while(0) +#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) 
\ + CDEBUG(mask, "page %p map %p ind %lu priv %0lx: " fmt, \ + page, page->mapping, page->index, page->private, ## arg) + /* lib/debug.c */ int dump_lniobuf(struct niobuf_local *lnb); int dump_rniobuf(struct niobuf_remote *rnb); diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index ef991c2..a54ad3e 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -177,6 +177,7 @@ struct ldlm_namespace { struct list_head ns_unused_list; /* all root resources in ns */ int ns_nr_unused; unsigned int ns_max_unused; + unsigned long ns_next_dump; /* next dump time */ spinlock_t ns_counter_lock; __u64 ns_locks; @@ -252,6 +253,8 @@ struct ldlm_lock { /* Server-side-only members */ struct list_head l_pending_chain; /* callbacks pending */ unsigned long l_callback_timeout; + + __u32 l_pid; /* pid which created this lock */ }; #define LDLM_PLAIN 10 @@ -311,22 +314,23 @@ do { \ CDEBUG(level, "### " format \ " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\ "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \ - LPX64" expref: %d\n" , ## a, lock, \ + LPX64" expref: %d pid: %u\n" , ## a, lock, \ lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ lock->l_readers, lock->l_writers, \ ldlm_lockname[lock->l_granted_mode], \ ldlm_lockname[lock->l_req_mode], \ lock->l_flags, lock->l_remote_handle.cookie, \ lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99); \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ break; \ } \ if (lock->l_resource->lr_type == LDLM_EXTENT) { \ CDEBUG(level, "### " format \ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ "res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s ["LPU64 \ - "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \ - LPX64" expref: %d\n" , ## a, \ + "->"LPU64"] (req "LPU64"->"LPU64") flags: %x remote: " \ + LPX64" expref: %d pid: %u\n" , ## a, \ lock->l_resource->lr_namespace->ns_name, lock, \ lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ lock->l_readers, lock->l_writers, \ @@ -342,15 +346,16 @@ do { \ lock->l_req_extent.start, lock->l_req_extent.end, \ lock->l_flags, lock->l_remote_handle.cookie, \ lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99); \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ break; \ } \ if (lock->l_resource->lr_type == LDLM_FLOCK) { \ CDEBUG(level, "### " format \ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ "res: "LPU64"/"LPU64"/"LPU64" rrc: %d type: %s pid: " \ - LPU64" ["LPU64"->"LPU64"] flags: %x remote: "LPX64 \ - " expref: %d\n" , ## a, \ + LPU64" " "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \ + " expref: %d pid: %u\n" , ## a, \ lock->l_resource->lr_namespace->ns_name, lock, \ lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ lock->l_readers, lock->l_writers, \ @@ -366,14 +371,16 @@ do { \ lock->l_policy_data.l_flock.end, \ lock->l_flags, lock->l_remote_handle.cookie, \ lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99); \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ break; \ } \ if (lock->l_resource->lr_type == LDLM_IBITS) { \ CDEBUG(level, "### " format \ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ "res: "LPU64"/"LPU64"/"LPU64" bits "LPX64" rrc: %d " \ - "type: %s flags: %x remote: "LPX64" expref: %d\n" , ## a,\ + "type: %s flags: %x remote: "LPX64" expref: %d " \ + "pid %u\n" , ## a, \ lock->l_resource->lr_namespace->ns_name, \ lock, lock->l_handle.h_cookie, \ atomic_read (&lock->l_refc), \ @@ -388,14 +395,16 @@ do { \ ldlm_typename[lock->l_resource->lr_type], \ lock->l_flags, lock->l_remote_handle.cookie, \ lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99); \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ break; \ } \ { \ CDEBUG(level, "### " format \ " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s "\ - "flags: %x remote: "LPX64" expref: %d\n" , ## a, \ + "res: "LPU64"/"LPU64"/"LPU64"/"LPU64" rrc: %d type: %s " \ + "flags: %x remote: "LPX64" expref: %d " \ + "pid: %u\n" , ## a, \ lock->l_resource->lr_namespace->ns_name, \ lock, lock->l_handle.h_cookie, \ atomic_read (&lock->l_refc), \ @@ -410,7 +419,8 @@ do { \ ldlm_typename[lock->l_resource->lr_type], \ lock->l_flags, lock->l_remote_handle.cookie, \ lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99); \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ } \ } while (0) @@ -542,9 +552,9 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, struct ldlm_lock *lock); void ldlm_resource_unlink_lock(struct ldlm_lock *lock); void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(void); -void ldlm_namespace_dump(struct ldlm_namespace *); -void ldlm_resource_dump(struct ldlm_resource *); +void ldlm_dump_all_namespaces(int level); +void ldlm_namespace_dump(int level, struct ldlm_namespace *); +void ldlm_resource_dump(int level, struct ldlm_resource *); int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, struct ldlm_res_id); diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h index 2e4e760..7404bd7 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/linux/lustre_export.h @@ -63,9 +63,9 @@ struct filter_export_data { struct filter_client_data *fed_fcd; loff_t fed_lr_off; int fed_lr_idx; - unsigned long fed_dirty; /* in bytes */ - unsigned long fed_grant; /* in bytes */ - unsigned long fed_pending; /* bytes just being written */ + long fed_dirty; /* in bytes */ + long fed_grant; /* in bytes */ + long fed_pending; /* bytes just being written */ }; struct obd_export { @@ -86,7 +86,8 @@ struct obd_export { unsigned long exp_flags; int exp_failed:1, exp_replay_needed:1, - exp_libclient:1; /* liblustre client? */ + exp_libclient:1, /* liblustre client? 
*/ + exp_sync:1; union { struct mds_export_data eu_mds_data; struct filter_export_data eu_filter_data; diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index 62b5686..70d027b 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -39,10 +39,19 @@ struct fsfilt_objinfo { int fso_bufcnt; }; +/* lustre EA type (MEA, LOV, etc.) */ +enum ea_type { + EA_LOV = (1 << 0), + EA_MEA = (1 << 1), + EA_SID = (1 << 2), + EA_MID = (1 << 3) +}; + struct fsfilt_operations { struct list_head fs_list; struct module *fs_owner; char *fs_type; + void *(* fs_start)(struct inode *inode, int op, void *desc_private, int logs); void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, @@ -57,23 +66,12 @@ struct fsfilt_operations { struct iattr *iattr, int do_trunc); int (* fs_iocontrol)(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); - - /* two methods for getting lov EA and setting it back to inode xattr. */ - int (* fs_set_md)(struct inode *inode, void *handle, void *md, - int size); - int (* fs_get_md)(struct inode *inode, void *md, int size); - - /* two methods for getting MID (master id) EA and setting it back to - * inode xattr. */ - int (* fs_set_mid)(struct inode *inode, void *handle, void *fid, - int size); - int (* fs_get_mid)(struct inode *inode, void *fid, int size); - /* two methods for getting self id EA and setting it back to inode - * xattr. */ - int (* fs_set_sid)(struct inode *inode, void *handle, void *sid, - int size); - int (* fs_get_sid)(struct inode *inode, void *sid, int size); + /* two methods for setting getting diff. kind of EAs from inode. 
*/ + int (* fs_set_md)(struct inode *inode, void *handle, void *md, + int size, enum ea_type type); + int (* fs_get_md)(struct inode *inode, void *md, int size, + enum ea_type type); int (* fs_send_bio)(int rw, struct inode *inode, void *bio); @@ -84,11 +82,10 @@ struct fsfilt_operations { ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, loff_t *offset); - int (* fs_add_journal_cb)(struct obd_device *obd, + int (* fs_add_journal_cb)(struct obd_device *obd, struct super_block *sb, - __u64 last_rcvd, void *handle, - fsfilt_cb_t cb_func, - void *cb_data); + __u64 last_rcvd, void *handle, + fsfilt_cb_t cb_func, void *cb_data); int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); int (* fs_sync)(struct super_block *sb); int (* fs_map_inode_pages)(struct inode *inode, struct page **page, @@ -228,6 +225,16 @@ extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); #define LMV_EA 1 #define LOV_EA 0 +#define fsfilt_check_slow(start, timeout, msg) \ +do { \ + if (time_before(jiffies, start + 15 * HZ)) \ + break; \ + else if (time_before(jiffies, start + timeout / 2 * HZ)) \ + CWARN("slow %s %lus\n", msg, (jiffies - start) / HZ); \ + else \ + CERROR("slow %s %lus\n", msg, (jiffies - start) / HZ); \ +} while (0) + static inline void * fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode, int op, struct obd_trans_info *oti, int logs) @@ -246,8 +253,7 @@ fsfilt_start_ops(struct fsfilt_operations *ops, struct inode *inode, LBUG(); } } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, 60, "journal start"); return handle; } @@ -280,8 +286,7 @@ fsfilt_commit_ops(struct fsfilt_operations *ops, struct super_block *sb, int rc = ops->fs_commit(sb, inode, handle, force_sync); CDEBUG(D_INFO, "committing handle %p\n", handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + 
fsfilt_check_slow(now, 60, "journal start"); return rc; } @@ -322,8 +327,7 @@ fsfilt_brw_start_log(struct obd_device *obd, int objcount, LBUG(); } } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return handle; } @@ -344,8 +348,7 @@ fsfilt_commit_async(struct obd_device *obd, struct inode *inode, int rc = obd->obd_fsops->fs_commit_async(inode, handle, wait_handle); CDEBUG(D_INFO, "committing handle %p (async)\n", *wait_handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return rc; } @@ -356,8 +359,7 @@ fsfilt_commit_wait(struct obd_device *obd, struct inode *inode, void *handle) unsigned long now = jiffies; int rc = obd->obd_fsops->fs_commit_wait(inode, handle); CDEBUG(D_INFO, "waiting for completion %p\n", handle); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "journal start"); return rc; } @@ -368,8 +370,7 @@ fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, unsigned long now = jiffies; int rc; rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long setattr time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "setattr"); return rc; } @@ -391,44 +392,24 @@ static inline int fsfilt_setup(struct obd_device *obd, static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, - void *handle, void *md, int size) + void *handle, void *md, int size, enum ea_type type) { - return obd->obd_fsops->fs_set_md(inode, handle, md, size); + if (!obd->obd_fsops->fs_set_md) + return -ENOSYS; + + return obd->obd_fsops->fs_set_md(inode, handle, md, + size, type); } static inline int fsfilt_get_md(struct obd_device *obd, struct inode 
*inode, - void *md, int size) -{ - return obd->obd_fsops->fs_get_md(inode, md, size); -} - -static inline int -fsfilt_set_mid(struct obd_device *obd, struct inode *inode, - void *handle, void *mid, int size) + void *md, int size, enum ea_type type) { - return obd->obd_fsops->fs_set_mid(inode, handle, mid, size); -} - -static inline int -fsfilt_get_mid(struct obd_device *obd, struct inode *inode, - void *mid, int size) -{ - return obd->obd_fsops->fs_get_mid(inode, mid, size); -} - -static inline int -fsfilt_set_sid(struct obd_device *obd, struct inode *inode, - void *handle, void *sid, int size) -{ - return obd->obd_fsops->fs_set_sid(inode, handle, sid, size); -} - -static inline int -fsfilt_get_sid(struct obd_device *obd, struct inode *inode, - void *sid, int size) -{ - return obd->obd_fsops->fs_get_sid(inode, sid, size); + if (!obd->obd_fsops->fs_get_md) + return -ENOSYS; + + return obd->obd_fsops->fs_get_md(inode, md, size, + type); } static inline int fsfilt_send_bio(int rw, struct obd_device *obd, @@ -463,8 +444,7 @@ fsfilt_putpage(struct obd_device *obd, struct inode *inode, rc = obd->obd_fsops->fs_putpage(inode, page); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long putpage time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "putpage"); return rc; } @@ -486,8 +466,7 @@ fsfilt_getpage(struct obd_device *obd, struct inode *inode, page = obd->obd_fsops->fs_getpage(inode, index); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("long getpage time %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "getpage"); return page; } diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index 184572f..903d713 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -48,9 +48,15 @@ #ifndef _LUSTRE_IDL_H_ #define _LUSTRE_IDL_H_ +#ifdef HAVE_ASM_TYPES_H +#include +#else +#include "types.h" +#endif + + #ifdef __KERNEL__ # include -# include # include # include # 
include /* for strncpy, below */ @@ -59,7 +65,6 @@ #ifdef __CYGWIN__ # include #else -# include # include #endif # include @@ -214,9 +219,10 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define MSG_CONNECT_RECOVERING 0x1 #define MSG_CONNECT_RECONNECT 0x2 #define MSG_CONNECT_REPLAYABLE 0x4 -//#define MSG_CONNECT_PEER 0x8 +#define MSG_CONNECT_PEER 0x8 #define MSG_CONNECT_LIBCLIENT 0x10 #define MSG_CONNECT_INITIAL 0x20 +#define MSG_CONNECT_ASYNC 0x40 /* * OST requests: OBDO & OBD request records @@ -381,8 +387,9 @@ struct lov_mds_md_v0 { /* LOV EA mds/wire data (little-endian) */ #define OBD_MD_FLDIREA (0x0000000020000000LL) /* dir's extended attribute data */ #define OBD_MD_REINT (0x0000000040000000LL) /* reintegrate oa */ #define OBD_MD_FID (0x0000000080000000LL) /* lustre_id data */ -#define OBD_MD_FLEALIST (0x0000000100000000LL) /* list extended attributes */ -#define OBD_MD_FLACL_ACCESS (0x0000000200000000LL) /*access acl*/ +#define OBD_MD_MEA (0x0000000100000000LL) /* shows we are interested in MEA */ +#define OBD_MD_FLEALIST (0x0000000200000000LL) /* list extended attributes */ +#define OBD_MD_FLACL_ACCESS (0x0000000400000000LL) /*access acl*/ #define OBD_MD_FLNOTOBD (~(OBD_MD_FLBLOCKS | OBD_MD_LINKNAME | \ OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | \ @@ -432,6 +439,9 @@ extern void lustre_swab_obd_statfs (struct obd_statfs *os); #define OBD_OBJECT_EOF 0xffffffffffffffffULL +#define OST_MIN_PRECREATE 32 +#define OST_MAX_PRECREATE 20000 + struct obd_ioobj { obd_id ioo_id; obd_gr ioo_gr; @@ -552,6 +562,13 @@ typedef enum { /* INODE LOCK PARTS */ #define MDS_INODELOCK_LOOKUP 0x000001 /* dentry, mode, owner, group */ #define MDS_INODELOCK_UPDATE 0x000002 /* size, links, timestamps */ +#define MDS_INODELOCK_OPEN 0x000004 /* for opened files */ + +/* do not forget to increase MDS_INODELOCK_MAXSHIFT when adding new bits */ +#define MDS_INODELOCK_MAXSHIFT 2 + +/* this FULL lock is useful to take on unlink sort of operations */ +#define 
MDS_INODELOCK_FULL ((1 << (MDS_INODELOCK_MAXSHIFT + 1)) - 1) /* lustre store cookie */ struct lustre_stc { diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index d3c182c..ad4faaa 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -28,10 +28,9 @@ enum lustre_imp_state { static inline char * ptlrpc_import_state_name(enum lustre_imp_state state) { - static char* import_state_names[] = { - "", "CLOSED", "NEW", "DISCONN", - "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", + "", "CLOSED", "NEW", "DISCONN", + "CONNECTING", "REPLAY", "REPLAY_LOCKS", "REPLAY_WAIT", "RECOVER", "FULL", "EVICTED", }; @@ -101,7 +100,8 @@ struct obd_import { int imp_invalid:1, imp_replayable:1, imp_dlm_fake:1, imp_server_timeout:1, imp_initial_recov:1, imp_force_verify:1, - imp_pingable:1, imp_resend_replay:1; + imp_pingable:1, imp_resend_replay:1, + imp_deactive:1; __u32 imp_connect_op; __u32 imp_connect_flags; }; diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 8851c35..c4ec73e 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -463,6 +463,7 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long) #define OBD_IOC_DUMP_LOG _IOWR('f', 185, long) #define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long) +#define OBD_IOC_START _IOWR('f', 187, long) #define OBD_IOC_CATLOGLIST _IOWR('f', 190, long) #define OBD_IOC_LLOG_INFO _IOWR('f', 191, long) @@ -609,6 +610,7 @@ do { \ int __timed_out = 0; \ unsigned long irqflags; \ sigset_t blocked; \ + signed long timeout_remaining; \ \ init_waitqueue_entry(&__wait, current); \ if (excl) \ @@ -622,18 +624,15 @@ do { \ else \ blocked = l_w_e_set_sigs(0); \ \ + timeout_remaining = info->lwi_timeout; \ + \ for (;;) { \ set_current_state(TASK_INTERRUPTIBLE); \ if (condition) \ break; \ - if (signal_pending(current)) { \ - if (info->lwi_on_signal) \ - 
info->lwi_on_signal(info->lwi_cb_data); \ - ret = -EINTR; \ - break; \ - } \ if (info->lwi_timeout && !__timed_out) { \ - if (schedule_timeout(info->lwi_timeout) == 0) { \ + timeout_remaining = schedule_timeout(timeout_remaining); \ + if (timeout_remaining == 0) { \ __timed_out = 1; \ if (!info->lwi_on_timeout || \ info->lwi_on_timeout(info->lwi_cb_data)) { \ @@ -647,6 +646,24 @@ do { \ } else { \ schedule(); \ } \ + if (condition) \ + break; \ + if (signal_pending(current)) { \ + if (__timed_out) { \ + break; \ + } else { \ + /* We have to do this here because some signals */ \ + /* are not blockable - ie from strace(1). */ \ + /* In these cases we want to schedule_timeout() */ \ + /* again, because we don't want that to return */ \ + /* -EINTR when the RPC actually succeeded. */ \ + /* the RECALC_SIGPENDING below will deliver the */ \ + /* signal properly. */ \ + SIGNAL_MASK_LOCK(current, irqflags); \ + CLEAR_SIGPENDING; \ + SIGNAL_MASK_UNLOCK(current, irqflags); \ + } \ + } \ } \ \ SIGNAL_MASK_LOCK(current, irqflags); \ @@ -654,6 +671,12 @@ do { \ RECALC_SIGPENDING; \ SIGNAL_MASK_UNLOCK(current, irqflags); \ \ + if (__timed_out && signal_pending(current)) { \ + if (info->lwi_on_signal) \ + info->lwi_on_signal(info->lwi_cb_data); \ + ret = -EINTR; \ + } \ + \ current->state = TASK_RUNNING; \ remove_wait_queue(&wq, &__wait); \ } while(0) @@ -661,18 +684,21 @@ do { \ #else /* !__KERNEL__ */ #define __l_wait_event(wq, condition, info, ret, excl) \ do { \ - int timeout = info->lwi_timeout, elapse; \ + long timeout = info->lwi_timeout, elapse, last = 0; \ int __timed_out = 0; \ - long last; \ \ - last = time(NULL); \ + if (info->lwi_timeout == 0) \ + timeout = 1000000000; \ + else \ + last = time(NULL); \ + \ for (;;) { \ if (condition) \ break; \ if (liblustre_wait_event(timeout)) { \ - if (timeout == 0) \ + if (timeout == 0 || info->lwi_timeout == 0) \ continue; \ - elapse = (int) (time(NULL) - last); \ + elapse = time(NULL) - last; \ if (elapse) { \ last += elapse; 
\ timeout -= elapse; \ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 866d429..cdd5875 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -77,6 +77,7 @@ struct ll_inode_info { struct lustre_id lli_id; /* full lustre_id */ char *lli_symlink_name; struct semaphore lli_open_sem; + struct semaphore lli_size_sem; __u64 lli_maxbytes; __u64 lli_io_epoch; unsigned long lli_flags; @@ -98,6 +99,16 @@ struct ll_inode_info { #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif + struct semaphore lli_och_sem; /* Protects access to och pointers + and their usage counters */ + /* We need all three because every inode may be opened in different + modes */ + struct obd_client_handle *lli_mds_read_och; + __u64 lli_open_fd_read_count; + struct obd_client_handle *lli_mds_write_och; + __u64 lli_open_fd_write_count; + struct obd_client_handle *lli_mds_exec_och; + __u64 lli_open_fd_exec_count; struct posix_acl *lli_acl_access; }; @@ -195,6 +206,10 @@ ll_prepare_mdc_data(struct mdc_op_data *data, struct inode *i1, #include #endif /* __KERNEL__ */ +#define LLAP_FROM_COOKIE(c) \ + (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \ + (struct ll_async_page *)(c)) + #include #endif diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 1694a5f..d260d2e 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -97,6 +97,8 @@ struct llog_process_cat_data { int llog_cat_id2handle(struct llog_handle *cathandle, struct llog_handle **res, struct llog_logid *logid); +int class_config_dump_handler(struct llog_handle * handle, + struct llog_rec_hdr *rec, void *data); int llog_cat_put(struct llog_handle *cathandle); int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, struct llog_cookie *reccookie, void *buf, @@ -514,5 +516,4 @@ static inline void llog_create_lock_free(struct 
llog_create_locks *lcl) size = offset + sizeof(struct rw_semaphore *) * lcl->lcl_count; OBD_FREE(lcl, size); } - #endif diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index da6aafe..2d53a04 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -130,7 +130,12 @@ struct mds_client_data { __u64 mcd_last_xid; /* xid for the last transaction */ __u32 mcd_last_result; /* result from last RPC */ __u32 mcd_last_data; /* per-op data (disposition for open &c.) */ - __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 64]; + /* for MDS_CLOSE requests */ + __u64 mcd_last_close_transno; /* last completed transaction ID */ + __u64 mcd_last_close_xid; /* xid for the last transaction */ + __u32 mcd_last_close_result; /* result from last RPC */ + __u32 mcd_last_close_data; /* per-op data (disposition for open &c.) */ + __u8 mcd_padding[MDS_LR_CLIENT_SIZE - 88]; }; /* simple uid/gid mapping hash table */ @@ -325,18 +330,24 @@ int mdc_done_writing(struct obd_export *, struct obdo *); #define IOC_REQUEST_CLOSE _IOWR('f', 35, long) #define IOC_REQUEST_MAX_NR 35 -#define MDS_CHECK_RESENT(req, reconstruct) \ -{ \ - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \ - struct mds_client_data *mcd = \ - req->rq_export->exp_mds_data.med_mcd; \ - if (mcd->mcd_last_xid == req->rq_xid) { \ - reconstruct; \ - RETURN(req->rq_repmsg->status); \ - } \ - DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", \ - mcd->mcd_last_xid); \ - } \ +#define MDS_CHECK_RESENT(req, reconstruct) \ +{ \ + if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \ + struct mds_client_data *mcd = \ + req->rq_export->exp_mds_data.med_mcd; \ + \ + if (le64_to_cpu(mcd->mcd_last_xid) == req->rq_xid) { \ + reconstruct; \ + RETURN(le32_to_cpu(mcd->mcd_last_result)); \ + } \ + if (le64_to_cpu(mcd->mcd_last_close_xid) == req->rq_xid) { \ + reconstruct; \ + RETURN(le32_to_cpu(mcd->mcd_last_close_result));\ + } \ + DEBUG_REQ(D_HA, req, "no reply for 
RESENT req" \ + "(have "LPD64", and "LPD64")", \ + mcd->mcd_last_xid, mcd->mcd_last_close_xid); \ + } \ } #endif diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 019e1de..e3fd354 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -110,7 +110,7 @@ #define MDT_MAX_THREADS 32UL #define MDT_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \ MDT_MAX_THREADS), 2UL) -#define MDS_NBUFS (64 * smp_num_cpus) +#define MDS_NBUFS (64 * smp_num_cpus) #define MDS_BUFSIZE (8 * 1024) /* Assume file name length = FNAME_MAX = 256 (true for extN). * path name length = PATH_MAX = 4096 @@ -131,7 +131,7 @@ #define OST_MAX_THREADS 36UL #define OST_NUM_THREADS max(min_t(unsigned long, num_physpages / 8192, \ OST_MAX_THREADS), 2UL) -#define OST_NBUFS (64 * smp_num_cpus) +#define OST_NBUFS (64 * smp_num_cpus) #define OST_BUFSIZE (8 * 1024) /* OST_MAXREQSIZE ~= 1640 bytes = * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote @@ -147,9 +147,7 @@ #define PTLBD_MAXREQSIZE 1024 struct ptlrpc_peer { -/* bugfix #4615 - */ - ptl_process_id_t peer_id; + ptl_process_id_t peer_id; struct ptlrpc_ni *peer_ni; }; @@ -224,7 +222,7 @@ struct ptlrpc_request_set { wait_queue_head_t *set_wakeup_ptr; struct list_head set_requests; set_interpreter_func set_interpret; /* completion callback */ - union ptlrpc_async_args set_args; /* completion context */ + void *set_arg; /* completion context */ /* locked so that any old caller can communicate requests to * the set holder who can then fold them into the lock-free set */ spinlock_t set_new_req_lock; @@ -348,6 +346,9 @@ struct ptlrpc_request { struct timeval rq_arrival_time; /* request arrival time */ struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer */ +#if CRAY_PORTALS + ptl_uid_t rq_uid; /* peer uid, used in MDS only */ +#endif /* client-only incoming reply */ ptl_handle_md_t 
rq_reply_md_h; @@ -355,6 +356,7 @@ struct ptlrpc_request { struct ptlrpc_cb_id rq_reply_cbid; struct ptlrpc_peer rq_peer; /* XXX see service.c can this be factored away? */ + char rq_peerstr[PTL_NALFMT_SIZE]; struct obd_export *rq_export; struct obd_import *rq_import; @@ -509,7 +511,8 @@ struct ptlrpc_service { int srv_n_difficult_replies; /* # 'difficult' replies */ int srv_n_active_reqs; /* # reqs being served */ int srv_rqbd_timeout; /* timeout before re-posting reqs */ - + int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ + __u32 srv_req_portal; __u32 srv_rep_portal; @@ -533,7 +536,7 @@ struct ptlrpc_service { struct proc_dir_entry *srv_procroot; struct lprocfs_stats *srv_stats; - + struct ptlrpc_srv_ni srv_interfaces[0]; }; @@ -543,7 +546,6 @@ static inline char *ptlrpc_peernid2str(struct ptlrpc_peer *p, char *str) return (portals_nid2str(p->peer_ni->pni_number, p->peer_id.nid, str)); } -/* For bug #4615 */ static inline char *ptlrpc_id2str(struct ptlrpc_peer *p, char *str) { LASSERT(p->peer_ni != NULL); @@ -684,7 +686,8 @@ void ptlrpc_save_llog_lock (struct ptlrpc_request *req, void ptlrpc_commit_replies (struct obd_device *obd); void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, - int req_portal, int rep_portal, + int req_portal, int rep_portal, + int watchdog_timeout, /* in ms */ svc_handler_t, char *name, struct proc_dir_entry *proc_entry); void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); diff --git a/lustre/include/linux/lustre_smfs.h b/lustre/include/linux/lustre_smfs.h index 7f83f04..792cbe9 100644 --- a/lustre/include/linux/lustre_smfs.h +++ b/lustre/include/linux/lustre_smfs.h @@ -26,7 +26,9 @@ #ifndef __LUSTRE_SMFS_H #define __LUSTRE_SMFS_H +#include #include + struct snap_inode_info { int sn_flags; /*the flags indicated inode type */ int sn_gen; /*the inode generation*/ @@ -509,8 +511,10 @@ extern int smfs_write_extents(struct 
inode *dir, struct dentry *dentry, unsigned long from, unsigned long num); extern int smfs_rec_setattr(struct inode *dir, struct dentry *dentry, struct iattr *attr); -extern int smfs_rec_precreate(struct dentry *dentry, int *num, struct obdo *oa); -extern int smfs_rec_md(struct inode *inode, void * lmm, int lmm_size); +extern int smfs_rec_precreate(struct dentry *dentry, int *num, + struct obdo *oa); +extern int smfs_rec_md(struct inode *inode, void * lmm, int lmm_size, + enum ea_type type); extern int smfs_rec_unpack(struct smfs_proc_args *args, char *record, char **pbuf, int *opcode); diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index 96898fd..ffa1396 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -1,6 +1,28 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: - */ + * + * Copyright (C) 2001, 2002, 2003, 2004 Cluster File Systems, Inc. + * + * Author: + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * lustre VFS/process permission interface + */ + #ifndef __LVFS_H__ #define __LVFS_H__ @@ -13,7 +35,7 @@ #include #include #include -#endif +#endif #ifdef LIBLUSTRE #include @@ -57,6 +79,13 @@ struct lvfs_run_ctxt { #endif }; +struct lvfs_obd_ctxt { + struct vfsmount *loc_mnt; + atomic_t loc_refcount; + char *loc_name; + struct list_head loc_list; +}; + #ifdef OBD_CTXT_DEBUG #define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC #else @@ -81,7 +110,9 @@ int lustre_fread(struct file *file, void *buf, int len, loff_t *off); int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off); int lustre_fsync(struct file *file); long l_readdir(struct file * file, struct list_head *dentry_list); - +int lvfs_mount_fs(char *name, char *fstype, char *options, int flags, + struct lvfs_obd_ctxt **lvfs_ctxt); +void lvfs_umount_fs(struct lvfs_obd_ctxt *lvfs_ctxt); static inline void l_dput(struct dentry *de) { if (!de || IS_ERR(de)) @@ -121,6 +152,17 @@ ll_lookup_one_len(const char *name, struct dentry *dparent, int namelen) #else up(&dparent->d_inode->i_sem); #endif + + if (IS_ERR(dchild) || dchild->d_inode == NULL) + return dchild; + + if (is_bad_inode(dchild->d_inode)) { + CERROR("bad inode returned %lu/%u\n", + dchild->d_inode->i_ino, dchild->d_inode->i_generation); + dput(dchild); + dchild = ERR_PTR(-ENOENT); + } + return dchild; } diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index a7f8b5f..9727313 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -205,6 +205,7 @@ struct filter_obd { const char *fo_fstype; struct super_block *fo_sb; struct vfsmount *fo_vfsmnt; + struct lvfs_obd_ctxt *fo_lvfs_ctxt; int fo_group_count; struct dentry *fo_dentry_O; /* the "O"bject directory dentry */ @@ -315,8 +316,10 @@ struct client_obd { struct obd_service_time cl_enter_stime; struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_setattr_lock; + struct mdc_rpc_lock *cl_setattr_lock; + struct mdc_rpc_lock 
*cl_close_lock; struct osc_creator cl_oscc; + int cl_async:1; }; /* Like a client, with some hangers-on. Keep mc_client_obd first so that we @@ -338,6 +341,7 @@ struct mds_obd { struct super_block *mds_sb; struct vfsmount *mds_vfsmnt; struct dentry *mds_id_de; + struct lvfs_obd_ctxt *mds_lvfs_ctxt; int mds_max_mdsize; int mds_max_cookiesize; struct file *mds_rcvd_filp; @@ -388,6 +392,7 @@ struct mds_obd { gid_t mds_squash_gid; ptl_nid_t mds_nosquash_nid; atomic_t mds_real_clients; + atomic_t mds_open_count; struct dentry *mds_id_dir; int mds_obd_type; struct dentry *mds_unnamed_dir; /* for mdt_obd_create only */ @@ -468,7 +473,17 @@ struct cm_obd { int master_group; struct cmobd_write_service *write_srv; }; - + +struct conf_obd { + struct super_block *cfobd_sb; + struct vfsmount *cfobd_vfsmnt; + struct dentry *cfobd_logs_dir; + struct dentry *cfobd_objects_dir; + struct dentry *cfobd_pending_dir; + struct llog_handle *cfobd_cfg_llh; + struct lvfs_obd_ctxt *cfobd_lvfs_ctxt; +}; + struct lov_tgt_desc { struct obd_uuid uuid; __u32 ltd_gen; @@ -481,7 +496,7 @@ struct lov_obd { struct lov_desc desc; int bufsize; int refcount; - int lo_catalog_loaded:1; + int lo_catalog_loaded:1, async:1; struct semaphore lov_llog_sem; unsigned long lov_connect_flags; struct lov_tgt_desc *tgts; @@ -649,7 +664,7 @@ struct obd_device { struct mds_obd mds; struct client_obd cli; struct ost_obd ost; - struct echo_client_obd echo_client; + struct echo_client_obd echocli; struct echo_obd echo; struct recovd_obd recovd; struct lov_obd lov; @@ -658,6 +673,7 @@ struct obd_device { struct mgmtcli_obd mgmtcli; struct lmv_obd lmv; struct cm_obd cm; + struct conf_obd conf; } u; /* fields used by LProcFS */ @@ -761,6 +777,8 @@ struct obd_ops { int (*o_teardown_async_page)(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, void *cookie); + int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm, + obd_off size, int shrink); int (*o_punch)(struct obd_export *exp, 
struct obdo *oa, struct lov_stripe_md *ea, obd_size start, obd_size end, struct obd_trans_info *oti); diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 6bb4dca..66cecd7 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -89,8 +89,6 @@ struct config_llog_instance { struct obd_uuid cfg_uuid; ptl_nid_t cfg_local_nid; }; -int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, - struct config_llog_instance *cfg); int class_config_process_llog(struct llog_ctxt *ctxt, char *name, struct config_llog_instance *cfg); @@ -175,103 +173,24 @@ void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_valid valid); int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_valid compare); void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj); -static inline int obd_check_conn(struct lustre_handle *conn) -{ - struct obd_device *obd; - if (!conn) { - CERROR("NULL conn\n"); - RETURN(-ENOTCONN); - } - - obd = class_conn2obd(conn); - if (!obd) { - CERROR("NULL obd\n"); - RETURN(-ENODEV); - } - - if (!obd->obd_attached) { - CERROR("obd %d not attached\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_set_up) { - CERROR("obd %d not setup\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type) { - CERROR("obd %d not typed\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type->typ_ops) { - CERROR("obd_check_conn: obd %d no operations\n", - obd->obd_minor); - RETURN(-EOPNOTSUPP); - } - return 0; -} - - #define OBT(dev) (dev)->obd_type #define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op #define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op #define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op -/* Ensure obd_setup: used for disconnect which might be called while - an obd is stopping. 
*/ -#define OBD_CHECK_SETUP(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - -/* Ensure obd_setup and !obd_stopping. */ -#define OBD_CHECK_ACTIVE(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up || (exp)->exp_obd->obd_stopping) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - /* Ensure obd_setup: used for cleanup which must be called while obd is stopping */ -#define OBD_CHECK_DEV_STOPPING(obd) \ +#define OBD_CHECK_DEV(obd) \ do { \ if (!(obd)) { \ CERROR("NULL device\n"); \ RETURN(-ENODEV); \ } \ - \ +} while (0) + +#define OBD_CHECK_DEV_STOPPING(obd) \ +do { \ + OBD_CHECK_DEV(obd); \ if (!(obd)->obd_set_up) { \ CERROR("Device %d not setup\n", \ (obd)->obd_minor); \ @@ -288,11 +207,7 @@ do { \ /* ensure obd_setup and !obd_stopping */ #define OBD_CHECK_DEV_ACTIVE(obd) \ do { \ - if (!(obd)) { \ - CERROR("NULL device\n"); \ - RETURN(-ENODEV); \ - } \ - \ + OBD_CHECK_DEV(obd); \ if (!(obd)->obd_set_up || (obd)->obd_stopping) { \ CERROR("Device %d not setup\n", \ (obd)->obd_minor); \ @@ -313,7 +228,7 @@ do { \ coffset = (unsigned int)(obd)->obd_cntr_base + \ OBD_COUNTER_OFFSET(op); \ LASSERT(coffset < (obd)->obd_stats->ls_num); \ - lprocfs_counter_incr((obd)->obd_stats, coffset); \ + lprocfs_counter_incr((obd)->obd_stats, coffset);\ } #define MD_COUNTER_OFFSET(op) \ @@ -340,8 
+255,8 @@ do { \ do { \ if (!OBT(obd) || !MDP((obd), op)) {\ if (err) \ - CERROR("obd_md" #op ": dev %d no operation\n", \ - obd->obd_minor); \ + CERROR("md_" #op ": dev %s/%d no operation\n", \ + obd->obd_name, obd->obd_minor); \ RETURN(err); \ } \ } while (0) @@ -358,8 +273,9 @@ do { \ RETURN(-EOPNOTSUPP); \ } \ if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - (exp)->exp_obd->obd_minor); \ + CERROR("obd_" #op ": dev %s/%d no operation\n", \ + (exp)->exp_obd->obd_name, \ + (exp)->exp_obd->obd_minor); \ RETURN(-EOPNOTSUPP); \ } \ } while (0) @@ -368,8 +284,8 @@ do { \ do { \ if (!OBT(obd) || !OBP((obd), op)) {\ if (err) \ - CERROR("obd_" #op ": dev %d no operation\n", \ - obd->obd_minor); \ + CERROR("obd_" #op ": dev %s/%d no operation\n", \ + obd->obd_name, obd->obd_minor); \ RETURN(err); \ } \ } while (0) @@ -386,8 +302,9 @@ do { \ RETURN(-EOPNOTSUPP); \ } \ if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - (exp)->exp_obd->obd_minor); \ + CERROR("obd_" #op ": dev %s/%d no operation\n", \ + (exp)->exp_obd->obd_name, \ + (exp)->exp_obd->obd_minor); \ RETURN(-EOPNOTSUPP); \ } \ } while (0) @@ -1035,6 +952,20 @@ static inline int obd_write_extents(struct obd_export *exp, RETURN(rc); } +static inline int obd_adjust_kms(struct obd_export *exp, + struct lov_stripe_md *lsm, + obd_off size, int shrink) +{ + int rc; + ENTRY; + + OBD_CHECK_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms); + + rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink); + RETURN(rc); +} + static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -1169,6 +1100,11 @@ static inline void obd_import_event(struct obd_device *obd, struct obd_import *imp, enum obd_import_event event) { + if (!obd) { + CERROR("NULL device\n"); + EXIT; + return; + } if (obd->obd_set_up && OBP(obd, 
import_event)) { OBD_COUNTER_INCREMENT(obd, import_event); OBP(obd, import_event)(obd, imp, event); @@ -1186,6 +1122,7 @@ static inline int obd_llog_connect(struct obd_export *exp, static inline int obd_notify(struct obd_device *obd, struct obd_device *watched, int active, void *data) { + OBD_CHECK_DEV(obd); if (!obd->obd_set_up) { CERROR("obd %s not set up\n", obd->obd_name); return -EINVAL; @@ -1204,6 +1141,7 @@ static inline int obd_register_observer(struct obd_device *obd, struct obd_device *observer) { ENTRY; + OBD_CHECK_DEV(obd); if (obd->obd_observer && observer) RETURN(-EALREADY); obd->obd_observer = observer; @@ -1534,8 +1472,6 @@ static inline struct obdo *obdo_alloc(void) static inline void obdo_free(struct obdo *oa) { - if (!oa) - return; OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); } diff --git a/lustre/include/linux/obd_lov.h b/lustre/include/linux/obd_lov.h index cf3ccec..9692a9b 100644 --- a/lustre/include/linux/obd_lov.h +++ b/lustre/include/linux/obd_lov.h @@ -7,21 +7,6 @@ #define OBD_LOV_DEVICENAME "lov" -struct lov_brw_async_args { - struct lov_stripe_md *aa_lsm; - struct obdo *aa_obdos; - struct obdo *aa_oa; - struct brw_page *aa_ioarr; - obd_count aa_oa_bufs; -}; - -struct lov_getattr_async_args { - struct lov_stripe_md *aa_lsm; - struct obdo *aa_oa; - struct obdo *aa_obdos; - struct lov_obd *aa_lov; -}; - static inline int lov_stripe_md_size(int stripes) { return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo); diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 64db5f7..06e6144 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -33,14 +33,15 @@ #include /* global variables */ -extern atomic_t obd_memory; extern int obd_memmax; +extern atomic_t obd_memory; + extern unsigned int obd_fail_loc; -extern unsigned int obd_dump_on_timeout; extern unsigned int obd_timeout; extern unsigned int ldlm_timeout; extern char obd_lustre_upcall[128]; extern 
unsigned int obd_sync_filter; +extern unsigned int obd_dump_on_timeout; extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_MDS 0x100 @@ -84,6 +85,9 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 #define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 #define OBD_FAIL_MDS_ALLOC_OBDO 0x128 +#define OBD_FAIL_MDS_PAUSE_OPEN 0x129 +#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a +#define OBD_FAIL_MDS_OPEN_CREATE 0x12b #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -153,6 +157,8 @@ extern wait_queue_head_t obd_race_waitq; #define OBD_FAIL_SVCGSS_INIT_REQ 0x780 #define OBD_FAIL_SVCGSS_INIT_REP 0x781 +#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 + /* preparation for a more advanced failure testbed (not functional yet) */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) @@ -185,6 +191,13 @@ do { \ } \ } while(0) +#define OBD_FAIL_GOTO(id, label, ret) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + GOTO(label, (ret)); \ + } \ +} while(0) + #define OBD_FAIL_TIMEOUT(id, secs) \ do { \ if (OBD_FAIL_CHECK_ONCE(id)) { \ @@ -262,22 +275,119 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) extern atomic_t portal_kmemory; -#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ -do { \ - (ptr) = kmalloc(size, (gfp_mask)); \ - if ((ptr) == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) +#define MEM_LOC_LEN 128 + +struct mtrack { + struct hlist_node 
m_hash; + char m_loc[MEM_LOC_LEN]; + void *m_ptr; + int m_size; +}; + +void lvfs_memdbg_insert(struct mtrack *mt); +void lvfs_memdbg_remove(struct mtrack *mt); +struct mtrack *lvfs_memdbg_find(void *ptr); + +int lvfs_memdbg_check_insert(struct mtrack *mt); +struct mtrack *lvfs_memdbg_check_remove(void *ptr); + +static inline struct mtrack * +__new_mtrack(void *ptr, int size, + char *file, int line) +{ + struct mtrack *mt; + + mt = kmalloc(sizeof(*mt), GFP_KERNEL); + if (!mt) + return NULL; + + snprintf(mt->m_loc, sizeof(mt->m_loc) - 1, + "%s:%d", file, line); + + mt->m_size = size; + mt->m_ptr = ptr; + return mt; +} + +static inline void +__free_mtrack(struct mtrack *mt) +{ + kfree(mt); +} + +static inline int +__get_mtrack(void *ptr, int size, + char *file, int line) +{ + struct mtrack *mt; + + mt = __new_mtrack(ptr, size, file, line); + if (!mt) { + CWARN("can't allocate new memory track\n"); + return 0; + } + + if (!lvfs_memdbg_check_insert(mt)) + __free_mtrack(mt); + + return 1; +} + +static inline int +__put_mtrack(void *ptr, int size, + char *file, int line) +{ + struct mtrack *mt; + + if (!(mt = lvfs_memdbg_check_remove(ptr))) { + CWARN("ptr 0x%p is not allocated. 
Attempt to free " + "not allocated memory at %s:%d\n", ptr, + file, line); + return 0; + } else { + if (mt->m_size != size) { + CWARN("freeing memory chunk of different size " + "than allocated (%d != %d) at %s:%d\n", + mt->m_size, size, file, line); + } + __free_mtrack(mt); + return 1; + } +} + +#define get_mtrack(ptr, size, file, line) \ + __get_mtrack((ptr), (size), (file), (line)) + +#define put_mtrack(ptr, size, file, line) \ + __put_mtrack((ptr), (size), (file), (line)) + +#else /* !CONFIG_DEBUG_MEMORY */ + +#define get_mtrack(ptr, size, file, line) \ + do {} while (0) + +#define put_mtrack(ptr, size, file, line) \ + do {} while (0) +#endif /* !CONFIG_DEBUG_MEMORY */ + +#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ +do { \ + (ptr) = kmalloc(size, (gfp_mask)); \ + if ((ptr) == NULL) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + get_mtrack((ptr), (size), __FILE__, __LINE__); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), (ptr), atomic_read(&obd_memory)); \ + } \ } while (0) #ifndef OBD_GFP_MASK @@ -290,22 +400,23 @@ do { \ #ifdef __arch_um__ # define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) #else -# define OBD_VMALLOC(ptr, size) \ -do { \ - (ptr) = vmalloc(size); \ - if ((ptr) == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - 
CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ +# define OBD_VMALLOC(ptr, size) \ +do { \ + (ptr) = vmalloc(size); \ + if ((ptr) == NULL) { \ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + get_mtrack((ptr), (size), __FILE__, __LINE__); \ + CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ } while (0) #endif @@ -316,69 +427,75 @@ do { \ #endif #if POISON_BULK -#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \ +#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \ kunmap(page); } while (0) #else #define POISON_PAGE(page, val) do { } while (0) #endif -#define OBD_FREE(ptr, size) \ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ - kfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ +#define OBD_FREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + put_mtrack((ptr), (size), __FILE__, __LINE__); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + kfree(ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #ifdef __arch_um__ # define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) #else -# define OBD_VFREE(ptr, size) \ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - 
POISON(ptr, 0x5a, size); \ - vfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ +# define OBD_VFREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + put_mtrack((ptr), (size), __FILE__, __LINE__); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + vfree(ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #endif -/* we memset() the slab object to 0 when allocation succeeds, so DO NOT - * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd - * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ -#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ -do { \ - LASSERT(!in_interrupt()); \ - (ptr) = kmem_cache_alloc(slab, (type)); \ - if ((ptr) == NULL) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&portal_kmemory));\ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ +/* + * we memset() the slab object to 0 when allocation succeeds, so DO NOT HAVE A + * CTOR THAT DOES ANYTHING. Its work will be cleared here. We'd love to assert + * on that, but slab.c keeps kmem_cache_s all to itself. 
+ */ +#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ +do { \ + LASSERT(!in_interrupt()); \ + (ptr) = kmem_cache_alloc(slab, (type)); \ + if ((ptr) == NULL) { \ + CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&portal_kmemory)); \ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + get_mtrack((ptr), (size), __FILE__, __LINE__); \ + CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ } while (0) -#define OBD_SLAB_FREE(ptr, slab, size) \ -do { \ - LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ - POISON(ptr, 0x5a, size); \ - kmem_cache_free(slab, ptr); \ - (ptr) = (void *)0xdeadbeef; \ +#define OBD_SLAB_FREE(ptr, slab, size) \ +do { \ + LASSERT(ptr); \ + CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + put_mtrack((ptr), (size), __FILE__, __LINE__); \ + atomic_sub(size, &obd_memory); \ + POISON(ptr, 0x5a, size); \ + kmem_cache_free(slab, ptr); \ + (ptr) = (void *)0xdeadbeef; \ } while (0) #endif diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index e1e758f..c3de92a 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -22,7 +22,13 @@ */ #ifndef _LUSTRE_USER_H #define _LUSTRE_USER_H + +#ifdef HAVE_ASM_TYPES_H #include +#else +#include "types.h" +#endif + #ifdef __KERNEL__ #include #else @@ -32,18 +38,17 @@ /* for statfs() */ #define LL_SUPER_MAGIC 0x0BD00BD0 - #define IOC_MDC_TYPE 'i' #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) #define 
IOC_MDC_SHOWFID _IOWR(IOC_MDC_TYPE, 23, struct lustre_id *) #ifndef EXT3_IOC_GETFLAGS -#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) -#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) -#define EXT3_IOC_GETVERSION _IOR('f', 3, long) -#define EXT3_IOC_SETVERSION _IOW('f', 4, long) -#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) -#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) +#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT3_IOC_GETVERSION _IOR('f', 3, long) +#define EXT3_IOC_SETVERSION _IOW('f', 4, long) +#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) +#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) #endif #define LL_IOC_GETFLAGS _IOR ('f', 151, long) diff --git a/lustre/include/types.h b/lustre/include/types.h new file mode 100644 index 0000000..5389d37 --- /dev/null +++ b/lustre/include/types.h @@ -0,0 +1,27 @@ +#ifndef _LUSTRE_TYPES_H +#define _LUSTRE_TYPES_H + +typedef unsigned short umode_t; + +#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \ + !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H)) + +/* + * __xx is ok: it doesn't pollute the POSIX namespace. 
Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#endif diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config new file mode 100644 index 0000000..a8afabf --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config @@ -0,0 +1,1424 @@ +# +# Automatically generated make config: don't edit +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# General setup +# +CONFIG_IA64=y +# CONFIG_HIGHPTE is not set +CONFIG_HIGHMEM=y +CONFIG_HIGHIO=y +# CONFIG_ISA is not set +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SBUS is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +CONFIG_IA64_GENERIC=y +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_HP_SIM is not set +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_SGI_SN1 is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +CONFIG_IA64_MCA=y +CONFIG_PM=y +CONFIG_KCORE_ELF=y +CONFIG_FORCE_MAX_ZONEORDER=15 +# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set +# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set +CONFIG_HUGETLB_PAGE_SIZE_256MB=y +# CONFIG_HUGETLB_PAGE_SIZE_64MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_256KB is not 
set +# CONFIG_IA64_PAL_IDLE is not set +CONFIG_SMP=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_EFI_VARS=y +CONFIG_IA64_CYCLONE=y +CONFIG_NET=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_AUDIT=m +CONFIG_ACPI=y +CONFIG_ACPI_EFI=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_KERNEL_CONFIG=y + +# +# ACPI Support +# +CONFIG_ACPI_PCI=y +CONFIG_ACPI=y +CONFIG_ACPI_EFI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_BUS=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +# CONFIG_ACPI_DEBUG is not set +CONFIG_PCI=y +CONFIG_PCI_NAMES=y +CONFIG_HOTPLUG=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_COMPAQ is not set +# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set +CONFIG_HOTPLUG_PCI_ACPI=m + +# +# PCMCIA/CardBus support +# +# CONFIG_PCMCIA is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_TUX=m +CONFIG_TUX_EXTCGI=y +# CONFIG_TUX_EXTENDED_LOG is not set +# CONFIG_TUX_DEBUG is not set +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m + +# +# IP: Netfilter Configuration +# 
+CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_UNCLEAN=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_MIRROR=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_LOCAL=y +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_IP_NF_COMPAT_IPCHAINS=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_COMPAT_IPFWADM=m +CONFIG_IP_NF_NAT_NEEDED=y + +# +# IP: Virtual Server Configuration +# +CONFIG_IP_VS=m +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=16 + +# +# IPVS scheduler +# +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m +CONFIG_IPV6=m +CONFIG_IPV6_PRIVACY=y + +# +# IPv6: Netfilter Configuration +# +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m 
+CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_TUNNEL=m +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +# CONFIG_KHTTPD is not set +# CONFIG_ATM is not set +CONFIG_VLAN_8021Q=m + +# +# +# +CONFIG_IPX=m +# CONFIG_IPX_INTERN is not set +CONFIG_ATALK=m + +# +# Appletalk devices +# +CONFIG_DEV_APPLETALK=y +CONFIG_COPS_DAYNA=y +CONFIG_COPS_TANGENT=y +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_DECNET=m +CONFIG_DECNET_SIOCGIFCONF=y +CONFIG_DECNET_ROUTER=y +CONFIG_DECNET_ROUTE_FWMARK=y +CONFIG_BRIDGE=m +# CONFIG_X25 is not set +CONFIG_EDP2=m +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +CONFIG_NET_DIVERT=y +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_CSZ=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set +# CONFIG_ISAPNP is not set +# 
CONFIG_PNPBIOS is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +CONFIG_BLK_CPQ_CISS_DA=m +CONFIG_CISS_SCSI_TAPE=y +# CONFIG_CISS_MONITOR_THREAD is not set +CONFIG_BLK_DEV_DAC960=m +CONFIG_BLK_DEV_UMEM=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_STATS=y +CONFIG_DISKDUMP=m + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +CONFIG_IEEE1394=m + +# +# Device Drivers +# + +# +# Texas Instruments PCILynx requires I2C bit-banging +# +CONFIG_IEEE1394_OHCI1394=m + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +CONFIG_IEEE1394_SBP2_PHYS_DMA=y +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m +# CONFIG_IEEE1394_VERBOSEDEBUG is not set + +# +# I2O device support +# +CONFIG_I2O=m +CONFIG_I2O_PCI=m +CONFIG_I2O_BLOCK=m +CONFIG_I2O_LAN=m +CONFIG_I2O_SCSI=m +CONFIG_I2O_PROC=m + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID5=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_LVM=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=m +# CONFIG_FUSION_BOOT is not set +CONFIG_FUSION_MAX_SGE=40 +# CONFIG_FUSION_ISENSE is not set +CONFIG_FUSION_CTL=m +CONFIG_FUSION_LAN=m +CONFIG_NET_FC=y + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_IDEDISK_STROKE is not set +# CONFIG_BLK_DEV_IDECS is not set +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_IDETAPE=m +CONFIG_BLK_DEV_IDEFLOPPY=y 
+CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_CMD640_ENHANCED is not set +# CONFIG_BLK_DEV_ISAPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_PCI_WIP is not set +CONFIG_BLK_DEV_ADMA100=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_AMD74XX_OVERRIDE is not set +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_TRIFLEX=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5530=y +CONFIG_BLK_DEV_HPT34X=y +# CONFIG_HPT34X_AUTODMA is not set +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_PDC202XX_OLD=y +# CONFIG_PDC202XX_BURST is not set +CONFIG_BLK_DEV_PDC202XX_NEW=y +CONFIG_PDC202XX_FORCE=y +# CONFIG_BLK_DEV_RZ1000 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIIMAGE=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_IDE_CHIPSETS is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set +CONFIG_BLK_DEV_PDC202XX=y +CONFIG_BLK_DEV_IDE_MODES=y +CONFIG_BLK_DEV_ATARAID=m +CONFIG_BLK_DEV_ATARAID_PDC=m +CONFIG_BLK_DEV_ATARAID_HPT=m +CONFIG_BLK_DEV_ATARAID_SII=m + +# +# SCSI support +# +CONFIG_SCSI=m + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=m +CONFIG_SD_EXTRA_DEVS=256 +CONFIG_SD_IOSTATS=y +CONFIG_SCSI_DUMP=m +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SR_EXTRA_DEVS=4 +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# +CONFIG_SCSI_DEBUG_QUEUES=y +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI low-level drivers +# +CONFIG_BLK_DEV_3W_XXXX_RAID=m +# CONFIG_SCSI_7000FASST is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AHA1740 is not set +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +# CONFIG_AIC7XXX_PROBE_EISA_VL is not set +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_AIC7XXX_DEBUG_MASK=0 +# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +# CONFIG_AIC79XX_BUILD_FIRMWARE is not set +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC7XXX_OLD=m +CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y +CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_OLD_PROC_STATS=y +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_SCSI_AM53C974 is not set +CONFIG_SCSI_MEGARAID=m +CONFIG_SCSI_MEGARAID2=m +CONFIG_SCSI_SATA=y +CONFIG_SCSI_SATA_SVW=m +CONFIG_SCSI_ATA_PIIX=m +CONFIG_SCSI_SATA_NV=m +CONFIG_SCSI_SATA_PROMISE=m +CONFIG_SCSI_SATA_SX4=m +CONFIG_SCSI_SATA_SIL=m +CONFIG_SCSI_SATA_SIS=m +CONFIG_SCSI_SATA_VIA=m +CONFIG_SCSI_SATA_VITESSE=m +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_DMA is not set +# CONFIG_SCSI_EATA_PIO is not set +# CONFIG_SCSI_FUTURE_DOMAIN is not set +CONFIG_SCSI_GDTH=m +# CONFIG_SCSI_GENERIC_NCR5380 is not set +CONFIG_SCSI_IPS=m +# CONFIG_SCSI_INITIO is not set +# 
CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_NCR53C406A is not set +# CONFIG_SCSI_NCR53C7xx is not set +CONFIG_SCSI_SYM53C8XX_2=m +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_NCR53C8XX=m +CONFIG_SCSI_SYM53C8XX=m +CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 +CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 +CONFIG_SCSI_NCR53C8XX_SYNC=40 +# CONFIG_SCSI_NCR53C8XX_PROFILE is not set +# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set +# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set +# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PCI2000 is not set +# CONFIG_SCSI_PCI2220I is not set +# CONFIG_SCSI_PSI240I is not set +CONFIG_SCSI_QLOGIC_FAS=m +CONFIG_SCSI_QLOGIC_ISP=m +CONFIG_SCSI_QLOGIC_FC=m +# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set +CONFIG_SCSI_QLOGIC_1280=m +# CONFIG_SCSI_SIM710 is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_U14_34F is not set +CONFIG_SCSI_NSP32=m +# CONFIG_SCSI_DEBUG is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_ETHERTAP=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +CONFIG_HAPPYMEAL=m +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +CONFIG_SUNGEM=m +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_ELMC is not set +# CONFIG_ELMC_II is not set +CONFIG_VORTEX=m +CONFIG_TYPHOON=m +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=m +CONFIG_AMD8111_ETH=m +CONFIG_ADAPTEC_STARFIRE=m +# CONFIG_APRICOT is 
not set +CONFIG_B44=m +# CONFIG_CS89x0 is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +CONFIG_TULIP_MMIO=y +# CONFIG_DE4X5 is not set +# CONFIG_DGRS is not set +CONFIG_DM9102=m +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +# CONFIG_LNE390 is not set +CONFIG_FEALNX=m +CONFIG_NATSEMI=m +CONFIG_NE2K_PCI=m +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_SIS900=m +CONFIG_EPIC100=m +# CONFIG_SUNDANCE is not set +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_TLAN=m +CONFIG_TC35815=m +CONFIG_VIA_RHINE=m +# CONFIG_VIA_RHINE_MMIO is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +CONFIG_DL2K=m +CONFIG_E1000=m +CONFIG_E1000_NAPI=y +# CONFIG_MYRI_SBUS is not set +CONFIG_NS83820=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_R8169=m +CONFIG_SK98LIN=m +CONFIG_TIGON3=m +# CONFIG_FDDI is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMOL=m +CONFIG_IBMLS=m +CONFIG_3C359=m +# CONFIG_TMS380TR is not set +CONFIG_NET_FC=y +CONFIG_IPHASE5526=m +# CONFIG_RCPCI is not set +CONFIG_SHAPER=m + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# 
+CONFIG_INPUT=m +CONFIG_INPUT_KEYBDEV=m +CONFIG_INPUT_MOUSEDEV=m +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_EVDEV=m + +# +# Character devices +# +CONFIG_VT=y +# CONFIG_ECC is not set +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SERIAL_HCDP=y +CONFIG_SERIAL_ACPI=y +CONFIG_HP_DIVA=y +CONFIG_SERIAL_EXTENDED=y +CONFIG_SERIAL_MANY_PORTS=y +CONFIG_SERIAL_SHARE_IRQ=y +# CONFIG_SERIAL_DETECT_IRQ is not set +CONFIG_SERIAL_MULTIPORT=y +# CONFIG_HUB6 is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=2048 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +CONFIG_MK712_MOUSE=m + +# +# Joysticks +# +CONFIG_INPUT_GAMEPORT=m +CONFIG_INPUT_NS558=m +CONFIG_INPUT_LIGHTNING=m +CONFIG_INPUT_PCIGAME=m +CONFIG_INPUT_CS461X=m +CONFIG_INPUT_EMU10K1=m +CONFIG_INPUT_SERIO=m +CONFIG_INPUT_SERPORT=m + +# +# Joysticks +# +CONFIG_INPUT_ANALOG=m +CONFIG_INPUT_A3D=m +CONFIG_INPUT_ADI=m +CONFIG_INPUT_COBRA=m +CONFIG_INPUT_GF2K=m +CONFIG_INPUT_GRIP=m +CONFIG_INPUT_INTERACT=m +CONFIG_INPUT_TMDC=m +CONFIG_INPUT_SIDEWINDER=m +CONFIG_INPUT_IFORCE_USB=m +CONFIG_INPUT_IFORCE_232=m +CONFIG_INPUT_WARRIOR=m +CONFIG_INPUT_MAGELLAN=m +CONFIG_INPUT_SPACEORB=m +CONFIG_INPUT_SPACEBALL=m +CONFIG_INPUT_STINGER=m +# CONFIG_INPUT_DB9 is not set +# CONFIG_INPUT_GAMECON is not set +# CONFIG_INPUT_TURBOGRAFX is not set +# CONFIG_QIC02_TAPE is not set +CONFIG_IPMI_HANDLER=m +# CONFIG_IPMI_PANIC_EVENT is not set +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_KCS=m +CONFIG_IPMI_WATCHDOG=m + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +CONFIG_HANGCHECK_TIMER=m +CONFIG_HANGCHECK_DELAY=m +# CONFIG_SCx200_GPIO is not set +CONFIG_INTEL_RNG=m +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +CONFIG_EFI_RTC=y 
+# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +CONFIG_AGP=m +CONFIG_AGP_INTEL=y +# CONFIG_AGP_I810 is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD_8151=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_SWORKS is not set +CONFIG_AGP_I460=y +CONFIG_AGP_HP_ZX1=y +CONFIG_DRM=y +# CONFIG_DRM_OLD is not set + +# +# DRM 4.1 drivers +# +CONFIG_DRM_NEW=y +CONFIG_DRM_TDFX=m +CONFIG_DRM_GAMMA=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I810_XFREE_41 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_MGA=m +# CONFIG_DRM_SIS is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +CONFIG_QUOTA=y +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +# CONFIG_QIFACE_COMPAT is not set +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +CONFIG_REISERFS_PROC_INFO=y +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +CONFIG_HFS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=m +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_XATTR_SHARING=y +CONFIG_EXT3_FS_XATTR_USER=y +CONFIG_EXT3_FS_XATTR_TRUSTED=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_JBD=m +# CONFIG_JBD_DEBUG is not set +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_CRAMFS=m +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_DEBUG=y +# CONFIG_JFS_STATISTICS is not set +CONFIG_MINIX_FS=m +CONFIG_VXFS_FS=m +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS 
is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_XATTR_SHARING=y +CONFIG_EXT2_FS_XATTR_USER=y +CONFIG_SYSV_FS=m +CONFIG_UDF_FS=m +CONFIG_UDF_RW=y +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +CONFIG_CODA_FS=m +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFS_ACL=y +# CONFIG_ROOT_NFS is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_ACL=y +CONFIG_NFSD_TCP=y +CONFIG_SUNRPC=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_ZISOFS_FS=y +CONFIG_FS_MBCACHE=y +CONFIG_FS_POSIX_ACL=y +CONFIG_FS_MBCACHE=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_SMB_NLS=y +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m 
+CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y + +# +# Frame-buffer support +# +CONFIG_FB=y +CONFIG_DUMMY_CONSOLE=y +# CONFIG_FB_RIVA is not set +# CONFIG_FB_CLGEN is not set +# CONFIG_FB_PM2 is not set +CONFIG_FB_PM3=m +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_VGA16=m +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G450=y +CONFIG_FB_MATROX_G100=y +# CONFIG_FB_MATROX_PROC is not set +CONFIG_FB_MATROX_MULTIHEAD=y +# CONFIG_FB_ATY is not set +# CONFIG_FB_RADEON is not set +CONFIG_FB_ATY128=m +# CONFIG_FB_INTEL is not set +# CONFIG_FB_SIS is not set +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_3DFX=m +CONFIG_FB_VOODOO1=m +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FBCON_ADVANCED is not set +CONFIG_FBCON_CFB8=y +CONFIG_FBCON_CFB16=y +CONFIG_FBCON_CFB24=y +CONFIG_FBCON_CFB32=y +CONFIG_FBCON_VGA_PLANES=m +CONFIG_FBCON_HGA=m +# CONFIG_FBCON_FONTWIDTH8_ONLY is not set +# CONFIG_FBCON_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_PCI_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=m +CONFIG_SOUND_ALI5455=m +# CONFIG_SOUND_BT878 is not set +CONFIG_SOUND_CMPCI=m +CONFIG_SOUND_CMPCI_FM=y +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_MIDI=y +CONFIG_SOUND_CMPCI_MPUIO=330 +CONFIG_SOUND_CMPCI_JOYSTICK=y 
+CONFIG_SOUND_CMPCI_CM8738=y +# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set +CONFIG_SOUND_CMPCI_SPDIFLOOP=y +CONFIG_SOUND_CMPCI_SPEAKERS=2 +CONFIG_SOUND_EMU10K1=m +CONFIG_MIDI_EMU10K1=y +CONFIG_SOUND_AUDIGY=m +CONFIG_SOUND_FUSION=m +CONFIG_SOUND_CS4281=m +CONFIG_SOUND_ES1370=m +CONFIG_SOUND_ES1371=m +CONFIG_SOUND_ESSSOLO1=m +CONFIG_SOUND_MAESTRO=m +CONFIG_SOUND_MAESTRO3=m +CONFIG_SOUND_FORTE=m +CONFIG_SOUND_ICH=m +CONFIG_SOUND_RME96XX=m +CONFIG_SOUND_SONICVIBES=m +CONFIG_SOUND_TRIDENT=m +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_VIA82CXXX=m +CONFIG_MIDI_VIA82CXXX=y +CONFIG_SOUND_OSS=m +# CONFIG_SOUND_TRACEINIT is not set +CONFIG_SOUND_DMAP=y +# CONFIG_SOUND_AD1816 is not set +CONFIG_SOUND_AD1889=m +# CONFIG_SOUND_SGALAXY is not set +# CONFIG_SOUND_ADLIB is not set +# CONFIG_SOUND_ACI_MIXER is not set +# CONFIG_SOUND_CS4232 is not set +# CONFIG_SOUND_SSCAPE is not set +# CONFIG_SOUND_GUS is not set +CONFIG_SOUND_VMIDI=m +# CONFIG_SOUND_TRIX is not set +# CONFIG_SOUND_MSS is not set +# CONFIG_SOUND_MPU401 is not set +# CONFIG_SOUND_NM256 is not set +# CONFIG_SOUND_MAD16 is not set +# CONFIG_SOUND_PAS is not set +# CONFIG_PAS_JOYSTICK is not set +# CONFIG_SOUND_PSS is not set +# CONFIG_SOUND_SB is not set +# CONFIG_SOUND_AWE32_SYNTH is not set +# CONFIG_SOUND_KAHLUA is not set +# CONFIG_SOUND_WAVEFRONT is not set +# CONFIG_SOUND_MAUI is not set +# CONFIG_SOUND_YM3812 is not set +# CONFIG_SOUND_OPL3SA1 is not set +# CONFIG_SOUND_OPL3SA2 is not set +CONFIG_SOUND_YMFPCI=m +CONFIG_SOUND_YMFPCI_LEGACY=y +# CONFIG_SOUND_UART6850 is not set +# CONFIG_SOUND_AEDSP16 is not set +# CONFIG_SOUND_TVMIXER is not set + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_UHCI=m +CONFIG_USB_UHCI_ALT=m +CONFIG_USB_OHCI=m + +# +# USB Device Class drivers +# 
+CONFIG_USB_AUDIO=m +# CONFIG_USB_EMI26 is not set + +# +# USB Bluetooth can only be used with disabled Bluetooth subsystem +# +CONFIG_USB_MIDI=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m + +# +# USB Human Interface Devices (HID) +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +CONFIG_USB_HIDDEV=y +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_AIPTEK=m +CONFIG_USB_WACOM=m +CONFIG_USB_KBTAB=m +CONFIG_USB_POWERMATE=m + +# +# USB Imaging devices +# +# CONFIG_USB_DC2XX is not set +CONFIG_USB_MDC800=m +CONFIG_USB_SCANNER=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_HPUSBSCSI=m + +# +# USB Multimedia devices +# + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network adaptors +# +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_KAWETH=m +CONFIG_USB_CATC=m +# CONFIG_USB_AX8817X is not set +CONFIG_USB_CDCETHER=m +CONFIG_USB_USBNET=m + +# +# USB port drivers +# +# CONFIG_USB_USS720 is not set + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +# CONFIG_USB_SERIAL_DEBUG is not set +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_WHITEHEAT=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set 
+CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m + +# +# USB Miscellaneous drivers +# +CONFIG_USB_RIO500=m +CONFIG_USB_AUERSWALD=m +CONFIG_USB_TIGL=m +CONFIG_USB_BRLVGER=m +CONFIG_USB_LCD=m + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_QSORT=y + +# +# Bluetooth support +# +CONFIG_BLUEZ=m +CONFIG_BLUEZ_L2CAP=m +CONFIG_BLUEZ_SCO=m +CONFIG_BLUEZ_RFCOMM=m +CONFIG_BLUEZ_RFCOMM_TTY=y +CONFIG_BLUEZ_BNEP=m +CONFIG_BLUEZ_BNEP_MC_FILTER=y +CONFIG_BLUEZ_BNEP_PROTO_FILTER=y + +# +# Bluetooth device drivers +# +CONFIG_BLUEZ_HCIUSB=m +CONFIG_BLUEZ_USB_SCO=y +CONFIG_BLUEZ_USB_ZERO_PACKET=y +CONFIG_BLUEZ_HCIUART=m +CONFIG_BLUEZ_HCIUART_H4=y +CONFIG_BLUEZ_HCIUART_BCSP=y +CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y +# CONFIG_BLUEZ_HCIDTL1 is not set +# CONFIG_BLUEZ_HCIBT3C is not set +# CONFIG_BLUEZ_HCIBLUECARD is not set +# CONFIG_BLUEZ_HCIBTUART is not set +CONFIG_BLUEZ_HCIVHCI=m + +# +# Simulated drivers +# +# CONFIG_HP_SIMETH is not set +# CONFIG_HP_SIMSERIAL is not set +# CONFIG_HP_SIMSCSI is not set + +# +# Additional device driver support +# +CONFIG_NET_BROADCOM=m +CONFIG_CIPE=m +# CONFIG_CRYPTO_AEP is not set +CONFIG_CRYPTO_BROADCOM=m +# CONFIG_MEGARAC is not 
set +CONFIG_FC_QLA2100=m +CONFIG_FC_QLA2200=m +CONFIG_FC_QLA2300=m +CONFIG_SCSI_ISCSI=m +# CONFIG_SCSI_IPR is not set +CONFIG_SCSI_LPFC=m + +# +# Kernel hacking +# +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_IA64_PRINT_HAZARDS=y +# CONFIG_DISABLE_VHPT is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_IA64_EARLY_PRINTK is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_KALLSYMS=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config new file mode 100644 index 0000000..a8afabf --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config @@ -0,0 +1,1424 @@ +# +# Automatically generated make config: don't edit +# + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y + +# +# General setup +# +CONFIG_IA64=y +# CONFIG_HIGHPTE is not set +CONFIG_HIGHMEM=y +CONFIG_HIGHIO=y +# CONFIG_ISA is not set +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SBUS is not set +CONFIG_RWSEM_GENERIC_SPINLOCK=y +# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +# CONFIG_ITANIUM is not set +CONFIG_MCKINLEY=y +CONFIG_IA64_GENERIC=y +# CONFIG_IA64_DIG is not set +# CONFIG_IA64_HP_SIM is not set +# CONFIG_IA64_HP_ZX1 is not set +# CONFIG_IA64_SGI_SN1 is not set +# CONFIG_IA64_SGI_SN2 is not set +# CONFIG_IA64_PAGE_SIZE_4KB is not set +# CONFIG_IA64_PAGE_SIZE_8KB is not set +CONFIG_IA64_PAGE_SIZE_16KB=y +# CONFIG_IA64_PAGE_SIZE_64KB is not set +CONFIG_IA64_L1_CACHE_SHIFT=7 +CONFIG_IA64_MCA=y +CONFIG_PM=y +CONFIG_KCORE_ELF=y +CONFIG_FORCE_MAX_ZONEORDER=15 +# CONFIG_HUGETLB_PAGE_SIZE_4GB is not set +# CONFIG_HUGETLB_PAGE_SIZE_1GB is not set +CONFIG_HUGETLB_PAGE_SIZE_256MB=y +# CONFIG_HUGETLB_PAGE_SIZE_64MB 
is not set +# CONFIG_HUGETLB_PAGE_SIZE_16MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_4MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_1MB is not set +# CONFIG_HUGETLB_PAGE_SIZE_256KB is not set +# CONFIG_IA64_PAL_IDLE is not set +CONFIG_SMP=y +CONFIG_IA32_SUPPORT=y +CONFIG_COMPAT=y +CONFIG_PERFMON=y +CONFIG_IA64_PALINFO=y +CONFIG_EFI_VARS=y +CONFIG_IA64_CYCLONE=y +CONFIG_NET=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_MISC=m +CONFIG_AUDIT=m +CONFIG_ACPI=y +CONFIG_ACPI_EFI=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_KERNEL_CONFIG=y + +# +# ACPI Support +# +CONFIG_ACPI_PCI=y +CONFIG_ACPI=y +CONFIG_ACPI_EFI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_BUS=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_SYSTEM=y +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +# CONFIG_ACPI_DEBUG is not set +CONFIG_PCI=y +CONFIG_PCI_NAMES=y +CONFIG_HOTPLUG=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +# CONFIG_HOTPLUG_PCI_COMPAQ is not set +# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set +CONFIG_HOTPLUG_PCI_ACPI=m + +# +# PCMCIA/CardBus support +# +# CONFIG_PCMCIA is not set + +# +# Parallel port support +# +# CONFIG_PARPORT is not set + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# +# Networking options +# +CONFIG_PACKET=y +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=y +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_FILTER=y +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_TUX=m +CONFIG_TUX_EXTCGI=y +# CONFIG_TUX_EXTENDED_LOG is not set +# CONFIG_TUX_DEBUG is not set +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# 
CONFIG_ARPD is not set +# CONFIG_INET_ECN is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_UNCLEAN=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_TARGET_MIRROR=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_NAT_LOCAL=y +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_IP_NF_COMPAT_IPCHAINS=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_COMPAT_IPFWADM=m +CONFIG_IP_NF_NAT_NEEDED=y + +# +# IP: Virtual Server Configuration +# +CONFIG_IP_VS=m +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=16 + +# +# IPVS scheduler +# +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m +CONFIG_IPV6=m 
+CONFIG_IPV6_PRIVACY=y + +# +# IPv6: Netfilter Configuration +# +# CONFIG_IP6_NF_QUEUE is not set +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_TUNNEL=m +CONFIG_XFRM=y +CONFIG_XFRM_USER=y +# CONFIG_KHTTPD is not set +# CONFIG_ATM is not set +CONFIG_VLAN_8021Q=m + +# +# +# +CONFIG_IPX=m +# CONFIG_IPX_INTERN is not set +CONFIG_ATALK=m + +# +# Appletalk devices +# +CONFIG_DEV_APPLETALK=y +CONFIG_COPS_DAYNA=y +CONFIG_COPS_TANGENT=y +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_DECNET=m +CONFIG_DECNET_SIOCGIFCONF=y +CONFIG_DECNET_ROUTER=y +CONFIG_DECNET_ROUTE_FWMARK=y +CONFIG_BRIDGE=m +# CONFIG_X25 is not set +CONFIG_EDP2=m +# CONFIG_LAPB is not set +# CONFIG_LLC is not set +CONFIG_NET_DIVERT=y +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_CSZ=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +# CONFIG_NET_PKTGEN 
is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Plug and Play configuration +# +# CONFIG_PNP is not set +# CONFIG_ISAPNP is not set +# CONFIG_PNPBIOS is not set + +# +# Block devices +# +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +CONFIG_BLK_CPQ_CISS_DA=m +CONFIG_CISS_SCSI_TAPE=y +# CONFIG_CISS_MONITOR_THREAD is not set +CONFIG_BLK_DEV_DAC960=m +CONFIG_BLK_DEV_UMEM=m +CONFIG_BLK_DEV_SX8=m +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=8192 +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_STATS=y +CONFIG_DISKDUMP=m + +# +# IEEE 1394 (FireWire) support (EXPERIMENTAL) +# +CONFIG_IEEE1394=m + +# +# Device Drivers +# + +# +# Texas Instruments PCILynx requires I2C bit-banging +# +CONFIG_IEEE1394_OHCI1394=m + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +CONFIG_IEEE1394_SBP2_PHYS_DMA=y +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m +# CONFIG_IEEE1394_VERBOSEDEBUG is not set + +# +# I2O device support +# +CONFIG_I2O=m +CONFIG_I2O_PCI=m +CONFIG_I2O_BLOCK=m +CONFIG_I2O_LAN=m +CONFIG_I2O_SCSI=m +CONFIG_I2O_PROC=m + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID5=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_LVM=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=m +# CONFIG_FUSION_BOOT is not set +CONFIG_FUSION_MAX_SGE=40 +# CONFIG_FUSION_ISENSE is not set +CONFIG_FUSION_CTL=m +CONFIG_FUSION_LAN=m +CONFIG_NET_FC=y + +# +# ATA/IDE/MFM/RLL support +# +CONFIG_IDE=y + +# +# IDE, ATA and ATAPI Block devices +# +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_BLK_DEV_IDEDISK=y 
+CONFIG_IDEDISK_MULTI_MODE=y +# CONFIG_IDEDISK_STROKE is not set +# CONFIG_BLK_DEV_IDECS is not set +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_IDETAPE=m +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +# CONFIG_BLK_DEV_CMD640 is not set +# CONFIG_BLK_DEV_CMD640_ENHANCED is not set +# CONFIG_BLK_DEV_ISAPNP is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_PCI_WIP is not set +CONFIG_BLK_DEV_ADMA100=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +# CONFIG_AMD74XX_OVERRIDE is not set +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_TRIFLEX=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5530=y +CONFIG_BLK_DEV_HPT34X=y +# CONFIG_HPT34X_AUTODMA is not set +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_OPTI621 is not set +CONFIG_BLK_DEV_PDC202XX_OLD=y +# CONFIG_PDC202XX_BURST is not set +CONFIG_BLK_DEV_PDC202XX_NEW=y +CONFIG_PDC202XX_FORCE=y +# CONFIG_BLK_DEV_RZ1000 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIIMAGE=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +# CONFIG_BLK_DEV_TRM290 is not set +CONFIG_BLK_DEV_VIA82CXXX=y +# CONFIG_IDE_CHIPSETS is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_IDEDMA_IVB is not set +# CONFIG_DMA_NONPCI is not set +CONFIG_BLK_DEV_PDC202XX=y +CONFIG_BLK_DEV_IDE_MODES=y +CONFIG_BLK_DEV_ATARAID=m +CONFIG_BLK_DEV_ATARAID_PDC=m +CONFIG_BLK_DEV_ATARAID_HPT=m +CONFIG_BLK_DEV_ATARAID_SII=m + +# +# SCSI support +# +CONFIG_SCSI=m + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=m +CONFIG_SD_EXTRA_DEVS=256 +CONFIG_SD_IOSTATS=y +CONFIG_SCSI_DUMP=m 
+CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +CONFIG_BLK_DEV_SR_VENDOR=y +CONFIG_SR_EXTRA_DEVS=4 +CONFIG_CHR_DEV_SG=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_DEBUG_QUEUES=y +# CONFIG_SCSI_MULTI_LUN is not set +CONFIG_SCSI_CONSTANTS=y +# CONFIG_SCSI_LOGGING is not set + +# +# SCSI low-level drivers +# +CONFIG_BLK_DEV_3W_XXXX_RAID=m +# CONFIG_SCSI_7000FASST is not set +# CONFIG_SCSI_ACARD is not set +# CONFIG_SCSI_AHA152X is not set +# CONFIG_SCSI_AHA1542 is not set +# CONFIG_SCSI_AHA1740 is not set +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=15000 +# CONFIG_AIC7XXX_PROBE_EISA_VL is not set +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_AIC7XXX_DEBUG_MASK=0 +# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +# CONFIG_AIC79XX_BUILD_FIRMWARE is not set +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set +CONFIG_SCSI_AIC7XXX_OLD=m +CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y +CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_OLD_PROC_STATS=y +# CONFIG_SCSI_DPT_I2O is not set +# CONFIG_SCSI_ADVANSYS is not set +# CONFIG_SCSI_IN2000 is not set +# CONFIG_SCSI_AM53C974 is not set +CONFIG_SCSI_MEGARAID=m +CONFIG_SCSI_MEGARAID2=m +CONFIG_SCSI_SATA=y +CONFIG_SCSI_SATA_SVW=m +CONFIG_SCSI_ATA_PIIX=m +CONFIG_SCSI_SATA_NV=m +CONFIG_SCSI_SATA_PROMISE=m +CONFIG_SCSI_SATA_SX4=m +CONFIG_SCSI_SATA_SIL=m +CONFIG_SCSI_SATA_SIS=m +CONFIG_SCSI_SATA_VIA=m +CONFIG_SCSI_SATA_VITESSE=m +# CONFIG_SCSI_BUSLOGIC is not set +# CONFIG_SCSI_CPQFCTS is not set +# CONFIG_SCSI_DMX3191D is not set +# CONFIG_SCSI_DTC3280 is not set +# CONFIG_SCSI_EATA is not set +# CONFIG_SCSI_EATA_DMA is not set +# CONFIG_SCSI_EATA_PIO is not set +# 
CONFIG_SCSI_FUTURE_DOMAIN is not set +CONFIG_SCSI_GDTH=m +# CONFIG_SCSI_GENERIC_NCR5380 is not set +CONFIG_SCSI_IPS=m +# CONFIG_SCSI_INITIO is not set +# CONFIG_SCSI_INIA100 is not set +# CONFIG_SCSI_NCR53C406A is not set +# CONFIG_SCSI_NCR53C7xx is not set +CONFIG_SCSI_SYM53C8XX_2=m +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_NCR53C8XX=m +CONFIG_SCSI_SYM53C8XX=m +CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 +CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 +CONFIG_SCSI_NCR53C8XX_SYNC=40 +# CONFIG_SCSI_NCR53C8XX_PROFILE is not set +# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set +# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set +# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set +# CONFIG_SCSI_PAS16 is not set +# CONFIG_SCSI_PCI2000 is not set +# CONFIG_SCSI_PCI2220I is not set +# CONFIG_SCSI_PSI240I is not set +CONFIG_SCSI_QLOGIC_FAS=m +CONFIG_SCSI_QLOGIC_ISP=m +CONFIG_SCSI_QLOGIC_FC=m +# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set +CONFIG_SCSI_QLOGIC_1280=m +# CONFIG_SCSI_SIM710 is not set +# CONFIG_SCSI_SYM53C416 is not set +# CONFIG_SCSI_DC390T is not set +# CONFIG_SCSI_T128 is not set +# CONFIG_SCSI_U14_34F is not set +CONFIG_SCSI_NSP32=m +# CONFIG_SCSI_DEBUG is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set +CONFIG_DUMMY=m +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_ETHERTAP=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +# CONFIG_SUNLANCE is not set +CONFIG_HAPPYMEAL=m +# CONFIG_SUNBMAC is not set +# CONFIG_SUNQE is not set +CONFIG_SUNGEM=m +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_ELMC is not set +# CONFIG_ELMC_II is not set +CONFIG_VORTEX=m +CONFIG_TYPHOON=m +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set +# 
CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +CONFIG_PCNET32=m +CONFIG_AMD8111_ETH=m +CONFIG_ADAPTEC_STARFIRE=m +# CONFIG_APRICOT is not set +CONFIG_B44=m +# CONFIG_CS89x0 is not set +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +CONFIG_TULIP_MMIO=y +# CONFIG_DE4X5 is not set +# CONFIG_DGRS is not set +CONFIG_DM9102=m +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +# CONFIG_LNE390 is not set +CONFIG_FEALNX=m +CONFIG_NATSEMI=m +CONFIG_NE2K_PCI=m +# CONFIG_NE3210 is not set +# CONFIG_ES3210 is not set +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_SIS900=m +CONFIG_EPIC100=m +# CONFIG_SUNDANCE is not set +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_TLAN=m +CONFIG_TC35815=m +CONFIG_VIA_RHINE=m +# CONFIG_VIA_RHINE_MMIO is not set +# CONFIG_WINBOND_840 is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +CONFIG_DL2K=m +CONFIG_E1000=m +CONFIG_E1000_NAPI=y +# CONFIG_MYRI_SBUS is not set +CONFIG_NS83820=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_R8169=m +CONFIG_SK98LIN=m +CONFIG_TIGON3=m +# CONFIG_FDDI is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +# CONFIG_PPP_DEFLATE is not set +# CONFIG_PPP_BSDCOMP is not set +# CONFIG_PPPOE is not set +# CONFIG_SLIP is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMOL=m +CONFIG_IBMLS=m +CONFIG_3C359=m +# CONFIG_TMS380TR is not set +CONFIG_NET_FC=y +CONFIG_IPHASE5526=m +# CONFIG_RCPCI is not set +CONFIG_SHAPER=m + +# +# Wan interfaces +# +# CONFIG_WAN is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# ISDN 
subsystem +# +# CONFIG_ISDN is not set + +# +# CD-ROM drivers (not for SCSI or IDE/ATAPI drives) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Input core support +# +CONFIG_INPUT=m +CONFIG_INPUT_KEYBDEV=m +CONFIG_INPUT_MOUSEDEV=m +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_EVDEV=m + +# +# Character devices +# +CONFIG_VT=y +# CONFIG_ECC is not set +CONFIG_VT_CONSOLE=y +CONFIG_SERIAL=y +CONFIG_SERIAL_CONSOLE=y +CONFIG_SERIAL_HCDP=y +CONFIG_SERIAL_ACPI=y +CONFIG_HP_DIVA=y +CONFIG_SERIAL_EXTENDED=y +CONFIG_SERIAL_MANY_PORTS=y +CONFIG_SERIAL_SHARE_IRQ=y +# CONFIG_SERIAL_DETECT_IRQ is not set +CONFIG_SERIAL_MULTIPORT=y +# CONFIG_HUB6 is not set +# CONFIG_SERIAL_NONSTANDARD is not set +CONFIG_UNIX98_PTYS=y +CONFIG_UNIX98_PTY_COUNT=2048 + +# +# I2C support +# +# CONFIG_I2C is not set + +# +# Mice +# +# CONFIG_BUSMOUSE is not set +CONFIG_MOUSE=y +CONFIG_PSMOUSE=y +# CONFIG_82C710_MOUSE is not set +# CONFIG_PC110_PAD is not set +CONFIG_MK712_MOUSE=m + +# +# Joysticks +# +CONFIG_INPUT_GAMEPORT=m +CONFIG_INPUT_NS558=m +CONFIG_INPUT_LIGHTNING=m +CONFIG_INPUT_PCIGAME=m +CONFIG_INPUT_CS461X=m +CONFIG_INPUT_EMU10K1=m +CONFIG_INPUT_SERIO=m +CONFIG_INPUT_SERPORT=m + +# +# Joysticks +# +CONFIG_INPUT_ANALOG=m +CONFIG_INPUT_A3D=m +CONFIG_INPUT_ADI=m +CONFIG_INPUT_COBRA=m +CONFIG_INPUT_GF2K=m +CONFIG_INPUT_GRIP=m +CONFIG_INPUT_INTERACT=m +CONFIG_INPUT_TMDC=m +CONFIG_INPUT_SIDEWINDER=m +CONFIG_INPUT_IFORCE_USB=m +CONFIG_INPUT_IFORCE_232=m +CONFIG_INPUT_WARRIOR=m +CONFIG_INPUT_MAGELLAN=m +CONFIG_INPUT_SPACEORB=m +CONFIG_INPUT_SPACEBALL=m +CONFIG_INPUT_STINGER=m +# CONFIG_INPUT_DB9 is not set +# CONFIG_INPUT_GAMECON is not set +# CONFIG_INPUT_TURBOGRAFX is not set +# CONFIG_QIC02_TAPE is not set +CONFIG_IPMI_HANDLER=m +# CONFIG_IPMI_PANIC_EVENT is not set +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_KCS=m +CONFIG_IPMI_WATCHDOG=m + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +CONFIG_HANGCHECK_TIMER=m 
+CONFIG_HANGCHECK_DELAY=m +# CONFIG_SCx200_GPIO is not set +CONFIG_INTEL_RNG=m +# CONFIG_AMD_PM768 is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +CONFIG_EFI_RTC=y +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_FTAPE is not set +CONFIG_AGP=m +CONFIG_AGP_INTEL=y +# CONFIG_AGP_I810 is not set +# CONFIG_AGP_VIA is not set +# CONFIG_AGP_AMD is not set +CONFIG_AGP_AMD_8151=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_ALI is not set +# CONFIG_AGP_SWORKS is not set +CONFIG_AGP_I460=y +CONFIG_AGP_HP_ZX1=y +CONFIG_DRM=y +# CONFIG_DRM_OLD is not set + +# +# DRM 4.1 drivers +# +CONFIG_DRM_NEW=y +CONFIG_DRM_TDFX=m +CONFIG_DRM_GAMMA=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +# CONFIG_DRM_I810 is not set +# CONFIG_DRM_I810_XFREE_41 is not set +# CONFIG_DRM_I830 is not set +CONFIG_DRM_MGA=m +# CONFIG_DRM_SIS is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# File systems +# +CONFIG_QUOTA=y +# CONFIG_QFMT_V1 is not set +CONFIG_QFMT_V2=y +# CONFIG_QIFACE_COMPAT is not set +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +CONFIG_REISERFS_PROC_INFO=y +# CONFIG_ADFS_FS is not set +# CONFIG_ADFS_FS_RW is not set +# CONFIG_AFFS_FS is not set +CONFIG_HFS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BFS_FS is not set +CONFIG_EXT3_FS=m +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_XATTR_SHARING=y +CONFIG_EXT3_FS_XATTR_USER=y +CONFIG_EXT3_FS_XATTR_TRUSTED=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_JBD=m +# CONFIG_JBD_DEBUG is not set +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_UMSDOS_FS=m +CONFIG_VFAT_FS=m +# CONFIG_EFS_FS is not set +# CONFIG_JFFS_FS is not set +# CONFIG_JFFS2_FS is not set +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_CRAMFS=m +CONFIG_TMPFS=y +CONFIG_RAMFS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_JFS_FS=m 
+CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_DEBUG=y +# CONFIG_JFS_STATISTICS is not set +CONFIG_MINIX_FS=m +CONFIG_VXFS_FS=m +# CONFIG_NTFS_FS is not set +# CONFIG_NTFS_RW is not set +# CONFIG_HPFS_FS is not set +CONFIG_PROC_FS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVFS_MOUNT is not set +# CONFIG_DEVFS_DEBUG is not set +CONFIG_DEVPTS_FS=y +# CONFIG_QNX4FS_FS is not set +# CONFIG_QNX4FS_RW is not set +CONFIG_ROMFS_FS=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_XATTR_SHARING=y +CONFIG_EXT2_FS_XATTR_USER=y +CONFIG_SYSV_FS=m +CONFIG_UDF_FS=m +CONFIG_UDF_RW=y +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +CONFIG_CODA_FS=m +# CONFIG_INTERMEZZO_FS is not set +CONFIG_NFS_FS=m +CONFIG_NFS_V3=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFS_ACL=y +# CONFIG_ROOT_NFS is not set +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_ACL=y +CONFIG_NFSD_TCP=y +CONFIG_SUNRPC=m +CONFIG_LOCKD=m +CONFIG_LOCKD_V4=y +CONFIG_SMB_FS=m +# CONFIG_SMB_NLS_DEFAULT is not set +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_ZISOFS_FS=y +CONFIG_FS_MBCACHE=y +CONFIG_FS_POSIX_ACL=y +CONFIG_FS_MBCACHE=y + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +# CONFIG_ATARI_PARTITION is not set +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +# CONFIG_LDM_PARTITION is not set +CONFIG_SGI_PARTITION=y +# CONFIG_ULTRIX_PARTITION is not set +CONFIG_SUN_PARTITION=y +CONFIG_EFI_PARTITION=y +CONFIG_SMB_NLS=y +CONFIG_NLS=y + +# +# Native Language Support +# +CONFIG_NLS_DEFAULT="iso8859-1" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m 
+CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m + +# +# Console drivers +# +CONFIG_VGA_CONSOLE=y + +# +# Frame-buffer support +# +CONFIG_FB=y +CONFIG_DUMMY_CONSOLE=y +# CONFIG_FB_RIVA is not set +# CONFIG_FB_CLGEN is not set +# CONFIG_FB_PM2 is not set +CONFIG_FB_PM3=m +# CONFIG_FB_CYBER2000 is not set +CONFIG_FB_VGA16=m +CONFIG_FB_MATROX=m +CONFIG_FB_MATROX_MILLENIUM=y +CONFIG_FB_MATROX_MYSTIQUE=y +CONFIG_FB_MATROX_G450=y +CONFIG_FB_MATROX_G100=y +# CONFIG_FB_MATROX_PROC is not set +CONFIG_FB_MATROX_MULTIHEAD=y +# CONFIG_FB_ATY is not set +# CONFIG_FB_RADEON is not set +CONFIG_FB_ATY128=m +# CONFIG_FB_INTEL is not set +# CONFIG_FB_SIS is not set +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_3DFX=m +CONFIG_FB_VOODOO1=m +# CONFIG_FB_TRIDENT is not set +# CONFIG_FB_VIRTUAL is not set +# CONFIG_FBCON_ADVANCED is not set +CONFIG_FBCON_CFB8=y +CONFIG_FBCON_CFB16=y +CONFIG_FBCON_CFB24=y +CONFIG_FBCON_CFB32=y +CONFIG_FBCON_VGA_PLANES=m +CONFIG_FBCON_HGA=m +# CONFIG_FBCON_FONTWIDTH8_ONLY is not set +# CONFIG_FBCON_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_PCI_CONSOLE=y + +# +# Sound +# +CONFIG_SOUND=m +CONFIG_SOUND_ALI5455=m +# CONFIG_SOUND_BT878 is not set +CONFIG_SOUND_CMPCI=m 
+CONFIG_SOUND_CMPCI_FM=y +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_FMIO=388 +CONFIG_SOUND_CMPCI_MIDI=y +CONFIG_SOUND_CMPCI_MPUIO=330 +CONFIG_SOUND_CMPCI_JOYSTICK=y +CONFIG_SOUND_CMPCI_CM8738=y +# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set +CONFIG_SOUND_CMPCI_SPDIFLOOP=y +CONFIG_SOUND_CMPCI_SPEAKERS=2 +CONFIG_SOUND_EMU10K1=m +CONFIG_MIDI_EMU10K1=y +CONFIG_SOUND_AUDIGY=m +CONFIG_SOUND_FUSION=m +CONFIG_SOUND_CS4281=m +CONFIG_SOUND_ES1370=m +CONFIG_SOUND_ES1371=m +CONFIG_SOUND_ESSSOLO1=m +CONFIG_SOUND_MAESTRO=m +CONFIG_SOUND_MAESTRO3=m +CONFIG_SOUND_FORTE=m +CONFIG_SOUND_ICH=m +CONFIG_SOUND_RME96XX=m +CONFIG_SOUND_SONICVIBES=m +CONFIG_SOUND_TRIDENT=m +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_VIA82CXXX=m +CONFIG_MIDI_VIA82CXXX=y +CONFIG_SOUND_OSS=m +# CONFIG_SOUND_TRACEINIT is not set +CONFIG_SOUND_DMAP=y +# CONFIG_SOUND_AD1816 is not set +CONFIG_SOUND_AD1889=m +# CONFIG_SOUND_SGALAXY is not set +# CONFIG_SOUND_ADLIB is not set +# CONFIG_SOUND_ACI_MIXER is not set +# CONFIG_SOUND_CS4232 is not set +# CONFIG_SOUND_SSCAPE is not set +# CONFIG_SOUND_GUS is not set +CONFIG_SOUND_VMIDI=m +# CONFIG_SOUND_TRIX is not set +# CONFIG_SOUND_MSS is not set +# CONFIG_SOUND_MPU401 is not set +# CONFIG_SOUND_NM256 is not set +# CONFIG_SOUND_MAD16 is not set +# CONFIG_SOUND_PAS is not set +# CONFIG_PAS_JOYSTICK is not set +# CONFIG_SOUND_PSS is not set +# CONFIG_SOUND_SB is not set +# CONFIG_SOUND_AWE32_SYNTH is not set +# CONFIG_SOUND_KAHLUA is not set +# CONFIG_SOUND_WAVEFRONT is not set +# CONFIG_SOUND_MAUI is not set +# CONFIG_SOUND_YM3812 is not set +# CONFIG_SOUND_OPL3SA1 is not set +# CONFIG_SOUND_OPL3SA2 is not set +CONFIG_SOUND_YMFPCI=m +CONFIG_SOUND_YMFPCI_LEGACY=y +# CONFIG_SOUND_UART6850 is not set +# CONFIG_SOUND_AEDSP16 is not set +# CONFIG_SOUND_TVMIXER is not set + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not 
set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_UHCI=m +CONFIG_USB_UHCI_ALT=m +CONFIG_USB_OHCI=m + +# +# USB Device Class drivers +# +CONFIG_USB_AUDIO=m +# CONFIG_USB_EMI26 is not set + +# +# USB Bluetooth can only be used with disabled Bluetooth subsystem +# +CONFIG_USB_MIDI=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m + +# +# USB Human Interface Devices (HID) +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +CONFIG_USB_HIDDEV=y +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_AIPTEK=m +CONFIG_USB_WACOM=m +CONFIG_USB_KBTAB=m +CONFIG_USB_POWERMATE=m + +# +# USB Imaging devices +# +# CONFIG_USB_DC2XX is not set +CONFIG_USB_MDC800=m +CONFIG_USB_SCANNER=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_HPUSBSCSI=m + +# +# USB Multimedia devices +# + +# +# Video4Linux support is needed for USB Multimedia device support +# + +# +# USB Network adaptors +# +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_KAWETH=m +CONFIG_USB_CATC=m +# CONFIG_USB_AX8817X is not set +CONFIG_USB_CDCETHER=m +CONFIG_USB_USBNET=m + +# +# USB port drivers +# +# CONFIG_USB_USS720 is not set + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +# CONFIG_USB_SERIAL_DEBUG is not set +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_WHITEHEAT=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y 
+CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set +# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m + +# +# USB Miscellaneous drivers +# +CONFIG_USB_RIO500=m +CONFIG_USB_AUERSWALD=m +CONFIG_USB_TIGL=m +CONFIG_USB_BRLVGER=m +CONFIG_USB_LCD=m + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_DEFLATE=y +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=m +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y +CONFIG_QSORT=y + +# +# Bluetooth support +# +CONFIG_BLUEZ=m +CONFIG_BLUEZ_L2CAP=m +CONFIG_BLUEZ_SCO=m +CONFIG_BLUEZ_RFCOMM=m +CONFIG_BLUEZ_RFCOMM_TTY=y +CONFIG_BLUEZ_BNEP=m +CONFIG_BLUEZ_BNEP_MC_FILTER=y +CONFIG_BLUEZ_BNEP_PROTO_FILTER=y + +# +# Bluetooth device drivers +# +CONFIG_BLUEZ_HCIUSB=m +CONFIG_BLUEZ_USB_SCO=y +CONFIG_BLUEZ_USB_ZERO_PACKET=y +CONFIG_BLUEZ_HCIUART=m +CONFIG_BLUEZ_HCIUART_H4=y +CONFIG_BLUEZ_HCIUART_BCSP=y +CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y +# CONFIG_BLUEZ_HCIDTL1 is not set +# CONFIG_BLUEZ_HCIBT3C is not set +# CONFIG_BLUEZ_HCIBLUECARD is not set +# CONFIG_BLUEZ_HCIBTUART is not set +CONFIG_BLUEZ_HCIVHCI=m + +# +# Simulated drivers +# +# CONFIG_HP_SIMETH is not set +# CONFIG_HP_SIMSERIAL is not set +# CONFIG_HP_SIMSCSI is not set 
+ +# +# Additional device driver support +# +CONFIG_NET_BROADCOM=m +CONFIG_CIPE=m +# CONFIG_CRYPTO_AEP is not set +CONFIG_CRYPTO_BROADCOM=m +# CONFIG_MEGARAC is not set +CONFIG_FC_QLA2100=m +CONFIG_FC_QLA2200=m +CONFIG_FC_QLA2300=m +CONFIG_SCSI_ISCSI=m +# CONFIG_SCSI_IPR is not set +CONFIG_SCSI_LPFC=m + +# +# Kernel hacking +# +CONFIG_IA64_GRANULE_16MB=y +# CONFIG_IA64_GRANULE_64MB is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_IA64_PRINT_HAZARDS=y +# CONFIG_DISABLE_VHPT is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_IA64_EARLY_PRINTK is not set +# CONFIG_DEBUG_SLAB is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_IA64_DEBUG_CMPXCHG is not set +# CONFIG_IA64_DEBUG_IRQ is not set +CONFIG_KALLSYMS=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-bigsmp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-bigsmp.config new file mode 100644 index 0000000..349bca7 --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686-bigsmp.config @@ -0,0 +1,2845 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_UID16=y +CONFIG_GENERIC_ISA_DMA=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +# CONFIG_STANDALONE is not set + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_HOTPLUG=y +CONFIG_EVLOG=y +# CONFIG_EVLOG_FWPRINTK is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set + +# +# Class Based Kernel Resource Management +# +CONFIG_CKRM=y +CONFIG_RCFS_FS=m +CONFIG_CKRM_TYPE_TASKCLASS=y +CONFIG_CKRM_RES_NUMTASKS=m +CONFIG_CKRM_TYPE_SOCKETCLASS=y +CONFIG_CKRM_RBCE=m +CONFIG_CKRM_CRBCE=m +CONFIG_DELAY_ACCT=y +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set + 
+# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +# CONFIG_X86_PC is not set +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +CONFIG_X86_GENERICARCH=y +# CONFIG_X86_ES7000 is not set +CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +CONFIG_MPENTIUMII=y +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +# CONFIG_HPET_TIMER is not set +# CONFIG_HPET_EMULATE_RTC is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=128 +CONFIG_SCHED_SMT=y +# CONFIG_PREEMPT is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_TSC=y +CONFIG_X86_MCE=y +# CONFIG_X86_MCE_NONFATAL is not set +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_TOSHIBA=m +CONFIG_I8K=m +CONFIG_MICROCODE=m +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m + +# +# Firmware Drivers +# +CONFIG_EDD=m +# CONFIG_NOHIGHMEM is not set +# CONFIG_HIGHMEM4G is not set +CONFIG_HIGHMEM64G=y +CONFIG_HIGHMEM=y +CONFIG_X86_PAE=y +# CONFIG_NUMA is not set +CONFIG_HIGHPTE=y +# CONFIG_MATH_EMULATION is 
not set +CONFIG_MTRR=y +CONFIG_EFI=y +CONFIG_IRQBALANCE=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_BOOT_IOREMAP=y +CONFIG_REGPARM=y + +# +# Special options +# +CONFIG_PROC_MM=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_SOFTWARE_SUSPEND is not set +# CONFIG_PM_DISK is not set + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +CONFIG_ACPI_AC=m +CONFIG_ACPI_BATTERY=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +# CONFIG_ACPI_ASUS is not set +CONFIG_ACPI_TOSHIBA=m +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +CONFIG_ACPI_INITRD=y + +# +# APM (Advanced Power Management) BIOS Support +# +CONFIG_APM=y +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +CONFIG_APM_DO_ENABLE=y +# CONFIG_APM_CPU_IDLE is not set +CONFIG_APM_DISPLAY_BLANK=y +# CONFIG_APM_RTC_IS_GMT is not set +CONFIG_APM_ALLOW_INTS=y +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_PROC_INTF=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_FREQ_GOV_ONDEMAND=m +# CONFIG_CPU_FREQ_24_API is not set +CONFIG_CPU_FREQ_TABLE=m + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=m +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set +CONFIG_X86_POWERNOW_K6=m +CONFIG_X86_POWERNOW_K7=m +CONFIG_X86_POWERNOW_K8=m +CONFIG_X86_POWERNOW_K8_ACPI=y +CONFIG_X86_GX_SUSPMOD=m +CONFIG_X86_SPEEDSTEP_CENTRINO=m +CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y +# CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI is not set +CONFIG_X86_SPEEDSTEP_ICH=m +CONFIG_X86_SPEEDSTEP_SMI=m +CONFIG_X86_P4_CLOCKMOD=m +CONFIG_X86_SPEEDSTEP_LIB=m 
+CONFIG_X86_LONGRUN=m +CONFIG_X86_LONGHAUL=m + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +# CONFIG_PCI_USE_VECTOR is not set +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_NAMES is not set +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +CONFIG_SCx200=m + +# +# PCMCIA/CardBus support +# +CONFIG_PCMCIA=m +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_YENTA=m +CONFIG_CARDBUS=y +CONFIG_I82092=m +CONFIG_I82365=m +CONFIG_TCIC=m +CONFIG_PCMCIA_PROBE=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m +CONFIG_HOTPLUG_PCI_FAKE=m +CONFIG_HOTPLUG_PCI_COMPAQ=m +CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y +CONFIG_HOTPLUG_PCI_IBM=m +CONFIG_HOTPLUG_PCI_AMD=m +CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_HOTPLUG_PCI_CPCI=y +CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m +CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m +CONFIG_HOTPLUG_PCI_PCIE=m +# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_AOUT=m +CONFIG_BINFMT_MISC=m + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_FW_LOADER=m +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=m +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_PARTITIONS=m +CONFIG_MTD_CONCAT=m +CONFIG_MTD_REDBOOT_PARTS=m +CONFIG_MTD_CMDLINE_PARTS=m + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=m +CONFIG_MTD_BLOCK=m +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=m +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_GEN_PROBE=m +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_NOSWAP=y +# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_GEOMETRY is not set 
+CONFIG_MTD_CFI_INTELEXT=m +CONFIG_MTD_CFI_AMDSTD=m +CONFIG_MTD_CFI_STAA=m +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +CONFIG_MTD_ABSENT=m +CONFIG_MTD_OBSOLETE_CHIPS=y +CONFIG_MTD_AMDSTD=m +CONFIG_MTD_SHARP=m +CONFIG_MTD_JEDEC=m + +# +# Mapping drivers for chip access +# +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PHYSMAP=m +CONFIG_MTD_PHYSMAP_START=0x8000000 +CONFIG_MTD_PHYSMAP_LEN=0x4000000 +CONFIG_MTD_PHYSMAP_BUSWIDTH=2 +CONFIG_MTD_PNC2000=m +CONFIG_MTD_SC520CDP=m +CONFIG_MTD_NETSC520=m +CONFIG_MTD_SBC_GXX=m +CONFIG_MTD_ELAN_104NC=m +CONFIG_MTD_OCTAGON=m +CONFIG_MTD_VMAX=m +CONFIG_MTD_SCx200_DOCFLASH=m +CONFIG_MTD_AMD76XROM=m +CONFIG_MTD_ICH2ROM=m +CONFIG_MTD_SCB2_FLASH=m +CONFIG_MTD_NETtel=m +CONFIG_MTD_DILNETPC=m +CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000 +CONFIG_MTD_L440GX=m +CONFIG_MTD_PCI=m + +# +# Self-contained MTD device drivers +# +CONFIG_MTD_PMC551=m +CONFIG_MTD_PMC551_BUGFIX=y +# CONFIG_MTD_PMC551_DEBUG is not set +CONFIG_MTD_SLRAM=m +CONFIG_MTD_MTDRAM=m +CONFIG_MTDRAM_TOTAL_SIZE=4096 +CONFIG_MTDRAM_ERASE_SIZE=128 +CONFIG_MTD_BLKMTD=m + +# +# Disk-On-Chip Device Drivers +# +CONFIG_MTD_DOC2000=m +CONFIG_MTD_DOC2001=m +CONFIG_MTD_DOC2001PLUS=m +CONFIG_MTD_DOCPROBE=m +CONFIG_MTD_DOCPROBE_ADVANCED=y +CONFIG_MTD_DOCPROBE_ADDRESS=0x0000 +CONFIG_MTD_DOCPROBE_HIGH=y +CONFIG_MTD_DOCPROBE_55AA=y + +# +# NAND Flash Device Drivers +# +CONFIG_MTD_NAND=m +# CONFIG_MTD_NAND_VERIFY_WRITE is not set +CONFIG_MTD_NAND_IDS=m + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_CML1=m +CONFIG_PARPORT_SERIAL=m +CONFIG_PARPORT_PC_FIFO=y +CONFIG_PARPORT_PC_SUPERIO=y +CONFIG_PARPORT_PC_PCMCIA=m +CONFIG_PARPORT_OTHER=y +CONFIG_PARPORT_1284=y + +# +# Plug and Play support +# +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_ISAPNP=y +CONFIG_PNPBIOS=y +CONFIG_PNPBIOS_PROC_FS=y + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +CONFIG_BLK_DEV_XD=m +CONFIG_PARIDE=m +CONFIG_PARIDE_PARPORT=m + +# +# Parallel IDE 
high-level drivers +# +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m + +# +# Parallel IDE protocol modules +# +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_BPCK6=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +CONFIG_PARIDE_EPATC8=y +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m +CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m +CONFIG_BLK_CPQ_DA=m +CONFIG_BLK_CPQ_CISS_DA=m +CONFIG_CISS_SCSI_TAPE=y +CONFIG_BLK_DEV_DAC960=m +CONFIG_BLK_DEV_UMEM=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_CARMEL=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=64000 +CONFIG_BLK_DEV_INITRD=y +CONFIG_LBD=y +CONFIG_CIPHER_TWOFISH=m + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_IDEDISK_STROKE=y +CONFIG_BLK_DEV_IDECS=m +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_IDETAPE=m +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set +# CONFIG_IDE_TASKFILE_IO is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_CMD640=y +CONFIG_BLK_DEV_CMD640_ENHANCED=y +CONFIG_BLK_DEV_IDEPNP=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_OFFBOARD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_OPTI621=y +CONFIG_BLK_DEV_RZ1000=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +CONFIG_IDEDMA_ONLYDISK=y +CONFIG_BLK_DEV_ADMA=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +CONFIG_BLK_DEV_ATIIXP=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_TRIFLEX=y +CONFIG_BLK_DEV_CY82C693=y 
+CONFIG_BLK_DEV_CS5520=m +CONFIG_BLK_DEV_CS5530=m +CONFIG_BLK_DEV_HPT34X=y +CONFIG_HPT34X_AUTODMA=y +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_SC1200=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_BLK_DEV_NS87415=y +CONFIG_BLK_DEV_PDC202XX_OLD=y +CONFIG_PDC202XX_BURST=y +CONFIG_BLK_DEV_PDC202XX_NEW=y +CONFIG_PDC202XX_FORCE=y +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIIMAGE=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +CONFIG_BLK_DEV_TRM290=y +CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_IDE_CHIPSETS=y + +# +# Note: most of these also require special kernel boot parameters +# +CONFIG_BLK_DEV_4DRIVES=y +CONFIG_BLK_DEV_ALI14XX=y +CONFIG_BLK_DEV_DTC2278=y +CONFIG_BLK_DEV_HT6560B=y +# CONFIG_BLK_DEV_PDC4030 is not set +CONFIG_BLK_DEV_QD65XX=y +CONFIG_BLK_DEV_UMC8672=y +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=m +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=m +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m +CONFIG_CHR_DEV_SCH=m + +# +# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_ATTRS=m + +# +# SCSI low-level drivers +# +CONFIG_BLK_DEV_3W_XXXX_RAID=m +CONFIG_SCSI_7000FASST=m +CONFIG_SCSI_ACARD=m +CONFIG_SCSI_AHA152X=m +CONFIG_SCSI_AHA1542=m +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC7XXX_OLD=m +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +# CONFIG_AIC79XX_BUILD_FIRMWARE is not set +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +CONFIG_AIC79XX_REG_PRETTY_PRINT=y +# CONFIG_SCSI_AIC79XX_NEW is not set +CONFIG_SCSI_DPT_I2O=m +CONFIG_SCSI_ADVANSYS=m +CONFIG_SCSI_IN2000=m +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=m +CONFIG_MEGARAID_MAILBOX=m +CONFIG_MEGARAID_LEGACY=m +CONFIG_SCSI_SATA=y +CONFIG_SCSI_SATA_SVW=m +CONFIG_SCSI_ATA_PIIX=m +CONFIG_SCSI_SATA_PROMISE=m +CONFIG_SCSI_SATA_SIL=m +CONFIG_SCSI_SATA_SIS=m +CONFIG_SCSI_SATA_VIA=m +CONFIG_SCSI_SATA_VITESSE=m +CONFIG_SCSI_BUSLOGIC=m +# CONFIG_SCSI_OMIT_FLASHPOINT is not set +# CONFIG_SCSI_CPQFCTS is not set +CONFIG_SCSI_DMX3191D=m +CONFIG_SCSI_DTC3280=m +CONFIG_SCSI_EATA=m +CONFIG_SCSI_EATA_TAGGED_QUEUE=y +CONFIG_SCSI_EATA_LINKED_COMMANDS=y +CONFIG_SCSI_EATA_MAX_TAGS=16 +CONFIG_SCSI_EATA_PIO=m +CONFIG_SCSI_FUTURE_DOMAIN=m +CONFIG_SCSI_GDTH=m +CONFIG_SCSI_GENERIC_NCR5380=m +CONFIG_SCSI_GENERIC_NCR5380_MMIO=m +CONFIG_SCSI_GENERIC_NCR53C400=y +CONFIG_SCSI_IPS=m +CONFIG_SCSI_INIA100=m +CONFIG_SCSI_PPA=m +CONFIG_SCSI_IMM=m +# CONFIG_SCSI_IZIP_EPP16 is not set +# CONFIG_SCSI_IZIP_SLOW_CTR is not set +CONFIG_SCSI_NCR53C406A=m +CONFIG_SCSI_SYM53C8XX_2=m 
+CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_IPR=m +CONFIG_SCSI_IPR_TRACE=y +CONFIG_SCSI_IPR_DUMP=y +CONFIG_SCSI_PAS16=m +CONFIG_SCSI_PSI240I=m +CONFIG_SCSI_QLOGIC_FAS=m +CONFIG_SCSI_QLOGIC_ISP=m +CONFIG_SCSI_QLOGIC_FC=m +CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y +CONFIG_SCSI_QLOGIC_1280=m +CONFIG_SCSI_QLA2XXX=m +CONFIG_SCSI_QLA21XX=m +CONFIG_SCSI_QLA22XX=m +CONFIG_SCSI_QLA2300=m +CONFIG_SCSI_QLA2322=m +CONFIG_SCSI_QLA6312=m +CONFIG_SCSI_QLA6322=m +CONFIG_SCSI_QLA2XXX_FAILOVER=y +CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA4XXX_FAILOVER=y +CONFIG_SCSI_SYM53C416=m +CONFIG_SCSI_DC395x=m +CONFIG_SCSI_DC390T=m +CONFIG_SCSI_T128=m +CONFIG_SCSI_U14_34F=m +CONFIG_SCSI_U14_34F_TAGGED_QUEUE=y +CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y +CONFIG_SCSI_U14_34F_MAX_TAGS=8 +CONFIG_SCSI_ULTRASTOR=m +CONFIG_SCSI_NSP32=m +CONFIG_SCSI_DEBUG=m + +# +# PCMCIA SCSI adapter support +# +CONFIG_PCMCIA_AHA152X=m +CONFIG_PCMCIA_FDOMAIN=m +CONFIG_PCMCIA_NINJA_SCSI=m +CONFIG_PCMCIA_QLOGIC=m + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +CONFIG_CD_NO_IDESCSI=y +CONFIG_AZTCD=m +CONFIG_GSCD=m +CONFIG_MCD=m +CONFIG_MCD_IRQ=11 +CONFIG_MCD_BASE=0x300 +CONFIG_OPTCD=m +CONFIG_SJCD=m +CONFIG_ISP16_CDI=m +CONFIG_CDU535=m + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_FLAKEY=m +CONFIG_BLK_DEV_DM_BBR=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=m +CONFIG_FUSION_MAX_SGE=40 +CONFIG_FUSION_CTL=m +CONFIG_FUSION_LAN=m + +# +# IEEE 1394 (FireWire) support +# +CONFIG_IEEE1394=m + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set 
+CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y +CONFIG_IEEE1394_CONFIG_ROM_IP1394=y + +# +# Device Drivers +# +CONFIG_IEEE1394_PCILYNX=m +CONFIG_IEEE1394_OHCI1394=m + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m + +# +# I2O device support +# +CONFIG_I2O=m +CONFIG_I2O_CONFIG=m +CONFIG_I2O_BLOCK=m +CONFIG_I2O_SCSI=m +CONFIG_I2O_PROC=m + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=m +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=m +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_ACCEPT_QUEUES is not set + +# +# IP: Virtual Server Configuration +# +CONFIG_IP_VS=m +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=12 + +# +# IPVS transport protocol load balancing support +# +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y + +# +# IPVS scheduler +# +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m +CONFIG_IPV6=m +CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_NDISC_NEW=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_TUNNEL=m + +# +# MOBILE IPv6 (EXPERIMENTAL) +# 
+CONFIG_IPV6_MOBILITY=m +CONFIG_IPV6_MOBILITY_MN=m +CONFIG_IPV6_MOBILITY_HA=m +# CONFIG_IPV6_MOBILITY_DEBUG is not set +CONFIG_DECNET=m +CONFIG_DECNET_SIOCGIFCONF=y +# CONFIG_DECNET_ROUTER is not set +CONFIG_BRIDGE=m +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_POLICY=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_PHYSDEV=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +# CONFIG_IP_NF_NAT_LOCAL is not set +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m +CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_IP_NF_COMPAT_IPCHAINS=m +CONFIG_IP_NF_COMPAT_IPFWADM=m +CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_TARGET_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m + +# +# IPv6: 
Netfilter Configuration +# +CONFIG_IP6_NF_FTP=m +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_CONNTRACK=m +CONFIG_IP6_NF_MATCH_STATE=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m + +# +# DECnet: Netfilter Configuration +# +CONFIG_DECNET_NF_GRABULATOR=m + +# +# Bridge: Netfilter Configuration +# +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_XFRM=y +CONFIG_XFRM_USER=m + +# +# SCTP Configuration (EXPERIMENTAL) +# +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set +CONFIG_SCTP_HMAC_MD5=y +CONFIG_ATM=y +CONFIG_ATM_CLIP=y +CONFIG_ATM_CLIP_NO_ICMP=y +CONFIG_ATM_LANE=m +CONFIG_ATM_MPOA=m +CONFIG_ATM_BR2684=m +# CONFIG_ATM_BR2684_IPFILTER is not set +CONFIG_VLAN_8021Q=m +CONFIG_LLC=y +CONFIG_LLC2=m +CONFIG_IPX=m +# CONFIG_IPX_INTERN is not set +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=y +CONFIG_LTPC=m +CONFIG_COPS=m +CONFIG_COPS_DAYNA=y +CONFIG_COPS_TANGENT=y +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y 
+CONFIG_X25=m +CONFIG_LAPB=m +# CONFIG_NET_DIVERT is not set +CONFIG_ECONET=m +# CONFIG_ECONET_AUNUDP is not set +# CONFIG_ECONET_NATIVE is not set +CONFIG_WAN_ROUTER=m +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_CSZ=m +CONFIG_NET_SCH_ATM=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_DELAY=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +CONFIG_NET_PKTGEN=m +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +CONFIG_ARCNET=m +CONFIG_ARCNET_1201=m +CONFIG_ARCNET_1051=m +CONFIG_ARCNET_RAW=m +CONFIG_ARCNET_COM90xx=m +CONFIG_ARCNET_COM90xxIO=m +CONFIG_ARCNET_RIM_I=m +CONFIG_ARCNET_COM20020=m +CONFIG_ARCNET_COM20020_ISA=m +CONFIG_ARCNET_COM20020_PCI=m +CONFIG_DUMMY=m +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_ETHERTAP=m +CONFIG_NET_SB1000=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +CONFIG_HAPPYMEAL=m +CONFIG_SUNGEM=m +CONFIG_NET_VENDOR_3COM=y +CONFIG_EL1=m +CONFIG_EL2=m +CONFIG_ELPLUS=m +CONFIG_EL16=m +CONFIG_EL3=m +CONFIG_3C515=m +CONFIG_VORTEX=m +CONFIG_TYPHOON=m +CONFIG_LANCE=m +CONFIG_NET_VENDOR_SMC=y +CONFIG_WD80x3=m +CONFIG_ULTRA=m +CONFIG_SMC9194=m +CONFIG_NET_VENDOR_RACAL=y +CONFIG_NI52=m +CONFIG_NI65=m + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +CONFIG_DE4X5=m +CONFIG_WINBOND_840=m 
+CONFIG_DM9102=m +CONFIG_PCMCIA_XIRCOM=m +CONFIG_AT1700=m +CONFIG_DEPCA=m +CONFIG_HP100=m +CONFIG_NET_ISA=y +CONFIG_E2100=m +CONFIG_EWRK3=m +CONFIG_EEXPRESS=m +CONFIG_EEXPRESS_PRO=m +CONFIG_HPLAN_PLUS=m +CONFIG_HPLAN=m +CONFIG_LP486E=m +CONFIG_ETH16I=m +CONFIG_NE2000=m +CONFIG_ZNET=m +CONFIG_SEEQ8005=m +CONFIG_NET_PCI=y +CONFIG_PCNET32=m +CONFIG_AMD8111_ETH=m +CONFIG_ADAPTEC_STARFIRE=m +CONFIG_ADAPTEC_STARFIRE_NAPI=y +CONFIG_AC3200=m +CONFIG_APRICOT=m +CONFIG_B44=m +CONFIG_FORCEDETH=m +CONFIG_CS89x0=m +CONFIG_DGRS=m +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +CONFIG_E100_NAPI=y +CONFIG_FEALNX=m +CONFIG_NATSEMI=m +CONFIG_NE2K_PCI=m +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_8139_RXBUF_IDX=2 +CONFIG_SIS900=m +CONFIG_EPIC100=m +CONFIG_SUNDANCE=m +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_TLAN=m +CONFIG_VIA_RHINE=m +# CONFIG_VIA_RHINE_MMIO is not set +CONFIG_NET_POCKET=y +CONFIG_ATP=m +CONFIG_DE600=m +CONFIG_DE620=m + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +CONFIG_DL2K=m +CONFIG_E1000=m +CONFIG_E1000_NAPI=y +CONFIG_E1000_NEW=m +CONFIG_E1000_NEW_NAPI=y +CONFIG_NS83820=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_R8169=m +CONFIG_SIS190=m +CONFIG_SK98LIN=m +CONFIG_TIGON3=m +CONFIG_NET_BROADCOM=m +CONFIG_NET_BROADCOM_NEW=m +CONFIG_NET_BCM44=m +CONFIG_TIGON3_NEW=m + +# +# Ethernet (10000 Mbit) +# +CONFIG_IXGB=m +CONFIG_IXGB_NAPI=y +CONFIG_S2IO=m +CONFIG_S2IO_NAPI=y +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +CONFIG_SKFP=m +CONFIG_HIPPI=y +CONFIG_ROADRUNNER=m +CONFIG_ROADRUNNER_LARGE_RINGS=y +CONFIG_PLIP=m +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPPOATM=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y 
+CONFIG_SLIP_MODE_SLIP6=y + +# +# Wireless LAN (non-hamradio) +# +CONFIG_NET_RADIO=y + +# +# Obsolete Wireless cards support (pre-802.11) +# +CONFIG_STRIP=m +# CONFIG_ARLAN is not set +CONFIG_WAVELAN=m +CONFIG_PCMCIA_WAVELAN=m +CONFIG_PCMCIA_NETWAVE=m + +# +# Wireless 802.11 Frequency Hopping cards support +# +CONFIG_PCMCIA_RAYCS=m + +# +# Wireless 802.11b ISA/PCI cards support +# +CONFIG_AIRO=m +CONFIG_HERMES=m +CONFIG_PLX_HERMES=m +CONFIG_TMD_HERMES=m +CONFIG_PCI_HERMES=m +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m + +# +# Wireless 802.11b Pcmcia/Cardbus cards support +# +CONFIG_PCMCIA_HERMES=m +CONFIG_AIRO_CS=m +CONFIG_PCMCIA_ATMEL=m +CONFIG_PCMCIA_WL3501=m + +# +# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support +# +CONFIG_PRISM54=m +CONFIG_NET_WIRELESS=y + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMTR=m +CONFIG_IBMOL=m +CONFIG_IBMLS=m +CONFIG_3C359=m +CONFIG_TMS380TR=m +CONFIG_TMSPCI=m +CONFIG_SKISA=m +CONFIG_PROTEON=m +CONFIG_ABYSS=m +CONFIG_SMCTR=m +CONFIG_NET_FC=y +CONFIG_NET_LPFC=m +CONFIG_RCPCI=m +CONFIG_SHAPER=m +CONFIG_NETCONSOLE=m + +# +# Wan interfaces +# +CONFIG_WAN=y +CONFIG_HOSTESS_SV11=m +# CONFIG_COSA is not set +CONFIG_DSCC4=m +CONFIG_DSCC4_PCISYNC=y +CONFIG_DSCC4_PCI_RST=y +CONFIG_LANMEDIA=m +CONFIG_SEALEVEL_4021=m +CONFIG_SYNCLINK_SYNCPPP=m +CONFIG_HDLC=m +CONFIG_HDLC_RAW=y +CONFIG_HDLC_RAW_ETH=y +CONFIG_HDLC_CISCO=y +CONFIG_HDLC_FR=y +CONFIG_HDLC_PPP=y +CONFIG_HDLC_X25=y +CONFIG_PCI200SYN=m +CONFIG_WANXL=m +# CONFIG_WANXL_BUILD_FIRMWARE is not set +CONFIG_PC300=m +CONFIG_PC300_MLPPP=y +CONFIG_N2=m +CONFIG_C101=m +CONFIG_FARSYNC=m +CONFIG_DLCI=m +CONFIG_DLCI_COUNT=24 +CONFIG_DLCI_MAX=8 +CONFIG_SDLA=m +# CONFIG_WAN_ROUTER_DRIVERS is not set +CONFIG_LAPBETHER=m +CONFIG_X25_ASY=m +# CONFIG_SBNI is not set + +# +# PCMCIA network device support +# +CONFIG_NET_PCMCIA=y +CONFIG_PCMCIA_3C589=m +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_FMVJ18X=m +CONFIG_PCMCIA_PCNET=m +CONFIG_PCMCIA_NMCLAN=m +CONFIG_PCMCIA_SMC91C92=m +CONFIG_PCMCIA_XIRC2PS=m 
+CONFIG_PCMCIA_AXNET=m +CONFIG_ARCNET_COM20020_CS=m +CONFIG_PCMCIA_IBMTR=m + +# +# ATM drivers +# +CONFIG_ATM_TCP=m +CONFIG_ATM_LANAI=m +CONFIG_ATM_ENI=m +# CONFIG_ATM_ENI_DEBUG is not set +# CONFIG_ATM_ENI_TUNE_BURST is not set +CONFIG_ATM_FIRESTREAM=m +CONFIG_ATM_ZATM=m +# CONFIG_ATM_ZATM_DEBUG is not set +CONFIG_ATM_NICSTAR=m +CONFIG_ATM_NICSTAR_USE_SUNI=y +CONFIG_ATM_NICSTAR_USE_IDT77105=y +CONFIG_ATM_IDT77252=m +# CONFIG_ATM_IDT77252_DEBUG is not set +CONFIG_ATM_IDT77252_RCV_ALL=y +CONFIG_ATM_IDT77252_USE_SUNI=y +CONFIG_ATM_AMBASSADOR=m +# CONFIG_ATM_AMBASSADOR_DEBUG is not set +CONFIG_ATM_HORIZON=m +# CONFIG_ATM_HORIZON_DEBUG is not set +CONFIG_ATM_IA=m +# CONFIG_ATM_IA_DEBUG is not set +CONFIG_ATM_FORE200E_MAYBE=m +CONFIG_ATM_FORE200E_PCA=y +CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y +CONFIG_ATM_FORE200E_TX_RETRY=16 +CONFIG_ATM_FORE200E_DEBUG=0 +CONFIG_ATM_FORE200E=m +CONFIG_ATM_HE=m +CONFIG_ATM_HE_USE_SUNI=y + +# +# Amateur Radio support +# +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# +CONFIG_BPQETHER=m +CONFIG_SCC=m +CONFIG_SCC_DELAY=y +CONFIG_SCC_TRXECHO=y +CONFIG_BAYCOM_SER_FDX=m +CONFIG_BAYCOM_SER_HDX=m +CONFIG_BAYCOM_PAR=m +CONFIG_BAYCOM_EPP=m +CONFIG_YAM=m + +# +# IrDA (infrared) support +# +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +CONFIG_IRDA_ULTRA=y + +# +# IrDA options +# +CONFIG_IRDA_CACHE_LAST_LSAP=y +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +CONFIG_IRTTY_SIR=m + +# +# Dongle support +# +CONFIG_DONGLE=y +CONFIG_ESI_DONGLE=m +CONFIG_ACTISYS_DONGLE=m +CONFIG_TEKRAM_DONGLE=m +CONFIG_LITELINK_DONGLE=m +CONFIG_MA600_DONGLE=m +CONFIG_GIRBIL_DONGLE=m +CONFIG_MCP2120_DONGLE=m +CONFIG_OLD_BELKIN_DONGLE=m +CONFIG_ACT200L_DONGLE=m + +# +# Old SIR device drivers +# + +# +# Old Serial dongle support +# + 
+# +# FIR device drivers +# +CONFIG_USB_IRDA=m +CONFIG_SIGMATEL_FIR=m +CONFIG_NSC_FIR=m +CONFIG_WINBOND_FIR=m +CONFIG_TOSHIBA_FIR=m +CONFIG_SMC_IRCC_FIR=m +CONFIG_ALI_FIR=m +CONFIG_VLSI_FIR=m +CONFIG_VIA_FIR=m + +# +# Bluetooth support +# +CONFIG_BT=m +CONFIG_BT_L2CAP=m +CONFIG_BT_SCO=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_CMTP=m + +# +# Bluetooth device drivers +# +CONFIG_BT_HCIUSB=m +CONFIG_BT_HCIUSB_SCO=y +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_BCSP_TXCRC=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIDTL1=m +CONFIG_BT_HCIBT3C=m +CONFIG_BT_HCIBLUECARD=m +CONFIG_BT_HCIBTUART=m +CONFIG_BT_HCIVHCI=m +CONFIG_NETPOLL=y +CONFIG_NETPOLL_RX=y +CONFIG_NETPOLL_TRAP=y +CONFIG_NET_POLL_CONTROLLER=y + +# +# ISDN subsystem +# +CONFIG_ISDN=m + +# +# Old ISDN4Linux +# +CONFIG_ISDN_I4L=m +CONFIG_ISDN_PPP=y +CONFIG_ISDN_PPP_VJ=y +CONFIG_ISDN_MPP=y +CONFIG_IPPP_FILTER=y +CONFIG_ISDN_PPP_BSDCOMP=m +CONFIG_ISDN_AUDIO=y +CONFIG_ISDN_TTY_FAX=y +CONFIG_ISDN_X25=y + +# +# ISDN feature submodules +# + +# +# ISDN4Linux hardware drivers +# + +# +# Passive cards +# +CONFIG_ISDN_DRV_HISAX=m + +# +# D-channel protocol features +# +CONFIG_HISAX_EURO=y +CONFIG_DE_AOC=y +# CONFIG_HISAX_NO_SENDCOMPLETE is not set +# CONFIG_HISAX_NO_LLC is not set +# CONFIG_HISAX_NO_KEYPAD is not set +CONFIG_HISAX_1TR6=y +CONFIG_HISAX_NI1=y +CONFIG_HISAX_MAX_CARDS=8 + +# +# HiSax supported cards +# +CONFIG_HISAX_16_0=y +CONFIG_HISAX_16_3=y +CONFIG_HISAX_TELESPCI=y +CONFIG_HISAX_S0BOX=y +CONFIG_HISAX_AVM_A1=y +CONFIG_HISAX_FRITZPCI=y +CONFIG_HISAX_AVM_A1_PCMCIA=y +CONFIG_HISAX_ELSA=y +CONFIG_HISAX_IX1MICROR2=y +CONFIG_HISAX_DIEHLDIVA=y +CONFIG_HISAX_ASUSCOM=y +CONFIG_HISAX_TELEINT=y +CONFIG_HISAX_HFCS=y +CONFIG_HISAX_SEDLBAUER=y +CONFIG_HISAX_SPORTSTER=y +CONFIG_HISAX_MIC=y +CONFIG_HISAX_NETJET=y +CONFIG_HISAX_NETJET_U=y +CONFIG_HISAX_NICCY=y 
+CONFIG_HISAX_ISURF=y +CONFIG_HISAX_HSTSAPHIR=y +CONFIG_HISAX_BKM_A4T=y +CONFIG_HISAX_SCT_QUADRO=y +CONFIG_HISAX_GAZEL=y +CONFIG_HISAX_HFC_PCI=y +CONFIG_HISAX_W6692=y +CONFIG_HISAX_HFC_SX=y +CONFIG_HISAX_ENTERNOW_PCI=y +CONFIG_HISAX_DEBUG=y + +# +# HiSax PCMCIA card service modules +# +CONFIG_HISAX_SEDLBAUER_CS=m +CONFIG_HISAX_ELSA_CS=m +CONFIG_HISAX_AVM_A1_CS=m +CONFIG_HISAX_TELES_CS=m + +# +# HiSax sub driver modules +# +CONFIG_HISAX_ST5481=m +CONFIG_HISAX_HFCUSB=m +CONFIG_HISAX_FRITZ_PCIPNP=m +CONFIG_HISAX_HDLC=y + +# +# Active cards +# +CONFIG_ISDN_DRV_ICN=m +CONFIG_ISDN_DRV_PCBIT=m +CONFIG_ISDN_DRV_SC=m +CONFIG_ISDN_DRV_ACT2000=m +CONFIG_ISDN_DRV_TPAM=m + +# +# CAPI subsystem +# +CONFIG_ISDN_CAPI=m +CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y +CONFIG_ISDN_CAPI_MIDDLEWARE=y +CONFIG_ISDN_CAPI_CAPI20=m +CONFIG_ISDN_CAPI_CAPIFS_BOOL=y +CONFIG_ISDN_CAPI_CAPIFS=m +CONFIG_ISDN_CAPI_CAPIDRV=m + +# +# CAPI hardware drivers +# + +# +# Active AVM cards +# +CONFIG_CAPI_AVM=y +CONFIG_ISDN_DRV_AVMB1_B1ISA=m +CONFIG_ISDN_DRV_AVMB1_B1PCI=m +CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y +CONFIG_ISDN_DRV_AVMB1_T1ISA=m +CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m +CONFIG_ISDN_DRV_AVMB1_AVM_CS=m +CONFIG_ISDN_DRV_AVMB1_T1PCI=m +CONFIG_ISDN_DRV_AVMB1_C4=m + +# +# Active Eicon DIVA Server cards +# +CONFIG_CAPI_EICON=y +CONFIG_ISDN_DIVAS=m +CONFIG_ISDN_DIVAS_BRIPCI=y +CONFIG_ISDN_DIVAS_PRIPCI=y +CONFIG_ISDN_DIVAS_DIVACAPI=m +CONFIG_ISDN_DIVAS_USERIDI=m +CONFIG_ISDN_DIVAS_MAINT=m + +# +# Telephony Support +# +CONFIG_PHONE=m +CONFIG_PHONE_IXJ=m +CONFIG_PHONE_IXJ_PCMCIA=m + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_TSDEV=m +CONFIG_INPUT_TSDEV_SCREEN_X=240 +CONFIG_INPUT_TSDEV_SCREEN_Y=320 +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +CONFIG_GAMEPORT=m +CONFIG_SOUND_GAMEPORT=m 
+CONFIG_GAMEPORT_NS558=m +CONFIG_GAMEPORT_L4=m +CONFIG_GAMEPORT_EMU10K1=m +CONFIG_GAMEPORT_VORTEX=m +CONFIG_GAMEPORT_FM801=m +CONFIG_GAMEPORT_CS461x=m +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=m +CONFIG_SERIO_CT82C710=m +CONFIG_SERIO_PARKBD=m +CONFIG_SERIO_PCIPS2=m + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +CONFIG_KEYBOARD_SUNKBD=m +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_XTKBD=m +CONFIG_KEYBOARD_NEWTON=m +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_SERIAL=m +CONFIG_MOUSE_INPORT=m +CONFIG_MOUSE_ATIXL=y +CONFIG_MOUSE_LOGIBM=m +CONFIG_MOUSE_PC110PAD=m +# CONFIG_MOUSE_VSXXXAA is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_ANALOG=m +CONFIG_JOYSTICK_A3D=m +CONFIG_JOYSTICK_ADI=m +CONFIG_JOYSTICK_COBRA=m +CONFIG_JOYSTICK_GF2K=m +CONFIG_JOYSTICK_GRIP=m +CONFIG_JOYSTICK_GRIP_MP=m +CONFIG_JOYSTICK_GUILLEMOT=m +CONFIG_JOYSTICK_INTERACT=m +CONFIG_JOYSTICK_SIDEWINDER=m +CONFIG_JOYSTICK_TMDC=m +CONFIG_JOYSTICK_IFORCE=m +CONFIG_JOYSTICK_IFORCE_USB=y +CONFIG_JOYSTICK_IFORCE_232=y +CONFIG_JOYSTICK_WARRIOR=m +CONFIG_JOYSTICK_MAGELLAN=m +CONFIG_JOYSTICK_SPACEORB=m +CONFIG_JOYSTICK_SPACEBALL=m +CONFIG_JOYSTICK_STINGER=m +CONFIG_JOYSTICK_TWIDDLER=m +CONFIG_JOYSTICK_DB9=m +CONFIG_JOYSTICK_GAMECON=m +CONFIG_JOYSTICK_TURBOGRAFX=m +# CONFIG_INPUT_JOYDUMP is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_GUNZE=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PCSPKR=y +CONFIG_INPUT_UINPUT=m + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_ECC=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_ROCKETPORT=m +CONFIG_SYNCLINK=m +CONFIG_SYNCLINKMP=m +CONFIG_N_HDLC=m +CONFIG_STALDRV=y + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_CS=m +# CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is 
not set +CONFIG_SERIAL_8250_MULTIPORT=y +CONFIG_SERIAL_8250_RSA=y + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_ICOM is not set +CONFIG_SERIAL_JSM=m +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_PRINTER=m +# CONFIG_LP_CONSOLE is not set +CONFIG_PPDEV=m +CONFIG_TIPAR=m +CONFIG_QIC02_TAPE=m +CONFIG_QIC02_DYNCONF=y + +# +# Setting runtime QIC-02 configuration is done with qic02conf +# + +# +# from the tpqic02-support package. It is available at +# + +# +# metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/ +# + +# +# IPMI +# +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_PANIC_EVENT=y +CONFIG_IPMI_PANIC_STRING=y +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_KCS=m +CONFIG_IPMI_WATCHDOG=m + +# +# Watchdog Cards +# +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=m +CONFIG_ACQUIRE_WDT=m +CONFIG_ADVANTECH_WDT=m +CONFIG_ALIM1535_WDT=m +CONFIG_ALIM7101_WDT=m +CONFIG_AMD7XX_TCO=m +CONFIG_SC520_WDT=m +CONFIG_EUROTECH_WDT=m +CONFIG_IB700_WDT=m +CONFIG_WAFER_WDT=m +CONFIG_I8XX_TCO=m +CONFIG_SC1200_WDT=m +CONFIG_SCx200_WDT=m +CONFIG_60XX_WDT=m +CONFIG_CPU5_WDT=m +CONFIG_W83627HF_WDT=m +CONFIG_W83877F_WDT=m +CONFIG_MACHZ_WDT=m + +# +# ISA-based Watchdog Cards +# +CONFIG_PCWATCHDOG=m +CONFIG_MIXCOMWD=m +CONFIG_WDT=m +CONFIG_WDT_501=y + +# +# PCI-based Watchdog Cards +# +CONFIG_PCIPCWATCHDOG=m +CONFIG_WDTPCI=m +CONFIG_WDT_501_PCI=y + +# +# USB-based Watchdog Cards +# +CONFIG_USBPCWATCHDOG=m +CONFIG_HW_RANDOM=m +CONFIG_NVRAM=m +CONFIG_RTC=y +CONFIG_DTLK=m +CONFIG_R3964=m +CONFIG_APPLICOM=m +CONFIG_SONYPI=m + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=m +CONFIG_AGP_ALI=m +CONFIG_AGP_ATI=m +CONFIG_AGP_AMD=m +CONFIG_AGP_AMD64=m +CONFIG_AGP_INTEL=m +CONFIG_AGP_INTEL_MCH=m +CONFIG_AGP_NVIDIA=m +CONFIG_AGP_SIS=m +CONFIG_AGP_SWORKS=m +CONFIG_AGP_VIA=m +CONFIG_AGP_EFFICEON=m +# CONFIG_DRM is not set + +# +# PCMCIA character 
devices +# +CONFIG_SYNCLINK_CS=m +# CONFIG_MWAVE is not set +CONFIG_SCx200_GPIO=m +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=4096 +CONFIG_HANGCHECK_TIMER=m +CONFIG_VTUNE=m + +# +# Linux InfraRed Controller +# +CONFIG_LIRC_SUPPORT=m +CONFIG_LIRC_MAX_DEV=2 +CONFIG_LIRC_BT829=m +CONFIG_LIRC_IT87=m +CONFIG_LIRC_ATIUSB=m +CONFIG_LIRC_SERIAL=m +# CONFIG_LIRC_HOMEBREW is not set +CONFIG_LIRC_PORT_SERIAL=0x3f8 +CONFIG_LIRC_IRQ_SERIAL=4 +CONFIG_LIRC_SIR=m +CONFIG_LIRC_PORT_SIR=0x3f8 +CONFIG_LIRC_IRQ_SIR=4 + +# +# I2C support +# +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=m +CONFIG_I2C_ALGOPCF=m + +# +# I2C Hardware Bus support +# +CONFIG_I2C_ALI1535=m +CONFIG_I2C_ALI15X3=m +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_I801=m +CONFIG_I2C_I810=m +CONFIG_I2C_ISA=m +CONFIG_I2C_NFORCE2=m +CONFIG_I2C_PARPORT=m +CONFIG_I2C_PARPORT_LIGHT=m +CONFIG_I2C_PIIX4=m +CONFIG_I2C_PROSAVAGE=m +CONFIG_I2C_SAVAGE4=m +CONFIG_SCx200_I2C=m +CONFIG_SCx200_I2C_SCL=12 +CONFIG_SCx200_I2C_SDA=13 +CONFIG_SCx200_ACB=m +CONFIG_I2C_SIS5595=m +CONFIG_I2C_SIS630=m +CONFIG_I2C_SIS96X=m +CONFIG_I2C_VIA=m +CONFIG_I2C_VIAPRO=m +CONFIG_I2C_VOODOO3=m + +# +# Hardware Sensors Chip support +# +CONFIG_I2C_SENSOR=m +CONFIG_SENSORS_ADM1021=m +CONFIG_SENSORS_ASB100=m +CONFIG_SENSORS_DS1621=m +CONFIG_SENSORS_FSCHER=m +CONFIG_SENSORS_GL518SM=m +CONFIG_SENSORS_IT87=m +CONFIG_SENSORS_LM75=m +CONFIG_SENSORS_LM78=m +CONFIG_SENSORS_LM80=m +CONFIG_SENSORS_LM83=m +CONFIG_SENSORS_LM85=m +CONFIG_SENSORS_LM90=m +CONFIG_SENSORS_VIA686A=m +CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83L785TS=m +CONFIG_SENSORS_W83627HF=m + +# +# Other I2C Chip support +# +CONFIG_SENSORS_EEPROM=m +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Misc devices +# +CONFIG_IBM_ASM=m + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=m + +# +# Video For Linux +# + +# +# Video Adapters +# +CONFIG_VIDEO_BT848=m 
+CONFIG_VIDEO_PMS=m +CONFIG_VIDEO_BWQCAM=m +CONFIG_VIDEO_CQCAM=m +CONFIG_VIDEO_W9966=m +CONFIG_VIDEO_CPIA=m +CONFIG_VIDEO_CPIA_PP=m +CONFIG_VIDEO_CPIA_USB=m +CONFIG_VIDEO_SAA5246A=m +CONFIG_VIDEO_SAA5249=m +CONFIG_TUNER_3036=m +CONFIG_VIDEO_STRADIS=m +CONFIG_VIDEO_ZORAN=m +CONFIG_VIDEO_ZORAN_BUZ=m +CONFIG_VIDEO_ZORAN_DC10=m +CONFIG_VIDEO_ZORAN_DC30=m +CONFIG_VIDEO_ZORAN_LML33=m +CONFIG_VIDEO_ZORAN_LML33R10=m +CONFIG_VIDEO_SAA7134=m +CONFIG_VIDEO_MXB=m +CONFIG_VIDEO_DPC=m +CONFIG_VIDEO_HEXIUM_ORION=m +CONFIG_VIDEO_HEXIUM_GEMINI=m +CONFIG_VIDEO_CX88=m + +# +# Radio Adapters +# +CONFIG_RADIO_CADET=m +CONFIG_RADIO_RTRACK=m +CONFIG_RADIO_RTRACK2=m +CONFIG_RADIO_AZTECH=m +CONFIG_RADIO_GEMTEK=m +CONFIG_RADIO_GEMTEK_PCI=m +CONFIG_RADIO_MAXIRADIO=m +CONFIG_RADIO_MAESTRO=m +CONFIG_RADIO_MIROPCM20=m +# CONFIG_RADIO_MIROPCM20_RDS is not set +CONFIG_RADIO_SF16FMI=m +CONFIG_RADIO_SF16FMR2=m +CONFIG_RADIO_TERRATEC=m +CONFIG_RADIO_TRUST=m +CONFIG_RADIO_TYPHOON=m +CONFIG_RADIO_TYPHOON_PROC_FS=y +CONFIG_RADIO_ZOLTRIX=m + +# +# Digital Video Broadcasting Devices +# +CONFIG_DVB=y +CONFIG_DVB_CORE=m + +# +# Supported Frontend Modules +# +CONFIG_DVB_TWINHAN_DST=m +CONFIG_DVB_STV0299=m +CONFIG_DVB_SP887X=m +CONFIG_DVB_SP887X_FIRMWARE_FILE="/etc/dvb/sc_main.mc" +CONFIG_DVB_ALPS_TDLB7=m +CONFIG_DVB_ALPS_TDMB7=m +CONFIG_DVB_ATMEL_AT76C651=m +CONFIG_DVB_CX24110=m +CONFIG_DVB_GRUNDIG_29504_491=m +CONFIG_DVB_GRUNDIG_29504_401=m +CONFIG_DVB_MT312=m +CONFIG_DVB_VES1820=m +CONFIG_DVB_VES1X93=m +CONFIG_DVB_TDA1004X=m +CONFIG_DVB_TDA1004X_FIRMWARE_FILE="/usr/lib/hotplug/firmware/tda1004x.bin" +CONFIG_DVB_NXT6000=m + +# +# Supported SAA7146 based PCI Adapters +# +CONFIG_DVB_AV7110=m +# CONFIG_DVB_AV7110_FIRMWARE is not set +CONFIG_DVB_AV7110_OSD=y +CONFIG_DVB_BUDGET=m +CONFIG_DVB_BUDGET_CI=m +CONFIG_DVB_BUDGET_AV=m +CONFIG_DVB_BUDGET_PATCH=m + +# +# Supported USB Adapters +# +CONFIG_DVB_TTUSB_BUDGET=m +CONFIG_DVB_TTUSB_DEC=m + +# +# Supported FlexCopII (B2C2) Adapters +# +CONFIG_DVB_B2C2_SKYSTAR=m + 
+# +# Supported BT878 Adapters +# +CONFIG_DVB_BT8XX=m +CONFIG_VIDEO_SAA7146=m +CONFIG_VIDEO_SAA7146_VV=m +CONFIG_VIDEO_VIDEOBUF=m +CONFIG_VIDEO_TUNER=m +CONFIG_VIDEO_BUF=m +CONFIG_VIDEO_BTCX=m +CONFIG_VIDEO_IR=m + +# +# Graphics support +# +CONFIG_FB=y +CONFIG_FB_PM2=m +CONFIG_FB_PM2_FIFO_DISCONNECT=y +CONFIG_FB_CYBER2000=m +CONFIG_FB_IMSTT=y +CONFIG_FB_VGA16=m +CONFIG_FB_VESA=y +CONFIG_VIDEO_SELECT=y +CONFIG_FB_HGA=m +CONFIG_FB_RIVA=m +CONFIG_FB_I810=m +CONFIG_FB_I810_GTF=y +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON_OLD is not set +CONFIG_FB_RADEON=m +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +# CONFIG_FB_ATY128 is not set +CONFIG_FB_ATY=m +CONFIG_FB_ATY_CT=y +CONFIG_FB_ATY_GX=y +CONFIG_FB_ATY_XL_INIT=y +CONFIG_FB_SIS=m +CONFIG_FB_SIS_300=y +CONFIG_FB_SIS_315=y +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_KYRO=m +CONFIG_FB_3DFX=m +CONFIG_FB_VOODOO1=m +CONFIG_FB_TRIDENT=m +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_MDA_CONSOLE=m +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_PCI_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +# CONFIG_LOGO is not set + +# +# Bootsplash configuration +# +CONFIG_BOOTSPLASH=y + +# +# Sound +# +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_RTCTIMER=m +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DEBUG=y +CONFIG_SND_DEBUG_MEMORY=y +# CONFIG_SND_DEBUG_DETECT is not set + +# +# Generic devices +# +CONFIG_SND_MPU401_UART=m +CONFIG_SND_OPL3_LIB=m +CONFIG_SND_OPL4_LIB=m +CONFIG_SND_VX_LIB=m +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m + +# +# ISA devices +# 
+CONFIG_SND_AD1816A=m +CONFIG_SND_AD1848=m +CONFIG_SND_CS4231=m +CONFIG_SND_CS4232=m +CONFIG_SND_CS4236=m +CONFIG_SND_ES968=m +CONFIG_SND_ES1688=m +CONFIG_SND_ES18XX=m +CONFIG_SND_GUSCLASSIC=m +CONFIG_SND_GUSEXTREME=m +CONFIG_SND_GUSMAX=m +CONFIG_SND_INTERWAVE=m +CONFIG_SND_INTERWAVE_STB=m +CONFIG_SND_OPTI92X_AD1848=m +CONFIG_SND_OPTI92X_CS4231=m +CONFIG_SND_OPTI93X=m +CONFIG_SND_SB8=m +CONFIG_SND_SB16=m +CONFIG_SND_SBAWE=m +CONFIG_SND_SB16_CSP=y +CONFIG_SND_WAVEFRONT=m +CONFIG_SND_ALS100=m +CONFIG_SND_AZT2320=m +CONFIG_SND_CMI8330=m +CONFIG_SND_DT019X=m +CONFIG_SND_OPL3SA2=m +CONFIG_SND_SGALAXY=m +CONFIG_SND_SSCAPE=m + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=m +CONFIG_SND_ALI5451=m +CONFIG_SND_ATIIXP=m +CONFIG_SND_AU8810=m +CONFIG_SND_AU8820=m +CONFIG_SND_AU8830=m +CONFIG_SND_AZT3328=m +CONFIG_SND_BT87X=m +CONFIG_SND_CS46XX=m +CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_CS4281=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_KORG1212=m +CONFIG_SND_MIXART=m +CONFIG_SND_NM256=m +CONFIG_SND_RME32=m +CONFIG_SND_RME96=m +CONFIG_SND_RME9652=m +CONFIG_SND_HDSP=m +CONFIG_SND_TRIDENT=m +CONFIG_SND_YMFPCI=m +CONFIG_SND_ALS4000=m +CONFIG_SND_CMIPCI=m +CONFIG_SND_ENS1370=m +CONFIG_SND_ENS1371=m +CONFIG_SND_ES1938=m +CONFIG_SND_ES1968=m +CONFIG_SND_MAESTRO3=m +CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_ICE1712=m +CONFIG_SND_ICE1724=m +CONFIG_SND_INTEL8X0=m +CONFIG_SND_INTEL8X0M=m +CONFIG_SND_SONICVIBES=m +CONFIG_SND_VIA82XX=m +CONFIG_SND_VX222=m + +# +# ALSA USB devices +# +CONFIG_SND_USB_AUDIO=m + +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_VXP440 is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# Open Sound System +# +CONFIG_SOUND_PRIME=m +CONFIG_SOUND_BT878=m +CONFIG_SOUND_CMPCI=m +CONFIG_SOUND_CMPCI_FM=y +CONFIG_SOUND_CMPCI_FMIO=0x388 +CONFIG_SOUND_CMPCI_MIDI=y +CONFIG_SOUND_CMPCI_MPUIO=0x330 +CONFIG_SOUND_CMPCI_JOYSTICK=y +CONFIG_SOUND_CMPCI_CM8738=y +# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set +CONFIG_SOUND_CMPCI_SPDIFLOOP=y 
+CONFIG_SOUND_CMPCI_SPEAKERS=2 +CONFIG_SOUND_EMU10K1=m +CONFIG_MIDI_EMU10K1=y +# CONFIG_SOUND_FUSION is not set +CONFIG_SOUND_CS4281=m +CONFIG_SOUND_ES1370=m +CONFIG_SOUND_ES1371=m +CONFIG_SOUND_ESSSOLO1=m +CONFIG_SOUND_MAESTRO=m +CONFIG_SOUND_MAESTRO3=m +CONFIG_SOUND_ICH=m +CONFIG_SOUND_SONICVIBES=m +CONFIG_SOUND_TRIDENT=m +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_VIA82CXXX=m +CONFIG_MIDI_VIA82CXXX=y +CONFIG_SOUND_OSS=m +CONFIG_SOUND_TRACEINIT=y +CONFIG_SOUND_DMAP=y +# CONFIG_SOUND_AD1816 is not set +CONFIG_SOUND_AD1889=m +CONFIG_SOUND_SGALAXY=m +CONFIG_SOUND_ADLIB=m +CONFIG_SOUND_ACI_MIXER=m +CONFIG_SOUND_CS4232=m +CONFIG_SOUND_SSCAPE=m +CONFIG_SOUND_GUS=m +# CONFIG_SOUND_GUS16 is not set +CONFIG_SOUND_GUSMAX=y +CONFIG_SOUND_VMIDI=m +CONFIG_SOUND_TRIX=m +CONFIG_SOUND_MSS=m +CONFIG_SOUND_MPU401=m +CONFIG_SOUND_NM256=m +CONFIG_SOUND_MAD16=m +CONFIG_MAD16_OLDCARD=y +CONFIG_SOUND_PAS=m +CONFIG_SOUND_PSS=m +CONFIG_PSS_MIXER=y +# CONFIG_PSS_HAVE_BOOT is not set +CONFIG_SOUND_SB=m +# CONFIG_SOUND_AWE32_SYNTH is not set +CONFIG_SOUND_WAVEFRONT=m +CONFIG_SOUND_MAUI=m +CONFIG_SOUND_YM3812=m +CONFIG_SOUND_OPL3SA1=m +CONFIG_SOUND_OPL3SA2=m +CONFIG_SOUND_YMFPCI=m +CONFIG_SOUND_YMFPCI_LEGACY=y +CONFIG_SOUND_UART6850=m +CONFIG_SOUND_AEDSP16=m +CONFIG_SC6600=y +CONFIG_SC6600_JOY=y +CONFIG_SC6600_CDROM=4 +CONFIG_SC6600_CDROMBASE=0x0 +# CONFIG_AEDSP16_MSS is not set +# CONFIG_AEDSP16_SBPRO is not set +CONFIG_AEDSP16_MPU401=y +CONFIG_SOUND_TVMIXER=m +CONFIG_SOUND_KAHLUA=m +CONFIG_SOUND_ALI5455=m +CONFIG_SOUND_FORTE=m +CONFIG_SOUND_RME96XX=m +CONFIG_SOUND_AD1980=m + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_EHCI_SPLIT_ISO=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m + +# +# USB 
Device Class drivers +# +CONFIG_USB_AUDIO=m + +# +# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem +# +CONFIG_USB_MIDI=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y + +# +# USB Human Interface Devices (HID) +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_USB_HIDDEV=y + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_AIPTEK=m +CONFIG_USB_WACOM=m +CONFIG_USB_KBTAB=m +CONFIG_USB_POWERMATE=m +CONFIG_USB_MTOUCH=m +CONFIG_USB_XPAD=m +CONFIG_USB_ATI_REMOTE=m + +# +# USB Imaging devices +# +CONFIG_USB_MDC800=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_HPUSBSCSI=m + +# +# USB Multimedia devices +# +CONFIG_USB_DABUSB=m +CONFIG_USB_VICAM=m +CONFIG_USB_DSBR=m +CONFIG_USB_IBMCAM=m +CONFIG_USB_KONICAWC=m +CONFIG_USB_OV511=m +CONFIG_USB_SE401=m +CONFIG_USB_STV680=m +CONFIG_USB_W9968CF=m + +# +# USB Network adaptors +# +CONFIG_USB_CATC=m +CONFIG_USB_KAWETH=m +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_USBNET=m + +# +# USB Host-to-Host Cables +# +CONFIG_USB_ALI_M5632=y +CONFIG_USB_AN2720=y +CONFIG_USB_BELKIN=y +CONFIG_USB_GENESYS=y +CONFIG_USB_NET1080=y +CONFIG_USB_PL2301=y + +# +# Intelligent USB Devices/Gadgets +# +CONFIG_USB_ARMLINUX=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_ZAURUS=y +CONFIG_USB_CDCETHER=y + +# +# USB Network Adapters +# +CONFIG_USB_AX8817X=y + +# +# USB port drivers +# +CONFIG_USB_USS720=m + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m 
+CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA28=y +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +CONFIG_USB_SERIAL_KEYSPAN_USA19=y +CONFIG_USB_SERIAL_KEYSPAN_USA18X=y +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_SAFE=m +CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_EZUSB=y + +# +# USB Miscellaneous drivers +# +CONFIG_USB_EMI62=m +CONFIG_USB_EMI26=m +CONFIG_USB_TIGL=m +CONFIG_USB_AUERSWALD=m +CONFIG_USB_RIO500=m +CONFIG_USB_LEGOTOWER=m +CONFIG_USB_LCD=m +CONFIG_USB_LED=m +CONFIG_USB_CYTHERM=m +CONFIG_USB_SPEEDTOUCH=m +# CONFIG_USB_TEST is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_SDP is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_UDAPL_HELPER=m +CONFIG_INFINIBAND_MELLANOX_HCA=m +CONFIG_AUDIT=m + +# +# File systems +# +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=m +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=m +CONFIG_JBD_DEBUG=y +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y 
+CONFIG_JFS_DMAPI=y +# CONFIG_JFS_DEBUG is not set +CONFIG_JFS_STATISTICS=y +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=m +CONFIG_XFS_RT=y +CONFIG_XFS_QUOTA=m +CONFIG_XFS_DMAPI=y +CONFIG_XFS_SECURITY=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_MINIX_FS=y +CONFIG_ROMFS_FS=m +CONFIG_DMAPI=m +# CONFIG_DMAPI_DEBUG is not set +CONFIG_QUOTA=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_QUOTACTL=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_ZISOFS_FS=y +CONFIG_UDF_FS=m + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS_XATTR=y +CONFIG_DEVPTS_FS_SECURITY=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +CONFIG_RELAYFS_FS=m +# CONFIG_KLOG_CHANNEL is not set + +# +# Miscellaneous filesystems +# +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +CONFIG_BFS_FS=m +CONFIG_EFS_FS=m +CONFIG_JFFS_FS=m +CONFIG_JFFS_FS_VERBOSE=0 +CONFIG_JFFS2_FS=m +CONFIG_JFFS2_FS_DEBUG=0 +# CONFIG_JFFS2_FS_NAND is not set +CONFIG_CRAMFS=m +CONFIG_VXFS_FS=m +CONFIG_HPFS_FS=m +CONFIG_QNX4FS_FS=m +# CONFIG_QNX4FS_RW is not set +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_ACL=y +CONFIG_NFS_ACL_SUPPORT=y +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=y +CONFIG_STATD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp850" +CONFIG_CIFS=m 
+CONFIG_CIFS_STATS=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_CODA_FS=m +# CONFIG_CODA_FS_OLD_API is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_AFS_FS=m +CONFIG_RXRPC=m + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ATARI_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +# CONFIG_MINIX_SUBPARTITION is not set +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_LDM_PARTITION=y +# CONFIG_LDM_DEBUG is not set +CONFIG_NEC98_PARTITION=y +CONFIG_SGI_PARTITION=y +CONFIG_ULTRIX_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_FSHOOKS=y + +# +# Profiling 
support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# +# Kernel hacking +# +CONFIG_CRASH_DUMP=m +CONFIG_KERNTYPES=y +CONFIG_CRASH_DUMP_BLOCKDEV=m +CONFIG_CRASH_DUMP_NETDEV=m +# CONFIG_CRASH_DUMP_MEMDEV is not set +CONFIG_CRASH_DUMP_COMPRESS_RLE=m +CONFIG_CRASH_DUMP_COMPRESS_GZIP=m +CONFIG_DEBUG_KERNEL=y +CONFIG_EARLY_PRINTK=y +# CONFIG_KPROBES is not set +# CONFIG_DEBUGREG is not set +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_SLAB is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_KDB is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +# CONFIG_HOOK is not set + +# +# Security options +# +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_CAPABILITIES=m +CONFIG_SECURITY_ROOTPLUG=m +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +# CONFIG_SECURITY_SELINUX_MLS is not set + +# +# IBM Crypto Hardware support +# +CONFIG_IBM_CRYPTO=m +CONFIG_ICA_LEEDSLITE=m + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m +CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=y +CONFIG_QSORT=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m + +# +# Build options +# +CONFIG_SUSE_KERNEL=y +CONFIG_CFGNAME="bigsmp" +CONFIG_RELEASE="SLES9_SP1_BRANCH_2004110217390391" +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_PC=y diff --git 
a/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686.config new file mode 100644 index 0000000..349bca7 --- /dev/null +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.5-2.6-suse-i686.config @@ -0,0 +1,2845 @@ +# +# Automatically generated make config: don't edit +# +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_UID16=y +CONFIG_GENERIC_ISA_DMA=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +# CONFIG_STANDALONE is not set + +# +# General setup +# +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_SYSCTL=y +CONFIG_LOG_BUF_SHIFT=17 +CONFIG_HOTPLUG=y +CONFIG_EVLOG=y +# CONFIG_EVLOG_FWPRINTK is not set +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +# CONFIG_EMBEDDED is not set + +# +# Class Based Kernel Resource Management +# +CONFIG_CKRM=y +CONFIG_RCFS_FS=m +CONFIG_CKRM_TYPE_TASKCLASS=y +CONFIG_CKRM_RES_NUMTASKS=m +CONFIG_CKRM_TYPE_SOCKETCLASS=y +CONFIG_CKRM_RBCE=m +CONFIG_CKRM_CRBCE=m +CONFIG_DELAY_ACCT=y +CONFIG_KALLSYMS=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_OBSOLETE_MODPARM=y +CONFIG_MODVERSIONS=y +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +# CONFIG_X86_PC is not set +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +CONFIG_X86_GENERICARCH=y +# CONFIG_X86_ES7000 is not set +CONFIG_X86_CYCLONE_TIMER=y +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +# CONFIG_M686 is not set +CONFIG_MPENTIUMII=y +# CONFIG_MPENTIUMIII 
is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +CONFIG_X86_GENERIC=y +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_L1_CACHE_SHIFT=7 +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_INTEL_USERCOPY=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +# CONFIG_HPET_TIMER is not set +# CONFIG_HPET_EMULATE_RTC is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=128 +CONFIG_SCHED_SMT=y +# CONFIG_PREEMPT is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_TSC=y +CONFIG_X86_MCE=y +# CONFIG_X86_MCE_NONFATAL is not set +CONFIG_X86_MCE_P4THERMAL=y +CONFIG_TOSHIBA=m +CONFIG_I8K=m +CONFIG_MICROCODE=m +CONFIG_X86_MSR=m +CONFIG_X86_CPUID=m + +# +# Firmware Drivers +# +CONFIG_EDD=m +# CONFIG_NOHIGHMEM is not set +# CONFIG_HIGHMEM4G is not set +CONFIG_HIGHMEM64G=y +CONFIG_HIGHMEM=y +CONFIG_X86_PAE=y +# CONFIG_NUMA is not set +CONFIG_HIGHPTE=y +# CONFIG_MATH_EMULATION is not set +CONFIG_MTRR=y +CONFIG_EFI=y +CONFIG_IRQBALANCE=y +CONFIG_HAVE_DEC_LOCK=y +CONFIG_BOOT_IOREMAP=y +CONFIG_REGPARM=y + +# +# Special options +# +CONFIG_PROC_MM=y + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_SOFTWARE_SUSPEND is not set +# CONFIG_PM_DISK is not set + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +CONFIG_ACPI_SLEEP=y +CONFIG_ACPI_SLEEP_PROC_FS=y +CONFIG_ACPI_AC=m +CONFIG_ACPI_BATTERY=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_ACPI_THERMAL=m +# CONFIG_ACPI_ASUS is not set +CONFIG_ACPI_TOSHIBA=m +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_EC=y 
+CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y +CONFIG_ACPI_INITRD=y + +# +# APM (Advanced Power Management) BIOS Support +# +CONFIG_APM=y +# CONFIG_APM_IGNORE_USER_SUSPEND is not set +CONFIG_APM_DO_ENABLE=y +# CONFIG_APM_CPU_IDLE is not set +CONFIG_APM_DISPLAY_BLANK=y +# CONFIG_APM_RTC_IS_GMT is not set +CONFIG_APM_ALLOW_INTS=y +# CONFIG_APM_REAL_MODE_POWER_OFF is not set + +# +# CPU Frequency scaling +# +CONFIG_CPU_FREQ=y +CONFIG_CPU_FREQ_PROC_INTF=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set +CONFIG_CPU_FREQ_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_POWERSAVE=m +CONFIG_CPU_FREQ_GOV_USERSPACE=m +CONFIG_CPU_FREQ_GOV_ONDEMAND=m +# CONFIG_CPU_FREQ_24_API is not set +CONFIG_CPU_FREQ_TABLE=m + +# +# CPUFreq processor drivers +# +CONFIG_X86_ACPI_CPUFREQ=m +# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set +CONFIG_X86_POWERNOW_K6=m +CONFIG_X86_POWERNOW_K7=m +CONFIG_X86_POWERNOW_K8=m +CONFIG_X86_POWERNOW_K8_ACPI=y +CONFIG_X86_GX_SUSPMOD=m +CONFIG_X86_SPEEDSTEP_CENTRINO=m +CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE=y +# CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI is not set +CONFIG_X86_SPEEDSTEP_ICH=m +CONFIG_X86_SPEEDSTEP_SMI=m +CONFIG_X86_P4_CLOCKMOD=m +CONFIG_X86_SPEEDSTEP_LIB=m +CONFIG_X86_LONGRUN=m +CONFIG_X86_LONGHAUL=m + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +# CONFIG_PCI_USE_VECTOR is not set +# CONFIG_PCI_LEGACY_PROC is not set +# CONFIG_PCI_NAMES is not set +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +CONFIG_SCx200=m + +# +# PCMCIA/CardBus support +# +CONFIG_PCMCIA=m +# CONFIG_PCMCIA_DEBUG is not set +CONFIG_YENTA=m +CONFIG_CARDBUS=y +CONFIG_I82092=m +CONFIG_I82365=m +CONFIG_TCIC=m +CONFIG_PCMCIA_PROBE=y + +# +# PCI Hotplug Support +# +CONFIG_HOTPLUG_PCI=m 
+CONFIG_HOTPLUG_PCI_FAKE=m +CONFIG_HOTPLUG_PCI_COMPAQ=m +CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM=y +CONFIG_HOTPLUG_PCI_IBM=m +CONFIG_HOTPLUG_PCI_AMD=m +CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_HOTPLUG_PCI_CPCI=y +CONFIG_HOTPLUG_PCI_CPCI_ZT5550=m +CONFIG_HOTPLUG_PCI_CPCI_GENERIC=m +CONFIG_HOTPLUG_PCI_PCIE=m +# CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set +# CONFIG_HOTPLUG_PCI_SHPC is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_AOUT=m +CONFIG_BINFMT_MISC=m + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_FW_LOADER=m +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +CONFIG_MTD=m +# CONFIG_MTD_DEBUG is not set +CONFIG_MTD_PARTITIONS=m +CONFIG_MTD_CONCAT=m +CONFIG_MTD_REDBOOT_PARTS=m +CONFIG_MTD_CMDLINE_PARTS=m + +# +# User Modules And Translation Layers +# +CONFIG_MTD_CHAR=m +CONFIG_MTD_BLOCK=m +# CONFIG_MTD_BLOCK_RO is not set +# CONFIG_FTL is not set +# CONFIG_NFTL is not set +# CONFIG_INFTL is not set + +# +# RAM/ROM/Flash chip drivers +# +CONFIG_MTD_CFI=m +CONFIG_MTD_JEDECPROBE=m +CONFIG_MTD_GEN_PROBE=m +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_NOSWAP=y +# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set +# CONFIG_MTD_CFI_GEOMETRY is not set +CONFIG_MTD_CFI_INTELEXT=m +CONFIG_MTD_CFI_AMDSTD=m +CONFIG_MTD_CFI_STAA=m +# CONFIG_MTD_RAM is not set +# CONFIG_MTD_ROM is not set +CONFIG_MTD_ABSENT=m +CONFIG_MTD_OBSOLETE_CHIPS=y +CONFIG_MTD_AMDSTD=m +CONFIG_MTD_SHARP=m +CONFIG_MTD_JEDEC=m + +# +# Mapping drivers for chip access +# +CONFIG_MTD_COMPLEX_MAPPINGS=y +CONFIG_MTD_PHYSMAP=m +CONFIG_MTD_PHYSMAP_START=0x8000000 +CONFIG_MTD_PHYSMAP_LEN=0x4000000 +CONFIG_MTD_PHYSMAP_BUSWIDTH=2 +CONFIG_MTD_PNC2000=m +CONFIG_MTD_SC520CDP=m +CONFIG_MTD_NETSC520=m +CONFIG_MTD_SBC_GXX=m +CONFIG_MTD_ELAN_104NC=m +CONFIG_MTD_OCTAGON=m +CONFIG_MTD_VMAX=m +CONFIG_MTD_SCx200_DOCFLASH=m +CONFIG_MTD_AMD76XROM=m +CONFIG_MTD_ICH2ROM=m +CONFIG_MTD_SCB2_FLASH=m +CONFIG_MTD_NETtel=m 
+CONFIG_MTD_DILNETPC=m +CONFIG_MTD_DILNETPC_BOOTSIZE=0x80000 +CONFIG_MTD_L440GX=m +CONFIG_MTD_PCI=m + +# +# Self-contained MTD device drivers +# +CONFIG_MTD_PMC551=m +CONFIG_MTD_PMC551_BUGFIX=y +# CONFIG_MTD_PMC551_DEBUG is not set +CONFIG_MTD_SLRAM=m +CONFIG_MTD_MTDRAM=m +CONFIG_MTDRAM_TOTAL_SIZE=4096 +CONFIG_MTDRAM_ERASE_SIZE=128 +CONFIG_MTD_BLKMTD=m + +# +# Disk-On-Chip Device Drivers +# +CONFIG_MTD_DOC2000=m +CONFIG_MTD_DOC2001=m +CONFIG_MTD_DOC2001PLUS=m +CONFIG_MTD_DOCPROBE=m +CONFIG_MTD_DOCPROBE_ADVANCED=y +CONFIG_MTD_DOCPROBE_ADDRESS=0x0000 +CONFIG_MTD_DOCPROBE_HIGH=y +CONFIG_MTD_DOCPROBE_55AA=y + +# +# NAND Flash Device Drivers +# +CONFIG_MTD_NAND=m +# CONFIG_MTD_NAND_VERIFY_WRITE is not set +CONFIG_MTD_NAND_IDS=m + +# +# Parallel port support +# +CONFIG_PARPORT=m +CONFIG_PARPORT_PC=m +CONFIG_PARPORT_PC_CML1=m +CONFIG_PARPORT_SERIAL=m +CONFIG_PARPORT_PC_FIFO=y +CONFIG_PARPORT_PC_SUPERIO=y +CONFIG_PARPORT_PC_PCMCIA=m +CONFIG_PARPORT_OTHER=y +CONFIG_PARPORT_1284=y + +# +# Plug and Play support +# +CONFIG_PNP=y +# CONFIG_PNP_DEBUG is not set + +# +# Protocols +# +CONFIG_ISAPNP=y +CONFIG_PNPBIOS=y +CONFIG_PNPBIOS_PROC_FS=y + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +CONFIG_BLK_DEV_XD=m +CONFIG_PARIDE=m +CONFIG_PARIDE_PARPORT=m + +# +# Parallel IDE high-level drivers +# +CONFIG_PARIDE_PD=m +CONFIG_PARIDE_PCD=m +CONFIG_PARIDE_PF=m +CONFIG_PARIDE_PT=m +CONFIG_PARIDE_PG=m + +# +# Parallel IDE protocol modules +# +CONFIG_PARIDE_ATEN=m +CONFIG_PARIDE_BPCK=m +CONFIG_PARIDE_BPCK6=m +CONFIG_PARIDE_COMM=m +CONFIG_PARIDE_DSTR=m +CONFIG_PARIDE_FIT2=m +CONFIG_PARIDE_FIT3=m +CONFIG_PARIDE_EPAT=m +CONFIG_PARIDE_EPATC8=y +CONFIG_PARIDE_EPIA=m +CONFIG_PARIDE_FRIQ=m +CONFIG_PARIDE_FRPW=m +CONFIG_PARIDE_KBIC=m +CONFIG_PARIDE_KTTI=m +CONFIG_PARIDE_ON20=m +CONFIG_PARIDE_ON26=m +CONFIG_BLK_CPQ_DA=m +CONFIG_BLK_CPQ_CISS_DA=m +CONFIG_CISS_SCSI_TAPE=y +CONFIG_BLK_DEV_DAC960=m +CONFIG_BLK_DEV_UMEM=m +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_CRYPTOLOOP=m +CONFIG_BLK_DEV_NBD=m 
+CONFIG_BLK_DEV_CARMEL=m +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=64000 +CONFIG_BLK_DEV_INITRD=y +CONFIG_LBD=y +CONFIG_CIPHER_TWOFISH=m + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +CONFIG_IDEDISK_MULTI_MODE=y +CONFIG_IDEDISK_STROKE=y +CONFIG_BLK_DEV_IDECS=m +CONFIG_BLK_DEV_IDECD=m +CONFIG_BLK_DEV_IDETAPE=m +CONFIG_BLK_DEV_IDEFLOPPY=y +CONFIG_BLK_DEV_IDESCSI=m +# CONFIG_IDE_TASK_IOCTL is not set +# CONFIG_IDE_TASKFILE_IO is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +CONFIG_BLK_DEV_CMD640=y +CONFIG_BLK_DEV_CMD640_ENHANCED=y +CONFIG_BLK_DEV_IDEPNP=y +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +CONFIG_BLK_DEV_OFFBOARD=y +CONFIG_BLK_DEV_GENERIC=y +CONFIG_BLK_DEV_OPTI621=y +CONFIG_BLK_DEV_RZ1000=y +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +CONFIG_IDEDMA_ONLYDISK=y +CONFIG_BLK_DEV_ADMA=y +CONFIG_BLK_DEV_AEC62XX=y +CONFIG_BLK_DEV_ALI15X3=y +# CONFIG_WDC_ALI15X3 is not set +CONFIG_BLK_DEV_AMD74XX=y +CONFIG_BLK_DEV_ATIIXP=y +CONFIG_BLK_DEV_CMD64X=y +CONFIG_BLK_DEV_TRIFLEX=y +CONFIG_BLK_DEV_CY82C693=y +CONFIG_BLK_DEV_CS5520=m +CONFIG_BLK_DEV_CS5530=m +CONFIG_BLK_DEV_HPT34X=y +CONFIG_HPT34X_AUTODMA=y +CONFIG_BLK_DEV_HPT366=y +CONFIG_BLK_DEV_SC1200=y +CONFIG_BLK_DEV_PIIX=y +CONFIG_BLK_DEV_NS87415=y +CONFIG_BLK_DEV_PDC202XX_OLD=y +CONFIG_PDC202XX_BURST=y +CONFIG_BLK_DEV_PDC202XX_NEW=y +CONFIG_PDC202XX_FORCE=y +CONFIG_BLK_DEV_SVWKS=y +CONFIG_BLK_DEV_SIIMAGE=y +CONFIG_BLK_DEV_SIS5513=y +CONFIG_BLK_DEV_SLC90E66=y +CONFIG_BLK_DEV_TRM290=y +CONFIG_BLK_DEV_VIA82CXXX=y +CONFIG_IDE_CHIPSETS=y + +# +# Note: most of these also require special kernel boot parameters +# +CONFIG_BLK_DEV_4DRIVES=y +CONFIG_BLK_DEV_ALI14XX=y +CONFIG_BLK_DEV_DTC2278=y +CONFIG_BLK_DEV_HT6560B=y +# CONFIG_BLK_DEV_PDC4030 is not set +CONFIG_BLK_DEV_QD65XX=y 
+CONFIG_BLK_DEV_UMC8672=y +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +CONFIG_SCSI=m +CONFIG_SCSI_PROC_FS=y + +# +# SCSI support type (disk, tape, CD-ROM) +# +CONFIG_BLK_DEV_SD=m +CONFIG_CHR_DEV_ST=m +CONFIG_CHR_DEV_OSST=m +CONFIG_BLK_DEV_SR=m +# CONFIG_BLK_DEV_SR_VENDOR is not set +CONFIG_CHR_DEV_SG=m +CONFIG_CHR_DEV_SCH=m + +# +# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +# +CONFIG_SCSI_MULTI_LUN=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y + +# +# SCSI Transport Attributes +# +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_FC_ATTRS=m + +# +# SCSI low-level drivers +# +CONFIG_BLK_DEV_3W_XXXX_RAID=m +CONFIG_SCSI_7000FASST=m +CONFIG_SCSI_ACARD=m +CONFIG_SCSI_AHA152X=m +CONFIG_SCSI_AHA1542=m +CONFIG_SCSI_AACRAID=m +CONFIG_SCSI_AIC7XXX=m +CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 +CONFIG_AIC7XXX_RESET_DELAY_MS=5000 +# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set +# CONFIG_AIC7XXX_DEBUG_ENABLE is not set +CONFIG_AIC7XXX_DEBUG_MASK=0 +CONFIG_AIC7XXX_REG_PRETTY_PRINT=y +CONFIG_SCSI_AIC7XXX_OLD=m +CONFIG_SCSI_AIC79XX=m +CONFIG_AIC79XX_CMDS_PER_DEVICE=32 +CONFIG_AIC79XX_RESET_DELAY_MS=15000 +# CONFIG_AIC79XX_BUILD_FIRMWARE is not set +# CONFIG_AIC79XX_ENABLE_RD_STRM is not set +# CONFIG_AIC79XX_DEBUG_ENABLE is not set +CONFIG_AIC79XX_DEBUG_MASK=0 +CONFIG_AIC79XX_REG_PRETTY_PRINT=y +# CONFIG_SCSI_AIC79XX_NEW is not set +CONFIG_SCSI_DPT_I2O=m +CONFIG_SCSI_ADVANSYS=m +CONFIG_SCSI_IN2000=m +CONFIG_MEGARAID_NEWGEN=y +CONFIG_MEGARAID_MM=m +CONFIG_MEGARAID_MAILBOX=m +CONFIG_MEGARAID_LEGACY=m +CONFIG_SCSI_SATA=y +CONFIG_SCSI_SATA_SVW=m +CONFIG_SCSI_ATA_PIIX=m +CONFIG_SCSI_SATA_PROMISE=m +CONFIG_SCSI_SATA_SIL=m +CONFIG_SCSI_SATA_SIS=m +CONFIG_SCSI_SATA_VIA=m +CONFIG_SCSI_SATA_VITESSE=m +CONFIG_SCSI_BUSLOGIC=m +# CONFIG_SCSI_OMIT_FLASHPOINT is not set +# CONFIG_SCSI_CPQFCTS is not set +CONFIG_SCSI_DMX3191D=m +CONFIG_SCSI_DTC3280=m +CONFIG_SCSI_EATA=m +CONFIG_SCSI_EATA_TAGGED_QUEUE=y 
+CONFIG_SCSI_EATA_LINKED_COMMANDS=y +CONFIG_SCSI_EATA_MAX_TAGS=16 +CONFIG_SCSI_EATA_PIO=m +CONFIG_SCSI_FUTURE_DOMAIN=m +CONFIG_SCSI_GDTH=m +CONFIG_SCSI_GENERIC_NCR5380=m +CONFIG_SCSI_GENERIC_NCR5380_MMIO=m +CONFIG_SCSI_GENERIC_NCR53C400=y +CONFIG_SCSI_IPS=m +CONFIG_SCSI_INIA100=m +CONFIG_SCSI_PPA=m +CONFIG_SCSI_IMM=m +# CONFIG_SCSI_IZIP_EPP16 is not set +# CONFIG_SCSI_IZIP_SLOW_CTR is not set +CONFIG_SCSI_NCR53C406A=m +CONFIG_SCSI_SYM53C8XX_2=m +CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 +CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 +CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 +# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set +CONFIG_SCSI_LPFC=m +CONFIG_SCSI_IPR=m +CONFIG_SCSI_IPR_TRACE=y +CONFIG_SCSI_IPR_DUMP=y +CONFIG_SCSI_PAS16=m +CONFIG_SCSI_PSI240I=m +CONFIG_SCSI_QLOGIC_FAS=m +CONFIG_SCSI_QLOGIC_ISP=m +CONFIG_SCSI_QLOGIC_FC=m +CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y +CONFIG_SCSI_QLOGIC_1280=m +CONFIG_SCSI_QLA2XXX=m +CONFIG_SCSI_QLA21XX=m +CONFIG_SCSI_QLA22XX=m +CONFIG_SCSI_QLA2300=m +CONFIG_SCSI_QLA2322=m +CONFIG_SCSI_QLA6312=m +CONFIG_SCSI_QLA6322=m +CONFIG_SCSI_QLA2XXX_FAILOVER=y +CONFIG_SCSI_QLA4XXX=m +CONFIG_SCSI_QLA4XXX_FAILOVER=y +CONFIG_SCSI_SYM53C416=m +CONFIG_SCSI_DC395x=m +CONFIG_SCSI_DC390T=m +CONFIG_SCSI_T128=m +CONFIG_SCSI_U14_34F=m +CONFIG_SCSI_U14_34F_TAGGED_QUEUE=y +CONFIG_SCSI_U14_34F_LINKED_COMMANDS=y +CONFIG_SCSI_U14_34F_MAX_TAGS=8 +CONFIG_SCSI_ULTRASTOR=m +CONFIG_SCSI_NSP32=m +CONFIG_SCSI_DEBUG=m + +# +# PCMCIA SCSI adapter support +# +CONFIG_PCMCIA_AHA152X=m +CONFIG_PCMCIA_FDOMAIN=m +CONFIG_PCMCIA_NINJA_SCSI=m +CONFIG_PCMCIA_QLOGIC=m + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +CONFIG_CD_NO_IDESCSI=y +CONFIG_AZTCD=m +CONFIG_GSCD=m +CONFIG_MCD=m +CONFIG_MCD_IRQ=11 +CONFIG_MCD_BASE=0x300 +CONFIG_OPTCD=m +CONFIG_SJCD=m +CONFIG_ISP16_CDI=m +CONFIG_CDU535=m + +# +# Multi-device support (RAID and LVM) +# +CONFIG_MD=y +CONFIG_BLK_DEV_MD=y +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID5=m +CONFIG_MD_RAID6=m +CONFIG_MD_MULTIPATH=m 
+CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_MULTIPATH=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_FLAKEY=m +CONFIG_BLK_DEV_DM_BBR=m + +# +# Fusion MPT device support +# +CONFIG_FUSION=m +CONFIG_FUSION_MAX_SGE=40 +CONFIG_FUSION_CTL=m +CONFIG_FUSION_LAN=m + +# +# IEEE 1394 (FireWire) support +# +CONFIG_IEEE1394=m + +# +# Subsystem Options +# +# CONFIG_IEEE1394_VERBOSEDEBUG is not set +# CONFIG_IEEE1394_OUI_DB is not set +CONFIG_IEEE1394_EXTRA_CONFIG_ROMS=y +CONFIG_IEEE1394_CONFIG_ROM_IP1394=y + +# +# Device Drivers +# +CONFIG_IEEE1394_PCILYNX=m +CONFIG_IEEE1394_OHCI1394=m + +# +# Protocol Drivers +# +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_SBP2=m +# CONFIG_IEEE1394_SBP2_PHYS_DMA is not set +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_IEEE1394_RAWIO=m +CONFIG_IEEE1394_CMP=m +CONFIG_IEEE1394_AMDTP=m + +# +# I2O device support +# +CONFIG_I2O=m +CONFIG_I2O_CONFIG=m +CONFIG_I2O_BLOCK=m +CONFIG_I2O_SCSI=m +CONFIG_I2O_PROC=m + +# +# Networking support +# +CONFIG_NET=y + +# +# Networking options +# +CONFIG_PACKET=m +CONFIG_PACKET_MMAP=y +CONFIG_NETLINK_DEV=m +CONFIG_UNIX=y +CONFIG_NET_KEY=m +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_ROUTE_FWMARK=y +CONFIG_IP_ROUTE_NAT=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_TOS=y +CONFIG_IP_ROUTE_VERBOSE=y +# CONFIG_IP_PNP is not set +CONFIG_NET_IPIP=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_IP_MROUTE=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +# CONFIG_ARPD is not set +CONFIG_SYN_COOKIES=y +CONFIG_INET_AH=m +CONFIG_INET_ESP=m +CONFIG_INET_IPCOMP=m +# CONFIG_ACCEPT_QUEUES is not set + +# +# IP: Virtual Server Configuration +# +CONFIG_IP_VS=m +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=12 + +# +# IPVS transport protocol load balancing support +# +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y + +# +# IPVS scheduler +# 
+CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m +CONFIG_IPV6=m +CONFIG_IPV6_SUBTREES=y +CONFIG_IPV6_PRIVACY=y +CONFIG_IPV6_NDISC_NEW=y +CONFIG_INET6_AH=m +CONFIG_INET6_ESP=m +CONFIG_INET6_IPCOMP=m +CONFIG_IPV6_TUNNEL=m + +# +# MOBILE IPv6 (EXPERIMENTAL) +# +CONFIG_IPV6_MOBILITY=m +CONFIG_IPV6_MOBILITY_MN=m +CONFIG_IPV6_MOBILITY_HA=m +# CONFIG_IPV6_MOBILITY_DEBUG is not set +CONFIG_DECNET=m +CONFIG_DECNET_SIOCGIFCONF=y +# CONFIG_DECNET_ROUTER is not set +CONFIG_BRIDGE=m +CONFIG_NETFILTER=y +# CONFIG_NETFILTER_DEBUG is not set +CONFIG_BRIDGE_NETFILTER=y + +# +# IP: Netfilter Configuration +# +CONFIG_IP_NF_CONNTRACK=m +CONFIG_IP_NF_FTP=m +CONFIG_IP_NF_IRC=m +CONFIG_IP_NF_TFTP=m +CONFIG_IP_NF_AMANDA=m +CONFIG_IP_NF_QUEUE=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_MATCH_LIMIT=m +CONFIG_IP_NF_MATCH_IPRANGE=m +CONFIG_IP_NF_MATCH_MAC=m +CONFIG_IP_NF_MATCH_PKTTYPE=m +CONFIG_IP_NF_MATCH_POLICY=m +CONFIG_IP_NF_MATCH_MARK=m +CONFIG_IP_NF_MATCH_MULTIPORT=m +CONFIG_IP_NF_MATCH_TOS=m +CONFIG_IP_NF_MATCH_RECENT=m +CONFIG_IP_NF_MATCH_ECN=m +CONFIG_IP_NF_MATCH_DSCP=m +CONFIG_IP_NF_MATCH_AH_ESP=m +CONFIG_IP_NF_MATCH_LENGTH=m +CONFIG_IP_NF_MATCH_TTL=m +CONFIG_IP_NF_MATCH_TCPMSS=m +CONFIG_IP_NF_MATCH_HELPER=m +CONFIG_IP_NF_MATCH_STATE=m +CONFIG_IP_NF_MATCH_CONNTRACK=m +CONFIG_IP_NF_MATCH_OWNER=m +CONFIG_IP_NF_MATCH_PHYSDEV=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_NAT_NEEDED=y +CONFIG_IP_NF_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REDIRECT=m +CONFIG_IP_NF_TARGET_NETMAP=m +CONFIG_IP_NF_TARGET_SAME=m +# CONFIG_IP_NF_NAT_LOCAL is not set +CONFIG_IP_NF_NAT_SNMP_BASIC=m +CONFIG_IP_NF_NAT_IRC=m +CONFIG_IP_NF_NAT_FTP=m +CONFIG_IP_NF_NAT_TFTP=m +CONFIG_IP_NF_NAT_AMANDA=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_TARGET_TOS=m +CONFIG_IP_NF_TARGET_ECN=m 
+CONFIG_IP_NF_TARGET_DSCP=m +CONFIG_IP_NF_TARGET_MARK=m +CONFIG_IP_NF_TARGET_CLASSIFY=m +CONFIG_IP_NF_TARGET_LOG=m +CONFIG_IP_NF_TARGET_ULOG=m +CONFIG_IP_NF_TARGET_TCPMSS=m +CONFIG_IP_NF_ARPTABLES=m +CONFIG_IP_NF_ARPFILTER=m +CONFIG_IP_NF_ARP_MANGLE=m +CONFIG_IP_NF_COMPAT_IPCHAINS=m +CONFIG_IP_NF_COMPAT_IPFWADM=m +CONFIG_IP_NF_CONNTRACK_MARK=y +CONFIG_IP_NF_TARGET_CONNMARK=m +CONFIG_IP_NF_MATCH_CONNMARK=m +CONFIG_IP_NF_TARGET_CLUSTERIP=m + +# +# IPv6: Netfilter Configuration +# +CONFIG_IP6_NF_FTP=m +CONFIG_IP6_NF_QUEUE=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_MATCH_LIMIT=m +CONFIG_IP6_NF_MATCH_MAC=m +CONFIG_IP6_NF_MATCH_RT=m +CONFIG_IP6_NF_MATCH_OPTS=m +CONFIG_IP6_NF_MATCH_FRAG=m +CONFIG_IP6_NF_MATCH_HL=m +CONFIG_IP6_NF_MATCH_MULTIPORT=m +CONFIG_IP6_NF_MATCH_OWNER=m +CONFIG_IP6_NF_MATCH_MARK=m +CONFIG_IP6_NF_MATCH_IPV6HEADER=m +CONFIG_IP6_NF_MATCH_AHESP=m +CONFIG_IP6_NF_MATCH_LENGTH=m +CONFIG_IP6_NF_MATCH_EUI64=m +CONFIG_IP6_NF_CONNTRACK=m +CONFIG_IP6_NF_MATCH_STATE=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_TARGET_MARK=m + +# +# DECnet: Netfilter Configuration +# +CONFIG_DECNET_NF_GRABULATOR=m + +# +# Bridge: Netfilter Configuration +# +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_EBT_T_NAT=m +CONFIG_BRIDGE_EBT_802_3=m +CONFIG_BRIDGE_EBT_AMONG=m +CONFIG_BRIDGE_EBT_ARP=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_LIMIT=m +CONFIG_BRIDGE_EBT_MARK=m +CONFIG_BRIDGE_EBT_PKTTYPE=m +CONFIG_BRIDGE_EBT_STP=m +CONFIG_BRIDGE_EBT_VLAN=m +CONFIG_BRIDGE_EBT_ARPREPLY=m +CONFIG_BRIDGE_EBT_DNAT=m +CONFIG_BRIDGE_EBT_MARK_T=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_SNAT=m +CONFIG_BRIDGE_EBT_LOG=m +CONFIG_XFRM=y +CONFIG_XFRM_USER=m + +# +# SCTP Configuration (EXPERIMENTAL) +# +CONFIG_IP_SCTP=m +# CONFIG_SCTP_DBG_MSG is not set +# CONFIG_SCTP_DBG_OBJCNT is not set +# CONFIG_SCTP_HMAC_NONE is not set +# CONFIG_SCTP_HMAC_SHA1 is not set 
+CONFIG_SCTP_HMAC_MD5=y +CONFIG_ATM=y +CONFIG_ATM_CLIP=y +CONFIG_ATM_CLIP_NO_ICMP=y +CONFIG_ATM_LANE=m +CONFIG_ATM_MPOA=m +CONFIG_ATM_BR2684=m +# CONFIG_ATM_BR2684_IPFILTER is not set +CONFIG_VLAN_8021Q=m +CONFIG_LLC=y +CONFIG_LLC2=m +CONFIG_IPX=m +# CONFIG_IPX_INTERN is not set +CONFIG_ATALK=m +CONFIG_DEV_APPLETALK=y +CONFIG_LTPC=m +CONFIG_COPS=m +CONFIG_COPS_DAYNA=y +CONFIG_COPS_TANGENT=y +CONFIG_IPDDP=m +CONFIG_IPDDP_ENCAP=y +CONFIG_IPDDP_DECAP=y +CONFIG_X25=m +CONFIG_LAPB=m +# CONFIG_NET_DIVERT is not set +CONFIG_ECONET=m +# CONFIG_ECONET_AUNUDP is not set +# CONFIG_ECONET_NATIVE is not set +CONFIG_WAN_ROUTER=m +# CONFIG_NET_FASTROUTE is not set +# CONFIG_NET_HW_FLOWCONTROL is not set + +# +# QoS and/or fair queueing +# +CONFIG_NET_SCHED=y +CONFIG_NET_SCH_CBQ=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_HFSC=m +CONFIG_NET_SCH_CSZ=m +CONFIG_NET_SCH_ATM=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_SFQ=m +CONFIG_NET_SCH_TEQL=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_SCH_GRED=m +CONFIG_NET_SCH_DSMARK=m +CONFIG_NET_SCH_DELAY=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_QOS=y +CONFIG_NET_ESTIMATOR=y +CONFIG_NET_CLS=y +CONFIG_NET_CLS_TCINDEX=m +CONFIG_NET_CLS_ROUTE4=m +CONFIG_NET_CLS_ROUTE=y +CONFIG_NET_CLS_FW=m +CONFIG_NET_CLS_U32=m +CONFIG_NET_CLS_RSVP=m +CONFIG_NET_CLS_RSVP6=m +CONFIG_NET_CLS_POLICE=y + +# +# Network testing +# +CONFIG_NET_PKTGEN=m +CONFIG_NETDEVICES=y + +# +# ARCnet devices +# +CONFIG_ARCNET=m +CONFIG_ARCNET_1201=m +CONFIG_ARCNET_1051=m +CONFIG_ARCNET_RAW=m +CONFIG_ARCNET_COM90xx=m +CONFIG_ARCNET_COM90xxIO=m +CONFIG_ARCNET_RIM_I=m +CONFIG_ARCNET_COM20020=m +CONFIG_ARCNET_COM20020_ISA=m +CONFIG_ARCNET_COM20020_PCI=m +CONFIG_DUMMY=m +CONFIG_BONDING=m +CONFIG_EQUALIZER=m +CONFIG_TUN=m +CONFIG_ETHERTAP=m +CONFIG_NET_SB1000=m + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=m +CONFIG_HAPPYMEAL=m +CONFIG_SUNGEM=m +CONFIG_NET_VENDOR_3COM=y +CONFIG_EL1=m +CONFIG_EL2=m +CONFIG_ELPLUS=m +CONFIG_EL16=m +CONFIG_EL3=m +CONFIG_3C515=m 
+CONFIG_VORTEX=m +CONFIG_TYPHOON=m +CONFIG_LANCE=m +CONFIG_NET_VENDOR_SMC=y +CONFIG_WD80x3=m +CONFIG_ULTRA=m +CONFIG_SMC9194=m +CONFIG_NET_VENDOR_RACAL=y +CONFIG_NI52=m +CONFIG_NI65=m + +# +# Tulip family network device support +# +CONFIG_NET_TULIP=y +CONFIG_DE2104X=m +CONFIG_TULIP=m +# CONFIG_TULIP_MWI is not set +# CONFIG_TULIP_MMIO is not set +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +CONFIG_DE4X5=m +CONFIG_WINBOND_840=m +CONFIG_DM9102=m +CONFIG_PCMCIA_XIRCOM=m +CONFIG_AT1700=m +CONFIG_DEPCA=m +CONFIG_HP100=m +CONFIG_NET_ISA=y +CONFIG_E2100=m +CONFIG_EWRK3=m +CONFIG_EEXPRESS=m +CONFIG_EEXPRESS_PRO=m +CONFIG_HPLAN_PLUS=m +CONFIG_HPLAN=m +CONFIG_LP486E=m +CONFIG_ETH16I=m +CONFIG_NE2000=m +CONFIG_ZNET=m +CONFIG_SEEQ8005=m +CONFIG_NET_PCI=y +CONFIG_PCNET32=m +CONFIG_AMD8111_ETH=m +CONFIG_ADAPTEC_STARFIRE=m +CONFIG_ADAPTEC_STARFIRE_NAPI=y +CONFIG_AC3200=m +CONFIG_APRICOT=m +CONFIG_B44=m +CONFIG_FORCEDETH=m +CONFIG_CS89x0=m +CONFIG_DGRS=m +CONFIG_EEPRO100=m +# CONFIG_EEPRO100_PIO is not set +CONFIG_E100=m +CONFIG_E100_NAPI=y +CONFIG_FEALNX=m +CONFIG_NATSEMI=m +CONFIG_NE2K_PCI=m +CONFIG_8139CP=m +CONFIG_8139TOO=m +# CONFIG_8139TOO_PIO is not set +# CONFIG_8139TOO_TUNE_TWISTER is not set +CONFIG_8139TOO_8129=y +# CONFIG_8139_OLD_RX_RESET is not set +CONFIG_8139_RXBUF_IDX=2 +CONFIG_SIS900=m +CONFIG_EPIC100=m +CONFIG_SUNDANCE=m +# CONFIG_SUNDANCE_MMIO is not set +CONFIG_TLAN=m +CONFIG_VIA_RHINE=m +# CONFIG_VIA_RHINE_MMIO is not set +CONFIG_NET_POCKET=y +CONFIG_ATP=m +CONFIG_DE600=m +CONFIG_DE620=m + +# +# Ethernet (1000 Mbit) +# +CONFIG_ACENIC=m +# CONFIG_ACENIC_OMIT_TIGON_I is not set +CONFIG_DL2K=m +CONFIG_E1000=m +CONFIG_E1000_NAPI=y +CONFIG_E1000_NEW=m +CONFIG_E1000_NEW_NAPI=y +CONFIG_NS83820=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_R8169=m +CONFIG_SIS190=m +CONFIG_SK98LIN=m +CONFIG_TIGON3=m +CONFIG_NET_BROADCOM=m +CONFIG_NET_BROADCOM_NEW=m +CONFIG_NET_BCM44=m +CONFIG_TIGON3_NEW=m + +# +# Ethernet (10000 Mbit) +# +CONFIG_IXGB=m +CONFIG_IXGB_NAPI=y 
+CONFIG_S2IO=m +CONFIG_S2IO_NAPI=y +CONFIG_FDDI=y +# CONFIG_DEFXX is not set +CONFIG_SKFP=m +CONFIG_HIPPI=y +CONFIG_ROADRUNNER=m +CONFIG_ROADRUNNER_LARGE_RINGS=y +CONFIG_PLIP=m +CONFIG_PPP=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPP_FILTER=y +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_MPPE=m +CONFIG_PPPOE=m +CONFIG_PPPOATM=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y +CONFIG_SLIP_MODE_SLIP6=y + +# +# Wireless LAN (non-hamradio) +# +CONFIG_NET_RADIO=y + +# +# Obsolete Wireless cards support (pre-802.11) +# +CONFIG_STRIP=m +# CONFIG_ARLAN is not set +CONFIG_WAVELAN=m +CONFIG_PCMCIA_WAVELAN=m +CONFIG_PCMCIA_NETWAVE=m + +# +# Wireless 802.11 Frequency Hopping cards support +# +CONFIG_PCMCIA_RAYCS=m + +# +# Wireless 802.11b ISA/PCI cards support +# +CONFIG_AIRO=m +CONFIG_HERMES=m +CONFIG_PLX_HERMES=m +CONFIG_TMD_HERMES=m +CONFIG_PCI_HERMES=m +CONFIG_ATMEL=m +CONFIG_PCI_ATMEL=m + +# +# Wireless 802.11b Pcmcia/Cardbus cards support +# +CONFIG_PCMCIA_HERMES=m +CONFIG_AIRO_CS=m +CONFIG_PCMCIA_ATMEL=m +CONFIG_PCMCIA_WL3501=m + +# +# Prism GT/Duette 802.11(a/b/g) PCI/Cardbus support +# +CONFIG_PRISM54=m +CONFIG_NET_WIRELESS=y + +# +# Token Ring devices +# +CONFIG_TR=y +CONFIG_IBMTR=m +CONFIG_IBMOL=m +CONFIG_IBMLS=m +CONFIG_3C359=m +CONFIG_TMS380TR=m +CONFIG_TMSPCI=m +CONFIG_SKISA=m +CONFIG_PROTEON=m +CONFIG_ABYSS=m +CONFIG_SMCTR=m +CONFIG_NET_FC=y +CONFIG_NET_LPFC=m +CONFIG_RCPCI=m +CONFIG_SHAPER=m +CONFIG_NETCONSOLE=m + +# +# Wan interfaces +# +CONFIG_WAN=y +CONFIG_HOSTESS_SV11=m +# CONFIG_COSA is not set +CONFIG_DSCC4=m +CONFIG_DSCC4_PCISYNC=y +CONFIG_DSCC4_PCI_RST=y +CONFIG_LANMEDIA=m +CONFIG_SEALEVEL_4021=m +CONFIG_SYNCLINK_SYNCPPP=m +CONFIG_HDLC=m +CONFIG_HDLC_RAW=y +CONFIG_HDLC_RAW_ETH=y +CONFIG_HDLC_CISCO=y +CONFIG_HDLC_FR=y +CONFIG_HDLC_PPP=y +CONFIG_HDLC_X25=y +CONFIG_PCI200SYN=m +CONFIG_WANXL=m +# CONFIG_WANXL_BUILD_FIRMWARE is not set +CONFIG_PC300=m +CONFIG_PC300_MLPPP=y +CONFIG_N2=m 
+CONFIG_C101=m +CONFIG_FARSYNC=m +CONFIG_DLCI=m +CONFIG_DLCI_COUNT=24 +CONFIG_DLCI_MAX=8 +CONFIG_SDLA=m +# CONFIG_WAN_ROUTER_DRIVERS is not set +CONFIG_LAPBETHER=m +CONFIG_X25_ASY=m +# CONFIG_SBNI is not set + +# +# PCMCIA network device support +# +CONFIG_NET_PCMCIA=y +CONFIG_PCMCIA_3C589=m +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_FMVJ18X=m +CONFIG_PCMCIA_PCNET=m +CONFIG_PCMCIA_NMCLAN=m +CONFIG_PCMCIA_SMC91C92=m +CONFIG_PCMCIA_XIRC2PS=m +CONFIG_PCMCIA_AXNET=m +CONFIG_ARCNET_COM20020_CS=m +CONFIG_PCMCIA_IBMTR=m + +# +# ATM drivers +# +CONFIG_ATM_TCP=m +CONFIG_ATM_LANAI=m +CONFIG_ATM_ENI=m +# CONFIG_ATM_ENI_DEBUG is not set +# CONFIG_ATM_ENI_TUNE_BURST is not set +CONFIG_ATM_FIRESTREAM=m +CONFIG_ATM_ZATM=m +# CONFIG_ATM_ZATM_DEBUG is not set +CONFIG_ATM_NICSTAR=m +CONFIG_ATM_NICSTAR_USE_SUNI=y +CONFIG_ATM_NICSTAR_USE_IDT77105=y +CONFIG_ATM_IDT77252=m +# CONFIG_ATM_IDT77252_DEBUG is not set +CONFIG_ATM_IDT77252_RCV_ALL=y +CONFIG_ATM_IDT77252_USE_SUNI=y +CONFIG_ATM_AMBASSADOR=m +# CONFIG_ATM_AMBASSADOR_DEBUG is not set +CONFIG_ATM_HORIZON=m +# CONFIG_ATM_HORIZON_DEBUG is not set +CONFIG_ATM_IA=m +# CONFIG_ATM_IA_DEBUG is not set +CONFIG_ATM_FORE200E_MAYBE=m +CONFIG_ATM_FORE200E_PCA=y +CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y +CONFIG_ATM_FORE200E_TX_RETRY=16 +CONFIG_ATM_FORE200E_DEBUG=0 +CONFIG_ATM_FORE200E=m +CONFIG_ATM_HE=m +CONFIG_ATM_HE_USE_SUNI=y + +# +# Amateur Radio support +# +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# +CONFIG_BPQETHER=m +CONFIG_SCC=m +CONFIG_SCC_DELAY=y +CONFIG_SCC_TRXECHO=y +CONFIG_BAYCOM_SER_FDX=m +CONFIG_BAYCOM_SER_HDX=m +CONFIG_BAYCOM_PAR=m +CONFIG_BAYCOM_EPP=m +CONFIG_YAM=m + +# +# IrDA (infrared) support +# +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +CONFIG_IRDA_ULTRA=y + +# +# IrDA options +# +CONFIG_IRDA_CACHE_LAST_LSAP=y +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG 
is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +CONFIG_IRTTY_SIR=m + +# +# Dongle support +# +CONFIG_DONGLE=y +CONFIG_ESI_DONGLE=m +CONFIG_ACTISYS_DONGLE=m +CONFIG_TEKRAM_DONGLE=m +CONFIG_LITELINK_DONGLE=m +CONFIG_MA600_DONGLE=m +CONFIG_GIRBIL_DONGLE=m +CONFIG_MCP2120_DONGLE=m +CONFIG_OLD_BELKIN_DONGLE=m +CONFIG_ACT200L_DONGLE=m + +# +# Old SIR device drivers +# + +# +# Old Serial dongle support +# + +# +# FIR device drivers +# +CONFIG_USB_IRDA=m +CONFIG_SIGMATEL_FIR=m +CONFIG_NSC_FIR=m +CONFIG_WINBOND_FIR=m +CONFIG_TOSHIBA_FIR=m +CONFIG_SMC_IRCC_FIR=m +CONFIG_ALI_FIR=m +CONFIG_VLSI_FIR=m +CONFIG_VIA_FIR=m + +# +# Bluetooth support +# +CONFIG_BT=m +CONFIG_BT_L2CAP=m +CONFIG_BT_SCO=m +CONFIG_BT_RFCOMM=m +CONFIG_BT_RFCOMM_TTY=y +CONFIG_BT_BNEP=m +CONFIG_BT_BNEP_MC_FILTER=y +CONFIG_BT_BNEP_PROTO_FILTER=y +CONFIG_BT_CMTP=m + +# +# Bluetooth device drivers +# +CONFIG_BT_HCIUSB=m +CONFIG_BT_HCIUSB_SCO=y +CONFIG_BT_HCIUART=m +CONFIG_BT_HCIUART_H4=y +CONFIG_BT_HCIUART_BCSP=y +CONFIG_BT_HCIUART_BCSP_TXCRC=y +CONFIG_BT_HCIBCM203X=m +CONFIG_BT_HCIBFUSB=m +CONFIG_BT_HCIDTL1=m +CONFIG_BT_HCIBT3C=m +CONFIG_BT_HCIBLUECARD=m +CONFIG_BT_HCIBTUART=m +CONFIG_BT_HCIVHCI=m +CONFIG_NETPOLL=y +CONFIG_NETPOLL_RX=y +CONFIG_NETPOLL_TRAP=y +CONFIG_NET_POLL_CONTROLLER=y + +# +# ISDN subsystem +# +CONFIG_ISDN=m + +# +# Old ISDN4Linux +# +CONFIG_ISDN_I4L=m +CONFIG_ISDN_PPP=y +CONFIG_ISDN_PPP_VJ=y +CONFIG_ISDN_MPP=y +CONFIG_IPPP_FILTER=y +CONFIG_ISDN_PPP_BSDCOMP=m +CONFIG_ISDN_AUDIO=y +CONFIG_ISDN_TTY_FAX=y +CONFIG_ISDN_X25=y + +# +# ISDN feature submodules +# + +# +# ISDN4Linux hardware drivers +# + +# +# Passive cards +# +CONFIG_ISDN_DRV_HISAX=m + +# +# D-channel protocol features +# +CONFIG_HISAX_EURO=y +CONFIG_DE_AOC=y +# CONFIG_HISAX_NO_SENDCOMPLETE is not set +# CONFIG_HISAX_NO_LLC is not set +# CONFIG_HISAX_NO_KEYPAD is not set +CONFIG_HISAX_1TR6=y +CONFIG_HISAX_NI1=y +CONFIG_HISAX_MAX_CARDS=8 + +# +# HiSax supported cards +# +CONFIG_HISAX_16_0=y 
+CONFIG_HISAX_16_3=y +CONFIG_HISAX_TELESPCI=y +CONFIG_HISAX_S0BOX=y +CONFIG_HISAX_AVM_A1=y +CONFIG_HISAX_FRITZPCI=y +CONFIG_HISAX_AVM_A1_PCMCIA=y +CONFIG_HISAX_ELSA=y +CONFIG_HISAX_IX1MICROR2=y +CONFIG_HISAX_DIEHLDIVA=y +CONFIG_HISAX_ASUSCOM=y +CONFIG_HISAX_TELEINT=y +CONFIG_HISAX_HFCS=y +CONFIG_HISAX_SEDLBAUER=y +CONFIG_HISAX_SPORTSTER=y +CONFIG_HISAX_MIC=y +CONFIG_HISAX_NETJET=y +CONFIG_HISAX_NETJET_U=y +CONFIG_HISAX_NICCY=y +CONFIG_HISAX_ISURF=y +CONFIG_HISAX_HSTSAPHIR=y +CONFIG_HISAX_BKM_A4T=y +CONFIG_HISAX_SCT_QUADRO=y +CONFIG_HISAX_GAZEL=y +CONFIG_HISAX_HFC_PCI=y +CONFIG_HISAX_W6692=y +CONFIG_HISAX_HFC_SX=y +CONFIG_HISAX_ENTERNOW_PCI=y +CONFIG_HISAX_DEBUG=y + +# +# HiSax PCMCIA card service modules +# +CONFIG_HISAX_SEDLBAUER_CS=m +CONFIG_HISAX_ELSA_CS=m +CONFIG_HISAX_AVM_A1_CS=m +CONFIG_HISAX_TELES_CS=m + +# +# HiSax sub driver modules +# +CONFIG_HISAX_ST5481=m +CONFIG_HISAX_HFCUSB=m +CONFIG_HISAX_FRITZ_PCIPNP=m +CONFIG_HISAX_HDLC=y + +# +# Active cards +# +CONFIG_ISDN_DRV_ICN=m +CONFIG_ISDN_DRV_PCBIT=m +CONFIG_ISDN_DRV_SC=m +CONFIG_ISDN_DRV_ACT2000=m +CONFIG_ISDN_DRV_TPAM=m + +# +# CAPI subsystem +# +CONFIG_ISDN_CAPI=m +CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y +CONFIG_ISDN_CAPI_MIDDLEWARE=y +CONFIG_ISDN_CAPI_CAPI20=m +CONFIG_ISDN_CAPI_CAPIFS_BOOL=y +CONFIG_ISDN_CAPI_CAPIFS=m +CONFIG_ISDN_CAPI_CAPIDRV=m + +# +# CAPI hardware drivers +# + +# +# Active AVM cards +# +CONFIG_CAPI_AVM=y +CONFIG_ISDN_DRV_AVMB1_B1ISA=m +CONFIG_ISDN_DRV_AVMB1_B1PCI=m +CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y +CONFIG_ISDN_DRV_AVMB1_T1ISA=m +CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m +CONFIG_ISDN_DRV_AVMB1_AVM_CS=m +CONFIG_ISDN_DRV_AVMB1_T1PCI=m +CONFIG_ISDN_DRV_AVMB1_C4=m + +# +# Active Eicon DIVA Server cards +# +CONFIG_CAPI_EICON=y +CONFIG_ISDN_DIVAS=m +CONFIG_ISDN_DIVAS_BRIPCI=y +CONFIG_ISDN_DIVAS_PRIPCI=y +CONFIG_ISDN_DIVAS_DIVACAPI=m +CONFIG_ISDN_DIVAS_USERIDI=m +CONFIG_ISDN_DIVAS_MAINT=m + +# +# Telephony Support +# +CONFIG_PHONE=m +CONFIG_PHONE_IXJ=m +CONFIG_PHONE_IXJ_PCMCIA=m + +# +# Input device 
support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +CONFIG_INPUT_JOYDEV=m +CONFIG_INPUT_TSDEV=m +CONFIG_INPUT_TSDEV_SCREEN_X=240 +CONFIG_INPUT_TSDEV_SCREEN_Y=320 +CONFIG_INPUT_EVDEV=m +# CONFIG_INPUT_EVBUG is not set + +# +# Input I/O drivers +# +CONFIG_GAMEPORT=m +CONFIG_SOUND_GAMEPORT=m +CONFIG_GAMEPORT_NS558=m +CONFIG_GAMEPORT_L4=m +CONFIG_GAMEPORT_EMU10K1=m +CONFIG_GAMEPORT_VORTEX=m +CONFIG_GAMEPORT_FM801=m +CONFIG_GAMEPORT_CS461x=m +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +CONFIG_SERIO_SERPORT=m +CONFIG_SERIO_CT82C710=m +CONFIG_SERIO_PARKBD=m +CONFIG_SERIO_PCIPS2=m + +# +# Input Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +CONFIG_KEYBOARD_SUNKBD=m +# CONFIG_KEYBOARD_LKKBD is not set +CONFIG_KEYBOARD_XTKBD=m +CONFIG_KEYBOARD_NEWTON=m +CONFIG_INPUT_MOUSE=y +CONFIG_MOUSE_PS2=y +CONFIG_MOUSE_SERIAL=m +CONFIG_MOUSE_INPORT=m +CONFIG_MOUSE_ATIXL=y +CONFIG_MOUSE_LOGIBM=m +CONFIG_MOUSE_PC110PAD=m +# CONFIG_MOUSE_VSXXXAA is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_JOYSTICK_ANALOG=m +CONFIG_JOYSTICK_A3D=m +CONFIG_JOYSTICK_ADI=m +CONFIG_JOYSTICK_COBRA=m +CONFIG_JOYSTICK_GF2K=m +CONFIG_JOYSTICK_GRIP=m +CONFIG_JOYSTICK_GRIP_MP=m +CONFIG_JOYSTICK_GUILLEMOT=m +CONFIG_JOYSTICK_INTERACT=m +CONFIG_JOYSTICK_SIDEWINDER=m +CONFIG_JOYSTICK_TMDC=m +CONFIG_JOYSTICK_IFORCE=m +CONFIG_JOYSTICK_IFORCE_USB=y +CONFIG_JOYSTICK_IFORCE_232=y +CONFIG_JOYSTICK_WARRIOR=m +CONFIG_JOYSTICK_MAGELLAN=m +CONFIG_JOYSTICK_SPACEORB=m +CONFIG_JOYSTICK_SPACEBALL=m +CONFIG_JOYSTICK_STINGER=m +CONFIG_JOYSTICK_TWIDDLER=m +CONFIG_JOYSTICK_DB9=m +CONFIG_JOYSTICK_GAMECON=m +CONFIG_JOYSTICK_TURBOGRAFX=m +# CONFIG_INPUT_JOYDUMP is not set +CONFIG_INPUT_TOUCHSCREEN=y +CONFIG_TOUCHSCREEN_GUNZE=m +CONFIG_INPUT_MISC=y +CONFIG_INPUT_PCSPKR=y +CONFIG_INPUT_UINPUT=m + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +CONFIG_ECC=m 
+CONFIG_SERIAL_NONSTANDARD=y +CONFIG_ROCKETPORT=m +CONFIG_SYNCLINK=m +CONFIG_SYNCLINKMP=m +CONFIG_N_HDLC=m +CONFIG_STALDRV=y + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_CS=m +# CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +CONFIG_SERIAL_8250_MULTIPORT=y +CONFIG_SERIAL_8250_RSA=y + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_ICOM is not set +CONFIG_SERIAL_JSM=m +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +CONFIG_PRINTER=m +# CONFIG_LP_CONSOLE is not set +CONFIG_PPDEV=m +CONFIG_TIPAR=m +CONFIG_QIC02_TAPE=m +CONFIG_QIC02_DYNCONF=y + +# +# Setting runtime QIC-02 configuration is done with qic02conf +# + +# +# from the tpqic02-support package. It is available at +# + +# +# metalab.unc.edu or ftp://titus.cfw.com/pub/Linux/util/ +# + +# +# IPMI +# +CONFIG_IPMI_HANDLER=m +CONFIG_IPMI_PANIC_EVENT=y +CONFIG_IPMI_PANIC_STRING=y +CONFIG_IPMI_DEVICE_INTERFACE=m +CONFIG_IPMI_KCS=m +CONFIG_IPMI_WATCHDOG=m + +# +# Watchdog Cards +# +CONFIG_WATCHDOG=y +# CONFIG_WATCHDOG_NOWAYOUT is not set + +# +# Watchdog Device Drivers +# +CONFIG_SOFT_WATCHDOG=m +CONFIG_ACQUIRE_WDT=m +CONFIG_ADVANTECH_WDT=m +CONFIG_ALIM1535_WDT=m +CONFIG_ALIM7101_WDT=m +CONFIG_AMD7XX_TCO=m +CONFIG_SC520_WDT=m +CONFIG_EUROTECH_WDT=m +CONFIG_IB700_WDT=m +CONFIG_WAFER_WDT=m +CONFIG_I8XX_TCO=m +CONFIG_SC1200_WDT=m +CONFIG_SCx200_WDT=m +CONFIG_60XX_WDT=m +CONFIG_CPU5_WDT=m +CONFIG_W83627HF_WDT=m +CONFIG_W83877F_WDT=m +CONFIG_MACHZ_WDT=m + +# +# ISA-based Watchdog Cards +# +CONFIG_PCWATCHDOG=m +CONFIG_MIXCOMWD=m +CONFIG_WDT=m +CONFIG_WDT_501=y + +# +# PCI-based Watchdog Cards +# +CONFIG_PCIPCWATCHDOG=m +CONFIG_WDTPCI=m +CONFIG_WDT_501_PCI=y + +# +# USB-based Watchdog Cards +# +CONFIG_USBPCWATCHDOG=m 
+CONFIG_HW_RANDOM=m +CONFIG_NVRAM=m +CONFIG_RTC=y +CONFIG_DTLK=m +CONFIG_R3964=m +CONFIG_APPLICOM=m +CONFIG_SONYPI=m + +# +# Ftape, the floppy tape device driver +# +CONFIG_AGP=m +CONFIG_AGP_ALI=m +CONFIG_AGP_ATI=m +CONFIG_AGP_AMD=m +CONFIG_AGP_AMD64=m +CONFIG_AGP_INTEL=m +CONFIG_AGP_INTEL_MCH=m +CONFIG_AGP_NVIDIA=m +CONFIG_AGP_SIS=m +CONFIG_AGP_SWORKS=m +CONFIG_AGP_VIA=m +CONFIG_AGP_EFFICEON=m +# CONFIG_DRM is not set + +# +# PCMCIA character devices +# +CONFIG_SYNCLINK_CS=m +# CONFIG_MWAVE is not set +CONFIG_SCx200_GPIO=m +CONFIG_RAW_DRIVER=m +CONFIG_MAX_RAW_DEVS=4096 +CONFIG_HANGCHECK_TIMER=m +CONFIG_VTUNE=m + +# +# Linux InfraRed Controller +# +CONFIG_LIRC_SUPPORT=m +CONFIG_LIRC_MAX_DEV=2 +CONFIG_LIRC_BT829=m +CONFIG_LIRC_IT87=m +CONFIG_LIRC_ATIUSB=m +CONFIG_LIRC_SERIAL=m +# CONFIG_LIRC_HOMEBREW is not set +CONFIG_LIRC_PORT_SERIAL=0x3f8 +CONFIG_LIRC_IRQ_SERIAL=4 +CONFIG_LIRC_SIR=m +CONFIG_LIRC_PORT_SIR=0x3f8 +CONFIG_LIRC_IRQ_SIR=4 + +# +# I2C support +# +CONFIG_I2C=m +CONFIG_I2C_CHARDEV=m + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=m +CONFIG_I2C_ALGOPCF=m + +# +# I2C Hardware Bus support +# +CONFIG_I2C_ALI1535=m +CONFIG_I2C_ALI15X3=m +CONFIG_I2C_AMD756=m +CONFIG_I2C_AMD8111=m +CONFIG_I2C_I801=m +CONFIG_I2C_I810=m +CONFIG_I2C_ISA=m +CONFIG_I2C_NFORCE2=m +CONFIG_I2C_PARPORT=m +CONFIG_I2C_PARPORT_LIGHT=m +CONFIG_I2C_PIIX4=m +CONFIG_I2C_PROSAVAGE=m +CONFIG_I2C_SAVAGE4=m +CONFIG_SCx200_I2C=m +CONFIG_SCx200_I2C_SCL=12 +CONFIG_SCx200_I2C_SDA=13 +CONFIG_SCx200_ACB=m +CONFIG_I2C_SIS5595=m +CONFIG_I2C_SIS630=m +CONFIG_I2C_SIS96X=m +CONFIG_I2C_VIA=m +CONFIG_I2C_VIAPRO=m +CONFIG_I2C_VOODOO3=m + +# +# Hardware Sensors Chip support +# +CONFIG_I2C_SENSOR=m +CONFIG_SENSORS_ADM1021=m +CONFIG_SENSORS_ASB100=m +CONFIG_SENSORS_DS1621=m +CONFIG_SENSORS_FSCHER=m +CONFIG_SENSORS_GL518SM=m +CONFIG_SENSORS_IT87=m +CONFIG_SENSORS_LM75=m +CONFIG_SENSORS_LM78=m +CONFIG_SENSORS_LM80=m +CONFIG_SENSORS_LM83=m +CONFIG_SENSORS_LM85=m +CONFIG_SENSORS_LM90=m +CONFIG_SENSORS_VIA686A=m 
+CONFIG_SENSORS_W83781D=m +CONFIG_SENSORS_W83L785TS=m +CONFIG_SENSORS_W83627HF=m + +# +# Other I2C Chip support +# +CONFIG_SENSORS_EEPROM=m +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Misc devices +# +CONFIG_IBM_ASM=m + +# +# Multimedia devices +# +CONFIG_VIDEO_DEV=m + +# +# Video For Linux +# + +# +# Video Adapters +# +CONFIG_VIDEO_BT848=m +CONFIG_VIDEO_PMS=m +CONFIG_VIDEO_BWQCAM=m +CONFIG_VIDEO_CQCAM=m +CONFIG_VIDEO_W9966=m +CONFIG_VIDEO_CPIA=m +CONFIG_VIDEO_CPIA_PP=m +CONFIG_VIDEO_CPIA_USB=m +CONFIG_VIDEO_SAA5246A=m +CONFIG_VIDEO_SAA5249=m +CONFIG_TUNER_3036=m +CONFIG_VIDEO_STRADIS=m +CONFIG_VIDEO_ZORAN=m +CONFIG_VIDEO_ZORAN_BUZ=m +CONFIG_VIDEO_ZORAN_DC10=m +CONFIG_VIDEO_ZORAN_DC30=m +CONFIG_VIDEO_ZORAN_LML33=m +CONFIG_VIDEO_ZORAN_LML33R10=m +CONFIG_VIDEO_SAA7134=m +CONFIG_VIDEO_MXB=m +CONFIG_VIDEO_DPC=m +CONFIG_VIDEO_HEXIUM_ORION=m +CONFIG_VIDEO_HEXIUM_GEMINI=m +CONFIG_VIDEO_CX88=m + +# +# Radio Adapters +# +CONFIG_RADIO_CADET=m +CONFIG_RADIO_RTRACK=m +CONFIG_RADIO_RTRACK2=m +CONFIG_RADIO_AZTECH=m +CONFIG_RADIO_GEMTEK=m +CONFIG_RADIO_GEMTEK_PCI=m +CONFIG_RADIO_MAXIRADIO=m +CONFIG_RADIO_MAESTRO=m +CONFIG_RADIO_MIROPCM20=m +# CONFIG_RADIO_MIROPCM20_RDS is not set +CONFIG_RADIO_SF16FMI=m +CONFIG_RADIO_SF16FMR2=m +CONFIG_RADIO_TERRATEC=m +CONFIG_RADIO_TRUST=m +CONFIG_RADIO_TYPHOON=m +CONFIG_RADIO_TYPHOON_PROC_FS=y +CONFIG_RADIO_ZOLTRIX=m + +# +# Digital Video Broadcasting Devices +# +CONFIG_DVB=y +CONFIG_DVB_CORE=m + +# +# Supported Frontend Modules +# +CONFIG_DVB_TWINHAN_DST=m +CONFIG_DVB_STV0299=m +CONFIG_DVB_SP887X=m +CONFIG_DVB_SP887X_FIRMWARE_FILE="/etc/dvb/sc_main.mc" +CONFIG_DVB_ALPS_TDLB7=m +CONFIG_DVB_ALPS_TDMB7=m +CONFIG_DVB_ATMEL_AT76C651=m +CONFIG_DVB_CX24110=m +CONFIG_DVB_GRUNDIG_29504_491=m +CONFIG_DVB_GRUNDIG_29504_401=m +CONFIG_DVB_MT312=m +CONFIG_DVB_VES1820=m +CONFIG_DVB_VES1X93=m +CONFIG_DVB_TDA1004X=m 
+CONFIG_DVB_TDA1004X_FIRMWARE_FILE="/usr/lib/hotplug/firmware/tda1004x.bin" +CONFIG_DVB_NXT6000=m + +# +# Supported SAA7146 based PCI Adapters +# +CONFIG_DVB_AV7110=m +# CONFIG_DVB_AV7110_FIRMWARE is not set +CONFIG_DVB_AV7110_OSD=y +CONFIG_DVB_BUDGET=m +CONFIG_DVB_BUDGET_CI=m +CONFIG_DVB_BUDGET_AV=m +CONFIG_DVB_BUDGET_PATCH=m + +# +# Supported USB Adapters +# +CONFIG_DVB_TTUSB_BUDGET=m +CONFIG_DVB_TTUSB_DEC=m + +# +# Supported FlexCopII (B2C2) Adapters +# +CONFIG_DVB_B2C2_SKYSTAR=m + +# +# Supported BT878 Adapters +# +CONFIG_DVB_BT8XX=m +CONFIG_VIDEO_SAA7146=m +CONFIG_VIDEO_SAA7146_VV=m +CONFIG_VIDEO_VIDEOBUF=m +CONFIG_VIDEO_TUNER=m +CONFIG_VIDEO_BUF=m +CONFIG_VIDEO_BTCX=m +CONFIG_VIDEO_IR=m + +# +# Graphics support +# +CONFIG_FB=y +CONFIG_FB_PM2=m +CONFIG_FB_PM2_FIFO_DISCONNECT=y +CONFIG_FB_CYBER2000=m +CONFIG_FB_IMSTT=y +CONFIG_FB_VGA16=m +CONFIG_FB_VESA=y +CONFIG_VIDEO_SELECT=y +CONFIG_FB_HGA=m +CONFIG_FB_RIVA=m +CONFIG_FB_I810=m +CONFIG_FB_I810_GTF=y +# CONFIG_FB_MATROX is not set +# CONFIG_FB_RADEON_OLD is not set +CONFIG_FB_RADEON=m +CONFIG_FB_RADEON_I2C=y +# CONFIG_FB_RADEON_DEBUG is not set +# CONFIG_FB_ATY128 is not set +CONFIG_FB_ATY=m +CONFIG_FB_ATY_CT=y +CONFIG_FB_ATY_GX=y +CONFIG_FB_ATY_XL_INIT=y +CONFIG_FB_SIS=m +CONFIG_FB_SIS_300=y +CONFIG_FB_SIS_315=y +CONFIG_FB_NEOMAGIC=m +CONFIG_FB_KYRO=m +CONFIG_FB_3DFX=m +CONFIG_FB_VOODOO1=m +CONFIG_FB_TRIDENT=m +# CONFIG_FB_VIRTUAL is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +CONFIG_MDA_CONSOLE=m +CONFIG_DUMMY_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_PCI_CONSOLE=y +# CONFIG_FONTS is not set +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y + +# +# Logo configuration +# +# CONFIG_LOGO is not set + +# +# Bootsplash configuration +# +CONFIG_BOOTSPLASH=y + +# +# Sound +# +CONFIG_SOUND=m + +# +# Advanced Linux Sound Architecture +# +CONFIG_SND=m +CONFIG_SND_TIMER=m +CONFIG_SND_PCM=m +CONFIG_SND_HWDEP=m +CONFIG_SND_RAWMIDI=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m 
+CONFIG_SND_OSSEMUL=y +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_RTCTIMER=m +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DEBUG=y +CONFIG_SND_DEBUG_MEMORY=y +# CONFIG_SND_DEBUG_DETECT is not set + +# +# Generic devices +# +CONFIG_SND_MPU401_UART=m +CONFIG_SND_OPL3_LIB=m +CONFIG_SND_OPL4_LIB=m +CONFIG_SND_VX_LIB=m +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m + +# +# ISA devices +# +CONFIG_SND_AD1816A=m +CONFIG_SND_AD1848=m +CONFIG_SND_CS4231=m +CONFIG_SND_CS4232=m +CONFIG_SND_CS4236=m +CONFIG_SND_ES968=m +CONFIG_SND_ES1688=m +CONFIG_SND_ES18XX=m +CONFIG_SND_GUSCLASSIC=m +CONFIG_SND_GUSEXTREME=m +CONFIG_SND_GUSMAX=m +CONFIG_SND_INTERWAVE=m +CONFIG_SND_INTERWAVE_STB=m +CONFIG_SND_OPTI92X_AD1848=m +CONFIG_SND_OPTI92X_CS4231=m +CONFIG_SND_OPTI93X=m +CONFIG_SND_SB8=m +CONFIG_SND_SB16=m +CONFIG_SND_SBAWE=m +CONFIG_SND_SB16_CSP=y +CONFIG_SND_WAVEFRONT=m +CONFIG_SND_ALS100=m +CONFIG_SND_AZT2320=m +CONFIG_SND_CMI8330=m +CONFIG_SND_DT019X=m +CONFIG_SND_OPL3SA2=m +CONFIG_SND_SGALAXY=m +CONFIG_SND_SSCAPE=m + +# +# PCI devices +# +CONFIG_SND_AC97_CODEC=m +CONFIG_SND_ALI5451=m +CONFIG_SND_ATIIXP=m +CONFIG_SND_AU8810=m +CONFIG_SND_AU8820=m +CONFIG_SND_AU8830=m +CONFIG_SND_AZT3328=m +CONFIG_SND_BT87X=m +CONFIG_SND_CS46XX=m +CONFIG_SND_CS46XX_NEW_DSP=y +CONFIG_SND_CS4281=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_KORG1212=m +CONFIG_SND_MIXART=m +CONFIG_SND_NM256=m +CONFIG_SND_RME32=m +CONFIG_SND_RME96=m +CONFIG_SND_RME9652=m +CONFIG_SND_HDSP=m +CONFIG_SND_TRIDENT=m +CONFIG_SND_YMFPCI=m +CONFIG_SND_ALS4000=m +CONFIG_SND_CMIPCI=m +CONFIG_SND_ENS1370=m +CONFIG_SND_ENS1371=m +CONFIG_SND_ES1938=m +CONFIG_SND_ES1968=m +CONFIG_SND_MAESTRO3=m +CONFIG_SND_FM801=m +CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_ICE1712=m +CONFIG_SND_ICE1724=m +CONFIG_SND_INTEL8X0=m +CONFIG_SND_INTEL8X0M=m +CONFIG_SND_SONICVIBES=m +CONFIG_SND_VIA82XX=m +CONFIG_SND_VX222=m + +# +# ALSA USB devices +# +CONFIG_SND_USB_AUDIO=m 
+ +# +# PCMCIA devices +# +# CONFIG_SND_VXPOCKET is not set +# CONFIG_SND_VXP440 is not set +# CONFIG_SND_PDAUDIOCF is not set + +# +# Open Sound System +# +CONFIG_SOUND_PRIME=m +CONFIG_SOUND_BT878=m +CONFIG_SOUND_CMPCI=m +CONFIG_SOUND_CMPCI_FM=y +CONFIG_SOUND_CMPCI_FMIO=0x388 +CONFIG_SOUND_CMPCI_MIDI=y +CONFIG_SOUND_CMPCI_MPUIO=0x330 +CONFIG_SOUND_CMPCI_JOYSTICK=y +CONFIG_SOUND_CMPCI_CM8738=y +# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set +CONFIG_SOUND_CMPCI_SPDIFLOOP=y +CONFIG_SOUND_CMPCI_SPEAKERS=2 +CONFIG_SOUND_EMU10K1=m +CONFIG_MIDI_EMU10K1=y +# CONFIG_SOUND_FUSION is not set +CONFIG_SOUND_CS4281=m +CONFIG_SOUND_ES1370=m +CONFIG_SOUND_ES1371=m +CONFIG_SOUND_ESSSOLO1=m +CONFIG_SOUND_MAESTRO=m +CONFIG_SOUND_MAESTRO3=m +CONFIG_SOUND_ICH=m +CONFIG_SOUND_SONICVIBES=m +CONFIG_SOUND_TRIDENT=m +# CONFIG_SOUND_MSNDCLAS is not set +# CONFIG_SOUND_MSNDPIN is not set +CONFIG_SOUND_VIA82CXXX=m +CONFIG_MIDI_VIA82CXXX=y +CONFIG_SOUND_OSS=m +CONFIG_SOUND_TRACEINIT=y +CONFIG_SOUND_DMAP=y +# CONFIG_SOUND_AD1816 is not set +CONFIG_SOUND_AD1889=m +CONFIG_SOUND_SGALAXY=m +CONFIG_SOUND_ADLIB=m +CONFIG_SOUND_ACI_MIXER=m +CONFIG_SOUND_CS4232=m +CONFIG_SOUND_SSCAPE=m +CONFIG_SOUND_GUS=m +# CONFIG_SOUND_GUS16 is not set +CONFIG_SOUND_GUSMAX=y +CONFIG_SOUND_VMIDI=m +CONFIG_SOUND_TRIX=m +CONFIG_SOUND_MSS=m +CONFIG_SOUND_MPU401=m +CONFIG_SOUND_NM256=m +CONFIG_SOUND_MAD16=m +CONFIG_MAD16_OLDCARD=y +CONFIG_SOUND_PAS=m +CONFIG_SOUND_PSS=m +CONFIG_PSS_MIXER=y +# CONFIG_PSS_HAVE_BOOT is not set +CONFIG_SOUND_SB=m +# CONFIG_SOUND_AWE32_SYNTH is not set +CONFIG_SOUND_WAVEFRONT=m +CONFIG_SOUND_MAUI=m +CONFIG_SOUND_YM3812=m +CONFIG_SOUND_OPL3SA1=m +CONFIG_SOUND_OPL3SA2=m +CONFIG_SOUND_YMFPCI=m +CONFIG_SOUND_YMFPCI_LEGACY=y +CONFIG_SOUND_UART6850=m +CONFIG_SOUND_AEDSP16=m +CONFIG_SC6600=y +CONFIG_SC6600_JOY=y +CONFIG_SC6600_CDROM=4 +CONFIG_SC6600_CDROMBASE=0x0 +# CONFIG_AEDSP16_MSS is not set +# CONFIG_AEDSP16_SBPRO is not set +CONFIG_AEDSP16_MPU401=y +CONFIG_SOUND_TVMIXER=m +CONFIG_SOUND_KAHLUA=m 
+CONFIG_SOUND_ALI5455=m +CONFIG_SOUND_FORTE=m +CONFIG_SOUND_RME96XX=m +CONFIG_SOUND_AD1980=m + +# +# USB support +# +CONFIG_USB=m +# CONFIG_USB_DEBUG is not set + +# +# Miscellaneous USB options +# +CONFIG_USB_DEVICEFS=y +# CONFIG_USB_BANDWIDTH is not set +# CONFIG_USB_DYNAMIC_MINORS is not set + +# +# USB Host Controller Drivers +# +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_EHCI_SPLIT_ISO=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m + +# +# USB Device Class drivers +# +CONFIG_USB_AUDIO=m + +# +# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem +# +CONFIG_USB_MIDI=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +# CONFIG_USB_STORAGE_DEBUG is not set +CONFIG_USB_STORAGE_DATAFAB=y +CONFIG_USB_STORAGE_FREECOM=y +CONFIG_USB_STORAGE_ISD200=y +CONFIG_USB_STORAGE_DPCM=y +CONFIG_USB_STORAGE_HP8200e=y +CONFIG_USB_STORAGE_SDDR09=y +CONFIG_USB_STORAGE_SDDR55=y +CONFIG_USB_STORAGE_JUMPSHOT=y + +# +# USB Human Interface Devices (HID) +# +CONFIG_USB_HID=m +CONFIG_USB_HIDINPUT=y +CONFIG_HID_FF=y +CONFIG_HID_PID=y +CONFIG_LOGITECH_FF=y +CONFIG_THRUSTMASTER_FF=y +CONFIG_USB_HIDDEV=y + +# +# USB HID Boot Protocol drivers +# +# CONFIG_USB_KBD is not set +# CONFIG_USB_MOUSE is not set +CONFIG_USB_AIPTEK=m +CONFIG_USB_WACOM=m +CONFIG_USB_KBTAB=m +CONFIG_USB_POWERMATE=m +CONFIG_USB_MTOUCH=m +CONFIG_USB_XPAD=m +CONFIG_USB_ATI_REMOTE=m + +# +# USB Imaging devices +# +CONFIG_USB_MDC800=m +CONFIG_USB_MICROTEK=m +CONFIG_USB_HPUSBSCSI=m + +# +# USB Multimedia devices +# +CONFIG_USB_DABUSB=m +CONFIG_USB_VICAM=m +CONFIG_USB_DSBR=m +CONFIG_USB_IBMCAM=m +CONFIG_USB_KONICAWC=m +CONFIG_USB_OV511=m +CONFIG_USB_SE401=m +CONFIG_USB_STV680=m +CONFIG_USB_W9968CF=m + +# +# USB Network adaptors +# +CONFIG_USB_CATC=m +CONFIG_USB_KAWETH=m +CONFIG_USB_PEGASUS=m +CONFIG_USB_RTL8150=m +CONFIG_USB_USBNET=m + +# +# USB Host-to-Host Cables +# +CONFIG_USB_ALI_M5632=y +CONFIG_USB_AN2720=y +CONFIG_USB_BELKIN=y +CONFIG_USB_GENESYS=y +CONFIG_USB_NET1080=y 
+CONFIG_USB_PL2301=y + +# +# Intelligent USB Devices/Gadgets +# +CONFIG_USB_ARMLINUX=y +CONFIG_USB_EPSON2888=y +CONFIG_USB_ZAURUS=y +CONFIG_USB_CDCETHER=y + +# +# USB Network Adapters +# +CONFIG_USB_AX8817X=y + +# +# USB port drivers +# +CONFIG_USB_USS720=m + +# +# USB Serial Converter support +# +CONFIG_USB_SERIAL=m +CONFIG_USB_SERIAL_GENERIC=y +CONFIG_USB_SERIAL_BELKIN=m +CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m +CONFIG_USB_SERIAL_EMPEG=m +CONFIG_USB_SERIAL_FTDI_SIO=m +CONFIG_USB_SERIAL_VISOR=m +CONFIG_USB_SERIAL_IPAQ=m +CONFIG_USB_SERIAL_IR=m +CONFIG_USB_SERIAL_EDGEPORT=m +CONFIG_USB_SERIAL_EDGEPORT_TI=m +CONFIG_USB_SERIAL_KEYSPAN_PDA=m +CONFIG_USB_SERIAL_KEYSPAN=m +CONFIG_USB_SERIAL_KEYSPAN_MPR=y +CONFIG_USB_SERIAL_KEYSPAN_USA28=y +CONFIG_USB_SERIAL_KEYSPAN_USA28X=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y +CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y +CONFIG_USB_SERIAL_KEYSPAN_USA19=y +CONFIG_USB_SERIAL_KEYSPAN_USA18X=y +CONFIG_USB_SERIAL_KEYSPAN_USA19W=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y +CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y +CONFIG_USB_SERIAL_KEYSPAN_USA49W=y +CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y +CONFIG_USB_SERIAL_KLSI=m +CONFIG_USB_SERIAL_KOBIL_SCT=m +CONFIG_USB_SERIAL_MCT_U232=m +CONFIG_USB_SERIAL_PL2303=m +CONFIG_USB_SERIAL_SAFE=m +CONFIG_USB_SERIAL_SAFE_PADDED=y +CONFIG_USB_SERIAL_CYBERJACK=m +CONFIG_USB_SERIAL_XIRCOM=m +CONFIG_USB_SERIAL_OMNINET=m +CONFIG_USB_EZUSB=y + +# +# USB Miscellaneous drivers +# +CONFIG_USB_EMI62=m +CONFIG_USB_EMI26=m +CONFIG_USB_TIGL=m +CONFIG_USB_AUERSWALD=m +CONFIG_USB_RIO500=m +CONFIG_USB_LEGOTOWER=m +CONFIG_USB_LCD=m +CONFIG_USB_LED=m +CONFIG_USB_CYTHERM=m +CONFIG_USB_SPEEDTOUCH=m +# CONFIG_USB_TEST is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_SDP is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_UDAPL_HELPER=m +CONFIG_INFINIBAND_MELLANOX_HCA=m +CONFIG_AUDIT=m + +# +# File systems +# +CONFIG_EXT2_FS=y 
+CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=m +CONFIG_EXT3_FS_XATTR=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_JBD=m +CONFIG_JBD_DEBUG=y +CONFIG_FS_MBCACHE=y +CONFIG_REISERFS_FS=m +# CONFIG_REISERFS_CHECK is not set +# CONFIG_REISERFS_PROC_INFO is not set +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_JFS_FS=m +CONFIG_JFS_POSIX_ACL=y +CONFIG_JFS_DMAPI=y +# CONFIG_JFS_DEBUG is not set +CONFIG_JFS_STATISTICS=y +CONFIG_FS_POSIX_ACL=y +CONFIG_XFS_FS=m +CONFIG_XFS_RT=y +CONFIG_XFS_QUOTA=m +CONFIG_XFS_DMAPI=y +CONFIG_XFS_SECURITY=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_MINIX_FS=y +CONFIG_ROMFS_FS=m +CONFIG_DMAPI=m +# CONFIG_DMAPI_DEBUG is not set +CONFIG_QUOTA=y +CONFIG_QFMT_V1=m +CONFIG_QFMT_V2=m +CONFIG_QUOTACTL=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m + +# +# CD-ROM/DVD Filesystems +# +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_ZISOFS=y +CONFIG_ZISOFS_FS=y +CONFIG_UDF_FS=m + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=m +CONFIG_MSDOS_FS=m +CONFIG_VFAT_FS=m +CONFIG_NTFS_FS=m +# CONFIG_NTFS_DEBUG is not set +# CONFIG_NTFS_RW is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +# CONFIG_DEVFS_FS is not set +CONFIG_DEVPTS_FS_XATTR=y +CONFIG_DEVPTS_FS_SECURITY=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_HUGETLB_PAGE=y +CONFIG_RAMFS=y +CONFIG_RELAYFS_FS=m +# CONFIG_KLOG_CHANNEL is not set + +# +# Miscellaneous filesystems +# +CONFIG_ADFS_FS=m +# CONFIG_ADFS_FS_RW is not set +CONFIG_AFFS_FS=m +CONFIG_HFS_FS=m +CONFIG_HFSPLUS_FS=m +CONFIG_BEFS_FS=m +# CONFIG_BEFS_DEBUG is not set +CONFIG_BFS_FS=m +CONFIG_EFS_FS=m +CONFIG_JFFS_FS=m +CONFIG_JFFS_FS_VERBOSE=0 +CONFIG_JFFS2_FS=m +CONFIG_JFFS2_FS_DEBUG=0 +# CONFIG_JFFS2_FS_NAND is not set +CONFIG_CRAMFS=m +CONFIG_VXFS_FS=m +CONFIG_HPFS_FS=m +CONFIG_QNX4FS_FS=m +# CONFIG_QNX4FS_RW is not set +CONFIG_SYSV_FS=m +CONFIG_UFS_FS=m +# CONFIG_UFS_FS_WRITE is not set + +# +# Network 
File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_NFS_ACL=y +CONFIG_NFS_V4=y +CONFIG_NFS_DIRECTIO=y +CONFIG_NFSD=m +CONFIG_NFSD_V3=y +CONFIG_NFSD_ACL=y +CONFIG_NFS_ACL_SUPPORT=y +# CONFIG_NFSD_V4 is not set +CONFIG_NFSD_TCP=y +CONFIG_LOCKD=y +CONFIG_STATD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=m +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +CONFIG_RPCSEC_GSS_KRB5=y +CONFIG_SMB_FS=m +CONFIG_SMB_NLS_DEFAULT=y +CONFIG_SMB_NLS_REMOTE="cp850" +CONFIG_CIFS=m +CONFIG_CIFS_STATS=y +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NCP_FS=m +CONFIG_NCPFS_PACKET_SIGNING=y +CONFIG_NCPFS_IOCTL_LOCKING=y +CONFIG_NCPFS_STRONG=y +CONFIG_NCPFS_NFS_NS=y +CONFIG_NCPFS_OS2_NS=y +CONFIG_NCPFS_SMALLDOS=y +CONFIG_NCPFS_NLS=y +CONFIG_NCPFS_EXTRAS=y +CONFIG_CODA_FS=m +# CONFIG_CODA_FS_OLD_API is not set +# CONFIG_INTERMEZZO_FS is not set +CONFIG_AFS_FS=m +CONFIG_RXRPC=m + +# +# Partition Types +# +CONFIG_PARTITION_ADVANCED=y +# CONFIG_ACORN_PARTITION is not set +CONFIG_OSF_PARTITION=y +# CONFIG_AMIGA_PARTITION is not set +CONFIG_ATARI_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_MSDOS_PARTITION=y +CONFIG_BSD_DISKLABEL=y +# CONFIG_MINIX_SUBPARTITION is not set +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_LDM_PARTITION=y +# CONFIG_LDM_DEBUG is not set +CONFIG_NEC98_PARTITION=y +CONFIG_SGI_PARTITION=y +CONFIG_ULTRIX_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_EFI_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NLS_CODEPAGE_437=m +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m 
+CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=m +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_FSHOOKS=y + +# +# Profiling support +# +CONFIG_PROFILING=y +CONFIG_OPROFILE=m + +# +# Kernel hacking +# +CONFIG_CRASH_DUMP=m +CONFIG_KERNTYPES=y +CONFIG_CRASH_DUMP_BLOCKDEV=m +CONFIG_CRASH_DUMP_NETDEV=m +# CONFIG_CRASH_DUMP_MEMDEV is not set +CONFIG_CRASH_DUMP_COMPRESS_RLE=m +CONFIG_CRASH_DUMP_COMPRESS_GZIP=m +CONFIG_DEBUG_KERNEL=y +CONFIG_EARLY_PRINTK=y +# CONFIG_KPROBES is not set +# CONFIG_DEBUGREG is not set +CONFIG_DEBUG_STACKOVERFLOW=y +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_SLAB is not set +CONFIG_MAGIC_SYSRQ=y +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_DEBUG_HIGHMEM is not set +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_FRAME_POINTER is not set +# CONFIG_KDB is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +# CONFIG_HOOK is not set + +# +# Security options +# +CONFIG_SECURITY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_CAPABILITIES=m +CONFIG_SECURITY_ROOTPLUG=m +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +# CONFIG_SECURITY_SELINUX_MLS is not set + +# +# IBM Crypto Hardware support +# +CONFIG_IBM_CRYPTO=m +CONFIG_ICA_LEEDSLITE=m + +# +# Cryptographic options +# +CONFIG_CRYPTO=y +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_NULL=m +CONFIG_CRYPTO_MD4=m +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_SHA256=m +CONFIG_CRYPTO_SHA512=m +CONFIG_CRYPTO_DES=y +CONFIG_CRYPTO_BLOWFISH=m +CONFIG_CRYPTO_TWOFISH=m +CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_AES=m 
+CONFIG_CRYPTO_CAST5=m +CONFIG_CRYPTO_CAST6=m +CONFIG_CRYPTO_ARC4=m +CONFIG_CRYPTO_DEFLATE=m +CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_TEST=m + +# +# Library routines +# +CONFIG_CRC32=y +CONFIG_QSORT=y +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=m + +# +# Build options +# +CONFIG_SUSE_KERNEL=y +CONFIG_CFGNAME="bigsmp" +CONFIG_RELEASE="SLES9_SP1_BRANCH_2004110217390391" +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_PC=y diff --git a/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch b/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch new file mode 100644 index 0000000..81f62ff --- /dev/null +++ b/lustre/kernel_patches/patches/export-show_task-2.6-vanilla.patch @@ -0,0 +1,21 @@ +Index: linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c +=================================================================== +--- linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091.orig/kernel/sched.c 2004-10-22 15:25:05.000000000 -0400 ++++ linux-2.6.5-SLES9_SP1_BRANCH_2004102113353091/kernel/sched.c 2004-10-22 15:39:18.000000000 -0400 +@@ -3147,7 +3147,7 @@ + return list_entry(p->sibling.next,struct task_struct,sibling); + } + +-static void show_task(task_t * p) ++void show_task(task_t * p) + { + task_t *relative; + unsigned state; +@@ -3200,6 +3200,7 @@ + if (state != TASK_RUNNING) + show_stack(p, NULL); + } ++EXPORT_SYMBOL(show_task); + + void show_state(void) + { diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch new file mode 100644 index 0000000..cad7b54 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -0,0 +1,2844 @@ +%patch +Index: linux-2.6.5-sles9/fs/ext3/extents.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 +@@ 
-0,0 +1,2313 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++/* ++ * Extents support for EXT3 ++ * ++ * TODO: ++ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() ++ * - ext3_ext_calc_credits() could take 'mergable' into account ++ * - ext3*_error() should be used in some situations ++ * - find_goal() [to be tested and improved] ++ * - smart tree reduction ++ * - arch-independence ++ * common on-disk format for big/little-endian arch ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) ++{ ++ int err; ++ ++ if (handle->h_buffer_credits > needed) ++ return handle; ++ if (!ext3_journal_extend(handle, needed)) ++ return handle; ++ err = ext3_journal_restart(handle, needed); ++ ++ return handle; ++} ++ ++static int inline ++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->get_write_access) ++ return tree->ops->get_write_access(h,tree->buffer); ++ else ++ return 0; ++} ++ ++static int inline ++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->mark_buffer_dirty) ++ return 
tree->ops->mark_buffer_dirty(h,tree->buffer); ++ else ++ return 0; ++} ++ ++/* ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ */ ++static int ext3_ext_get_access(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; ++ ++ if (path->p_bh) { ++ /* path points to block */ ++ err = ext3_journal_get_write_access(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_get_access_for_root(handle, tree); ++ } ++ return err; ++} ++ ++/* ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ * - EIO ++ */ ++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; ++ if (path->p_bh) { ++ /* path points to block */ ++ err =ext3_journal_dirty_metadata(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_mark_root_dirty(handle, tree); ++ } ++ return err; ++} ++ ++static int inline ++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, struct ext3_extent *ex, ++ int *err) ++{ ++ int goal, depth, newblock; ++ struct inode *inode; ++ ++ EXT_ASSERT(tree); ++ if (tree->ops->new_block) ++ return tree->ops->new_block(handle, tree, path, ex, err); ++ ++ inode = tree->inode; ++ depth = EXT_DEPTH(tree); ++ if (path && depth > 0) { ++ goal = path[depth-1].p_block; ++ } else { ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ ++ bg_start = (ei->i_block_group * ++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ goal = bg_start + colour; ++ } ++ ++ newblock = ext3_new_block(handle, inode, goal, err); ++ return newblock; ++} ++ ++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *neh; ++ neh = 
EXT_ROOT_HDR(tree); ++ neh->eh_generation++; ++} ++ ++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 6; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 5; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 3; ++#endif ++ return size; ++} ++ ++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) ++{ ++ int size; ++ ++ size = (tree->buffer_len - ++ sizeof(struct ext3_extent_header)) ++ / sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 4; ++#endif ++ return size; ++} ++ ++static void ext3_ext_show_path(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int k, l = path->p_depth; ++ ++ ext_debug(tree, "path:"); ++ for (k = 0; k <= l; k++, path++) { ++ if (path->p_idx) { ++ ext_debug(tree, " %d->%d", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); ++ } else if (path->p_ext) { ++ ext_debug(tree, " %d:%d:%d", ++ path->p_ext->ee_block, ++ path->p_ext->ee_len, ++ path->p_ext->ee_start); ++ } else ++ ext_debug(tree, " []"); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *eh; ++ struct ext3_extent *ex; ++ int i; ++ ++ if (!path) ++ return; ++ ++ eh = path[depth].p_hdr; ++ ex = EXT_FIRST_EXTENT(eh); ++ ++ 
for (i = 0; i < eh->eh_entries; i++, ex++) { ++ ext_debug(tree, "%d:%d:%d ", ++ ex->ee_block, ex->ee_len, ex->ee_start); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_drop_refs(struct ext3_ext_path *path) ++{ ++ int depth = path->p_depth; ++ int i; ++ ++ for (i = 0; i <= depth; i++, path++) ++ if (path->p_bh) { ++ brelse(path->p_bh); ++ path->p_bh = NULL; ++ } ++} ++ ++/* ++ * binary search for closest index by given block ++ */ ++static inline void ++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent_idx *ix; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ EXT_ASSERT(eh->eh_entries > 0); ++ ++ ext_debug(tree, "binsearch for %d(idx): ", block); ++ ++ path->p_idx = ix = EXT_FIRST_INDEX(eh); ++ ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ix[l + k].ei_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ix += l; ++ path->p_idx = ix; ++ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf); ++ ++ while (l++ < r) { ++ if (block < ix->ei_block) ++ break; ++ path->p_idx = ix++; ++ } ++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent_idx *chix; ++ ++ chix = ix = EXT_FIRST_INDEX(eh); ++ for (k = 0; k < eh->eh_entries; k++, ix++) { ++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->ei_block,ix[-1].ei_block); ++ } ++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); ++ if (block < ix->ei_block) ++ break; ++ chix = ix; ++ } ++ EXT_ASSERT(chix == path->p_idx); ++ } ++#endif ++ ++} ++ ++/* ++ * binary search for closest extent by given block ++ */ ++static inline void 
++ext3_ext_binsearch(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent *ex; ++ int l = 0, k, r; ++ ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ ++ if (eh->eh_entries == 0) { ++ /* ++ * this leaf is empty yet: ++ * we get such a leaf in split/add case ++ */ ++ return; ++ } ++ ++ ext_debug(tree, "binsearch for %d: ", block); ++ ++ path->p_ext = ex = EXT_FIRST_EXTENT(eh); ++ ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ex[l + k].ee_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } ++ ++ ex += l; ++ path->p_ext = ex; ++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); ++ ++ while (l++ < r) { ++ if (block < ex->ee_block) ++ break; ++ path->p_ext = ex++; ++ } ++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); ++ ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent *chex; ++ ++ chex = ex = EXT_FIRST_EXTENT(eh); ++ for (k = 0; k < eh->eh_entries; k++, ex++) { ++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); ++ if (block < ex->ee_block) ++ break; ++ chex = ex; ++ } ++ EXT_ASSERT(chex == path->p_ext); ++ } ++#endif ++ ++} ++ ++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *eh; ++ ++ BUG_ON(tree->buffer_len == 0); ++ ext3_ext_get_access_for_root(handle, tree); ++ eh = EXT_ROOT_HDR(tree); ++ eh->eh_depth = 0; ++ eh->eh_entries = 0; ++ eh->eh_magic = EXT3_EXT_MAGIC; ++ eh->eh_max = ext3_ext_space_root(tree); ++ ext3_ext_mark_root_dirty(handle, tree); ++ ext3_ext_invalidate_cache(tree); ++ return 0; ++} ++ ++struct ext3_ext_path * ++ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ struct buffer_head 
*bh; ++ int depth, i, ppos = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ eh = EXT_ROOT_HDR(tree); ++ EXT_ASSERT(eh); ++ i = depth = EXT_DEPTH(tree); ++ EXT_ASSERT(eh->eh_max); ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(i == 0 || eh->eh_entries > 0); ++ ++ /* account possible depth increase */ ++ if (!path) { ++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), ++ GFP_NOFS); ++ if (!path) ++ return ERR_PTR(-ENOMEM); ++ } ++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); ++ path[0].p_hdr = eh; ++ ++ /* walk through the tree */ ++ while (i) { ++ ext_debug(tree, "depth %d: num %d, max %d\n", ++ ppos, eh->eh_entries, eh->eh_max); ++ ext3_ext_binsearch_idx(tree, path + ppos, block); ++ path[ppos].p_block = path[ppos].p_idx->ei_leaf; ++ path[ppos].p_depth = i; ++ path[ppos].p_ext = NULL; ++ ++ bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); ++ if (!bh) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ return ERR_PTR(-EIO); ++ } ++ eh = EXT_BLOCK_HDR(bh); ++ ppos++; ++ EXT_ASSERT(ppos <= depth); ++ path[ppos].p_bh = bh; ++ path[ppos].p_hdr = eh; ++ i--; ++ } ++ ++ path[ppos].p_depth = i; ++ path[ppos].p_hdr = eh; ++ path[ppos].p_ext = NULL; ++ ++ /* find extent */ ++ ext3_ext_binsearch(tree, path + ppos, block); ++ ++ ext3_ext_show_path(tree, path); ++ ++ return path; ++} ++ ++/* ++ * insert new index [logical;ptr] into the block at cupr ++ * it check where to insert: before curp or after curp ++ */ ++static int ext3_ext_insert_index(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *curp, ++ int logical, int ptr) ++{ ++ struct ext3_extent_idx *ix; ++ int len, err; ++ ++ if ((err = ext3_ext_get_access(handle, tree, curp))) ++ return err; ++ ++ EXT_ASSERT(logical != curp->p_idx->ei_block); ++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; ++ if (logical > curp->p_idx->ei_block) { ++ /* insert after */ ++ if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { ++ 
len = (len - 1) * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d after: %d. " ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ (curp->p_idx + 1), (curp->p_idx + 2)); ++ memmove(curp->p_idx + 2, curp->p_idx + 1, len); ++ } ++ ix = curp->p_idx + 1; ++ } else { ++ /* insert before */ ++ len = len * sizeof(struct ext3_extent_idx); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert new index %d before: %d. " ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ curp->p_idx, (curp->p_idx + 1)); ++ memmove(curp->p_idx + 1, curp->p_idx, len); ++ ix = curp->p_idx; ++ } ++ ++ ix->ei_block = logical; ++ ix->ei_leaf = ptr; ++ curp->p_hdr->eh_entries++; ++ ++ EXT_ASSERT(curp->p_hdr->eh_entries <= curp->p_hdr->eh_max); ++ EXT_ASSERT(ix <= EXT_LAST_INDEX(curp->p_hdr)); ++ ++ err = ext3_ext_dirty(handle, tree, curp); ++ ext3_std_error(tree->inode->i_sb, err); ++ ++ return err; ++} ++ ++/* ++ * routine inserts new subtree into the path, using free index entry ++ * at depth 'at: ++ * - allocates all needed blocks (new leaf and all intermediate index blocks) ++ * - makes decision where to split ++ * - moves remaining extens and index entries (right to the split point) ++ * into the newly allocated blocks ++ * - initialize subtree ++ */ ++static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext, int at) ++{ ++ struct buffer_head *bh = NULL; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct ext3_extent *ex; ++ int i = at, k, m, a; ++ unsigned long newblock, oldblock, border; ++ int *ablocks = NULL; /* array of allocated blocks */ ++ int err = 0; ++ ++ /* make decision: where to split? 
*/ ++ /* FIXME: now desicion is simplest: at current extent */ ++ ++ /* if current leaf will be splitted, then we should use ++ * border from split point */ ++ EXT_ASSERT(path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)); ++ if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ border = path[depth].p_ext[1].ee_block; ++ ext_debug(tree, "leaf will be splitted." ++ " next leaf starts at %d\n", ++ (int)border); ++ } else { ++ border = newext->ee_block; ++ ext_debug(tree, "leaf will be added." ++ " next leaf starts at %d\n", ++ (int)border); ++ } ++ ++ /* ++ * if error occurs, then we break processing ++ * and turn filesystem read-only. so, index won't ++ * be inserted and tree will be in consistent ++ * state. next mount will repair buffers too ++ */ ++ ++ /* ++ * get array to track all allocated blocks ++ * we need this to handle errors and free blocks ++ * upon them ++ */ ++ ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); ++ if (!ablocks) ++ return -ENOMEM; ++ memset(ablocks, 0, sizeof(unsigned long) * depth); ++ ++ /* allocate all needed blocks */ ++ ext_debug(tree, "allocate %d blocks for indexes/leaf\n", depth - at); ++ for (a = 0; a < depth - at; a++) { ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ goto cleanup; ++ ablocks[a] = newblock; ++ } ++ ++ /* initialize new leaf */ ++ newblock = ablocks[--a]; ++ EXT_ASSERT(newblock); ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->eh_entries = 0; ++ neh->eh_max = ext3_ext_space_block(tree); ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ neh->eh_depth = 0; ++ ex = EXT_FIRST_EXTENT(neh); ++ ++ /* move remain of path[depth] to the new leaf */ ++ EXT_ASSERT(path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max); ++ /* start copy from next extent */ ++ /* TODO: we 
could do it by single memmove */ ++ m = 0; ++ path[depth].p_ext++; ++ while (path[depth].p_ext <= ++ EXT_MAX_EXTENT(path[depth].p_hdr)) { ++ ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", ++ path[depth].p_ext->ee_block, ++ path[depth].p_ext->ee_start, ++ path[depth].p_ext->ee_len, ++ newblock); ++ memmove(ex++, path[depth].p_ext++, ++ sizeof(struct ext3_extent)); ++ neh->eh_entries++; ++ m++; ++ } ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old leaf */ ++ if (m) { ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ goto cleanup; ++ path[depth].p_hdr->eh_entries -= m; ++ if ((err = ext3_ext_dirty(handle, tree, path + depth))) ++ goto cleanup; ++ ++ } ++ ++ /* create intermediate indexes */ ++ k = depth - at - 1; ++ EXT_ASSERT(k >= 0); ++ if (k) ++ ext_debug(tree, "create %d intermediate indices\n", k); ++ /* insert new index into current index block */ ++ /* current depth stored in i var */ ++ i = depth - 1; ++ while (k--) { ++ oldblock = newblock; ++ newblock = ablocks[--a]; ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ goto cleanup; ++ } ++ lock_buffer(bh); ++ ++ if ((err = ext3_journal_get_create_access(handle, bh))) ++ goto cleanup; ++ ++ neh = EXT_BLOCK_HDR(bh); ++ neh->eh_entries = 1; ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ neh->eh_max = ext3_ext_space_block_idx(tree); ++ neh->eh_depth = depth - i; ++ fidx = EXT_FIRST_INDEX(neh); ++ fidx->ei_block = border; ++ fidx->ei_leaf = oldblock; ++ ++ ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", ++ i, newblock, border, oldblock); ++ /* copy indexes */ ++ m = 0; ++ path[i].p_idx++; ++ ++ ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, ++ EXT_MAX_INDEX(path[i].p_hdr)); ++ EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == ++ EXT_LAST_INDEX(path[i].p_hdr)); ++ while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { ++ 
ext_debug(tree, "%d: move %d:%d in new index %lu\n", ++ i, path[i].p_idx->ei_block, ++ path[i].p_idx->ei_leaf, newblock); ++ memmove(++fidx, path[i].p_idx++, ++ sizeof(struct ext3_extent_idx)); ++ neh->eh_entries++; ++ EXT_ASSERT(neh->eh_entries <= neh->eh_max); ++ m++; ++ } ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto cleanup; ++ brelse(bh); ++ bh = NULL; ++ ++ /* correct old index */ ++ if (m) { ++ err = ext3_ext_get_access(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ path[i].p_hdr->eh_entries -= m; ++ err = ext3_ext_dirty(handle, tree, path + i); ++ if (err) ++ goto cleanup; ++ } ++ ++ i--; ++ } ++ ++ /* insert new index */ ++ if (!err) ++ err = ext3_ext_insert_index(handle, tree, path + at, ++ border, newblock); ++ ++cleanup: ++ if (bh) { ++ if (buffer_locked(bh)) ++ unlock_buffer(bh); ++ brelse(bh); ++ } ++ ++ if (err) { ++ /* free all allocated blocks in error case */ ++ for (i = 0; i < depth; i++) { ++ if (!ablocks[i]) ++ continue; ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ } ++ } ++ kfree(ablocks); ++ ++ return err; ++} ++ ++/* ++ * routine implements tree growing procedure: ++ * - allocates new block ++ * - moves top-level data (index block or leaf) into the new block ++ * - initialize new top-level, creating index that points to the ++ * just created block ++ */ ++static int ext3_ext_grow_indepth(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_ext_path *curp = path; ++ struct ext3_extent_header *neh; ++ struct ext3_extent_idx *fidx; ++ struct buffer_head *bh; ++ unsigned long newblock; ++ int err = 0; ++ ++ newblock = ext3_ext_new_block(handle, tree, path, newext, &err); ++ if (newblock == 0) ++ return err; ++ ++ bh = sb_getblk(tree->inode->i_sb, newblock); ++ if (!bh) { ++ err = -EIO; ++ ext3_std_error(tree->inode->i_sb, err); ++ return err; ++ } ++ lock_buffer(bh); ++ 
++ if ((err = ext3_journal_get_create_access(handle, bh))) { ++ unlock_buffer(bh); ++ goto out; ++ } ++ ++ /* move top-level index/leaf into new block */ ++ memmove(bh->b_data, curp->p_hdr, tree->buffer_len); ++ ++ /* set size of new block */ ++ neh = EXT_BLOCK_HDR(bh); ++ /* old root could have indexes or leaves ++ * so calculate e_max right way */ ++ if (EXT_DEPTH(tree)) ++ neh->eh_max = ext3_ext_space_block_idx(tree); ++ else ++ neh->eh_max = ext3_ext_space_block(tree); ++ neh->eh_magic = EXT3_EXT_MAGIC; ++ set_buffer_uptodate(bh); ++ unlock_buffer(bh); ++ ++ if ((err = ext3_journal_dirty_metadata(handle, bh))) ++ goto out; ++ ++ /* create index in new top-level index: num,max,pointer */ ++ if ((err = ext3_ext_get_access(handle, tree, curp))) ++ goto out; ++ ++ curp->p_hdr->eh_magic = EXT3_EXT_MAGIC; ++ curp->p_hdr->eh_max = ext3_ext_space_root_idx(tree); ++ curp->p_hdr->eh_entries = 1; ++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); ++ /* FIXME: it works, but actually path[0] can be index */ ++ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; ++ curp->p_idx->ei_leaf = newblock; ++ ++ neh = EXT_ROOT_HDR(tree); ++ fidx = EXT_FIRST_INDEX(neh); ++ ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", ++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); ++ ++ neh->eh_depth = path->p_depth + 1; ++ err = ext3_ext_dirty(handle, tree, curp); ++out: ++ brelse(bh); ++ ++ return err; ++} ++ ++/* ++ * routine finds empty index and adds new leaf. 
if no free index found ++ * then it requests in-depth growing ++ */ ++static int ext3_ext_create_new_leaf(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_ext_path *curp; ++ int depth, i, err = 0; ++ ++repeat: ++ i = depth = EXT_DEPTH(tree); ++ ++ /* walk up to the tree and look for free index entry */ ++ curp = path + depth; ++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { ++ i--; ++ curp--; ++ } ++ ++ /* we use already allocated block for index block ++ * so, subsequent data blocks should be contigoues */ ++ if (EXT_HAS_FREE_INDEX(curp)) { ++ /* if we found index with free entry, then use that ++ * entry: create all needed subtree and add new leaf */ ++ err = ext3_ext_split(handle, tree, path, newext, i); ++ ++ /* refill path */ ++ ext3_ext_drop_refs(path); ++ path = ext3_ext_find_extent(tree, newext->ee_block, path); ++ if (IS_ERR(path)) ++ err = PTR_ERR(path); ++ } else { ++ /* tree is full, time to grow in depth */ ++ err = ext3_ext_grow_indepth(handle, tree, path, newext); ++ ++ /* refill path */ ++ ext3_ext_drop_refs(path); ++ path = ext3_ext_find_extent(tree, newext->ee_block, path); ++ if (IS_ERR(path)) ++ err = PTR_ERR(path); ++ ++ /* ++ * only first (depth 0 -> 1) produces free space ++ * in all other cases we have to split growed tree ++ */ ++ depth = EXT_DEPTH(tree); ++ if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { ++ /* now we need split */ ++ goto repeat; ++ } ++ } ++ ++ if (err) ++ return err; ++ ++ return 0; ++} ++ ++/* ++ * returns allocated block in subsequent extent or EXT_MAX_BLOCK ++ * NOTE: it consider block number from index entry as ++ * allocated block. 
thus, index entries have to be consistent ++ * with leafs ++ */ ++static unsigned long ++ext3_ext_next_allocated_block(struct ext3_ext_path *path) ++{ ++ int depth; ++ ++ EXT_ASSERT(path != NULL); ++ depth = path->p_depth; ++ ++ if (depth == 0 && path->p_ext == NULL) ++ return EXT_MAX_BLOCK; ++ ++ /* FIXME: what if index isn't full ?! */ ++ while (depth >= 0) { ++ if (depth == path->p_depth) { ++ /* leaf */ ++ if (path[depth].p_ext != ++ EXT_LAST_EXTENT(path[depth].p_hdr)) ++ return path[depth].p_ext[1].ee_block; ++ } else { ++ /* index */ ++ if (path[depth].p_idx != ++ EXT_LAST_INDEX(path[depth].p_hdr)) ++ return path[depth].p_idx[1].ei_block; ++ } ++ depth--; ++ } ++ ++ return EXT_MAX_BLOCK; ++} ++ ++/* ++ * returns first allocated block from next leaf or EXT_MAX_BLOCK ++ */ ++static unsigned ext3_ext_next_leaf_block(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int depth; ++ ++ EXT_ASSERT(path != NULL); ++ depth = path->p_depth; ++ ++ /* zero-tree has no leaf blocks at all */ ++ if (depth == 0) ++ return EXT_MAX_BLOCK; ++ ++ /* go to index block */ ++ depth--; ++ ++ while (depth >= 0) { ++ if (path[depth].p_idx != ++ EXT_LAST_INDEX(path[depth].p_hdr)) ++ return path[depth].p_idx[1].ei_block; ++ depth--; ++ } ++ ++ return EXT_MAX_BLOCK; ++} ++ ++/* ++ * if leaf gets modified and modified extent is first in the leaf ++ * then we have to correct all indexes above ++ * TODO: do we need to correct tree in all cases? 
++ */ ++int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct ext3_extent_header *eh; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent *ex; ++ unsigned long border; ++ int k, err = 0; ++ ++ eh = path[depth].p_hdr; ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(eh); ++ ++ if (depth == 0) { ++ /* there is no tree at all */ ++ return 0; ++ } ++ ++ if (ex != EXT_FIRST_EXTENT(eh)) { ++ /* we correct tree if first leaf got modified only */ ++ return 0; ++ } ++ ++ /* ++ * TODO: we need correction if border is smaller then current one ++ */ ++ k = depth - 1; ++ border = path[depth].p_ext->ee_block; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ return err; ++ path[k].p_idx->ei_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ return err; ++ ++ while (k--) { ++ /* change all left-side indexes */ ++ if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) ++ break; ++ if ((err = ext3_ext_get_access(handle, tree, path + k))) ++ break; ++ path[k].p_idx->ei_block = border; ++ if ((err = ext3_ext_dirty(handle, tree, path + k))) ++ break; ++ } ++ ++ return err; ++} ++ ++static int inline ++ext3_can_extents_be_merged(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ if (ex1->ee_block + ex1->ee_len != ex2->ee_block) ++ return 0; ++ ++#ifdef AGRESSIVE_TEST ++ if (ex1->ee_len >= 4) ++ return 0; ++#endif ++ ++ if (!tree->ops->mergable) ++ return 1; ++ ++ return tree->ops->mergable(ex1, ex2); ++} ++ ++/* ++ * this routine tries to merge requsted extent into the existing ++ * extent or inserts requested extent as new one into the tree, ++ * creating new leaf in no-space case ++ */ ++int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) ++{ ++ struct ext3_extent_header * eh; ++ struct ext3_extent *ex, *fex; ++ struct ext3_extent 
*nearex; /* nearest extent */ ++ struct ext3_ext_path *npath = NULL; ++ int depth, len, err, next; ++ ++ EXT_ASSERT(newext->ee_len > 0); ++ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK); ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ EXT_ASSERT(path[depth].p_hdr); ++ ++ /* try to insert block into found extent and return */ ++ if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { ++ ext_debug(tree, "append %d block to %d:%d (from %d)\n", ++ newext->ee_len, ex->ee_block, ex->ee_len, ++ ex->ee_start); ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ return err; ++ ex->ee_len += newext->ee_len; ++ eh = path[depth].p_hdr; ++ nearex = ex; ++ goto merge; ++ } ++ ++repeat: ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ if (eh->eh_entries < eh->eh_max) ++ goto has_space; ++ ++ /* probably next leaf has space for us? */ ++ fex = EXT_LAST_EXTENT(eh); ++ next = ext3_ext_next_leaf_block(tree, path); ++ if (newext->ee_block > fex->ee_block && next != EXT_MAX_BLOCK) { ++ ext_debug(tree, "next leaf block - %d\n", next); ++ EXT_ASSERT(!npath); ++ npath = ext3_ext_find_extent(tree, next, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ EXT_ASSERT(npath->p_depth == path->p_depth); ++ eh = npath[depth].p_hdr; ++ if (eh->eh_entries < eh->eh_max) { ++ ext_debug(tree, "next leaf isnt full(%d)\n", ++ eh->eh_entries); ++ path = npath; ++ goto repeat; ++ } ++ ext_debug(tree, "next leaf hasno free space(%d,%d)\n", ++ eh->eh_entries, eh->eh_max); ++ } ++ ++ /* ++ * there is no free space in found leaf ++ * we're gonna add new leaf in the tree ++ */ ++ err = ext3_ext_create_new_leaf(handle, tree, path, newext); ++ if (err) ++ goto cleanup; ++ depth = EXT_DEPTH(tree); ++ eh = path[depth].p_hdr; ++ ++has_space: ++ nearex = path[depth].p_ext; ++ ++ if ((err = ext3_ext_get_access(handle, tree, path + depth))) ++ goto cleanup; ++ ++ if (!nearex) { ++ /* there is no extent in this leaf, create first one */ ++ ext_debug(tree, "first extent in the 
leaf: %d:%d:%d\n", ++ newext->ee_block, newext->ee_start, ++ newext->ee_len); ++ path[depth].p_ext = EXT_FIRST_EXTENT(eh); ++ } else if (newext->ee_block > nearex->ee_block) { ++ EXT_ASSERT(newext->ee_block != nearex->ee_block); ++ if (nearex != EXT_LAST_EXTENT(eh)) { ++ len = EXT_MAX_EXTENT(eh) - nearex; ++ len = (len - 1) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, ++ newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 2, nearex + 1, len); ++ } ++ path[depth].p_ext = nearex + 1; ++ } else { ++ EXT_ASSERT(newext->ee_block != nearex->ee_block); ++ len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); ++ len = len < 0 ? 0 : len; ++ ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); ++ memmove(nearex + 1, nearex, len); ++ path[depth].p_ext = nearex; ++ } ++ ++ eh->eh_entries++; ++ nearex = path[depth].p_ext; ++ nearex->ee_block = newext->ee_block; ++ nearex->ee_start = newext->ee_start; ++ nearex->ee_len = newext->ee_len; ++ /* FIXME: support for large fs */ ++ nearex->ee_start_hi = 0; ++ ++merge: ++ /* try to merge extents to the right */ ++ while (nearex < EXT_LAST_EXTENT(eh)) { ++ if (!ext3_can_extents_be_merged(tree, nearex, nearex + 1)) ++ break; ++ /* merge with next extent! 
*/ ++ nearex->ee_len += nearex[1].ee_len; ++ if (nearex + 1 < EXT_LAST_EXTENT(eh)) { ++ len = (EXT_LAST_EXTENT(eh) - nearex - 1) ++ * sizeof(struct ext3_extent); ++ memmove(nearex + 1, nearex + 2, len); ++ } ++ eh->eh_entries--; ++ EXT_ASSERT(eh->eh_entries > 0); ++ } ++ ++ /* try to merge extents to the left */ ++ ++ /* time to correct all indexes above */ ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ if (err) ++ goto cleanup; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ ++cleanup: ++ if (npath) { ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ } ++ ext3_ext_tree_changed(tree); ++ ext3_ext_invalidate_cache(tree); ++ return err; ++} ++ ++int ext3_ext_walk_space(struct ext3_extents_tree *tree, unsigned long block, ++ unsigned long num, ext_prepare_callback func) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent *ex, cbex; ++ unsigned long next, start = 0, end = 0; ++ unsigned long last = block + num; ++ int depth, exists, err = 0; ++ ++ EXT_ASSERT(tree); ++ EXT_ASSERT(func); ++ EXT_ASSERT(tree->inode); ++ EXT_ASSERT(tree->root); ++ ++ while (block < last && block != EXT_MAX_BLOCK) { ++ num = last - block; ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(tree, block, path); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ path = NULL; ++ break; ++ } ++ ++ depth = EXT_DEPTH(tree); ++ EXT_ASSERT(path[depth].p_hdr); ++ ex = path[depth].p_ext; ++ next = ext3_ext_next_allocated_block(path); ++ ++ exists = 0; ++ if (!ex) { ++ /* there is no extent yet, so try to allocate ++ * all requested space */ ++ start = block; ++ end = block + num; ++ } else if (ex->ee_block > block) { ++ /* need to allocate space before found extent */ ++ start = block; ++ end = ex->ee_block; ++ if (block + num < end) ++ end = block + num; ++ } else if (block >= ex->ee_block + ex->ee_len) { ++ /* need to allocate space after found extent */ ++ start = block; ++ end = block + num; ++ if (end >= next) ++ end = next; ++ } else if (block >= 
ex->ee_block) { ++ /* ++ * some part of requested space is covered ++ * by found extent ++ */ ++ start = block; ++ end = ex->ee_block + ex->ee_len; ++ if (block + num < end) ++ end = block + num; ++ exists = 1; ++ } else { ++ BUG(); ++ } ++ EXT_ASSERT(end > start); ++ ++ if (!exists) { ++ cbex.ee_block = start; ++ cbex.ee_len = end - start; ++ cbex.ee_start = 0; ++ } else ++ cbex = *ex; ++ ++ EXT_ASSERT(path[depth].p_hdr); ++ err = func(tree, path, &cbex, exists); ++ ext3_ext_drop_refs(path); ++ ++ if (err < 0) ++ break; ++ if (err == EXT_REPEAT) ++ continue; ++ else if (err == EXT_BREAK) { ++ err = 0; ++ break; ++ } ++ ++ if (EXT_DEPTH(tree) != depth) { ++ /* depth was changed. we have to realloc path */ ++ kfree(path); ++ path = NULL; ++ } ++ ++ block = cbex.ee_block + cbex.ee_len; ++ } ++ ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ ++ return err; ++} ++ ++static inline void ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) ++{ ++ EXT_ASSERT(len > 0); ++ if (tree->cex) { ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; ++ } ++} ++ ++/* ++ * this routine calculate boundaries of the gap requested block fits into ++ * and cache this gap ++ */ ++static inline void ++ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ unsigned long block) ++{ ++ int depth = EXT_DEPTH(tree); ++ unsigned long lblock, len; ++ struct ext3_extent *ex; ++ ++ if (!tree->cex) ++ return; ++ ++ ex = path[depth].p_ext; ++ if (ex == NULL) { ++ /* there is no extent yet, so gap is [0;-] */ ++ lblock = 0; ++ len = EXT_MAX_BLOCK; ++ ext_debug(tree, "cache gap(whole file):"); ++ } else if (block < ex->ee_block) { ++ lblock = block; ++ len = ex->ee_block - block; ++ ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len); ++ } 
else if (block >= ex->ee_block + ex->ee_len) { ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); ++ ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) block); ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; ++ } else { ++ lblock = len = 0; ++ BUG(); ++ } ++ ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); ++} ++ ++static inline int ++ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, ++ struct ext3_extent *ex) ++{ ++ struct ext3_ext_cache *cex = tree->cex; ++ ++ /* is there cache storage at all? */ ++ if (!cex) ++ return EXT3_EXT_CACHE_NO; ++ ++ /* has cache valid data? */ ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; ++ ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) ex->ee_start); ++ return cex->ec_type; ++ } ++ ++ /* not in cache */ ++ return EXT3_EXT_CACHE_NO; ++} ++ ++/* ++ * routine removes index from the index block ++ * it's used in truncate case only. 
thus all requests are for ++ * last index in the block only ++ */ ++int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ struct buffer_head *bh; ++ int err; ++ ++ /* free index block */ ++ path--; ++ EXT_ASSERT(path->p_hdr->eh_entries); ++ if ((err = ext3_ext_get_access(handle, tree, path))) ++ return err; ++ path->p_hdr->eh_entries--; ++ if ((err = ext3_ext_dirty(handle, tree, path))) ++ return err; ++ ext_debug(tree, "index is empty, remove it, free block %d\n", ++ path->p_idx->ei_leaf); ++ bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ++ ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ return err; ++} ++ ++int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int depth = EXT_DEPTH(tree); ++ int needed; ++ ++ if (path) { ++ /* probably there is space in leaf? */ ++ if (path[depth].p_hdr->eh_entries < path[depth].p_hdr->eh_max) ++ return 1; ++ } ++ ++ /* ++ * the worste case we're expecting is creation of the ++ * new root (growing in depth) with index splitting ++ * for splitting we have to consider depth + 1 because ++ * previous growing could increase it ++ */ ++ depth = depth + 1; ++ ++ /* ++ * growing in depth: ++ * block allocation + new root + old root ++ */ ++ needed = EXT3_ALLOC_NEEDED + 2; ++ ++ /* index split. 
we may need: ++ * allocate intermediate indexes and new leaf ++ * change two blocks at each level, but root ++ * modify root block (inode) ++ */ ++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1; ++ ++ return needed; ++} ++ ++static int ++ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, tex; ++ struct ext3_ext_path *npath; ++ int depth, creds, err; ++ ++ depth = EXT_DEPTH(tree); ++ ex = path[depth].p_ext; ++ EXT_ASSERT(ex); ++ EXT_ASSERT(end < ex->ee_block + ex->ee_len - 1); ++ EXT_ASSERT(ex->ee_block < start); ++ ++ /* calculate tail extent */ ++ tex.ee_block = end + 1; ++ EXT_ASSERT(tex.ee_block < ex->ee_block + ex->ee_len); ++ tex.ee_len = ex->ee_block + ex->ee_len - tex.ee_block; ++ ++ creds = ext3_ext_calc_credits_for_insert(tree, path); ++ handle = ext3_ext_journal_restart(handle, creds); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ /* calculate head extent. use primary extent */ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ return err; ++ ex->ee_len = start - ex->ee_block; ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ return err; ++ ++ /* FIXME: some callback to free underlying resource ++ * and correct ee_start? 
*/ ++ ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", ++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); ++ ++ npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); ++ if (IS_ERR(npath)) ++ return PTR_ERR(npath); ++ depth = EXT_DEPTH(tree); ++ EXT_ASSERT(npath[depth].p_ext->ee_block == ex->ee_block); ++ EXT_ASSERT(npath[depth].p_ext->ee_len == ex->ee_len); ++ ++ err = ext3_ext_insert_extent(handle, tree, npath, &tex); ++ ext3_ext_drop_refs(npath); ++ kfree(npath); ++ ++ return err; ++ ++} ++ ++static int ++ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) ++{ ++ struct ext3_extent *ex, *fu = NULL, *lu, *le; ++ int err = 0, correct_index = 0; ++ int depth = EXT_DEPTH(tree), credits; ++ struct ext3_extent_header *eh; ++ unsigned a, b, block, num; ++ ++ ext_debug(tree, "remove [%lu:%lu] in leaf\n", start, end); ++ if (!path[depth].p_hdr) ++ path[depth].p_hdr = EXT_BLOCK_HDR(path[depth].p_bh); ++ eh = path[depth].p_hdr; ++ EXT_ASSERT(eh); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ ++ /* find where to start removing */ ++ le = ex = EXT_LAST_EXTENT(eh); ++ while (ex != EXT_FIRST_EXTENT(eh)) { ++ if (ex->ee_block <= end) ++ break; ++ ex--; ++ } ++ ++ if (start > ex->ee_block && end < ex->ee_block + ex->ee_len - 1) { ++ /* removal of internal part of the extent requested ++ * tail and head must be placed in different extent ++ * so, we have to insert one more extent */ ++ path[depth].p_ext = ex; ++ return ext3_ext_split_for_rm(handle, tree, path, start, end); ++ } ++ ++ lu = ex; ++ while (ex >= EXT_FIRST_EXTENT(eh) && ++ ex->ee_block + ex->ee_len > start) { ++ ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); ++ path[depth].p_ext = ex; ++ ++ a = ex->ee_block > start ? ex->ee_block : start; ++ b = ex->ee_block + ex->ee_len - 1 < end ? 
++ ex->ee_block + ex->ee_len - 1 : end; ++ ++ ext_debug(tree, " border %u:%u\n", a, b); ++ ++ if (a != ex->ee_block && b != ex->ee_block + ex->ee_len - 1) { ++ block = 0; ++ num = 0; ++ BUG(); ++ } else if (a != ex->ee_block) { ++ /* remove tail of the extent */ ++ block = ex->ee_block; ++ num = a - block; ++ } else if (b != ex->ee_block + ex->ee_len - 1) { ++ /* remove head of the extent */ ++ block = a; ++ num = b - a; ++ } else { ++ /* remove whole extent: excelent! */ ++ block = ex->ee_block; ++ num = 0; ++ EXT_ASSERT(a == ex->ee_block && ++ b == ex->ee_block + ex->ee_len - 1); ++ } ++ ++ if (ex == EXT_FIRST_EXTENT(eh)) ++ correct_index = 1; ++ ++ credits = 1; ++ if (correct_index) ++ credits += (EXT_DEPTH(tree) * EXT3_ALLOC_NEEDED) + 1; ++ if (tree->ops->remove_extent_credits) ++ credits+=tree->ops->remove_extent_credits(tree,ex,a,b); ++ ++ handle = ext3_ext_journal_restart(handle, credits); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out; ++ } ++ ++ err = ext3_ext_get_access(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ if (tree->ops->remove_extent) ++ err = tree->ops->remove_extent(tree, ex, a, b); ++ if (err) ++ goto out; ++ ++ if (num == 0) { ++ /* this extent is removed entirely mark slot unused */ ++ ex->ee_start = 0; ++ eh->eh_entries--; ++ fu = ex; ++ } ++ ++ ex->ee_block = block; ++ ex->ee_len = num; ++ ++ err = ext3_ext_dirty(handle, tree, path + depth); ++ if (err) ++ goto out; ++ ++ ext_debug(tree, "new extent: %u:%u:%u\n", ++ ex->ee_block, ex->ee_len, ex->ee_start); ++ ex--; ++ } ++ ++ if (fu) { ++ /* reuse unused slots */ ++ while (lu < le) { ++ if (lu->ee_start) { ++ *fu = *lu; ++ lu->ee_start = 0; ++ fu++; ++ } ++ lu++; ++ } ++ } ++ ++ if (correct_index && eh->eh_entries) ++ err = ext3_ext_correct_indexes(handle, tree, path); ++ ++ /* if this leaf is free, then we should ++ * remove it from index block above */ ++ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) ++ err = ext3_ext_rm_idx(handle, tree, 
path + depth); ++ ++out: ++ return err; ++} ++ ++ ++static struct ext3_extent_idx * ++ext3_ext_last_covered(struct ext3_extent_header *hdr, unsigned long block) ++{ /* scan right to left for the last index with ei_block <= block; ends on the first index if none matches */ ++ struct ext3_extent_idx *ix; ++ ++ ix = EXT_LAST_INDEX(hdr); ++ while (ix != EXT_FIRST_INDEX(hdr)) { ++ if (ix->ei_block <= block) ++ break; ++ ix--; ++ } ++ return ix; ++} ++ ++/* ++ * returns 1 if the current index has to be freed (even partially) ++ */ ++static int inline ++ext3_ext_more_to_rm(struct ext3_ext_path *path) ++{ ++ EXT_ASSERT(path->p_idx); ++ ++ if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) ++ return 0; ++ ++ /* ++ * if truncate on deeper level happened, it wasn't partial ++ * so we have to consider current index for truncation ++ */ ++ if (path->p_hdr->eh_entries == path->p_block) ++ return 0; ++ return 1; ++} ++ ++int ext3_ext_remove_space(struct ext3_extents_tree *tree, ++ unsigned long start, unsigned long end) ++{ /* removes blocks start..end from the tree under its own journal handle; returns 0 or a negative errno */ ++ struct inode *inode = tree->inode; ++ struct super_block *sb = inode->i_sb; ++ int depth = EXT_DEPTH(tree); ++ struct ext3_ext_path *path; ++ handle_t *handle; ++ int i = 0, err = 0; ++ ++ ext_debug(tree, "space to be removed: %lu:%lu\n", start, end); ++ ++ /* probably first extent we're gonna free will be last in block */ ++ handle = ext3_journal_start(inode, depth + 1); ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ ++ ext3_ext_invalidate_cache(tree); ++ ++ /* ++ * we start scanning from right side freeing all the blocks ++ * after i_size and walking into the deep ++ */ ++ path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); ++ if (path == NULL) { /* kmalloc() reports failure with NULL, never an ERR_PTR */ ++ ext3_error(sb, "ext3_ext_remove_space", ++ "Can't allocate path array"); ++ ext3_journal_stop(handle); ++ return -ENOMEM; ++ } ++ memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1)); ++ path[i].p_hdr = EXT_ROOT_HDR(tree); ++ ++ while (i >= 0 && err == 0) { ++ if (i == depth) { ++ /* this is leaf block */ ++ err = ext3_ext_rm_leaf(handle, tree, path, start, end); ++ /* root level has p_bh
== NULL, brelse() eats this */ ++ brelse(path[i].p_bh); ++ i--; ++ continue; ++ } ++ ++ /* this is index block */ ++ if (!path[i].p_hdr) { ++ ext_debug(tree, "initialize header\n"); ++ path[i].p_hdr = EXT_BLOCK_HDR(path[i].p_bh); ++ } ++ ++ EXT_ASSERT(path[i].p_hdr->eh_entries <= path[i].p_hdr->eh_max); ++ EXT_ASSERT(path[i].p_hdr->eh_magic == EXT3_EXT_MAGIC); ++ ++ if (!path[i].p_idx) { ++ /* this level hasn't been touched yet */ ++ path[i].p_idx = ++ ext3_ext_last_covered(path[i].p_hdr, end); ++ path[i].p_block = path[i].p_hdr->eh_entries + 1; ++ ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", ++ path[i].p_hdr, path[i].p_hdr->eh_entries); ++ } else { ++ /* we've already been here, look at the next index */ ++ path[i].p_idx--; ++ } ++ ++ ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", ++ i, EXT_FIRST_INDEX(path[i].p_hdr), ++ path[i].p_idx); ++ if (ext3_ext_more_to_rm(path + i)) { ++ /* go to the next level */ ++ ext_debug(tree, "move to level %d (block %d)\n", ++ i + 1, path[i].p_idx->ei_leaf); ++ memset(path + i + 1, 0, sizeof(*path)); ++ path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); ++ if (!path[i+1].p_bh) { ++ /* should we reset i_size?
*/ ++ err = -EIO; ++ break; ++ } ++ /* store the actual number of indexes so the next ++ * iteration can tell whether it got changed */ ++ path[i].p_block = path[i].p_hdr->eh_entries; ++ i++; ++ } else { ++ /* we finish processing this index, go up */ ++ if (path[i].p_hdr->eh_entries == 0 && i > 0) { ++ /* index is empty, remove it ++ * handle must be already prepared by the ++ * truncatei_leaf() */ ++ err = ext3_ext_rm_idx(handle, tree, path + i); ++ } ++ /* root level has p_bh == NULL, brelse() eats this */ ++ brelse(path[i].p_bh); ++ i--; ++ ext_debug(tree, "return to level %d\n", i); ++ } ++ } ++ ++ /* TODO: flexible tree reduction should be here */ ++ if (path->p_hdr->eh_entries == 0) { ++ /* ++ * truncate to zero freed all the tree ++ * so, we need to correct eh_depth ++ */ ++ err = ext3_ext_get_access(handle, tree, path); ++ if (err == 0) { ++ EXT_ROOT_HDR(tree)->eh_depth = 0; ++ EXT_ROOT_HDR(tree)->eh_max = ext3_ext_space_root(tree); ++ err = ext3_ext_dirty(handle, tree, path); ++ } ++ } ++ ext3_ext_tree_changed(tree); ++ ++ kfree(path); ++ ext3_journal_stop(handle); ++ ++ return err; ++} ++ ++int ext3_ext_calc_metadata_amount(struct ext3_extents_tree *tree, int blocks) ++{ ++ int lcap, icap, rcap, leafs, idxs, num; ++ ++ rcap = ext3_ext_space_root(tree); ++ if (blocks <= rcap) { ++ /* all extents fit to the root */ ++ return 0; ++ } ++ ++ rcap = ext3_ext_space_root_idx(tree); ++ lcap = ext3_ext_space_block(tree); ++ icap = ext3_ext_space_block_idx(tree); ++ ++ num = leafs = (blocks + lcap - 1) / lcap; ++ if (leafs <= rcap) { ++ /* all pointers to leafs fit to the root */ ++ return leafs; ++ } ++ ++ /* ok.
we need separate index block(s) to link all leaf blocks */ ++ idxs = (leafs + icap - 1) / icap; ++ do { ++ num += idxs; ++ idxs = (idxs + icap - 1) / icap; ++ } while (idxs > rcap); ++ ++ return num; ++} ++ ++/* ++ * called at mount time ++ */ ++void ext3_ext_init(struct super_block *sb) ++{ ++ /* ++ * possible initialization would be here ++ */ ++ ++ if (test_opt(sb, EXTENTS)) { ++ printk("EXT3-fs: file extents enabled"); ++#ifdef AGRESSIVE_TEST ++ printk(", agressive tests"); ++#endif ++#ifdef CHECK_BINSEARCH ++ printk(", check binsearch"); ++#endif ++ printk("\n"); ++ } ++} ++ ++/* ++ * called at umount time ++ */ ++void ext3_ext_release(struct super_block *sb) ++{ ++} ++ ++/************************************************************************ ++ * VFS related routines ++ ************************************************************************/ ++ ++static int ext3_get_inode_write_access(handle_t *handle, void *buffer) ++{ ++ /* we use in-core data, not bh */ ++ return 0; ++} ++ ++static int ext3_mark_buffer_dirty(handle_t *handle, void *buffer) ++{ ++ struct inode *inode = buffer; ++ return ext3_mark_inode_dirty(handle, inode); ++} ++ ++static int ext3_ext_mergable(struct ext3_extent *ex1, ++ struct ext3_extent *ex2) ++{ ++ /* FIXME: support for large fs */ ++ if (ex1->ee_start + ex1->ee_len == ex2->ee_start) ++ return 1; ++ return 0; ++} ++ ++static int ++ext3_remove_blocks_credits(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed; ++ ++ /* at present, extent can't cross block group */; ++ needed = 4; /* bitmap + group desc + sb + inode */ ++ ++#ifdef CONFIG_QUOTA ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ return needed; ++} ++ ++static int ++ext3_remove_blocks(struct ext3_extents_tree *tree, ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) ++{ ++ int needed = ext3_remove_blocks_credits(tree, ex, from, to); ++ handle_t *handle = 
ext3_journal_start(tree->inode, needed); ++ struct buffer_head *bh; ++ int i; ++ ++ if (IS_ERR(handle)) ++ return PTR_ERR(handle); ++ if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { ++ /* tail removal */ ++ unsigned long num, start; ++ num = ex->ee_block + ex->ee_len - from; ++ start = ex->ee_start + ex->ee_len - num; ++ ext_debug(tree, "free last %lu blocks starting %lu\n", ++ num, start); ++ for (i = 0; i < num; i++) { ++ bh = sb_find_get_block(tree->inode->i_sb, start + i); ++ ext3_forget(handle, 0, tree->inode, bh, start + i); ++ } ++ ext3_free_blocks(handle, tree->inode, start, num); ++ } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { ++ printk("strange request: removal %lu-%lu from %u:%u\n", ++ from, to, ex->ee_block, ex->ee_len); ++ } else { ++ printk("strange request: removal(2) %lu-%lu from %u:%u\n", ++ from, to, ex->ee_block, ex->ee_len); ++ } ++ ext3_journal_stop(handle); ++ return 0; ++} ++ ++static int ext3_ext_find_goal(struct inode *inode, ++ struct ext3_ext_path *path, unsigned long block) ++{ ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; ++ int depth; ++ ++ if (path) { ++ struct ext3_extent *ex; ++ depth = path->p_depth; ++ ++ /* try to predict block placement */ ++ if ((ex = path[depth].p_ext)) ++ return ex->ee_start + (block - ex->ee_block); ++ ++ /* it looks index is empty ++ * try to find starting from index itself */ ++ if (path[depth].p_bh) ++ return path[depth].p_bh->b_blocknr; ++ } ++ ++ /* OK. 
use inode's group */ ++ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ return bg_start + colour + block; ++} ++ ++static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int *err) ++{ ++ struct inode *inode = tree->inode; ++ int newblock, goal; ++ ++ EXT_ASSERT(path); ++ EXT_ASSERT(ex); ++ EXT_ASSERT(ex->ee_start); ++ EXT_ASSERT(ex->ee_len); ++ ++ /* reuse block from the extent to order data/metadata */ ++ newblock = ex->ee_start++; ++ ex->ee_len--; ++ if (ex->ee_len == 0) { ++ ex->ee_len = 1; ++ /* allocate new block for the extent */ ++ goal = ext3_ext_find_goal(inode, path, ex->ee_block); ++ ex->ee_start = ext3_new_block(handle, inode, goal, err); ++ if (ex->ee_start == 0) { ++ /* error occured: restore old extent */ ++ ex->ee_start = newblock; ++ return 0; ++ } ++ } ++ return newblock; ++} ++ ++static struct ext3_extents_helpers ext3_blockmap_helpers = { ++ .get_write_access = ext3_get_inode_write_access, ++ .mark_buffer_dirty = ext3_mark_buffer_dirty, ++ .mergable = ext3_ext_mergable, ++ .new_block = ext3_new_block_cb, ++ .remove_extent = ext3_remove_blocks, ++ .remove_extent_credits = ext3_remove_blocks_credits, ++}; ++ ++void ext3_init_tree_desc(struct ext3_extents_tree *tree, ++ struct inode *inode) ++{ ++ tree->inode = inode; ++ tree->root = (void *) EXT3_I(inode)->i_data; ++ tree->buffer = (void *) inode; ++ tree->buffer_len = sizeof(EXT3_I(inode)->i_data); ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; ++ tree->ops = &ext3_blockmap_helpers; ++} ++ ++int ext3_ext_get_block(handle_t *handle, struct inode *inode, ++ long iblock, struct buffer_head *bh_result, ++ int create, int extend_disksize) ++{ ++ struct ext3_ext_path *path = NULL; ++ struct ext3_extent newex; ++ struct 
ext3_extent *ex; ++ int goal, newblock, err = 0, depth; ++ struct ext3_extents_tree tree; ++ ++ clear_buffer_new(bh_result); ++ ext3_init_tree_desc(&tree, inode); ++ ext_debug(&tree, "block %d requested for inode %u\n", ++ (int) iblock, (unsigned) inode->i_ino); ++ down(&EXT3_I(inode)->truncate_sem); ++ ++ /* check in cache */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { ++ if (!create) { ++ /* block isn't allocated yet and ++ * user don't want to allocate it */ ++ goto out2; ++ } ++ /* we should allocate requested block */ ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { ++ /* block is already allocated */ ++ newblock = iblock - newex.ee_block + newex.ee_start; ++ goto out; ++ } else { ++ EXT_ASSERT(0); ++ } ++ } ++ ++ /* find extent for this block */ ++ path = ext3_ext_find_extent(&tree, iblock, NULL); ++ if (IS_ERR(path)) { ++ err = PTR_ERR(path); ++ path = NULL; ++ goto out2; ++ } ++ ++ depth = EXT_DEPTH(&tree); ++ ++ /* ++ * consistent leaf must not be empty ++ * this situations is possible, though, _during_ tree modification ++ * this is why assert can't be put in ext3_ext_find_extent() ++ */ ++ EXT_ASSERT(path[depth].p_ext != NULL || depth == 0); ++ ++ if ((ex = path[depth].p_ext)) { ++ /* if found exent covers block, simple return it */ ++ if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { ++ newblock = iblock - ex->ee_block + ex->ee_start; ++ ext_debug(&tree, "%d fit into %d:%d -> %d\n", ++ (int) iblock, ex->ee_block, ex->ee_len, ++ newblock); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); ++ goto out; ++ } ++ } ++ ++ /* ++ * requested block isn't allocated yet ++ * we couldn't try to create block if create flag is zero ++ */ ++ if (!create) { ++ /* put just found gap into cache to speedup subsequest reqs */ ++ ext3_ext_put_gap_in_cache(&tree, path, iblock); ++ goto out2; ++ } ++ ++ /* allocate new block */ ++ goal = 
ext3_ext_find_goal(inode, path, iblock); ++ newblock = ext3_new_block(handle, inode, goal, &err); ++ if (!newblock) ++ goto out2; ++ ext_debug(&tree, "allocate new block: goal %d, found %d\n", ++ goal, newblock); ++ ++ /* try to insert new extent into found leaf and return */ ++ newex.ee_block = iblock; ++ newex.ee_start = newblock; ++ newex.ee_len = 1; ++ err = ext3_ext_insert_extent(handle, &tree, path, &newex); ++ if (err) ++ goto out2; ++ ++ if (extend_disksize && inode->i_size > EXT3_I(inode)->i_disksize) ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ++ /* previous routine could use block we allocated */ ++ newblock = newex.ee_start; ++ set_buffer_new(bh_result); ++ ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); ++out: ++ ext3_ext_show_leaf(&tree, path); ++ map_bh(bh_result, inode->i_sb, newblock); ++out2: ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } ++ up(&EXT3_I(inode)->truncate_sem); ++ ++ return err; ++} ++ ++void ext3_ext_truncate(struct inode * inode, struct page *page) ++{ ++ struct address_space *mapping = inode->i_mapping; ++ struct super_block *sb = inode->i_sb; ++ struct ext3_extents_tree tree; ++ unsigned long last_block; ++ handle_t *handle; ++ int err = 0; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ /* ++ * probably first extent we're gonna free will be last in block ++ */ ++ err = ext3_writepage_trans_blocks(inode) + 3; ++ handle = ext3_journal_start(inode, err); ++ if (IS_ERR(handle)) { ++ if (page) { ++ clear_highpage(page); ++ flush_dcache_page(page); ++ unlock_page(page); ++ page_cache_release(page); ++ } ++ return; ++ } ++ ++ if (page) ++ ext3_block_truncate_page(handle, page, mapping, inode->i_size); ++ ++ down(&EXT3_I(inode)->truncate_sem); ++ ext3_ext_invalidate_cache(&tree); ++ ++ /* ++ * TODO: optimization is possible here ++ * probably we need not scaning at all, ++ * because page truncation is enough ++ */ ++ if (ext3_orphan_add(handle, inode)) ++ 
goto out_stop; ++ ++ /* we have to know where to truncate from in crash case */ ++ EXT3_I(inode)->i_disksize = inode->i_size; ++ ext3_mark_inode_dirty(handle, inode); ++ ++ last_block = (inode->i_size + sb->s_blocksize - 1) ++ >> EXT3_BLOCK_SIZE_BITS(sb); ++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); ++ ++ /* In a multi-transaction truncate, we only make the final ++ * transaction synchronous */ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++out_stop: ++ /* ++ * If this was a simple ftruncate(), and the file will remain alive ++ * then we need to clear up the orphan record which we created above. ++ * However, if this was a real unlink then we were called by ++ * ext3_delete_inode(), and we allow that function to clean up the ++ * orphan info for us. ++ */ ++ if (inode->i_nlink) ++ ext3_orphan_del(handle, inode); ++ ++ up(&EXT3_I(inode)->truncate_sem); ++ ext3_journal_stop(handle); ++} ++ ++/* ++ * this routine calculate max number of blocks we could modify ++ * in order to allocate new block for an inode ++ */ ++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) ++{ ++ struct ext3_extents_tree tree; ++ int needed; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); ++ ++ /* caller want to allocate num blocks */ ++ needed *= num; ++ ++#ifdef CONFIG_QUOTA ++ /* ++ * FIXME: real calculation should be here ++ * it depends on blockmap format of qouta file ++ */ ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ ++ return needed; ++} ++ ++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ext3_extent_tree_init(handle, &tree); ++} ++ ++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ return ext3_ext_calc_metadata_amount(&tree, blocks); ++} ++ ++static int 
++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newex, int exist) ++{ ++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ if (buf->err < 0) ++ return EXT_BREAK; ++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) ++ return EXT_BREAK; ++ ++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { ++ buf->err++; /* buf->err doubles as the count of extents copied so far */ ++ buf->cur += sizeof(*newex); ++ } else { ++ buf->err = -EFAULT; ++ return EXT_BREAK; ++ } ++ return EXT_CONTINUE; ++} ++ ++static int ++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int exist) ++{ ++ struct ext3_extent_tree_stats *buf = ++ (struct ext3_extent_tree_stats *) tree->private; ++ int depth; ++ ++ if (!exist) ++ return EXT_CONTINUE; ++ ++ depth = EXT_DEPTH(tree); ++ buf->extents_num++; ++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) ++ buf->leaf_num++; /* first extent of a leaf seen: count that leaf once */ ++ return EXT_CONTINUE; ++} ++ ++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; /* 0, a negative errno, buf.err for GET_EXTENTS, or the depth for GET_TREE_DEPTH */ ++ ++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) ++ return -EINVAL; /* inode does not carry EXT3_EXTENTS_FL */ ++ ++ if (cmd == EXT3_IOC_GET_EXTENTS) { ++ struct ext3_extent_buf buf; ++ struct ext3_extents_tree tree; ++ ++ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) ++ return -EFAULT; ++ ++ ext3_init_tree_desc(&tree, inode); ++ buf.cur = buf.buffer; ++ buf.err = 0; ++ tree.private = &buf; ++ down(&EXT3_I(inode)->truncate_sem); ++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, ++ ext3_ext_store_extent_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (err == 0) ++ err = buf.err; ++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { ++ struct ext3_extent_tree_stats buf; ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ buf.depth = EXT_DEPTH(&tree); ++ buf.extents_num = 0; ++ buf.leaf_num
= 0; ++ tree.private = &buf; ++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, ++ ext3_ext_collect_stats_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (!err && copy_to_user((void *) arg, &buf, sizeof(buf))) ++ err = -EFAULT; /* copy_to_user() returns the uncopied byte count, not an errno */ ++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { ++ struct ext3_extents_tree tree; ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ err = EXT_DEPTH(&tree); /* the depth is handed back as the ioctl return value */ ++ up(&EXT3_I(inode)->truncate_sem); ++ } ++ ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_init_tree_desc); ++EXPORT_SYMBOL(ext3_mark_inode_dirty); ++EXPORT_SYMBOL(ext3_ext_invalidate_cache); ++EXPORT_SYMBOL(ext3_ext_insert_extent); ++EXPORT_SYMBOL(ext3_ext_walk_space); ++EXPORT_SYMBOL(ext3_ext_find_goal); ++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); ++ +Index: linux-2.6.5-sles9/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300 +@@ -647,6 +647,10 @@ + DQUOT_FREE_INODE(inode); + goto fail2; + } ++ if (test_opt(sb, EXTENTS)) { ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ ext3_extents_initialize_blockmap(handle, inode); ++ } + err = ext3_mark_inode_dirty(handle, inode); + if (err) { + ext3_std_error(sb, err); +Index: linux-2.6.5-sles9/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 +@@ -796,6 +796,17 @@ + goto reread; + } + ++static inline int ++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, ++ struct buffer_head *bh, int create, int extend_disksize) ++{ ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_get_block(handle, inode, block, bh, create, ++ extend_disksize); ++ return ext3_get_block_handle(handle, inode, block, bh, create, ++
extend_disksize); ++} ++ + static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) + { +@@ -806,8 +817,8 @@ + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 1); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 1); + return ret; + } + +@@ -833,8 +844,8 @@ + } + } + if (ret == 0) +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 0); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 0); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +@@ -855,7 +866,7 @@ + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); +@@ -1587,7 +1598,7 @@ + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. + */ +-static int ext3_block_truncate_page(handle_t *handle, struct page *page, ++int ext3_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from) + { + unsigned long index = from >> PAGE_CACHE_SHIFT; +@@ -2083,6 +2094,9 @@ + return; + } + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_truncate(inode, page); ++ + handle = start_transaction(inode); + if (IS_ERR(handle)) { + if (page) { +@@ -2789,6 +2803,9 @@ + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; + int ret; + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_writepage_trans_blocks(inode, bpp); ++ + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else +Index: linux-2.6.5-sles9/fs/ext3/Makefile +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ ioctl.o namei.o super.o symlink.o hash.o extents.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-sles9/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 +@@ -389,6 +389,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { +@@ -447,6 +448,10 @@ + #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + ei->vfs_inode.i_version = 1; ++ ei->i_cached_extent[0] = 0; ++ ei->i_cached_extent[1] = 0; ++ ei->i_cached_extent[2] = 0; ++ ei->i_cached_extent[3] = 0; + return &ei->vfs_inode; + } + +@@ -537,7 +542,7 @@ + Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, +- Opt_err, ++ Opt_err, Opt_extents, Opt_extdebug + }; + + static match_table_t tokens = { +@@ -582,6 +587,8 @@ + {Opt_iopen, "iopen"}, + {Opt_noiopen, "noiopen"}, + {Opt_iopen_nopriv, "iopen_nopriv"}, ++ {Opt_extents, 
"extents"}, ++ {Opt_extdebug, "extdebug"}, + {Opt_err, NULL} + }; + +@@ -797,6 +804,12 @@ + break; + case Opt_ignore: + break; ++ case Opt_extents: ++ set_opt (sbi->s_mount_opt, EXTENTS); ++ break; ++ case Opt_extdebug: ++ set_opt (sbi->s_mount_opt, EXTDEBUG); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1449,6 +1462,8 @@ + percpu_counter_mod(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + ++ ext3_ext_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.5-sles9/fs/ext3/ioctl.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300 +@@ -124,6 +124,10 @@ + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } ++ case EXT3_IOC_GET_EXTENTS: ++ case EXT3_IOC_GET_TREE_STATS: ++ case EXT3_IOC_GET_TREE_DEPTH: ++ return ext3_ext_ioctl(inode, filp, cmd, arg); + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 +@@ -186,6 +186,7 @@ + #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ + #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ ++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ + + #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ + #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +@@ -211,6 +212,9 @@ + #endif + #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) + #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) ++#define EXT3_IOC_GET_EXTENTS 
_IOR('f', 7, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) + + /* + * Structure of an inode on the disk +@@ -333,6 +337,8 @@ + #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ + #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ ++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ ++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -729,6 +735,7 @@ + + + /* inode.c */ ++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); + extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +@@ -802,6 +809,14 @@ + extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + ++/* extents.c */ ++extern int ext3_ext_writepage_trans_blocks(struct inode *, int); ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++ struct buffer_head *, int, int); ++extern void ext3_ext_truncate(struct inode *, struct page *); ++extern void ext3_ext_init(struct super_block *); ++extern void ext3_ext_release(struct super_block *); ++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); + + #endif /* __KERNEL__ */ + +Index: linux-2.6.5-sles9/include/linux/ext3_extents.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300 +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, Cluster File 
/*
 * if EXT_DEBUG is defined you can use 'extdebug' mount option
 * to get lots of info what's going on
 *
 * The enabled variant is wrapped in do { } while (0) WITHOUT a trailing
 * semicolon, so that "ext_debug(...);" is exactly one statement and can be
 * used as the body of an if/else.  Callers supply the semicolon.
 */
#define EXT_DEBUG_
#ifdef EXT_DEBUG
#define ext_debug(tree,fmt,a...) 			\
do {							\
	if (test_opt((tree)->inode->i_sb, EXTDEBUG))	\
		printk(fmt, ##a);			\
} while (0)
#else
#define ext_debug(tree,fmt,a...)
#endif
/*
 * this is extent on-disk structure
 * it's used at the bottom of the tree
 *
 * One entry is 12 bytes (4 + 2 + 2 + 4).  The physical start block is
 * split into a low 32-bit part (ee_start) and a high 16-bit part
 * (ee_start_hi).
 */
struct ext3_extent {
	__u32	ee_block;	/* first logical block extent covers */
	__u16	ee_len;		/* number of blocks covered by extent */
	__u16	ee_start_hi;	/* high 16 bits of physical block */
	__u32	ee_start;	/* low 32 bits of physical block */
};
/*
 * array of ext3_ext_path contains path to some extent
 * creation/lookup routines use it for traversal/splitting/etc
 * truncate uses it to simulate recursive walking
 *
 * The array is indexed by tree level; the entry at index EXT_DEPTH(tree)
 * describes the leaf (see ext3_ext_collect_stats_cb(), which compares
 * path[depth].p_ext with the first extent of path[depth].p_hdr).
 */
struct ext3_ext_path {
	__u32				p_block;	/* presumably the physical block of this level - TODO confirm */
	__u16				p_depth;	/* NOTE(review): looks like the depth of this level - confirm */
	struct ext3_extent		*p_ext;		/* extent within the block headed by p_hdr (leaf level) */
	struct ext3_extent_idx		*p_idx;		/* presumably the index entry for non-leaf levels - TODO confirm */
	struct ext3_extent_header	*p_hdr;		/* header of the block (or inode root) at this level */
	struct buffer_head		*p_bh;		/* presumably the buffer holding that block - TODO confirm */
};
/*
 * Crash via BUG() when the condition does not hold.
 *
 * Wrapped in do { } while (0) so the macro is a single statement: a bare
 * "if (...) BUG();" would silently capture the "else" of an enclosing
 * if/else written around EXT_ASSERT(x);
 */
#define EXT_ASSERT(__x__)		\
do {					\
	if (!(__x__))			\
		BUG();			\
} while (0)
ext3_ext_path *); ++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); ++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); ++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); ++extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); ++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); ++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); ++ ++static inline void ++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) ++{ ++ if (tree->cex) ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; ++} ++ ++ ++#endif /* _LINUX_EXT3_EXTENTS */ ++ +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 +@@ -128,6 +128,8 @@ + */ + struct semaphore truncate_sem; + struct inode vfs_inode; ++ ++ __u32 i_cached_extent[4]; + }; + + #endif /* _LINUX_EXT3_FS_I */ + +%diffstat + fs/ext3/Makefile | 2 + fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/ialloc.c | 4 + fs/ext3/inode.c | 29 + fs/ext3/ioctl.c | 4 + fs/ext3/super.c | 17 + include/linux/ext3_extents.h | 252 ++++ + include/linux/ext3_fs.h | 15 + include/linux/ext3_fs_i.h | 2 + 9 files changed, 2630 insertions(+), 8 deletions(-) + diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch new file mode 100644 index 0000000..2408cc7 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -0,0 +1,1738 @@ +Index: linux-2.6.5-sles9/fs/ext3/mballoc.c 
+=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300 +@@ -0,0 +1,1428 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++ ++/* ++ * mballoc.c contains the multiblocks allocation routines ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * TODO: ++ * - do not scan from the beginning, try to remember first free block ++ * - mb_mark_used_* may allocate chunk right after splitting buddy ++ * - special flag to advice allocator to look for requested + N blocks ++ * this may improve interaction between extents and mballoc ++ */ ++ ++/* ++ * with AGRESSIVE_CHECK allocator runs consistency checks over ++ * structures. this checks slow things down a lot ++ */ ++#define AGGRESSIVE_CHECK__ ++ ++/* ++ */ ++#define MB_DEBUG__ ++#ifdef MB_DEBUG ++#define mb_debug(fmt,a...) printk(fmt, ##a) ++#else ++#define mb_debug(fmt,a...) ++#endif ++ ++/* ++ * where to save buddies structures beetween umount/mount (clean case only) ++ */ ++#define EXT3_BUDDY_FILE ".buddy" ++ ++/* ++ * max. 
/*
 * set_bit()/clear_bit()/test_bit() work on unsigned long words, but the
 * buddy bitmaps handed to mb_*_bit() may start at any byte offset.  Round
 * 'addr' down to an unsigned long boundary and move the bytes skipped this
 * way into 'bit' (8 bits per byte, i.e. little-endian bit numbering, same
 * assumption as before).
 *
 * Both arguments must be lvalues; they are modified in place.
 *
 * On 32-bit this yields exactly what the old two-step (&1, &2) code did.
 * On 64-bit it additionally fixes addresses that are 4- but not 8-byte
 * aligned, and no longer truncates the pointer through an 'unsigned' cast.
 */
#define mb_correct_addr_and_bit(bit,addr)				\
do {									\
	unsigned long __mb_off = (unsigned long) (addr) &		\
				 (sizeof(unsigned long) - 1);		\
									\
	(bit) += __mb_off << 3;						\
	(addr) = (void *) ((char *) (addr) - __mb_off);			\
} while (0)
mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++ ++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) ++{ ++ int i = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(max != NULL); ++ ++ if (order > e3b->bd_blkbits + 1) ++ return NULL; ++ ++ /* at order 0 we see each particular block */ ++ *max = 1 << (e3b->bd_blkbits + 3); ++ if (order == 0) ++ return e3b->bd_bitmap; ++ ++ bb = e3b->bd_buddy; ++ *max = *max >> 1; ++ while (i < order) { ++ bb += 1 << (e3b->bd_blkbits - i); ++ i++; ++ *max = *max >> 1; ++ } ++ return bb; ++} ++ ++static int ext3_mb_load_desc(struct super_block *sb, int group, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap); ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy); ++ ++ /* load bitmap */ ++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap); ++ if (e3b->bd_bh == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ goto out; ++ } ++ if (!buffer_uptodate(e3b->bd_bh)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh); ++ wait_on_buffer(e3b->bd_bh); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh)); ++ ++ /* load buddy */ ++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy); ++ if (e3b->bd_bh2 == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ goto out; ++ } ++ if (!buffer_uptodate(e3b->bd_bh2)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh2); ++ wait_on_buffer(e3b->bd_bh2); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh2)); ++ ++ e3b->bd_bitmap = e3b->bd_bh->b_data; ++ e3b->bd_buddy = e3b->bd_bh2->b_data; ++ e3b->bd_blkbits = sb->s_blocksize_bits; ++ e3b->bd_bd = sbi->s_buddy_blocks + group; ++ e3b->bd_sb = sb; ++ ++ return 0; ++out: ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++ 
e3b->bd_bh = NULL; ++ e3b->bd_bh2 = NULL; ++ return -EIO; ++} ++ ++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b) ++{ ++ mark_buffer_dirty(e3b->bd_bh); ++ mark_buffer_dirty(e3b->bd_bh2); ++} ++ ++static void ext3_mb_release_desc(struct ext3_buddy *e3b) ++{ ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++} ++ ++#ifdef AGGRESSIVE_CHECK ++static void mb_check_buddy(struct ext3_buddy *e3b) ++{ ++ int order = e3b->bd_blkbits + 1; ++ int max, max2, i, j, k, count; ++ void *buddy, *buddy2; ++ ++ if (!test_opt(e3b->bd_sb, MBALLOC)) ++ return; ++ ++ while (order > 1) { ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ buddy2 = mb_find_buddy(e3b, order - 1, &max2); ++ J_ASSERT(buddy2); ++ J_ASSERT(buddy != buddy2); ++ J_ASSERT(max * 2 == max2); ++ ++ count = 0; ++ for (i = 0; i < max; i++) { ++ ++ if (!mb_test_bit(i, buddy)) { ++ /* only single bit in buddy2 may be 1 */ ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ continue; ++ } ++ ++ /* both bits in buddy2 must be 0 */ ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2)); ++ ++ for (j = 0; j < (1 << order); j++) { ++ k = (i * (1 << order)) + j; ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); ++ } ++ count++; ++ } ++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count); ++ order--; ++ } ++ ++ buddy = mb_find_buddy(e3b, 0, &max); ++ for (i = 0; i < max; i++) { ++ if (mb_test_bit(i, buddy)) ++ continue; ++ /* check used bits only */ ++ for (j = 0; j < e3b->bd_blkbits + 1; j++) { ++ buddy2 = mb_find_buddy(e3b, j, &max2); ++ k = i >> j; ++ J_ASSERT(k < max2); ++ J_ASSERT(!mb_test_bit(k, buddy2)); ++ } ++ } ++} ++#else ++#define mb_check_buddy(e3b) ++#endif ++ ++static inline void ++ext3_lock_group(struct super_block *sb, int group) ++{ ++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++ ++static inline void 
++ext3_unlock_group(struct super_block *sb, int group) ++{ ++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++ ++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) ++{ ++ int order = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); ++ ++ bb = e3b->bd_buddy; ++ while (order <= e3b->bd_blkbits + 1) { ++ block = block >> 1; ++ if (mb_test_bit(block, bb)) { ++ /* this block is part of buddy of order 'order' */ ++ return order; ++ } ++ bb += 1 << (e3b->bd_blkbits - order); ++ order++; ++ } ++ return 0; ++} ++ ++static inline void mb_clear_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0; ++ cur += 32; ++ continue; ++ } ++ mb_clear_bit(cur, bm); ++ cur++; ++ } ++} ++ ++static inline void mb_set_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0xffffffff; ++ cur += 32; ++ continue; ++ } ++ mb_set_bit(cur, bm); ++ cur++; ++ } ++} ++ ++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) ++{ ++ int block, max, order; ++ void *buddy, *buddy2; ++ ++ mb_check_buddy(e3b); ++ while (count-- > 0) { ++ block = first++; ++ order = 0; ++ ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); ++ e3b->bd_bd->bb_counters[order]++; ++ ++ /* start of the buddy */ ++ buddy = mb_find_buddy(e3b, order, &max); ++ ++ do { ++ block &= ~1UL; ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) ++ break; ++ ++ /* both the buddies are free, try to coalesce them */ ++ buddy2 = mb_find_buddy(e3b, order + 1, &max); ++ ++ if (!buddy2) ++ break; ++ ++ if (order > 0) { ++ /* 
for special purposes, we don't clear ++ * free bits in bitmap */ ++ mb_clear_bit(block, buddy); ++ mb_clear_bit(block + 1, buddy); ++ } ++ e3b->bd_bd->bb_counters[order]--; ++ e3b->bd_bd->bb_counters[order]--; ++ ++ block = block >> 1; ++ order++; ++ e3b->bd_bd->bb_counters[order]++; ++ ++ mb_set_bit(block, buddy2); ++ buddy = buddy2; ++ } while (1); ++ } ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++/* ++ * returns 1 if out extent is enough to fill needed space ++ */ ++int mb_make_backward_extent(struct ext3_free_extent *in, ++ struct ext3_free_extent *out, int needed) ++{ ++ int i; ++ ++ J_ASSERT(in); ++ J_ASSERT(out); ++ J_ASSERT(in->fe_nums < MB_ARR_SIZE); ++ ++ out->fe_len = 0; ++ out->fe_start = in->fe_start + in->fe_len; ++ out->fe_nums = 0; ++ ++ /* for single-chunk extent we need not back order ++ * also, if an extent doesn't fill needed space ++ * then it makes no sense to try back order becase ++ * if we select this extent then it'll be use as is */ ++ if (in->fe_nums < 2 || in->fe_len < needed) ++ return 0; ++ ++ i = in->fe_nums - 1; ++ while (i >= 0 && out->fe_len < needed) { ++ out->fe_len += (1 << in->fe_orders[i]); ++ out->fe_start -= (1 << in->fe_orders[i]); ++ i--; ++ } ++ /* FIXME: in some situation fe_orders may be too small to hold ++ * all the buddies */ ++ J_ASSERT(out->fe_len >= needed); ++ ++ for (i++; i < in->fe_nums; i++) ++ out->fe_orders[out->fe_nums++] = in->fe_orders[i]; ++ J_ASSERT(out->fe_nums < MB_ARR_SIZE); ++ out->fe_back = 1; ++ ++ return 1; ++} ++ ++int mb_find_extent(struct ext3_buddy *e3b, int order, int block, ++ int needed, struct ext3_free_extent *ex) ++{ ++ int space = needed; ++ int next, max, ord; ++ void *buddy; ++ ++ J_ASSERT(ex != NULL); ++ ++ ex->fe_nums = 0; ++ ex->fe_len = 0; ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ J_ASSERT(block < max); ++ if (!mb_test_bit(block, buddy)) ++ goto nofree; ++ ++ if (order == 0) { ++ /* find actual order */ ++ order = mb_find_order_for_block(e3b, 
block); ++ block = block >> order; ++ } ++ ++ ex->fe_orders[ex->fe_nums++] = order; ++ ex->fe_len = 1 << order; ++ ex->fe_start = block << order; ++ ex->fe_back = 0; ++ ++ while ((space = space - (1 << order)) > 0) { ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ ++ if (block + 1 >= max) ++ break; ++ ++ next = (block + 1) * (1 << order); ++ if (!mb_test_bit(next, e3b->bd_bitmap)) ++ break; ++ ++ ord = mb_find_order_for_block(e3b, next); ++ ++ if ((1 << ord) >= needed) { ++ /* we dont want to coalesce with self-enough buddies */ ++ break; ++ } ++ order = ord; ++ block = next >> order; ++ ex->fe_len += 1 << order; ++ ++ if (ex->fe_nums < MB_ARR_SIZE) ++ ex->fe_orders[ex->fe_nums++] = order; ++ } ++ ++nofree: ++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); ++ return ex->fe_len; ++} ++ ++static int mb_mark_used_backward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ start = ex->fe_start + ex->fe_len - 1; ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) && ++ len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! 
*/ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start -= mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ J_ASSERT(start >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++static int mb_mark_used_forward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ ++ if (((start >> ord) << ord) == start && len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! 
*/ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start += mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++int inline mb_mark_used(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int err; ++ ++ J_ASSERT(ex); ++ if (ex->fe_back == 0) ++ err = mb_mark_used_forward(e3b, ex, len); ++ else ++ err = mb_mark_used_backward(e3b, ex, len); ++ return err; ++} ++ ++int ext3_mb_new_in_group(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b, int group) ++{ ++ struct super_block *sb = ac->ac_sb; ++ int err, gorder, max, i; ++ struct ext3_free_extent curex; ++ ++ /* let's know order of allocation */ ++ gorder = 0; ++ while (ac->ac_g_len > (1 << gorder)) ++ gorder++; ++ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { ++ /* someone asks for space at this specified block ++ * probably he wants to merge it into existing extent */ ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ /* good. 
at least one block is free */ ++ max = mb_find_extent(e3b, 0, ac->ac_g_start, ++ ac->ac_g_len, &curex); ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ err = 0; ++ goto out; ++ } ++ /* don't try to find goal anymore */ ++ ac->ac_g_flags &= ~1; ++ } ++ ++ i = 0; ++ while (1) { ++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i); ++ if (i >= sb->s_blocksize * 8) ++ break; ++ ++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex); ++ if (max >= ac->ac_g_len) { ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ break; ++ } ++ i += max; ++ } ++ ++ return 0; ++ ++out: ++ return err; ++} ++ ++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr) ++{ ++ struct ext3_group_desc *gdp; ++ int free_blocks; ++ ++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL); ++ if (!gdp) ++ return 0; ++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); ++ if (free_blocks == 0) ++ return 0; ++ ++ /* someone wants this block very much */ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) ++ return 1; ++ ++ /* FIXME: I'd like to take fragmentation into account here */ ++ if (cr == 0) { ++ if (free_blocks >= ac->ac_g_len >> 1) ++ return 1; ++ } else if (cr == 1) { ++ if (free_blocks >= ac->ac_g_len >> 2) ++ return 1; ++ } else if (cr == 2) { ++ return 1; ++ } else { ++ BUG(); ++ } ++ return 0; ++} ++ ++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, ++ unsigned long goal, int *len, int flags, int *errp) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_allocation_context ac; ++ int i, group, block, cr, err = 0; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ struct buffer_head *gdp_bh; ++ struct ext3_sb_info 
*sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ ++ J_ASSERT(len != NULL); ++ J_ASSERT(*len > 0); ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk("ext3_mb_new_nblocks: nonexistent device"); ++ return 0; ++ } ++ ++ if (!test_opt(sb, MBALLOC)) { ++ static int ext3_mballoc_warning = 0; ++ if (ext3_mballoc_warning == 0) { ++ printk(KERN_ERR "EXT3-fs: multiblock request with " ++ "mballoc disabled!\n"); ++ ext3_mballoc_warning++; ++ } ++ *len = 1; ++ err = ext3_new_block_old(handle, inode, goal, errp); ++ return err; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ ++ if (!(flags & 2)) { ++ /* someone asks for non-reserved blocks */ ++ BUG_ON(*len > 1); ++ err = ext3_mb_reserve_blocks(sb, 1); ++ if (err) { ++ *errp = err; ++ return 0; ++ } ++ } ++ ++ /* ++ * Check quota for allocation of this blocks. ++ */ ++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) ++ *len -= 1; ++ if (*len == 0) { ++ *errp = -EDQUOT; ++ block = 0; ++ goto out; ++ } ++ ++ /* start searching from the goal */ ++ if (goal < le32_to_cpu(es->s_first_data_block) || ++ goal >= le32_to_cpu(es->s_blocks_count)) ++ goal = le32_to_cpu(es->s_first_data_block); ++ group = (goal - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ block = ((goal - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb)); ++ ++ /* set up allocation goals */ ++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0; ++ ac.ac_status = 0; ++ ac.ac_groups_scanned = 0; ++ ac.ac_sb = inode->i_sb; ++ ac.ac_g_group = group; ++ ac.ac_g_start = block; ++ ac.ac_g_len = *len; ++ ac.ac_g_flags = flags; ++ ++ /* loop over the groups */ ++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) { ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { ++ if (group == EXT3_SB(sb)->s_groups_count) ++ group = 0; ++ ++ /* check is group good for our criteries */ ++ if (!mb_good_group(&ac, group, cr)) ++ continue; ++ ++ err = 
ext3_mb_load_desc(ac.ac_sb, group, &e3b); ++ if (err) ++ goto out_err; ++ ++ ext3_lock_group(sb, group); ++ if (!mb_good_group(&ac, group, cr)) { ++ /* someone did allocation from this group */ ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ continue; ++ } ++ ++ err = ext3_mb_new_in_group(&ac, &e3b, group); ++ ext3_unlock_group(sb, group); ++ if (ac.ac_status == AC_STATUS_FOUND) ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ if (err) ++ goto out_err; ++ if (ac.ac_status == AC_STATUS_FOUND) ++ break; ++ } ++ } ++ ++ if (ac.ac_status != AC_STATUS_FOUND) { ++ /* unfortunately, we can't satisfy this request */ ++ J_ASSERT(ac.ac_b_len == 0); ++ DQUOT_FREE_BLOCK(inode, *len); ++ *errp = -ENOSPC; ++ block = 0; ++ goto out; ++ } ++ ++ /* good news - free block(s) have been found. now it's time ++ * to mark block(s) in good old journaled bitmap */ ++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block); ++ ++ /* we made a desicion, now mark found blocks in good old ++ * bitmap to be journaled */ ++ ++ ext3_debug("using block group %d(%d)\n", ++ ac.ac_b_group.group, gdp->bg_free_blocks_count); ++ ++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group); ++ if (!bitmap_bh) { ++ *errp = -EIO; ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) { ++ *errp = err; ++ goto out_err; ++ } ++ ++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh); ++ if (!gdp) { ++ *errp = -EIO; ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + le32_to_cpu(es->s_first_data_block); ++ ++ if (block == le32_to_cpu(gdp->bg_block_bitmap) || ++ block == le32_to_cpu(gdp->bg_inode_bitmap) || ++ in_range(block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error(sb, "ext3_new_block", ++ "Allocating block in system 
zone - " ++ "block = %u", block); ++#if 0 ++ for (i = 0; i < ac.ac_b_len; i++) ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++#endif ++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); ++ ++ ext3_lock_group(sb, ac.ac_b_group); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - ++ ac.ac_b_len); ++ ext3_unlock_group(sb, ac.ac_b_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len); ++ ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) ++ goto out_err; ++ err = ext3_journal_dirty_metadata(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ sb->s_dirt = 1; ++ *errp = 0; ++ brelse(bitmap_bh); ++ ++ /* drop non-allocated, but dquote'd blocks */ ++ J_ASSERT(*len >= ac.ac_b_len); ++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len); ++ ++ *len = ac.ac_b_len; ++ J_ASSERT(block != 0); ++ goto out; ++ ++out_err: ++ /* if we've already allocated something, roll it back */ ++ if (ac.ac_status == AC_STATUS_FOUND) { ++ /* FIXME: free blocks here */ ++ } ++ ++ DQUOT_FREE_BLOCK(inode, *len); ++ brelse(bitmap_bh); ++ *errp = err; ++ block = 0; ++out: ++ if (!(flags & 2)) { ++ /* block wasn't reserved before and we reserved it ++ * at the beginning of allocation. it doesn't matter ++ * whether we allocated anything or we failed: time ++ * to release reservation. 
NOTE: because I expect ++ * any multiblock request from delayed allocation ++ * path only, here is single block always */ ++ ext3_mb_release_blocks(sb, 1); ++ } ++ return block; ++} ++ ++int ext3_mb_generate_buddy(struct super_block *sb, int group) ++{ ++ struct buffer_head *bh; ++ int i, err, count = 0; ++ struct ext3_buddy e3b; ++ ++ err = ext3_mb_load_desc(sb, group, &e3b); ++ if (err) ++ goto out; ++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize); ++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize); ++ ++ bh = read_block_bitmap(sb, group); ++ if (bh == NULL) { ++ err = -EIO; ++ goto out2; ++ } ++ ++ /* loop over the blocks, and create buddies for free ones */ ++ for (i = 0; i < sb->s_blocksize * 8; i++) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { ++ mb_free_blocks(&e3b, i, 1); ++ count++; ++ } ++ } ++ brelse(bh); ++ mb_check_buddy(&e3b); ++ ext3_mb_dirty_buddy(&e3b); ++ ++out2: ++ ext3_mb_release_desc(&e3b); ++out: ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_mb_new_blocks); ++ ++#define MB_CREDITS \ ++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ ++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS) ++ ++int ext3_mb_init_backend(struct super_block *sb) ++{ ++ struct inode *root = sb->s_root->d_inode; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct dentry *db; ++ tid_t target; ++ int err, i; ++ ++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) * ++ sbi->s_groups_count, GFP_KERNEL); ++ if (sbi->s_buddy_blocks == NULL) { ++ printk("can't allocate mem for buddy maps\n"); ++ return -ENOMEM; ++ } ++ memset(sbi->s_buddy_blocks, 0, ++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count); ++ sbi->s_buddy = NULL; ++ ++ down(&root->i_sem); ++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root, ++ strlen(EXT3_BUDDY_FILE)); ++ if (IS_ERR(db)) { ++ err = PTR_ERR(db); ++ printk("can't lookup buddy file: %d\n", err); ++ goto out; ++ } ++ ++ if (db->d_inode != NULL) { ++ sbi->s_buddy = igrab(db->d_inode); ++ goto map; ++ } ++ ++ err = 
ext3_create(root, db, S_IFREG, NULL); ++ if (err) { ++ printk("error while creation buddy file: %d\n", err); ++ } else { ++ sbi->s_buddy = igrab(db->d_inode); ++ } ++ ++map: ++ for (i = 0; i < sbi->s_groups_count; i++) { ++ struct buffer_head *bh = NULL; ++ handle_t *handle; ++ ++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out2; ++ } ++ ++ /* allocate block for bitmap */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy bitmap: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr; ++ brelse(bh); ++ ++ /* allocate block for buddy */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr; ++ brelse(bh); ++ ext3_journal_stop(handle); ++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock); ++ sbi->s_buddy_blocks[i].bb_md_cur = NULL; ++ sbi->s_buddy_blocks[i].bb_tid = 0; ++ } ++ ++ if (journal_start_commit(sbi->s_journal, &target)) ++ log_wait_commit(sbi->s_journal, target); ++ ++out2: ++ dput(db); ++out: ++ up(&root->i_sem); ++ return err; ++} ++ ++int ext3_mb_release(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* release freed, non-committed blocks */ ++ spin_lock(&sbi->s_md_lock); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_committed_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ ext3_mb_free_committed_blocks(sb); ++ ++ if (sbi->s_buddy_blocks) ++ kfree(sbi->s_buddy_blocks); ++ if (sbi->s_buddy) ++ iput(sbi->s_buddy); ++ if (sbi->s_blocks_reserved) ++ printk("ext3-fs: %ld blocks being reserved at umount!\n", ++ sbi->s_blocks_reserved); ++ return 0; ++} ++ ++int ext3_mb_init(struct 
super_block *sb) ++{ ++ struct ext3_super_block *es; ++ int i; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* init file for buddy data */ ++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ ext3_mb_init_backend(sb); ++ ++ es = EXT3_SB(sb)->s_es; ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) ++ ext3_mb_generate_buddy(sb, i); ++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock); ++ spin_lock_init(&EXT3_SB(sb)->s_md_lock); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction); ++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ printk("EXT3-fs: mballoc enabled\n"); ++ return 0; ++} ++ ++void ext3_mb_free_committed_blocks(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int err, i, count = 0, count2 = 0; ++ struct ext3_free_metadata *md; ++ struct ext3_buddy e3b; ++ ++ if (list_empty(&sbi->s_committed_transaction)) ++ return; ++ ++ /* there is committed blocks to be freed yet */ ++ do { ++ /* get next array of blocks */ ++ md = NULL; ++ spin_lock(&sbi->s_md_lock); ++ if (!list_empty(&sbi->s_committed_transaction)) { ++ md = list_entry(sbi->s_committed_transaction.next, ++ struct ext3_free_metadata, list); ++ list_del(&md->list); ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ if (md == NULL) ++ break; ++ ++ mb_debug("gonna free %u blocks in group %u (0x%p):", ++ md->num, md->group, md); ++ ++ err = ext3_mb_load_desc(sb, md->group, &e3b); ++ BUG_ON(err != 0); ++ ++ /* there are blocks to put in buddy to make them really free */ ++ count += md->num; ++ count2++; ++ ext3_lock_group(sb, md->group); ++ for (i = 0; i < md->num; i++) { ++ mb_debug(" %u", md->blocks[i]); ++ mb_free_blocks(&e3b, md->blocks[i], 1); ++ } ++ mb_debug("\n"); ++ ext3_unlock_group(sb, md->group); ++ ++ kfree(md); ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ } while (md); ++ mb_debug("freed %u blocks in %u structures\n", count, count2); 
++} ++ ++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (sbi->s_last_transaction == handle->h_transaction->t_tid) ++ return; ++ ++ /* new transaction! time to close last one and free blocks for ++ * committed transaction. we know that only one transaction can be ++ * active, so previous transaction can be being logged and we ++ * know that transaction before previous is known to be already ++ * logged. this means that now we may free blocks freed in all ++ * transactions before previous one. hope I'm clear enough ... */ ++ ++ spin_lock(&sbi->s_md_lock); ++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { ++ mb_debug("new transaction %lu, old %lu\n", ++ (unsigned long) handle->h_transaction->t_tid, ++ (unsigned long) sbi->s_last_transaction); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_closed_transaction); ++ sbi->s_last_transaction = handle->h_transaction->t_tid; ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ ext3_mb_free_committed_blocks(sb); ++} ++ ++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, ++ int group, int block, int count) ++{ ++ struct ext3_buddy_group_blocks *db = e3b->bd_bd; ++ struct super_block *sb = e3b->bd_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_free_metadata *md; ++ int i; ++ ++ ext3_lock_group(sb, group); ++ for (i = 0; i < count; i++) { ++ md = db->bb_md_cur; ++ if (md && db->bb_tid != handle->h_transaction->t_tid) { ++ db->bb_md_cur = NULL; ++ md = NULL; ++ } ++ ++ if (md == NULL) { ++ ext3_unlock_group(sb, group); ++ md = kmalloc(sizeof(*md), GFP_KERNEL); ++ if (md == NULL) ++ return -ENOMEM; ++ md->num = 0; ++ md->group = group; ++ ++ ext3_lock_group(sb, group); ++ if (db->bb_md_cur == NULL) { ++ spin_lock(&sbi->s_md_lock); ++ list_add(&md->list, &sbi->s_active_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ 
db->bb_md_cur = md; ++ db->bb_tid = handle->h_transaction->t_tid; ++ mb_debug("new md 0x%p for group %u\n", ++ md, md->group); ++ } else { ++ kfree(md); ++ md = db->bb_md_cur; ++ } ++ } ++ ++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); ++ md->blocks[md->num] = block + i; ++ md->num++; ++ if (md->num == EXT3_BB_MAX_BLOCKS) { ++ /* no more space, put full container on a sb's list */ ++ db->bb_md_cur = NULL; ++ } ++ } ++ ext3_unlock_group(sb, group); ++ return 0; ++} ++ ++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ unsigned long bit, overflow; ++ struct buffer_head *gd_bh; ++ unsigned long block_group; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ int err = 0, ret; ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk ("ext3_free_blocks: nonexistent device"); ++ return; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ if (block < le32_to_cpu(es->s_first_data_block) || ++ block + count < block || ++ block + count > le32_to_cpu(es->s_blocks_count)) { ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks not in datazone - " ++ "block = %lu, count = %lu", block, count); ++ goto error_return; ++ } ++ ++ ext3_debug("freeing block %lu\n", block); ++ ++do_more: ++ overflow = 0; ++ block_group = (block - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ bit = (block - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb); ++ /* ++ * Check to see if we are freeing blocks across a group ++ * boundary. 
++ */ ++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { ++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); ++ count -= overflow; ++ } ++ brelse(bitmap_bh); ++ bitmap_bh = read_block_bitmap(sb, block_group); ++ if (!bitmap_bh) ++ goto error_return; ++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); ++ if (!gdp) ++ goto error_return; ++ ++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || ++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || ++ in_range (block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group) || ++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks in system zones - " ++ "Block = %lu, count = %lu", ++ block, count); ++ ++ BUFFER_TRACE(bitmap_bh, "getting write access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) ++ goto error_return; ++ ++ /* ++ * We are about to modify some metadata. Call the journal APIs ++ * to unshare ->b_data if a currently-committing transaction is ++ * using it ++ */ ++ BUFFER_TRACE(gd_bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, gd_bh); ++ if (err) ++ goto error_return; ++ ++ err = ext3_mb_load_desc(sb, block_group, &e3b); ++ if (err) ++ goto error_return; ++ ++ if (metadata) { ++ /* blocks being freed are metadata. 
these blocks shouldn't ++ * be used until this transaction is committed */ ++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); ++ } else { ++ ext3_lock_group(sb, block_group); ++ mb_free_blocks(&e3b, bit, count); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ ext3_unlock_group(sb, block_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, count); ++ } ++ ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ /* FIXME: undo logic will be implemented later and another way */ ++ mb_clear_bits(bitmap_bh->b_data, bit, count); ++ DQUOT_FREE_BLOCK(inode, count); ++ ++ /* We dirtied the bitmap block */ ++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ ++ /* And the group descriptor block */ ++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); ++ ret = ext3_journal_dirty_metadata(handle, gd_bh); ++ if (!err) err = ret; ++ ++ if (overflow && !err) { ++ block += count; ++ count = overflow; ++ goto do_more; ++ } ++ sb->s_dirt = 1; ++error_return: ++ brelse(bitmap_bh); ++ ext3_std_error(sb, err); ++ return; ++} ++ ++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int free, ret = -ENOSPC; ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); ++ if (blocks <= free - sbi->s_blocks_reserved) { ++ sbi->s_blocks_reserved += blocks; ++ ret = 0; ++ } ++ spin_unlock(&sbi->s_reserve_lock); ++ return ret; ++} ++ ++void ext3_mb_release_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ sbi->s_blocks_reserved -= blocks; ++ WARN_ON(sbi->s_blocks_reserved < 0); ++ if (sbi->s_blocks_reserved < 0) ++ sbi->s_blocks_reserved = 0; ++ spin_unlock(&sbi->s_reserve_lock); ++} ++ ++int ext3_new_block(handle_t 
*handle, struct inode *inode, ++ unsigned long goal, int *errp) ++{ ++ int ret, len; ++ ++ if (!test_opt(inode->i_sb, MBALLOC)) { ++ ret = ext3_new_block_old(handle, inode, goal, errp); ++ goto out; ++ } ++ len = 1; ++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); ++out: ++ return ret; ++} ++ ++ ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ if (!test_opt(inode->i_sb, MBALLOC)) ++ ext3_free_blocks_old(handle, inode, block, count); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata); ++ return; ++} ++ +Index: linux-2.6.5-sles9/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300 +@@ -389,6 +389,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_mb_release(sb); + ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); +@@ -542,7 +543,7 @@ + Opt_commit, Opt_journal_update, Opt_journal_inum, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, +- Opt_err, Opt_extents, Opt_extdebug ++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, + }; + + static match_table_t tokens = { +@@ -589,6 +590,7 @@ + {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_extents, "extents"}, + {Opt_extdebug, "extdebug"}, ++ {Opt_mballoc, "mballoc"}, + {Opt_err, NULL} + }; + +@@ -810,6 +812,9 @@ + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt (sbi->s_mount_opt, MBALLOC); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1463,7 +1468,8 @@ + ext3_count_dirs(sb)); + + ext3_ext_init(sb); +- ++ ext3_mb_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.5-sles9/fs/ext3/Makefile 
+=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o extents.o ++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-sles9/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 +@@ -78,7 +78,7 @@ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -274,7 +274,7 @@ + } + + /* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks(handle_t *handle, struct inode *inode, ++void ext3_free_blocks_old(handle_t *handle, struct inode *inode, + unsigned long block, unsigned long count) + { + struct buffer_head *bitmap_bh = NULL; +@@ -1142,7 +1142,7 @@ + * bitmap, and then for any free bit if that fails. + * This function also updates quota and i_blocks field. 
+ */ +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) + { + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.5-sles9/fs/ext3/namei.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300 +@@ -1640,7 +1640,7 @@ + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, ++int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) + { + handle_t *handle; +Index: linux-2.6.5-sles9/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300 +@@ -572,7 +572,7 @@ + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); + return err; + } + +@@ -673,7 +673,7 @@ + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, +- le32_to_cpu(where[i].key), 1); ++ le32_to_cpu(where[i].key), 1, 1); + return err; + } + +@@ -1829,7 +1829,7 @@ + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 1); + } + + /** +@@ -2000,7 +2000,7 @@ + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* +Index: linux-2.6.5-sles9/fs/ext3/extents.c +=================================================================== +--- 
linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300 +@@ -740,7 +740,7 @@ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1391,7 +1391,7 @@ + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -1879,10 +1879,12 @@ + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -1894,7 +1896,7 @@ + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +Index: linux-2.6.5-sles9/fs/ext3/xattr.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300 +@@ -1366,7 +1366,7 @@ + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ 
ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +@@ -1408,7 +1408,7 @@ + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + /* Free the old block. */ + ea_bdebug(old_bh, "freeing"); +- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); + + /* ext3_forget() calls bforget() for us, but we + let our caller release old_bh, so we need to +@@ -1504,7 +1504,7 @@ + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + ext3_xattr_cache_remove(bh); +- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); + } else { +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300 +@@ -57,6 +57,8 @@ + #define ext3_debug(f, a...) 
do {} while (0) + #endif + ++#define EXT3_MULTIBLOCK_ALLOCATOR 1 ++ + /* + * Special inodes numbers + */ +@@ -339,6 +341,7 @@ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -698,7 +701,7 @@ + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern unsigned long ext3_count_free_blocks (struct super_block *); + extern void ext3_check_blocks_bitmap (struct super_block *); + extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 +@@ -23,10 +23,30 @@ + #define EXT_INCLUDE + #include + #include ++#include + #endif + #endif + #include + ++#define EXT3_BB_MAX_BLOCKS 30 ++struct ext3_free_metadata { ++ unsigned short group; ++ unsigned short num; ++ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; ++ struct list_head list; ++}; ++ ++#define EXT3_BB_MAX_ORDER 14 ++ ++struct ext3_buddy_group_blocks { ++ sector_t bb_bitmap; ++ sector_t bb_buddy; ++ spinlock_t bb_lock; ++ unsigned bb_counters[EXT3_BB_MAX_ORDER]; ++ struct ext3_free_metadata *bb_md_cur; ++ unsigned long bb_tid; ++}; ++ + /* + * third extended-fs super-block data in memory + */ +@@ -78,6 +98,17 @@ + struct timer_list turn_ro_timer; /* For turning read-only (crash 
simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ + #endif ++ ++ /* for buddy allocator */ ++ struct ext3_buddy_group_blocks *s_buddy_blocks; ++ struct inode *s_buddy; ++ long s_blocks_reserved; ++ spinlock_t s_reserve_lock; ++ struct list_head s_active_transaction; ++ struct list_head s_closed_transaction; ++ struct list_head s_committed_transaction; ++ spinlock_t s_md_lock; ++ tid_t s_last_transaction; + }; + + #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch new file mode 100644 index 0000000..9d782c4 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch @@ -0,0 +1,1750 @@ +Index: linux-2.6.7/fs/ext3/mballoc.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.7/fs/ext3/mballoc.c 2004-09-06 12:51:42.000000000 +0400 +@@ -0,0 +1,1428 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++ ++/* ++ * mballoc.c contains the multiblocks allocation routines ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * TODO: ++ * - do not scan from the beginning, try to remember first free block ++ * - mb_mark_used_* may allocate chunk right after splitting buddy ++ * - special flag to advise the allocator to look for requested + N blocks ++ * this may improve interaction between extents and mballoc ++ */ ++ ++/* ++ * with AGGRESSIVE_CHECK the allocator runs consistency checks over its ++ * structures. these checks slow things down a lot ++ */ ++#define AGGRESSIVE_CHECK__ ++ ++/* ++ * mb_debug() goes to printk() only when MB_DEBUG is defined; the ++ * MB_DEBUG__ spelling below leaves it compiled out ++ */ ++#define MB_DEBUG__ ++#ifdef MB_DEBUG ++#define mb_debug(fmt,a...) printk(fmt, ##a) ++#else ++#define mb_debug(fmt,a...) ++#endif ++ ++/* ++ * where to save buddy structures between umount/mount (clean case only) ++ */ ++#define EXT3_BUDDY_FILE ".buddy" ++ ++/* ++ * max. number of chunks to be tracked in ext3_free_extent struct ++ */ ++#define MB_ARR_SIZE 32 ++ ++struct ext3_allocation_context { ++ struct super_block *ac_sb; ++ ++ /* search goals */ ++ int ac_g_group; ++ int ac_g_start; ++ int ac_g_len; ++ int ac_g_flags; ++ ++ /* the best found extent */ ++ int ac_b_group; ++ int ac_b_start; ++ int ac_b_len; ++ ++ /* number of iterations done. 
we have to track to limit searching */ ++ int ac_repeats; ++ int ac_groups_scanned; ++ int ac_status; ++}; ++ ++#define AC_STATUS_CONTINUE 1 ++#define AC_STATUS_FOUND 2 ++ ++ ++struct ext3_buddy { ++ void *bd_bitmap; ++ void *bd_buddy; ++ int bd_blkbits; ++ struct buffer_head *bd_bh; ++ struct buffer_head *bd_bh2; ++ struct ext3_buddy_group_blocks *bd_bd; ++ struct super_block *bd_sb; ++}; ++ ++struct ext3_free_extent { ++ int fe_start; ++ int fe_len; ++ unsigned char fe_orders[MB_ARR_SIZE]; ++ unsigned char fe_nums; ++ unsigned char fe_back; ++}; ++ ++#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) ++ ++ ++int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); ++struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); ++void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long); ++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *); ++int ext3_mb_reserve_blocks(struct super_block *, int); ++void ext3_mb_release_blocks(struct super_block *, int); ++void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); ++void ext3_mb_free_committed_blocks(struct super_block *); ++ ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ if ((unsigned) addr & 1) { \ ++ bit += 8; \ ++ addr--; \ ++ } \ ++ if ((unsigned) addr & 2) { \ ++ bit += 16; \ ++ addr--; \ ++ addr--; \ ++ } \ ++} ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++ ++static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) ++{ ++ int i = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(max != 
NULL); ++ ++ if (order > e3b->bd_blkbits + 1) ++ return NULL; ++ ++ /* at order 0 we see each particular block */ ++ *max = 1 << (e3b->bd_blkbits + 3); ++ if (order == 0) ++ return e3b->bd_bitmap; ++ ++ bb = e3b->bd_buddy; ++ *max = *max >> 1; ++ while (i < order) { ++ bb += 1 << (e3b->bd_blkbits - i); ++ i++; ++ *max = *max >> 1; ++ } ++ return bb; ++} ++ ++static int ext3_mb_load_desc(struct super_block *sb, int group, ++ struct ext3_buddy *e3b) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap); ++ J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy); ++ ++ /* load bitmap */ ++ e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap); ++ if (e3b->bd_bh == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ goto out; ++ } ++ if (!buffer_uptodate(e3b->bd_bh)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh); ++ wait_on_buffer(e3b->bd_bh); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh)); ++ ++ /* load buddy */ ++ e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy); ++ if (e3b->bd_bh2 == NULL) { ++ ext3_error(sb, "ext3_mb_load_desc", ++ "can't get block for buddy bitmap\n"); ++ goto out; ++ } ++ if (!buffer_uptodate(e3b->bd_bh2)) { ++ ll_rw_block(READ, 1, &e3b->bd_bh2); ++ wait_on_buffer(e3b->bd_bh2); ++ } ++ J_ASSERT(buffer_uptodate(e3b->bd_bh2)); ++ ++ e3b->bd_bitmap = e3b->bd_bh->b_data; ++ e3b->bd_buddy = e3b->bd_bh2->b_data; ++ e3b->bd_blkbits = sb->s_blocksize_bits; ++ e3b->bd_bd = sbi->s_buddy_blocks + group; ++ e3b->bd_sb = sb; ++ ++ return 0; ++out: ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++ e3b->bd_bh = NULL; ++ e3b->bd_bh2 = NULL; ++ return -EIO; ++} ++ ++static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b) ++{ ++ mark_buffer_dirty(e3b->bd_bh); ++ mark_buffer_dirty(e3b->bd_bh2); ++} ++ ++static void ext3_mb_release_desc(struct ext3_buddy *e3b) ++{ ++ brelse(e3b->bd_bh); ++ brelse(e3b->bd_bh2); ++} ++ ++#ifdef AGGRESSIVE_CHECK ++static void 
mb_check_buddy(struct ext3_buddy *e3b) ++{ ++ int order = e3b->bd_blkbits + 1; ++ int max, max2, i, j, k, count; ++ void *buddy, *buddy2; ++ ++ if (!test_opt(e3b->bd_sb, MBALLOC)) ++ return; ++ ++ while (order > 1) { ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ buddy2 = mb_find_buddy(e3b, order - 1, &max2); ++ J_ASSERT(buddy2); ++ J_ASSERT(buddy != buddy2); ++ J_ASSERT(max * 2 == max2); ++ ++ count = 0; ++ for (i = 0; i < max; i++) { ++ ++ if (!mb_test_bit(i, buddy)) { ++ /* only single bit in buddy2 may be 1 */ ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ continue; ++ } ++ ++ /* both bits in buddy2 must be 0 */ ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2)); ++ ++ for (j = 0; j < (1 << order); j++) { ++ k = (i * (1 << order)) + j; ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); ++ } ++ count++; ++ } ++ J_ASSERT(e3b->bd_bd->bb_counters[order] == count); ++ order--; ++ } ++ ++ buddy = mb_find_buddy(e3b, 0, &max); ++ for (i = 0; i < max; i++) { ++ if (mb_test_bit(i, buddy)) ++ continue; ++ /* check used bits only */ ++ for (j = 0; j < e3b->bd_blkbits + 1; j++) { ++ buddy2 = mb_find_buddy(e3b, j, &max2); ++ k = i >> j; ++ J_ASSERT(k < max2); ++ J_ASSERT(!mb_test_bit(k, buddy2)); ++ } ++ } ++} ++#else ++#define mb_check_buddy(e3b) ++#endif ++ ++static inline void ++ext3_lock_group(struct super_block *sb, int group) ++{ ++ spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++ ++static inline void ++ext3_unlock_group(struct super_block *sb, int group) ++{ ++ spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); ++} ++ ++static int mb_find_order_for_block(struct ext3_buddy *e3b, int block) ++{ ++ int order = 1; ++ void *bb; ++ ++ J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy); ++ J_ASSERT(block < (1 << (e3b->bd_blkbits + 3))); ++ ++ bb = e3b->bd_buddy; ++ while (order <= 
e3b->bd_blkbits + 1) { ++ block = block >> 1; ++ if (mb_test_bit(block, bb)) { ++ /* this block is part of buddy of order 'order' */ ++ return order; ++ } ++ bb += 1 << (e3b->bd_blkbits - order); ++ order++; ++ } ++ return 0; ++} ++ ++static inline void mb_clear_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: clear whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0; ++ cur += 32; ++ continue; ++ } ++ mb_clear_bit(cur, bm); ++ cur++; ++ } ++} ++ ++static inline void mb_set_bits(void *bm, int cur, int len) ++{ ++ __u32 *addr; ++ ++ len = cur + len; ++ while (cur < len) { ++ if ((cur & 31) == 0 && (len - cur) >= 32) { ++ /* fast path: set whole word at once */ ++ addr = bm + (cur >> 3); ++ *addr = 0xffffffff; ++ cur += 32; ++ continue; ++ } ++ mb_set_bit(cur, bm); ++ cur++; ++ } ++} ++ ++static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count) ++{ ++ int block, max, order; ++ void *buddy, *buddy2; ++ ++ mb_check_buddy(e3b); ++ while (count-- > 0) { ++ block = first++; ++ order = 0; ++ ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); ++ e3b->bd_bd->bb_counters[order]++; ++ ++ /* start of the buddy */ ++ buddy = mb_find_buddy(e3b, order, &max); ++ ++ do { ++ block &= ~1UL; ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) ++ break; ++ ++ /* both the buddies are free, try to coalesce them */ ++ buddy2 = mb_find_buddy(e3b, order + 1, &max); ++ ++ if (!buddy2) ++ break; ++ ++ if (order > 0) { ++ /* for special purposes, we don't clear ++ * free bits in bitmap */ ++ mb_clear_bit(block, buddy); ++ mb_clear_bit(block + 1, buddy); ++ } ++ e3b->bd_bd->bb_counters[order]--; ++ e3b->bd_bd->bb_counters[order]--; ++ ++ block = block >> 1; ++ order++; ++ e3b->bd_bd->bb_counters[order]++; ++ ++ mb_set_bit(block, buddy2); ++ buddy = buddy2; ++ } while (1); ++ } ++ mb_check_buddy(e3b); 
++ ++ return 0; ++} ++ ++/* ++ * returns 1 if out extent is enough to fill needed space ++ */ ++int mb_make_backward_extent(struct ext3_free_extent *in, ++ struct ext3_free_extent *out, int needed) ++{ ++ int i; ++ ++ J_ASSERT(in); ++ J_ASSERT(out); ++ J_ASSERT(in->fe_nums < MB_ARR_SIZE); ++ ++ out->fe_len = 0; ++ out->fe_start = in->fe_start + in->fe_len; ++ out->fe_nums = 0; ++ ++ /* for single-chunk extent we need not back order ++ * also, if an extent doesn't fill needed space ++ * then it makes no sense to try back order becase ++ * if we select this extent then it'll be use as is */ ++ if (in->fe_nums < 2 || in->fe_len < needed) ++ return 0; ++ ++ i = in->fe_nums - 1; ++ while (i >= 0 && out->fe_len < needed) { ++ out->fe_len += (1 << in->fe_orders[i]); ++ out->fe_start -= (1 << in->fe_orders[i]); ++ i--; ++ } ++ /* FIXME: in some situation fe_orders may be too small to hold ++ * all the buddies */ ++ J_ASSERT(out->fe_len >= needed); ++ ++ for (i++; i < in->fe_nums; i++) ++ out->fe_orders[out->fe_nums++] = in->fe_orders[i]; ++ J_ASSERT(out->fe_nums < MB_ARR_SIZE); ++ out->fe_back = 1; ++ ++ return 1; ++} ++ ++int mb_find_extent(struct ext3_buddy *e3b, int order, int block, ++ int needed, struct ext3_free_extent *ex) ++{ ++ int space = needed; ++ int next, max, ord; ++ void *buddy; ++ ++ J_ASSERT(ex != NULL); ++ ++ ex->fe_nums = 0; ++ ex->fe_len = 0; ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ J_ASSERT(block < max); ++ if (!mb_test_bit(block, buddy)) ++ goto nofree; ++ ++ if (order == 0) { ++ /* find actual order */ ++ order = mb_find_order_for_block(e3b, block); ++ block = block >> order; ++ } ++ ++ ex->fe_orders[ex->fe_nums++] = order; ++ ex->fe_len = 1 << order; ++ ex->fe_start = block << order; ++ ex->fe_back = 0; ++ ++ while ((space = space - (1 << order)) > 0) { ++ ++ buddy = mb_find_buddy(e3b, order, &max); ++ J_ASSERT(buddy); ++ ++ if (block + 1 >= max) ++ break; ++ ++ next = (block + 1) * (1 << order); ++ if 
(!mb_test_bit(next, e3b->bd_bitmap)) ++ break; ++ ++ ord = mb_find_order_for_block(e3b, next); ++ ++ if ((1 << ord) >= needed) { ++ /* we dont want to coalesce with self-enough buddies */ ++ break; ++ } ++ order = ord; ++ block = next >> order; ++ ex->fe_len += 1 << order; ++ ++ if (ex->fe_nums < MB_ARR_SIZE) ++ ex->fe_orders[ex->fe_nums++] = order; ++ } ++ ++nofree: ++ J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3))); ++ return ex->fe_len; ++} ++ ++static int mb_mark_used_backward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ start = ex->fe_start + ex->fe_len - 1; ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ if (((start >> ord) << ord) == (start - (1 << ord) + 1) && ++ len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! */ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start -= mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ J_ASSERT(start >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++static int mb_mark_used_forward(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int start = ex->fe_start, len0 = len; ++ int ord, mlen, max, cur; ++ void *buddy; ++ ++ while (len) { ++ ord = mb_find_order_for_block(e3b, start); ++ ++ 
if (((start >> ord) << ord) == start && len >= (1 << ord)) { ++ /* the whole chunk may be allocated at once! */ ++ mlen = 1 << ord; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ J_ASSERT((start >> ord) < max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ start += mlen; ++ len -= mlen; ++ J_ASSERT(len >= 0); ++ continue; ++ } ++ ++ /* we have to split large buddy */ ++ J_ASSERT(ord > 0); ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_clear_bit(start >> ord, buddy); ++ e3b->bd_bd->bb_counters[ord]--; ++ ++ ord--; ++ cur = (start >> ord) & ~1U; ++ buddy = mb_find_buddy(e3b, ord, &max); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); ++ e3b->bd_bd->bb_counters[ord]++; ++ e3b->bd_bd->bb_counters[ord]++; ++ } ++ ++ /* now drop all the bits in bitmap */ ++ mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0); ++ ++ mb_check_buddy(e3b); ++ ++ return 0; ++} ++ ++int inline mb_mark_used(struct ext3_buddy *e3b, ++ struct ext3_free_extent *ex, int len) ++{ ++ int err; ++ ++ J_ASSERT(ex); ++ if (ex->fe_back == 0) ++ err = mb_mark_used_forward(e3b, ex, len); ++ else ++ err = mb_mark_used_backward(e3b, ex, len); ++ return err; ++} ++ ++int ext3_mb_new_in_group(struct ext3_allocation_context *ac, ++ struct ext3_buddy *e3b, int group) ++{ ++ struct super_block *sb = ac->ac_sb; ++ int err, gorder, max, i; ++ struct ext3_free_extent curex; ++ ++ /* let's know order of allocation */ ++ gorder = 0; ++ while (ac->ac_g_len > (1 << gorder)) ++ gorder++; ++ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { ++ /* someone asks for space at this specified block ++ * probably he wants to merge it into existing extent */ ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ /* good. 
at least one block is free */ ++ max = mb_find_extent(e3b, 0, ac->ac_g_start, ++ ac->ac_g_len, &curex); ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ err = 0; ++ goto out; ++ } ++ /* don't try to find goal anymore */ ++ ac->ac_g_flags &= ~1; ++ } ++ ++ i = 0; ++ while (1) { ++ i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i); ++ if (i >= sb->s_blocksize * 8) ++ break; ++ ++ max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex); ++ if (max >= ac->ac_g_len) { ++ max = min(curex.fe_len, ac->ac_g_len); ++ mb_mark_used(e3b, &curex, max); ++ ++ ac->ac_b_group = group; ++ ac->ac_b_start = curex.fe_start; ++ ac->ac_b_len = max; ++ ac->ac_status = AC_STATUS_FOUND; ++ break; ++ } ++ i += max; ++ } ++ ++ return 0; ++ ++out: ++ return err; ++} ++ ++int mb_good_group(struct ext3_allocation_context *ac, int group, int cr) ++{ ++ struct ext3_group_desc *gdp; ++ int free_blocks; ++ ++ gdp = ext3_get_group_desc(ac->ac_sb, group, NULL); ++ if (!gdp) ++ return 0; ++ free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); ++ if (free_blocks == 0) ++ return 0; ++ ++ /* someone wants this block very much */ ++ if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) ++ return 1; ++ ++ /* FIXME: I'd like to take fragmentation into account here */ ++ if (cr == 0) { ++ if (free_blocks >= ac->ac_g_len >> 1) ++ return 1; ++ } else if (cr == 1) { ++ if (free_blocks >= ac->ac_g_len >> 2) ++ return 1; ++ } else if (cr == 2) { ++ return 1; ++ } else { ++ BUG(); ++ } ++ return 0; ++} ++ ++int ext3_mb_new_blocks(handle_t *handle, struct inode *inode, ++ unsigned long goal, int *len, int flags, int *errp) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_allocation_context ac; ++ int i, group, block, cr, err = 0; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ struct buffer_head *gdp_bh; ++ struct ext3_sb_info 
*sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ ++ J_ASSERT(len != NULL); ++ J_ASSERT(*len > 0); ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk("ext3_mb_new_nblocks: nonexistent device"); ++ return 0; ++ } ++ ++ if (!test_opt(sb, MBALLOC)) { ++ static int ext3_mballoc_warning = 0; ++ if (ext3_mballoc_warning == 0) { ++ printk(KERN_ERR "EXT3-fs: multiblock request with " ++ "mballoc disabled!\n"); ++ ext3_mballoc_warning++; ++ } ++ *len = 1; ++ err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp); ++ return err; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ ++ if (!(flags & 2)) { ++ /* someone asks for non-reserved blocks */ ++ BUG_ON(*len > 1); ++ err = ext3_mb_reserve_blocks(sb, 1); ++ if (err) { ++ *errp = err; ++ return 0; ++ } ++ } ++ ++ /* ++ * Check quota for allocation of this blocks. ++ */ ++ while (*len && DQUOT_ALLOC_BLOCK(inode, *len)) ++ *len -= 1; ++ if (*len == 0) { ++ *errp = -EDQUOT; ++ block = 0; ++ goto out; ++ } ++ ++ /* start searching from the goal */ ++ if (goal < le32_to_cpu(es->s_first_data_block) || ++ goal >= le32_to_cpu(es->s_blocks_count)) ++ goal = le32_to_cpu(es->s_first_data_block); ++ group = (goal - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ block = ((goal - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb)); ++ ++ /* set up allocation goals */ ++ ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0; ++ ac.ac_status = 0; ++ ac.ac_groups_scanned = 0; ++ ac.ac_sb = inode->i_sb; ++ ac.ac_g_group = group; ++ ac.ac_g_start = block; ++ ac.ac_g_len = *len; ++ ac.ac_g_flags = flags; ++ ++ /* loop over the groups */ ++ for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) { ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) { ++ if (group == EXT3_SB(sb)->s_groups_count) ++ group = 0; ++ ++ /* check is group good for our criteries */ ++ if (!mb_good_group(&ac, group, cr)) ++ continue; ++ ++ err = 
ext3_mb_load_desc(ac.ac_sb, group, &e3b); ++ if (err) ++ goto out_err; ++ ++ ext3_lock_group(sb, group); ++ if (!mb_good_group(&ac, group, cr)) { ++ /* someone did allocation from this group */ ++ ext3_unlock_group(sb, group); ++ ext3_mb_release_desc(&e3b); ++ continue; ++ } ++ ++ err = ext3_mb_new_in_group(&ac, &e3b, group); ++ ext3_unlock_group(sb, group); ++ if (ac.ac_status == AC_STATUS_FOUND) ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ if (err) ++ goto out_err; ++ if (ac.ac_status == AC_STATUS_FOUND) ++ break; ++ } ++ } ++ ++ if (ac.ac_status != AC_STATUS_FOUND) { ++ /* unfortunately, we can't satisfy this request */ ++ J_ASSERT(ac.ac_b_len == 0); ++ DQUOT_FREE_BLOCK(inode, *len); ++ *errp = -ENOSPC; ++ block = 0; ++ goto out; ++ } ++ ++ /* good news - free block(s) have been found. now it's time ++ * to mark block(s) in good old journaled bitmap */ ++ block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + ac.ac_b_start + le32_to_cpu(es->s_first_data_block); ++ ++ /* we made a decision, now mark found blocks in good old ++ * bitmap to be journaled */ ++ ++ /* the group descriptor is not looked up yet, so only report ++ * what the allocation context already knows */ ++ ext3_debug("using block group %d, %d block(s)\n", ++ ac.ac_b_group, ac.ac_b_len); ++ ++ bitmap_bh = read_block_bitmap(sb, ac.ac_b_group); ++ if (!bitmap_bh) { ++ /* out_err stores err into *errp, so report through err */ ++ err = -EIO; ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) { ++ *errp = err; ++ goto out_err; ++ } ++ ++ gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh); ++ if (!gdp) { ++ /* out_err stores err into *errp, so report through err */ ++ err = -EIO; ++ goto out_err; ++ } ++ ++ err = ext3_journal_get_write_access(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb) ++ + le32_to_cpu(es->s_first_data_block); ++ ++ if (block == le32_to_cpu(gdp->bg_block_bitmap) || ++ block == le32_to_cpu(gdp->bg_inode_bitmap) || ++ in_range(block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error(sb, "ext3_new_block", ++ "Allocating block in system 
zone - " ++ "block = %u", block); ++#if 0 ++ for (i = 0; i < ac.ac_b_len; i++) ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++#endif ++ mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); ++ ++ ext3_lock_group(sb, ac.ac_b_group); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - ++ ac.ac_b_len); ++ ext3_unlock_group(sb, ac.ac_b_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len); ++ ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ if (err) ++ goto out_err; ++ err = ext3_journal_dirty_metadata(handle, gdp_bh); ++ if (err) ++ goto out_err; ++ ++ sb->s_dirt = 1; ++ *errp = 0; ++ brelse(bitmap_bh); ++ ++ /* drop non-allocated, but dquote'd blocks */ ++ J_ASSERT(*len >= ac.ac_b_len); ++ DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len); ++ ++ *len = ac.ac_b_len; ++ J_ASSERT(block != 0); ++ goto out; ++ ++out_err: ++ /* if we've already allocated something, roll it back */ ++ if (ac.ac_status == AC_STATUS_FOUND) { ++ /* FIXME: free blocks here */ ++ } ++ ++ DQUOT_FREE_BLOCK(inode, *len); ++ brelse(bitmap_bh); ++ *errp = err; ++ block = 0; ++out: ++ if (!(flags & 2)) { ++ /* block wasn't reserved before and we reserved it ++ * at the beginning of allocation. it doesn't matter ++ * whether we allocated anything or we failed: time ++ * to release reservation. 
NOTE: because I expect ++ * any multiblock request from delayed allocation ++ * path only, here is single block always */ ++ ext3_mb_release_blocks(sb, 1); ++ } ++ return block; ++} ++ ++int ext3_mb_generate_buddy(struct super_block *sb, int group) ++{ ++ struct buffer_head *bh; ++ int i, err, count = 0; ++ struct ext3_buddy e3b; ++ ++ err = ext3_mb_load_desc(sb, group, &e3b); ++ if (err) ++ goto out; ++ memset(e3b.bd_bh->b_data, 0, sb->s_blocksize); ++ memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize); ++ ++ bh = read_block_bitmap(sb, group); ++ if (bh == NULL) { ++ err = -EIO; ++ goto out2; ++ } ++ ++ /* loop over the blocks, nad create buddies for free ones */ ++ for (i = 0; i < sb->s_blocksize * 8; i++) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { ++ mb_free_blocks(&e3b, i, 1); ++ count++; ++ } ++ } ++ brelse(bh); ++ mb_check_buddy(&e3b); ++ ext3_mb_dirty_buddy(&e3b); ++ ++out2: ++ ext3_mb_release_desc(&e3b); ++out: ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_mb_new_blocks); ++ ++#define MB_CREDITS \ ++ (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ ++ 2 * EXT3_QUOTA_INIT_BLOCKS) ++ ++int ext3_mb_init_backend(struct super_block *sb) ++{ ++ struct inode *root = sb->s_root->d_inode; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct dentry *db; ++ tid_t target; ++ int err, i; ++ ++ sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) * ++ sbi->s_groups_count, GFP_KERNEL); ++ if (sbi->s_buddy_blocks == NULL) { ++ printk("can't allocate mem for buddy maps\n"); ++ return -ENOMEM; ++ } ++ memset(sbi->s_buddy_blocks, 0, ++ sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count); ++ sbi->s_buddy = NULL; ++ ++ down(&root->i_sem); ++ db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root, ++ strlen(EXT3_BUDDY_FILE)); ++ if (IS_ERR(db)) { ++ err = PTR_ERR(db); ++ printk("can't lookup buddy file: %d\n", err); ++ goto out; ++ } ++ ++ if (db->d_inode != NULL) { ++ sbi->s_buddy = igrab(db->d_inode); ++ goto map; ++ } ++ ++ err = 
ext3_create(root, db, S_IFREG, NULL); ++ if (err) { ++ printk("error while creation buddy file: %d\n", err); ++ } else { ++ sbi->s_buddy = igrab(db->d_inode); ++ } ++ ++map: ++ for (i = 0; i < sbi->s_groups_count; i++) { ++ struct buffer_head *bh = NULL; ++ handle_t *handle; ++ ++ handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS); ++ if (IS_ERR(handle)) { ++ err = PTR_ERR(handle); ++ goto out2; ++ } ++ ++ /* allocate block for bitmap */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy bitmap: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr; ++ brelse(bh); ++ ++ /* allocate block for buddy */ ++ bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err); ++ if (bh == NULL) { ++ printk("can't get block for buddy: %d\n", err); ++ goto out2; ++ } ++ sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr; ++ brelse(bh); ++ ext3_journal_stop(handle); ++ spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock); ++ sbi->s_buddy_blocks[i].bb_md_cur = NULL; ++ sbi->s_buddy_blocks[i].bb_tid = 0; ++ } ++ ++ if (journal_start_commit(sbi->s_journal, &target)) ++ log_wait_commit(sbi->s_journal, target); ++ ++out2: ++ dput(db); ++out: ++ up(&root->i_sem); ++ return err; ++} ++ ++int ext3_mb_release(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* release freed, non-committed blocks */ ++ spin_lock(&sbi->s_md_lock); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_committed_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ ext3_mb_free_committed_blocks(sb); ++ ++ if (sbi->s_buddy_blocks) ++ kfree(sbi->s_buddy_blocks); ++ if (sbi->s_buddy) ++ iput(sbi->s_buddy); ++ if (sbi->s_blocks_reserved) ++ printk("ext3-fs: %ld blocks being reserved at umount!\n", ++ sbi->s_blocks_reserved); ++ return 0; ++} ++ ++int ext3_mb_init(struct 
super_block *sb) ++{ ++ struct ext3_super_block *es; ++ int i; ++ ++ if (!test_opt(sb, MBALLOC)) ++ return 0; ++ ++ /* init file for buddy data */ ++ clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ ext3_mb_init_backend(sb); ++ ++ es = EXT3_SB(sb)->s_es; ++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) ++ ext3_mb_generate_buddy(sb, i); ++ spin_lock_init(&EXT3_SB(sb)->s_reserve_lock); ++ spin_lock_init(&EXT3_SB(sb)->s_md_lock); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction); ++ INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction); ++ set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC); ++ printk("EXT3-fs: mballoc enabled\n"); ++ return 0; ++} ++ ++void ext3_mb_free_committed_blocks(struct super_block *sb) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int err, i, count = 0, count2 = 0; ++ struct ext3_free_metadata *md; ++ struct ext3_buddy e3b; ++ ++ if (list_empty(&sbi->s_committed_transaction)) ++ return; ++ ++ /* there is committed blocks to be freed yet */ ++ do { ++ /* get next array of blocks */ ++ md = NULL; ++ spin_lock(&sbi->s_md_lock); ++ if (!list_empty(&sbi->s_committed_transaction)) { ++ md = list_entry(sbi->s_committed_transaction.next, ++ struct ext3_free_metadata, list); ++ list_del(&md->list); ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ if (md == NULL) ++ break; ++ ++ mb_debug("gonna free %u blocks in group %u (0x%p):", ++ md->num, md->group, md); ++ ++ err = ext3_mb_load_desc(sb, md->group, &e3b); ++ BUG_ON(err != 0); ++ ++ /* there are blocks to put in buddy to make them really free */ ++ count += md->num; ++ count2++; ++ ext3_lock_group(sb, md->group); ++ for (i = 0; i < md->num; i++) { ++ mb_debug(" %u", md->blocks[i]); ++ mb_free_blocks(&e3b, md->blocks[i], 1); ++ } ++ mb_debug("\n"); ++ ext3_unlock_group(sb, md->group); ++ ++ kfree(md); ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ } while (md); ++ mb_debug("freed %u blocks in %u structures\n", count, count2); 
++} ++ ++void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ if (sbi->s_last_transaction == handle->h_transaction->t_tid) ++ return; ++ ++ /* new transaction! time to close last one and free blocks for ++ * committed transaction. we know that only one transaction can be ++ * active, so the previous transaction may still be being logged and ++ * we know that the transaction before the previous one is already ++ * logged. this means that now we may free blocks freed in all ++ * transactions before previous one. hope I'm clear enough ... */ ++ ++ spin_lock(&sbi->s_md_lock); ++ if (sbi->s_last_transaction != handle->h_transaction->t_tid) { ++ mb_debug("new transaction %lu, old %lu\n", ++ (unsigned long) handle->h_transaction->t_tid, ++ (unsigned long) sbi->s_last_transaction); ++ list_splice_init(&sbi->s_closed_transaction, ++ &sbi->s_committed_transaction); ++ list_splice_init(&sbi->s_active_transaction, ++ &sbi->s_closed_transaction); ++ sbi->s_last_transaction = handle->h_transaction->t_tid; ++ } ++ spin_unlock(&sbi->s_md_lock); ++ ++ ext3_mb_free_committed_blocks(sb); ++} ++ ++int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b, ++ int group, int block, int count) ++{ ++ struct ext3_buddy_group_blocks *db = e3b->bd_bd; ++ struct super_block *sb = e3b->bd_sb; ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ struct ext3_free_metadata *md; ++ int i; ++ ++ ext3_lock_group(sb, group); ++ for (i = 0; i < count; i++) { ++ md = db->bb_md_cur; ++ if (md && db->bb_tid != handle->h_transaction->t_tid) { ++ db->bb_md_cur = NULL; ++ md = NULL; ++ } ++ ++ if (md == NULL) { ++ ext3_unlock_group(sb, group); ++ /* the caller holds a journal handle, so the allocation ++ * must not recurse into the filesystem: GFP_NOFS */ ++ md = kmalloc(sizeof(*md), GFP_NOFS); ++ if (md == NULL) ++ return -ENOMEM; ++ md->num = 0; ++ md->group = group; ++ ++ ext3_lock_group(sb, group); ++ if (db->bb_md_cur == NULL) { ++ spin_lock(&sbi->s_md_lock); ++ list_add(&md->list, &sbi->s_active_transaction); ++ spin_unlock(&sbi->s_md_lock); ++ 
db->bb_md_cur = md; ++ db->bb_tid = handle->h_transaction->t_tid; ++ mb_debug("new md 0x%p for group %u\n", ++ md, md->group); ++ } else { ++ kfree(md); ++ md = db->bb_md_cur; ++ } ++ } ++ ++ BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS); ++ md->blocks[md->num] = block + i; ++ md->num++; ++ if (md->num == EXT3_BB_MAX_BLOCKS) { ++ /* no more space, put full container on a sb's list */ ++ db->bb_md_cur = NULL; ++ } ++ } ++ ext3_unlock_group(sb, group); ++ return 0; ++} ++ ++void ext3_mb_free_blocks(handle_t *handle, struct inode *inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ struct buffer_head *bitmap_bh = NULL; ++ struct ext3_group_desc *gdp; ++ struct ext3_super_block *es; ++ unsigned long bit, overflow; ++ struct buffer_head *gd_bh; ++ unsigned long block_group; ++ struct ext3_sb_info *sbi; ++ struct super_block *sb; ++ struct ext3_buddy e3b; ++ int err = 0, ret; ++ ++ sb = inode->i_sb; ++ if (!sb) { ++ printk ("ext3_free_blocks: nonexistent device"); ++ return; ++ } ++ ++ ext3_mb_poll_new_transaction(sb, handle); ++ ++ sbi = EXT3_SB(sb); ++ es = EXT3_SB(sb)->s_es; ++ if (block < le32_to_cpu(es->s_first_data_block) || ++ block + count < block || ++ block + count > le32_to_cpu(es->s_blocks_count)) { ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks not in datazone - " ++ "block = %lu, count = %lu", block, count); ++ goto error_return; ++ } ++ ++ ext3_debug("freeing block %lu\n", block); ++ ++do_more: ++ overflow = 0; ++ block_group = (block - le32_to_cpu(es->s_first_data_block)) / ++ EXT3_BLOCKS_PER_GROUP(sb); ++ bit = (block - le32_to_cpu(es->s_first_data_block)) % ++ EXT3_BLOCKS_PER_GROUP(sb); ++ /* ++ * Check to see if we are freeing blocks across a group ++ * boundary. 
++ */ ++ if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { ++ overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); ++ count -= overflow; ++ } ++ brelse(bitmap_bh); ++ bitmap_bh = read_block_bitmap(sb, block_group); ++ if (!bitmap_bh) ++ goto error_return; ++ gdp = ext3_get_group_desc (sb, block_group, &gd_bh); ++ if (!gdp) ++ goto error_return; ++ ++ if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || ++ in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || ++ in_range (block, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group) || ++ in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), ++ EXT3_SB(sb)->s_itb_per_group)) ++ ext3_error (sb, "ext3_free_blocks", ++ "Freeing blocks in system zones - " ++ "Block = %lu, count = %lu", ++ block, count); ++ ++ BUFFER_TRACE(bitmap_bh, "getting write access"); ++ err = ext3_journal_get_write_access(handle, bitmap_bh); ++ if (err) ++ goto error_return; ++ ++ /* ++ * We are about to modify some metadata. Call the journal APIs ++ * to unshare ->b_data if a currently-committing transaction is ++ * using it ++ */ ++ BUFFER_TRACE(gd_bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, gd_bh); ++ if (err) ++ goto error_return; ++ ++ err = ext3_mb_load_desc(sb, block_group, &e3b); ++ if (err) ++ goto error_return; ++ ++ if (metadata) { ++ /* blocks being freed are metadata. 
these blocks shouldn't ++ * be used until this transaction is committed */ ++ ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); ++ } else { ++ ext3_lock_group(sb, block_group); ++ mb_free_blocks(&e3b, bit, count); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ ext3_unlock_group(sb, block_group); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, count); ++ } ++ ++ ext3_mb_dirty_buddy(&e3b); ++ ext3_mb_release_desc(&e3b); ++ ++ /* FIXME: undo logic will be implemented later and another way */ ++ mb_clear_bits(bitmap_bh->b_data, bit, count); ++ DQUOT_FREE_BLOCK(inode, count); ++ ++ /* We dirtied the bitmap block */ ++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); ++ ++ /* And the group descriptor block */ ++ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); ++ ret = ext3_journal_dirty_metadata(handle, gd_bh); ++ if (!err) err = ret; ++ ++ if (overflow && !err) { ++ block += count; ++ count = overflow; ++ goto do_more; ++ } ++ sb->s_dirt = 1; ++error_return: ++ brelse(bitmap_bh); ++ ext3_std_error(sb, err); ++ return; ++} ++ ++int ext3_mb_reserve_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ int free, ret = -ENOSPC; ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ free = percpu_counter_read_positive(&sbi->s_freeblocks_counter); ++ if (blocks <= free - sbi->s_blocks_reserved) { ++ sbi->s_blocks_reserved += blocks; ++ ret = 0; ++ } ++ spin_unlock(&sbi->s_reserve_lock); ++ return ret; ++} ++ ++void ext3_mb_release_blocks(struct super_block *sb, int blocks) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(sb); ++ ++ BUG_ON(blocks < 0); ++ spin_lock(&sbi->s_reserve_lock); ++ sbi->s_blocks_reserved -= blocks; ++ WARN_ON(sbi->s_blocks_reserved < 0); ++ if (sbi->s_blocks_reserved < 0) ++ sbi->s_blocks_reserved = 0; ++ spin_unlock(&sbi->s_reserve_lock); ++} ++ ++int ext3_new_block(handle_t 
*handle, struct inode *inode, ++ unsigned long goal, u32 *pc, u32 *pb, int *errp) ++{ ++ int ret, len; ++ ++ if (!test_opt(inode->i_sb, MBALLOC)) { ++ ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp); ++ goto out; ++ } ++ len = 1; ++ ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp); ++out: ++ return ret; ++} ++ ++ ++void ext3_free_blocks(handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count, int metadata) ++{ ++ if (!test_opt(inode->i_sb, MBALLOC)) ++ ext3_free_blocks_old(handle, inode, block, count); ++ else ++ ext3_mb_free_blocks(handle, inode, block, count, metadata); ++ return; ++} ++ +Index: linux-2.6.7/fs/ext3/super.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400 +@@ -392,6 +392,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_mb_release(sb); + ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); +@@ -594,7 +595,7 @@ + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, +- Opt_ignore, Opt_err, Opt_extents, Opt_extdebug ++ Opt_ignore, Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, + }; + + static match_table_t tokens = { +@@ -644,6 +645,7 @@ + {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_extents, "extents"}, + {Opt_extdebug, "extdebug"}, ++ {Opt_mballoc, "mballoc"}, + {Opt_err, NULL} + }; + +@@ -929,6 +931,9 @@ + case Opt_extdebug: + set_opt (sbi->s_mount_opt, EXTDEBUG); + break; ++ case Opt_mballoc: ++ set_opt (sbi->s_mount_opt, MBALLOC); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1602,7 +1607,8 @@ + ext3_count_dirs(sb)); + + ext3_ext_init(sb); +- ++ ext3_mb_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.7/fs/ext3/Makefile 
+=================================================================== +--- linux-2.6.7.orig/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/Makefile 2004-09-03 08:46:59.000000000 +0400 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ +- ioctl.o namei.o super.o symlink.o hash.o extents.o ++ ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.7/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/balloc.c 2004-08-26 17:11:16.000000000 +0400 ++++ linux-2.6.7/fs/ext3/balloc.c 2004-09-03 08:46:59.000000000 +0400 +@@ -78,7 +78,7 @@ + * + * Return buffer_head on success or NULL in case of failure. + */ +-static struct buffer_head * ++struct buffer_head * + read_block_bitmap(struct super_block *sb, unsigned int block_group) + { + struct ext3_group_desc * desc; +@@ -98,8 +98,8 @@ + } + + /* Free given blocks, update quota and i_blocks field */ +-void ext3_free_blocks (handle_t *handle, struct inode * inode, +- unsigned long block, unsigned long count) ++void ext3_free_blocks_old (handle_t *handle, struct inode * inode, ++ unsigned long block, unsigned long count) + { + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gd_bh; +@@ -474,8 +474,8 @@ + * This function also updates quota and i_blocks field. 
+ */ + int +-ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, +- u32 *prealloc_count, u32 *prealloc_block, int *errp) ++ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, ++ u32 *prealloc_count, u32 *prealloc_block, int *errp) + { + struct buffer_head *bitmap_bh = NULL; /* bh */ + struct buffer_head *gdp_bh; /* bh2 */ +Index: linux-2.6.7/fs/ext3/namei.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400 +@@ -1640,7 +1640,7 @@ + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +-static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, ++int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) + { + handle_t *handle; +Index: linux-2.6.7/fs/ext3/inode.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400 +@@ -254,7 +254,7 @@ + ei->i_prealloc_count = 0; + ei->i_prealloc_block = 0; + /* Writer: end */ +- ext3_free_blocks (inode, block, total); ++ ext3_free_blocks (inode, block, total, 1); + } + #endif + } +@@ -633,7 +633,7 @@ + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) +- ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); ++ ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1); + return err; + } + +@@ -734,7 +734,7 @@ + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, +- le32_to_cpu(where[i].key), 1); ++ le32_to_cpu(where[i].key), 1, 1); + return err; + } + +@@ -1911,7 +1911,7 @@ + } + } + +- ext3_free_blocks(handle, inode, block_to_free, count); ++ ext3_free_blocks(handle, inode, block_to_free, count, 1); + } + + 
/** +@@ -2082,7 +2082,7 @@ + ext3_journal_test_restart(handle, inode); + } + +- ext3_free_blocks(handle, inode, nr, 1); ++ ext3_free_blocks(handle, inode, nr, 1, 1); + + if (parent_bh) { + /* +Index: linux-2.6.7/fs/ext3/extents.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/extents.c 2004-09-03 08:46:59.000000000 +0400 +@@ -740,7 +740,7 @@ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; +- ext3_free_blocks(handle, tree->inode, ablocks[i], 1); ++ ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1); + } + } + kfree(ablocks); +@@ -1388,7 +1388,7 @@ + path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); +- ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); ++ ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1); + return err; + } + +@@ -1876,10 +1876,12 @@ + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); + struct buffer_head *bh; +- int i; ++ int i, metadata = 0; + + if (IS_ERR(handle)) + return PTR_ERR(handle); ++ if (S_ISDIR(tree->inode->i_mode)) ++ metadata = 1; + if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { + /* tail removal */ + unsigned long num, start; +@@ -1891,7 +1893,7 @@ + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); + } +- ext3_free_blocks(handle, tree->inode, start, num); ++ ext3_free_blocks(handle, tree->inode, start, num, metadata); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, ex->ee_block, ex->ee_len); +Index: linux-2.6.7/fs/ext3/xattr.c +=================================================================== +--- 
linux-2.6.7.orig/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400 +@@ -1366,7 +1366,7 @@ + new_bh = sb_getblk(sb, block); + if (!new_bh) { + getblk_failed: +- ext3_free_blocks(handle, inode, block, 1); ++ ext3_free_blocks(handle, inode, block, 1, 1); + error = -EIO; + goto cleanup; + } +@@ -1408,7 +1408,7 @@ + if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) { + /* Free the old block. */ + ea_bdebug(old_bh, "freeing"); +- ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1); ++ ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1); + + /* ext3_forget() calls bforget() for us, but we + let our caller release old_bh, so we need to +@@ -1497,7 +1497,7 @@ + lock_buffer(bh); + if (HDR(bh)->h_refcount == cpu_to_le32(1)) { + ext3_xattr_cache_remove(bh); +- ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1); ++ ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); + } else { +Index: linux-2.6.7/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-03 08:47:35.000000000 +0400 +@@ -57,6 +57,8 @@ + #define ext3_debug(f, a...) 
do {} while (0) + #endif + ++#define EXT3_MULTIBLOCK_ALLOCATOR 1 ++ + /* + * Special inodes numbers + */ +@@ -335,6 +337,7 @@ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ + #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -695,7 +698,7 @@ + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, + __u32 *, __u32 *, int *); + extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, +- unsigned long); ++ unsigned long, int); + extern unsigned long ext3_count_free_blocks (struct super_block *); + extern void ext3_check_blocks_bitmap (struct super_block *); + extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +Index: linux-2.6.7/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.7.orig/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400 ++++ linux-2.6.7/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400 +@@ -23,9 +23,29 @@ + #define EXT_INCLUDE + #include + #include ++#include + #endif + #endif + ++#define EXT3_BB_MAX_BLOCKS 30 ++struct ext3_free_metadata { ++ unsigned short group; ++ unsigned short num; ++ unsigned short blocks[EXT3_BB_MAX_BLOCKS]; ++ struct list_head list; ++}; ++ ++#define EXT3_BB_MAX_ORDER 14 ++ ++struct ext3_buddy_group_blocks { ++ sector_t bb_bitmap; ++ sector_t bb_buddy; ++ spinlock_t bb_lock; ++ unsigned bb_counters[EXT3_BB_MAX_ORDER]; ++ struct ext3_free_metadata *bb_md_cur; ++ unsigned long bb_tid; ++}; ++ + /* + * third extended-fs super-block data in memory + */ +@@ -76,6 +96,17 @@ + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ + #endif ++ ++ /* for buddy 
allocator */ ++ struct ext3_buddy_group_blocks *s_buddy_blocks; ++ struct inode *s_buddy; ++ long s_blocks_reserved; ++ spinlock_t s_reserve_lock; ++ struct list_head s_active_transaction; ++ struct list_head s_closed_transaction; ++ struct list_head s_committed_transaction; ++ spinlock_t s_md_lock; ++ tid_t s_last_transaction; + }; + + #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch b/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch new file mode 100644 index 0000000..b20be23 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-nlinks-2.6.7.patch @@ -0,0 +1,170 @@ +Index: linux-2.6.7/fs/ext3/namei.c +=================================================================== +--- linux-2.6.7.orig/fs/ext3/namei.c 2004-06-15 23:19:36.000000000 -0600 ++++ linux-2.6.7/fs/ext3/namei.c 2004-08-20 17:48:54.000000000 -0600 +@@ -1596,11 +1596,17 @@ static int ext3_delete_entry (handle_t * + static inline void ext3_inc_count(handle_t *handle, struct inode *inode) + { + inode->i_nlink++; ++ if (is_dx(inode) && inode->i_nlink > 1) { ++ /* limit is 16-bit i_links_count */ ++ if (inode->i_nlink >= EXT3_LINK_MAX || inode->i_nlink == 2) ++ inode->i_nlink = 1; ++ } + } + + static inline void ext3_dec_count(handle_t *handle, struct inode *inode) + { +- inode->i_nlink--; ++ if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) ++ inode->i_nlink--; + } + + static int ext3_add_nondir(handle_t *handle, +@@ -1693,7 +1698,7 @@ static int ext3_mkdir(struct inode * dir + struct ext3_dir_entry_2 * de; + int err; + +- if (dir->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(dir)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -1715,7 +1720,7 @@ static int ext3_mkdir(struct inode * dir + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { +- inode->i_nlink--; /* is this nlink == 0? 
*/ ++ ext3_dec_count(handle, inode); /* is this nlink == 0? */ + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; +@@ -1747,7 +1752,7 @@ static int ext3_mkdir(struct inode * dir + iput (inode); + goto out_stop; + } +- dir->i_nlink++; ++ ext3_inc_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +@@ -2010,10 +2015,10 @@ static int ext3_rmdir (struct inode * di + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; +- if (inode->i_nlink != 2) +- ext3_warning (inode->i_sb, "ext3_rmdir", +- "empty directory has nlink!=2 (%d)", +- inode->i_nlink); ++ if (!EXT3_DIR_LINK_EMPTY(inode)) ++ ext3_warning(inode->i_sb, "ext3_rmdir", ++ "empty directory has too many links (%d)", ++ inode->i_nlink); + inode->i_version++; + inode->i_nlink = 0; + /* There's no need to set i_disksize: the fact that i_nlink is +@@ -2023,7 +2028,7 @@ static int ext3_rmdir (struct inode * di + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); +- dir->i_nlink--; ++ ext3_dec_count(handle, dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + +@@ -2074,7 +2079,7 @@ static int ext3_unlink(struct inode * di + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); +- inode->i_nlink--; ++ ext3_dec_count(handle, inode); + if (!inode->i_nlink) + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime; +@@ -2146,7 +2151,7 @@ static int ext3_link (struct dentry * ol + struct inode *inode = old_dentry->d_inode; + int err; + +- if (inode->i_nlink >= EXT3_LINK_MAX) ++ if (EXT3_DIR_LINK_MAXED(inode)) + return -EMLINK; + + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + +@@ -2230,8 +2235,8 @@ static int ext3_rename (struct inode * o + if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = 
-EMLINK; +- if (!new_inode && new_dir!=old_dir && +- new_dir->i_nlink >= EXT3_LINK_MAX) ++ if (!new_inode && new_dir != old_dir && ++ EXT3_DIR_LINK_MAXED(new_dir)) + goto end_rename; + } + if (!new_bh) { +@@ -2288,7 +2293,7 @@ static int ext3_rename (struct inode * o + } + + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + new_inode->i_ctime = CURRENT_TIME; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; +@@ -2299,11 +2304,11 @@ static int ext3_rename (struct inode * o + PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); +- old_dir->i_nlink--; ++ ext3_dec_count(handle, old_dir); + if (new_inode) { +- new_inode->i_nlink--; ++ ext3_dec_count(handle, new_inode); + } else { +- new_dir->i_nlink++; ++ ext3_inc_count(handle, new_dir); + ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } +Index: linux-2.6.7/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-06-15 23:19:36.000000000 -0600 ++++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-20 17:41:27.000000000 -0600 +@@ -41,7 +41,7 @@ struct statfs; + /* + * Always enable hashed directories + */ +-#define CONFIG_EXT3_INDEX ++#define CONFIG_EXT3_INDEX 1 + + /* + * Debug code +@@ -79,7 +81,7 @@ + /* + * Maximal count of links to a file + */ +-#define EXT3_LINK_MAX 32000 ++#define EXT3_LINK_MAX 65000 + + /* + * Macro-instructions used to manage several block sizes +@@ -595,14 +595,15 @@ struct ext3_dir_entry_2 { + */ + + #ifdef CONFIG_EXT3_INDEX +- #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ +- EXT3_FEATURE_COMPAT_DIR_INDEX) && \ ++#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ ++ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) +-#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= 
EXT3_LINK_MAX) +-#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) ++#define EXT3_DIR_LINK_MAXED(dir) (!is_dx(dir) && (dir)->i_nlink >=EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || \ ++ (is_dx(dir) && (dir)->i_nlink == 1)) + #else + #define is_dx(dir) 0 +-#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) ++#define EXT3_DIR_LINK_MAXED(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) + #define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) + #endif + diff --git a/lustre/kernel_patches/patches/ext3-xattr-header-move-2.6.suse.patch b/lustre/kernel_patches/patches/ext3-xattr-header-move-2.6.suse.patch deleted file mode 100644 index 7c2d97d..0000000 --- a/lustre/kernel_patches/patches/ext3-xattr-header-move-2.6.suse.patch +++ /dev/null @@ -1,506 +0,0 @@ -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/acl.c linux-2.6.4-51.0/fs/ext3/acl.c ---- linux-2.6.4-51.0.orig/fs/ext3/acl.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/acl.c 2004-04-07 11:06:33.000000000 +0300 -@@ -10,7 +10,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include "acl.h" - - /* -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/file.c linux-2.6.4-51.0/fs/ext3/file.c ---- linux-2.6.4-51.0.orig/fs/ext3/file.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/file.c 2004-04-07 11:06:39.000000000 +0300 -@@ -23,7 +23,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include "acl.h" - - /* -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/ialloc.c linux-2.6.4-51.0/fs/ext3/ialloc.c ---- linux-2.6.4-51.0.orig/fs/ext3/ialloc.c 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/ialloc.c 2004-04-07 11:06:46.000000000 +0300 -@@ -26,7 +26,7 @@ - #include - #include - --#include "xattr.h" -+#include - #include "acl.h" - - /* -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/inode.c linux-2.6.4-51.0/fs/ext3/inode.c ---- linux-2.6.4-51.0.orig/fs/ext3/inode.c 2004-04-06 22:17:15.000000000 
+0300 -+++ linux-2.6.4-51.0/fs/ext3/inode.c 2004-04-07 11:25:05.000000000 +0300 -@@ -36,7 +36,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include "iopen.h" - #include "acl.h" - -@@ -2340,7 +2340,7 @@ static unsigned long ext3_get_inode_bloc - * performed. - */ - int ext3_get_inode_loc(struct inode *inode, -- struct ext3_iloc *iloc, int in_mem) -+ struct ext3_iloc *iloc, int in_mem) - { - unsigned long block; - struct buffer_head *bh; -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/namei.c linux-2.6.4-51.0/fs/ext3/namei.c ---- linux-2.6.4-51.0.orig/fs/ext3/namei.c 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/namei.c 2004-04-07 11:06:57.000000000 +0300 -@@ -36,7 +36,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include "iopen.h" - #include "acl.h" - -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/super.c linux-2.6.4-51.0/fs/ext3/super.c ---- linux-2.6.4-51.0.orig/fs/ext3/super.c 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/super.c 2004-04-07 11:07:05.000000000 +0300 -@@ -33,7 +33,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include "acl.h" - - static int ext3_load_journal(struct super_block *, struct ext3_super_block *); -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/symlink.c linux-2.6.4-51.0/fs/ext3/symlink.c ---- linux-2.6.4-51.0.orig/fs/ext3/symlink.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/symlink.c 2004-04-07 11:07:16.000000000 +0300 -@@ -20,7 +20,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - - static int - ext3_readlink(struct dentry *dentry, char __user *buffer, int buflen) -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr.c linux-2.6.4-51.0/fs/ext3/xattr.c ---- linux-2.6.4-51.0.orig/fs/ext3/xattr.c 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/xattr.c 2004-04-07 11:22:34.000000000 +0300 -@@ -59,7 +59,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - #include 
"acl.h" - - #define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -@@ -348,7 +348,7 @@ cleanup: - */ - int - ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, -- void *buffer, size_t buffer_size) -+ void *buffer, size_t buffer_size) - { - int size, name_len = strlen(name), storage_size; - struct ext3_xattr_entry *last; -@@ -360,7 +360,7 @@ ext3_xattr_ibody_get(struct inode *inode - if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) - return -ENOENT; - -- ret = ext3_get_inode_loc(inode, &iloc); -+ ret = ext3_get_inode_loc(inode, &iloc, 1); - if (ret) - return ret; - raw_inode = ext3_raw_inode(&iloc); -@@ -542,7 +542,7 @@ ext3_xattr_ibody_list(struct inode *inod - if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) - return 0; - -- ret = ext3_get_inode_loc(inode, &iloc); -+ ret = ext3_get_inode_loc(inode, &iloc, 1); - if (ret) - return ret; - raw_inode = ext3_raw_inode(&iloc); -@@ -693,7 +693,7 @@ ext3_xattr_ibody_find(struct inode *inod - if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) - return ret; - -- err = ext3_get_inode_loc(inode, &iloc); -+ err = ext3_get_inode_loc(inode, &iloc, 1); - if (err) - return -EIO; - raw_inode = ext3_raw_inode(&iloc); -@@ -824,7 +824,7 @@ ext3_xattr_ibody_set(handle_t *handle, s - if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE) - return -ENOSPC; - -- err = ext3_get_inode_loc(inode, &iloc); -+ err = ext3_get_inode_loc(inode, &iloc, 1); - if (err) - return err; - raw_inode = ext3_raw_inode(&iloc); -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr.h linux-2.6.4-51.0/fs/ext3/xattr.h ---- linux-2.6.4-51.0.orig/fs/ext3/xattr.h 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/xattr.h 1970-01-01 03:00:00.000000000 +0300 -@@ -1,147 +0,0 @@ --/* -- File: fs/ext3/xattr.h -- -- On-disk format of extended attributes for the ext3 filesystem. 
-- -- (C) 2001 Andreas Gruenbacher, --*/ -- --#include --#include -- --/* Magic value in attribute blocks */ --#define EXT3_XATTR_MAGIC 0xEA020000 -- --/* Maximum number of references to one attribute block */ --#define EXT3_XATTR_REFCOUNT_MAX 1024 -- --/* Name indexes */ --#define EXT3_XATTR_INDEX_MAX 10 --#define EXT3_XATTR_INDEX_USER 1 --#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 --#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 --#define EXT3_XATTR_INDEX_TRUSTED 4 --#define EXT3_XATTR_INDEX_LUSTRE 5 --#define EXT3_XATTR_INDEX_SECURITY 6 -- --struct ext3_xattr_header { -- __u32 h_magic; /* magic number for identification */ -- __u32 h_refcount; /* reference count */ -- __u32 h_blocks; /* number of disk blocks used */ -- __u32 h_hash; /* hash value of all attributes */ -- __u32 h_reserved[4]; /* zero right now */ --}; -- --struct ext3_xattr_entry { -- __u8 e_name_len; /* length of name */ -- __u8 e_name_index; /* attribute name index */ -- __u16 e_value_offs; /* offset in disk block of value */ -- __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -- __u32 e_value_size; /* size of attribute value */ -- __u32 e_hash; /* hash value of name and value */ -- char e_name[0]; /* attribute name */ --}; -- --#define EXT3_XATTR_PAD_BITS 2 --#define EXT3_XATTR_PAD (1<e_name_len)) ) --#define EXT3_XATTR_SIZE(size) \ -- (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -- --# ifdef CONFIG_EXT3_FS_XATTR -- --struct ext3_xattr_handler { -- char *prefix; -- size_t (*list)(char *list, struct inode *inode, const char *name, -- int name_len); -- int (*get)(struct inode *inode, const char *name, void *buffer, -- size_t size); -- int (*set)(struct inode *inode, const char *name, const void *buffer, -- size_t size, int flags); --}; -- --extern int ext3_xattr_register(int, struct ext3_xattr_handler *); --extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -- --extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); --extern 
ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); --extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); --extern int ext3_removexattr(struct dentry *, const char *); -- --extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); --extern int ext3_xattr_list(struct inode *, char *, size_t); --extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); --extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int); --extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int); -- --extern void ext3_xattr_delete_inode(handle_t *, struct inode *); --extern void ext3_xattr_put_super(struct super_block *); -- --extern int init_ext3_xattr(void); --extern void exit_ext3_xattr(void); -- --# else /* CONFIG_EXT3_FS_XATTR */ --# define ext3_setxattr NULL --# define ext3_getxattr NULL --# define ext3_listxattr NULL --# define ext3_removexattr NULL -- --static inline int --ext3_xattr_get(struct inode *inode, int name_index, const char *name, -- void *buffer, size_t size, int flags) --{ -- return -EOPNOTSUPP; --} -- --static inline int --ext3_xattr_list(struct inode *inode, void *buffer, size_t size) --{ -- return -EOPNOTSUPP; --} -- --static inline int --ext3_xattr_set(struct inode *inode, int name_index, const char *name, -- const void *value, size_t size, int flags) --{ -- return -EOPNOTSUPP; --} -- --static inline int --ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -- const char *name, const void *value, size_t size, int flags) --{ -- return -EOPNOTSUPP; --} -- --static inline void --ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) --{ --} -- --static inline void --ext3_xattr_put_super(struct super_block *sb) --{ --} -- --static inline int --init_ext3_xattr(void) --{ -- return 0; --} -- --static inline void --exit_ext3_xattr(void) --{ --} -- --# endif /* 
CONFIG_EXT3_FS_XATTR */ -- --extern struct ext3_xattr_handler ext3_xattr_user_handler; --extern struct ext3_xattr_handler ext3_xattr_trusted_handler; --extern struct ext3_xattr_handler ext3_xattr_security_handler; -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_security.c linux-2.6.4-51.0/fs/ext3/xattr_security.c ---- linux-2.6.4-51.0.orig/fs/ext3/xattr_security.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/xattr_security.c 2004-04-07 11:06:22.000000000 +0300 -@@ -9,7 +9,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - - static size_t - ext3_xattr_security_list(char *list, struct inode *inode, -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_trusted.c linux-2.6.4-51.0/fs/ext3/xattr_trusted.c ---- linux-2.6.4-51.0.orig/fs/ext3/xattr_trusted.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/xattr_trusted.c 2004-04-07 11:07:41.000000000 +0300 -@@ -11,7 +11,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - - #define XATTR_TRUSTED_PREFIX "trusted." - -diff -rupN linux-2.6.4-51.0.orig/fs/ext3/xattr_user.c linux-2.6.4-51.0/fs/ext3/xattr_user.c ---- linux-2.6.4-51.0.orig/fs/ext3/xattr_user.c 2004-04-05 19:41:59.000000000 +0300 -+++ linux-2.6.4-51.0/fs/ext3/xattr_user.c 2004-04-07 11:07:47.000000000 +0300 -@@ -11,7 +11,7 @@ - #include - #include - #include --#include "xattr.h" -+#include - - #define XATTR_USER_PREFIX "user." 
- -diff -rupN linux-2.6.4-51.0.orig/include/linux/ext3_fs.h linux-2.6.4-51.0/include/linux/ext3_fs.h ---- linux-2.6.4-51.0.orig/include/linux/ext3_fs.h 2004-04-06 22:17:15.000000000 +0300 -+++ linux-2.6.4-51.0/include/linux/ext3_fs.h 2004-04-07 11:13:26.000000000 +0300 -@@ -741,6 +741,9 @@ extern void ext3_truncate (struct inode - extern void ext3_set_inode_flags(struct inode *); - extern void ext3_set_aops(struct inode *inode); - -+extern int ext3_get_inode_loc(struct inode *inode, -+ struct ext3_iloc *iloc, int in_mem); -+ - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, - unsigned long); -diff -rupN linux-2.6.4-51.0.orig/include/linux/ext3_xattr.h linux-2.6.4-51.0/include/linux/ext3_xattr.h ---- linux-2.6.4-51.0.orig/include/linux/ext3_xattr.h 1970-01-01 03:00:00.000000000 +0300 -+++ linux-2.6.4-51.0/include/linux/ext3_xattr.h 2004-04-07 11:08:34.000000000 +0300 -@@ -0,0 +1,152 @@ -+/* -+ File: linux/include/linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. 
-+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#ifndef _LINUX_EXT3_XATTR_H -+#define _LINUX_EXT3_XATTR_H -+ -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+#define EXT3_XATTR_INDEX_TRUSTED 4 -+#define EXT3_XATTR_INDEX_LUSTRE 5 -+#define EXT3_XATTR_INDEX_SECURITY 6 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct 
dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *,const void *,size_t,int); -+extern int ext3_xattr_block_set(handle_t *, struct inode *, int, const char *,const void *,size_t,int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void); -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline int -+ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -EOPNOTSUPP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void 
-+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+extern struct ext3_xattr_handler ext3_xattr_user_handler; -+extern struct ext3_xattr_handler ext3_xattr_trusted_handler; -+extern struct ext3_xattr_handler ext3_xattr_security_handler; -+ -+#endif diff --git a/lustre/kernel_patches/patches/iopen-2.6-suse.patch b/lustre/kernel_patches/patches/iopen-2.6-suse.patch index 8a8d115..4a51eb8 100644 --- a/lustre/kernel_patches/patches/iopen-2.6-suse.patch +++ b/lustre/kernel_patches/patches/iopen-2.6-suse.patch @@ -8,8 +8,8 @@ Index: linux-stage/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-05-11 17:21:20.000000000 -0400 -+++ linux-stage/fs/ext3/Makefile 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500 ++++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500 @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -21,8 +21,8 @@ Index: linux-stage/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o Index: linux-stage/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400 -+++ linux-stage/fs/ext3/inode.c 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500 ++++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500 @@ -37,6 +37,7 @@ #include #include @@ -31,20 +31,20 @@ Index: linux-stage/fs/ext3/inode.c #include "acl.h" /* -@@ -2472,6 +2473,9 @@ - ei->i_acl = EXT3_ACL_NOT_CACHED; +@@ -2401,6 +2402,9 @@ ei->i_default_acl = EXT3_ACL_NOT_CACHED; #endif -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ++ ++ if (ext3_iopen_get_inode(inode)) ++ return; + if (ext3_get_inode_loc(inode, &iloc, 0)) goto bad_inode; - bh = iloc.bh; Index: 
linux-stage/fs/ext3/iopen.c =================================================================== --- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.c 2004-05-11 17:21:21.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500 @@ -0,0 +1,272 @@ +/* + * linux/fs/ext3/iopen.c @@ -321,7 +321,7 @@ Index: linux-stage/fs/ext3/iopen.c Index: linux-stage/fs/ext3/iopen.h =================================================================== --- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.h 2004-05-11 17:21:21.000000000 -0400 ++++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500 @@ -0,0 +1,15 @@ +/* + * iopen.h @@ -340,8 +340,8 @@ Index: linux-stage/fs/ext3/iopen.h + struct inode *inode, int rehash); Index: linux-stage/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-05-11 17:21:20.000000000 -0400 -+++ linux-stage/fs/ext3/namei.c 2004-05-11 17:21:21.000000000 -0400 +--- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500 ++++ linux-stage/fs/ext3/namei.c 2004-11-03 14:41:25.127695114 -0500 @@ -37,6 +37,7 @@ #include #include @@ -373,7 +373,7 @@ Index: linux-stage/fs/ext3/namei.c } -@@ -2019,10 +2021,6 @@ +@@ -2029,10 +2031,6 @@ inode->i_nlink); inode->i_version++; inode->i_nlink = 0; @@ -384,7 +384,7 @@ Index: linux-stage/fs/ext3/namei.c ext3_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; ext3_mark_inode_dirty(handle, inode); -@@ -2139,6 +2137,23 @@ +@@ -2152,6 +2150,23 @@ return err; } @@ -408,7 +408,7 @@ Index: linux-stage/fs/ext3/namei.c static int ext3_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { -@@ -2161,7 +2176,8 @@ +@@ -2175,7 +2190,8 @@ ext3_inc_count(handle, inode); atomic_inc(&inode->i_count); @@ -416,14 +416,14 @@ Index: linux-stage/fs/ext3/namei.c + 
err = ext3_add_link(handle, dentry, inode); + ext3_orphan_del(handle,inode); ext3_journal_stop(handle); - return err; - } + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; Index: linux-stage/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-05-11 17:21:21.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2004-05-11 17:44:53.000000000 -0400 -@@ -536,7 +536,7 @@ - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_noload, +--- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500 ++++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500 +@@ -534,7 +534,7 @@ + Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_ignore, Opt_barrier, @@ -441,7 +441,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_err, NULL} }; -@@ -772,6 +775,18 @@ +@@ -778,6 +781,18 @@ else clear_opt(sbi->s_mount_opt, BARRIER); break; diff --git a/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 b/lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7.patch similarity index 100% rename from lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7 rename to lustre/kernel_patches/patches/jbd-static-wbuf-2.6.7.patch diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-suse.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-suse.patch index 023159c..77d5b30 100644 --- a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-suse.patch +++ b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-suse.patch @@ -1,11 +1,7 @@ -.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c -.new.........fs/nfs/dir.c -.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c -.new.........fs/nfs/dir.c -Index: linux-2.6.4-51.0/fs/nfs/dir.c +Index: linux-2.6.5-7.108/fs/nfs/dir.c =================================================================== ---- 
linux-2.6.4-51.0.orig/fs/nfs/dir.c 2004-04-05 17:09:16.000000000 -0400 -+++ linux-2.6.4-51.0/fs/nfs/dir.c 2004-04-05 17:09:23.000000000 -0400 +--- linux-2.6.5-7.108.orig/fs/nfs/dir.c 2004-09-15 19:26:43.012732408 +0300 ++++ linux-2.6.5-7.108/fs/nfs/dir.c 2004-09-15 20:03:32.882781096 +0300 @@ -782,7 +782,7 @@ if (nd->flags & LOOKUP_DIRECTORY) return 0; @@ -51,10 +47,10 @@ Index: linux-2.6.4-51.0/fs/nfs/dir.c if (openflags & O_CREAT) { /* If this is a negative dentry, just drop it */ if (!inode) -Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c +Index: linux-2.6.5-7.108/fs/nfs/nfs4proc.c =================================================================== ---- linux-2.6.4-51.0.orig/fs/nfs/nfs4proc.c 2004-04-05 12:41:59.000000000 -0400 -+++ linux-2.6.4-51.0/fs/nfs/nfs4proc.c 2004-04-05 17:09:23.000000000 -0400 +--- linux-2.6.5-7.108.orig/fs/nfs/nfs4proc.c 2004-04-04 06:37:39.000000000 +0300 ++++ linux-2.6.5-7.108/fs/nfs/nfs4proc.c 2004-09-15 20:03:32.885780640 +0300 @@ -792,17 +792,17 @@ struct nfs4_state *state; @@ -76,22 +72,23 @@ Index: linux-2.6.4-51.0/fs/nfs/nfs4proc.c put_rpccred(cred); if (IS_ERR(state)) return (struct inode *)state; -Index: linux-2.6.4-51.0/fs/cifs/dir.c +Index: linux-2.6.5-7.108/fs/cifs/dir.c =================================================================== ---- linux-2.6.4-51.0.orig/fs/cifs/dir.c 2004-04-05 12:41:59.000000000 -0400 -+++ linux-2.6.4-51.0/fs/cifs/dir.c 2004-04-05 17:13:47.000000000 -0400 -@@ -146,22 +146,22 @@ - if(nd) { - cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path)); +--- linux-2.6.5-7.108.orig/fs/cifs/dir.c 2004-09-04 13:28:22.000000000 +0300 ++++ linux-2.6.5-7.108/fs/cifs/dir.c 2004-09-15 20:03:40.065689128 +0300 +@@ -173,23 +173,23 @@ + } + if(nd) { - if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) + if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) desiredAccess = GENERIC_READ; -- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) -+ else 
if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) +- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) { ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) { desiredAccess = GENERIC_WRITE; -- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { -+ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) { + write_only = TRUE; +- } else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) { ++ } else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) { /* GENERIC_ALL is too much permission to request */ /* can cause unnecessary access denied on create */ /* desiredAccess = GENERIC_ALL; */ @@ -109,7 +106,7 @@ Index: linux-2.6.4-51.0/fs/cifs/dir.c disposition = FILE_OPEN_IF; else { cFYI(1,("Create flag not set in create function")); -@@ -319,7 +319,7 @@ +@@ -359,7 +359,7 @@ parent_dir_inode, direntry->d_name.name, direntry)); if(nd) { /* BB removeme */ diff --git a/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch b/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch deleted file mode 100644 index 9cdd51a..0000000 --- a/lustre/kernel_patches/patches/removepage-vanilla-2.6.5.patch +++ /dev/null @@ -1,31 +0,0 @@ - include/linux/fs.h | 1 + - mm/filemap.c | 3 +++ - 2 files changed, 4 insertions(+) - -Index: linux-2.6.4-30.1/include/linux/fs.h -=================================================================== ---- linux-2.6.4-30.1.orig/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500 -+++ linux-2.6.4-30.1/include/linux/fs.h 2004-04-02 03:20:19.000000000 -0500 -@@ -320,6 +320,7 @@ - int (*releasepage) (struct page *, int); - int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); -+ void (*removepage)(struct page *); /* called when page gets removed from the inode */ - }; - - struct backing_dev_info; -Index: linux-2.6.4-30.1/mm/filemap.c -=================================================================== ---- linux-2.6.4-30.1.orig/mm/filemap.c 2004-04-02 03:19:42.000000000 -0500 -+++ 
linux-2.6.4-30.1/mm/filemap.c 2004-04-02 03:23:10.000000000 -0500 -@@ -102,6 +102,9 @@ - { - struct address_space *mapping = page->mapping; - -+ if (mapping->a_ops->removepage) -+ mapping->a_ops->removepage(page); -+ - radix_tree_delete(&mapping->page_tree, page->index); - list_del(&page->list); - page->mapping = NULL; - diff --git a/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch b/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch index f10484d..92ad3cc 100644 --- a/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch +++ b/lustre/kernel_patches/patches/uml-2.6.7-01-bb2.patch @@ -6423,7 +6423,7 @@ Index: uml-2.6.7/fs/hostfs/hostfs_kern.c =================================================================== --- uml-2.6.7.orig/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:23.631218720 +0300 +++ uml-2.6.7/fs/hostfs/hostfs_kern.c 2004-07-16 19:47:24.263122656 +0300 -@@ -0,0 +1,1024 @@ +@@ -0,0 +1,1022 @@ +/* + * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch index 12436a7..c32be9d 100644 --- a/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-suse.patch @@ -101,12 +101,12 @@ Index: linux-2.6.5-12.1/fs/namei.c + struct dentry *dentry = nd->dentry; + int err, counter = 0; + ++ revalidate_again: + if (!dentry->d_op || !dentry->d_op->d_revalidate) + return 0; -+ revalidate_again: + if (!dentry->d_op->d_revalidate(dentry, nd)) { + struct dentry *new; -+ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd))) ++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd))) + return err; + new = real_lookup(dentry->d_parent, &dentry->d_name, nd); + if (IS_ERR(new)) @@ -654,7 +654,7 @@ Index: linux-2.6.5-12.1/include/linux/fs.h spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -+ struct lookup_intent *f_it; 
++ struct lookup_intent *f_it; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); diff --git a/lustre/kernel_patches/patches/vfs_lookup_in_file-2.6.patch b/lustre/kernel_patches/patches/vfs_lookup_in_file-2.6.patch new file mode 100644 index 0000000..3665cbb --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_lookup_in_file-2.6.patch @@ -0,0 +1,16 @@ +--- linux-2.6.7.orig/fs/namei.c 2005-04-01 11:14:26.000000000 +0300 ++++ linux-2.6.7/fs/namei.c 2005-04-01 11:23:01.748305104 +0300 +@@ -762,6 +762,13 @@ last_component: + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: ++ if (lookup_flags & LOOKUP_DIRECTORY) { ++ err = -ENOTDIR; ++ if (!nd->dentry->d_inode->i_op || ++ !nd->dentry->d_inode->i_op->lookup) { ++ goto return_err; ++ } ++ } + goto return_reval; + } + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch index 934dd77..cfd0db0 100644 --- a/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch +++ b/lustre/kernel_patches/patches/vfs_nointent-2.6-suse.patch @@ -47,22 +47,24 @@ Index: linux-2.6.5-12.1/fs/namei.c path_release(&nd); out: putname(tmp); -@@ -1626,10 +1637,18 @@ +@@ -1626,10 +1637,20 @@ struct dentry *dentry; struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent, IT_LOOKUP); error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; -+ if (nd.dentry->d_inode->i_op->mkdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir_raw(&nd, mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } ++ ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ dentry = 
lookup_create(&nd, 1); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { @@ -78,22 +80,24 @@ Index: linux-2.6.5-12.1/fs/namei.c char * name; struct dentry *dentry; struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent, IT_LOOKUP); name = getname(pathname); if(IS_ERR(name)) -@@ -1744,6 +1765,14 @@ +@@ -1744,6 +1765,16 @@ error = -EBUSY; goto exit1; } -+ if (nd.dentry->d_inode->i_op->rmdir_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; + -+ error = op->rmdir_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ down(&nd.dentry->d_inode->i_sem); dentry = lookup_hash(&nd.last, nd.dentry); error = PTR_ERR(dentry); @@ -101,7 +105,7 @@ Index: linux-2.6.5-12.1/fs/namei.c struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; -+ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent, IT_LOOKUP); name = getname(pathname); if(IS_ERR(name)) @@ -109,13 +113,13 @@ Index: linux-2.6.5-12.1/fs/namei.c error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink_raw(&nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } down(&nd.dentry->d_inode->i_sem); dentry = lookup_hash(&nd.last, nd.dentry); error = PTR_ERR(dentry); @@ -123,7 +127,7 @@ Index: linux-2.6.5-12.1/fs/namei.c struct 
dentry *dentry; struct nameidata nd; -+ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent, IT_LOOKUP); error = path_lookup(to, LOOKUP_PARENT, &nd); if (error) @@ -150,8 +154,8 @@ Index: linux-2.6.5-12.1/fs/namei.c struct nameidata nd, old_nd; int error; char * to; -+ intent_init(&nd.intent, IT_LOOKUP); -+ intent_init(&old_nd.intent, IT_LOOKUP); ++ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent, IT_LOOKUP); to = getname(newname); if (IS_ERR(to)) @@ -159,40 +163,22 @@ Index: linux-2.6.5-12.1/fs/namei.c error = -EXDEV; if (old_nd.mnt != nd.mnt) goto out_release; -+ if (nd.dentry->d_inode->i_op->link_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link_raw(&old_nd, &nd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } ++ if (nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { -@@ -2038,7 +2093,7 @@ - * locking]. 
- */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - int error = 0; - struct inode *target; -@@ -2083,7 +2138,7 @@ - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry) - { - struct inode *target; - int error; @@ -2160,6 +2215,8 @@ struct dentry * old_dentry, *new_dentry; struct dentry * trap; struct nameidata oldnd, newnd; -+ intent_init(&oldnd.intent, IT_LOOKUP); -+ intent_init(&newnd.intent, IT_LOOKUP); ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ intent_init(&newnd.intent, IT_LOOKUP); error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); if (error) @@ -200,12 +186,12 @@ Index: linux-2.6.5-12.1/fs/namei.c if (newnd.last_type != LAST_NORM) goto exit2; -+ if (old_dir->d_inode->i_op->rename_raw) { -+ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit2; -+ } ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } + trap = lock_rename(new_dir, old_dir); @@ -286,10 +272,10 @@ Index: linux-2.6.5-12.1/fs/open.c + if (error != -EOPNOTSUPP) + goto dput_and_out; + } else { -+ down(&inode->i_sem); -+ error = notify_change(nd.dentry, &newattrs); -+ up(&inode->i_sem); -+ } ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } dput_and_out: path_release(&nd); @@ -425,14 +411,6 @@ Index: linux-2.6.5-12.1/fs/open.c if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; newattrs.ia_valid = ATTR_CTIME; -@@ -723,6 +749,7 @@ - } - if (!S_ISDIR(inode->i_mode)) - newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; -+ 
- down(&inode->i_sem); - error = notify_change(dentry, &newattrs); - up(&inode->i_sem); Index: linux-2.6.5-12.1/fs/exec.c =================================================================== --- linux-2.6.5-12.1.orig/fs/exec.c 2004-05-11 15:41:54.000000000 -0400 diff --git a/lustre/kernel_patches/series/2.6-vanilla.series b/lustre/kernel_patches/series/2.6-vanilla.series index b5f5e74d..f8dfd66 100644 --- a/lustre/kernel_patches/series/2.6-vanilla.series +++ b/lustre/kernel_patches/series/2.6-vanilla.series @@ -20,3 +20,4 @@ jbd-buffer-release-2.6.7.patch dev_read_only-2.6-suse.patch vfs_gns-2.6-vanilla.patch linux-2.6.7-CITI_NFS4_ALL-7-lsec.patch +vfs_lookup_in_file-2.6.patch diff --git a/lustre/kernel_patches/targets/.cvsignore b/lustre/kernel_patches/targets/.cvsignore new file mode 100644 index 0000000..ba141e8 --- /dev/null +++ b/lustre/kernel_patches/targets/.cvsignore @@ -0,0 +1 @@ +*.target diff --git a/lustre/kernel_patches/targets/2.6-suse.target.in b/lustre/kernel_patches/targets/2.6-suse.target.in new file mode 100644 index 0000000..9166d1b --- /dev/null +++ b/lustre/kernel_patches/targets/2.6-suse.target.in @@ -0,0 +1,27 @@ +lnxmaj="2.6.5" +lnxrel="SLES9_SP1_BRANCH_2004111114454891" + +KERNEL=linux-$lnxmaj-$lnxrel.tar.gz +# they include our patches +SERIES= +VERSION=$lnxmaj +EXTRA_VERSION="${lnxrel}_lustre.@VERSION@" +RHBUILD=0 +LINUX26=1 +SUSEBUILD=1 + +BASE_ARCHS="i686 ppc" +BIGMEM_ARCHS="" +BOOT_ARCHS="" +JENSEN_ARCHS="" +SMP_ARCHS="" +BIGSMP_ARCHS="i686 ppc" +UP_ARCHS="" +SRC_ARCHS="" + +for cc in gcc33 ; do + if which $cc >/dev/null 2>/dev/null ; then + export CC=$cc + break + fi +done diff --git a/lustre/kernel_patches/targets/2.6-suse.target b/lustre/kernel_patches/targets/2.6-vanilla.target.in similarity index 50% rename from lustre/kernel_patches/targets/2.6-suse.target rename to lustre/kernel_patches/targets/2.6-vanilla.target.in index d8b192b..00c05df 100644 --- a/lustre/kernel_patches/targets/2.6-suse.target +++ 
b/lustre/kernel_patches/targets/2.6-vanilla.target.in @@ -1,7 +1,9 @@ -KERNEL=linux-2.6.5-12.1.tar.gz -SERIES=2.6-suse -VERSION=2.6.5 -EXTRA_VERSION=12.1_lustre +lnxmaj=2.6.6 + +KERNEL=linux-$lnxmaj.tar.gz +SERIES=2.6-vanilla +VERSION=$lnxmaj +EXTRA_VERSION=lustre.@VERSION@ RHBUILD=0 BASE_ARCHS="" diff --git a/lustre/kernel_patches/targets/hp_pnnl-2.4.target b/lustre/kernel_patches/targets/hp_pnnl-2.4.target.in similarity index 54% rename from lustre/kernel_patches/targets/hp_pnnl-2.4.target rename to lustre/kernel_patches/targets/hp_pnnl-2.4.target.in index 0d60be7..620e698 100644 --- a/lustre/kernel_patches/targets/hp_pnnl-2.4.target +++ b/lustre/kernel_patches/targets/hp_pnnl-2.4.target.in @@ -1,7 +1,10 @@ -KERNEL=linux-2.4.20-hp_pnnl.tar.gz +lnxmaj=2.4.20 +lnxrel=hp_pnnl + +KERNEL=linux-$lnxmaj-$lnxrel.tar.gz SERIES=hp-pnnl-2.4.20 -VERSION=2.4.20 -EXTRA_VERSION=hp_pnnl_lustre +VERSION=$lnxmaj +EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ RHBUILD=0 BASE_ARCHS="ia64" diff --git a/lustre/kernel_patches/targets/rh-2.4.target b/lustre/kernel_patches/targets/rh-2.4.target deleted file mode 100644 index f7e04c1..0000000 --- a/lustre/kernel_patches/targets/rh-2.4.target +++ /dev/null @@ -1,13 +0,0 @@ -KERNEL=linux-2.4.20-31.9.tar.gz -SERIES=rh-2.4.20 -VERSION=2.4.20 -EXTRA_VERSION=31.9_lustre -RHBUILD=1 - -BASE_ARCHS="i686" -BIGMEM_ARCHS="" -BOOT_ARCHS="" -JENSEN_ARCHS="" -SMP_ARCHS="i686" -UP_ARCHS="" -SRC_ARCHS="i686" diff --git a/lustre/kernel_patches/targets/rh-2.4.target.in b/lustre/kernel_patches/targets/rh-2.4.target.in new file mode 100644 index 0000000..fa9140d --- /dev/null +++ b/lustre/kernel_patches/targets/rh-2.4.target.in @@ -0,0 +1,23 @@ +lnxmaj="2.4.20" +lnxrel="31.9" + +KERNEL=linux-${lnxmaj}-${lnxrel}.tar.gz +SERIES=rh-2.4.20 +VERSION=$lnxmaj +EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ +RHBUILD=1 + +BASE_ARCHS="i686" +BIGMEM_ARCHS="" +BOOT_ARCHS="" +JENSEN_ARCHS="" +SMP_ARCHS="i686" +UP_ARCHS="" + +# the modules in this kernel do not build with gcc 3 +for cc
in i386-redhat-linux-gcc-2.96 gcc296 gcc ; do + if which $cc >/dev/null 2>/dev/null ; then + CC=$cc + break + fi +done diff --git a/lustre/kernel_patches/targets/rhel-2.4.target.in b/lustre/kernel_patches/targets/rhel-2.4.target.in new file mode 100644 index 0000000..df41ed0 --- /dev/null +++ b/lustre/kernel_patches/targets/rhel-2.4.target.in @@ -0,0 +1,23 @@ +lnxmaj="2.4.21" +lnxrel="20.EL" + +KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 +SERIES=rhel-2.4.21 +VERSION=${lnxmaj} +EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ +RHBUILD=1 + +BASE_ARCHS="i686 x86_64 ia64" +BIGMEM_ARCHS="" +BOOT_ARCHS="" +JENSEN_ARCHS="" +SMP_ARCHS="i686 x86_64 ia64" +UP_ARCHS="" + +# the modules in this kernel do not build with gcc 3.3 or 2.96 +for cc in gcc33 ; do + if which $cc >/dev/null 2>/dev/null ; then + export CC=$cc + break + fi +done diff --git a/lustre/kernel_patches/targets/suse-2.4.21-2.target b/lustre/kernel_patches/targets/suse-2.4.21-2.target.in similarity index 86% rename from lustre/kernel_patches/targets/suse-2.4.21-2.target rename to lustre/kernel_patches/targets/suse-2.4.21-2.target.in index c27c3de..245c085 100644 --- a/lustre/kernel_patches/targets/suse-2.4.21-2.target +++ b/lustre/kernel_patches/targets/suse-2.4.21-2.target.in @@ -1,7 +1,7 @@ KERNEL=linux-2.4.21-x86_64.tar.gz SERIES=suse-2.4.21-2 VERSION=2.4.21 -EXTRA_VERSION=lustre.1.2.1 +EXTRA_VERSION=lustre.@VERSION@ RHBUILD=0 BASE_ARCHS="x86_64" diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index 93f3411..f18c048 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -1,9 +1,16 @@ SERIES MNEMONIC COMMENT ARCH -chaos-2.4.18 linux-chaos-2.4.18 LLNL 2.4.18 chaos ~65 i386 +SUPPORTED KERNELS: +rhel-2.4.21 linux-2.4.21-20.3EL same as chaos-2.4.21 all +2.6-suse linux-2.6 SLES9 SP1 kernel all + +UNSUPPORTED KERNELS; BEING PHASED OUT; MAY BE MISSING CRITICAL BUG FIXES: hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml ia64 vanilla-2.4.20 
linux-2.4.20 patch with uml-2.4.20-6 um chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 i386 kgdb-2.5.73 linux-2.5.73 vanilla 2.5.73 with kgdb i386 bproc-2.4.20-hp-pnnl linux-2.4.20-hp4_pnnl9 hp-pnnl + bproc i386 suse-2.4.19 SUSE ES 8 +vanilla-2.4.24 linux-2.4.24 patch with uml-2.4.24-6 um +chaos-2.4.21 linux-chaos-2.4.21 same as rh-2.4.21-20.EL i386 +suse-2.4.21-jvn linux-2.4.21-241 sles8 2.4 kernel i386 diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c index 9940df1..11cd02d 100644 --- a/lustre/ldlm/l_lock.c +++ b/lustre/ldlm/l_lock.c @@ -123,6 +123,7 @@ void l_check_no_ns_lock(struct ldlm_namespace *ns) if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { CERROR("namespace %s lock held illegally; tell phil\n", ns->ns_name); + portals_debug_dumpstack(NULL); next_msg = jiffies + 60 * HZ; } } diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 15f46e2..63fb58c 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -79,7 +79,6 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) mode, flags); LASSERT(list_empty(&lock->l_flock_waitq)); - list_del_init(&lock->l_res_link); if (flags == LDLM_FL_WAIT_NOREPROC) { /* client side - set a flag to prevent sending a CANCEL */ @@ -135,9 +134,9 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, int overlaps = 0; ENTRY; - CDEBUG(D_DLMTRACE, "flags %#x pid "LPU64" mode %u start "LPU64" end " - LPU64"\n", *flags, new->l_policy_data.l_flock.pid, mode, - req->l_policy_data.l_flock.start, + CDEBUG(D_DLMTRACE, "flags %#x pid %u mode %u start "LPU64" end " + LPU64"\n", *flags, (unsigned int)new->l_policy_data.l_flock.pid, + mode, req->l_policy_data.l_flock.start, req->l_policy_data.l_flock.end); *err = ELDLM_OK; @@ -412,7 +411,7 @@ restart: if (added) ldlm_flock_destroy(req, mode, *flags); - ldlm_resource_dump(res); + ldlm_resource_dump(D_OTHER, res); RETURN(LDLM_ITER_CONTINUE); } @@ -426,7 +425,6 @@ 
ldlm_flock_interrupted_wait(void *data) { struct ldlm_lock *lock; struct lustre_handle lockh; - int rc; ENTRY; lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; @@ -434,9 +432,12 @@ ldlm_flock_interrupted_wait(void *data) /* take lock off the deadlock detection waitq. */ list_del_init(&lock->l_flock_waitq); + /* client side - set flag to prevent lock from being put on lru list */ + lock->l_flags |= LDLM_FL_CBPENDING; + ldlm_lock_decref_internal(lock, lock->l_req_mode); ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); + ldlm_cli_cancel(&lockh); EXIT; } @@ -459,11 +460,6 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) LASSERT(flags != LDLM_FL_WAIT_NOREPROC); - if (flags == 0) { - wake_up(&lock->l_waitq); - RETURN(0); - } - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV))) goto granted; @@ -472,7 +468,6 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) "sleeping"); ldlm_lock_dump(D_DLMTRACE, lock, 0); - fwd.fwd_lock = lock; obd = class_exp2obd(lock->l_conn_export); @@ -493,17 +488,12 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) ((lock->l_req_mode == lock->l_granted_mode) || lock->l_destroyed), &lwi); - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - RETURN(rc); - } - - LASSERT(!(lock->l_destroyed)); - + LDLM_DEBUG(lock, "client-side enqueue waking up: rc = %d", rc); + RETURN(rc); + granted: - LDLM_DEBUG(lock, "client-side enqueue waking up"); + LDLM_DEBUG(lock, "client-side enqueue granted"); ns = lock->l_resource->lr_namespace; l_lock(&ns->ns_lock); @@ -532,10 +522,13 @@ granted: getlk->fl_start = lock->l_policy_data.l_flock.start; getlk->fl_end = lock->l_policy_data.l_flock.end; } else { + int noreproc = LDLM_FL_WAIT_NOREPROC; + /* We need to reprocess the lock to do merges or splits * with existing locks owned by this process. 
*/ - flags = LDLM_FL_WAIT_NOREPROC; - ldlm_process_flock_lock(lock, &flags, 1, &err); + ldlm_process_flock_lock(lock, &noreproc, 1, &err); + if (flags == 0) + wake_up(&lock->l_waitq); } l_unlock(&ns->ns_lock); RETURN(0); @@ -546,12 +539,12 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, { struct ldlm_namespace *ns; ENTRY; - + LASSERT(lock); LASSERT(flag == LDLM_CB_CANCELING); - + ns = lock->l_resource->lr_namespace; - + /* take lock off the deadlock detection waitq. */ l_lock(&ns->ns_lock); list_del_init(&lock->l_flock_waitq); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index d1b8914..d87e551 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -259,7 +259,6 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT; } - rc = ldlm_get_ref(); if (rc) { CERROR("ldlm_get_ref failed: %d\n", rc); @@ -523,7 +522,14 @@ int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, RETURN(0); } -static char nidstr[PTL_NALFMT_SIZE]; +static inline int ptlrpc_peer_is_local(struct ptlrpc_peer *peer) +{ + ptl_process_id_t myid; + + PtlGetId(peer->peer_ni->pni_ni_h, &myid); + return (memcmp(&peer->peer_id, &myid, sizeof(myid)) == 0); +} + int target_handle_connect(struct ptlrpc_request *req) { unsigned long connect_flags = 0, *cfp; @@ -553,13 +559,12 @@ int target_handle_connect(struct ptlrpc_request *req) obd_str2uuid (&tgtuuid, str); target = class_uuid2obd(&tgtuuid); - if (!target) { + if (!target) target = class_name2obd(str); - } if (!target || target->obd_stopping || !target->obd_set_up) { - CERROR("UUID '%s' is not available for connect from NID %s\n", - str, ptlrpc_peernid2str(&req->rq_peer, nidstr)); + CERROR("UUID '%s' is not available for connect from %s\n", + str, req->rq_peerstr); GOTO(out, rc = -ENODEV); } @@ -638,8 +643,7 @@ int target_handle_connect(struct ptlrpc_request *req) ptlrpc_peernid2str(&req->rq_peer, 
peer_str), export, atomic_read(&export->exp_rpc_count)); GOTO(out, rc = -EBUSY); - } - else if (req->rq_reqmsg->conn_cnt == 1 && !initial_conn) { + } else if (req->rq_reqmsg->conn_cnt == 1 && !initial_conn) { CERROR("%s reconnected with 1 conn_cnt; cookies not random?\n", cluuid.uuid); GOTO(out, rc = -EALREADY); @@ -650,15 +654,18 @@ int target_handle_connect(struct ptlrpc_request *req) CWARN("%s: connection from %s@%s/%lu %s\n", target->obd_name, cluuid.uuid, ptlrpc_peernid2str(&req->rq_peer, peer_str), *cfp, target->obd_recovering ? "(recovering)" : ""); + if (target->obd_recovering) { lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING); target_start_recovery_timer(target); } + #if 0 /* Tell the client if we support replayable requests */ if (target->obd_replayable) lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE); #endif + if (export == NULL) { if (target->obd_recovering) { CERROR("%s denying connection for new client %s@%s: " @@ -717,6 +724,15 @@ int target_handle_connect(struct ptlrpc_request *req) if (lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_LIBCLIENT) export->exp_libclient = 1; + if (!(lustre_msg_get_op_flags(req->rq_reqmsg) & MSG_CONNECT_ASYNC) && + ptlrpc_peer_is_local(&req->rq_peer)) { + CWARN("%s: exp %p set sync\n", target->obd_name, export); + export->exp_sync = 1; + } else { + CDEBUG(D_HA, "%s: exp %p set async\n",target->obd_name,export); + export->exp_sync = 0; + } + if (export->exp_connection != NULL) ptlrpc_put_connection(export->exp_connection); export->exp_connection = ptlrpc_get_connection(&req->rq_peer, @@ -728,12 +744,11 @@ int target_handle_connect(struct ptlrpc_request *req) GOTO(out, rc = 0); } - if (target->obd_recovering) { + if (target->obd_recovering) target->obd_connected_clients++; - } - memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn), - sizeof conn); + memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof(conn)), + sizeof(conn)); if (export->exp_imp_reverse != NULL) { /* same 
logic as client_obd_cleanup */ @@ -838,6 +853,7 @@ ptlrpc_clone_req( struct ptlrpc_request *orig_req) return copy_req; } + void ptlrpc_free_clone( struct ptlrpc_request *req) { if (req->rq_svcsec) @@ -849,8 +865,6 @@ void ptlrpc_free_clone( struct ptlrpc_request *req) OBD_FREE(req, sizeof *req); } - - static void target_release_saved_req(struct ptlrpc_request *req) { if (req->rq_svcsec) @@ -861,7 +875,6 @@ static void target_release_saved_req(struct ptlrpc_request *req) OBD_FREE(req, sizeof *req); } -#ifdef __KERNEL__ static void target_finish_recovery(struct obd_device *obd) { struct list_head *tmp, *n; @@ -917,7 +930,6 @@ static void abort_recovery_queue(struct obd_device *obd) target_release_saved_req(req); } } -#endif /* Called from a cleanup function if the device is being cleaned up forcefully. The exports should all have been disconnected already, @@ -955,14 +967,13 @@ void target_cleanup_recovery(struct obd_device *obd) list_del(&req->rq_list); LASSERT (req->rq_reply_state == 0); target_release_saved_req(req); - } + } } -#ifdef __KERNEL__ static void target_abort_recovery(void *data) { struct obd_device *obd = data; - + LASSERT(!obd->obd_recovering); class_disconnect_stale_exports(obd, 0); @@ -974,7 +985,6 @@ static void target_abort_recovery(void *data) target_finish_recovery(obd); ptlrpc_run_recovery_over_upcall(obd); } -#endif static void target_recovery_expired(unsigned long castmeharder) { @@ -1260,14 +1270,12 @@ int target_queue_recovery_request(struct ptlrpc_request *req, } } - if (!inserted) { + if (!inserted) list_add_tail(&req->rq_list, &obd->obd_recovery_queue); - } obd->obd_requests_queued_for_recovery++; wake_up(&obd->obd_next_transno_waitq); spin_unlock_bh(&obd->obd_processing_task_lock); - return 0; } diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 9f863b4..cbabea0 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -162,6 +162,7 @@ void ldlm_lock_remove_from_lru(struct ldlm_lock *lock) ENTRY; 
l_lock(&lock->l_resource->lr_namespace->ns_lock); if (!list_empty(&lock->l_lru)) { + LASSERT(lock->l_resource->lr_type != LDLM_FLOCK); list_del_init(&lock->l_lru); lock->l_resource->lr_namespace->ns_nr_unused--; LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0); @@ -443,9 +444,9 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP)) lock->l_writers++; lock->l_last_used = jiffies; - l_unlock(&lock->l_resource->lr_namespace->ns_lock); LDLM_LOCK_GET(lock); LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); } void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) @@ -453,9 +454,10 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) struct ldlm_namespace *ns; ENTRY; - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); ns = lock->l_resource->lr_namespace; + l_lock(&ns->ns_lock); + LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); if (mode & (LCK_NL | LCK_CR | LCK_PR)) { LASSERT(lock->l_readers > 0); lock->l_readers--; @@ -607,7 +609,7 @@ static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode, policy->l_inodebits.bits)) continue; - if (lock->l_destroyed) + if (lock->l_destroyed || (lock->l_flags & LDLM_FL_FAILED)) continue; if ((flags & LDLM_FL_LOCAL_ONLY) && @@ -701,10 +703,15 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, ldlm_lock2handle(lock, lockh); if (!(lock->l_flags & LDLM_FL_CAN_MATCH)) { struct l_wait_info lwi; - if (lock->l_completion_ast) - lock->l_completion_ast(lock, - LDLM_FL_WAIT_NOREPROC, - NULL); + if (lock->l_completion_ast) { + int err = lock->l_completion_ast(lock, + LDLM_FL_WAIT_NOREPROC, + NULL); + if (err) { + rc = 0; + goto out2; + } + } lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL); @@ -713,20 +720,25 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, (lock->l_flags & LDLM_FL_CAN_MATCH), &lwi); } 
} - if (rc) + +out2: + if (rc) { + l_lock(&ns->ns_lock); LDLM_DEBUG(lock, "matched ("LPU64" "LPU64")", type == LDLM_PLAIN ? res_id->name[2] : policy->l_extent.start, type == LDLM_PLAIN ? res_id->name[3] : - policy->l_extent.end); - else if (!(flags & LDLM_FL_TEST_LOCK)) /* less verbose for test-only */ + policy->l_extent.end); + l_unlock(&ns->ns_lock); + } else if (!(flags & LDLM_FL_TEST_LOCK)) {/* less verbose for test-only */ LDLM_DEBUG_NOLOCK("not matched ns %p type %u mode %u res " LPU64"/"LPU64" ("LPU64" "LPU64")", ns, type, mode, res_id->name[0], res_id->name[1], type == LDLM_PLAIN ? res_id->name[2] : - policy->l_extent.start, + policy->l_extent.start, type == LDLM_PLAIN ? res_id->name[3] : - policy->l_extent.end); + policy->l_extent.end); + } if (old_lock) LDLM_LOCK_PUT(old_lock); @@ -773,6 +785,7 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, lock->l_blocking_ast = blocking; lock->l_completion_ast = completion; lock->l_glimpse_ast = glimpse; + lock->l_pid = current->pid; if (lvb_len) { lock->l_lvb_len = lvb_len; @@ -1039,15 +1052,12 @@ void ldlm_lock_cancel(struct ldlm_lock *lock) struct ldlm_namespace *ns; ENTRY; - /* There's no race between calling this and taking the ns lock below; - * a lock can only be put on the waiting list once, because it can only - * issue a blocking AST once. */ - ldlm_del_waiting_lock(lock); - res = lock->l_resource; ns = res->lr_namespace; l_lock(&ns->ns_lock); + ldlm_del_waiting_lock(lock); + /* Please do not, no matter how tempting, remove this LBUG without * talking to me first. 
-phik */ if (lock->l_readers || lock->l_writers) { @@ -1112,7 +1122,7 @@ struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, *flags |= LDLM_FL_BLOCK_GRANTED; RETURN(lock->l_resource); } - + LASSERTF(new_mode == LCK_PW && lock->l_granted_mode == LCK_PR, "new_mode %u, granted %u\n", new_mode, lock->l_granted_mode); @@ -1181,9 +1191,9 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos) return; } - CDEBUG(level, " -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d)\n", + CDEBUG(level, " -- Lock dump: %p/"LPX64" (rc: %d) (pos: %d) (pid: %d)\n", lock, lock->l_handle.h_cookie, atomic_read(&lock->l_refc), - pos); + pos, lock->l_pid); if (lock->l_conn_export != NULL) obd = lock->l_conn_export->exp_obd; if (lock->l_export && lock->l_export->exp_connection) { diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 5999373..0a5d6a1 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -176,6 +176,9 @@ static void waiting_locks_callback(unsigned long unused) struct ldlm_lock *lock; char str[PTL_NALFMT_SIZE]; + if (obd_dump_on_timeout) + portals_debug_dumplog(); + spin_lock_bh(&waiting_locks_spinlock); while (!list_empty(&waiting_locks_list)) { lock = list_entry(waiting_locks_list.next, struct ldlm_lock, @@ -314,6 +317,9 @@ static void ldlm_failed_ast(struct ldlm_lock *lock, int rc,const char *ast_type) " (%s)", ast_type, rc, lock->l_export->exp_client_uuid.uuid, conn->c_remote_uuid.uuid, conn->c_peer.peer_id.nid, ptlrpc_peernid2str(&conn->c_peer, str)); + + if (obd_dump_on_timeout) + portals_debug_dumplog(); ptlrpc_fail_export(lock->l_export); } @@ -333,7 +339,9 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, ldlm_lock_cancel(lock); rc = -ERESTART; } else { + l_lock(&lock->l_resource->lr_namespace->ns_lock); ldlm_del_waiting_lock(lock); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); ldlm_failed_ast(lock, rc, ast_type); } } else if (rc) { @@ -376,8 +384,8 @@ int ldlm_server_blocking_ast(struct 
ldlm_lock *lock, if (lock->l_granted_mode != lock->l_req_mode) { /* this blocking AST will be communicated as part of the * completion AST instead */ + LDLM_DEBUG(lock, "lock not granted, not sending blocking AST"); l_unlock(&lock->l_resource->lr_namespace->ns_lock); - LDLM_DEBUG(lock, "lock not granted, not sending blocking AST"); RETURN(0); } @@ -527,7 +535,9 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) sizeof(body->lock_handle1)); ldlm_lock2desc(lock, &body->lock_desc); + down(&lock->l_resource->lr_lvb_sem); size = lock->l_resource->lr_lvb_len; + up(&lock->l_resource->lr_lvb_sem); req->rq_replen = lustre_msg_size(1, &size); req->rq_send_state = LUSTRE_IMP_FULL; @@ -545,6 +555,27 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) RETURN(rc); } +static struct ldlm_lock * +find_existing_lock(struct obd_export *exp, struct lustre_handle *remote_hdl) +{ + struct obd_device *obd = exp->exp_obd; + struct list_head *iter; + + l_lock(&obd->obd_namespace->ns_lock); + list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) { + struct ldlm_lock *lock; + lock = list_entry(iter, struct ldlm_lock, l_export_chain); + if (lock->l_remote_handle.cookie == remote_hdl->cookie) { + LDLM_LOCK_GET(lock); + l_unlock(&obd->obd_namespace->ns_lock); + return lock; + } + } + l_unlock(&obd->obd_namespace->ns_lock); + return NULL; +} + + int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback completion_callback, ldlm_blocking_callback blocking_callback, @@ -572,6 +603,18 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, flags = dlm_req->lock_flags; + LASSERT(req->rq_export); + + if (flags & LDLM_FL_REPLAY) { + lock = find_existing_lock(req->rq_export, + &dlm_req->lock_handle1); + if (lock != NULL) { + DEBUG_REQ(D_HA, req, "found existing lock cookie "LPX64, + lock->l_handle.h_cookie); + GOTO(existing_lock, rc = 0); + } + } + /* The lock's callback data might be set in the policy function */ lock = 
ldlm_lock_create(obddev->obd_namespace, &dlm_req->lock_handle2, dlm_req->lock_desc.l_resource.lr_name, @@ -601,6 +644,8 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, &lock->l_export->exp_ldlm_data.led_held_locks); l_unlock(&lock->l_resource->lr_namespace->ns_lock); +existing_lock: + if (flags & LDLM_FL_HAS_INTENT) { /* In this case, the reply buffer is allocated deep in * local_lock_enqueue by the policy function. */ @@ -661,8 +706,10 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this * ldlm_reprocess_all. If this moves, revisit that code. -phil */ if (lock) { + l_lock(&lock->l_resource->lr_namespace->ns_lock); LDLM_DEBUG(lock, "server-side enqueue handler, sending reply" "(err=%d, rc=%d)", err, rc); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); if (rc == 0) { down(&lock->l_resource->lr_lvb_sem); @@ -679,7 +726,6 @@ int ldlm_handle_enqueue(struct ptlrpc_request *req, up(&lock->l_resource->lr_lvb_sem); } else { ldlm_lock_destroy(lock); - } if (!err && dlm_req->lock_desc.l_resource.lr_type != LDLM_FLOCK) @@ -720,14 +766,18 @@ int ldlm_handle_convert(struct ptlrpc_request *req) req->rq_status = EINVAL; } else { void *res = NULL; - + + l_lock(&lock->l_resource->lr_namespace->ns_lock); LDLM_DEBUG(lock, "server-side convert handler START"); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); + res = ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode, &dlm_rep->lock_flags); - if (res) { + l_lock(&lock->l_resource->lr_namespace->ns_lock); if (ldlm_del_waiting_lock(lock)) CDEBUG(D_DLMTRACE, "converted waiting lock %p\n", lock); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); req->rq_status = 0; } else { req->rq_status = EDEADLOCK; @@ -735,8 +785,11 @@ int ldlm_handle_convert(struct ptlrpc_request *req) } if (lock) { - ldlm_reprocess_all(lock->l_resource); + if (!req->rq_status) + ldlm_reprocess_all(lock->l_resource); + l_lock(&lock->l_resource->lr_namespace->ns_lock); 
LDLM_DEBUG(lock, "server-side convert handler END"); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); LDLM_LOCK_PUT(lock); } else LDLM_DEBUG_NOLOCK("server-side convert handler END"); @@ -749,7 +802,6 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) struct ldlm_request *dlm_req; struct ldlm_lock *lock; struct ldlm_resource *res; - char str[PTL_NALFMT_SIZE]; int rc; ENTRY; @@ -769,10 +821,10 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) lock = ldlm_handle2lock(&dlm_req->lock_handle1); if (!lock) { CERROR("received cancel for unknown lock cookie "LPX64 - " from client %s nid %s\n", + " from client %s id %s\n", dlm_req->lock_handle1.cookie, req->rq_export->exp_client_uuid.uuid, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock " "(cookie "LPU64")", dlm_req->lock_handle1.cookie); @@ -787,9 +839,11 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) //(res, req->rq_reqmsg, 1); } + l_lock(&res->lr_namespace->ns_lock); ldlm_lock_cancel(lock); if (ldlm_del_waiting_lock(lock)) CDEBUG(D_DLMTRACE, "cancelled waiting lock %p\n", lock); + l_unlock(&res->lr_namespace->ns_lock); req->rq_status = rc; } @@ -798,7 +852,9 @@ int ldlm_handle_cancel(struct ptlrpc_request *req) if (lock) { ldlm_reprocess_all(lock->l_resource); + l_lock(&lock->l_resource->lr_namespace->ns_lock); LDLM_DEBUG(lock, "server-side cancel handler END"); + l_unlock(&lock->l_resource->lr_namespace->ns_lock); LDLM_LOCK_PUT(lock); } @@ -1023,7 +1079,6 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) struct ldlm_namespace *ns; struct ldlm_request *dlm_req; struct ldlm_lock *lock; - char str[PTL_NALFMT_SIZE]; int rc; ENTRY; @@ -1041,11 +1096,11 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) if (req->rq_export == NULL) { struct ldlm_request *dlm_req; - CDEBUG(D_RPCTRACE, "operation %d from nid %s with bad " + CDEBUG(D_RPCTRACE, "operation %d from %s with bad " "export cookie "LPX64"; this is " 
"normal if this node rebooted with a lock held\n", req->rq_reqmsg->opc, - ptlrpc_peernid2str(&req->rq_peer, str), + req->rq_peerstr, req->rq_reqmsg->handle.cookie); dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), lustre_swab_ldlm_request); @@ -1179,11 +1234,9 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) if (req->rq_export == NULL) { struct ldlm_request *dlm_req; - char str[PTL_NALFMT_SIZE]; - CERROR("operation %d with bad export from NID %s\n", + CERROR("operation %d with bad export from %s\n", req->rq_reqmsg->opc, - ptlrpc_peernid2str(&req->rq_peer, str)); - + req->rq_peerstr); CERROR("--> export cookie: "LPX64"\n", req->rq_reqmsg->handle.cookie); dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), @@ -1335,7 +1388,7 @@ static int ldlm_setup(void) ldlm_state->ldlm_cb_service = ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - ldlm_callback_handler, "ldlm_cbd", + 1500, ldlm_callback_handler, "ldlm_cbd", ldlm_svc_proc_dir); if (!ldlm_state->ldlm_cb_service) { @@ -1346,7 +1399,7 @@ static int ldlm_setup(void) ldlm_state->ldlm_cancel_service = ptlrpc_init_svc(LDLM_NBUFS, LDLM_BUFSIZE, LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL, - LDLM_CANCEL_REPLY_PORTAL, + LDLM_CANCEL_REPLY_PORTAL, 30000, ldlm_cancel_handler, "ldlm_canceld", ldlm_svc_proc_dir); @@ -1439,7 +1492,7 @@ static int ldlm_cleanup(int force) if (!list_empty(&ldlm_namespace_list)) { CERROR("ldlm still has namespaces; clean these up first.\n"); - ldlm_dump_all_namespaces(); + ldlm_dump_all_namespaces(D_DLMTRACE); RETURN(-EBUSY); } diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 14e0610..094ae033 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -48,15 +48,17 @@ int ldlm_expired_completion_wait(void *data) struct obd_device *obd; if (lock->l_conn_export == NULL) { - static unsigned long next_dump = 0; + static unsigned long next_dump = 0, last_dump = 0; LDLM_ERROR(lock, 
"lock timed out; not entering recovery in " "server code, just going back to sleep"); if (time_after(jiffies, next_dump)) { - ldlm_namespace_dump(lock->l_resource->lr_namespace); - if (next_dump == 0) - portals_debug_dumplog(); + last_dump = next_dump; next_dump = jiffies + 300 * HZ; + ldlm_namespace_dump(D_DLMTRACE, + lock->l_resource->lr_namespace); + if (last_dump == 0) + portals_debug_dumplog(); } RETURN(0); } @@ -213,6 +215,12 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns, l_unlock(&ns->ns_lock); ldlm_lock_decref_and_cancel(lockh, mode); + + /* XXX - HACK because we shouldn't call ldlm_lock_destroy() + * from llite/file.c/ll_file_flock(). */ + if (lock->l_resource->lr_type == LDLM_FLOCK) { + ldlm_lock_destroy(lock); + } } int ldlm_cli_enqueue(struct obd_export *exp, @@ -341,11 +349,6 @@ int ldlm_cli_enqueue(struct obd_export *exp, GOTO(cleanup, rc = -EPROTO); } - /* XXX - Phil, wasn't sure if this should go before or after the - * lustre_swab_repbuf() ? If we can't unpack the reply then we - * don't know what occurred on the server so I think the safest - * bet is to cleanup the lock as if it didn't make it ? 
*/ - /* lock enqueued on the server */ cleanup_phase = 1; @@ -416,10 +419,10 @@ int ldlm_cli_enqueue(struct obd_export *exp, rc = ldlm_lock_enqueue(ns, &lock, NULL, flags); if (lock->l_completion_ast != NULL) { int err = lock->l_completion_ast(lock, *flags, NULL); - if (!rc) { - cleanup_phase = 2; + if (!rc) rc = err; - } + if (rc) + cleanup_phase = 2; } } @@ -448,6 +451,8 @@ cleanup: static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode, int *flags) { + struct ldlm_resource *res; + int rc; ENTRY; if (lock->l_resource->lr_namespace->ns_client) { CERROR("Trying to cancel local lock\n"); @@ -455,23 +460,29 @@ static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode, } LDLM_DEBUG(lock, "client-side local convert"); - ldlm_lock_convert(lock, new_mode, flags); - ldlm_reprocess_all(lock->l_resource); - + res = ldlm_lock_convert(lock, new_mode, flags); + if (res) { + ldlm_reprocess_all(res); + rc = 0; + } else { + rc = EDEADLOCK; + } LDLM_DEBUG(lock, "client-side local convert handler END"); LDLM_LOCK_PUT(lock); - RETURN(0); + RETURN(rc); } /* FIXME: one of ldlm_cli_convert or the server side should reject attempted * conversion of locks which are on the waiting or converting queue */ +/* Caller of this code is supposed to take care of lock readers/writers + accounting */ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) { struct ldlm_request *body; struct ldlm_reply *reply; struct ldlm_lock *lock; struct ldlm_resource *res; - struct ptlrpc_request *req; + struct ptlrpc_request *req = NULL; int rc, size = sizeof(*body); ENTRY; @@ -513,13 +524,23 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) GOTO (out, rc = -EPROTO); } + if (req->rq_status) + GOTO(out, rc = req->rq_status); + res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags); - if (res != NULL) + if (res != NULL) { ldlm_reprocess_all(res); - /* Go to sleep until the lock is granted. */ - /* FIXME: or cancelled. 
*/ - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL); + /* Go to sleep until the lock is granted. */ + /* FIXME: or cancelled. */ + if (lock->l_completion_ast) { + rc = lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, + NULL); + if (rc) + GOTO(out, rc); + } + } else { + rc = EDEADLOCK; + } EXIT; out: LDLM_LOCK_PUT(lock); @@ -548,7 +569,7 @@ int ldlm_cli_cancel(struct lustre_handle *lockh) /* Set this flag to prevent others from getting new references*/ l_lock(&lock->l_resource->lr_namespace->ns_lock); lock->l_flags |= LDLM_FL_CBPENDING; - local_only = (lock->l_flags & LDLM_FL_LOCAL_ONLY); + local_only = lock->l_flags & LDLM_FL_LOCAL_ONLY; l_unlock(&lock->l_resource->lr_namespace->ns_lock); ldlm_cancel_callback(lock); @@ -902,7 +923,7 @@ void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, ENTRY; if (ns == NULL) { - CERROR("must pass in namespace"); + CERROR("must pass in namespace\n"); LBUG(); } diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 910fcf1..2051268 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -33,7 +33,7 @@ kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; -spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED; +DECLARE_MUTEX(ldlm_namespace_lock); struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list); struct proc_dir_entry *ldlm_type_proc_dir = NULL; struct proc_dir_entry *ldlm_ns_proc_dir = NULL; @@ -43,7 +43,7 @@ struct proc_dir_entry *ldlm_svc_proc_dir = NULL; static int ldlm_proc_dump_ns(struct file *file, const char *buffer, unsigned long count, void *data) { - ldlm_dump_all_namespaces(); + ldlm_dump_all_namespaces(D_DLMTRACE); RETURN(count); } @@ -252,9 +252,9 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) ns->ns_nr_unused = 0; ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_add(&ns->ns_list_chain, 
&ldlm_namespace_list); - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); #ifdef __KERNEL__ ldlm_proc_namespace(ns); #endif @@ -363,7 +363,7 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags) CERROR("Resource refcount nonzero (%d) after " "lock cleanup; forcing cleanup.\n", atomic_read(&res->lr_refcount)); - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); atomic_set(&res->lr_refcount, 1); ldlm_resource_putref(res); } @@ -380,10 +380,9 @@ int ldlm_namespace_free(struct ldlm_namespace *ns, int force) if (!ns) RETURN(ELDLM_OK); - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_del(&ns->ns_list_chain); - - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); /* At shutdown time, don't call the cancellation callback */ ldlm_namespace_cleanup(ns, 0); @@ -429,10 +428,9 @@ static struct ldlm_resource *ldlm_resource_new(void) struct ldlm_resource *res; OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res); - if (res == NULL) { - LBUG(); + if (res == NULL) return NULL; - } + memset(res, 0, sizeof(*res)); INIT_LIST_HEAD(&res->lr_children); @@ -460,10 +458,8 @@ ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, "type: %d", type); res = ldlm_resource_new(); - if (!res) { - LBUG(); + if (!res) RETURN(NULL); - } spin_lock(&ns->ns_counter_lock); ns->ns_resources++; @@ -520,11 +516,13 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, } } - if (create) + if (create) { res = ldlm_resource_add(ns, parent, name, type); - else + if (res == NULL) + GOTO(out, NULL); + } else { res = NULL; - + } if (create && ns->ns_lvbo && ns->ns_lvbo->lvbo_init) { int rc; @@ -542,6 +540,7 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, CERROR("lvbo_init failed for resource "LPU64": rc %d\n", name.name[0], rc); } else { +out: l_unlock(&ns->ns_lock); } @@ -582,22 +581,22 @@ int ldlm_resource_putref(struct ldlm_resource *res) } if 
(!list_empty(&res->lr_granted)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_converting)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_waiting)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } if (!list_empty(&res->lr_children)) { - ldlm_resource_dump(res); + ldlm_resource_dump(D_ERROR, res); LBUG(); } @@ -628,7 +627,7 @@ void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, { l_lock(&res->lr_namespace->ns_lock); - ldlm_resource_dump(res); + ldlm_resource_dump(D_OTHER, res); CDEBUG(D_OTHER, "About to add this lock:\n"); ldlm_lock_dump(D_OTHER, lock, 0); @@ -651,7 +650,7 @@ void ldlm_resource_insert_lock_after(struct ldlm_lock *original, l_lock(&res->lr_namespace->ns_lock); - ldlm_resource_dump(res); + ldlm_resource_dump(D_OTHER, res); CDEBUG(D_OTHER, "About to insert this lock after %p:\n", original); ldlm_lock_dump(D_OTHER, new, 0); @@ -680,44 +679,44 @@ void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name)); } -void ldlm_dump_all_namespaces(void) +void ldlm_dump_all_namespaces(int level) { struct list_head *tmp; - spin_lock(&ldlm_namespace_lock); + down(&ldlm_namespace_lock); list_for_each(tmp, &ldlm_namespace_list) { struct ldlm_namespace *ns; ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain); - ldlm_namespace_dump(ns); + ldlm_namespace_dump(level, ns); } - spin_unlock(&ldlm_namespace_lock); + up(&ldlm_namespace_lock); } -void ldlm_namespace_dump(struct ldlm_namespace *ns) +void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) { struct list_head *tmp; - unsigned int debug_save = portal_debug; - portal_debug |= D_OTHER; - l_lock(&ns->ns_lock); - CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name, - ns->ns_refcount, ns->ns_client); + CDEBUG(level, "--- Namespace: %s (rc: %d, client: 
%d)\n", + ns->ns_name, ns->ns_refcount, ns->ns_client); - list_for_each(tmp, &ns->ns_root_list) { - struct ldlm_resource *res; - res = list_entry(tmp, struct ldlm_resource, lr_childof); + l_lock(&ns->ns_lock); + if (time_after(jiffies, ns->ns_next_dump)) { + list_for_each(tmp, &ns->ns_root_list) { + struct ldlm_resource *res; + res = list_entry(tmp, struct ldlm_resource, lr_childof); - /* Once we have resources with children, this should really dump - * them recursively. */ - ldlm_resource_dump(res); + /* Once we have resources with children, this should + * really dump them recursively. */ + ldlm_resource_dump(level, res); + } + ns->ns_next_dump = jiffies + 10 * HZ; } l_unlock(&ns->ns_lock); - portal_debug = debug_save; } -void ldlm_resource_dump(struct ldlm_resource *res) +void ldlm_resource_dump(int level, struct ldlm_resource *res) { struct list_head *tmp; int pos; @@ -725,36 +724,36 @@ void ldlm_resource_dump(struct ldlm_resource *res) if (RES_NAME_SIZE != 4) LBUG(); - CDEBUG(D_OTHER, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64 + CDEBUG(level, "--- Resource: %p ("LPU64"/"LPU64"/"LPU64"/"LPU64 ") (rc: %d)\n", res, res->lr_name.name[0], res->lr_name.name[1], res->lr_name.name[2], res->lr_name.name[3], atomic_read(&res->lr_refcount)); if (!list_empty(&res->lr_granted)) { pos = 0; - CDEBUG(D_OTHER, "Granted locks:\n"); + CDEBUG(level, "Granted locks:\n"); list_for_each(tmp, &res->lr_granted) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } if (!list_empty(&res->lr_converting)) { pos = 0; - CDEBUG(D_OTHER, "Converting locks:\n"); + CDEBUG(level, "Converting locks:\n"); list_for_each(tmp, &res->lr_converting) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } if (!list_empty(&res->lr_waiting)) { pos = 0; - CDEBUG(D_OTHER, "Waiting 
locks:\n"); + CDEBUG(level, "Waiting locks:\n"); list_for_each(tmp, &res->lr_waiting) { struct ldlm_lock *lock; lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock, ++pos); + ldlm_lock_dump(level, lock, ++pos); } } } diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c index 5381b5b..7a5e066 100644 --- a/lustre/ldlm/ldlm_test.c +++ b/lustre/ldlm/ldlm_test.c @@ -218,8 +218,7 @@ int ldlm_test_extents(struct obd_device *obddev) LBUG(); flags = 0; - lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL, - 0); + lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0); if (lock1 == NULL) LBUG(); err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL, @@ -230,8 +229,7 @@ int ldlm_test_extents(struct obd_device *obddev) LBUG(); flags = 0; - lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, - NULL, 0); + lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL,0); err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL, NULL); if (err != ELDLM_OK) diff --git a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am index 860d2ec..44fd10d 100644 --- a/lustre/liblustre/Makefile.am +++ b/lustre/liblustre/Makefile.am @@ -7,9 +7,9 @@ AM_CPPFLAGS = $(HAVE_EFENCE) -I$(SYSIO)/include -D_LARGEFILE64_SOURCE=1 \ $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals AM_CFLAGS = $(LLCFLAGS) -LIBS = $(LIBEFENCE) +AM_LIBS = $(LIBEFENCE) -LUSTRE_LIBS = liblutils.a libllite.a \ +LUSTRE_LIBS = libllite.a \ $(top_builddir)/lustre/lov/liblov.a \ $(top_builddir)/lustre/obdecho/libobdecho.a \ $(top_builddir)/lustre/osc/libosc.a \ @@ -25,14 +25,14 @@ PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ $(top_builddir)/portals/portals/libportals.a else PTL_LIBS = $(top_builddir)/portals/utils/libuptlctl.a \ - $(CRAY_PORTALS_PATH)/lib_TV/snos64/libportals.a + $(CRAY_PORTALS_LIBS)/libportals.a endif SYSIO_LIBS = $(SYSIO)/lib/libsysio.a if LIBLUSTRE lib_LIBRARIES = liblustre.a 
-noinst_LIBRARIES = liblutils.a libllite.a +noinst_LIBRARIES = libllite.a install-exec-hook: liblustre.so @$(NORMAL_INSTALL) @@ -48,16 +48,14 @@ else install-exec-hook: endif -liblutils_a_SOURCES = lutil.c lutil.h - libllite_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c \ - llite_lib.h + lutil.c lutil.h llite_lib.h # for make rpms -- need cleanup liblustre_a_SOURCES = llite_lib.c super.c namei.c rw.c file.c dir.c liblustre.a : $(LUSTRE_LIBS) $(PTL_LIBS) $(SYSIO_LIBS) - sh $(srcdir)/genlib.sh $(SYSIO) $(CRAY_PORTALS_PATH) + sh $(srcdir)/genlib.sh "$(SYSIO)" "$(CRAY_PORTALS_LIBS)" "$(LIBS)" EXTRA_DIST = genlib.sh diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index 7e1d7dd..f685beb 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -33,16 +33,31 @@ #include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD +#ifdef HAVE_LINUX_TYPES_H #include +#elif defined(HAVE_SYS_TYPES_H) +#include +#endif + +#ifdef HAVE_LINUX_UNISTD_H #include +#elif defined(HAVE_UNISTD_H) +#include +#endif + #include #include "llite_lib.h" diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index e393198..b2fa8f4 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -31,12 +31,16 @@ #include #include +#ifdef HAVE_XTIO_H #include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD diff --git a/lustre/liblustre/genlib.sh b/lustre/liblustre/genlib.sh index 04e27fe2..f5c489e 100755 --- a/lustre/liblustre/genlib.sh +++ b/lustre/liblustre/genlib.sh @@ -17,7 +17,8 @@ RANLIB=/usr/bin/ranlib CWD=`pwd` SYSIO=$1 -CRAY_PORTALS_PATH=$2 +CRAY_PORTALS_LIBS=$2 +LIBS=$3 if [ ! -f $SYSIO/lib/libsysio.a ]; then echo "ERROR: $SYSIO/lib/libsysio.a dosen't exist" @@ -52,7 +53,7 @@ build_sysio_obj_list() { _objs=`$AR -t $1` mkdir -p $sysio_tmp cd $sysio_tmp - $AR -x $1 + $AR -x ../$1 cd .. 
for _lib in $_objs; do ALL_OBJS=$ALL_OBJS"$sysio_tmp/$_lib "; @@ -76,7 +77,7 @@ build_cray_portals_obj_list() { } # lustre components libs -build_obj_list . liblutils.a +build_obj_list . libllite.a build_obj_list ../lov liblov.a build_obj_list ../obdecho libobdecho.a build_obj_list ../osc libosc.a @@ -101,11 +102,10 @@ rm -f $CWD/liblsupport.a $AR -cru $CWD/liblsupport.a $ALL_OBJS $RANLIB $CWD/liblsupport.a -# libllite should be at the beginning of obj list -prepend_obj_list . libllite.a - -# libsysio -build_sysio_obj_list $SYSIO/lib/libsysio.a +# if libsysio is already in our LIBS we don't need to link against it here +if $(echo "$LIBS" | grep -v -- "-lsysio" >/dev/null) ; then + build_sysio_obj_list $SYSIO/lib/libsysio.a +fi # create static lib lustre rm -f $CWD/liblustre.a diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index bf006fa..395eb3f 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -28,11 +28,25 @@ #include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif + +/* env variables */ +#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT" +#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET" +#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT" +#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE" +#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK" +#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS" +#define ENV_LUSTRE_NAL_NAME "LIBLUSTRE_NAL_NAME" #ifdef REDSTORM #define CSTART_INIT @@ -84,6 +98,7 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) struct llog_ctxt *ctxt; ptl_nid_t nid = 0; int nal, err, rc = 0; + char *nal_name; ENTRY; generate_random_uuid(uuid); @@ -94,9 +109,12 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) RETURN(-EINVAL); } - nal = ptl_name2nal(LIBLUSTRE_NAL_NAME); + nal_name = getenv(ENV_LUSTRE_NAL_NAME); + if (!nal_name) + nal_name = "tcp"; + nal = 
ptl_name2nal(nal_name); if (nal <= 0) { - CERROR("Can't parse NAL %s\n", LIBLUSTRE_NAL_NAME); + CERROR("Can't parse NAL %s\n", nal_name); RETURN(-EINVAL); } LCFG_INIT(lcfg, LCFG_ADD_UUID, name); @@ -125,7 +143,7 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) err = class_process_config(&lcfg); if (err < 0) GOTO(out_detach, err); - + obd = class_name2obd(name); if (obd == NULL) GOTO(out_cleanup, err = -EINVAL); @@ -141,9 +159,9 @@ int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov) g_zconf_mdsname, err); GOTO(out_cleanup, err); } - + exp = class_conn2export(&mdc_conn); - + ctxt = exp->exp_obd->obd_llog_ctxt[LLOG_CONFIG_REPL_CTXT]; rc = class_config_process_llog(ctxt, g_zconf_profile, cfg); if (rc) @@ -172,7 +190,7 @@ out_del_uuid: out: if (rc == 0) rc = err; - + RETURN(rc); } @@ -189,7 +207,7 @@ int ll_parse_mount_target(const char *target, char **mdsnid, if ((s = strchr(buf, ':'))) { *mdsnid = buf; *s = '\0'; - + while (*++s == '/') ; *mdsname = s; @@ -213,7 +231,7 @@ int ll_parse_mount_target(const char *target, char **mdsnid, * or in the apps themselves. The NAMESPACE_STRING specifying * the initial set of fs ops (creates, mounts, etc.) is passed * as an environment variable. 
- * + * * _sysio_init(); * _sysio_incore_init(); * _sysio_native_init(); @@ -242,22 +260,14 @@ int _sysio_lustre_init(void) err = lllib_init(); if (err) { perror("init llite driver"); - } + } return err; } -/* env variables */ -#define ENV_LUSTRE_MNTPNT "LIBLUSTRE_MOUNT_POINT" -#define ENV_LUSTRE_MNTTGT "LIBLUSTRE_MOUNT_TARGET" -#define ENV_LUSTRE_TIMEOUT "LIBLUSTRE_TIMEOUT" -#define ENV_LUSTRE_DUMPFILE "LIBLUSTRE_DUMPFILE" -#define ENV_LUSTRE_DEBUG_MASK "LIBLUSTRE_DEBUG_MASK" -#define ENV_LUSTRE_DEBUG_SUBSYS "LIBLUSTRE_DEBUG_SUBSYS" - extern int _sysio_native_init(); extern unsigned int obd_timeout; -static char *lustre_path = NULL; +char *lustre_path = NULL; /* global variables */ char *g_zconf_mdsname = NULL; /* mdsname, for zeroconf */ @@ -319,30 +329,31 @@ void __liblustre_setup_(void) portal_subsystem_debug = (unsigned int) strtol(debug_subsys, NULL, 0); -#ifndef CSTART_INIT + +#ifdef INIT_SYSIO /* initialize libsysio & mount rootfs */ - if (_sysio_init()) { - perror("init sysio"); - exit(1); - } + if (_sysio_init()) { + perror("init sysio"); + exit(1); + } _sysio_native_init(); - err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL); - if (err) { - perror(root_driver); - exit(1); - } + err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL); + if (err) { + perror(root_driver); + exit(1); + } if (_sysio_lustre_init()) - exit(1); -#endif + exit(1); +#endif /* INIT_SYSIO */ err = mount("/", lustre_path, lustre_driver, mntflgs, NULL); - if (err) { - errno = -err; - perror(lustre_driver); - exit(1); - } + if (err) { + errno = -err; + perror(lustre_driver); + exit(1); + } } void __liblustre_cleanup_(void) @@ -362,7 +373,15 @@ void __liblustre_cleanup_(void) * liblutre. 
this delima lead to another hack in * libsysio/src/file_hack.c FIXME */ +#ifdef INIT_SYSIO _sysio_shutdown(); cleanup_lib_portals(); PtlFini(); +#else + /* + * don't do any libsysio or low level portals cleanups + * platform framework does it + */ + cleanup_lib_portals(); +#endif } diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 9a3b6cc..0c457a3 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -42,8 +42,7 @@ struct ll_file_data { unsigned long fd_gid; }; -struct llu_sb_info -{ +struct llu_sb_info { struct obd_uuid ll_sb_uuid; struct obd_export *ll_md_exp; struct obd_export *ll_dt_exp; @@ -108,14 +107,11 @@ struct llu_inode_info { unsigned long lli_st_generation; }; -static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs) -{ - return (struct llu_sb_info*)(fs->fs_private); -} +#define llu_fs2sbi(fs) (struct llu_sb_info *)(fs)->fs_private static inline struct llu_inode_info *llu_i2info(struct inode *inode) { - return (struct llu_inode_info*)(inode->i_private); + return (struct llu_inode_info *)inode->i_private; } static inline struct llu_sb_info *llu_i2sbi(struct inode *inode) @@ -135,7 +131,7 @@ static inline struct obd_export *llu_i2mdexp(struct inode *inode) static inline int llu_is_root_inode(struct inode *inode) { - return (llu_i2info(inode)->lli_id.li_stc.u.e3s.l3s_ino == + return (id_ino(&llu_i2info(inode)->lli_id) == llu_i2info(inode)->lli_sbi->ll_rootino); } @@ -178,10 +174,14 @@ static inline void ll_inode2id(struct lustre_id *id, } typedef int (*intent_finish_cb)(struct ptlrpc_request *, - struct inode *parent, struct pnode *pnode, - struct lookup_intent *, int offset, obd_id ino); + struct inode *parent, + struct pnode *pnode, + struct lookup_intent *, + int offset, obd_id ino); + int llu_intent_lock(struct inode *parent, struct pnode *pnode, - struct lookup_intent *, int flags, intent_finish_cb); + struct lookup_intent *, int flags, + intent_finish_cb); static inline __u64 
ll_file_maxbytes(struct inode *inode) { @@ -194,18 +194,21 @@ struct mount_option_s char *osc_uuid; }; -#define IS_BAD_PTR(ptr) \ +#define IS_BAD_PTR(ptr) \ ((unsigned long)(ptr) == 0 || (unsigned long)(ptr) > -1000UL) /* llite_lib.c */ void generate_random_uuid(unsigned char uuid_out[16]); -int liblustre_process_log(struct config_llog_instance *cfg, int allow_recov); + +int liblustre_process_log(struct config_llog_instance *cfg, + int allow_recov); + int ll_parse_mount_target(const char *target, char **mdsnid, char **mdsname, char **profile); -extern char *g_zconf_mdsnid; -extern char *g_zconf_mdsname; -extern char *g_zconf_profile; +extern char *g_zconf_mdsnid; +extern char *g_zconf_mdsname; +extern char *g_zconf_profile; extern struct mount_option_s mount_option; /* super.c */ diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index 0949b5d..92d9444 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -32,11 +32,16 @@ #include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD @@ -215,8 +220,8 @@ int llu_pb_revalidate(struct pnode *pnode, int flags, struct lookup_intent *it) int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%x\n", - pb->pb_name.name, it ? it->it_op : 0); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,intent=%x\n", + (int)pb->pb_name.len, pb->pb_name.name, it ? it->it_op : 0); /* We don't want to cache negative dentries, so return 0 immediately. 
* We believe that this is safe, that negative dentries cannot be diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c index 5de64a6..c6f4bd0 100644 --- a/lustre/liblustre/rw.c +++ b/lustre/liblustre/rw.c @@ -32,11 +32,15 @@ #include #include -#include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD @@ -308,14 +312,6 @@ struct ll_async_page { struct inode *llap_inode; }; -static struct ll_async_page *llap_from_cookie(void *cookie) -{ - struct ll_async_page *llap = cookie; - if (llap->llap_magic != LLAP_MAGIC) - return ERR_PTR(-EINVAL); - return llap; -}; - static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa) { struct ll_async_page *llap; @@ -324,12 +320,7 @@ static void llu_ap_fill_obdo(void *data, int cmd, struct obdo *oa) obd_valid valid_flags; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); inode = llap->llap_inode; lsm = llu_i2info(inode)->lli_smd; @@ -349,12 +340,7 @@ static void llu_ap_completion(void *data, int cmd, struct obdo *oa, int rc) struct ll_async_page *llap; struct page *page; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); llap->llap_queued = 0; page = llap->llap_page; @@ -507,9 +493,6 @@ void put_io_group(struct llu_io_group *group) OBD_FREE(group, LLU_IO_GROUP_SIZE(group->lig_maxpages)); } -void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm, - obd_off size); - static ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen, _SYSIO_OFF_T pos, ssize_t len, @@ -618,7 +601,7 @@ ssize_t llu_file_prwv(const struct iovec *iovec, int iovlen, pos += ret; if (!is_read) { LASSERT(ret == count); - lov_increase_kms(exp, lsm, pos); + obd_adjust_kms(exp, lsm, pos, 0); /* file size grow immediately */ if (pos > lli->lli_st_size) lli->lli_st_size = pos; diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c 
index 1962920..26a9720 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -38,11 +38,16 @@ # include #endif -#include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include +#include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD #include "llite_lib.h" @@ -84,7 +89,7 @@ static int ll_permission(struct inode *inode, int mask) static void llu_fsop_gone(struct filesys *fs) { - struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private; + struct llu_sb_info *sbi = (struct llu_sb_info *)fs->fs_private; struct obd_device *obd = class_exp2obd(sbi->ll_md_exp); struct lustre_cfg lcfg; int next = 0; @@ -181,8 +186,8 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_valid valid) valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %llx, cur time %lu/%lu, new %lu/%lu\n", - (unsigned long long)src->o_valid, + CDEBUG(D_INODE, "valid "LPX64", cur time %lu/%lu, new %lu/%lu\n", + src->o_valid, LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime), (long)src->o_mtime, (long)src->o_ctime); @@ -221,8 +226,8 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_valid valid) obd_valid newvalid = 0; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %llx, new time %lu/%lu\n", - (unsigned long long)valid, LTIME_S(lli->lli_st_mtime), + CDEBUG(D_INODE, "valid "LPX64", new time %lu/%lu\n", + valid, LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime)); if (valid & OBD_MD_FLATIME) { @@ -944,8 +949,9 @@ static int llu_iop_mknod_raw(struct pnode *pno, int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n", - pno->p_base->pb_name.name, llu_i2info(dir)->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu\n", + (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name, + llu_i2info(dir)->lli_st_ino); if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX) RETURN(err); @@ -1179,8 +1185,8 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t 
mode) struct mdc_op_data op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n", - name, lli->lli_st_ino, lli->lli_st_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%lu(%p)\n", + len, name, lli->lli_st_ino, lli->lli_st_generation, dir); if (lli->lli_st_nlink >= EXT2_LINK_MAX) RETURN(err); @@ -1204,8 +1210,8 @@ static int llu_iop_rmdir_raw(struct pnode *pno) struct llu_inode_info *lli = llu_i2info(dir); int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n", - name, lli->lli_st_ino, lli->lli_st_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%lu(%p)\n", + len, name, lli->lli_st_ino, lli->lli_st_generation, dir); llu_prepare_mdc_data(&op_data, dir, NULL, name, len, S_IFDIR); rc = mdc_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); @@ -1234,7 +1240,7 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) flags = va_arg(ap, long); flags &= FCNTL_FLMASK; if (flags & FCNTL_FLMASK_INVALID) { - CERROR("liblustre does not support O_NONBLOCK, O_ASYNC, " + CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, " "and O_DIRECT on file descriptor\n"); *rtn = -1; return EINVAL; @@ -1356,8 +1362,8 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) if ((md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) != (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) { - CERROR("bad md body valid mask 0x%llx\n", - (unsigned long long)md->body->valid); + CERROR("bad md body valid mask 0x"LPX64"\n", + md->body->valid); LBUG(); return ERR_PTR(-EPERM); } @@ -1477,9 +1483,8 @@ llu_fsswop_mount(const char *source, } obd_set_info(obd->obd_self_export, strlen("async"), "async", sizeof(async), &async); -#warning "FIXME ASAP!" 
#if 0 - if (mdc_init_ea_size(obd, lov)) + if (mdc_init_ea_size(obd, osc)) GOTO(out_free, err = -EINVAL); #endif /* setup mdc */ diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am index ee16557..4b670c6 100644 --- a/lustre/liblustre/tests/Makefile.am +++ b/lustre/liblustre/tests/Makefile.am @@ -2,19 +2,25 @@ AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir)/portals/unals AM_CFLAGS = $(LLCFLAGS) -LIBS = $(LIBEFENCE) $(LIBREADLINE) +AM_LIBS = $(LIBEFENCE) $(LIBREADLINE) LLIB_EXEC= $(top_builddir)/lustre/liblustre/liblustre.a -lcap -lpthread if LIBLUSTRE noinst_LIBRARIES = libtestcommon.a -def_tests = echo_test sanity recovery_small replay_single replay_ost_single + +if LIBLUSTRE_TESTS +noinst_PROGRAMS = sanity recovery_small replay_single replay_ost_single + +if TESTS +noinst_PROGRAMS += echo_test +endif # TESTS if MPITESTS -noinst_PROGRAMS = $(def_tests) test_lock_cancel -else -noinst_PROGRAMS = $(def_tests) -endif +noinst_PROGRAMS += test_lock_cancel +endif # MPITESTS + +endif # LIBLUSTRE_TESTS endif # LIBLUSTRE libtestcommon_a_SOURCES = test_common.c test_common.h diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index 4223fac..3ec12e01 100644 --- a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -39,6 +39,7 @@ struct pingcli_args { int count; int size; }; + /* bug #4615 */ #if 0 char *portals_id2str(int nal, ptl_process_id_t id, char *str) diff --git a/lustre/liblustre/tests/replay_single.c b/lustre/liblustre/tests/replay_single.c index 235d330..9628354 100644 --- a/lustre/liblustre/tests/replay_single.c +++ b/lustre/liblustre/tests/replay_single.c @@ -92,9 +92,11 @@ static void mds_failover() void t0() { + char *path="/mnt/lustre/f0"; ENTRY("empty replay"); replay_barrier(); mds_failover(); + t_check_stat_fail("/mnt/lustre/f0"); LEAVE(); } diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 
15d16a1..fce471c 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -35,9 +35,13 @@ #include #include #include +#include +#include #include "test_common.h" +extern char *lustre_path; + #define ENTRY(str) \ do { \ char buf[100]; \ @@ -63,8 +67,10 @@ void t1() { - char *path="/mnt/lustre/test_t1"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("create/delete"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t1", lustre_path); t_touch(path); t_unlink(path); @@ -73,8 +79,10 @@ void t1() void t2() { - char *path="/mnt/lustre/test_t2"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("mkdir/rmdir"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t2", lustre_path); t_mkdir(path); t_rmdir(path); @@ -83,8 +91,10 @@ void t2() void t3() { - char *path="/mnt/lustre/test_t3"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("regular stat"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t3", lustre_path); t_touch(path); t_check_stat(path, NULL); @@ -94,8 +104,10 @@ void t3() void t4() { - char *path="/mnt/lustre/test_t4"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("dir stat"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t4", lustre_path); t_mkdir(path); t_check_stat(path, NULL); @@ -105,9 +117,12 @@ void t4() void t6() { - char *path="/mnt/lustre/test_t6"; - char *path2="/mnt/lustre/test_t6_link"; + char path[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + ENTRY("symlink"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t6", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t6_link", lustre_path); t_touch(path); t_symlink(path, path2); @@ -119,9 +134,11 @@ void t6() void t7() { - char *path="/mnt/lustre/test_t7"; + char path[MAX_PATH_LENGTH] = ""; int rc; + ENTRY("mknod"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t7", lustre_path); if (geteuid() != 0) { rc = mknod(path, S_IFCHR | 0644, (5<<8 | 4)); @@ -139,8 +156,10 @@ void t7() void t8() { - char *path="/mnt/lustre/test_t8"; + char path[MAX_PATH_LENGTH] = ""; + ENTRY("chmod"); + snprintf(path, 
MAX_PATH_LENGTH, "%s/test_t8", lustre_path); t_touch(path); t_chmod_raw(path, 0700); @@ -151,9 +170,12 @@ void t8() void t9() { - char *path="/mnt/lustre/test_t9"; - char *path2="/mnt/lustre/test_t9_link"; + char path[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + ENTRY("hard link"); + snprintf(path, MAX_PATH_LENGTH, "%s/test_t9", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t9_link", lustre_path); t_touch(path); t_link(path, path2); @@ -166,14 +188,22 @@ void t9() void t10() { - char *dir1="/mnt/lustre/test_t10_dir1"; - char *dir2="/mnt/lustre/test_t10_dir2"; - char *path1="/mnt/lustre/test_t10_reg1"; - char *path2="/mnt/lustre/test_t10_reg2"; - char *rename1="/mnt/lustre/test_t10_dir1/rename1"; - char *rename2="/mnt/lustre/test_t10_dir2/rename2"; - char *rename3="/mnt/lustre/test_t10_dir2/rename3"; + char dir1[MAX_PATH_LENGTH] = ""; + char dir2[MAX_PATH_LENGTH] = ""; + char path1[MAX_PATH_LENGTH] = ""; + char path2[MAX_PATH_LENGTH] = ""; + char rename1[MAX_PATH_LENGTH] = ""; + char rename2[MAX_PATH_LENGTH] = ""; + char rename3[MAX_PATH_LENGTH] = ""; + ENTRY("rename"); + snprintf(dir1, MAX_PATH_LENGTH, "%s/test_t10_dir1", lustre_path); + snprintf(dir2, MAX_PATH_LENGTH, "%s/test_t10_dir2", lustre_path); + snprintf(path1, MAX_PATH_LENGTH, "%s/test_t10_reg1", lustre_path); + snprintf(path2, MAX_PATH_LENGTH, "%s/test_t10_reg2", lustre_path); + snprintf(rename1, MAX_PATH_LENGTH, "%s/test_t10_dir1/rename1", lustre_path); + snprintf(rename2, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename2", lustre_path); + snprintf(rename3, MAX_PATH_LENGTH, "%s/test_t10_dir2/rename3", lustre_path); t_mkdir(dir1); t_mkdir(dir2); @@ -191,7 +221,7 @@ void t10() void t11() { - char *base="/mnt/lustre"; + char *base=lustre_path; char path[MAX_PATH_LENGTH], path2[MAX_PATH_LENGTH]; int i, j, level = 5, nreg = 5; ENTRY("deep tree"); @@ -227,10 +257,11 @@ void t11() void t12() { - char *dir="/mnt/lustre/test_t12_dir"; + char dir[MAX_PATH_LENGTH] = ""; char buf[1024*128]; int 
fd; ENTRY("empty directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t12_dir", lustre_path); t_mkdir(dir); fd = t_opendir(dir); @@ -242,13 +273,14 @@ void t12() void t13() { - char *dir="/mnt/lustre/test_t13_dir/"; + char dir[MAX_PATH_LENGTH] = ""; char name[1024]; char buf[1024]; const int nfiles = 20; char *prefix = "test13_filename_prefix_"; int fd, i; ENTRY("multiple entries directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t13_dir/", lustre_path); t_mkdir(dir); printf("Creating %d files...\n", nfiles); @@ -270,7 +302,7 @@ void t13() void t14() { - char *dir="/mnt/lustre/test_t14_dir/"; + char dir[MAX_PATH_LENGTH] = ""; char name[1024]; char buf[1024]; const int nfiles = 256; @@ -279,6 +311,7 @@ void t14() int fd, i, rc, pos, index; loff_t base = 0; ENTRY(">1 block(4k) directory readdir"); + snprintf(dir, MAX_PATH_LENGTH, "%s/test_t14_dir/", lustre_path); t_mkdir(dir); printf("Creating %d files...\n", nfiles); @@ -328,9 +361,10 @@ iter: void t15() { - char *file = "/mnt/lustre/test_t15_file"; + char file[MAX_PATH_LENGTH] = ""; int fd; ENTRY("open-stat-close"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t15_file", lustre_path); t_touch(file); fd = t_open(file); @@ -342,9 +376,9 @@ void t15() void t16() { - char *file = "/mnt/lustre/test_t16_file"; - int fd; + char file[MAX_PATH_LENGTH] = ""; ENTRY("small-write-read"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t16_file", lustre_path); t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaa"); t_grep(file, "aaaaaaaaaaaaaaaaaaaaaa"); @@ -354,9 +388,10 @@ void t16() void t17() { - char *file = "/mnt/lustre/test_t17_file"; + char file[MAX_PATH_LENGTH] = ""; int fd; ENTRY("open-unlink without close"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t17_file", lustre_path); fd = open(file, O_WRONLY | O_CREAT, 0666); if (fd < 0) { @@ -369,11 +404,12 @@ void t17() void t18() { - char *file = "/mnt/lustre/test_t18_file"; + char file[MAX_PATH_LENGTH] = ""; char buf[128]; int fd, i; struct stat statbuf[3]; 
ENTRY("write should change mtime/atime"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t18_file", lustre_path); for (i = 0; i < 3; i++) { fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666); @@ -390,7 +426,7 @@ void t18() printf("Error stat\n"); exit(1); } - printf("mtime %ld, ctime %d\n", + printf("mtime %lu, ctime %lu\n", statbuf[i].st_atime, statbuf[i].st_mtime); sleep(2); } @@ -403,8 +439,208 @@ void t18() } } t_unlink(file); + LEAVE(); } +void t19() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + struct stat statbuf; + ENTRY("open(O_TRUNC) should trancate file to 0-length"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t19_file", lustre_path); + + t_echo_create(file, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + + fd = open(file, O_RDWR|O_CREAT|O_TRUNC, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + close(fd); + if(stat(file, &statbuf) != 0) { + printf("Error stat\n"); + exit(1); + } + if (statbuf.st_size != 0) { + printf("size %ld is not zero\n", statbuf.st_size); + exit(-1); + } + t_unlink(file); + LEAVE(); +} + +void t20() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + struct iovec iov[2]; + char buf[100]; + ssize_t ret; + ENTRY("trap app's general bad pointer for file i/o"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t20_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + + ret = write(fd, NULL, 20); + if (ret != -1 || errno != EFAULT) { + printf("write 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + ret = write(fd, (void *)-1, 20); + if (ret != -1 || errno != EFAULT) { + printf("write 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 10; + iov[1].iov_base = (void *)-1; + iov[1].iov_len = 10; + ret = writev(fd, iov, 2); + if (ret != -1 || errno != EFAULT) { + printf("writev 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = 
NULL; + iov[0].iov_len = 0; + iov[1].iov_base = buf; + iov[1].iov_len = sizeof(buf); + ret = writev(fd, iov, 2); + if (ret != sizeof(buf)) { + printf("write 3 ret %ld, error %d\n", ret, errno); + exit(1); + } + lseek(fd, 0, SEEK_SET); + + ret = read(fd, NULL, 20); + if (ret != -1 || errno != EFAULT) { + printf("read 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + ret = read(fd, (void *)-1, 20); + if (ret != -1 || errno != EFAULT) { + printf("read 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 10; + iov[1].iov_base = (void *)-1; + iov[1].iov_len = 10; + ret = readv(fd, iov, 2); + if (ret != -1 || errno != EFAULT) { + printf("readv 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + iov[0].iov_base = NULL; + iov[0].iov_len = 0; + iov[1].iov_base = buf; + iov[1].iov_len = sizeof(buf); + ret = readv(fd, iov, 2); + if (ret != sizeof(buf)) { + printf("read 3 ret %ld, error %d\n", ret, errno); + exit(1); + } + + close(fd); + t_unlink(file); + LEAVE(); +} + +void t21() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd, ret; + ENTRY("basic fcntl support"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t21_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + if (fcntl(fd, F_SETFL, O_APPEND)) { + printf("error set flag: %s\n", strerror(errno)); + exit(-1); + } + if ((ret = fcntl(fd, F_GETFL)) != O_APPEND) { + printf("error get flag: ret %x\n", ret); + exit(-1); + } + + close(fd); + t_unlink(file); + LEAVE(); +} + +void t22() +{ + char file[MAX_PATH_LENGTH] = ""; + int fd; + char *str = "1234567890"; + char buf[100]; + ssize_t ret; + ENTRY("make sure O_APPEND take effect"); + snprintf(file, MAX_PATH_LENGTH, "%s/test_t22_file", lustre_path); + + fd = open(file, O_RDWR|O_CREAT|O_APPEND, (mode_t)0666); + if (fd < 0) { + printf("error open file: %s\n", strerror(errno)); + exit(-1); + } + + lseek(fd, 100, SEEK_SET); + ret = 
write(fd, str, strlen(str)); + if (ret != strlen(str)) { + printf("write 1: ret %ld, errno %d\n", ret, errno); + exit(1); + } + + lseek(fd, 0, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret != strlen(str)) { + printf("read 1 got %ld\n", ret); + exit(1); + } + + if (memcmp(buf, str, strlen(str))) { + printf("read 1 data err\n"); + exit(1); + } + + if (fcntl(fd, F_SETFL, 0)) { + printf("fcntl err: %s\n", strerror(errno)); + exit(1); + } + + lseek(fd, 100, SEEK_SET); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + printf("write 2: ret %ld, errno %d\n", ret, errno); + exit(1); + } + + lseek(fd, 100, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret != strlen(str)) { + printf("read 2 got %ld\n", ret); + exit(1); + } + + if (memcmp(buf, str, strlen(str))) { + printf("read 2 data err\n"); + exit(1); + } + + close(fd); + t_unlink(file); + LEAVE(); +} + + #define PAGE_SIZE (4096) #define _npages (2048) @@ -415,12 +651,14 @@ static int _buffer[_npages][PAGE_SIZE/sizeof(int)]; */ static void pages_io(int xfer, loff_t pos) { - char *path="/mnt/lustre/test_t50"; + char path[MAX_PATH_LENGTH] = ""; + int check_sum[_npages] = {0,}; - int fd, rc, i, j; + int fd, rc, i, j, data_error = 0; struct timeval tw1, tw2, tr1, tr2; double tw, tr; + snprintf(path, MAX_PATH_LENGTH, "%s/test_t50", lustre_path); memset(_buffer, 0, sizeof(_buffer)); /* create sample data */ @@ -474,6 +712,7 @@ static void pages_io(int xfer, loff_t pos) sum += _buffer[i][j]; } if (sum != check_sum[i]) { + data_error = 1; printf("chunk %d checksum error: expected 0x%x, get 0x%x\n", i, check_sum[i], sum); } @@ -486,12 +725,15 @@ static void pages_io(int xfer, loff_t pos) printf(" (R:%.3fM/s, W:%.3fM/s)\n", (_npages * PAGE_SIZE) / (tw / 1000000.0) / (1024 * 1024), (_npages * PAGE_SIZE) / (tr / 1000000.0) / (1024 * 1024)); + + if (data_error) + exit(1); } void t50() { - char text[256]; - loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191, 1024*1024*1024*1024ULL}; + loff_t 
off_array[] = {1, 17, 255, 258, 4095, 4097, 8191, + 1024*1024*1024*1024ULL}; int np = 1, i; loff_t offset = 0; @@ -576,6 +818,10 @@ int main(int argc, char * const argv[]) t16(); t17(); t18(); + t19(); + t20(); + t21(); + t22(); t50(); printf("liblustre is about shutdown\n"); diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index b8a6d0a..beb5f53 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -53,6 +53,34 @@ static void ll_release(struct dentry *de) EXIT; } +/* Compare if two dentries are the same. Don't match if the existing dentry + * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same. + * + * This avoids a race where ll_lookup_it() instantiates a dentry, but we get + * an AST before calling d_revalidate_it(). The dentry still exists (marked + * INVALID) so d_lookup() matches it, but we have no lock on it (so + * lock_match() fails) and we spin around real_lookup(). */ +static int ll_dcompare(struct dentry *parent, struct qstr *d_name, + struct qstr *name){ + struct dentry *dchild; + ENTRY; + + if (d_name->len != name->len) + RETURN(1); + + if (memcmp(d_name->name, name->name, name->len)) + RETURN(1); + + dchild = container_of(d_name, struct dentry, d_name); /* ugh */ + if (dchild->d_flags & DCACHE_LUSTRE_INVALID) { + CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n", + dchild); + RETURN(1); + } + + RETURN(0); +} + /* should NOT be called with the dcache lock, see fs/dcache.c */ static int ll_ddelete(struct dentry *de) { @@ -71,7 +99,7 @@ void ll_set_dd(struct dentry *de) ENTRY; LASSERT(de != NULL); - CDEBUG(D_DENTRY, "ldd on dentry %*s (%p) parent %p inode %p refc %d\n", + CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n", de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, atomic_read(&de->d_count)); lock_kernel(); @@ -128,6 +156,7 @@ void ll_unhash_aliases(struct inode *inode) if (inode == NULL) { CERROR("unexpected NULL inode, tell phil\n"); + EXIT; return; } @@ 
-142,7 +171,7 @@ restart: while ((tmp = tmp->next) != head) { struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); if (atomic_read(&dentry->d_count) == 0) { - CDEBUG(D_DENTRY, "deleting dentry %*s (%p) parent %p " + CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p " "inode %p\n", dentry->d_name.len, dentry->d_name.name, dentry, dentry->d_parent, dentry->d_inode); @@ -152,7 +181,7 @@ restart: dput(dentry); goto restart; } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) { - CDEBUG(D_DENTRY, "unhashing dentry %*s (%p) parent %p " + CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p " "inode %p refc %d\n", dentry->d_name.len, dentry->d_name.name, dentry, dentry->d_parent, dentry->d_inode, atomic_read(&dentry->d_count)); @@ -265,8 +294,8 @@ int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd, { struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; struct ptlrpc_request *req = NULL; - struct it_cb_data icbd; struct obd_export *exp; + struct it_cb_data icbd; struct lustre_id pid; struct lustre_id cid; int orig_it, rc = 0; @@ -324,6 +353,7 @@ int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd, if (nd != NULL) nd->mnt->mnt_last_used = jiffies; + OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5); orig_it = it ? 
it->it_op : IT_OPEN; ll_frob_intent(&it, &lookup_it); LASSERT(it != NULL); @@ -362,11 +392,68 @@ int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd, ll_intent_release(&lookup_it); } +#if 1 + if ((it->it_op == IT_OPEN) && de->d_inode) { + struct inode *inode = de->d_inode; + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_client_handle **och_p; + __u64 *och_usecount; + struct obd_device *obddev; + struct lustre_handle lockh; + int flags = LDLM_FL_BLOCK_GRANTED; + ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}}; + struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id), + id_group(&lli->lli_id)}}; + int lockmode; + + if (it->it_flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + lockmode = LCK_CW; + } else if (it->it_flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + lockmode = LCK_PR; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + lockmode = LCK_CR; + } + + /* Check for the proper lock */ + obddev = md_get_real_obd(exp, &lli->lli_id); + if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id, + LDLM_IBITS, &policy, lockmode, &lockh)) + goto do_lock; + down(&lli->lli_och_sem); + if (*och_p) { /* Everything is open already, do nothing */ + /*(*och_usecount)++; Do not let them steal our open + handle from under us */ + /* XXX The code above was my original idea, but in case + we have the handle, but we cannot use it due to later + checks (e.g. O_CREAT|O_EXCL flags set), nobody + would decrement counter increased here. So we just + hope the lock won't be invalidated in between. But + if it would be, we'll reopen the open request to + MDS later during file open path */ + up(&lli->lli_och_sem); + memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh, + sizeof(lockh)); + LUSTRE_IT(it)->it_lock_mode = lockmode; + RETURN(1); + } else { + /* Hm, interesting. 
Lock is present, but no open + handle? */ + up(&lli->lli_och_sem); + ldlm_lock_decref(&lockh, lockmode); + } + } +#endif + +do_lock: rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len, - NULL, 0, &cid, it, flags, &req, - ll_mdc_blocking_ast); - - /* If req is NULL, then mdc_intent_lock only tried to do a lock match; + NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast); + /* If req is NULL, then md_intent_lock() only tried to do a lock match; * if all was well, it will return 1 if it found locks, 0 otherwise. */ if (req == NULL && rc >= 0) { if (!rc) @@ -409,13 +496,14 @@ out: ptlrpc_req_finished(req); } ll_unhash_aliases(de->d_inode); - return rc; + return 0; } CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p " "inode %p refc %d\n", de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode, atomic_read(&de->d_count)); + ll_lookup_finish_locks(it, de); de->d_flags &= ~DCACHE_LUSTRE_INVALID; if (it == &lookup_it) @@ -594,6 +682,7 @@ struct dentry_operations ll_d_ops = { .d_release = ll_release, .d_iput = ll_dentry_iput, .d_delete = ll_ddelete, + .d_compare = ll_dcompare, #if 0 .d_pin = ll_pin, .d_unpin = ll_unpin, diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index fa9a335..1f25e82 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -228,7 +228,7 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) - RETURN(ERR_PTR(-ENOMEM)); + return ERR_PTR(-ENOMEM); ll_prepare_mdc_data(op_data, dir, NULL, NULL, 0, 0); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index e13260c..3b75acf 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -34,25 +34,32 @@ #include "llite_internal.h" #include -#define XATTR_NAME_MAX 255 -int ll_md_close(struct obd_export *md_exp, struct inode *inode, - struct file *file) +int ll_md_och_close(struct obd_export *md_exp, struct inode *inode, + struct obd_client_handle *och) { - struct ll_file_data *fd = 
file->private_data; struct ptlrpc_request *req = NULL; - struct obd_client_handle *och = &fd->fd_mds_och; struct obdo *obdo = NULL; + struct obd_device *obd; int rc; ENTRY; - /* clear group lock, if present */ - if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); - rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, - &fd->fd_cwlockh); + obd = class_exp2obd(md_exp); + if (obd == NULL) { + CERROR("Invalid MDC connection handle "LPX64"\n", + md_exp->exp_handle.h_cookie); + EXIT; + return 0; } + /* + * here we check if this is forced umount. If so this is called on + * canceling "open lock" and we do not call md_close() in this case , as + * it will not successful, as import is already deactivated. + */ + if (obd->obd_no_recov) + GOTO(out, rc = 0); + + /* closing opened file */ obdo = obdo_alloc(); if (obdo == NULL) RETURN(-ENOMEM); @@ -67,29 +74,140 @@ int ll_md_close(struct obd_export *md_exp, struct inode *inode, obdo->o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; obdo->o_valid |= OBD_MD_FLFLAGS; } + obdo->o_fid = id_fid(&ll_i2info(inode)->lli_id); obdo->o_mds = id_group(&ll_i2info(inode)->lli_id); rc = md_close(md_exp, obdo, och, &req); obdo_free(obdo); if (rc == EAGAIN) { - /* We are the last writer, so the MDS has instructed us to get - * the file size and any write cookies, then close again. */ + /* + * we are the last writer, so the MDS has instructed us to get + * the file size and any write cookies, then close again. 
+ */ + //ll_queue_done_writing(inode); rc = 0; } else if (rc) { CERROR("inode %lu mdc close failed: rc = %d\n", - inode->i_ino, rc); + (unsigned long)inode->i_ino, rc); } + + /* objects are destroed on OST only if metadata close was + * successful.*/ if (rc == 0) { - rc = ll_objects_destroy(req, file->f_dentry->d_inode, 1); + rc = ll_objects_destroy(req, inode, 1); if (rc) CERROR("inode %lu ll_objects destroy: rc = %d\n", inode->i_ino, rc); } - mdc_clear_open_replay_data(md_exp, och); ptlrpc_req_finished(req); + EXIT; +out: + mdc_clear_open_replay_data(md_exp, och); och->och_fh.cookie = DEAD_HANDLE_MAGIC; + OBD_FREE(och, sizeof *och); + return rc; +} + +int ll_md_real_close(struct obd_export *md_exp, + struct inode *inode, int flags) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct obd_client_handle **och_p; + struct obd_client_handle *och; + __u64 *och_usecount; + int rc = 0; + ENTRY; + + if (flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + + down(&lli->lli_och_sem); + if (*och_usecount) { /* There are still users of this handle, so + skip freeing it. */ + up(&lli->lli_och_sem); + RETURN(0); + } + och = *och_p; + + *och_p = NULL; + up(&lli->lli_och_sem); + + /* + * there might be a race and somebody have freed this och + * already. Another way to have this twice called is if file closing + * will fail due to netwok problems and on umount lock will be canceled + * and this will be called from block_ast callack. 
+ */ + if (och && och->och_fh.cookie != DEAD_HANDLE_MAGIC) + rc = ll_md_och_close(md_exp, inode, och); + + RETURN(rc); +} + +int ll_md_close(struct obd_export *md_exp, struct inode *inode, + struct file *file) +{ + struct ll_file_data *fd = file->private_data; + struct ll_inode_info *lli = ll_i2info(inode); + int rc = 0; + ENTRY; + + /* clear group lock, if present */ + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, + &fd->fd_cwlockh); + } + + /* Let's see if we have good enough OPEN lock on the file and if + we can skip talking to MDS */ + if (file->f_dentry->d_inode) { + int lockmode; + struct obd_device *obddev; + struct lustre_handle lockh; + int flags = LDLM_FL_BLOCK_GRANTED; + struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id), + id_group(&lli->lli_id)}}; + ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_OPEN}}; + + down(&lli->lli_och_sem); + if (fd->fd_omode & FMODE_WRITE) { + lockmode = LCK_CW; + LASSERT(lli->lli_open_fd_write_count); + lli->lli_open_fd_write_count--; + } else if (fd->fd_omode & FMODE_EXEC) { + lockmode = LCK_PR; + LASSERT(lli->lli_open_fd_exec_count); + lli->lli_open_fd_exec_count--; + } else { + lockmode = LCK_CR; + LASSERT(lli->lli_open_fd_read_count); + lli->lli_open_fd_read_count--; + } + up(&lli->lli_och_sem); + + obddev = md_get_real_obd(md_exp, &lli->lli_id); + if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id, + LDLM_IBITS, &policy, lockmode, &lockh)) + { + rc = ll_md_real_close(md_exp, file->f_dentry->d_inode, + fd->fd_omode); + } else { + ldlm_lock_decref(&lockh, lockmode); + } + } + file->private_data = NULL; OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof(*fd)); RETURN(rc); @@ -139,7 +257,9 @@ static int ll_intent_file_open(struct file *file, void *lmm, OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - 
ll_prepare_mdc_data(op_data, parent->d_inode, NULL, name, len, O_RDWR); + + ll_prepare_mdc_data(op_data, parent->d_inode, NULL, + name, len, O_RDWR); rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PR, op_data, &lockh, lmm, lmmsize, ldlm_completion_ast, @@ -153,24 +273,38 @@ static int ll_intent_file_open(struct file *file, void *lmm, } else if (rc < 0) { CERROR("lock enqueue: err: %d\n", rc); } - RETURN(rc); } -int ll_local_open(struct file *file, struct lookup_intent *it) +void ll_och_fill(struct inode *inode, struct lookup_intent *it, + struct obd_client_handle *och) { struct ptlrpc_request *req = LUSTRE_IT(it)->it_data; - struct ll_inode_info *lli = ll_i2info(file->f_dentry->d_inode); - struct obd_export *md_exp = ll_i2mdexp(file->f_dentry->d_inode); - struct ll_file_data *fd; + struct ll_inode_info *lli = ll_i2info(inode); struct mds_body *body; - ENTRY; + LASSERT(och); body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED (req, 1); /* and swabbed down */ + LASSERT (body != NULL); /* reply already checked out */ + LASSERT_REPSWABBED (req, 1); /* and swabbed down */ + + memcpy(&och->och_fh, &body->handle, sizeof(body->handle)); + och->och_magic = OBD_CLIENT_HANDLE_MAGIC; + lli->lli_io_epoch = body->io_epoch; + mdc_set_open_replay_data(ll_i2mdexp(inode), och, + LUSTRE_IT(it)->it_data); +} + +int ll_local_open(struct file *file, struct lookup_intent *it, + struct obd_client_handle *och) +{ + struct ll_file_data *fd; + ENTRY; - LASSERTF(file->private_data == NULL, "file %*s/%*s ino %lu/%u (%o)\n", + if (och) + ll_och_fill(file->f_dentry->d_inode, it, och); + + LASSERTF(file->private_data == NULL, "file %.*s/%.*s ino %lu/%u (%o)\n", file->f_dentry->d_name.len, file->f_dentry->d_name.name, file->f_dentry->d_parent->d_name.len, file->f_dentry->d_parent->d_name.name, @@ -178,22 +312,15 @@ int ll_local_open(struct file *file, struct lookup_intent *it) 
file->f_dentry->d_inode->i_generation, file->f_dentry->d_inode->i_mode); - OBD_SLAB_ALLOC(fd, ll_file_data_slab, SLAB_KERNEL, sizeof *fd); /* We can't handle this well without reorganizing ll_file_open and * ll_md_close(), so don't even try right now. */ LASSERT(fd != NULL); - memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); - fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; file->private_data = fd; ll_readahead_init(file->f_dentry->d_inode, &fd->fd_ras); - - lli->lli_io_epoch = body->io_epoch; - - mdc_set_open_replay_data(md_exp, &fd->fd_mds_och, LUSTRE_IT(it)->it_data); - + fd->fd_omode = it->it_flags; RETURN(0); } @@ -220,38 +347,117 @@ int ll_file_open(struct inode *inode, struct file *file) struct lov_stripe_md *lsm; struct ptlrpc_request *req; int rc = 0; + struct obd_client_handle **och_p; + __u64 *och_usecount; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", inode->i_ino, - inode->i_generation, inode, file->f_flags); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p), flags %o\n", + inode->i_ino, inode->i_generation, inode, file->f_flags); /* don't do anything for / */ if (inode->i_sb->s_root == file->f_dentry) RETURN(0); + if ((file->f_flags+1) & O_ACCMODE) + oit.it_flags++; + if (file->f_flags & O_TRUNC) + oit.it_flags |= 2; + it = file->f_it; - if (!it || !LUSTRE_IT(it) || !LUSTRE_IT(it)->it_disposition) { + /* + * sometimes LUSTRE_IT(it) may not be allocated like opening file by + * dentry_open() from GNS stuff. 
+ */ + if (!it || !LUSTRE_IT(it)) { it = &oit; rc = ll_intent_alloc(it); if (rc) GOTO(out, rc); - rc = ll_intent_file_open(file, NULL, 0, it); - if (rc) - GOTO(out, rc); } - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - /* mdc_intent_lock() didn't get a request ref if there was an open - * error, so don't do cleanup on the request here (bug 3430) */ - rc = it_open_error(DISP_OPEN_OPEN, it); - if (rc) - RETURN(rc); + + /* + * mdc_intent_lock() didn't get a request ref if there was an open + * error, so don't do cleanup on the * request here (bug 3430) + */ + if (LUSTRE_IT(it)->it_disposition) { + rc = it_open_error(DISP_OPEN_OPEN, it); + if (rc) + RETURN(rc); + } + + /* Let's see if we have file open on MDS already. */ + if (it->it_flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (it->it_flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + + down(&lli->lli_och_sem); + if (*och_p) { /* Open handle is present */ + if (LUSTRE_IT(it)->it_disposition) { + struct obd_client_handle *och; + /* Well, there's extra open request that we do not need, + let's close it somehow*/ + OBD_ALLOC(och, sizeof (struct obd_client_handle)); + if (!och) { + up(&lli->lli_och_sem); + RETURN(-ENOMEM); + } - rc = ll_local_open(file, it); + ll_och_fill(inode, it, och); + /* ll_md_och_close() will free och */ + ll_md_och_close(ll_i2mdexp(inode), inode, och); + } + (*och_usecount)++; + + rc = ll_local_open(file, it, NULL); + if (rc) + LBUG(); + } else { + LASSERT(*och_usecount == 0); + OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); + if (!*och_p) + GOTO(out, rc = -ENOMEM); + (*och_usecount)++; + + if (!it || !LUSTRE_IT(it) || !LUSTRE_IT(it)->it_disposition) { + /* We are going to replace intent here, and that may + possibly change access mode (FMODE_EXEC can 
only be + set in intent), but I hope it never happens (I was + not able to trigger it yet at least) -- green */ + /* FIXME: FMODE_EXEC is not covered by O_ACCMODE! */ + LASSERT(!(it->it_flags & FMODE_EXEC)); + LASSERTF((it->it_flags & O_ACCMODE) == + (oit.it_flags & O_ACCMODE), "Changing intent " + "flags %x to incompatible %x\n",it->it_flags, + oit.it_flags); + it = &oit; + rc = ll_intent_file_open(file, NULL, 0, it); + if (rc) + GOTO(out, rc); + rc = it_open_error(DISP_OPEN_OPEN, it); + if (rc) + GOTO(out_och_free, rc); - LASSERTF(rc == 0, "rc = %d\n", rc); + mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, + file->f_dentry->d_inode); + } + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); + rc = ll_local_open(file, it, *och_p); + LASSERTF(rc == 0, "rc = %d\n", rc); + } + up(&lli->lli_och_sem); + /* Must do this outside lli_och_sem lock to prevent deadlock where + different kind of OPEN lock for this same inode gets cancelled + by ldlm_cancel_lru */ if (!S_ISREG(inode->i_mode)) GOTO(out, rc); @@ -268,11 +474,21 @@ int ll_file_open(struct inode *inode, struct file *file) GOTO(out, rc); out: req = LUSTRE_IT(it)->it_data; + ll_intent_drop_lock(it); ll_intent_release(it); - ptlrpc_req_finished(req); - if (rc == 0) + if (rc == 0) { ll_open_complete(inode); + } else { +out_och_free: + if (*och_p) { + OBD_FREE(*och_p, sizeof (struct obd_client_handle)); + *och_p = NULL; /* OBD_FREE writes some magic there */ + (*och_usecount)--; + } + up(&lli->lli_och_sem); + } + return rc; } @@ -424,6 +640,7 @@ void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, for (i = start; i <= end; i += (j + skip)) { j = min(count - (i % count), end - i + 1); + LASSERT(j > 0); LASSERT(inode->i_mapping); if (ll_teardown_mmaps(inode->i_mapping, i << PAGE_CACHE_SHIFT, ((i+j) << PAGE_CACHE_SHIFT) - 1) ) @@ -545,13 +762,8 @@ static int ll_extent_lock_callback(struct ldlm_lock *lock, goto iput; ll_pgcache_remove_extent(inode, lsm, lock, stripe); - /* 
grabbing the i_sem will wait for write() to complete. ns - * lock hold times should be very short as ast processing - * requires them and has a short timeout. so, i_sem before ns - * lock.*/ - - down(&inode->i_sem); l_lock(&lock->l_resource->lr_namespace->ns_lock); + down(&lli->lli_size_sem); kms = ldlm_extent_shift_kms(lock, lsm->lsm_oinfo[stripe].loi_kms); @@ -559,8 +771,8 @@ static int ll_extent_lock_callback(struct ldlm_lock *lock, LDLM_DEBUG(lock, "updating kms from "LPU64" to "LPU64, lsm->lsm_oinfo[stripe].loi_kms, kms); lsm->lsm_oinfo[stripe].loi_kms = kms; + up(&lli->lli_size_sem); l_unlock(&lock->l_resource->lr_namespace->ns_lock); - up(&inode->i_sem); //ll_try_done_writing(inode); iput: iput(inode); @@ -661,6 +873,9 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp) lvb = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*lvb)); lvb->lvb_size = lli->lli_smd->lsm_oinfo[stripe].loi_kms; + lvb->lvb_mtime = LTIME_S(inode->i_mtime); + lvb->lvb_atime = LTIME_S(inode->i_atime); + lvb->lvb_ctime = LTIME_S(inode->i_ctime); LDLM_DEBUG(lock, "i_size: %llu -> stripe number %u -> kms "LPU64, inode->i_size, stripe, lvb->lvb_size); @@ -707,12 +922,17 @@ int ll_glimpse_size(struct inode *inode) RETURN(rc > 0 ? 
-EIO : rc); } + down(&lli->lli_size_sem); inode->i_size = lov_merge_size(lli->lli_smd, 0); inode->i_blocks = lov_merge_blocks(lli->lli_smd); - //inode->i_mtime = lov_merge_mtime(lli->lli_smd, inode->i_mtime); + up(&lli->lli_size_sem); + + LTIME_S(inode->i_mtime) = lov_merge_mtime(lli->lli_smd, + LTIME_S(inode->i_mtime)); CDEBUG(D_DLMTRACE, "glimpse: size: "LPU64", blocks: "LPU64"\n", (__u64)inode->i_size, (__u64)inode->i_blocks); + obd_cancel(sbi->ll_dt_exp, lli->lli_smd, LCK_PR, &lockh); RETURN(rc); } @@ -733,6 +953,7 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, ldlm_policy_data_t *policy, struct lustre_handle *lockh, int ast_flags, struct obd_service_time *stime) { + struct ll_inode_info *lli = ll_i2info(inode); struct ll_sb_info *sbi = ll_i2sbi(inode); struct timeval start; int rc; @@ -768,11 +989,15 @@ int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, * when doing appending writes and effectively cancel the * result of the truncate. Getting the i_sem after the enqueue * maintains the DLM -> i_sem acquiry order. 
*/ - down(&inode->i_sem); + down(&lli->lli_size_sem); inode->i_size = lov_merge_size(lsm, 1); - up(&inode->i_sem); + up(&lli->lli_size_sem); + } + + if (rc == 0) { + LTIME_S(inode->i_mtime) = + lov_merge_mtime(lsm, LTIME_S(inode->i_mtime)); } - //inode->i_mtime = lov_merge_mtime(lsm, inode->i_mtime); RETURN(rc); } @@ -831,15 +1056,18 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, if (rc != 0) RETURN(rc); + down(&lli->lli_size_sem); kms = lov_merge_size(lsm, 1); if (*ppos + count - 1 > kms) { /* A glimpse is necessary to determine whether we return a short * read or some zeroes at the end of the buffer */ + up(&lli->lli_size_sem); retval = ll_glimpse_size(inode); if (retval) goto out; } else { inode->i_size = kms; + up(&lli->lli_size_sem); } CDEBUG(D_INFO, "Read ino %lu, "LPSZ" bytes, offset %lld, i_size %llu\n", @@ -1001,8 +1229,15 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, struct ptlrpc_request *req = NULL; int rc = 0; struct lustre_md md; + struct obd_client_handle *och; ENTRY; + + if ((file->f_flags+1) & O_ACCMODE) + oit.it_flags++; + if (file->f_flags & O_TRUNC) + oit.it_flags |= 2; + down(&lli->lli_open_sem); lsm = lli->lli_smd; if (lsm) { @@ -1018,6 +1253,7 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, f->f_dentry = file->f_dentry; f->f_vfsmnt = file->f_vfsmnt; + f->f_flags = flags; rc = ll_intent_alloc(&oit); if (rc) @@ -1040,12 +1276,30 @@ static int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file, GOTO(out, rc); ll_update_inode(f->f_dentry->d_inode, &md); - rc = ll_local_open(f, &oit); - if (rc) + OBD_ALLOC(och, sizeof(struct obd_client_handle)); + rc = ll_local_open(f, &oit, och); + if (rc) { /* Actually ll_local_open cannot fail! 
*/ GOTO(out, rc); + } + if (LUSTRE_IT(&oit)->it_lock_mode) { + ldlm_lock_decref_and_cancel((struct lustre_handle *) + &LUSTRE_IT(&oit)->it_lock_handle, + LUSTRE_IT(&oit)->it_lock_mode); + LUSTRE_IT(&oit)->it_lock_mode = 0; + } + ll_intent_release(&oit); + /* ll_file_release will decrease the count, but won't free anything + because we have at least one more reference coming from actual open + */ + down(&lli->lli_och_sem); + lli->lli_open_fd_write_count++; + up(&lli->lli_och_sem); rc = ll_file_release(f->f_dentry->d_inode, f); + + /* Now also destroy our supplemental och */ + ll_md_och_close(ll_i2mdexp(inode), f->f_dentry->d_inode, och); EXIT; out: ll_intent_release(&oit); @@ -1118,7 +1372,7 @@ static int ll_lov_getstripe(struct inode *inode, unsigned long arg) RETURN(-ENODATA); return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0, lsm, - (void *)arg); + (void *)arg); } static int ll_get_grouplock(struct inode *inode, struct file *file, @@ -1289,6 +1543,7 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK); if (origin == 2) { /* SEEK_END */ ldlm_policy_data_t policy = { .l_extent = {0, OBD_OBJECT_EOF }}; + struct ll_inode_info *lli = ll_i2info(inode); int nonblock = 0, rc; if (file->f_flags & O_NONBLOCK) @@ -1299,7 +1554,9 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) if (rc != 0) RETURN(rc); + down(&lli->lli_size_sem); offset += inode->i_size; + up(&lli->lli_size_sem); } else if (origin == 1) { /* SEEK_CUR */ offset += file->f_pos; } @@ -1522,6 +1779,7 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) { int res = 0; struct inode *inode = de->d_inode; + struct ll_inode_info *lli = ll_i2info(inode); res = ll_inode_revalidate_it(de); lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); @@ -1537,9 +1795,13 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) stat->atime = inode->i_atime; 
stat->mtime = inode->i_mtime; stat->ctime = inode->i_ctime; - stat->size = inode->i_size; stat->blksize = inode->i_blksize; + + down(&lli->lli_size_sem); + stat->size = inode->i_size; stat->blocks = inode->i_blocks; + up(&lli->lli_size_sem); + stat->rdev = kdev_t_to_nr(inode->i_rdev); stat->dev = id_group(&ll_i2info(inode)->lli_id); return 0; diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index ec99d29..5839ba9 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -10,6 +10,8 @@ #ifndef LLITE_INTERNAL_H #define LLITE_INTERNAL_H +#include + /* default to about 40meg of readahead on a given system. That much tied * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */ #define SBI_DEFAULT_RA_MAX ((40 << 20) >> PAGE_CACHE_SHIFT) @@ -130,9 +132,9 @@ extern kmem_cache_t *ll_file_data_slab; extern kmem_cache_t *ll_intent_slab; struct lustre_handle; struct ll_file_data { - struct obd_client_handle fd_mds_och; struct ll_readahead_state fd_ras; __u32 fd_flags; + int fd_omode; struct lustre_handle fd_cwlockh; unsigned long fd_gid; }; @@ -174,14 +176,20 @@ struct ll_async_page { /* only trust these if the page lock is providing exclusion */ unsigned llap_write_queued:1, llap_defer_uptodate:1, + llap_origin:3, llap_ra_used:1; struct list_head llap_proc_item; }; -#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) 
\ - CDEBUG(mask, "page %p map %p ind %lu priv %0lx: " fmt, \ - page, page->mapping, page->index, page->private, ## arg) +enum { + LLAP_ORIGIN_UNKNOWN = 0, + LLAP_ORIGIN_READPAGE, + LLAP_ORIGIN_READAHEAD, + LLAP_ORIGIN_COMMIT_WRITE, + LLAP_ORIGIN_WRITEPAGE, + LLAP__ORIGIN_MAX, +}; /* llite/lproc_llite.c */ int lprocfs_register_mountpoint(struct proc_dir_entry *parent, @@ -212,7 +220,7 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc); void ll_removepage(struct page *page); int ll_readpage(struct file *file, struct page *page); struct ll_async_page *llap_from_cookie(void *cookie); -struct ll_async_page *llap_from_page(struct page *page); +struct ll_async_page *llap_from_page(struct page *page, unsigned origin); struct ll_async_page *llap_cast_private(struct page *page); void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras); @@ -222,6 +230,8 @@ void ll_truncate(struct inode *inode); /* llite/file.c */ extern struct file_operations ll_file_operations; extern struct inode_operations ll_file_inode_operations; +int ll_md_real_close(struct obd_export *md_exp, + struct inode *inode, int flags); extern int ll_inode_revalidate_it(struct dentry *); extern int ll_setxattr(struct dentry *, const char *, const void *, size_t, int); @@ -240,9 +250,15 @@ int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *); int ll_glimpse_size(struct inode *inode); -int ll_local_open(struct file *file, struct lookup_intent *it); +int ll_local_open(struct file *file, struct lookup_intent *it, + struct obd_client_handle *och); int ll_md_close(struct obd_export *md_exp, struct inode *inode, - struct file *file); + struct file *file); +int ll_md_och_close(struct obd_export *md_exp, struct inode *inode, + struct obd_client_handle *och); +void ll_och_fill(struct inode *inode, struct lookup_intent *it, + struct 
obd_client_handle *och); + #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); #endif @@ -282,7 +298,8 @@ extern struct super_operations lustre_super_operations; char *ll_read_opt(const char *opt, char *data); int ll_set_opt(const char *opt, char *data, int fl); -void ll_options(char *options, char **ost, char **mds, char **sec, int *flags); +void ll_options(char *options, char **ost, char **mds, char **sec, + int *async, int *flags); void ll_lli_init(struct ll_inode_info *lli); int ll_fill_super(struct super_block *sb, void *data, int silent); int lustre_fill_super(struct super_block *sb, void *data, int silent); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 338a597..31b2468 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -124,7 +124,7 @@ int lustre_init_dt_desc(struct ll_sb_info *sbi) extern struct dentry_operations ll_d_ops; int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov, - char *security, __u32 *nllu) + char *security, __u32 *nllu, int async) { struct ll_sb_info *sbi = ll_s2sbi(sb); struct ptlrpc_request *request = NULL; @@ -143,6 +143,8 @@ int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov, CERROR("MDC %s: not setup or attached\n", lmv); RETURN(-EINVAL); } + obd_set_info(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async); if (security == NULL) security = "null"; @@ -172,7 +174,7 @@ int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov, err = obd_connect(&md_conn, obd, &sbi->ll_sb_uuid, OBD_OPT_REAL_CLIENT); if (err == -EBUSY) { - CERROR("An MDS (mdc %s) is performing recovery, of which this" + CERROR("An MDS (lmv %s) is performing recovery, of which this" " client is not a part. 
Please wait for recovery to " "complete, abort, or time out.\n", lmv); GOTO(out, err); @@ -205,10 +207,12 @@ int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov, CERROR("OSC %s: not setup or attached\n", lov); GOTO(out_lmv, err); } + obd_set_info(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async); err = obd_connect(&dt_conn, obd, &sbi->ll_sb_uuid, OBD_OPT_REAL_CLIENT); if (err == -EBUSY) { - CERROR("An OST (osc %s) is performing recovery, of which this" + CERROR("An OST (lov %s) is performing recovery, of which this" " client is not a part. Please wait for recovery to " "complete, abort, or time out.\n", lov); GOTO(out, err); @@ -271,10 +275,11 @@ int lustre_common_fill_super(struct super_block *sb, char *lmv, char *lov, ll_gns_add_timer(sbi); - /* making vm readahead 0 for 2.4.x. In the case of 2.6.x, backing dev - info assigned to inode mapping is used for determining maximal - readahead. */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) + /* making vm readahead 0 for 2.4.x. In the case of 2.6.x, + backing dev info assigned to inode mapping is used for + determining maximal readahead. */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \ + !defined(KERNEL_HAS_AS_MAX_READAHEAD) /* bug 2805 - set VM readahead to zero */ vm_max_readahead = vm_min_readahead = 0; #endif @@ -321,14 +326,12 @@ void lustre_common_put_super(struct super_block *sb) obd_disconnect(sbi->ll_md_exp, 0); // We do this to get rid of orphaned dentries. That is not really trw. 
- spin_lock(&dcache_lock); hlist_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { struct dentry *dentry = hlist_entry(tmp, struct dentry, d_hash); - CWARN("orphan dentry %*s (%p) at unmount\n", - dentry->d_name.len, dentry->d_name.name, dentry); + CWARN("orphan dentry %.*s (%p->%p) at unmount\n", + dentry->d_name.len, dentry->d_name.name, dentry, next); shrink_dcache_parent(dentry); } - spin_unlock(&dcache_lock); EXIT; } @@ -367,7 +370,8 @@ int ll_set_opt(const char *opt, char *data, int fl) RETURN(fl); } -void ll_options(char *options, char **lov, char **lmv, char **sec, int *flags) +void ll_options(char *options, char **lov, char **lmv, char **sec, + int *async, int *flags) { char *this_char; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) @@ -380,6 +384,7 @@ void ll_options(char *options, char **lov, char **lmv, char **sec, int *flags) return; } + *async = 0; #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) for (this_char = strtok (options, ","); this_char != NULL; @@ -392,6 +397,10 @@ void ll_options(char *options, char **lov, char **lmv, char **sec, int *flags) continue; if (!*lmv && (*lmv = ll_read_opt("mdc", this_char))) continue; + if (!strncmp(this_char, "lasync", strlen("lasync"))) { + *async = 1; + continue; + } if (!*sec && (*sec = ll_read_opt("sec", this_char))) continue; if (!(*flags & LL_SBI_NOLCK) && @@ -407,12 +416,18 @@ void ll_options(char *options, char **lov, char **lmv, char **sec, int *flags) void ll_lli_init(struct ll_inode_info *lli) { sema_init(&lli->lli_open_sem, 1); + sema_init(&lli->lli_size_sem, 1); lli->lli_flags = 0; lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; spin_lock_init(&lli->lli_lock); INIT_LIST_HEAD(&lli->lli_pending_write_llaps); lli->lli_inode_magic = LLI_INODE_MAGIC; memset(&lli->lli_id, 0, sizeof(lli->lli_id)); + sema_init(&lli->lli_och_sem, 1); + lli->lli_mds_read_och = lli->lli_mds_write_och = NULL; + lli->lli_mds_exec_och = NULL; + lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0; + 
lli->lli_open_fd_exec_count = 0; } int ll_fill_super(struct super_block *sb, void *data, int silent) @@ -420,9 +435,9 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) struct ll_sb_info *sbi; char *lov = NULL; char *lmv = NULL; + int async, err; char *sec = NULL; __u32 nllu[2] = { 99, 99 }; - int err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); @@ -432,7 +447,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) RETURN(-ENOMEM); sbi->ll_flags |= LL_SBI_READAHEAD; - ll_options(data, &lov, &lmv, &sec, &sbi->ll_flags); + ll_options(data, &lov, &lmv, &sec, &async, &sbi->ll_flags); if (!lov) { CERROR("no osc\n"); @@ -444,7 +459,7 @@ int ll_fill_super(struct super_block *sb, void *data, int silent) GOTO(out, err = -EINVAL); } - err = lustre_common_fill_super(sb, lmv, lov, sec, nllu); + err = lustre_common_fill_super(sb, lmv, lov, sec, nllu, async); EXIT; out: if (err) @@ -607,6 +622,35 @@ out: return rc; } +static void lustre_manual_cleanup(struct ll_sb_info *sbi) +{ + struct lustre_cfg lcfg; + struct obd_device *obd; + int next = 0; + + while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) + { + int err; + + LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name); + err = class_process_config(&lcfg); + if (err) { + CERROR("cleanup failed: %s\n", obd->obd_name); + //continue; + } + + LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name); + err = class_process_config(&lcfg); + if (err) { + CERROR("detach failed: %s\n", obd->obd_name); + //continue; + } + } + + if (sbi->ll_lmd != NULL) + class_del_profile(sbi->ll_lmd->lmd_profile); +} + int lustre_fill_super(struct super_block *sb, void *data, int silent) { struct lustre_mount_data * lmd = data; @@ -687,7 +731,7 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) } err = lustre_common_fill_super(sb, lmv, lov, lmd->lmd_security, - &lmd->lmd_nllu); + &lmd->lmd_nllu, lmd->lmd_async); if (err) GOTO(out_free, err); @@ -707,57 +751,28 @@ out_free: if (sbi->ll_instance 
!= NULL) { struct lustre_mount_data *lmd = sbi->ll_lmd; - char * cln_prof; struct config_llog_instance cfg; + char *cl_prof; cfg.cfg_instance = sbi->ll_instance; cfg.cfg_uuid = sbi->ll_sb_uuid; - OBD_ALLOC(cln_prof, len); - sprintf(cln_prof, "%s-clean", lmd->lmd_profile); - - err = lustre_process_log(lmd, cln_prof, &cfg, 0); - if (err < 0) - CERROR("Unable to process log: %s\n", cln_prof); - OBD_FREE(cln_prof, len); - OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance)+ 1); + OBD_ALLOC(cl_prof, len); + sprintf(cl_prof, "%s-clean", lmd->lmd_profile); + err = lustre_process_log(lmd, cl_prof, &cfg, 0); + if (err < 0) { + CERROR("Unable to process log: %s\n", cl_prof); + lustre_manual_cleanup(sbi); + } + OBD_FREE(cl_prof, len); + OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1); } OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd)); } lustre_free_sbi(sb); - goto out_dev; } /* lustre_fill_super */ -static void lustre_manual_cleanup(struct ll_sb_info *sbi) -{ - struct lustre_cfg lcfg; - struct obd_device *obd; - int next = 0; - - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) - { - int err; - - LCFG_INIT(lcfg, LCFG_CLEANUP, obd->obd_name); - err = class_process_config(&lcfg); - if (err) { - CERROR("cleanup failed: %s\n", obd->obd_name); - //continue; - } - - LCFG_INIT(lcfg, LCFG_DETACH, obd->obd_name); - err = class_process_config(&lcfg); - if (err) { - CERROR("detach failed: %s\n", obd->obd_name); - //continue; - } - } - - if (sbi->ll_lmd != NULL) - class_del_profile(sbi->ll_lmd->lmd_profile); -} - void lustre_put_super(struct super_block *sb) { struct obd_device *obd; @@ -773,7 +788,7 @@ void lustre_put_super(struct super_block *sb) lustre_common_put_super(sb); if (sbi->ll_lmd != NULL) { - char * cln_prof; + char *cl_prof; int len = strlen(sbi->ll_lmd->lmd_profile) + sizeof("-clean")+1; int err; struct config_llog_instance cfg; @@ -787,17 +802,16 @@ void lustre_put_super(struct super_block *sb) cfg.cfg_instance = sbi->ll_instance; cfg.cfg_uuid 
= sbi->ll_sb_uuid; - OBD_ALLOC(cln_prof, len); - sprintf(cln_prof, "%s-clean", sbi->ll_lmd->lmd_profile); - - err = lustre_process_log(sbi->ll_lmd, cln_prof, &cfg, 0); + OBD_ALLOC(cl_prof, len); + sprintf(cl_prof, "%s-clean", sbi->ll_lmd->lmd_profile); + err = lustre_process_log(sbi->ll_lmd, cl_prof, &cfg, 0); if (err < 0) { CERROR("Unable to process log: %s, doing manual cleanup" - "\n", cln_prof); + "\n", cl_prof); lustre_manual_cleanup(sbi); } - OBD_FREE(cln_prof, len); + OBD_FREE(cl_prof, len); free_lmd: OBD_FREE(sbi->ll_lmd, sizeof(*sbi->ll_lmd)); OBD_FREE(sbi->ll_instance, strlen(sbi->ll_instance) + 1); @@ -887,8 +901,11 @@ struct inode *ll_inode_from_lock(struct ldlm_lock *lock) if (lli->lli_inode_magic == LLI_INODE_MAGIC) { inode = igrab(lock->l_ast_data); } else { - CERROR("DEBUG: l_ast_data %p is bogus: magic %x\n", + inode = lock->l_ast_data; + CDEBUG(inode->i_state & I_FREEING ? D_INFO : D_WARNING, + "l_ast_data %p is bogus: magic %0x8\n", lock->l_ast_data, lli->lli_inode_magic); + inode = NULL; } } l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -923,6 +940,17 @@ void ll_clear_inode(struct inode *inode) clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(ll_i2info(inode)->lli_flags)); md_change_cbdata(sbi->ll_md_exp, &id, null_if_equal, inode); + LASSERT(!lli->lli_open_fd_write_count); + LASSERT(!lli->lli_open_fd_read_count); + LASSERT(!lli->lli_open_fd_exec_count); + + if (lli->lli_mds_write_och) + ll_md_real_close(sbi->ll_md_exp, inode, FMODE_WRITE); + if (lli->lli_mds_exec_och) + ll_md_real_close(sbi->ll_md_exp, inode, FMODE_EXEC); + if (lli->lli_mds_read_och) + ll_md_real_close(sbi->ll_md_exp, inode, FMODE_READ); + if (lli->lli_smd) obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd, null_if_equal, inode); @@ -943,6 +971,7 @@ void ll_clear_inode(struct inode *inode) strlen(lli->lli_symlink_name) + 1); lli->lli_symlink_name = NULL; } + lli->lli_inode_magic = LLI_INODE_DEAD; EXIT; } @@ -1015,6 +1044,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr 
*attr) * inode ourselves so we can call obdo_from_inode() always. */ if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN /*| ATTR_RAW*/) : ~0)) { struct lustre_md md; + int save_valid; OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) @@ -1038,9 +1068,16 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) RETURN(rc); } - /* Won't invoke vmtruncate as we already cleared ATTR_SIZE, - * but needed to set timestamps backwards on utime. */ + /* We call inode_setattr to adjust timestamps, but we first + * clear ATTR_SIZE to avoid invoking vmtruncate. + * + * NB: ATTR_SIZE will only be set at this point if the size + * resides on the MDS, ie, this file has no objects. */ + save_valid = attr->ia_valid; + attr->ia_valid &= ~ATTR_SIZE; inode_setattr(inode, attr); + attr->ia_valid = save_valid; + ll_update_inode(inode, &md); ptlrpc_req_finished(request); @@ -1081,6 +1118,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) ldlm_policy_data_t policy = { .l_extent = {attr->ia_size, OBD_OBJECT_EOF } }; struct lustre_handle lockh = { 0 }; + struct ll_inode_info *lli = ll_i2info(inode); int err, ast_flags = 0; /* XXX when we fix the AST intents to pass the discard-range * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA @@ -1088,38 +1126,20 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) if (attr->ia_size == 0) ast_flags = LDLM_AST_DISCARD_DATA; - /* bug 1639: avoid write/truncate i_sem/DLM deadlock */ - LASSERT(atomic_read(&inode->i_sem.count) <= 0); - up(&inode->i_sem); - UP_WRITE_I_ALLOC_SEM(inode); rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh, ast_flags, &ll_i2sbi(inode)->ll_seek_stime); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - DOWN_WRITE_I_ALLOC_SEM(inode); - down(&inode->i_sem); -#else - down(&inode->i_sem); - DOWN_WRITE_I_ALLOC_SEM(inode); -#endif + if (rc != 0) RETURN(rc); + down(&lli->lli_size_sem); rc = vmtruncate(inode, attr->ia_size); + if (rc != 0) { + 
LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); + up(&lli->lli_size_sem); + } - /* We need to drop the semaphore here, because this unlock may - * result in a cancellation, which will need the i_sem */ - up(&inode->i_sem); - UP_WRITE_I_ALLOC_SEM(inode); - /* unlock now as we don't mind others file lockers racing with - * the mds updates below? */ err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - DOWN_WRITE_I_ALLOC_SEM(inode); - down(&inode->i_sem); -#else - down(&inode->i_sem); - DOWN_WRITE_I_ALLOC_SEM(inode); -#endif if (err) { CERROR("ll_extent_unlock failed: %d\n", err); if (!rc) @@ -1164,7 +1184,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age); if (rc) { - CERROR("mdc_statfs fails: rc = %d\n", rc); + CERROR("obd_statfs fails: rc = %d\n", rc); RETURN(rc); } @@ -1244,8 +1264,12 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) ENTRY; LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - LASSERT((mea != NULL) == ((body->valid & OBD_MD_FLDIREA) != 0)); + if (md->lsm && md->lsm->lsm_magic != LOV_MAGIC) { + /* check for default striping info for dir. 
*/ + LASSERT((mea != NULL) == ((body->valid & OBD_MD_FLDIREA) != 0)); + } + if (lsm != NULL) { LASSERT(lsm->lsm_object_gr > 0); if (lli->lli_smd == NULL) { @@ -1575,8 +1599,8 @@ int ll_iocontrol(struct inode *inode, struct file *file, void ll_umount_begin(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; struct obd_ioctl_data ioc_data = { 0 }; + struct obd_device *obd; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb, diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index ff3eefb..26bac6c 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -91,6 +91,10 @@ struct ll_lock_tree_node * ll_node_from_inode(struct inode *inode, __u64 start, int lt_compare(struct ll_lock_tree_node *one, struct ll_lock_tree_node *two) { + /* XXX remove this assert when we really want to use this function + * to compare different file's region */ + LASSERT(one->lt_oid == two->lt_oid); + if ( one->lt_oid < two->lt_oid) return -1; if ( one->lt_oid > two->lt_oid) @@ -212,6 +216,8 @@ int ll_tree_lock(struct ll_lock_tree *tree, if (first_node != NULL) lt_insert(tree, first_node); + /* order locking. what we have to concern about is ONLY double lock: + * the buffer is mapped to exactly this file. 
*/ if (mapping_mapped(inode->i_mapping)) { rc = lt_get_mmap_locks(tree, inode, (unsigned long)buf, count); if (rc) @@ -259,7 +265,9 @@ static void policy_from_vma(ldlm_policy_data_t *policy, policy->l_extent.end = (policy->l_extent.start + count - 1) | (PAGE_CACHE_SIZE - 1); } -static struct vm_area_struct * our_vma(unsigned long addr, size_t count) + +static struct vm_area_struct *our_vma(unsigned long addr, size_t count, + struct inode *inode) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *ret = NULL; @@ -268,7 +276,8 @@ static struct vm_area_struct * our_vma(unsigned long addr, size_t count) spin_lock(&mm->page_table_lock); for(vma = find_vma(mm, addr); vma != NULL && vma->vm_start < (addr + count); vma = vma->vm_next) { - if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage) { + if (vma->vm_ops && vma->vm_ops->nopage == ll_nopage && + vma->vm_file && vma->vm_file->f_dentry->d_inode == inode) { ret = vma; break; } @@ -292,7 +301,7 @@ int lt_get_mmap_locks(struct ll_lock_tree *tree, struct inode *inode, count += addr & (PAGE_SIZE - 1); addr -= addr & (PAGE_SIZE - 1); - while ((vma = our_vma(addr, count)) != NULL) { + while ((vma = our_vma(addr, count, inode)) != NULL) { policy_from_vma(&policy, vma, addr, count); node = ll_node_from_inode(inode, policy.l_extent.start, @@ -360,7 +369,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, mode = mode_from_vma(vma); stime = (mode & LCK_PW) ? &ll_i2sbi(inode)->ll_write_stime : &ll_i2sbi(inode)->ll_read_stime; - + rc = ll_extent_lock(fd, inode, ll_i2info(inode)->lli_smd, mode, &policy, &lockh, LDLM_FL_CBPENDING, stime); if (rc != 0) @@ -404,13 +413,14 @@ static inline unsigned long file_to_user(struct vm_area_struct *vma, { return vma->vm_start + (byte - ((__u64)vma->vm_pgoff << PAGE_CACHE_SHIFT)); - } #define VMA_DEBUG(vma, fmt, arg...) 
\ - CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p): " \ - fmt, vma, vma->vm_start, vma->vm_end, vma->vm_pgoff, \ - vma->vm_file->f_dentry->d_inode, ## arg); + CDEBUG(D_MMAP, "vma(%p) start(%ld) end(%ld) pgoff(%ld) inode(%p) " \ + "ino(%lu) iname(%s): " fmt, vma, vma->vm_start, vma->vm_end, \ + vma->vm_pgoff, vma->vm_file->f_dentry->d_inode, \ + vma->vm_file->f_dentry->d_inode->i_ino, \ + vma->vm_file->f_dentry->d_iname, ## arg); \ #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) /* [first, last] are the byte offsets affected. @@ -422,20 +432,32 @@ static void teardown_vmas(struct vm_area_struct *vma, __u64 first, { unsigned long address, len; for (; vma ; vma = vma->vm_next_share) { - if (last >> PAGE_CACHE_SHIFT < vma->vm_pgoff) + if (last >> PAGE_SHIFT < vma->vm_pgoff) continue; if (first >> PAGE_CACHE_SHIFT > (vma->vm_pgoff + ((vma->vm_end - vma->vm_start) >> PAGE_CACHE_SHIFT))) continue; - address = max((unsigned long)vma->vm_start, + /* XXX in case of unmap the cow pages of a running file, + * don't unmap these private writeable mapping here! + * though that will break private mappping a little. + * + * the clean way is to check the mapping of every page + * and just unmap the non-cow pages, just like + * unmap_mapping_range() with even_cow=0 in kernel 2.6. 
+ */ + if (!(vma->vm_flags & VM_SHARED) && + (vma->vm_flags & VM_WRITE)) + continue; + + address = max((unsigned long)vma->vm_start, file_to_user(vma, first)); len = min((unsigned long)vma->vm_end, file_to_user(vma, last) + 1) - address; - VMA_DEBUG(vma, "zapping vma [address=%ld len=%ld]\n", - address, len); - LASSERT(vma->vm_mm); + VMA_DEBUG(vma, "zapping vma [first="LPU64" last="LPU64" " + "address=%ld len=%ld]\n", first, last, address, len); + LASSERT(len > 0); ll_zap_page_range(vma, address, len); } } @@ -449,11 +471,12 @@ int ll_teardown_mmaps(struct address_space *mapping, __u64 first, int rc = -ENOENT; ENTRY; + LASSERT(last > first); #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) if (mapping_mapped(mapping)) { rc = 0; unmap_mapping_range(mapping, first + PAGE_SIZE - 1, - last - first + 1, 1); + last - first + 1, 0); } #else spin_lock(&mapping->i_shared_lock); diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index 2d35405..6b2c3c8 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -37,10 +37,25 @@ __u32 get_uuid2int(const char *name, int len) return (key0 << 1); } -static struct inode *search_inode_for_lustre(struct super_block *sb, - unsigned long ino, - unsigned long generation, - int mode) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static int ll_nfs_test_inode(struct inode *inode, unsigned long ino, + void *opaque) +#else +static int ll_nfs_test_inode(struct inode *inode, void *opaque) +#endif +{ + struct lustre_id *iid = opaque; + + if (inode->i_ino == id_ino(iid) && + inode->i_generation == id_gen(iid)) + return 1; + + return 0; +} +static struct inode * search_inode_for_lustre(struct super_block *sb, + unsigned long ino, + unsigned long generation, + int mode) { struct ptlrpc_request *req = NULL; struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -48,8 +63,11 @@ static struct inode *search_inode_for_lustre(struct super_block *sb, __u64 valid = 0; int eadatalen = 0, rc; struct inode *inode = NULL; - - inode = 
ILOOKUP(sb, ino, NULL, NULL); + struct lustre_id iid; + + id_ino(&iid) = (__u64)ino; + id_gen(&iid) = generation; + inode = ILOOKUP(sb, ino, ll_nfs_test_inode, &iid); if (inode) return inode; @@ -95,20 +113,17 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, unsigned long ino, if (IS_ERR(inode)) { return ERR_PTR(PTR_ERR(inode)); } - if (is_bad_inode(inode) - || (generation && inode->i_generation != generation) - ){ + if (is_bad_inode(inode) || + (generation && inode->i_generation != generation)){ /* we didn't find the right inode.. */ - CERROR(" Inode %lu, Bad count: %lu %d or version %u %u\n", - inode->i_ino, - (unsigned long)inode->i_nlink, - atomic_read(&inode->i_count), - inode->i_generation, - generation); + CERROR(" Inode %lu, Bad count: %lu %d or version %u %u\n", + inode->i_ino, (unsigned long)inode->i_nlink, + atomic_read(&inode->i_count), inode->i_generation, + generation); iput(inode); return ERR_PTR(-ESTALE); } - + /* now to find a dentry. * If possible, get a well-connected one */ diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 815c1ac..e796f4d 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -26,6 +26,9 @@ #include #include #include +#ifdef HAVE_MM_INLINE +#include +#endif #include "llite_internal.h" @@ -642,8 +645,8 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v) /* 2.4 doesn't seem to have SEQ_START_TOKEN, so we implement * it in our own state */ if (dummy_llap->llap_magic == 0) { - seq_printf(seq, "generation | llap .cookie | page "); - seq_printf(seq, "inode .index [ page flags ]\n"); + seq_printf(seq, "generation | llap cookie origin | page "); + seq_printf(seq, "inode index count [ page flags ]\n"); return 0; } @@ -653,11 +656,23 @@ static int llite_dump_pgcache_seq_show(struct seq_file *seq, void *v) if (llap != NULL) { int has_flags = 0; struct page *page = llap->llap_page; - - seq_printf(seq, "%lu | %p %p | %p %p %lu [", + static char *origins[] = { 
+ [LLAP_ORIGIN_UNKNOWN] = "--", + [LLAP_ORIGIN_READPAGE] = "rp", + [LLAP_ORIGIN_READAHEAD] = "ra", + [LLAP_ORIGIN_COMMIT_WRITE] = "cw", + [LLAP_ORIGIN_WRITEPAGE] = "wp", + }; + + LASSERTF(llap->llap_origin < LLAP__ORIGIN_MAX, "%u\n", + llap->llap_origin); + + seq_printf(seq, "%lu | %p %p %s | %p %p %lu %u [", sbi->ll_pglist_gen, llap, llap->llap_cookie, - page, page->mapping->host, page->index); + origins[llap->llap_origin], + page, page->mapping->host, page->index, + page_count(page)); seq_page_flag(seq, page, locked, has_flags); seq_page_flag(seq, page, error, has_flags); seq_page_flag(seq, page, referenced, has_flags); @@ -814,7 +829,7 @@ static int ll_ra_stats_seq_show(struct seq_file *seq, void *v) spin_lock(&sbi->ll_lock); - seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "pending issued pages: %lu\n", ra->ra_cur_pages); @@ -898,7 +913,7 @@ static int llite_wait_times_seq_show(struct seq_file *seq, void *v) spin_lock(&sbi->ll_lock); - seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n\n", now.tv_sec, now.tv_usec); seq_printf(seq, "lock wait times: (num, average ms)\n"); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index d291096..f8f9a17 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -183,6 +183,31 @@ int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, (unsigned long)id_group(&li->lli_id)); } + if (bits & MDS_INODELOCK_OPEN) { + int flags = 0; + switch (lock->l_req_mode) { + case LCK_CW: + flags = FMODE_WRITE; + break; + case LCK_PR: + flags = FMODE_EXEC; + break; + case LCK_CR: + flags = FMODE_READ; + break; + default: + CERROR("Unexpected lock mode for OPEN lock " + "%d, inode %ld\n", lock->l_req_mode, + inode->i_ino); + } + ll_md_real_close(ll_i2mdexp(inode), inode, flags); + } + + if (bits & MDS_INODELOCK_UPDATE) + 
clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, + &(ll_i2info(inode)->lli_flags)); + + /* If lookup lock is cancelled, we just drop the dentry and this will cause us to reget data from MDS when we'd want to access this dentry/inode again. If this is lock on @@ -340,9 +365,12 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, int rc, orig_it; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, parent->i_ino, parent->i_generation, - parent, LL_IT2STR(it)); + if (dentry->d_name.len > EXT3_NAME_LEN) + RETURN(ERR_PTR(-ENAMETOOLONG)); + + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n", + dentry->d_name.len, dentry->d_name.name, parent->i_ino, + parent->i_generation, parent, LL_IT2STR(it)); if (d_mountpoint(dentry)) CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it)); @@ -481,9 +509,9 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, int rc = 0; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, dir->i_ino, dir->i_generation, dir, - LL_IT2STR(it)); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n", + dentry->d_name.len, dentry->d_name.name, dir->i_ino, + dir->i_generation, dir, LL_IT2STR(it)); rc = it_open_error(DISP_OPEN_CREATE, it); if (rc) @@ -528,15 +556,13 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) { struct ptlrpc_request *request = NULL; struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data *op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", + nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); mode &= ~current->fs->umask; @@ -551,7 +577,8 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, 
dev_t rdev) OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, + nd->last.len, 0); err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode, current->fsuid, current->fsgid, rdev, &request); @@ -569,20 +596,19 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) RETURN(err); } -static int ll_mknod(struct inode *dir, struct dentry *child, +static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode, ll_dev_t rdev) { struct ptlrpc_request *request = NULL; struct inode *inode = NULL; - const char *name = child->d_name.name; - int len = child->d_name.len; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data *op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", + dchild->d_name.len, dchild->d_name.name, + dir->i_ino, dir->i_generation, dir); mode &= ~current->fs->umask; @@ -597,7 +623,8 @@ static int ll_mknod(struct inode *dir, struct dentry *child, OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_data(op_data, dir, NULL, dchild->d_name.name, + dchild->d_name.len, 0); err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode, current->fsuid, current->fsgid, rdev, &request); @@ -606,9 +633,8 @@ static int ll_mknod(struct inode *dir, struct dentry *child, GOTO(out_err, err); ll_update_times(request, 0, dir); - err = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp, - &inode, request, 0, child->d_sb); + &inode, request, 0, dchild->d_sb); if (err) GOTO(out_err, err); break; @@ -619,7 +645,7 @@ static int ll_mknod(struct inode *dir, struct dentry *child, RETURN(-EINVAL); } - d_instantiate(child, inode); + d_instantiate(dchild, inode); EXIT; out_err: 
ptlrpc_req_finished(request); @@ -629,17 +655,21 @@ static int ll_mknod(struct inode *dir, struct dentry *child, static int ll_symlink_raw(struct nameidata *nd, const char *tgt) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); + const char *name = nd->last.name; struct mdc_op_data *op_data; + int len = nd->last.len; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n", - name, dir->i_ino, dir->i_generation, dir, tgt); - + CDEBUG(D_VFSTRACE, "VFS Op:name=%*s,dir=%lu/%u(%p),target=%s\n", + nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, + dir, tgt); + + if (dir->i_nlink >= EXT3_LINK_MAX) + RETURN(err); + OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); @@ -660,22 +690,22 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { struct inode *src = srcnd->dentry->d_inode; struct inode *dir = tgtnd->dentry->d_inode; - const char *name = tgtnd->last.name; - int len = tgtnd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data *op_data; int err; struct ll_sb_info *sbi = ll_i2sbi(dir); ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n", - src->i_ino, src->i_generation, src, dir->i_ino, dir->i_generation, - dir, name); + CDEBUG(D_VFSTRACE, + "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n", + src->i_ino, src->i_generation, src, dir->i_ino, + dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name); OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, src, dir, name, len, 0); + ll_prepare_mdc_data(op_data, src, dir, tgtnd->last.name, + tgtnd->last.len, 0); err = md_link(sbi->ll_md_exp, op_data, &request); OBD_FREE(op_data, sizeof(*op_data)); if (err == 0) @@ -688,21 +718,20 @@ static int ll_link_raw(struct nameidata *srcnd, struct 
nameidata *tgtnd) static int ll_mkdir_raw(struct nameidata *nd, int mode) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct mdc_op_data *op_data; int err = -EMLINK; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", + nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, + nd->last.len, 0); err = md_create(sbi->ll_md_exp, op_data, NULL, 0, mode, current->fsuid, current->fsgid, 0, &request); OBD_FREE(op_data, sizeof(*op_data)); @@ -715,19 +744,19 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode) static int ll_rmdir_raw(struct nameidata *nd) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data *op_data; int rc; + ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", + nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, dir, NULL, name, len, S_IFDIR); + ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, + nd->last.len, S_IFDIR); rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); OBD_FREE(op_data, sizeof(*op_data)); if (rc == 0) @@ -758,10 +787,12 @@ int ll_objects_destroy(struct ptlrpc_request *request, GOTO(out, rc = -EPROTO); } - /* The MDS sent back the EA because we unlinked the last 
reference - * to this file. Use this EA to unlink the objects on the OST. - * It's opaque so we don't swab here; we leave it to obd_unpackmd() to - * check it is complete and sensible. */ + /* + * the MDS sent back the EA because we unlinked the last reference to + * this file. Use this EA to unlink the objects on the OST. It's opaque + * so we don't swab here; we leave it to obd_unpackmd() to check it is + * complete and sensible. + */ eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL); LASSERT(eadata != NULL); if (eadata == NULL) { @@ -820,19 +851,17 @@ int ll_objects_destroy(struct ptlrpc_request *request, static int ll_unlink_raw(struct nameidata *nd) { struct inode *dir = nd->dentry->d_inode; - const char *name = nd->last.name; - int len = nd->last.len; struct ptlrpc_request *request = NULL; struct mdc_op_data *op_data; int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", + nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); - ll_prepare_mdc_data(op_data, dir, NULL, name, len, 0); + ll_prepare_mdc_data(op_data, dir, NULL, nd->last.name, nd->last.len, 0); rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); OBD_FREE(op_data, sizeof(*op_data)); if (rc) @@ -846,29 +875,28 @@ out: return rc; } -static int ll_rename_raw(struct nameidata *oldnd, struct nameidata *newnd) +static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) { - struct inode *src = oldnd->dentry->d_inode; - struct inode *tgt = newnd->dentry->d_inode; - const char *oldname = oldnd->last.name; - int oldlen = oldnd->last.len; - const char *newname = newnd->last.name; - int newlen = newnd->last.len; + struct inode *src = srcnd->dentry->d_inode; + struct inode *tgt = tgtnd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = 
ll_i2sbi(src); struct mdc_op_data *op_data; int err; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s, src_dir=%lu/%u(%p), newname=%s, " - "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation, - src, newname, tgt->i_ino, tgt->i_generation, tgt); + + CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s," + "tgt_dir=%lu/%u(%p)\n", srcnd->last.len, srcnd->last.name, + src->i_ino, src->i_generation, src, tgtnd->last.len, + tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt); OBD_ALLOC(op_data, sizeof(*op_data)); if (op_data == NULL) RETURN(-ENOMEM); ll_prepare_mdc_data(op_data, src, tgt, NULL, 0, 0); - err = md_rename(sbi->ll_md_exp, op_data, oldname, oldlen, - newname, newlen, &request); + err = md_rename(sbi->ll_md_exp, op_data, srcnd->last.name, + srcnd->last.len, tgtnd->last.name, tgtnd->last.len, + &request); OBD_FREE(op_data, sizeof(*op_data)); if (!err) { ll_update_times(request, 0, src); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index d16f0af..549aec9 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -106,62 +106,74 @@ static int ll_brw(int cmd, struct inode *inode, struct obdo *oa, __u64 lov_merge_size(struct lov_stripe_md *lsm, int kms); -/* this isn't where truncate starts. roughly: +/* + * this isn't where truncate starts. roughly: * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate - * we grab the lock back in setattr_raw to avoid races. */ + * we grab the lock back in setattr_raw to avoid races. + * + * must be called with lli_size_sem held. 
+ */ void ll_truncate(struct inode *inode) { struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + struct ll_inode_info *lli = ll_i2info(inode); struct obdo *oa = NULL; int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) to %llu\n", inode->i_ino, + inode->i_generation, inode, inode->i_size); if (!lsm) { CDEBUG(D_INODE, "truncate on inode %lu with no objects\n", inode->i_ino); - EXIT; - return; + GOTO(out_unlock, 0); } if (lov_merge_size(lsm, 0) == inode->i_size) { CDEBUG(D_VFSTRACE, "skipping punch for "LPX64" (size = %llu)\n", lsm->lsm_object_id, inode->i_size); - } else { - CDEBUG(D_INFO, "calling punch for "LPX64" (new size %llu)\n", - lsm->lsm_object_id, inode->i_size); + GOTO(out_unlock, 0); + } + + CDEBUG(D_INFO, "calling punch for "LPX64" (new size %llu)\n", + lsm->lsm_object_id, inode->i_size); - oa = obdo_alloc(); - if (oa == NULL) { - CERROR("cannot alloc oa, error %d\n", - -ENOMEM); - EXIT; - return; - } - - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - /* truncate == punch from new size to absolute end of file */ - /* NB: obd_punch must be called with i_sem held! It updates the kms! 
*/ - rc = obd_punch(ll_i2dtexp(inode), oa, lsm, inode->i_size, - OBD_OBJECT_EOF, NULL); - if (rc) - CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino); - else - obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - obdo_free(oa); - } + oa = obdo_alloc(); + if (oa == NULL) { + CERROR("cannot alloc oa, error %d\n", + -ENOMEM); + EXIT; + return; + } + + oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; + obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLMODE | + OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + obd_adjust_kms(ll_i2dtexp(inode), lsm, inode->i_size, 1); + + LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); + up(&lli->lli_size_sem); + + rc = obd_punch(ll_i2dtexp(inode), oa, lsm, inode->i_size, + OBD_OBJECT_EOF, NULL); + if (rc) + CERROR("obd_truncate fails (%d) ino %lu\n", rc, inode->i_ino); + else + obdo_to_inode(inode, oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + obdo_free(oa); EXIT; return; + +out_unlock: + LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); + up(&lli->lli_size_sem); } /* ll_truncate */ int ll_prepare_write(struct file *file, struct page *page, unsigned from, @@ -214,7 +226,9 @@ int ll_prepare_write(struct file *file, struct page *page, unsigned from, /* If are writing to a new page, no need to read old data. The extent * locking will have updated the KMS, and for our purposes here we can * treat it like i_size. 
*/ + down(&lli->lli_size_sem); kms = lov_merge_size(lsm, 1); + up(&lli->lli_size_sem); if (kms <= offset) { memset(kmap(page), 0, PAGE_SIZE); kunmap(page); @@ -238,24 +252,13 @@ out_free_oa: return rc; } -struct ll_async_page *llap_from_cookie(void *cookie) -{ - struct ll_async_page *llap = cookie; - if (llap->llap_magic != LLAP_MAGIC) - return ERR_PTR(-EINVAL); - return llap; -}; - static int ll_ap_make_ready(void *data, int cmd) { struct ll_async_page *llap; struct page *page; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) - RETURN(-EINVAL); - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; LASSERT(cmd != OBD_BRW_READ); @@ -303,10 +306,7 @@ static int ll_ap_refresh_count(void *data, int cmd) /* readpage queues with _COUNT_STABLE, shouldn't get here. */ LASSERT(cmd != OBD_BRW_READ); - llap = llap_from_cookie(data); - if (IS_ERR(llap)) - RETURN(PTR_ERR(llap)); - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; lsm = ll_i2info(page->mapping->host)->lli_smd; kms = lov_merge_size(lsm, 1); @@ -351,12 +351,7 @@ static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa) struct ll_async_page *llap; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); ll_inode_fill_obdo(llap->llap_page->mapping->host, cmd, oa); EXIT; } @@ -368,6 +363,7 @@ static struct obd_async_page_ops ll_async_page_ops = { .ap_completion = ll_ap_completion, }; + struct ll_async_page *llap_cast_private(struct page *page) { struct ll_async_page *llap = (struct ll_async_page *)page->private; @@ -380,7 +376,7 @@ struct ll_async_page *llap_cast_private(struct page *page) } /* XXX have the exp be an argument? 
*/ -struct ll_async_page *llap_from_page(struct page *page) +struct ll_async_page *llap_from_page(struct page *page, unsigned origin) { struct ll_async_page *llap; struct obd_export *exp; @@ -389,9 +385,11 @@ struct ll_async_page *llap_from_page(struct page *page) int rc; ENTRY; + LASSERTF(origin < LLAP__ORIGIN_MAX, "%u\n", origin); + llap = llap_cast_private(page); if (llap != NULL) - RETURN(llap); + GOTO(out, llap); exp = ll_i2dtexp(page->mapping->host); if (exp == NULL) @@ -420,6 +418,8 @@ struct ll_async_page *llap_from_page(struct page *page) list_add_tail(&llap->llap_proc_item, &sbi->ll_pglist); spin_unlock(&sbi->ll_lock); +out: + llap->llap_origin = origin; RETURN(llap); } @@ -475,9 +475,6 @@ out: return rc; } -void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm, - obd_off size); - /* be careful not to return success without setting the page Uptodate or * the next pass through prepare_write will read in stale data from disk. */ int ll_commit_write(struct file *file, struct page *page, unsigned from, @@ -499,22 +496,22 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n", inode, page, from, to, page->index); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_COMMIT_WRITE); if (IS_ERR(llap)) RETURN(PTR_ERR(llap)); + exp = ll_i2dtexp(inode); + if (exp == NULL) + RETURN(-EINVAL); + /* queue a write for some time in the future the first time we * dirty the page */ if (!PageDirty(page)) { lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_DIRTY_MISSES); - exp = ll_i2dtexp(inode); - if (exp == NULL) - RETURN(-EINVAL); - - rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, llap, - to, 0); + rc = queue_or_sync_write(exp, ll_i2info(inode)->lli_smd, + llap, to, 0); if (rc) GOTO(out, rc); } else { @@ -529,16 +526,47 @@ int ll_commit_write(struct file *file, struct page *page, unsigned from, set_page_dirty(page); EXIT; out: + 
size = (((obd_off)page->index) << PAGE_SHIFT) + to; + down(&lli->lli_size_sem); if (rc == 0) { - size = (((obd_off)page->index) << PAGE_SHIFT) + to; - lov_increase_kms(ll_i2dtexp(inode), lsm, size); + obd_adjust_kms(exp, lsm, size, 0); if (size > inode->i_size) inode->i_size = size; SetPageUptodate(page); + } else if (size > inode->i_size) { + /* this page beyond the pales of i_size, so it can't be + * truncated in ll_p_r_e during lock revoking. we must + * teardown our book-keeping here. */ + ll_removepage(page); } + up(&lli->lli_size_sem); return rc; } - + +static unsigned long ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len) +{ + struct ll_ra_info *ra = &sbi->ll_ra_info; + unsigned long ret; + ENTRY; + + spin_lock(&sbi->ll_lock); + ret = min(ra->ra_max_pages - ra->ra_cur_pages, len); + ra->ra_cur_pages += ret; + spin_unlock(&sbi->ll_lock); + + RETURN(ret); +} + +static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len) +{ + struct ll_ra_info *ra = &sbi->ll_ra_info; + spin_lock(&sbi->ll_lock); + LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n", + ra->ra_cur_pages, len); + ra->ra_cur_pages -= len; + spin_unlock(&sbi->ll_lock); +} + int ll_writepage(struct page *page) { struct inode *inode = page->mapping->host; @@ -554,7 +582,7 @@ int ll_writepage(struct page *page) if (exp == NULL) GOTO(out, rc = -EINVAL); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); @@ -578,31 +606,6 @@ out: return rc; } -static unsigned long -ll_ra_count_get(struct ll_sb_info *sbi, unsigned long len) -{ - struct ll_ra_info *ra = &sbi->ll_ra_info; - unsigned long ret; - ENTRY; - - spin_lock(&sbi->ll_lock); - ret = min(ra->ra_max_pages - ra->ra_cur_pages, len); - ra->ra_cur_pages += ret; - spin_unlock(&sbi->ll_lock); - - RETURN(ret); -} - -static void ll_ra_count_put(struct ll_sb_info *sbi, unsigned long len) -{ - struct ll_ra_info *ra = &sbi->ll_ra_info; - 
spin_lock(&sbi->ll_lock); - LASSERTF(ra->ra_cur_pages >= len, "r_c_p %lu len %lu\n", - ra->ra_cur_pages, len); - ra->ra_cur_pages -= len; - spin_unlock(&sbi->ll_lock); -} - /* called for each page in a completed rpc.*/ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { @@ -610,12 +613,7 @@ void ll_ap_completion(void *data, int cmd, struct obdo *oa, int rc) struct page *page; ENTRY; - llap = llap_from_cookie(data); - if (IS_ERR(llap)) { - EXIT; - return; - } - + llap = LLAP_FROM_COOKIE(data); page = llap->llap_page; LASSERT(PageLocked(page)); @@ -683,7 +681,7 @@ void ll_removepage(struct page *page) return; } - llap = llap_from_page(page); + llap = llap_from_page(page, 0); if (IS_ERR(llap)) { CERROR("page %p ind %lu couldn't find llap: %ld\n", page, page->index, PTR_ERR(llap)); @@ -773,7 +771,7 @@ void ll_ra_accounting(struct page *page, struct address_space *mapping) { struct ll_async_page *llap; - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_WRITEPAGE); if (IS_ERR(llap)) return; @@ -851,7 +849,7 @@ static int ll_readahead(struct ll_readahead_state *ras, /* we do this first so that we can see the page in the /proc * accounting */ - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_READAHEAD); if (IS_ERR(llap) || llap->llap_defer_uptodate) goto next_page; @@ -1031,7 +1029,7 @@ int ll_readpage(struct file *filp, struct page *page) if (exp == NULL) GOTO(out, rc = -EINVAL); - llap = llap_from_page(page); + llap = llap_from_page(page, LLAP_ORIGIN_READPAGE); if (IS_ERR(llap)) GOTO(out, rc = PTR_ERR(llap)); diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index 4ac9cad..40c915f 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -125,17 +125,10 @@ static int ll_direct_IO_24(int rw, struct inode *inode, struct kiobuf *iobuf, CERROR("error from callback: rc = %d\n", rc); } ptlrpc_set_destroy(set); - if (rc == 0 && rw == WRITE) { - void lov_increase_kms(struct obd_export *, - struct 
lov_stripe_md *, obd_off size); - obd_off size = offset + length; - lov_increase_kms(ll_i2dtexp(inode), lsm, size); - if (size > inode->i_size) - inode->i_size = size; - } if (rc == 0) { rc = iobuf->length; - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS); + if (rw == WRITE) + obd_adjust_kms(ll_i2dtexp(inode), lsm, offset, 0); } obdo_free(oa); EXIT; @@ -144,6 +137,13 @@ out_free_pga: return rc; } +#ifdef KERNEL_HAS_AS_MAX_READAHEAD +static int ll_max_readahead(struct inode *inode) +{ + return 0; +} +#endif + struct address_space_operations ll_aops = { .readpage = ll_readpage, .direct_IO = ll_direct_IO_24, @@ -152,5 +152,8 @@ struct address_space_operations ll_aops = { .commit_write = ll_commit_write, .removepage = ll_removepage, .sync_page = NULL, - .bmap = NULL + .bmap = NULL, +#ifdef KERNEL_HAS_AS_MAX_READAHEAD + .max_readahead = ll_max_readahead, +#endif }; diff --git a/lustre/llite/special.c b/lustre/llite/special.c index befc716..33401fc 100644 --- a/lustre/llite/special.c +++ b/lustre/llite/special.c @@ -283,11 +283,22 @@ static int ll_special_open(struct inode *inode, struct file *filp) { struct file_operations **pfop = get_save_fops(filp, INODE_OPS); struct file_operations *sfops = filp->f_op; + struct ll_inode_info *lli = ll_i2info(inode); struct ptlrpc_request *req; struct lookup_intent *it; int rc = -EINVAL, err; + struct obd_client_handle **och_p; + __u64 *och_usecount; ENTRY; + it = filp->f_it; + + if (LUSTRE_IT(it)->it_disposition) { + err = it_open_error(DISP_OPEN_OPEN, it); + if (err) + RETURN(err); + } + if (pfop && *pfop) { /* mostly we will have @def_blk_fops here and it is not in a * module but we do this just to be sure. */ @@ -303,11 +314,54 @@ static int ll_special_open(struct inode *inode, struct file *filp) } } + /* Let's see if we have file open on MDS already. 
*/ + if (it->it_flags & FMODE_WRITE) { + och_p = &lli->lli_mds_write_och; + och_usecount = &lli->lli_open_fd_write_count; + } else if (it->it_flags & FMODE_EXEC) { + och_p = &lli->lli_mds_exec_och; + och_usecount = &lli->lli_open_fd_exec_count; + } else { + och_p = &lli->lli_mds_read_och; + och_usecount = &lli->lli_open_fd_read_count; + } + lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - it = filp->f_it; + down(&lli->lli_och_sem); + if (*och_p) { /* Open handle is present */ + if (LUSTRE_IT(it)->it_disposition) { + struct obd_client_handle *och; + /* Well, there's extra open request that we do not need, + let's close it somehow*/ + OBD_ALLOC(och, sizeof (struct obd_client_handle)); + if (!och) { + /* XXX We leak open fd and open OPEN connectioni + to server here */ + up(&lli->lli_och_sem); + RETURN(-ENOMEM); + } + ll_och_fill(inode, it, och); + /* ll_md_och_close() will free och */ + ll_md_och_close(ll_i2mdexp(inode), inode, och); + } + (*och_usecount)++; + + err = ll_local_open(filp, it, NULL); + } else { + LASSERT(*och_usecount == 0); + OBD_ALLOC(*och_p, sizeof (struct obd_client_handle)); + if (!*och_p) { + // XXX Same as above + up(&lli->lli_och_sem); + RETURN(-ENOMEM); + } + (*och_usecount)++; + + err = ll_local_open(filp, it, *och_p); + } + up(&lli->lli_och_sem); - err = ll_local_open(filp, it); if (rc != 0) { CERROR("error opening special file: rc %d\n", rc); ll_md_close(ll_i2sbi(inode)->ll_md_exp, inode, filp); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 6061f74..6d8a38b 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -55,7 +55,7 @@ static int ll_readlink_internal(struct inode *inode, if (rc) { if (rc != -ENOENT) CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); - RETURN(rc); + GOTO(failed, rc); } body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body)); @@ -92,7 +92,7 @@ static int ll_readlink_internal(struct inode *inode, failed: ptlrpc_req_finished (*request); - RETURN (-EPROTO); + 
RETURN(rc); } static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) @@ -141,8 +141,11 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) down(&lli->lli_open_sem); rc = ll_readlink_internal(inode, &request, &symname); up(&lli->lli_open_sem); - if (rc) + if (rc) { + path_release(nd); /* Kernel assumes that ->follow_link() + releases nameidata on error */ GOTO(out, rc); + } rc = vfs_follow_link(nd, symname); ptlrpc_req_finished(request); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 86b1f97..3e4bcde 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -51,6 +51,11 @@ #include #include "lmv_internal.h" +/* not defined for liblustre building */ +#if !defined(ATOMIC_INIT) +#define ATOMIC_INIT(val) { (val) } +#endif + /* object cache. */ kmem_cache_t *obj_cache; atomic_t obj_cache_count = ATOMIC_INIT(0); @@ -859,7 +864,7 @@ int lmv_get_mea_and_update_object(struct obd_export *exp, md.mea = NULL; mealen = MEA_SIZE_LMV(lmv); - valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; + valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA; /* time to update mea of parent id */ rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp, @@ -1905,6 +1910,10 @@ int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, lsmp = (struct mea *)lsm; meap = (struct mea *)*lmmp; + if (lsmp->mea_magic != MEA_MAGIC_LAST_CHAR && + lsmp->mea_magic != MEA_MAGIC_ALL_CHARS) + RETURN(-EINVAL); + meap->mea_magic = cpu_to_le32(lsmp->mea_magic); meap->mea_count = cpu_to_le32(lsmp->mea_count); meap->mea_master = cpu_to_le32(lsmp->mea_master); @@ -1917,45 +1926,69 @@ int lmv_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, RETURN(mea_size); } -int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int mdsize) +int lmv_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_mds_md *lmm, int lmm_size) { struct obd_device *obd = class_exp2obd(exp); - struct mea **tmea = (struct 
mea **)mem_tgt; - struct mea *mea = (struct mea *)disk_src; + struct mea **tmea = (struct mea **)lsmp; + struct mea *mea = (struct mea *)lmm; struct lmv_obd *lmv = &obd->u.lmv; - int mea_size, i; + int mea_size, i, rc = 0; + __u32 magic; ENTRY; - mea_size = sizeof(struct lustre_id) * + mea_size = sizeof(struct lustre_id) * lmv->desc.ld_tgt_count + sizeof(struct mea); - if (mem_tgt == NULL) + + if (lsmp == NULL) return mea_size; - if (*mem_tgt != NULL && disk_src == NULL) { + if (*lsmp != NULL && lmm == NULL) { OBD_FREE(*tmea, mea_size); RETURN(0); } - LASSERT(mea_size == mdsize); + LASSERT(mea_size == lmm_size); OBD_ALLOC(*tmea, mea_size); if (*tmea == NULL) RETURN(-ENOMEM); - if (!disk_src) + if (!lmm) RETURN(mea_size); - (*tmea)->mea_magic = le32_to_cpu(mea->mea_magic); + if (mea->mea_magic == MEA_MAGIC_LAST_CHAR || + mea->mea_magic == MEA_MAGIC_ALL_CHARS) + { + magic = le32_to_cpu(mea->mea_magic); + } else { + struct mea_old *old = (struct mea_old *)lmm; + + mea_size = sizeof(struct lustre_id) * old->mea_count + + sizeof(struct mea_old); + + if (old->mea_count > 256 || old->mea_master > 256 || + lmm_size < mea_size || old->mea_master > old->mea_count) { + CWARN("bad MEA: count %u, master %u, size %u\n", + old->mea_count, old->mea_master, mea_size); + GOTO(out_free_mea, rc = -EINVAL); + } + magic = MEA_MAGIC_LAST_CHAR; + } + + (*tmea)->mea_magic = magic; (*tmea)->mea_count = le32_to_cpu(mea->mea_count); (*tmea)->mea_master = le32_to_cpu(mea->mea_master); - for (i = 0; i < lmv->desc.ld_tgt_count; i++) { + for (i = 0; i < (*tmea)->mea_count; i++) { (*tmea)->mea_ids[i] = mea->mea_ids[i]; id_le_to_cpu(&(*tmea)->mea_ids[i]); } - RETURN(mea_size); + +out_free_mea: + OBD_FREE(*tmea, mea_size); + return rc; } int lmv_brw(int rw, struct obd_export *exp, struct obdo *oa, diff --git a/lustre/lmv/lmv_objmgr.c b/lustre/lmv/lmv_objmgr.c index 4c2ef10..5ed2544 100644 --- a/lustre/lmv/lmv_objmgr.c +++ b/lustre/lmv/lmv_objmgr.c @@ -307,7 +307,7 @@ lmv_create_obj(struct obd_export 
*exp, struct lustre_id *id, struct mea *mea) /* time to update mea of parent id */ md.mea = NULL; - valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA; + valid = OBD_MD_FLEASIZE | OBD_MD_FLDIREA | OBD_MD_MEA; rc = md_getattr(lmv->tgts[id_group(id)].ltd_exp, id, valid, NULL, 0, mealen, &req); diff --git a/lustre/lov/Makefile.in b/lustre/lov/Makefile.in index 52dba75..aebee3e 100644 --- a/lustre/lov/Makefile.in +++ b/lustre/lov/Makefile.in @@ -1,4 +1,4 @@ MODULES := lov -lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o +lov-objs := lov_log.o lov_obd.o lov_pack.o lproc_lov.o lov_offset.o lov_merge.o lov_request.o lov_qos.o @INCLUDE_RULES@ diff --git a/lustre/lov/autoMakefile.am b/lustre/lov/autoMakefile.am index 2847d56..e6854ec 100644 --- a/lustre/lov/autoMakefile.am +++ b/lustre/lov/autoMakefile.am @@ -5,7 +5,7 @@ if LIBLUSTRE noinst_LIBRARIES = liblov.a -liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_internal.h +liblov_a_SOURCES = lov_log.c lov_obd.c lov_pack.c lov_request.c lov_offset.c lov_qos.c lov_merge.c lov_internal.h liblov_a_CPPFLAGS = $(LLCPPFLAGS) liblov_a_CFLAGS = $(LLCFLAGS) endif diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 4291f39..b6172f5 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -12,6 +12,44 @@ #include +struct lov_lock_handles { + struct portals_handle llh_handle; + atomic_t llh_refcount; + int llh_stripe_count; + struct lustre_handle llh_handles[0]; +}; + +struct lov_request { + struct list_head rq_link; + struct ldlm_extent rq_extent; + int rq_idx; /* index in lov->tgts array */ + int rq_stripe; /* stripe number */ + int rq_complete; + int rq_rc; + int rq_buflen; /* length of sub_md */ + struct obdo *rq_oa; + struct lov_stripe_md *rq_md; + obd_count rq_oabufs; + obd_count rq_pgaidx; +}; + +struct lov_request_set { + atomic_t set_refcount; + struct obd_export *set_exp; + int set_count; + int set_completes; + int set_success; + struct llog_cookie *set_cookies; + int set_cookie_sent; + 
struct lov_stripe_md *set_md; + struct obdo *set_oa; + struct obd_trans_info *set_oti; + obd_count set_oabufs; + struct brw_page *set_pga; + struct lov_lock_handles *set_lockh; + struct list_head set_list; +}; + #define LAP_MAGIC 8200 #define LOV_MAX_TGT_COUNT 1024 @@ -27,10 +65,141 @@ struct lov_async_page { obd_off lap_sub_offset; void *lap_sub_cookie; struct obd_async_page_ops *lap_caller_ops; - struct obd_async_page_ops *lap_caller_data; + void *lap_caller_data; obd_id lap_loi_id; }; +#define LAP_FROM_COOKIE(c) \ + (LASSERT(((struct lov_async_page *)(c))->lap_magic == LAP_MAGIC), \ + (struct lov_async_page *)(c)) + +static inline void lov_llh_addref(void *llhp) +{ + struct lov_lock_handles *llh = llhp; + atomic_inc(&llh->llh_refcount); + CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh, + atomic_read(&llh->llh_refcount)); +} + +static inline struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm) +{ + struct lov_lock_handles *llh; + + OBD_ALLOC(llh, sizeof *llh + + sizeof(*llh->llh_handles) * lsm->lsm_stripe_count); + if (llh == NULL) + return NULL; + atomic_set(&llh->llh_refcount, 2); + llh->llh_stripe_count = lsm->lsm_stripe_count; + INIT_LIST_HEAD(&llh->llh_handle.h_link); + class_handle_hash(&llh->llh_handle, lov_llh_addref); + return llh; +} + +static inline struct lov_lock_handles * +lov_handle2llh(struct lustre_handle *handle) +{ + LASSERT(handle != NULL); + return(class_handle2object(handle->cookie)); +} + +static inline void lov_llh_put(struct lov_lock_handles *llh) +{ + CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh, + atomic_read(&llh->llh_refcount) - 1); + LASSERT(atomic_read(&llh->llh_refcount) > 0 && + atomic_read(&llh->llh_refcount) < 0x5a5a); + if (atomic_dec_and_test(&llh->llh_refcount)) { + class_handle_unhash(&llh->llh_handle); + LASSERT(list_empty(&llh->llh_handle.h_link)); + OBD_FREE(llh, sizeof *llh + + sizeof(*llh->llh_handles) * llh->llh_stripe_count); + } +} + +/* lov_merge.c */ +void lov_merge_attrs(struct 
obdo *tgt, struct obdo *src, obd_flags valid, + struct lov_stripe_md *lsm, int stripeno, int *set); + +int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, + obd_off size, int shrink); +/* lov_offset.c */ +obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size, + int stripeno); +int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, + int stripeno, obd_off *obd_off); +obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size, + int stripeno); +int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, + obd_off start, obd_off end, + obd_off *obd_start, obd_off *obd_end); +int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off); + +/* lov_qos.c */ +void qos_shrink_lsm(struct lov_request_set *set); +int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, + int newea); + +/* lov_request.c */ +void lov_set_add_req(struct lov_request *req, struct lov_request_set *set); +int lov_update_common_set(struct lov_request_set *set, + struct lov_request *req, int rc); +int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **ea, + struct obdo *src_oa, struct obd_trans_info *oti, + struct lov_request_set **reqset); +int lov_update_create_set(struct lov_request_set *set, + struct lov_request *req, int rc); +int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea); +int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_count oa_bufs, + struct brw_page *pga, struct obd_trans_info *oti, + struct lov_request_set **reqset); +int lov_fini_brw_set(struct lov_request_set *set); +int lov_prep_getattr_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, + struct lov_request_set **reqset); +int lov_fini_getattr_set(struct lov_request_set *set); +int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, + struct obd_trans_info *oti, + struct lov_request_set 
**reqset); +int lov_update_destroy_set(struct lov_request_set *set, + struct lov_request *req, int rc); +int lov_fini_destroy_set(struct lov_request_set *set); +int lov_prep_setattr_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, struct obd_trans_info *oti, + struct lov_request_set **reqset); +int lov_fini_setattr_set(struct lov_request_set *set); +int lov_prep_punch_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_off start, + obd_off end, struct obd_trans_info *oti, + struct lov_request_set **reqset); +int lov_update_punch_set(struct lov_request_set *set, struct lov_request *req, + int rc); +int lov_fini_punch_set(struct lov_request_set *set); +int lov_prep_sync_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_off start, + obd_off end, struct lov_request_set **reqset); +int lov_fini_sync_set(struct lov_request_set *set); +int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm, + ldlm_policy_data_t *policy, __u32 mode, + struct lustre_handle *lockh, + struct lov_request_set **reqset); +int lov_update_enqueue_set(struct lov_request_set *set, + struct lov_request *req, int rc, int flags); +int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode); +int lov_prep_match_set(struct obd_export *exp, struct lov_stripe_md *lsm, + ldlm_policy_data_t *policy, __u32 mode, + struct lustre_handle *lockh, + struct lov_request_set **reqset); +int lov_update_match_set(struct lov_request_set *set, struct lov_request *req, + int rc); +int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags); +int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm, + __u32 mode, struct lustre_handle *lockh, + struct lov_request_set **reqset); +int lov_fini_cancel_set(struct lov_request_set *set); + /* lov_obd.c */ int lov_get_stripecnt(struct lov_obd *lov, int stripe_count); int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, 
int pattern); diff --git a/lustre/lov/lov_merge.c b/lustre/lov/lov_merge.c new file mode 100644 index 0000000..14ff9eb --- /dev/null +++ b/lustre/lov/lov_merge.c @@ -0,0 +1,153 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LOV + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#include +#include + +#include "lov_internal.h" + +/* Merge rss if kms == 0 + * + * Even when merging RSS, we will take the KMS value if it's larger. + * This prevents getattr from stomping on dirty cached pages which + * extend the file size. 
*/ +__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms) +{ + struct lov_oinfo *loi; + __u64 size = 0; + int i; + + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; + i++, loi++) { + obd_size lov_size, tmpsize; + + tmpsize = loi->loi_kms; + if (kms == 0 && loi->loi_rss > tmpsize) + tmpsize = loi->loi_rss; + + lov_size = lov_stripe_size(lsm, tmpsize, i); + if (lov_size > size) + size = lov_size; + } + + return size; +} +EXPORT_SYMBOL(lov_merge_size); + +/* Merge blocks */ +__u64 lov_merge_blocks(struct lov_stripe_md *lsm) +{ + struct lov_oinfo *loi; + __u64 blocks = 0; + int i; + + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++) + blocks += loi->loi_blocks; + return blocks; +} +EXPORT_SYMBOL(lov_merge_blocks); + +__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time) +{ + struct lov_oinfo *loi; + int i; + + for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++, loi++) + if (loi->loi_mtime > current_time) + current_time = loi->loi_mtime; + return current_time; +} +EXPORT_SYMBOL(lov_merge_mtime); + +int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, + obd_off size, int shrink) +{ + struct lov_oinfo *loi; + int stripe = 0; + __u64 kms; + ENTRY; + + if (shrink) { + struct lov_oinfo *loi; + int i = 0; + for (loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; + i++, loi++) { + kms = lov_size_to_stripe(lsm, size, i); + loi->loi_kms = loi->loi_rss = kms; + } + RETURN(0); + } + + if (size > 0) + stripe = lov_stripe_number(lsm, size - 1); + kms = lov_size_to_stripe(lsm, size, stripe); + loi = &(lsm->lsm_oinfo[stripe]); + + CDEBUG(D_INODE, "stripe %d KMS %sincreasing "LPU64"->"LPU64"\n", + stripe, kms > loi->loi_kms ? 
"" : "not ", loi->loi_kms, kms); + if (kms > loi->loi_kms) + loi->loi_kms = kms; + + RETURN(0); +} +EXPORT_SYMBOL(lov_adjust_kms); + +void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flags valid, + struct lov_stripe_md *lsm, int stripeno, int *set) +{ + valid &= src->o_valid; + + if (*set) { + if (valid & OBD_MD_FLSIZE) { + /* this handles sparse files properly */ + obd_size lov_size; + + lov_size = lov_stripe_size(lsm, src->o_size, stripeno); + if (lov_size > tgt->o_size) + tgt->o_size = lov_size; + } + if (valid & OBD_MD_FLBLOCKS) + tgt->o_blocks += src->o_blocks; + if (valid & OBD_MD_FLBLKSZ) + tgt->o_blksize += src->o_blksize; + if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) + tgt->o_ctime = src->o_ctime; + if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) + tgt->o_mtime = src->o_mtime; + } else { + memcpy(tgt, src, sizeof(*tgt)); + tgt->o_id = lsm->lsm_object_id; + if (valid & OBD_MD_FLSIZE) + tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno); + *set = 1; + } +} diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 7246a1d..218a518 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -51,68 +51,6 @@ #include "lov_internal.h" -static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, - int stripeno, obd_off *obd_off); - -struct lov_lock_handles { - struct portals_handle llh_handle; - atomic_t llh_refcount; - int llh_stripe_count; - struct lustre_handle llh_handles[0]; -}; - -static void lov_llh_addref(void *llhp) -{ - struct lov_lock_handles *llh = llhp; - - atomic_inc(&llh->llh_refcount); - CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh, - atomic_read(&llh->llh_refcount)); -} - -static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm) -{ - struct lov_lock_handles *llh; - - OBD_ALLOC(llh, sizeof *llh + - sizeof(*llh->llh_handles) * lsm->lsm_stripe_count); - if (llh == NULL) { - CERROR("out of memory\n"); - return NULL; - } - atomic_set(&llh->llh_refcount, 2); - 
llh->llh_stripe_count = lsm->lsm_stripe_count; - INIT_LIST_HEAD(&llh->llh_handle.h_link); - class_handle_hash(&llh->llh_handle, lov_llh_addref); - return llh; -} - -static struct lov_lock_handles *lov_handle2llh(struct lustre_handle *handle) -{ - ENTRY; - LASSERT(handle != NULL); - RETURN(class_handle2object(handle->cookie)); -} - -static void lov_llh_put(struct lov_lock_handles *llh) -{ - CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh, - atomic_read(&llh->llh_refcount) - 1); - LASSERT(atomic_read(&llh->llh_refcount) > 0 && - atomic_read(&llh->llh_refcount) < 0x5a5a); - if (atomic_dec_and_test(&llh->llh_refcount)) { - LASSERT(list_empty(&llh->llh_handle.h_link)); - OBD_FREE(llh, sizeof *llh + - sizeof(*llh->llh_handles) * llh->llh_stripe_count); - } -} - -static void lov_llh_destroy(struct lov_lock_handles *llh) -{ - class_handle_unhash(&llh->llh_handle); - lov_llh_put(llh); -} - /* obd methods */ #define MAX_STRING_SIZE 128 static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt, @@ -307,7 +245,6 @@ static int lov_disconnect_obd(struct obd_device *obd, } } #endif - if (obd->obd_no_recov) { /* Pass it on to our clients. 
* XXX This should be an argument to disconnect, @@ -331,7 +268,6 @@ static int lov_disconnect_obd(struct obd_device *obd, tgt->active = 0; lov->desc.ld_active_tgt_count--; } - tgt->ltd_exp = NULL; RETURN(0); } @@ -369,7 +305,7 @@ static int lov_disconnect(struct obd_export *exp, unsigned long flags) obd->obd_type->typ_name, obd->obd_name); } #endif - + out_local: rc = class_disconnect(exp, 0); RETURN(rc); @@ -393,6 +329,9 @@ static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid, spin_lock(&lov->lov_lock); for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { + if (tgt->ltd_exp == NULL) + continue; + CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n", i, tgt->uuid.uuid, tgt->ltd_exp->exp_handle.h_cookie); if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0) @@ -709,59 +648,6 @@ out: RETURN(rc); } -/* compute object size given "stripeno" and the ost size */ -static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size, - int stripeno) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_size; - obd_size lov_size; - - if (ost_size == 0) - return 0; - - /* do_div(a, b) returns a % b, and a = a / b */ - stripe_size = do_div(ost_size, ssize); - if (stripe_size) - lov_size = ost_size * swidth + stripeno * ssize + stripe_size; - else - lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize; - - return lov_size; -} - -static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid, - struct lov_stripe_md *lsm, int stripeno, int *set) -{ - valid &= src->o_valid; - - if (*set) { - if (valid & OBD_MD_FLSIZE) { - /* this handles sparse files properly */ - obd_size lov_size; - - lov_size = lov_stripe_size(lsm, src->o_size, stripeno); - if (lov_size > tgt->o_size) - tgt->o_size = lov_size; - } - if (valid & OBD_MD_FLBLOCKS) - tgt->o_blocks += src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - tgt->o_blksize += src->o_blksize; - 
if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) - tgt->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) - tgt->o_mtime = src->o_mtime; - } else { - memcpy(tgt, src, sizeof(*tgt)); - tgt->o_id = lsm->lsm_object_id; - if (valid & OBD_MD_FLSIZE) - tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno); - *set = 1; - } -} - #ifndef log2 #define log2(n) ffz(~(n)) #endif @@ -823,23 +709,58 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa, RETURN(rc); } -#define LOV_CREATE_RESEED_INTERVAL 1000 +static int lov_recreate(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti) +{ + struct lov_stripe_md *obj_mdp, *lsm; + struct lov_obd *lov = &exp->exp_obd->u.lov; + unsigned ost_idx; + int rc, i; + ENTRY; + + LASSERT(src_oa->o_valid & OBD_MD_FLFLAGS && + src_oa->o_flags & OBD_FL_RECREATE_OBJS); + + OBD_ALLOC(obj_mdp, sizeof(*obj_mdp)); + if (obj_mdp == NULL) + RETURN(-ENOMEM); + + ost_idx = src_oa->o_nlink; + lsm = *ea; + if (lsm == NULL) + GOTO(out, rc = -EINVAL); + if (ost_idx >= lov->desc.ld_tgt_count) + GOTO(out, rc = -EINVAL); + + for (i = 0; i < lsm->lsm_stripe_count; i++) { + if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) { + if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id) + GOTO(out, rc = -EINVAL); + break; + } + } + if (i == lsm->lsm_stripe_count) + GOTO(out, rc = -EINVAL); + + rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, &obj_mdp, oti); +out: + OBD_FREE(obj_mdp, sizeof(*obj_mdp)); + RETURN(rc); +} /* the LOV expects oa->o_id to be set to the LOV object id */ static int lov_create(struct obd_export *exp, struct obdo *src_oa, struct lov_stripe_md **ea, struct obd_trans_info *oti) { - static int ost_start_idx, ost_start_count; + struct lov_request_set *set = NULL; + struct list_head *pos; struct lov_obd *lov; - struct lov_stripe_md *lsm; - struct lov_oinfo *loi = NULL; - struct obdo *tmp_oa, *ret_oa; - struct llog_cookie *cookies = NULL; - 
unsigned ost_count, ost_idx; - int set = 0, obj_alloc = 0, cookie_sent = 0, rc = 0, i; + int rc = 0; ENTRY; LASSERT(ea != NULL); + if (exp == NULL) + RETURN(-EINVAL); if ((src_oa->o_valid & OBD_MD_FLFLAGS) && src_oa->o_flags == OBD_FL_DELORPHAN) { @@ -847,324 +768,32 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } - if (exp == NULL) - RETURN(-EINVAL); - lov = &exp->exp_obd->u.lov; - if (!lov->desc.ld_active_tgt_count) RETURN(-EIO); /* Recreate a specific object id at the given OST index */ if ((src_oa->o_valid & OBD_MD_FLFLAGS) && (src_oa->o_flags & OBD_FL_RECREATE_OBJS)) { - struct lov_stripe_md obj_md; - struct lov_stripe_md *obj_mdp = &obj_md; - - ost_idx = src_oa->o_nlink; - lsm = *ea; - if (lsm == NULL) - RETURN(-EINVAL); - if (ost_idx >= lov->desc.ld_tgt_count) - RETURN(-EINVAL); - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (lsm->lsm_oinfo[i].loi_ost_idx == ost_idx) { - if (lsm->lsm_oinfo[i].loi_id != src_oa->o_id || - lsm->lsm_oinfo[i].loi_gr != src_oa->o_gr) { - RETURN(-EINVAL); - } - break; - } - } - if (i == lsm->lsm_stripe_count) - RETURN(-EINVAL); - - rc = obd_create(lov->tgts[ost_idx].ltd_exp, src_oa, - &obj_mdp, oti); + rc = lov_recreate(exp, src_oa, ea, oti); RETURN(rc); } - ret_oa = obdo_alloc(); - if (!ret_oa) - RETURN(-ENOMEM); - - tmp_oa = obdo_alloc(); - if (!tmp_oa) - GOTO(out_oa, rc = -ENOMEM); - - lsm = *ea; - if (lsm == NULL) { - int stripes; - ost_count = lov_get_stripecnt(lov, 0); - - /* If the MDS file was truncated up to some size, stripe over - * enough OSTs to allow the file to be created at that size. */ - if (src_oa->o_valid & OBD_MD_FLSIZE) { - stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1; - do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12)); - - if (stripes > lov->desc.ld_active_tgt_count) - RETURN(-EFBIG); - if (stripes < ost_count) - stripes = ost_count; - } else { - stripes = ost_count; - } - - rc = lov_alloc_memmd(&lsm, stripes, lov->desc.ld_pattern ? 
- lov->desc.ld_pattern : LOV_PATTERN_RAID0); - if (rc < 0) - GOTO(out_tmp, rc); - - rc = 0; - } - - ost_count = lov->desc.ld_tgt_count; - - LASSERT(src_oa->o_gr > 0); - LASSERT(src_oa->o_valid & OBD_MD_FLID); - lsm->lsm_object_id = src_oa->o_id; - lsm->lsm_object_gr = src_oa->o_gr; - if (!lsm->lsm_stripe_size) - lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; - if (!lsm->lsm_pattern) { - lsm->lsm_pattern = lov->desc.ld_pattern ? - lov->desc.ld_pattern : LOV_PATTERN_RAID0; - } - - if (*ea == NULL || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) { - if (--ost_start_count <= 0) { - ost_start_idx = ll_insecure_random_int(); - ost_start_count = LOV_CREATE_RESEED_INTERVAL; - } else if (lsm->lsm_stripe_count >= - lov->desc.ld_active_tgt_count) { - /* If we allocate from all of the stripes, make the - * next file start on the next OST. */ - ++ost_start_idx; - } - ost_idx = ost_start_idx % ost_count; - } else { - ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; - } - - CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", - lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); - - /* XXX LOV STACKING: need to figure out how many real OSCs */ - if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { - oti_alloc_cookies(oti, lsm->lsm_stripe_count); - if (!oti->oti_logcookies) - GOTO(out_cleanup, rc = -ENOMEM); - cookies = oti->oti_logcookies; - } - - loi = lsm->lsm_oinfo; - for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - struct lov_stripe_md obj_md; - struct lov_stripe_md *obj_mdp = &obj_md; - int err; - - ++ost_start_idx; - if (lov->tgts[ost_idx].active == 0) { - if (!obd_uuid_empty(&lov->tgts[ost_idx].uuid)) - CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); - continue; - } - - /* create data objects with "parent" OA */ - memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - - /* XXX When we start creating objects on demand, we need to - * make sure that we always create the object on the - * stripe which holds the existing file size. 
- */ - if (src_oa->o_valid & OBD_MD_FLSIZE) { - if (lov_stripe_offset(lsm, src_oa->o_size, i, - &tmp_oa->o_size) < 0 && - tmp_oa->o_size) - tmp_oa->o_size--; - - CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", - i, tmp_oa->o_size, src_oa->o_size); - } + rc = lov_prep_create_set(exp, ea, src_oa, oti, &set); + if (rc) + RETURN(rc); + list_for_each (pos, &set->set_list) { + struct lov_request *req = + list_entry(pos, struct lov_request, rq_link); /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - err = obd_create(lov->tgts[ost_idx].ltd_exp, tmp_oa, &obj_mdp, - oti); - if (err) { - if (lov->tgts[ost_idx].active) { - CERROR("error creating objid "LPX64" sub-object" - " on OST idx %d/%d: rc = %d\n", - src_oa->o_id, ost_idx, - lsm->lsm_stripe_count, err); - if (err > 0) { - CERROR("obd_create returned invalid " - "err %d\n", err); - err = -EIO; - } - } - if (!rc) - rc = err; - continue; - } - if (oti->oti_objid) - oti->oti_objid[ost_idx] = tmp_oa->o_id; - loi->loi_id = tmp_oa->o_id; - loi->loi_gr = tmp_oa->o_gr; - loi->loi_ost_idx = ost_idx; - loi->loi_ost_gen = lov->tgts[ost_idx].ltd_gen; - CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at " - "idx %d gen %d\n", lsm->lsm_object_id, loi->loi_id, - ost_idx, loi->loi_ost_gen); - - lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, - obj_alloc, &set); - loi_init(loi); - - if (cookies) - ++oti->oti_logcookies; - if (tmp_oa->o_valid & OBD_MD_FLCOOKIE) - ++cookie_sent; - ++obj_alloc; - ++loi; - - /* If we have allocated enough objects, we are OK */ - if (obj_alloc == lsm->lsm_stripe_count) - GOTO(out_done, rc = 0); - } - - if (obj_alloc == 0) { - if (rc == 0) - rc = -EIO; - GOTO(out_cleanup, rc); - } - - /* If we were passed specific striping params, then a failure to - * meet those requirements is an error, since we can't reallocate - * that memory (it might be part of a larger array or something). - * - * We can only get here if lsm_stripe_count was originally > 1. 
- */ - if (*ea != NULL) { - CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", - lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc); - if (rc == 0) - rc = -EFBIG; - GOTO(out_cleanup, rc); - } else { - struct lov_stripe_md *lsm_new; - /* XXX LOV STACKING call into osc for sizes */ - unsigned oldsize, newsize; - - if (oti && cookies && cookie_sent) { - oldsize = lsm->lsm_stripe_count * sizeof(*cookies); - newsize = obj_alloc * sizeof(*cookies); - - oti_alloc_cookies(oti, obj_alloc); - if (oti->oti_logcookies) { - memcpy(oti->oti_logcookies, cookies, newsize); - OBD_FREE(cookies, oldsize); - cookies = oti->oti_logcookies; - } else { - CWARN("'leaking' %d bytes\n", oldsize-newsize); - } - } - - CWARN("using fewer stripes for object "LPX64": old %u new %u\n", - lsm->lsm_object_id, lsm->lsm_stripe_count, obj_alloc); - oldsize = lov_stripe_md_size(lsm->lsm_stripe_count); - newsize = lov_stripe_md_size(obj_alloc); - OBD_ALLOC(lsm_new, newsize); - if (lsm_new != NULL) { - memcpy(lsm_new, lsm, newsize); - lsm_new->lsm_stripe_count = obj_alloc; - OBD_FREE(lsm, oldsize); - lsm = lsm_new; - } else { - CWARN("'leaking' %d bytes\n", oldsize - newsize); - } - rc = 0; - } - EXIT; - out_done: - *ea = lsm; - if (src_oa->o_valid & OBD_MD_FLSIZE && - ret_oa->o_size != src_oa->o_size) { - CERROR("original size "LPU64" isn't new object size "LPU64"\n", - src_oa->o_size, ret_oa->o_size); - LBUG(); - } - ret_oa->o_id = src_oa->o_id; - ret_oa->o_gr = src_oa->o_gr; - ret_oa->o_valid |= OBD_MD_FLGROUP; - memcpy(src_oa, ret_oa, sizeof(*src_oa)); - - out_tmp: - obdo_free(tmp_oa); - out_oa: - obdo_free(ret_oa); - if (oti && cookies) { - oti->oti_logcookies = cookies; - if (!cookie_sent) { - oti_free_cookies(oti); - src_oa->o_valid &= ~OBD_MD_FLCOOKIE; - } else { - src_oa->o_valid |= OBD_MD_FLCOOKIE; - } + rc = obd_create(lov->tgts[req->rq_idx].ltd_exp, + req->rq_oa, &req->rq_md, oti); + lov_update_create_set(set, req, rc); } + rc = lov_fini_create_set(set, ea); RETURN(rc); - - 
out_cleanup: - while (obj_alloc-- > 0) { - struct obd_export *sub_exp; - int err; - - --loi; - sub_exp = lov->tgts[loi->loi_ost_idx].ltd_exp; - /* destroy already created objects here */ - memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - tmp_oa->o_id = loi->loi_id; - - err = obd_destroy(sub_exp, tmp_oa, NULL, oti); - if (err) - CERROR("Failed to uncreate objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", src_oa->o_id, - loi->loi_id, loi->loi_ost_idx, err); - } - if (*ea == NULL) - obd_free_memmd(exp, &lsm); - goto out_tmp; -} - -static int lov_revalidate_policy(struct lov_obd *lov, struct lov_stripe_md *lsm) -{ - static int next_idx = 0; - struct lov_tgt_desc *tgt; - int i, count; - - /* XXX - we should do something clever and take lsm - * into account but just do round robin for now. */ - - /* last_idx must always be less that count because - * ld_tgt_count currently cannot shrink. */ - count = lov->desc.ld_tgt_count; - - for (i = next_idx, tgt = lov->tgts + i; i < count; i++, tgt++) { - if (tgt->active) { - next_idx = (i + 1) % count; - RETURN(i); - } - } - - for (i = 0, tgt = lov->tgts; i < next_idx; i++, tgt++) { - if (tgt->active) { - next_idx = (i + 1) % count; - RETURN(i); - } - } - - RETURN(-EIO); } #define lsm_bad_magic(LSMP) \ @@ -1185,10 +814,11 @@ static int lov_revalidate_policy(struct lov_obd *lov, struct lov_stripe_md *lsm) static int lov_destroy(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, struct obd_trans_info *oti) { - struct obdo *tmp = NULL; - struct lov_oinfo *loi; + struct lov_request_set *set; + struct lov_request *req; + struct list_head *pos; struct lov_obd *lov; - int rc = 0, i; + int rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1198,44 +828,40 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-ENODEV); lov = &exp->exp_obd->u.lov; - loi = lsm->lsm_oinfo; - for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { - int err; + rc = lov_prep_destroy_set(exp, oa, lsm, oti, &set); + if (rc) + 
RETURN(rc); - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - /* Orphan clean up will (someday) fix this up. */ - if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) - oti->oti_logcookies++; - continue; - } + list_for_each (pos, &set->set_list) { + int err; + req = list_entry(pos, struct lov_request, rq_link); - tmp = obdo_alloc(); - if (tmp == NULL) - RETURN(-ENOMEM); - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - err = obd_destroy(lov->tgts[loi->loi_ost_idx].ltd_exp, - tmp, NULL, oti); - obdo_free(tmp); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CDEBUG(D_INODE, "error: destroying objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, err); + /* XXX update the cookie position */ + oti->oti_logcookies = set->set_cookies + req->rq_stripe; + rc = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, + NULL, oti); + err = lov_update_common_set(set, req, rc); + if (rc) { + CERROR("error: destroying objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + set->set_oa->o_id, req->rq_oa->o_id, + req->rq_idx, rc); if (!rc) rc = err; } } + lov_fini_destroy_set(set); RETURN(rc); } static int lov_getattr(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm) { - struct obdo *tmp = NULL; - int i, rc = 0, set = 0; - struct lov_oinfo *loi; + struct lov_request_set *set; + struct lov_request *req; + struct list_head *pos; struct lov_obd *lov; + int err = 0, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1245,78 +871,49 @@ static int lov_getattr(struct obd_export *exp, struct obdo *oa, RETURN(-ENODEV); lov = &exp->exp_obd->u.lov; + + rc = lov_prep_getattr_set(exp, oa, lsm, &set); + if (rc) + RETURN(rc); - CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", - lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size); - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if 
(lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx); - /* create data objects with "parent" OA */ - tmp = obdo_alloc(); - if (tmp == NULL) - RETURN(-ENOMEM); - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - - err = obd_getattr(lov->tgts[loi->loi_ost_idx].ltd_exp, - tmp, NULL); + "%u\n", oa->o_id, req->rq_stripe, req->rq_oa->o_id, + req->rq_idx); + + rc = obd_getattr(lov->tgts[req->rq_idx].ltd_exp, + req->rq_oa, NULL); + err = lov_update_common_set(set, req, rc); if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: getattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - obdo_free(tmp); - RETURN(err); - } - } else { - lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set); + CERROR("error: getattr objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + set->set_oa->o_id, req->rq_oa->o_id, + req->rq_idx, err); + break; } - obdo_free(tmp); } - if (!set) - rc = -EIO; + + rc = lov_fini_getattr_set(set); + if (err) + rc = err; RETURN(rc); } static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, void *data, int rc) { - struct lov_getattr_async_args *aa = data; - struct lov_stripe_md *lsm = aa->aa_lsm; - struct obdo *oa = aa->aa_oa; - struct obdo *obdos = aa->aa_obdos; - struct lov_oinfo *loi; - int i; - int set = 0; + struct lov_request_set *lovset = (struct lov_request_set *)data; ENTRY; - if (rc == 0) { - /* NB all stripe requests succeeded to get here */ - - loi = lsm->lsm_oinfo; - for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { - if (obdos[i].o_valid == 0) /* inactive stripe */ - continue; - - lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm, - i, &set); - } - - if (!set) { - 
CERROR ("No stripes had valid attrs\n"); - rc = -EIO; - } + /* don't do attribute merge if this aysnc op failed */ + if (rc) { + lovset->set_completes = 0; + lov_fini_getattr_set(lovset); + } else { + rc = lov_fini_getattr_set(lovset); } - - OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); RETURN (rc); } @@ -1324,11 +921,11 @@ static int lov_getattr_async(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, struct ptlrpc_request_set *rqset) { - struct obdo *obdos; + struct lov_request_set *lovset; struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_getattr_async_args *aa; - int i, rc = 0, set = 0; + struct list_head *pos; + struct lov_request *req; + int rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1339,67 +936,50 @@ static int lov_getattr_async(struct obd_export *exp, struct obdo *oa, lov = &exp->exp_obd->u.lov; - OBD_ALLOC (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); - if (obdos == NULL) - RETURN(-ENOMEM); + rc = lov_prep_getattr_set(exp, oa, lsm, &lovset); + if (rc) + RETURN(rc); CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size); - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - /* leaves obdos[i].obd_valid unset */ - continue; - } - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at " - "idx %u gen %d\n", oa->o_id, i, loi->loi_id, - loi->loi_ost_idx, loi->loi_ost_gen); - - /* create data objects with "parent" OA */ - memcpy(&obdos[i], oa, sizeof(obdos[i])); - obdos[i].o_id = loi->loi_id; - - err = obd_getattr_async(lov->tgts[loi->loi_ost_idx].ltd_exp, - &obdos[i], NULL, rqset); - if (err) { + list_for_each (pos, &lovset->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + + CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " + "%u\n", oa->o_id, req->rq_stripe, req->rq_oa->o_id, + 
req->rq_idx); + rc = obd_getattr_async(lov->tgts[req->rq_idx].ltd_exp, + req->rq_oa, NULL, rqset); + if (rc) { CERROR("error: getattr objid "LPX64" subobj " LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - GOTO(out_obdos, rc = err); + lovset->set_oa->o_id, req->rq_oa->o_id, + req->rq_idx, rc); + GOTO(out, rc); } - set = 1; + lov_update_common_set(lovset, req, rc); } - if (!set) - GOTO (out_obdos, rc = -EIO); - + + LASSERT(rc == 0); LASSERT (rqset->set_interpret == NULL); rqset->set_interpret = lov_getattr_interpret; - LASSERT (sizeof (rqset->set_args) >= sizeof (*aa)); - aa = (struct lov_getattr_async_args *)&rqset->set_args; - aa->aa_lsm = lsm; - aa->aa_oa = oa; - aa->aa_obdos = obdos; - aa->aa_lov = lov; - GOTO(out, rc = 0); - -out_obdos: - OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); + rqset->set_arg = (void *)lovset; + RETURN(rc); out: + LASSERT(rc); + lov_fini_getattr_set(lovset); RETURN(rc); } - static int lov_setattr(struct obd_export *exp, struct obdo *src_oa, struct lov_stripe_md *lsm, struct obd_trans_info *oti) { - struct obdo *tmp_oa, *ret_oa; + struct lov_request_set *set; struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i, set = 0; + struct list_head *pos; + struct lov_request *req; + int err = 0, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1416,258 +996,60 @@ static int lov_setattr(struct obd_export *exp, struct obdo *src_oa, LASSERT(!(src_oa->o_valid & OBD_MD_FLGROUP) || src_oa->o_gr > 0); - ret_oa = obdo_alloc(); - if (!ret_oa) - RETURN(-ENOMEM); - - tmp_oa = obdo_alloc(); - if (!tmp_oa) - GOTO(out_oa, rc = -ENOMEM); - lov = &exp->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - tmp_oa->o_id = loi->loi_id; - LASSERT(!(tmp_oa->o_valid & OBD_MD_FLGROUP) || 
tmp_oa->o_gr>0); - - if (src_oa->o_valid & OBD_MD_FLSIZE) { - if (lov_stripe_offset(lsm, src_oa->o_size, i, - &tmp_oa->o_size) < 0 && - tmp_oa->o_size) - tmp_oa->o_size--; - - CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", - i, tmp_oa->o_size, src_oa->o_size); - } + rc = lov_prep_setattr_set(exp, src_oa, lsm, NULL, &set); + if (rc) + RETURN(rc); - err = obd_setattr(lov->tgts[loi->loi_ost_idx].ltd_exp, tmp_oa, - NULL, NULL); + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + + rc = obd_setattr(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, + NULL, NULL); + err = lov_update_common_set(set, req, rc); if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: setattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - src_oa->o_id, loi->loi_id, - loi->loi_ost_idx, err); - if (!rc) - rc = err; - } - continue; + CERROR("error: setattr objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + set->set_oa->o_id, req->rq_oa->o_id, + req->rq_idx, err); + if (!rc) + rc = err; } - lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, i, &set); } - if (!set && !rc) - rc = -EIO; - - ret_oa->o_id = src_oa->o_id; - memcpy(src_oa, ret_oa, sizeof(*src_oa)); - GOTO(out_tmp, rc); -out_tmp: - obdo_free(tmp_oa); -out_oa: - obdo_free(ret_oa); - return rc; + err = lov_fini_setattr_set(set); + if (!rc) + rc = err; + RETURN(rc); } -/* we have an offset in file backed by an lov and want to find out where - * that offset lands in our given stripe of the file. for the easy - * case where the offset is within the stripe, we just have to scale the - * offset down to make it relative to the stripe instead of the lov. - * - * the harder case is what to do when the offset doesn't intersect the - * stripe. callers will want start offsets clamped ahead to the start - * of the nearest stripe in the file. 
end offsets similarly clamped to the - * nearest ending byte of a stripe in the file: - * - * all this function does is move offsets to the nearest region of the - * stripe, and it does its work "mod" the full length of all the stripes. - * consider a file with 3 stripes: - * - * S E - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - * - * to find stripe 1's offsets for S and E, it divides by the full stripe - * width and does its math in the context of a single set of stripes: - * - * S E - * ----------------------------------- - * | 0 | 1 | 2 | - * ----------------------------------- - * - * it'll notice that E is outside stripe 1 and clamp it to the end of the - * stripe, then multiply it back out by lov_off to give the real offsets in - * the stripe: - * - * S E - * --------------------------------------------------------------------- - * | 1 | 1 | 1 | 1 | 1 | 1 | - * --------------------------------------------------------------------- - * - * it would have done similarly and pulled S forward to the start of a 1 - * stripe if, say, S had landed in a 0 stripe. - * - * this rounding isn't always correct. consider an E lov offset that lands - * on a 0 stripe, the "mod stripe width" math will pull it forward to the - * start of a 1 stripe, when in fact it wanted to be rounded back to the end - * of a previous 1 stripe. this logic is handled by callers and this is why: - * - * this function returns < 0 when the offset was "before" the stripe and - * was moved forward to the start of the stripe in question; 0 when it - * falls in the stripe and no shifting was done; > 0 when the offset - * was outside the stripe and was pulled back to its final byte. 
*/ -static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, - int stripeno, obd_off *obd_off) +static int lov_revalidate_policy(struct lov_obd *lov, struct lov_stripe_md *lsm) { - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_off, this_stripe; - int ret = 0; - - if (lov_off == OBD_OBJECT_EOF) { - *obd_off = OBD_OBJECT_EOF; - return 0; - } + static int next_idx = 0; + struct lov_tgt_desc *tgt; + int i, count; - /* do_div(a, b) returns a % b, and a = a / b */ - stripe_off = do_div(lov_off, swidth); + /* XXX - we should do something clever and take lsm + * into account but just do round robin for now. */ - this_stripe = stripeno * ssize; - if (stripe_off < this_stripe) { - stripe_off = 0; - ret = -1; - } else { - stripe_off -= this_stripe; + /* last_idx must always be less that count because + * ld_tgt_count currently cannot shrink. */ + count = lov->desc.ld_tgt_count; - if (stripe_off >= ssize) { - stripe_off = ssize; - ret = 1; + for (i = next_idx, tgt = lov->tgts + i; i < count; i++, tgt++) { + if (tgt->active) { + next_idx = (i + 1) % count; + RETURN(i); } } - *obd_off = lov_off * ssize + stripe_off; - return ret; -} - -/* Given a whole-file size and a stripe number, give the file size which - * corresponds to the individual object of that stripe. 
- * - * This behaves basically in the same was as lov_stripe_offset, except that - * file sizes falling before the beginning of a stripe are clamped to the end - * of the previous stripe, not the beginning of the next: - * - * S - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - * - * if clamped to stripe 2 becomes: - * - * S - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - */ -static obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size, - int stripeno) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_off, this_stripe; - - if (file_size == OBD_OBJECT_EOF) - return OBD_OBJECT_EOF; - - /* do_div(a, b) returns a % b, and a = a / b */ - stripe_off = do_div(file_size, swidth); - - this_stripe = stripeno * ssize; - if (stripe_off < this_stripe) { - /* Move to end of previous stripe, or zero */ - if (file_size > 0) { - file_size--; - stripe_off = ssize; - } else { - stripe_off = 0; - } - } else { - stripe_off -= this_stripe; - - if (stripe_off >= ssize) { - /* Clamp to end of this stripe */ - stripe_off = ssize; + for (i = 0, tgt = lov->tgts; i < next_idx; i++, tgt++) { + if (tgt->active) { + next_idx = (i + 1) % count; + RETURN(i); } } - return (file_size * ssize + stripe_off); -} - -/* given an extent in an lov and a stripe, calculate the extent of the stripe - * that is contained within the lov extent. this returns true if the given - * stripe does intersect with the lov extent. 
*/ -static int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, - obd_off start, obd_off end, - obd_off *obd_start, obd_off *obd_end) -{ - int start_side = 0, end_side = 0; - - switch (lsm->lsm_pattern) { - case LOV_PATTERN_RAID0: - start_side = lov_stripe_offset(lsm, start, stripeno, obd_start); - end_side = lov_stripe_offset(lsm, end, stripeno, obd_end); - break; - case LOV_PATTERN_CMOBD: - *obd_start = start; - *obd_end = end; - start_side = end_side = 0; - break; - default: - LBUG(); - } - - CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n", - start, end, start_side, *obd_start, *obd_end, end_side); - - /* this stripe doesn't intersect the file extent when neither - * start or the end intersected the stripe and obd_start and - * obd_end got rounded up to the save value. */ - if (start_side != 0 && end_side != 0 && *obd_start == *obd_end) - return 0; - - /* as mentioned in the lov_stripe_offset commentary, end - * might have been shifted in the wrong direction. This - * happens when an end offset is before the stripe when viewed - * through the "mod stripe size" math. we detect it being shifted - * in the wrong direction and touch it up. - * interestingly, this can't underflow since end must be > start - * if we passed through the previous check. - * (should we assert for that somewhere?) 
*/ - if (end_side != 0) - (*obd_end)--; - - return 1; -} - -/* compute which stripe number "lov_off" will be written into */ -static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_off; - - if (lsm->lsm_pattern == LOV_PATTERN_CMOBD) - return 0; - - stripe_off = do_div(lov_off, swidth); - - return stripe_off / ssize; + RETURN(-EIO); } static int lov_revalidate_md(struct obd_export *exp, struct obdo *src_oa, @@ -1715,6 +1097,7 @@ static int lov_revalidate_md(struct obd_export *exp, struct obdo *src_oa, if (oti->oti_objid) oti->oti_objid[ost_idx] = tmp_oa->o_id; loi->loi_id = tmp_oa->o_id; + loi->loi_gr = tmp_oa->o_gr; loi->loi_ost_idx = ost_idx; loi->loi_ost_gen = lov->tgts[ost_idx].ltd_gen; CDEBUG(D_INODE, "replacing objid "LPX64" subobj "LPX64 @@ -1738,10 +1121,11 @@ static int lov_punch(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_off start, obd_off end, struct obd_trans_info *oti) { - struct obdo *tmp = NULL; - struct lov_oinfo *loi; + struct lov_request_set *set; struct lov_obd *lov; - int rc = 0, i; + struct list_head *pos; + struct lov_request *req; + int err = 0, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1751,50 +1135,39 @@ static int lov_punch(struct obd_export *exp, struct obdo *oa, RETURN(-ENODEV); lov = &exp->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - obd_off starti, endi; - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - if (!lov_stripe_intersects(lsm, i, start, end, &starti, &endi)) - continue; + rc = lov_prep_punch_set(exp, oa, lsm, start, end, oti, &set); + if (rc) + RETURN(rc); - /* create data objects with "parent" OA */ - tmp = obdo_alloc(); - if (tmp == NULL) - RETURN(-ENOMEM); - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; + 
list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); - err = obd_punch(lov->tgts[loi->loi_ost_idx].ltd_exp, - tmp, NULL, starti, endi, NULL); - obdo_free(tmp); + rc = obd_punch(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, + NULL, req->rq_extent.start, + req->rq_extent.end, NULL); + err = lov_update_punch_set(set, req, rc); if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: punch objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", oa->o_id, - loi->loi_id, loi->loi_ost_idx, err); - } + CERROR("error: punch objid "LPX64" subobj "LPX64 + " on OST idx %d: rc = %d\n", set->set_oa->o_id, + req->rq_oa->o_id, req->rq_idx, rc); if (!rc) rc = err; - } else { - loi->loi_kms = loi->loi_rss = starti; } } + err = lov_fini_punch_set(set); + if (!rc) + rc = err; RETURN(rc); } static int lov_sync(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md *lsm, obd_off start, obd_off end) { - struct obdo *tmp; + struct lov_request_set *set; struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i; + struct list_head *pos; + struct lov_request *req; + int err = 0, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -1803,40 +1176,28 @@ static int lov_sync(struct obd_export *exp, struct obdo *oa, if (!exp->exp_obd) RETURN(-ENODEV); - tmp = obdo_alloc(); - if (!tmp) - RETURN(-ENOMEM); - lov = &exp->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - obd_off starti, endi; - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - if (!lov_stripe_intersects(lsm, i, start, end, &starti, &endi)) - continue; + rc = lov_prep_sync_set(exp, oa, lsm, start, end, &set); + if (rc) + RETURN(rc); - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); - err = obd_sync(lov->tgts[loi->loi_ost_idx].ltd_exp, tmp, NULL, - 
starti, endi); + rc = obd_sync(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, + NULL, req->rq_extent.start, req->rq_extent.end); + err = lov_update_common_set(set, req, rc); if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: fsync objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", oa->o_id, - loi->loi_id, loi->loi_ost_idx, err); - } + CERROR("error: fsync objid "LPX64" subobj "LPX64 + " on OST idx %d: rc = %d\n", set->set_oa->o_id, + req->rq_oa->o_id, req->rq_idx, rc); if (!rc) rc = err; } } - - obdo_free(tmp); + err = lov_fini_sync_set(set); + if (!rc) + rc = err; RETURN(rc); } @@ -1875,155 +1236,58 @@ static int lov_brw(int cmd, struct obd_export *exp, struct obdo *src_oa, struct lov_stripe_md *lsm, obd_count oa_bufs, struct brw_page *pga, struct obd_trans_info *oti) { - struct { - int bufct; - int index; - int subcount; - struct lov_stripe_md lsm; - int ost_idx; - } *stripeinfo, *si, *si_last; - struct obdo *ret_oa = NULL, *tmp_oa = NULL; - struct lov_obd *lov; - struct brw_page *ioarr; - struct lov_oinfo *loi; - int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count, set = 0; + struct lov_request_set *set; + struct lov_request *req; + struct list_head *pos; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int err, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lov = &exp->exp_obd->u.lov; - if (cmd == OBD_BRW_CHECK) { rc = lov_brw_check(lov, src_oa, lsm, oa_bufs, pga); RETURN(rc); } - OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo)); - if (!stripeinfo) - RETURN(-ENOMEM); - - OBD_ALLOC(where, sizeof(*where) * oa_bufs); - if (!where) - GOTO(out_sinfo, rc = -ENOMEM); - - OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs); - if (!ioarr) - GOTO(out_where, rc = -ENOMEM); - - if (src_oa) { - ret_oa = obdo_alloc(); - if (!ret_oa) - GOTO(out_ioarr, rc = -ENOMEM); - - tmp_oa = obdo_alloc(); - if (!tmp_oa) - GOTO(out_oa, rc = -ENOMEM); - } - - for (i = 0; i < oa_bufs; i++) { - where[i] = lov_stripe_number(lsm, pga[i].disk_offset); 
- stripeinfo[where[i]].bufct++; - } - - for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo; - i < stripe_count; i++, loi++, si_last = si, si++) { - if (i > 0) - si->index = si_last->index + si_last->bufct; - si->lsm.lsm_object_id = loi->loi_id; - si->lsm.lsm_object_gr = lsm->lsm_object_gr; - si->ost_idx = loi->loi_ost_idx; - } - - for (i = 0; i < oa_bufs; i++) { - int which = where[i]; - int shift; - - shift = stripeinfo[which].index + stripeinfo[which].subcount; - LASSERT(shift < oa_bufs); - ioarr[shift] = pga[i]; - lov_stripe_offset(lsm, pga[i].disk_offset, which, - &ioarr[shift].disk_offset); - stripeinfo[which].subcount++; - } - - for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) { - int shift = si->index; - - if (lov->tgts[si->ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx); - GOTO(out_oa, rc = -EIO); - } - - if (si->bufct) { - LASSERT(shift < oa_bufs); - if (src_oa) - memcpy(tmp_oa, src_oa, sizeof(*tmp_oa)); - - tmp_oa->o_id = si->lsm.lsm_object_id; - rc = obd_brw(cmd, lov->tgts[si->ost_idx].ltd_exp, - tmp_oa, &si->lsm, si->bufct, - &ioarr[shift], oti); - if (rc) - GOTO(out_oa, rc); + rc = lov_prep_brw_set(exp, src_oa, lsm, oa_bufs, pga, oti, &set); + if (rc) + RETURN(rc); - lov_merge_attrs(ret_oa, tmp_oa, tmp_oa->o_valid, lsm, - i, &set); - } + list_for_each (pos, &set->set_list) { + struct obd_export *sub_exp; + struct brw_page *sub_pga; + req = list_entry(pos, struct lov_request, rq_link); + + sub_exp = lov->tgts[req->rq_idx].ltd_exp; + sub_pga = set->set_pga + req->rq_pgaidx; + rc = obd_brw(cmd, sub_exp, req->rq_oa, req->rq_md, + req->rq_oabufs, sub_pga, oti); + if (rc) + break; + lov_update_common_set(set, req, rc); } - ret_oa->o_id = src_oa->o_id; - memcpy(src_oa, ret_oa, sizeof(*src_oa)); - - GOTO(out_oa, rc); - out_oa: - if (tmp_oa) - obdo_free(tmp_oa); - if (ret_oa) - obdo_free(ret_oa); - out_ioarr: - OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); - out_where: - OBD_FREE(where, sizeof(*where) * oa_bufs); - 
out_sinfo: - OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); - return rc; + err = lov_fini_brw_set(set); + if (!rc) + rc = err; + RETURN(rc); } static int lov_brw_interpret(struct ptlrpc_request_set *reqset, void *data, int rc) { - struct lov_brw_async_args *aa = data; - struct lov_stripe_md *lsm = aa->aa_lsm; - obd_count oa_bufs = aa->aa_oa_bufs; - struct obdo *oa = aa->aa_oa; - struct obdo *obdos = aa->aa_obdos; - struct brw_page *ioarr = aa->aa_ioarr; - struct lov_oinfo *loi; - int i, set = 0; + struct lov_request_set *lovset = (struct lov_request_set *)data; ENTRY; - - if (rc == 0) { - /* NB all stripe requests succeeded to get here */ - - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { - if (obdos[i].o_valid == 0) /* inactive stripe */ - continue; - - lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm, - i, &set); - } - - if (!set) { - CERROR("No stripes had valid attrs\n"); - rc = -EIO; - } + + if (rc) { + lovset->set_completes = 0; + lov_fini_brw_set(lovset); + } else { + rc = lov_fini_brw_set(lovset); } - oa->o_id = lsm->lsm_object_id; - - OBD_FREE(obdos, lsm->lsm_stripe_count * sizeof(*obdos)); - OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); + RETURN(rc); } @@ -2032,155 +1296,65 @@ static int lov_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, struct brw_page *pga, struct ptlrpc_request_set *set, struct obd_trans_info *oti) { - struct { - int bufct; - int index; - int subcount; - struct lov_stripe_md lsm; - int ost_idx; - } *stripeinfo, *si, *si_last; - struct lov_obd *lov; - struct brw_page *ioarr; - struct obdo *obdos = NULL; - struct lov_oinfo *loi; - struct lov_brw_async_args *aa; - int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count; + struct lov_request_set *lovset; + struct lov_request *req; + struct list_head *pos; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0; ENTRY; if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lov = &exp->exp_obd->u.lov; - if (cmd == OBD_BRW_CHECK) { rc = 
lov_brw_check(lov, oa, lsm, oa_bufs, pga); RETURN(rc); } - OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo)); - if (!stripeinfo) - RETURN(-ENOMEM); - - OBD_ALLOC(where, sizeof(*where) * oa_bufs); - if (!where) - GOTO(out_sinfo, rc = -ENOMEM); - - if (oa) { - OBD_ALLOC(obdos, sizeof(*obdos) * stripe_count); - if (!obdos) - GOTO(out_where, rc = -ENOMEM); - } - - OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs); - if (!ioarr) - GOTO(out_obdos, rc = -ENOMEM); - - for (i = 0; i < oa_bufs; i++) { - where[i] = lov_stripe_number(lsm, pga[i].disk_offset); - stripeinfo[where[i]].bufct++; - } - - for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo; - i < stripe_count; i++, loi++, si_last = si, si++) { - if (i > 0) - si->index = si_last->index + si_last->bufct; - si->lsm.lsm_object_id = loi->loi_id; - si->ost_idx = loi->loi_ost_idx; - - if (oa) { - memcpy(&obdos[i], oa, sizeof(*obdos)); - obdos[i].o_id = si->lsm.lsm_object_id; - } - } - - for (i = 0; i < oa_bufs; i++) { - int which = where[i]; - int shift; - - shift = stripeinfo[which].index + stripeinfo[which].subcount; - LASSERT(shift < oa_bufs); - ioarr[shift] = pga[i]; - lov_stripe_offset(lsm, pga[i].disk_offset, which, - &ioarr[shift].disk_offset); - stripeinfo[which].subcount++; - } - - for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) { - int shift = si->index; - - if (si->bufct == 0) - continue; - - if (lov->tgts[si->ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx); - GOTO(out_ioarr, rc = -EIO); - } - - LASSERT(shift < oa_bufs); + rc = lov_prep_brw_set(exp, oa, lsm, oa_bufs, pga, oti, &lovset); + if (rc) + RETURN(rc); - rc = obd_brw_async(cmd, lov->tgts[si->ost_idx].ltd_exp, - &obdos[i], &si->lsm, si->bufct, - &ioarr[shift], set, oti); + list_for_each (pos, &lovset->set_list) { + struct obd_export *sub_exp; + struct brw_page *sub_pga; + req = list_entry(pos, struct lov_request, rq_link); + + sub_exp = lov->tgts[req->rq_idx].ltd_exp; + sub_pga = lovset->set_pga + 
req->rq_pgaidx; + rc = obd_brw_async(cmd, sub_exp, req->rq_oa, req->rq_md, + req->rq_oabufs, sub_pga, set, oti); if (rc) - GOTO(out_ioarr, rc); + GOTO(out, rc); + lov_update_common_set(lovset, req, rc); } LASSERT(rc == 0); LASSERT(set->set_interpret == NULL); set->set_interpret = (set_interpreter_func)lov_brw_interpret; - LASSERT(sizeof(set->set_args) >= sizeof(struct lov_brw_async_args)); - aa = (struct lov_brw_async_args *)&set->set_args; - aa->aa_lsm = lsm; - aa->aa_obdos = obdos; - aa->aa_oa = oa; - aa->aa_ioarr = ioarr; - aa->aa_oa_bufs = oa_bufs; - - /* Don't free ioarr or obdos - that's done in lov_brw_interpret */ - GOTO(out_where, rc); - - out_ioarr: - OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); - out_obdos: - OBD_FREE(obdos, stripe_count * sizeof(*obdos)); - out_where: - OBD_FREE(where, sizeof(*where) * oa_bufs); - out_sinfo: - OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); - return rc; + set->set_arg = (void *)lovset; + + RETURN(rc); +out: + lov_fini_brw_set(lovset); + RETURN(rc); } -struct lov_async_page *lap_from_cookie(void *cookie) -{ - struct lov_async_page *lap = cookie; - if (lap->lap_magic != LAP_MAGIC) - return ERR_PTR(-EINVAL); - return lap; -}; - static int lov_ap_make_ready(void *data, int cmd) { - struct lov_async_page *lap = lap_from_cookie(data); - /* XXX should these assert? */ - if (IS_ERR(lap)) - return -EINVAL; + struct lov_async_page *lap = LAP_FROM_COOKIE(data); return lap->lap_caller_ops->ap_make_ready(lap->lap_caller_data, cmd); } static int lov_ap_refresh_count(void *data, int cmd) { - struct lov_async_page *lap = lap_from_cookie(data); - if (IS_ERR(lap)) - return -EINVAL; + struct lov_async_page *lap = LAP_FROM_COOKIE(data); return lap->lap_caller_ops->ap_refresh_count(lap->lap_caller_data, cmd); } static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa) { - struct lov_async_page *lap = lap_from_cookie(data); - /* XXX should these assert? 
*/ - if (IS_ERR(lap)) - return; + struct lov_async_page *lap = LAP_FROM_COOKIE(data); lap->lap_caller_ops->ap_fill_obdo(lap->lap_caller_data, cmd, oa); /* XXX woah, shouldn't we be altering more here? size? */ @@ -2189,9 +1363,7 @@ static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa) static void lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { - struct lov_async_page *lap = lap_from_cookie(data); - if (IS_ERR(lap)) - return; + struct lov_async_page *lap = LAP_FROM_COOKIE(data); /* in a raid1 regime this would down a count of many ios * in flight, onl calling the caller_ops completion when all @@ -2288,9 +1460,7 @@ static int lov_queue_async_io(struct obd_export *exp, if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lap = lap_from_cookie(cookie); - if (IS_ERR(lap)) - RETURN(PTR_ERR(lap)); + lap = LAP_FROM_COOKIE(cookie); loi = &lsm->lsm_oinfo[lap->lap_stripe]; @@ -2314,9 +1484,7 @@ static int lov_set_async_flags(struct obd_export *exp, if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lap = lap_from_cookie(cookie); - if (IS_ERR(lap)) - RETURN(PTR_ERR(lap)); + lap = LAP_FROM_COOKIE(cookie); loi = &lsm->lsm_oinfo[lap->lap_stripe]; @@ -2341,9 +1509,7 @@ static int lov_queue_group_io(struct obd_export *exp, if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lap = lap_from_cookie(cookie); - if (IS_ERR(lap)) - RETURN(PTR_ERR(lap)); + lap = LAP_FROM_COOKIE(cookie); loi = &lsm->lsm_oinfo[lap->lap_stripe]; @@ -2397,9 +1563,7 @@ static int lov_teardown_async_page(struct obd_export *exp, if (lsm_bad_magic(lsm)) RETURN(-EINVAL); - lap = lap_from_cookie(cookie); - if (IS_ERR(lap)) - RETURN(PTR_ERR(lap)); + lap = LAP_FROM_COOKIE(cookie); loi = &lsm->lsm_oinfo[lap->lap_stripe]; @@ -2420,14 +1584,13 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, void *data,__u32 lvb_len, void *lvb_swabber, struct lustre_handle *lockh) { - struct lov_lock_handles *lov_lockh = NULL; + struct lov_request_set *set; + struct lov_request *req; + struct list_head 
*pos; struct lustre_handle *lov_lockhp; struct lov_obd *lov; - struct lov_oinfo *loi; - char submd_buf[sizeof(struct lov_stripe_md) + sizeof(struct lov_oinfo)]; - struct lov_stripe_md *submd = (void *)submd_buf; ldlm_error_t rc; - int i, save_flags = *flags; + int save_flags = *flags; ENTRY; if (lsm_bad_magic(lsm)) @@ -2439,145 +1602,44 @@ static int lov_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, if (!exp || !exp->exp_obd) RETURN(-ENODEV); - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_llh_new(lsm); - if (lov_lockh == NULL) - RETURN(-ENOMEM); - - lockh->cookie = lov_lockh->llh_handle.h_cookie; - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - lov = &exp->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - ldlm_policy_data_t sub_ext; - obd_off start, end; - - if (!lov_stripe_intersects(lsm, i, policy->l_extent.start, - policy->l_extent.end, &start, - &end)) - continue; - - sub_ext.l_extent.start = start; - sub_ext.l_extent.end = end; - sub_ext.l_extent.gid = policy->l_extent.gid; + rc = lov_prep_enqueue_set(exp, lsm, policy, mode, lockh, &set); + if (rc) + RETURN(rc); - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } + list_for_each (pos, &set->set_list) { + ldlm_policy_data_t sub_policy; + req = list_entry(pos, struct lov_request, rq_link); + lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; + LASSERT(lov_lockhp); - /* XXX LOV STACKING: submd should be from the subobj */ - submd->lsm_object_id = loi->loi_id; - submd->lsm_object_gr = lsm->lsm_object_gr; - submd->lsm_stripe_count = 0; - submd->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid; - submd->lsm_oinfo->loi_rss = loi->loi_rss; - submd->lsm_oinfo->loi_kms = loi->loi_kms; - submd->lsm_oinfo->loi_blocks = loi->loi_blocks; - loi->loi_mtime = submd->lsm_oinfo->loi_mtime; - /* XXX submd is not fully initialized here */ *flags = 
save_flags; - rc = obd_enqueue(lov->tgts[loi->loi_ost_idx].ltd_exp, submd, - type, &sub_ext, mode, flags, bl_cb, cp_cb, - gl_cb, data, lvb_len, lvb_swabber, lov_lockhp); - - /* XXX FIXME: This unpleasantness doesn't belong here at *all*. - * It belongs in the OSC, except that the OSC doesn't have - * access to the real LOI -- it gets a copy, that we created - * above, and that copy can be arbitrarily out of date. - * - * The LOV API is due for a serious rewriting anyways, and this - * can be addressed then. */ - if (rc == ELDLM_OK) { - struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp); - __u64 tmp = submd->lsm_oinfo->loi_rss; - - LASSERT(lock != NULL); - loi->loi_rss = tmp; - loi->loi_blocks = submd->lsm_oinfo->loi_blocks; - /* Extend KMS up to the end of this lock and no further - * A lock on [x,y] means a KMS of up to y + 1 bytes! */ - if (tmp > lock->l_policy_data.l_extent.end) - tmp = lock->l_policy_data.l_extent.end + 1; - if (tmp >= loi->loi_kms) { - CDEBUG(D_INODE, "lock acquired, setting rss=" - LPU64", kms="LPU64"\n", loi->loi_rss, - tmp); - loi->loi_kms = tmp; - loi->loi_kms_valid = 1; - } else { - CDEBUG(D_INODE, "lock acquired, setting rss=" - LPU64"; leaving kms="LPU64", end="LPU64 - "\n", loi->loi_rss, loi->loi_kms, - lock->l_policy_data.l_extent.end); - } - ldlm_lock_allow_match(lock); - LDLM_LOCK_PUT(lock); - } else if (rc == ELDLM_LOCK_ABORTED && - save_flags & LDLM_FL_HAS_INTENT) { - memset(lov_lockhp, 0, sizeof(*lov_lockhp)); - loi->loi_rss = submd->lsm_oinfo->loi_rss; - loi->loi_blocks = submd->lsm_oinfo->loi_blocks; - CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving" - " kms="LPU64"\n", loi->loi_rss, loi->loi_kms); - } else { - memset(lov_lockhp, 0, sizeof(*lov_lockhp)); - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: enqueue objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - lsm->lsm_object_id, loi->loi_id, - loi->loi_ost_idx, rc); - GOTO(out_locks, rc); - } - } - } - if (lsm->lsm_stripe_count > 1) - 
lov_llh_put(lov_lockh); - RETURN(ELDLM_OK); - - out_locks: - while (loi--, lov_lockhp--, i-- > 0) { - struct lov_stripe_md submd; - int err; - - if (lov_lockhp->cookie == 0) - continue; - - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; - submd.lsm_stripe_count = 0; - err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd, - mode, lov_lockhp); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancelling objid "LPX64" on OST " - "idx %d after enqueue error: rc = %d\n", - loi->loi_id, loi->loi_ost_idx, err); - } + sub_policy.l_extent.start = req->rq_extent.start; + sub_policy.l_extent.end = req->rq_extent.end; + + rc = obd_enqueue(lov->tgts[req->rq_idx].ltd_exp, req->rq_md, + type, &sub_policy, mode, flags, bl_cb, + cp_cb, gl_cb, data, lvb_len, lvb_swabber, + lov_lockhp); + rc = lov_update_enqueue_set(set, req, rc, save_flags); + if (rc != ELDLM_OK) + break; } - if (lsm->lsm_stripe_count > 1) { - lov_llh_destroy(lov_lockh); - lov_llh_put(lov_lockh); - } - return rc; + lov_fini_enqueue_set(set, mode); + RETURN(rc); } static int lov_match(struct obd_export *exp, struct lov_stripe_md *lsm, __u32 type, ldlm_policy_data_t *policy, __u32 mode, int *flags, void *data, struct lustre_handle *lockh) { - struct lov_lock_handles *lov_lockh = NULL; + struct lov_request_set *set; + struct lov_request *req; + struct list_head *pos; + struct lov_obd *lov = &exp->exp_obd->u.lov; struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_stripe_md submd; - ldlm_error_t rc = 0; - int i; + int lov_flags, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -2586,86 +1648,29 @@ static int lov_match(struct obd_export *exp, struct lov_stripe_md *lsm, if (!exp || !exp->exp_obd) RETURN(-ENODEV); - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_llh_new(lsm); - if (lov_lockh == NULL) - RETURN(-ENOMEM); - - lockh->cookie = lov_lockh->llh_handle.h_cookie; 
- lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - lov = &exp->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - ldlm_policy_data_t sub_ext; - obd_off start, end; - int lov_flags; - - if (!lov_stripe_intersects(lsm, i, policy->l_extent.start, - policy->l_extent.end, &start, &end)) - continue; - - sub_ext.l_extent.start = start; - sub_ext.l_extent.end = end; + rc = lov_prep_match_set(exp, lsm, policy, mode, lockh, &set); + if (rc) + RETURN(rc); - if (obd_uuid_empty(&lov->tgts[loi->loi_ost_idx].uuid)) { - CDEBUG(D_HA, "lov idx %d deleted\n", loi->loi_ost_idx); - continue; - } - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - rc = -EIO; - break; - } + list_for_each (pos, &set->set_list) { + ldlm_policy_data_t sub_policy; + req = list_entry(pos, struct lov_request, rq_link); + lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; + LASSERT(lov_lockhp); - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; - submd.lsm_stripe_count = 0; + sub_policy.l_extent.start = req->rq_extent.start; + sub_policy.l_extent.end = req->rq_extent.end; lov_flags = *flags; - /* XXX submd is not fully initialized here */ - rc = obd_match(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd, - type, &sub_ext, mode, &lov_flags, data, + + rc = obd_match(lov->tgts[req->rq_idx].ltd_exp, req->rq_md, + type, &sub_policy, mode, &lov_flags, data, lov_lockhp); + rc = lov_update_match_set(set, req, rc); if (rc != 1) break; } - if (rc == 1) { - if (lsm->lsm_stripe_count > 1) { - if (*flags & LDLM_FL_TEST_LOCK) - lov_llh_destroy(lov_lockh); - lov_llh_put(lov_lockh); - } - RETURN(1); - } - - while (loi--, lov_lockhp--, i-- > 0) { - struct lov_stripe_md submd; - int err; - - if (lov_lockhp->cookie == 0) - continue; - - /* XXX LOV STACKING: submd should be from the subobj */ - 
submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; - submd.lsm_stripe_count = 0; - err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd, - mode, lov_lockhp); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancelling objid "LPX64" on OST " - "idx %d after match failure: rc = %d\n", - loi->loi_id, loi->loi_ost_idx, err); - } - } - - if (lsm->lsm_stripe_count > 1) { - lov_llh_destroy(lov_lockh); - lov_llh_put(lov_lockh); - } + lov_fini_match_set(set, mode, *flags); RETURN(rc); } @@ -2706,11 +1711,12 @@ static int lov_change_cbdata(struct obd_export *exp, static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, __u32 mode, struct lustre_handle *lockh) { - struct lov_lock_handles *lov_lockh = NULL; + struct lov_request_set *set; + struct lov_request *req; + struct list_head *pos; + struct lov_obd *lov = &exp->exp_obd->u.lov; struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i; + int err = 0, rc = 0; ENTRY; if (lsm_bad_magic(lsm)) @@ -2722,57 +1728,34 @@ static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, LASSERT(lsm->lsm_object_gr > 0); LASSERT(lockh); - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_handle2llh(lockh); - if (!lov_lockh) { - CERROR("LOV: invalid lov lock handle %p\n", lockh); - RETURN(-EINVAL); - } - - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - lov = &exp->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - struct lov_stripe_md submd; - int err; + rc = lov_prep_cancel_set(exp, lsm, mode, lockh, &set); + if (rc) + RETURN(rc); - if (lov_lockhp->cookie == 0) { - CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", - loi->loi_ost_idx, loi->loi_id); - continue; - } + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; - /* XXX 
LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = lsm->lsm_object_gr; - submd.lsm_stripe_count = 0; - err = obd_cancel(lov->tgts[loi->loi_ost_idx].ltd_exp, &submd, - mode, lov_lockhp); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancel objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - lsm->lsm_object_id, - loi->loi_id, loi->loi_ost_idx, err); - if (!rc) - rc = err; - } + rc = obd_cancel(lov->tgts[req->rq_idx].ltd_exp, req->rq_md, + mode, lov_lockhp); + rc = lov_update_common_set(set, req, rc); + if (rc) { + CERROR("error: cancel objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + lsm->lsm_object_id, + req->rq_md->lsm_object_id, req->rq_idx, rc); + err = rc; } + } - - if (lsm->lsm_stripe_count > 1) - lov_llh_destroy(lov_lockh); - if (lov_lockh != NULL) - lov_llh_put(lov_lockh); - RETURN(rc); + lov_fini_cancel_set(set); + RETURN(err); } static int lov_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *lsm, int flags, void *opaque) + struct lov_stripe_md *lsm, + int flags, void *opaque) { struct lov_obd *lov; struct lov_oinfo *loi; @@ -2977,8 +1960,8 @@ static int lov_iocontrol(unsigned int cmd, struct obd_export *exp, int len, len, karg, uarg); if (err) { if (lov->tgts[i].active) { - CERROR("error: iocontrol OSC %s on OST" - "idx %d: cmd %x err = %d\n", + CERROR("error: iocontrol OSC %s on OST " + "idx %d cmd %x: err = %d\n", lov->tgts[i].uuid.uuid, i, cmd, err); if (!rc) @@ -3061,8 +2044,8 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, for (i = 0; i < lov->desc.ld_tgt_count; i++) { if (!lov->tgts[i].active) continue; - rc = obd_get_info(lov->tgts[i].ltd_exp, keylen, key, - &size, &(ids[i])); + rc = obd_get_info(lov->tgts[i].ltd_exp, + keylen, key, &size, &(ids[i])); if (rc != 0) RETURN(rc); } @@ -3082,7 +2065,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, { struct obd_device *obddev = class_exp2obd(exp); 
struct lov_obd *lov = &obddev->u.lov; - int i, rc = 0; + int i, rc = 0, err; ENTRY; #define KEY_IS(str) \ @@ -3092,21 +2075,51 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (vallen != lov->desc.ld_tgt_count) RETURN(-EINVAL); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int er; - - /* OST was deleted */ - if (obd_uuid_empty(&lov->tgts[i].uuid)) - continue; - /* initialize all OSCs, even inactive ones */ - er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key, - sizeof(obd_id), ((obd_id*)val) + i); + err = obd_set_info(lov->tgts[i].ltd_exp, + keylen, key, sizeof(obd_id), + ((obd_id*)val) + i); if (!rc) - rc = er; + rc = err; + } + RETURN(rc); + } + if (KEY_IS("async")) { + struct lov_desc *desc = &lov->desc; + struct lov_tgt_desc *tgts = lov->tgts; + + if (vallen != sizeof(int)) + RETURN(-EINVAL); + lov->async = *((int*) val); + + for (i = 0; i < desc->ld_tgt_count; i++, tgts++) { + struct obd_uuid *tgt_uuid = &tgts->uuid; + struct obd_device *tgt_obd; + + tgt_obd = class_find_client_obd(tgt_uuid, + LUSTRE_OSC_NAME, + &obddev->obd_uuid); + if (!tgt_obd) { + CERROR("Target %s not attached\n", + tgt_uuid->uuid); + if (!rc) + rc = -EINVAL; + continue; + } + + err = obd_set_info(tgt_obd->obd_self_export, + keylen, key, vallen, val); + if (err) { + CERROR("Failed to set async on target %s\n", + tgt_obd->obd_name); + if (!rc) + rc = err; + } } RETURN(rc); } + if (KEY_IS("growth_count")) { if (vallen != sizeof(int)) RETURN(-EINVAL); @@ -3156,81 +2169,20 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, } for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int er; - - /* OST was deleted */ - if (obd_uuid_empty(&lov->tgts[i].uuid)) + if (val && !obd_uuid_equals(val, &lov->tgts[i].uuid)) continue; if (!val && !lov->tgts[i].active) continue; - er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key, vallen, - val); + err = obd_set_info(lov->tgts[i].ltd_exp, + keylen, key, vallen, val); if (!rc) - rc = er; + rc = err; } RETURN(rc); #undef 
KEY_IS - -} - -/* Merge rss if @kms_only == 0 - * - * Even when merging RSS, we will take the KMS value if it's larger. - * This prevents getattr from stomping on dirty cached pages which - * extend the file size. */ -__u64 lov_merge_size(struct lov_stripe_md *lsm, int kms_only) -{ - struct lov_oinfo *loi; - __u64 size = 0; - int i; - - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { - obd_size lov_size, tmpsize; - - tmpsize = loi->loi_kms; - if (kms_only == 0 && loi->loi_rss > tmpsize) - tmpsize = loi->loi_rss; - - lov_size = lov_stripe_size(lsm, tmpsize, i); - if (lov_size > size) - size = lov_size; - } - - return size; -} -EXPORT_SYMBOL(lov_merge_size); - -/* Merge blocks */ -__u64 lov_merge_blocks(struct lov_stripe_md *lsm) -{ - struct lov_oinfo *loi; - __u64 blocks = 0; - int i; - - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { - blocks += loi->loi_blocks; - } - return blocks; } -EXPORT_SYMBOL(lov_merge_blocks); - -__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time) -{ - struct lov_oinfo *loi; - int i; - - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { - if (loi->loi_mtime > current_time) - current_time = loi->loi_mtime; - } - return current_time; -} -EXPORT_SYMBOL(lov_merge_mtime); #if 0 struct lov_multi_wait { @@ -3322,27 +2274,6 @@ int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm, } #endif -void lov_increase_kms(struct obd_export *exp, struct lov_stripe_md *lsm, - obd_off size) -{ - struct lov_oinfo *loi; - int stripe = 0; - __u64 kms; - ENTRY; - - if (size > 0) - stripe = lov_stripe_number(lsm, size - 1); - kms = lov_size_to_stripe(lsm, size, stripe); - loi = &(lsm->lsm_oinfo[stripe]); - - CDEBUG(D_INODE, "stripe %d KMS %sincreasing "LPU64"->"LPU64"\n", - stripe, kms > loi->loi_kms ? 
"" : "not ", loi->loi_kms, kms); - if (kms > loi->loi_kms) - loi->loi_kms = kms; - EXIT; -} -EXPORT_SYMBOL(lov_increase_kms); - struct obd_ops lov_obd_ops = { .o_owner = THIS_MODULE, .o_attach = lov_attach, @@ -3369,6 +2300,7 @@ struct obd_ops lov_obd_ops = { .o_queue_group_io = lov_queue_group_io, .o_trigger_group_io = lov_trigger_group_io, .o_teardown_async_page = lov_teardown_async_page, + .o_adjust_kms = lov_adjust_kms, .o_punch = lov_punch, .o_sync = lov_sync, .o_enqueue = lov_enqueue, diff --git a/lustre/lov/lov_offset.c b/lustre/lov/lov_offset.c new file mode 100644 index 0000000..66fad27 --- /dev/null +++ b/lustre/lov/lov_offset.c @@ -0,0 +1,240 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LOV + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#include +#include + +#include "lov_internal.h" + +/* compute object size given "stripeno" and the ost size */ +obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size, + int stripeno) +{ + unsigned long ssize = lsm->lsm_stripe_size; + unsigned long swidth = ssize * lsm->lsm_stripe_count; + unsigned long stripe_size; + obd_size lov_size; + ENTRY; + + if (ost_size == 0) + RETURN(0); + + /* do_div(a, b) returns a % b, and a = a / b */ + stripe_size = do_div(ost_size, ssize); + if (stripe_size) + lov_size = ost_size * swidth + stripeno * ssize + stripe_size; + else + lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize; + + RETURN(lov_size); +} + +/* we have an offset in file backed by an lov and want to find out where + * that offset lands in our given stripe of the file. for the easy + * case where the offset is within the stripe, we just have to scale the + * offset down to make it relative to the stripe instead of the lov. + * + * the harder case is what to do when the offset doesn't intersect the + * stripe. callers will want start offsets clamped ahead to the start + * of the nearest stripe in the file. end offsets similarly clamped to the + * nearest ending byte of a stripe in the file: + * + * all this function does is move offsets to the nearest region of the + * stripe, and it does its work "mod" the full length of all the stripes. 
+ * consider a file with 3 stripes: + * + * S E + * --------------------------------------------------------------------- + * | 0 | 1 | 2 | 0 | 1 | 2 | + * --------------------------------------------------------------------- + * + * to find stripe 1's offsets for S and E, it divides by the full stripe + * width and does its math in the context of a single set of stripes: + * + * S E + * ----------------------------------- + * | 0 | 1 | 2 | + * ----------------------------------- + * + * it'll notice that E is outside stripe 1 and clamp it to the end of the + * stripe, then multiply it back out by lov_off to give the real offsets in + * the stripe: + * + * S E + * --------------------------------------------------------------------- + * | 1 | 1 | 1 | 1 | 1 | 1 | + * --------------------------------------------------------------------- + * + * it would have done similarly and pulled S forward to the start of a 1 + * stripe if, say, S had landed in a 0 stripe. + * + * this rounding isn't always correct. consider an E lov offset that lands + * on a 0 stripe, the "mod stripe width" math will pull it forward to the + * start of a 1 stripe, when in fact it wanted to be rounded back to the end + * of a previous 1 stripe. this logic is handled by callers and this is why: + * + * this function returns < 0 when the offset was "before" the stripe and + * was moved forward to the start of the stripe in question; 0 when it + * falls in the stripe and no shifting was done; > 0 when the offset + * was outside the stripe and was pulled back to its final byte. 
*/ +int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, + int stripeno, obd_off *obd_off) +{ + unsigned long ssize = lsm->lsm_stripe_size; + unsigned long swidth = ssize * lsm->lsm_stripe_count; + unsigned long stripe_off, this_stripe; + int ret = 0; + + if (lov_off == OBD_OBJECT_EOF) { + *obd_off = OBD_OBJECT_EOF; + return 0; + } + + /* do_div(a, b) returns a % b, and a = a / b */ + stripe_off = do_div(lov_off, swidth); + + this_stripe = stripeno * ssize; + if (stripe_off < this_stripe) { + stripe_off = 0; + ret = -1; + } else { + stripe_off -= this_stripe; + + if (stripe_off >= ssize) { + stripe_off = ssize; + ret = 1; + } + } + + *obd_off = lov_off * ssize + stripe_off; + return ret; +} + +/* Given a whole-file size and a stripe number, give the file size which + * corresponds to the individual object of that stripe. + * + * This behaves basically in the same way as lov_stripe_offset, except that + * file sizes falling before the beginning of a stripe are clamped to the end + * of the previous stripe, not the beginning of the next: + * + * S + * --------------------------------------------------------------------- + * | 0 | 1 | 2 | 0 | 1 | 2 | + * --------------------------------------------------------------------- + * + * if clamped to stripe 2 becomes: + * + * S + * --------------------------------------------------------------------- + * | 0 | 1 | 2 | 0 | 1 | 2 | + * --------------------------------------------------------------------- + */ +obd_off lov_size_to_stripe(struct lov_stripe_md *lsm, obd_off file_size, + int stripeno) +{ + unsigned long ssize = lsm->lsm_stripe_size; + unsigned long swidth = ssize * lsm->lsm_stripe_count; + unsigned long stripe_off, this_stripe; + + if (file_size == OBD_OBJECT_EOF) + return OBD_OBJECT_EOF; + + /* do_div(a, b) returns a % b, and a = a / b */ + stripe_off = do_div(file_size, swidth); + + this_stripe = stripeno * ssize; + if (stripe_off < this_stripe) { + /* Move to end of previous stripe, or zero */ + 
if (file_size > 0) { + file_size--; + stripe_off = ssize; + } else { + stripe_off = 0; + } + } else { + stripe_off -= this_stripe; + + if (stripe_off >= ssize) { + /* Clamp to end of this stripe */ + stripe_off = ssize; + } + } + + return (file_size * ssize + stripe_off); +} + +/* given an extent in an lov and a stripe, calculate the extent of the stripe + * that is contained within the lov extent. this returns true if the given + * stripe does intersect with the lov extent. */ +int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, + obd_off start, obd_off end, + obd_off *obd_start, obd_off *obd_end) +{ + int start_side, end_side; + + start_side = lov_stripe_offset(lsm, start, stripeno, obd_start); + end_side = lov_stripe_offset(lsm, end, stripeno, obd_end); + + CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n", + start, end, start_side, *obd_start, *obd_end, end_side); + + /* this stripe doesn't intersect the file extent when neither + * the start nor the end intersected the stripe and obd_start and + * obd_end got rounded up to the same value. */ + if (start_side != 0 && end_side != 0 && *obd_start == *obd_end) + return 0; + + /* as mentioned in the lov_stripe_offset commentary, end + * might have been shifted in the wrong direction. This + * happens when an end offset is before the stripe when viewed + * through the "mod stripe size" math. we detect it being shifted + * in the wrong direction and touch it up. + * interestingly, this can't underflow since end must be > start + * if we passed through the previous check. + * (should we assert for that somewhere?) 
*/ + if (end_side != 0) + (*obd_end)--; + + return 1; +} + +/* compute which stripe number "lov_off" will be written into */ +int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off) +{ + unsigned long ssize = lsm->lsm_stripe_size; + unsigned long swidth = ssize * lsm->lsm_stripe_count; + unsigned long stripe_off; + + stripe_off = do_div(lov_off, swidth); + + return stripe_off / ssize; +} diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 378422e..4f65c2a 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -108,7 +108,7 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, if (lsm) { if (lsm->lsm_magic != LOV_MAGIC) { - CERROR("bad mem LOV MAGIC: 0x%08X != 0x%08X\n", + CWARN("bad LOV MAGIC: 0x%08X != 0x%08X\n", lsm->lsm_magic, LOV_MAGIC); RETURN(-EINVAL); } @@ -307,12 +307,14 @@ int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count, int pattern) return lsm_size; } EXPORT_SYMBOL(lov_alloc_memmd); + void lov_free_memmd(struct lov_stripe_md **lsmp) { OBD_FREE(*lsmp, lov_stripe_md_size((*lsmp)->lsm_stripe_count)); *lsmp = NULL; } EXPORT_SYMBOL(lov_free_memmd); + int lov_unpackmd_v0(struct lov_obd *lov, struct lov_stripe_md *lsm, struct lov_mds_md_v0 *lmm) { diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c new file mode 100644 index 0000000..b8ac8fe --- /dev/null +++ b/lustre/lov/lov_qos.c @@ -0,0 +1,187 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LOV + +#ifdef __KERNEL__ +#else +#include +#endif + +#include +#include + +#include "lov_internal.h" + +void qos_shrink_lsm(struct lov_request_set *set) +{ + struct lov_stripe_md *lsm = set->set_md; + struct lov_stripe_md *lsm_new; + /* XXX LOV STACKING call into osc for sizes */ + unsigned oldsize, newsize; + + if (set->set_oti && set->set_cookies && set->set_cookie_sent) { + struct llog_cookie *cookies; + oldsize = lsm->lsm_stripe_count * sizeof(*cookies); + newsize = set->set_count * sizeof(*cookies); + + cookies = set->set_cookies; + oti_alloc_cookies(set->set_oti, set->set_count); + if (set->set_oti->oti_logcookies) { + memcpy(set->set_oti->oti_logcookies, cookies, newsize); + OBD_FREE(cookies, oldsize); + set->set_cookies = set->set_oti->oti_logcookies; + } else { + CWARN("'leaking' %d bytes\n", oldsize - newsize); + } + } + + CWARN("using fewer stripes for object "LPX64": old %u new %u\n", + lsm->lsm_object_id, lsm->lsm_stripe_count, set->set_count); + + oldsize = lov_stripe_md_size(lsm->lsm_stripe_count); + newsize = lov_stripe_md_size(set->set_count); + OBD_ALLOC(lsm_new, newsize); + if (lsm_new != NULL) { + memcpy(lsm_new, lsm, newsize); + lsm_new->lsm_stripe_count = set->set_count; + OBD_FREE(lsm, oldsize); + set->set_md = lsm_new; + } else { + CWARN("'leaking' %d bytes\n", oldsize - newsize); + } +} + +#define LOV_CREATE_RESEED_INTERVAL 1000 +/* FIXME use real qos data to prepare the lov create request */ +int qos_prep_create(struct 
lov_obd *lov, struct lov_request_set *set, int newea) +{ + static int ost_start_idx, ost_start_count; + unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; + struct lov_stripe_md *lsm = set->set_md; + struct obdo *src_oa = set->set_oa; + int i, rc = 0; + ENTRY; + + LASSERT(src_oa->o_valid & OBD_MD_FLID); + + lsm->lsm_object_id = src_oa->o_id; + lsm->lsm_object_gr = src_oa->o_gr; + if (!lsm->lsm_stripe_size) + lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; + if (!lsm->lsm_pattern) { + lsm->lsm_pattern = lov->desc.ld_pattern ? + lov->desc.ld_pattern : LOV_PATTERN_RAID0; + } + + if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) { + if (--ost_start_count <= 0) { + ost_start_idx = ll_insecure_random_int(); + ost_start_count = LOV_CREATE_RESEED_INTERVAL; + } else if (lsm->lsm_stripe_count >= + lov->desc.ld_active_tgt_count) { + /* If we allocate from all of the stripes, make the + * next file start on the next OST. */ + ++ost_start_idx; + } + ost_idx = ost_start_idx % ost_count; + } else { + ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; + } + + CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", + lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); + + for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { + struct lov_request *req; + + ++ost_start_idx; + if (lov->tgts[ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_buflen = sizeof(*req->rq_md); + OBD_ALLOC(req->rq_md, req->rq_buflen); + if (req->rq_md == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_idx = ost_idx; + req->rq_stripe = i; + /* create data objects with "parent" OA */ + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + + /* XXX When we start creating objects on demand, we need to + * make sure that we always create the object on the + * stripe 
which holds the existing file size. + */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + if (lov_stripe_offset(lsm, src_oa->o_size, i, + &req->rq_oa->o_size) < 0 && + req->rq_oa->o_size) + req->rq_oa->o_size--; + + CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", + i, req->rq_oa->o_size, src_oa->o_size); + } + + lov_set_add_req(req, set); + + /* If we have allocated enough objects, we are OK */ + if (set->set_count == lsm->lsm_stripe_count) + GOTO(out, rc = 0); + } + + if (set->set_count == 0) + GOTO(out, rc = -EIO); + + /* If we were passed specific striping params, then a failure to + * meet those requirements is an error, since we can't reallocate + * that memory (it might be part of a larger array or something). + * + * We can only get here if lsm_stripe_count was originally > 1. + */ + if (!newea) { + CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", + lsm->lsm_object_id, set->set_count, + lsm->lsm_stripe_count, rc); + rc = rc ? rc : -EFBIG; + } else { + qos_shrink_lsm(set); + rc = 0; + } +out: + RETURN(rc); +} + + + diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c new file mode 100644 index 0000000..9df75b6 --- /dev/null +++ b/lustre/lov/lov_request.c @@ -0,0 +1,1295 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LOV + +#ifdef __KERNEL__ +#include +#else +#include +#endif + +#include +#include +#include + +#include "lov_internal.h" + +static void lov_init_set(struct lov_request_set *set) +{ + set->set_count = 0; + set->set_completes = 0; + set->set_success = 0; + INIT_LIST_HEAD(&set->set_list); + atomic_set(&set->set_refcount, 1); +} + +static void lov_finish_set(struct lov_request_set *set) +{ + struct list_head *pos, *n; + ENTRY; + + LASSERT(set); + list_for_each_safe(pos, n, &set->set_list) { + struct lov_request *req = list_entry(pos, struct lov_request, + rq_link); + list_del_init(&req->rq_link); + + if (req->rq_oa) + obdo_free(req->rq_oa); + if (req->rq_md) + OBD_FREE(req->rq_md, req->rq_buflen); + OBD_FREE(req, sizeof(*req)); + } + + if (set->set_pga) { + int len = set->set_oabufs * sizeof(*set->set_pga); + OBD_FREE(set->set_pga, len); + } + if (set->set_lockh) + lov_llh_put(set->set_lockh); + + OBD_FREE(set, sizeof(*set)); + EXIT; +} + +static void lov_update_set(struct lov_request_set *set, + struct lov_request *req, int rc) +{ + req->rq_complete = 1; + req->rq_rc = rc; + + set->set_completes++; + if (rc == 0) + set->set_success++; +} + +int lov_update_common_set(struct lov_request_set *set, + struct lov_request *req, int rc) +{ + struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; + ENTRY; + + lov_update_set(set, req, rc); + + /* grace error on inactive ost */ + if (rc && !lov->tgts[req->rq_idx].active) + rc = 0; + + /* FIXME in raid1 regime, should return 0 */ + RETURN(rc); +} + +void lov_set_add_req(struct lov_request *req, struct lov_request_set *set) +{ + list_add_tail(&req->rq_link, &set->set_list); + set->set_count++; +} + +int lov_update_enqueue_set(struct lov_request_set *set, 
+ struct lov_request *req, int rc, int flags) +{ + struct lustre_handle *lov_lockhp; + struct lov_oinfo *loi; + ENTRY; + + lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; + loi = &set->set_md->lsm_oinfo[req->rq_stripe]; + + /* XXX FIXME: This unpleasantness doesn't belong here at *all*. + * It belongs in the OSC, except that the OSC doesn't have + * access to the real LOI -- it gets a copy, that we created + * above, and that copy can be arbitrarily out of date. + * + * The LOV API is due for a serious rewriting anyways, and this + * can be addressed then. */ + if (rc == ELDLM_OK) { + struct ldlm_lock *lock = ldlm_handle2lock(lov_lockhp); + __u64 tmp = req->rq_md->lsm_oinfo->loi_rss; + + LASSERT(lock != NULL); + loi->loi_rss = tmp; + loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime; + loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks; + /* Extend KMS up to the end of this lock and no further + * A lock on [x,y] means a KMS of up to y + 1 bytes! */ + if (tmp > lock->l_policy_data.l_extent.end) + tmp = lock->l_policy_data.l_extent.end + 1; + if (tmp >= loi->loi_kms) { + CDEBUG(D_INODE, "lock acquired, setting rss=" + LPU64", kms="LPU64"\n", loi->loi_rss, tmp); + loi->loi_kms = tmp; + loi->loi_kms_valid = 1; + } else { + CDEBUG(D_INODE, "lock acquired, setting rss=" + LPU64"; leaving kms="LPU64", end="LPU64 + "\n", loi->loi_rss, loi->loi_kms, + lock->l_policy_data.l_extent.end); + } + ldlm_lock_allow_match(lock); + LDLM_LOCK_PUT(lock); + } else if (rc == ELDLM_LOCK_ABORTED && flags & LDLM_FL_HAS_INTENT) { + memset(lov_lockhp, 0, sizeof(*lov_lockhp)); + loi->loi_rss = req->rq_md->lsm_oinfo->loi_rss; + loi->loi_mtime = req->rq_md->lsm_oinfo->loi_mtime; + loi->loi_blocks = req->rq_md->lsm_oinfo->loi_blocks; + CDEBUG(D_INODE, "glimpsed, setting rss="LPU64"; leaving" + " kms="LPU64"\n", loi->loi_rss, loi->loi_kms); + rc = ELDLM_OK; + } else { + struct obd_export *exp = set->set_exp; + struct lov_obd *lov = &exp->exp_obd->u.lov; + + memset(lov_lockhp, 0, 
sizeof(*lov_lockhp)); + if (lov->tgts[req->rq_idx].active) { + CERROR("error: enqueue objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + set->set_md->lsm_object_id, loi->loi_id, + loi->loi_ost_idx, rc); + } else { + rc = ELDLM_OK; + } + } + lov_update_set(set, req, rc); + RETURN(rc); +} + +static int enqueue_done(struct lov_request_set *set, __u32 mode) +{ + struct list_head *pos; + struct lov_request *req; + struct lustre_handle *lov_lockhp = NULL; + struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; + int rc = 0; + ENTRY; + + LASSERT(set->set_completes); + /* enqueue/match success, just return */ + if (set->set_completes == set->set_success) + RETURN(0); + + /* cancel enqueued/matched locks */ + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + + if (!req->rq_complete || req->rq_rc) + continue; + + lov_lockhp = set->set_lockh->llh_handles + req->rq_stripe; + LASSERT(lov_lockhp); + if (lov_lockhp->cookie == 0) + continue; + + rc = obd_cancel(lov->tgts[req->rq_idx].ltd_exp, req->rq_md, + mode, lov_lockhp); + if (rc && lov->tgts[req->rq_idx].active) + CERROR("cancelling obdjid "LPX64" on OST " + "idx %d error: rc = %d\n", + req->rq_md->lsm_object_id, req->rq_idx, rc); + } + lov_llh_put(set->set_lockh); + RETURN(rc); +} + +int lov_fini_enqueue_set(struct lov_request_set *set, __u32 mode) +{ + int rc = 0; + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) + rc = enqueue_done(set, mode); + else + lov_llh_put(set->set_lockh); + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_enqueue_set(struct obd_export *exp, struct lov_stripe_md *lsm, + ldlm_policy_data_t *policy, __u32 mode, + struct lustre_handle *lockh, + struct lov_request_set **reqset) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + struct lov_request_set *set; + int i, rc = 0; + struct lov_oinfo *loi; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == 
NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_lockh = lov_llh_new(lsm); + if (set->set_lockh == NULL) + GOTO(out_set, rc = -ENOMEM); + lockh->cookie = set->set_lockh->llh_handle.h_cookie; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + obd_off start, end; + + if (!lov_stripe_intersects(lsm, i, policy->l_extent.start, + policy->l_extent.end, &start, &end)) + continue; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_buflen = sizeof(*req->rq_md) + + sizeof(struct lov_oinfo); + OBD_ALLOC(req->rq_md, req->rq_buflen); + if (req->rq_md == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_extent.start = start; + req->rq_extent.end = end; + + req->rq_idx = loi->loi_ost_idx; + req->rq_stripe = i; + + /* XXX LOV STACKING: submd should be from the subobj */ + req->rq_md->lsm_object_id = loi->loi_id; + req->rq_md->lsm_object_gr = lsm->lsm_object_gr; + req->rq_md->lsm_stripe_count = 0; + req->rq_md->lsm_oinfo->loi_kms_valid = loi->loi_kms_valid; + req->rq_md->lsm_oinfo->loi_rss = loi->loi_rss; + req->rq_md->lsm_oinfo->loi_kms = loi->loi_kms; + req->rq_md->lsm_oinfo->loi_blocks = loi->loi_blocks; + req->rq_md->lsm_oinfo->loi_mtime = loi->loi_mtime; + + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(0); +out_set: + lov_fini_enqueue_set(set, mode); + RETURN(rc); +} + +int lov_update_match_set(struct lov_request_set *set, struct lov_request *req, + int rc) +{ + int ret = rc; + ENTRY; + + if (rc == 1) + ret = 0; + lov_update_set(set, req, ret); + RETURN(rc); +} + +int lov_fini_match_set(struct lov_request_set *set, __u32 mode, int flags) +{ + int rc = 0; + ENTRY; + + if (set == NULL) /* must precede any dereference of set */ + RETURN(0); + LASSERT(set->set_exp); + if 
(set->set_completes) { + if (set->set_count == set->set_success && + flags & LDLM_FL_TEST_LOCK) + lov_llh_put(set->set_lockh); + rc = enqueue_done(set, mode); + } else { + lov_llh_put(set->set_lockh); + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_match_set(struct obd_export *exp, struct lov_stripe_md *lsm, + ldlm_policy_data_t *policy, __u32 mode, + struct lustre_handle *lockh, + struct lov_request_set **reqset) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + struct lov_request_set *set; + int i, rc = 0; + struct lov_oinfo *loi; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_lockh = lov_llh_new(lsm); + if (set->set_lockh == NULL) + GOTO(out_set, rc = -ENOMEM); + lockh->cookie = set->set_lockh->llh_handle.h_cookie; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + obd_off start, end; + + if (!lov_stripe_intersects(lsm, i, policy->l_extent.start, + policy->l_extent.end, &start, &end)) + continue; + + /* FIXME raid1 should grace this error */ + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + GOTO(out_set, rc = -EIO); + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_buflen = sizeof(*req->rq_md); + OBD_ALLOC(req->rq_md, req->rq_buflen); + if (req->rq_md == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_extent.start = start; + req->rq_extent.end = end; + + req->rq_idx = loi->loi_ost_idx; + req->rq_stripe = i; + + /* XXX LOV STACKING: submd should be from the subobj */ + req->rq_md->lsm_object_id = loi->loi_id; + req->rq_md->lsm_object_gr = lsm->lsm_object_gr; + req->rq_md->lsm_stripe_count = 0; + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + 
lov_fini_match_set(set, mode, 0); + RETURN(rc); +} + +int lov_fini_cancel_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + if (set == NULL) /* must precede any dereference of set */ + RETURN(0); + LASSERT(set->set_exp); + + if (set->set_lockh) + lov_llh_put(set->set_lockh); + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_cancel_set(struct obd_export *exp, struct lov_stripe_md *lsm, + __u32 mode, struct lustre_handle *lockh, + struct lov_request_set **reqset) +{ + struct lov_request_set *set; + int i, rc = 0; + struct lov_oinfo *loi; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_lockh = lov_handle2llh(lockh); + if (set->set_lockh == NULL) { + CERROR("LOV: invalid lov lock handle %p\n", lockh); + GOTO(out_set, rc = -EINVAL); + } + lockh->cookie = set->set_lockh->llh_handle.h_cookie; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + struct lustre_handle *lov_lockhp; + + lov_lockhp = set->set_lockh->llh_handles + i; + if (lov_lockhp->cookie == 0) { + CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", + loi->loi_ost_idx, loi->loi_id); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_buflen = sizeof(*req->rq_md); + OBD_ALLOC(req->rq_md, req->rq_buflen); + if (req->rq_md == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_idx = loi->loi_ost_idx; + req->rq_stripe = i; + + /* XXX LOV STACKING: submd should be from the subobj */ + req->rq_md->lsm_object_id = loi->loi_id; + req->rq_md->lsm_object_gr = lsm->lsm_object_gr; + req->rq_md->lsm_stripe_count = 0; + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_cancel_set(set); + RETURN(rc); +} + +static int create_done(struct obd_export *exp, struct lov_request_set *set, + struct 
lov_stripe_md **ea) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + struct obd_trans_info *oti = set->set_oti; + struct obdo *src_oa = set->set_oa; + struct list_head *pos; + struct lov_request *req; + struct obdo *ret_oa = NULL; + int attrset = 0, rc = 0; + ENTRY; + + LASSERT(set->set_completes); + + if (!set->set_success) + GOTO(cleanup, rc = -EIO); + + if (*ea == NULL && set->set_count != set->set_success) { + set->set_count = set->set_success; + qos_shrink_lsm(set); + } + + ret_oa = obdo_alloc(); + if (ret_oa == NULL) + GOTO(cleanup, rc = -ENOMEM); + + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + if (!req->rq_complete || req->rq_rc) + continue; + lov_merge_attrs(ret_oa, req->rq_oa, req->rq_oa->o_valid, + set->set_md, req->rq_stripe, &attrset); + } + if (src_oa->o_valid & OBD_MD_FLSIZE && + ret_oa->o_size != src_oa->o_size) { + CERROR("original size "LPU64" isn't new object size "LPU64"\n", + src_oa->o_size, ret_oa->o_size); + LBUG(); + } + ret_oa->o_id = src_oa->o_id; + ret_oa->o_gr = src_oa->o_gr; + ret_oa->o_valid |= OBD_MD_FLGROUP; + memcpy(src_oa, ret_oa, sizeof(*src_oa)); + obdo_free(ret_oa); + + *ea = set->set_md; + GOTO(done, rc = 0); + + EXIT; +cleanup: + list_for_each (pos, &set->set_list) { + struct obd_export *sub_exp; + int err = 0; + req = list_entry(pos, struct lov_request, rq_link); + + if (!req->rq_complete || req->rq_rc) + continue; + + sub_exp = lov->tgts[req->rq_idx].ltd_exp; + err = obd_destroy(sub_exp, req->rq_oa, NULL, oti); + if (err) /* log obd_destroy()'s own status, not the outer rc */ + CERROR("Failed to uncreate objid "LPX64" subobj " + LPX64" on OST idx %d: rc = %d\n", + set->set_oa->o_id, req->rq_oa->o_id, + req->rq_idx, err); + } + if (*ea == NULL) + obd_free_memmd(exp, &set->set_md); +done: + if (oti && set->set_cookies) { + oti->oti_logcookies = set->set_cookies; + if (!set->set_cookie_sent) { + oti_free_cookies(oti); + src_oa->o_valid &= ~OBD_MD_FLCOOKIE; + } else { + src_oa->o_valid |= OBD_MD_FLCOOKIE; + } + } + return rc; +} +
+int lov_fini_create_set(struct lov_request_set *set, struct lov_stripe_md **ea) +{ + int rc = 0; + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) { + rc = create_done(set->set_exp, set, ea); + /* FIXME update qos data here */ + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_update_create_set(struct lov_request_set *set, + struct lov_request *req, int rc) +{ + struct obd_trans_info *oti = set->set_oti; + struct lov_stripe_md *lsm = set->set_md; + struct lov_oinfo *loi; + struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; + ENTRY; + + req->rq_stripe = set->set_success; + loi = &lsm->lsm_oinfo[req->rq_stripe]; + + if (rc && lov->tgts[req->rq_idx].active) { + CERROR("error creating objid "LPX64" sub-object" + " on OST idx %d/%d: rc = %d\n", + set->set_oa->o_id, req->rq_idx, + lsm->lsm_stripe_count, rc); + if (rc > 0) { + CERROR("obd_create returned invalid err %d\n", rc); + rc = -EIO; + } + } + lov_update_set(set, req, rc); + if (rc) + RETURN(rc); + + if (oti && oti->oti_objid) + oti->oti_objid[req->rq_idx] = req->rq_oa->o_id; + + loi->loi_id = req->rq_oa->o_id; + loi->loi_gr = req->rq_oa->o_gr; + loi->loi_ost_idx = req->rq_idx; + CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64"/"LPX64" at idx %d\n", + lsm->lsm_object_id, loi->loi_id, loi->loi_id, req->rq_idx); + loi_init(loi); + + if (set->set_cookies) + ++oti->oti_logcookies; + if (req->rq_oa->o_valid & OBD_MD_FLCOOKIE) + set->set_cookie_sent++; + + RETURN(0); +} + +int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **ea, + struct obdo *src_oa, struct obd_trans_info *oti, + struct lov_request_set **reqset) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + struct lov_request_set *set; + int rc = 0, newea = 0; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = *ea; + set->set_oa = src_oa; + set->set_oti = 
oti; + + if (set->set_md == NULL) { + int stripes, stripe_cnt; + stripe_cnt = lov_get_stripecnt(lov, 0); + + /* If the MDS file was truncated up to some size, stripe over + * enough OSTs to allow the file to be created at that size. */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1; + do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12)); + + if (stripes > lov->desc.ld_active_tgt_count) + GOTO(out_set, rc = -EFBIG); + if (stripes < stripe_cnt) + stripes = stripe_cnt; + } else { + stripes = stripe_cnt; + } + + rc = lov_alloc_memmd(&set->set_md, stripes, + lov->desc.ld_pattern ? + lov->desc.ld_pattern : LOV_PATTERN_RAID0); + if (rc < 0) + goto out_set; + newea = 1; + } + + rc = qos_prep_create(lov, set, newea); + if (rc) + goto out_lsm; + + if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { + oti_alloc_cookies(oti, set->set_count); + if (!oti->oti_logcookies) /* rc is 0 here: report the failure */ + GOTO(out_lsm, rc = -ENOMEM); + set->set_cookies = oti->oti_logcookies; + } + *reqset = set; + RETURN(rc); + +out_lsm: + if (*ea == NULL) + obd_free_memmd(exp, &set->set_md); +out_set: + lov_fini_create_set(set, ea); + RETURN(rc); +} + +static int common_attr_done(struct lov_request_set *set) +{ + struct list_head *pos; + struct lov_request *req; + struct obdo *tmp_oa; + int rc = 0, attrset = 0; + ENTRY; + + if (set->set_oa == NULL) + RETURN(0); + + if (!set->set_success) + RETURN(-EIO); + + tmp_oa = obdo_alloc(); + if (tmp_oa == NULL) + GOTO(out, rc = -ENOMEM); + + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + + if (!req->rq_complete || req->rq_rc) + continue; + if (req->rq_oa->o_valid == 0) /* inactive stripe */ + continue; + lov_merge_attrs(tmp_oa, req->rq_oa, req->rq_oa->o_valid, + set->set_md, req->rq_stripe, &attrset); + } + if (!attrset) { + CERROR("No stripes had valid attrs\n"); + rc = -EIO; + } + tmp_oa->o_id = set->set_oa->o_id; + memcpy(set->set_oa, tmp_oa, sizeof(*set->set_oa)); +out: + if (tmp_oa) + 
obdo_free(tmp_oa); + RETURN(rc); + +} + +static int brw_done(struct lov_request_set *set) +{ + struct lov_stripe_md *lsm = set->set_md; + struct lov_oinfo *loi = NULL; + struct list_head *pos; + struct lov_request *req; + ENTRY; + + list_for_each (pos, &set->set_list) { + req = list_entry(pos, struct lov_request, rq_link); + + if (!req->rq_complete || req->rq_rc) + continue; + + loi = &lsm->lsm_oinfo[req->rq_stripe]; + + if (req->rq_oa->o_valid & OBD_MD_FLBLOCKS) + loi->loi_blocks = req->rq_oa->o_blocks; + } + + RETURN(0); +} + +int lov_fini_brw_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + if (set == NULL) /* must precede any dereference of set */ + RETURN(0); + LASSERT(set->set_exp); + if (set->set_completes) { + rc = brw_done(set); + /* FIXME update qos data here */ + } + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_brw_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_count oa_bufs, + struct brw_page *pga, struct obd_trans_info *oti, + struct lov_request_set **reqset) +{ + struct { + obd_count index; + obd_count count; + obd_count off; + } *info = NULL; + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, i, shift; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + set->set_oti = oti; + set->set_oabufs = oa_bufs; + OBD_ALLOC(set->set_pga, oa_bufs * sizeof(*set->set_pga)); + if (!set->set_pga) + GOTO(out, rc = -ENOMEM); + + OBD_ALLOC(info, sizeof(*info) * lsm->lsm_stripe_count); + if (!info) + GOTO(out, rc = -ENOMEM); + + /* calculate the page count for each stripe */ + for (i = 0; i < oa_bufs; i++) { + int stripe = lov_stripe_number(lsm, pga[i].disk_offset); + info[stripe].count++; + } + + /* alloc and initialize lov request */ + loi = lsm->lsm_oinfo; + shift = 0; + for (i = 0; i < lsm->lsm_stripe_count; i++, 
loi++) { + struct lov_request *req; + + if (info[i].count == 0) + continue; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + GOTO(out, rc = -EIO); + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out, rc = -ENOMEM); + + if (src_oa) + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + req->rq_buflen = sizeof(*req->rq_md); + OBD_ALLOC(req->rq_md, req->rq_buflen); + if (req->rq_md == NULL) + GOTO(out, rc = -ENOMEM); + + req->rq_idx = loi->loi_ost_idx; + req->rq_stripe = i; + + /* XXX LOV STACKING */ + req->rq_md->lsm_object_id = loi->loi_id; + req->rq_md->lsm_object_gr = lsm->lsm_object_gr; + req->rq_oabufs = info[i].count; + req->rq_pgaidx = shift; + shift += req->rq_oabufs; + + /* remember the index for sort brw_page array */ + info[i].index = req->rq_pgaidx; + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out, rc = -EIO); + + /* rotate & sort the brw_page array */ + for (i = 0; i < oa_bufs; i++) { + int stripe = lov_stripe_number(lsm, pga[i].disk_offset); + + shift = info[stripe].index + info[stripe].off; + LASSERT(shift < oa_bufs); + set->set_pga[shift] = pga[i]; + lov_stripe_offset(lsm, pga[i].disk_offset, stripe, + &set->set_pga[shift].disk_offset); + info[stripe].off++; + } +out: + if (info) + OBD_FREE(info, sizeof(*info) * lsm->lsm_stripe_count); + + if (rc == 0) + *reqset = set; + else + lov_fini_brw_set(set); + + RETURN(rc); +} + +static int getattr_done(struct lov_request_set *set) +{ + return common_attr_done(set); +} + +int lov_fini_getattr_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + if (set == NULL) /* must precede any dereference of set */ + RETURN(0); + LASSERT(set->set_exp); + if (set->set_completes) + rc = getattr_done(set); + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_getattr_set(struct obd_export *exp, 
struct obdo *src_oa, + struct lov_stripe_md *lsm, + struct lov_request_set **reqset) +{ + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, i; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_stripe = i; + req->rq_idx = loi->loi_ost_idx; + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out_set, rc = -ENOMEM); + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_getattr_set(set); + RETURN(rc); +} + +int lov_fini_destroy_set(struct lov_request_set *set) +{ + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) { + /* FIXME update qos data here */ + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(0); +} + +int lov_prep_destroy_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, + struct obd_trans_info *oti, + struct lov_request_set **reqset) +{ + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, cookie_set = 0, i; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + set->set_oti = oti; + if (oti != NULL && src_oa->o_valid & OBD_MD_FLCOOKIE) + set->set_cookies = oti->oti_logcookies; + + 
loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + + req->rq_stripe = i; + req->rq_idx = loi->loi_ost_idx; + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out_set, rc = -ENOMEM); + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + + /* Setup the first request's cookie position */ + if (!cookie_set && set->set_cookies) { + oti->oti_logcookies = set->set_cookies + i; + cookie_set = 1; + } + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_destroy_set(set); + RETURN(rc); +} + +static int setattr_done(struct lov_request_set *set) +{ + return common_attr_done(set); +} + +int lov_fini_setattr_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) { + rc = setattr_done(set); + /* FIXME update qos data here */ + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + RETURN(rc); +} + +int lov_prep_setattr_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, struct obd_trans_info *oti, + struct lov_request_set **reqset) +{ + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, i; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + 
OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + req->rq_stripe = i; + req->rq_idx = loi->loi_ost_idx; + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out_set, rc = -ENOMEM); + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + LASSERT(!(req->rq_oa->o_valid & OBD_MD_FLGROUP) || req->rq_oa->o_gr>0); + + if (src_oa->o_valid & OBD_MD_FLSIZE) { + if (lov_stripe_offset(lsm, src_oa->o_size, i, + &req->rq_oa->o_size) < 0 && + req->rq_oa->o_size) + req->rq_oa->o_size--; + CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", + i, req->rq_oa->o_size, src_oa->o_size); + } + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_setattr_set(set); + RETURN(rc); +} + +int lov_update_punch_set(struct lov_request_set *set, struct lov_request *req, + int rc) +{ + struct lov_obd *lov = &set->set_exp->exp_obd->u.lov; + ENTRY; + + lov_update_set(set, req, rc); + if (rc && !lov->tgts[req->rq_idx].active) + rc = 0; + /* FIXME in raid1 regime, should return 0 */ + RETURN(rc); +} + +int lov_fini_punch_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) { + if (!set->set_success) + rc = -EIO; + /* FIXME update qos data here */ + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_punch_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_off start, + obd_off end, struct obd_trans_info *oti, + struct lov_request_set **reqset) +{ + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, i; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + + loi = lsm->lsm_oinfo; + for (i 
= 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + obd_off rs, re; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + if (!lov_stripe_intersects(lsm, i, start, end, &rs, &re)) + continue; + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + req->rq_stripe = i; + req->rq_idx = loi->loi_ost_idx; + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out_set, rc = -ENOMEM); + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + req->rq_oa->o_gr = loi->loi_gr; + req->rq_oa->o_valid |= OBD_MD_FLGROUP; + + req->rq_extent.start = rs; + req->rq_extent.end = re; + + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_punch_set(set); + RETURN(rc); +} + +int lov_fini_sync_set(struct lov_request_set *set) +{ + int rc = 0; + ENTRY; + + LASSERT(set->set_exp); + if (set == NULL) + RETURN(0); + if (set->set_completes) { + if (!set->set_success) + rc = -EIO; + /* FIXME update qos data here */ + } + + if (atomic_dec_and_test(&set->set_refcount)) + lov_finish_set(set); + + RETURN(rc); +} + +int lov_prep_sync_set(struct obd_export *exp, struct obdo *src_oa, + struct lov_stripe_md *lsm, obd_off start, + obd_off end, struct lov_request_set **reqset) +{ + struct lov_request_set *set; + struct lov_oinfo *loi = NULL; + struct lov_obd *lov = &exp->exp_obd->u.lov; + int rc = 0, i; + ENTRY; + + OBD_ALLOC(set, sizeof(*set)); + if (set == NULL) + RETURN(-ENOMEM); + lov_init_set(set); + + set->set_exp = exp; + set->set_md = lsm; + set->set_oa = src_oa; + + loi = lsm->lsm_oinfo; + for (i = 0; i < lsm->lsm_stripe_count; i++, loi++) { + struct lov_request *req; + obd_off rs, re; + + if (lov->tgts[loi->loi_ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); + continue; + } + + if (!lov_stripe_intersects(lsm, i, 
start, end, &rs, &re)) + continue; + + OBD_ALLOC(req, sizeof(*req)); + if (req == NULL) + GOTO(out_set, rc = -ENOMEM); + req->rq_stripe = i; + req->rq_idx = loi->loi_ost_idx; + + req->rq_oa = obdo_alloc(); + if (req->rq_oa == NULL) + GOTO(out_set, rc = -ENOMEM); + memcpy(req->rq_oa, src_oa, sizeof(*req->rq_oa)); + req->rq_oa->o_id = loi->loi_id; + req->rq_extent.start = rs; + req->rq_extent.end = re; + lov_set_add_req(req, set); + } + if (!set->set_count) + GOTO(out_set, rc = -EIO); + *reqset = set; + RETURN(rc); +out_set: + lov_fini_sync_set(set); + RETURN(rc); +} diff --git a/lustre/lvfs/Makefile.in b/lustre/lvfs/Makefile.in index 7f98c6a..00a327d 100644 --- a/lustre/lvfs/Makefile.in +++ b/lustre/lvfs/Makefile.in @@ -3,8 +3,8 @@ MODULES := lvfs fsfilt_@BACKINGFS@ fsfilt_smfs @SNAPFS_TRUE@MODULES += fsfilt_snap_@BACKINGFS@ fsfilt_snap_smfs lvfs-objs := fsfilt.o lvfs_common.o llog_lvfs.o lvfs_linux.o -lvfs-objs += llog.o llog_cat.o - +lvfs-objs += llog.o llog_cat.o + ifeq ($(PATCHLEVEL),6) fsfilt_@BACKINGFS@-objs := fsfilt-@BACKINGFS@.o diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index ca9c839..4a80755 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -31,6 +31,8 @@ ldiskfs_sed_flags = \ -e "s/dx_hash_info/ext3_dx_hash_info/g" \ -e "s/dir_private_info/ext3_dir_private_info/g" \ -e "s/DX_HASH/EXT3_DX_HASH/g" \ + -e "s/reserve_window/ext3_reserve_window/g" \ + -e "s/rsv_window_add/ext3_rsv_window_add/g" \ -e "s/EXT3/LDISKFS/g" -e "s/ext3/ldiskfs/g" fsfilt_ldiskfs.c: fsfilt_ext3.c diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 0eda25b..03c1e04 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -52,6 +52,14 @@ #endif +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)) +# define lock_24kernel() lock_kernel() +# define unlock_24kernel() unlock_kernel() +#else +# define lock_24kernel() do {} while (0) +# define unlock_24kernel() do {} while (0) +#endif + static 
kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); @@ -68,6 +76,7 @@ struct fsfilt_cb_data { #endif #define XATTR_LUSTRE_MDS_LOV_EA "lov" +#define XATTR_LUSTRE_MDS_MEA_EA "mea" #define XATTR_LUSTRE_MDS_MID_EA "mid" #define XATTR_LUSTRE_MDS_SID_EA "sid" @@ -161,9 +170,10 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, } journal_start: - lock_kernel(); + LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks); + lock_24kernel(); handle = journal_start(EXT3_JOURNAL(inode), nblocks); - unlock_kernel(); + unlock_24kernel(); if (!IS_ERR(handle)) LASSERT(current->journal_info == handle); @@ -299,9 +309,10 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, needed = journal->j_max_transaction_buffers; } - lock_kernel(); + LASSERTF(needed > 0, "can't start %d credit transaction\n", needed); + lock_24kernel(); handle = journal_start(journal, needed); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); @@ -323,9 +334,9 @@ static int fsfilt_ext3_commit(struct super_block *sb, struct inode *inode, if (force_sync) handle->h_sync = 1; /* recovery likes this */ - lock_kernel(); + lock_24kernel(); rc = journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); return rc; } @@ -484,69 +495,69 @@ static int fsfilt_ext3_get_xattr(struct inode *inode, char *name, } static int fsfilt_ext3_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) -{ - int rc; - - LASSERT(down_trylock(&inode->i_sem) != 0); - - /* keep this when we get rid of OLD_EA (too noisy during conversion) */ - if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) - CWARN("setting EA on %lu/%u again... 
interesting\n", - inode->i_ino, inode->i_generation); - - rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_LOV_EA, - lmm, lmm_size); - return rc; -} - -/* Must be called with i_sem held */ -static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) -{ - int rc; - - rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_LOV_EA, - lmm, lmm_size); - return rc; -} - -static int fsfilt_ext3_set_mid(struct inode *inode, void *handle, - void *mid, int mid_size) -{ - int rc; - - rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_MID_EA, - mid, mid_size); - return rc; -} - -/* Must be called with i_sem held */ -static int fsfilt_ext3_get_mid(struct inode *inode, void *mid, int mid_size) -{ - int rc; - - rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_MID_EA, - mid, mid_size); - return rc; -} - -static int fsfilt_ext3_set_sid(struct inode *inode, void *handle, - void *sid, int sid_size) + void *lmm, int lmm_size, + enum ea_type type) { int rc; + + switch(type) { + case EA_LOV: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_LOV_EA, + lmm, lmm_size); + break; + case EA_MEA: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_MEA_EA, + lmm, lmm_size); + break; + case EA_SID: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_SID_EA, + lmm, lmm_size); + break; + case EA_MID: + rc = fsfilt_ext3_set_xattr(inode, handle, + XATTR_LUSTRE_MDS_MID_EA, + lmm, lmm_size); + break; + default: + return -EINVAL; + } - rc = fsfilt_ext3_set_xattr(inode, handle, XATTR_LUSTRE_MDS_SID_EA, - sid, sid_size); return rc; } -/* Must be called with i_sem held */ -static int fsfilt_ext3_get_sid(struct inode *inode, void *sid, int sid_size) +static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, + int lmm_size, enum ea_type type) { int rc; - - rc = fsfilt_ext3_get_xattr(inode, XATTR_LUSTRE_MDS_SID_EA, - sid, sid_size); + + switch (type) { + case EA_LOV: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_LOV_EA, + lmm, 
lmm_size); + break; + case EA_MEA: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_MEA_EA, + lmm, lmm_size); + break; + case EA_SID: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_SID_EA, + lmm, lmm_size); + break; + case EA_MID: + rc = fsfilt_ext3_get_xattr(inode, + XATTR_LUSTRE_MDS_MID_EA, + lmm, lmm_size); + break; + default: + return -EINVAL; + } + return rc; } @@ -681,12 +692,10 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, fcb->cb_data = cb_data; CDEBUG(D_EXT2, "set callback for last_num: "LPD64"\n", last_num); - lock_kernel(); journal_callback_set(handle, fsfilt_ext3_cb_func, (struct journal_callback *)fcb); unlock_kernel(); - return 0; } @@ -728,7 +737,7 @@ static int fsfilt_ext3_sync(struct super_block *sb) #define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem); #define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem); #endif - + #include #if EXT3_EXT_MAGIC == 0xf301 #define ee_start e_start @@ -793,16 +802,16 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, loff_t new_i_size; handle_t *handle; int i, aflags = 0; - + i = EXT_DEPTH(tree); EXT_ASSERT(i == path->p_depth); EXT_ASSERT(path[i].p_hdr); - + if (exist) { err = EXT_CONTINUE; goto map; } - + if (bp->create == 0) { i = 0; if (newex->ee_block < bp->start) @@ -824,7 +833,6 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, tgen = EXT_GENERATION(tree); count = ext3_ext_calc_credits_for_insert(tree, path); ext3_up_truncate_sem(inode); - lock_kernel(); handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1); unlock_kernel(); @@ -832,7 +840,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return PTR_ERR(handle); } - + if (tgen != EXT_GENERATION(tree)) { /* the tree has changed. 
so path can be invalid at moment */ lock_kernel(); @@ -841,7 +849,6 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, ext3_down_truncate_sem(inode); return EXT_REPEAT; } - ext3_down_truncate_sem(inode); count = newex->ee_len; goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags); @@ -868,9 +875,9 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, } } out: - lock_kernel(); + lock_24kernel(); journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); map: if (err >= 0) { /* map blocks */ @@ -918,12 +925,11 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block, bp.start = block; bp.init_num = bp.num = num; bp.create = create; - + ext3_down_truncate_sem(inode); err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); ext3_ext_invalidate_cache(&tree); ext3_up_truncate_sem(inode); - return err; } @@ -1097,10 +1103,10 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, block_count = (block_count + blocksize - 1) >> inode->i_blkbits; journal = EXT3_SB(inode->i_sb)->s_journal; - lock_kernel(); + lock_24kernel(); handle = journal_start(journal, block_count * EXT3_DATA_TRANS_BLOCKS + 2); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return PTR_ERR(handle); @@ -1158,9 +1164,9 @@ out: unlock_kernel(); } - lock_kernel(); + lock_24kernel(); journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); if (err == 0) *offs = offset; @@ -1392,10 +1398,6 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_iocontrol = fsfilt_ext3_iocontrol, .fs_set_md = fsfilt_ext3_set_md, .fs_get_md = fsfilt_ext3_get_md, - .fs_set_mid = fsfilt_ext3_set_mid, - .fs_get_mid = fsfilt_ext3_get_mid, - .fs_set_sid = fsfilt_ext3_set_sid, - .fs_get_sid = fsfilt_ext3_get_sid, .fs_readpage = fsfilt_ext3_readpage, .fs_add_journal_cb = fsfilt_ext3_add_journal_cb, .fs_statfs = fsfilt_ext3_statfs, diff --git a/lustre/lvfs/fsfilt_smfs.c 
b/lustre/lvfs/fsfilt_smfs.c index 1526902..8d61cd1 100644 --- a/lustre/lvfs/fsfilt_smfs.c +++ b/lustre/lvfs/fsfilt_smfs.c @@ -243,11 +243,14 @@ static int fsfilt_smfs_iocontrol(struct inode *inode, struct file *file, RETURN(rc); } -typedef int (*set_ea_func_t) (struct inode *, void *, void *, int); -typedef int (*get_ea_func_t) (struct inode *, void *, int); +typedef int (*set_ea_func_t) (struct inode *, void *, void *, + int, enum ea_type); + +typedef int (*get_ea_func_t) (struct inode *, void *, int, + enum ea_type); static int fsfilt_smfs_set_ea(struct inode *inode, void *handle, - void *ea, int ea_size, + void *ea, int ea_size, enum ea_type type, set_ea_func_t set_ea_func) { struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); @@ -268,7 +271,8 @@ static int fsfilt_smfs_set_ea(struct inode *inode, void *handle, pre_smfs_inode(inode, cache_inode); down(&cache_inode->i_sem); - rc = set_ea_func(cache_inode, handle, ea, ea_size); + rc = set_ea_func(cache_inode, handle, ea, + ea_size, type); up(&cache_inode->i_sem); post_smfs_inode(inode, cache_inode); @@ -277,7 +281,8 @@ static int fsfilt_smfs_set_ea(struct inode *inode, void *handle, } static int fsfilt_smfs_get_ea(struct inode *inode, void *ea, - int ea_size, get_ea_func_t get_ea_func) + int ea_size, enum ea_type type, + get_ea_func_t get_ea_func) { struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); struct inode *cache_inode = NULL; @@ -297,7 +302,7 @@ static int fsfilt_smfs_get_ea(struct inode *inode, void *ea, pre_smfs_inode(inode, cache_inode); down(&cache_inode->i_sem); - rc = get_ea_func(cache_inode, ea, ea_size); + rc = get_ea_func(cache_inode, ea, ea_size, type); up(&cache_inode->i_sem); post_smfs_inode(inode, cache_inode); @@ -306,65 +311,30 @@ static int fsfilt_smfs_get_ea(struct inode *inode, void *ea, } static int fsfilt_smfs_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) + void *lmm, int lmm_size, enum ea_type type) { - int rc = 0; struct fsfilt_operations *cache_fsfilt = 
I2FOPS(inode); - - rc = fsfilt_smfs_set_ea(inode, handle, lmm, lmm_size, - cache_fsfilt->fs_set_md); + int rc = fsfilt_smfs_set_ea(inode, handle, lmm, lmm_size, + type, cache_fsfilt->fs_set_md); if (rc) return rc; - smfs_rec_md(inode, lmm, lmm_size); + smfs_rec_md(inode, lmm, lmm_size, type); return rc; } -static int fsfilt_smfs_get_md(struct inode *inode, void *lmm, int - lmm_size) +static int fsfilt_smfs_get_md(struct inode *inode, void *lmm, + int lmm_size, enum ea_type type) { struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); - return fsfilt_smfs_get_ea(inode, lmm, lmm_size, + return fsfilt_smfs_get_ea(inode, lmm, lmm_size, type, cache_fsfilt->fs_get_md); } -static int fsfilt_smfs_set_mid(struct inode *inode, void *handle, - void *mid, int mid_size) -{ - struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); - return fsfilt_smfs_set_ea(inode, handle, mid, mid_size, - cache_fsfilt->fs_set_mid); -} - -static int fsfilt_smfs_get_mid(struct inode *inode, void *mid, - int mid_size) -{ - struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); - return fsfilt_smfs_get_ea(inode, mid, mid_size, - cache_fsfilt->fs_get_mid); -} - -static int fsfilt_smfs_set_sid(struct inode *inode, void *handle, - void *sid, int sid_size) -{ - struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); - return fsfilt_smfs_set_ea(inode, handle, sid, sid_size, - cache_fsfilt->fs_set_sid); -} - -static int fsfilt_smfs_get_sid(struct inode *inode, void *sid, - int sid_size) -{ - struct fsfilt_operations *cache_fsfilt = I2FOPS(inode); - return fsfilt_smfs_get_ea(inode, sid, sid_size, - cache_fsfilt->fs_get_sid); -} - static int fsfilt_smfs_send_bio(int rw, struct inode *inode, void *bio) { struct inode *cache_inode; struct fsfilt_operations *cache_fsfilt; - ENTRY; cache_fsfilt = I2FOPS(inode); @@ -1074,10 +1044,6 @@ static struct fsfilt_operations fsfilt_smfs_ops = { .fs_iocontrol = fsfilt_smfs_iocontrol, .fs_set_md = fsfilt_smfs_set_md, .fs_get_md = fsfilt_smfs_get_md, - .fs_set_mid = 
fsfilt_smfs_set_mid, - .fs_get_mid = fsfilt_smfs_get_mid, - .fs_set_sid = fsfilt_smfs_set_sid, - .fs_get_sid = fsfilt_smfs_get_sid, .fs_readpage = fsfilt_smfs_readpage, .fs_getpage = fsfilt_smfs_getpage, .fs_add_journal_cb = fsfilt_smfs_add_journal_cb, diff --git a/lustre/lvfs/lvfs_common.c b/lustre/lvfs/lvfs_common.c index caa6e01..744b8fb 100644 --- a/lustre/lvfs/lvfs_common.c +++ b/lustre/lvfs/lvfs_common.c @@ -25,6 +25,7 @@ #define DEBUG_SUBSYSTEM S_FILTER +#include #include struct dentry *lvfs_id2dentry(struct lvfs_run_ctxt *ctxt, __u64 id, @@ -33,3 +34,117 @@ struct dentry *lvfs_id2dentry(struct lvfs_run_ctxt *ctxt, __u64 id, return ctxt->cb_ops.l_id2dentry(id, gen, gr, data); } EXPORT_SYMBOL(lvfs_id2dentry); + +static struct list_head lvfs_context_list; + +void lvfs_mount_list_init(void) +{ + INIT_LIST_HEAD(&lvfs_context_list); +} + +void lvfs_mount_list_cleanup(void) +{ + struct list_head *tmp; + + if (list_empty(&lvfs_context_list)) + return; + + list_for_each(tmp, &lvfs_context_list) { + struct lvfs_obd_ctxt *data = + list_entry(tmp, struct lvfs_obd_ctxt, loc_list); + CERROR("device %s still mounted with refcount %d\n", + data->loc_name, atomic_read(&data->loc_refcount)); + } +} + +static inline +struct lvfs_obd_ctxt *get_lvfs_mount(struct lvfs_obd_ctxt *lvfs_ctxt) +{ + atomic_inc(&lvfs_ctxt->loc_refcount); + return lvfs_ctxt; +} + +static struct lvfs_obd_ctxt *add_lvfs_mount(struct vfsmount *mnt, char *name) +{ + struct lvfs_obd_ctxt *lvfs_ctxt; + ENTRY; + + OBD_ALLOC(lvfs_ctxt, sizeof(*lvfs_ctxt)); + if (!lvfs_ctxt) { + CERROR("No Memory\n"); + RETURN(NULL); + } + + if (name) { + int length = strlen(name) + 1; + + OBD_ALLOC(lvfs_ctxt->loc_name, length); + if (!lvfs_ctxt->loc_name) { + CERROR("No Memory\n"); + OBD_FREE(lvfs_ctxt, sizeof(*lvfs_ctxt)); + RETURN(NULL); + } + memcpy(lvfs_ctxt->loc_name, name, length); + } + lvfs_ctxt->loc_mnt = mnt; + list_add(&lvfs_ctxt->loc_list, &lvfs_context_list); + atomic_set(&lvfs_ctxt->loc_refcount, 1); + 
RETURN(lvfs_ctxt); +} + +void lvfs_umount_fs(struct lvfs_obd_ctxt *lvfs_ctxt) +{ + if (lvfs_ctxt && atomic_dec_and_test(&lvfs_ctxt->loc_refcount)) { + struct vfsmount *mnt = lvfs_ctxt->loc_mnt; + + list_del(&lvfs_ctxt->loc_list); + if (atomic_read(&mnt->mnt_count) > 2) + CERROR("mount busy, mnt %p mnt_count %d != 2\n", mnt, + atomic_read(&mnt->mnt_count)); + + mntput(mnt); + + if (lvfs_ctxt->loc_name) + OBD_FREE(lvfs_ctxt->loc_name, + strlen(lvfs_ctxt->loc_name) + 1); + OBD_FREE(lvfs_ctxt, sizeof(*lvfs_ctxt)); + dev_clear_rdonly(2); + } +} +EXPORT_SYMBOL(lvfs_umount_fs); + +int lvfs_mount_fs(char *name, char *fstype, char *options, int flags, + struct lvfs_obd_ctxt **lvfs_ctxt) +{ + struct vfsmount *mnt = NULL; + struct list_head *tmp; + int rc = 0; + ENTRY; + + list_for_each(tmp, &lvfs_context_list) { + struct lvfs_obd_ctxt *data = + list_entry(tmp, struct lvfs_obd_ctxt, loc_list); + if (strcmp(data->loc_name, name) == 0) { + *lvfs_ctxt = get_lvfs_mount(data); + RETURN(0); + } + } + mnt = do_kern_mount(fstype, flags, name, options); + + if (IS_ERR(mnt)) { + rc = PTR_ERR(mnt); + CERROR("do_kern_mount failed: rc = %d\n", rc); + GOTO(out, rc); + } + CDEBUG(D_SUPER, "%s: mnt = %p\n", name, mnt); + /*add this lvfs context to the lvfs_mount_list*/ + *lvfs_ctxt = add_lvfs_mount(mnt, name); + if (!*lvfs_ctxt) { + mntput(mnt); + CERROR("add_lvfs_mount failed\n"); + GOTO(out, rc = -EINVAL); + } +out: + RETURN(rc); +} +EXPORT_SYMBOL(lvfs_mount_fs); diff --git a/lustre/lvfs/lvfs_internal.h b/lustre/lvfs/lvfs_internal.h index def6c9a..d7123a9 100644 --- a/lustre/lvfs/lvfs_internal.h +++ b/lustre/lvfs/lvfs_internal.h @@ -7,8 +7,10 @@ void fsfilt_extN_exit(void); int fsfilt_ldiskfs_init(void); void fsfilt_ldiskfs_exit(void); - int fsfilt_reiser_init(void); void fsfilt_reiser_exit(void); +void lvfs_mount_list_init(void); +void lvfs_mount_list_cleanup(void); + int lookup_by_path(char *path, int flags, struct nameidata *nd); diff --git a/lustre/lvfs/lvfs_linux.c 
b/lustre/lvfs/lvfs_linux.c index 4118a53..5c11665 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -53,7 +53,6 @@ atomic_t obd_memory; int obd_memmax; - /* Debugging check only needed during development */ #ifdef OBD_CTXT_DEBUG # define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) @@ -126,7 +125,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, /* CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", save, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -165,7 +164,7 @@ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, /* CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", new_ctx, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -189,7 +188,7 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, /* CDEBUG(D_INFO, - " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + " = pop %p==%p = cur %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", new_ctx, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -217,7 +216,7 @@ void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, /* CDEBUG(D_INFO, - "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", + "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%.*s), pwdmnt %p:%d\n", saved, current, current->fs, current->fs->pwd, atomic_read(¤t->fs->pwd->d_count), atomic_read(¤t->fs->pwd->d_inode->i_count), @@ -236,7 +235,7 @@ struct dentry *simple_mknod(struct dentry *dir, char *name, int mode, int fix) ENTRY; ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); - 
CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name); + CDEBUG(D_INODE, "creating file %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) @@ -281,15 +280,19 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) ENTRY; ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); - CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name); + CDEBUG(D_INODE, "creating directory %.*s\n", (int)strlen(name), name); dchild = ll_lookup_one_len(name, dir, strlen(name)); if (IS_ERR(dchild)) GOTO(out_up, dchild); if (dchild->d_inode) { int old_mode = dchild->d_inode->i_mode; - if (!S_ISDIR(old_mode)) + if (!S_ISDIR(old_mode)) { + CERROR("found %s (%lu/%u) is mode %o\n", name, + dchild->d_inode->i_ino, + dchild->d_inode->i_generation, old_mode); GOTO(out_err, err = -ENOTDIR); + } /* Fixup directory permissions if necessary */ if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { @@ -420,20 +423,197 @@ EXPORT_SYMBOL(l_readdir); EXPORT_SYMBOL(obd_memory); EXPORT_SYMBOL(obd_memmax); +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) +static spinlock_t obd_memlist_lock = SPIN_LOCK_UNLOCKED; +static struct hlist_head *obd_memtable; +static unsigned long obd_memtable_size; + +static int lvfs_memdbg_init(int size) +{ + struct hlist_head *head; + int i; + + LASSERT(size > sizeof(sizeof(struct hlist_head))); + obd_memtable_size = size / sizeof(struct hlist_head); + + CWARN("Allocating %lu malloc entries...\n", + (unsigned long)obd_memtable_size); + + obd_memtable = kmalloc(size, GFP_KERNEL); + if (!obd_memtable) + return -ENOMEM; + + i = obd_memtable_size; + head = obd_memtable; + do { + INIT_HLIST_HEAD(head); + head++; + i--; + } while(i); + + return 0; +} + +static int lvfs_memdbg_cleanup(void) +{ + struct hlist_node *node = NULL, *tmp = NULL; + struct hlist_head *head; + struct mtrack *mt; + int i; + + spin_lock(&obd_memlist_lock); + for (i = 0, head = 
obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each_safe(node, tmp, head) { + mt = hlist_entry(node, struct mtrack, m_hash); + hlist_del_init(&mt->m_hash); + kfree(mt); + } + } + spin_unlock(&obd_memlist_lock); + kfree(obd_memtable); + return 0; +} + +static inline unsigned long const hashfn(void *ptr) +{ + return (unsigned long)ptr & + (obd_memtable_size - 1); +} + +static void __lvfs_memdbg_insert(struct mtrack *mt) +{ + struct hlist_head *head = obd_memtable + + hashfn(mt->m_ptr); + hlist_add_head(&mt->m_hash, head); +} + +void lvfs_memdbg_insert(struct mtrack *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_insert); + +static void __lvfs_memdbg_remove(struct mtrack *mt) +{ + hlist_del_init(&mt->m_hash); +} + +void lvfs_memdbg_remove(struct mtrack *mt) +{ + spin_lock(&obd_memlist_lock); + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); +} +EXPORT_SYMBOL(lvfs_memdbg_remove); + +static struct mtrack *__lvfs_memdbg_find(void *ptr) +{ + struct hlist_node *node = NULL; + struct mtrack *mt = NULL; + struct hlist_head *head; + + head = obd_memtable + hashfn(ptr); + + hlist_for_each(node, head) { + mt = hlist_entry(node, struct mtrack, m_hash); + if ((unsigned long)mt->m_ptr == (unsigned long)ptr) + break; + mt = NULL; + } + return mt; +} + +struct mtrack *lvfs_memdbg_find(void *ptr) +{ + struct mtrack *mt; + + spin_lock(&obd_memlist_lock); + mt = __lvfs_memdbg_find(ptr); + spin_unlock(&obd_memlist_lock); + + return mt; +} +EXPORT_SYMBOL(lvfs_memdbg_find); + +int lvfs_memdbg_check_insert(struct mtrack *mt) +{ + spin_lock(&obd_memlist_lock); + if (!__lvfs_memdbg_find(mt->m_ptr)) { + __lvfs_memdbg_insert(mt); + spin_unlock(&obd_memlist_lock); + return 1; + } + spin_unlock(&obd_memlist_lock); + return 0; +} +EXPORT_SYMBOL(lvfs_memdbg_check_insert); + +struct mtrack * +lvfs_memdbg_check_remove(void *ptr) +{ + struct mtrack *mt; + + spin_lock(&obd_memlist_lock); + 
mt = __lvfs_memdbg_find(ptr); + if (mt) { + __lvfs_memdbg_remove(mt); + spin_unlock(&obd_memlist_lock); + return mt; + } + spin_unlock(&obd_memlist_lock); + return NULL; +} +EXPORT_SYMBOL(lvfs_memdbg_check_remove); + +static void lvfs_memdbg_show(void) +{ + struct hlist_node *node = NULL; + struct hlist_head *head; + struct mtrack *mt; + int leaked, i; + + leaked = atomic_read(&obd_memory); + + if (leaked > 0) { + CWARN("Memory leaks detected (max %d, leaked %d):\n", + obd_memmax, leaked); + + spin_lock(&obd_memlist_lock); + for (i = 0, head = obd_memtable; i < obd_memtable_size; i++, head++) { + hlist_for_each(node, head) { + mt = hlist_entry(node, struct mtrack, m_hash); + CWARN(" ptr: 0x%p, size: %d, src at \"%s\"\n", + mt->m_ptr, mt->m_size, mt->m_loc); + } + } + spin_unlock(&obd_memlist_lock); + } +} +#endif + static int __init lvfs_linux_init(void) { + ENTRY; +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_init(PAGE_SIZE); +#endif + lvfs_mount_list_init(); RETURN(0); } static void __exit lvfs_linux_exit(void) { - int leaked; ENTRY; - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? 
D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); + lvfs_mount_list_cleanup(); +#if defined (CONFIG_DEBUG_MEMORY) && defined(__KERNEL__) + lvfs_memdbg_show(); + lvfs_memdbg_cleanup(); +#endif + EXIT; return; } diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 273425e..587a24e 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -21,6 +21,12 @@ #ifndef MDC_INTERNAL_H #define MDC_INTERNAL_H +int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, + struct lov_stripe_md *lsm); + +int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_mds_md *lmm, int lmm_size); + void mdc_getattr_pack(struct lustre_msg *msg, int offset, __u64 valid, int flags, struct mdc_op_data *data); void mdc_open_pack(struct lustre_msg *msg, int offset, diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index bc6e4bb..c52644f 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -150,3 +150,28 @@ void mdc_close_pack(struct ptlrpc_request *req, int offset, struct obdo *oa, body->valid |= OBD_MD_FLFLAGS; } } + +/* + * these methods needed for saying higher levels that MDC does not pack/unpack + * any EAs. This is needed to have real abstraction and do not try to recognize + * what OBD type is to avoid calling these methods on it, as they may not be + * implemented. + * + * Sometimes pack/unpack calls happen to MDC too. This is for instance default + * striping info for directories and our goal here is to skip them with no + * errors or any complains. 
+ */ +int mdc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, + struct lov_stripe_md *lsm) +{ + ENTRY; + RETURN(0); +} + +int mdc_unpackmd(struct obd_export *exp, struct lov_stripe_md **lsmp, + struct lov_mds_md *lmm, int lmm_size) +{ + ENTRY; + RETURN(0); +} + diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index e3bda59..436a155 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -175,6 +175,22 @@ int mdc_change_cbdata(struct obd_export *exp, struct lustre_id *id, return 0; } +static inline void +mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) +{ + /* Don't hold error requests for replay. */ + if (req->rq_replay) { + unsigned long irqflags; + spin_lock_irqsave(&req->rq_lock, irqflags); + req->rq_replay = 0; + spin_unlock_irqrestore(&req->rq_lock, irqflags); + } + if (rc && req->rq_transno != 0) { + DEBUG_REQ(D_ERROR, req, "transno returned on error rc %d", rc); + LBUG(); + } +} + /* We always reserve enough space in the reply packet for a stripe MD, because * we don't know in advance the file type. 
*/ int mdc_enqueue(struct obd_export *exp, @@ -197,7 +213,6 @@ int mdc_enqueue(struct obd_export *exp, ldlm_policy_data_t policy = { .l_inodebits = { MDS_INODELOCK_LOOKUP } }; struct ldlm_intent *lit; struct ldlm_request *lockreq; - struct ldlm_reply *dlm_rep; int reqsize[6] = {[MDS_REQ_SECDESC_OFF] = 0, [MDS_REQ_INTENT_LOCKREQ_OFF] = sizeof(*lockreq), [MDS_REQ_INTENT_IT_OFF] = sizeof(*lit)}; @@ -206,6 +221,7 @@ int mdc_enqueue(struct obd_export *exp, obddev->u.cli.cl_max_mds_easize}; int req_buffers = 3, reply_buffers = 0; int rc, flags = LDLM_FL_HAS_INTENT; + struct ldlm_reply *dlm_rep = NULL; void *eadata; unsigned long irqflags; ENTRY; @@ -222,6 +238,7 @@ int mdc_enqueue(struct obd_export *exp, reqsize[req_buffers++] = sizeof(struct mds_rec_create); reqsize[req_buffers++] = data->namelen + 1; reqsize[req_buffers++] = obddev->u.cli.cl_max_mds_easize; + req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_DLM_VERSION, LDLM_ENQUEUE, req_buffers, reqsize, NULL); if (!req) @@ -238,8 +255,7 @@ int mdc_enqueue(struct obd_export *exp, /* pack the intended request */ mdc_open_pack(req->rq_reqmsg, MDS_REQ_INTENT_REC_OFF, data, - it->it_create_mode, 0, it->it_flags, - lmm, lmmsize); + it->it_create_mode, 0, it->it_flags, lmm, lmmsize); /* get ready for the reply */ repsize[3] = 4; repsize[4] = xattr_acl_size(LL_ACL_MAX_ENTRIES); @@ -330,13 +346,21 @@ int mdc_enqueue(struct obd_export *exp, /* This can go when we're sure that this can never happen */ LASSERT(rc != -ENOENT); + /* We need dlm_rep to be assigned this early, to check lock mode of + returned lock from request to avoid possible race with lock + conversion */ + if (rc == ELDLM_LOCK_ABORTED || !rc) { + dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); + LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ + } if (rc == ELDLM_LOCK_ABORTED) { lock_mode = 0; memset(lockh, 0, sizeof(*lockh)); rc = 0; } else if (rc != 0) { CERROR("ldlm_cli_enqueue: %d\n", rc); - LASSERT (rc < 0); + LASSERTF(rc < 
0, "rc = %d\n", rc); + mdc_clear_replay_flag(req, rc); ptlrpc_req_finished(req); RETURN(rc); } else { /* rc = 0 */ @@ -345,18 +369,16 @@ int mdc_enqueue(struct obd_export *exp, /* If the server gave us back a different lock mode, we should * fix up our variables. */ - if (lock->l_req_mode != lock_mode) { - ldlm_lock_addref(lockh, lock->l_req_mode); + if (dlm_rep->lock_desc.l_req_mode != lock_mode) { + ldlm_lock_addref(lockh, dlm_rep->lock_desc.l_req_mode); ldlm_lock_decref(lockh, lock_mode); - lock_mode = lock->l_req_mode; + lock_mode = dlm_rep->lock_desc.l_req_mode; } ldlm_lock_allow_match(lock); LDLM_LOCK_PUT(lock); } - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); - LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ LASSERT_REPSWABBED(req, 0); /* swabbed by ldlm_cli_enqueue() */ LUSTRE_IT(it)->it_disposition = (int) dlm_rep->lock_policy_res1; @@ -364,13 +386,8 @@ int mdc_enqueue(struct obd_export *exp, LUSTRE_IT(it)->it_lock_mode = lock_mode; LUSTRE_IT(it)->it_data = req; - if (LUSTRE_IT(it)->it_status < 0 && req->rq_replay) { - LASSERT(req->rq_transno == 0); - /* Don't hold error requests for replay. */ - spin_lock(&req->rq_lock); - req->rq_replay = 0; - spin_unlock(&req->rq_lock); - } + if (LUSTRE_IT(it)->it_status < 0 && req->rq_replay) + mdc_clear_replay_flag(req, LUSTRE_IT(it)->it_status); DEBUG_REQ(D_RPCTRACE, req, "disposition: %x, status: %d", LUSTRE_IT(it)->it_disposition, LUSTRE_IT(it)->it_status); @@ -472,24 +489,40 @@ int mdc_intent_lock(struct obd_export *exp, struct lustre_id *pid, id_group(cid)}}; struct lustre_handle lockh; ldlm_policy_data_t policy; - int mode = LCK_PR; + int mode; /* For the GETATTR case, ll_revalidate_it issues two separate queries - for LOOKUP and for UPDATE lock because it cannot check them together - we might have those two bits to be present in two separate granted locks */ policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ? 
- MDS_INODELOCK_UPDATE: MDS_INODELOCK_LOOKUP; + MDS_INODELOCK_UPDATE : MDS_INODELOCK_LOOKUP; mode = LCK_PR; rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_IBITS, &policy, LCK_PR, &lockh); + LDLM_IBITS, &policy, mode, + &lockh); + + if (!rc) { + mode = LCK_CR; + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED, &res_id, + LDLM_IBITS, &policy, mode, + &lockh); + } if (!rc) { mode = LCK_PW; rc = ldlm_lock_match(exp->exp_obd->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, - LDLM_IBITS, &policy, LCK_PW, + LDLM_IBITS, &policy, mode, + &lockh); + } + if (!rc) { + mode = LCK_CW; + rc = ldlm_lock_match(exp->exp_obd->obd_namespace, + LDLM_FL_BLOCK_GRANTED, &res_id, + LDLM_IBITS, &policy, mode, &lockh); } if (rc) { @@ -543,16 +576,10 @@ int mdc_intent_lock(struct obd_export *exp, struct lustre_id *pid, * It's important that we do this first! Otherwise we might exit the * function without doing so, and try to replay a failed create (bug * 3440) */ - if (it->it_op & IT_OPEN) { - if (!it_disposition(it, DISP_OPEN_OPEN) || - LUSTRE_IT(it)->it_status != 0) { - unsigned long irqflags; - - spin_lock_irqsave(&request->rq_lock, irqflags); - request->rq_replay = 0; - spin_unlock_irqrestore(&request->rq_lock, irqflags); - } - } + if (it->it_op & IT_OPEN && request->rq_replay && + (!it_disposition(it, DISP_OPEN_OPEN) || LUSTRE_IT(it)->it_status != 0)) + mdc_clear_replay_flag(request, LUSTRE_IT(it)->it_status); + if (!it_disposition(it, DISP_IT_EXECD)) { /* The server failed before it even started executing the * intent, i.e. because it couldn't unpack the request. 
*/ diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 6cf6e08..7a751af 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -335,81 +335,65 @@ int mdc_req2lustre_md(struct obd_export *exp_lmv, struct ptlrpc_request *req, struct lustre_md *md) { void *buf; + int rc = 0; int size, acl_off; struct posix_acl *acl; - int rc = 0; + struct lov_mds_md *lmm; ENTRY; - LASSERT(md); + LASSERT(md != NULL); memset(md, 0, sizeof(*md)); - md->body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*md->body)); - LASSERT (md->body != NULL); - LASSERT_REPSWABBED (req, offset); + md->body = lustre_msg_buf(req->rq_repmsg, offset, + sizeof(*md->body)); + if (!md->body) + RETURN(-ENOMEM); + + LASSERT_REPSWABBED(req, offset); if (!(md->body->valid & OBD_MD_FLEASIZE) && !(md->body->valid & OBD_MD_FLDIREA)) RETURN(0); - /* ea is presented in reply, parse it */ if (S_ISREG(md->body->mode)) { - int lmmsize; - struct lov_mds_md *lmm; - if (md->body->eadatasize == 0) { - CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n"); + CERROR("invalid EA size (0) is detected\n"); RETURN(-EPROTO); } - lmmsize = md->body->eadatasize; - lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, lmmsize); - LASSERT (lmm != NULL); - LASSERT_REPSWABBED (req, offset + 1); - - rc = obd_unpackmd(exp_lov, &md->lsm, lmm, lmmsize); - if (rc >= 0) { - LASSERT (rc >= sizeof (*md->lsm)); + + lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, + md->body->eadatasize); + if (!lmm) + RETURN(-EINVAL); + + LASSERT(exp_lov != NULL); + + rc = obd_unpackmd(exp_lov, &md->lsm, lmm, + md->body->eadatasize); + if (rc > 0) { + LASSERT(rc >= sizeof(*md->lsm)); rc = 0; } } else if (S_ISDIR(md->body->mode)) { - struct mea *mea; - int mdsize; - LASSERT(exp_lmv != NULL); - /* dir can be non-splitted */ if (md->body->eadatasize == 0) RETURN(0); - mdsize = md->body->eadatasize; - mea = lustre_msg_buf(req->rq_repmsg, offset + 1, mdsize); - LASSERT(mea != NULL); - - /* - * check mea for validness, as there is possible 
that old tests - * will try to set lov EA to dir object and thus confuse this - * stuff. - */ - if (mea->mea_magic != MEA_MAGIC_LAST_CHAR && - mea->mea_magic != MEA_MAGIC_ALL_CHARS) - GOTO(out_invalid_mea, rc = -EINVAL); - - if (mea->mea_count > 256 || mea->mea_master > 256 || - mea->mea_master > mea->mea_count) - GOTO(out_invalid_mea, rc = -EINVAL); - - LASSERT(id_fid(&mea->mea_ids[0])); - - rc = obd_unpackmd(exp_lmv, (void *)&md->mea, - (void *)mea, mdsize); - if (rc >= 0) { - LASSERT (rc >= sizeof (*md->mea)); - rc = 0; - } - - RETURN(rc); + lmm = lustre_msg_buf(req->rq_repmsg, offset + 1, + md->body->eadatasize); + if (!lmm) + RETURN(-EINVAL); - out_invalid_mea: - CERROR("Detected invalid mea, which does not " - "support neither old either new format.\n"); + if (md->body->valid & OBD_MD_MEA) { + LASSERT(exp_lmv != NULL); + + rc = obd_unpackmd(exp_lmv, (void *)&md->mea, + lmm, md->body->eadatasize); + if (rc > 0) { + LASSERT(rc >= sizeof(*md->mea)); + rc = 0; + } + } } else { LASSERT(S_ISCHR(md->body->mode) || S_ISBLK(md->body->mode) || @@ -442,7 +426,6 @@ int mdc_req2lustre_md(struct obd_export *exp_lmv, struct ptlrpc_request *req, md->acl_access = acl; } - RETURN(rc); } @@ -509,9 +492,9 @@ static void mdc_replay_open(struct ptlrpc_request *req) EXIT; } -int mdc_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req) +int mdc_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) { struct mdc_open_data *mod; struct mds_rec_create *rec; @@ -595,24 +578,25 @@ static void mdc_commit_close(struct ptlrpc_request *req) spin_unlock(&open_req->rq_lock); } -static int mdc_close_interpret(struct ptlrpc_request *req, void *data, int rc) +static int mdc_close_interpret(struct ptlrpc_request *req, + void *data, int rc) { union ptlrpc_async_args *aa = data; - struct mdc_rpc_lock *rpc_lock; + struct mdc_rpc_lock *close_lock; struct obd_device *obd = 
aa->pointer_arg[1]; unsigned long flags; spin_lock_irqsave(&req->rq_lock, flags); - rpc_lock = aa->pointer_arg[0]; + close_lock = aa->pointer_arg[0]; aa->pointer_arg[0] = NULL; spin_unlock_irqrestore (&req->rq_lock, flags); - if (rpc_lock == NULL) { - CERROR("called with NULL rpc_lock\n"); + if (close_lock == NULL) { + CERROR("called with NULL close_lock\n"); } else { - mdc_put_rpc_lock(rpc_lock, NULL); - LASSERTF(rpc_lock == obd->u.cli.cl_rpc_lock, "%p != %p\n", - rpc_lock, obd->u.cli.cl_rpc_lock); + mdc_put_rpc_lock(close_lock, NULL); + LASSERTF(close_lock == obd->u.cli.cl_close_lock, "%p != %p\n", + close_lock, obd->u.cli.cl_close_lock); } wake_up(&req->rq_reply_waitq); RETURN(rc); @@ -632,15 +616,12 @@ static int mdc_close_check_reply(struct ptlrpc_request *req) return rc; } -static int go_back_to_sleep(void *unused) -{ - return 0; -} - int mdc_close(struct obd_export *exp, struct obdo *oa, - struct obd_client_handle *och, struct ptlrpc_request **request) + struct obd_client_handle *och, + struct ptlrpc_request **request) { struct obd_device *obd = class_exp2obd(exp); + struct obd_import *imp = class_exp2cliimp(exp); int reqsize[3] = {0, sizeof(struct mds_body), obd->u.cli.cl_max_mds_cookiesize}; int rc, repsize[3] = {sizeof(struct mds_body), @@ -651,25 +632,30 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, struct l_wait_info lwi; ENTRY; + if (imp->imp_connection == NULL) { + CERROR("request on not connected import %s\n", + imp->imp_obd->obd_name); + RETURN(-EIO); + } + //reqsize[0] = mdc_get_secdesc_size(); + //mdc_pack_secdesc(req, reqsize[0]); req = ptlrpc_prep_req(class_exp2cliimp(exp), LUSTRE_MDS_VERSION, MDS_CLOSE, 3, reqsize, NULL); if (req == NULL) GOTO(out, rc = -ENOMEM); - //mdc_pack_secdesc(req, reqsize[0]); - /* Ensure that this close's handle is fixed up during replay. 
*/ LASSERT(och != NULL); mod = och->och_mod; if (likely(mod != NULL)) { mod->mod_close_req = req; LASSERT(mod->mod_open_req->rq_type != LI_POISON); - DEBUG_REQ(D_HA, mod->mod_open_req, "matched open req %p", - mod->mod_open_req); + DEBUG_REQ(D_HA, mod->mod_open_req, "matched open"); } else { - CDEBUG(D_HA, "couldn't find open req; expecting close error\n"); + CDEBUG(D_HA, "couldn't find open req; " + "expecting close error\n"); } mdc_close_pack(req, 1, oa, oa->o_valid, och); @@ -682,13 +668,13 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, /* We hand a ref to the rpcd here, so we need another one of our own. */ ptlrpc_request_addref(req); - mdc_get_rpc_lock(obd->u.cli.cl_rpc_lock, NULL); + mdc_get_rpc_lock(obd->u.cli.cl_close_lock, NULL); req->rq_interpret_reply = mdc_close_interpret; - req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_rpc_lock; + req->rq_async_args.pointer_arg[0] = obd->u.cli.cl_close_lock; req->rq_async_args.pointer_arg[1] = obd; ptlrpcd_add_req(req); - lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), go_back_to_sleep, - NULL, NULL); + + lwi = LWI_TIMEOUT_INTR(MAX(req->rq_timeout * HZ, 1), NULL, NULL, NULL); rc = l_wait_event(req->rq_reply_waitq, mdc_close_check_reply(req), &lwi); if (req->rq_repmsg == NULL) { @@ -699,22 +685,25 @@ int mdc_close(struct obd_export *exp, struct obdo *oa, } else if (rc == 0) { rc = req->rq_repmsg->status; if (req->rq_repmsg->type == PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, err " - "= %d", rc); + DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR, " + "err = %d", rc); if (rc > 0) rc = -rc; - } else if (mod == NULL) { - CERROR("Unexpected: can't find mdc_open_data, but the " - "close succeeded. Please tell CFS.\n"); - } - if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body), - lustre_swab_mds_body)) { - CERROR("Error unpacking mds_body\n"); - rc = -EPROTO; + } else { + if (mod == NULL) + CERROR("Unexpected: can't find mdc_open_data, but " + "close succeeded. 
Please tell CFS.\n"); + if (!lustre_swab_repbuf(req, 0, sizeof(struct mds_body), + lustre_swab_mds_body)) + { + CERROR("Error unpacking mds_body\n"); + rc = -EPROTO; + } } } if (req->rq_async_args.pointer_arg[0] != NULL) { - CERROR("returned without dropping rpc_lock: rc %d\n", rc); + CERROR("returned without dropping close lock: rc %d, " + "dropping it now\n", rc); mdc_close_interpret(req, &req->rq_async_args, rc); } @@ -937,6 +926,14 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, cli->cl_nllu = ((__u32 *) val)[0]; cli->cl_nllg = ((__u32 *) val)[1]; RETURN(0); + } else if (keylen == strlen("async") && memcmp(key, "async", keylen) == 0) { + struct client_obd *cl = &exp->exp_obd->u.cli; + if (vallen != sizeof(int)) + RETURN(-EINVAL); + cl->cl_async = *(int *)val; + CDEBUG(D_HA, "%s: set async = %d\n", + exp->exp_obd->obd_name, cl->cl_async); + RETURN(0); } RETURN(rc); @@ -1181,9 +1178,14 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) GOTO(err_rpc_lock, rc = -ENOMEM); mdc_init_rpc_lock(cli->cl_setattr_lock); + OBD_ALLOC(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); + if (!cli->cl_close_lock) + GOTO(err_setattr_lock, rc = -ENOMEM); + mdc_init_rpc_lock(cli->cl_close_lock); + rc = client_obd_setup(obd, len, buf); if (rc) - GOTO(err_setattr_lock, rc); + GOTO(err_close_lock, rc); rc = obd_llog_init(obd, &obd->obd_llogs, obd, 0, NULL); if (rc) { @@ -1193,6 +1195,8 @@ static int mdc_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(rc); +err_close_lock: + OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); err_setattr_lock: OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); err_rpc_lock: @@ -1231,6 +1235,7 @@ static int mdc_cleanup(struct obd_device *obd, int flags) OBD_FREE(cli->cl_rpc_lock, sizeof (*cli->cl_rpc_lock)); OBD_FREE(cli->cl_setattr_lock, sizeof (*cli->cl_setattr_lock)); + OBD_FREE(cli->cl_close_lock, sizeof (*cli->cl_close_lock)); ptlrpcd_decref(); @@ -1495,6 +1500,8 @@ struct 
obd_ops mdc_obd_ops = { .o_connect = client_connect_import, .o_disconnect = client_disconnect_export, .o_iocontrol = mdc_iocontrol, + .o_packmd = mdc_packmd, + .o_unpackmd = mdc_unpackmd, .o_statfs = mdc_statfs, .o_pin = mdc_pin, .o_unpin = mdc_unpin, diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 2b2c223..ef0b9e6 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -159,6 +159,8 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, return rc; } +extern char *ldlm_lockname[]; + int mds_lock_mode_for_dir(struct obd_device *obd, struct dentry *dentry, int mode) { @@ -228,7 +230,8 @@ int mds_lock_mode_for_dir(struct obd_device *obd, } } } - return ret_mode; + + return ret_mode; } /* only valid locked dentries or errors should be returned */ @@ -298,7 +301,6 @@ struct dentry *mds_id2locked_dentry(struct obd_device *obd, struct lustre_id *id #define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED #endif - /* Look up an entry by inode number. This function ONLY returns valid dget'd * dentries with an initialized inode or errors */ struct dentry *mds_id2dentry(struct obd_device *obd, struct lustre_id *id, @@ -453,8 +455,8 @@ static int mds_init_export(struct obd_export *exp) static int mds_destroy_export(struct obd_export *export) { - struct mds_export_data *med; struct obd_device *obd = export->exp_obd; + struct mds_export_data *med; struct lvfs_run_ctxt saved; int rc = 0; ENTRY; @@ -473,6 +475,8 @@ static int mds_destroy_export(struct obd_export *export) struct list_head *tmp = med->med_open_head.next; struct mds_file_data *mfd = list_entry(tmp, struct mds_file_data, mfd_list); + struct lustre_id sid; + BDEVNAME_DECLARE_STORAGE(btmp); /* bug 1579: fix force-closing for 2.5 */ @@ -481,12 +485,22 @@ static int mds_destroy_export(struct obd_export *export) list_del(&mfd->mfd_list); spin_unlock(&med->med_open_lock); + down(&dentry->d_inode->i_sem); + rc = mds_read_inode_sid(obd, dentry->d_inode, &sid); + 
up(&dentry->d_inode->i_sem); + if (rc) { + CERROR("Can't read inode self id, inode %lu, " + "rc %d\n", dentry->d_inode->i_ino, rc); + memset(&sid, 0, sizeof(sid)); + } + /* If you change this message, be sure to update * replay_single:test_46 */ - CERROR("force closing client file handle for %*s (%s:%lu)\n", - dentry->d_name.len, dentry->d_name.name, + CERROR("force closing client file handle for %.*s (%s:" + DLID4")\n", dentry->d_name.len, dentry->d_name.name, ll_bdevname(dentry->d_inode->i_sb, btmp), - dentry->d_inode->i_ino); + OLID4(&sid)); + /* child inode->i_alloc_sem protects orphan_dec_test and * is_orphan race, mds_mfd_close drops it */ DOWN_WRITE_I_ALLOC_SEM(dentry->d_inode); @@ -643,8 +657,30 @@ int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, RETURN(0); } -int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, - int *size, int lock) +static int mds_convert_md(struct obd_device *obd, struct inode *inode, + void *md, int size, int mea) +{ + int rc = size; + + if (S_ISREG(inode->i_mode)) { + rc = mds_convert_lov_ea(obd, inode, md, size); + } else if (S_ISDIR(inode->i_mode)) { + if (mea) { + rc = mds_convert_mea_ea(obd, inode, md, size); + } else { + rc = mds_convert_lov_ea(obd, inode, md, size); + } + if (rc == -EINVAL) { + CERROR("Invalid EA format (nor LOV or MEA) " + "is detected. Inode %lu/%u\n", + inode->i_ino, inode->i_generation); + } + } + return rc; +} + +int mds_get_md(struct obd_device *obd, struct inode *inode, + void *md, int *size, int lock, int mea) { int lmm_size; int rc = 0; @@ -652,21 +688,16 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, if (lock) down(&inode->i_sem); - rc = fsfilt_get_md(obd, inode, md, *size); - if (lock) - up(&inode->i_sem); + rc = fsfilt_get_md(obd, inode, md, *size, + (mea ? 
EA_MEA : EA_LOV)); if (rc < 0) { CERROR("Error %d reading eadata for ino %lu\n", rc, inode->i_ino); } else if (rc > 0) { lmm_size = rc; - - if (S_ISREG(inode->i_mode)) - rc = mds_convert_lov_ea(obd, inode, md, lmm_size); - if (S_ISDIR(inode->i_mode)) - rc = mds_convert_mea_ea(obd, inode, md, lmm_size); - + rc = mds_convert_md(obd, inode, md, + lmm_size, mea); if (rc == 0) { *size = lmm_size; rc = lmm_size; @@ -674,15 +705,17 @@ int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, *size = rc; } } + if (lock) + up(&inode->i_sem); - RETURN (rc); + RETURN(rc); } /* Call with lock=1 if you want mds_pack_md to take the i_sem. * Call with lock=0 if the caller has already taken the i_sem. */ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, - struct mds_body *body, struct inode *inode, int lock) + struct mds_body *body, struct inode *inode, int lock, int mea) { struct mds_obd *mds = &obd->u.mds; void *lmm; @@ -710,12 +743,16 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, // RETURN(-EINVAL); } - rc = mds_get_md(obd, inode, lmm, &lmm_size, lock); + rc = mds_get_md(obd, inode, lmm, &lmm_size, lock, mea); if (rc > 0) { if (S_ISDIR(inode->i_mode)) body->valid |= OBD_MD_FLDIREA; else body->valid |= OBD_MD_FLEASIZE; + + if (mea) + body->valid |= OBD_MD_MEA; + body->eadatasize = lmm_size; rc = 0; } @@ -812,8 +849,9 @@ int mds_pack_acl(struct obd_device *obd, struct lustre_msg *repmsg, int offset, struct mds_body *body, struct inode *inode) { struct dentry de = { .d_inode = inode }; + __u32 buflen, *sizep; void *buf; - __u32 buflen, *sizep, size; + int size; ENTRY; if (!inode->i_op->getxattr) @@ -823,7 +861,7 @@ int mds_pack_acl(struct obd_device *obd, struct lustre_msg *repmsg, int offset, buf = lustre_msg_buf(repmsg, offset + 1, buflen); size = inode->i_op->getxattr(&de, XATTR_NAME_ACL_ACCESS, buf, buflen); - if (size == -ENODATA) + if (size == -ENODATA || size == -EOPNOTSUPP) RETURN(0); if (size < 0) 
RETURN(size); @@ -892,7 +930,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, if ((S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE)) || (S_ISDIR(inode->i_mode) && (reqbody->valid & OBD_MD_FLDIREA))) { rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, body, - inode, 1); + inode, 1, (reqbody->valid & OBD_MD_MEA) ? 1 : 0); /* if we have LOV EA data, the OST holds size, atime, mtime. */ if (!(body->valid & OBD_MD_FLEASIZE) && @@ -954,18 +992,20 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct dentry *de, int rc = 0, size[4] = {sizeof(*body)}, bufcount = 1; ENTRY; - body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); + body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); LASSERT(body != NULL); /* checked by caller */ LASSERT_REQSWABBED(req, offset); /* swabbed by caller */ if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) || (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) { int rc; + down(&inode->i_sem); - rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); + rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0, + ((body->valid & OBD_MD_MEA) ? 
EA_MEA : EA_LOV)); up(&inode->i_sem); if (rc < 0) { - if (rc != -ENODATA) + if (rc != -ENODATA && rc != -EOPNOTSUPP) CERROR("error getting inode %lu MD: rc = %d\n", inode->i_ino, rc); size[bufcount] = 0; @@ -993,7 +1033,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct dentry *de, rc = inode->i_op->getxattr(de, ea_name, NULL, 0); if (rc < 0) { - if (rc != -ENODATA) + if (rc != -ENODATA && rc != -EOPNOTSUPP) CERROR("error getting inode %lu EA: rc = %d\n", inode->i_ino, rc); size[bufcount] = 0; @@ -1007,7 +1047,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct dentry *de, rc = inode->i_op->listxattr(de, NULL, 0); if (rc < 0) { - if (rc != -ENODATA) + if (rc != -ENODATA && rc != -EOPNOTSUPP) CERROR("error getting inode %lu EA: rc = %d\n", inode->i_ino, rc); size[bufcount] = 0; @@ -1192,7 +1232,7 @@ static int mds_getattr_lock(struct ptlrpc_request *req, int offset, cleanup_phase = 2; /* dchild, dparent, locks */ - /* + /* * let's make sure this name should leave on this mds * node. 
*/ @@ -1318,6 +1358,8 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) int rc = 0; ENTRY; + MD_COUNTER_INCREMENT(obd, getattr); + rsd = lustre_swab_mds_secdesc(req, MDS_REQ_SECDESC_OFF); if (!rsd) { CERROR("Can't unpack security desc\n"); @@ -1331,8 +1373,6 @@ static int mds_getattr(struct ptlrpc_request *req, int offset) RETURN (-EFAULT); } - MD_COUNTER_INCREMENT(obd, getattr); - rc = mds_init_ucred(&uc, req, rsd); if (rc) { mds_exit_ucred(&uc); @@ -1384,6 +1424,10 @@ static int mds_statfs(struct ptlrpc_request *req) int rc, size = sizeof(struct obd_statfs); ENTRY; + /* This will trigger a watchdog timeout */ + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_STATFS_LCW_SLEEP, + (MDS_SERVICE_WATCHDOG_TIMEOUT / 1000) + 1); + rc = lustre_pack_reply(req, 1, &size, NULL); if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { CERROR("mds: statfs lustre_pack_reply failed: rc = %d\n", rc); @@ -1414,7 +1458,8 @@ static int mds_sync(struct ptlrpc_request *req, int offset) int rc, size = sizeof(*body); ENTRY; - body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*body)); + body = lustre_swab_reqbuf(req, offset, sizeof(*body), + lustre_swab_mds_body); if (body == NULL) GOTO(out, rc = -EPROTO); @@ -2024,12 +2069,15 @@ repeat: mea->mea_count = 0; down(&new->d_inode->i_sem); - rc = fsfilt_set_md(obd, new->d_inode, handle, mea, mealen); + rc = fsfilt_set_md(obd, new->d_inode, handle, + mea, mealen, EA_MEA); up(&new->d_inode->i_sem); if (rc) - CERROR("fsfilt_set_md() failed, rc = %d\n", rc); + CERROR("fsfilt_set_md() failed, " + "rc = %d\n", rc); OBD_FREE(mea, mealen); + CDEBUG(D_OTHER, "%s: mark non-splittable %lu/%u - %d\n", obd->obd_name, new->d_inode->i_ino, new->d_inode->i_generation, flags); @@ -2280,8 +2328,6 @@ static int mds_msg_check_version(struct lustre_msg *msg) return rc; } -static char str[PTL_NALFMT_SIZE]; - int mds_handle(struct ptlrpc_request *req) { int should_process, fail = OBD_FAIL_MDS_ALL_REPLY_NET; @@ -2312,9 +2358,9 @@ int mds_handle(struct ptlrpc_request 
*req) int recovering; if (req->rq_export == NULL) { - CERROR("operation %d on unconnected MDS from NID %s\n", + CERROR("operation %d on unconnected MDS from %s\n", req->rq_reqmsg->opc, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); req->rq_status = -ENOTCONN; GOTO(out, rc = -ENOTCONN); } @@ -2325,7 +2371,8 @@ int mds_handle(struct ptlrpc_request *req) /* sanity check: if the xid matches, the request must * be marked as a resent or replayed */ - if (req->rq_xid == med->med_mcd->mcd_last_xid) { + if (req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_xid) || + req->rq_xid == le64_to_cpu(med->med_mcd->mcd_last_close_xid)) { LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) & (MSG_RESENT | MSG_REPLAY), "rq_xid "LPU64" matches last_xid, " @@ -2780,10 +2827,10 @@ int mds_read_inode_sid(struct obd_device *obd, struct inode *inode, LASSERT(obd != NULL); LASSERT(inode != NULL); - rc = fsfilt_get_sid(obd, inode, &id->li_fid, - sizeof(id->li_fid)); + rc = fsfilt_get_md(obd, inode, &id->li_fid, + sizeof(id->li_fid), EA_SID); if (rc < 0) { - CERROR("fsfilt_get_sid() failed, " + CERROR("fsfilt_get_md() failed, " "rc = %d\n", rc); RETURN(rc); } else if (!rc) { @@ -2807,10 +2854,10 @@ int mds_update_inode_sid(struct obd_device *obd, struct inode *inode, LASSERT(obd != NULL); LASSERT(inode != NULL); - rc = fsfilt_set_sid(obd, inode, handle, &id->li_fid, - sizeof(id->li_fid)); + rc = fsfilt_set_md(obd, inode, handle, &id->li_fid, + sizeof(id->li_fid), EA_SID); if (rc) { - CERROR("fsfilt_set_sid() failed, rc = %d\n", rc); + CERROR("fsfilt_set_md() failed, rc = %d\n", rc); RETURN(rc); } @@ -2832,10 +2879,9 @@ int mds_read_inode_mid(struct obd_device *obd, struct inode *inode, LASSERT(obd != NULL); LASSERT(inode != NULL); - rc = fsfilt_get_mid(obd, inode, id, sizeof(*id)); + rc = fsfilt_get_md(obd, inode, id, sizeof(*id), EA_MID); if (rc < 0) { - CERROR("fsfilt_get_mid() failed, " - "rc = %d\n", rc); + CERROR("fsfilt_get_md() failed, rc = %d\n", rc); RETURN(rc); } else if 
(!rc) { rc = -ENODATA; @@ -2862,9 +2908,11 @@ int mds_update_inode_mid(struct obd_device *obd, struct inode *inode, LASSERT(obd != NULL); LASSERT(inode != NULL); - rc = fsfilt_set_mid(obd, inode, handle, id, sizeof(*id)); + rc = fsfilt_set_md(obd, inode, handle, id, + sizeof(*id), EA_MID); if (rc) { - CERROR("fsfilt_set_mid() failed, rc = %d\n", rc); + CERROR("fsfilt_set_md() failed, " + "rc = %d\n", rc); RETURN(rc); } @@ -2876,6 +2924,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) { struct lustre_cfg* lcfg = buf; struct mds_obd *mds = &obd->u.mds; + struct lvfs_obd_ctxt *lvfs_ctxt = NULL; char *options = NULL; struct vfsmount *mnt; char ns_name[48]; @@ -2911,6 +2960,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) lcfg->lcfg_inlbuf4); /* we have to know mdsnum before touching underlying fs -bzzz */ + atomic_set(&mds->mds_open_count, 0); sema_init(&mds->mds_md_sem, 1); mds->mds_md_connected = 0; mds->mds_md_name = NULL; @@ -2950,15 +3000,19 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) } } - mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, lcfg->lcfg_inlbuf1, options); + rc = lvfs_mount_fs(lcfg->lcfg_inlbuf1, lcfg->lcfg_inlbuf2, + options, 0, &lvfs_ctxt); + free_page(page); - if (IS_ERR(mnt)) { - rc = PTR_ERR(mnt); - CERROR("do_kern_mount failed: rc = %d\n", rc); + if (rc || !lvfs_ctxt) { + CERROR("lvfs_mount_fs failed: rc = %d\n", rc); GOTO(err_ops, rc); } + mnt = lvfs_ctxt->loc_mnt; + mds->mds_lvfs_ctxt = lvfs_ctxt; + CDEBUG(D_SUPER, "%s: mnt = %p\n", lcfg->lcfg_inlbuf1, mnt); sema_init(&mds->mds_epoch_sem, 1); @@ -3037,7 +3091,7 @@ err_ns: obd->obd_namespace = NULL; err_put: unlock_kernel(); - mntput(mds->mds_vfsmnt); + lvfs_umount_fs(mds->mds_lvfs_ctxt); mds->mds_sb = 0; lock_kernel(); err_ops: @@ -3235,15 +3289,10 @@ static int mds_cleanup(struct obd_device *obd, int flags) unlock_kernel(); - /* - * 2 seems normal on mds, (may_umount() also expects 2 fwiw), but we - * only see 1 at 
this point in obdfilter. - */ - if (atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count) > 2) - CERROR("%s: mount busy, mnt_count %d != 2\n", obd->obd_name, - atomic_read(&obd->u.mds.mds_vfsmnt->mnt_count)); + /* 2 seems normal on mds, (may_umount() also expects 2 + fwiw), but we only see 1 at this point in obdfilter. */ + lvfs_umount_fs(mds->mds_lvfs_ctxt); - mntput(mds->mds_vfsmnt); mds->mds_sb = 0; ldlm_namespace_free(obd->obd_namespace, flags & OBD_OPT_FORCE); @@ -3256,7 +3305,6 @@ static int mds_cleanup(struct obd_device *obd, int flags) spin_unlock_bh(&obd->obd_processing_task_lock); lock_kernel(); - dev_clear_rdonly(2); fsfilt_put_ops(obd->obd_fsops); #ifdef ENABLE_GSS @@ -3356,7 +3404,12 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, /* If the xid matches, then we know this is a resent request, * and allow it. (It's probably an OPEN, for which we don't * send a lock */ - if (req->rq_xid == exp->exp_mds_data.med_mcd->mcd_last_xid) + if (req->rq_xid == + le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_xid)) + return; + + if (req->rq_xid == + le64_to_cpu(exp->exp_mds_data.med_mcd->mcd_last_close_xid)) return; /* This remote handle isn't enqueued, so we never received or @@ -3443,23 +3496,32 @@ static int mds_intent_policy(struct ldlm_namespace *ns, switch ((long)it->opc) { case IT_OPEN: case IT_CREAT|IT_OPEN: + fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF, + lock, NULL, lockh); /* XXX swab here to assert that an mds_open reint * packet is following */ fixup_handle_for_resent_req(req, MDS_REQ_INTENT_LOCKREQ_OFF, lock, NULL, lockh); rep->lock_policy_res2 = mds_reint(req, offset, lockh); -#if 0 - /* We abort the lock if the lookup was negative and - * we did not make it to the OPEN portion */ - if (!intent_disposition(rep, DISP_LOOKUP_EXECD)) + + if (rep->lock_policy_res2) { + /* + * mds_open() returns ENOLCK where it should return + * zero, but it has no lock to return. 
+ */ + if (rep->lock_policy_res2 == ENOLCK) + rep->lock_policy_res2 = 0; + RETURN(ELDLM_LOCK_ABORTED); - if (intent_disposition(rep, DISP_LOOKUP_NEG) && - !intent_disposition(rep, DISP_OPEN_OPEN)) -#endif - /* IT_OPEN may return lock on cross-node dentry - * that we want to hold during attr retrival -bzzz */ - if (rc != 0 || lockh[0].cookie == 0) + } + + /* + * IT_OPEN may return lock on cross-node dentry that we want to + * hold during attr retrival -bzzz + */ + if (lockh[0].cookie == 0) RETURN(ELDLM_LOCK_ABORTED); + break; case IT_LOOKUP: getattr_part = MDS_INODELOCK_LOOKUP; @@ -3512,7 +3574,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, LASSERTF(new_lock != NULL, "op "LPX64" lockh "LPX64"\n", it->opc, lockh[0].cookie); - + /* If we've already given this lock to a client once, then we should * have no readers or writers. Otherwise, we should have one reader * _or_ writer ref (which will be zeroed below) before returning the @@ -3599,6 +3661,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds", obd->obd_proc_entry); if (!mds->mds_service) { @@ -3614,6 +3677,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_setattr_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds_setattr", obd->obd_proc_entry); if (!mds->mds_setattr_service) { @@ -3629,6 +3693,7 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_readpage_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL, + MDS_SERVICE_WATCHDOG_TIMEOUT, mds_handle, "mds_readpage", obd->obd_proc_entry); if (!mds->mds_readpage_service) { diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 
085c840..289b9ad 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -96,6 +96,17 @@ static int lprocfs_mds_wr_config_update(struct file *file, const char *buffer, RETURN(mds_dt_update_config(obd, 0)); } +static int lprocfs_rd_filesopen(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + LASSERT(obd != NULL); + *eof = 1; + + return snprintf(page, count, "%d\n", + atomic_read(&obd->u.mds.mds_open_count)); +} + static int lprocfs_rd_last_fid(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -131,6 +142,7 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "fstype", lprocfs_rd_fstype, 0, 0 }, { "filestotal", lprocfs_rd_filestotal, 0, 0 }, { "filesfree", lprocfs_rd_filesfree, 0, 0 }, + { "filesopen", lprocfs_rd_filesopen, 0, 0 }, { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, { "last_fid", lprocfs_rd_last_fid, 0, 0 }, { "group", lprocfs_rd_group, 0, 0 }, diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index b52bf4a..20c0145 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -46,7 +46,6 @@ /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define MDS_MAX_CLIENTS (PAGE_SIZE * 8) -#define MDS_MAX_CLIENT_WORDS (MDS_MAX_CLIENTS / sizeof(unsigned long)) #define LAST_RCVD "last_rcvd" #define LOV_OBJID "lov_objid" @@ -179,13 +178,13 @@ int mds_client_free(struct obd_export *exp, int clear_client) free_and_out: OBD_FREE(med->med_mcd, sizeof(*med->med_mcd)); + med->med_mcd = NULL; return 0; } static int mds_server_free_data(struct mds_obd *mds) { - OBD_FREE(mds->mds_client_bitmap, - MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_FREE(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8); OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data)); mds->mds_server_data = NULL; @@ -255,8 +254,7 @@ static int mds_read_last_rcvd(struct obd_device *obd, struct file *file) if (!msd) RETURN(-ENOMEM); - OBD_ALLOC_WAIT(mds->mds_client_bitmap, - 
MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_ALLOC_WAIT(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8); if (!mds->mds_client_bitmap) { OBD_FREE(msd, sizeof(*msd)); RETURN(-ENOMEM); @@ -357,7 +355,10 @@ static int mds_read_last_rcvd(struct obd_device *obd, struct file *file) continue; } - last_transno = le64_to_cpu(mcd->mcd_last_transno); + last_transno = le64_to_cpu(mcd->mcd_last_transno) > + le64_to_cpu(mcd->mcd_last_close_transno) ? + le64_to_cpu(mcd->mcd_last_transno) : + le64_to_cpu(mcd->mcd_last_close_transno); /* These exports are cleaned up by mds_disconnect(), so they * need to be set up like real exports as mds_connect() does. @@ -673,8 +674,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) GOTO(err_pop, rc); } mds->mds_id_de = dentry; - - if (!dentry->d_inode) { + if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) { rc = -ENOENT; CERROR("__iopen__ directory has no inode? rc = %d\n", rc); GOTO(err_id_de, rc); @@ -1070,7 +1070,7 @@ int mds_obd_destroy(struct obd_export *exp, struct obdo *oa, oa->o_id, oa->o_generation, rc); err = fsfilt_commit(obd, mds->mds_sb, mds->mds_objects_dir->d_inode, - handle, 0); + handle, exp->exp_sync); if (err && !rc) rc = err; out_dput: diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index d68b78e..98271df 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -7,6 +7,8 @@ #include +#define MDS_SERVICE_WATCHDOG_TIMEOUT 30000 + #define MAX_ATIME_DIFF 60 struct mds_filter_data { @@ -209,10 +211,10 @@ void mds_set_last_fid(struct obd_device *obd, __u64 fid); #ifdef __KERNEL__ int mds_get_md(struct obd_device *, struct inode *, void *md, - int *size, int lock); + int *size, int lock, int mea); int mds_pack_md(struct obd_device *, struct lustre_msg *, int offset, - struct mds_body *, struct inode *, int lock); + struct mds_body *, struct inode *, int lock, int mea); int mds_pack_link(struct dentry *dentry, struct ptlrpc_request *req, struct mds_body *repbody, int 
reply_off); int mds_pack_ea(struct dentry *dentry, struct ptlrpc_request *req, diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 6706841..cdc1425 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -432,7 +432,7 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), lustre_swab_mds_rec_create); if (rec == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); r->ur_id1 = &rec->cr_id; r->ur_id2 = &rec->cr_replayid; @@ -451,7 +451,7 @@ static int mds_open_unpack(struct ptlrpc_request *req, int offset, if (req->rq_reqmsg->bufcount > offset + 2) { r->ur_eadata = lustre_msg_buf(req->rq_reqmsg, offset + 2, 0); if (r->ur_eadata == NULL) - RETURN (-EFAULT); + RETURN(-EFAULT); r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 2]; } RETURN(0); @@ -501,6 +501,10 @@ int mds_update_unpack(struct ptlrpc_request *req, int offset, rec->ur_opcode = opcode; rc = mds_unpackers[opcode](req, offset, rec); + +#if CRAY_PORTALS + rec->ur_fsuid = req->rq_uid; +#endif RETURN(rc); } @@ -786,6 +790,11 @@ int mds_init_ucred(struct lvfs_ucred *ucred, ucred->luc_ginfo = NULL; ucred->luc_lsd = lsd = mds_get_lsd(rsd->rsd_uid); +#if CRAY_PORTALS + ucred->luc_fsuid = req->rq_uid; +#else + ucred->luc_fsuid = rsd->rsd_fsuid; +#endif if (lsd) { if (req->rq_remote) { /* record the gid mapping here */ @@ -850,7 +859,6 @@ int mds_init_ucred(struct lvfs_ucred *ucred, rsd->rsd_cap &= ~CAP_FS_MASK; /* by now every fields in rsd have been granted */ - ucred->luc_fsuid = rsd->rsd_fsuid; ucred->luc_fsgid = rsd->rsd_fsgid; ucred->luc_cap = rsd->rsd_cap; ucred->luc_uid = rsd->rsd_uid; diff --git a/lustre/mds/mds_lmv.c b/lustre/mds/mds_lmv.c index 6685b2b..23d18d5 100644 --- a/lustre/mds/mds_lmv.c +++ b/lustre/mds/mds_lmv.c @@ -226,7 +226,7 @@ int mds_md_get_attr(struct obd_device *obd, struct inode *inode, if (*mea_size < 0 || *mea == NULL) return *mea_size < 0 ? 
*mea_size : -EINVAL; - rc = mds_get_md(obd, inode, *mea, mea_size, 1); + rc = mds_get_md(obd, inode, *mea, mea_size, 1, 1); if (rc <= 0) { OBD_FREE(*mea, *mea_size); @@ -710,7 +710,7 @@ int mds_try_to_split_dir(struct obd_device *obd, struct dentry *dentry, GOTO(err_oa, rc = PTR_ERR(handle)); } - rc = fsfilt_set_md(obd, dir, handle, *mea, mea_size); + rc = fsfilt_set_md(obd, dir, handle, *mea, mea_size, EA_MEA); if (rc) { up(&dir->i_sem); CERROR("fsfilt_set_md() failed, error %d.\n", rc); @@ -1168,12 +1168,12 @@ int mds_lock_and_check_slave(int offset, struct ptlrpc_request *req, } cleanup_phase = 1; - /* - * handling the case when remote MDS checks if dir is empty before - * rename. But it also does it for all entries, because inode is stored - * here and remote MDS does not know if rename point to dir or to reg - * file. So we check it here. - */ + /* + * handling the case when remote MDS checks if dir is empty + * before rename. But it also does it for all entries, because + * inode is stored here and remote MDS does not know if rename + * point to dir or to reg file. So we check it here. 
+ */ if (!S_ISDIR(dentry->d_inode->i_mode)) GOTO(cleanup, rc = 0); @@ -1202,49 +1202,41 @@ cleanup: } int mds_convert_mea_ea(struct obd_device *obd, struct inode *inode, - struct lov_mds_md *lmm, int lmmsize) + struct lov_mds_md *lmm, int lmm_size) { - int i, rc, err, size; + struct lov_stripe_md *lsm = NULL; struct mea_old *old; struct mea *mea; - struct mea *new; void *handle; + int rc, err; ENTRY; - mea = (struct mea *) lmm; + mea = (struct mea *)lmm; + old = (struct mea_old *)lmm; + if (mea->mea_magic == MEA_MAGIC_LAST_CHAR || - mea->mea_magic == MEA_MAGIC_ALL_CHARS) + mea->mea_magic == MEA_MAGIC_ALL_CHARS) RETURN(0); - old = (struct mea_old *) lmm; - - rc = sizeof(struct lustre_id) * old->mea_count + - sizeof(struct mea_old); - - if (old->mea_count > 256 || old->mea_master > 256 || lmmsize < rc - || old->mea_master > old->mea_count) { - CWARN("unknown MEA format, dont convert it\n"); - CWARN(" count %u, master %u, size %u\n", - old->mea_count, old->mea_master, rc); - RETURN(0); - } - - CWARN("converting MEA EA on %lu/%u from V0 to V1 (%u/%u)\n", - inode->i_ino, inode->i_generation, old->mea_count, - old->mea_master); + /* + * making MDS try LOV EA converting in the non-LMV configuration + * cases. 
+ */ + if (!obd->u.mds.mds_md_exp) + RETURN(-EINVAL); - size = sizeof(struct lustre_id) * old->mea_count + - sizeof(struct mea); - - OBD_ALLOC(new, size); - if (new == NULL) - RETURN(-ENOMEM); + CDEBUG(D_INODE, "converting MEA EA on %lu/%u from V0 to V1 (%u/%u)\n", + inode->i_ino, inode->i_generation, old->mea_count, + old->mea_master); - new->mea_magic = MEA_MAGIC_LAST_CHAR; - new->mea_count = old->mea_count; - new->mea_master = old->mea_master; - for (i = 0; i < new->mea_count; i++) - new->mea_ids[i] = old->mea_ids[i]; + rc = obd_unpackmd(obd->u.mds.mds_md_exp, &lsm, lmm, lmm_size); + if (rc < 0) + GOTO(conv_end, rc); + + rc = obd_packmd(obd->u.mds.mds_md_exp, &lmm, lsm); + if (rc < 0) + GOTO(conv_free, rc); + lmm_size = rc; handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) { @@ -1252,17 +1244,14 @@ int mds_convert_mea_ea(struct obd_device *obd, struct inode *inode, GOTO(conv_free, rc); } - rc = fsfilt_set_md(obd, inode, handle, (struct lov_mds_md *) new, size); - if (rc > lmmsize) - size = lmmsize; - memcpy(lmm, new, size); - + rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_MEA); err = fsfilt_commit(obd, obd->u.mds.mds_sb, inode, handle, 0); if (!rc) - rc = err ? err : size; - EXIT; + rc = err ? 
err : lmm_size; + GOTO(conv_free, rc); conv_free: - OBD_FREE(new, size); + obd_free_memmd(obd->u.mds.mds_md_exp, &lsm); +conv_end: return rc; } diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index bd3ed48..13d262e 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -363,6 +363,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, int rc = 0; ENTRY; + CDEBUG(D_INFO, "ioctl cmd %x\n", cmd); switch (cmd) { case OBD_IOC_RECORD: { char *name = data->ioc_inlbuf1; @@ -559,6 +560,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, RETURN(0); } default: + CDEBUG(D_INFO, "unknown command %x\n", cmd); RETURN(-EINVAL); } RETURN(0); @@ -665,9 +667,10 @@ int mds_dt_synchronize(void *data) } rc = 0; + EXIT; cleanup: up(&mds->mds_orphan_recovery_sem); - RETURN(rc); + return rc; } int mds_dt_start_synchronize(struct obd_device *obd, @@ -847,8 +850,7 @@ int mds_convert_lov_ea(struct obd_device *obd, struct inode *inode, GOTO(conv_free, rc); } - rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size); - + rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_LOV); err = fsfilt_commit(obd, obd->u.mds.mds_sb, inode, handle, 0); if (!rc) rc = err ? 
err : lmm_size; @@ -933,7 +935,7 @@ int mds_revalidate_lov_ea(struct obd_device *obd, struct inode *inode, GOTO(out_oa, rc); } - rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size); + rc = fsfilt_set_md(obd, inode, handle, lmm, lmm_size, EA_LOV); err = fsfilt_commit(obd, inode->i_sb, inode, handle, 0); if (!rc) rc = err; diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 8b66569..6948dd4 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -366,7 +366,8 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, LASSERT(lmm_buf); LASSERT(lmm_bufsize >= lmm_size); memcpy(lmm_buf, lmm, lmm_size); - rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size); + rc = fsfilt_set_md(obd, inode, *handle, lmm, + lmm_size, EA_LOV); if (rc) CERROR("open replay failed to set md:%d\n", rc); RETURN(0); @@ -400,9 +401,20 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, if (rc) GOTO(out_oa, rc); } else { - /* Per-directory striping default code removed, because - * it uses the same unnamed EA storage as the directory - * striping for CMD. 
-p */ + OBD_ALLOC(lmm, mds->mds_max_mdsize); + if (lmm == NULL) + GOTO(out_oa, rc = -ENOMEM); + + lmm_size = mds->mds_max_mdsize; + rc = mds_get_md(obd, dchild->d_parent->d_inode, + lmm, &lmm_size, 1, 0); + if (rc > 0) + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, + mds->mds_dt_exp, + 0, &lsm, lmm); + OBD_FREE(lmm, mds->mds_max_mdsize); + if (rc) + GOTO(out_oa, rc); } LASSERT(oa->o_gr >= FILTER_GROUP_FIRST_MDS); rc = obd_create(mds->mds_dt_exp, oa, &lsm, &oti); @@ -465,7 +477,9 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, GOTO(out_ids, rc); } - rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size); + rc = fsfilt_set_md(obd, inode, *handle, lmm, + lmm_size, EA_LOV); + lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0); lmm_bufsize = req->rq_repmsg->buflens[offset]; LASSERT(lmm_buf); @@ -481,6 +495,8 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, OBD_FREE(*ids, mds->mds_dt_desc.ld_tgt_count * sizeof(**ids)); *ids = NULL; } + if(lsm) + obd_free_memmd(mds->mds_dt_exp, &lsm); RETURN(rc); } @@ -538,7 +554,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset, mds_pack_inode2body(obd, body, dchild->d_inode, 1); if (S_ISREG(dchild->d_inode->i_mode)) { rc = mds_pack_md(obd, req->rq_repmsg, 2, body, - dchild->d_inode, 1); + dchild->d_inode, 1, 0); if (rc) LASSERT(rc == req->rq_status); @@ -622,7 +638,7 @@ static int accmode(int flags) /* Handles object creation, actual opening, and I/O epoch */ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, struct mds_body *body, int flags, void **handle, - struct mds_update_record *rec,struct ldlm_reply *rep) + struct mds_update_record *rec, struct ldlm_reply *rep) { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; @@ -638,7 +654,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, if ((S_ISREG(mode) && !(body->valid & OBD_MD_FLEASIZE)) || (S_ISDIR(mode) && 
!(body->valid & OBD_MD_FLDIREA))) { rc = mds_pack_md(obd, req->rq_repmsg, 2, body, - dchild->d_inode, 0); + dchild->d_inode, 0, 0); if (rc) { up(&dchild->d_inode->i_sem); RETURN(rc); @@ -667,7 +683,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, req->rq_repmsg, 2); if (!rc) rc = mds_pack_md(obd, req->rq_repmsg, 2, body, - dchild->d_inode, 0); + dchild->d_inode, 0, 0); if (rc) { up(&dchild->d_inode->i_sem); RETURN(rc); @@ -720,9 +736,7 @@ static int mds_open_by_id(struct ptlrpc_request *req, ENTRY; down(&pending_dir->i_sem); - idlen = ll_id2str(idname, id_ino(id), id_gen(id)); - dchild = lookup_one_len(idname, mds->mds_pending_dir, idlen); if (IS_ERR(dchild)) { @@ -734,7 +748,6 @@ static int mds_open_by_id(struct ptlrpc_request *req, } if (dchild->d_inode != NULL) { - up(&pending_dir->i_sem); mds_inode_set_orphan(dchild->d_inode); mds_pack_inode2body(req2obd(req), body, dchild->d_inode, 1); @@ -745,8 +758,7 @@ static int mds_open_by_id(struct ptlrpc_request *req, idname); goto open; } - dput(dchild); - up(&pending_dir->i_sem); + l_dput(dchild); /* * we didn't find it in PENDING so it isn't an orphan. See if it was a @@ -767,7 +779,6 @@ static int mds_open_by_id(struct ptlrpc_request *req, rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep ? rep->lock_policy_res1 : 0); /* XXX what do we do here if mds_finish_transno itself failed? */ - l_dput(dchild); RETURN(rc); } @@ -808,6 +819,7 @@ int mds_lock_new_child(struct obd_device *obd, struct inode *inode, struct lustre_handle lockh; int lock_flags = 0; int rc; + ENTRY; if (child_lockh == NULL) child_lockh = &lockh; @@ -856,12 +868,21 @@ int mds_open(struct mds_update_record *rec, int offset, struct dentry_params dp; struct mea *mea = NULL; int mea_size, update_mode; + int child_mode = LCK_PR; + /* Always returning LOOKUP lock if open succesful to guard + dentry on client. 
*/ + ldlm_policy_data_t policy = {.l_inodebits={MDS_INODELOCK_LOOKUP}}; + struct ldlm_res_id child_res_id = { .name = {0}}; + int lock_flags = 0; ENTRY; DEBUG_REQ(D_INODE, req, "parent "DLID4" name %*s mode %o", OLID4(rec->ur_id1), rec->ur_namelen - 1, rec->ur_name, rec->ur_mode); + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_OPEN | OBD_FAIL_ONCE, + (obd_timeout + 1) / 4); + if (offset == 3) { /* intent */ rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); @@ -909,7 +930,7 @@ int mds_open(struct mds_update_record *rec, int offset, acc_mode = accmode(rec->ur_flags); /* Step 1: Find and lock the parent */ - if (rec->ur_flags & O_CREAT) { + if (rec->ur_flags & MDS_OPEN_CREAT) { /* XXX Well, in fact we only need this lock mode change if in addition to O_CREAT, the file does not exist. But we do not know if it exists or not yet */ @@ -1129,7 +1150,8 @@ got_child: if (!(rec->ur_flags & O_EXCL)) { /* bug 3313 */ rc = fsfilt_commit(obd, dchild->d_inode->i_sb, - dchild->d_inode, handle, 0); + dchild->d_inode, handle, + req->rq_export->exp_sync); handle = NULL; } @@ -1138,7 +1160,7 @@ got_child: mds_pack_inode2body(obd, body, dchild->d_inode, 1); LASSERTF(!mds_inode_is_orphan(dchild->d_inode), - "dchild %*s (%p) inode %p\n", dchild->d_name.len, + "dchild %.*s (%p) inode %p\n", dchild->d_name.len, dchild->d_name.name, dchild, dchild->d_inode); if (S_ISREG(dchild->d_inode->i_mode)) { @@ -1166,14 +1188,6 @@ got_child: GOTO(cleanup, rc = -EEXIST); // returns a lock to the client } - /* if we are following a symlink, don't open */ - if (S_ISLNK(dchild->d_inode->i_mode)) - GOTO(cleanup, rc = 0); - - if ((rec->ur_flags & MDS_OPEN_DIRECTORY) && - !S_ISDIR(dchild->d_inode->i_mode)) - GOTO(cleanup, rc = -ENOTDIR); - if (S_ISDIR(dchild->d_inode->i_mode)) { if (rec->ur_flags & MDS_OPEN_CREAT || rec->ur_flags & FMODE_WRITE) { @@ -1191,21 +1205,81 @@ got_child: } } + /* if we are following a symlink, don't open */ + if 
(S_ISLNK(dchild->d_inode->i_mode)) + GOTO(cleanup_no_trans, rc = 0); + + if ((rec->ur_flags & MDS_OPEN_DIRECTORY) && + !S_ISDIR(dchild->d_inode->i_mode)) + GOTO(cleanup, rc = -ENOTDIR); + + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_CREATE)) { + obd_fail_loc = OBD_FAIL_LDLM_REPLY | OBD_FAIL_ONCE; + GOTO(cleanup, rc = -EAGAIN); + } + + /* Obtain OPEN lock as well */ + policy.l_inodebits.bits |= MDS_INODELOCK_OPEN; + + /* We cannot use acc_mode here, because it is zeroed in case of + creating a file, so we get wrong lockmode */ + if (accmode(rec->ur_flags) & MAY_WRITE) + child_mode = LCK_CW; + else if (accmode(rec->ur_flags) & MAY_EXEC) + child_mode = LCK_PR; + else + child_mode = LCK_CR; + + if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)) { + struct lustre_id sid; + + down(&dchild->d_inode->i_sem); + rc = mds_read_inode_sid(obd, dchild->d_inode, &sid); + up(&dchild->d_inode->i_sem); + if (rc) { + CERROR("Can't read inode self id, " + "inode %lu, rc %d\n", + dchild->d_inode->i_ino, rc); + GOTO(cleanup, rc); + } + + /* In case of replay we do not get a lock assuming that the + caller has it already */ + child_res_id.name[0] = id_fid(&sid); + child_res_id.name[1] = id_group(&sid); + + rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, + child_res_id, LDLM_IBITS, &policy, + child_mode, &lock_flags, + mds_blocking_ast, ldlm_completion_ast, + NULL, NULL, NULL, 0, NULL, child_lockh); + if (rc != ELDLM_OK) + GOTO(cleanup, rc); + + cleanup_phase = 3; + } + /* Step 5: mds_open it */ rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec, rep); GOTO(cleanup, rc); - cleanup: +cleanup: rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, req, rc, rep ? 
rep->lock_policy_res1 : 0); +cleanup_no_trans: switch (cleanup_phase) { + case 3: + if (rc) { + ldlm_lock_decref(child_lockh, child_mode); + child_lockh->cookie = 0; + } case 2: if (rc && created) { int err = vfs_unlink(dparent->d_inode, dchild); if (err) { - CERROR("unlink(%*s) in error path: %d\n", + CERROR("unlink(%.*s) in error path: %d\n", dchild->d_name.len, dchild->d_name.name, err); } @@ -1229,6 +1303,18 @@ got_child: } if (mea) OBD_FREE(mea, mea_size); + if (rc == 0) + atomic_inc(&mds->mds_open_count); + + /* + * If we have not taken the "open" lock, we may not return 0 here, + * because caller expects 0 to mean "lock is taken", and it needs + * nonzero return here for caller to return EDLM_LOCK_ABORTED to + * client. Later caller should rewrite the return value back to zero + * if it to be used any further. + */ + if ((cleanup_phase != 3) && !rc) + rc = ENOLCK; RETURN(rc); } @@ -1313,6 +1399,14 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset, LASSERT(pending_child->d_inode != NULL); cleanup_phase = 2; /* dput(pending_child) when finished */ + if (S_ISDIR(pending_child->d_inode->i_mode)) { + rc = vfs_rmdir(pending_dir, pending_child); + if (rc) + CERROR("error unlinking orphan dir %s: rc %d\n", + idname, rc); + goto out; + } + if (req != NULL && req->rq_repmsg != NULL) { lmm = lustre_msg_buf(req->rq_repmsg, 1, 0); stripe_count = le32_to_cpu(lmm->lmm_stripe_count); @@ -1329,10 +1423,7 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset, pending_child->d_fsdata = (void *) &dp; dp.p_inum = 0; dp.p_ptr = req; - if (S_ISDIR(pending_child->d_inode->i_mode)) - rc = vfs_rmdir(pending_dir, pending_child); - else - rc = vfs_unlink(pending_dir, pending_child); + rc = vfs_unlink(pending_dir, pending_child); if (rc) CERROR("error unlinking orphan %s: rc %d\n", idname, rc); @@ -1410,10 +1501,17 @@ out: mds_mfd_destroy(mfd); cleanup: + atomic_dec(&mds->mds_open_count); if (req != NULL && reply_body != NULL) { rc = mds_finish_transno(mds, pending_dir, 
handle, req, rc, 0); } else if (handle) { - int err = fsfilt_commit(obd, mds->mds_sb, pending_dir, handle, 0); + int err, force_sync = 0; + + if (req && req->rq_export) + force_sync = req->rq_export->exp_sync; + + err = fsfilt_commit(obd, mds->mds_sb, pending_dir, handle, + force_sync); if (err) { CERROR("error committing close: %d\n", err); if (!rc) @@ -1461,7 +1559,6 @@ int mds_close(struct ptlrpc_request *req, int offset) req->rq_repmsg->buflens[2]); } - body = lustre_swab_reqbuf(req, offset, sizeof(*body), lustre_swab_mds_body); if (body == NULL) { @@ -1496,7 +1593,7 @@ int mds_close(struct ptlrpc_request *req, int offset) (body->valid & OBD_MD_FID) ? 1 : 0); mds_pack_md(obd, req->rq_repmsg, 1, rep_body, - inode, MDS_PACK_MD_LOCK); + inode, MDS_PACK_MD_LOCK, 0); } spin_lock(&med->med_open_lock); list_del(&mfd->mfd_list); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index a4e7a9b..f9637b7 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -140,10 +140,17 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, spin_unlock(&mds->mds_transno_lock); } req->rq_repmsg->transno = req->rq_transno = transno; - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); + if (req->rq_reqmsg->opc == MDS_CLOSE) { + mcd->mcd_last_close_transno = cpu_to_le64(transno); + mcd->mcd_last_close_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_close_result = cpu_to_le32(rc); + mcd->mcd_last_close_data = cpu_to_le32(op_data); + } else { + mcd->mcd_last_transno = cpu_to_le64(transno); + mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); + mcd->mcd_last_result = cpu_to_le32(rc); + mcd->mcd_last_data = cpu_to_le32(op_data); + } fsfilt_add_journal_cb(obd, mds->mds_sb, transno, handle, mds_commit_last_transno_cb, NULL); @@ -178,7 +185,8 @@ int mds_finish_transno(struct mds_obd *mds, struct inode *inode, void *handle, 
EXIT; out_commit: - err = fsfilt_commit(obd, mds->mds_sb, inode, handle, 0); + err = fsfilt_commit(obd, mds->mds_sb, inode, handle, + req->rq_export->exp_sync); if (err) { CERROR("error committing transaction: %d\n", err); if (!rc) @@ -341,10 +349,17 @@ void mds_steal_ack_locks(struct ptlrpc_request *req) void mds_req_from_mcd(struct ptlrpc_request *req, struct mds_client_data *mcd) { - DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", - mcd->mcd_last_transno, mcd->mcd_last_result); - req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_transno; - req->rq_repmsg->status = req->rq_status = mcd->mcd_last_result; + if (req->rq_reqmsg->opc == MDS_CLOSE) { + DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", + mcd->mcd_last_close_transno, mcd->mcd_last_close_result); + req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_close_transno; + req->rq_repmsg->status = req->rq_status = mcd->mcd_last_close_result; + } else { + DEBUG_REQ(D_HA, req, "restoring transno "LPD64"/status %d", + mcd->mcd_last_transno, mcd->mcd_last_result); + req->rq_repmsg->transno = req->rq_transno = mcd->mcd_last_transno; + req->rq_repmsg->status = req->rq_status = mcd->mcd_last_result; + } mds_steal_ack_locks(req); } @@ -466,24 +481,33 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, rc = inode->i_op->removexattr(de, rec->ur_eadata); } else if ((S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) { - struct lov_stripe_md *lsm = NULL; - + S_ISDIR(inode->i_mode)) && rec->ur_eadata != NULL) { + struct lov_stripe_md *lsm = NULL; + struct lov_user_md *lum = NULL; + rc = ll_permission(inode, MAY_WRITE, NULL); if (rc < 0) GOTO(cleanup, rc); - rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, mds->mds_dt_exp, - 0, &lsm, rec->ur_eadata); - if (rc) - GOTO(cleanup, rc); - - obd_free_memmd(mds->mds_dt_exp, &lsm); - - rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata, - rec->ur_eadatalen); - if (rc) - GOTO(cleanup, rc); + lum = 
rec->ur_eadata; + /* if lmm_stripe_size is -1 delete default stripe from dir */ + if (S_ISDIR(inode->i_mode) && + lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1)){ + rc = fsfilt_set_md(obd, inode, handle, NULL, 0, EA_LOV); + if (rc) + GOTO(cleanup, rc); + } else { + rc = obd_iocontrol(OBD_IOC_LOV_SETSTRIPE, mds->mds_dt_exp, + 0, &lsm, rec->ur_eadata); + if (rc) + GOTO(cleanup, rc); + + obd_free_memmd(mds->mds_dt_exp, &lsm); + rc = fsfilt_set_md(obd, inode, handle, rec->ur_eadata, + rec->ur_eadatalen, EA_LOV); + if (rc) + GOTO(cleanup, rc); + } } } @@ -702,7 +726,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, dp.p_ptr = req; switch (type) { - case S_IFREG:{ + case S_IFREG: { handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE, NULL); if (IS_ERR(handle)) GOTO(cleanup, rc = PTR_ERR(handle)); @@ -710,7 +734,7 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, EXIT; break; } - case S_IFDIR:{ + case S_IFDIR: { int i, nstripes = 0; /* @@ -824,11 +848,6 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, oa->o_fid = id_fid(rec->ur_id2); oa->o_generation = id_gen(rec->ur_id2); oa->o_flags |= OBD_FL_RECREATE_OBJS; - - /* - * fid should be defined here. It should be - * passedfrom client. - */ LASSERT(oa->o_fid != 0); } @@ -941,11 +960,6 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, inode->i_generation = id_gen(rec->ur_id2); if (type != S_IFDIR) { - /* - * updating inode self id, as inode already - * exists and we should make sure, its sid will - * be the same as we reveived. 
- */ down(&inode->i_sem); rc = mds_update_inode_sid(obd, inode, handle, rec->ur_id2); @@ -1017,6 +1031,25 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, else MD_COUNTER_INCREMENT(obd, create); + /* take care of default stripe inheritance */ + if (type == S_IFDIR) { + struct lov_mds_md lmm; + int lmm_size = sizeof(lmm); + + rc = mds_get_md(obd, dir, &lmm, &lmm_size, 1, 0); + if (rc > 0) { + down(&inode->i_sem); + rc = fsfilt_set_md(obd, inode, handle, + &lmm, lmm_size, EA_LOV); + up(&inode->i_sem); + } + if (rc) { + CERROR("error on copy stripe info: rc = %d\n", + rc); + rc = 0; + } + } + body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body)); mds_pack_inode2body(obd, body, inode, 1); } @@ -2058,7 +2091,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset, } else if (S_ISREG(child_inode->i_mode)) { mds_pack_inode2body(obd, body, child_inode, 0); mds_pack_md(obd, req->rq_repmsg, offset + 1, - body, child_inode, MDS_PACK_MD_LOCK); + body, child_inode, MDS_PACK_MD_LOCK, 0); } } @@ -2247,7 +2280,7 @@ static int mds_reint_link_acquire(struct mds_update_record *rec, EXIT; cleanup: rc = mds_finish_transno(mds, de_src ? 
de_src->d_inode : NULL, - handle, req, rc, 0); + handle, req, rc, 0); switch (cleanup_phase) { case 2: if (rc) @@ -2306,12 +2339,13 @@ static int mds_reint_link_to_remote(struct mds_update_record *rec, op_data->id1 = *(rec->ur_id1); rc = md_link(mds->mds_md_exp, op_data, &request); OBD_FREE(op_data, sizeof(*op_data)); + + if (request) + ptlrpc_req_finished(request); if (rc) GOTO(cleanup, rc); cleanup_phase = 2; - if (request) - ptlrpc_req_finished(request); OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, de_tgt_dir->d_inode->i_sb); @@ -2588,13 +2622,12 @@ static int mds_get_parents_children_locked(struct obd_device *obd, struct ldlm_res_id c1_res_id = { .name = {0} }; struct ldlm_res_id c2_res_id = { .name = {0} }; ldlm_policy_data_t p_policy = {.l_inodebits = {MDS_INODELOCK_UPDATE}}; - /* Only dentry should change, but the inode itself would be - intact otherwise */ + /* Only dentry should disappear, but the inode itself would be + intact otherwise. */ ldlm_policy_data_t c1_policy = {.l_inodebits = {MDS_INODELOCK_LOOKUP}}; /* If something is going to be replaced, both dentry and inode locks are needed */ - ldlm_policy_data_t c2_policy = {.l_inodebits = {MDS_INODELOCK_LOOKUP| - MDS_INODELOCK_UPDATE}}; + ldlm_policy_data_t c2_policy = {.l_inodebits = {MDS_INODELOCK_FULL}}; struct ldlm_res_id *maxres_src, *maxres_tgt; struct inode *inode; int rc = 0, cleanup_phase = 0; @@ -2662,7 +2695,7 @@ static int mds_get_parents_children_locked(struct obd_device *obd, old_len - 1); if (IS_ERR(*de_oldp)) { rc = PTR_ERR(*de_oldp); - CERROR("old child lookup error (%*s): %d\n", + CERROR("old child lookup error (%.*s): %d\n", old_len - 1, old_name, rc); GOTO(cleanup, rc); } @@ -2706,7 +2739,7 @@ static int mds_get_parents_children_locked(struct obd_device *obd, new_len - 1); if (IS_ERR(*de_newp)) { rc = PTR_ERR(*de_newp); - CERROR("new child lookup error (%*s): %d\n", + CERROR("new child lookup error (%.*s): %d\n", old_len - 1, old_name, rc); GOTO(cleanup, rc); } @@ -3094,7 +3127,11 
@@ static int mds_reint_rename_to_remote(struct mds_update_record *rec, int offset, &update_mode, rec->ur_name, rec->ur_namelen, &child_lockh, &de_old, LCK_EX, MDS_INODELOCK_LOOKUP); - LASSERT(rc == 0); + if (rc) { + OBD_FREE(op_data, sizeof(*op_data)); + RETURN(rc); + } + LASSERT(de_srcdir); LASSERT(de_srcdir->d_inode); LASSERT(de_old); @@ -3274,7 +3311,11 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } - /* check if inodes point to each other. */ + /* + * check if inodes point to each other. This should be checked before + * is_subdir() check, as for the same entries it will think that they + * are subdirs. + */ if (!(de_old->d_flags & DCACHE_CROSS_REF) && !(de_new->d_flags & DCACHE_CROSS_REF) && old_inode == new_inode) @@ -3312,7 +3353,7 @@ static int mds_reint_rename(struct mds_update_record *rec, int offset, } else if (S_ISREG(new_inode->i_mode)) { mds_pack_inode2body(obd, body, new_inode, 0); mds_pack_md(obd, req->rq_repmsg, 1, body, - new_inode, MDS_PACK_MD_LOCK); + new_inode, MDS_PACK_MD_LOCK, 0); } } diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 9b023f9..127adc8 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -94,32 +94,31 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild, struct mds_obd *mds = &obd->u.mds; struct lov_mds_md *lmm = NULL; struct llog_cookie *logcookies = NULL; - int lmm_size = 0, log_unlink = 0; + int lmm_size, log_unlink = 0; void *handle = NULL; int rc, err; ENTRY; LASSERT(mds->mds_dt_obd != NULL); - OBD_ALLOC(lmm, mds->mds_max_mdsize); + /* We don't need to do any of these other things for orhpan dirs, + * especially not mds_get_md (may get a default LOV EA, bug 4554) */ + if (S_ISDIR(inode->i_mode)) { + rc = vfs_rmdir(pending_dir, dchild); + if (rc) + CERROR("error %d unlinking dir %*s from PENDING\n", + rc, dchild->d_name.len, dchild->d_name.name); + RETURN(rc); + } + + lmm_size = mds->mds_max_mdsize; + 
OBD_ALLOC(lmm, lmm_size); if (lmm == NULL) RETURN(-ENOMEM); - down(&inode->i_sem); - rc = fsfilt_get_md(obd, inode, lmm, mds->mds_max_mdsize); - up(&inode->i_sem); - - if (rc < 0) { - CERROR("Error %d reading eadata for ino %lu\n", - rc, inode->i_ino); + rc = mds_get_md(obd, inode, lmm, &lmm_size, 1, 0); + if (rc < 0) GOTO(out_free_lmm, rc); - } else if (rc > 0) { - lmm_size = rc; - rc = mds_convert_lov_ea(obd, inode, lmm, lmm_size); - if (rc > 0) - lmm_size = rc; - rc = 0; - } handle = fsfilt_start_log(obd, pending_dir, FSFILT_OP_UNLINK, NULL, le32_to_cpu(lmm->lmm_stripe_count)); @@ -130,16 +129,11 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild, GOTO(out_free_lmm, rc); } - if (S_ISDIR(inode->i_mode)) - rc = vfs_rmdir(pending_dir, dchild); - else - rc = vfs_unlink(pending_dir, dchild); - - if (rc) - CERROR("error %d unlinking orphan %*s from PENDING directory\n", + rc = vfs_unlink(pending_dir, dchild); + if (rc) { + CERROR("error %d unlinking orphan %.*s from PENDING\n", rc, dchild->d_name.len, dchild->d_name.name); - - if (!rc && lmm_size) { + } else if (lmm_size) { OBD_ALLOC(logcookies, mds->mds_max_cookiesize); if (logcookies == NULL) rc = -ENOMEM; @@ -152,8 +146,7 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild, CERROR("error committing orphan unlink: %d\n", err); if (!rc) rc = err; - } - if (!rc) { + } else if (!rc) { rc = mds_osc_destroy_orphan(mds, inode, lmm, lmm_size, logcookies, log_unlink); } @@ -176,6 +169,7 @@ int mds_cleanup_orphans(struct obd_device *obd) struct l_linux_dirent *dirent, *n; struct list_head dentry_list; char d_name[LL_ID_NAMELEN]; + unsigned long inum; __u64 i = 0; int rc = 0, item = 0, namlen; ENTRY; @@ -200,21 +194,21 @@ int mds_cleanup_orphans(struct obd_device *obd) GOTO(err_out, rc); list_for_each_entry_safe(dirent, n, &dentry_list, lld_list) { - i ++; + i++; list_del(&dirent->lld_list); namlen = strlen(dirent->lld_name); LASSERT(sizeof(d_name) >= namlen + 1); 
strcpy(d_name, dirent->lld_name); + inum = dirent->lld_ino; OBD_FREE(dirent, sizeof(*dirent)); CDEBUG(D_INODE, "entry "LPU64" of PENDING DIR: %s\n", i, d_name); if (((namlen == 1) && !strcmp(d_name, ".")) || - ((namlen == 2) && !strcmp(d_name, ".."))) { + ((namlen == 2) && !strcmp(d_name, "..")) || inum == 0) continue; - } down(&pending_dir->i_sem); dchild = lookup_one_len(d_name, mds->mds_pending_dir, namlen); @@ -227,6 +221,13 @@ int mds_cleanup_orphans(struct obd_device *obd) GOTO(next, rc = 0); } + if (is_bad_inode(dchild->d_inode)) { + CERROR("bad orphan inode found %lu/%u\n", + dchild->d_inode->i_ino, + dchild->d_inode->i_generation); + GOTO(next, rc = -ENOENT); + } + child_inode = dchild->d_inode; DOWN_READ_I_ALLOC_SEM(child_inode); if (mds_inode_is_orphan(child_inode) && diff --git a/lustre/mgmt/mgmt_svc.c b/lustre/mgmt/mgmt_svc.c index f0ff838..9debe47 100644 --- a/lustre/mgmt/mgmt_svc.c +++ b/lustre/mgmt/mgmt_svc.c @@ -106,10 +106,9 @@ static int mgmt_setup(struct obd_device *obd, obd_count len, void *buf) RETURN(-EALREADY); mgmt_service = - ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE, - MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL, - mgmt_handler, "mgmt", - obd->obd_proc_entry); + ptlrpc_init_svc(MGMT_NBUFS, MGMT_BUFSIZE, MGMT_MAXREQSIZE, + MGMT_REQUEST_PORTAL, MGMT_REPLY_PORTAL, 30000, + mgmt_handler, "mgmt", obd->obd_proc_entry); if (!mgmt_service) { CERROR("Failed to start mgmt service\n"); RETURN(-ENOMEM); diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index 459c790..66f6eb2 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -1,4 +1,4 @@ -MODULES := obdclass llog_test +MODULES := obdclass llog_test confobd obdclass-objs := llog_obd.o class_obd.o obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o @@ -6,6 +6,12 @@ obdclass-objs += lprocfs_status.o lustre_handles.o lustre_peer.o obdclass-objs += statfs_pack.o obdo.o obd_config.o mea.o ifeq ($(PATCHLEVEL),6) + +confobd-objs := 
conf_obd.o + +$(obj)/conf_obd.c: $(obj)/confobd.c + ln -sf $< $@ + llog_test-objs := llog-test.o $(obj)/llog-test.c: $(obj)/llog_test.c diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index d1d2ced9..6b659d4 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -10,9 +10,9 @@ liblustreclass_a_CFLAGS = $(LLCFLAGS) endif if MODULES -modulefs_DATA = obdclass$(KMODEXT) +modulefs_DATA = obdclass$(KMODEXT) confobd$(KMODEXT) noinst_DATA = llog_test$(KMODEXT) endif # MODULES -MOSTLYCLEANFILES = *.o *.ko *.mod.c llog-test.c -DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c +MOSTLYCLEANFILES = *.o *.ko *.mod.c llog-test.c +DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) confobd.c llog_test.c diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 90a61e1..5876810 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -635,7 +635,6 @@ int init_obdclass(void) static void cleanup_obdclass(void) { int i; - int leaked; ENTRY; misc_deregister(&obd_psdev); @@ -659,11 +658,6 @@ static void cleanup_obdclass(void) class_handle_cleanup(); class_exit_uuidlist(); - - leaked = atomic_read(&obd_memory); - CDEBUG(leaked ? D_ERROR : D_INFO, - "obd mem max: %d leaked: %d\n", obd_memmax, leaked); - EXIT; } diff --git a/lustre/obdclass/confobd.c b/lustre/obdclass/confobd.c new file mode 100644 index 0000000..d8517b8 --- /dev/null +++ b/lustre/obdclass/confobd.c @@ -0,0 +1,423 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_CONFOBD + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LUSTRE_CONFOBD_NAME "confobd" + +static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; +static struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; + +LPROCFS_INIT_VARS(confobd, lprocfs_module_vars, lprocfs_obd_vars) + +static int confobd_fs_setup(struct obd_device *obd, + struct lvfs_obd_ctxt *lvfs_ctxt) +{ + struct conf_obd *confobd = &obd->u.conf; + struct lvfs_run_ctxt saved; + struct dentry *dentry; + int rc = 0; + ENTRY; + + OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); + obd->obd_lvfs_ctxt.pwdmnt = lvfs_ctxt->loc_mnt; + obd->obd_lvfs_ctxt.pwd = lvfs_ctxt->loc_mnt->mnt_root; + obd->obd_lvfs_ctxt.fs = get_ds(); + /*Now we did not set cb_ops of CONFOBD FIXME later*/ + + /*setup llog ctxt*/ + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create LOGS directory: rc = %d\n", rc); + GOTO(err_out, rc); + } + confobd->cfobd_logs_dir = dentry; + + dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot create OBJECTS directory: rc = %d\n", rc); + GOTO(err_logs, rc); + } + confobd->cfobd_objects_dir = dentry; + + dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1); + if (IS_ERR(dentry)) { + rc = PTR_ERR(dentry); + CERROR("cannot 
create PENDING directory: rc = %d\n", rc); + GOTO(err_logs, rc); + } + confobd->cfobd_pending_dir = dentry; + +err_logs: + if (rc) + l_dput(confobd->cfobd_logs_dir); +err_out: + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + RETURN(rc); +} + +static int confobd_fs_cleanup(struct obd_device *obd, int flags) +{ + struct conf_obd *confobd = &obd->u.conf; + struct lvfs_run_ctxt saved; + int rc = 0; + ENTRY; + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (confobd->cfobd_logs_dir != NULL) { + l_dput(confobd->cfobd_logs_dir); + confobd->cfobd_logs_dir = NULL; + } + if (confobd->cfobd_objects_dir != NULL) { + l_dput(confobd->cfobd_objects_dir); + confobd->cfobd_objects_dir = NULL; + } + if (confobd->cfobd_pending_dir != NULL) { + l_dput(confobd->cfobd_pending_dir); + confobd->cfobd_pending_dir = NULL; + } + + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + RETURN(rc); +} + +int confobd_attach(struct obd_device *dev, obd_count len, void *data) +{ + struct lprocfs_static_vars lvars; + int rc = 0; + ENTRY; + + lprocfs_init_vars(confobd, &lvars); + rc = lprocfs_obd_attach(dev, lvars.obd_vars); + if (rc) + RETURN(rc); + + rc = lprocfs_alloc_md_stats(dev, 0); + RETURN(rc); +} + +int confobd_detach(struct obd_device *dev) +{ + int rc; + ENTRY; + + lprocfs_free_md_stats(dev); + rc = lprocfs_obd_detach(dev); + RETURN(rc); +} + +static int confobd_setup(struct obd_device *obd, obd_count len, void *buf) +{ + struct conf_obd *confobd = &obd->u.conf; + struct lustre_cfg* lcfg = buf; + struct lvfs_obd_ctxt *lvfs_ctxt = NULL; + char *name = NULL; + char *fstype = NULL; + char *mountoption = NULL; + int rc = 0; + ENTRY; + + if (lcfg->lcfg_inllen1 < 1 || !lcfg->lcfg_inlbuf1) { + CERROR("CONFOBD setup requires device name\n"); + RETURN(-EINVAL); + } + if (lcfg->lcfg_inllen2 < 1 || !lcfg->lcfg_inlbuf2) { + CERROR("CONFOBD setup requires fstype\n"); + RETURN(-EINVAL); + } + + OBD_ALLOC(name, lcfg->lcfg_inllen1 + 1); + if (!name) { + CERROR("No Memory\n"); + GOTO(out, rc = -ENOMEM); + } 
+ memcpy(name, lcfg->lcfg_inlbuf1, lcfg->lcfg_inllen1); + + OBD_ALLOC(fstype, lcfg->lcfg_inllen2 + 1); + if (!fstype) { + CERROR("No Memory\n"); + GOTO(out, rc = -ENOMEM); + } + memcpy(fstype, lcfg->lcfg_inlbuf2, lcfg->lcfg_inllen2); + + obd->obd_fsops = fsfilt_get_ops(fstype); + if (IS_ERR(obd->obd_fsops)) { + CERROR("No fstype %s rc=%ld\n", fstype, PTR_ERR(obd->obd_fsops)); + GOTO(err_ops, rc = PTR_ERR(obd->obd_fsops)); + } + + if (lcfg->lcfg_inllen3 >= 1 && lcfg->lcfg_inlbuf3) { + OBD_ALLOC(mountoption, lcfg->lcfg_inllen3 + 1); + if (!mountoption) { + CERROR("No Memory\n"); + GOTO(err_ops, rc = -ENOMEM); + } + memcpy(mountoption, lcfg->lcfg_inlbuf3, lcfg->lcfg_inllen3); + } + rc = lvfs_mount_fs(name, fstype, mountoption, 0, &lvfs_ctxt); + if (rc) + GOTO(err_ops, rc); + LASSERT(lvfs_ctxt); + + confobd->cfobd_lvfs_ctxt = lvfs_ctxt; + + rc = confobd_fs_setup(obd, lvfs_ctxt); + if (rc) + GOTO(err_ops, rc); + + rc = obd_llog_setup(obd, &obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT, + obd, 0, NULL, &llog_lvfs_ops); + if (rc) + GOTO(err_ops, rc); + + EXIT; +out: + if (rc && lvfs_ctxt) + lvfs_umount_fs(lvfs_ctxt); + if (name) + OBD_FREE(name, lcfg->lcfg_inllen1 + 1); + if (fstype) + OBD_FREE(fstype, lcfg->lcfg_inllen2 + 1); + if (mountoption) + OBD_FREE(mountoption, lcfg->lcfg_inllen3 + 1); + + return rc; +err_ops: + fsfilt_put_ops(obd->obd_fsops); + goto out; +} + +static int confobd_cleanup(struct obd_device *obd, int flags) +{ + struct conf_obd *confobd = &obd->u.conf; + ENTRY; + + /* stop recording any log in case lconf didn't do that for us */ + if (confobd->cfobd_cfg_llh) { + struct lvfs_run_ctxt saved; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + llog_close(confobd->cfobd_cfg_llh); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + } + + obd_llog_cleanup(llog_get_context(&obd->obd_llogs, + LLOG_CONFIG_ORIG_CTXT)); + confobd_fs_cleanup(obd, flags); + if (confobd->cfobd_lvfs_ctxt) + lvfs_umount_fs(confobd->cfobd_lvfs_ctxt); + + if (!list_empty(&obd->obd_exports)) + 
return (-EBUSY); + fsfilt_put_ops(obd->obd_fsops); + RETURN(0); +} + +static int confobd_iocontrol(unsigned int cmd, struct obd_export *exp, + int len, void *karg, void *uarg) +{ + static struct obd_uuid cfg_uuid = { .uuid = "config_uuid" }; + struct obd_device *obd = exp->exp_obd; + struct conf_obd *confobd = &obd->u.conf; + struct obd_ioctl_data *data = karg; + struct lvfs_run_ctxt saved; + int rc = 0; + ENTRY; + + CDEBUG(D_INFO, "ioctl cmd %x\n", cmd); + switch (cmd) { + case OBD_IOC_CLEAR_LOG: { + char *name = data->ioc_inlbuf1; + if (confobd->cfobd_cfg_llh) + RETURN(-EBUSY); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = llog_open(llog_get_context(&obd->obd_llogs, + LLOG_CONFIG_ORIG_CTXT), + &confobd->cfobd_cfg_llh, NULL, name, + OBD_LLOG_FL_CREATE); + if (rc == 0) { + llog_init_handle(confobd->cfobd_cfg_llh, + LLOG_F_IS_PLAIN, NULL); + + rc = llog_destroy(confobd->cfobd_cfg_llh); + llog_free_handle(confobd->cfobd_cfg_llh); + } + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + confobd->cfobd_cfg_llh = NULL; + RETURN(rc); + } + case OBD_IOC_RECORD: { + char *name = data->ioc_inlbuf1; + if (confobd->cfobd_cfg_llh) + RETURN(-EBUSY); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = llog_open(llog_get_context(&obd->obd_llogs, + LLOG_CONFIG_ORIG_CTXT), + &confobd->cfobd_cfg_llh, NULL, name, + OBD_LLOG_FL_CREATE); + if (rc == 0) + llog_init_handle(confobd->cfobd_cfg_llh, + LLOG_F_IS_PLAIN, &cfg_uuid); + else + confobd->cfobd_cfg_llh = NULL; + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + RETURN(rc); + } + case OBD_IOC_ENDRECORD: { + if (!confobd->cfobd_cfg_llh) + RETURN(-EBADF); + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = llog_close(confobd->cfobd_cfg_llh); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + confobd->cfobd_cfg_llh = NULL; + RETURN(rc); + } + case OBD_IOC_DORECORD: { + char *cfg_buf; + struct llog_rec_hdr rec; + if (!confobd->cfobd_cfg_llh) + RETURN(-EBADF); + + rec.lrh_len = llog_data_len(data->ioc_plen1); + + 
switch(data->ioc_type) { + case LUSTRE_CFG_TYPE: + rec.lrh_type = OBD_CFG_REC; + break; + case PORTALS_CFG_TYPE: + rec.lrh_type = PTL_CFG_REC; + break; + default: + CERROR("unknown cfg record type:%d \n", data->ioc_type); + RETURN(-EINVAL); + } + + OBD_ALLOC(cfg_buf, data->ioc_plen1); + if (cfg_buf == NULL) { + CERROR("No Memory\n"); + RETURN(-ENOMEM); + } + if (copy_from_user(cfg_buf, data->ioc_pbuf1, data->ioc_plen1)) { + OBD_FREE(cfg_buf, data->ioc_plen1); + RETURN(-EFAULT); + } + + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = llog_write_rec(confobd->cfobd_cfg_llh, &rec, NULL, 0, + cfg_buf, -1); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + OBD_FREE(cfg_buf, data->ioc_plen1); + RETURN(rc); + } + case OBD_IOC_DUMP_LOG: { + struct llog_ctxt *ctxt = + llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = class_config_dump_llog(ctxt, data->ioc_inlbuf1, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + if (rc) + RETURN(rc); + + RETURN(rc); + } + + case OBD_IOC_START: { + struct llog_ctxt *ctxt; + char *conf_prof; + char *name = data->ioc_inlbuf1; + int len = strlen(name) + sizeof("-conf"); + + OBD_ALLOC(conf_prof, len); + if (!conf_prof) { + CERROR("no memory\n"); + RETURN(-ENOMEM); + } + sprintf(conf_prof, "%s-conf", name); + + ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT); + rc = class_config_process_llog(ctxt, conf_prof, NULL); + if (rc < 0) + CERROR("Unable to process log: %s\n", conf_prof); + OBD_FREE(conf_prof, len); + + RETURN(rc); + } + + default: + CDEBUG(D_INFO, "unknown command %x\n", cmd); + RETURN(-EINVAL); + } + RETURN(0); +} + +static struct obd_ops conf_obd_ops = { + .o_owner = THIS_MODULE, + .o_setup = confobd_setup, + .o_cleanup = confobd_cleanup, + .o_attach = confobd_attach, + .o_detach = confobd_detach, + .o_iocontrol = confobd_iocontrol, +}; + +static int __init confobd_init(void) +{ + struct lprocfs_static_vars lvars; + ENTRY; + + 
lprocfs_init_vars(confobd, &lvars); + RETURN(class_register_type(&conf_obd_ops, NULL, lvars.module_vars, + LUSTRE_CONFOBD_NAME)); +} + +static void __exit confobd_exit(void) +{ + class_unregister_type(LUSTRE_CONFOBD_NAME); +} + +MODULE_AUTHOR("Cluster File Systems, Inc. "); +MODULE_DESCRIPTION("Lustre Config OBD driver"); +MODULE_LICENSE("GPL"); + +module_init(confobd_init); +module_exit(confobd_exit); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 2bd9b91..bb7781b 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -212,7 +212,8 @@ void class_release_dev(struct obd_device *obd) int minor = obd->obd_minor; spin_lock(&obd_dev_lock); - memset(obd, 0, sizeof(*obd)); + obd->obd_type = NULL; + //memset(obd, 0, sizeof(*obd)); obd->obd_minor = minor; spin_unlock(&obd_dev_lock); } @@ -788,6 +789,7 @@ static inline void oig_grab(struct obd_io_group *oig) { atomic_inc(&oig->oig_refcount); } + void oig_release(struct obd_io_group *oig) { if (atomic_dec_and_test(&oig->oig_refcount)) diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index 5644386..0e5d201 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -304,9 +304,11 @@ int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data) case OBD_IOC_LLOG_PRINT: { LASSERT(data->ioc_inllen1); - err = llog_process(handle, llog_print_cb, data, NULL); + err = llog_process(handle, class_config_dump_handler,data,NULL); if (err == -LLOG_EEMPTY) err = 0; + else + err = llog_process(handle, llog_print_cb, data, NULL); GOTO(out_close, err); } diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index bcecaae..259bcbc 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -25,13 +25,12 @@ #include /* helper functions for calling the llog obd methods */ - int obd_llog_setup(struct obd_device *obd, struct obd_llogs *llogs, int index, struct obd_device *disk_obd, int count, struct llog_logid *logid, 
struct llog_operations *op) { - int rc = 0; struct llog_ctxt *ctxt; + int rc = 0; ENTRY; LASSERT(llogs); @@ -60,6 +59,10 @@ int obd_llog_setup(struct obd_device *obd, struct obd_llogs *llogs, struct mds_obd *mds = &disk_obd->u.mds; ctxt->loc_objects_dir = mds->mds_objects_dir; ctxt->loc_logs_dir = mds->mds_logs_dir; + } else if (!strcmp(disk_obd->obd_type->typ_name, "confobd")) { + struct conf_obd *confobd = &disk_obd->u.conf; + ctxt->loc_objects_dir = confobd->cfobd_objects_dir; + ctxt->loc_logs_dir = confobd->cfobd_logs_dir; } } diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index e5796cf..812d6af 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -638,6 +638,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, LPROCFS_OBD_OP_INIT(num_private_stats, stats, queue_group_io); LPROCFS_OBD_OP_INIT(num_private_stats, stats, trigger_group_io); LPROCFS_OBD_OP_INIT(num_private_stats, stats, teardown_async_page); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, adjust_kms); LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch); LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync); LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate); @@ -807,7 +808,10 @@ int lprocfs_write_u64_helper(const char *buffer, unsigned long count, kernbuf[count] = '\0'; - *val = simple_strtoull(kernbuf, &end, 0); + if (kernbuf[0] == '-') + *val = -simple_strtoull(kernbuf + 1, &end, 0); + else + *val = simple_strtoull(kernbuf, &end, 0); if (kernbuf == end) return -EINVAL; diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index 8298fc3..1f4ef19 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -103,7 +103,7 @@ int class_add_uuid(char *uuid, __u64 nid, __u32 nal) return -ENOMEM; } - CDEBUG(D_INFO, "add uuid %s "LPX64" %u\n", uuid, nid, nal); + CDEBUG(D_INFO, "add uuid %s "LPX64" %x\n", uuid, nid, nal); memcpy(data->uuid, uuid, nob); data->nid = nid; data->nal = nal; @@ 
-142,8 +142,11 @@ int class_del_uuid (char *uuid) spin_unlock (&g_uuid_lock); - if (list_empty (&deathrow)) + if (list_empty (&deathrow)) { + if (uuid) + CERROR("del non-existed uuid %s\n", uuid); return -EINVAL; + } do { data = list_entry(deathrow.next, struct uuid_nid_data, head); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index fa0bda6..414fde4 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -158,7 +158,6 @@ static int class_attach(struct lustre_cfg *lcfg) class_release_dev(obd); case 1: class_put_type(type); - obd->obd_type = NULL; } return rc; } @@ -505,7 +504,7 @@ int class_process_config(struct lustre_cfg *lcfg) } case LCFG_ADD_UUID: { CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64 - " (%s), nal %d\n", lcfg->lcfg_inlbuf1, lcfg->lcfg_nid, + " (%s), nal %x\n", lcfg->lcfg_inlbuf1, lcfg->lcfg_nid, portals_nid2str(lcfg->lcfg_nal, lcfg->lcfg_nid, str), lcfg->lcfg_nal); @@ -596,7 +595,7 @@ int class_process_config(struct lustre_cfg *lcfg) } } out: - RETURN(err); + return err; } static int class_config_parse_handler(struct llog_handle * handle, @@ -696,8 +695,8 @@ int class_config_process_llog(struct llog_ctxt *ctxt, char *name, RETURN(rc); } -static int class_config_dump_handler(struct llog_handle * handle, - struct llog_rec_hdr *rec, void *data) +int class_config_dump_handler(struct llog_handle * handle, + struct llog_rec_hdr *rec, void *data) { int cfg_len = rec->lrh_len; char *cfg_buf = (char*) (rec + 1); @@ -745,10 +744,10 @@ static int class_config_dump_handler(struct llog_handle * handle, CDEBUG(D_INFO, "pcfg command: 0x%x\n", pcfg->pcfg_command); if (pcfg->pcfg_nal) - CDEBUG(D_INFO, " nal: %d\n", + CDEBUG(D_INFO, " nal: %x\n", pcfg->pcfg_nal); if (pcfg->pcfg_gw_nal) - CDEBUG(D_INFO, " gw_nal: %d\n", + CDEBUG(D_INFO, " gw_nal: %x\n", pcfg->pcfg_gw_nal); if (pcfg->pcfg_nid) CDEBUG(D_INFO, " nid: "LPX64"\n", diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 
c232078..f28232d 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -29,6 +29,7 @@ #include #endif #include +#include #else #include #endif @@ -65,7 +66,7 @@ echo_printk_object (char *msg, struct ec_object *eco) static struct ec_object * echo_find_object_locked (struct obd_device *obd, obd_id id) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct ec_object *eco = NULL; struct list_head *el; @@ -97,7 +98,7 @@ static int echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm, void *ulsm, int ulsm_nob) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; int nob; if (ulsm_nob < sizeof (*lsm)) @@ -125,7 +126,7 @@ echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm, static struct ec_object * echo_allocate_object (struct obd_device *obd) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct ec_object *eco; int rc; @@ -152,7 +153,7 @@ static void echo_free_object (struct ec_object *eco) { struct obd_device *obd = eco->eco_device; - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; LASSERT (eco->eco_refcount == 0); obd_free_memmd(ec->ec_exp, &eco->eco_lsm); @@ -163,7 +164,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, struct obdo *oa, void *ulsm, int ulsm_nob, struct obd_trans_info *oti) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct ec_object *eco2; struct ec_object *eco; struct lov_stripe_md *lsm; @@ -268,7 +269,7 @@ static int echo_get_object (struct ec_object **ecop, struct obd_device *obd, struct obdo *oa) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct ec_object *eco; struct ec_object *eco2; int rc; @@ -353,7 +354,7 @@ static void echo_put_object (struct 
ec_object *eco) { struct obd_device *obd = eco->eco_device; - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; /* Release caller's ref on the object. * delete => mark for deletion when last ref goes @@ -493,7 +494,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, struct lov_stripe_md *lsm, obd_off offset, obd_size count, struct obd_trans_info *oti) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; obd_count npages; struct brw_page *pga; struct brw_page *pgp; @@ -575,7 +576,7 @@ static int echo_client_ubrw(struct obd_device *obd, int rw, obd_off offset, obd_size count, char *buffer, struct obd_trans_info *oti) { - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; obd_count npages; struct brw_page *pga; struct brw_page *pgp; @@ -660,6 +661,10 @@ struct echo_async_page { struct list_head eap_item; }; +#define EAP_FROM_COOKIE(c) \ + (LASSERT(((struct echo_async_page *)(c))->eap_magic == EAP_MAGIC), \ + (struct echo_async_page *)(c)) + struct echo_async_state { spinlock_t eas_lock; obd_off eas_next_offset; @@ -683,14 +688,6 @@ static int eas_should_wake(struct echo_async_state *eas) return rc; }; -struct echo_async_page *eap_from_cookie(void *cookie) -{ - struct echo_async_page *eap = cookie; - if (eap->eap_magic != EAP_MAGIC) - return ERR_PTR(-EINVAL); - return eap; -}; - static int ec_ap_make_ready(void *data, int cmd) { /* our pages are issued ready */ @@ -705,22 +702,17 @@ static int ec_ap_refresh_count(void *data, int cmd) } static void ec_ap_fill_obdo(void *data, int cmd, struct obdo *oa) { - struct echo_async_page *eap; - eap = eap_from_cookie(data); - if (IS_ERR(eap)) - return; + struct echo_async_page *eap = EAP_FROM_COOKIE(data); memcpy(oa, &eap->eap_eas->eas_oa, sizeof(*oa)); } static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) { - struct echo_async_page 
*eap = eap_from_cookie(data); + struct echo_async_page *eap = EAP_FROM_COOKIE(data); struct echo_async_state *eas; unsigned long flags; - if (IS_ERR(eap)) - return; eas = eap->eap_eas; if (cmd == OBD_BRW_READ && @@ -992,7 +984,7 @@ int echo_client_brw_ioctl(int rw, struct obd_export *exp, struct obd_ioctl_data *data) { struct obd_device *obd = class_exp2obd(exp); - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct obd_trans_info dummy_oti; struct ec_object *eco; int rc; @@ -1046,7 +1038,7 @@ echo_ldlm_callback (struct ldlm_lock *lock, struct ldlm_lock_desc *new, void *data, int flag) { struct ec_object *eco = (struct ec_object *)data; - struct echo_client_obd *ec = &(eco->eco_device->u.echo_client); + struct echo_client_obd *ec = &(eco->eco_device->u.echocli); struct lustre_handle lockh; struct list_head *el; int found = 0; @@ -1091,7 +1083,7 @@ echo_client_enqueue(struct obd_export *exp, struct obdo *oa, int mode, obd_off offset, obd_size nob) { struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct lustre_handle *ulh = obdo_handle (oa); struct ec_object *eco; struct ec_lock *ecl; @@ -1152,7 +1144,7 @@ static int echo_client_cancel(struct obd_export *exp, struct obdo *oa) { struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; + struct echo_client_obd *ec = &obd->u.echocli; struct lustre_handle *ulh = obdo_handle (oa); struct ec_lock *ecl = NULL; int found = 0; @@ -1203,10 +1195,12 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int i; ENTRY; + unlock_kernel(); + memset(&dummy_oti, 0, sizeof(dummy_oti)); obd = exp->exp_obd; - ec = &obd->u.echo_client; + ec = &obd->u.echocli; switch (cmd) { case OBD_IOC_CREATE: /* may create echo object */ @@ -1321,6 +1315,8 @@ echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, ldlm_lock_decref(&ack_lock->lock, 
ack_lock->mode); } + lock_kernel(); + return rc; } @@ -1328,7 +1324,7 @@ static int echo_client_setup(struct obd_device *obddev, obd_count len, void *buf) { struct lustre_cfg* lcfg = buf; - struct echo_client_obd *ec = &obddev->u.echo_client; + struct echo_client_obd *ec = &obddev->u.echocli; struct obd_device *tgt; struct lustre_handle conn = {0, }; struct obd_uuid echo_uuid = { "ECHO_UUID" }; @@ -1351,7 +1347,7 @@ echo_client_setup(struct obd_device *obddev, obd_count len, void *buf) INIT_LIST_HEAD (&ec->ec_objects); ec->ec_unique = 0; - rc = obd_connect(&conn, tgt, &echo_uuid, 0); + rc = obd_connect(&conn, tgt, &echo_uuid, FILTER_GROUP_ECHO); if (rc) { CERROR("fail to connect to device %s\n", lcfg->lcfg_inlbuf1); return (rc); @@ -1365,7 +1361,7 @@ static int echo_client_cleanup(struct obd_device *obddev, int flags) { struct list_head *el; struct ec_object *eco; - struct echo_client_obd *ec = &obddev->u.echo_client; + struct echo_client_obd *ec = &obddev->u.echocli; int rc; ENTRY; @@ -1423,7 +1419,7 @@ static int echo_client_disconnect(struct obd_export *exp, GOTO(out, rc = -EINVAL); obd = exp->exp_obd; - ec = &obd->u.echo_client; + ec = &obd->u.echocli; /* no more contention on export's lock list */ while (!list_empty (&exp->exp_ec_data.eced_locks)) { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index e7495de..4a82b28 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -300,8 +300,7 @@ static int filter_free_server_data(struct filter_obd *filter) { OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd)); filter->fo_fsd = NULL; - OBD_FREE(filter->fo_last_rcvd_slots, - FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_FREE(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8); filter->fo_last_rcvd_slots = NULL; return 0; } @@ -378,8 +377,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) RETURN(-ENOMEM); filter->fo_fsd = fsd; - OBD_ALLOC(filter->fo_last_rcvd_slots, - 
FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_ALLOC(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8); if (filter->fo_last_rcvd_slots == NULL) { OBD_FREE(fsd, sizeof(*fsd)); RETURN(-ENOMEM); @@ -1141,8 +1139,7 @@ struct dentry *filter_parent_lock(struct obd_device *obd, obd_gr group, LASSERT(dparent->d_inode); *lock = filter_lock_dentry(obd, dparent, objid); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow parent lock %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "parent lock"); return dparent; } @@ -1172,10 +1169,14 @@ struct dentry *filter_id2dentry(struct obd_device *obd, len = sprintf(name, LPU64, id); if (dir_dentry == NULL) { dparent = filter_parent_lock(obd, group, id, &lock); - if (IS_ERR(dparent)) + if (IS_ERR(dparent)) { + CERROR("%s: error getting object "LPU64":"LPU64 + " parent: rc %ld\n", obd->obd_name, + id, group, PTR_ERR(dparent)); RETURN(dparent); + } } - CDEBUG(D_INODE, "looking up object O/%*s/%s\n", + CDEBUG(D_INODE, "looking up object O/%.*s/%s\n", dparent->d_name.len, dparent->d_name.name, name); dchild = /*ll_*/lookup_one_len(name, dparent, len); if (dir_dentry == NULL) @@ -1234,7 +1235,7 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, ENTRY; if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { - CERROR("destroying objid %*s nlink = %lu, count = %d\n", + CERROR("destroying objid %.*s nlink = %lu, count = %d\n", dchild->d_name.len, dchild->d_name.name, (unsigned long)inode->i_nlink, atomic_read(&inode->i_count)); @@ -1243,7 +1244,7 @@ static int filter_destroy_internal(struct obd_device *obd, obd_id objid, rc = vfs_unlink(dparent->d_inode, dchild); if (rc) - CERROR("error unlinking objid %*s: rc %d\n", + CERROR("error unlinking objid %.*s: rc %d\n", dchild->d_name.len, dchild->d_name.name, rc); RETURN(rc); @@ -1431,6 +1432,7 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, { struct lustre_cfg* lcfg = buf; struct filter_obd 
*filter = &obd->u.filter; + struct lvfs_obd_ctxt *lvfs_ctxt = NULL; struct vfsmount *mnt; char ns_name[48]; int rc = 0, i; @@ -1445,11 +1447,16 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, if (IS_ERR(obd->obd_fsops)) RETURN(PTR_ERR(obd->obd_fsops)); - mnt = do_kern_mount(lcfg->lcfg_inlbuf2, MS_NOATIME | MS_NODIRATIME, - lcfg->lcfg_inlbuf1, option); - rc = PTR_ERR(mnt); - if (IS_ERR(mnt)) + rc = lvfs_mount_fs(lcfg->lcfg_inlbuf1, lcfg->lcfg_inlbuf2, + option, MS_NOATIME | MS_NODIRATIME, &lvfs_ctxt); + if (rc) { + CERROR("lvfs_mount_fs failed: rc = %d\n", rc); GOTO(err_ops, rc); + } + LASSERT(lvfs_ctxt); + + mnt = lvfs_ctxt->loc_mnt; + filter->fo_lvfs_ctxt = lvfs_ctxt; if (lcfg->lcfg_inllen3 > 0 && lcfg->lcfg_inlbuf3) { if (*lcfg->lcfg_inlbuf3 == 'f') { @@ -1529,7 +1536,7 @@ err_post: filter_post(obd); err_mntput: unlock_kernel(); - mntput(mnt); + lvfs_umount_fs(filter->fo_lvfs_ctxt); filter->fo_sb = 0; lock_kernel(); err_ops: @@ -1612,13 +1619,8 @@ static int filter_cleanup(struct obd_device *obd, int flags) shrink_dcache_parent(filter->fo_sb->s_root); filter->fo_sb = 0; - if (atomic_read(&filter->fo_vfsmnt->mnt_count) > 1) - CERROR("%s: mount point %p busy, mnt_count: %d\n", - obd->obd_name, filter->fo_vfsmnt, - atomic_read(&filter->fo_vfsmnt->mnt_count)); - unlock_kernel(); - mntput(filter->fo_vfsmnt); + lvfs_umount_fs(filter->fo_lvfs_ctxt); //destroy_buffers(filter->fo_sb->s_dev); filter->fo_sb = NULL; fsfilt_put_ops(obd->obd_fsops); @@ -1787,6 +1789,7 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) obd_size maxsize = obd->obd_osfs.os_blocks * obd->obd_osfs.os_bsize; obd_size tot_dirty = 0, tot_pending = 0, tot_granted = 0; obd_size fo_tot_dirty, fo_tot_pending, fo_tot_granted; + int level = D_CACHE; if (list_empty(&obd->obd_exports)) return; @@ -1795,13 +1798,20 @@ static void filter_grant_sanity_check(struct obd_device *obd, const char *func) spin_lock(&obd->obd_dev_lock); 
list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) { fed = &exp->exp_filter_data; - LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, - "cli %s/%p %lu+%lu > "LPU64"\n", - exp->exp_client_uuid.uuid, exp, - fed->fed_grant, fed->fed_pending, maxsize); - LASSERTF(fed->fed_dirty <= maxsize, "cli %s/%p %lu > "LPU64"\n", - exp->exp_client_uuid.uuid, exp,fed->fed_dirty,maxsize); - CDEBUG(D_CACHE,"%s: cli %s/%p dirty %lu pend %lu grant %lu\n", + if (fed->fed_grant < 0 || fed->fed_pending < 0 || + fed->fed_dirty < 0) + level = D_ERROR; + if (maxsize > 0) { /* we may not have done a statfs yet */ + LASSERTF(fed->fed_grant + fed->fed_pending <= maxsize, + "cli %s/%p %ld+%ld > "LPU64"\n", + exp->exp_client_uuid.uuid, exp, + fed->fed_grant, fed->fed_pending, maxsize); + LASSERTF(fed->fed_dirty <= maxsize, + "cli %s/%p %ld > "LPU64"\n", + exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, maxsize); + } + CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, fed->fed_dirty, fed->fed_pending, fed->fed_grant); tot_granted += fed->fed_grant + fed->fed_pending; @@ -1844,27 +1854,30 @@ static void filter_grant_discard(struct obd_export *exp) struct obd_device *obd = exp->exp_obd; struct filter_obd *filter = &obd->u.filter; struct filter_export_data *fed = &exp->exp_filter_data; + int level = D_CACHE; spin_lock(&obd->obd_osfs_lock); spin_lock(&exp->exp_obd->obd_dev_lock); list_del_init(&exp->exp_obd_chain); spin_unlock(&exp->exp_obd->obd_dev_lock); - CDEBUG(D_CACHE, "%s: cli %s/%p dirty %lu pend %lu grant %lu\n", + if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) + level = D_ERROR; + CDEBUG(level, "%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, fed->fed_dirty, fed->fed_pending, fed->fed_grant); LASSERTF(filter->fo_tot_granted >= fed->fed_grant, - "%s: tot_granted "LPU64" cli %s/%p fed_grant %lu\n", + "%s: tot_granted "LPU64" cli %s/%p fed_grant %ld\n", 
obd->obd_name, filter->fo_tot_granted, exp->exp_client_uuid.uuid, exp, fed->fed_grant); filter->fo_tot_granted -= fed->fed_grant; - LASSERTF(exp->exp_obd->u.filter.fo_tot_pending >= fed->fed_pending, - "%s: tot_pending "LPU64" cli %s/%p fed_pending %lu\n", + LASSERTF(filter->fo_tot_pending >= fed->fed_pending, + "%s: tot_pending "LPU64" cli %s/%p fed_pending %ld\n", obd->obd_name, filter->fo_tot_pending, exp->exp_client_uuid.uuid, exp, fed->fed_pending); LASSERTF(filter->fo_tot_dirty >= fed->fed_dirty, - "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %lu\n", + "%s: tot_dirty "LPU64" cli %s/%p fed_dirty %ld\n", obd->obd_name, filter->fo_tot_dirty, exp->exp_client_uuid.uuid, exp, fed->fed_dirty); filter->fo_tot_dirty -= fed->fed_dirty; @@ -1972,7 +1985,6 @@ static int filter_disconnect(struct obd_export *exp, unsigned long flags) /* flush any remaining cancel messages out to the target */ filter_sync_llogs(obd, exp); - class_export_put(exp); RETURN(rc); } @@ -1989,7 +2001,8 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, dchild = filter_id2dentry(obd, NULL, group, oa->o_id); if (IS_ERR(dchild)) { - CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id); + CERROR("%s error looking up object: "LPU64"\n", + what, oa->o_id); RETURN(dchild); } @@ -2072,7 +2085,8 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa, else rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1); rc = filter_finish_transno(exp, oti, rc); - rc2 = fsfilt_commit(exp->exp_obd, filter->fo_sb, dentry->d_inode, handle, 0); + rc2 = fsfilt_commit(exp->exp_obd, filter->fo_sb, dentry->d_inode, + handle, exp->exp_sync); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) @@ -2088,8 +2102,7 @@ static int filter_setattr(struct obd_export *exp, struct obdo *oa, 0, 0); ldlm_resource_putref(res); } else if (iattr.ia_valid & ATTR_SIZE) { - CERROR("!!! 
resource_get failed for object "LPU64" -- " - "filter_setattr with no lock?\n", oa->o_id); + /* called from MDS. */ } oa->o_valid = OBD_MD_FLID; @@ -2226,7 +2239,7 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, (oa->o_flags & OBD_FL_DELORPHAN)) { if (diff >= 0) RETURN(diff); - if (-diff > 10000) { /* XXX make this smarter */ + if (-diff > OST_MAX_PRECREATE) { CERROR("ignoring bogus orphan destroy request: obdid " LPU64" last_id "LPU64"\n", oa->o_id, filter_last_id(filter, oa->o_gr)); @@ -2244,7 +2257,8 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa, (/*group != 0 ||*/ oa->o_id == 0)) RETURN(1); - LASSERT(diff >= 0); + LASSERTF(diff >= 0, LPU64" - "LPU64" = %d\n", oa->o_id, + filter_last_id(filter, oa->o_gr), diff); RETURN(diff); } } @@ -2259,6 +2273,37 @@ static int filter_precreate_rec(struct obd_device *obd, struct dentry *dentry, RETURN(rc); } +static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age) +{ + struct filter_obd *filter = &obd->u.filter; + int blockbits = filter->fo_sb->s_blocksize_bits; + int rc; + ENTRY; + + /* at least try to account for cached pages. 
its still racey and + * might be under-reporting if clients haven't announced their + * caches with brw recently */ + spin_lock(&obd->obd_osfs_lock); + rc = fsfilt_statfs(obd, filter->fo_sb, max_age); + memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); + spin_unlock(&obd->obd_osfs_lock); + + CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64 + " pending "LPU64" free "LPU64" avail "LPU64"\n", + filter->fo_tot_dirty, filter->fo_tot_granted, + filter->fo_tot_pending, + osfs->os_bfree << blockbits, osfs->os_bavail << blockbits); + + filter_grant_sanity_check(obd, __FUNCTION__); + + osfs->os_bavail -= min(osfs->os_bavail, + (filter->fo_tot_dirty + filter->fo_tot_pending + + osfs->os_bsize -1) >> blockbits); + + RETURN(rc); +} + /* We rely on the fact that only one thread will be creating files in a given * group at a time, which is why we don't need an atomic filter_get_new_id. * Even if we had that atomic function, the following race would exist: @@ -2276,6 +2321,8 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, struct filter_obd *filter; void *handle = NULL; void *lock = NULL; + struct obd_statfs *osfs; + unsigned long enough_time = jiffies + (obd_timeout * HZ) / 3; __u64 next_id; ENTRY; @@ -2284,6 +2331,21 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, if ((oa->o_valid & OBD_MD_FLFLAGS) && (oa->o_flags & OBD_FL_RECREATE_OBJS)) { recreate_obj = 1; + } else { + OBD_ALLOC(osfs, sizeof(*osfs)); + if (osfs == NULL) + RETURN(-ENOMEM); + rc = filter_statfs(obd, osfs, jiffies-HZ); + if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) { + CDEBUG(D_HA, "OST out of space! 
avail "LPU64"\n", + osfs->os_bavail<fo_sb->s_blocksize_bits); + *num = 0; + rc = -ENOSPC; + } + OBD_FREE(osfs, sizeof(*osfs)); + if (rc) { + RETURN(rc); + } } CDEBUG(D_HA, "%s: precreating %d objects\n", obd->obd_name, *num); @@ -2334,13 +2396,11 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, * already exists */ if (recreate_obj) { - CERROR("%s: Serious error: recreating obj %*s " - "but obj already exists \n", + CERROR("%s: recreating existing object %.*s?\n", obd->obd_name, dchild->d_name.len, dchild->d_name.name); - LBUG(); } else { - CERROR("%s: Serious error: objid %*s already " + CERROR("%s: Serious error: objid %.*s already " "exists; is this filesystem corrupt?\n", obd->obd_name, dchild->d_name.len, dchild->d_name.name); @@ -2390,6 +2450,11 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa, if (rc) break; + if (time_after(jiffies, enough_time)) { + CDEBUG(D_INODE,"%s: precreate slow - want %d got %d \n", + obd->obd_name, *num, i); + break; + } } *num = i; @@ -2431,9 +2496,8 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, } if ((oa->o_valid & OBD_MD_FLFLAGS) && - (oa->o_flags & OBD_FL_RECREATE_OBJS)) { + (oa->o_flags & OBD_FL_RECREATE_OBJS)) recreate_objs = 1; - } obd = exp->exp_obd; fed = &exp->exp_filter_data; @@ -2482,7 +2546,7 @@ static int filter_create(struct obd_export *exp, struct obdo *oa, if (diff > 0) { oa->o_id = filter_last_id(&obd->u.filter, group); rc = filter_precreate(obd, oa, group, &diff); - oa->o_id += diff; + oa->o_id = filter_last_id(&obd->u.filter, oa->o_gr); oa->o_valid = OBD_MD_FLID; } } @@ -2532,7 +2596,7 @@ static int filter_destroy(struct obd_export *exp, struct obdo *oa, dchild = filter_id2dentry(obd, dparent, oa->o_gr, oa->o_id); if (IS_ERR(dchild)) - GOTO(cleanup, rc = -ENOENT); + GOTO(cleanup, rc = PTR_ERR(dchild)); cleanup_phase = 2; if (dchild->d_inode == NULL) { @@ -2620,7 +2684,7 @@ cleanup: } rc = filter_finish_transno(exp, oti, rc); rc2 = 
fsfilt_commit(obd, filter->fo_sb, dparent->d_inode, - handle, 0); + handle, exp->exp_sync); if (rc2) { CERROR("error on commit, err = %d\n", rc2); if (!rc) @@ -2711,37 +2775,6 @@ static int filter_sync(struct obd_export *exp, struct obdo *oa, RETURN(rc); } -static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age) -{ - struct filter_obd *filter = &obd->u.filter; - int blockbits = filter->fo_sb->s_blocksize_bits; - int rc; - ENTRY; - - /* at least try to account for cached pages. its still racey and - * might be under-reporting if clients haven't announced their - * caches with brw recently */ - spin_lock(&obd->obd_osfs_lock); - rc = fsfilt_statfs(obd, filter->fo_sb, max_age); - memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); - spin_unlock(&obd->obd_osfs_lock); - - CDEBUG(D_SUPER | D_CACHE, "blocks cached "LPU64" granted "LPU64 - " pending "LPU64" free "LPU64" avail "LPU64"\n", - filter->fo_tot_dirty, filter->fo_tot_granted, - filter->fo_tot_pending, - osfs->os_bfree << blockbits, osfs->os_bavail << blockbits); - - filter_grant_sanity_check(obd, __FUNCTION__); - - osfs->os_bavail -= min(osfs->os_bavail, - (filter->fo_tot_dirty + filter->fo_tot_pending + - osfs->os_bsize -1) >> blockbits); - - RETURN(rc); -} - static int filter_get_info(struct obd_export *exp, __u32 keylen, void *key, __u32 *vallen, void *val) { @@ -3074,15 +3107,31 @@ static int __init obdfilter_init(void) lprocfs_init_vars(filter, &lvars); + OBD_ALLOC(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); + if (obdfilter_created_scratchpad == NULL) { + CERROR ("Can't allocate scratchpad\n"); + return -ENOMEM; + } + rc = class_register_type(&filter_obd_ops, NULL, lvars.module_vars, OBD_FILTER_DEVICENAME); - if (rc) + if (rc) { + OBD_FREE(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); return rc; + } rc = 
class_register_type(&filter_sanobd_ops, NULL, lvars.module_vars, OBD_FILTER_SAN_DEVICENAME); - if (rc) + if (rc) { class_unregister_type(OBD_FILTER_DEVICENAME); + OBD_FREE(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); + } return rc; } @@ -3090,6 +3139,9 @@ static void __exit obdfilter_exit(void) { class_unregister_type(OBD_FILTER_SAN_DEVICENAME); class_unregister_type(OBD_FILTER_DEVICENAME); + OBD_FREE(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); } MODULE_AUTHOR("Cluster File Systems, Inc. "); diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index f6e0628..06b802a 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -34,7 +34,6 @@ /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8) -#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long)) #define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ @@ -90,6 +89,11 @@ enum { #define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */ +/* We have to pass a 'created' array to fsfilt_map_inode_pages() which we + * then ignore. So we pre-allocate one that everyone can use... 
*/ +#define OBDFILTER_CREATED_SCRATCHPAD_ENTRIES 1024 +extern int *obdfilter_created_scratchpad; + /* filter.c */ void f_dput(struct dentry *); struct dentry *filter_id2dentry(struct obd_device *, struct dentry *dir, diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index e4cd7a9..21d7464 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -37,6 +37,8 @@ #include #include "filter_internal.h" +int *obdfilter_created_scratchpad; + static int filter_alloc_dio_page(struct obd_device *obd, struct inode *inode, struct niobuf_local *lnb) @@ -258,7 +260,6 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, return grant; } - static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_remote *nb, @@ -271,7 +272,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, struct niobuf_local *lnb; struct dentry *dentry = NULL; struct inode *inode; - void *iobuf = NULL; + void *iobuf = NULL; int rc = 0, i, tot_bytes = 0; unsigned long now = jiffies; ENTRY; @@ -286,7 +287,6 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, filter_grant_incoming(exp, oa); oa->o_grant = 0; - spin_unlock(&obd->obd_osfs_lock); } @@ -309,11 +309,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, inode = dentry->d_inode; - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow preprw_read setup %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "preprw_read setup: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "preprw_read setup"); for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt; i++, rnb++, lnb++) { @@ -347,11 +343,7 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, filter_iobuf_add_page(obd, iobuf, inode, lnb->page); } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow start_page_read 
%lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "start_page_read: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "start_page_read"); rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp, NULL, NULL, NULL); @@ -543,11 +535,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, fso.fso_dentry = dentry; fso.fso_bufcnt = obj->ioo_bufcnt; - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow preprw_write setup %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "preprw_write setup: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "preprw_write setup"); spin_lock(&exp->exp_obd->obd_osfs_lock); if (oa) @@ -562,6 +550,10 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, if (oa && oa->o_valid & OBD_MD_FLGRANT) oa->o_grant = filter_grant(exp,oa->o_grant,oa->o_undirty,left); + /* We're finishing using body->oa as an input variable, so reset + * o_valid here. */ + oa->o_valid = 0; + spin_unlock(&exp->exp_obd->obd_osfs_lock); if (rc) @@ -610,11 +602,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp, NULL, NULL, NULL); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow start_page_write %lus\n", (jiffies - now) / HZ); - else - CDEBUG(D_INFO, "start_page_write: %lu jiffies\n", - (jiffies - now)); + fsfilt_check_slow(now, obd_timeout, "start_page_write"); lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES, tot_bytes); diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 6029329..43bf78d 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -134,7 +134,7 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf, { struct obd_device *obd = exp->exp_obd; struct inode *inode = dchild->d_inode; - struct kiobuf *iobuf = buf; + struct kiobuf *iobuf = buf; int rc, create = 
(rw == OBD_BRW_WRITE), *created = NULL, committed = 0; int blocks_per_page = PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0; struct semaphore *sem = NULL; @@ -148,9 +148,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf, if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS) GOTO(cleanup, rc = -EINVAL); - OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page); - if (created == NULL) - GOTO(cleanup, rc = -ENOMEM); + if (iobuf->nr_pages * blocks_per_page > + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES) + GOTO(cleanup, rc = -EINVAL); + cleanup_phase = 1; rc = lock_kiovec(1, &iobuf, 1); @@ -164,8 +165,8 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf, } rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist, - iobuf->nr_pages, iobuf->blocks, created, - create, sem); + iobuf->nr_pages, iobuf->blocks, + obdfilter_created_scratchpad, create, sem); if (rc) GOTO(cleanup, rc); @@ -244,8 +245,6 @@ cleanup: case 2: unlock_kiovec(1, &iobuf); case 1: - OBD_FREE(created, sizeof(*created) * - iobuf->nr_pages*blocks_per_page); case 0: if (cleanup_phase != 3 && rw == OBD_BRW_WRITE) up(&inode->i_sem); @@ -279,7 +278,6 @@ int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) return 1; } - /* some kernels require alloc_kiovec callers to zero members through the use of * map_user_kiobuf and unmap_.. we don't use those, so we have a little helper * that makes sure we don't break the rules. 
*/ @@ -377,13 +375,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, /* If overwriting an existing block, we don't need a grant */ if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && - filter_range_is_mapped(inode, lnb->offset, lnb->len)) + filter_range_is_mapped(inode, lnb->offset, lnb->len)) lnb->rc = 0; if (lnb->rc) /* ENOSPC, network RPC error */ continue; filter_iobuf_add_page(obd, iobuf, inode, lnb->page); + /* We expect these pages to be in offset order, but we'll * be forgiving */ this_size = lnb->offset + lnb->len; @@ -406,8 +405,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, GOTO(cleanup, rc); } - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "brw_start"); iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); /* filter_direct_io drops i_sem */ @@ -416,16 +414,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (rc == 0) obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow direct_io %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "direct_io"); err = fsfilt_commit_wait(obd, inode, wait_handle); if (err) rc = err; - if (obd_sync_filter) + if (obd_sync_filter && !err) LASSERT(oti->oti_transno <= obd->obd_last_committed); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: filter_grant_commit(exp, niocount, res); diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index d807f76..7237988 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -41,17 +41,15 @@ /* 512byte block min */ #define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512) struct dio_request { - atomic_t numreqs; /* number of reqs 
being processed */ - struct bio *bio_current;/* bio currently being constructed */ - struct bio *bio_list; /* list of completed bios */ + atomic_t dr_numreqs; /* number of reqs being processed */ + struct bio *dr_bios; /* list of completed bios */ wait_queue_head_t dr_wait; - int dr_num_pages; - int dr_rw; - int dr_error; - int dr_created[MAX_BLOCKS_PER_PAGE]; - unsigned long dr_blocks[MAX_BLOCKS_PER_PAGE]; - spinlock_t dr_lock; - + int dr_max_pages; + int dr_npages; + int dr_error; + struct page **dr_pages; + unsigned long *dr_blocks; + spinlock_t dr_lock; }; static int dio_complete_routine(struct bio *bio, unsigned int done, int error) @@ -60,13 +58,15 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error) unsigned long flags; spin_lock_irqsave(&dreq->dr_lock, flags); - bio->bi_private = dreq->bio_list; - dreq->bio_list = bio; - spin_unlock_irqrestore(&dreq->dr_lock, flags); - if (atomic_dec_and_test(&dreq->numreqs)) - wake_up(&dreq->dr_wait); + bio->bi_private = dreq->dr_bios; + dreq->dr_bios = bio; if (dreq->dr_error == 0) dreq->dr_error = error; + spin_unlock_irqrestore(&dreq->dr_lock, flags); + + if (atomic_dec_and_test(&dreq->dr_numreqs)) + wake_up(&dreq->dr_wait); + return 0; } @@ -79,6 +79,8 @@ static int can_be_merged(struct bio *bio, sector_t sector) size = bio->bi_size >> 9; return bio->bi_sector + size == sector ? 
1 : 0; } + + int filter_alloc_iobuf(int rw, int num_pages, void **ret) { struct dio_request *dreq; @@ -87,30 +89,52 @@ int filter_alloc_iobuf(int rw, int num_pages, void **ret) OBD_ALLOC(dreq, sizeof(*dreq)); if (dreq == NULL) - RETURN(-ENOMEM); - - dreq->bio_list = NULL; + goto failed_0; + + OBD_ALLOC(dreq->dr_pages, num_pages * sizeof(*dreq->dr_pages)); + if (dreq->dr_pages == NULL) + goto failed_1; + + OBD_ALLOC(dreq->dr_blocks, + MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks)); + if (dreq->dr_blocks == NULL) + goto failed_2; + + dreq->dr_bios = NULL; init_waitqueue_head(&dreq->dr_wait); - atomic_set(&dreq->numreqs, 0); + atomic_set(&dreq->dr_numreqs, 0); spin_lock_init(&dreq->dr_lock); - dreq->dr_num_pages = num_pages; - dreq->dr_rw = rw; + dreq->dr_max_pages = num_pages; + dreq->dr_npages = 0; *ret = dreq; RETURN(0); + + failed_2: + OBD_FREE(dreq->dr_pages, + num_pages * sizeof(*dreq->dr_pages)); + failed_1: + OBD_FREE(dreq, sizeof(*dreq)); + failed_0: + RETURN(-ENOMEM); } void filter_free_iobuf(void *iobuf) { struct dio_request *dreq = iobuf; + int num_pages = dreq->dr_max_pages; /* free all bios */ - while (dreq->bio_list) { - struct bio *bio = dreq->bio_list; - dreq->bio_list = bio->bi_private; + while (dreq->dr_bios) { + struct bio *bio = dreq->dr_bios; + dreq->dr_bios = bio->bi_private; bio_put(bio); } + OBD_FREE(dreq->dr_blocks, + MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks)); + OBD_FREE(dreq->dr_pages, + num_pages * sizeof(*dreq->dr_pages)); OBD_FREE(dreq, sizeof(*dreq)); } @@ -118,57 +142,136 @@ int filter_iobuf_add_page(struct obd_device *obd, void *iobuf, struct inode *inode, struct page *page) { struct dio_request *dreq = iobuf; - int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; - unsigned int len = inode->i_sb->s_blocksize, offs; - struct bio *bio = dreq->bio_current; - sector_t sector; - int k, rc; + + LASSERT (dreq->dr_npages < dreq->dr_max_pages); + dreq->dr_pages[dreq->dr_npages++] = page; + + return 0; +} + +int 
filter_do_bio(struct obd_device *obd, struct inode *inode, + struct dio_request *dreq, int rw) +{ + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + struct page **pages = dreq->dr_pages; + int npages = dreq->dr_npages; + unsigned long *blocks = dreq->dr_blocks; + int total_blocks = npages * blocks_per_page; + int sector_bits = inode->i_sb->s_blocksize_bits - 9; + unsigned int blocksize = inode->i_sb->s_blocksize; + struct bio *bio = NULL; + struct page *page; + unsigned int page_offset; + sector_t sector; + int nblocks; + int block_idx; + int page_idx; + int i; + int rc = 0; ENTRY; - /* get block number for next page */ - rc = fsfilt_map_inode_pages(obd, inode, &page, 1, dreq->dr_blocks, - dreq->dr_created, - dreq->dr_rw == OBD_BRW_WRITE, NULL); - if (rc) - RETURN(rc); + LASSERT(dreq->dr_npages == npages); + LASSERT(total_blocks <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES); - for (k = 0, offs = 0; k < blocks_per_page; k++, offs += len) { - if (dreq->dr_created[k] == -1) { - memset(kmap(page) + offs, 0, len); - kunmap(page); - continue; - } + for (page_idx = 0, block_idx = 0; + page_idx < npages; + page_idx++, block_idx += blocks_per_page) { + + page = pages[page_idx]; + LASSERT (block_idx + blocks_per_page <= total_blocks); + + for (i = 0, page_offset = 0; + i < blocks_per_page; + i += nblocks, page_offset += blocksize * nblocks) { + + nblocks = 1; - sector = dreq->dr_blocks[k] <<(inode->i_sb->s_blocksize_bits-9); - - if (!bio || !can_be_merged(bio, sector) || - !bio_add_page(bio, page, len, offs)) { - if (bio) { - atomic_inc(&dreq->numreqs); - /* FIXME - filter_tally_write(&obd->u.filter,dreq->maplist, - dreq->nr_pages,dreq->blocks, - blocks_per_page); - */ - fsfilt_send_bio(dreq->dr_rw, obd, inode, bio); - dreq->bio_current = bio = NULL; + if (blocks[block_idx + i] == 0) { /* hole */ + LASSERT(rw == OBD_BRW_READ); + memset(kmap(page) + page_offset, 0, blocksize); + kunmap(page); + continue; } + + sector = blocks[block_idx + i] << sector_bits; + + /* Additional 
contiguous file blocks? */ + while (i + nblocks < blocks_per_page && + (sector + nblocks*(blocksize>>9)) == + (blocks[block_idx + i + nblocks] << sector_bits)) + nblocks++; + + if (bio != NULL && + can_be_merged(bio, sector) && + bio_add_page(bio, page, + blocksize * nblocks, page_offset) != 0) + continue; /* added this frag OK */ + + if (bio != NULL) { + request_queue_t *q = bdev_get_queue(bio->bi_bdev); + + /* Dang! I have to fragment this I/O */ + CDEBUG(D_INODE, "bio++ sz %d vcnt %d(%d) " + "sectors %d(%d) psg %d(%d) hsg %d(%d)\n", + bio->bi_size, + bio->bi_vcnt, bio->bi_max_vecs, + bio->bi_size >> 9, q->max_sectors, + bio_phys_segments(q, bio), + q->max_phys_segments, + bio_hw_segments(q, bio), + q->max_hw_segments); + + atomic_inc(&dreq->dr_numreqs); + rc = fsfilt_send_bio(rw, obd, inode, bio); + if (rc < 0) { + CERROR("Can't send bio: %d\n", rc); + /* OK do dec; we do the waiting */ + atomic_dec(&dreq->dr_numreqs); + goto out; + } + rc = 0; + + bio = NULL; + } + /* allocate new bio */ - dreq->bio_current = bio = - bio_alloc(GFP_NOIO, dreq->dr_num_pages * - blocks_per_page); + bio = bio_alloc(GFP_NOIO, + (npages - page_idx) * blocks_per_page); + if (bio == NULL) { + CERROR ("Can't allocate bio\n"); + rc = -ENOMEM; + goto out; + } + bio->bi_bdev = inode->i_sb->s_bdev; bio->bi_sector = sector; bio->bi_end_io = dio_complete_routine; bio->bi_private = dreq; - if (!bio_add_page(bio, page, len, offs)) - LBUG(); + rc = bio_add_page(bio, page, + blocksize * nblocks, page_offset); + LASSERT (rc != 0); } } - dreq->dr_num_pages--; - RETURN(0); + if (bio != NULL) { + atomic_inc(&dreq->dr_numreqs); + rc = fsfilt_send_bio(rw, obd, inode, bio); + if (rc >= 0) { + rc = 0; + } else { + CERROR("Can't send bio: %d\n", rc); + /* OK do dec; we do the waiting */ + atomic_dec(&dreq->dr_numreqs); + } + } + + out: + wait_event(dreq->dr_wait, atomic_read(&dreq->dr_numreqs) == 0); + + if (rc == 0) + rc = dreq->dr_error; + RETURN(rc); } static void filter_clear_page_cache(struct inode 
*inode, struct kiobuf *iobuf) @@ -197,62 +300,74 @@ int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, struct obd_export *exp, struct iattr *attr, struct obd_trans_info *oti, void **wait_handle) { - struct dio_request *dreq = iobuf; + struct obd_device *obd = exp->exp_obd; struct inode *inode = dchild->d_inode; - int rc; + struct dio_request *dreq = iobuf; + int rc, rc2; ENTRY; LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + LASSERTF(dreq->dr_npages <= dreq->dr_max_pages, "%d,%d\n", + dreq->dr_npages, dreq->dr_max_pages); + + if (dreq->dr_npages == 0) + RETURN(0); + + if (dreq->dr_npages > OBDFILTER_CREATED_SCRATCHPAD_ENTRIES) + RETURN(-EINVAL); + + rc = fsfilt_map_inode_pages(obd, inode, + dreq->dr_pages, dreq->dr_npages, + dreq->dr_blocks, + obdfilter_created_scratchpad, + rw == OBD_BRW_WRITE, NULL); + + if (rw == OBD_BRW_WRITE) { + if (rc == 0) { +#if 0 + filter_tally_write(&obd->u.filter, + dreq->dr_pages, + dreq->dr_page_idx, + dreq->dr_blocks, + blocks_per_page); +#endif + if (attr->ia_size > inode->i_size) + attr->ia_valid |= ATTR_SIZE; + rc = fsfilt_setattr(obd, dchild, + oti->oti_handle, attr, 0); + } + + up(&inode->i_sem); + + rc2 = filter_finish_transno(exp, oti, 0); + if (rc2 != 0) + CERROR("can't close transaction: %d\n", rc); + + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + } /* This is nearly osync_inode, without the waiting rc = generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA); */ rc = filemap_fdatawrite(inode->i_mapping); + rc2 = sync_mapping_buffers(inode->i_mapping); if (rc == 0) - rc = sync_mapping_buffers(inode->i_mapping); + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); if (rc == 0) - rc = filemap_fdatawait(inode->i_mapping); - if (rc < 0) - GOTO(cleanup, rc); + rc = rc2; - if (rw == OBD_BRW_WRITE) - up(&inode->i_sem); + if (rc != 0) + RETURN(rc); /* be careful to call this after fsync_inode_data_buffers has waited * for IO to complete before we evict it from the cache 
*/ filter_clear_page_cache(inode, iobuf); - if (dreq->bio_current != NULL) { - atomic_inc(&dreq->numreqs); - fsfilt_send_bio(rw, exp->exp_obd, inode, dreq->bio_current); - dreq->bio_current = NULL; - } - - /* time to wait for I/O completion */ - wait_event(dreq->dr_wait, atomic_read(&dreq->numreqs) == 0); - - rc = dreq->dr_error; - if (rw == OBD_BRW_WRITE && rc == 0) { - /* FIXME: - filter_tally_write(&obd->u.filter, dreq->maplist, - dreq->nr_pages, dreq->blocks, - blocks_per_page); - */ - - if (attr->ia_size > inode->i_size) { - CDEBUG(D_INFO, "setting i_size to "LPU64"\n", - attr->ia_size); - - attr->ia_valid |= ATTR_SIZE; - down(&inode->i_sem); - fsfilt_setattr(exp->exp_obd, dchild, oti->oti_handle, - attr, 0); - up(&inode->i_sem); - } - } - -cleanup: - RETURN(rc); + RETURN(filter_do_bio(obd, inode, dreq, rw)); } /* See if there are unallocated parts in given file region */ @@ -290,7 +405,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, unsigned long now = jiffies; int i, err, cleanup_phase = 0; struct obd_device *obd = exp->exp_obd; - + int total_size = 0; ENTRY; LASSERT(oti != NULL); @@ -299,38 +414,16 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (rc != 0) GOTO(cleanup, rc); - - inode = res->dentry->d_inode; - + rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, (void **)&dreq); if (rc) GOTO(cleanup, rc); - cleanup_phase = 1; + fso.fso_dentry = res->dentry; fso.fso_bufcnt = obj->ioo_bufcnt; + inode = res->dentry->d_inode; - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - cleanup_phase = 2; - - generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA); - - oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res, - oti); - if (IS_ERR(oti->oti_handle)) { - rc = PTR_ERR(oti->oti_handle); - CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, - "error starting transaction: rc = %d\n", rc); - oti->oti_handle = NULL; - GOTO(cleanup, rc); - } - - /* have to call fsfilt_commit() from this point on */ - - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow brw_start %lus\n", (jiffies - now) / HZ); - - down(&inode->i_sem); for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { loff_t this_size; @@ -339,14 +432,15 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, filter_range_is_mapped(inode, lnb->offset, lnb->len)) lnb->rc = 0; - if (lnb->rc) /* ENOSPC, network RPC error, etc. */ + if (lnb->rc) { /* ENOSPC, network RPC error, etc. */ + CDEBUG(D_INODE, "Skipping [%d] == %d\n", i, lnb->rc); continue; + } err = filter_iobuf_add_page(obd, dreq, inode, lnb->page); - if (err != 0) { - lnb->rc = err; - continue; - } + LASSERT (err == 0); + + total_size += lnb->len; /* we expect these pages to be in offset order, but we'll * be forgiving */ @@ -354,26 +448,48 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (this_size > iattr.ia_size) iattr.ia_size = this_size; } +#if 0 + /* I use this when I'm checking our lovely 1M I/Os reach the disk -eeb */ + if (total_size != (1<<20)) + CWARN("total size %d (%d pages)\n", + total_size, total_size/PAGE_SIZE); +#endif + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + cleanup_phase = 2; + + down(&inode->i_sem); + oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res, + oti); + if (IS_ERR(oti->oti_handle)) { + up(&inode->i_sem); + rc = PTR_ERR(oti->oti_handle); + CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, + "error starting transaction: rc = %d\n", rc); + oti->oti_handle = NULL; + GOTO(cleanup, rc); + } + /* have to call fsfilt_commit() from this point on */ + + fsfilt_check_slow(now, obd_timeout, "brw_start"); iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); + /* filter_direct_io drops i_sem */ rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, dreq, exp, &iattr, oti, NULL); - rc = filter_finish_transno(exp, oti, rc); - - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow direct_io %lus\n", (jiffies - now) / HZ); + if (rc == 0) + obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); + fsfilt_check_slow(now, obd_timeout, "direct_io"); err = fsfilt_commit(obd, obd->u.filter.fo_sb, inode, oti->oti_handle, obd_sync_filter); if (err) rc = err; - if (obd_sync_filter) + if (obd_sync_filter && !err) LASSERT(oti->oti_transno <= obd->obd_last_committed); - if (time_after(jiffies, now + 15 * HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ); + fsfilt_check_slow(now, obd_timeout, "commitrw commit"); cleanup: filter_grant_commit(exp, niocount, res); diff --git a/lustre/obdfilter/filter_lvb.c b/lustre/obdfilter/filter_lvb.c index 0b569c3..c54b1d7 100644 --- a/lustre/obdfilter/filter_lvb.c +++ b/lustre/obdfilter/filter_lvb.c @@ -153,13 +153,18 @@ static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m, lvb->lvb_mtime, new->lvb_mtime); lvb->lvb_mtime = new->lvb_mtime; } - if (new->lvb_blocks > lvb->lvb_blocks || !increase) { - CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb blocks: " + if (new->lvb_atime > lvb->lvb_atime || !increase) { + CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime: " LPU64" -> "LPU64"\n", res->lr_name.name[0], - lvb->lvb_blocks, new->lvb_blocks); - lvb->lvb_blocks = new->lvb_blocks; + lvb->lvb_atime, new->lvb_atime); + lvb->lvb_atime = new->lvb_atime; + } + if (new->lvb_ctime > lvb->lvb_ctime || !increase) { + CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime: " + LPU64" -> 
"LPU64"\n", res->lr_name.name[0], + lvb->lvb_ctime, new->lvb_ctime); + lvb->lvb_ctime = new->lvb_ctime; } - GOTO(out, rc = 0); } /* Update the LVB from the disk inode */ @@ -194,6 +199,18 @@ static int filter_lvbo_update(struct ldlm_resource *res, struct lustre_msg *m, lvb->lvb_mtime, LTIME_S(dentry->d_inode->i_mtime)); lvb->lvb_mtime = LTIME_S(dentry->d_inode->i_mtime); } + if (LTIME_S(dentry->d_inode->i_atime) > lvb->lvb_atime || !increase) { + CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb atime from disk: " + LPU64" -> %lu\n", res->lr_name.name[0], + lvb->lvb_atime, LTIME_S(dentry->d_inode->i_atime)); + lvb->lvb_atime = LTIME_S(dentry->d_inode->i_atime); + } + if (LTIME_S(dentry->d_inode->i_ctime) > lvb->lvb_ctime || !increase) { + CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb ctime from disk: " + LPU64" -> %lu\n", res->lr_name.name[0], + lvb->lvb_ctime, LTIME_S(dentry->d_inode->i_ctime)); + lvb->lvb_ctime = LTIME_S(dentry->d_inode->i_ctime); + } CDEBUG(D_DLMTRACE, "res: "LPU64" updating lvb blocks from disk: " LPU64" -> %lu\n", res->lr_name.name[0], lvb->lvb_blocks, dentry->d_inode->i_blocks); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index fdbdfde..ef632a9 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -240,7 +240,7 @@ static int filter_brw_stats_seq_show(struct seq_file *seq, void *v) /* this sampling races with updates */ - seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 4ac9a77..4f0035d 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -166,41 +166,6 @@ int osc_rd_cur_grant_bytes(char *page, char **start, off_t off, int count, return rc; } -int osc_rd_create_low_wm(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct 
obd_device *obd = data; - - if (obd == NULL) - return 0; - - return snprintf(page, count, "%d\n", - obd->u.cli.cl_oscc.oscc_kick_barrier); -} - -int osc_wr_create_low_wm(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - struct obd_device *obd = data; - int val, rc; - - if (obd == NULL) - return 0; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - if (val < 0) - return -ERANGE; - - spin_lock(&obd->obd_dev_lock); - obd->u.cli.cl_oscc.oscc_kick_barrier = val; - spin_unlock(&obd->obd_dev_lock); - - return count; -} - int osc_rd_create_count(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -276,7 +241,6 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "max_dirty_mb", osc_rd_max_dirty_mb, osc_wr_max_dirty_mb, 0 }, { "cur_dirty_bytes", osc_rd_cur_dirty_bytes, 0, 0 }, { "cur_grant_bytes", osc_rd_cur_grant_bytes, 0, 0 }, - {"create_low_watermark", osc_rd_create_low_wm, osc_wr_create_low_wm, 0}, { "create_count", osc_rd_create_count, osc_wr_create_count, 0 }, { "prealloc_next_id", osc_rd_prealloc_next_id, 0, 0 }, { "prealloc_last_id", osc_rd_prealloc_last_id, 0, 0 }, @@ -307,7 +271,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) spin_lock_irqsave(&cli->cl_loi_list_lock, flags); - seq_printf(seq, "snapshot_time: %lu:%lu (secs:usecs)\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "read RPCs in flight: %d\n", diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 557923b..184df58 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -56,8 +56,7 @@ #include #include "osc_internal.h" -static int osc_interpret_create(struct ptlrpc_request *req, void *data, - int rc) +static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) { struct osc_creator *oscc; struct ost_body *body = NULL; @@ -73,19 +72,32 @@ static int osc_interpret_create(struct ptlrpc_request *req, 
void *data, oscc = req->rq_async_args.pointer_arg[0]; spin_lock(&oscc->oscc_lock); oscc->oscc_flags &= ~OSCC_FLAG_CREATING; - if (body) - oscc->oscc_last_id = body->oa.o_id; - if (rc == -ENOSPC) { + if (rc == -ENOSPC || rc == -EROFS) { oscc->oscc_flags |= OSCC_FLAG_NOSPC; + if (body && rc == -ENOSPC) { + oscc->oscc_grow_count = OST_MIN_PRECREATE; + oscc->oscc_last_id = body->oa.o_id; + } spin_unlock(&oscc->oscc_lock); DEBUG_REQ(D_INODE, req, "OST out of space, flagging"); } else if (rc != 0 && rc != -EIO) { oscc->oscc_flags |= OSCC_FLAG_RECOVERING; + oscc->oscc_grow_count = OST_MIN_PRECREATE; spin_unlock(&oscc->oscc_lock); DEBUG_REQ(D_ERROR, req, "unknown rc %d from async create: failing oscc", rc); ptlrpc_fail_import(req->rq_import, req->rq_import_generation); } else { + if (rc == 0) { + oscc->oscc_flags &= ~OSCC_FLAG_LOW; + if (body) { + int diff = body->oa.o_id - oscc->oscc_last_id; + if (diff != oscc->oscc_grow_count) + oscc->oscc_grow_count = + max(diff/3, OST_MIN_PRECREATE); + oscc->oscc_last_id = body->oa.o_id; + } + } spin_unlock(&oscc->oscc_lock); } @@ -104,6 +116,17 @@ static int oscc_internal_create(struct osc_creator *oscc) ENTRY; spin_lock(&oscc->oscc_lock); + if (oscc->oscc_grow_count < OST_MAX_PRECREATE && + !(oscc->oscc_flags & (OSCC_FLAG_LOW | OSCC_FLAG_RECOVERING)) && + (__s64)(oscc->oscc_last_id - oscc->oscc_next_id) <= + (oscc->oscc_grow_count / 4 + 1)) { + oscc->oscc_flags |= OSCC_FLAG_LOW; + oscc->oscc_grow_count *= 2; + } + + if (oscc->oscc_grow_count > OST_MAX_PRECREATE / 2) + oscc->oscc_grow_count = OST_MAX_PRECREATE / 2; + if (oscc->oscc_flags & OSCC_FLAG_CREATING || oscc->oscc_flags & OSCC_FLAG_RECOVERING) { spin_unlock(&oscc->oscc_lock); @@ -185,7 +208,7 @@ static int oscc_precreate(struct osc_creator *oscc, int wait) int rc = 0; ENTRY; - if (oscc_has_objects(oscc, oscc->oscc_kick_barrier)) + if (oscc_has_objects(oscc, oscc->oscc_grow_count / 2)) RETURN(0); if (!wait) @@ -203,7 +226,7 @@ static int oscc_precreate(struct osc_creator 
*oscc, int wait) RETURN(rc); } -int oscc_recovering(struct osc_creator *oscc) +int oscc_recovering(struct osc_creator *oscc) { int recov = 0; @@ -263,8 +286,8 @@ int osc_create(struct obd_export *exp, struct obdo *oa, oa->o_valid |= OBD_MD_FLID; oa->o_id = oscc->oscc_next_id - 1; - CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", - exp->exp_obd->obd_name, oa->o_id); + CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", + oscc->oscc_obd->obd_name, oa->o_id); rc = osc_real_create(exp, oa, ea, NULL); if (oscc->oscc_obd == NULL) { @@ -279,14 +302,14 @@ int osc_create(struct obd_export *exp, struct obdo *oa, oscc->oscc_flags |= OSCC_FLAG_NOSPC; oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; oscc->oscc_last_id = oa->o_id; - - CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", - exp->exp_obd->obd_name, rc); + + CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", + oscc->oscc_obd->obd_name, rc); wake_up(&oscc->oscc_waitq); } else { - CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", - exp->exp_obd->obd_name, rc); + CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", + oscc->oscc_obd->obd_name, rc); } spin_unlock(&oscc->oscc_lock); @@ -323,6 +346,11 @@ int osc_create(struct obd_export *exp, struct obdo *oa, } spin_lock(&oscc->oscc_lock); + if (oscc->oscc_flags & OSCC_FLAG_EXITING) { + spin_unlock(&oscc->oscc_lock); + break; + } + if (oscc->oscc_last_id >= oscc->oscc_next_id) { memcpy(oa, &oscc->oscc_oa, sizeof(*oa)); oa->o_id = oscc->oscc_next_id; @@ -339,7 +367,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa, } spin_unlock(&oscc->oscc_lock); rc = oscc_precreate(oscc, try_again); - if (rc == -EIO) + if (rc) break; } @@ -367,8 +395,8 @@ void oscc_init(struct obd_device *obd) spin_lock_init(&oscc->oscc_lock); oscc->oscc_obd = obd; oscc->oscc_kick_barrier = 100; - oscc->oscc_grow_count = 36; oscc->oscc_max_grow_count = 2000; + oscc->oscc_grow_count = OST_MIN_PRECREATE; oscc->oscc_next_id = 2; oscc->oscc_last_id = 1; diff --git a/lustre/osc/osc_internal.h 
b/lustre/osc/osc_internal.h index 592c3d0..b3d69a9 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -37,6 +37,10 @@ struct osc_async_page { void *oap_caller_data; }; +#define OAP_FROM_COOKIE(c) \ + (LASSERT(((struct osc_async_page *)(c))->oap_magic == OAP_MAGIC), \ + (struct osc_async_page *)(c)) + struct osc_cache_waiter { struct list_head ocw_entry; wait_queue_head_t ocw_waitq; @@ -44,10 +48,12 @@ struct osc_cache_waiter { int ocw_rc; }; -#define OSCC_FLAG_RECOVERING 1 -#define OSCC_FLAG_CREATING 2 -#define OSCC_FLAG_NOSPC 4 /* can't create more objects on this OST */ -#define OSCC_FLAG_SYNC_IN_PROGRESS 8 /* only allow one thread to sync */ +#define OSCC_FLAG_RECOVERING 0x01 +#define OSCC_FLAG_CREATING 0x02 +#define OSCC_FLAG_NOSPC 0x04 /* can't create more objects on OST */ +#define OSCC_FLAG_SYNC_IN_PROGRESS 0x08 /* only allow one thread to sync */ +#define OSCC_FLAG_LOW 0x10 +#define OSCC_FLAG_EXITING 0x20 int osc_create(struct obd_export *exp, struct obdo *oa, struct lov_stripe_md **ea, struct obd_trans_info *oti); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index b41258e..880a59d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1037,7 +1037,7 @@ static obd_count check_elan_limit(struct brw_page *pg, obd_count pages) } static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, obd_count page_count, + struct lov_stripe_md *lsm, obd_count page_count, struct brw_page *pga, struct obd_trans_info *oti) { ENTRY; @@ -1064,7 +1064,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, sort_brw_pages(pga, pages_per_brw); pages_per_brw = check_elan_limit(pga, pages_per_brw); - rc = osc_brw_internal(cmd, exp, oa, md, pages_per_brw, pga); + rc = osc_brw_internal(cmd, exp, oa, lsm, pages_per_brw, pga); if (rc != 0) RETURN(rc); @@ -1076,7 +1076,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, } static int osc_brw_async(int 
cmd, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *md, obd_count page_count, + struct lov_stripe_md *lsm, obd_count page_count, struct brw_page *pga, struct ptlrpc_request_set *set, struct obd_trans_info *oti) { @@ -1104,7 +1104,7 @@ static int osc_brw_async(int cmd, struct obd_export *exp, struct obdo *oa, sort_brw_pages(pga, pages_per_brw); pages_per_brw = check_elan_limit(pga, pages_per_brw); - rc = async_internal(cmd, exp, oa, md, pages_per_brw, pga, set); + rc = async_internal(cmd, exp, oa, lsm, pages_per_brw, pga, set); if (rc != 0) RETURN(rc); @@ -1819,14 +1819,6 @@ int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, RETURN(0); } -struct osc_async_page *oap_from_cookie(void *cookie) -{ - struct osc_async_page *oap = cookie; - if (oap->oap_magic != OAP_MAGIC) - return ERR_PTR(-EINVAL); - return oap; -}; - static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, struct lov_oinfo *loi, void *cookie, int cmd, obd_off off, int count, @@ -1838,9 +1830,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, int rc; ENTRY; - oap = oap_from_cookie(cookie); - if (IS_ERR(oap)) - RETURN(PTR_ERR(oap)); + oap = OAP_FROM_COOKIE(cookie); if (cli->cl_import == NULL || cli->cl_import->imp_invalid) RETURN(-EIO); @@ -1902,9 +1892,7 @@ static int osc_set_async_flags(struct obd_export *exp, int rc = 0; ENTRY; - oap = oap_from_cookie(cookie); - if (IS_ERR(oap)) - RETURN(PTR_ERR(oap)); + oap = OAP_FROM_COOKIE(cookie); if (cli->cl_import == NULL || cli->cl_import->imp_invalid) RETURN(-EIO); @@ -1956,9 +1944,7 @@ static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm, struct loi_oap_pages *lop; ENTRY; - oap = oap_from_cookie(cookie); - if (IS_ERR(oap)) - RETURN(PTR_ERR(oap)); + oap = OAP_FROM_COOKIE(cookie); if (cli->cl_import == NULL || cli->cl_import->imp_invalid) RETURN(-EIO); @@ -2045,9 +2031,7 @@ static int osc_teardown_async_page(struct obd_export *exp, int 
rc = 0; ENTRY; - oap = oap_from_cookie(cookie); - if (IS_ERR(oap)) - RETURN(PTR_ERR(oap)); + oap = OAP_FROM_COOKIE(cookie); if (loi == NULL) loi = &lsm->lsm_oinfo[0]; @@ -2377,6 +2361,8 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data) if (lock->l_ast_data && lock->l_ast_data != data) { struct inode *new_inode = data; struct inode *old_inode = lock->l_ast_data; + if (!(old_inode->i_state & I_FREEING)) + LDLM_ERROR(lock, "inconsistent l_ast_data found"); LASSERTF(old_inode->i_state & I_FREEING, "Found existing inode %p/%lu/%u state %lu in lock: " "setting data to %p/%lu/%u\n", old_inode, @@ -2490,6 +2476,30 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, } } + if (mode == LCK_PW) { + rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, + policy, LCK_PR, lockh); + if (rc == 1) { + rc = ldlm_cli_convert(lockh, mode, flags); + if (!rc) { + /* Update readers/writers accounting */ + ldlm_lock_addref(lockh, LCK_PW); + ldlm_lock_decref(lockh, LCK_PR); + osc_set_data_with_check(lockh, data); + RETURN(ELDLM_OK); + } + /* If the conversion failed, we need to drop refcount + on matched lock before we get new one */ + /* XXX Won't it save us some efforts if we cancel PR + lock here? 
We are going to take PW lock anyway and it + will invalidate PR lock */ + ldlm_lock_decref(lockh, LCK_PR); + if (rc != EDEADLOCK) { + RETURN(rc); + } + } + } + no_match: if (*flags & LDLM_FL_HAS_INTENT) { int size[2] = {0, sizeof(struct ldlm_request)}; @@ -2881,6 +2891,16 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(0); } + if (keylen == strlen("async") && memcmp(key, "async", keylen) == 0) { + struct client_obd *cl = &obd->u.cli; + if (vallen != sizeof(int)) + RETURN(-EINVAL); + cl->cl_async = *(int *)val; + CDEBUG(D_HA, "%s: set async = %d\n", + obd->obd_name, cl->cl_async); + RETURN(0); + } + if (keylen == strlen("sec") && memcmp(key, "sec", keylen) == 0) { struct client_obd *cli = &exp->exp_obd->u.cli; @@ -2906,8 +2926,7 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(-EINVAL); } - if (keylen < strlen("mds_conn") || - memcmp(key, "mds_conn", strlen("mds_conn")) != 0) + if (keylen < strlen("mds_conn") || memcmp(key, "mds_conn", keylen) != 0) RETURN(-EINVAL); ctxt = llog_get_context(&exp->exp_obd->obd_llogs, LLOG_UNLINK_ORIG_CTXT); @@ -3105,6 +3124,7 @@ static int osc_setup(struct obd_device *obd, obd_count len, void *buf) static int osc_cleanup(struct obd_device *obd, int flags) { + struct osc_creator *oscc = &obd->u.cli.cl_oscc; int rc; rc = ldlm_cli_cancel_unused(obd->obd_namespace, NULL, @@ -3112,6 +3132,11 @@ static int osc_cleanup(struct obd_device *obd, int flags) if (rc) RETURN(rc); + spin_lock(&oscc->oscc_lock); + oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING; + oscc->oscc_flags |= OSCC_FLAG_EXITING; + spin_unlock(&oscc->oscc_lock); + rc = client_obd_cleanup(obd, flags); ptlrpcd_decref(); RETURN(rc); diff --git a/lustre/ost/lproc_ost.c b/lustre/ost/lproc_ost.c index 9773af1..beef27a 100644 --- a/lustre/ost/lproc_ost.c +++ b/lustre/ost/lproc_ost.c @@ -52,7 +52,7 @@ static int ost_stimes_seq_show(struct seq_file *seq, void *v) spin_lock(&ost->ost_lock); - seq_printf(seq, "snapshot_time: %lu:%lu 
(secs:usecs)\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); seq_printf(seq, "\nread rpc service time: (rpcs, average ms)\n"); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index d18919a..42d3156 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -379,9 +379,6 @@ static void ost_stime_record(struct ptlrpc_request *req, struct timeval *start, } } -static char str[PTL_NALFMT_SIZE]; - - static int ost_brw_read(struct ptlrpc_request *req) { struct ptlrpc_bulk_desc *desc; @@ -458,6 +455,9 @@ static int ost_brw_read(struct ptlrpc_request *req) if (rc != 0) GOTO(out_bulk, rc); + /* We're finishing using body->oa as an input variable */ + body->oa.o_valid = 0; + nob = 0; for (i = 0; i < npages; i++) { int page_rc = local_nb[i].rc; @@ -549,17 +549,17 @@ static int ost_brw_read(struct ptlrpc_request *req) } if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { CERROR("bulk IO comms error: " - "evicting %s@%s nid %s\n", + "evicting %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); ptlrpc_fail_export(req->rq_export); } else { CERROR("ignoring bulk IO comms error: " - "client reconnected %s@%s nid %s\n", + "client reconnected %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); } } @@ -702,18 +702,16 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) obd_count cksum = ost_checksum_bulk(desc); if (client_cksum != cksum) { - CERROR("Bad checksum: client %x, server %x NID %s\n", + CERROR("Bad checksum: client %x, server %x id %s\n", client_cksum, cksum, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); cksum_counter = 1; repbody->oa.o_cksum = cksum; } else { cksum_counter++; if ((cksum_counter & (-cksum_counter)) == 
cksum_counter) CWARN("Checksum %u from NID %s: %x OK\n", - cksum_counter, - ptlrpc_peernid2str(&req->rq_peer, str), - cksum); + cksum_counter, req->rq_peerstr, cksum); } } #endif @@ -770,19 +768,19 @@ int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) lustre_free_reply_state (req->rq_reply_state); } if (req->rq_reqmsg->conn_cnt == req->rq_export->exp_conn_cnt) { - CERROR("%s: bulk IO comm error evicting %s@%s NID %s\n", + CERROR("%s: bulk IO comm error evicting %s@%s id %s\n", req->rq_export->exp_obd->obd_name, req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); ptlrpc_fail_export(req->rq_export); } else { CERROR("ignoring bulk IO comms error: " - "client reconnected %s@%s nid %s\n", + "client reconnected %s@%s id %s\n", req->rq_export->exp_client_uuid.uuid, req->rq_export->exp_connection->c_remote_uuid.uuid, - ptlrpc_peernid2str(&req->rq_peer, str)); - } + req->rq_peerstr); + } } RETURN(rc); } @@ -916,7 +914,7 @@ static int ost_get_info(struct obd_export *exp, struct ptlrpc_request *req) } static int ost_llog_handle_connect(struct obd_export *exp, - struct ptlrpc_request *req) + struct ptlrpc_request *req) { struct llogd_conn_body *body; int rc; @@ -1020,10 +1018,9 @@ int ost_msg_check_version(struct lustre_msg *msg) int ost_handle(struct ptlrpc_request *req) { - struct obd_trans_info trans_info = { 0, }; - struct obd_trans_info *oti = &trans_info; int should_process, fail = OBD_FAIL_OST_ALL_REPLY_NET, rc = 0; - struct obd_export *exp = NULL; + struct obd_trans_info *oti = NULL; + struct obd_device *obd = NULL; ENTRY; LASSERT(current->journal_info == NULL); @@ -1038,31 +1035,28 @@ int ost_handle(struct ptlrpc_request *req) if (req->rq_reqmsg->opc == SEC_INIT || req->rq_reqmsg->opc == SEC_INIT_CONTINUE || req->rq_reqmsg->opc == SEC_FINI) { - GOTO(out, rc = 0); + RETURN(0); } /* XXX identical to MDS */ if (req->rq_reqmsg->opc != OST_CONNECT) { 
- struct obd_device *obd; int recovering; - exp = req->rq_export; - - if (exp == NULL) { + if (req->rq_export == NULL) { CDEBUG(D_HA,"operation %d on unconnected OST from %s\n", req->rq_reqmsg->opc, - ptlrpc_peernid2str(&req->rq_peer, str)); + req->rq_peerstr); req->rq_status = -ENOTCONN; - GOTO(out, rc = -ENOTCONN); + GOTO(out_check_req, rc = -ENOTCONN); } - obd = exp->exp_obd; + obd = req->rq_export->exp_obd; /* Check for aborted recovery. */ spin_lock_bh(&obd->obd_processing_task_lock); recovering = obd->obd_recovering; spin_unlock_bh(&obd->obd_processing_task_lock); - if (recovering) { + if (recovering) { rc = ost_filter_recovery_request(req, obd, &should_process); if (rc || !should_process) @@ -1075,100 +1069,100 @@ int ost_handle(struct ptlrpc_request *req) } } + OBD_ALLOC(oti, sizeof(*oti)); + if (oti == NULL) + RETURN(-ENOMEM); + oti_init(oti, req); switch (req->rq_reqmsg->opc) { case OST_CONNECT: { CDEBUG(D_INODE, "connect\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_CONNECT_NET, out_free_oti, rc = 0); rc = target_handle_connect(req); + if (!rc) + obd = req->rq_export->exp_obd; break; } case OST_DISCONNECT: CDEBUG(D_INODE, "disconnect\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_DISCONNECT_NET, out_free_oti, rc = 0); rc = target_handle_disconnect(req); break; case OST_CREATE: CDEBUG(D_INODE, "create\n"); - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC)) - GOTO(out, rc = -ENOSPC); - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS)) - GOTO(out, rc = -EROFS); - OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0); - rc = ost_create(exp, req, oti); + OBD_FAIL_GOTO(OBD_FAIL_OST_ENOSPC, out_check_req, rc = -ENOSPC); + OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS); + OBD_FAIL_GOTO(OBD_FAIL_OST_CREATE_NET, out_free_oti, rc = 0); + rc = ost_create(req->rq_export, req, oti); break; case OST_DESTROY: CDEBUG(D_INODE, "destroy\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0); - if 
(OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS)) - GOTO(out, rc = -EROFS); - rc = ost_destroy(exp, req, oti); + OBD_FAIL_GOTO(OBD_FAIL_OST_DESTROY_NET, out_free_oti, rc = 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS); + rc = ost_destroy(req->rq_export, req, oti); break; case OST_GETATTR: CDEBUG(D_INODE, "getattr\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0); - rc = ost_getattr(exp, req); + OBD_FAIL_GOTO(OBD_FAIL_OST_GETATTR_NET, out_free_oti, rc = 0); + rc = ost_getattr(req->rq_export, req); break; case OST_SETATTR: CDEBUG(D_INODE, "setattr\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0); - rc = ost_setattr(exp, req, oti); + OBD_FAIL_GOTO(OBD_FAIL_OST_SETATTR_NET, out_free_oti, rc = 0); + rc = ost_setattr(req->rq_export, req, oti); break; case OST_WRITE: CDEBUG(D_INODE, "write\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC)) - GOTO(out, rc = -ENOSPC); - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS)) - GOTO(out, rc = -EROFS); + OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_ENOSPC, out_check_req, rc = -ENOSPC); + OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS); rc = ost_brw_write(req, oti); LASSERT(current->journal_info == NULL); /* ost_brw sends its own replies */ - RETURN(rc); + GOTO(out_free_oti, rc); case OST_READ: CDEBUG(D_INODE, "read\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0); rc = ost_brw_read(req); LASSERT(current->journal_info == NULL); /* ost_brw sends its own replies */ - RETURN(rc); + GOTO(out_free_oti, rc); case OST_SAN_READ: CDEBUG(D_INODE, "san read\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0); rc = ost_san_brw(req, OBD_BRW_READ); /* ost_san_brw sends its own replies */ - RETURN(rc); + GOTO(out_free_oti, rc); case OST_SAN_WRITE: CDEBUG(D_INODE, "san write\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 
0); + OBD_FAIL_GOTO(OBD_FAIL_OST_BRW_NET, out_free_oti, rc = 0); rc = ost_san_brw(req, OBD_BRW_WRITE); /* ost_san_brw sends its own replies */ - RETURN(rc); + GOTO(out_free_oti, rc); case OST_PUNCH: CDEBUG(D_INODE, "punch\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0); - if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_EROFS)) - GOTO(out, rc = -EROFS); - rc = ost_punch(exp, req, oti); + OBD_FAIL_GOTO(OBD_FAIL_OST_PUNCH_NET, out_free_oti, rc = 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_EROFS, out_check_req, rc = -EROFS); + rc = ost_punch(req->rq_export, req, oti); break; case OST_STATFS: CDEBUG(D_INODE, "statfs\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OST_STATFS_NET, out_free_oti, rc = 0); rc = ost_statfs(req); break; case OST_SYNC: CDEBUG(D_INODE, "sync\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_SYNC_NET, 0); - rc = ost_sync(exp, req); + OBD_FAIL_GOTO(OBD_FAIL_OST_SYNC_NET, out_free_oti, rc = 0); + rc = ost_sync(req->rq_export, req); break; case OST_SET_INFO: DEBUG_REQ(D_INODE, req, "set_info"); - rc = ost_set_info(exp, req); + rc = ost_set_info(req->rq_export, req); break; case OST_GET_INFO: DEBUG_REQ(D_INODE, req, "get_info"); - rc = ost_get_info(exp, req); + rc = ost_get_info(req->rq_export, req); break; case OBD_PING: DEBUG_REQ(D_INODE, req, "ping"); @@ -1177,24 +1171,24 @@ int ost_handle(struct ptlrpc_request *req) /* FIXME - just reply status */ case LLOG_ORIGIN_CONNECT: DEBUG_REQ(D_INODE, req, "log connect\n"); - rc = ost_llog_handle_connect(exp, req); + rc = ost_llog_handle_connect(req->rq_export, req); req->rq_status = rc; rc = lustre_pack_reply(req, 0, NULL, NULL); if (rc) - RETURN(rc); - RETURN(ptlrpc_reply(req)); + GOTO(out_free_oti, rc); + GOTO(out_free_oti, rc = ptlrpc_reply(req)); case OBD_LOG_CANCEL: CDEBUG(D_INODE, "log cancel\n"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0); + OBD_FAIL_GOTO(OBD_FAIL_OBD_LOG_CANCEL_NET, out_free_oti, rc = 0); rc = llog_origin_handle_cancel(req); req->rq_status = rc; rc = lustre_pack_reply(req, 0, 
NULL, NULL); if (rc) - RETURN(rc); - RETURN(ptlrpc_reply(req)); + GOTO(out_free_oti, rc); + GOTO(out_free_oti, rc = ptlrpc_reply(req)); case LDLM_ENQUEUE: CDEBUG(D_INODE, "enqueue\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); + OBD_FAIL_GOTO(OBD_FAIL_LDLM_ENQUEUE, out_free_oti, rc = 0); rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, ldlm_server_blocking_ast, ldlm_server_glimpse_ast); @@ -1202,12 +1196,12 @@ int ost_handle(struct ptlrpc_request *req) break; case LDLM_CONVERT: CDEBUG(D_INODE, "convert\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0); + OBD_FAIL_GOTO(OBD_FAIL_LDLM_CONVERT, out_free_oti, rc = 0); rc = ldlm_handle_convert(req); break; case LDLM_CANCEL: CDEBUG(D_INODE, "cancel\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0); + OBD_FAIL_GOTO(OBD_FAIL_LDLM_CANCEL, out_free_oti, rc = 0); rc = ldlm_handle_cancel(req); break; case LDLM_BL_CALLBACK: @@ -1219,7 +1213,7 @@ int ost_handle(struct ptlrpc_request *req) CERROR("Unexpected opcode %d\n", req->rq_reqmsg->opc); req->rq_status = -ENOTSUPP; rc = ptlrpc_error(req); - RETURN(rc); + GOTO(out_free_oti, rc); } LASSERT(current->journal_info == NULL); @@ -1227,7 +1221,6 @@ int ost_handle(struct ptlrpc_request *req) EXIT; /* If we're DISCONNECTing, the export_data is already freed */ if (!rc && req->rq_reqmsg->opc != OST_DISCONNECT) { - struct obd_device *obd = req->rq_export->exp_obd; if (!obd->obd_no_transno) { req->rq_repmsg->last_committed = obd->obd_last_committed; @@ -1239,13 +1232,12 @@ int ost_handle(struct ptlrpc_request *req) obd->obd_last_committed, req->rq_xid); } -out: +out_check_req: if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { - struct obd_device *obd = req->rq_export->exp_obd; - if (obd && obd->obd_recovering) { DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); - return target_queue_final_reply(req, rc); + rc = target_queue_final_reply(req, rc); + GOTO(out_free_oti, rc); } /* Lost a race with recovery; let the error path DTRT. 
*/ rc = req->rq_status = -ENOTCONN; @@ -1253,9 +1245,13 @@ out: if (!rc) oti_to_request(oti, req); - target_send_reply(req, rc, fail); - return 0; + rc = 0; + +out_free_oti: + if (oti) + OBD_FREE(oti, sizeof(*oti)); + return rc; } EXPORT_SYMBOL(ost_handle); @@ -1293,7 +1289,7 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, + OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, 30000, ost_handle, "ost", obd->obd_proc_entry); if (ost->ost_service == NULL) { @@ -1308,7 +1304,7 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf) ost->ost_create_service = ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_CREATE_PORTAL, OSC_REPLY_PORTAL, + OST_CREATE_PORTAL, OSC_REPLY_PORTAL, 30000, ost_handle, "ost_create", obd->obd_proc_entry); if (ost->ost_create_service == NULL) { diff --git a/lustre/ptlbd/server.c b/lustre/ptlbd/server.c index e54e5b3..99ddeaa 100644 --- a/lustre/ptlbd/server.c +++ b/lustre/ptlbd/server.c @@ -63,7 +63,7 @@ static int ptlbd_sv_setup(struct obd_device *obd, obd_count len, void *buf) ptlbd->ptlbd_service = ptlrpc_init_svc(PTLBD_NBUFS, PTLBD_BUFSIZE, PTLBD_MAXREQSIZE, - PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, + PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, 30000, ptlbd_handle, "ptlbd_sv", obd->obd_proc_entry); diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index f2105e8..16db713 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -3,13 +3,13 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution -LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ +LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c \ $(top_srcdir)/lustre/ldlm/ldlm_lock.c \ $(top_srcdir)/lustre/ldlm/ldlm_resource.c \ $(top_srcdir)/lustre/ldlm/ldlm_lib.c \ $(top_srcdir)/lustre/ldlm/ldlm_plain.c \ $(top_srcdir)/lustre/ldlm/ldlm_extent.c \ - $(top_srcdir)/lustre/ldlm/ldlm_request.c \ + $(top_srcdir)/lustre/ldlm/ldlm_request.c \ $(top_srcdir)/lustre/ldlm/ldlm_lockd.c \ $(top_srcdir)/lustre/ldlm/ldlm_internal.h \ $(top_srcdir)/lustre/ldlm/ldlm_inodebits.c @@ -33,5 +33,4 @@ modulefs_DATA = ptlrpc$(KMODEXT) endif # MODULES MOSTLYCLEANFILES = *.o *.ko *.mod.c ldlm_*.c l_lock.c - DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 1f6127e..8bbbf62 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -542,12 +542,12 @@ static int after_reply(struct ptlrpc_request *req) if (req->rq_import->imp_replayable) { spin_lock_irqsave(&imp->imp_lock, flags); - if (req->rq_replay || req->rq_transno != 0) + if (req->rq_transno != 0) ptlrpc_retain_replayable_request(req, imp); else if (req->rq_commit_cb != NULL) { - spin_unlock_irqrestore(&imp->imp_lock, flags); + spin_unlock_irqrestore(&imp->imp_lock, flags); req->rq_commit_cb(req); - spin_lock_irqsave(&imp->imp_lock, flags); + spin_lock_irqsave(&imp->imp_lock, flags); } if (req->rq_transno > imp->imp_max_transno) @@ -698,7 +698,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } if (req->rq_phase == RQ_PHASE_RPC) { - if (req->rq_waiting || req->rq_resend) { + if (req->rq_timedout||req->rq_waiting||req->rq_resend) { int status; ptlrpc_unregister_reply(req); @@ -709,7 +709,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) spin_unlock_irqrestore(&imp->imp_lock, flags); continue; - } + } list_del_init(&req->rq_list); if (status != 0) { @@ -856,6 +856,9 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) int 
replied = 0; ENTRY; + DEBUG_REQ(D_ERROR, req, "timeout (sent at %lu, %lus ago)", + (long)req->rq_sent, LTIME_S(CURRENT_TIME) - req->rq_sent); + spin_lock_irqsave (&req->rq_lock, flags); replied = req->rq_replied; if (!replied) @@ -869,6 +872,9 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) ptlrpc_unregister_reply (req); + if (obd_dump_on_timeout) + portals_debug_dumplog(); + if (req->rq_bulk != NULL) ptlrpc_unregister_bulk (req); @@ -883,7 +889,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req) /* If this request is for recovery or other primordial tasks, * then error it out here. */ - if (req->rq_send_state != LUSTRE_IMP_FULL || + if (req->rq_send_state != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov) { spin_lock_irqsave (&req->rq_lock, flags); req->rq_status = -ETIMEDOUT; @@ -901,7 +907,7 @@ int ptlrpc_expired_set(void *data) { struct ptlrpc_request_set *set = data; struct list_head *tmp; - time_t now = LTIME_S (CURRENT_TIME); + time_t now = LTIME_S(CURRENT_TIME); ENTRY; LASSERT(set != NULL); @@ -1014,7 +1020,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) CDEBUG(D_HA, "set %p going to sleep for %d seconds\n", set, timeout); lwi = LWI_TIMEOUT_INTR((timeout ? 
timeout : 1) * HZ, - ptlrpc_expired_set, + ptlrpc_expired_set, ptlrpc_interrupted_set, set); rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi); @@ -1043,7 +1049,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) if (set->set_interpret != NULL) { int (*interpreter)(struct ptlrpc_request_set *set,void *,int) = set->set_interpret; - rc = interpreter (set, &set->set_args, rc); + rc = interpreter (set, set->set_arg, rc); } RETURN(rc); diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 646cb07..c2c5288 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -67,7 +67,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, spin_lock(&conn_lock); list_for_each(tmp, &conn_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && + if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); GOTO(out, c); @@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, list_for_each_safe(tmp, pos, &conn_unused_list) { c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (!memcmp(peer, &c->c_peer, sizeof(struct ptlrpc_peer)) && + if (memcmp(peer, &c->c_peer, sizeof(*peer)) == 0 && peer->peer_ni == c->c_peer.peer_ni) { ptlrpc_connection_addref(c); list_del(&c->c_link); diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 37a7f94..2a253c5 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -31,7 +31,7 @@ #include #include "ptlrpc_internal.h" -#if !defined(__KERNEL__) && defined(CRAY_PORTALS) +#if !defined(__KERNEL__) && CRAY_PORTALS /* forward ref in events.c */ static void cray_portals_callback(ptl_event_t *ev); #endif @@ -198,7 +198,7 @@ void request_in_callback(ptl_event_t *ev) "Dropping %s RPC from %s\n", service->srv_name, portals_id2str(srv_ni->sni_ni->pni_number, - ev->initiator, str)); + 
ev->initiator, str)); return; } } @@ -214,8 +214,12 @@ void request_in_callback(ptl_event_t *ev) do_gettimeofday(&req->rq_arrival_time); req->rq_peer.peer_id = ev->initiator; req->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni; + ptlrpc_id2str(&req->rq_peer, req->rq_peerstr); req->rq_rqbd = rqbd; - +#if CRAY_PORTALS + req->rq_uid = ev->uid; +#endif + spin_lock_irqsave (&service->srv_lock, flags); if (ev->unlinked) { @@ -360,15 +364,21 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) for (i = 0; i < ptlrpc_ninterfaces; i++) { pni = &ptlrpc_interfaces[i]; +#ifndef CRAY_PORTALS if (pni->pni_number == peer_nal) { +#else + /* compatible nals but may be from different bridges */ + if (NALID_FROM_IFACE(pni->pni_number) == + NALID_FROM_IFACE(peer_nal)) { +#endif peer->peer_id.nid = peer_nid; - peer->peer_id.pid = LUSTRE_SRV_PTL_PID; //#4165:only client will call this func. + peer->peer_id.pid = LUSTRE_SRV_PTL_PID; peer->peer_ni = pni; return (0); } } - CERROR("Can't find ptlrpc interface for NAL %d, NID %s\n", + CERROR("Can't find ptlrpc interface for NAL %x, NID %s\n", peer_nal, portals_nid2str(peer_nal, peer_nid, str)); return (-ENOENT); } @@ -416,6 +426,12 @@ ptl_pid_t ptl_get_pid(void) #ifndef __KERNEL__ pid = getpid(); +#ifdef CRAY_PORTALS + /* hack to keep pid in range accepted by ernal */ + pid &= 0xFF; + if (pid == LUSTRE_SRV_PTL_PID) + pid++; +#endif #else pid = LUSTRE_SRV_PTL_PID; #endif @@ -442,7 +458,7 @@ int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) CDEBUG(D_NET, "My pid is: %x\n", ptl_get_pid()); PtlSnprintHandle(str, sizeof(str), nih); - CDEBUG (D_NET, "init %d %s: %s\n", number, name, str); + CDEBUG (D_NET, "init %x %s: %s\n", number, name, str); pni->pni_name = name; pni->pni_number = number; @@ -580,7 +596,7 @@ liblustre_wait_event (int timeout) return found_something; } -#ifdef CRAY_PORTALS +#if CRAY_PORTALS static void cray_portals_callback(ptl_event_t *ev) { /* We get a callback from the client Cray portals 
implementation @@ -626,7 +642,9 @@ int ptlrpc_init_portals(void) {LONAL, "lonal"}, {RANAL, "ranal"}, #else - {CRAY_KB_ERNAL, "cray_kb_ernal"}, + {CRAY_KERN_NAL, "cray_kern_nal"}, + {CRAY_QK_NAL, "cray_qk_nal"}, + {CRAY_USER_NAL, "cray_user_nal"}, #endif }; int rc; diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 122f878..b8bcf5a 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -21,11 +21,7 @@ */ #define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -# include -# include -# include -#else +#ifndef __KERNEL__ # include #endif @@ -101,10 +97,10 @@ int ptlrpc_set_import_discon(struct obd_import *imp) spin_lock_irqsave(&imp->imp_lock, flags); if (imp->imp_state == LUSTRE_IMP_FULL) { - CERROR("%s: connection lost to %s@%s\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); + CWARN("%s: connection lost to %s@%s\n", + imp->imp_obd->obd_name, + imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); spin_unlock_irqrestore(&imp->imp_lock, flags); obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); @@ -180,7 +176,7 @@ void ptlrpc_invalidate_import(struct obd_import *imp, int in_rpc) if (rc) CERROR("%s: rc = %d waiting for callback (%d != %d)\n", imp->imp_target_uuid.uuid, rc, - atomic_read(&imp->imp_inflight), inflight); + atomic_read(&imp->imp_inflight), !!in_rpc); obd_import_event(imp->imp_obd, imp, IMP_EVENT_INVALIDATE); } @@ -374,6 +370,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) #ifndef __KERNEL__ lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_LIBCLIENT); #endif + if (obd->u.cli.cl_async) { + lustre_msg_add_op_flags(request->rq_reqmsg, MSG_CONNECT_ASYNC); + } request->rq_send_state = LUSTRE_IMP_CONNECTING; request->rq_replen = lustre_msg_size(0, NULL); @@ -559,8 +558,10 @@ static int signal_completed_replay(struct obd_import *imp) atomic_inc(&imp->imp_replay_inflight); req = 
ptlrpc_prep_req(imp, LUSTRE_OBD_VERSION, OBD_PING, 0, NULL, NULL); - if (!req) + if (!req) { + atomic_dec(&imp->imp_replay_inflight); RETURN(-ENOMEM); + } req->rq_replen = lustre_msg_size(0, NULL); req->rq_send_state = LUSTRE_IMP_REPLAY_WAIT; @@ -572,6 +573,37 @@ static int signal_completed_replay(struct obd_import *imp) RETURN(0); } +#ifdef __KERNEL__ +static int ptlrpc_invalidate_import_thread(void *data) +{ + struct obd_import *imp = data; + unsigned long flags; + + ENTRY; + + lock_kernel(); + ptlrpc_daemonize(); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + THREAD_NAME(current->comm, sizeof(current->comm), "ll_imp_inval"); + unlock_kernel(); + + CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n", + imp->imp_obd->obd_name, imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); + + ptlrpc_invalidate_import(imp, 0); + IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); + + ptlrpc_import_recovery_state_machine(imp); + + RETURN(0); +} +#endif + int ptlrpc_import_recovery_state_machine(struct obd_import *imp) { int rc = 0; @@ -582,9 +614,17 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) imp->imp_target_uuid.uuid, imp->imp_connection->c_remote_uuid.uuid); +#ifdef __KERNEL__ + rc = kernel_thread(ptlrpc_invalidate_import_thread, imp, + CLONE_VM | CLONE_FILES); + if (rc < 0) + CERROR("error starting invalidate thread: %d\n", rc); + RETURN(rc); +#else ptlrpc_invalidate_import(imp, 1); IMPORT_SET_STATE(imp, LUSTRE_IMP_RECOVER); +#endif } if (imp->imp_state == LUSTRE_IMP_REPLAY) { @@ -627,10 +667,10 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) GOTO(out, rc); IMPORT_SET_STATE(imp, LUSTRE_IMP_FULL); ptlrpc_activate_import(imp); - CERROR("%s: connection restored to %s@%s\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); + CWARN("%s: connection restored to %s@%s\n", + imp->imp_obd->obd_name, +
imp->imp_target_uuid.uuid, + imp->imp_connection->c_remote_uuid.uuid); } if (imp->imp_state == LUSTRE_IMP_FULL) { diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index d03f2ed..8a7179b 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -144,7 +144,7 @@ int ptlrpc_start_bulk_transfer (struct ptlrpc_bulk_desc *desc) else rc = PtlGet (desc->bd_md_h, peer->peer_id, desc->bd_portal, 0, xid, 0); - + if (rc != PTL_OK) { /* Can't send, so we unlink the MD bound above. The UNLINK * event this creates will signal completion with failure, @@ -234,9 +234,9 @@ int ptlrpc_register_bulk (struct ptlrpc_request *req) LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid); desc->bd_registered = 1; desc->bd_last_xid = req->rq_xid; - - rc = PtlMEAttach(peer->peer_ni->pni_ni_h, - desc->bd_portal, desc->bd_import->imp_connection->c_peer.peer_id, + + rc = PtlMEAttach(peer->peer_ni->pni_ni_h, desc->bd_portal, + desc->bd_import->imp_connection->c_peer.peer_id, req->rq_xid, 0, PTL_UNLINK, PTL_INS_AFTER, &me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); @@ -433,8 +433,8 @@ int ptl_send_rpc(struct ptlrpc_request *request) rc = PtlMEAttach(connection->c_peer.peer_ni->pni_ni_h, request->rq_reply_portal, /* XXX FIXME bug 249 */ - connection->c_peer.peer_id, request->rq_xid, 0, PTL_UNLINK, - PTL_INS_AFTER, &reply_me_h); + connection->c_peer.peer_id, request->rq_xid, 0, + PTL_UNLINK, PTL_INS_AFTER, &reply_me_h); if (rc != PTL_OK) { CERROR("PtlMEAttach failed: %d\n", rc); LASSERT (rc == PTL_NO_SPACE); diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index bcbf095..1443a6a 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -35,7 +35,8 @@ #include "ptlrpc_internal.h" #ifdef __KERNEL__ -#ifndef CRAY_PORTALS +#if !CRAY_PORTALS + void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) { LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); @@ -57,11 +58,16 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc 
*desc, struct page *page, desc->bd_iov_count++; } -#else + +#else /* CRAY_PORTALS */ +#ifdef PTL_MD_KIOV +#error "Conflicting compilation directives" +#endif + void ptlrpc_fill_bulk_md (ptl_md_t *md, struct ptlrpc_bulk_desc *desc) { LASSERT (desc->bd_iov_count <= PTLRPC_MAX_BRW_PAGES); - LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); + LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS))); md->options |= (PTL_MD_IOVEC | PTL_MD_PHYS); md->start = &desc->bd_iov[0]; @@ -79,22 +85,24 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, desc->bd_iov_count++; } -#endif +#endif /* CRAY_PORTALS */ #else /* !__KERNEL__ */ + void ptlrpc_fill_bulk_md(ptl_md_t *md, struct ptlrpc_bulk_desc *desc) { +#if CRAY_PORTALS + LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_PHYS))); + LASSERT (desc->bd_iov_count == 1); +#else LASSERT (!(md->options & (PTL_MD_IOVEC | PTL_MD_KIOV | PTL_MD_PHYS))); - +#endif if (desc->bd_iov_count == 1) { md->start = desc->bd_iov[0].iov_base; md->length = desc->bd_iov[0].iov_len; return; } -#if CRAY_PORTALS - LBUG(); -#endif md->options |= PTL_MD_IOVEC; md->start = &desc->bd_iov[0]; md->length = desc->bd_iov_count; @@ -104,10 +112,8 @@ static int can_merge_iovs(ptl_md_iovec_t *existing, ptl_md_iovec_t *candidate) { if (existing->iov_base + existing->iov_len == candidate->iov_base) return 1; - /* XXX it's good to have an warning here, but user-level echo_client - * will hit this. reenable it when we fixed echo_client. 
- */ #if 0 + /* Enable this section to provide earlier evidence of fragmented bulk */ CERROR("Can't merge iovs %p for %x, %p for %x\n", existing->iov_base, existing->iov_len, candidate->iov_base, candidate->iov_len); @@ -129,4 +135,5 @@ void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, desc->bd_iov_count++; } } -#endif + +#endif /* !__KERNEL__ */ diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 7fab9b9..89b1191 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -122,8 +122,9 @@ static int ptlrpc_pinger_main(void *arg) spin_unlock_irqrestore(&imp->imp_lock, flags); if (imp->imp_next_ping <= this_ping || force) { - if (level == LUSTRE_IMP_DISCON) { - /* wait at least a timeout before + if (level == LUSTRE_IMP_DISCON && + !imp->imp_deactive) { + /* wait at least a timeout before trying recovery again. */ imp->imp_next_ping = ptlrpc_next_ping(imp); @@ -132,7 +133,7 @@ static int ptlrpc_pinger_main(void *arg) imp->imp_obd->obd_no_recov) { CDEBUG(D_HA, "not pinging %s (in recovery " - " or recovery disabled: %s)\n", + "or recovery disabled: %s)\n", imp->imp_target_uuid.uuid, ptlrpc_import_state_name(level)); } else if (imp->imp_pingable || force) { diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index ca6e22a..8b386ee 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -486,6 +486,7 @@ int llog_cleanup_commit_master(int force) atomic_read(&lcm->lcm_thread_total) == 0); return 0; } +EXPORT_SYMBOL(llog_cleanup_commit_master); static int log_process_thread(void *args) { diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index c3eaf17..6731c7d 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -68,8 +68,8 @@ void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) argv[0], argv[1], argv[2], rc); } else { - CERROR("Invoked upcall %s %s %s\n", - argv[0], argv[1], argv[2]); + CWARN("Invoked upcall %s %s %s\n", + argv[0], argv[1], 
argv[2]); } } @@ -109,8 +109,8 @@ void ptlrpc_run_failed_import_upcall(struct obd_import* imp) argv[0], argv[1], argv[2], argv[3], argv[4],rc); } else { - CERROR("Invoked upcall %s %s %s %s %s\n", - argv[0], argv[1], argv[2], argv[3], argv[4]); + CWARN("Invoked upcall %s %s %s %s %s\n", + argv[0], argv[1], argv[2], argv[3], argv[4]); } #else if (imp->imp_state == LUSTRE_IMP_CLOSED) { @@ -289,13 +289,12 @@ void ptlrpc_request_handle_notconn(struct ptlrpc_request *failed_req) rc = ptlrpc_connect_import(imp, NULL); } - /* Wait for recovery to complete and resend. If evicted, then this request will be errored out later.*/ spin_lock_irqsave(&failed_req->rq_lock, flags); failed_req->rq_resend = 1; spin_unlock_irqrestore(&failed_req->rq_lock, flags); - + EXIT; } @@ -314,10 +313,12 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) * requests. */ if (!active) { ptlrpc_invalidate_import(imp, 0); - } + imp->imp_deactive = 1; + } /* When activating, mark import valid, and attempt recovery */ if (active) { + imp->imp_deactive = 0; CDEBUG(D_HA, "setting import %s VALID\n", imp->imp_target_uuid.uuid); rc = ptlrpc_recover_import(imp, NULL); @@ -330,10 +331,10 @@ int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid) { int rc; ENTRY; - + /* force import to be disconnected. 
*/ ptlrpc_set_import_discon(imp); - + rc = ptlrpc_recover_import_no_retry(imp, new_uuid); RETURN(rc); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 30217ab..edf9f5f 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -57,17 +57,17 @@ ptlrpc_free_server_req (struct ptlrpc_request *req) OBD_FREE(req, sizeof(*req)); } - + static char * ptlrpc_alloc_request_buffer (int size) { char *ptr; - + if (size > SVC_BUF_VMALLOC_THRESHOLD) OBD_VMALLOC(ptr, size); else OBD_ALLOC(ptr, size); - + return (ptr); } @@ -135,6 +135,9 @@ ptlrpc_grow_req_bufs(struct ptlrpc_srv_ni *srv_ni) struct ptlrpc_request_buffer_desc *rqbd; int i; + CDEBUG(D_RPCTRACE, "%s: allocate %d new %d-byte reqbufs (%d/%d left)\n", + svc->srv_name, svc->srv_nbuf_per_group, svc->srv_buf_size, + srv_ni->sni_nrqbd_receiving, svc->srv_nbufs); for (i = 0; i < svc->srv_nbuf_per_group; i++) { rqbd = ptlrpc_alloc_rqbd(srv_ni); @@ -308,7 +311,7 @@ ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) struct ptlrpc_service * ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, - int req_portal, int rep_portal, + int req_portal, int rep_portal, int watchdog_timeout, svc_handler_t handler, char *name, struct proc_dir_entry *proc_entry) { @@ -339,6 +342,7 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, service->srv_buf_size = bufsize; service->srv_rep_portal = rep_portal; service->srv_req_portal = req_portal; + service->srv_watchdog_timeout = watchdog_timeout; service->srv_handler = handler; INIT_LIST_HEAD(&service->srv_request_queue); @@ -405,7 +409,6 @@ ptlrpc_server_free_request(struct ptlrpc_service *svc, struct ptlrpc_request *re ptlrpc_free_server_req(req); } -static char str[PTL_NALFMT_SIZE]; static int ptlrpc_server_handle_request (struct ptlrpc_service *svc) { @@ -477,16 +480,14 @@ ptlrpc_server_handle_request (struct ptlrpc_service *svc) if (rc != 0) { CERROR ("error unpacking request: ptl %d from %s" " xid "LPU64"\n", svc->srv_req_portal, - 
ptlrpc_peernid2str(&request->rq_peer, str), - request->rq_xid); + request->rq_peerstr, request->rq_xid); goto out; } rc = -EINVAL; if (request->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) { CERROR("wrong packet type received (type=%u) from %s\n", - request->rq_reqmsg->type, - ptlrpc_peernid2str(&request->rq_peer, str)); + request->rq_reqmsg->type, request->rq_peerstr); goto out; } @@ -498,7 +499,7 @@ ptlrpc_server_handle_request (struct ptlrpc_service *svc) if (timediff / 1000000 > (long)obd_timeout) { CERROR("Dropping timed-out opc %d request from %s" ": %ld seconds old\n", request->rq_reqmsg->opc, - ptlrpc_peernid2str(&request->rq_peer, str), + request->rq_peerstr, timediff / 1000000); goto out; } @@ -528,11 +529,13 @@ ptlrpc_server_handle_request (struct ptlrpc_service *svc) atomic_read(&request->rq_export->exp_refcount) : -99), request->rq_reqmsg->status, request->rq_xid, request->rq_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&request->rq_peer, str), + request->rq_peerstr, request->rq_reqmsg->opc); + request->rq_svc = svc; rc = svc->srv_handler(request); request->rq_svc = NULL; + CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:ni:nid:opc " "%s:%s+%d:%d:"LPU64":%s:%s:%d\n", current->comm, (request->rq_export ? @@ -541,7 +544,7 @@ ptlrpc_server_handle_request (struct ptlrpc_service *svc) atomic_read(&request->rq_export->exp_refcount) : -99), request->rq_reqmsg->status, request->rq_xid, request->rq_peer.peer_ni->pni_name, - ptlrpc_peernid2str(&request->rq_peer, str), + request->rq_peerstr, request->rq_reqmsg->opc); if (export != NULL) @@ -558,9 +561,9 @@ put_conn: CDEBUG((timediff / 1000000 > (long)obd_timeout) ? D_ERROR : D_HA, "request "LPU64" opc %u from NID %s processed in %ldus " - "(%ldus total)\n", request->rq_xid, + "(%ldus total)\n", request->rq_xid, request->rq_reqmsg ? 
request->rq_reqmsg->opc : 0, - ptlrpc_peernid2str(&request->rq_peer, str), + request->rq_peerstr, timediff, timeval_sub(&work_end, &request->rq_arrival_time)); if (svc->srv_stats != NULL && request->rq_reqmsg != NULL) { @@ -769,7 +772,11 @@ static int ptlrpc_main(void *arg) struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; struct ptlrpc_service *svc = data->svc; struct ptlrpc_thread *thread = data->thread; + struct lc_watchdog *watchdog; unsigned long flags; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + struct group_info *ginfo = NULL; +#endif ENTRY; lock_kernel(); @@ -787,10 +794,24 @@ static int ptlrpc_main(void *arg) unlock_kernel(); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + ginfo = groups_alloc(0); + if (!ginfo) { + thread->t_flags = SVC_RUNNING; + wake_up(&thread->t_ctl_waitq); + return (-ENOMEM); + } + set_current_groups(ginfo); + put_group_info(ginfo); +#endif + /* Record that the thread is running */ thread->t_flags = SVC_RUNNING; wake_up(&thread->t_ctl_waitq); + watchdog = lc_watchdog_add(svc->srv_watchdog_timeout, + LC_WATCHDOG_DEFAULT_CB, NULL); + spin_lock_irqsave(&svc->srv_lock, flags); svc->srv_nthreads++; spin_unlock_irqrestore(&svc->srv_lock, flags); @@ -803,6 +824,8 @@ static int ptlrpc_main(void *arg) struct l_wait_info lwi = LWI_TIMEOUT(svc->srv_rqbd_timeout, ptlrpc_retry_rqbds, svc); + lc_watchdog_disable(watchdog); + l_wait_event_exclusive (svc->srv_waitq, ((thread->t_flags & SVC_STOPPING) != 0 && svc->srv_n_difficult_replies == 0) || @@ -814,7 +837,8 @@ static int ptlrpc_main(void *arg) svc->srv_n_active_reqs < (svc->srv_nthreads - 1))), &lwi); - + + lc_watchdog_touch(watchdog); ptlrpc_check_rqbd_pools(svc); if (!list_empty (&svc->srv_reply_queue)) @@ -845,6 +869,8 @@ static int ptlrpc_main(void *arg) spin_unlock_irqrestore(&svc->srv_lock, flags); + lc_watchdog_delete(watchdog); + CDEBUG(D_NET, "service thread exiting, process %d\n", current->pid); return 0; } diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre 
index 95c1d06..8f8d890 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -19,6 +19,7 @@ LOCK=/var/lock/subsys/$SERVICE : ${LCONF:=/usr/sbin/lconf} : ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} : ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} +: ${LCTL:=/usr/sbin/lctl} # Source function library. if [ -f /etc/init.d/functions ] ; then @@ -33,7 +34,9 @@ fi # Check that networking is up. [ "${NETWORKING}" = "no" ] && exit 0 -[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0 +[ -x ${LCONF} -a -x ${LCTL} ] || exit 0 + +[ -f ${LUSTRE_CONFIG_XML} ] || ( echo "unconfigured" && exit 0 ) # Create /var/lustre directory # This is used by snmp agent for checking lustre services \ @@ -73,6 +76,19 @@ restart() { start } +status() { + ${LCTL} dl 2>/dev/null | while read INDEX STAT MODULE NAME; do + case $MODULE in + ost|mds|osc|mdc) + [ "`grep -v FULL /proc/fs/lustre/*c/*/*_server_uuid`" ] \ + && echo "recovery" || echo "running" + return + ;; + esac + done + echo "stopped" +} + # See how we were called. case "$1" in start) diff --git a/lustre/scripts/lustrefs b/lustre/scripts/lustrefs index 976e8e7..8148b75 100644 --- a/lustre/scripts/lustrefs +++ b/lustre/scripts/lustrefs @@ -9,7 +9,7 @@ # Authors: Bill Nottingham # Miquel van Smoorenburg, # -# chkconfig: 345 25 75 +# chkconfig: 345 26 74 # description: Mounts and unmounts all Lustre mount points. # ### BEGIN INIT INFO diff --git a/lustre/scripts/suse-functions.sh b/lustre/scripts/suse-functions.sh new file mode 100644 index 0000000..a7e421d --- /dev/null +++ b/lustre/scripts/suse-functions.sh @@ -0,0 +1,22 @@ +# Readlink is not present on some older distributions: emulate it. 
+readlink() { + local path=$1 ll + + if [ -L "$path" ]; then + ll="$(LC_ALL=C ls -l "$path" 2> /dev/null)" && + echo "${ll/* -> }" + else + return 1 + fi +} +relink() { + if [ -h "$2" ]; then + local old=$(readlink "$2") + [ "$old" = "$1" ] && return 0 + echo "Changing symlink $2 from $old to $1" + elif [ -e "$2" ]; then + echo "Replacing file $2 with symlink to $1" + fi + rm -f "$2" \ + && ln -s "$1" "$2" +} diff --git a/lustre/scripts/suse-post.sh b/lustre/scripts/suse-post.sh new file mode 100644 index 0000000..ec38664 --- /dev/null +++ b/lustre/scripts/suse-post.sh @@ -0,0 +1,46 @@ +if [ -f /boot/vmlinuz-%ver_str ]; then + image=vmlinuz +elif [ -f /boot/image-%ver_str ]; then + image=image +elif [ -f /boot/vmlinux-%ver_str ]; then + image=vmlinux +else + # nothing to do (UML kernels for example). + exit 0 +fi + +# If we have old symlinks, rename them to *.previous +if [ -L /boot/$image -a -L /boot/initrd -a \ + "$(readlink /boot/$image)" != $image-%ver_str -a \ + "$(readlink /boot/initrd)" != initrd-%ver_str ]; then + mv /boot/$image /boot/$image.previous + mv /boot/initrd /boot/initrd.previous +fi + +# update /boot/vmlinuz symlink +relink $image-%ver_str /boot/$image + +if test "$YAST_IS_RUNNING" != instsys ; then + if [ -f /etc/fstab ]; then + echo Setting up /lib/modules/%ver_str + /sbin/update-modules.dep -v %ver_str + cd /boot + /sbin/mkinitrd -k $image-%ver_str -i initrd-%ver_str + + if [ -e /boot/initrd-%ver_str ]; then + relink initrd-%ver_str /boot/initrd + else + rm -f /boot/initrd + fi + else + echo "please run mkinitrd as soon as your system is complete" + fi +fi + +if [ "$YAST_IS_RUNNING" != instsys -a -x /sbin/new-kernel-pkg ]; then + # Notify boot loader that a new kernel image has been installed. + # (during initial installation the boot loader configuration does not + # yet exist when the kernel is installed, but yast kicks the boot + # loader itself later.) 
+ /sbin/new-kernel-pkg %ver_str +fi diff --git a/lustre/scripts/suse-postun.sh b/lustre/scripts/suse-postun.sh new file mode 100644 index 0000000..eb86d03 --- /dev/null +++ b/lustre/scripts/suse-postun.sh @@ -0,0 +1,43 @@ +if [ -L /boot/vmlinux ]; then + image=vmlinux +elif [ -L /boot/vmlinuz ]; then + image=vmlinuz +elif [ -L /boot/image ]; then + image=image +else + # nothing to do (UML kernels for example). + exit 0 +fi + +if [ "$(readlink /boot/$image)" = $image-%ver_str ]; then + # This may be the last kernel RPM on the system, or it may + # be an update. In both of those cases the symlinks will + # eventually be correct. Only if this kernel + # is removed and other kernel rpms remain installed, + # find the most recent of the remaining kernels, and make + # the symlinks point to it. This makes sure that the boot + # manager will always have a kernel to boot in its default + # configuration. + shopt -s nullglob + for image in $(cd /boot ; ls -dt $image-*); do + initrd=initrd-${image#*-} + if [ -f /boot/$image -a -f /boot/$initrd ]; then + relink $image /boot/${image%%%%-*} + relink $initrd /boot/${initrd%%%%-*} + break + fi + done + shopt -u nullglob +fi + +# Created in the other kernel's %post +case "$(readlink /boot/$image.previous)" in +$image-%ver_str|$(readlink /boot/$image)) + rm -f /boot/$image.previous ;; +esac +case "$(readlink /boot/initrd.previous)" in +initrd-%ver_str|$(readlink /boot/initrd)) + rm -f /boot/initrd.previous ;; +esac +# created in %post +rm -f /boot/initrd-%ver_str diff --git a/lustre/scripts/suse-trigger-script.sh.in b/lustre/scripts/suse-trigger-script.sh.in new file mode 100644 index 0000000..0ead9e8 --- /dev/null +++ b/lustre/scripts/suse-trigger-script.sh.in @@ -0,0 +1,9 @@ +old_shopt=$(shopt -p nullglob || :) +shopt -s nullglob +for script in /lib/modules/scripts/* ; do + if [ -f "$script" -a -x "$script" ] \ + && ! "$script" --@when@ %ver_str $1 ; then + echo "$script failed." 
+ fi +done +eval $old_shopt diff --git a/lustre/sec/gss/gss_api.h b/lustre/sec/gss/gss_api.h index 06557d4..94f57ef 100644 --- a/lustre/sec/gss/gss_api.h +++ b/lustre/sec/gss/gss_api.h @@ -15,7 +15,7 @@ * Bruce Fields * Copyright (c) 2000 The Regents of the University of Michigan * - * $Id: gss_api.h,v 1.2 2005/03/31 22:18:24 ericm Exp $ + * $Id: gss_api.h,v 1.3 2005/04/04 13:12:39 yury Exp $ */ #ifndef __SEC_GSS_GSS_API_H_ diff --git a/lustre/sec/gss/sec_gss.c b/lustre/sec/gss/sec_gss.c index db89a71..e96d75b 100644 --- a/lustre/sec/gss/sec_gss.c +++ b/lustre/sec/gss/sec_gss.c @@ -43,7 +43,7 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: sec_gss.c,v 1.2 2005/03/31 22:18:24 ericm Exp $ + * $Id: sec_gss.c,v 1.3 2005/04/04 13:12:39 yury Exp $ */ #ifndef EXPORT_SYMTAB diff --git a/lustre/sec/sec.c b/lustre/sec/sec.c index 9dd5d4f..bf09bf7 100644 --- a/lustre/sec/sec.c +++ b/lustre/sec/sec.c @@ -611,7 +611,7 @@ struct ptlrpc_sec * ptlrpcs_sec_create(ptlrpcs_flavor_t *flavor, static void ptlrpcs_sec_destroy(struct ptlrpc_sec *sec) { struct ptlrpc_sec_type *type = sec->ps_type; - struct ptlrpc_import *imp = sec->ps_import; + struct obd_import *imp = sec->ps_import; LASSERT(type && type->pst_ops); LASSERT(type->pst_ops->destroy_sec); diff --git a/lustre/smfs/inode.c b/lustre/smfs/inode.c index 0243f2c..39432c4 100644 --- a/lustre/smfs/inode.c +++ b/lustre/smfs/inode.c @@ -123,7 +123,6 @@ static int smfs_test_inode(struct inode *inode, void *opaque) !smfs_snap_test_inode(inode, opaque)) return 0; #endif - return 1; } @@ -183,9 +182,7 @@ struct inode *smfs_get_inode(struct super_block *sb, ino_t hash, sargs.s_inode = dir; sargs.s_index = index; CDEBUG(D_VFSTRACE, "get_inode: %lu\n", hash); - inode = smfs_iget(sb, hash, &sargs); - RETURN(inode); } diff --git a/lustre/smfs/kml.c b/lustre/smfs/kml.c index ecac5fe..2692963 100644 --- a/lustre/smfs/kml.c +++ b/lustre/smfs/kml.c @@ 
-276,29 +276,35 @@ int smfs_rec_setattr(struct inode *dir, struct dentry *dentry, } EXPORT_SYMBOL(smfs_rec_setattr); -int smfs_rec_md(struct inode *inode, void *lmm, int lmm_size) +int smfs_rec_md(struct inode *inode, void *lmm, int lmm_size, + enum ea_type type) { char *set_lmm = NULL; - int rc = 0; + int rc = 0; ENTRY; if (!SMFS_DO_REC(S2SMI(inode->i_sb))) RETURN(0); if (lmm) { - OBD_ALLOC(set_lmm, lmm_size + sizeof(lmm_size)); + int size = lmm_size + sizeof(lmm_size) + + sizeof(type); + + OBD_ALLOC(set_lmm, size); if (!set_lmm) RETURN(-ENOMEM); + memcpy(set_lmm, &lmm_size, sizeof(lmm_size)); - memcpy(set_lmm + sizeof(lmm_size), lmm, lmm_size); + memcpy(set_lmm + sizeof(lmm_size), &type, sizeof(type)); + memcpy(set_lmm + sizeof(lmm_size) + sizeof(type), lmm, lmm_size); + rc = smfs_post_rec_setattr(inode, NULL, NULL, set_lmm); if (rc) { - CERROR("Error: Record md for inode %lu rc=%d\n", + CERROR("Error: Record md for inode %lu rc = %d\n", inode->i_ino, rc); } + OBD_FREE(set_lmm, size); } - if (set_lmm) - OBD_FREE(set_lmm, lmm_size + sizeof(lmm_size)); RETURN(rc); } EXPORT_SYMBOL(smfs_rec_md); @@ -690,12 +696,12 @@ out: } int smfs_post_rec_setattr(struct inode *inode, struct dentry *dentry, - void *data1, void *data2) + void *data1, void *data2) { - struct smfs_super_info *sinfo; struct iattr *attr = (struct iattr *)data1; - char *buffer = NULL, *pbuf; int rc = 0, length = 0, buf_len = 0; + struct smfs_super_info *sinfo; + char *buffer = NULL, *pbuf; ENTRY; sinfo = S2SMI(inode->i_sb); diff --git a/lustre/smfs/smfs_lib.c b/lustre/smfs/smfs_lib.c index 95c5fd4..2855883 100644 --- a/lustre/smfs/smfs_lib.c +++ b/lustre/smfs/smfs_lib.c @@ -218,6 +218,7 @@ static int smfs_init_hooks(struct super_block *sb) } extern char* smfs_options(char*, char**, char**, char*, int *); +extern void cleanup_option(void); int smfs_fill_super(struct super_block *sb, void *data, int silent) { diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore index 3fa72fa..f915f89 100644 --- 
a/lustre/tests/.cvsignore +++ b/lustre/tests/.cvsignore @@ -67,4 +67,4 @@ copy_attr rename_many mmap_sanity memhog - +rmdirmany diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index 5c603f7..c81ec07 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -1,5 +1,5 @@ # Lustre test Makefile -AM_CPPFLAGS = $(LLCPPFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +AM_CPPFLAGS = $(LLCPPFLAGS) -I/opt/lam/include -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 AM_CFLAGS = $(LLCFLAGS) # LDADD = -lldap # LDADD := -lreadline -ltermcap # -lefence @@ -28,6 +28,9 @@ noinst_PROGRAMS += small_write multiop sleeptest ll_sparseness_verify cmknod noinst_PROGRAMS += ll_sparseness_write mrename ll_dirstripe_verify mkdirmany rmdirmany noinst_PROGRAMS += openfilleddirunlink rename_many memhog iopentest1 iopentest2 noinst_PROGRAMS += mmap_sanity +if MPITESTS +noinst_PROGRAMS += parallel_grouplock write_append_truncate createmany_mpi +endif # noinst_PROGRAMS += ldaptest copy_attr bin_PROGRAMS = mcreate munlink endif # TESTS @@ -36,11 +39,14 @@ endif # TESTS stat_SOURCES = stat.c stat_fs.h mkdirdeep_LDADD=-L$(top_builddir)/portals/utils -lptlctl $(LIBREADLINE) -#write_append_truncate_CC=mpicc -#createmany_mpi_CC=mpicc -#parallel_grouplock_SOURCES=parallel_grouplock.c lp_utils.c -#parallel_grouplock_CC=mpicc - -#copy_attr_LDADD= -lattr mmap_sanity_SOURCES= mmap_sanity.c +if MPITESTS +LAM_LD_FLAGS=-L/opt/lam/lib -lmpi -llam -lpthread +write_append_truncate_SOURCES=write_append_truncate.c +write_append_truncate_LDADD=$(LAM_LD_FLAGS) +createmany_mpi_SOURCES=createmany-mpi.c +createmany_mpi_LDADD=$(LAM_LD_FLAGS) +parallel_grouplock_SOURCES=parallel_grouplock.c lp_utils.c +parallel_grouplock_LDADD=$(LAM_LD_FLAGS) +endif diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index acab3123..c6eff64 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -47,9 +47,10 @@ start_mds() { start mds1 --reformat $MDSLCONFARGS || 
return 94 start_lsvcgssd || return 501 } + stop_mds() { echo "stop mds1 service on `facet_active_host mds1`" - stop mds1 $@ || return 97 + stop mds1 $@ || return 97 stop_lsvcgssd } @@ -60,7 +61,7 @@ start_ost() { stop_ost() { echo "stop ost service on `facet_active_host ost`" - stop ost $@ || return 98 + stop ost $@ || return 98 } mount_client() { @@ -188,7 +189,17 @@ test_5() { # cleanup may return an error from the failed # disconnects; for now I'll consider this successful # if all the modules have unloaded. - umount $MOUNT & + + # as MDS is down, umount without -f may cause blocking + # and this test will never finish. Blocking is possible + # as umount may want to cancel locks with RPC's and these + # RPC's will wait forever, as pinger thread will try to + # recover failed import endlessly. + # + # Thus, main point is: nobody should expect umount finish + # quickly and cleanly without -f flag when MDS or OST is + # down for sure. --umka + umount -f $MOUNT & UMOUNT_PID=$! sleep 2 echo "killing umount" @@ -198,16 +209,21 @@ test_5() { stop_lgssd # cleanup client modules - $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null + $LCONF --force --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null # stop_mds is a no-op here, and should not fail - stop_mds || return 4 - stop_ost || return 5 + stop_mds || return 4 + + # this should have --force flag specified, as umount -f + # will skip disconnect phase and thus OST will have one + # extra refcount what will cause class_cleanup() failure + # if --force is not specified. 
--umka + stop_ost --force || return 5 lsmod | grep -q portals && return 6 return 0 } -run_test 5 "force cleanup mds, then cleanup" +run_test 5 "force cleanup mds, then cleanup --force" test_5b() { start_ost @@ -217,17 +233,17 @@ test_5b() { [ -d $MOUNT ] || mkdir -p $MOUNT $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null start_lgssd || return 1 - llmount $mds_HOST://mds1_svc/client_facet $MOUNT && exit 1 + llmount -o nettype=$NETTYPE $mds_HOST://mds_svc/client_facet $MOUNT && exit 2 # cleanup client modules $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null stop_lgssd # stop_mds is a no-op here, and should not fail - stop_mds || return 2 - stop_ost || return 3 + stop_mds || return 3 + stop_ost || return 4 - lsmod | grep -q portals && return 4 + lsmod | grep -q portals && return 5 return 0 } @@ -240,7 +256,7 @@ test_5c() { [ -d $MOUNT ] || mkdir -p $MOUNT $LCONF --nosetup --node client_facet $XMLCONFIG > /dev/null start_lgssd || return 1 - llmount $mds_HOST://wrong_mds1_svc/client_facet $MOUNT && return 2 + llmount -o nettype=$NETTYPE $mds_HOST://wrong_mds_svc/client_facet $MOUNT && return 2 # cleanup client modules $LCONF --cleanup --nosetup --node client_facet $XMLCONFIG > /dev/null @@ -642,7 +658,7 @@ test_16() { mount_client $MOUNT check_mount || return 41 cleanup || return $? 
- fi + fi echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555" [ -d $TMPMTPT ] || mkdir -p $TMPMTPT diff --git a/lustre/tests/createmany.c b/lustre/tests/createmany.c index 2bf9245..1e6ff1f 100644 --- a/lustre/tests/createmany.c +++ b/lustre/tests/createmany.c @@ -14,14 +14,14 @@ void usage(char *prog) { - printf("usage: %s {-o|-m|-l} filenamefmt count\n", prog); - printf(" %s {-o|-m|-l} filenamefmt -seconds\n", prog); - printf(" %s {-o|-m|-l} filenamefmt start count\n", prog); + printf("usage: %s {-o|-m|-d|-l} filenamefmt count\n", prog); + printf(" %s {-o|-m|-d|-l} filenamefmt -seconds\n", prog); + printf(" %s {-o|-m|-d|-l} filenamefmt start count\n", prog); } int main(int argc, char ** argv) { - int i, rc = 0, do_open = 0, do_link = 0; + int i, rc = 0, do_open = 0, do_link = 0, do_mkdir = 0; char format[4096], *fmt, *tgt = NULL; char filename[4096]; long start, last, end; @@ -32,7 +32,9 @@ int main(int argc, char ** argv) return 1; } - if (strcmp(argv[1], "-o") == 0) { + if (strcmp(argv[1], "-d") == 0) { + do_mkdir = 1; + } else if (strcmp(argv[1], "-o") == 0) { do_open = 1; } else if (strncmp(argv[1], "-l", 2) == 0 && argv[1][2]) { tgt = argv[1] + 2; @@ -85,7 +87,15 @@ int main(int argc, char ** argv) rc = link(tgt, filename); if (rc) { printf("link(%s, %s) error: %s\n", - tgt, filename, strerror(errno)); + tgt, filename, strerror(errno)); + rc = errno; + break; + } + } else if (do_mkdir) { + rc = mkdir(filename, 0755); + if (rc) { + printf("mkdir(%s) error: %s\n", + filename, strerror(errno)); rc = errno; break; } diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh index a45fd39..61a0378 100755 --- a/lustre/tests/echo.sh +++ b/lustre/tests/echo.sh @@ -31,6 +31,13 @@ h2gm () { h2elan () { echo $1 | sed 's/[^0-9]*//g' } + +h2iib () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} # FIXME: make LMC not require MDS for obdecho LOV MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} diff --git a/lustre/tests/fsx.c 
b/lustre/tests/fsx.c index ead1c85..0e0d198 100644 --- a/lustre/tests/fsx.c +++ b/lustre/tests/fsx.c @@ -608,7 +608,7 @@ output_line(struct test_file *tf, int op, unsigned offset, (monitorend == -1 || offset <= monitorend))))))) return; - prt("%06lu %lu.%06lu %*s%-10s %#08x %s %#08x\t(0x%x bytes)\n", + prt("%06lu %lu.%06lu %.*s%-10s %#08x %s %#08x\t(0x%x bytes)\n", testcalls, tv->tv_sec, tv->tv_usec, max_tf_len, tf_num, ops[op], offset, op == OP_TRUNCATE ? " to " : "thru", diff --git a/lustre/tests/lfsck_config.sh b/lustre/tests/lfsck_config.sh index e0a61de..dab466e 100755 --- a/lustre/tests/lfsck_config.sh +++ b/lustre/tests/lfsck_config.sh @@ -43,5 +43,5 @@ i=`expr $i + 1` done # create client config -${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 +${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40 #${LMC} --add mtpt --node localhost --path $MOUNT2 --mds mds1 --lov lov1 || exit 40 diff --git a/lustre/tests/lfscktest.sh b/lustre/tests/lfscktest.sh index b497521..95f7e55 100755 --- a/lustre/tests/lfscktest.sh +++ b/lustre/tests/lfscktest.sh @@ -1,19 +1,18 @@ #!/bin/bash set -vx -set -e +#set -e . 
./lfscktest_config.sh +sh llmount.sh || exit 1 + #Create mount points on target OST and MDS #Create test directory - mkdir -p $OST_MOUNTPT mkdir -p $MDS_MOUNTPT mkdir -p $TEST_DIR -export PATH=$LFSCK_PATH/e2fsck:`dirname $0`:`dirname $0`/../utils:$PATH - -sh llmount.sh || exit 1 +export PATH=$LFSCK_PATH:`dirname $0`:`dirname $0`/../utils:$PATH # Create some files on the filesystem for i in `seq 0 3`; do @@ -27,16 +26,14 @@ for i in `seq 0 3`; do done done done -# Create Files to be modified +# Create Files to be modified file_name=${TESTNAME} - for FILE in `seq -f ${TEST_DIR}/${file_name}.%g 0 40`; do - dd if=/dev/zero count=1 bs=64k of=$FILE || exit 1 + dd if=/dev/zero count=1 bs=64K of=$FILE || exit 1 done #Create some more files - for i in `seq 21 23`; do mkdir -p ${MOUNT}/d$i for j in `seq 0 5`; do @@ -85,25 +82,25 @@ for i in $MDS_FILES; do done #Create EAs on files so objects are referenced twice from different mds files -for i in `seq 40 59`; do +for i in `seq 0 19`; do touch $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.bad.$i copy_attr $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.$i $MDS_MOUNTPT/ROOT/${TESTNAME}/${TESTNAME}.bad.$i || (umount $MDS_MOUNTPT && exit 1) i=`expr $i + 1` done - umount $MDS_MOUNTPT - rmdir $MDS_MOUNTPT - rmdir $OST_MOUNTPT +umount $MDS_MOUNTPT +rmdir $MDS_MOUNTPT +rmdir $OST_MOUNTPT # Run e2fsck to get mds and ost info # a return status of 1 indicates e2fsck successfuly fixed problems found -e2fsck -d -f -y --mdsdb $GPATH/mdsdb $MDSDEV +e2fsck -d -f -y --mdsdb $GPATH/mdsdb $MDSDEV RET=$? [ $RET -ne 0 -a $RET -ne 1 ] && exit 1 i=0 OSTDB_LIST="" while [ $i -lt $NUM_OSTS ]; do - e2fsck -d -f -y --mdsdb $GPATH/mdsdb --ostdb $GPATH/ostdb-$i $TMP/ost$i-`hostname` + e2fsck -d -f -y --mdsdb $GPATH/mdsdb --ostdb $GPATH/ostdb-$i $TMP/ost`expr $i + 1`-`hostname` RET=$? 
[ $RET -ne 0 -a $RET -ne 1 ] && exit 1 if [ -z "${OSTDB_LIST}" ]; then diff --git a/lustre/tests/lfscktest_config.sh b/lustre/tests/lfscktest_config.sh index 6bae2bc..3fc95c0 100644 --- a/lustre/tests/lfscktest_config.sh +++ b/lustre/tests/lfscktest_config.sh @@ -1,7 +1,13 @@ export TESTNAME="lfscktest" export TESTDESC="Test of lfsck functionality" -export LFSCK_PATH=${E2FSCK_PATH:-"/usr/src/e2fsprogs-1.34"} +export LUSTRE=${LUSTRE:-".."} +export LCONF=${LCONF:-"$LUSTRE/utils/lconf"} +export LMC=${LMC:-"$LUSTRE/utils/lmc"} +export LCTL=${LCTL:-"$LUSTRE/utils/lctl"} +export LFIND=${LFIND:-"$LUSTRE/utils/lfind"} + +export LFSCK_PATH=${E2FSCK_PATH:-"/home/yangjun/e2fsprogs-1.35.lfsck2/build/e2fsck"} export TMP=${TMP:-"/tmp"} export LOG=${LOG:-"${TMP}/lfscktest.log"} export LUSTRE_TAG=${LUSTRE_TAG:="HEAD"} @@ -13,3 +19,5 @@ export MDS_MOUNTPT="/mnt/mds_${TESTNAME}" export OST_MOUNTPT="/mnt/ost_${TESTNAME}" export MOUNT="/mnt/lustre" export TEST_DIR="${MOUNT}/${TESTNAME}" +export MDSDEV=${MDSDEV:-$TMP/mds1-`hostname`} +export NUM_OSTS=${NUM_OSTS:-1} diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index bfbe7bc..310587d 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -40,7 +40,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, stripe_count = (int)lum_dir->lmm_stripe_count; if (stripe_count == 0) { - fd = open("/proc/fs/lustre/lov/lov1/stripecount", O_RDONLY); + fd = open("/proc/fs/lustre/llite/fs0/lov/stripecount", O_RDONLY); if (fd == -1) { fprintf(stderr, "open proc file error: %s\n", strerror(errno)); @@ -60,7 +60,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, stripe_size = (int)lum_dir->lmm_stripe_size; if (stripe_size == 0) { - fd = open("/proc/fs/lustre/lov/lov1/stripesize", O_RDONLY); + fd = open("/proc/fs/lustre/llite/fs0/lov/stripesize", O_RDONLY); if (fd == -1) { fprintf(stderr, "open proc file error: %s\n", strerror(errno)); 
@@ -77,7 +77,7 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, close(fd); } - fd = open("/proc/fs/lustre/lov/lov1/numobd", O_RDONLY); + fd = open("/proc/fs/lustre/llite/fs0/lov/numobd", O_RDONLY); if(fd == -1) { fprintf(stderr, "open proc file error: %s\n", strerror(errno)); @@ -95,7 +95,9 @@ int compare(struct lov_user_md *lum_dir, struct lov_user_md *lum_file1, if ((lum_file1->lmm_stripe_count != stripe_count) || (lum_file1->lmm_stripe_size != stripe_size)) + { return -1; + } stripe_offset = (short int)lum_dir->lmm_stripe_offset; if (stripe_offset != -1) { diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh index 17ce6f5..a8901ba 100755 --- a/lustre/tests/llmount.sh +++ b/lustre/tests/llmount.sh @@ -8,8 +8,8 @@ NAME=${NAME:-local} LLMOUNT=${LLMOUNT:-llmount} SECURITY=${SECURITY:-"null"} -config=$NAME.xml -mkconfig=$NAME.sh +config=$(dirname $0)/$NAME.xml +mkconfig=$(dirname $0)/$NAME.sh . krb5_env.sh start_krb5_kdc || exit 1 @@ -30,6 +30,7 @@ else fi [ "$NODE" ] && node_opt="--node $NODE" +[ "$DEBUG" ] && debug_opt="--ptldebug=$DEBUG" # We'd better start lsvcgssd after gss modules loaded. 
# remove this if we don't depend on lsvcgssd in the future diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh index 13886f9..86dd409 100755 --- a/lustre/tests/local.sh +++ b/lustre/tests/local.sh @@ -62,5 +62,5 @@ ${LMC} --add ost --ost ost1 --nspath /mnt/ost_ns --node localhost --lov lov1 \ --backdev $OST_BACKDEV $OST_MOUNT_OPTS --size $OSTSIZE $JARG || exit 30 # create client config -${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 -${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41 +${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40 +${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --clientoptions async --lov lov1 || exit 41 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh index b302b80..598ece5 100755 --- a/lustre/tests/lov.sh +++ b/lustre/tests/lov.sh @@ -55,8 +55,8 @@ done if [ -z "$ECHO_CLIENT" ]; then # create client config - ${LMC} --add mtpt --node localhost --path $MOUNT --mds mds1 --lov lov1 || exit 40 - ${LMC} --add mtpt --node client --path $MOUNT2 --mds mds1 --lov lov1 || exit 41 + ${LMC} --add mtpt --node localhost --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 40 + ${LMC} --add mtpt --node client --path $MOUNT2 --clientoptions async --mds mds1 --lov lov1 || exit 41 else ${LMC} --add echo_client --node localhost --ost lov1 || exit 42 fi diff --git a/lustre/tests/mcr-mds-failover-config.sh b/lustre/tests/mcr-mds-failover-config.sh index 29ec215..29c0f61 100755 --- a/lustre/tests/mcr-mds-failover-config.sh +++ b/lustre/tests/mcr-mds-failover-config.sh @@ -47,4 +47,4 @@ $LMC -m $CONFIG --add ost --node $OST --ost ost_$OST $OST_UUID --dev bluearc $LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2tcp $GW_NODE` --lo $OST # mount -$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov ost_$OST +$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre 
--clientoptions async --mds mds_$ACTIVEMDS --lov ost_$OST diff --git a/lustre/tests/mcr-routed-config.sh b/lustre/tests/mcr-routed-config.sh index bf08dbb..7db8887 100755 --- a/lustre/tests/mcr-routed-config.sh +++ b/lustre/tests/mcr-routed-config.sh @@ -51,7 +51,7 @@ ${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 1048576 --stripe_cnt 1 --stri # Client node #${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1 ${LMC} --add net --node client --nid '*' --nettype elan || exit 1 -${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 +${LMC} --add mtpt --node client --path /mnt/lustre --clientoptions async --mds mds1 --lov lov1 # this is crude, but effective let server_per_gw=($SERVER_CNT / $GW_CNT ) diff --git a/lustre/tests/mcrlov.sh b/lustre/tests/mcrlov.sh index d09866b..0e7981c 100755 --- a/lustre/tests/mcrlov.sh +++ b/lustre/tests/mcrlov.sh @@ -38,7 +38,7 @@ ${LMC} --add net --node $MDS --nid `h2elan $MDS` --nettype elan || exit 1 ${LMC} --add mds --node $MDS --mds mds1 --dev $TMP/mds1 --size 100000 || exit 1 ${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 1048576 --stripe_cnt 0 --stripe_pattern 0 || exit 1 -${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 +${LMC} --add mtpt --node client --path /mnt/lustre --clientoptions async --mds mds1 --lov lov1 for s in $SERVERS do diff --git a/lustre/tests/mount2fs.sh b/lustre/tests/mount2fs.sh index 64decff..f5c8a85 100644 --- a/lustre/tests/mount2fs.sh +++ b/lustre/tests/mount2fs.sh @@ -40,5 +40,5 @@ ${LMC} -m $config --add ost --node $OSTNODE --lov lov1 --fstype $FSTYPE --dev $O ${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --fstype $FSTYPE --dev $OSTDEV2 --size $OSTSIZE || exit 22 # create client config -${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT1} --mds mds1 --lov lov1 || exit 30 -${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT2} --mds mds2 --lov lov2 || exit 30 +${LMC} -m $config --add mtpt --node $CLIENT 
--path ${MOUNT1} --clientoptions async --mds mds1 --lov lov1 || exit 30 +${LMC} -m $config --add mtpt --node $CLIENT --path ${MOUNT2} --clientoptions async --mds mds2 --lov lov2 || exit 30 diff --git a/lustre/tests/multiop.c b/lustre/tests/multiop.c index 776eaea..b147d9d 100755 --- a/lustre/tests/multiop.c +++ b/lustre/tests/multiop.c @@ -45,7 +45,11 @@ char usage[] = " Y fdatasync\n" " z seek to zero\n"; -void null_handler(int unused) { } +static int usr1_received; +void usr1_handler(int unused) +{ + usr1_received = 1; +} static const char * pop_arg(int argc, char *argv[]) @@ -73,14 +77,17 @@ int main(int argc, char **argv) exit(1); } - signal(SIGUSR1, null_handler); + signal(SIGUSR1, usr1_handler); fname = argv[1]; for (commands = argv[2]; *commands; commands++) { switch (*commands) { case '_': - pause(); + if (usr1_received == 0) + pause(); + usr1_received = 0; + signal(SIGUSR1, usr1_handler); break; case 'c': if (close(fd) == -1) { diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh index 114b4f9..9df34ef 100755 --- a/lustre/tests/recovery-cleanup.sh +++ b/lustre/tests/recovery-cleanup.sh @@ -59,7 +59,7 @@ make_config() { --stripe_cnt 0 --stripe_pattern 0 || exit 6 lmc -m $CONFIG --add ost --nspath /mnt/ost_ns --node $OSTNODE \ --lov lov1 --dev $OSTDEV --size $OSTSIZE --fstype $FSTYPE || exit 7 - lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \ + lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --clientoptions async --mds mds1 \ --lov lov1 || exit 8 } diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 6e9d31d..2f84c01 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2,8 +2,8 @@ set -e -# bug 2986 -ALWAYS_EXCEPT="20b" +# 20b: bug 2986 +ALWAYS_EXCEPT=" 20b" LUSTRE=${LUSTRE:-`dirname $0`/..} @@ -92,6 +92,8 @@ fi REFORMAT=--reformat $SETUP unset REFORMAT +[ "$ONLY" == "setup" ] && exit + test_1() { drop_request "mcreate $MOUNT/1" || return 
1 drop_reint_reply "mcreate $MOUNT/2" || return 2 @@ -190,6 +192,7 @@ test_12(){ #define OBD_FAIL_MDS_CLOSE_NET 0x115 sleep 2 kill -USR1 $PID + cancel_lru_locks MDC # force the close echo "waiting for multiop $PID" wait $PID || return 2 do_facet client munlink $MOUNT/$tfile || return 3 @@ -198,13 +201,13 @@ run_test 12 "recover from timed out resend in ptlrpcd (b=2494)" # Bug 113, check that readdir lost recv timeout works. test_13() { - mkdir /mnt/lustre/readdir - touch /mnt/lustre/readdir/newentry + mkdir /mnt/lustre/readdir || return 1 + touch /mnt/lustre/readdir/newentry || return # OBD_FAIL_MDS_READPAGE_NET|OBD_FAIL_ONCE do_facet mds "sysctl -w lustre.fail_loc=0x80000104" - ls /mnt/lustre/readdir || return 1 + ls /mnt/lustre/readdir || return 3 do_facet mds "sysctl -w lustre.fail_loc=0" - rm -rf /mnt/lustre/readdir + rm -rf /mnt/lustre/readdir || return 4 } run_test 13 "mdc_readpage restart test (bug 1138)" @@ -238,6 +241,10 @@ start_read_ahead() { done } +# recovery timeout. This actually should be taken from +# obd_timeout +RECOV_TIMEOUT=30 + test_16() { do_facet client cp /etc/termcap $MOUNT sync @@ -250,7 +257,7 @@ test_16() { do_facet client "cmp /etc/termcap $MOUNT/termcap" && return 1 sysctl -w lustre.fail_loc=0 # give recovery a chance to finish (shouldn't take long) - sleep $TIMEOUT + sleep $RECOV_TIMEOUT do_facet client "cmp /etc/termcap $MOUNT/termcap" || return 2 start_read_ahead } @@ -262,7 +269,7 @@ test_17() { sysctl -w lustre.fail_loc=0x80000503 do_facet client cp /etc/termcap $DIR/$tfile - sleep $TIMEOUT + sleep $RECOV_TIMEOUT sysctl -w lustre.fail_loc=0 do_facet client "df $DIR" # expect cmp to fail @@ -380,4 +387,233 @@ test_20b() { # bug 2986 - ldlm_handle_enqueue error during open } run_test 20b "ldlm_handle_enqueue error (should return error)" +test_21a() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! 
+ + do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + multiop $DIR/$tdir-2/f Oc & + open_pid=$! + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 2 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 3 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 4 + + rm -rf $DIR/$tdir-* +} +run_test 21a "drop close request while close and open are both in flight" + +test_21b() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + mcreate $DIR/$tdir-2/f & + open_pid=$! + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 3 + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 4 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 5 + rm -rf $DIR/$tdir-* +} +run_test 21b "drop open request while close and open are both in flight" + +test_21c() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + close_pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + mcreate $DIR/$tdir-2/f & + open_pid=$! + sleep 3 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $close_pid + cancel_lru_locks MDC # force the close + wait $close_pid || return 1 + wait $open_pid || return 2 + + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21c "drop both request while close and open are both in flight" + +test_21d() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! 
+ + do_facet mds "sysctl -w lustre.fail_loc=0x80000129" + multiop $DIR/$tdir-2/f Oc & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + + rm -rf $DIR/$tdir-* +} +run_test 21d "drop close reply while close and open are both in flight" + +test_21e() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + + sleep $TIMEOUT + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21e "drop open reply while close and open are both in flight" + +test_21f() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21f "drop both reply while close and open are both in flight" + +test_21g() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! 
+ + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/$tdir-2/f & + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $pid + cancel_lru_locks MDC # force the close + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 2 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 3 + rm -rf $DIR/$tdir-* +} +run_test 21g "drop open reply and close request while close and open are both in flight" + +test_21h() { + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + touch $DIR/$tdir-2/f & + touch_pid=$! + sleep 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + cancel_lru_locks MDC # force the close + kill -USR1 $pid + wait $pid || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + wait $touch_pid || return 2 + + $CHECKSTAT -t file $DIR/$tdir-1/f || return 3 + $CHECKSTAT -t file $DIR/$tdir-2/f || return 4 + rm -rf $DIR/$tdir-* +} +run_test 21h "drop open request and close reply while close and open are both in flight" + +# bug 3462 - multiple MDC requests +test_22() { + f1=$DIR/${tfile}-1 + f2=$DIR/${tfile}-2 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + multiop $f2 Oc & + close_pid=$! + + sleep 1 + multiop $f1 msu || return 1 + + cancel_lru_locks MDC # force the close + do_facet mds "sysctl -w lustre.fail_loc=0" + + wait $close_pid || return 2 + rm -rf $f2 || return 4 +} +run_test 22 "drop close request and do mknod" + +test_23() { #b=4561 + multiop $DIR/$tfile O_c & + pid=$! 
+ # give a chance for open + sleep 5 + + # try the close + drop_request "kill -USR1 $pid" + + fail mds + wait $pid || return 1 + return 0 +} +#run_test 23 "client hang when close a file after mds crash" + + $CLEANUP diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 8848b78..bd5a748 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -51,7 +51,16 @@ cleanup() { umount $MOUNT2 || true umount $MOUNT || true rmmod llite + + # b=3941 + # In mds recovery, the mds will clear orphans in ost by + # mds_lov_clear_orphan, which will sent the request to ost and waiting for + # the reply, if we stop mds at this time, we will got the obd_refcount > 1 + # errors, because mds_lov_clear_orphan grab a export of mds, + # so the obd_refcount of mds will not be zero. So, wait a while before + # stop mds. This bug needs further work. for mds in `mds_list`; do + sleep 5 stop $mds ${FORCE} $MDSLCONFARGS done stop_lgssd @@ -345,6 +354,7 @@ test_14() { facet_failover mds1 # expect failover to fail df $MOUNT && return 1 + sleep 1 # first 25 files shouuld have been # replayed @@ -364,6 +374,7 @@ test_15() { facet_failover mds1 df $MOUNT || return 1 + sleep 1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -381,6 +392,7 @@ test_16() { sleep $TIMEOUT facet_failover mds1 df $MOUNT || return 1 + sleep 1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -403,6 +415,7 @@ test_17() { sleep $TIMEOUT facet_failover ost df $MOUNT || return 1 + sleep 1 unlinkmany $MOUNT1/$tfile- 25 || return 2 @@ -431,7 +444,6 @@ test_18 () { } run_test 18 "replay open, Abort recovery, don't assert (3892)" - # cleanup with blocked enqueue fails until timer elapses (MDS busy), wait for # itexport NOW=0 diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index bda77df..398386d 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -138,11 +138,15 @@ test_4() { run_test 4 "Fail OST during read, with verification" 
test_5() { - IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r 64 -s 1g" + FREE=`df -P -h $DIR | tail -n 1 | awk '{ print $3 }'` + case $FREE in + *T|*G) FREE=1G;; + esac + IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r 4 -s $FREE" iozone $IOZONE_OPTS -f $DIR/$tfile & PID=$! - sleep 10 + sleep 8 fail ost wait $PID || return 1 rm -f $DIR/$tfile diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index ef0e09c..b8cbd9b 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -18,7 +18,8 @@ build_test_filter assert_env MDSCOUNT # Skip these tests -ALWAYS_EXCEPT="" +# 46 - The MDS will always have to force close the cached opens +ALWAYS_EXCEPT="46" if [ `using_krb5_sec $SECURITY` == 'n' ] ; then ALWAYS_EXCEPT="0c $ALWAYS_EXCEPT" @@ -935,6 +936,7 @@ test_45() { wait $pid || return 1 $LCTL --device $mdcdev activate + sleep 1 $CHECKSTAT -t file $DIR/$tfile || return 2 return 0 @@ -1017,6 +1019,207 @@ test_50() { } run_test 50 "Double OSC recovery, don't LASSERT (3812)" +# bug 3462 - simultaneous MDC requests +test_51a() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + touch $DIR/${tdir}-2/f + multiop $DIR/${tdir}-1/f O_c & + pid=$! + # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 1 + + fail mds + + wait $pid || return 2 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51a "|X| close request while two MDC requests in flight" + +test_51b() { + replay_barrier_nodf mds + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! 
+ # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + touch $DIR/${tdir}-2/f & + usleep 500 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $pid + wait $pid || return 1 + + fail mds + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51b "|X| open request while two MDC requests in flight" + +test_51c() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + pid=$! + # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + touch $DIR/${tdir}-2/f & + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + + fail mds + + wait $pid || return 1 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51c "|X| open request and close request while two MDC requests in flight" + +test_51d() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + touch $DIR/${tdir}-2/f + multiop $DIR/${tdir}-1/f O_c & + pid=$! + # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + #$CHECKSTAT -t file $DIR/${tdir}-2/f || return 1 + + fail mds + + wait $pid || return 2 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51d "|X| close reply while two MDC requests in flight" + +test_51e() { + replay_barrier_nodf mds + mkdir -p $DIR/$tdir-1 + mkdir -p $DIR/$tdir-2 + multiop $DIR/$tdir-1/f O_c & + pid=$! 
+ # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/${tdir}-2/f & + usleep 500 + do_facet mds "sysctl -w lustre.fail_loc=0" + + kill -USR1 $pid + wait $pid || return 1 + + fail mds + + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51e "|X| open reply while two MDC requests in flight" + +test_51f() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + pid=$! + # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/${tdir}-2/f & + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + + fail mds + + wait $pid || return 1 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51f "|X| open reply and close reply while two MDC requests in flight" + +test_51g() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + pid=$! + # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000119" + touch $DIR/${tdir}-2/f & + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000115" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + + fail mds + + wait $pid || return 1 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51g "|X| open reply and close request while two MDC requests in flight" + +test_51h() { + replay_barrier_nodf mds + mkdir -p $DIR/${tdir}-1 + mkdir -p $DIR/${tdir}-2 + multiop $DIR/${tdir}-1/f O_c & + pid=$! 
+ # give multiop a chance to open + sleep 1 + + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + touch $DIR/${tdir}-2/f & + do_facet mds "sysctl -w lustre.fail_loc=0" + + do_facet mds "sysctl -w lustre.fail_loc=0x80000122" + kill -USR1 $pid + do_facet mds "sysctl -w lustre.fail_loc=0" + + fail mds + + wait $pid || return 1 + $CHECKSTAT -t file $DIR/${tdir}-1/f || return 2 + $CHECKSTAT -t file $DIR/${tdir}-2/f || return 3 + rm -rf $DIR/${tdir}-* +} +run_test 51h "|X| open request and close reply while two MDC requests in flight" + # b3764 timed out lock replay test_52() { touch $DIR/$tfile @@ -1032,6 +1235,58 @@ test_52() { } run_test 52 "time out lock replay (3764)" +test_53() { + replay_barrier_nodf mds + f1=$DIR/${tfile}-1 + cat < $f1 +#!/bin/sh +true +EOF + chmod +x $f1 + do_facet mds "sysctl -w lustre.fail_loc=0x80000107" + $f1 || return 1 + do_facet mds "sysctl -w lustre.fail_loc=0" + + fail mds + rm -f $f1 +} +run_test 53 "|X| open request and close reply while two MDC requests in flight" + +test_54() { + replay_barrier mds + createmany -o $DIR/$tfile 20 + unlinkmany $DIR/$tfile 20 + fail mds +} +run_test 54 "|X| open request and close reply while two MDC requests in flight" + +#b3440 ASSERTION(rec->ur_fid2->id) failed +test_55() { + sysctl -w portals.debug=-1 portals.debug_mb=25 + ln -s foo $DIR/$tfile + replay_barrier mds + #drop_reply "cat $DIR/$tfile" + fail mds + sleep 10 + lctl dk /r/tmp/debug +} +run_test 55 "don't replay a symlink open request (3440)" + +#b3761 ASSERTION(hash != 0) failed +test_56() { +# OBD_FAIL_MDS_OPEN_CREATE | OBD_FAIL_ONCE + do_facet mds "sysctl -w lustre.fail_loc=0x8000012b" + touch $DIR/$tfile + pid=$! 
+ # give a chance for touch to run + sleep 5 + do_facet mds "sysctl -w lustre.fail_loc=0x0" + wait $pid || return 1 + rm $DIR/$tfile + return 0 +} +run_test 56 "let MDS_CHECK_RESENT return the original return code instead of 0 + equals_msg test complete, cleaning up $CLEANUP diff --git a/lustre/tests/runtests b/lustre/tests/runtests index aa30f9f..05914d7 100755 --- a/lustre/tests/runtests +++ b/lustre/tests/runtests @@ -22,9 +22,9 @@ ERROR= SRC=/etc [ "$COUNT" ] || COUNT=1000 -[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf +[ "$LCONF" ] || LCONF=lconf -[ "$MCREATE" ] || MCREATE=$SRCDIR/../tests/mcreate +[ "$MCREATE" ] || MCREATE=mcreate [ "$MKDIRMANY" ] || MKDIRMANY=$SRCDIR/../tests/mkdirmany @@ -117,7 +117,7 @@ if [ $COUNT -gt 10 -o $COUNT -eq 0 ]; then fi # mkdirmany test (bug 589) -log "running mkdirmany $MOUNT/base$$ 100" +log "running $MKDIRMANY $MOUNT/base$$ 100" $MKDIRMANY $MOUNT/base$$ 100 || fail "mkdirmany failed" log "removing mkdirmany directories" rmdir $MOUNT/base$$* || fail "mkdirmany cleanup failed" diff --git a/lustre/tests/sanity-fid.sh b/lustre/tests/sanity-fid.sh index 5d61718..4fb86af 100644 --- a/lustre/tests/sanity-fid.sh +++ b/lustre/tests/sanity-fid.sh @@ -10,6 +10,7 @@ ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} SRCDIR=`dirname $0` export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH +export SECURITY=${SECURITY:-"null"} TMP=${TMP:-/tmp} FSTYPE=${FSTYPE:-ext3} @@ -34,6 +35,8 @@ IOPENTEST2=${IOPENTEST2:-iopentest2} PTLDEBUG=${PTLDEBUG:-0} MODE=${MODE:mds} +. 
krb5_env.sh + if [ $UID -ne 0 ]; then RUNAS_ID="$UID" RUNAS="" @@ -42,6 +45,13 @@ else RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} fi +if [ `using_krb5_sec $SECURITY` == 'y' ] ; then + start_krb5_kdc || exit 1 + if [ $RUNAS_ID -ne $UID ]; then + $RUNAS ./krb5_refresh_cache.sh || exit 2 + fi +fi + export NAME=${NAME:-local} SAVE_PWD=$PWD diff --git a/lustre/tests/sanity-gns.sh b/lustre/tests/sanity-gns.sh index 74e5657..64497b4 100644 --- a/lustre/tests/sanity-gns.sh +++ b/lustre/tests/sanity-gns.sh @@ -12,6 +12,7 @@ ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-""} SRCDIR=`dirname $0` export PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH +export SECURITY=${SECURITY:-"null"} TMP=${TMP:-/tmp} FSTYPE=${FSTYPE:-ext3} @@ -35,6 +36,8 @@ IOPENTEST1=${IOPENTEST1:-iopentest1} IOPENTEST2=${IOPENTEST2:-iopentest2} PTLDEBUG=${PTLDEBUG:-0} +. krb5_env.sh + if [ $UID -ne 0 ]; then RUNAS_ID="$UID" RUNAS="" @@ -43,6 +46,13 @@ else RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} fi +if [ `using_krb5_sec $SECURITY` == 'y' ] ; then + start_krb5_kdc || exit 1 + if [ $RUNAS_ID -ne $UID ]; then + $RUNAS ./krb5_refresh_cache.sh || exit 2 + fi +fi + export NAME=${NAME:-local} SAVE_PWD=$PWD diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index ded1e08..33ad8e5 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -7,11 +7,14 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 2739 -# 51b and 51c depend on kernel -# 65* fixes in b_hd_cray_merge3 -# the new kernel api make 48 not valid anymore -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"48 51b 51c 65a 65b 65c 65d 65e 65f"} +# bug number for skipped tests: +# skipped test: +# - 51b 51c depend on used kernel +# more than only LOV EAs +# - 65h (default stripe inheritance) is not implemented for LMV +# configurations. Will be done in second phase of collibri. + +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"51b 51c 65h"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
[ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" @@ -106,12 +109,12 @@ run_one() { if ! mount | grep -q $DIR; then $START fi - echo -1 >/proc/sys/portals/debug - log "== test $1: $2= `date +%H:%M:%S`" + BEFORE=`date +%s` + log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)" export TESTNAME=test_$1 test_$1 || error "test_$1: exit with rc=$?" unset TESTNAME - pass + pass "($((`date +%s` - $BEFORE))s)" cd $SAVE_PWD $CLEAN } @@ -126,11 +129,11 @@ build_test_filter() { } _basetest() { - echo $* + echo $* } basetest() { - IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 + IFS=abcdefghijklmnopqrstuvwxyz _basetest $1 } run_test() { @@ -175,7 +178,7 @@ error() { } pass() { - echo PASS + echo PASS $@ } MOUNT="`mount | awk '/^'$NAME' .* lustre_lite / { print $3 }'`" @@ -194,6 +197,8 @@ DIR=${DIR:-$MOUNT} OSTCOUNT=`cat /proc/fs/lustre/llite/fs0/lov/numobd` STRIPECOUNT=`cat /proc/fs/lustre/llite/fs0/lov/stripecount` STRIPESIZE=`cat /proc/fs/lustre/llite/fs0/lov/stripesize` +ORIGFREE=`cat /proc/fs/lustre/llite/fs0/lov/kbytesavail` +MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo [ -f $DIR/d52b/foo ] && chattr -i $DIR/d52b/foo @@ -455,7 +460,7 @@ test_16() { run_test 16 "touch .../d16/f; rm -rf .../d16/f =================" test_17a() { - mkdir $DIR/d17 + mkdir -p $DIR/d17 touch $DIR/d17/f ln -s $DIR/d17/f $DIR/d17/l-exist ls -l $DIR/d17 @@ -467,9 +472,7 @@ test_17a() { run_test 17a "symlinks: create, remove (real) ==================" test_17b() { - if [ ! 
-d $DIR/d17 ]; then - mkdir $DIR/d17 - fi + mkdir -p $DIR/d17 ln -s no-such-file $DIR/d17/l-dangle ls -l $DIR/d17 $CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error @@ -479,6 +482,20 @@ test_17b() { } run_test 17b "symlinks: create, remove (dangling) ==============" +test_17c() { # bug 3440 - don't save failed open RPC for replay + mkdir -p $DIR/d17 + ln -s foo $DIR/d17/f17c + cat $DIR/d17/f17c && error "opened non-existent symlink" || true +} +run_test 17c "symlinks: open dangling (should return error) ====" + +test_17d() { + mkdir -p $DIR/d17 + ln -s foo $DIR/d17/f17d + touch $DIR/d17/f17d || error "creating to new symlink" +} +run_test 17d "symlinks: create dangling ========================" + test_18() { touch $DIR/f ls $DIR || error @@ -889,6 +906,33 @@ test_27l() { } run_test 27l "check setstripe permissions (should return error)" +test_27m() { + [ "$OSTCOUNT" -lt "2" ] && echo "skipping out-of-space test on OST0" && return + if [ $ORIGFREE -gt $MAXFREE ]; then + echo "skipping out-of-space test on OST0" + return + fi + mkdir -p $DIR/d27 + $LSTRIPE $DIR/d27/f27m_1 0 0 1 + dd if=/dev/zero of=$DIR/d27/f27m_1 bs=1024 count=$MAXFREE && \ + error "dd should fill OST0" + i=2 + while $LSTRIPE $DIR/d27/f27m_$i 0 0 1 ; do + i=`expr $i + 1` + [ $i -gt 256 ] && break + done + i=`expr $i + 1` + touch $DIR/d27/f27m_$i + [ `$LFIND $DIR/d27/f27m_$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "0"` ] && \ + error "OST0 was full but new created file still use it" + i=`expr $i + 1` + touch $DIR/d27/f27m_$i + [ `$LFIND $DIR/d27/f27m_$i | grep -A 10 obdidx | awk '{print $1}'| grep -w "0"` ] && \ + error "OST0 was full but new created file still use it" + rm $DIR/d27/f27m_1 +} +run_test 27m "create file while OST0 was full ==================" + test_28() { mkdir $DIR/d28 $CREATETEST $DIR/d28/ct || error @@ -972,6 +1016,40 @@ test_31e() { # bug 2904 } run_test 31e "remove of open non-empty directory ===============" +test_31f() { # bug 4554 + set -vx + mkdir $DIR/d31f + lfs 
setstripe $DIR/d31f 1048576 -1 1 + cp /etc/hosts $DIR/d31f + ls -l $DIR/d31f + lfs getstripe $DIR/d31f/hosts + multiop $DIR/d31f D_c & + MULTIPID=$! + + sleep 1 + + rm -rv $DIR/d31f || error "first of $DIR/d31f" + mkdir $DIR/d31f + lfs setstripe $DIR/d31f 1048576 -1 1 + cp /etc/hosts $DIR/d31f + ls -l $DIR/d31f + lfs getstripe $DIR/d31f/hosts + multiop $DIR/d31f D_c & + MULTIPID2=$! + + sleep 6 + + kill -USR1 $MULTIPID || error "first opendir $MULTIPID not running" + wait $MULTIPID || error "first opendir $MULTIPID failed" + + sleep 6 + + kill -USR1 $MULTIPID2 || error "second opendir $MULTIPID not running" + wait $MULTIPID2 || error "second opendir $MULTIPID2 failed" + set +vx +} +run_test 31f "remove of open directory with open-unlink file ===" + test_32a() { echo "== more mountpoints and symlinks =================" [ -e $DIR/d32a ] && rm -fr $DIR/d32a @@ -1687,6 +1765,26 @@ test_48d() { # bug 2350 } run_test 48d "Access removed parent subdir (should return errors)" +test_48e() { # bug 4134 + check_kernel_version 41 || return 0 + #sysctl -w portals.debug=-1 + #set -vx + mkdir -p $DIR/d48e/dir + # On a buggy kernel addition of "; touch file" after cd .. will + # produce kernel oops in lookup_hash_it + + cd $DIR/d48e/dir + ( sleep 2 && cd -P .. ) & + cdpid=$! + $TRACE rmdir $DIR/d48e/dir || error "remove cwd $DIR/d48e/dir failed" + $TRACE rmdir $DIR/d48e || error "remove parent $DIR/d48e failed" + $TRACE touch $DIR/d48e || error "'touch $DIR/d48e' failed" + $TRACE chmod +x $DIR/d48e || error "'chmod +x $DIR/d48e' failed" + $TRACE wait $cdpid && error "'cd ..' 
worked after recreate parent" + $TRACE rm $DIR/d48e || error "'$DIR/d48e' failed" +} +run_test 48e "Access to recreated parent (should return errors) " + test_50() { # bug 1485 mkdir $DIR/d50 @@ -1712,15 +1810,18 @@ test_51() { } run_test 51 "special situations: split htree with empty entry ==" +export NUMTEST=70000 test_51b() { - NUMTEST=70000 - check_kernel_version 40 || NUMTEST=31000 - NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` - [ $NUMFREE -lt $NUMTEST ] && \ - echo "skipping test 51b, not enough free inodes($NUMFREE)" && \ - return - mkdir -p $DIR/d51b - (cd $DIR/d51b; mkdirmany t $NUMTEST) + NUMFREE=`df -i -P $DIR | tail -n 1 | awk '{ print $4 }'` + [ $NUMFREE -lt 21000 ] && \ + echo "skipping test 51b, not enough free inodes($NUMFREE)" && \ + return + + check_kernel_version 40 || NUMTEST=31000 + [ $NUMFREE -lt $NUMTEST ] && NUMTEST=$(($NUMFREE - 50)) + + mkdir -p $DIR/d51b + (cd $DIR/d51b; mkdirmany t $NUMTEST) } run_test 51b "mkdir .../t-0 --- .../t-$NUMTEST ====================" @@ -2055,27 +2156,27 @@ run_test 64b "check out-of-space detection on client ===========" # bug 1414 - set/get directories' stripe info test_65a() { - mkdir -p $DIR/d65 - touch $DIR/d65/f1 - $LVERIFY $DIR/d65 $DIR/d65/f1 || error "lverify failed" + mkdir -p $DIR/d65a + touch $DIR/d65a/f1 + $LVERIFY $DIR/d65a $DIR/d65a/f1 || error "lverify failed" } run_test 65a "directory with no stripe info ====================" test_65b() { - mkdir -p $DIR/d65 - $LSTRIPE $DIR/d65 $(($STRIPESIZE * 2)) 0 1 || error "setstripe" - touch $DIR/d65/f2 - $LVERIFY $DIR/d65 $DIR/d65/f2 || error "lverify failed" + mkdir -p $DIR/d65b + $LSTRIPE $DIR/d65b $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + touch $DIR/d65b/f2 + $LVERIFY $DIR/d65b $DIR/d65b/f2 || error "lverify failed" } run_test 65b "directory setstripe $(($STRIPESIZE * 2)) 0 1 ===============" test_65c() { if [ $OSTCOUNT -gt 1 ]; then - mkdir -p $DIR/d65 - $LSTRIPE $DIR/d65 $(($STRIPESIZE * 4)) 1 \ + mkdir -p $DIR/d65c + $LSTRIPE 
$DIR/d65c $(($STRIPESIZE * 4)) 1 \ $(($OSTCOUNT - 1)) || error "setstripe" - touch $DIR/d65/f3 - $LVERIFY $DIR/d65 $DIR/d65/f3 || error "lverify failed" + touch $DIR/d65c/f3 + $LVERIFY $DIR/d65c $DIR/d65c/f3 || error "lverify failed" fi } run_test 65c "directory setstripe $(($STRIPESIZE * 4)) 1 $(($OSTCOUNT - 1))" @@ -2083,19 +2184,20 @@ run_test 65c "directory setstripe $(($STRIPESIZE * 4)) 1 $(($OSTCOUNT - 1))" [ $STRIPECOUNT -eq 0 ] && sc=1 || sc=$(($STRIPECOUNT - 1)) test_65d() { - mkdir -p $DIR/d65 - $LSTRIPE $DIR/d65 $STRIPESIZE -1 $sc || error "setstripe" - touch $DIR/d65/f4 $DIR/d65/f5 - $LVERIFY $DIR/d65 $DIR/d65/f4 $DIR/d65/f5 || error "lverify failed" + mkdir -p $DIR/d65d + $LSTRIPE $DIR/d65d $STRIPESIZE -1 $sc || error "setstripe" + touch $DIR/d65d/f4 $DIR/d65d/f5 + $LVERIFY $DIR/d65d $DIR/d65d/f4 $DIR/d65d/f5 || error "lverify failed" } run_test 65d "directory setstripe $STRIPESIZE -1 $sc ======================" test_65e() { - mkdir -p $DIR/d65 + mkdir -p $DIR/d65e - $LSTRIPE $DIR/d65 0 -1 0 || error "setstripe" - touch $DIR/d65/f6 - $LVERIFY $DIR/d65 $DIR/d65/f6 || error "lverify failed" + $LSTRIPE $DIR/d65e 0 -1 0 || error "setstripe" + $LFS find -v $DIR/d65e | grep "$DIR/d65e/ has no stripe info" || error "no stripe info failed" + touch $DIR/d65e/f6 + $LVERIFY $DIR/d65e $DIR/d65e/f6 || error "lverify failed" } run_test 65e "directory setstripe 0 -1 0 (default) =============" @@ -2105,6 +2207,23 @@ test_65f() { } run_test 65f "dir setstripe permission (should return error) ===" +test_65g() { + mkdir -p $DIR/d65g + $LSTRIPE $DIR/d65g $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + $LSTRIPE -d $DIR/d65g || error "deleting stripe info failed" + $LFS find -v $DIR/d65g | grep "$DIR/d65g/ has no stripe info" || error "no stripe info failed" +} +run_test 65g "directory setstripe -d ========" + +test_65h() { + mkdir -p $DIR/d65h + $LSTRIPE $DIR/d65h $(($STRIPESIZE * 2)) 0 1 || error "setstripe" + mkdir -p $DIR/d65h/dd1 + [ "`$LFS find -v $DIR/d65h | grep 
"^count"`" == \ + "`$LFS find -v $DIR/d65h/dd1 | grep "^count"`" ] || error "stripe info inherit failed" +} +run_test 65h "directory stripe info inherit ======" + # bug 2543 - update blocks count on client test_66() { COUNT=${COUNT:-8} @@ -2170,6 +2289,88 @@ test_68() { } run_test 68 "support swapping to Lustre ========================" +# bug 3462 - multiple simultaneous MDC requests +test_69() { + mkdir $DIR/D68-1 + mkdir $DIR/D68-2 + multiop $DIR/D68-1/f68-1 O_c & + pid1=$! + #give multiop a chance to open + usleep 500 + + echo 0x80000129 > /proc/sys/lustre/fail_loc + multiop $DIR/D68-1/f68-2 Oc & + sleep 1 + echo 0 > /proc/sys/lustre/fail_loc + + multiop $DIR/D68-2/f68-3 Oc & + pid3=$! + + kill -USR1 $pid1 + wait $pid1 || return 1 + + sleep 25 + + $CHECKSTAT -t file $DIR/D68-1/f68-1 || return 4 + $CHECKSTAT -t file $DIR/D68-1/f68-2 || return 5 + $CHECKSTAT -t file $DIR/D68-2/f68-3 || return 6 + + rm -rf $DIR/D68-* +} +run_test 69 "multiple MDC requests (should not deadlock)" + + +test_70() { + STAT="/proc/fs/lustre/osc/OSC*MNT*/stats" + mkdir $DIR/d70 + dd if=/dev/zero of=$DIR/d70/file bs=512 count=5 + cancel_lru_locks OSC + cat $DIR/d70/file >/dev/null + # Hopefully there is only one. + ENQ=`cat $STAT|awk -vnum=0 '/ldlm_enq/ {num += $2} END {print num;}'` + CONV=`cat $STAT|awk -vnum=0 '/ldlm_conv/ {num += $2} END {print num;}'` + CNCL=`cat $STAT|awk -vnum=0 '/ldlm_canc/ {num += $2} END {print num;}'` + dd if=/dev/zero of=$DIR/d70/file bs=512 count=5 + ENQ1=`cat $STAT|awk -vnum=0 '/ldlm_enq/ {num += $2} END {print num;}'` + CONV1=`cat $STAT|awk -vnum=0 '/ldlm_conv/ {num += $2} END {print num;}'` + CNCL1=`cat $STAT|awk -vnum=0 '/ldlm_canc/ {num += $2} END {print num;}'` + + if [ $CONV1 -le $CONV ] ; then + error "No conversion happened. 
Before: enq $ENQ, conv $CONV, cancel $CNCL ; After: enq $ENQ1, conv $CONV1, cancel $CNCL1" + else + echo "OK" + true + fi + +} +run_test 70 "Test that PR->PW conversion takes place ===========" + +test_71() { + cp `which dbench` $DIR + + [ ! -f $DIR/dbench ] && echo "dbench not installed, skip this test" && return 0 + + TGT=$DIR/client.txt + SRC=${SRC:-/usr/lib/dbench/client.txt} + [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT + SRC=/usr/lib/dbench/client_plain.txt + [ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT + + echo "copying /lib to $DIR" + cp -r /lib $DIR/lib + + echo "chroot $DIR /dbench -c client.txt 2" + chroot $DIR /dbench -c client.txt 2 + RC=$? + + rm -f $DIR/dbench + rm -f $TGT + rm -fr $DIR/lib + + return $RC +} +run_test 71 "Running dbench on lustre (don't segment fault) ====" + # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS # error message with a corrupt path string (CVS bug) and panic. 
@@ -2231,6 +2432,19 @@ test_99f() { } run_test 99f "cvs commit =======================================" +test_100() { + netstat -ta | while read PROT SND RCV LOCAL REMOTE STAT; do + LPORT=`echo $LOCAL | cut -d: -f2` + RPORT=`echo $REMOTE | cut -d: -f2` + if [ "$PROT" = "tcp" ] && [ "$LPORT" != "*" ] && [ "$RPORT" != "*" ] && [ $RPORT -eq 988 ] && [ $LPORT -gt 1024 ]; then + echo "local port: $LPORT > 1024" + error + fi + done +} +run_test 100 "check local port using privileged port ===========" + + TMPDIR=$OLDTMPDIR TMP=$OLDTMP HOME=$OLDHOME diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index faecfc4..96532b7 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -3,8 +3,8 @@ set -e ONLY=${ONLY:-"$*"} -# bug number for skipped test: 1768 3192 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"} +# bug number for skipped test: 1768 3192 3192 +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"4 14b 14c"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! [ "$ALWAYS_EXCEPT$EXCEPT" ] && echo "Skipping tests: $ALWAYS_EXCEPT $EXCEPT" @@ -22,6 +22,7 @@ OPENFILE=${OPENFILE:-openfile} OPENUNLINK=${OPENUNLINK:-openunlink} TOEXCL=${TOEXCL:-toexcl} TRUNCATE=${TRUNCATE:-truncate} +export TMP=${TMP:-/tmp} if [ $UID -ne 0 ]; then RUNAS_ID="$UID" @@ -53,15 +54,25 @@ log() { lctl mark "$*" 2> /dev/null || true } +trace() { + log "STARTING: $*" + strace -o $TMP/$1.strace -ttt $* + RC=$? + log "FINISHED: $*: rc $RC" + return 1 +} +TRACE=${TRACE:-""} + run_one() { if ! mount | grep -q $DIR1; then $START fi - log "== test $1: $2" + BEFORE=`date +%s` + log "== test $1: $2= `date +%H:%M:%S` ($BEFORE)" export TESTNAME=test_$1 test_$1 || error "test_$1: exit with rc=$?" 
unset TESTNAME - pass + pass "($((`date +%s` - $BEFORE))s)" cd $SAVE_PWD $CLEAN } @@ -100,7 +111,7 @@ error () { } pass() { - echo PASS + echo PASS $@ } export MOUNT1=`mount| awk '/ lustre/ { print $3 }'| head -n 1` @@ -362,7 +373,8 @@ test_17() { # bug 3513, 3667 run_test 17 "resource creation/LVB creation race ===============" test_18() { - ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + ./mmap_sanity -d $MOUNT1 -m $MOUNT2 + sync; sleep 1; sync } run_test 18 "mmap sanity check =================================" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 395184d..66965ed 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -174,6 +174,15 @@ replay_barrier() { $LCTL mark "REPLAY BARRIER" } +replay_barrier_nodf() { + local facet=$1 + do_facet $facet sync + do_facet $facet $LCTL --device %${facet}_svc readonly + do_facet $facet $LCTL --device %${facet}_svc notransno + do_facet $facet $LCTL mark "REPLAY BARRIER" + $LCTL mark "REPLAY BARRIER" +} + mds_evict_client() { UUID=`cat /proc/fs/lustre/mdc/*_MNT_*/uuid` do_facet mds "echo $UUID > /proc/fs/lustre/mds/mds1_svc/evict_client" @@ -403,7 +412,7 @@ add_client() { mds=$2 shift; shift add_facet $facet --lustre_upcall $UPCALL - do_lmc --add mtpt --node ${facet}_facet --mds ${mds}_svc $* + do_lmc --add mtpt --node ${facet}_facet --clientoptions async --mds ${mds}_svc $* } config_commit() { @@ -550,9 +559,11 @@ error() { } build_test_filter() { + [ "$ONLY" ] && log "only running $ONLY" for O in $ONLY; do eval ONLY_${O}=true done + [ "$EXCEPT$ALWAYS_EXCEPT" ] && log "skipping $EXCEPT $ALWAYS_EXCEPT" for E in $EXCEPT $ALWAYS_EXCEPT; do eval EXCEPT_${E}=true done @@ -611,6 +622,10 @@ log() { lctl mark "$*" 2> /dev/null || true } +pass() { + echo PASS $@ +} + run_one() { testnum=$1 message=$2 @@ -620,8 +635,10 @@ run_one() { # Pretty tests run faster. 
equals_msg $testnum: $message - log "== test $1: $2" + BEFORE=`date +%s` + log "== test $testnum: $message ============ `date +%H:%M:%S` ($BEFORE)" test_${testnum} || error "test_$testnum failed with $?" + pass "($((`date +%s` - $BEFORE))s)" } canonical_path() { diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh index f9942a9..d945719 100644 --- a/lustre/tests/uml.sh +++ b/lustre/tests/uml.sh @@ -77,6 +77,13 @@ h2gm () { echo `gmnalnid -n$1` } +h2iib () { + case $1 in + client) echo '\*' ;; + *) echo $1 | sed "s/[^0-9]*//" ;; + esac +} + # create nodes echo -n "adding NET for:" for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do @@ -112,6 +119,6 @@ done echo; echo -n "adding CLIENT on:" for NODE in $CLIENTS; do echo -n " $NODE" - ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --mds mds1 --lov lov1 || exit 30 + ${LMC} -m $config --add mtpt --node $NODE --path $MOUNT --clientoptions async --mds mds1 --lov lov1 || exit 30 done echo diff --git a/lustre/tests/unlinkmany.c b/lustre/tests/unlinkmany.c index ba1bee7..080b1c6 100644 --- a/lustre/tests/unlinkmany.c +++ b/lustre/tests/unlinkmany.c @@ -16,17 +16,23 @@ void usage(char *prog) int main(int argc, char ** argv) { - int i, rc = 0; + int i, rc = 0, do_rmdir = 0; char format[4096], *fmt; char filename[4096]; long start, last; long begin = 0, count; - if (argc < 3 || argc > 4) { + if (argc < 3 || argc > 5) { usage(argv[0]); return 1; } + if (strcmp(argv[1], "-d") == 0) { + do_rmdir = 1; + argv++; + argc--; + } + if (strlen(argv[1]) > 4080) { printf("name too long\n"); return 1; @@ -53,9 +59,13 @@ int main(int argc, char ** argv) } for (i = 0; i < count; i++, begin++) { sprintf(filename, fmt, begin); - rc = unlink(filename); + if (do_rmdir) + rc = rmdir(filename); + else + rc = unlink(filename); if (rc) { - printf("unlink(%s) error: %s\n", + printf("%s(%s) error: %s\n", + do_rmdir ? 
"rmdir" : "unlink", filename, strerror(errno)); rc = errno; break; diff --git a/lustre/utils/Lustre/lustredb.py b/lustre/utils/Lustre/lustredb.py index 3d3c4ae..71716b6 100644 --- a/lustre/utils/Lustre/lustredb.py +++ b/lustre/utils/Lustre/lustredb.py @@ -307,6 +307,13 @@ class LustreDB_XML(LustreDB): ret.append((net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)) return ret + def get_hostaddr(self): + ret = [] + list = self.dom_node.getElementsByTagName('hostaddr') + for node in list: + ret.append(node.firstChild.data) + return ret + def _update_active(self, tgt, new): raise Lustre.LconfError("updates not implemented for XML") diff --git a/lustre/utils/lconf b/lustre/utils/lconf index d42ae9d..17fbe68 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -587,6 +587,13 @@ class LCTLInterface: quit""" % (type, name, uuid) self.run(cmds) + def detach(self, name): + cmds = """ + cfg_device %s + detach + quit""" % (name) + self.run(cmds) + def set_security(self, name, key, value): cmds = """ cfg_device %s @@ -608,6 +615,12 @@ class LCTLInterface: quit""" % (name, conn_uuid) self.run(cmds) + def start(self, name, conf_name): + cmds = """ + device $%s + start %s + quit""" % (name, conf_name) + self.run(cmds) # create a new device with lctl def newdev(self, type, name, uuid, setup = ""): @@ -618,7 +631,6 @@ class LCTLInterface: self.cleanup(name, uuid, 0) raise e - # cleanup a device def cleanup(self, name, uuid, force, failover = 0): if failover: force = 1 @@ -1375,7 +1387,8 @@ class Network(Module): def add_module(self, manager): manager.add_portals_module("libcfs", 'libcfs') manager.add_portals_module("portals", 'portals') - if node_needs_router(): + + if node_needs_router(): manager.add_portals_module("router", 'kptlrouter') if self.net_type == 'tcp': manager.add_portals_module("knals/socknal", 'ksocknal') @@ -1706,27 +1719,29 @@ class LMV(Module): def correct_level(self, level, op=None): return level -class MDSDEV(Module): - def __init__(self,db): - 
Module.__init__(self, 'MDSDEV', db) +class CONFDEV(Module): + def __init__(self, db, name, target_uuid, uuid): + Module.__init__(self, 'CONFDEV', db) self.devpath = self.db.get_val('devpath','') self.backdevpath = self.db.get_val('backdevpath','') self.size = self.db.get_val_int('devsize', 0) self.journal_size = self.db.get_val_int('journalsize', 0) self.fstype = self.db.get_val('fstype', '') self.backfstype = self.db.get_val('backfstype', '') - self.nspath = self.db.get_val('nspath', '') self.mkfsoptions = self.db.get_val('mkfsoptions', '') self.mountfsoptions = self.db.get_val('mountfsoptions', '') + self.target = self.db.lookup(target_uuid) + self.name = "conf_%s" % self.target.getName() + self.client_uuids = self.target.get_refs('client') self.obdtype = self.db.get_val('obdtype', '') - self.root_squash = self.db.get_val('root_squash', '') - self.no_root_squash = self.db.get_val('no_root_squash', '') - # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid - target_uuid = self.db.get_first_ref('target') - self.mds = self.db.lookup(target_uuid) - self.name = self.mds.getName() - self.client_uuids = self.mds.get_refs('client') - + + if self.obdtype == None: + self.obdtype = 'dumb' + + self.conf_name = name + self.conf_uuid = uuid + self.realdev = self.devpath + self.lmv = None self.master = None @@ -1736,47 +1751,52 @@ class MDSDEV(Module): if self.lmv != None: self.client_uuids = self.lmv.get_refs('client') - # FIXME: if fstype not set, then determine based on kernel version - self.format = self.db.get_val('autoformat', "no") - if self.mds.get_val('failover', 0): - self.failover_mds = 'f' + if self.target.get_class() == 'mds': + if self.target.get_val('failover', 0): + self.failover_mds = 'f' + else: + self.failover_mds = 'n' + self.format = self.db.get_val('autoformat', "no") else: - self.failover_mds = 'n' - active_uuid = get_active_target(self.mds) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == 
self.uuid: - self.active = 1 - else: - self.active = 0 - if self.active and config.group and config.group != self.mds.get_val('group'): - self.active = 0 + self.format = self.db.get_val('autoformat', "yes") + self.osdtype = self.db.get_val('osdtype') + ost = self.db.lookup(target_uuid) + if ost.get_val('failover', 0): + self.failover_ost = 'f' + else: + self.failover_ost = 'n' - # default inode inode for case when neither LOV either - # LMV is accessible. - self.inode_size = 256 - + self.inode_size = self.get_inode_size() + + if self.lmv != None: + client_uuid = self.name + "_lmv_UUID" + self.master = LMV(self.lmv, client_uuid, + self.conf_name, self.conf_name) + + def get_inode_size(self): inode_size = self.db.get_val_int('inodesize', 0) - if not inode_size == 0: - self.inode_size = inode_size - else: + if inode_size == 0 and self.target.get_class() == 'mds': + + # default inode size for case when neither LOV either + # LMV is accessible. + self.inode_size = 256 + # find the LOV for this MDS - lovconfig_uuid = self.mds.get_first_ref('lovconfig') + lovconfig_uuid = self.target.get_first_ref('lovconfig') if lovconfig_uuid or self.lmv != None: if self.lmv != None: lovconfig_uuid = self.lmv.get_first_ref('lovconfig') lovconfig = self.lmv.lookup(lovconfig_uuid) lov_uuid = lovconfig.get_first_ref('lov') if lov_uuid == None: - panic(self.mds.getName() + ": No LOV found for lovconfig ", + panic(self.target.getName() + ": No LOV found for lovconfig ", lovconfig.name) else: - lovconfig = self.mds.lookup(lovconfig_uuid) + lovconfig = self.target.lookup(lovconfig_uuid) lov_uuid = lovconfig.get_first_ref('lov') if lov_uuid == None: - panic(self.mds.getName() + ": No LOV found for lovconfig ", + panic(self.target.getName() + ": No LOV found for lovconfig ", lovconfig.name) - if self.lmv != None: lovconfig_uuid = self.lmv.get_first_ref('lovconfig') lovconfig = self.lmv.lookup(lovconfig_uuid) @@ -1786,60 +1806,26 @@ class MDSDEV(Module): config_only = 1) # default stripe count 
controls default inode_size - stripe_count = lov.stripe_cnt + if lov.stripe_cnt > 0: + stripe_count = lov.stripe_cnt + else: + stripe_count = len(lov.devlist) if stripe_count > 77: - self.inode_size = 4096 + inode_size = 4096 elif stripe_count > 35: - self.inode_size = 2048 + inode_size = 2048 elif stripe_count > 13: - self.inode_size = 1024 + inode_size = 1024 elif stripe_count > 3: - self.inode_size = 512 + inode_size = 512 else: - self.inode_size = 256 - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - - # setup LMV - if self.lmv != None: - client_uuid = self.name + "_lmv_UUID" - self.master = LMV(self.lmv, client_uuid, - self.name, self.name) - - def add_module(self, manager): - if self.active: - manager.add_lustre_module('mdc', 'mdc') - manager.add_lustre_module('osc', 'osc') - manager.add_lustre_module('ost', 'ost') - manager.add_lustre_module('lov', 'lov') - manager.add_lustre_module('mds', 'mds') - - if self.fstype == 'smfs' or self.fstype == 'ldiskfs': - manager.add_lustre_module(self.fstype, self.fstype) - - if self.fstype: - manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) - - # if fstype is smfs, then we should also take care about backing - # store fs. 
- if self.fstype == 'smfs': - manager.add_lustre_module(self.backfstype, self.backfstype) - manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype)) - - for option in string.split(self.mountfsoptions, ','): - if option == 'snap': - if not self.fstype == 'smfs': - panic("mountoptions has 'snap', but fstype is not smfs.") - manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype)) - manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) - - # add LMV modules - if self.master != None: - self.master.add_module(manager) + inode_size = 256 + + return inode_size def get_mount_options(self, blkdev): - options = def_mount_options(self.fstype, 'mds') + options = def_mount_options(self.fstype, + self.target.get_class()) if config.mountfsoptions: if options: @@ -1857,156 +1843,110 @@ class MDSDEV(Module): if self.fstype == 'smfs': if options: - options = "%s,type=%s,dev=%s" %(options, - self.backfstype, blkdev) + options = "%s,type=%s,dev=%s" %(options, self.backfstype, + blkdev) else: - options = "type=%s,dev=%s" %(self.backfstype, blkdev) + options = "type=%s,dev=%s" %(self.backfstype, + blkdev) + + if self.target.get_class() == 'mds': + if options: + options = "%s,iopen_nopriv" %(options) + else: + options = "iopen_nopriv" + return options - + def prepare(self): - if not config.record and is_prepared(self.name): - return - if not self.active: - debug(self.uuid, "not active") + if is_prepared(self.name): return - if config.reformat: - # run write_conf automatically, if --reformat used - self.write_conf() - run_acceptors() - # prepare LMV - if self.master != None: - self.master.prepare() - - # never reformat here - blkdev = block_dev(self.devpath, self.size, self.fstype, 0, - self.format, self.journal_size, self.inode_size, - self.mkfsoptions, self.backfstype, self.backdevpath) - - if not is_prepared('MDT'): - lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") - try: - if self.fstype == 'smfs': - realdev = self.fstype - else: - realdev = 
blkdev - - if self.obdtype == None: - self.obdtype = 'dumb' + blkdev = block_dev(self.devpath, self.size, self.fstype, + config.reformat, self.format, self.journal_size, + self.inode_size, self.mkfsoptions, self.backfstype, + self.backdevpath) + + if self.fstype == 'smfs': + realdev = self.fstype + else: + realdev = blkdev - if self.master == None: - master_name = 'dumb' - else: - master_name = self.master.name - - if self.client_uuids == None: - profile_name = 'dumb' - else: - profile_name = self.name - - mountfsoptions = self.get_mount_options(blkdev) + mountfsoptions = self.get_mount_options(blkdev) - self.info("mds", realdev, mountfsoptions, self.fstype, self.size, - self.format, master_name, profile_name, self.obdtype) - - lctl.attach("mds", self.name, self.uuid) - if config.mds_mds_sec: - lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec) - if config.mds_ost_sec: - lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec) - - lctl.setup(self.name, setup = "%s %s %s %s %s %s" %(realdev, - self.fstype, profile_name, mountfsoptions, - master_name, self.obdtype)) - - if development_mode(): - procentry = "/proc/fs/lustre/mds/lsd_upcall" - upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall") - if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)): - print "MDS Warning: failed to set lsd cache upcall" - else: - run("echo ", upcall, " > ", procentry) + self.info(self.target.get_class(), realdev, mountfsoptions, + self.fstype, self.size, self.format) - except CommandError, e: - if e.rc == 2: - panic("MDS is missing the config log. 
Need to run " + - "lconf --write_conf.") - else: - raise e - - if config.root_squash == None: - config.root_squash = self.root_squash - if config.no_root_squash == None: - config.no_root_squash = self.no_root_squash - if config.root_squash: - if config.no_root_squash: - nsnid = config.no_root_squash - else: - nsnid = "0" - lctl.root_squash(self.name, config.root_squash, nsnid) + lctl.newdev("confobd", self.name, self.uuid, + setup ="%s %s %s" %(realdev, self.fstype, + mountfsoptions)) + + self.mountfsoptions = mountfsoptions + self.realdev = realdev + + def add_module(self, manager): + manager.add_lustre_module('obdclass', 'confobd') def write_conf(self): - if not self.client_uuids: - return 0 - - do_cleanup = 0 - if not is_prepared(self.name): - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions, - self.backfstype, self.backdevpath) + if self.target.get_class() == 'ost': + config.record = 1 + lctl.clear_log(self.name, self.target.getName() + '-conf') + lctl.record(self.name, self.target.getName() + '-conf') + lctl.newdev(self.osdtype, self.conf_name, self.conf_uuid, + setup ="%s %s %s %s" %(self.realdev, self.fstype, + self.failover_ost, + self.mountfsoptions)) + lctl.end_record() + lctl.clear_log(self.name, 'OSS-conf') + lctl.record(self.name, 'OSS-conf') + lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") + lctl.end_record() + config.record = 0 + return - if self.fstype == 'smfs': - realdev = self.fstype - else: - realdev = blkdev - - # Even for writing logs we mount mds with supplied mount options - # because it will not mount smfs (if used) otherwise. 
- mountfsoptions = self.get_mount_options(blkdev) + if self.target.get_class() == 'mds': + if self.master != None: + master_name = self.master.name + else: + master_name = 'dumb' - if self.obdtype == None: - self.obdtype = 'dumb' - - self.info("mds", realdev, mountfsoptions, self.fstype, self.size, - self.format, "dumb", "dumb", self.obdtype) - - lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s %s %s %s" %(realdev, self.fstype, - 'dumb', mountfsoptions, - 'dumb', self.obdtype)) - do_cleanup = 1 + config.record = 1 + lctl.clear_log(self.name, self.target.getName() + '-conf') + lctl.record(self.name, self.target.getName() + '-conf') + lctl.newdev("mds", self.conf_name, self.conf_uuid, + setup ="%s %s %s %s %s %s" %(self.realdev, self.fstype, + self.conf_name, self.mountfsoptions, + master_name, self.obdtype)) + lctl.end_record() + config.record = 0 - # record logs for all MDS clients - for obd_uuid in self.client_uuids: - log("recording client:", obd_uuid) + if not self.client_uuids: + return 0 + for uuid in self.client_uuids: + log("recording client:", uuid) client_uuid = generate_client_uuid(self.name) - client = VOSC(self.db.lookup(obd_uuid), client_uuid, - self.name, self.name) + client = VOSC(self.db.lookup(uuid), client_uuid, + self.target.getName(), self.name) config.record = 1 - lctl.clear_log(self.name, self.name) - lctl.record(self.name, self.name) + lctl.clear_log(self.name, self.target.getName()) + lctl.record(self.name, self.target.getName()) client.prepare() - lctl.mount_option(self.name, client.get_name(), "") + lctl.mount_option(self.target.getName(), client.get_name(), "") lctl.end_record() - process_updates(self.db, self.name, self.name, client) config.cleanup = 1 - lctl.clear_log(self.name, self.name + '-clean') - lctl.record(self.name, self.name + '-clean') + lctl.clear_log(self.name, self.target.getName() + '-clean') + lctl.record(self.name, self.target.getName() + '-clean') client.cleanup() - lctl.del_mount_option(self.name) + 
lctl.del_mount_option(self.target.getName()) lctl.end_record() - process_updates(self.db, self.name, self.name + '-clean', client) config.cleanup = 0 config.record = 0 + if config.record: + return + # record logs for each client - if config.noexec: - noexec_opt = '-n' - else: - noexec_opt = '' if config.ldapurl: config_options = "--ldapurl " + config.ldapurl + " --config " + config.config else: @@ -2023,7 +1963,9 @@ class MDSDEV(Module): debug("recording", client_name) old_noexec = config.noexec config.noexec = 0 - ret, out = run (sys.argv[0], noexec_opt, + noexec_opt = ('', '-n') + ret, out = run (sys.argv[0], + noexec_opt[old_noexec == 1], " -v --record --nomod", "--record_log", client_name, "--record_device", self.name, @@ -2031,7 +1973,8 @@ class MDSDEV(Module): config_options) if config.verbose: for s in out: log("record> ", string.strip(s)) - ret, out = run (sys.argv[0], noexec_opt, + ret, out = run (sys.argv[0], + noexec_opt[old_noexec == 1], "--cleanup -v --record --nomod", "--record_log", client_name + "-clean", "--record_device", self.name, @@ -2040,17 +1983,172 @@ class MDSDEV(Module): if config.verbose: for s in out: log("record> ", string.strip(s)) config.noexec = old_noexec - if do_cleanup: + + def start(self): + try: + lctl.start(self.name, self.conf_name) + except CommandError, e: + raise e + if self.target.get_class() == 'ost': + if not is_prepared('OSS'): + try: + lctl.start(self.name, 'OSS') + except CommandError, e: + raise e + + def cleanup(self): + if is_prepared(self.name): try: lctl.cleanup(self.name, self.uuid, 0, 0) + clean_dev(self.devpath, self.fstype, + self.backfstype, self.backdevpath) except CommandError, e: log(self.module_name, "cleanup failed: ", self.name) e.dump() cleanup_error(e.rc) Module.cleanup(self) - clean_dev(self.devpath, self.fstype, self.backfstype, - self.backdevpath) +class MDSDEV(Module): + def __init__(self,db): + Module.__init__(self, 'MDSDEV', db) + self.devpath = self.db.get_val('devpath','') + self.backdevpath 
= self.db.get_val('backdevpath','') + self.size = self.db.get_val_int('devsize', 0) + self.journal_size = self.db.get_val_int('journalsize', 0) + self.fstype = self.db.get_val('fstype', '') + self.backfstype = self.db.get_val('backfstype', '') + self.nspath = self.db.get_val('nspath', '') + self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') + self.obdtype = self.db.get_val('obdtype', '') + self.root_squash = self.db.get_val('root_squash', '') + self.no_root_squash = self.db.get_val('no_root_squash', '') + + target_uuid = self.db.get_first_ref('target') + self.target = self.db.lookup(target_uuid) + self.name = self.target.getName() + self.master = None + self.lmv = None + + lmv_uuid = self.db.get_first_ref('lmv') + if lmv_uuid != None: + self.lmv = self.db.lookup(lmv_uuid) + + active_uuid = get_active_target(self.target) + if not active_uuid: + panic("No target device found:", target_uuid) + if active_uuid == self.uuid: + self.active = 1 + group = self.target.get_val('group') + if config.group and config.group != group: + self.active = 0 + else: + self.active = 0 + + self.uuid = target_uuid + + # setup LMV + if self.lmv != None: + client_uuid = self.name + "_lmv_UUID" + self.master = LMV(self.lmv, client_uuid, + self.name, self.name) + + self.confobd = CONFDEV(self.db, self.name, + target_uuid, self.uuid) + + def add_module(self, manager): + if self.active: + manager.add_lustre_module('mdc', 'mdc') + manager.add_lustre_module('osc', 'osc') + manager.add_lustre_module('ost', 'ost') + manager.add_lustre_module('lov', 'lov') + manager.add_lustre_module('mds', 'mds') + + if self.fstype == 'smfs' or self.fstype == 'ldiskfs': + manager.add_lustre_module(self.fstype, self.fstype) + + if self.fstype: + manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.fstype)) + + # if fstype is smfs, then we should also take care about backing + # store fs. 
+ if self.fstype == 'smfs': + manager.add_lustre_module(self.backfstype, self.backfstype) + manager.add_lustre_module('lvfs', 'fsfilt_%s' % (self.backfstype)) + + for option in string.split(self.mountfsoptions, ','): + if option == 'snap': + if not self.fstype == 'smfs': + panic("mountoptions has 'snap', but fstype is not smfs.") + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype)) + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) + + # add LMV modules + if self.master != None: + self.master.add_module(manager) + + # add CONFOBD modules + if self.confobd != None: + self.confobd.add_module(manager) + + def write_conf(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + run_acceptors() + self.confobd.prepare() + self.confobd.write_conf() + self.confobd.cleanup() + + def prepare(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + run_acceptors() + + self.confobd.prepare() + if config.reformat: + self.confobd.write_conf() + + # prepare LMV + if self.master != None: + self.master.prepare() + + lctl.attach("mds", self.name, self.uuid) + if config.mds_mds_sec: + lctl.set_security(self.name, "mds_mds_sec", config.mds_mds_sec) + if config.mds_ost_sec: + lctl.set_security(self.name, "mds_ost_sec", config.mds_ost_sec) + lctl.detach(self.name) + + if not config.record: + self.confobd.start() + + if not is_prepared('MDT'): + lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") + + if development_mode(): + procentry = "/proc/fs/lustre/mds/lsd_upcall" + upcall = os.path.abspath(os.path.dirname(sys.argv[0]) + "/lsd_upcall") + if not (os.access(procentry, os.R_OK) and os.access(upcall, os.R_OK)): + print "MDS Warning: failed to set lsd cache upcall" + else: + run("echo ", upcall, " > ", procentry) + + if config.root_squash == None: + config.root_squash = self.root_squash + if config.no_root_squash == None: + config.no_root_squash 
= self.no_root_squash + if config.root_squash: + if config.no_root_squash: + nsnid = config.no_root_squash + else: + nsnid = "0" + lctl.root_squash(self.name, config.root_squash, nsnid) def msd_remaining(self): out = lctl.device_list() @@ -2090,14 +2188,14 @@ class MDSDEV(Module): e.dump() cleanup_error(e.rc) - clean_dev(self.devpath, self.fstype, self.backfstype, - self.backdevpath) + if self.confobd: + self.confobd.cleanup() def correct_level(self, level, op=None): #if self.master != None: # level = level + 2 return level - + class OSD(Module): def __init__(self, db): Module.__init__(self, 'OSD', db) @@ -2126,97 +2224,78 @@ class OSD(Module): panic("No target device found:", target_uuid) if active_uuid == self.uuid: self.active = 1 + group = ost.get_val('group') + if config.group and config.group != group: + self.active = 0 else: self.active = 0 - if self.active and config.group and config.group != ost.get_val('group'): - self.active = 0 - self.target_dev_uuid = self.uuid self.uuid = target_uuid + self.confobd = CONFDEV(self.db, self.name, + target_uuid, self.uuid) def add_module(self, manager): - if self.active: - manager.add_lustre_module('ost', 'ost') + if not self.active: + return + manager.add_lustre_module('ost', 'ost') - if self.fstype == 'smfs' or self.fstype == 'ldiskfs': - manager.add_lustre_module(self.fstype, self.fstype) + if self.fstype == 'smfs' or self.fstype == 'ldiskfs': + manager.add_lustre_module(self.fstype, self.fstype) - if self.fstype: - manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) - - if self.fstype == 'smfs': - manager.add_lustre_module(self.backfstype, self.backfstype) - manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype)) + if self.fstype: + manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.fstype)) - for option in self.mountfsoptions: - if option == 'snap': - if not self.fstype == 'smfs': - panic("mountoptions with snap, but fstype is not smfs\n") - manager.add_lustre_module('lvfs', 
'fsfilt_snap_%s' % (self.fstype)) - manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) + if self.fstype == 'smfs': + manager.add_lustre_module(self.backfstype, self.backfstype) + manager.add_lustre_module('lvfs' , 'fsfilt_%s' % (self.backfstype)) - manager.add_lustre_module(self.osdtype, self.osdtype) + for option in self.mountfsoptions: + if option == 'snap': + if not self.fstype == 'smfs': + panic("mountoptions with snap, but fstype is not smfs\n") + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.fstype)) + manager.add_lustre_module('lvfs', 'fsfilt_snap_%s' % (self.backfstype)) - def get_mount_options(self, blkdev): - options = def_mount_options(self.fstype, 'ost') - - if config.mountfsoptions: - if options: - options = "%s,%s" %(options, config.mountfsoptions) - else: - options = config.mountfsoptions - if self.mountfsoptions: - options = "%s,%s" %(options, self.mountfsoptions) - else: - if self.mountfsoptions: - if options: - options = "%s,%s" %(options, self.mountfsoptions) - else: - options = self.mountfsoptions - - if self.fstype == 'smfs': - if options: - options = "%s,type=%s,dev=%s" %(options, - self.backfstype, blkdev) - else: - options = "type=%s,dev=%s" %(self.backfstype, - blkdev) - return options + manager.add_lustre_module(self.osdtype, self.osdtype) - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. 
+ # add CONFOBD modules + if self.confobd != None: + self.confobd.add_module(manager) + def prepare(self): if is_prepared(self.name): return if not self.active: debug(self.uuid, "not active") return + run_acceptors() if self.osdtype == 'obdecho': - blkdev = '' - else: - blkdev = block_dev(self.devpath, self.size, self.fstype, - config.reformat, self.format, self.journal_size, - self.inode_size, self.mkfsoptions, self.backfstype, - self.backdevpath) - - if self.fstype == 'smfs': - realdev = self.fstype - else: - realdev = blkdev + self.info(self.osdtype) + lctl.newdev("obdecho", self.name, self.uuid) + if not is_prepared('OSS'): + lctl.newdev("ost", 'OSS', 'OSS_UUID', setup="") + else: + self.confobd.prepare() + if config.reformat: + self.confobd.write_conf() + if not config.record: + self.confobd.start() - mountfsoptions = self.get_mount_options(blkdev) - - self.info(self.osdtype, realdev, mountfsoptions, self.fstype, - self.size, self.format, self.journal_size, self.inode_size) - - lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s %s" %(realdev, self.fstype, - self.failover_ost, - mountfsoptions)) - if not is_prepared('OSS'): - lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") + def write_conf(self): + if is_prepared(self.name): + return + if not self.active: + debug(self.uuid, "not active") + return + + run_acceptors() + if self.osdtype != 'obdecho': + self.confobd.prepare() + self.confobd.write_conf() + if not config.write_conf: + self.confobd.start() + self.confobd.cleanup() def osd_remaining(self): out = lctl.device_list() @@ -2234,6 +2313,7 @@ class OSD(Module): if not self.active: debug(self.uuid, "not active") return + if is_prepared(self.name): self.info() try: @@ -2251,33 +2331,24 @@ class OSD(Module): print "cleanup failed: ", self.name e.dump() cleanup_error(e.rc) - if not self.osdtype == 'obdecho': - clean_dev(self.devpath, self.fstype, self.backfstype, - self.backdevpath) + + if self.osdtype != 'obdecho': + if self.confobd: + 
self.confobd.cleanup() def correct_level(self, level, op=None): return level -def mgmt_uuid_for_fs(mtpt_name): - if not mtpt_name: - return '' - mtpt_db = toplustreDB.lookup_name(mtpt_name) - fs_uuid = mtpt_db.get_first_ref('filesystem') - fs = toplustreDB.lookup(fs_uuid) - if not fs: - return '' - return fs.get_first_ref('mgmt') - # Generic client module, used by OSC and MDC class Client(Module): - def __init__(self, tgtdb, uuid, module, fs_name, self_name=None, - module_dir=None): + def __init__(self, tgtdb, uuid, module, fs_name, + self_name=None, module_dir=None): self.target_name = tgtdb.getName() self.target_uuid = tgtdb.getUUID() self.module_dir = module_dir + self.backup_targets = [] self.module = module self.db = tgtdb - self.active = 1 self.tgt_dev_uuid = get_active_target(tgtdb) if not self.tgt_dev_uuid: @@ -2295,11 +2366,7 @@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - mgmt_uuid = mgmt_uuid_for_fs(fs_name) - if mgmt_uuid: - self.mgmt_name = mgmtcli_name_for_uuid(mgmt_uuid) - else: - self.mgmt_name = '' + self.lookup_backup_targets() self.fs_name = fs_name if not self.module_dir: self.module_dir = module @@ -2319,6 +2386,20 @@ class Client(Module): def get_servers(self): return self._server_nets + def lookup_backup_targets(self): + """ Lookup alternative network information """ + prof_list = toplustreDB.get_refs('profile') + for prof_uuid in prof_list: + prof_db = toplustreDB.lookup(prof_uuid) + if not prof_db: + panic("profile:", prof_uuid, "not found.") + for ref_class, ref_uuid in prof_db.get_all_refs(): + if ref_class in ('osd', 'mdsdev'): + devdb = toplustreDB.lookup(ref_uuid) + uuid = devdb.get_first_ref('target') + if self.target_uuid == uuid and self.tgt_dev_uuid != ref_uuid: + self.backup_targets.append(ref_uuid) + def prepare(self, ignore_connect_failure = 0): self.info(self.target_uuid) if not config.record and is_prepared(self.name): @@ -2336,16 +2417,32 @@ class Client(Module): except 
CommandError, e: if not ignore_connect_failure: raise e + if srv: - if self.permits_inactive() and (self.target_uuid in config.inactive or self.active == 0): + if self.target_uuid in config.inactive and self.permits_inactive(): debug("%s inactive" % self.target_uuid) inactive_p = "inactive" else: debug("%s active" % self.target_uuid) inactive_p = "" lctl.newdev(self.module, self.name, self.uuid, - setup ="%s %s %s %s" % (self.target_uuid, srv.nid_uuid, - inactive_p, self.mgmt_name)) + setup ="%s %s %s" % (self.target_uuid, srv.nid_uuid, + inactive_p)) + for tgt_dev_uuid in self.backup_targets: + this_nets = get_ost_net(toplustreDB, tgt_dev_uuid) + if len(this_nets) == 0: + panic ("Unable to find a server for:", tgt_dev_uuid) + srv = choose_local_server(this_nets) + if srv: + lctl.connect(srv) + else: + routes = find_route(this_nets); + if len(routes) == 0: + panic("no route to", tgt_dev_uuid) + for (srv, r) in routes: + lctl.add_route_host(r[0]. srv.nid_uuid, r[1], r[3]) + if srv: + lctl.add_conn(self.name, srv.nid_uuid); def cleanup(self): if is_prepared(self.name): @@ -2362,6 +2459,15 @@ class Client(Module): e.dump() cleanup_error(e.rc) + for tgt_dev_uuid in self.backup_targets: + this_net = get_ost_net(toplustreDB, tgt_dev_uuid) + srv = choose_local_server(this_net) + if srv: + lctl.disconnect(srv) + else: + for (srv, r) in find_route(this_net): + lctl.del_route_host(r[0]. 
srv.nid_uuid, r[1], r[3]) + def correct_level(self, level, op=None): return level @@ -2387,15 +2493,6 @@ class OSC(Client): def permits_inactive(self): return 1 -def mgmtcli_name_for_uuid(uuid): - return 'MGMTCLI_%s' % uuid - -class ManagementClient(Client): - def __init__(self, db, uuid): - Client.__init__(self, db, uuid, 'mgmt_cli', '', - self_name = mgmtcli_name_for_uuid(db.getUUID()), - module_dir = 'mgmt') - class CMOBD(Module): def __init__(self, db): Module.__init__(self, 'CMOBD', db) @@ -2652,14 +2749,13 @@ class Mountpoint(Module): def __init__(self,db): Module.__init__(self, 'MTPT', db) self.path = self.db.get_val('path') - self.clientoptions = self.db.get_val('clientoptions', '') + self.clientoptions = self.db.get_val('clientoptions', '') self.fs_uuid = self.db.get_first_ref('filesystem') fs = self.db.lookup(self.fs_uuid) self.mds_uuid = fs.get_first_ref('lmv') if not self.mds_uuid: self.mds_uuid = fs.get_first_ref('mds') self.obd_uuid = fs.get_first_ref('obd') - self.mgmt_uuid = fs.get_first_ref('mgmt') client_uuid = generate_client_uuid(self.name) ost = self.db.lookup(self.obd_uuid) @@ -2673,46 +2769,37 @@ class Mountpoint(Module): self.vosc = VOSC(ost, client_uuid, self.name, self.name) self.vmdc = VMDC(mds, client_uuid, self.name, self.name) - if self.mgmt_uuid: - self.mgmtcli = ManagementClient(db.lookup(self.mgmt_uuid), - client_uuid) - else: - self.mgmtcli = None - def prepare(self): if not config.record and fs_is_mounted(self.path): log(self.path, "already mounted.") return run_acceptors() - if self.mgmtcli: - self.mgmtcli.prepare() - self.vosc.prepare() + + self.vosc.prepare() self.vmdc.prepare() - vmdc_name = self.vmdc.get_name() self.info(self.path, self.mds_uuid, self.obd_uuid) if config.record or config.lctl_dump: - lctl.mount_option(local_node_name, self.vosc.get_name(), vmdc_name) + lctl.mount_option(local_node_name, self.vosc.get_name(), + self.vmdc.get_name()) return if config.clientoptions: if self.clientoptions: - self.clientoptions = 
self.clientoptions + ',' + \ - config.clientoptions + self.clientoptions = self.clientoptions + ',' + config.clientoptions else: self.clientoptions = config.clientoptions if self.clientoptions: self.clientoptions = ',' + self.clientoptions # Linux kernel will deal with async and not pass it to ll_fill_super, # so replace it with Lustre async - self.clientoptions = string.replace(self.clientoptions, "async", - "lasync") + self.clientoptions = string.replace(self.clientoptions, "async", "lasync") if not config.sec: config.sec = "null" cmd = "mount -t lustre_lite -o osc=%s,mdc=%s,sec=%s%s %s %s" % \ - (self.vosc.get_name(), vmdc_name, config.sec, self.clientoptions, - config.config, self.path) + (self.vosc.get_name(), self.vmdc.get_name(), config.sec, + self.clientoptions, config.config, self.path) run("mkdir", self.path) ret, val = run(cmd) if ret: @@ -2739,18 +2826,10 @@ class Mountpoint(Module): self.vmdc.cleanup() self.vosc.cleanup() - if self.mgmtcli: - self.mgmtcli.cleanup() def add_module(self, manager): - manager.add_lustre_module('mdc', 'mdc') - - if self.mgmtcli: - self.mgmtcli.add_module(manager) - self.vosc.add_module(manager) self.vmdc.add_module(manager) - manager.add_lustre_module('llite', 'llite') def correct_level(self, level, op=None): @@ -2774,7 +2853,6 @@ def get_ost_net(self, osd_uuid): srv_list.append(Network(db)) return srv_list - # the order of iniitailization is based on level. def getServiceLevel(self): type = self.get_class() @@ -2851,9 +2929,8 @@ def find_local_clusters(node_db): debug("add_local", netuuid) local_clusters.append((srv.net_type, srv.cluster_id, srv.nid)) if srv.port > 0: - if acceptors.has_key(srv.port): - panic("duplicate port:", srv.port) - acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type) + if not acceptors.has_key(srv.port): + acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type) # This node is a gateway. 
is_router = 0 @@ -3134,9 +3211,10 @@ def doWriteconf(services): #if config.nosetup: # return for s in services: - if s[1].get_class() == 'mdsdev': + if s[1].get_class() == 'mdsdev' or s[1].get_class() == 'osd': n = newService(s[1]) n.write_conf() + n.cleanup() def doSetup(services): if config.nosetup: @@ -3157,7 +3235,7 @@ def doSetup(services): def doLoadModules(services): if config.nomod: return - + # adding all needed modules from all services for s in services: n = newService(s[1]) @@ -3444,14 +3522,12 @@ def sys_set_netmem_max(path, max): fp.write('%d\n' %(max)) fp.close() - def sys_make_devices(): if not os.access('/dev/portals', os.R_OK): run('mknod /dev/portals c 10 240') if not os.access('/dev/obd', os.R_OK): run('mknod /dev/obd c 10 241') - # Add dir to the global PATH, if not already there. def add_to_path(new_dir): syspath = string.split(os.environ['PATH'], ':') @@ -3473,7 +3549,6 @@ def default_gdb_script(): else: return script - DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') # ensure basic elements are in the system path def sanitise_path(): diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 27d2b5f..879237c 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -67,7 +67,6 @@ command_t cmdlist[] = { "usage: --net "}, {"network", jt_ptl_network, 0, "commands that follow apply to net\n" "usage: network "}, - {"interface_list", jt_ptl_print_interfaces, 0, "print interface entries\n" "usage: interface_list"}, {"add_interface", jt_ptl_add_interface, 0, "add interface entry\n" @@ -86,7 +85,7 @@ command_t cmdlist[] = { "usage: connect [iIOC]"}, {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid\n" "usage: disconnect []"}, - {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)\n" + {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits\n" "usage: active_tx"}, {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local nid. 
" "The nid defaults to hostname for tcp networks and is automatically " @@ -104,13 +103,17 @@ command_t cmdlist[] = { "add an entry to the portals routing table\n" "usage: add_route []"}, {"del_route", jt_ptl_del_route, 0, - "delete the route via the given gateway to the given targets from the portals routing table\n" + "delete route via gateway to targets from the portals routing table\n" "usage: del_route [] []"}, {"set_route", jt_ptl_notify_router, 0, - "enable/disable routes via the given gateway in the portals routing table\n" + "enable/disable routes via gateway in the portals routing table\n" "usage: set_route [