Whamcloud - gitweb
- update from b1_4_mountconf
author yury <yury>
Fri, 19 May 2006 11:29:53 +0000 (11:29 +0000)
committer yury <yury>
Fri, 19 May 2006 11:29:53 +0000 (11:29 +0000)
347 files changed:
ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch
ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/ChangeLog
lustre/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/autoconf/lustre-version.ac
lustre/cmm/cmm_device.c
lustre/cmm/cmm_internal.h
lustre/cmm/mdc_device.c
lustre/cmm/mdc_internal.h
lustre/cmm/mdc_object.c
lustre/fid/fid_misc.c
lustre/fid/fid_seq.c
lustre/fld/Makefile.in
lustre/fld/fld_handle.c
lustre/fld/fld_iam.c
lustre/include/.cvsignore
lustre/include/Makefile.am
lustre/include/darwin/lprocfs_status.h [new file with mode: 0644]
lustre/include/darwin/lustre_compat.h [new file with mode: 0644]
lustre/include/darwin/lustre_debug.h [new file with mode: 0644]
lustre/include/darwin/lustre_dlm.h [new file with mode: 0644]
lustre/include/darwin/lustre_fsfilt.h [new file with mode: 0644]
lustre/include/darwin/lustre_handles.h [new file with mode: 0644]
lustre/include/darwin/lustre_lib.h [new file with mode: 0644]
lustre/include/darwin/lustre_lite.h [new file with mode: 0644]
lustre/include/darwin/lustre_log.h [new file with mode: 0644]
lustre/include/darwin/lustre_mds.h [new file with mode: 0644]
lustre/include/darwin/lustre_net.h [new file with mode: 0644]
lustre/include/darwin/lustre_quota.h [new file with mode: 0644]
lustre/include/darwin/lustre_types.h [new file with mode: 0644]
lustre/include/darwin/lustre_user.h [new file with mode: 0644]
lustre/include/darwin/lvfs.h [new file with mode: 0644]
lustre/include/darwin/obd.h [new file with mode: 0644]
lustre/include/darwin/obd_class.h [new file with mode: 0644]
lustre/include/darwin/obd_support.h [new file with mode: 0644]
lustre/include/dt_object.h [new file with mode: 0644]
lustre/include/liblustre.h
lustre/include/linux/.cvsignore
lustre/include/linux/Makefile.am
lustre/include/linux/dt_object.h
lustre/include/linux/lprocfs_status.h
lustre/include/linux/lu_object.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_debug.h
lustre/include/linux/lustre_dlm.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_handles.h
lustre/include/linux/lustre_lib.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_log.h
lustre/include/linux/lustre_mds.h
lustre/include/linux/lustre_net.h
lustre/include/linux/lustre_quota.h
lustre/include/linux/lustre_types.h [new file with mode: 0644]
lustre/include/linux/lustre_user.h [new file with mode: 0644]
lustre/include/linux/lvfs.h
lustre/include/linux/lvfs_linux.h
lustre/include/linux/md_object.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/include/lprocfs_status.h [new file with mode: 0644]
lustre/include/lu_object.h [new file with mode: 0644]
lustre/include/lustre/Makefile.am
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h [moved from lustre/include/linux/lustre_idl.h with 90% similarity]
lustre/include/lustre/lustre_user.h
lustre/include/lustre/types.h
lustre/include/lustre_cfg.h [moved from lustre/include/linux/lustre_cfg.h with 99% similarity]
lustre/include/lustre_commit_confd.h [moved from lustre/include/linux/lustre_commit_confd.h with 96% similarity]
lustre/include/lustre_debug.h [new file with mode: 0644]
lustre/include/lustre_disk.h [moved from lustre/include/linux/lustre_disk.h with 94% similarity]
lustre/include/lustre_dlm.h [new file with mode: 0644]
lustre/include/lustre_export.h [moved from lustre/include/linux/lustre_export.h with 96% similarity]
lustre/include/lustre_fid.h [moved from lustre/include/linux/lustre_fid.h with 98% similarity]
lustre/include/lustre_fsfilt.h [new file with mode: 0644]
lustre/include/lustre_ha.h [moved from lustre/include/linux/lustre_ha.h with 100% similarity]
lustre/include/lustre_handles.h [new file with mode: 0644]
lustre/include/lustre_import.h [moved from lustre/include/linux/lustre_import.h with 92% similarity]
lustre/include/lustre_lib.h [new file with mode: 0644]
lustre/include/lustre_lite.h [new file with mode: 0644]
lustre/include/lustre_log.h [new file with mode: 0644]
lustre/include/lustre_mdc.h [moved from lustre/include/linux/lustre_mdc.h with 82% similarity]
lustre/include/lustre_mds.h [new file with mode: 0644]
lustre/include/lustre_net.h [new file with mode: 0644]
lustre/include/lustre_param.h [moved from lustre/include/linux/lustre_param.h with 100% similarity]
lustre/include/lustre_quota.h [new file with mode: 0644]
lustre/include/lustre_req_layout.h [moved from lustre/include/linux/lustre_req_layout.h with 95% similarity]
lustre/include/lustre_ucache.h [moved from lustre/include/linux/lustre_ucache.h with 91% similarity]
lustre/include/lustre_ver.h.in [moved from lustre/include/linux/lustre_ver.h.in with 96% similarity]
lustre/include/lvfs.h [new file with mode: 0644]
lustre/include/md_object.h [new file with mode: 0644]
lustre/include/obd.h [new file with mode: 0644]
lustre/include/obd_cache.h [moved from lustre/include/linux/obd_cache.h with 83% similarity]
lustre/include/obd_class.h [new file with mode: 0644]
lustre/include/obd_echo.h [moved from lustre/include/linux/obd_echo.h with 91% similarity]
lustre/include/obd_lov.h [moved from lustre/include/linux/obd_lov.h with 86% similarity]
lustre/include/obd_ost.h [moved from lustre/include/linux/obd_ost.h with 95% similarity]
lustre/include/obd_support.h [new file with mode: 0644]
lustre/kernel_patches/kernel_configs/uml-2.6.10-fc3.config
lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch
lustre/kernel_patches/patches/ext3-extents-2.4.24.patch
lustre/kernel_patches/patches/ext3-extents-2.4.29.patch
lustre/kernel_patches/patches/ext3-extents-2.6.12.patch
lustre/kernel_patches/patches/ext3-extents-2.6.5.patch
lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/kernel_patches/patches/iallocsem_consistency.patch [new file with mode: 0644]
lustre/kernel_patches/patches/nfs-cifs-intent-2.6-fc3.patch
lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch [new file with mode: 0644]
lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch [new file with mode: 0644]
lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch [new file with mode: 0644]
lustre/kernel_patches/series/2.6-fc3.series
lustre/kernel_patches/series/2.6-rhel4.series
lustre/kernel_patches/series/2.6-suse-newer.series
lustre/kernel_patches/series/2.6.12-vanilla.series
lustre/ldiskfs/lustre_quota_fmt.c
lustre/ldiskfs/quotafmt_test.c
lustre/ldlm/l_lock.c
lustre/ldlm/ldlm_extent.c
lustre/ldlm/ldlm_flock.c
lustre/ldlm/ldlm_inodebits.c
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_plain.c
lustre/ldlm/ldlm_request.c
lustre/ldlm/ldlm_resource.c
lustre/liblustre/dir.c
lustre/liblustre/file.c
lustre/liblustre/llite_fid.c
lustre/liblustre/llite_lib.c
lustre/liblustre/llite_lib.h
lustre/liblustre/lutil.h
lustre/liblustre/namei.c
lustre/liblustre/super.c
lustre/liblustre/tests/echo_test.c
lustre/liblustre/tests/sanity.c
lustre/llite/Makefile.in
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_close.c
lustre/llite/llite_fid.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/llite_mmap.c
lustre/llite/llite_nfs.c
lustre/llite/lproc_llite.c
lustre/llite/namei.c
lustre/llite/rw.c
lustre/llite/rw24.c
lustre/llite/rw26.c
lustre/llite/special.c [deleted file]
lustre/llite/super.c
lustre/llite/super25.c
lustre/llite/symlink.c
lustre/llite/xattr.c
lustre/lmv/lmv_fld.c
lustre/lmv/lmv_intent.c
lustre/lmv/lmv_internal.h
lustre/lmv/lmv_obd.c
lustre/lmv/lmv_object.c
lustre/lmv/lproc_lmv.c
lustre/lov/Info.plist [new file with mode: 0644]
lustre/lov/autoMakefile.am
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_log.c
lustre/lov/lov_merge.c
lustre/lov/lov_obd.c
lustre/lov/lov_offset.c
lustre/lov/lov_pack.c
lustre/lov/lov_qos.c
lustre/lov/lov_request.c
lustre/lov/lproc_lov.c
lustre/lvfs/Info.plist [new file with mode: 0644]
lustre/lvfs/autoMakefile.am
lustre/lvfs/fsfilt.c
lustre/lvfs/fsfilt_ext3.c
lustre/lvfs/fsfilt_reiserfs.c
lustre/lvfs/lvfs_common.c
lustre/lvfs/lvfs_darwin.c [new file with mode: 0644]
lustre/lvfs/lvfs_linux.c
lustre/lvfs/lvfs_userfs.c
lustre/lvfs/upcall_cache.c
lustre/mdc/lproc_mdc.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_reint.c
lustre/mdc/mdc_request.c
lustre/mdd/mdd_handler.c
lustre/mdd/mdd_internal.h
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/mds/mds_fs.c
lustre/mds/mds_internal.h
lustre/mds/mds_join.c
lustre/mds/mds_lib.c
lustre/mds/mds_log.c
lustre/mds/mds_lov.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/mds_unlink_open.c
lustre/mds/mds_xattr.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mgc/mgc_request.c
lustre/mgs/lproc_mgs.c
lustre/mgs/mgs_fs.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/mgs/mgs_llog.c
lustre/obdclass/Info.plist [new file with mode: 0644]
lustre/obdclass/Makefile.in
lustre/obdclass/autoMakefile.am
lustre/obdclass/class_obd.c
lustre/obdclass/darwin/Makefile.am [new file with mode: 0644]
lustre/obdclass/darwin/darwin-module.c [new file with mode: 0644]
lustre/obdclass/darwin/darwin-sysctl.c [new file with mode: 0644]
lustre/obdclass/debug.c
lustre/obdclass/dt_object.c
lustre/obdclass/genops.c
lustre/obdclass/linux/Makefile.am [new file with mode: 0644]
lustre/obdclass/linux/linux-module.c [new file with mode: 0644]
lustre/obdclass/linux/linux-obdo.c [new file with mode: 0644]
lustre/obdclass/linux/linux-sysctl.c [moved from lustre/obdclass/sysctl.c with 99% similarity]
lustre/obdclass/llog.c
lustre/obdclass/llog_cat.c
lustre/obdclass/llog_ioctl.c
lustre/obdclass/llog_lvfs.c
lustre/obdclass/llog_obd.c
lustre/obdclass/llog_swab.c
lustre/obdclass/llog_test.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/lu_object.c
lustre/obdclass/lustre_handles.c
lustre/obdclass/lustre_peer.c
lustre/obdclass/mea.c
lustre/obdclass/obd_config.c
lustre/obdclass/obd_mount.c
lustre/obdclass/obdo.c
lustre/obdclass/prng.c
lustre/obdclass/statfs_pack.c
lustre/obdclass/uuid.c
lustre/obdecho/Info.plist [new file with mode: 0644]
lustre/obdecho/autoMakefile.am
lustre/obdecho/echo.c
lustre/obdecho/echo_client.c
lustre/obdecho/lproc_echo.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/filter_log.c
lustre/obdfilter/filter_lvb.c
lustre/obdfilter/filter_san.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/Info.plist [new file with mode: 0644]
lustre/osc/autoMakefile.am
lustre/osc/lproc_osc.c
lustre/osc/osc_create.c
lustre/osc/osc_internal.h
lustre/osc/osc_lib.c
lustre/osc/osc_request.c
lustre/osd/osd_handler.c
lustre/osd/osd_internal.h
lustre/osd/osd_oi.c
lustre/osd/osd_oi.h
lustre/ost/lproc_ost.c
lustre/ost/ost_handler.c
lustre/ptlrpc/Info.plist [new file with mode: 0644]
lustre/ptlrpc/autoMakefile.am
lustre/ptlrpc/client.c
lustre/ptlrpc/connection.c
lustre/ptlrpc/events.c
lustre/ptlrpc/import.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/llog_client.c
lustre/ptlrpc/llog_net.c
lustre/ptlrpc/llog_server.c
lustre/ptlrpc/lproc_ptlrpc.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/pers.c
lustre/ptlrpc/pinger.c
lustre/ptlrpc/ptlrpc_internal.h
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/ptlrpcd.c
lustre/ptlrpc/recov_thread.c
lustre/ptlrpc/recover.c
lustre/ptlrpc/service.c
lustre/quota/quota_check.c
lustre/quota/quota_context.c
lustre/quota/quota_ctl.c
lustre/quota/quota_interface.c
lustre/quota/quota_internal.h
lustre/quota/quota_master.c
lustre/quota/quotacheck_test.c
lustre/quota/quotactl_test.c
lustre/scripts/lustre
lustre/tests/cfg/local.sh
lustre/tests/conf-sanity.sh
lustre/tests/directio.c
lustre/tests/ll_dirstripe_verify.c
lustre/tests/mountconf.sh
lustre/tests/opendevunlink.c
lustre/tests/qos.sh [new file with mode: 0644]
lustre/tests/recovery-small.sh
lustre/tests/replay-single.sh
lustre/tests/rundbench
lustre/tests/sanity.sh
lustre/tests/sanityN.sh
lustre/tests/statmany.c
lustre/tests/statone.c
lustre/tests/testreq.c
lustre/tests/wantedi.c
lustre/utils/Makefile.am
lustre/utils/l_getgroups.c
lustre/utils/lconf
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/llmount.c
lustre/utils/llog_reader.c
lustre/utils/lustre_cfg.c
lustre/utils/mkfs_lustre.c
lustre/utils/mount_lustre.c
lustre/utils/obd.c
lustre/utils/obdctl.h
lustre/utils/obdiolib.h
lustre/utils/parser.c
lustre/utils/platform.h [new file with mode: 0644]
lustre/utils/rmmod_all.sh
lustre/utils/wirecheck.c
lustre/utils/wirehdr.c
lustre/utils/wiretest.c

index 657ecf4..b6439e6 100644 (file)
@@ -2,7 +2,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/ext3/extents.c    2005-06-14 16:31:25.756503133 +0200
 +++ linux-2.6.12-rc6/fs/ext3/extents.c 2005-06-14 16:31:25.836581257 +0200
-@@ -0,0 +1,2347 @@
+@@ -0,0 +1,2353 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -176,9 +176,9 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -448,8 +448,12 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -506,8 +510,10 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2644,7 +2650,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.6.12-rc6.orig/include/linux/ext3_extents.h 2005-06-14 16:31:25.780917195 +0200
 +++ linux-2.6.12-rc6/include/linux/ext3_extents.h      2005-06-14 16:31:25.932284381 +0200
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2742,7 +2748,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2843,15 +2849,13 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 0ee8d28..9e78214 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/extents.c   2005-02-17 22:07:57.023609040 +0300
 +++ linux-2.6.5-sles9/fs/ext3/extents.c        2005-02-23 01:02:37.396435640 +0300
-@@ -0,0 +1,2349 @@
+@@ -0,0 +1,2355 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -177,9 +177,9 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -449,8 +449,12 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -507,8 +511,10 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2634,7 +2640,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h        2005-02-17 22:07:57.023609040 +0300
 +++ linux-2.6.5-sles9/include/linux/ext3_extents.h     2005-02-23 01:02:37.416432600 +0300
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2732,7 +2738,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2833,15 +2839,13 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 56fe653..bd95c54 100644 (file)
@@ -2,7 +2,7 @@ Index: linux-stage/fs/ext3/extents.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/extents.c 2005-02-25 15:33:48.890198160 +0200
 +++ linux-stage/fs/ext3/extents.c      2005-02-25 15:33:48.917194056 +0200
-@@ -0,0 +1,2347 @@
+@@ -0,0 +1,2353 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -176,9 +176,9 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -448,8 +448,12 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -506,8 +510,10 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2629,7 +2635,7 @@ Index: linux-stage/include/linux/ext3_extents.h
 ===================================================================
 --- linux-stage.orig/include/linux/ext3_extents.h      2005-02-25 15:33:48.891198008 +0200
 +++ linux-stage/include/linux/ext3_extents.h   2005-02-25 15:33:48.944189952 +0200
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2727,7 +2733,7 @@ Index: linux-stage/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2828,15 +2834,13 @@ Index: linux-stage/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 1d8a4af..2a64875 100644 (file)
@@ -2570,7 +2570,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_old(handle, inode, block, count);
 +      else {
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
index 0c2f445..70f4f8a 100644 (file)
@@ -2565,7 +2565,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
index 5ff3d3b..01e7387 100644 (file)
@@ -2584,7 +2584,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
index 6fad0a6..727f180 100644 (file)
@@ -121,6 +121,57 @@ Description: write_conf for zeroconf mount queried LDAP incorrectly for client
 Details    : LDAP apparently contains 'lustreName' attributes instead of
             'name'.  A simple remapping of the name is sufficient.
 
+Severity   : major
+Frequency  : rare (only with non-default dump_on_timeout debug enabled)
+Bugzilla   : 10397
+Description: waiting_locks_callback trips kernel BUG if client is evicted
+Details    : Running with the dump_on_timeout debug flag turned on makes
+            it possible that the waiting_locks_callback() can try to dump
+            the Lustre kernel debug logs from an interrupt handler.  Defer
+            this log dumping to the expired_lock_main() thread.
+
+Severity   : enhancement
+Bugzilla   : 10420
+Description: Support NFS exporting on 2.6 kernels.
+Details    : Implement non-rawops metadata methods for NFS server to use without
+            changing NFS server code.
+
+Severity   : medium
+Frequency  : very rare (synthetic metadata workload only)
+Bugzilla   : 9974
+Description: two racing renames might cause an MDS thread to deadlock
+Details    : Running the "racer" program may cause one MDS thread to rename
+            a file from being the source of a rename to being the target of
+            a rename at exactly the same time that another thread is doing
+            so, and the second thread has already enqueued these locks after
+            doing a lookup of the target and is trying to relock them in
+            order.  Ensure that we don't try to re-lock the same resource.
+
+Severity   : major
+Frequency  : only very large systems with liblustre clients
+Bugzilla   : 7304 
+Description: slow eviction of liblustre clients with the "evict_by_nid" RPC
+Details    : Use asynchronous set_info RPCs to send the "evict_by_nid" to 
+            all OSTs in parallel.  This allows the eviction of stale liblustre
+            clients to proceed much faster than if they were done in series, 
+            and also offers similar improvements for other set_info RPCs.
+
+Severity   : minor
+Bugzilla   : 10265
+Description: excessive CPU usage during initial read phase on client
+Details    : During the initial read phase on a client, it would aggressively
+            retry readahead on the file, consuming too much CPU and impacting
+            performance (since 1.4.5.8).  Improve the readahead algorithm
+            to avoid this, and also improve some other common cases (read
+            of small files in particular, where "small" is files smaller than
+            /proc/fs/lustre/llite/*/max_read_ahead_whole_mb, 2MB by default).
+
+Severity   : minor
+Bugzilla   : 10450
+Description: MDS crash when receiving packet with unknown intent.
+Details    : Do not LBUG in unknown intent case, just return -EFAULT
+
+
 ------------------------------------------------------------------------------
 
 02-14-2006  Cluster File Systems, Inc. <info@clusterfs.com>
index 272cb89..c9e56a5 100644 (file)
@@ -51,8 +51,10 @@ endif
 
 lvfs-sources:
        $(MAKE) sources -C lvfs
+obdclass-sources:
+       $(MAKE) sources -C obdclass
 
-sources: $(LDISKFS) lvfs-sources lustre_build_version
+sources: $(LDISKFS) lvfs-sources obdclass-sources lustre_build_version
 
 all-recursive: lustre_build_version
 
index a1fe35d..47a58f5 100644 (file)
@@ -38,7 +38,7 @@ AC_SUBST(pymoddir)
 #
 AC_DEFUN([LC_TARGET_SUPPORTED],
 [case $target_os in
-       linux*)
+       linux* | darwin*)
 $1
                ;;
        *)
@@ -602,7 +602,7 @@ AC_DEFUN([LC_CONFIGURE],
 [LC_CONFIG_OBD_BUFFER_SIZE
 
 # include/liblustre.h
-AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h])
+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h])
 
 # include/lustre/lustre_user.h
 # See note there re: __ASM_X86_64_PROCESSOR_H
@@ -621,6 +621,10 @@ AC_CHECK_FUNCS([inet_ntoa])
 # llite/xattr.c
 AC_CHECK_HEADERS([linux/xattr_acl.h])
 
+# use universal lustre headers 
+# i.e: include/obd.h instead of include/linux/obd.h
+AC_CHECK_FILE($PWD/lustre/include/obd.h, [AC_DEFINE(UNIV_LUSTRE_HEADERS, 1, [Use universal lustre headers])])
+
 # Super safe df
 AC_ARG_ENABLE([mindf],
       AC_HELP_STRING([--enable-mindf],
@@ -661,8 +665,8 @@ lustre/autoconf/Makefile
 lustre/conf/Makefile
 lustre/doc/Makefile
 lustre/include/Makefile
+lustre/include/lustre_ver.h
 lustre/include/linux/Makefile
-lustre/include/linux/lustre_ver.h
 lustre/include/lustre/Makefile
 lustre/kernel_patches/targets/2.6-suse.target
 lustre/kernel_patches/targets/2.6-vanilla.target
@@ -701,6 +705,7 @@ lustre/fld/Makefile
 lustre/fld/autoMakefile
 lustre/obdclass/Makefile
 lustre/obdclass/autoMakefile
+lustre/obdclass/linux/Makefile
 lustre/obdecho/Makefile
 lustre/obdecho/autoMakefile
 lustre/obdfilter/Makefile
@@ -725,4 +730,10 @@ lustre/tests/Makefile
 lustre/utils/Lustre/Makefile
 lustre/utils/Makefile
 ])
+case $lb_target_os in
+        darwin)
+                AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ])
+                ;;
+esac
+
 ])
index 85b2de6..5a2d2a0 100644 (file)
@@ -1,7 +1,7 @@
 m4_define([LUSTRE_MAJOR],[1])
-m4_define([LUSTRE_MINOR],[4])
-m4_define([LUSTRE_PATCH],[6])
-m4_define([LUSTRE_FIX],[90])
+m4_define([LUSTRE_MINOR],[5])
+m4_define([LUSTRE_PATCH],[1])
+m4_define([LUSTRE_FIX],[0])
 
 dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after
 dnl # next release candidate/beta would spill this warning already.
index 95bb99c..e466501 100644 (file)
 
 #include <linux/module.h>
 
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-
+#include <obd.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre_ver.h>
 #include "cmm_internal.h"
 #include "mdc_internal.h"
 
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_ver.h>
-
 static struct obd_ops cmm_obd_device_ops = {
         .o_owner           = THIS_MODULE
 };
index bceb019..2f5d630 100644 (file)
@@ -25,8 +25,8 @@
 
 #if defined(__KERNEL__)
 
-#include <linux/obd.h>
-#include <linux/md_object.h>
+#include <obd.h>
+#include <md_object.h>
 
 struct cmm_device {
         struct md_device cmm_md_dev;
index 8e6b364..805546c 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_MDS
 
-#include <linux/obd.h>
-#include<linux/obd_class.h>
-
+#include <obd.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre_ver.h>
 #include "mdc_internal.h"
 
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_ver.h>
-
 static struct lu_device_operations mdc_lu_ops;
 
 static inline int lu_device_is_mdc(struct lu_device *ld)
index 668fed1..01b360f 100644 (file)
@@ -25,8 +25,8 @@
 
 #if defined(__KERNEL__)
 
-#include <linux/obd.h>
-#include <linux/md_object.h>
+#include <obd.h>
+#include <md_object.h>
 
 struct mdc_cli_desc {
         struct obd_connect_data  cl_conn_data;
index c3a73b3..a0f35c7 100644 (file)
 #endif
 
 #define DEBUG_SUBSYSTEM S_MDS
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/obd_class.h>
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <obd_class.h>
 #include "mdc_internal.h"
 
 static struct md_object_operations mdc_mo_ops;
index 5d3bfa6..0488b30 100644 (file)
  */
 
 #include <linux/module.h>
+#include <lustre/lustre_idl.h>
 
-#include <linux/obd.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_fid.h>
+#include <obd.h>
+#include <lustre_fid.h>
 
 void fid_to_le(struct lu_fid *dst, const struct lu_fid *src)
 {
index 6e1e59f..3f5706e 100644 (file)
@@ -28,9 +28,9 @@
 
 #include <linux/module.h>
 
-#include <linux/obd.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_fid.h>
+#include <lustre/lustre_idl.h>
+#include <obd.h>
+#include <lustre_fid.h>
 
 /* sequence manager initialization/finalization stuff */
 struct lu_seq_mgr *seq_mgr_init(struct lu_seq_mgr_ops *ops,
index ff381b3..0283a4a 100644 (file)
@@ -1,5 +1,6 @@
 MODULES := fld 
 fld-objs := fld_handle.o fld_iam.o
 
-EXTRA_PRE_CFLAGS := -I@LUSTRE@/ldiskfs/
+EXTRA_PRE_CFLAGS := -I@LUSTRE@ -I@LUSTRE@/ldiskfs
+
 @INCLUDE_RULES@
index cf74f84..7b395af 100644 (file)
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
-
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_ver.h>
-#include <linux/obd_support.h>
-#include <linux/lprocfs_status.h>
 #include <linux/jbd.h>
 
-#include <linux/dt_object.h>
-#include <linux/md_object.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_fid.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_mdc.h>
+#include <lustre_fid.h>
 #include "fld_internal.h"
 
 static int fld_handle(struct lu_context *ctx,
index ec51a8b..d69c6ed 100644 (file)
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/module.h>
-
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_ver.h>
-#include <linux/obd_support.h>
-#include <linux/lprocfs_status.h>
 #include <linux/jbd.h>
 
-#include <linux/dt_object.h>
-#include <linux/md_object.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_fid.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <dt_object.h>
+#include <md_object.h>
+#include <lustre_mdc.h>
+#include <lustre_fid.h>
 #include <linux/lustre_iam.h>
 #include "fld_internal.h"
 
@@ -96,9 +96,9 @@ int fld_handle_delete(struct lu_context *ctx, struct fld *fld,
 int fld_handle_lookup(struct lu_context *ctx,
                       struct fld *fld, fidseq_t seq_num, mdsno_t *mds_num)
 {
+#if 0
         int size;
 
-#if 0
         size = fld_param.id_rec_size;
         return fld->fld_dt->dd_ops->dt_iam_lookup(&lctx, fld->fld_dt,
                                                   fld->fld_info->fi_container,
index a8dd680..e2f4176 100644 (file)
@@ -11,3 +11,4 @@ Makefile
 Makefile.in
 .deps
 TAGS
+lustre_ver.h
index 00c4db7..794ae12 100644 (file)
@@ -5,5 +5,15 @@
 # See the file COPYING in this distribution
 
 SUBDIRS = linux lustre
-EXTRA_DIST = ioctl.h liblustre.h
+
+EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h         \
+            lustre_commit_confd.h lustre_debug.h lustre_dlm.h  \
+            lustre_export.h lustre_fsfilt.h lustre_ha.h        \
+            lustre_handles.h lustre_import.h lustre_lib.h      \
+            lustre_lite.h lustre_log.h lustre_mds.h lustre_mdc.h \
+            lustre_net.h lustre_quota.h lustre_ucache.h lvfs.h \
+            obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \
+            obd_ost.h obd_support.h lustre_ver.h lu_object.h   \
+             md_object.h dt_object.h lustre_param.h lustre_disk.h \
+             lustre_fid.h lustre_req_layout.h
 
diff --git a/lustre/include/darwin/lprocfs_status.h b/lustre/include/darwin/lprocfs_status.h
new file mode 100644 (file)
index 0000000..dc17b9f
--- /dev/null
@@ -0,0 +1,57 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *   Top level header file for LProc SNMP
+ *   Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _DARWIN_LPROCFS_SNMP_H
+#define _DARWIN_LPROCFS_SNMP_H
+
+#ifndef _LPROCFS_SNMP_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#ifdef LPROCFS
+#undef LPROCFS
+#endif
+
+#include <libcfs/libcfs.h>
+#define kstatfs statfs
+
+/*
+ * XXX nikita: temporary! Stubs for naked procfs calls made by Lustre
+ * code. Should be replaced with our own procfs-like API.
+ */
+
+static inline cfs_proc_dir_entry_t *proc_symlink(const char *name,
+                                                 cfs_proc_dir_entry_t *parent,
+                                                 const char *dest)
+{
+        return NULL;
+}
+
+static inline cfs_proc_dir_entry_t *create_proc_entry(const char *name,
+                                                      mode_t mode,
+                                                      cfs_proc_dir_entry_t *p)
+{
+        return NULL;
+}
+
+#endif /* _DARWIN_LPROCFS_SNMP_H */
diff --git a/lustre/include/darwin/lustre_compat.h b/lustre/include/darwin/lustre_compat.h
new file mode 100644 (file)
index 0000000..d11c8d6
--- /dev/null
@@ -0,0 +1,75 @@
+#ifndef __DARWIN_LUSTRE_COMPAT_H__
+#define __DARWIN_LUSTRE_COMPAT_H__
+
+#include <libcfs/libcfs.h>
+
+#ifdef __KERNEL__
+
+#ifndef HLIST_HEAD
+#define hlist_entry                     list_entry
+#define hlist_head                      list_head
+#define hlist_node                      list_head
+#define hlist_del_init                  list_del_init
+#define hlist_add_head                  list_add
+#define hlist_for_each_safe             list_for_each_safe
+
+/* XXX */
+#define LOOKUP_COBD                    4096
+#define CURRENT_SECONDS                        cfs_unix_seconds()
+
+#endif
+
+struct module;
+static inline int try_module_get(struct module *module)
+{
+       return 1;
+}
+
+static inline void module_put(struct module *module)
+{
+}
+
+#define THIS_MODULE                     NULL
+
+static inline void lustre_daemonize_helper(void)
+{
+       return;
+}
+
+static inline int32_t ext2_set_bit(int nr, void *a)
+{
+       int32_t old = test_bit(nr, a);
+       set_bit(nr, a);
+       return old;
+}
+
+static inline int32_t ext2_clear_bit(int nr, void *a)
+{
+       int32_t old = test_bit(nr, a);
+       clear_bit(nr, a);
+       return old;
+}
+
+struct nameidata;
+
+#if !defined(__DARWIN8__)
+static inline int ll_path_lookup(const char *path, unsigned int flags, struct nameidata *nd)
+{
+       int ret = 0;    /* NB: @flags is not consulted by this implementation */
+       NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)path, current_proc());
+       if ((ret = namei(nd)) != 0) {   /* explicit test: assignment is intended */
+               CERROR("ll_path_lookup fail!\n");
+       }
+       return ret;     /* namei() result is passed through unchanged */
+}
+#endif
+
+#define to_kdev_t(dev)                  (dev)
+#define kdev_t_to_nr(dev)               (dev)
+#define val_to_kdev(dev)                (dev)
+
+#define ext2_test_bit  test_bit
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/lustre/include/darwin/lustre_debug.h b/lustre/include/darwin/lustre_debug.h
new file mode 100644 (file)
index 0000000..b2b72f6
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _DARWIN_LUSTRE_DEBUG_H
+#define _DARWIN_LUSTRE_DEBUG_H
+
+#ifndef _LUSTRE_DEBUG_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#ifdef __KERNEL__
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...)   do {} while (0)
+#else
+#define LL_CDEBUG_PAGE(mask, page, fmt, arg...)   do {} while (0) 
+#endif
+
+#endif
diff --git a/lustre/include/darwin/lustre_dlm.h b/lustre/include/darwin/lustre_dlm.h
new file mode 100644 (file)
index 0000000..98587f3
--- /dev/null
@@ -0,0 +1,25 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * (visit-tags-table FILE)
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef _DARWIN_LUSTRE_DLM_H__
+#define _DARWIN_LUSTRE_DLM_H__
+
+#ifndef _LUSTRE_DLM_H__
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
+
+#define IT_OPEN     0x0001
+#define IT_CREAT    0x0002
+#define IT_READDIR  0x0004
+#define IT_GETATTR  0x0008
+#define IT_LOOKUP   0x0010
+#define IT_UNLINK   0x0020
+#define IT_GETXATTR 0x0040
+#define IT_EXEC     0x0080
+#define IT_PIN      0x0100
+#define IT_CHDIR    0x0200
+
+
+#endif
diff --git a/lustre/include/darwin/lustre_fsfilt.h b/lustre/include/darwin/lustre_fsfilt.h
new file mode 100644 (file)
index 0000000..e3d9a7e
--- /dev/null
@@ -0,0 +1,32 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2004 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Filesystem interface helper.
+ *
+ */
+
+#ifndef _DARWIN_LUSTRE_FSFILT_H
+#define _DARWIN_LUSTRE_FSFILT_H
+
+#ifndef _LUSTRE_FSFILT_H
+#error Do not #include this file directly. #include <lustre_fsfilt.h> instead
+#endif
+
+#endif
diff --git a/lustre/include/darwin/lustre_handles.h b/lustre/include/darwin/lustre_handles.h
new file mode 100644 (file)
index 0000000..341a25b
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __DARWIN_LUSTRE_HANDLES_H_
+#define __DARWIN_LUSTRE_HANDLES_H_ /* must match the #ifndef above for the guard to work */
+
+#ifndef __LUSTRE_HANDLES_H_
+#error Do not #include this file directly. #include <lustre_handles.h> instead
+#endif
+
+#include <libcfs/list.h>
+#include <libcfs/libcfs.h>
+
+#endif /* __DARWIN_LUSTRE_HANDLES_H_ */
+
diff --git a/lustre/include/darwin/lustre_lib.h b/lustre/include/darwin/lustre_lib.h
new file mode 100644 (file)
index 0000000..5adadae
--- /dev/null
@@ -0,0 +1,76 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic Lustre library routines.
+ *
+ */
+
+#ifndef _DARWIN_LUSTRE_LIB_H
+#define _DARWIN_LUSTRE_LIB_H
+
+#ifndef _LUSTRE_LIB_H
+#error Do not #include this file directly. #include <lustre_lib.h> instead
+#endif
+
+#include <string.h>
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h> 
+
+#ifndef LP_POISON
+#define LI_POISON ((int)0x5a5a5a5a)
+#define LL_POISON ((long)0x5a5a5a5a)
+#define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
+
+#ifndef LPU64
+#define LPU64 "%llu"
+#define LPD64 "%lld"
+#define LPX64 "%llx"
+#endif
+
+struct obd_ioctl_data;
+#define OBD_IOC_DATA_TYPE               struct obd_ioctl_data
+
+#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) |                \
+                           sigmask(SIGTERM) | sigmask(SIGQUIT) |               \
+                           sigmask(SIGALRM) | sigmask(SIGHUP))
+
+#ifdef __KERNEL__
+static inline sigset_t l_w_e_set_sigs(sigset_t sigs)
+{
+        sigset_t old = 0;
+
+        /* XXX Liang: how to change sigmask in Darwin8.x? 
+         * there is syscall like pthread_sigmask() but we cannot 
+         * use in kernel  */
+#if !defined(__DARWIN8__)
+        struct proc     *p = current_proc();
+        extern int block_procsigmask(struct proc *p,  int bit);
+        old = cfs_current()->uu_sigmask;
+        block_procsigmask(p, ~sigs);
+#endif
+
+        return old;
+}
+#endif
+
+#endif
+
+
diff --git a/lustre/include/darwin/lustre_lite.h b/lustre/include/darwin/lustre_lite.h
new file mode 100644 (file)
index 0000000..2fcfb96
--- /dev/null
@@ -0,0 +1,86 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * lustre lite cluster file system
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc. <info@clusterfs.com>
+ */
+
+
+
+#ifndef _DARWIN_LL_H
+#define _DARWIN_LL_H
+
+#ifndef _LL_H
+#error Do not #include this file directly. #include <lustre_lite.h> instead
+#endif
+
+#include <libcfs/libcfs.h>
+
+#ifdef __KERNEL__
+
+struct iattr {
+        unsigned int    ia_valid;
+        umode_t         ia_mode;
+        uid_t           ia_uid;
+        gid_t           ia_gid;
+        loff_t          ia_size;
+        time_t          ia_atime;
+        time_t          ia_mtime;
+        time_t          ia_ctime;
+        unsigned int    ia_attr_flags;
+};
+
+/*
+ * Intent data structures. For Linux they are defined in
+ * linux/include/linux/dcache.h
+ */
+#define IT_OPEN     0x0001
+#define IT_CREAT    0x0002
+#define IT_READDIR  0x0004
+#define IT_GETATTR  0x0008
+#define IT_LOOKUP   0x0010
+#define IT_UNLINK   0x0020
+#define IT_GETXATTR 0x0040
+#define IT_EXEC     0x0080
+#define IT_PIN      0x0100
+
+#define IT_FL_LOCKED   0x0001
+#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */
+
+#define INTENT_MAGIC 0x19620323 /* Happy birthday! */
+
+struct lustre_intent_data {
+        int     it_disposition;
+        int     it_status;
+        __u64   it_lock_handle;
+        void    *it_data;
+        int     it_lock_mode;
+};
+
+/*
+ * Liang: We keep the old lookup_intent struct in XNU 
+ * to avoid unnecessary allocate/free. 
+ */
+#define LUSTRE_IT(it) ((struct lustre_intent_data *)(&(it)->d.lustre))
+
+struct lookup_intent {
+       int     it_magic;
+       void    (*it_op_release)(struct lookup_intent *);
+       int     it_op;
+       int     it_flags;
+       int     it_create_mode;
+       union {
+                struct lustre_intent_data lustre;
+               void *fs_data;
+       } d;
+};
+
+struct super_operations{
+};
+#endif
+
+#endif
diff --git a/lustre/include/darwin/lustre_log.h b/lustre/include/darwin/lustre_log.h
new file mode 100644 (file)
index 0000000..d777465
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _DARWIN_LUSTRE_LOG_H
+#define _DARWIN_LUSTRE_LOG_H
+
+#ifndef _LUSTRE_LOG_H
+#error Do not #include this file directly. #include <lustre_log.h> instead
+#endif
+
+#undef LUSTRE_LOG_SERVER
+#include <darwin/lustre_compat.h>
+
+#endif
diff --git a/lustre/include/darwin/lustre_mds.h b/lustre/include/darwin/lustre_mds.h
new file mode 100644 (file)
index 0000000..7fd8549
--- /dev/null
@@ -0,0 +1,32 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _DARWIN_LUSTRE_MDS_H
+#define _DARWIN_LUSTRE_MDS_H
+
+#ifndef _LUSTRE_MDS_H
+#error Do not #include this file directly. #include <lustre_mds.h> instead
+#endif
+
+#include <lustre_lite.h>
+#include <libcfs/darwin/darwin-prim.h>
+
+#endif
diff --git a/lustre/include/darwin/lustre_net.h b/lustre/include/darwin/lustre_net.h
new file mode 100644 (file)
index 0000000..f028545
--- /dev/null
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _DARWIN_LUSTRE_NET_H
+#define _DARWIN_LUSTRE_NET_H
+
+#ifndef _LUSTRE_NET_H
+#error Do not #include this file directly. #include <lustre_net.h> instead
+#endif
+
+#include <libcfs/libcfs.h>
+
+#undef WITH_GROUP_INFO
+
+#endif
diff --git a/lustre/include/darwin/lustre_quota.h b/lustre/include/darwin/lustre_quota.h
new file mode 100644 (file)
index 0000000..5d0864f
--- /dev/null
@@ -0,0 +1,16 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _DARWIN_LUSTRE_QUOTA_H
+#define _DARWIN_LUSTRE_QUOTA_H
+
+#ifndef _LUSTRE_QUOTA_H
+#error Do not #include this file directly. #include <lustre_quota.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <libcfs/libcfs.h>
+#endif
+
+
+#endif /* _DARWIN_LUSTRE_QUOTA_H */
diff --git a/lustre/include/darwin/lustre_types.h b/lustre/include/darwin/lustre_types.h
new file mode 100644 (file)
index 0000000..651cf2d
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _LUSTRE_DARWIN_TYPES_H
+#define _LUSTRE_DARWIN_TYPES_H
+
+#include <string.h>
+#include <libcfs/libcfs.h>
+
+#endif
diff --git a/lustre/include/darwin/lustre_user.h b/lustre/include/darwin/lustre_user.h
new file mode 100644 (file)
index 0000000..a495e60
--- /dev/null
@@ -0,0 +1,47 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *   This file is part of Lustre, http://www.lustre.org
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _DARWIN_LUSTRE_USER_H
+#define _DARWIN_LUSTRE_USER_H
+
+#include <lustre/types.h>
+
+#ifndef __KERNEL__
+/* for llmount */
+# define _GNU_SOURCE
+# include <getopt.h>
+# include <sys/utsname.h>
+# include <sys/stat.h>
+# include <errno.h>
+# include <sys/mount.h>
+# include <sys/fcntl.h>
+# include <sys/ioccom.h>
+# include <sys/wait.h>
+# include <string.h>
+#endif
+
+typedef struct stat     lstat_t;
+#define HAVE_LOV_USER_MDS_DATA
+
+#ifndef LPU64
+#if (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%llu"
+# define LPD64 "%lld"
+# define LPX64 "%#llx"
+# define LPSZ  "%u"
+# define LPSSZ "%d"
+#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPSZ  "%lu"
+# define LPSSZ "%ld"
+#endif
+#endif /* !LPU64 */
+
+#endif /* _DARWIN_LUSTRE_USER_H */
diff --git a/lustre/include/darwin/lvfs.h b/lustre/include/darwin/lvfs.h
new file mode 100644 (file)
index 0000000..d271854
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef __DARWIN_LVFS_H__
+#define __DARWIN_LVFS_H__
+
+#ifndef __LVFS_H__
+#error Do not #include this file directly. #include <lvfs.h> instead
+#endif
+
+#ifdef LLOG_LVFS
+#undef LLOG_LVFS
+#endif
+
+struct lvfs_ucred { 
+       __u32 luc_fsuid; 
+       __u32 luc_fsgid; 
+       __u32 luc_cap; 
+       __u32 luc_uid; 
+       __u32 luc_umask;
+};
+
+struct lvfs_run_ctxt {
+       int     pid;
+};
+
+#endif
diff --git a/lustre/include/darwin/obd.h b/lustre/include/darwin/obd.h
new file mode 100644 (file)
index 0000000..175758e
--- /dev/null
@@ -0,0 +1,39 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+#ifndef __DARWIN_OBD_H
+#define __DARWIN_OBD_H
+
+#ifndef __OBD_H
+#error Do not #include this file directly. #include <obd.h> instead
+#endif
+
+#include <libcfs/libcfs.h>
+
+typedef struct semaphore client_obd_lock_t;
+
+static inline void client_obd_list_lock_init(client_obd_lock_t *lock)
+{
+        sema_init(lock, 1);
+}
+
+static inline void client_obd_list_lock_done(client_obd_lock_t *lock)
+{}
+
+static inline void client_obd_list_lock(client_obd_lock_t *lock)
+{
+        mutex_down(lock);
+}
+
+static inline void client_obd_list_unlock(client_obd_lock_t *lock)
+{
+        mutex_up(lock);
+}
+
+#endif /* __DARWIN_OBD_H */
diff --git a/lustre/include/darwin/obd_class.h b/lustre/include/darwin/obd_class.h
new file mode 100644 (file)
index 0000000..833da61
--- /dev/null
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __DARWIN_CLASS_OBD_H
+#define __DARWIN_CLASS_OBD_H
+
+#ifndef __CLASS_OBD_H
+#error Do not #include this file directly. #include <obd_class.h> instead
+#endif
+
+#if !defined(__KERNEL__)
+#define to_kdev_t(dev)          (dev)
+#endif
+
+#endif /* __DARWIN_CLASS_OBD_H */
diff --git a/lustre/include/darwin/obd_support.h b/lustre/include/darwin/obd_support.h
new file mode 100644 (file)
index 0000000..8ff7200
--- /dev/null
@@ -0,0 +1,58 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _DARWIN_OBD_SUPPORT
+#define _DARWIN_OBD_SUPPORT
+
+#ifndef _OBD_SUPPORT
+#error Do not #include this file directly. #include <obd_support.h> instead
+#endif
+
+#include <darwin/lustre_compat.h>
+
+#define CRCPOLY_LE 0xedb88320
+/**
+ * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
+ * @crc - seed value for computation.  ~0 for Ethernet, sometimes 0 for
+ *        other uses, or the previous crc32 value if computing incrementally.
+ * @p   - pointer to buffer over which CRC is run
+ * @len - length of buffer @p
+ */
+static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len)
+{
+        int bit;        /* counts the 8 shift steps applied per input byte */
+        for (; len != 0; len--) {
+                crc ^= *p++;
+                for (bit = 0; bit < 8; bit++)
+                        crc = (crc & 1) ? (crc >> 1) ^ CRCPOLY_LE : (crc >> 1);
+        }
+        return crc;
+}
+
+#define OBD_SLEEP_ON(wq)        sleep_on(wq)
+
+/* for obd_class.h */
+# ifndef ERR_PTR /* fallback only; a definition already in scope wins */
+#  define ERR_PTR(a) ((void *)(long)(a)) /* widen via long before the pointer cast */
+# endif
+
+#endif
diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h
new file mode 100644 (file)
index 0000000..95d0251
--- /dev/null
@@ -0,0 +1,329 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LUSTRE_DT_OBJECT_H
+#define __LUSTRE_DT_OBJECT_H
+
+/*
+ * Sub-class of lu_object with methods common for "data" objects in OST stack.
+ *
+ * Data objects behave like regular files: you can read/write them, get and
+ * set their attributes. Implementation of dt interface is supposed to
+ * implement some form of garbage collection, normally reference counting
+ * (nlink) based one.
+ *
+ * Examples: osd (lustre/osd) is an implementation of dt interface.
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+
+#include <libcfs/list.h>
+#include <libcfs/kp30.h>
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+
+struct thandle;
+struct txn_param;
+struct dt_device;
+struct dt_object;
+
+/*
+ * Lock mode for DT objects.
+ */
+enum dt_lock_mode {
+        /*
+         * Values of the @mode argument taken by ->do_object_lock() and
+         * ->do_object_unlock() in struct dt_object_operations.
+         */
+        DT_WRITE_LOCK = 1,
+        DT_READ_LOCK  = 2,
+};
+
+/*
+ * Operations on dt device.
+ */
+struct dt_device_operations {
+        /*
+         * Device-level method table; struct dt_device refers to it through
+         * its ->dd_ops field. Every method takes the calling lu_context as
+         * its first argument.
+         */
+
+        /*
+         * Method for getting/setting device wide back stored config data,
+         * like last used meta-sequence, etc.
+         *
+         * XXX this is ioctl()-like interface we want to get rid of.
+         *
+         * NOTE(review): @mode presumably selects between get and set, and
+         * @size is presumably the length of @buf -- confirm against callers.
+         */
+        int (*dt_config) (struct lu_context *ctx,
+                          struct dt_device *dev, const char *name,
+                          void *buf, int size, int mode);
+        /*
+         * Return device-wide statistics.
+         *
+         * NOTE(review): presumably the statistics are stored in @sfs.
+         */
+        int   (*dt_statfs)(struct lu_context *ctx,
+                           struct dt_device *dev, struct kstatfs *sfs);
+        /*
+         * Start transaction, described by @param.
+         *
+         * Returns the handle of the new transaction.
+         * NOTE(review): how failure is reported (NULL vs. an encoded error
+         * pointer) is not visible in this header -- confirm.
+         */
+        struct thandle *(*dt_trans_start)(struct lu_context *ctx,
+                                          struct dt_device *dev,
+                                          struct txn_param *param);
+        /*
+         * Finish previously started transaction.
+         *
+         * Returns nothing, so errors cannot be reported to the caller.
+         */
+        void  (*dt_trans_stop)(struct lu_context *ctx, struct thandle *th);
+        /*
+         * Return fid of root index object.
+         *
+         * NOTE(review): presumably the fid is written to @f and the int
+         * result is a status code -- confirm.
+         */
+        int   (*dt_root_get)(struct lu_context *ctx,
+                             struct dt_device *dev, struct lu_fid *f);
+};
+
+/*
+ * Per-dt-object operations.
+ */
+struct dt_object_operations {
+        /*
+         * Object-level method table; struct dt_object refers to it through
+         * its ->do_ops field.
+         */
+
+        /*
+         * Lock and unlock @dt in the given @mode (see enum dt_lock_mode).
+         * Both return void, so they cannot report failure.
+         */
+        void  (*do_object_lock)(struct lu_context *ctx,
+                                struct dt_object *dt, enum dt_lock_mode mode);
+        void  (*do_object_unlock)(struct lu_context *ctx,
+                                  struct dt_object *dt, enum dt_lock_mode mode);
+        /*
+         * Note: following ->do_{x,}attr_{set,get}() operations are very
+         * similar to ->moo_{x,}attr_{set,get}() operations in struct
+         * md_object_operations (see md_object.h). These operations are not in
+         * lu_object_operations, because ->do_{x,}attr_set() versions take
+         * transaction handle as an argument (this transaction is started by
+         * caller). We might factor ->do_{x,}attr_get() into
+         * lu_object_operations, but that would break existing symmetry.
+         */
+
+        /*
+         * Return standard attributes.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int   (*do_attr_get)(struct lu_context *ctxt, struct dt_object *dt,
+                             struct lu_attr *attr);
+        /*
+         * Set standard attributes.
+         *
+         * The change is made under the caller-started transaction @handle.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int   (*do_attr_set)(struct lu_context *ctxt, struct dt_object *dt,
+                             struct lu_attr *attr, struct thandle *handle);
+        /*
+         * Return a value of an extended attribute.
+         *
+         * NOTE(review): presumably @name selects the attribute and the value
+         * is copied into @buf (at most @buf_len bytes); the meaning of the
+         * int result (size vs. status) is not visible here -- confirm.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int   (*do_xattr_get)(struct lu_context *ctxt, struct dt_object *dt,
+                              void *buf, int buf_len, const char *name);
+        /*
+         * Set value of an extended attribute.
+         *
+         * The change is made under the caller-started transaction @handle.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int   (*do_xattr_set)(struct lu_context *ctxt, struct dt_object *dt,
+                              void *buf, int buf_len, const char *name,
+                              struct thandle *handle);
+        /*
+         * Create new object on this device.
+         *
+         * precondition: !lu_object_exists(ctxt, &dt->do_lu);
+         * postcondition: ergo(result == 0, lu_object_exists(ctxt, &dt->do_lu));
+         */
+        int   (*do_object_create)(struct lu_context *ctxt, struct dt_object *dt,
+                                  struct lu_attr *attr, struct thandle *th);
+        /*
+         * Destroy existing object.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         * postcondition: ergo(result == 0,
+         *                     !lu_object_exists(ctxt, &dt->do_lu));
+         */
+        int   (*do_object_destroy)(struct lu_context *ctxt,
+                                   struct dt_object *dt, struct thandle *th);
+};
+
+/*
+ * Per-dt-object operations on "file body".
+ */
+struct dt_body_operations {
+        /*
+         * Method table for the object's "file body"; struct dt_object refers
+         * to it through its ->do_body_ops field.
+         *
+         * All three prototypes end in "...": only the context and the object
+         * are fixed, the remaining arguments are not specified by this
+         * header.
+         */
+
+        /*
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dbo_read)(struct lu_context *ctxt, struct dt_object *dt, ...);
+        /*
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dbo_write)(struct lu_context *ctxt, struct dt_object *dt, ...);
+        /*
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dbo_truncate)(struct lu_context *ctxt, struct dt_object *dt, ...);
+};
+
+/*
+ * Incomplete type of index record.
+ */
+struct dt_rec;
+
+/*
+ * Incomplete type of index key.
+ */
+struct dt_key;
+
+struct dt_index_features {
+        /*
+         * Description of the features required from an index. A pointer to
+         * this structure is the @feat argument of ->dio_probe() in struct
+         * dt_index_operations.
+         */
+
+        /* required feature flags from enum dt_index_flags */
+        __u32 dif_flags;
+        /* minimal required key size */
+        size_t dif_keysize_min;
+        /* maximal required key size, 0 if no limit */
+        size_t dif_keysize_max;
+        /* minimal required record size */
+        size_t dif_recsize_min;
+        /* maximal required record size, 0 if no limit */
+        size_t dif_recsize_max;
+};
+
+enum dt_index_flags {
+        /*
+         * Bit flags for dt_index_features::dif_flags. Each value is a
+         * distinct single bit, so flags can be OR-ed together.
+         */
+
+        /* index supports variable sized keys */
+        DT_IND_VARKEY = 1 << 0,
+        /* index supports variable sized records */
+        DT_IND_VARREC = 1 << 1,
+        /* index can be modified */
+        DT_IND_UPDATE = 1 << 2,
+        /* index supports records with non-unique (duplicate) keys */
+        DT_IND_NONUNQ = 1 << 3
+};
+
+/*
+ * Features that an index must provide to support file system directories
+ * (mapping names to fids).
+ */
+extern const struct dt_index_features dt_directory_features;
+
+/*
+ * Per-dt-object operations on object as index.
+ */
+struct dt_index_operations {
+        /*
+         * Method table used when the object is treated as an index; struct
+         * dt_object refers to it through its ->do_index_ops field.
+         */
+
+        /*
+         * NOTE(review): @key is const and @rec is not, so presumably the
+         * record found for @key is returned through @rec -- confirm.
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dio_lookup)(struct lu_context *ctxt, struct dt_object *dt,
+                          struct dt_rec *rec, const struct dt_key *key);
+        /*
+         * Takes the transaction handle @handle in addition to the
+         * (@rec, @key) pair; neither @rec nor @key is modified (both const).
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dio_insert)(struct lu_context *ctxt, struct dt_object *dt,
+                          const struct dt_rec *rec, const struct dt_key *key,
+                          struct thandle *handle);
+        /*
+         * Same argument list as ->dio_insert().
+         *
+         * precondition: lu_object_exists(ctxt, &dt->do_lu);
+         */
+        int (*dio_delete)(struct lu_context *ctxt, struct dt_object *dt,
+                          const struct dt_rec *rec, const struct dt_key *key,
+                          struct thandle *handle);
+
+        /*
+         * Features probing. Returns 1 if this index supports all features in
+         * @feat, -ve on error, 0 otherwise.
+         */
+        int (*dio_probe)(struct lu_context *ctxt, struct dt_object *dt,
+                         const struct dt_index_features *feat);
+};
+
+struct dt_device {
+        /* embedded super-class; lu2dt_dev() maps it back to the dt_device */
+        struct lu_device             dd_lu_dev;
+        /* device-level method table */
+        struct dt_device_operations *dd_ops;
+        /*
+         * List of dt_txn_callback (see below). This is not protected in any
+         * way, because callbacks are supposed to be added/deleted only during
+         * single-threaded start-up and shut-down procedures.
+         */
+        struct list_head             dd_txn_callbacks;
+};
+
+int  dt_device_init(struct dt_device *dev, struct lu_device_type *t);
+void dt_device_fini(struct dt_device *dev);
+
+/*
+ * Check whether @d is tagged as a dt device, i.e. whether LU_DEVICE_DT is set
+ * in d->ld_type->ldt_tags.
+ *
+ * NOTE(review): ergo() is defined outside this header; presumably it is
+ * logical implication, in which case a NULL @d is accepted (non-zero result)
+ * without being dereferenced -- confirm.
+ */
+static inline int lu_device_is_dt(const struct lu_device *d)
+{
+        return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT);
+}
+
+/*
+ * Convert a generic lu_device into the dt_device that embeds it as
+ * ->dd_lu_dev. Asserts that lu_device_is_dt() accepts @l first.
+ */
+static inline struct dt_device * lu2dt_dev(struct lu_device *l)
+{
+        struct dt_device *dt;
+
+        LASSERT(lu_device_is_dt(l));
+        dt = container_of0(l, struct dt_device, dd_lu_dev);
+        return dt;
+}
+
+struct dt_object {
+        /* embedded super-class object */
+        struct lu_object             do_lu;
+        /* generic per-object methods (locking, attributes, create/destroy) */
+        struct dt_object_operations *do_ops;
+        /* methods operating on the "file body" */
+        struct dt_body_operations   *do_body_ops;
+        /* methods operating on the object as an index */
+        struct dt_index_operations  *do_index_ops;
+};
+
+int  dt_object_init(struct dt_object *obj,
+                    struct lu_object_header *h, struct lu_device *d);
+
+void dt_object_fini(struct dt_object *obj);
+
+/*
+ * Transaction description handed to ->dt_trans_start() and to the
+ * ->dtc_txn_start() call-backs.
+ */
+struct txn_param {
+        /* NOTE(review): presumably credits to reserve -- confirm */
+        unsigned int tp_credits;
+};
+
+/*
+ * Transaction handle: returned by ->dt_trans_start() and passed to
+ * ->dt_trans_stop() and to the modifying object/index methods.
+ */
+struct thandle {
+        /* NOTE(review): presumably the device the transaction runs on */
+        struct dt_device *th_dev;
+};
+
+/*
+ * Transaction call-backs.
+ *
+ * These are invoked by osd (or underlying transaction engine) when
+ * transaction changes state.
+ *
+ * Call-backs are used by upper layers to modify transaction parameters and to
+ * perform some actions on each transaction state transition. Typical
+ * example is mdt registering call-back to write into last-received file
+ * before each transaction commit.
+ */
+struct dt_txn_callback {
+        /*
+         * One call-back per transaction state change. ->dtc_txn_start()
+         * receives the transaction parameters, the other two receive the
+         * transaction handle; all of them also get a @cookie argument.
+         *
+         * NOTE(review): presumably these are driven by dt_txn_hook_start(),
+         * dt_txn_hook_stop() and dt_txn_hook_commit() respectively -- confirm.
+         */
+        int (*dtc_txn_start)(struct lu_context *ctx, struct dt_device *dev,
+                             struct txn_param *param, void *cookie);
+        int (*dtc_txn_stop)(struct lu_context *ctx, struct dt_device *dev,
+                            struct thandle *txn, void *cookie);
+        int (*dtc_txn_commit)(struct lu_context *ctx, struct dt_device *dev,
+                              struct thandle *txn, void *cookie);
+        /* NOTE(review): presumably passed back as @cookie -- confirm */
+        void            *dtc_cookie;
+        /* NOTE(review): presumably links into dt_device::dd_txn_callbacks */
+        struct list_head dtc_linkage;
+};
+
+void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb);
+void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb);
+
+int dt_txn_hook_start(struct lu_context *ctx,
+                      struct dt_device *dev, struct txn_param *param);
+int dt_txn_hook_stop(struct lu_context *ctx,
+                     struct dt_device *dev, struct thandle *txn);
+int dt_txn_hook_commit(struct lu_context *ctx,
+                       struct dt_device *dev, struct thandle *txn);
+
+#endif /* __LUSTRE_DT_OBJECT_H */
index b0f21cb..2bc0dcd 100644 (file)
@@ -57,8 +57,7 @@
 #include <string.h>
 #include <errno.h>
 #include <sys/stat.h>
-/* Hack for mkfs_lustre.c */
-#ifndef  NO_SYS_VFS
+#ifdef HAVE_SYS_VFS_H
 # include <sys/vfs.h>
 #endif
 #include <unistd.h>
@@ -403,9 +402,9 @@ static inline int kmem_cache_destroy(kmem_cache_t *a)
 #define kmap(page) (page)->addr
 #define kunmap(a) do {} while (0)
 
-static inline struct page *alloc_pages(int mask, unsigned long order)
+static inline cfs_page_t *alloc_pages(int mask, unsigned long order)
 {
-        struct page *pg = malloc(sizeof(*pg));
+        cfs_page_t *pg = malloc(sizeof(*pg));
 
         if (!pg)
                 return NULL;
@@ -424,7 +423,7 @@ static inline struct page *alloc_pages(int mask, unsigned long order)
 
 #define alloc_page(mask) alloc_pages((mask), 0)
 
-static inline void __free_pages(struct page *pg, int what)
+static inline void __free_pages(cfs_page_t *pg, int what)
 {
 #if 0 //#ifdef MAP_ANONYMOUS
         munmap(pg->addr, PAGE_SIZE);
@@ -437,9 +436,9 @@ static inline void __free_pages(struct page *pg, int what)
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(page) __free_page(page)
 
-static inline struct page* __grab_cache_page(unsigned long index)
+static inline cfs_page_t* __grab_cache_page(unsigned long index)
 {
-        struct page *pg = alloc_pages(0, 0);
+        cfs_page_t *pg = alloc_pages(0, 0);
 
         if (pg)
                 pg->index = index;
@@ -485,6 +484,7 @@ struct iattr {
         time_t          ia_ctime;
         unsigned int    ia_attr_flags;
 };
+#define ll_iattr_struct iattr
 
 #define IT_OPEN     0x0001
 #define IT_CREAT    0x0002
@@ -549,7 +549,9 @@ struct semaphore {
 
 /* use the macro's argument to avoid unused warnings */
 #define down(a) do { (void)a; } while (0)
+#define mutex_down(a)   down(a)
 #define up(a) do { (void)a; } while (0)
+#define mutex_up(a)     up(a)
 #define down_read(a) do { (void)a; } while (0)
 #define up_read(a) do { (void)a; } while (0)
 #define down_write(a) do { (void)a; } while (0)
@@ -563,6 +565,7 @@ static inline void init_MUTEX (struct semaphore *sem)
         sema_init(sem, 1);
 }
 
+#define init_mutex(s)   init_MUTEX(s)
 
 typedef struct  {
         struct list_head sleepers;
@@ -590,6 +593,11 @@ struct task_struct {
         __u32 cap_effective;
 };
 
+typedef struct task_struct cfs_task_t;
+#define cfs_current()           current
+#define cfs_curproc_pid()       (current->pid)
+#define cfs_curproc_comm()      (current->comm)
+
 extern struct task_struct *current;
 int in_group_p(gid_t gid);
 static inline int capable(int cap)
@@ -748,7 +756,7 @@ static inline void libcfs_run_lbug_upcall(char *file, const char *fn,
 /* completion */
 struct completion {
         unsigned int done;
-        wait_queue_head_t wait;
+        cfs_waitq_t wait;
 };
 
 #define COMPLETION_INITIALIZER(work) \
@@ -782,13 +790,13 @@ struct nfs_lock_info {
         void            *host;
 };
 
-struct file_lock {
+typedef struct file_lock {
         struct file_lock *fl_next;      /* singly linked list for this inode  */
         struct list_head fl_link;       /* doubly linked list of all locks */
         struct list_head fl_block;      /* circular list of blocked processes */
         void *fl_owner;
         unsigned int fl_pid;
-        wait_queue_head_t fl_wait;
+        cfs_waitq_t fl_wait;
         struct file *fl_file;
         unsigned char fl_flags;
         unsigned char fl_type;
@@ -805,7 +813,16 @@ struct file_lock {
         union {
                 struct nfs_lock_info    nfs_fl;
         } fl_u;
-};
+} cfs_flock_t;
+
+#define cfs_flock_type(fl)                  ((fl)->fl_type)
+#define cfs_flock_set_type(fl, type)        do { (fl)->fl_type = (type); } while(0)
+#define cfs_flock_pid(fl)                   ((fl)->fl_pid)
+#define cfs_flock_set_pid(fl, pid)          do { (fl)->fl_pid = (pid); } while(0)
+#define cfs_flock_start(fl)                 ((fl)->fl_start)
+#define cfs_flock_set_start(fl, start)      do { (fl)->fl_start = (start); } while(0)
+#define cfs_flock_end(fl)                   ((fl)->fl_end)
+#define cfs_flock_set_end(fl, end)          do { (fl)->fl_end = (end); } while(0)
 
 #ifndef OFFSET_MAX
 #define INT_LIMIT(x)    (~((x)1 << (sizeof(x)*8 - 1)))
@@ -820,13 +837,6 @@ struct file_lock {
 #define QUOTA_OK 0
 #define NO_QUOTA 1
 
-/* proc */
-#define proc_symlink(...)                       \
-({                                              \
-        void *result = NULL;                    \
-        result;                                 \
-})
-
 /* ACL */
 struct posix_acl_entry {
         short                   e_tag;
@@ -877,11 +887,11 @@ void posix_acl_release(struct posix_acl *acl)
 #define ENOTSUPP ENOTSUP
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_import.h>
-#include <linux/lustre_export.h>
-#include <linux/lustre_net.h>
+#include <obd_support.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_import.h>
+#include <lustre_export.h>
+#include <lustre_net.h>
 
 #endif
index ee57167..b731c89 100644 (file)
@@ -13,4 +13,3 @@ extN_jbd.h
 extN_xattr.h
 xattr.h
 lustre_build_version.h
-lustre_ver.h
index e9068bb..3d8313f 100644 (file)
@@ -6,13 +6,11 @@
 linuxdir = $(includedir)/linux
 
 if UTILS
-linux_HEADERS = lustre_idl.h
+linux_HEADERS = lustre_types.h lustre_user.h
 endif
 
-EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \
-  obd_cache.h obd_lov.h lustre_dlm.h lustre_handles.h lustre_disk.h \
-  lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \
-  lustre_export.h lustre_log.h obd_echo.h \
-  lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h lustre_mdc.h \
-  lustre_acl.h obd.h lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h  lustre_idl.h \
-  lustre_quota.h lustre_ucache.h lustre_ver.h.in lustre_param.h lu_object.h lustre_fid.h md_object.h lustre_req_layout.h
+EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_lib.h lustre_dlm.h    \
+            lustre_handles.h lustre_net.h obd_class.h obd_support.h     \
+            lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h \
+            obd.h lvfs.h lvfs_linux.h lustre_lite.h lustre_quota.h      \
+            lustre_user.h lustre_types.h
index 8af5b89..95d0251 100644 (file)
@@ -20,8 +20,8 @@
  *
  */
 
-#ifndef __LINUX_DT_OBJECT_H
-#define __LINUX_DT_OBJECT_H
+#ifndef __LUSTRE_DT_OBJECT_H
+#define __LUSTRE_DT_OBJECT_H
 
 /*
  * Sub-class of lu_object with methods common for "data" objects in OST stack.
@@ -38,7 +38,7 @@
 /*
  * super-class definitions.
  */
-#include <linux/lu_object.h>
+#include <lu_object.h>
 
 #include <libcfs/list.h>
 #include <libcfs/kp30.h>
@@ -283,6 +283,7 @@ struct dt_object {
 
 int  dt_object_init(struct dt_object *obj,
                     struct lu_object_header *h, struct lu_device *d);
+
 void dt_object_fini(struct dt_object *obj);
 
 struct txn_param {
@@ -325,4 +326,4 @@ int dt_txn_hook_stop(struct lu_context *ctx,
 int dt_txn_hook_commit(struct lu_context *ctx,
                        struct dt_device *dev, struct thandle *txn);
 
-#endif /* __LINUX_DT_OBJECT_H */
+#endif /* __LUSTRE_DT_OBJECT_H */
index 541975c..e3bf664 100644 (file)
  *   Top level header file for LProc SNMP
  *   Author: Hariharan Thantry thantry@users.sourceforge.net
  */
-#ifndef _LPROCFS_SNMP_H
-#define _LPROCFS_SNMP_H
+#ifndef _LINUX_LPROCFS_SNMP_H
+#define _LINUX_LPROCFS_SNMP_H
 
+#ifndef _LPROCFS_SNMP_H
+#error Do not #include this file directly. #include <lprocfs_status.h> instead
+#endif
 
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <linux/autoconf.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/version.h>
 #include <linux/smp.h>
 #include <libcfs/kp30.h>
 #  define kstatfs statfs
 #endif
 
-
-#undef LPROCFS
-#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
-# define LPROCFS
-#endif
-
-struct lprocfs_vars {
-        const char   *name;
-        read_proc_t *read_fptr;
-        write_proc_t *write_fptr;
-        void *data;
-};
-
-struct lprocfs_static_vars {
-        struct lprocfs_vars *module_vars;
-        struct lprocfs_vars *obd_vars;
-};
-
-/* An lprocfs counter can be configured using the enum bit masks below.
- *
- * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
- * protects this counter from concurrent updates. If not specified,
- * lprocfs an internal per-counter lock variable. External locks are
- * not used to protect counter increments, but are used to protect
- * counter readout and resets.
- *
- * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples,
- * (i.e. counter can be incremented by more than "1"). When specified,
- * the counter maintains min, max and sum in addition to a simple
- * invocation count. This allows averages to be be computed.
- * If not specified, the counter is an increment-by-1 counter.
- * min, max, sum, etc. are not maintained.
- *
- * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
- * squares (for multi-valued counter samples only). This allows
- * external computation of standard deviation, but involves a 64-bit
- * multiply per counter increment.
- */
-
-enum {
-        LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
-        LPROCFS_CNTR_AVGMINMAX    = 0x0002,
-        LPROCFS_CNTR_STDDEV       = 0x0004,
-
-        /* counter data type */
-        LPROCFS_TYPE_REGS         = 0x0100,
-        LPROCFS_TYPE_BYTES        = 0x0200,
-        LPROCFS_TYPE_PAGES        = 0x0400,
-        LPROCFS_TYPE_CYCLE        = 0x0800,
-};
-
-struct lprocfs_atomic {
-        atomic_t               la_entry;
-        atomic_t               la_exit;
-};
-
-struct lprocfs_counter {
-        struct lprocfs_atomic  lc_cntl;  /* may need to move to per set */
-        unsigned int           lc_config;
-        __u64                  lc_count;
-        __u64                  lc_sum;
-        __u64                  lc_min;
-        __u64                  lc_max;
-        __u64                  lc_sumsquare;
-        const char            *lc_name;   /* must be static */
-        const char            *lc_units;  /* must be static */
-};
-
-struct lprocfs_percpu {
-        struct lprocfs_counter lp_cntr[0];
-};
-
-
-struct lprocfs_stats {
-        unsigned int           ls_num;     /* # of counters */
-        unsigned int           ls_percpu_size;
-        struct lprocfs_percpu *ls_percpu[0];
-};
-
-
-/* class_obd.c */
-extern struct proc_dir_entry *proc_lustre_root;
-
-struct obd_device;
-struct file;
-struct obd_histogram;
-
-#ifdef LPROCFS
-
-/* Two optimized LPROCFS counter increment functions are provided:
- *     lprocfs_counter_incr(cntr, value) - optimized for by-one counters
- *     lprocfs_counter_add(cntr) - use for multi-valued counters
- * Counter data layout allows config flag, counter lock and the
- * count itself to reside within a single cache line.
- */
-
-static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
-                                       long amount)
-{
-        struct lprocfs_counter *percpu_cntr;
-
-        LASSERT(stats != NULL);
-        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
-        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-        percpu_cntr->lc_count++;
-
-        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
-                percpu_cntr->lc_sum += amount;
-                if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV)
-                        percpu_cntr->lc_sumsquare += (__u64)amount * amount;
-                if (amount < percpu_cntr->lc_min)
-                        percpu_cntr->lc_min = amount;
-                if (amount > percpu_cntr->lc_max)
-                        percpu_cntr->lc_max = amount;
-        }
-        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
-}
-
-static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
-{
-        struct lprocfs_counter *percpu_cntr;
-
-        LASSERT(stats != NULL);
-        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
-        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
-        percpu_cntr->lc_count++;
-        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
-}
-
-extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num);
-extern void lprocfs_free_stats(struct lprocfs_stats *stats);
-extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-                                   unsigned int num_private_stats);
-extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
-                                 unsigned conf, const char *name,
-                                 const char *units);
-extern void lprocfs_free_obd_stats(struct obd_device *obddev);
-extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name,
-                                  struct lprocfs_stats *stats);
-
-#define LPROCFS_INIT_VARS(name, vclass, vinstance)           \
-void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x)  \
-{                                                      \
-        x->module_vars = vclass;                       \
-        x->obd_vars = vinstance;                       \
-}                                                      \
-
-#define lprocfs_init_vars(NAME, VAR)     \
-do {      \
-        extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *);  \
-        lprocfs_##NAME##_init_vars(VAR);                                       \
-} while (0)
-/* lprocfs_status.c */
-extern int lprocfs_add_vars(struct proc_dir_entry *root,
-                            struct lprocfs_vars *var,
-                            void *data);
-
-extern struct proc_dir_entry *lprocfs_register(const char *name,
-                                               struct proc_dir_entry *parent,
-                                               struct lprocfs_vars *list,
-                                               void *data);
-
-extern void lprocfs_remove(struct proc_dir_entry *root);
-
-extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root,
-                                           const char *name);
-
-extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
-extern int lprocfs_obd_cleanup(struct obd_device *obd);
-
-/* Generic callbacks */
-
-extern int lprocfs_rd_u64(char *page, char **start, off_t off,
-                          int count, int *eof, void *data);
-extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
-                          int count, int *eof, void *data);
-extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
-                           int count, int *eof, void *data);
-extern int lprocfs_rd_name(char *page, char **start, off_t off,
-                           int count, int *eof, void *data);
-extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
-                             int count, int *eof, void *data);
-extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data);
-extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-                                int count, int *eof, void *data);
-extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-                                    int count, int *eof, void *data);
-extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data);
-extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-                              int count, int *eof, void *data);
-extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-                                   unsigned long count, void *data);
-extern int lprocfs_wr_ping(struct file *file, const char *buffer,
-                           unsigned long count, void *data);
-
-/* Statfs helpers */
-extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                              int count, int *eof, void *data);
-extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data);
-extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data);
-extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data);
-extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data);
-extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                                int count, int *eof, void *data);
-extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data);
-
-extern int lprocfs_write_helper(const char *buffer, unsigned long count,
-                                int *val);
-extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
-                                    __u64 *val);
-int lprocfs_obd_seq_create(struct obd_device *dev, char *name, mode_t mode,
-                           struct file_operations *seq_fops, void *data);
-void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
-void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
-void lprocfs_oh_clear(struct obd_histogram *oh);
-unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
-
-/* lprocfs_status.c: counter read/write functions */
-extern int lprocfs_counter_read(char *page, char **start, off_t off,
-                                int count, int *eof, void *data);
-extern int lprocfs_counter_write(struct file *file, const char *buffer,
-                                 unsigned long count, void *data);
-
-/* lprocfs_status.c: recovery status */
-int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
-                                   int count, int *eof, void *data);
-#else
-/* LPROCFS is not defined */
-static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
-                                       int index, long amount) { return; }
-static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
-                                        int index) { return; }
-static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
-                                        int index, unsigned conf,
-                                        const char *name, const char *units)
-{ return; }
-
-static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num)
-{ return NULL; }
-static inline void lprocfs_free_stats(struct lprocfs_stats *stats)
-{ return; }
-
-static inline int lprocfs_register_stats(struct proc_dir_entry *root,
-                                            const char *name,
-                                            struct lprocfs_stats *stats)
-{ return 0; }
-static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
-                                             unsigned int num_private_stats)
-{ return 0; }
-static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
-{ return; }
-
-static inline struct proc_dir_entry *
-lprocfs_register(const char *name, struct proc_dir_entry *parent,
-                 struct lprocfs_vars *list, void *data) { return NULL; }
-#define LPROCFS_INIT_VARS(name, vclass, vinstance)
-#define lprocfs_init_vars(...) do {} while (0)
-static inline int lprocfs_add_vars(struct proc_dir_entry *root,
-                                   struct lprocfs_vars *var,
-                                   void *data) { return 0; }
-static inline void lprocfs_remove(struct proc_dir_entry *root) {};
-static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head,
-                                    const char *name) {return 0;}
-static inline int lprocfs_obd_setup(struct obd_device *dev,
-                                    struct lprocfs_vars *list) { return 0; }
-static inline int lprocfs_obd_cleanup(struct obd_device *dev)  { return 0; }
-static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
-                                 int count, int *eof, void *data) { return 0; }
-static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data) { return 0; }
-static inline int lprocfs_rd_name(char *page, char **start, off_t off,
-                                  int count, int *eof, void *data) { return 0; }
-static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
-                                         int count, int *eof, void *data)
-{ return 0; }
-static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
-                                       int count, int *eof, void *data)
-{ return 0; }
-static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
-                                           int count, int *eof, void *data)
-{ return 0; }
-
-static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
-                                         int count, int *eof, void *data)
-{ return 0; }
-static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
-                                     int count, int *eof, void *data)
-{ return 0; }
-static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
-                                          unsigned long count, void *data)
-{ return 0; }
-static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
-                                  unsigned long count, void *data)
-{ return 0; }
-
-
-/* Statfs helpers */
-static inline
-int lprocfs_rd_blksize(char *page, char **start, off_t off,
-                       int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
-                           int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
-                          int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
-                           int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_rd_filestotal(char *page, char **start, off_t off,
-                          int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_rd_filesfree(char *page, char **start, off_t off,
-                         int count, int *eof, void *data)  { return 0; }
-static inline
-int lprocfs_rd_filegroups(char *page, char **start, off_t off,
-                          int count, int *eof, void *data) { return 0; }
-static inline
-void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
-static inline
-void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
-static inline
-void lprocfs_oh_clear(struct obd_histogram *oh) {}
-static inline
-unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
-static inline
-int lprocfs_counter_read(char *page, char **start, off_t off,
-                         int count, int *eof, void *data) { return 0; }
-static inline
-int lprocfs_counter_write(struct file *file, const char *buffer,
-                          unsigned long count, void *data) { return 0; }
-#endif /* LPROCFS */
-
 #endif /* LPROCFS_SNMP_H */
index a691224..7ae835c 100644 (file)
  *
  */
 
-#ifndef __LINUX_LU_OBJECT_H
-#define __LINUX_LU_OBJECT_H
+#ifndef __LUSTRE_LU_OBJECT_H
+#define __LUSTRE_LU_OBJECT_H
 
 /*
  * struct lu_fid
  */
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 
 #include <libcfs/list.h>
 #include <libcfs/kp30.h>
@@ -759,4 +759,4 @@ void lu_context_enter(struct lu_context *ctx);
 void lu_context_exit(struct lu_context *ctx);
 
 
-#endif /* __LINUX_LU_OBJECT_H */
+#endif /* __LUSTRE_LU_OBJECT_H */
index 5d804c8..066cc20 100644 (file)
@@ -20,8 +20,8 @@
  *
  */
 
-#ifndef _COMPAT25_H
-#define _COMPAT25_H
+#ifndef _LINUX_COMPAT25_H
+#define _LINUX_COMPAT25_H
 
 #ifdef __KERNEL__
 
 
 #include <libcfs/linux/portals_compat25.h>
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
+struct ll_iattr_struct {
+        struct iattr    iattr;
+        unsigned int    ia_attr_flags;
+};
+#else
+#define ll_iattr_struct iattr
+#endif
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 #define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0)
 #define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0)
@@ -181,6 +190,7 @@ static inline int cleanup_group_info(void)
 #define ILOOKUP(sb, ino, test, data)        ilookup4(sb, ino, test, data);
 #define DCACHE_DISCONNECTED                 DCACHE_NFSD_DISCONNECTED
 #define ll_dev_t                            int
+#define old_encode_dev(dev)                 (dev)
 
 /* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
  * as 2.5 and let macros drop back.. */
index 7d76d8d..db872a9 100644 (file)
  *
  */
 
-#ifndef _LUSTRE_DEBUG_H
-#define _LUSTRE_DEBUG_H
-
-#include <linux/lustre_net.h>
-
-#define ASSERT_MAX_SIZE_MB 60000ULL
-#define ASSERT_PAGE_INDEX(index, OP)                                    \
-do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)) {             \
-        CERROR("bad page index %lu > %Lu\n", index,                     \
-               ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT));                \
-        libcfs_debug = ~0UL;                                            \
-        OP;                                                             \
-}} while(0)
+#ifndef _LINUX_LUSTRE_DEBUG_H
+#define _LINUX_LUSTRE_DEBUG_H
 
-#define ASSERT_FILE_OFFSET(offset, OP)                                  \
-do { if (offset > ASSERT_MAX_SIZE_MB << 20) {                           \
-        CERROR("bad file offset %Lu > %Lu\n", offset,                   \
-               ASSERT_MAX_SIZE_MB << 20);                               \
-        libcfs_debug = ~0UL;                                            \
-        OP;                                                             \
-}} while(0)
+#ifndef _LUSTRE_DEBUG_H
+#error Do not #include this file directly. #include <lustre_debug.h> instead
+#endif
 
 #ifdef __KERNEL__
 #define LL_CDEBUG_PAGE(mask, page, fmt, arg...)                               \
@@ -53,13 +38,4 @@ do { if (offset > ASSERT_MAX_SIZE_MB << 20) {                           \
                fmt, page, page->index, page_private(page), ## arg)
 #endif
 
-/* lib/debug.c */
-int dump_lniobuf(struct niobuf_local *lnb);
-int dump_rniobuf(struct niobuf_remote *rnb);
-int dump_ioo(struct obd_ioobj *nb);
-int dump_req(struct ptlrpc_request *req);
-int dump_obdo(struct obdo *oa);
-void dump_lsm(int level, struct lov_stripe_md *lsm);
-int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
-int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
 #endif
index 8b198eb..a96c161 100644 (file)
  * vim:expandtab:shiftwidth=8:tabstop=8:
  */
 
+#ifndef _LINUX_LUSTRE_DLM_H__
+#define _LINUX_LUSTRE_DLM_H__
+
 #ifndef _LUSTRE_DLM_H__
-#define _LUSTRE_DLM_H__
+#error Do not #include this file directly. #include <lustre_dlm.h> instead
+#endif
 
 #ifdef __KERNEL__
 # include <linux/proc_fs.h>
 #endif
 
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_import.h>
-#include <linux/lustre_handles.h>
-#include <linux/lustre_export.h> /* for obd_export, for LDLM_DEBUG */
-
-struct obd_ops;
-struct obd_device;
-
-#define OBD_LDLM_DEVICENAME  "ldlm"
-
-#define LDLM_DEFAULT_LRU_SIZE (100 * smp_num_cpus)
-
-typedef enum {
-        ELDLM_OK = 0,
-
-        ELDLM_LOCK_CHANGED = 300,
-        ELDLM_LOCK_ABORTED = 301,
-        ELDLM_LOCK_REPLACED = 302,
-        ELDLM_NO_LOCK_DATA = 303,
-
-        ELDLM_NAMESPACE_EXISTS = 400,
-        ELDLM_BAD_NAMESPACE    = 401
-} ldlm_error_t;
-
-#define LDLM_NAMESPACE_SERVER 0
-#define LDLM_NAMESPACE_CLIENT 1
-
-#define LDLM_FL_LOCK_CHANGED   0x000001 /* extent, mode, or resource changed */
-
-/* If the server returns one of these flags, then the lock was put on that list.
- * If the client sends one of these flags (during recovery ONLY!), it wants the
- * lock added to the specified list, no questions asked. -p */
-#define LDLM_FL_BLOCK_GRANTED  0x000002
-#define LDLM_FL_BLOCK_CONV     0x000004
-#define LDLM_FL_BLOCK_WAIT     0x000008
-
-#define LDLM_FL_CBPENDING      0x000010 /* this lock is being destroyed */
-#define LDLM_FL_AST_SENT       0x000020 /* blocking or cancel packet was sent */
-#define LDLM_FL_WAIT_NOREPROC  0x000040 /* not a real flag, not saved in lock */
-#define LDLM_FL_CANCEL         0x000080 /* cancellation callback already run */
-
-/* Lock is being replayed.  This could probably be implied by the fact that one
- * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. */
-#define LDLM_FL_REPLAY         0x000100
-
-#define LDLM_FL_INTENT_ONLY    0x000200 /* don't grant lock, just do intent */
-#define LDLM_FL_LOCAL_ONLY     0x000400 /* see ldlm_cli_cancel_unused */
-
-/* don't run the cancel callback under ldlm_cli_cancel_unused */
-#define LDLM_FL_FAILED         0x000800
-
-#define LDLM_FL_HAS_INTENT     0x001000 /* lock request has intent */
-#define LDLM_FL_CANCELING      0x002000 /* lock cancel has already been sent */
-#define LDLM_FL_LOCAL          0x004000 /* local lock (ie, no srv/cli split) */
-#define LDLM_FL_WARN           0x008000 /* see ldlm_cli_cancel_unused */
-#define LDLM_FL_DISCARD_DATA   0x010000 /* discard (no writeback) on cancel */
-
-#define LDLM_FL_NO_TIMEOUT     0x020000 /* Blocked by group lock - wait
-                                         * indefinitely */
-
-/* file & record locking */
-#define LDLM_FL_BLOCK_NOWAIT   0x040000 // server told not to wait if blocked
-#define LDLM_FL_TEST_LOCK      0x080000 // return blocking lock
-
-/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that
- * the LVB filling happens _after_ the lock has been granted, so another thread
- * can match before the LVB has been updated.  As a dirty hack, we set
- * LDLM_FL_CAN_MATCH only after we've done the LVB poop.
- *
- * The proper fix is to do the granting inside of the completion AST, which can
- * be replaced with a LVB-aware wrapping function for OSC locks.  That change is
- * pretty high-risk, though, and would need a lot more testing. */
-#define LDLM_FL_CAN_MATCH      0x100000
-
-/* A lock contributes to the kms calculation until it has finished the part
- * of it's cancelation that performs write back on its dirty pages.  It
- * can remain on the granted list during this whole time.  Threads racing
- * to update the kms after performing their writeback need to know to
- * exclude each others locks from the calculation as they walk the granted
- * list. */
-#define LDLM_FL_KMS_IGNORE     0x200000
-
-/* Don't drop lock covering mmapped file in LRU */
-#define LDLM_FL_NO_LRU         0x400000
-
-/* Immediatelly cancel such locks when they block some other locks. Send
-   cancel notification to original lock holder, but expect no reply. */
-#define LDLM_FL_CANCEL_ON_BLOCK 0x800000
-
-/* Flags flags inherited from parent lock when doing intents. */
-#define LDLM_INHERIT_FLAGS     (LDLM_FL_CANCEL_ON_BLOCK)
-
-/* These are flags that are mapped into the flags and ASTs of blocking locks */
-#define LDLM_AST_DISCARD_DATA  0x80000000 /* Add FL_DISCARD to blocking ASTs */
-/* Flags sent in AST lock_flags to be mapped into the receiving lock. */
-#define LDLM_AST_FLAGS         (LDLM_FL_DISCARD_DATA)
-
-/* The blocking callback is overloaded to perform two functions.  These flags
- * indicate which operation should be performed. */
-#define LDLM_CB_BLOCKING    1
-#define LDLM_CB_CANCELING   2
-
-/* compatibility matrix */
-#define LCK_COMPAT_EX  LCK_NL
-#define LCK_COMPAT_PW  (LCK_COMPAT_EX | LCK_CR)
-#define LCK_COMPAT_PR  (LCK_COMPAT_PW | LCK_PR)
-#define LCK_COMPAT_CW  (LCK_COMPAT_PW | LCK_CW)
-#define LCK_COMPAT_CR  (LCK_COMPAT_CW | LCK_PR | LCK_PW)
-#define LCK_COMPAT_NL  (LCK_COMPAT_CR | LCK_EX)
-#define LCK_COMPAT_GROUP  (LCK_GROUP | LCK_NL)
-
-extern ldlm_mode_t lck_compat_array[];
-
-static inline void lockmode_verify(ldlm_mode_t mode)
-{
-       LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE);
-}
-
-static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new)
-{
-       return (lck_compat_array[exist] & new);
-}
-
-/*
- *
- * cluster name spaces
- *
- */
-
-#define DLM_OST_NAMESPACE 1
-#define DLM_MDS_NAMESPACE 2
-
-/* XXX
-   - do we just separate this by security domains and use a prefix for
-     multiple namespaces in the same domain?
-   -
-*/
-
-struct ldlm_lock;
-struct ldlm_resource;
-struct ldlm_namespace;
-
-typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
-                               void *req_cookie, ldlm_mode_t mode, int flags,
-                               void *data);
-
-struct ldlm_valblock_ops {
-        int (*lvbo_init)(struct ldlm_resource *res);
-        int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m,
-                           int buf_idx, int increase);
-};
-
-struct ldlm_namespace {
-        char                  *ns_name;
-        __u32                  ns_client; /* is this a client-side lock tree? */
-        struct list_head      *ns_hash; /* hash table for ns */
-        wait_queue_head_t      ns_refcount_waitq; /* for cleanup */
-        atomic_t               ns_refcount; /* count of resources in the hash */
-        struct list_head       ns_root_list; /* all root resources in ns */
-        struct lustre_lock     ns_lock; /* protects hash, refcount, list */
-        struct list_head       ns_list_chain; /* position in global NS list */
-
-        struct list_head       ns_unused_list; /* all root resources in ns */
-        int                    ns_nr_unused;
-        unsigned int           ns_max_unused;
-        unsigned long          ns_next_dump;   /* next debug dump, jiffies */
-
-        spinlock_t             ns_counter_lock;
-        __u64                  ns_locks;
-        ldlm_res_policy        ns_policy;
-        struct ldlm_valblock_ops *ns_lvbo;
-        void                    *ns_lvbp;
-};
-
-/*
- *
- * Resource hash table
- *
- */
-
-#define RES_HASH_BITS 10
-#define RES_HASH_SIZE (1UL << RES_HASH_BITS)
-#define RES_HASH_MASK (RES_HASH_SIZE - 1)
-
-struct ldlm_lock;
-
-typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
-                                      struct ldlm_lock_desc *new, void *data,
-                                      int flag);
-typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags,
-                                        void *data);
-typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
-
-struct ldlm_lock {
-        struct portals_handle l_handle; // must be first in the structure
-        atomic_t              l_refc;
-        struct ldlm_resource *l_resource;
-        struct ldlm_lock     *l_parent;
-        struct list_head      l_children;
-        struct list_head      l_childof;
-        struct list_head      l_lru;
-        struct list_head      l_res_link; // position in one of three res lists
-        struct list_head      l_export_chain; // per-export chain of locks
-
-        ldlm_mode_t           l_req_mode;
-        ldlm_mode_t           l_granted_mode;
-
-        ldlm_completion_callback l_completion_ast;
-        ldlm_blocking_callback   l_blocking_ast;
-        ldlm_glimpse_callback    l_glimpse_ast;
-
-        struct obd_export    *l_export;
-        struct obd_export    *l_conn_export;
-        __u32                 l_flags;
-        struct lustre_handle  l_remote_handle;
-        ldlm_policy_data_t    l_policy_data;
-
-        __u32                 l_readers;
-        __u32                 l_writers;
-        __u8                  l_destroyed;
-
-        /* If the lock is granted, a process sleeps on this waitq to learn when
-         * it's no longer in use.  If the lock is not granted, a process sleeps
-         * on this waitq to learn when it becomes granted. */
-        wait_queue_head_t     l_waitq;
-        struct timeval        l_enqueued_time;
-
-        unsigned long         l_last_used;      /* jiffies */
-        struct ldlm_extent    l_req_extent;
-
-        /* Client-side-only members */
-        __u32                 l_lvb_len;        /* temporary storage for */
-        void                 *l_lvb_data;       /* an LVB received during */
-        void                 *l_lvb_swabber;    /* an enqueue */
-        void                 *l_ast_data;
-
-        /* Server-side-only members */
-        struct list_head      l_pending_chain;  /* callbacks pending */
-        unsigned long         l_callback_timeout; /* jiffies */
-
-        __u32                 l_pid;            /* pid which created this lock */
-};
-
-struct ldlm_resource {
-        struct ldlm_namespace *lr_namespace;
-        struct list_head       lr_hash;
-        struct ldlm_resource  *lr_parent;   /* 0 for a root resource */
-        struct list_head       lr_children; /* list head for child resources */
-        struct list_head       lr_childof;  /* part of ns_root_list if root res,
-                                             * part of lr_children if child */
-
-        struct list_head       lr_granted;
-        struct list_head       lr_converting;
-        struct list_head       lr_waiting;
-        ldlm_mode_t            lr_most_restr;
-        ldlm_type_t            lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */
-        struct ldlm_resource  *lr_root;
-        struct ldlm_res_id     lr_name;
-        atomic_t               lr_refcount;
-
-        /* Server-side-only lock value block elements */
-        struct semaphore       lr_lvb_sem;
-        __u32                  lr_lvb_len;
-        void                  *lr_lvb_data;
-
-        /* lr_tmp holds a list head temporarily, during the building of a work
-         * queue.  see ldlm_add_ast_work_item and ldlm_run_ast_work */
-        void                  *lr_tmp;
-};
-
-struct ldlm_ast_work {
-        struct ldlm_lock *w_lock;
-        int               w_blocking;
-        struct ldlm_lock_desc w_desc;
-        struct list_head   w_list;
-        int w_flags;
-        void *w_data;
-        int w_datalen;
-};
-
-extern struct obd_ops ldlm_obd_ops;
-
-extern char *ldlm_lockname[];
-extern char *ldlm_typename[];
-extern char *ldlm_it2str(int it);
-
-#define __LDLM_DEBUG(level, lock, format, a...)                               \
-do {                                                                          \
-        if (lock->l_resource == NULL) {                                       \
-                CDEBUG(level, "### " format                                   \
-                       " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
-                       "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "   \
-                       LPX64" expref: %d pid: %u\n" , ## a, lock,             \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_EXTENT) {                       \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
-                       "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64     \
-                       " expref: %d pid: %u\n" , ## a,                        \
-                       lock->l_resource->lr_namespace->ns_name, lock,         \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_policy_data.l_extent.start,                    \
-                       lock->l_policy_data.l_extent.end,                      \
-                       lock->l_req_extent.start, lock->l_req_extent.end,      \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_FLOCK) {                        \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "       \
-                       "["LPU64"->"LPU64"] flags: %x remote: "LPX64           \
-                       " expref: %d pid: %u\n" , ## a,                        \
-                       lock->l_resource->lr_namespace->ns_name, lock,         \
-                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_policy_data.l_flock.pid,                       \
-                       lock->l_policy_data.l_flock.start,                     \
-                       lock->l_policy_data.l_flock.end,                       \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        if (lock->l_resource->lr_type == LDLM_IBITS) {                        \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "  \
-                       "flags: %x remote: "LPX64" expref: %d "                \
-                       "pid %u\n" , ## a,                                     \
-                       lock->l_resource->lr_namespace->ns_name,               \
-                       lock, lock->l_handle.h_cookie,                         \
-                       atomic_read (&lock->l_refc),                           \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       lock->l_policy_data.l_inodebits.bits,                  \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-                break;                                                        \
-        }                                                                     \
-        {                                                                     \
-                CDEBUG(level, "### " format                                   \
-                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
-                       "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "     \
-                       "remote: "LPX64" expref: %d pid: %u\n" , ## a,         \
-                       lock->l_resource->lr_namespace->ns_name,               \
-                       lock, lock->l_handle.h_cookie,                         \
-                       atomic_read (&lock->l_refc),                           \
-                       lock->l_readers, lock->l_writers,                      \
-                       ldlm_lockname[lock->l_granted_mode],                   \
-                       ldlm_lockname[lock->l_req_mode],                       \
-                       lock->l_resource->lr_name.name[0],                     \
-                       lock->l_resource->lr_name.name[1],                     \
-                       atomic_read(&lock->l_resource->lr_refcount),           \
-                       ldlm_typename[lock->l_resource->lr_type],              \
-                       lock->l_flags, lock->l_remote_handle.cookie,           \
-                       lock->l_export ?                                       \
-                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
-                       lock->l_pid);                                          \
-        }                                                                     \
-} while (0)
-
-#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \
-                                                    format, ## a)
-#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a)
-
-#define LDLM_DEBUG_NOLOCK(format, a...)                 \
-        CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
-
-typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags,
-                                      int first_enq, ldlm_error_t *err);
-
-/*
- * Iterators.
- */
-
-#define LDLM_ITER_CONTINUE 1 /* keep iterating */
-#define LDLM_ITER_STOP     2 /* stop iterating */
-
-typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
-typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
-
-int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
-                          void *closure);
-int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
-                           void *closure);
-int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
-                               ldlm_res_iterator_t iter, void *closure);
-
-int ldlm_replay_locks(struct obd_import *imp);
-void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
-                        ldlm_iterator_t iter, void *data);
-
-/* ldlm_flock.c */
-int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
-
-/* ldlm_extent.c */
-__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
-
-struct ldlm_callback_suite {
-        ldlm_completion_callback lcs_completion;
-        ldlm_blocking_callback   lcs_blocking;
-        ldlm_glimpse_callback    lcs_glimpse;
-};
-
-/* ldlm_lockd.c */
-int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
-                             void *data, int flag);
-int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data);
-int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data);
-int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback,
-                        ldlm_blocking_callback, ldlm_glimpse_callback);
-int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req,
-                         struct ldlm_request *dlm_req,
-                         struct ldlm_callback_suite *cbs);
-int ldlm_handle_convert(struct ptlrpc_request *req);
-int ldlm_handle_convert0(struct ptlrpc_request *req,
-                         struct ldlm_request *dlm_req);
-int ldlm_handle_cancel(struct ptlrpc_request *req);
-int ldlm_del_waiting_lock(struct ldlm_lock *lock);
-int ldlm_get_ref(void);
-void ldlm_put_ref(int force);
-
-/* ldlm_lock.c */
-ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res);
-void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg);
-void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh);
-struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags);
-void ldlm_cancel_callback(struct ldlm_lock *);
-int ldlm_lock_set_data(struct lustre_handle *, void *data);
-void ldlm_lock_remove_from_lru(struct ldlm_lock *);
-struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *,
-                                      struct lustre_handle *);
-
-static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
-{
-        return __ldlm_handle2lock(h, 0);
-}
-
-#define LDLM_LOCK_PUT(lock)                     \
-do {                                            \
-        /*LDLM_DEBUG((lock), "put");*/          \
-        ldlm_lock_put(lock);                    \
-} while (0)
-
-#define LDLM_LOCK_GET(lock)                     \
-({                                              \
-        ldlm_lock_get(lock);                    \
-        /*LDLM_DEBUG((lock), "get");*/          \
-        lock;                                   \
-})
-
-struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
-void ldlm_lock_put(struct ldlm_lock *lock);
-void ldlm_lock_destroy(struct ldlm_lock *lock);
-void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
-void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
-void ldlm_lock_allow_match(struct ldlm_lock *lock);
-int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *,
-                    ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode,
-                    struct lustre_handle *);
-struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
-                                        int *flags);
-void ldlm_lock_cancel(struct ldlm_lock *lock);
-void ldlm_cancel_locks_for_export(struct obd_export *export);
-void ldlm_reprocess_all(struct ldlm_resource *res);
-void ldlm_reprocess_all_ns(struct ldlm_namespace *ns);
-void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos);
-void ldlm_lock_dump_handle(int level, struct lustre_handle *);
-
-/* resource.c */
-struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 local);
-int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags);
-int ldlm_namespace_free(struct ldlm_namespace *ns, int force);
-int ldlm_proc_setup(void);
-#ifdef LPROCFS
-void ldlm_proc_cleanup(void);
-#else
-static inline void ldlm_proc_cleanup(void) {}
-#endif
-
-/* resource.c - internal */
-struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
-                                        struct ldlm_resource *parent,
-                                        struct ldlm_res_id, ldlm_type_t type,
-                                        int create);
-struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
-int ldlm_resource_putref(struct ldlm_resource *res);
-void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
-                            struct ldlm_lock *lock);
-void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
-void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
-void ldlm_dump_all_namespaces(int level);
-void ldlm_namespace_dump(int level, struct ldlm_namespace *);
-void ldlm_resource_dump(int level, struct ldlm_resource *);
-int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
-                              struct ldlm_res_id);
-
-/* ldlm_request.c */
-int ldlm_expired_completion_wait(void *data);
-int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                      void *data, int flag);
-int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp);
-int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data);
-int ldlm_cli_enqueue(struct obd_export *exp,
-                     struct ptlrpc_request *req,
-                     struct ldlm_namespace *ns,
-                     struct ldlm_res_id,
-                     ldlm_type_t type,
-                     ldlm_policy_data_t *,
-                     ldlm_mode_t mode,
-                     int *flags,
-                     ldlm_blocking_callback blocking,
-                     ldlm_completion_callback completion,
-                     ldlm_glimpse_callback glimpse,
-                     void *data,
-                     void *lvb,
-                     __u32 lvb_len,
-                     void *lvb_swabber,
-                     struct lustre_handle *lockh);
-int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
-                    void *data, __u32 data_len);
-int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags);
-int ldlm_cli_cancel(struct lustre_handle *lockh);
-int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
-                           int flags, void *opaque);
-int ldlm_cli_join_lru(struct ldlm_namespace *, struct ldlm_res_id *,
-                      int join);
-
-/* mds/handler.c */
-/* This has to be here because recursive inclusion sucks. */
-int intent_disposition(struct ldlm_reply *rep, int flag);
-void intent_set_disposition(struct ldlm_reply *rep, int flag);
-
-
-/* ioctls for trying requests */
-#define IOC_LDLM_TYPE                   'f'
-#define IOC_LDLM_MIN_NR                 40
-
-#define IOC_LDLM_TEST                   _IOWR('f', 40, long)
-#define IOC_LDLM_DUMP                   _IOWR('f', 41, long)
-#define IOC_LDLM_REGRESS_START          _IOWR('f', 42, long)
-#define IOC_LDLM_REGRESS_STOP           _IOWR('f', 43, long)
-#define IOC_LDLM_MAX_NR                 43
-
 #endif
index bc831e5..84e9af9 100644 (file)
  *
  */
 
+#ifndef _LINUX_LUSTRE_FSFILT_H
+#define _LINUX_LUSTRE_FSFILT_H
+
 #ifndef _LUSTRE_FSFILT_H
-#define _LUSTRE_FSFILT_H
+#error Do not #include this file directly. #include <lustre_fsfilt.h> instead
+#endif
 
 #ifdef __KERNEL__
 
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <obd.h>
+#include <obd_class.h>
 
 typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd,
                             void *data, int error);
index f644cf1..21eb047 100644 (file)
@@ -1,39 +1,15 @@
-#ifndef __LINUX_HANDLES_H_
-#define __LINUX_HANDLES_H_
+#ifndef __LINUX_LUSTRE_HANDLES_H_
+#define __LINUX_LUSTRE_HANDLES_H_
+
+#ifndef __LUSTRE_HANDLES_H_
+#error Do not #include this file directly. #include <lustre_handles.h> instead
+#endif
 
 #ifdef __KERNEL__
 #include <asm/types.h>
 #include <asm/atomic.h>
 #include <linux/list.h>
+#include <linux/random.h>
 #endif
 
-typedef void (*portals_handle_addref_cb)(void *object);
-
-/* These handles are most easily used by having them appear at the very top of
- * whatever object that you want to make handles for.  ie:
- *
- * struct ldlm_lock {
- *         struct portals_handle handle;
- *         ...
- * };
- *
- * Now you're able to assign the results of cookie2handle directly to an
- * ldlm_lock.  If it's not at the top, you'll want to hack up a macro that
- * uses some offsetof() magic. */
-
-struct portals_handle {
-        struct list_head h_link;
-        __u64 h_cookie;
-        portals_handle_addref_cb h_addref;
-};
-
-/* handles.c */
-
-/* Add a handle to the hash table */
-void class_handle_hash(struct portals_handle *, portals_handle_addref_cb);
-void class_handle_unhash(struct portals_handle *);
-void *class_handle2object(__u64 cookie);
-int class_handle_init(void);
-void class_handle_cleanup(void);
-
 #endif
index 730d1b0..e5be7d6 100644 (file)
  *
  */
 
+#ifndef _LINUX_LUSTRE_LIB_H
+#define _LINUX_LUSTRE_LIB_H
+
 #ifndef _LUSTRE_LIB_H
-#define _LUSTRE_LIB_H
+#error Do not #include this file directly. #include <lustre_lib.h> instead
+#endif
 
 #ifndef __KERNEL__
 # include <string.h>
@@ -35,9 +39,7 @@
 # include <linux/signal.h>
 # include <linux/types.h>
 #endif
-#include <libcfs/kp30.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_cfg.h>
+#include <linux/lustre_compat25.h>
 
 #ifndef LP_POISON
 #if BITS_PER_LONG > 32
 #endif
 #endif
 
-/* prng.c */
-unsigned int ll_rand(void);        /* returns a random 32-bit integer */
-void ll_srand(unsigned int, unsigned int);     /* seed the generator */
-
-/* target.c */
-struct ptlrpc_request;
-struct recovd_data;
-struct recovd_obd;
-struct obd_export;
-#include <linux/lustre_ha.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_compat25.h>
-#include <linux/lvfs.h>
-
-int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler);
-int target_handle_disconnect(struct ptlrpc_request *req);
-void target_destroy_export(struct obd_export *exp);
-int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
-                            struct obd_uuid *cluuid);
-int target_handle_ping(struct ptlrpc_request *req);
-void target_committed_to_req(struct ptlrpc_request *req);
-
-#ifdef HAVE_QUOTA_SUPPORT
-/* quotacheck callback, dqacq/dqrel callback handler */
-int target_handle_qc_callback(struct ptlrpc_request *req);
-int target_handle_dqacq_callback(struct ptlrpc_request *req);
-#else
-#define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP)
-#define target_handle_qc_callback(req) (0)
-#endif
-
-void target_cancel_recovery_timer(struct obd_device *obd);
-
-#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
-void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler);
-void target_abort_recovery(void *data);
-void target_cleanup_recovery(struct obd_device *obd);
-int target_queue_recovery_request(struct ptlrpc_request *req,
-                                  struct obd_device *obd);
-int target_queue_final_reply(struct ptlrpc_request *req, int rc);
-void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
-
-/* client.c */
-
-int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
-struct client_obd *client_conn2cli(struct lustre_handle *conn);
-
-struct mdc_open_data;
-struct obd_client_handle {
-        struct lustre_handle och_fh;
-        struct llog_cookie och_cookie;
-        struct mdc_open_data *och_mod;
-        __u32 och_magic;
-};
-#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
-
-/* statfs_pack.c */
-struct obd_statfs;
-struct kstatfs;
-void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
-void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
-
-/* l_lock.c */
-struct lustre_lock {
-        int l_depth;
-        struct task_struct *l_owner;
-        struct semaphore l_sem;
-        spinlock_t l_spin;
-};
-
-void l_lock_init(struct lustre_lock *);
-void l_lock(struct lustre_lock *);
-void l_unlock(struct lustre_lock *);
-int l_has_lock(struct lustre_lock *);
-
-
-/*
- *   OBD IOCTLS
- */
-#define OBD_IOCTL_VERSION 0x00010004
-
-struct obd_ioctl_data {
-        uint32_t ioc_len;
-        uint32_t ioc_version;
-
-        uint64_t ioc_cookie;
-        uint32_t ioc_conn1;
-        uint32_t ioc_conn2;
-
-        struct obdo ioc_obdo1;
-        struct obdo ioc_obdo2;
-
-        obd_size         ioc_count;
-        obd_off          ioc_offset;
-        uint32_t         ioc_dev;
-        uint32_t         ioc_command;
-
-        uint64_t ioc_nid;
-        uint32_t ioc_nal;
-        uint32_t ioc_type;
-
-        /* buffers the kernel will treat as user pointers */
-        uint32_t ioc_plen1;
-        char    *ioc_pbuf1;
-        uint32_t ioc_plen2;
-        char    *ioc_pbuf2;
-
-        /* inline buffers for various arguments */
-        uint32_t ioc_inllen1;
-        char    *ioc_inlbuf1;
-        uint32_t ioc_inllen2;
-        char    *ioc_inlbuf2;
-        uint32_t ioc_inllen3;
-        char    *ioc_inlbuf3;
-        uint32_t ioc_inllen4;
-        char    *ioc_inlbuf4;
-
-        char    ioc_bulk[0];
-};
-
-struct obd_ioctl_hdr {
-        uint32_t ioc_len;
-        uint32_t ioc_version;
-};
-
-static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
-{
-        int len = size_round(sizeof(struct obd_ioctl_data));
-        len += size_round(data->ioc_inllen1);
-        len += size_round(data->ioc_inllen2);
-        len += size_round(data->ioc_inllen3);
-        len += size_round(data->ioc_inllen4);
-        return len;
-}
-
-
-static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
-{
-        if (data->ioc_len > (1<<30)) {
-                CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen1 > (1<<30)) {
-                CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen2 > (1<<30)) {
-                CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen3 > (1<<30)) {
-                CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen4 > (1<<30)) {
-                CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
-                CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-                CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
-                CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
-                CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_pbuf1 && !data->ioc_plen1) {
-                CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_pbuf2 && !data->ioc_plen2) {
-                CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_plen1 && !data->ioc_pbuf1) {
-                CERROR("OBD ioctl: plen1 set but NULL pointer\n");
-                return 1;
-        }
-        if (data->ioc_plen2 && !data->ioc_pbuf2) {
-                CERROR("OBD ioctl: plen2 set but NULL pointer\n");
-                return 1;
-        }
-        if (obd_ioctl_packlen(data) > data->ioc_len) {
-                CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n",
-                       obd_ioctl_packlen(data), data->ioc_len);
-                return 1;
-        }
-        return 0;
-}
-
-#ifndef __KERNEL__
-static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
-                                 int max)
-{
-        char *ptr;
-        struct obd_ioctl_data *overlay;
-        data->ioc_len = obd_ioctl_packlen(data);
-        data->ioc_version = OBD_IOCTL_VERSION;
-
-        if (*pbuf && data->ioc_len > max)
-                return 1;
-        if (*pbuf == NULL) {
-                *pbuf = malloc(data->ioc_len);
-        }
-        if (!*pbuf)
-                return 1;
-        overlay = (struct obd_ioctl_data *)*pbuf;
-        memcpy(*pbuf, data, sizeof(*data));
-
-        ptr = overlay->ioc_bulk;
-        if (data->ioc_inlbuf1)
-                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
-        if (data->ioc_inlbuf2)
-                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
-        if (data->ioc_inlbuf3)
-                LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
-        if (data->ioc_inlbuf4)
-                LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
-        if (obd_ioctl_is_invalid(overlay))
-                return 1;
-
-        return 0;
-}
-
-static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
-                                   int max)
-{
-        char *ptr;
-        struct obd_ioctl_data *overlay;
-
-        if (!pbuf)
-                return 1;
-        overlay = (struct obd_ioctl_data *)pbuf;
-
-        /* Preserve the caller's buffer pointers */
-        overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
-        overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
-        overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
-        overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
-
-        memcpy(data, pbuf, sizeof(*data));
-
-        ptr = overlay->ioc_bulk;
-        if (data->ioc_inlbuf1)
-                LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
-        if (data->ioc_inlbuf2)
-                LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
-        if (data->ioc_inlbuf3)
-                LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
-        if (data->ioc_inlbuf4)
-                LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
-
-        return 0;
-}
-#endif
-
-#include <linux/obd_support.h>
-
-/* buffer MUST be at least the size of obd_ioctl_hdr */
-static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
-{
-        struct obd_ioctl_hdr hdr;
-        struct obd_ioctl_data *data;
-        int err;
-        int offset = 0;
-        ENTRY;
-
-        err = copy_from_user(&hdr, (void *)arg, sizeof(hdr));
-        if (err)
-                RETURN(err);
-
-        if (hdr.ioc_version != OBD_IOCTL_VERSION) {
-                CERROR("Version mismatch kernel vs application\n");
-                RETURN(-EINVAL);
-        }
-
-        if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
-                CERROR("User buffer len %d exceeds %d max buffer\n",
-                       hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
-                RETURN(-EINVAL);
-        }
-
-        if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
-                CERROR("user buffer too small for ioctl (%d)\n", hdr.ioc_len);
-                RETURN(-EINVAL);
-        }
-
-        /* XXX allocate this more intelligently, using kmalloc when
-         * appropriate */
-        OBD_VMALLOC(*buf, hdr.ioc_len);
-        if (*buf == NULL) {
-                CERROR("Cannot allocate control buffer of len %d\n",
-                       hdr.ioc_len);
-                RETURN(-EINVAL);
-        }
-        *len = hdr.ioc_len;
-        data = (struct obd_ioctl_data *)*buf;
-
-        err = copy_from_user(*buf, (void *)arg, hdr.ioc_len);
-        if (err) {
-                OBD_VFREE(*buf, hdr.ioc_len);
-                RETURN(err);
-        }
-
-        if (obd_ioctl_is_invalid(data)) {
-                CERROR("ioctl not correctly formatted\n");
-                OBD_VFREE(*buf, hdr.ioc_len);
-                RETURN(-EINVAL);
-        }
-
-        if (data->ioc_inllen1) {
-                data->ioc_inlbuf1 = &data->ioc_bulk[0];
-                offset += size_round(data->ioc_inllen1);
-        }
-
-        if (data->ioc_inllen2) {
-                data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
-                offset += size_round(data->ioc_inllen2);
-        }
-
-        if (data->ioc_inllen3) {
-                data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
-                offset += size_round(data->ioc_inllen3);
-        }
-
-        if (data->ioc_inllen4) {
-                data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
-        }
-
-        RETURN(0);
-}
-
-static inline void obd_ioctl_freedata(char *buf, int len)
-{
-        ENTRY;
-
-        OBD_VFREE(buf, len);
-        EXIT;
-        return;
-}
-
-#define OBD_IOC_CREATE                 _IOR ('f', 101, long)
-#define OBD_IOC_DESTROY                _IOW ('f', 104, long)
-#define OBD_IOC_PREALLOCATE            _IOWR('f', 105, long)
-
-#define OBD_IOC_SETATTR                _IOW ('f', 107, long)
-#define OBD_IOC_GETATTR                _IOR ('f', 108, long)
-#define OBD_IOC_READ                   _IOWR('f', 109, long)
-#define OBD_IOC_WRITE                  _IOWR('f', 110, long)
-
-
-#define OBD_IOC_STATFS                 _IOWR('f', 113, long)
-#define OBD_IOC_SYNC                   _IOW ('f', 114, long)
-#define OBD_IOC_READ2                  _IOWR('f', 115, long)
-#define OBD_IOC_FORMAT                 _IOWR('f', 116, long)
-#define OBD_IOC_PARTITION              _IOWR('f', 117, long)
-#define OBD_IOC_COPY                   _IOWR('f', 120, long)
-#define OBD_IOC_MIGR                   _IOWR('f', 121, long)
-#define OBD_IOC_PUNCH                  _IOWR('f', 122, long)
-
-#define OBD_IOC_MODULE_DEBUG           _IOWR('f', 124, long)
-#define OBD_IOC_BRW_READ               _IOWR('f', 125, long)
-#define OBD_IOC_BRW_WRITE              _IOWR('f', 126, long)
-#define OBD_IOC_NAME2DEV               _IOWR('f', 127, long)
-#define OBD_IOC_UUID2DEV               _IOWR('f', 130, long)
-#define OBD_IOC_GETNAME                _IOR ('f', 131, long)
-
-#define OBD_IOC_LOV_GET_CONFIG         _IOWR('f', 132, long)
-#define OBD_IOC_CLIENT_RECOVER         _IOW ('f', 133, long)
-
-#define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139      )
-#define OBD_IOC_NO_TRANSNO             _IOW ('f', 140, long)
-#define OBD_IOC_SET_READONLY           _IOW ('f', 141, long)
-#define OBD_IOC_ABORT_RECOVERY         _IOR ('f', 142, long)
-
-#define OBD_GET_VERSION                _IOWR ('f', 144, long)
-
-#define OBD_IOC_CLOSE_UUID             _IOWR ('f', 147, long)
-
-#define OBD_IOC_LOV_SETSTRIPE          _IOW ('f', 154, long)
-#define OBD_IOC_LOV_GETSTRIPE          _IOW ('f', 155, long)
-#define OBD_IOC_LOV_SETEA              _IOW ('f', 156, long)
-
-#define OBD_IOC_QUOTACHECK             _IOW ('f', 160, int)
-#define OBD_IOC_POLL_QUOTACHECK        _IOR ('f', 161, struct if_quotacheck *)
-#define OBD_IOC_QUOTACTL               _IOWR('f', 162, struct if_quotactl *)
-
-#define OBD_IOC_MOUNTOPT               _IOWR('f', 170, long)
-
-#define OBD_IOC_RECORD                 _IOWR('f', 180, long)
-#define OBD_IOC_ENDRECORD              _IOWR('f', 181, long)
-#define OBD_IOC_PARSE                  _IOWR('f', 182, long)
-#define OBD_IOC_DORECORD               _IOWR('f', 183, long)
-#define OBD_IOC_PROCESS_CFG            _IOWR('f', 184, long)
-#define OBD_IOC_DUMP_LOG               _IOWR('f', 185, long)
-#define OBD_IOC_CLEAR_LOG              _IOWR('f', 186, long)
-#define OBD_IOC_PARAM                  _IOW ('f', 187, long)
-
-#define OBD_IOC_CATLOGLIST             _IOWR('f', 190, long)
-#define OBD_IOC_LLOG_INFO              _IOWR('f', 191, long)
-#define OBD_IOC_LLOG_PRINT             _IOWR('f', 192, long)
-#define OBD_IOC_LLOG_CANCEL            _IOWR('f', 193, long)
-#define OBD_IOC_LLOG_REMOVE            _IOWR('f', 194, long)
-#define OBD_IOC_LLOG_CHECK             _IOWR('f', 195, long)
-#define OBD_IOC_LLOG_CATINFO           _IOWR('f', 196, long)
-
-#define ECHO_IOC_GET_STRIPE            _IOWR('f', 200, long)
-#define ECHO_IOC_SET_STRIPE            _IOWR('f', 201, long)
-#define ECHO_IOC_ENQUEUE               _IOWR('f', 202, long)
-#define ECHO_IOC_CANCEL                _IOWR('f', 203, long)
-
-/* XXX _IOWR('f', 250, long) has been defined in
- * lnet/include/libcfs/kp30.h for debug, don't use it
- */
-
-/* Until such time as we get_info the per-stripe maximum from the OST,
- * we define this to be 2T - 4k, which is the ext3 maxbytes. */
-#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
-
-#define POISON_BULK 0
-
-/*
- * l_wait_event is a flexible sleeping function, permitting simple caller
- * configuration of interrupt and timeout sensitivity along with actions to
- * be performed in the event of either exception.
- *
- * The first form of usage looks like this:
- *
- * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
- *                                           intr_handler, callback_data);
- * rc = l_wait_event(waitq, condition, &lwi);
- *
- * l_wait_event() makes the current process wait on 'waitq' until 'condition'
- * is TRUE or a "killable" signal (SIGTERM, SIGKILL, SIGINT) is pending.  It
- * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before
- * 'condition' becomes true, it optionally calls the specified 'intr_handler'
- * if not NULL, and returns -EINTR.
- *
- * If a non-zero timeout is specified, signals are ignored until the timeout
- * has expired.  At this time, if 'timeout_handler' is not NULL it is called.
- * If it returns FALSE l_wait_event() continues to wait as described above with
- * signals enabled.  Otherwise it returns -ETIMEDOUT.
- *
- * LWI_INTR(intr_handler, callback_data) is shorthand for
- * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data)
- *
- * The second form of usage looks like this:
- *
- * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler);
- * rc = l_wait_event(waitq, condition, &lwi);
- *
- * This form is the same as the first except that it COMPLETELY IGNORES
- * SIGNALS.  The caller must therefore beware that if 'timeout' is zero, or if
- * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that
- * can unblock the current process is 'condition' becoming TRUE.
- *
- * Another form of usage is:
- * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval,
- *                                               timeout_handler);
- * rc = l_wait_event(waitq, condition, &lwi);
- * This is the same as previous case, but condition is checked once every
- * 'interval' jiffies (if non-zero).
- *
- * Subtle synchronization point: this macro does *not* necessarily take the
- * wait-queue spin-lock before returning, and hence the following idiom is safe
- * ONLY when the caller provides some external locking:
- *
- *             Thread1                            Thread2
- *
- *   l_wait_event(&obj->wq, ....);                                       (1)
- *
- *                                    wake_up(&obj->wq):                 (2)
- *                                         spin_lock(&q->lock);          (2.1)
- *                                         __wake_up_common(q, ...);     (2.2)
- *                                         spin_unlock(&q->lock, flags); (2.3)
- *
- *   OBD_FREE_PTR(obj);                                                  (3)
- *
- * As l_wait_event() may "short-cut" execution and return without taking
- * wait-queue spin-lock, some additional synchronization is necessary to
- * guarantee that step (3) can begin only after (2.3) finishes.
- *
- * XXX nikita: some ptlrpc daemon threads have races of that sort.
- *
- */
-
-#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1))
-
-struct l_wait_info {
-        long   lwi_timeout;
-        long   lwi_interval;
-        int  (*lwi_on_timeout)(void *);
-        void (*lwi_on_signal)(void *);
-        void  *lwi_cb_data;
-};
-
-/* NB: LWI_TIMEOUT ignores signals completely */
-#define LWI_TIMEOUT(time, cb, data)             \
-((struct l_wait_info) {                         \
-        .lwi_timeout    = time,                 \
-        .lwi_on_timeout = cb,                   \
-        .lwi_cb_data    = data,                 \
-        .lwi_interval   = 0                     \
-})
-
-#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data)  \
-((struct l_wait_info) {                                 \
-        .lwi_timeout    = time,                         \
-        .lwi_on_timeout = cb,                           \
-        .lwi_cb_data    = data,                         \
-        .lwi_interval   = interval                      \
-})
-
-
-#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data)                          \
-((struct l_wait_info) {                                                        \
-        .lwi_timeout    = time,                                                \
-        .lwi_on_timeout = time_cb,                                             \
-        .lwi_on_signal = (sig_cb == NULL) ? LWI_ON_SIGNAL_NOOP : sig_cb,       \
-        .lwi_cb_data    = data,                                                \
-        .lwi_interval    = 0                                                   \
-})
-
-#define LWI_INTR(cb, data)  LWI_TIMEOUT_INTR(0, NULL, cb, data)
+#define OBD_IOC_DATA_TYPE               long
 
 #define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) |                \
                            sigmask(SIGTERM) | sigmask(SIGQUIT) |               \
@@ -602,152 +73,7 @@ static inline sigset_t l_w_e_set_sigs(int sigs)
 
         return old;
 }
-
-/*
- * wait for @condition to become true, but no longer than timeout, specified
- * by @info.
- */
-#define __l_wait_event(wq, condition, info, ret, excl)                         \
-do {                                                                           \
-        wait_queue_t  __wait;                                                  \
-        unsigned long __timeout = info->lwi_timeout;                           \
-        unsigned long __irqflags;                                              \
-        sigset_t      __blocked;                                               \
-                                                                               \
-        ret = 0;                                                               \
-        if (condition)                                                         \
-                break;                                                         \
-                                                                               \
-        init_waitqueue_entry(&__wait, current);                                \
-        if (excl)                                                              \
-                add_wait_queue_exclusive(&wq, &__wait);                        \
-        else                                                                   \
-                add_wait_queue(&wq, &__wait);                                  \
-                                                                               \
-        /* Block all signals (just the non-fatal ones if no timeout). */       \
-        if (info->lwi_on_signal != NULL && __timeout == 0)                     \
-                __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS);                 \
-        else                                                                   \
-                __blocked = l_w_e_set_sigs(0);                                 \
-                                                                               \
-        for (;;) {                                                             \
-                set_current_state(TASK_INTERRUPTIBLE);                         \
-                                                                               \
-                if (condition)                                                 \
-                        break;                                                 \
-                                                                               \
-                if (__timeout == 0) {                                          \
-                        schedule();                                            \
-                } else {                                                       \
-                        unsigned long interval = info->lwi_interval?           \
-                                             min_t(unsigned long,              \
-                                                 info->lwi_interval,__timeout):\
-                                             __timeout;                        \
-                        __timeout -= interval - schedule_timeout(interval);    \
-                        if (__timeout == 0) {                                  \
-                                if (info->lwi_on_timeout == NULL ||            \
-                                    info->lwi_on_timeout(info->lwi_cb_data)) { \
-                                        ret = -ETIMEDOUT;                      \
-                                        break;                                 \
-                                }                                              \
-                                /* Take signals after the timeout expires. */  \
-                                if (info->lwi_on_signal != NULL)               \
-                                    (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS);   \
-                        }                                                      \
-                }                                                              \
-                                                                               \
-                if (condition)                                                 \
-                        break;                                                 \
-                                                                               \
-                if (signal_pending(current)) {                                 \
-                        if (info->lwi_on_signal != NULL && __timeout == 0) {   \
-                                if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
-                                        info->lwi_on_signal(info->lwi_cb_data);\
-                                ret = -EINTR;                                  \
-                                break;                                         \
-                        }                                                      \
-                        /* We have to do this here because some signals */     \
-                        /* are not blockable - ie from strace(1).       */     \
-                        /* In these cases we want to schedule_timeout() */     \
-                        /* again, because we don't want that to return  */     \
-                        /* -EINTR when the RPC actually succeeded.      */     \
-                        /* the RECALC_SIGPENDING below will deliver the */     \
-                        /* signal properly.                             */     \
-                        SIGNAL_MASK_LOCK(current, __irqflags);                 \
-                        CLEAR_SIGPENDING;                                      \
-                        SIGNAL_MASK_UNLOCK(current, __irqflags);               \
-                }                                                              \
-        }                                                                      \
-                                                                               \
-        SIGNAL_MASK_LOCK(current, __irqflags);                                 \
-        current->blocked = __blocked;                                          \
-        RECALC_SIGPENDING;                                                     \
-        SIGNAL_MASK_UNLOCK(current, __irqflags);                               \
-                                                                               \
-        current->state = TASK_RUNNING;                                         \
-        remove_wait_queue(&wq, &__wait);                                       \
-} while(0)
-
-#else /* !__KERNEL__ */
-#define __l_wait_event(wq, condition, info, ret, excl)                  \
-do {                                                                    \
-        long __timeout = info->lwi_timeout;                             \
-        long __now;                                                     \
-        long __then = 0;                                                \
-        int  __timed_out = 0;                                           \
-                                                                        \
-        ret = 0;                                                        \
-        if (condition)                                                  \
-                break;                                                  \
-                                                                        \
-        if (__timeout == 0)                                             \
-                __timeout = 1000000000;                                 \
-        else                                                            \
-                __then = time(NULL);                                    \
-                                                                        \
-        while (!(condition)) {                                          \
-                if (liblustre_wait_event(info->lwi_interval?:__timeout) || \
-                    (info->lwi_interval && info->lwi_interval < __timeout)) {\
-                        if (__timeout != 0 && info->lwi_timeout != 0) { \
-                                __now = time(NULL);                     \
-                                __timeout -= __now - __then;            \
-                                if (__timeout < 0)                      \
-                                        __timeout = 0;                  \
-                                __then = __now;                         \
-                        }                                               \
-                        continue;                                       \
-                }                                                       \
-                                                                        \
-                if (info->lwi_timeout != 0 && !__timed_out) {           \
-                        __timed_out = 1;                                \
-                        if (info->lwi_on_timeout == NULL ||             \
-                            info->lwi_on_timeout(info->lwi_cb_data)) {  \
-                                ret = -ETIMEDOUT;                       \
-                                break;                                  \
-                        }                                               \
-                }                                                       \
-        }                                                               \
-} while (0)
-
-#endif /* __KERNEL__ */
-
-#define l_wait_event(wq, condition, info)                       \
-({                                                              \
-        int                 __ret;                              \
-        struct l_wait_info *__info = (info);                    \
-                                                                \
-        __l_wait_event(wq, condition, __info, __ret, 0);        \
-        __ret;                                                  \
-})
-
-#define l_wait_event_exclusive(wq, condition, info)             \
-({                                                              \
-        int                 __ret;                              \
-        struct l_wait_info *__info = (info);                    \
-                                                                \
-        __l_wait_event(wq, condition, __info, __ret, 1);        \
-        __ret;                                                  \
-})
+#endif
 
 #ifdef __KERNEL__
 /* initialize ost_lvb according to inode */
@@ -763,11 +89,5 @@ static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb)
 /* defined in liblustre/llite_lib.h */
 #endif
 
-#ifdef __KERNEL__
-#define LIBLUSTRE_CLIENT (0)
-#else
-#define LIBLUSTRE_CLIENT (1)
-#endif
-
 #endif /* _LUSTRE_LIB_H */
 
index 578a7c0..17f4546 100644 (file)
@@ -2,8 +2,12 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
 
 
+#ifndef _LINUX_LL_H
+#define _LINUX_LL_H
+
 #ifndef _LL_H
-#define _LL_H
+#error Do not #include this file directly. #include <lustre_lite.h> instead
+#endif
 
 #ifdef __KERNEL__
 
 #include <linux/ext3_fs.h>
 #include <linux/proc_fs.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_ha.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_mds.h>
+#include <lustre_ha.h>
 
 #include <linux/rbtree.h>
 #include <linux/lustre_compat25.h>
 #include <linux/pagemap.h>
 
-/* careful, this is easy to screw up */
-#define PAGE_CACHE_MAXBYTES ((__u64)(~0UL) << PAGE_CACHE_SHIFT)
-
 /* lprocfs.c */
 enum {
          LPROC_LL_DIRTY_HITS = 0,
@@ -68,114 +69,7 @@ enum {
 };
 
 #else
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 #endif /* __KERNEL__ */
 
-#define LLAP_FROM_COOKIE(c)                                                    \
-        (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC),     \
-         (struct ll_async_page *)(c))
-
-#define LL_MAX_BLKSIZE          (4UL * 1024 * 1024)
-
-#include <lustre/lustre_user.h>
-
 #endif
-
-struct lustre_rw_params {
-        int                lrp_lock_mode;
-        ldlm_policy_data_t lrp_policy;
-        obd_flag           lrp_brw_flags;
-        int                lrp_ast_flags;
-};
-
-/*
- * XXX nikita: this function lives in the header because it is used by both
- * llite kernel module and liblustre library, and there is no (?) better place
- * to put it in.
- */
-static inline void lustre_build_lock_params(int cmd, unsigned long open_flags,
-                                            __u64 connect_flags,
-                                            loff_t pos, ssize_t len,
-                                            struct lustre_rw_params *params)
-{
-        params->lrp_lock_mode = (cmd == OBD_BRW_READ) ? LCK_PR : LCK_PW;
-        params->lrp_brw_flags = 0;
-
-        params->lrp_policy.l_extent.start = pos;
-        params->lrp_policy.l_extent.end = pos + len - 1;
-        /*
-         * for now O_APPEND always takes local locks.
-         */
-        if (cmd == OBD_BRW_WRITE && (open_flags & O_APPEND)) {
-                params->lrp_policy.l_extent.start = 0;
-                params->lrp_policy.l_extent.end   = OBD_OBJECT_EOF;
-        } else if (LIBLUSTRE_CLIENT && (connect_flags & OBD_CONNECT_SRVLOCK)) {
-                /*
-                 * liblustre: OST-side locking for all non-O_APPEND
-                 * reads/writes.
-                 */
-                params->lrp_lock_mode = LCK_NL;
-                params->lrp_brw_flags = OBD_BRW_SRVLOCK;
-        } else {
-                /*
-                 * nothing special for the kernel. In the future llite may use
-                 * OST-side locks for small writes into highly contended
-                 * files.
-                 */
-        }
-        params->lrp_ast_flags = (open_flags & O_NONBLOCK) ?
-                LDLM_FL_BLOCK_NOWAIT : 0;
-}
-
-/*
- * This is embedded into liblustre and llite super-blocks to keep track of
- * connect flags (capabilities) supported by all imports given mount is
- * connected to.
- */
-struct lustre_client_ocd {
-        /*
-         * This is conjunction of connect_flags across all imports (LOVs) this
-         * mount is connected to. This field is updated by ll_ocd_update()
-         * under ->lco_lock.
-         */
-        __u64      lco_flags;
-        spinlock_t lco_lock;
-};
-
-/*
- * This function is used as an upcall-callback hooked by liblustre and llite
- * clients into obd_notify() listeners chain to handle notifications about
- * change of import connect_flags. See llu_fsswop_mount() and
- * lustre_common_fill_super().
- *
- * Again, it is dumped into this header for the lack of a better place.
- */
-static inline int ll_ocd_update(struct obd_device *host,
-                                struct obd_device *watched,
-                                enum obd_notify_event ev, void *owner)
-{
-        struct lustre_client_ocd *lco;
-        struct client_obd        *cli;
-        __u64 flags;
-        int   result;
-
-        ENTRY;
-        if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) {
-                cli = &watched->u.cli;
-                lco = owner;
-                flags = cli->cl_import->imp_connect_data.ocd_connect_flags;
-                CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n",
-                       lco->lco_flags, flags);
-                spin_lock(&lco->lco_lock);
-                lco->lco_flags &= flags;
-                spin_unlock(&lco->lco_lock);
-                result = 0;
-        } else {
-                CERROR("unexpected notification from %s %s!\n",
-                       watched->obd_type->typ_name,
-                       watched->obd_name);
-                result = -EINVAL;
-        }
-        RETURN(result);
-}
-
index 2055a0f..65e1c51 100644 (file)
  * - MDS replication logs
  */
 
-#ifndef _LUSTRE_LOG_H
-#define _LUSTRE_LOG_H
-
-#include <linux/obd.h>
-#include <linux/lustre_idl.h>
-
-#define LOG_NAME_LIMIT(logname, name)                   \
-        snprintf(logname, sizeof(logname), "LOGS/%s", name)
-#define LLOG_EEMPTY 4711
-
-struct plain_handle_data {
-        struct list_head    phd_entry;
-        struct llog_handle *phd_cat_handle;
-        struct llog_cookie  phd_cookie; /* cookie of this log in its cat */
-        int                 phd_last_idx;
-};
-
-struct cat_handle_data {
-        struct list_head        chd_head;
-        struct llog_handle     *chd_current_log; /* currently open log */
-};
-
-/* In-memory descriptor for a log object or log catalog */
-struct llog_handle {
-        struct rw_semaphore     lgh_lock;
-        struct llog_logid       lgh_id;              /* id of this log */
-        struct llog_log_hdr    *lgh_hdr;
-        struct file            *lgh_file;
-        int                     lgh_last_idx;
-        struct llog_ctxt       *lgh_ctxt;
-        union {
-                struct plain_handle_data phd;
-                struct cat_handle_data   chd;
-        } u;
-};
-
-/* llog.c  -  general API */
-typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *);
-typedef int (*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data);
-extern struct llog_handle *llog_alloc_handle(void);
-int llog_init_handle(struct llog_handle *handle, int flags,
-                     struct obd_uuid *uuid);
-extern void llog_free_handle(struct llog_handle *handle);
-int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
-                 void *data, void *catdata);
-int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
-                         void *data, void *catdata);
-extern int llog_cancel_rec(struct llog_handle *loghandle, int index);
-extern int llog_close(struct llog_handle *cathandle);
-extern int llog_get_size(struct llog_handle *loghandle);
-
-/* llog_cat.c   -  catalog api */
-struct llog_process_data {
-        void *lpd_data;
-        llog_cb_t lpd_cb;
-};
-
-struct llog_process_cat_data {
-        int     first_idx;
-        int     last_idx;
-        /* to process catalog across zero record */
-};
-
-int llog_cat_put(struct llog_handle *cathandle);
-int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
-                     struct llog_cookie *reccookie, void *buf);
-int llog_cat_cancel_records(struct llog_handle *cathandle, int count,
-                            struct llog_cookie *cookies);
-int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data);
-int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data);
-int llog_cat_set_first_idx(struct llog_handle *cathandle, int index);
-
-/* llog_obd.c */
-int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
-               int count,  struct llog_logid *logid,struct llog_operations *op);
-int llog_cleanup(struct llog_ctxt *);
-int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
-int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
-             struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
-             int numcookies);
-int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm,
-                int count, struct llog_cookie *cookies, int flags);
-
-int llog_obd_origin_setup(struct obd_device *obd, int index,
-                          struct obd_device *disk_obd, int count,
-                          struct llog_logid *logid);
-int llog_obd_origin_cleanup(struct llog_ctxt *ctxt);
-int llog_obd_origin_add(struct llog_ctxt *ctxt,
-                        struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
-                        struct llog_cookie *logcookies, int numcookies);
-
-int llog_cat_initialize(struct obd_device *obd, int count);
-int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
-                  int count, struct llog_catid *logid);
-
-int obd_llog_finish(struct obd_device *obd, int count);
-
-/* llog_ioctl.c */
-int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data);
-int llog_catalog_list(struct obd_device *obd, int count,
-                      struct obd_ioctl_data *data);
-
-/* llog_net.c */
-int llog_initiator_connect(struct llog_ctxt *ctxt);
-int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp);
-int llog_origin_connect(struct llog_ctxt *ctxt, int count,
-                        struct llog_logid *logid, struct llog_gen *gen,
-                        struct obd_uuid *uuid);
-int llog_handle_connect(struct ptlrpc_request *req);
-
-/* recov_thread.c */
-int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
-                         struct lov_stripe_md *lsm, int count,
-                         struct llog_cookie *cookies, int flags);
-int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
-int llog_repl_connect(struct llog_ctxt *ctxt, int count,
-                      struct llog_logid *logid, struct llog_gen *gen,
-                      struct obd_uuid *uuid);
-
-struct llog_operations {
-        int (*lop_write_rec)(struct llog_handle *loghandle,
-                             struct llog_rec_hdr *rec,
-                             struct llog_cookie *logcookies, int numcookies,
-                             void *, int idx);
-        int (*lop_destroy)(struct llog_handle *handle);
-        int (*lop_next_block)(struct llog_handle *h, int *curr_idx,
-                              int next_idx, __u64 *offset, void *buf, int len);
-        int (*lop_prev_block)(struct llog_handle *h,
-                              int prev_idx, void *buf, int len);
-        int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **,
-                          struct llog_logid *logid, char *name);
-        int (*lop_close)(struct llog_handle *handle);
-        int (*lop_read_header)(struct llog_handle *handle);
-
-        int (*lop_setup)(struct obd_device *obd, int ctxt_idx,
-                         struct obd_device *disk_obd, int count,
-                         struct llog_logid *logid);
-        int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp);
-        int (*lop_cleanup)(struct llog_ctxt *ctxt);
-        int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
-                       struct lov_stripe_md *lsm,
-                       struct llog_cookie *logcookies, int numcookies);
-        int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm,
-                          int count, struct llog_cookie *cookies, int flags);
-        int (*lop_connect)(struct llog_ctxt *ctxt, int count,
-                           struct llog_logid *logid, struct llog_gen *gen,
-                           struct obd_uuid *uuid);
-        /* XXX add 2 more: commit callbacks and llog recovery functions */
-};
-
-/* llog_lvfs.c */
-extern struct llog_operations llog_lvfs_ops;
-int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
-                      char *name, int count, struct llog_catid *idarray);
-
-struct llog_ctxt {
-        int                      loc_idx; /* my index the obd array of ctxt's */
-        struct llog_gen          loc_gen;
-        struct obd_device       *loc_obd; /* points back to the containing obd*/
-        struct obd_export       *loc_exp; /* parent "disk" export (e.g. MDS) */
-        struct obd_import       *loc_imp; /* to use in RPC's: can be backward
-                                             pointing import */
-        struct llog_operations  *loc_logops;
-        struct llog_handle      *loc_handle;
-        struct llog_canceld_ctxt *loc_llcd;
-        struct semaphore         loc_sem; /* protects loc_llcd and loc_imp */
-        void                    *llog_proc_cb;
-};
-
-static inline void llog_gen_init(struct llog_ctxt *ctxt)
-{
-        struct obd_device *obd = ctxt->loc_exp->exp_obd;
-
-        if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME))
-                ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count;
-        else if (!strstr(obd->obd_type->typ_name, LUSTRE_FILTER_NAME))
-                ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count;
-        else
-                ctxt->loc_gen.mnt_cnt = 0;
-}
-
-static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b)
-{
-        if (a.mnt_cnt < b.mnt_cnt)
-                return 1;
-        if (a.mnt_cnt > b.mnt_cnt)
-                return 0;
-        return(a.conn_cnt < b.conn_cnt ? 1 : 0);
-}
-
-#define LLOG_GEN_INC(gen)  ((gen).conn_cnt ++)
-#define LLOG_PROC_BREAK 0x0001
-#define LLOG_DEL_RECORD 0x0002
-
-static inline int llog_obd2ops(struct llog_ctxt *ctxt,
-                               struct llog_operations **lop)
-{
-        if (ctxt == NULL)
-                return -ENOTCONN;
-
-        *lop = ctxt->loc_logops;
-        if (*lop == NULL)
-                return -EOPNOTSUPP;
-
-        return 0;
-}
-
-static inline int llog_handle2ops(struct llog_handle *loghandle,
-                                  struct llog_operations **lop)
-{
-        if (loghandle == NULL)
-                return -EINVAL;
+#ifndef _LINUX_LUSTRE_LOG_H
+#define _LINUX_LUSTRE_LOG_H
 
-        return llog_obd2ops(loghandle->lgh_ctxt, lop);
-}
-
-static inline int llog_data_len(int len)
-{
-        return size_round(len);
-}
-
-static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
-                                                 int index)
-{
-        if (index < 0 || index >= LLOG_MAX_CTXTS)
-                return NULL;
-
-        return obd->obd_llog_ctxt[index];
-}
-
-static inline int llog_write_rec(struct llog_handle *handle,
-                                 struct llog_rec_hdr *rec,
-                                 struct llog_cookie *logcookies,
-                                 int numcookies, void *buf, int idx)
-{
-        struct llog_operations *lop;
-        int rc, buflen;
-        ENTRY;
-
-        rc = llog_handle2ops(handle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_write_rec == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        if (buf)
-                buflen = rec->lrh_len + sizeof(struct llog_rec_hdr)
-                                + sizeof(struct llog_rec_tail);
-        else
-                buflen = rec->lrh_len;
-        LASSERT(size_round(buflen) == buflen);
-
-        rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx);
-        RETURN(rc);
-}
-
-static inline int llog_read_header(struct llog_handle *handle)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_handle2ops(handle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_read_header == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_read_header(handle);
-        RETURN(rc);
-}
-
-static inline int llog_destroy(struct llog_handle *handle)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_handle2ops(handle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_destroy == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_destroy(handle);
-        RETURN(rc);
-}
-
-#if 0
-static inline int llog_cancel(struct obd_export *exp,
-                              struct lov_stripe_md *lsm, int count,
-                              struct llog_cookie *cookies, int flags)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_handle2ops(loghandle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_cancel == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_cancel(exp, lsm, count, cookies, flags);
-        RETURN(rc);
-}
+#ifndef _LUSTRE_LOG_H
+#error Do not #include this file directly. #include <lustre_log.h> instead
 #endif
 
-static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx,
-                                  int next_idx, __u64 *cur_offset, void *buf,
-                                  int len)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_handle2ops(loghandle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_next_block == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_next_block(loghandle, cur_idx, next_idx, cur_offset, buf,
-                                 len);
-        RETURN(rc);
-}
-
-static inline int llog_prev_block(struct llog_handle *loghandle,
-                                  int prev_idx, void *buf, int len)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_handle2ops(loghandle, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_prev_block == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_prev_block(loghandle, prev_idx, buf, len);
-        RETURN(rc);
-}
-
-static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res,
-                              struct llog_logid *logid, char *name)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_obd2ops(ctxt, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_create == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_create(ctxt, res, logid, name);
-        RETURN(rc);
-}
-
-static inline int llog_connect(struct llog_ctxt *ctxt, int count,
-                               struct llog_logid *logid, struct llog_gen *gen,
-                               struct obd_uuid *uuid)
-{
-        struct llog_operations *lop;
-        int rc;
-        ENTRY;
-
-        rc = llog_obd2ops(ctxt, &lop);
-        if (rc)
-                RETURN(rc);
-        if (lop->lop_connect == NULL)
-                RETURN(-EOPNOTSUPP);
-
-        rc = lop->lop_connect(ctxt, count, logid, gen, uuid);
-        RETURN(rc);
-}
+#define LUSTRE_LOG_SERVER
 
 #endif
index 97f089c..b305234 100644 (file)
@@ -7,59 +7,23 @@
  * See also lustre_idl.h for wire formats of requests.
  */
 
+#ifndef _LINUX_LUSTRE_MDS_H
+#define _LINUX_LUSTRE_MDS_H
+
 #ifndef _LUSTRE_MDS_H
-#define _LUSTRE_MDS_H
+#error Do not #include this file directly. #include <lustre_mds.h> instead
+#endif
 
 #ifdef __KERNEL__
 # include <linux/fs.h>
 # include <linux/dcache.h>
+# include <linux/xattr_acl.h>
 #endif
-#include <linux/lustre_handles.h>
-#include <libcfs/kp30.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_export.h>
 
 struct mds_obd;
 struct ptlrpc_request;
 struct obd_device;
-
-struct mds_update_record {
-        __u32 ur_opcode;
-        struct ll_fid *ur_fid1;
-        struct ll_fid *ur_fid2;
-        int ur_namelen;
-        char *ur_name;
-        int ur_tgtlen;
-        char *ur_tgt;
-        int ur_eadatalen;
-        void *ur_eadata;
-        int ur_cookielen;
-        struct llog_cookie *ur_logcookies;
-        struct iattr ur_iattr;
-        struct lvfs_ucred ur_uc;
-        __u64 ur_rdev;
-        __u64 ur_time;
-        __u32 ur_mode;
-        __u32 ur_flags;
-        struct lvfs_grp_hash_entry *ur_grp_entry;
-};
-
-/* file data for open files on MDS */
-struct mds_file_data {
-        struct portals_handle mfd_handle; /* must be first */
-        atomic_t              mfd_refcount;
-        struct list_head      mfd_list; /* protected by med_open_lock */
-        __u64                 mfd_xid;
-        int                   mfd_mode;
-        struct dentry        *mfd_dentry;
-};
-
-/* mds/mds_reint.c */
-int mds_reint_rec(struct mds_update_record *r, int offset,
-                  struct ptlrpc_request *req, struct lustre_handle *);
+struct ll_file_data;
 
 /* mds/handler.c */
 #ifdef __KERNEL__
@@ -76,16 +40,4 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt);
 int mds_fs_cleanup(struct obd_device *obddev);
 #endif
 
-/* ioctls for trying requests */
-#define IOC_REQUEST_TYPE                   'f'
-#define IOC_REQUEST_MIN_NR                 30
-
-#define IOC_REQUEST_GETATTR             _IOWR('f', 30, long)
-#define IOC_REQUEST_READPAGE            _IOWR('f', 31, long)
-#define IOC_REQUEST_SETATTR             _IOWR('f', 32, long)
-#define IOC_REQUEST_CREATE              _IOWR('f', 33, long)
-#define IOC_REQUEST_OPEN                _IOWR('f', 34, long)
-#define IOC_REQUEST_CLOSE               _IOWR('f', 35, long)
-#define IOC_REQUEST_MAX_NR               35
-
 #endif
index caee8db..1d2f17e 100644 (file)
  *
  */
 
+#ifndef _LINUX_LUSTRE_NET_H
+#define _LINUX_LUSTRE_NET_H
+
 #ifndef _LUSTRE_NET_H
-#define _LUSTRE_NET_H
+#error Do not #include this file directly. #include <lustre_net.h> instead
+#endif
 
 #ifdef __KERNEL__
 #include <linux/version.h>
 #endif
 #endif
 
-#include <libcfs/kp30.h>
-// #include <linux/obd.h>
-#include <lnet/lnet.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_import.h>
-#include <linux/lprocfs_status.h>
-
-/* MD flags we _always_ use */
-#define PTLRPC_MD_OPTIONS  0
-
-/* Define maxima for bulk I/O
- * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
- * these limits are system wide and not interface-local. */
-#define PTLRPC_MAX_BRW_SIZE     LNET_MTU
-#define PTLRPC_MAX_BRW_PAGES    (PTLRPC_MAX_BRW_SIZE/PAGE_SIZE)
-
-/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
-#ifdef __KERNEL__
-# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
-#  error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
-# endif
-# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE))
-#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE"
-# endif
-# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU)
-#  error "PTLRPC_MAX_BRW_SIZE too big"
-# endif
-# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV)
-#  error "PTLRPC_MAX_BRW_PAGES too big"
-# endif
-#endif /* __KERNEL__ */
-
-/* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
- * buffers */
-#define SVC_BUF_VMALLOC_THRESHOLD (2 * PAGE_SIZE)
-
-/* The following constants determine how memory is used to buffer incoming
- * service requests.
- *
- * ?_NBUFS              # buffers to allocate when growing the pool
- * ?_BUFSIZE            # bytes in a single request buffer
- * ?_MAXREQSIZE         # maximum request service will receive
- *
- * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk
- * of ?_NBUFS is added to the pool.
- *
- * Messages larger than ?_MAXREQSIZE are dropped.  Request buffers are
- * considered full when less than ?_MAXREQSIZE is left in them.
- */
-
-#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64)
-#define LDLM_NBUFS      (64 * smp_num_cpus)
-#define LDLM_BUFSIZE    (8 * 1024)
-#define LDLM_MAXREQSIZE (5 * 1024)
-#define LDLM_MAXREPSIZE (1024)
-
-#define MDT_MIN_THREADS 2UL
-#define MDT_MAX_THREADS 32UL
-#define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
-                                  num_physpages >> (25 - PAGE_SHIFT)), 2UL)
-#define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
-                                  num_physpages >> (25 - PAGE_SHIFT)), 2UL)
-
-#define MDS_NBUFS       (64 * smp_num_cpus)
-#define MDS_BUFSIZE     (8 * 1024)
-/* Assume file name length = FNAME_MAX = 256 (true for ext3).
- *        path name length = PATH_MAX = 4096
- *        LOV MD size max  = EA_MAX = 4000
- * symlink:  FNAME_MAX + PATH_MAX  <- largest
- * link:     FNAME_MAX + PATH_MAX  (mds_rec_link < mds_rec_create)
- * rename:   FNAME_MAX + FNAME_MAX
- * open:     FNAME_MAX + EA_MAX
- *
- * MDS_MAXREQSIZE ~= 4736 bytes =
- * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX
- * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header
- * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem:
- *      = 9210 bytes = lustre_msg + mds_body + 160 * (easize + cookiesize)
- *
- * Realistic size is about 512 bytes (20 character name + 128 char symlink),
- * except in the open case where there are a large number of OSTs in a LOV.
- */
-#define MDS_MAXREQSIZE  (5 * 1024)
-#define MDS_MAXREPSIZE  max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56)
-
-/* FIXME fix all constants here */
-#define MGS_MAX_THREADS 8UL
-#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \
-                            num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT)))
-
-#define MGS_NBUFS       (64 * smp_num_cpus)
-#define MGS_BUFSIZE     (8 * 1024)
-#define MGS_MAXREQSIZE  (5 * 1024)
-#define MGS_MAXREPSIZE  (9 * 1024)
-
-#define OST_MAX_THREADS 512UL
-#define OST_DEF_THREADS max_t(unsigned long, 2, \
-                              (num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus)
-#define OST_NBUFS       (64 * smp_num_cpus)
-#define OST_BUFSIZE     (8 * 1024)
-/* OST_MAXREQSIZE ~= 4768 bytes =
- * lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote
- *
- * - single object with 16 pages is 512 bytes
- * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
- */
-#define OST_MAXREQSIZE  (5 * 1024)
-#define OST_MAXREPSIZE  (9 * 1024)
-
-struct ptlrpc_connection {
-        struct list_head        c_link;
-        lnet_nid_t              c_self;
-        lnet_process_id_t       c_peer;
-        struct obd_uuid         c_remote_uuid;
-        atomic_t                c_refcount;
-};
-
-struct ptlrpc_client {
-        __u32                     cli_request_portal;
-        __u32                     cli_reply_portal;
-        char                     *cli_name;
-};
-
-/* state flags of requests */
-/* XXX only ones left are those used by the bulk descs as well! */
-#define PTL_RPC_FL_INTR      (1 << 0)  /* reply wait was interrupted by user */
-#define PTL_RPC_FL_TIMEOUT   (1 << 7)  /* request timed out waiting for reply */
-
-#define REQ_MAX_ACK_LOCKS 8
-
-#define SWAB_PARANOIA 1
-#if SWAB_PARANOIA
-/* unpacking: assert idx not unpacked already */
-#define LASSERT_REQSWAB(rq, idx)                                \
-do {                                                            \
-        LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8);  \
-        LASSERT (((rq)->rq_req_swab_mask & (1 << (idx))) == 0); \
-        (rq)->rq_req_swab_mask |= (1 << (idx));                 \
-} while (0)
-
-#define LASSERT_REPSWAB(rq, idx)                                \
-do {                                                            \
-        LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8);  \
-        LASSERT (((rq)->rq_rep_swab_mask & (1 << (idx))) == 0); \
-        (rq)->rq_rep_swab_mask |= (1 << (idx));                 \
-} while (0)
-
-/* just looking: assert idx already unpacked */
-#define LASSERT_REQSWABBED(rq, idx)                     \
-LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8 && \
-         ((rq)->rq_req_swab_mask & (1 << (idx))) != 0)
-
-#define LASSERT_REPSWABBED(rq, idx)                     \
-LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8 && \
-         ((rq)->rq_rep_swab_mask & (1 << (idx))) != 0)
-#else
-#define LASSERT_REQSWAB(rq, idx)
-#define LASSERT_REPSWAB(rq, idx)
-#define LASSERT_REQSWABBED(rq, idx)
-#define LASSERT_REPSWABBED(rq, idx)
-#endif
-
-union ptlrpc_async_args {
-        /* Scratchpad for passing args to completion interpreter. Users
-         * cast to the struct of their choosing, and LASSERT that this is
-         * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
-         * a pointer to it here.  The pointer_arg ensures this struct is at
-         * least big enough for that. */
-        void      *pointer_arg[9];
-        __u64      space[4];
-};
-
-struct ptlrpc_request_set;
-typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
-
-struct ptlrpc_request_set {
-        int               set_remaining; /* # uncompleted requests */
-        wait_queue_head_t set_waitq;
-        wait_queue_head_t *set_wakeup_ptr;
-        struct list_head  set_requests;
-        set_interpreter_func    set_interpret; /* completion callback */
-        void              *set_arg; /* completion context */
-        /* locked so that any old caller can communicate requests to
-         * the set holder who can then fold them into the lock-free set */
-        spinlock_t        set_new_req_lock;
-        struct list_head  set_new_requests;
-};
-
-struct ptlrpc_bulk_desc;
-
-/*
- * ptlrpc callback & work item stuff
- */
-struct ptlrpc_cb_id {
-        void   (*cbid_fn)(lnet_event_t *ev);     /* specific callback fn */
-        void    *cbid_arg;                      /* additional arg */
-};
-
-#define RS_MAX_LOCKS 4
-#define RS_DEBUG     1
-
-struct ptlrpc_reply_state {
-        struct ptlrpc_cb_id    rs_cb_id;
-        struct list_head       rs_list;
-        struct list_head       rs_exp_list;
-        struct list_head       rs_obd_list;
-#if RS_DEBUG
-        struct list_head       rs_debug_list;
-#endif
-        /* updates to following flag serialised by srv_request_lock */
-        unsigned int           rs_difficult:1;     /* ACK/commit stuff */
-        unsigned int           rs_scheduled:1;     /* being handled? */
-        unsigned int           rs_scheduled_ever:1;/* any schedule attempts? */
-        unsigned int           rs_handled:1;  /* been handled yet? */
-        unsigned int           rs_on_net:1;   /* reply_out_callback pending? */
-        unsigned int           rs_prealloc:1; /* rs from prealloc list */
-
-        int                    rs_size;
-        __u64                  rs_transno;
-        __u64                  rs_xid;
-        struct obd_export     *rs_export;
-        struct ptlrpc_service *rs_service;
-        lnet_handle_md_t       rs_md_h;
-        atomic_t               rs_refcount;
-
-        /* locks awaiting client reply ACK */
-        int                    rs_nlocks;
-        struct lustre_handle   rs_locks[RS_MAX_LOCKS];
-        ldlm_mode_t            rs_modes[RS_MAX_LOCKS];
-        /* last member: variable sized reply message */
-        struct lustre_msg      rs_msg;
-};
-
-struct ptlrpc_thread;
-
-enum rq_phase {
-        RQ_PHASE_NEW         = 0xebc0de00,
-        RQ_PHASE_RPC         = 0xebc0de01,
-        RQ_PHASE_BULK        = 0xebc0de02,
-        RQ_PHASE_INTERPRET   = 0xebc0de03,
-        RQ_PHASE_COMPLETE    = 0xebc0de04,
-};
-
-struct ptlrpc_request_pool {
-        spinlock_t prp_lock;
-        struct list_head prp_req_list;    /* list of ptlrpc_request structs */
-        int prp_rq_size;
-        void (*prp_populate)(struct ptlrpc_request_pool *, int);
-};
-
-struct ptlrpc_request {
-        int rq_type; /* one of PTL_RPC_MSG_* */
-        struct list_head rq_list;
-        struct list_head rq_history_list;       /* server-side history */
-        __u64            rq_history_seq;        /* history sequence # */
-        int rq_status;
-        spinlock_t rq_lock;
-        /* client-side flags */
-        unsigned int rq_intr:1, rq_replied:1, rq_err:1,
-                rq_timedout:1, rq_resend:1, rq_restart:1,
-                /*
-                 * when ->rq_replay is set, request is kept by the client even
-                 * after server commits corresponding transaction. This is
-                 * used for operations that require sequence of multiple
-                 * requests to be replayed. The only example currently is file
-                 * open/close. When last request in such a sequence is
-                 * committed, ->rq_replay is cleared on all requests in the
-                 * sequence.
-                 */
-                rq_replay:1,
-                rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
-                rq_no_delay:1, rq_net_err:1;
-        enum rq_phase rq_phase; /* one of RQ_PHASE_* */
-        atomic_t rq_refcount;   /* client-side refcount for SENT race */
-
-        struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */
-
-        int rq_request_portal;  /* XXX FIXME bug 249 */
-        int rq_reply_portal;    /* XXX FIXME bug 249 */
-
-        int rq_nob_received; /* client-side # reply bytes actually received  */
-
-        int rq_reqlen;
-        struct lustre_msg *rq_reqmsg;
-
-        int rq_timeout;         /* time to wait for reply (seconds) */
-        int rq_replen;
-        struct lustre_msg *rq_repmsg;
-        __u64 rq_transno;
-        __u64 rq_xid;
-        struct list_head rq_replay_list;
-
-#if SWAB_PARANOIA
-        __u32 rq_req_swab_mask;
-        __u32 rq_rep_swab_mask;
+/* XXX Liang: should be moved to other header instead of here */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+#ifndef WITH_GROUP_INFO
+#define WITH_GROUP_INFO
 #endif
-
-        int rq_import_generation;
-        enum lustre_imp_state rq_send_state;
-
-        /* client+server request */
-        lnet_handle_md_t     rq_req_md_h;
-        struct ptlrpc_cb_id  rq_req_cbid;
-
-        /* server-side... */
-        struct timeval       rq_arrival_time;       /* request arrival time */
-        struct ptlrpc_reply_state *rq_reply_state;  /* separated reply state */
-        struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/
-#if CRAY_XT3
-        __u32                rq_uid;            /* peer uid, used in MDS only */
 #endif
 
-        /* client-only incoming reply */
-        lnet_handle_md_t     rq_reply_md_h;
-        wait_queue_head_t    rq_reply_waitq;
-        struct ptlrpc_cb_id  rq_reply_cbid;
-
-        lnet_nid_t           rq_self;
-        lnet_process_id_t    rq_peer;
-        struct obd_export   *rq_export;
-        struct obd_import   *rq_import;
-
-        void (*rq_replay_cb)(struct ptlrpc_request *);
-        void (*rq_commit_cb)(struct ptlrpc_request *);
-        void  *rq_cb_data;
-
-        struct ptlrpc_bulk_desc *rq_bulk;       /* client side bulk */
-        time_t rq_sent;                         /* when request sent, seconds */
-
-        /* Multi-rpc bits */
-        struct list_head rq_set_chain;
-        struct ptlrpc_request_set *rq_set;
-        void *rq_interpret_reply;               /* Async completion handler */
-        union ptlrpc_async_args rq_async_args;  /* Async completion context */
-        void *rq_ptlrpcd_data;
-        struct ptlrpc_request_pool *rq_pool;    /* Pool if request from
-                                                   preallocated list */
-};
-
-static inline const char *
-ptlrpc_rqphase2str(const struct ptlrpc_request *req)
-{
-        switch (req->rq_phase) {
-        case RQ_PHASE_NEW:
-                return "New";
-        case RQ_PHASE_RPC:
-                return "Rpc";
-        case RQ_PHASE_BULK:
-                return "Bulk";
-        case RQ_PHASE_INTERPRET:
-                return "Interpret";
-        case RQ_PHASE_COMPLETE:
-                return "Complete";
-        default:
-                return "?Phase?";
-        }
-}
-
-/* Spare the preprocessor, spoil the bugs. */
-#define FLAG(field, str) (field ? str : "")
-
-#define DEBUG_REQ_FLAGS(req)                                                    \
-        ptlrpc_rqphase2str(req),                                                \
-        FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"),                    \
-        FLAG(req->rq_err, "E"),                                                 \
-        FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"),   \
-        FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"),                  \
-        FLAG(req->rq_no_resend, "N"),                                           \
-        FLAG(req->rq_waiting, "W")
-
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s"
-
-#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...)                       \
-CDEB_TYPE(level, "@@@ " fmt                                                    \
-       " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "         \
-       REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
-       req->rq_transno,                                                        \
-       req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
-       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "<?>",  \
-       req->rq_import ?                                                        \
-          (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "<?>",  \
-       (req->rq_import && req->rq_import->imp_client) ?                        \
-           req->rq_import->imp_client->cli_request_portal : -1,                \
-       req->rq_reqlen, req->rq_replen,                                         \
-       atomic_read(&req->rq_refcount),                                         \
-       DEBUG_REQ_FLAGS(req),                                                   \
-       req->rq_reqmsg ? req->rq_reqmsg->flags : 0,                             \
-       req->rq_repmsg ? req->rq_repmsg->flags : 0,                             \
-       req->rq_status, req->rq_repmsg ? req->rq_repmsg->status : 0)
-
-/* for most callers (level is a constant) this is resolved at compile time */
-#define DEBUG_REQ(level, req, fmt, args...)                                    \
-do {                                                                           \
-        if ((level) & (D_ERROR | D_WARNING))                                   \
-            __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args);               \
-        else                                                                   \
-            __DEBUG_REQ(CDEBUG, level, req, fmt, ## args);                     \
-} while (0)
-
-#define DEBUG_REQ_EX(level, req, fmt, args...)                          \
-do {                                                                    \
-        if ((level) & (D_ERROR | D_WARNING))                            \
-            __DEBUG_REQ(CDEBUG_LIMIT, D_ERROR, req, fmt, ## args);      \
-        else                                                            \
-            __DEBUG_REQ(CDEBUG_EX, level, req, fmt, ## args);           \
-} while (0)
-
-struct ptlrpc_bulk_page {
-        struct list_head bp_link;
-        int bp_buflen;
-        int bp_pageoffset;                      /* offset within a page */
-        struct page *bp_page;
-};
-
-#define BULK_GET_SOURCE   0
-#define BULK_PUT_SINK     1
-#define BULK_GET_SINK     2
-#define BULK_PUT_SOURCE   3
-
-struct ptlrpc_bulk_desc {
-        unsigned int bd_success:1;              /* completed successfully */
-        unsigned int bd_network_rw:1;           /* accessible to the network */
-        unsigned int bd_type:2;                 /* {put,get}{source,sink} */
-        unsigned int bd_registered:1;           /* client side */
-        spinlock_t   bd_lock;                   /* serialise with callback */
-        int bd_import_generation;
-        struct obd_export *bd_export;
-        struct obd_import *bd_import;
-        __u32 bd_portal;
-        struct ptlrpc_request *bd_req;          /* associated request */
-        wait_queue_head_t      bd_waitq;        /* server side only WQ */
-        int                    bd_iov_count;    /* # entries in bd_iov */
-        int                    bd_max_iov;      /* allocated size of bd_iov */
-        int                    bd_nob;          /* # bytes covered */
-        int                    bd_nob_transferred; /* # bytes GOT/PUT */
-
-        __u64                  bd_last_xid;
-
-        struct ptlrpc_cb_id    bd_cbid;         /* network callback info */
-        lnet_handle_md_t        bd_md_h;         /* associated MD */
-
-#if defined(__KERNEL__)
-        lnet_kiov_t             bd_iov[0];
-#else
-        lnet_md_iovec_t         bd_iov[0];
-#endif
-};
-
-struct lu_context;
-struct ptlrpc_thread {
-
-        struct list_head t_link; /* active threads for service, from svc->srv_threads */
-
-        void *t_data;            /* thread-private data (preallocated memory) */
-        __u32 t_flags;
-
-        unsigned int t_id; /* service thread index, from ptlrpc_start_threads */
-        wait_queue_head_t t_ctl_waitq;
-        struct lu_context *t_ctx;
-};
-
-struct ptlrpc_request_buffer_desc {
-        struct list_head       rqbd_list;
-        struct list_head       rqbd_reqs;
-        struct ptlrpc_service *rqbd_service;
-        lnet_handle_md_t       rqbd_md_h;
-        int                    rqbd_refcount;
-        char                  *rqbd_buffer;
-        struct ptlrpc_cb_id    rqbd_cbid;
-        struct ptlrpc_request  rqbd_req;
-};
-
-typedef int (*svc_handler_t)(struct ptlrpc_request *req);
-typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *);
-
-struct ptlrpc_service {
-        struct list_head srv_list;              /* chain thru all services */
-        int              srv_max_req_size;      /* biggest request to receive */
-        int              srv_max_reply_size;    /* biggest reply to send */
-        int              srv_buf_size;          /* size of individual buffers */
-        int              srv_nbuf_per_group;    /* # buffers to allocate in 1 group */
-        int              srv_nbufs;             /* total # req buffer descs allocated */
-        int              srv_nthreads;          /* # running threads */
-        int              srv_n_difficult_replies; /* # 'difficult' replies */
-        int              srv_n_active_reqs;     /* # reqs being served */
-        int              srv_rqbd_timeout;      /* timeout before re-posting reqs */
-        int              srv_watchdog_timeout; /* soft watchdog timeout, in ms */
-        int              srv_num_threads;       /* # threads to start/started */
-        unsigned         srv_cpu_affinity:1;    /* bind threads to CPUs */
-
-        __u32            srv_req_portal;
-        __u32            srv_rep_portal;
-
-        int               srv_n_queued_reqs;    /* # reqs waiting to be served */
-        struct list_head  srv_request_queue;    /* reqs waiting for service */
-
-        struct list_head  srv_request_history;  /* request history */
-        __u64             srv_request_seq;      /* next request sequence # */
-        __u64             srv_request_max_cull_seq; /* highest seq culled from history */
-        svcreq_printfn_t  srv_request_history_print_fn; /* service-specific print fn */
-
-        struct list_head  srv_idle_rqbds;       /* request buffers to be reposted */
-        struct list_head  srv_active_rqbds;     /* req buffers receiving */
-        struct list_head  srv_history_rqbds;    /* request buffer history */
-        int               srv_nrqbd_receiving;  /* # posted request buffers */
-        int               srv_n_history_rqbds;  /* # request buffers in history */
-        int               srv_max_history_rqbds; /* max # request buffers in history */
-
-        atomic_t          srv_outstanding_replies;
-        struct list_head  srv_active_replies;   /* all the active replies */
-        struct list_head  srv_reply_queue;      /* replies waiting for service */
-
-        wait_queue_head_t srv_waitq; /* all threads sleep on this. This
-                                      * wait-queue is signalled when new
-                                      * incoming request arrives and when
-                                      * difficult reply has to be handled. */
-
-        struct list_head   srv_threads;
-        svc_handler_t      srv_handler;
-
-        char *srv_name;  /* only statically allocated strings here; we don't clean them */
-
-        spinlock_t               srv_lock;
-
-        struct proc_dir_entry   *srv_procroot;
-        struct lprocfs_stats    *srv_stats;
-
-        /* List of free reply_states */
-        struct list_head         srv_free_rs_list;
-        /* waitq to run, when adding stuff to srv_free_rs_list */
-        wait_queue_head_t        srv_free_rs_waitq;
-
-        /*
-         * if non-NULL called during thread creation (ptlrpc_start_thread())
-         * to initialize service specific per-thread state.
-         */
-        int (*srv_init)(struct ptlrpc_thread *thread);
-        /*
-         * if non-NULL called during thread shutdown (ptlrpc_main()) to
-         * destruct state created by ->srv_init().
-         */
-        void (*srv_done)(struct ptlrpc_thread *thread);
-
-        //struct ptlrpc_srv_ni srv_interfaces[0];
-};
-
-/* ptlrpc/events.c */
-extern lnet_handle_eq_t ptlrpc_eq_h;
-extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
-                               lnet_process_id_t *peer, lnet_nid_t *self);
-extern void request_out_callback (lnet_event_t *ev);
-extern void reply_in_callback(lnet_event_t *ev);
-extern void client_bulk_callback (lnet_event_t *ev);
-extern void request_in_callback(lnet_event_t *ev);
-extern void reply_out_callback(lnet_event_t *ev);
-extern void server_bulk_callback (lnet_event_t *ev);
-
-/* ptlrpc/connection.c */
-void ptlrpc_dump_connections(void);
-void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *);
-struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer,
-                                                lnet_nid_t self, struct obd_uuid *uuid);
-int ptlrpc_put_connection(struct ptlrpc_connection *c);
-struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
-void ptlrpc_init_connection(void);
-void ptlrpc_cleanup_connection(void);
-extern lnet_pid_t ptl_get_pid(void);
-
-/* ptlrpc/niobuf.c */
-int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc);
-void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc);
-int ptlrpc_register_bulk(struct ptlrpc_request *req);
-void ptlrpc_unregister_bulk (struct ptlrpc_request *req);
-
-static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc)
-{
-        unsigned long flags;
-        int           rc;
-
-        spin_lock_irqsave (&desc->bd_lock, flags);
-        rc = desc->bd_network_rw;
-        spin_unlock_irqrestore (&desc->bd_lock, flags);
-        return (rc);
-}
-
-int ptlrpc_send_reply(struct ptlrpc_request *req, int);
-int ptlrpc_reply(struct ptlrpc_request *req);
-int ptlrpc_error(struct ptlrpc_request *req);
-void ptlrpc_resend_req(struct ptlrpc_request *request);
-int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
-int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd);
-
-/* ptlrpc/client.c */
-void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
-                        struct ptlrpc_client *);
-void ptlrpc_cleanup_client(struct obd_import *imp);
-struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
-
-static inline int
-ptlrpc_client_receiving_reply (struct ptlrpc_request *req)
-{
-        unsigned long flags;
-        int           rc;
-
-        spin_lock_irqsave(&req->rq_lock, flags);
-        rc = req->rq_receiving_reply;
-        spin_unlock_irqrestore(&req->rq_lock, flags);
-        return (rc);
-}
-
-static inline int
-ptlrpc_client_replied (struct ptlrpc_request *req)
-{
-        unsigned long flags;
-        int           rc;
-
-        spin_lock_irqsave(&req->rq_lock, flags);
-        rc = req->rq_replied;
-        spin_unlock_irqrestore(&req->rq_lock, flags);
-        return (rc);
-}
-
-static inline void
-ptlrpc_wake_client_req (struct ptlrpc_request *req)
-{
-        if (req->rq_set == NULL)
-                wake_up(&req->rq_reply_waitq);
-        else
-                wake_up(&req->rq_set->set_waitq);
-}
-
-int ptlrpc_queue_wait(struct ptlrpc_request *req);
-int ptlrpc_replay_req(struct ptlrpc_request *req);
-void ptlrpc_unregister_reply(struct ptlrpc_request *req);
-void ptlrpc_restart_req(struct ptlrpc_request *req);
-void ptlrpc_abort_inflight(struct obd_import *imp);
-
-struct ptlrpc_request_set *ptlrpc_prep_set(void);
-int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
-int ptlrpc_check_set(struct ptlrpc_request_set *set);
-int ptlrpc_set_wait(struct ptlrpc_request_set *);
-int ptlrpc_expired_set(void *data);
-void ptlrpc_interrupted_set(void *data);
-void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
-void ptlrpc_set_destroy(struct ptlrpc_request_set *);
-void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
-void ptlrpc_set_add_new_req(struct ptlrpc_request_set *,
-                            struct ptlrpc_request *);
-
-void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
-void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
-struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int,
-                                                void (*populate_pool)(struct ptlrpc_request_pool *, int));
-struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode,
-                                       int count, int *lengths, char **bufs);
-struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
-                                            int count, int *lengths, char **bufs,
-                                            struct ptlrpc_request_pool *pool);
-void ptlrpc_free_req(struct ptlrpc_request *request);
-void ptlrpc_req_finished(struct ptlrpc_request *request);
-void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
-struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
-struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
-                                               int npages, int type, int portal);
-struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req,
-                                              int npages, int type, int portal);
-void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
-void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-                           struct page *page, int pageoffset, int len);
-void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
-                                      struct obd_import *imp);
-__u64 ptlrpc_next_xid(void);
-__u64 ptlrpc_sample_next_xid(void);
-__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
-
-struct ptlrpc_service_conf {
-        int psc_nbufs;
-        int psc_bufsize;
-        int psc_max_req_size;
-        int psc_max_reply_size;
-        int psc_req_portal;
-        int psc_rep_portal;
-        int psc_watchdog_timeout; /* in ms */
-        int psc_num_threads;
-};
-
-
-/* ptlrpc/service.c */
-void ptlrpc_save_lock (struct ptlrpc_request *req,
-                       struct lustre_handle *lock, int mode);
-void ptlrpc_commit_replies (struct obd_device *obd);
-void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
-
-struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
-                                            svc_handler_t h, char *name,
-                                            struct proc_dir_entry *proc_entry,
-                                            svcreq_printfn_t prntfn);
-
-struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
-                                       int max_reply_size,
-                                       int req_portal, int rep_portal,
-                                       int watchdog_timeout, /* in ms */
-                                       svc_handler_t, char *name,
-                                       struct proc_dir_entry *proc_entry,
-                                       svcreq_printfn_t, int num_threads);
-void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
-
-int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc,
-                         char *base_name);
-int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
-                        char *name, int id);
-int ptlrpc_unregister_service(struct ptlrpc_service *service);
-int liblustre_check_services (void *arg);
-void ptlrpc_daemonize(char *name);
-int ptlrpc_service_health_check(struct ptlrpc_service *);
-
-
-struct ptlrpc_svc_data {
-        char *name;
-        struct ptlrpc_service *svc;
-        struct ptlrpc_thread *thread;
-        struct obd_device *dev;
-};
-
-/* ptlrpc/import.c */
-int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid);
-int ptlrpc_init_import(struct obd_import *imp);
-int ptlrpc_disconnect_import(struct obd_import *imp);
-int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
-
-/* ptlrpc/pack_generic.c */
-int lustre_msg_swabbed(struct lustre_msg *msg);
-int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
-int lustre_pack_request(struct ptlrpc_request *, int count, const int *lens,
-                        char **bufs);
-int lustre_pack_reply(struct ptlrpc_request *, int count, const int *lens,
-                      char **bufs);
-void lustre_shrink_reply(struct ptlrpc_request *req,
-                         int segment, unsigned int newlen, int move_data);
-void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
-int lustre_msg_size(int count, const int *lengths);
-int lustre_unpack_msg(struct lustre_msg *m, int len);
-void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
-int lustre_msg_buflen(struct lustre_msg *m, int n);
-char *lustre_msg_string (struct lustre_msg *m, int n, int max_len);
-void *lustre_swab_buf(struct lustre_msg *, int n, int minlen, void *swabber);
-void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen,
-                          void *swabber);
-void *lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen,
-                          void *swabber);
-
-static inline void
-ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
-{
-        LASSERT(atomic_read(&rs->rs_refcount) > 0);
-        atomic_inc(&rs->rs_refcount);
-}
-
-static inline void
-ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
-{
-        LASSERT(atomic_read(&rs->rs_refcount) > 0);
-        if (atomic_dec_and_test(&rs->rs_refcount))
-                lustre_free_reply_state(rs);
-}
-
-/* ldlm/ldlm_lib.c */
-int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
-int client_obd_cleanup(struct obd_device * obddev);
-int client_connect_import(struct lustre_handle *conn, struct obd_device *obd,
-                          struct obd_uuid *cluuid, struct obd_connect_data *);
-int client_disconnect_export(struct obd_export *exp);
-int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
-                           int priority);
-int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
-int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
-
-/* ptlrpc/pinger.c */
-int ptlrpc_pinger_add_import(struct obd_import *imp);
-int ptlrpc_pinger_del_import(struct obd_import *imp);
-#ifdef __KERNEL__
-void ping_evictor_start(void);
-void ping_evictor_stop(void);
-#else
-#define ping_evictor_start()    do {} while (0)
-#define ping_evictor_stop()     do {} while (0)
-#endif
-
-/* ptlrpc/ptlrpcd.c */
-void ptlrpcd_wake(struct ptlrpc_request *req);
-void ptlrpcd_add_req(struct ptlrpc_request *req);
-int ptlrpcd_addref(void);
-void ptlrpcd_decref(void);
-
-/* ptlrpc/lproc_ptlrpc.c */
-#ifdef LPROCFS
-void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
-void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);
-#else
-static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {}
-static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {}
-#endif
-
-/* ptlrpc/llog_server.c */
-int llog_origin_handle_create(struct ptlrpc_request *req);
-int llog_origin_handle_destroy(struct ptlrpc_request *req);
-int llog_origin_handle_prev_block(struct ptlrpc_request *req);
-int llog_origin_handle_next_block(struct ptlrpc_request *req);
-int llog_origin_handle_read_header(struct ptlrpc_request *req);
-int llog_origin_handle_close(struct ptlrpc_request *req);
-int llog_origin_handle_cancel(struct ptlrpc_request *req);
-int llog_catinfo(struct ptlrpc_request *req);
-
-/* ptlrpc/llog_client.c */
-extern struct llog_operations llog_client_ops;
-
 #endif
index c597b7a..248bd5d 100644 (file)
@@ -1,8 +1,12 @@
 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  */
+#ifndef _LINUX_LUSTRE_QUOTA_H
+#define _LINUX_LUSTRE_QUOTA_H
+
 #ifndef _LUSTRE_QUOTA_H
-#define _LUSTRE_QUOTA_H
+#error Do not #include this file directly. #include <lustre_quota.h> instead
+#endif
 
 #ifdef __KERNEL__
 #include <linux/version.h>
 #include <linux/quota.h>
 #include <linux/quotaops.h>
 #endif
-#include <linux/lustre_idl.h>
-#include <linux/lustre_net.h>
-#include <linux/lvfs.h>
-
-struct obd_device;
-struct client_obd;
-
-#ifndef NR_DQHASH
-#define NR_DQHASH 45
-#endif
-
-#ifdef HAVE_QUOTA_SUPPORT
-
-#ifdef __KERNEL__
-
-/* structures to access admin quotafile */
-struct lustre_mem_dqinfo {
-        unsigned int dqi_bgrace;
-        unsigned int dqi_igrace;
-        unsigned long dqi_flags;
-        unsigned int dqi_blocks;
-        unsigned int dqi_free_blk;
-        unsigned int dqi_free_entry;
-};
-
-struct lustre_quota_info {
-        struct file *qi_files[MAXQUOTAS];
-        struct lustre_mem_dqinfo qi_info[MAXQUOTAS];
-};
-
-#define DQ_STATUS_AVAIL         0x0     /* Available dquot */
-#define DQ_STATUS_SET           0x01    /* Sombody is setting dquot */
-#define DQ_STATUS_RECOVERY      0x02    /* dquot is in recovery */
-
-struct lustre_dquot {
-        /* Hash list in memory, protect by dquot_hash_lock */
-        struct list_head dq_hash;
-        /* Protect the data in lustre_dquot */
-        struct semaphore dq_sem;
-        /* Use count */
-        int dq_refcnt;
-        /* Pointer of quota info it belongs to */
-        struct lustre_quota_info *dq_info;
-        
-        loff_t dq_off;                  /* Offset of dquot on disk */
-        unsigned int dq_id;             /* ID this applies to (uid, gid) */
-        int dq_type;                    /* Type fo quota (USRQUOTA, GRPQUOUTA) */
-        unsigned short dq_status;       /* See DQ_STATUS_ */
-        unsigned long dq_flags;         /* See DQ_ in quota.h */
-        struct mem_dqblk dq_dqb;        /* Diskquota usage */
-};
-
-struct dquot_id {
-        struct list_head        di_link;
-        __u32                   di_id;
-};
-
-#define QFILE_CHK               1
-#define QFILE_RD_INFO           2
-#define QFILE_WR_INFO           3
-#define QFILE_INIT_INFO         4
-#define QFILE_RD_DQUOT          5
-#define QFILE_WR_DQUOT          6
-
-/* admin quotafile operations */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-int lustre_check_quota_file(struct lustre_quota_info *lqi, int type);
-int lustre_read_quota_info(struct lustre_quota_info *lqi, int type);
-int lustre_write_quota_info(struct lustre_quota_info *lqi, int type);
-int lustre_read_dquot(struct lustre_dquot *dquot);
-int lustre_commit_dquot(struct lustre_dquot *dquot);
-int lustre_init_quota_info(struct lustre_quota_info *lqi, int type);
-int lustre_get_qids(struct file *file, struct inode *inode, int type, 
-                    struct list_head *list);
-#else
-
-#ifndef DQ_FAKE_B
-#define DQ_FAKE_B       6
-#endif
-
-static inline int lustre_check_quota_file(struct lustre_quota_info *lqi,
-                                          int type)
-{
-        return 0;
-}
-static inline int lustre_read_quota_info(struct lustre_quota_info *lqi,
-                                         int type)
-{
-        return 0;
-}
-static inline int lustre_write_quota_info(struct lustre_quota_info *lqi,
-                                          int type)
-{
-        return 0;
-}
-static inline int lustre_read_dquot(struct lustre_dquot *dquot)
-{
-        return 0;
-}
-static inline int lustre_commit_dquot(struct lustre_dquot *dquot)
-{
-        return 0;
-}
-static inline int lustre_init_quota_info(struct lustre_quota_info *lqi,
-                                         int type)
-{
-        return 0;
-}
-#endif  /* KERNEL_VERSION(2,5,0) */
-
-#define LL_DQUOT_OFF(sb)    DQUOT_OFF(sb)
-
-typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd,
-                                int opc);
-struct lustre_quota_ctxt {
-        struct super_block *lqc_sb;     /* superblock this applies to */
-        struct obd_import *lqc_import;  /* import used to send dqacq/dqrel RPC */
-        dqacq_handler_t lqc_handler;    /* dqacq/dqrel RPC handler, only for quota master */ 
-        unsigned long lqc_recovery:1;   /* Doing recovery */ 
-        unsigned long lqc_iunit_sz;     /* Unit size of file quota */
-        unsigned long lqc_itune_sz;     /* Trigger dqacq when available file quota less than
-                                         * this value, trigger dqrel when available file quota
-                                         * more than this value + 1 iunit */
-        unsigned long lqc_bunit_sz;     /* Unit size of block quota */
-        unsigned long lqc_btune_sz;     /* See comment of lqc_itune_sz */
-};
-
-#else
-
-struct lustre_quota_info {
-};
-
-struct lustre_quota_ctxt {
-};
-
-#endif  /* !__KERNEL__ */
-
-#else
-
-#define LL_DQUOT_OFF(sb) do {} while(0)
-
-struct lustre_quota_info {
-};
-
-struct lustre_quota_ctxt {
-};
-
-#endif /* !HAVE_QUOTA_SUPPORT */
-
-/* If the (quota limit < qunit * slave count), the slave which can't
- * acquire qunit should set it's local limit as MIN_QLIMIT */
-#define MIN_QLIMIT      1
-
-struct quotacheck_thread_args {
-        struct obd_export   *qta_exp;   /* obd export */
-        struct obd_quotactl  qta_oqctl; /* obd_quotactl args */
-        struct super_block  *qta_sb;    /* obd super block */
-        atomic_t            *qta_sem;   /* obt_quotachecking */
-};
-
-typedef struct {
-        int (*quota_init) (void);
-        int (*quota_exit) (void);
-        int (*quota_setup) (struct obd_device *, struct lustre_cfg *);
-        int (*quota_cleanup) (struct obd_device *);
-        /* For quota master, close admin quota files */
-        int (*quota_fs_cleanup) (struct obd_device *);
-        int (*quota_ctl) (struct obd_export *, struct obd_quotactl *);
-        int (*quota_check) (struct obd_export *, struct obd_quotactl *);
-        int (*quota_recovery) (struct obd_device *);
-        
-        /* For quota master/slave, adjust quota limit after fs operation */
-        int (*quota_adjust) (struct obd_device *, unsigned int[], 
-                             unsigned int[], int, int); 
-        
-        /* For quota slave, set import, trigger quota recovery */
-        int (*quota_setinfo) (struct obd_export *, struct obd_device *);
-        
-        /* For quota slave, set proper thread resoure capability */
-        int (*quota_enforce) (struct obd_device *, unsigned int);
-        
-        /* For quota slave, check whether specified uid/gid is over quota */
-        int (*quota_getflag) (struct obd_device *, struct obdo *);
-        
-        /* For quota slave, acquire/release quota from master if needed */
-        int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int);
-        
-        /* For quota client, poll if the quota check done */
-        int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *);
-        
-        /* For quota client, check whether specified uid/gid is over quota */
-        int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int);
-        
-        /* For quota client, set over quota flag for specifed uid/gid */
-        int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int,
-                            obd_flag, obd_flag);
-} quota_interface_t;
-
-#define Q_COPY(out, in, member) (out)->member = (in)->member
-
-#define QUOTA_OP(interface, op) interface->quota_ ## op         
-
-#define QUOTA_CHECK_OP(interface, op)                           \
-do {                                                            \
-        if (!interface)                                         \
-                RETURN(0);                                      \
-        if (!QUOTA_OP(interface, op)) {                         \
-                CERROR("no quota operation: " #op "\n");        \
-                RETURN(-EOPNOTSUPP);                            \
-        }                                                       \
-} while(0)
-
-static inline int lquota_init(quota_interface_t *interface)
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, init);
-        rc = QUOTA_OP(interface, init)();
-        RETURN(rc);
-}
-
-static inline int lquota_exit(quota_interface_t *interface) 
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, exit);
-        rc = QUOTA_OP(interface, exit)();
-        RETURN(rc);
-}
-
-static inline int lquota_setup(quota_interface_t *interface,
-                               struct obd_device *obd, 
-                               struct lustre_cfg *lcfg) 
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, setup);
-        rc = QUOTA_OP(interface, setup)(obd, lcfg);
-        RETURN(rc);
-}
-
-static inline int lquota_cleanup(quota_interface_t *interface,
-                                 struct obd_device *obd) 
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, cleanup);
-        rc = QUOTA_OP(interface, cleanup)(obd);
-        RETURN(rc);
-}
-
-static inline int lquota_fs_cleanup(quota_interface_t *interface,
-                                    struct obd_device *obd)
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, fs_cleanup);
-        rc = QUOTA_OP(interface, fs_cleanup)(obd);
-        RETURN(rc);
-}
-
-static inline int lquota_recovery(quota_interface_t *interface,
-                                  struct obd_device *obd) 
-{        
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, recovery);
-        rc = QUOTA_OP(interface, recovery)(obd);
-        RETURN(rc);
-}
-
-static inline int lquota_adjust(quota_interface_t *interface,
-                                struct obd_device *obd, 
-                                unsigned int qcids[], 
-                                unsigned int qpids[], 
-                                int rc, int opc) 
-{
-        int ret;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, adjust);
-        ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc);
-        RETURN(ret);
-}
-
-static inline int lquota_chkdq(quota_interface_t *interface,
-                               struct client_obd *cli,
-                               unsigned int uid, unsigned int gid)
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, chkdq);
-        rc = QUOTA_OP(interface, chkdq)(cli, uid, gid);
-        RETURN(rc);
-}
-
-static inline int lquota_setdq(quota_interface_t *interface,
-                               struct client_obd *cli,
-                               unsigned int uid, unsigned int gid,
-                               obd_flag valid, obd_flag flags)
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, setdq);
-        rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags);
-        RETURN(rc);
-}
-
-static inline int lquota_poll_check(quota_interface_t *interface,
-                                    struct obd_export *exp,
-                                    struct if_quotacheck *qchk)
-{
-        int rc;
-        ENTRY;
-        
-        QUOTA_CHECK_OP(interface, poll_check);
-        rc = QUOTA_OP(interface, poll_check)(exp, qchk);
-        RETURN(rc);
-}
-
-       
-static inline int lquota_setinfo(quota_interface_t *interface,
-                                 struct obd_export *exp, 
-                                 struct obd_device *obd) 
-{
-        int rc;
-        ENTRY;
-
-        QUOTA_CHECK_OP(interface, setinfo);
-        rc = QUOTA_OP(interface, setinfo)(exp, obd);
-        RETURN(rc);
-}
-
-static inline int lquota_enforce(quota_interface_t *interface, 
-                                 struct obd_device *obd,
-                                 unsigned int ignore)
-{
-        int rc;
-        ENTRY;
-
-        QUOTA_CHECK_OP(interface, enforce);
-        rc = QUOTA_OP(interface, enforce)(obd, ignore);
-        RETURN(rc);
-}
-
-static inline int lquota_getflag(quota_interface_t *interface,
-                                 struct obd_device *obd, struct obdo *oa)
-{
-        int rc;
-        ENTRY;
-
-        QUOTA_CHECK_OP(interface, getflag);
-        rc = QUOTA_OP(interface, getflag)(obd, oa);
-        RETURN(rc);
-}
-        
-static inline int lquota_acquire(quota_interface_t *interface,
-                                 struct obd_device *obd, 
-                                 unsigned int uid, unsigned int gid)
-{
-        int rc;
-        ENTRY;
-
-        QUOTA_CHECK_OP(interface, acquire);
-        rc = QUOTA_OP(interface, acquire)(obd, uid, gid);
-        RETURN(rc);
-}
-
-#ifndef __KERNEL__
-extern quota_interface_t osc_quota_interface;
-extern quota_interface_t mdc_quota_interface;
-extern quota_interface_t lov_quota_interface;
-#endif
 
 #endif /* _LUSTRE_QUOTA_H */
diff --git a/lustre/include/linux/lustre_types.h b/lustre/include/linux/lustre_types.h
new file mode 100644 (file)
index 0000000..8f724c8
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef _LUSTRE_LINUX_TYPES_H
+#define _LUSTRE_LINUX_TYPES_H
+
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#endif
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+# include <linux/fs.h>    /* to check for FMODE_EXEC, dev_t, lest we redefine */
+#else
+#ifdef __CYGWIN__
+# include <sys/types.h>
+#elif defined(_AIX)
+# include <inttypes.h>
+#else
+# include <stdint.h>
+#endif
+#endif
+
+#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
+     !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H) && \
+     !defined(_X86_64_TYPES_H))
+
+typedef unsigned short umode_t;
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+typedef __signed__ long long __s64;
+typedef unsigned long long __u64;
+#endif
+
+#endif
diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h
new file mode 100644 (file)
index 0000000..7bbcca7
--- /dev/null
@@ -0,0 +1,80 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *   This file is part of Lustre, http://www.lustre.org
+ *
+ * Lustre public user-space interface definitions.
+ */
+
+#ifndef _LINUX_LUSTRE_USER_H
+#define _LINUX_LUSTRE_USER_H
+
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#else
+#include <lustre/types.h>
+#endif
+
+
+#ifndef __KERNEL__
+# define NEED_QUOTA_DEFS
+# ifdef HAVE_QUOTA_SUPPORT
+#  include <sys/quota.h>
+# endif
+#else
+# include <linux/version.h>
+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21)
+#  define NEED_QUOTA_DEFS
+# endif
+# ifdef HAVE_QUOTA_SUPPORT
+#  include <linux/quota.h>
+# endif
+#endif
+
+/*
+ * asm-x86_64/processor.h on some SLES 9 distros seems to use
+ * kernel-only typedefs.  fortunately skipping it altogether is ok
+ * (for now).
+ */
+#define __ASM_X86_64_PROCESSOR_H
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#include <sys/stat.h>
+#endif
+
+#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
+    defined(__craynv)
+typedef struct stat     lstat_t;
+#define HAVE_LOV_USER_MDS_DATA
+#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__)
+typedef struct stat64   lstat_t;
+#define HAVE_LOV_USER_MDS_DATA
+#endif
+
+#ifndef LPU64
+/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */
+#if defined(__x86_64__) && defined(__KERNEL__)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ  "%lu"
+# define LPSSZ "%ld"
+#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPSZ  "%u"
+# define LPSSZ "%d"
+#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPSZ  "%lu"
+# define LPSSZ "%ld"
+#endif
+#endif /* !LPU64 */
+
+#endif /* _LINUX_LUSTRE_USER_H */
index c90a78f..816925a 100644 (file)
  * lustre VFS/process permission interface
  */
 
-#ifndef __LVFS_H__
-#define __LVFS_H__
-
-#include <libcfs/kp30.h>
-#include <linux/lustre_ucache.h>
+#ifndef __LINUX_LVFS_H__
+#define __LINUX_LVFS_H__
 
-#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+#ifndef __LVFS_H__
+#error Do not #include this file directly. #include <lvfs.h> instead
+#endif
 
 #if defined __KERNEL__
 #include <linux/lustre_compat25.h>
@@ -36,9 +35,7 @@
 struct group_info { /* unused */ };
 #endif
 
-#ifdef LIBLUSTRE
-#include <lvfs_user_fs.h>
-#endif
+#define LLOG_LVFS
 
 /* simple.c */
 
@@ -81,14 +78,6 @@ struct lvfs_run_ctxt {
 #define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0)
 #endif
 
-/* lvfs_common.c */
-struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data);
-
-void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
-               struct lvfs_ucred *cred);
-void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
-              struct lvfs_ucred *cred);
-
 #ifdef __KERNEL__
 
 struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix);
index 7c31b31..0ea6104 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 
-#include <linux/lvfs.h>
+#include <lvfs.h>
 
 #define l_file file
 #define l_dentry dentry
index fd90061..6ca1d9e 100644 (file)
@@ -22,8 +22,8 @@
  *
  */
 
-#ifndef _LINUX_MD_OBJECT_H
-#define _LINUX_MD_OBJECT_H
+#ifndef _LUSTRE_MD_OBJECT_H
+#define _LUSTRE_MD_OBJECT_H
 
 /*
  * Sub-class of lu_object with methods common for "meta-data" objects in MDT
@@ -39,7 +39,7 @@
 /*
  * super-class definitions.
  */
-#include <linux/lu_object.h>
+#include <lu_object.h>
 
 struct md_device;
 struct md_device_operations;
index 0926854..ba0d89e 100644 (file)
@@ -2,20 +2,12 @@
  * vim:expandtab:shiftwidth=8:tabstop=8:
  */
 
-#ifndef __OBD_H
-#define __OBD_H
-
-#define IOC_OSC_TYPE         'h'
-#define IOC_OSC_MIN_NR       20
-#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
-#define IOC_OSC_MAX_NR       50
+#ifndef __LINUX_OBD_H
+#define __LINUX_OBD_H
 
-#define IOC_MDC_TYPE         'i'
-#define IOC_MDC_MIN_NR       20
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-/* Moved to lustre_user.h
-#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
-#define IOC_MDC_MAX_NR       50
+#ifndef __OBD_H
+#error Do not #include this file directly. #include <obd.h> instead
+#endif
 
 #ifdef __KERNEL__
 # include <linux/fs.h>
 # include <linux/mount.h>
 #endif
 
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_export.h>
-#include <linux/lustre_quota.h>
-
-#include <linux/lu_object.h>
-
-/* this is really local to the OSC */
-struct loi_oap_pages {
-        struct list_head        lop_pending;
-        int                     lop_num_pending;
-        struct list_head        lop_urgent;
-        struct list_head        lop_pending_group;
-};
-
-struct osc_async_rc {
-        int     ar_rc;
-        int     ar_force_sync;
-        int     ar_min_xid;
-};
-
-struct lov_oinfo {                 /* per-stripe data structure */
-        __u64 loi_id;              /* object ID on the target OST */
-        __u64 loi_gr;              /* object group on the target OST */
-        int loi_ost_idx;           /* OST stripe index in lov_tgt_desc->tgts */
-        int loi_ost_gen;           /* generation of this loi_ost_idx */
-
-        /* used by the osc to keep track of what objects to build into rpcs */
-        struct loi_oap_pages loi_read_lop;
-        struct loi_oap_pages loi_write_lop;
-        /* _cli_ is poorly named, it should be _ready_ */
-        struct list_head loi_cli_item;
-        struct list_head loi_write_item;
-        struct list_head loi_read_item;
-
-        unsigned loi_kms_valid:1;
-        __u64 loi_kms;             /* known minimum size */
-        struct ost_lvb loi_lvb;
-        struct osc_async_rc     loi_ar;
-};
-
-static inline void loi_init(struct lov_oinfo *loi)
-{
-        INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending);
-        INIT_LIST_HEAD(&loi->loi_read_lop.lop_urgent);
-        INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending_group);
-        INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending);
-        INIT_LIST_HEAD(&loi->loi_write_lop.lop_urgent);
-        INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group);
-        INIT_LIST_HEAD(&loi->loi_cli_item);
-        INIT_LIST_HEAD(&loi->loi_write_item);
-        INIT_LIST_HEAD(&loi->loi_read_item);
-}
-/*extent array item for describing the joined file extent info*/
-struct lov_extent {
-        __u64 le_start;            /* extent start */
-        __u64 le_len;              /* extent length */
-        int   le_loi_idx;          /* extent #1 loi's index in lsm loi array */
-        int   le_stripe_count;     /* extent stripe count*/
-};
-
-/*Lov array info for describing joined file array EA info*/
-struct lov_array_info {
-        struct llog_logid    lai_array_id;    /* MDS med llog object id */
-        unsigned             lai_ext_count; /* number of extent count */
-        struct lov_extent    *lai_ext_array; /* extent desc array */
-};
-
-struct lov_stripe_md {
-        spinlock_t       lsm_lock;
-        void            *lsm_lock_owner; /* debugging */
-
-        struct {
-                /* Public members. */
-                __u64 lw_object_id;        /* lov object id */
-                __u64 lw_object_gr;        /* lov object group */
-                __u64 lw_maxbytes;         /* maximum possible file size */
-                unsigned long lw_xfersize; /* optimal transfer size */
-
-                /* LOV-private members start here -- only for use in lov/. */
-                __u32 lw_magic;
-                __u32 lw_stripe_size;      /* size of the stripe */
-                __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
-                unsigned lw_stripe_count;  /* number of objects being striped over */
-        } lsm_wire;
-
-        struct lov_array_info *lsm_array; /*Only for joined file array info*/
-        struct lov_oinfo lsm_oinfo[0];
-};
-
-#define lsm_object_id    lsm_wire.lw_object_id
-#define lsm_object_gr    lsm_wire.lw_object_gr
-#define lsm_maxbytes     lsm_wire.lw_maxbytes
-#define lsm_xfersize     lsm_wire.lw_xfersize
-#define lsm_magic        lsm_wire.lw_magic
-#define lsm_stripe_size  lsm_wire.lw_stripe_size
-#define lsm_pattern      lsm_wire.lw_pattern
-#define lsm_stripe_count lsm_wire.lw_stripe_count
-
-/* compare all relevant fields. */
-static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1,
-                                    struct lov_stripe_md *m2)
-{
-        /*
-         * ->lsm_wire contains padding, but it should be zeroed out during
-         * allocation.
-         */
-        return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire);
-}
-
-void lov_stripe_lock(struct lov_stripe_md *md);
-void lov_stripe_unlock(struct lov_stripe_md *md);
-
-struct obd_type {
-        struct list_head typ_chain;
-        struct obd_ops *typ_dt_ops;
-        struct md_ops *typ_md_ops;
-        struct proc_dir_entry *typ_procroot;
-        char *typ_name;
-        int  typ_refcnt;
-        struct lu_device_type *typ_lu;
-};
-
-struct brw_page {
-        obd_off  off;
-        struct page *pg;
-        int count;
-        obd_flag flag;
-};
-
-enum async_flags {
-        ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
-                              page is added to an rpc */
-        ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
-        ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
-                                     to give the caller a chance to update
-                                     or cancel the size of the io */
-        ASYNC_GROUP_SYNC = 0x8,  /* ap_completion will not be called, instead
-                                    the page is accounted for in the
-                                    obd_io_group given to
-                                    obd_queue_group_io */
-};
-
-struct obd_async_page_ops {
-        int  (*ap_make_ready)(void *data, int cmd);
-        int  (*ap_refresh_count)(void *data, int cmd);
-        void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa);
-        void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
-};
-
-/* the `oig' is passed down from a caller of obd rw methods.  the callee
- * records enough state such that the caller can sleep on the oig and
- * be woken when all the callees have finished their work */
-struct obd_io_group {
-        spinlock_t      oig_lock;
-        atomic_t        oig_refcount;
-        int             oig_pending;
-        int             oig_rc;
-        struct list_head oig_occ_list;
-        wait_queue_head_t oig_waitq;
-};
-
-/* the oig callback context lets the callee of obd rw methods register
- * for callbacks from the caller. */
-struct oig_callback_context {
-        struct list_head occ_oig_item;
-        /* called when the caller has received a signal while sleeping.
-         * callees of this method are encouraged to abort their state
-         * in the oig.  This may be called multiple times. */
-        void (*occ_interrupted)(struct oig_callback_context *occ);
-        unsigned int interrupted:1;
-};
-
-/* if we find more consumers this could be generalized */
-#define OBD_HIST_MAX 32
-struct obd_histogram {
-        spinlock_t      oh_lock;
-        unsigned long   oh_buckets[OBD_HIST_MAX];
-};
-
-/* Individual type definitions */
-
-struct ost_server_data;
-
-/* hold common fields for "target" device */
-struct obd_device_target {
-        struct super_block       *obt_sb;
-        atomic_t                  obt_quotachecking;
-        struct lustre_quota_ctxt  obt_qctxt;
-};
-
-#define FILTER_GROUP_LLOG 1
-#define FILTER_GROUP_ECHO 2
-
-struct filter_ext {
-        __u64                fe_start;
-        __u64                fe_end;
-};
-
-struct filter_obd {
-        /* NB this field MUST be first */
-        struct obd_device_target fo_obt;
-        const char          *fo_fstype;
-        struct vfsmount     *fo_vfsmnt;
-        struct dentry       *fo_dentry_O;
-        struct dentry      **fo_dentry_O_groups;
-        struct dentry      **fo_dentry_O_sub;
-        spinlock_t           fo_objidlock;      /* protect fo_lastobjid */
-        spinlock_t           fo_translock;      /* protect fsd_last_transno */
-        struct file         *fo_rcvd_filp;
-        struct file         *fo_health_check_filp;
-        struct lr_server_data *fo_fsd;
-        unsigned long       *fo_last_rcvd_slots;
-        __u64                fo_mount_count;
-
-        int                  fo_destroy_in_progress;
-        struct semaphore     fo_create_lock;
-
-        struct list_head     fo_export_list;
-        int                  fo_subdir_count;
-
-        obd_size             fo_tot_dirty;      /* protected by obd_osfs_lock */
-        obd_size             fo_tot_granted;    /* all values in bytes */
-        obd_size             fo_tot_pending;
-
-        obd_size             fo_readcache_max_filesize;
-
-        struct obd_import   *fo_mdc_imp;
-        struct obd_uuid      fo_mdc_uuid;
-        struct lustre_handle fo_mdc_conn;
-        struct file        **fo_last_objid_files;
-        __u64               *fo_last_objids; /* last created objid for groups,
-                                              * protected by fo_objidlock */
-
-        struct semaphore     fo_alloc_lock;
-
-        spinlock_t fo_stats_lock;
-        int fo_r_in_flight; /* protected by fo_stats_lock */
-        int fo_w_in_flight; /* protected by fo_stats_lock */
-
-        /*
-         * per-filter pool of kiobuf's allocated by filter_common_setup() and
-         * torn down by filter_cleanup(). Contains OST_NUM_THREADS elements of
-         * which ->fo_iobuf_count were allocated.
-         *
-         * This pool contains kiobuf used by
-         * filter_{prep,commit}rw_{read,write}() and is shared by all OST
-         * threads.
-         *
-         * Locking: none, each OST thread uses only one element, determined by
-         * its "ordinal number", ->t_id.
-         */
-        struct filter_iobuf    **fo_iobuf_pool;
-        int                      fo_iobuf_count;
-
-        struct obd_histogram     fo_r_pages;
-        struct obd_histogram     fo_w_pages;
-        struct obd_histogram     fo_read_rpc_hist;
-        struct obd_histogram     fo_write_rpc_hist;
-        struct obd_histogram     fo_r_io_time;
-        struct obd_histogram     fo_w_io_time;
-        struct obd_histogram     fo_r_discont_pages;
-        struct obd_histogram     fo_w_discont_pages;
-        struct obd_histogram     fo_r_discont_blocks;
-        struct obd_histogram     fo_w_discont_blocks;
-        struct obd_histogram     fo_r_disk_iosize;
-        struct obd_histogram     fo_w_disk_iosize;
-
-        struct lustre_quota_ctxt fo_quota_ctxt;
-        spinlock_t               fo_quotacheck_lock;
-        atomic_t                 fo_quotachecking;
-};
-
-#define OSC_MAX_RIF_DEFAULT       8
-#define OSC_MAX_RIF_MAX         256
-#define OSC_MAX_DIRTY_DEFAULT  (OSC_MAX_RIF_DEFAULT * 4)
-#define OSC_MAX_DIRTY_MB_MAX   2048     /* totally arbitrary */
-
-struct mdc_rpc_lock;
-struct obd_import;
-struct client_obd {
-        struct obd_uuid          cl_target_uuid;
-        struct obd_import       *cl_import; /* ptlrpc connection state */
-        struct semaphore         cl_sem;
-        int                      cl_conn_count;
-        /* max_mds_easize is purely a performance thing so we don't have to
-         * call obd_size_diskmd() all the time. */
-        int                      cl_default_mds_easize;
-        int                      cl_max_mds_easize;
-        int                      cl_max_mds_cookiesize;
-        kdev_t                   cl_sandev;
-
-        //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */
-        void                    *cl_llcd_offset;
-
-        /* the grant values are protected by loi_list_lock below */
-        long                     cl_dirty;         /* all _dirty_ in bytes */
-        long                     cl_dirty_max;     /* allowed w/o rpc */
-        long                     cl_avail_grant;   /* bytes of credit for ost */
-        long                     cl_lost_grant;    /* lost credits (trunc) */
-        struct list_head         cl_cache_waiters; /* waiting for cache/grant */
-
-        /* keep track of objects that have lois that contain pages which
-         * have been queued for async brw.  this lock also protects the
-         * lists of osc_client_pages that hang off of the loi */
-        spinlock_t               cl_loi_list_lock;
-        struct list_head         cl_loi_ready_list;
-        struct list_head         cl_loi_write_list;
-        struct list_head         cl_loi_read_list;
-        int                      cl_r_in_flight;
-        int                      cl_w_in_flight;
-        /* just a sum of the loi/lop pending numbers to be exported by /proc */
-        int                      cl_pending_w_pages;
-        int                      cl_pending_r_pages;
-        int                      cl_max_pages_per_rpc;
-        int                      cl_max_rpcs_in_flight;
-        struct obd_histogram     cl_read_rpc_hist;
-        struct obd_histogram     cl_write_rpc_hist;
-        struct obd_histogram     cl_read_page_hist;
-        struct obd_histogram     cl_write_page_hist;
-        struct obd_histogram     cl_read_offset_hist;
-        struct obd_histogram     cl_write_offset_hist;
-
-        struct mdc_rpc_lock     *cl_rpc_lock;
-        struct mdc_rpc_lock     *cl_setattr_lock;
-        struct osc_creator       cl_oscc;
-
-        /* mgc datastruct */
-        struct semaphore         cl_mgc_sem;
-        struct vfsmount         *cl_mgc_vfsmnt;
-        struct dentry           *cl_mgc_configs_dir;
-        atomic_t                 cl_mgc_refcount;
-        struct obd_export       *cl_mgc_mgsexp;
-
-        /* Flags section */
-        unsigned int             cl_checksum:1; /* debug checksums */
-
-        /* also protected by the poorly named _loi_list_lock lock above */
-        struct osc_async_rc      cl_ar;
-
-        /* used by quotacheck */
-        int                      cl_qchk_stat; /* quotacheck stat of the peer */
-
-        struct lu_fid            cl_fid;
-        spinlock_t               cl_fid_lock;
-};
-#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
-
-#define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
-
-struct mgs_obd {
-        struct ptlrpc_service           *mgs_service;
-        struct vfsmount                 *mgs_vfsmnt;
-        struct super_block              *mgs_sb;
-        struct dentry                   *mgs_configs_dir;
-        struct dentry                   *mgs_fid_de;
-        struct list_head                 mgs_fs_db_list;
-        struct semaphore                 mgs_sem;
-};
-
-struct mds_obd {
-        /* NB this field MUST be first */
-        struct obd_device_target         mds_obt;
-        struct ptlrpc_service           *mds_service;
-        struct ptlrpc_service           *mds_setattr_service;
-        struct ptlrpc_service           *mds_readpage_service;
-        struct vfsmount                 *mds_vfsmnt;
-        struct dentry                   *mds_fid_de;
-        int                              mds_max_mdsize;
-        int                              mds_max_cookiesize;
-        struct file                     *mds_rcvd_filp;
-        spinlock_t                       mds_transno_lock;
-        __u64                            mds_last_transno;
-        __u64                            mds_mount_count;
-        __u64                            mds_io_epoch;
-        unsigned long                    mds_atime_diff;
-        struct semaphore                 mds_epoch_sem;
-        struct ll_fid                    mds_rootfid;
-        struct lr_server_data           *mds_server_data;
-        struct dentry                   *mds_pending_dir;
-        struct dentry                   *mds_logs_dir;
-        struct dentry                   *mds_objects_dir;
-        struct llog_handle              *mds_cfg_llh;
-//        struct llog_handle              *mds_catalog;
-        struct obd_device               *mds_osc_obd; /* XXX lov_obd */
-        struct obd_uuid                  mds_lov_uuid;
-        char                            *mds_profile;
-        struct obd_export               *mds_osc_exp; /* XXX lov_exp */
-        struct lov_desc                  mds_lov_desc;
-        obd_id                          *mds_lov_objids;
-        int                              mds_lov_objids_size;
-        __u32                            mds_lov_objids_in_file;
-        unsigned int                     mds_lov_objids_dirty:1;
-        int                              mds_lov_nextid_set;
-        struct file                     *mds_lov_objid_filp;
-        struct file                     *mds_health_check_filp;
-        unsigned long                   *mds_client_bitmap;
-        struct semaphore                 mds_orphan_recovery_sem;
-        struct upcall_cache             *mds_group_hash;
-
-        struct lustre_quota_info         mds_quota_info;
-        struct semaphore                 mds_qonoff_sem;
-        struct semaphore                 mds_health_sem;
-        unsigned long                    mds_lov_objids_valid:1,
-                                         mds_fl_user_xattr:1,
-                                         mds_fl_acl:1;
-};
-
-struct echo_obd {
-        struct obdo          eo_oa;
-        spinlock_t           eo_lock;
-        __u64                eo_lastino;
-        struct lustre_handle eo_nl_lock;
-        atomic_t             eo_prep;
-};
-
-struct ost_obd {
-        struct ptlrpc_service *ost_service;
-        struct ptlrpc_service *ost_create_service;
-        struct ptlrpc_service *ost_io_service;
-        struct semaphore       ost_health_sem;
-};
-
-struct echo_client_obd {
-        struct obd_export   *ec_exp;   /* the local connection to osc/lov */
-        spinlock_t           ec_lock;
-        struct list_head     ec_objects;
-        int                  ec_nstripes;
-        __u64                ec_unique;
-};
-
-struct lov_tgt_desc {
-        struct obd_uuid          uuid;
-        __u32                    ltd_gen;
-        struct obd_export       *ltd_exp;
-        unsigned int             active:1, /* is this target up for requests */
-                                 reap:1;   /* should this target be deleted */
-};
+typedef spinlock_t client_obd_lock_t;
 
-struct lov_obd {
-        struct semaphore lov_lock;
-        atomic_t refcount;
-        struct lov_desc desc;
-        struct obd_connect_data ocd;
-        int bufsize;
-        int connects;
-        int death_row;      /* Do we have tgts scheduled to be deleted?
-                               (Make this a linked list?) */
-        unsigned int lo_catalog_loaded:1;
-        struct lov_tgt_desc *tgts;
-};
-
-struct lmv_tgt_desc {
-        struct obd_uuid         uuid;
-        struct obd_export       *ltd_exp;
-        int                     active;   /* is this target up for requests */
-        int                     idx;
-};
-
-struct lmv_obd {
-        int                     refcount;
-        spinlock_t              lmv_lock;
-        struct lmv_desc         desc;
-        struct obd_uuid         cluuid;
-        struct obd_export       *exp;
-
-        int                     connected;
-        int                     max_easize;
-        int                     max_def_easize;
-        int                     max_cookiesize;
-        int                     server_timeout;
-        struct semaphore        init_sem;
-        
-        struct lmv_tgt_desc     *tgts;
-        int                     tgts_size;
-
-        struct obd_connect_data *datas;
-        int                     datas_size;
-
-        struct obd_connect_data conn_data;
-};
-
-struct niobuf_local {
-        __u64 offset;
-        __u32 len;
-        __u32 flags;
-        struct page *page;
-        struct dentry *dentry;
-        int lnb_grant_used;
-        int rc;
-};
-
-#define LUSTRE_OPC_MKDIR     (1 << 0)
-#define LUSTRE_OPC_SYMLINK   (1 << 1)
-#define LUSTRE_OPC_MKNODE    (1 << 2)
-#define LUSTRE_OPC_CREATE    (1 << 3)
-        
-struct placement_hint {
-        struct qstr *ph_pname;
-        struct qstr *ph_cname;
-        int          ph_opc;
-};
-
-/* device types (not names--FIXME) */
-/* FIXME all the references to these defines need to be updated */
-#define LUSTRE_MDS_NAME "mds"
-#define LUSTRE_MDT_NAME "mdt"
-/* new MDS layers. Prototype */
-#define LUSTRE_MDT0_NAME "mdt0"
-#define LUSTRE_CMM0_NAME "cmm0"
-#define LUSTRE_MDD0_NAME "mdd0"
-#define LUSTRE_OSD0_NAME "osd0"
-#define LUSTRE_FLD0_NAME "fld0"
-#define LUSTRE_MDC0_NAME "mdc0"
-
-#define LUSTRE_MDC_NAME "mdc"
-#define LUSTRE_LMV_NAME "lmv"
-
-/* FIXME just the names need to be changed */
-#define LUSTRE_OSS_NAME "ost" /*FIXME oss*/
-#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost*/
-#define LUSTRE_OSTSAN_NAME "sanobdfilter"
-
-#define LUSTRE_OSC_NAME "osc"
-#define LUSTRE_FILTER_NAME "filter"
-#define LUSTRE_SANOSC_NAME "sanosc"
-#define LUSTRE_SANOST_NAME "sanost"
-#define LUSTRE_MGS_NAME "mgs"
-#define LUSTRE_MGC_NAME "mgc"
-
-#define LUSTRE_MGS_OBDNAME "MGS"
-#define LUSTRE_MGC_OBDNAME "MGC"
-
-/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
-#define N_LOCAL_TEMP_PAGE 0x10000000
-
-struct obd_trans_info {
-        __u64                    oti_transno;
-        __u64                   *oti_objid;
-        /* Only used on the server side for tracking acks. */
-        struct oti_req_ack_lock {
-                struct lustre_handle lock;
-                __u32                mode;
-        }                        oti_ack_locks[4];
-        void                    *oti_handle;
-        struct llog_cookie       oti_onecookie;
-        struct llog_cookie      *oti_logcookies;
-        int                      oti_numcookies;
-
-        /* initial thread handling transaction */
-        int                      oti_thread_id;
-};
-
-static inline void oti_init(struct obd_trans_info *oti,
-                            struct ptlrpc_request *req)
+static inline void client_obd_list_lock_init(client_obd_lock_t *lock)
 {
-        if (oti == NULL)
-                return;
-        memset(oti, 0, sizeof *oti);
-
-        if (req == NULL)
-                return;
-
-        if (req->rq_repmsg && req->rq_reqmsg != 0)
-                oti->oti_transno = req->rq_repmsg->transno;
-        oti->oti_thread_id = req->rq_svc_thread ? req->rq_svc_thread->t_id : -1;
+        spin_lock_init(lock);
 }
 
-static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies)
-{
-        if (!oti)
-                return;
-
-        if (num_cookies == 1)
-                oti->oti_logcookies = &oti->oti_onecookie;
-        else
-                OBD_ALLOC(oti->oti_logcookies,
-                          num_cookies * sizeof(oti->oti_onecookie));
-
-        oti->oti_numcookies = num_cookies;
-}
+static inline void client_obd_list_lock_done(client_obd_lock_t *lock)
+{}
 
-static inline void oti_free_cookies(struct obd_trans_info *oti)
+static inline void client_obd_list_lock(client_obd_lock_t *lock)
 {
-        if (!oti || !oti->oti_logcookies)
-                return;
-
-        if (oti->oti_logcookies == &oti->oti_onecookie)
-                LASSERT(oti->oti_numcookies == 1);
-        else
-                OBD_FREE(oti->oti_logcookies,
-                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
-        oti->oti_logcookies = NULL;
-        oti->oti_numcookies = 0;
+        spin_lock(lock);
 }
 
-/* llog contexts */
-enum llog_ctxt_id {
-        LLOG_CONFIG_ORIG_CTXT  =  0,
-        LLOG_CONFIG_REPL_CTXT  =  1,
-        LLOG_MDS_OST_ORIG_CTXT =  2,
-        LLOG_MDS_OST_REPL_CTXT =  3,
-        LLOG_SIZE_ORIG_CTXT    =  4,
-        LLOG_SIZE_REPL_CTXT    =  5,
-        LLOG_MD_ORIG_CTXT      =  6,
-        LLOG_MD_REPL_CTXT      =  7,
-        LLOG_RD1_ORIG_CTXT     =  8,
-        LLOG_RD1_REPL_CTXT     =  9,
-        LLOG_TEST_ORIG_CTXT    = 10,
-        LLOG_TEST_REPL_CTXT    = 11,
-        LLOG_LOVEA_ORIG_CTXT  = 12,
-        LLOG_LOVEA_REPL_CTXT  = 13,
-        LLOG_MAX_CTXTS
-};
-
-/*
- * Events signalled through obd_notify() upcall-chain.
- */
-enum obd_notify_event {
-        /* Device activated */
-        OBD_NOTIFY_ACTIVE,
-        /* Device deactivated */
-        OBD_NOTIFY_INACTIVE,
-        /* Connect data for import were changed */
-        OBD_NOTIFY_OCD,
-        /* Sync request */
-        OBD_NOTIFY_SYNC_NONBLOCK,
-        OBD_NOTIFY_SYNC
-};
-
-/*
- * Data structure used to pass obd_notify()-event to non-obd listeners (llite
- * and liblustre being main examples).
- */
-struct obd_notify_upcall {
-        int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
-                          enum obd_notify_event ev, void *owner);
-        /* Opaque datum supplied by upper layer listener */
-        void *onu_owner;
-};
-
-
-/* corresponds to one of the obd's */
-struct obd_device {
-        struct obd_type        *obd_type;
-        /* common and UUID name of this device */
-        char                   *obd_name;
-        struct obd_uuid         obd_uuid;
-
-        struct lu_device       *obd_lu_dev;
-
-        int                     obd_minor;
-        unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1,
-                obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
-                obd_no_recov:1, obd_stopping:1, obd_starting:1,
-                obd_force:1, obd_fail:1, obd_async_recov:1;
-        atomic_t obd_refcount;
-        wait_queue_head_t obd_refcount_waitq;
-        struct proc_dir_entry  *obd_proc_entry;
-        struct list_head        obd_exports;
-        int                     obd_num_exports;
-        struct ldlm_namespace  *obd_namespace;
-        struct ptlrpc_client    obd_ldlm_client; /* XXX OST/MDS only */
-        /* a spinlock is OK for what we do now, may need a semaphore later */
-        spinlock_t              obd_dev_lock;
-        __u64                   obd_last_committed;
-        struct fsfilt_operations *obd_fsops;
-        spinlock_t              obd_osfs_lock;
-        struct obd_statfs       obd_osfs;       /* locked by obd_osfs_lock */
-        unsigned long           obd_osfs_age;   /* jiffies */
-        struct lvfs_run_ctxt    obd_lvfs_ctxt;
-        struct llog_ctxt        *obd_llog_ctxt[LLOG_MAX_CTXTS];
-        struct obd_device       *obd_observer;
-        struct obd_notify_upcall obd_upcall;
-        struct obd_export       *obd_self_export;
-        /* list of exports in LRU order, for ping evictor, with obd_dev_lock */
-        struct list_head        obd_exports_timed;
-        time_t                  obd_eviction_timer; /* for ping evictor */
-
-        /* XXX encapsulate all this recovery data into one struct */
-        svc_handler_t                    obd_recovery_handler;
-        int                              obd_max_recoverable_clients;
-        int                              obd_connected_clients;
-        int                              obd_recoverable_clients;
-        spinlock_t                       obd_processing_task_lock;
-        pid_t                            obd_processing_task;
-        __u64                            obd_next_recovery_transno;
-        int                              obd_replayed_requests;
-        int                              obd_requests_queued_for_recovery;
-        wait_queue_head_t                obd_next_transno_waitq;
-        struct list_head                 obd_uncommitted_replies;
-        spinlock_t                       obd_uncommitted_replies_lock;
-        struct timer_list                obd_recovery_timer;
-        struct list_head                 obd_recovery_queue;
-        struct list_head                 obd_delayed_reply_queue;
-        time_t                           obd_recovery_start;
-        time_t                           obd_recovery_end;
-
-        union {
-                struct obd_device_target obt;
-                struct filter_obd filter;
-                struct mds_obd mds;
-                struct client_obd cli;
-                struct ost_obd ost;
-                struct echo_client_obd echo_client;
-                struct echo_obd echo;
-                struct lov_obd lov;
-                struct lmv_obd lmv;
-                struct mgs_obd mgs;
-        } u;
-
-        /* Fields used by LProcFS */
-        unsigned int           obd_cntr_base;
-        struct lprocfs_stats  *obd_stats;
-
-        unsigned int           md_cntr_base;
-        struct lprocfs_stats  *md_stats;
-
-        struct proc_dir_entry *obd_svc_procroot;
-        struct lprocfs_stats  *obd_svc_stats;
-};
-
-#define OBD_OPT_FORCE           0x0001
-#define OBD_OPT_FAILOVER        0x0002
-
-#define OBD_LLOG_FL_SENDNOW     0x0001
-
-
-enum obd_cleanup_stage {
-/* Special case hack for MDS LOVs */
-        OBD_CLEANUP_EARLY,
-/* Precleanup stage 1, we must make sure all exports (other than the
-   self-export) get destroyed. */
-        OBD_CLEANUP_EXPORTS,
-/* Precleanup stage 2,  do other type-specific cleanup requiring the
-   self-export. */
-        OBD_CLEANUP_SELF_EXP,
-/* FIXME we should eliminate the "precleanup" function and make them stages
-   of the "cleanup" function. */
-        OBD_CLEANUP_OBD,
-};
-
-struct obd_ops {
-        struct module *o_owner;
-        int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
-                           void *karg, void *uarg);
-        int (*o_get_info)(struct obd_export *, __u32 keylen, void *key,
-                          __u32 *vallen, void *val);
-        int (*o_set_info)(struct obd_export *, __u32 keylen, void *key,
-                          __u32 vallen, void *val);
-        int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
-        int (*o_detach)(struct obd_device *dev);
-        int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
-        int (*o_precleanup)(struct obd_device *dev,
-                            enum obd_cleanup_stage cleanup_stage);
-        int (*o_cleanup)(struct obd_device *dev);
-        int (*o_process_config)(struct obd_device *dev, obd_count len,
-                                void *data);
-        int (*o_postrecov)(struct obd_device *dev);
-        int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
-                          int priority);
-        int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
-        /* connect to the target device with given connection
-         * data. @ocd->ocd_connect_flags is modified to reflect flags actually
-         * granted by the target, which are guaranteed to be a subset of flags
-         * asked for. If @ocd == NULL, use default parameters. */
-        int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
-                         struct obd_uuid *cluuid, struct obd_connect_data *ocd);
-        int (*o_reconnect)(struct obd_export *exp, struct obd_device *src,
-                           struct obd_uuid *cluuid,
-                           struct obd_connect_data *ocd);
-        int (*o_disconnect)(struct obd_export *exp);
-
-        /* may be later these should be moved into separate fid_ops */
-        int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
-                           struct placement_hint *hint);
-        
-        int (*o_fid_delete)(struct obd_export *exp, struct lu_fid *fid);
-        
-        int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
-                        unsigned long max_age);
-        int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
-                        struct lov_stripe_md *mem_src);
-        int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
-                          struct lov_mds_md *disk_src, int disk_len);
-        int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp,
-                         struct lov_stripe_md *mem_tgt);
-        int (*o_preallocate)(struct lustre_handle *, obd_count *req,
-                             obd_id *ids);
-        int (*o_create)(struct obd_export *exp,  struct obdo *oa,
-                        struct lov_stripe_md **ea, struct obd_trans_info *oti);
-        int (*o_destroy)(struct obd_export *exp, struct obdo *oa,
-                         struct lov_stripe_md *ea, struct obd_trans_info *oti,
-                         struct obd_export *md_exp);
-        int (*o_setattr)(struct obd_export *exp, struct obdo *oa,
-                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
-        int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa,
-                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
-        int (*o_getattr)(struct obd_export *exp, struct obdo *oa,
-                         struct lov_stripe_md *ea);
-        int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa,
-                               struct lov_stripe_md *ea,
-                               struct ptlrpc_request_set *set);
-        int (*o_brw)(int rw, struct obd_export *exp, struct obdo *oa,
-                     struct lov_stripe_md *ea, obd_count oa_bufs,
-                     struct brw_page *pgarr, struct obd_trans_info *oti);
-        int (*o_brw_async)(int rw, struct obd_export *exp, struct obdo *oa,
-                           struct lov_stripe_md *ea, obd_count oa_bufs,
-                           struct brw_page *pgarr, struct ptlrpc_request_set *,
-                           struct obd_trans_info *oti);
-        int (*o_prep_async_page)(struct obd_export *exp,
-                                 struct lov_stripe_md *lsm,
-                                 struct lov_oinfo *loi,
-                                 struct page *page, obd_off offset,
-                                 struct obd_async_page_ops *ops, void *data,
-                                 void **res);
-        int (*o_queue_async_io)(struct obd_export *exp,
-                                struct lov_stripe_md *lsm,
-                                struct lov_oinfo *loi, void *cookie,
-                                int cmd, obd_off off, int count,
-                                obd_flag brw_flags, obd_flag async_flags);
-        int (*o_queue_group_io)(struct obd_export *exp,
-                                struct lov_stripe_md *lsm,
-                                struct lov_oinfo *loi,
-                                struct obd_io_group *oig,
-                                void *cookie, int cmd, obd_off off, int count,
-                                obd_flag brw_flags, obd_flag async_flags);
-        int (*o_trigger_group_io)(struct obd_export *exp,
-                                  struct lov_stripe_md *lsm,
-                                  struct lov_oinfo *loi,
-                                  struct obd_io_group *oig);
-        int (*o_set_async_flags)(struct obd_export *exp,
-                                struct lov_stripe_md *lsm,
-                                struct lov_oinfo *loi, void *cookie,
-                                obd_flag async_flags);
-        int (*o_teardown_async_page)(struct obd_export *exp,
-                                     struct lov_stripe_md *lsm,
-                                     struct lov_oinfo *loi, void *cookie);
-        int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
-                           struct ost_lvb *lvb, int kms_only);
-        int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
-                            obd_off size, int shrink);
-        int (*o_punch)(struct obd_export *exp, struct obdo *oa,
-                       struct lov_stripe_md *ea, obd_size start,
-                       obd_size end, struct obd_trans_info *oti);
-        int (*o_sync)(struct obd_export *exp, struct obdo *oa,
-                      struct lov_stripe_md *ea, obd_size start, obd_size end);
-        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
-                         struct lov_stripe_md *src, obd_size start,
-                         obd_size end, struct obd_trans_info *oti);
-        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
-                      struct lustre_handle *srconn, struct lov_stripe_md *src,
-                      obd_size start, obd_size end, struct obd_trans_info *);
-        int (*o_iterate)(struct lustre_handle *conn,
-                         int (*)(obd_id, obd_gr, void *),
-                         obd_id *startid, obd_gr group, void *data);
-        int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
-                        int objcount, struct obd_ioobj *obj,
-                        int niocount, struct niobuf_remote *remote,
-                        struct niobuf_local *local, struct obd_trans_info *oti);
-        int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
-                          int objcount, struct obd_ioobj *obj,
-                          int niocount, struct niobuf_local *local,
-                          struct obd_trans_info *oti, int rc);
-        int (*o_enqueue)(struct obd_export *, struct lov_stripe_md *,
-                         __u32 type, ldlm_policy_data_t *, __u32 mode,
-                         int *flags, void *bl_cb, void *cp_cb, void *gl_cb,
-                         void *data, __u32 lvb_len, void *lvb_swabber,
-                         struct lustre_handle *lockh);
-        int (*o_match)(struct obd_export *, struct lov_stripe_md *, __u32 type,
-                       ldlm_policy_data_t *, __u32 mode, int *flags, void *data,
-                       struct lustre_handle *lockh);
-        int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
-                               ldlm_iterator_t it, void *data);
-        int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
-                        __u32 mode, struct lustre_handle *);
-        int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
-                               int flags, void *opaque);
-        int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *,
-                         int join);
-        int (*o_san_preprw)(int cmd, struct obd_export *exp,
-                            struct obdo *oa, int objcount,
-                            struct obd_ioobj *obj, int niocount,
-                            struct niobuf_remote *remote);
-        int (*o_init_export)(struct obd_export *exp);
-        int (*o_destroy_export)(struct obd_export *exp);
-
-        /* llog related obd_methods */
-        int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd,
-                           int count, struct llog_catid *logid);
-        int (*o_llog_finish)(struct obd_device *obd, int count);
-
-        /* metadata-only methods */
-        int (*o_pin)(struct obd_export *, struct lu_fid *fid,
-                     struct obd_client_handle *, int flag);
-        int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
-
-        int (*o_import_event)(struct obd_device *, struct obd_import *,
-                              enum obd_import_event);
-
-        int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
-                        enum obd_notify_event ev, void *data);
-
-        int (*o_health_check)(struct obd_device *);
-
-        /* quota methods */
-        int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
-        int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
-
-        /*
-         * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
-         * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
-         * Also, add a wrapper function in include/linux/obd_class.h.
-         *
-         * Also note that if you add it to the END, you also have to change
-         * the num_stats calculation.
-         *
-         */
-};
-
-struct md_ops {
-        int (*m_getstatus)(struct obd_export *, struct lu_fid *);
-        int (*m_change_cbdata)(struct obd_export *, struct lu_fid *,
-                               ldlm_iterator_t, void *);
-        int (*m_close)(struct obd_export *, struct md_op_data *,
-                       struct obd_client_handle *, struct ptlrpc_request **);
-        int (*m_create)(struct obd_export *, struct md_op_data *,
-                        const void *, int, int, __u32, __u32, __u32,
-                        __u64, struct ptlrpc_request **);
-        int (*m_done_writing)(struct obd_export *, struct md_op_data *);
-        int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *,
-                         int, struct md_op_data *, struct lustre_handle *,
-                         void *, int, ldlm_completion_callback,
-                         ldlm_blocking_callback, void *, int);
-        int (*m_getattr)(struct obd_export *, struct lu_fid *,
-                         obd_valid, int, struct ptlrpc_request **);
-        int (*m_getattr_name)(struct obd_export *, struct lu_fid *,
-                              const char *, int, obd_valid,
-                              int, struct ptlrpc_request **);
-        int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
-                             void *, int, struct lookup_intent *, int,
-                             struct ptlrpc_request **,
-                             ldlm_blocking_callback, int);
-        int (*m_link)(struct obd_export *, struct md_op_data *,
-                      struct ptlrpc_request **);
-        int (*m_rename)(struct obd_export *, struct md_op_data *,
-                        const char *, int, const char *, int,
-                        struct ptlrpc_request **);
-        int (*m_setattr)(struct obd_export *, struct md_op_data *,
-                         struct iattr *, void *, int , void *, int,
-                         struct ptlrpc_request **);
-        int (*m_sync)(struct obd_export *, struct lu_fid *,
-                      struct ptlrpc_request **);
-        int (*m_readpage)(struct obd_export *, struct lu_fid *,
-                          __u64, struct page *, struct ptlrpc_request **);
-        int (*m_unlink)(struct obd_export *, struct md_op_data *,
-                        struct ptlrpc_request **);
-
-        int (*m_setxattr)(struct obd_export *, struct lu_fid *,
-                          obd_valid, const char *, const char *,
-                          int, int, int, struct ptlrpc_request **);
-
-        int (*m_getxattr)(struct obd_export *, struct lu_fid *,
-                          obd_valid, const char *, const char *,
-                          int, int, int, struct ptlrpc_request **);
-
-        int (*m_init_ea_size)(struct obd_export *, int, int, int);
-        
-        int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
-                               int, struct obd_export *, struct lustre_md *);
-        
-        int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
-        
-        int (*m_set_open_replay_data)(struct obd_export *,
-                                      struct obd_client_handle *,
-                                      struct ptlrpc_request *);
-        int (*m_clear_open_replay_data)(struct obd_export *,
-                                        struct obd_client_handle *);
-        int (*m_set_lock_data)(struct obd_export *, __u64 *, void *);
-        
-        int (*m_lock_match)(struct obd_export *, int, struct lu_fid *,
-                            ldlm_type_t, ldlm_policy_data_t *, ldlm_mode_t,
-                            struct lustre_handle *);
-                
-        int (*m_cancel_unused)(struct obd_export *, struct lu_fid *,
-                               int flags, void *opaque);
-
-        /*
-         * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
-         * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
-         * wrapper function in include/linux/obd_class.h.
-         */
-};
-
-struct lsm_operations {
-        void (*lsm_free)(struct lov_stripe_md *);
-        int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
-                           struct obd_export *md_exp);
-        void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *,
-                                     unsigned long *);
-        void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
-                                     unsigned long *);
-        obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int);
-        int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off);
-        int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd);
-        int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
-                               int *stripe_count);
-        int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
-                             struct lov_mds_md *lmm);
-};
-
-extern struct lsm_operations lsm_plain_ops;
-extern struct lsm_operations lsm_join_ops;
-static inline struct lsm_operations *lsm_op_find(int magic)
-{
-        switch(magic) {
-        case LOV_MAGIC:
-               return &lsm_plain_ops;
-        case LOV_MAGIC_JOIN:
-               return &lsm_join_ops;
-        default:
-               CERROR("Cannot recognize lsm_magic %d", magic);
-               return NULL;
-        }
-}
-
-int lvfs_check_io_health(struct obd_device *obd, struct file *file);
-
-static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
-                                         int error)
-{
-        if (error) {
-                CERROR("%s: transno "LPD64" commit error: %d\n",
-                       obd->obd_name, transno, error);
-                return;
-        }
-        CDEBUG(D_HA, "%s: transno "LPD64" committed\n",
-               obd->obd_name, transno);
-        if (transno > obd->obd_last_committed) {
-                obd->obd_last_committed = transno;
-                ptlrpc_commit_replies (obd);
-        }
-}
-
-static inline void init_obd_quota_ops(quota_interface_t *interface,
-                                      struct obd_ops *obd_ops)
+static inline void client_obd_list_unlock(client_obd_lock_t *lock)
 {
-        if (!interface)
-                return;
-
-        LASSERT(obd_ops);
-        obd_ops->o_quotacheck = QUOTA_OP(interface, check);
-        obd_ops->o_quotactl = QUOTA_OP(interface, ctl);
+        spin_unlock(lock);
 }
 
-/* get/set_info keys */
-#define KEY_MDS_CONN "mds_conn"
-#define KEY_NEXT_ID  "next_id"
-#define KEY_LOVDESC  "lovdesc"
-#define KEY_INIT_RECOV "initial_recov"
-#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
-
-#endif /* __OBD_H */
+#endif /* __LINUX_OBD_H */
index b614bb6..3e59fa6 100644 (file)
 #ifndef __LINUX_CLASS_OBD_H
 #define __LINUX_CLASS_OBD_H
 
+#ifndef __CLASS_OBD_H
+#error Do not #include this file directly. #include <obd_class.h> instead
+#endif
+
 #ifndef __KERNEL__
 #include <sys/types.h>
 #include <libcfs/list.h>
 #include <asm/uaccess.h>
 #include <linux/types.h>
 #include <linux/fs.h>
-#include <linux/dcache.h>
 #include <linux/time.h>
 #include <linux/timer.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_import.h>
-#include <linux/lustre_net.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lprocfs_status.h>
-
-/* OBD Device Declarations */
-#define MAX_OBD_DEVICES 520
-extern struct obd_device obd_dev[MAX_OBD_DEVICES];
-extern spinlock_t obd_dev_lock;
-
-/* OBD Operations Declarations */
-extern struct obd_device *class_conn2obd(struct lustre_handle *);
-extern struct obd_device *class_exp2obd(struct obd_export *);
-
-struct lu_device_type;
-
-/* genops.c */
-struct obd_export *class_conn2export(struct lustre_handle *);
-int class_register_type(struct obd_ops *, struct md_ops *,
-                        struct lprocfs_vars *, const char *nm,
-                        struct lu_device_type *ldt);
-int class_unregister_type(const char *nm);
-
-struct obd_device *class_newdev(struct obd_type *type, char *name);
-void class_release_dev(struct obd_device *obd);
-
-int class_name2dev(const char *name);
-struct obd_device *class_name2obd(const char *name);
-int class_uuid2dev(struct obd_uuid *uuid);
-struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
-void class_obd_list(void);
-struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
-                                          const char * typ_name,
-                                          struct obd_uuid *grp_uuid);
-struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid,
-                                             struct obd_uuid *grp_uuid);
-struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
-                                           int *next);
-
-int oig_init(struct obd_io_group **oig);
-void oig_add_one(struct obd_io_group *oig,
-                  struct oig_callback_context *occ);
-void oig_complete_one(struct obd_io_group *oig,
-                      struct oig_callback_context *occ, int rc);
-void oig_release(struct obd_io_group *oig);
-int oig_wait(struct obd_io_group *oig);
-
-char *obd_export_nid2str(struct obd_export *exp);
-
-int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
-int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
-
-/* obd_config.c */
-int class_process_config(struct lustre_cfg *lcfg);
-int class_attach(struct lustre_cfg *lcfg);
-int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
-int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg);
-int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg);
-struct obd_device *class_incref(struct obd_device *obd);
-void class_decref(struct obd_device *obd);
-
-#define CFG_F_START     0x01   /* Set when we start updating from a log */
-#define CFG_F_MARKER    0x02   /* We are within a maker */
-#define CFG_F_SKIP      0x04   /* We should ignore this cfg command */
-#define CFG_F_COMPAT146 0x08   /* Translation to new obd names required */
-#define CFG_F_EXCLUDE   0x10   /* OST exclusion list */
-
-
-/* Passed as data param to class_config_parse_llog */
-struct config_llog_instance {
-        char *              cfg_instance;
-        struct super_block *cfg_sb;
-        struct obd_uuid     cfg_uuid;
-        int                 cfg_last_idx; /* for partial llog processing */
-        int                 cfg_flags;
-};
-int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
-                            struct config_llog_instance *cfg);
-int class_config_dump_llog(struct llog_ctxt *ctxt, char *name,
-                           struct config_llog_instance *cfg);
-
-/* list of active configuration logs  */
-struct config_llog_data {
-        char               *cld_logname;
-        struct ldlm_res_id  cld_resid;
-        struct config_llog_instance cld_cfg;
-        struct list_head    cld_list_chain;
-        atomic_t            cld_refcount;
-        unsigned int        cld_stopping:1;
-};
-
-struct lustre_profile {
-        struct list_head lp_list;
-        char * lp_profile;
-        char * lp_osc;
-        char * lp_mdc;
-};
-
-struct lustre_profile *class_get_profile(const char * prof);
-void class_del_profile(const char *prof);
-
-/* genops.c */
-#define class_export_get(exp)                                                  \
-({                                                                             \
-        struct obd_export *exp_ = exp;                                         \
-        atomic_inc(&exp_->exp_refcount);                                       \
-        CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp_,          \
-               atomic_read(&exp_->exp_refcount));                              \
-        exp_;                                                                  \
-})
-
-#define class_export_put(exp)                                                  \
-do {                                                                           \
-        LASSERT((exp) != NULL);                                                \
-        CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", (exp),         \
-               atomic_read(&(exp)->exp_refcount) - 1);                         \
-        LASSERT(atomic_read(&(exp)->exp_refcount) > 0);                        \
-        LASSERT(atomic_read(&(exp)->exp_refcount) < 0x5a5a5a);                 \
-        __class_export_put(exp);                                               \
-} while (0)
-void __class_export_put(struct obd_export *);
-struct obd_export *class_new_export(struct obd_device *obddev,
-                                    struct obd_uuid *cluuid);
-void class_unlink_export(struct obd_export *exp);
-
-struct obd_import *class_import_get(struct obd_import *);
-void class_import_put(struct obd_import *);
-struct obd_import *class_new_import(struct obd_device *obd);
-void class_destroy_import(struct obd_import *exp);
-
-struct obd_type *class_search_type(const char *name);
-struct obd_type *class_get_type(const char *name);
-void class_put_type(struct obd_type *type);
-int class_connect(struct lustre_handle *conn, struct obd_device *obd,
-                  struct obd_uuid *cluuid);
-int class_disconnect(struct obd_export *exp);
-void class_fail_export(struct obd_export *exp);
-void class_disconnect_exports(struct obd_device *obddev);
-void class_disconnect_stale_exports(struct obd_device *obddev);
-int class_manual_cleanup(struct obd_device *obd);
-
 /* obdo.c */
 #ifdef __KERNEL__
 void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid);
@@ -188,1526 +47,10 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid);
 void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid);
 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid);
 #endif
-void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
-int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
-void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
-
-
-#define OBT(dev)        (dev)->obd_type
-#define OBP(dev, op)    (dev)->obd_type->typ_dt_ops->o_ ## op
-#define MDP(dev, op)    (dev)->obd_type->typ_md_ops->m_ ## op
-#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
-
-/* Ensure obd_setup: used for cleanup which must be called
-   while obd is stopping */
-#define OBD_CHECK_DEV(obd)                                      \
-do {                                                            \
-        if (!(obd)) {                                           \
-                CERROR("NULL device\n");                        \
-                RETURN(-ENODEV);                                \
-        }                                                       \
-} while (0)
-
-/* ensure obd_setup and !obd_stopping */
-#define OBD_CHECK_DEV_ACTIVE(obd)                               \
-do {                                                            \
-        OBD_CHECK_DEV(obd);                                     \
-        if (!(obd)->obd_set_up || (obd)->obd_stopping) {        \
-                CERROR("Device %d not setup\n",                 \
-                       (obd)->obd_minor);                       \
-                RETURN(-ENODEV);                                \
-        }                                                       \
-} while (0)
-
-
-#ifdef LPROCFS
-#define OBD_COUNTER_OFFSET(op)                                  \
-        ((offsetof(struct obd_ops, o_ ## op) -                  \
-          offsetof(struct obd_ops, o_iocontrol))                \
-         / sizeof(((struct obd_ops *)(0))->o_iocontrol))
-
-#define OBD_COUNTER_INCREMENT(obd, op)                          \
-        if ((obd)->obd_stats != NULL) {                         \
-                unsigned int coffset;                           \
-                coffset = (unsigned int)(obd)->obd_cntr_base +  \
-                        OBD_COUNTER_OFFSET(op);                 \
-                LASSERT(coffset < obd->obd_stats->ls_num);      \
-                lprocfs_counter_incr(obd->obd_stats, coffset);  \
-        }
-
-#define MD_COUNTER_OFFSET(op)                                  \
-        ((offsetof(struct md_ops, m_ ## op) -                  \
-          offsetof(struct md_ops, m_getstatus))                \
-         / sizeof(((struct md_ops *)(0))->m_getstatus))
-
-#define MD_COUNTER_INCREMENT(obd, op)                           \
-        if ((obd)->md_stats != NULL) {                          \
-                unsigned int coffset;                           \
-                coffset = (unsigned int)(obd)->md_cntr_base +   \
-                        MD_COUNTER_OFFSET(op);                  \
-                LASSERT(coffset < (obd)->md_stats->ls_num);     \
-                lprocfs_counter_incr((obd)->md_stats, coffset); \
-        }
-
-#else
-#define OBD_COUNTER_OFFSET(op)
-#define OBD_COUNTER_INCREMENT(obd, op)
-#define MD_COUNTER_INCREMENT(obd, op)
-#endif
-
-#define OBD_CHECK_MD_OP(obd, op, err)                           \
-do {                                                            \
-        if (!OBT(obd) || !MDP((obd), op)) {                     \
-                if (err)                                        \
-                        CERROR("md_" #op ": dev %s/%d no operation\n", \
-                               obd->obd_name, obd->obd_minor);  \
-                RETURN(err);                                    \
-        }                                                       \
-} while (0)
-
-#define EXP_CHECK_MD_OP(exp, op)                                \
-do {                                                            \
-        if ((exp) == NULL) {                                    \
-                CERROR("obd_" #op ": NULL export\n");           \
-                RETURN(-ENODEV);                                \
-        }                                                       \
-        if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) {   \
-                CERROR("obd_" #op ": cleaned up obd\n");        \
-                RETURN(-EOPNOTSUPP);                            \
-        }                                                       \
-        if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \
-                CERROR("obd_" #op ": dev %s/%d no operation\n", \
-                       (exp)->exp_obd->obd_name,                \
-                      (exp)->exp_obd->obd_minor);              \
-                RETURN(-EOPNOTSUPP);                            \
-        }                                                       \
-} while (0)
-
-
-#define OBD_CHECK_DT_OP(obd, op, err)                           \
-do {                                                            \
-        if (!OBT(obd) || !OBP((obd), op)) {                     \
-                if (err)                                        \
-                        CERROR("obd_" #op ": dev %d no operation\n",    \
-                               obd->obd_minor);                 \
-                RETURN(err);                                    \
-        }                                                       \
-} while (0)
-
-#define EXP_CHECK_DT_OP(exp, op)                                \
-do {                                                            \
-        if ((exp) == NULL) {                                    \
-                CERROR("obd_" #op ": NULL export\n");           \
-                RETURN(-ENODEV);                                \
-        }                                                       \
-        if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) {   \
-                CERROR("obd_" #op ": cleaned up obd\n");        \
-                RETURN(-EOPNOTSUPP);                            \
-        }                                                       \
-        if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \
-                CERROR("obd_" #op ": dev %d no operation\n",    \
-                       (exp)->exp_obd->obd_minor);              \
-                RETURN(-EOPNOTSUPP);                            \
-        }                                                       \
-} while (0)
-
-#define CTXT_CHECK_OP(ctxt, op, err)                                         \
-do {                                                            \
-        if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) {                     \
-                if (err)                                        \
-                        CERROR("lop_" #op ": dev %d no operation\n",    \
-                               ctxt->loc_obd->obd_minor);                         \
-                RETURN(err);                                    \
-        }                                                       \
-} while (0)
-
-static inline int obd_get_info(struct obd_export *exp, __u32 keylen,
-                               void *key, __u32 *vallen, void *val)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, get_info);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, get_info);
-
-        rc = OBP(exp->exp_obd, get_info)(exp, keylen, key, vallen, val);
-        RETURN(rc);
-}
-
-static inline int obd_set_info(struct obd_export *exp, obd_count keylen,
-                               void *key, obd_count vallen, void *val)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, set_info);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, set_info);
-
-        rc = OBP(exp->exp_obd, set_info)(exp, keylen, key, vallen, val);
-        RETURN(rc);
-}
-
-static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
-{
-        int rc;
-        struct lu_device_type *ldt;
-        ENTRY;
-
-        ldt = obd->obd_type->typ_lu;
-        if (ldt != NULL) {
-#ifdef __KERNEL__
-                struct lu_context ctx;
-                struct lu_device *d;
-
-                rc = lu_context_init(&ctx);
-                if (rc == 0) {
-                        lu_context_enter(&ctx);
-
-                        d = ldt->ldt_ops->ldto_device_alloc(&ctx, ldt, cfg);
-                        if (!IS_ERR(d)) {
-                                obd->obd_lu_dev = d;
-                                d->ld_obd = obd;
-                                rc = 0;
-                        } else
-                                rc = PTR_ERR(d);
-                }
-#endif
-        } else {
-                OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
-                OBD_COUNTER_INCREMENT(obd, setup);
-                rc = OBP(obd, setup)(obd, cfg);
-        }
-        RETURN(rc);
-}
-
-static inline int obd_precleanup(struct obd_device *obd,
-                                 enum obd_cleanup_stage cleanup_stage)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(obd, precleanup, 0);
-        OBD_COUNTER_INCREMENT(obd, precleanup);
-
-        rc = OBP(obd, precleanup)(obd, cleanup_stage);
-        RETURN(rc);
-}
-
-static inline int obd_cleanup(struct obd_device *obd)
-{
-        int rc;
-        struct lu_device *d;
-        struct lu_device_type *ldt;
-        ENTRY;
-
-        OBD_CHECK_DEV(obd);
-
-        ldt = obd->obd_type->typ_lu;
-        d = obd->obd_lu_dev;
-        if (ldt != NULL && d != NULL) {
-#ifdef __KERNEL__
-                struct lu_context ctx;
-
-                rc = lu_context_init(&ctx);
-                if (rc == 0) {
-                        lu_context_enter(&ctx);
-                        ldt->ldt_ops->ldto_device_free(&ctx, d);
-                        lu_context_exit(&ctx);
-                        lu_context_fini(&ctx);
-                        obd->obd_lu_dev = NULL;
-                        rc = 0;
-                }
-#endif
-        } else {
-                OBD_CHECK_DT_OP(obd, cleanup, 0);
-                rc = OBP(obd, cleanup)(obd);
-        }
-        OBD_COUNTER_INCREMENT(obd, cleanup);
-        RETURN(rc);
-}
-
-static inline int
-obd_process_config(struct obd_device *obd, int datalen, void *data)
-{
-        int rc;
-        struct lu_device *d;
-        struct lu_device_type *ldt;
-        ENTRY;
-
-        OBD_CHECK_DEV(obd);
-
-        ldt = obd->obd_type->typ_lu;
-        d = obd->obd_lu_dev;
-        if (ldt != NULL && d != NULL) {
-#ifdef __KERNEL__
-                struct lu_context ctx;
-
-                rc = lu_context_init(&ctx);
-                if (rc == 0) {
-                        lu_context_enter(&ctx);
-                        rc = d->ld_ops->ldo_process_config(&ctx, d, data);
-                        lu_context_exit(&ctx);
-                        lu_context_fini(&ctx);
-                }
-#endif
-        } else {
-                OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP);
-                rc = OBP(obd, process_config)(obd, datalen, data);
-        }
-        OBD_COUNTER_INCREMENT(obd, process_config);
-
-        RETURN(rc);
-}
-
-/* Pack an in-memory MD struct for storage on disk.
- * Returns +ve size of packed MD (0 for free), or -ve error.
- *
- * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL).
- * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed.
- * If @*disk_tgt == NULL, it will be allocated
- */
-static inline int obd_packmd(struct obd_export *exp,
-                             struct lov_mds_md **disk_tgt,
-                             struct lov_stripe_md *mem_src)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, packmd);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, packmd);
-
-        rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
-        RETURN(rc);
-}
-
-static inline int obd_size_diskmd(struct obd_export *exp,
-                                  struct lov_stripe_md *mem_src)
-{
-        return obd_packmd(exp, NULL, mem_src);
-}
-
-/* helper functions */
-static inline int obd_alloc_diskmd(struct obd_export *exp,
-                                   struct lov_mds_md **disk_tgt)
-{
-        LASSERT(disk_tgt);
-        LASSERT(*disk_tgt == NULL);
-        return obd_packmd(exp, disk_tgt, NULL);
-}
-
-static inline int obd_free_diskmd(struct obd_export *exp,
-                                  struct lov_mds_md **disk_tgt)
-{
-        LASSERT(disk_tgt);
-        LASSERT(*disk_tgt);
-        return obd_packmd(exp, disk_tgt, NULL);
-}
-
-/* Unpack an MD struct from disk to in-memory format.
- * Returns +ve size of unpacked MD (0 for free), or -ve error.
- *
- * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL).
- * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed.
- * If @*mem_tgt == NULL, it will be allocated
- */
-static inline int obd_unpackmd(struct obd_export *exp,
-                               struct lov_stripe_md **mem_tgt,
-                               struct lov_mds_md *disk_src,
-                               int disk_len)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, unpackmd);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd);
-
-        rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
-        RETURN(rc);
-}
-
-/* helper functions */
-static inline int obd_alloc_memmd(struct obd_export *exp,
-                                  struct lov_stripe_md **mem_tgt)
-{
-        LASSERT(mem_tgt);
-        LASSERT(*mem_tgt == NULL);
-        return obd_unpackmd(exp, mem_tgt, NULL, 0);
-}
-
-static inline int obd_free_memmd(struct obd_export *exp,
-                                 struct lov_stripe_md **mem_tgt)
-{
-        LASSERT(mem_tgt);
-        LASSERT(*mem_tgt);
-        return obd_unpackmd(exp, mem_tgt, NULL, 0);
-}
-
-static inline int obd_checkmd(struct obd_export *exp,
-                              struct obd_export *md_exp,
-                              struct lov_stripe_md *mem_tgt)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, checkmd);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, checkmd);
-
-        rc = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt);
-        RETURN(rc);
-}
-
-static inline int obd_create(struct obd_export *exp, struct obdo *obdo,
-                             struct lov_stripe_md **ea,
-                             struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, create);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, create);
-
-        rc = OBP(exp->exp_obd, create)(exp, obdo, ea, oti);
-        RETURN(rc);
-}
-
-static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
-                              struct lov_stripe_md *ea,
-                              struct obd_trans_info *oti,
-                              struct obd_export *md_exp)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, destroy);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, destroy);
-
-        rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp);
-        RETURN(rc);
-}
-
-static inline int obd_getattr(struct obd_export *exp, struct obdo *obdo,
-                              struct lov_stripe_md *ea)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, getattr);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr);
-
-        rc = OBP(exp->exp_obd, getattr)(exp, obdo, ea);
-        RETURN(rc);
-}
-
-static inline int obd_getattr_async(struct obd_export *exp,
-                                    struct obdo *obdo, struct lov_stripe_md *ea,
-                                    struct ptlrpc_request_set *set)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, getattr);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr);
-
-        rc = OBP(exp->exp_obd, getattr_async)(exp, obdo, ea, set);
-        RETURN(rc);
-}
-
-static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo,
-                              struct lov_stripe_md *ea,
-                              struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, setattr);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr);
-
-        rc = OBP(exp->exp_obd, setattr)(exp, obdo, ea, oti);
-        RETURN(rc);
-}
-
-static inline int obd_setattr_async(struct obd_export *exp,
-                                    struct obdo *obdo,
-                                    struct lov_stripe_md *ea,
-                                    struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, setattr_async);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async);
-
-        rc = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti);
-        RETURN(rc);
-}
-
-static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
-                               int priority)
-{
-        struct obd_device *obd = imp->imp_obd;
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DEV_ACTIVE(obd);
-        OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(obd, add_conn);
-
-        rc = OBP(obd, add_conn)(imp, uuid, priority);
-        RETURN(rc);
-}
-
-static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
-{
-        struct obd_device *obd = imp->imp_obd;
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DEV_ACTIVE(obd);
-        OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(obd, del_conn);
-
-        rc = OBP(obd, del_conn)(imp, uuid);
-        RETURN(rc);
-}
-
-static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd,
-                              struct obd_uuid *cluuid,
-                              struct obd_connect_data *d)
-{
-        int rc;
-        __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */
-        ENTRY;
-
-        OBD_CHECK_DEV_ACTIVE(obd);
-        OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(obd, connect);
-
-        rc = OBP(obd, connect)(conn, obd, cluuid, d);
-        /* check that only subset is granted */
-        LASSERT(ergo(d != NULL,
-                     (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
-        RETURN(rc);
-}
-
-static inline int obd_reconnect(struct obd_export *exp,
-                                struct obd_device *obd,
-                                struct obd_uuid *cluuid,
-                                struct obd_connect_data *d)
-{
-        int rc;
-        __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */
-        ENTRY;
-
-        OBD_CHECK_DEV_ACTIVE(obd);
-        OBD_CHECK_DT_OP(obd, reconnect, 0);
-        OBD_COUNTER_INCREMENT(obd, reconnect);
-
-        rc = OBP(obd, reconnect)(exp, obd, cluuid, d);
-        /* check that only subset is granted */
-        LASSERT(ergo(d != NULL,
-                     (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
-        RETURN(rc);
-}
-
-static inline int obd_disconnect(struct obd_export *exp)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, disconnect);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
-
-        rc = OBP(exp->exp_obd, disconnect)(exp);
-        RETURN(rc);
-}
-
-static inline int obd_fid_alloc(struct obd_export *exp,
-                                struct lu_fid *fid,
-                                struct placement_hint *hint)
-{
-        int rc;
-        ENTRY;
-
-        if (OBP(exp->exp_obd, fid_alloc) == NULL)
-                RETURN(-ENOTSUPP);
-
-        OBD_COUNTER_INCREMENT(exp->exp_obd, fid_alloc);
-
-        rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, hint);
-        RETURN(rc);
-}
-
-static inline int obd_fid_delete(struct obd_export *exp,
-                                 struct lu_fid *fid)
-{
-        int rc;
-        ENTRY;
-
-        if (OBP(exp->exp_obd, fid_delete) == NULL)
-                RETURN(0);
-
-        OBD_COUNTER_INCREMENT(exp->exp_obd, fid_delete);
-        rc = OBP(exp->exp_obd, fid_delete)(exp, fid);
-        RETURN(rc);
-}
-
-static inline int obd_init_export(struct obd_export *exp)
-{
-        int rc = 0;
-
-        ENTRY;
-        if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
-            OBP((exp)->exp_obd, init_export))
-                rc = OBP(exp->exp_obd, init_export)(exp);
-        RETURN(rc);
-}
-
-static inline int obd_destroy_export(struct obd_export *exp)
-{
-        ENTRY;
-        if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
-            OBP((exp)->exp_obd, destroy_export))
-                OBP(exp->exp_obd, destroy_export)(exp);
-        RETURN(0);
-}
-
-static inline struct dentry *
-obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr)
-{
-        LASSERT(exp->exp_obd);
-
-        return lvfs_fid2dentry(&exp->exp_obd->obd_lvfs_ctxt, id_ino, gen, gr,
-                               exp->exp_obd);
-}
-
-static inline int
-obd_lvfs_open_llog(struct obd_export *exp, __u64 id_ino, struct dentry *dentry)
-{
-        LASSERT(exp->exp_obd);
-        CERROR("FIXME what's the story here?  This needs to be an obd fn?\n");
-#if 0
-        return lvfs_open_llog(&exp->exp_obd->obd_lvfs_ctxt, id_ino,
-                              dentry, exp->exp_obd);
-#endif
-        return 0;
-}
-
-#ifndef time_before
-#define time_before(t1, t2) ((long)t2 - (long)t1 > 0)
-#endif
-
-/* @max_age is the oldest time in jiffies that we accept using a cached data.
- * If the cache is older than @max_age we will get a new value from the
- * target.  Use a value of "jiffies + HZ" to guarantee freshness. */
-static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                             unsigned long max_age)
-{
-        int rc = 0;
-        ENTRY;
-
-        if (obd == NULL)
-                RETURN(-EINVAL);
-
-        OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(obd, statfs);
-
-        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
-        if (time_before(obd->obd_osfs_age, max_age)) {
-                rc = OBP(obd, statfs)(obd, osfs, max_age);
-                if (rc == 0) {
-                        spin_lock(&obd->obd_osfs_lock);
-                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
-                        obd->obd_osfs_age = jiffies;
-                        spin_unlock(&obd->obd_osfs_lock);
-                }
-        } else {
-                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
-                spin_lock(&obd->obd_osfs_lock);
-                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
-                spin_unlock(&obd->obd_osfs_lock);
-        }
-        RETURN(rc);
-}
-
-static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
-                           struct lov_stripe_md *ea, obd_size start,
-                           obd_size end)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, sync);
-
-        rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end);
-        RETURN(rc);
-}
-
-static inline int obd_punch(struct obd_export *exp, struct obdo *oa,
-                            struct lov_stripe_md *ea, obd_size start,
-                            obd_size end, struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, punch);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, punch);
-
-        rc = OBP(exp->exp_obd, punch)(exp, oa, ea, start, end, oti);
-        RETURN(rc);
-}
-
-static inline int obd_brw(int cmd, struct obd_export *exp, struct obdo *oa,
-                          struct lov_stripe_md *ea, obd_count oa_bufs,
-                          struct brw_page *pg, struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, brw);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, brw);
-
-        if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) {
-                CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
-                       "or OBD_BRW_CHECK\n");
-                LBUG();
-        }
-
-        rc = OBP(exp->exp_obd, brw)(cmd, exp, oa, ea, oa_bufs, pg, oti);
-        RETURN(rc);
-}
-
-static inline int obd_brw_async(int cmd, struct obd_export *exp,
-                                struct obdo *oa, struct lov_stripe_md *ea,
-                                obd_count oa_bufs, struct brw_page *pg,
-                                struct ptlrpc_request_set *set,
-                                struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, brw_async);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async);
-
-        if (!(cmd & OBD_BRW_RWMASK)) {
-                CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n");
-                LBUG();
-        }
-
-        rc = OBP(exp->exp_obd, brw_async)(cmd, exp, oa, ea, oa_bufs, pg, set,
-                                          oti);
-        RETURN(rc);
-}
-
-static inline  int obd_prep_async_page(struct obd_export *exp,
-                                       struct lov_stripe_md *lsm,
-                                       struct lov_oinfo *loi,
-                                       struct page *page, obd_off offset,
-                                       struct obd_async_page_ops *ops,
-                                       void *data, void **res)
-{
-        int ret;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, prep_async_page, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, prep_async_page);
-
-        ret = OBP(exp->exp_obd, prep_async_page)(exp, lsm, loi, page, offset,
-                                                 ops, data, res);
-        RETURN(ret);
-}
-
-static inline int obd_queue_async_io(struct obd_export *exp,
-                                     struct lov_stripe_md *lsm,
-                                     struct lov_oinfo *loi, void *cookie,
-                                     int cmd, obd_off off, int count,
-                                     obd_flag brw_flags, obd_flag async_flags)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, queue_async_io, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, queue_async_io);
-        LASSERT(cmd & OBD_BRW_RWMASK);
-
-        rc = OBP(exp->exp_obd, queue_async_io)(exp, lsm, loi, cookie, cmd, off,
-                                               count, brw_flags, async_flags);
-        RETURN(rc);
-}
-
-static inline int obd_set_async_flags(struct obd_export *exp,
-                                      struct lov_stripe_md *lsm,
-                                      struct lov_oinfo *loi, void *cookie,
-                                      obd_flag async_flags)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, set_async_flags);
-
-        rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie,
-                                                async_flags);
-        RETURN(rc);
-}
-
-static inline int obd_queue_group_io(struct obd_export *exp,
-                                     struct lov_stripe_md *lsm,
-                                     struct lov_oinfo *loi,
-                                     struct obd_io_group *oig,
-                                     void *cookie, int cmd, obd_off off,
-                                     int count, obd_flag brw_flags,
-                                     obd_flag async_flags)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io);
-        LASSERT(cmd & OBD_BRW_RWMASK);
-
-        rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie,
-                                               cmd, off, count, brw_flags,
-                                               async_flags);
-        RETURN(rc);
-}
-
-static inline int obd_trigger_group_io(struct obd_export *exp,
-                                       struct lov_stripe_md *lsm,
-                                       struct lov_oinfo *loi,
-                                       struct obd_io_group *oig)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_group_io);
-
-        rc = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig);
-        RETURN(rc);
-}
-
-static inline int obd_teardown_async_page(struct obd_export *exp,
-                                          struct lov_stripe_md *lsm,
-                                          struct lov_oinfo *loi, void *cookie)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, teardown_async_page, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, teardown_async_page);
-
-        rc = OBP(exp->exp_obd, teardown_async_page)(exp, lsm, loi, cookie);
-        RETURN(rc);
-}
-
-static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
-                             int objcount, struct obd_ioobj *obj,
-                             int niocount, struct niobuf_remote *remote,
-                             struct niobuf_local *local,
-                             struct obd_trans_info *oti)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, preprw, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
-
-        rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount,
-                                       remote, local, oti);
-        RETURN(rc);
-}
-
-static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
-                               int objcount, struct obd_ioobj *obj,
-                               int niocount, struct niobuf_local *local,
-                               struct obd_trans_info *oti, int rc)
-{
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, commitrw, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
-
-        rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount,
-                                         local, oti, rc);
-        RETURN(rc);
-}
-
-static inline int obd_merge_lvb(struct obd_export *exp,
-                                struct lov_stripe_md *lsm,
-                                struct ost_lvb *lvb, int kms_only)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb);
-
-        rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only);
-        RETURN(rc);
-}
-
-static inline int obd_adjust_kms(struct obd_export *exp,
-                                 struct lov_stripe_md *lsm, obd_off size,
-                                 int shrink)
-{
-        int rc;
-        ENTRY;
-
-        OBD_CHECK_DT_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms);
-
-        rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
-        RETURN(rc);
-}
-
-static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
-                                int len, void *karg, void *uarg)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, iocontrol);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol);
-
-        rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg);
-        RETURN(rc);
-}
-
-static inline int obd_enqueue(struct obd_export *exp, struct lov_stripe_md *ea,
-                              __u32 type, ldlm_policy_data_t *policy,
-                              __u32 mode, int *flags, void *bl_cb, void *cp_cb,
-                              void *gl_cb, void *data, __u32 lvb_len,
-                              void *lvb_swabber, struct lustre_handle *lockh)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, enqueue);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue);
-
-        rc = OBP(exp->exp_obd, enqueue)(exp, ea, type, policy, mode, flags,
-                                        bl_cb, cp_cb, gl_cb, data, lvb_len,
-                                        lvb_swabber, lockh);
-        RETURN(rc);
-}
-
-static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea,
-                            __u32 type, ldlm_policy_data_t *policy, __u32 mode,
-                            int *flags, void *data, struct lustre_handle *lockh)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, match);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, match);
-
-        rc = OBP(exp->exp_obd, match)(exp, ea, type, policy, mode, flags, data,
-                                      lockh);
-        RETURN(rc);
-}
-
-static inline int obd_change_cbdata(struct obd_export *exp,
-                                    struct lov_stripe_md *lsm,
-                                    ldlm_iterator_t it, void *data)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, change_cbdata);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata);
-
-        rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data);
-        RETURN(rc);
-}
-
-static inline int obd_cancel(struct obd_export *exp,
-                             struct lov_stripe_md *ea, __u32 mode,
-                             struct lustre_handle *lockh)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, cancel);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel);
-
-        rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh);
-        RETURN(rc);
-}
-
-static inline int obd_cancel_unused(struct obd_export *exp,
-                                    struct lov_stripe_md *ea,
-                                    int flags, void *opaque)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, cancel_unused);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused);
-
-        rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque);
-        RETURN(rc);
-}
-
-static inline int obd_join_lru(struct obd_export *exp,
-                               struct lov_stripe_md *ea, int join)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, join_lru);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, join_lru);
-
-        rc = OBP(exp->exp_obd, join_lru)(exp, ea, join);
-        RETURN(rc);
-}
-
-static inline int obd_san_preprw(int cmd, struct obd_export *exp,
-                                 struct obdo *oa,
-                                 int objcount, struct obd_ioobj *obj,
-                                 int niocount, struct niobuf_remote *remote)
-{
-        int rc;
-
-        EXP_CHECK_DT_OP(exp, preprw);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
-
-        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
-                                           niocount, remote);
-        class_export_put(exp);
-        return(rc);
-}
-
-static inline int obd_pin(struct obd_export *exp, struct lu_fid *fid,
-                          struct obd_client_handle *handle, int flag)
-{
-        int rc;
-
-        EXP_CHECK_DT_OP(exp, pin);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
-
-        rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag);
-        return(rc);
-}
-
-static inline int obd_unpin(struct obd_export *exp,
-                            struct obd_client_handle *handle, int flag)
-{
-        int rc;
-
-        EXP_CHECK_DT_OP(exp, unpin);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, unpin);
-
-        rc = OBP(exp->exp_obd, unpin)(exp, handle, flag);
-        return(rc);
-}
-
-
-static inline void obd_import_event(struct obd_device *obd,
-                                    struct obd_import *imp,
-                                    enum obd_import_event event)
-{
-        if (!obd) {
-                CERROR("NULL device\n");
-                EXIT;
-                return;
-        }
-        if (obd->obd_set_up && OBP(obd, import_event)) {
-                OBD_COUNTER_INCREMENT(obd, import_event);
-                OBP(obd, import_event)(obd, imp, event);
-        }
-}
-
-static inline int obd_notify(struct obd_device *obd,
-                             struct obd_device *watched,
-                             enum obd_notify_event ev,
-                             void *data)
-{
-        OBD_CHECK_DEV(obd);
-
-        /* the check for async_recov is a complete hack - I'm hereby
-           overloading the meaning to also mean "this was called from
-           mds_postsetup".  I know that my mds is able to handle notifies
-           by this point, and it needs to get them to execute mds_postrecov. */
-        if (!obd->obd_set_up && !obd->obd_async_recov) {
-                CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
-                return -EINVAL;
-        }
-
-        if (!OBP(obd, notify)) {
-                CERROR("obd %s has no notify handler\n", obd->obd_name);
-                return -ENOSYS;
-        }
-
-        OBD_COUNTER_INCREMENT(obd, notify);
-        return OBP(obd, notify)(obd, watched, ev, data);
-}
-
-static inline int obd_notify_observer(struct obd_device *observer,
-                                      struct obd_device *observed,
-                                      enum obd_notify_event ev,
-                                      void *data)
-{
-        int rc1;
-        int rc2;
-
-        struct obd_notify_upcall *onu;
-
-        if (observer->obd_observer)
-                rc1 = obd_notify(observer->obd_observer, observed, ev, data);
-        else
-                rc1 = 0;
-        /*
-         * Also, call non-obd listener, if any
-         */
-        onu = &observer->obd_upcall;
-        if (onu->onu_upcall != NULL)
-                rc2 = onu->onu_upcall(observer, observed, ev, onu->onu_owner);
-        else
-                rc2 = 0;
-
-        return rc1 ? rc1 : rc2;
-}
-
-static inline int obd_quotacheck(struct obd_export *exp,
-                                 struct obd_quotactl *oqctl)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, quotacheck);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck);
-
-        rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl);
-        RETURN(rc);
-}
-
-static inline int obd_quotactl(struct obd_export *exp,
-                               struct obd_quotactl *oqctl)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_DT_OP(exp, quotactl);
-        OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl);
-
-        rc = OBP(exp->exp_obd, quotactl)(exp, oqctl);
-        RETURN(rc);
-}
-
-static inline int obd_health_check(struct obd_device *obd)
-{
-        /* returns: 0 on healthy
-         *         >0 on unhealthy + reason code/flag
-         *            however the only suppored reason == 1 right now
-         *            We'll need to define some better reasons
-         *            or flags in the future.
-         *         <0 on error
-         */
-        int rc;
-        ENTRY;
-
-        /* don't use EXP_CHECK_OP, because NULL method is normal here */
-        if (obd == NULL || !OBT(obd)) {
-                CERROR("cleaned up obd\n");
-                RETURN(-EOPNOTSUPP);
-        }
-        if (!obd->obd_set_up || obd->obd_stopping)
-                RETURN(0);
-        if (!OBP(obd, health_check))
-                RETURN(0);
-
-        rc = OBP(obd, health_check)(obd);
-        RETURN(rc);
-}
-
-static inline int obd_register_observer(struct obd_device *obd,
-                                        struct obd_device *observer)
-{
-        ENTRY;
-        OBD_CHECK_DEV(obd);
-        if (obd->obd_observer && observer)
-                RETURN(-EALREADY);
-        obd->obd_observer = observer;
-        RETURN(0);
-}
-
-/* metadata helpers */
-static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_MD_OP(exp, getstatus);
-        MD_COUNTER_INCREMENT(exp->exp_obd, getstatus);
-        rc = MDP(exp->exp_obd, getstatus)(exp, fid);
-        RETURN(rc);
-}
-
-static inline int md_getattr(struct obd_export *exp, struct lu_fid *fid,
-                             obd_valid valid, int ea_size,
-                             struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, getattr);
-        MD_COUNTER_INCREMENT(exp->exp_obd, getattr);
-        rc = MDP(exp->exp_obd, getattr)(exp, fid, valid,
-                                        ea_size, request);
-        RETURN(rc);
-}
-
-static inline int md_change_cbdata(struct obd_export *exp, struct lu_fid *fid,
-                                   ldlm_iterator_t it, void *data)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, change_cbdata);
-        MD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata);
-        rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data);
-        RETURN(rc);
-}
-
-static inline int md_close(struct obd_export *exp,
-                           struct md_op_data *op_data,
-                           struct obd_client_handle *och,
-                           struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, close);
-        MD_COUNTER_INCREMENT(exp->exp_obd, close);
-        rc = MDP(exp->exp_obd, close)(exp, op_data, och, request);
-        RETURN(rc);
-}
-
-static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
-                            const void *data, int datalen, int mode,
-                            __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev,
-                            struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, create);
-        MD_COUNTER_INCREMENT(exp->exp_obd, create);
-        rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode,
-                                       uid, gid, cap_effective, rdev, request);
-        RETURN(rc);
-}
-
-static inline int md_done_writing(struct obd_export *exp,
-                                  struct md_op_data *op_data)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, done_writing);
-        MD_COUNTER_INCREMENT(exp->exp_obd, done_writing);
-        rc = MDP(exp->exp_obd, done_writing)(exp, op_data);
-        RETURN(rc);
-}
-
-static inline int md_enqueue(struct obd_export *exp, int lock_type,
-                             struct lookup_intent *it, int lock_mode,
-                             struct md_op_data *op_data,
-                             struct lustre_handle *lockh,
-                             void *lmm, int lmmsize,
-                             ldlm_completion_callback cb_completion,
-                             ldlm_blocking_callback cb_blocking,
-                             void *cb_data, int extra_lock_flags)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, enqueue);
-        MD_COUNTER_INCREMENT(exp->exp_obd, enqueue);
-        rc = MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode,
-                                        op_data, lockh, lmm, lmmsize,
-                                        cb_completion, cb_blocking,
-                                        cb_data, extra_lock_flags);
-        RETURN(rc);
-}
-
-static inline int md_getattr_name(struct obd_export *exp, struct lu_fid *fid,
-                                  const char *filename, int namelen,
-                                  obd_valid valid, int ea_size,
-                                  struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, getattr_name);
-        MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name);
-        rc = MDP(exp->exp_obd, getattr_name)(exp, fid, filename, namelen,
-                                             valid, ea_size, request);
-        RETURN(rc);
-}
-
-static inline int md_intent_lock(struct obd_export *exp,
-                                 struct md_op_data *op_data,
-                                 void *lmm, int lmmsize,
-                                 struct lookup_intent *it,
-                                 int flags, struct ptlrpc_request **reqp,
-                                 ldlm_blocking_callback cb_blocking,
-                                 int extra_lock_flags)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, intent_lock);
-        MD_COUNTER_INCREMENT(exp->exp_obd, intent_lock);
-        rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
-                                            it, flags, reqp, cb_blocking,
-                                            extra_lock_flags);
-        RETURN(rc);
-}
-
-static inline int md_link(struct obd_export *exp,
-                          struct md_op_data *op_data,
-                          struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, link);
-        MD_COUNTER_INCREMENT(exp->exp_obd, link);
-        rc = MDP(exp->exp_obd, link)(exp, op_data, request);
-        RETURN(rc);
-}
-
-static inline int md_rename(struct obd_export *exp,
-                            struct md_op_data *op_data,
-                            const char *old, int oldlen,
-                            const char *new, int newlen,
-                            struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, rename);
-        MD_COUNTER_INCREMENT(exp->exp_obd, rename);
-        rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new,
-                                       newlen, request);
-        RETURN(rc);
-}
-
-static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
-                             struct iattr *iattr, void *ea, int ealen,
-                             void *ea2, int ea2len, struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, setattr);
-        MD_COUNTER_INCREMENT(exp->exp_obd, setattr);
-        rc = MDP(exp->exp_obd, setattr)(exp, op_data, iattr, ea, ealen,
-                                        ea2, ea2len, request);
-        RETURN(rc);
-}
-
-static inline int md_sync(struct obd_export *exp, struct lu_fid *fid,
-                          struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, sync);
-        MD_COUNTER_INCREMENT(exp->exp_obd, sync);
-        rc = MDP(exp->exp_obd, sync)(exp, fid, request);
-        RETURN(rc);
-}
-
-static inline int md_readpage(struct obd_export *exp, struct lu_fid *fid,
-                              __u64 offset, struct page *page,
-                              struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, readpage);
-        MD_COUNTER_INCREMENT(exp->exp_obd, readpage);
-        rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request);
-        RETURN(rc);
-}
-
-static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data,
-                            struct ptlrpc_request **request)
-{
-        int rc;
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, unlink);
-        MD_COUNTER_INCREMENT(exp->exp_obd, unlink);
-        rc = MDP(exp->exp_obd, unlink)(exp, op_data, request);
-        RETURN(rc);
-}
-
-static inline int md_get_lustre_md(struct obd_export *exp,
-                                   struct ptlrpc_request *req,
-                                   int offset, struct obd_export *dt_exp,
-                                   struct lustre_md *md)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, get_lustre_md);
-        MD_COUNTER_INCREMENT(exp->exp_obd, get_lustre_md);
-        RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, offset,
-                                                dt_exp, md));
-}
-
-static inline int md_free_lustre_md(struct obd_export *exp,
-                                    struct lustre_md *md)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, free_lustre_md);
-        MD_COUNTER_INCREMENT(exp->exp_obd, free_lustre_md);
-        RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md));
-}
-
-static inline int md_setxattr(struct obd_export *exp, struct lu_fid *fid,
-                              obd_valid valid, const char *name,
-                              const char *input, int input_size,
-                              int output_size, int flags,
-                              struct ptlrpc_request **request)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, setxattr);
-        MD_COUNTER_INCREMENT(exp->exp_obd, setxattr);
-        RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
-                                           input_size, output_size, flags,
-                                           request));
-}
-
-static inline int md_getxattr(struct obd_export *exp, struct lu_fid *fid,
-                              obd_valid valid, const char *name,
-                              const char *input, int input_size,
-                              int output_size, int flags,
-                              struct ptlrpc_request **request)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, getxattr);
-        MD_COUNTER_INCREMENT(exp->exp_obd, getxattr);
-        RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
-                                           input_size, output_size, flags,
-                                           request));
-}
-
-static inline int md_set_open_replay_data(struct obd_export *exp,
-                                          struct obd_client_handle *och,
-                                          struct ptlrpc_request *open_req)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, set_open_replay_data);
-        MD_COUNTER_INCREMENT(exp->exp_obd, set_open_replay_data);
-        RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req));
-}
-
-static inline int md_clear_open_replay_data(struct obd_export *exp,
-                                            struct obd_client_handle *och)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, clear_open_replay_data);
-        MD_COUNTER_INCREMENT(exp->exp_obd, clear_open_replay_data);
-        RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och));
-}
-
-static inline int md_set_lock_data(struct obd_export *exp,
-                                   __u64 *lockh, void *data)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, set_lock_data);
-        MD_COUNTER_INCREMENT(exp->exp_obd, set_lock_data);
-        RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data));
-}
-
-static inline int md_cancel_unused(struct obd_export *exp,
-                                   struct lu_fid *fid,
-                                   int flags, void *opaque)
-{
-        int rc;
-        ENTRY;
-
-        EXP_CHECK_MD_OP(exp, cancel_unused);
-        MD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused);
-
-        rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, flags, opaque);
-        RETURN(rc);
-}
-
-static inline int md_lock_match(struct obd_export *exp, int flags,
-                                struct lu_fid *fid, ldlm_type_t type,
-                                ldlm_policy_data_t *policy, ldlm_mode_t mode,
-                                struct lustre_handle *lockh)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, lock_match);
-        MD_COUNTER_INCREMENT(exp->exp_obd, lock_match);
-        RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type,
-                                             policy, mode, lockh));
-}
-
-static inline int md_init_ea_size(struct obd_export *exp,
-                                  int easize, int def_asize,
-                                  int cookiesize)
-{
-        ENTRY;
-        EXP_CHECK_MD_OP(exp, init_ea_size);
-        MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size);
-        RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize,
-                                               def_asize,
-                                               cookiesize));
-}
-
-/* OBD Metadata Support */
-extern int obd_init_caches(void);
-extern void obd_cleanup_caches(void);
-
-/* support routines */
-extern kmem_cache_t *obdo_cachep;
-static inline struct obdo *obdo_alloc(void)
-{
-        struct obdo *oa;
-
-        OBD_SLAB_ALLOC(oa, obdo_cachep, SLAB_KERNEL, sizeof(*oa));
-
-        return oa;
-}
-
-static inline void obdo_free(struct obdo *oa)
-{
-        OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
-}
-
-static inline void obdo2fid(struct obdo *oa,
-                            struct lu_fid *fid)
-{
-        /* something here */
-}
-
-static inline void fid2obdo(struct lu_fid *fid,
-                            struct obdo *oa)
-{
-        /* something here */
-}
 
 #if !defined(__KERNEL__) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #define to_kdev_t(dev) dev
 #define kdev_t_to_nr(dev) dev
 #endif
 
-/* I'm as embarrassed about this as you are.
- *
- * <shaver> // XXX do not look into _superhack with remaining eye
- * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
-extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
-
-/* sysctl.c */
-extern void obd_sysctl_init (void);
-extern void obd_sysctl_clean (void);
-
-/* uuid.c  */
-typedef __u8 class_uuid_t[16];
-void class_generate_random_uuid(class_uuid_t uuid);
-void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
-
-/* lustre_peer.c    */
-int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index);
-int class_add_uuid(const char *uuid, __u64 nid);
-int class_del_uuid (const char *uuid);
-void class_init_uuidlist(void);
-void class_exit_uuidlist(void);
-
-/* mea.c */
-int mea_name2idx(struct lmv_stripe_md *mea, char *name, int namelen);
-int raw_name2idx(int hashtype, int count, const char *name, int namelen);
-
 #endif /* __LINUX_OBD_CLASS_H */
index f096703..bb188e7 100644 (file)
  *
  */
 
+#ifndef _LINUX_OBD_SUPPORT
+#define _LINUX_OBD_SUPPORT
+
 #ifndef _OBD_SUPPORT
-#define _OBD_SUPPORT
+#error Do not #include this file directly. #include <obd_support.h> instead
+#endif
 
 #ifdef __KERNEL__
 #include <linux/config.h>
 #include <libcfs/kp30.h>
 #include <linux/lustre_compat25.h>
 
-/* global variables */
-extern atomic_t obd_memory;
-extern int obd_memmax;
-extern unsigned int obd_fail_loc;
-extern unsigned int obd_dump_on_timeout;
-extern unsigned int obd_timeout;          /* seconds */
-#define PING_INTERVAL max(obd_timeout / 4, 1U)
-#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
-extern unsigned int ldlm_timeout;
-extern unsigned int obd_health_check_timeout;
-extern char obd_lustre_upcall[128];
-extern unsigned int obd_sync_filter;
-extern wait_queue_head_t obd_race_waitq;
-
-#define OBD_FAIL_MDS                     0x100
-#define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
-#define OBD_FAIL_MDS_GETATTR_NET         0x102
-#define OBD_FAIL_MDS_GETATTR_PACK        0x103
-#define OBD_FAIL_MDS_READPAGE_NET        0x104
-#define OBD_FAIL_MDS_READPAGE_PACK       0x105
-#define OBD_FAIL_MDS_SENDPAGE            0x106
-#define OBD_FAIL_MDS_REINT_NET           0x107
-#define OBD_FAIL_MDS_REINT_UNPACK        0x108
-#define OBD_FAIL_MDS_REINT_SETATTR       0x109
-#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a
-#define OBD_FAIL_MDS_REINT_CREATE        0x10b
-#define OBD_FAIL_MDS_REINT_CREATE_WRITE  0x10c
-#define OBD_FAIL_MDS_REINT_UNLINK        0x10d
-#define OBD_FAIL_MDS_REINT_UNLINK_WRITE  0x10e
-#define OBD_FAIL_MDS_REINT_LINK          0x10f
-#define OBD_FAIL_MDS_REINT_LINK_WRITE    0x110
-#define OBD_FAIL_MDS_REINT_RENAME        0x111
-#define OBD_FAIL_MDS_REINT_RENAME_WRITE  0x112
-#define OBD_FAIL_MDS_OPEN_NET            0x113
-#define OBD_FAIL_MDS_OPEN_PACK           0x114
-#define OBD_FAIL_MDS_CLOSE_NET           0x115
-#define OBD_FAIL_MDS_CLOSE_PACK          0x116
-#define OBD_FAIL_MDS_CONNECT_NET         0x117
-#define OBD_FAIL_MDS_CONNECT_PACK        0x118
-#define OBD_FAIL_MDS_REINT_NET_REP       0x119
-#define OBD_FAIL_MDS_DISCONNECT_NET      0x11a
-#define OBD_FAIL_MDS_GETSTATUS_NET       0x11b
-#define OBD_FAIL_MDS_GETSTATUS_PACK      0x11c
-#define OBD_FAIL_MDS_STATFS_PACK         0x11d
-#define OBD_FAIL_MDS_STATFS_NET          0x11e
-#define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
-#define OBD_FAIL_MDS_PIN_NET             0x120
-#define OBD_FAIL_MDS_UNPIN_NET           0x121
-#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
-#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
-#define OBD_FAIL_MDS_SYNC_NET            0x124
-#define OBD_FAIL_MDS_SYNC_PACK           0x125
-#define OBD_FAIL_MDS_DONE_WRITING_NET    0x126
-#define OBD_FAIL_MDS_DONE_WRITING_PACK   0x127
-#define OBD_FAIL_MDS_ALLOC_OBDO          0x128
-#define OBD_FAIL_MDS_PAUSE_OPEN          0x129
-#define OBD_FAIL_MDS_STATFS_LCW_SLEEP    0x12a
-#define OBD_FAIL_MDS_OPEN_CREATE         0x12b
-#define OBD_FAIL_MDS_OST_SETATTR         0x12c
-#define OBD_FAIL_MDS_QUOTACHECK_NET      0x12d
-#define OBD_FAIL_MDS_QUOTACTL_NET        0x12e
-#define OBD_FAIL_MDS_CLIENT_ADD          0x12f
-#define OBD_FAIL_MDS_GETXATTR_NET        0x130
-#define OBD_FAIL_MDS_GETXATTR_PACK       0x131
-#define OBD_FAIL_MDS_SETXATTR_NET        0x132
-#define OBD_FAIL_MDS_SETXATTR            0x133
-#define OBD_FAIL_MDS_SETXATTR_WRITE      0x134
-#define OBD_FAIL_MDS_SET_INFO_NET        0x135
-#define OBD_FAIL_MDS_SET_INFO_PACK       0x136
-#define OBD_FAIL_MDS_FLD_NET             0x137
-#define OBD_FAIL_MDS_FLD_PACK            0x138
-
-#define OBD_FAIL_OST                     0x200
-#define OBD_FAIL_OST_CONNECT_NET         0x201
-#define OBD_FAIL_OST_DISCONNECT_NET      0x202
-#define OBD_FAIL_OST_GET_INFO_NET        0x203
-#define OBD_FAIL_OST_CREATE_NET          0x204
-#define OBD_FAIL_OST_DESTROY_NET         0x205
-#define OBD_FAIL_OST_GETATTR_NET         0x206
-#define OBD_FAIL_OST_SETATTR_NET         0x207
-#define OBD_FAIL_OST_OPEN_NET            0x208
-#define OBD_FAIL_OST_CLOSE_NET           0x209
-#define OBD_FAIL_OST_BRW_NET             0x20a
-#define OBD_FAIL_OST_PUNCH_NET           0x20b
-#define OBD_FAIL_OST_STATFS_NET          0x20c
-#define OBD_FAIL_OST_HANDLE_UNPACK       0x20d
-#define OBD_FAIL_OST_BRW_WRITE_BULK      0x20e
-#define OBD_FAIL_OST_BRW_READ_BULK       0x20f
-#define OBD_FAIL_OST_SYNC_NET            0x210
-#define OBD_FAIL_OST_ALL_REPLY_NET       0x211
-#define OBD_FAIL_OST_ALL_REQUESTS_NET    0x212
-#define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
-#define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
-#define OBD_FAIL_OST_ENOSPC              0x215
-#define OBD_FAIL_OST_EROFS               0x216
-#define OBD_FAIL_OST_ENOENT              0x217
-#define OBD_FAIL_OST_QUOTACHECK_NET      0x218
-#define OBD_FAIL_OST_QUOTACTL_NET        0x219
-
-#define OBD_FAIL_LDLM                    0x300
-#define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
-#define OBD_FAIL_LDLM_ENQUEUE            0x302
-#define OBD_FAIL_LDLM_CONVERT            0x303
-#define OBD_FAIL_LDLM_CANCEL             0x304
-#define OBD_FAIL_LDLM_BL_CALLBACK        0x305
-#define OBD_FAIL_LDLM_CP_CALLBACK        0x306
-#define OBD_FAIL_LDLM_GL_CALLBACK        0x307
-#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
-#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
-#define OBD_FAIL_LDLM_CREATE_RESOURCE    0x30a
-#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED    0x30b
-#define OBD_FAIL_LDLM_REPLY              0x30c
-#define OBD_FAIL_LDLM_RECOV_CLIENTS      0x30d
-#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
-
-#define OBD_FAIL_OSC                     0x400
-#define OBD_FAIL_OSC_BRW_READ_BULK       0x401
-#define OBD_FAIL_OSC_BRW_WRITE_BULK      0x402
-#define OBD_FAIL_OSC_LOCK_BL_AST         0x403
-#define OBD_FAIL_OSC_LOCK_CP_AST         0x404
-#define OBD_FAIL_OSC_MATCH               0x405
-#define OBD_FAIL_OSC_BRW_PREP_REQ        0x406
-#define OBD_FAIL_OSC_SHUTDOWN            0x407
-
-#define OBD_FAIL_PTLRPC                  0x500
-#define OBD_FAIL_PTLRPC_ACK              0x501
-#define OBD_FAIL_PTLRPC_RQBD             0x502
-#define OBD_FAIL_PTLRPC_BULK_GET_NET     0x503
-#define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
-#define OBD_FAIL_PTLRPC_DROP_RPC         0x505
-#define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
-
-#define OBD_FAIL_OBD_PING_NET            0x600
-#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
-#define OBD_FAIL_OBD_LOGD_NET            0x602
-#define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
-#define OBD_FAIL_OBD_DQACQ               0x604
-
-#define OBD_FAIL_TGT_REPLY_NET           0x700
-#define OBD_FAIL_TGT_CONN_RACE           0x701
-#define OBD_FAIL_TGT_FORCE_RECONNECT     0x702
-#define OBD_FAIL_TGT_DELAY_CONNECT       0x703
-#define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
-
-#define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
-
-
-#define OBD_FAIL_MGS                     0x900
-#define OBD_FAIL_MGS_FIRST_CONNECT       0x901
-#define OBD_FAIL_MGS_CONNECT_NET         0x117
-#define OBD_FAIL_MGS_DISCONNECT_NET      0x11a
-#define OBD_FAIL_MGS_ALL_REPLY_NET       0x122
-#define OBD_FAIL_MGS_ALL_REQUEST_NET     0x123
-
-/* preparation for a more advanced failure testbed (not functional yet) */
-#define OBD_FAIL_MASK_SYS    0x0000FF00
-#define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
-#define OBD_FAIL_ONCE        0x80000000
-#define OBD_FAILED           0x40000000
-#define OBD_FAIL_MDS_ALL_NET 0x01000000
-#define OBD_FAIL_OST_ALL_NET 0x02000000
-#define OBD_FAIL_MGS_ALL_NET 0x01000000
-
-#define OBD_FAIL_CHECK(id)   (((obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
-                              ((id) & OBD_FAIL_MASK_LOC)) &&                   \
-                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
-                                (OBD_FAILED | OBD_FAIL_ONCE)))
-
-#define OBD_FAIL_CHECK_ONCE(id)                                              \
-({      int _ret_ = 0;                                                       \
-        if (OBD_FAIL_CHECK(id)) {                                            \
-                CERROR("*** obd_fail_loc=%x ***\n", id);                     \
-                obd_fail_loc |= OBD_FAILED;                                  \
-                if ((id) & OBD_FAIL_ONCE)                                    \
-                        obd_fail_loc |= OBD_FAIL_ONCE;                       \
-                _ret_ = 1;                                                   \
-        }                                                                    \
-        _ret_;                                                               \
-})
-
-#define OBD_FAIL_RETURN(id, ret)                                             \
-do {                                                                         \
-        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
-                RETURN(ret);                                                 \
-        }                                                                    \
-} while(0)
-
-#define OBD_FAIL_TIMEOUT(id, secs)                                           \
-do {                                                                         \
-        if (OBD_FAIL_CHECK_ONCE(id)) {                                      \
-                CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
-                       (id), (secs));                                        \
-                set_current_state(TASK_UNINTERRUPTIBLE);                     \
-                schedule_timeout((secs) * HZ);                               \
-                set_current_state(TASK_RUNNING);                             \
-                CERROR("obd_fail_timeout id %x awake\n", (id));              \
-       }                                                                     \
-} while(0)
-
 /* Prefer the kernel's version, if it exports it, because it might be
  * optimized for this CPU. */
 #if defined(__KERNEL__) && (defined(CONFIG_CRC32) || defined(CONFIG_CRC32_MODULE))
@@ -264,32 +69,9 @@ static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len)
 #endif
 
 #ifdef __KERNEL__
-/* The idea here is to synchronise two threads to force a race. The
- * first thread that calls this with a matching fail_loc is put to
- * sleep. The next thread that calls with the same fail_loc wakes up
- * the first and continues. */
-#define OBD_RACE(id)                                            \
-do {                                                            \
-        if  (OBD_FAIL_CHECK_ONCE(id)) {                         \
-                CERROR("obd_race id %x sleeping\n", (id));      \
-                interruptible_sleep_on(&obd_race_waitq);                      \
-                CERROR("obd_fail_race id %x awake\n", (id));    \
-        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==        \
-                    ((id) & OBD_FAIL_MASK_LOC)) {               \
-                wake_up(&obd_race_waitq);                       \
-        }                                                       \
-} while(0)
-#else
-/* sigh.  an expedient fix until OBD_RACE is fixed up */
-#define OBD_RACE(foo) do {} while(0)
-#endif
-
-#define fixme() CDEBUG(D_OTHER, "FIXME\n");
-
-#ifdef __KERNEL__
 # include <linux/types.h>
 # include <linux/blkdev.h>
-# include <linux/lvfs.h>
+# include <lvfs.h>
 
 static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
 {
@@ -302,6 +84,10 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
                 obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE;
         }
 }
+
+#define OBD_SLEEP_ON(wq)        interruptible_sleep_on(wq)
+
+
 #else /* !__KERNEL__ */
 # define LTIME_S(time) (time)
 /* for obd_class.h */
@@ -310,155 +96,4 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb)
 # endif
 #endif  /* __KERNEL__ */
 
-extern atomic_t libcfs_kmemory;
-
-#if defined(LUSTRE_UTILS) /* this version is for utils only */
-#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
-({                                                                            \
-        typeof(ptr) __ptr;                                                    \
-        __ptr = kmalloc(size, (gfp_mask));                                    \
-        if (__ptr == NULL) {                                                  \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-        } else {                                                              \
-                memset(__ptr, 0, size);                                       \
-                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n",          \
-                       (int)(size), __ptr);                                   \
-        }                                                                     \
-        (ptr) = __ptr;                                                        \
-})
-#else /* this version is for the kernel and liblustre */
-#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
-({                                                                            \
-        typeof(ptr) __ptr;                                                    \
-        __ptr = kmalloc(size, (gfp_mask));                                    \
-        if (__ptr == NULL) {                                                  \
-                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
-        } else {                                                              \
-                memset(__ptr, 0, size);                                       \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
-                       (int)(size), __ptr, atomic_read(&obd_memory));         \
-        }                                                                     \
-        (ptr) = __ptr;                                                        \
-})
-#endif
-
-#ifndef OBD_GFP_MASK
-# define OBD_GFP_MASK GFP_NOFS
-#endif
-
-#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK)
-#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_KERNEL)
-#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr))
-#define OBD_ALLOC_PTR_WAIT(ptr) OBD_ALLOC_WAIT(ptr, sizeof *(ptr))
-
-#ifdef __arch_um__
-# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
-#else
-# define OBD_VMALLOC(ptr, size)                                               \
-do {                                                                          \
-        (ptr) = vmalloc(size);                                                \
-        if ((ptr) == NULL) {                                                  \
-                CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
-        } else {                                                              \
-                memset(ptr, 0, size);                                         \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
-                       (int)(size), ptr, atomic_read(&obd_memory));           \
-        }                                                                     \
-} while (0)
-#endif
-
-#ifdef CONFIG_DEBUG_SLAB
-#define POISON(ptr, c, s) do {} while (0)
-#else
-#define POISON(ptr, c, s) memset(ptr, c, s)
-#endif
-
-#if POISON_BULK
-#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE);       \
-                                    kunmap(page); } while (0)
-#else
-#define POISON_PAGE(page, val) do { } while (0)
-#endif
-
-#ifdef __KERNEL__
-#define OBD_FREE(ptr, size)                                                   \
-do {                                                                          \
-        LASSERT(ptr);                                                         \
-        atomic_sub(size, &obd_memory);                                        \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",           \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        POISON(ptr, 0x5a, size);                                              \
-        kfree(ptr);                                                           \
-        (ptr) = (void *)0xdeadbeef;                                           \
-} while (0)
-#else
-#define OBD_FREE(ptr, size) ((void)(size), free((ptr)))
-#endif
-
-#ifdef __arch_um__
-# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size)
-#else
-# define OBD_VFREE(ptr, size)                                                 \
-do {                                                                          \
-        LASSERT(ptr);                                                         \
-        atomic_sub(size, &obd_memory);                                        \
-        CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n",           \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        POISON(ptr, 0x5a, size);                                              \
-        vfree(ptr);                                                           \
-        (ptr) = (void *)0xdeadbeef;                                           \
-} while (0)
-#endif
-
-/* we memset() the slab object to 0 when allocation succeeds, so DO NOT
- * HAVE A CTOR THAT DOES ANYTHING.  its work will be cleared here.  we'd
- * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
-#define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
-do {                                                                          \
-        LASSERT(!in_interrupt());                                             \
-        (ptr) = kmem_cache_alloc(slab, (type));                               \
-        if ((ptr) == NULL) {                                                  \
-                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
-                       (int)(size), __FILE__, __LINE__);                      \
-                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
-                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
-        } else {                                                              \
-                memset(ptr, 0, size);                                         \
-                atomic_add(size, &obd_memory);                                \
-                if (atomic_read(&obd_memory) > obd_memmax)                    \
-                        obd_memmax = atomic_read(&obd_memory);                \
-                CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\
-                       (int)(size), ptr, atomic_read(&obd_memory));           \
-        }                                                                     \
-} while (0)
-
-#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr))
-
-#define OBD_SLAB_FREE(ptr, slab, size)                                        \
-do {                                                                          \
-        LASSERT(ptr);                                                         \
-        CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n",       \
-               (int)(size), ptr, atomic_read(&obd_memory));                   \
-        atomic_sub(size, &obd_memory);                                        \
-        POISON(ptr, 0x5a, size);                                              \
-        kmem_cache_free(slab, ptr);                                           \
-        (ptr) = (void *)0xdeadbeef;                                           \
-} while (0)
-
-#define KEY_IS(str) \
-        (keylen == strlen(str) && memcmp(key, str, keylen) == 0)
-
 #endif
diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h
new file mode 100644 (file)
index 0000000..c6b8005
--- /dev/null
@@ -0,0 +1,376 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *   Top level header file for LProc SNMP
+ *   Author: Hariharan Thantry thantry@users.sourceforge.net
+ */
+#ifndef _LPROCFS_SNMP_H
+#define _LPROCFS_SNMP_H
+
+#if defined(__linux__)
+#include <linux/lprocfs_status.h>
+#elif defined(__APPLE__)
+#include <darwin/lprocfs_status.h>
+#elif defined(__WINNT__)
+#include <winnt/lprocfs_status.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#undef LPROCFS
+#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS))
+# define LPROCFS
+#endif
+
+struct lprocfs_vars {
+        const char   *name;
+        cfs_read_proc_t *read_fptr;
+        cfs_write_proc_t *write_fptr;
+        void *data;
+};
+
+struct lprocfs_static_vars {
+        struct lprocfs_vars *module_vars;
+        struct lprocfs_vars *obd_vars;
+};
+
+/* An lprocfs counter can be configured using the enum bit masks below.
+ *
+ * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
+ * protects this counter from concurrent updates. If not specified,
+ * lprocfs uses an internal per-counter lock variable. External locks are
+ * not used to protect counter increments, but are used to protect
+ * counter readout and resets.
+ *
+ * LPROCFS_CNTR_AVGMINMAX indicates multi-valued counter samples
+ * (i.e. counter can be incremented by more than "1"). When specified,
+ * the counter maintains min, max and sum in addition to a simple
+ * invocation count. This allows averages to be computed.
+ * If not specified, the counter is an increment-by-1 counter.
+ * min, max, sum, etc. are not maintained.
+ *
+ * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of
+ * squares (for multi-valued counter samples only). This allows
+ * external computation of standard deviation, but involves a 64-bit
+ * multiply per counter increment.
+ */
+
+enum {
+        LPROCFS_CNTR_EXTERNALLOCK = 0x0001,
+        LPROCFS_CNTR_AVGMINMAX    = 0x0002,
+        LPROCFS_CNTR_STDDEV       = 0x0004,
+
+        /* counter data type */
+        LPROCFS_TYPE_REGS         = 0x0100,
+        LPROCFS_TYPE_BYTES        = 0x0200,
+        LPROCFS_TYPE_PAGES        = 0x0400,
+        LPROCFS_TYPE_CYCLE        = 0x0800,
+};
+
+struct lprocfs_atomic {
+        atomic_t               la_entry;
+        atomic_t               la_exit;
+};
+
+struct lprocfs_counter {
+        struct lprocfs_atomic  lc_cntl;  /* may need to move to per set */
+        unsigned int           lc_config;
+        __u64                  lc_count;
+        __u64                  lc_sum;
+        __u64                  lc_min;
+        __u64                  lc_max;
+        __u64                  lc_sumsquare;
+        const char            *lc_name;   /* must be static */
+        const char            *lc_units;  /* must be static */
+};
+
+struct lprocfs_percpu {
+        struct lprocfs_counter lp_cntr[0];
+};
+
+
+struct lprocfs_stats {
+        unsigned int           ls_num;     /* # of counters */
+        unsigned int           ls_percpu_size;
+        struct lprocfs_percpu *ls_percpu[0];
+};
+
+
+/* class_obd.c */
+extern cfs_proc_dir_entry_t *proc_lustre_root;
+
+struct obd_device;
+struct file;
+struct obd_histogram;
+
+#ifdef LPROCFS
+
+/* Two optimized LPROCFS counter increment functions are provided:
+ *     lprocfs_counter_incr(stats, idx) - optimized for by-one counters
+ *     lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters
+ * Counter data layout allows config flag, counter lock and the
+ * count itself to reside within a single cache line.
+ */
+
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx,
+                                       long amount)
+{
+        struct lprocfs_counter *percpu_cntr;
+
+        LASSERT(stats != NULL);
+        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
+        percpu_cntr->lc_count++;
+
+        if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) {
+                percpu_cntr->lc_sum += amount;
+                if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV)
+                        percpu_cntr->lc_sumsquare += (__u64)amount * amount;
+                if (amount < percpu_cntr->lc_min)
+                        percpu_cntr->lc_min = amount;
+                if (amount > percpu_cntr->lc_max)
+                        percpu_cntr->lc_max = amount;
+        }
+        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+}
+
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx)
+{
+        struct lprocfs_counter *percpu_cntr;
+
+        LASSERT(stats != NULL);
+        percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]);
+        atomic_inc(&percpu_cntr->lc_cntl.la_entry);
+        percpu_cntr->lc_count++;
+        atomic_inc(&percpu_cntr->lc_cntl.la_exit);
+}
+
+extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num);
+extern void lprocfs_free_stats(struct lprocfs_stats *stats);
+extern int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+                                   unsigned int num_private_stats);
+extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index,
+                                 unsigned conf, const char *name,
+                                 const char *units);
+extern void lprocfs_free_obd_stats(struct obd_device *obddev);
+extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name,
+                                  struct lprocfs_stats *stats);
+
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)           \
+void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x)  \
+{                                                      \
+        x->module_vars = vclass;                       \
+        x->obd_vars = vinstance;                       \
+}                                                      \
+
+#define lprocfs_init_vars(NAME, VAR)     \
+do {      \
+        extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *);  \
+        lprocfs_##NAME##_init_vars(VAR);                                       \
+} while (0)
+/* lprocfs_status.c */
+extern int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
+                            struct lprocfs_vars *var,
+                            void *data);
+
+extern cfs_proc_dir_entry_t *lprocfs_register(const char *name,
+                                               cfs_proc_dir_entry_t *parent,
+                                               struct lprocfs_vars *list,
+                                               void *data);
+
+extern void lprocfs_remove(cfs_proc_dir_entry_t *root);
+
+extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
+                                           const char *name);
+
+extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
+extern int lprocfs_obd_cleanup(struct obd_device *obd);
+
+/* Generic callbacks */
+
+extern int lprocfs_rd_u64(char *page, char **start, off_t off,
+                          int count, int *eof, void *data);
+extern int lprocfs_rd_atomic(char *page, char **start, off_t off,
+                          int count, int *eof, void *data);
+extern int lprocfs_rd_uuid(char *page, char **start, off_t off,
+                           int count, int *eof, void *data);
+extern int lprocfs_rd_name(char *page, char **start, off_t off,
+                           int count, int *eof, void *data);
+extern int lprocfs_rd_fstype(char *page, char **start, off_t off,
+                             int count, int *eof, void *data);
+extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+                                int count, int *eof, void *data);
+extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
+                                    int count, int *eof, void *data);
+extern int lprocfs_rd_num_exports(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+extern int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+                              int count, int *eof, void *data);
+extern int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+                                   unsigned long count, void *data);
+extern int lprocfs_wr_ping(struct file *file, const char *buffer,
+                           unsigned long count, void *data);
+
+/* Statfs helpers */
+extern int lprocfs_rd_blksize(char *page, char **start, off_t off,
+                              int count, int *eof, void *data);
+extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data);
+extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data);
+extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data);
+extern int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data);
+extern int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+                                int count, int *eof, void *data);
+extern int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data);
+
+extern int lprocfs_write_helper(const char *buffer, unsigned long count,
+                                int *val);
+extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count,
+                                    __u64 *val);
+int lprocfs_obd_seq_create(struct obd_device *dev, char *name, mode_t mode,
+                           struct file_operations *seq_fops, void *data);
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value);
+void lprocfs_oh_clear(struct obd_histogram *oh);
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh);
+
+/* lprocfs_status.c: counter read/write functions */
+extern int lprocfs_counter_read(char *page, char **start, off_t off,
+                                int count, int *eof, void *data);
+extern int lprocfs_counter_write(struct file *file, const char *buffer,
+                                 unsigned long count, void *data);
+
+/* lprocfs_status.c: recovery status */
+int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data);
+#else
+/* LPROCFS is not defined */
+static inline void lprocfs_counter_add(struct lprocfs_stats *stats,
+                                       int index, long amount) { return; }
+static inline void lprocfs_counter_incr(struct lprocfs_stats *stats,
+                                        int index) { return; }
+static inline void lprocfs_counter_init(struct lprocfs_stats *stats,
+                                        int index, unsigned conf,
+                                        const char *name, const char *units)
+{ return; }
+
+static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num)
+{ return NULL; }
+static inline void lprocfs_free_stats(struct lprocfs_stats *stats)
+{ return; }
+
+static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root,
+                                            const char *name,
+                                            struct lprocfs_stats *stats)
+{ return 0; }
+static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev,
+                                             unsigned int num_private_stats)
+{ return 0; }
+static inline void lprocfs_free_obd_stats(struct obd_device *obddev)
+{ return; }
+
+static inline cfs_proc_dir_entry_t *
+lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent,
+                 struct lprocfs_vars *list, void *data) { return NULL; }
+#define LPROCFS_INIT_VARS(name, vclass, vinstance)
+#define lprocfs_init_vars(...) do {} while (0)
+static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root,
+                                   struct lprocfs_vars *var,
+                                   void *data) { return 0; }
+static inline void lprocfs_remove(cfs_proc_dir_entry_t *root) {};
+static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head,
+                                    const char *name) {return 0;}
+static inline int lprocfs_obd_setup(struct obd_device *dev,
+                                    struct lprocfs_vars *list) { return 0; }
+static inline int lprocfs_obd_cleanup(struct obd_device *dev)  { return 0; }
+static inline int lprocfs_rd_u64(char *page, char **start, off_t off,
+                                 int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_uuid(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_name(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data) { return 0; }
+static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data)
+{ return 0; }
+static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off,
+                                       int count, int *eof, void *data)
+{ return 0; }
+static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off,
+                                           int count, int *eof, void *data)
+{ return 0; }
+static inline int lprocfs_rd_num_exports(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data)
+{ return 0; }
+static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off,
+                                     int count, int *eof, void *data)
+{ return 0; }
+static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer,
+                                          unsigned long count, void *data)
+{ return 0; }
+static inline int lprocfs_wr_ping(struct file *file, const char *buffer,
+                                  unsigned long count, void *data)
+{ return 0; }
+
+
+/* Statfs helpers */
+static inline
+int lprocfs_rd_blksize(char *page, char **start, off_t off,
+                       int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_rd_kbytestotal(char *page, char **start, off_t off,
+                           int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_rd_kbytesfree(char *page, char **start, off_t off,
+                          int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_rd_kbytesavail(char *page, char **start, off_t off,
+                           int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_rd_filestotal(char *page, char **start, off_t off,
+                          int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_rd_filesfree(char *page, char **start, off_t off,
+                         int count, int *eof, void *data)  { return 0; }
+static inline
+int lprocfs_rd_filegroups(char *page, char **start, off_t off,
+                          int count, int *eof, void *data) { return 0; }
+static inline
+void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {}
+static inline
+void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {}
+static inline
+void lprocfs_oh_clear(struct obd_histogram *oh) {}
+static inline
+unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; }
+static inline
+int lprocfs_counter_read(char *page, char **start, off_t off,
+                         int count, int *eof, void *data) { return 0; }
+static inline
+int lprocfs_counter_write(struct file *file, const char *buffer,
+                          unsigned long count, void *data) { return 0; }
+#endif /* LPROCFS */
+
+#endif /* _LPROCFS_SNMP_H */
diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h
new file mode 100644 (file)
index 0000000..7ae835c
--- /dev/null
@@ -0,0 +1,762 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LUSTRE_LU_OBJECT_H
+#define __LUSTRE_LU_OBJECT_H
+
+/*
+ * struct lu_fid
+ */
+#include <lustre/lustre_idl.h>
+
+#include <libcfs/list.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Layered objects support for CMD3/C5.
+ */
+
+
+struct seq_file;
+struct proc_dir_entry;
+struct lustre_cfg;
+
+/*
+ * lu_* data-types represent server-side entities shared by data and meta-data
+ * stacks.
+ *
+ * Design goals:
+ *
+ * 0. support for layering.
+ *
+ *     Server side object is split into layers, one per device in the
+ *     corresponding device stack. Individual layer is represented by struct
+ *     lu_object. Compound layered object --- by struct lu_object_header. Most
+ *     interface functions take lu_object as an argument and operate on the
+ *     whole compound object. This decision was made due to the following
+ *     reasons:
+ *
+ *        - it's envisaged that lu_object will be used much more often than
+ *        lu_object_header;
+ *
+ *        - we want lower (non-top) layers to be able to initiate operations
+ *        on the whole object.
+ *
+ *     Generic code supports layering more complex than simple stacking, e.g.,
+ *     it is possible that at some layer object "spawns" multiple sub-objects
+ *     on the lower layer.
+ *
+ * 1. fid-based identification.
+ *
+ *     Compound object is uniquely identified by its fid. Objects are indexed
+ *     by their fids (hash table is used for index).
+ *
+ * 2. caching and life-cycle management.
+ *
+ *     Object's life-time is controlled by reference counting. When reference
+ *     count drops to 0, object is returned to cache. Cached objects still
+ *     retain their identity (i.e., fid), and can be recovered from cache.
+ *
+ *     Objects are kept in the global LRU list, and lu_site_purge() function
+ *     can be used to reclaim given number of unused objects from the tail of
+ *     the LRU.
+ *
+ * 3. avoiding recursion.
+ *
+ *     Generic code tries to replace recursion through layers by iteration
+ *     where possible. Additionally, to reduce stack consumption, data are,
+ *     when practically possible, allocated through the lu_context_key
+ *     interface rather than on stack.
+ *
+ */
+
+struct lu_site;
+struct lu_object;
+struct lu_device;
+struct lu_object_header;
+struct lu_context;
+/*
+ * Operations common for data and meta-data devices.
+ */
+struct lu_device_operations {
+        /*
+         * Object creation protocol.
+         *
+         * Due to design goal of avoiding recursion, object creation (see
+         * lu_object_alloc()) is somewhat involved:
+         *
+         *  - first, ->ldo_object_alloc() method of the top-level device
+         *  in the stack is called. It should allocate top level object
+         *  (including lu_object_header), but without any lower-layer
+         *  sub-object(s).
+         *
+         *  - then lu_object_alloc() sets fid in the header of newly created
+         *  object.
+         *
+         *  - then ->loo_object_init() (a method from struct
+         *  lu_object_operations) is called. It has to allocate lower-layer
+         *  object(s). To do this, ->loo_object_init() calls
+         *  ldo_object_alloc() of the lower-layer device(s).
+         *
+         *  - for all new objects allocated by ->loo_object_init() (and
+         *  inserted into object stack), ->loo_object_init() is called again
+         *  repeatedly, until no new objects are created.
+         *
+         */
+
+        /*
+         * Allocate object for the given device (without lower-layer
+         * parts). This is called by ->loo_object_init() from the parent
+         * layer, and should setup at least ->lo_dev and ->lo_ops fields of
+         * resulting lu_object.
+         *
+         * postcondition: ergo(!IS_ERR(result), result->lo_dev ==  d &&
+         *                                      result->lo_ops != NULL);
+         */
+        struct lu_object *(*ldo_object_alloc)(struct lu_context *ctx,
+                                              struct lu_device *d);
+        /*
+         * Dual to ->ldo_object_alloc(). Called when object is removed from
+         * memory.
+         */
+        void (*ldo_object_free)(struct lu_context *ctx, struct lu_object *o);
+
+        /*
+         * process config specific for device
+         */
+        int  (*ldo_process_config)(struct lu_context *ctx,
+                                   struct lu_device *, struct lustre_cfg *);
+};
+
+/*
+ * Operations specific for particular lu_object.
+ */
+struct lu_object_operations {
+
+        /*
+         * Allocate lower-layer parts of the object by calling
+         * ->ldo_object_alloc() of the corresponding underlying device.
+         *
+         * This method is called once for each object inserted into object
+         * stack. It is the responsibility of this method to insert lower-layer
+         * object(s) it creates into appropriate places of object stack.
+         */
+        int (*loo_object_init)(struct lu_context *ctx, struct lu_object *o);
+        /*
+         * Called before ->ldo_object_free() to signal that object is being
+         * destroyed. Dual to ->loo_object_init().
+         */
+        void (*loo_object_delete)(struct lu_context *ctx, struct lu_object *o);
+
+        /*
+         * Called when last active reference to the object is released (and
+         * object returns to the cache).
+         */
+        void (*loo_object_release)(struct lu_context *ctx, struct lu_object *o);
+
+        /*
+         * Return true iff object @o exists on storage.
+         */
+        int (*loo_object_exists)(struct lu_context *ctx, struct lu_object *o);
+        /*
+         * Debugging helper. Print given object.
+         */
+        int (*loo_object_print)(struct lu_context *ctx,
+                                struct seq_file *f, const struct lu_object *o);
+};
+
+/*
+ * Type of lu_device.
+ */
+struct lu_device_type;
+
+/*
+ * Device: a layer in the server side abstraction stacking.
+ */
+struct lu_device {
+        /*
+         * reference count. This is incremented, in particular, on each object
+         * created at this layer.
+         *
+         * XXX which means that atomic_t is probably too small.
+         */
+        atomic_t                     ld_ref;
+        /*
+         * Pointer to device type. Never modified once set.
+         */
+        struct lu_device_type       *ld_type;
+        /*
+         * Operation vector for this device.
+         */
+        struct lu_device_operations *ld_ops;
+        /*
+         * Stack this device belongs to.
+         */
+        struct lu_site              *ld_site;
+        struct proc_dir_entry       *ld_proc_entry;
+
+        /* XXX: temporary back pointer into obd. */
+        struct obd_device           *ld_obd;
+};
+
+struct lu_device_type_operations;
+
+/*
+ * Tag bits for device type. They are used to distinguish certain groups of
+ * device types.
+ */
+enum lu_device_tag {
+        /* this is meta-data device */
+        LU_DEVICE_MD = (1 << 0),
+        /* this is data device */
+        LU_DEVICE_DT = (1 << 1)
+};
+
+/*
+ * Type of device.
+ */
+struct lu_device_type {
+        /*
+         * Tag bits. Taken from enum lu_device_tag. Never modified once set.
+         */
+        __u32                             ldt_tags;
+        /*
+         * Name of this class. Unique system-wide. Never modified once set.
+         */
+        char                             *ldt_name;
+        /*
+         * Operations for this type.
+         */
+        struct lu_device_type_operations *ldt_ops;
+        /*
+         * XXX: temporary pointer to associated obd_type.
+         */
+        struct obd_type                  *ldt_obd_type;
+};
+
+/*
+ * Operations on a device type.
+ */
+struct lu_device_type_operations {
+        /*
+         * Allocate new device.
+         */
+        struct lu_device *(*ldto_device_alloc)(struct lu_context *ctx,
+                                               struct lu_device_type *t,
+                                               struct lustre_cfg *lcfg);
+        /*
+         * Free device. Dual to ->ldto_device_alloc().
+         */
+        void (*ldto_device_free)(struct lu_context *ctx, struct lu_device *d);
+
+        /*
+         * Initialize the devices after allocation
+         */
+        int  (*ldto_device_init)(struct lu_context *ctx,
+                                 struct lu_device *, struct lu_device *);
+        /*
+         * Finalize device. Dual to ->ldto_device_init(). Returns pointer to
+         * the next device in the stack.
+         */
+        struct lu_device *(*ldto_device_fini)(struct lu_context *ctx,
+                                              struct lu_device *);
+
+        /*
+         * Initialize device type. This is called on module load.
+         */
+        int  (*ldto_init)(struct lu_device_type *t);
+        /*
+         * Finalize device type. Dual to ->ldto_init(). Called on module
+         * unload.
+         */
+        void (*ldto_fini)(struct lu_device_type *t);
+};
+
+/*
+ * Flags for the object layers.
+ */
+enum lu_object_flags {
+        /*
+         * this flag is set if ->loo_object_init() has been called for this
+         * layer. Used by lu_object_alloc().
+         */
+        LU_OBJECT_ALLOCATED = (1 << 0)
+};
+
+/*
+ * Common object attributes.
+ */
+struct lu_attr {
+        __u64          la_size;   /* size in bytes */
+        __u64          la_mtime;  /* modification time in seconds since Epoch */
+        __u64          la_atime;  /* access time in seconds since Epoch */
+        __u64          la_ctime;  /* change time in seconds since Epoch */
+        __u64          la_blocks; /* 512-byte blocks allocated to object */
+        __u32          la_mode;   /* permission bits and file type */
+        __u32          la_uid;    /* owner id */
+        __u32          la_gid;    /* group id */
+        __u32          la_flags;  /* object flags */
+        __u32          la_nlink;  /* number of persistent references to this
+                                   * object */
+};
+
+
+/*
+ * Layer in the layered object.
+ */
+struct lu_object {
+        /*
+         * Header for this object.
+         */
+        struct lu_object_header     *lo_header;
+        /*
+         * Device for this layer.
+         */
+        struct lu_device            *lo_dev;
+        /*
+         * Operations for this object.
+         */
+        struct lu_object_operations *lo_ops;
+        /*
+         * Linkage into list of all layers.
+         */
+        struct list_head             lo_linkage;
+        /*
+         * Depth. Top level layer depth is 0.
+         */
+        int                          lo_depth;
+        /*
+         * Flags from enum lu_object_flags.
+         */
+        unsigned long                lo_flags;
+};
+
+enum lu_object_header_flags {
+        /*
+         * Don't keep this object in cache. Object will be destroyed as soon
+         * as last reference to it is released. This flag cannot be cleared
+         * once set.
+         */
+        LU_OBJECT_HEARD_BANSHEE = 0,
+};
+
+/*
+ * "Compound" object, consisting of multiple layers.
+ *
+ * Compound object with given fid is unique within given lu_site.
+ *
+ * Note that object does *not* necessarily correspond to a real object in the
+ * persistent storage: object is an anchor for locking and method calling, so
+ * it is created for things like not-yet-existing child created by mkdir or
+ * create calls. ->loo_object_exists() can be used to check whether object is
+ * backed by persistent storage entity.
+ */
+struct lu_object_header {
+        /*
+         * Object flags from enum lu_object_header_flags. Set and checked
+         * atomically.
+         */
+        unsigned long     loh_flags;
+        /*
+         * Object reference count. Protected by site guard lock.
+         */
+        int               loh_ref;
+        /*
+         * Fid, uniquely identifying this object.
+         */
+        struct lu_fid     loh_fid;
+        /*
+         * Linkage into per-site hash table. Protected by site guard lock.
+         */
+        struct hlist_node loh_hash;
+        /*
+         * Linkage into per-site LRU list. Protected by site guard lock.
+         */
+        struct list_head  loh_lru;
+        /*
+         * Linkage into list of layers. Never modified once set (except lately
+         * during object destruction). No locking is necessary.
+         */
+        struct list_head  loh_layers;
+};
+
+struct fld;
+/*
+ * lu_site is a "compartment" within which objects are unique, and LRU
+ * discipline is maintained.
+ *
+ * lu_site exists so that multiple layered stacks can co-exist in the same
+ * address space.
+ *
+ * lu_site has the same relation to lu_device as lu_object_header to
+ * lu_object.
+ */
+struct lu_site {
+        /*
+         * lock protecting:
+         *
+         *        - ->ls_hash hash table (and its linkages in objects);
+         *
+         *        - ->ls_lru list (and its linkages in objects);
+         *
+         *        - 0/1 transitions of object ->loh_ref reference count;
+         *
+         * yes, it's heavy.
+         */
+        spinlock_t         ls_guard;
+        /*
+         * Hash-table where objects are indexed by fid.
+         */
+        struct hlist_head *ls_hash;
+        /*
+         * Bit-mask for hash-table size.
+         */
+        int                ls_hash_mask;
+
+
+        /*
+         * LRU list, updated on each access to object. Protected by
+         * ->ls_guard.
+         *
+         * "Cold" end of LRU is ->ls_lru.next. Accessed objects are moved to
+         * the ->ls_lru.prev (this is due to the non-existence of
+         * list_for_each_entry_safe_reverse()).
+         */
+        struct list_head   ls_lru;
+        /*
+         * Total number of objects in this site. Protected by ->ls_guard.
+         */
+        unsigned           ls_total;
+        /*
+         * Total number of objects in this site with reference counter greater
+         * than 0. Protected by ->ls_guard.
+         */
+        unsigned           ls_busy;
+
+        /*
+         * Top-level device for this stack.
+         */
+        struct lu_device  *ls_top_dev;
+        /* current server index */
+        __u32             ls_node_id;
+        /*
+         * Fid location database
+         */
+        struct fld        *ls_fld;
+
+        /* statistical counters. Protected by nothing, races are accepted. */
+        struct {
+                __u32 s_created;
+                __u32 s_cache_hit;
+                __u32 s_cache_miss;
+                /*
+                 * Number of hash-table entry checks made.
+                 *
+                 *       ->s_cache_check / (->s_cache_miss + ->s_cache_hit)
+                 *
+                 * is an average number of hash slots inspected during single
+                 * lookup.
+                 */
+                __u32 s_cache_check;
+                /* raced cache insertions */
+                __u32 s_cache_race;
+                __u32 s_lru_purged;
+        } ls_stats;
+};
+
+/*
+ * Constructors/destructors.
+ */
+
+/*
+ * Initialize site @s, with @d as the top level device.
+ */
+int  lu_site_init(struct lu_site *s, struct lu_device *d);
+/*
+ * Finalize @s and release its resources.
+ */
+void lu_site_fini(struct lu_site *s);
+
+/*
+ * Acquire additional reference on device @d
+ */
+void lu_device_get(struct lu_device *d);
+/*
+ * Release reference on device @d.
+ */
+void lu_device_put(struct lu_device *d);
+
+/*
+ * Initialize device @d of type @t.
+ */
+int lu_device_init(struct lu_device *d, struct lu_device_type *t);
+/*
+ * Finalize device @d.
+ */
+void lu_device_fini(struct lu_device *d);
+
+/*
+ * Initialize compound object.
+ */
+int lu_object_header_init(struct lu_object_header *h);
+/*
+ * Finalize compound object.
+ */
+void lu_object_header_fini(struct lu_object_header *h);
+
+/*
+ * Initialize object @o that is part of compound object @h and was created by
+ * device @d.
+ */
+int lu_object_init(struct lu_object *o,
+                   struct lu_object_header *h, struct lu_device *d);
+/*
+ * Finalize object and release its resources.
+ */
+void lu_object_fini(struct lu_object *o);
+/*
+ * Add object @o as first layer of compound object @h.
+ *
+ * This is typically called by the ->ldo_object_alloc() method of top-level
+ * device.
+ */
+void lu_object_add_top(struct lu_object_header *h, struct lu_object *o);
+/*
+ * Add object @o as a layer of compound object, going after @before.
+ *
+ * This is typically called by the ->ldo_object_alloc() method of
+ * @before->lo_dev.
+ */
+void lu_object_add(struct lu_object *before, struct lu_object *o);
+
+/*
+ * Caching and reference counting.
+ */
+
+/*
+ * Acquire additional reference to the given object. This function is used to
+ * attain additional reference. To acquire initial reference use
+ * lu_object_find().
+ */
+static inline void lu_object_get(struct lu_object *o)
+{
+        LASSERT(o->lo_header->loh_ref > 0);
+        spin_lock(&o->lo_dev->ld_site->ls_guard);
+        o->lo_header->loh_ref ++;
+        spin_unlock(&o->lo_dev->ld_site->ls_guard);
+}
+
+/*
+ * Return true if object will not be cached after last reference to it is
+ * released.
+ */
+static inline int lu_object_is_dying(struct lu_object_header *h)
+{
+        return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags);
+}
+
+/*
+ * Decrease reference counter on object. If last reference is freed, return
+ * object to the cache, unless lu_object_is_dying(o) holds. In the latter
+ * case, free object immediately.
+ */
+void lu_object_put(struct lu_context *ctxt, struct lu_object *o);
+
+/*
+ * Free @nr objects from the cold end of the site LRU list.
+ */
+void lu_site_purge(struct lu_context *ctx, struct lu_site *s, int nr);
+
+/*
+ * Search cache for an object with the fid @f. If such object is found, return
+ * it. Otherwise, create new object, insert it into cache and return it. In
+ * any case, additional reference is acquired on the returned object.
+ */
+struct lu_object *lu_object_find(struct lu_context *ctxt,
+                                 struct lu_site *s, const struct lu_fid *f);
+
+/*
+ * Helpers.
+ */
+
+/*
+ * First (topmost) sub-object of given compound object
+ */
+static inline struct lu_object *lu_object_top(struct lu_object_header *h)
+{
+        LASSERT(!list_empty(&h->loh_layers));
+        return container_of0(h->loh_layers.next, struct lu_object, lo_linkage);
+}
+
+/*
+ * Next sub-object in the layering
+ */
+static inline struct lu_object *lu_object_next(const struct lu_object *o)
+{
+        return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage);
+}
+
+/*
+ * Pointer to the fid of this object.
+ */
+static inline const struct lu_fid *lu_object_fid(const struct lu_object *o)
+{
+        return &o->lo_header->loh_fid;
+}
+
+/*
+ * return device operations vector for this object
+ */
+static inline struct lu_device_operations *
+lu_object_ops(const struct lu_object *o)
+{
+        return o->lo_dev->ld_ops;
+}
+
+/*
+ * Given a compound object, find its slice, corresponding to the device type
+ * @dtype.
+ */
+struct lu_object *lu_object_locate(struct lu_object_header *h,
+                                   struct lu_device_type *dtype);
+
+/*
+ * Print human readable representation of the @o to the @f.
+ */
+int lu_object_print(struct lu_context *ctxt,
+                    struct seq_file *f, const struct lu_object *o);
+
+/*
+ * Returns true iff object @o exists on the stable storage.
+ */
+static inline int lu_object_exists(struct lu_context *ctx, struct lu_object *o)
+{
+        return o->lo_ops->loo_object_exists(ctx, o);
+}
+
+/*
+ * lu_context. Execution context for lu_object methods. Currently associated
+ * with thread.
+ *
+ * All lu_object methods, except device and device type methods (called during
+ * system initialization and shutdown) are executed "within" some
+ * lu_context. This means, that pointer to some "current" lu_context is passed
+ * as an argument to all methods.
+ *
+ * All service ptlrpc threads create lu_context as part of their
+ * initialization. It is possible to create "stand-alone" context for other
+ * execution environments (like system calls).
+ *
+ * lu_object methods mainly use lu_context through lu_context_key interface
+ * that allows each layer to associate arbitrary pieces of data with each
+ * context (see pthread_key_create(3) for similar interface).
+ *
+ */
+struct lu_context {
+        /*
+         * Theoretically we'd want to use lu_objects and lu_contexts on the
+         * client side too. On the other hand, we don't want to allocate
+         * values of server-side keys for the client contexts and vice versa.
+         *
+         * To achieve this, a set of tags is introduced. Contexts and keys are
+         * marked with tags. Key values are created only for contexts whose set
+         * of tags has non-empty intersection with that of the key. NOT YET
+         * IMPLEMENTED.
+         */
+        __u32                  lc_tags;
+        /*
+         * Pointer to the home service thread. NULL for other execution
+         * contexts.
+         */
+        struct ptlrpc_thread  *lc_thread;
+        /*
+         * Pointer to an array with key values. Internal implementation
+         * detail.
+         */
+        void                 **lc_value;
+};
+
+/*
+ * lu_context_key interface. Similar to pthread_key.
+ */
+
+
+/*
+ * Key. Represents per-context value slot.
+ */
+struct lu_context_key {
+        /*
+         * Value constructor. This is called when new value is created for a
+         * context. Returns pointer to new value or error pointer.
+         */
+        void  *(*lct_init)(struct lu_context *ctx);
+        /*
+         * Value destructor. Called when context with previously allocated
+         * value of this slot is destroyed. @data is a value that was returned
+         * by a matching call to ->lct_init().
+         */
+        void   (*lct_fini)(struct lu_context *ctx, void *data);
+        /*
+         * Internal implementation detail: index within ->lc_value[] reserved
+         * for this key.
+         */
+        int      lct_index;
+        /*
+         * Internal implementation detail: number of values created for this
+         * key.
+         */
+        unsigned lct_used;
+};
+
+/*
+ * Register new key.
+ */
+int   lu_context_key_register(struct lu_context_key *key);
+/*
+ * Deregister key.
+ */
+void  lu_context_key_degister(struct lu_context_key *key);
+/*
+ * Return value associated with key @key in context @ctx.
+ */
+void *lu_context_key_get(struct lu_context *ctx, struct lu_context_key *key);
+
+/*
+ * Initialize context data-structure. Create values for all keys.
+ */
+int  lu_context_init(struct lu_context *ctx);
+/*
+ * Finalize context data-structure. Destroy key values.
+ */
+void lu_context_fini(struct lu_context *ctx);
+
+/*
+ * Called before entering context.
+ */
+void lu_context_enter(struct lu_context *ctx);
+/*
+ * Called after exiting from @ctx
+ */
+void lu_context_exit(struct lu_context *ctx);
+
+
+#endif /* __LUSTRE_LU_OBJECT_H */
index 1580bde..0acd90f 100644 (file)
@@ -4,7 +4,7 @@
 # See the file COPYING in this distribution
 
 if UTILS
-pkginclude_HEADERS = lustre_user.h liblustreapi.h types.h
+pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h types.h
 endif
 
-EXTRA_DIST = lustre_user.h liblustreapi.h types.h
+EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h types.h
index 557c3ab..08f8786 100644 (file)
@@ -23,7 +23,7 @@ extern int llapi_ping(char *obd_type, char *obd_name);
 extern int llapi_target_check(int num_types, char **obd_types, char *dir);
 extern int llapi_catinfo(char *dir, char *keyword, char *node_name);
 extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count);
-extern int llapi_is_lustre_mnttype(char *type);
+extern int llapi_is_lustre_mnttype(struct mntent *mnt);
 extern int llapi_quotachown(char *path, int flag);
 extern int llapi_quotacheck(char *mnt, int check_type);
 extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk);
similarity index 90%
rename from lustre/include/linux/lustre_idl.h
rename to lustre/include/lustre/lustre_idl.h
index 56ffc5e..7f591fe 100644 (file)
 #ifndef _LUSTRE_IDL_H_
 #define _LUSTRE_IDL_H_
 
-#ifdef HAVE_ASM_TYPES_H
-#include <asm/types.h>
+#if defined(__linux__)
+#include <linux/lustre_types.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_types.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_types.h>
 #else
-#include <lustre/types.h>
-#endif
-
-#ifdef __KERNEL__
-# include <linux/types.h>
-# include <linux/fs.h>     /* to check for FMODE_EXEC, dev_t, lest we redefine */
-# ifdef CONFIG_FS_POSIX_ACL
-# include <linux/xattr_acl.h>
-# endif
-#else
-#ifdef __CYGWIN__
-# include <sys/types.h>
-#elif defined(_AIX)
-# include <inttypes.h>
-#else
-# include <stdint.h>
-#endif
+#error Unsupported operating system.
 #endif
 
 #include <lnet/types.h>   /* for lnet_nid_t */
 
-/* Defn's shared with user-space. */
-#include <lustre/lustre_user.h>
-
 /*
  * CLASSERT()
  */
 #include <libcfs/kp30.h>
 
+/* Defn's shared with user-space. */
+#include <lustre/lustre_user.h>
+
 /*
  * this file contains all data structures used in Lustre interfaces:
  * - obdo and obd_request records
 
 #define CONNMGR_REQUEST_PORTAL          1
 #define CONNMGR_REPLY_PORTAL            2
+//#define OSC_REQUEST_PORTAL            3
 #define OSC_REPLY_PORTAL                4
+//#define OSC_BULK_PORTAL               5
 #define OST_IO_PORTAL                   6
 #define OST_CREATE_PORTAL               7
 #define OST_BULK_PORTAL                 8
+//#define MDC_REQUEST_PORTAL            9
 #define MDC_REPLY_PORTAL               10
+//#define MDC_BULK_PORTAL              11
 #define MDS_REQUEST_PORTAL             12
+//#define MDS_REPLY_PORTAL             13
 #define MDS_BULK_PORTAL                14
 #define LDLM_CB_REQUEST_PORTAL         15
 #define LDLM_CB_REPLY_PORTAL           16
 #define LDLM_CANCEL_REQUEST_PORTAL     17
 #define LDLM_CANCEL_REPLY_PORTAL       18
+//#define PTLBD_REQUEST_PORTAL           19
+//#define PTLBD_REPLY_PORTAL             20
+//#define PTLBD_BULK_PORTAL              21
 #define MDS_SETATTR_PORTAL             22
 #define MDS_READPAGE_PORTAL            23
+
 #define MGC_REPLY_PORTAL               25
 #define MGS_REQUEST_PORTAL             26
 #define MGS_REPLY_PORTAL               27
@@ -227,6 +224,7 @@ struct lmv_stripe_md {
         struct lu_fid mea_ids[0];
 };
 
+
 struct lustre_handle {
         __u64 cookie;
 };
@@ -340,14 +338,16 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
 #define OBD_CONNECT_TRANSNO    0x800ULL /* replay is sending initial transno */
 #define OBD_CONNECT_IBITS     0x1000ULL /* support for inodebits locks */
 #define OBD_CONNECT_JOIN      0x2000ULL /* files can be concatenated */
-#define OBD_CONNECT_REAL      0x4000ULL /* show MD stack that real connect is
-                                         * performed */
+#define OBD_CONNECT_REAL      0x4000ULL
+#define OBD_CONNECT_NODEVOH   0x8000ULL /* No open handle for special nodes */
 #define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
+
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
 
 #define MDS_CONNECT_SUPPORTED  (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \
-                                OBD_CONNECT_IBITS | OBD_CONNECT_JOIN)
+                                OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \
+                                OBD_CONNECT_NODEVOH)
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
                                 OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX)
@@ -365,6 +365,26 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags)
 #define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255)
 #define OBD_OCD_VERSION_FIX(version)   ((int)(version)&255)
 
+/* This structure is used for both request and reply.
+ *
+ * If we eventually have separate connect data for different types, which we
+ * almost certainly will, then perhaps we stick a union in here. */
+struct obd_connect_data {
+        __u64 ocd_connect_flags;        /* OBD_CONNECT_* per above */
+        __u32 ocd_version;              /* lustre release version number */
+        __u32 ocd_grant;                /* initial cache grant amount (bytes) */
+        __u32 ocd_index;                /* LOV index to connect to */
+        __u32 ocd_unused;
+        __u64 ocd_ibits_known;          /* inode bits this client understands */
+        __u64 ocd_seq;                  /* sequence info for client */
+        __u64 padding2;                 /* also fix lustre_swab_connect */
+        __u64 padding3;                 /* also fix lustre_swab_connect */
+        __u64 padding4;                 /* also fix lustre_swab_connect */
+        __u64 padding5;                 /* also fix lustre_swab_connect */
+};
+
+extern void lustre_swab_connect(struct obd_connect_data *ocd);
+
 /*
  *   OST requests: OBDO & OBD request records
  */
@@ -428,7 +448,7 @@ typedef uint32_t        obd_count;
 #define OBD_FL_TRUNCLOCK     (0x00000800)
 
 /* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct
- * llog_cookie) + sizeof(lu_fid). Nevertheless struct lu_fid is not longer
+ * llog_cookie) + sizeof(ll_fid). Nevertheless struct ll_fid is not longer
  * stored in o_inline, we keep this just for case. */
 #define OBD_INLINESZ    80
 
@@ -520,37 +540,39 @@ struct lov_mds_md_v1 {            /* LOV EA mds/wire data (little-endian) */
         struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 };
 
-#define OBD_MD_FLID        (0x0000000000000001ULL) /* object ID */
-#define OBD_MD_FLATIME     (0x0000000000000002ULL) /* access time */
-#define OBD_MD_FLMTIME     (0x0000000000000004ULL) /* data modification time */
-#define OBD_MD_FLCTIME     (0x0000000000000008ULL) /* change time */
-#define OBD_MD_FLSIZE      (0x0000000000000010ULL) /* size */
-#define OBD_MD_FLBLOCKS    (0x0000000000000020ULL) /* allocated blocks count */
-#define OBD_MD_FLBLKSZ     (0x0000000000000040ULL) /* block size */
-#define OBD_MD_FLMODE      (0x0000000000000080ULL) /* access bits (mode & ~S_IFMT) */
-#define OBD_MD_FLTYPE      (0x0000000000000100ULL) /* object type (mode & S_IFMT) */
-#define OBD_MD_FLUID       (0x0000000000000200ULL) /* user ID */
-#define OBD_MD_FLGID       (0x0000000000000400ULL) /* group ID */
-#define OBD_MD_FLFLAGS     (0x0000000000000800ULL) /* flags word */
-#define OBD_MD_FLNLINK     (0x0000000000002000ULL) /* link count */
-#define OBD_MD_FLGENER     (0x0000000000004000ULL) /* generation number */
-#define OBD_MD_FLINLINE    (0x0000000000008000ULL) /* inline data */
-#define OBD_MD_FLRDEV      (0x0000000000010000ULL) /* device number */
-#define OBD_MD_FLEASIZE    (0x0000000000020000ULL) /* extended attribute data */
-#define OBD_MD_LINKNAME    (0x0000000000040000ULL) /* symbolic link target */
-#define OBD_MD_FLHANDLE    (0x0000000000080000ULL) /* file handle */
-#define OBD_MD_FLCKSUM     (0x0000000000100000ULL) /* bulk data checksum */
-#define OBD_MD_FLQOS       (0x0000000000200000ULL) /* quality of service stats */
-#define OBD_MD_FLOSCOPQ    (0x0000000000400000ULL) /* osc opaque data */
-#define OBD_MD_FLCOOKIE    (0x0000000000800000ULL) /* log cancellation cookie */
-#define OBD_MD_FLGROUP     (0x0000000001000000ULL) /* group */
-#define OBD_MD_FLFID       (0x0000000002000000ULL) /* ->ost write inline fid */
-#define OBD_MD_FLEPOCH     (0x0000000004000000ULL) /* ->ost write easize is epoch */
-#define OBD_MD_FLGRANT     (0x0000000008000000ULL) /* ost preallocation space grant */
-#define OBD_MD_FLDIREA     (0x0000000010000000ULL) /* dir's extended attribute data */
-#define OBD_MD_FLUSRQUOTA  (0x0000000020000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLGRPQUOTA  (0x0000000040000000ULL) /* over quota flags sent from ost */
-#define OBD_MD_FLMODEASIZE (0x0000000080000000ULL) /* EA size will be changed */
+
+#define OBD_MD_FLID        (0x00000001ULL) /* object ID */
+#define OBD_MD_FLATIME     (0x00000002ULL) /* access time */
+#define OBD_MD_FLMTIME     (0x00000004ULL) /* data modification time */
+#define OBD_MD_FLCTIME     (0x00000008ULL) /* change time */
+#define OBD_MD_FLSIZE      (0x00000010ULL) /* size */
+#define OBD_MD_FLBLOCKS    (0x00000020ULL) /* allocated blocks count */
+#define OBD_MD_FLBLKSZ     (0x00000040ULL) /* block size */
+#define OBD_MD_FLMODE      (0x00000080ULL) /* access bits (mode & ~S_IFMT) */
+#define OBD_MD_FLTYPE      (0x00000100ULL) /* object type (mode & S_IFMT) */
+#define OBD_MD_FLUID       (0x00000200ULL) /* user ID */
+#define OBD_MD_FLGID       (0x00000400ULL) /* group ID */
+#define OBD_MD_FLFLAGS     (0x00000800ULL) /* flags word */
+#define OBD_MD_FLNLINK     (0x00002000ULL) /* link count */
+#define OBD_MD_FLGENER     (0x00004000ULL) /* generation number */
+#define OBD_MD_FLINLINE    (0x00008000ULL) /* inline data */
+#define OBD_MD_FLRDEV      (0x00010000ULL) /* device number */
+#define OBD_MD_FLEASIZE    (0x00020000ULL) /* extended attribute data */
+#define OBD_MD_LINKNAME    (0x00040000ULL) /* symbolic link target */
+#define OBD_MD_FLHANDLE    (0x00080000ULL) /* file handle */
+#define OBD_MD_FLCKSUM     (0x00100000ULL) /* bulk data checksum */
+#define OBD_MD_FLQOS       (0x00200000ULL) /* quality of service stats */
+#define OBD_MD_FLOSCOPQ    (0x00400000ULL) /* osc opaque data */
+#define OBD_MD_FLCOOKIE    (0x00800000ULL) /* log cancellation cookie */
+#define OBD_MD_FLGROUP     (0x01000000ULL) /* group */
+#define OBD_MD_FLFID       (0x02000000ULL) /* ->ost write inline fid */
+#define OBD_MD_FLEPOCH     (0x04000000ULL) /* ->ost write easize is epoch */
+#define OBD_MD_FLGRANT     (0x08000000ULL) /* ost preallocation space grant */
+#define OBD_MD_FLDIREA     (0x10000000ULL) /* dir's extended attribute data */
+#define OBD_MD_FLUSRQUOTA  (0x20000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLGRPQUOTA  (0x40000000ULL) /* over quota flags sent from ost */
+#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */
+
 #define OBD_MD_MDS         (0x0000000100000000ULL) /* where an inode lives on */
 #define OBD_MD_REINT       (0x0000000200000000ULL) /* reintegrate oa */
 #define OBD_MD_MEA         (0x0000000400000000ULL) /* CMD EA  */
@@ -713,7 +735,7 @@ typedef enum {
 //      REINT_CLOSE    = 7,
 //      REINT_WRITE    = 8,
         REINT_MAX
-} mds_reint_t,mdt_reint_t;
+} mds_reint_t, mdt_reint_t;
 
 /* the disposition of the intent outlines what was executed */
 #define DISP_IT_EXECD     0x01
@@ -740,7 +762,6 @@ typedef enum {
 #define LUSTRE_CONFIG_METASEQ "metaseq"
 #define LUSTRE_CONFIG_TRANSNO "transno"
 
-/* temporary stuff for compatibility */
 struct ll_fid {
         __u64 id;         /* holds object id */
         __u32 generation; /* holds object generation */
@@ -749,27 +770,7 @@ struct ll_fid {
                            * OST for saving into EA. */
 };
 
-
 extern void lustre_swab_ll_fid (struct ll_fid *fid);
-/* This structure is used for both request and reply.
- *
- * If we eventually have separate connect data for different types, which we
- * almost certainly will, then perhaps we stick a union in here. */
-struct obd_connect_data {
-        __u64          ocd_connect_flags;        /* OBD_CONNECT_* per above */
-        __u32          ocd_version;              /* lustre release version number */
-        __u32          ocd_grant;                /* initial cache grant amount (bytes) */
-        __u32          ocd_index;                /* LOV index to connect to */
-        __u32          ocd_unused;
-        __u64          ocd_ibits_known;          /* inode bits this client understands */
-        __u64          ocd_seq;                  /* sequence info for client */
-        __u64          padding2;                 /* also fix lustre_swab_connect */
-        __u64          padding3;                 /* also fix lustre_swab_connect */
-        __u64          padding4;                 /* also fix lustre_swab_connect */
-        __u64          padding5;                 /* also fix lustre_swab_connect */
-};
-
-extern void lustre_swab_connect(struct obd_connect_data *ocd);
 
 #define MDS_STATUS_CONN 1
 #define MDS_STATUS_LOV 2
@@ -1103,6 +1104,29 @@ struct mdt_rec_rename {
 
 extern void lustre_swab_mdt_rec_rename (struct mdt_rec_rename *rn);
 
+/* begin adding MDT by huanghua@clusterfs.com */
+struct lmv_desc {
+        __u32 ld_tgt_count;                /* how many MDS's */
+        __u32 ld_active_tgt_count;         /* how many active */
+        struct obd_uuid ld_uuid;
+};
+
+extern void lustre_swab_lmv_desc (struct lmv_desc *ld);
+/* end adding MDT by huanghua@clusterfs.com */
+
+struct md_fld {
+        __u64 mf_seq;
+        __u64 mf_mds;
+};
+
+extern void lustre_swab_md_fld (struct md_fld *mf);
+
+enum fld_rpc_opc {
+        FLD_QUERY                       = 600,
+        FLD_LAST_OPC,
+        FLD_FIRST_OPC                   = FLD_QUERY
+};
+
 /*
  *  LOV data structures
  */
@@ -1124,30 +1148,15 @@ struct lov_desc {
         __u32 ld_pattern;                  /* PATTERN_RAID0, PATTERN_RAID1 */
         __u64 ld_default_stripe_size;      /* in bytes */
         __u64 ld_default_stripe_offset;    /* in bytes */
+        __u32 ld_qos_threshold;            /* in MB */
+        __u32 ld_qos_maxage;               /* in seconds */
         __u32 ld_padding_1;                /* also fix lustre_swab_lov_desc */
         __u32 ld_padding_2;                /* also fix lustre_swab_lov_desc */
-        __u32 ld_padding_3;                /* also fix lustre_swab_lov_desc */
-        __u32 ld_padding_4;                /* also fix lustre_swab_lov_desc */
         struct obd_uuid ld_uuid;
 };
 
 #define ld_magic ld_active_tgt_count       /* for swabbing from llogs */
 
-/*begin adding MDT by huanghua@clusterfs.com*/
-struct lmv_desc {
-        __u32 ld_tgt_count;                /* how many MDS's */
-        __u32 ld_active_tgt_count;         /* how many active */
-        struct obd_uuid ld_uuid;
-};
-
-extern void lustre_swab_lmv_desc (struct lmv_desc *ld);
-/*end adding MDT by huanghua@clusterfs.com*/
-
-struct md_fld {
-        __u64 mf_seq;
-        __u64 mf_mds;
-};
-extern void lustre_swab_md_fld (struct md_fld *mf);
 extern void lustre_swab_lov_desc (struct lov_desc *ld);
 
 /*
@@ -1336,6 +1345,13 @@ typedef enum {
 
 /* catalog of log objects */
 
+/* Identifier for a single log object */
+struct llog_logid {
+        __u64                   lgl_oid;
+        __u64                   lgl_ogr;
+        __u32                   lgl_ogen;
+} __attribute__((packed));
+
 /* Records written to the CATALOGS list */
 #define CATLIST "CATALOGS"
 struct llog_catid {
@@ -1345,7 +1361,6 @@ struct llog_catid {
         __u32                   lci_padding3;
 } __attribute__((packed));
 
-
 /*join file lov mds md*/
 struct lov_mds_md_join {
         struct lov_mds_md lmmj_md;
@@ -1532,12 +1547,6 @@ enum llogd_rpc_ops {
         LLOG_LAST_OPC
 };
 
-enum fld_rpc_opc {
-        FLD_QUERY                       = 600,
-        FLD_LAST_OPC,
-        FLD_FIRST_OPC                   = FLD_QUERY
-};
-
 struct llogd_body {
         struct llog_logid  lgd_logid;
         __u32 lgd_ctxt_idx;
@@ -1554,6 +1563,29 @@ struct llogd_conn_body {
         __u32                   lgdc_ctxt_idx;
 } __attribute__((packed));
 
+struct lov_user_ost_data_join {   /* per-stripe data structure */
+        __u64 l_extent_start;     /* extent start */
+        __u64 l_extent_end;       /* extent end */
+        __u64 l_object_id;        /* OST object ID */
+        __u64 l_object_gr;        /* OST object group (creating MDS number) */
+        __u32 l_ost_gen;          /* generation of this OST index */
+        __u32 l_ost_idx;          /* OST index in LOV */
+} __attribute__((packed));
+
+struct lov_user_md_join {         /* LOV EA user data (host-endian) */
+        __u32 lmm_magic;          /* magic number = LOV_MAGIC_JOIN */
+        __u32 lmm_pattern;        /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
+        __u64 lmm_object_id;      /* LOV object ID */
+        __u64 lmm_object_gr;      /* LOV object group */
+        __u32 lmm_stripe_size;    /* size of stripe in bytes */
+        __u32 lmm_stripe_count;   /* num stripes in use for this object */
+        __u32 lmm_extent_count;   /* extent count of lmm */
+        __u64 lmm_tree_id;        /* mds tree object id */
+        __u64 lmm_tree_gen;       /* mds tree object gen */
+        struct llog_logid lmm_array_id; /* mds extent desc llog object id */
+        struct lov_user_ost_data_join lmm_objects[0]; /* per-stripe data */
+} __attribute__((packed));
+
 extern void lustre_swab_lov_user_md(struct lov_user_md *lum);
 extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum);
 extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj);
index 8df4aea..4afa860 100644 (file)
@@ -9,28 +9,14 @@
 #ifndef _LUSTRE_USER_H
 #define _LUSTRE_USER_H
 
-#ifdef HAVE_ASM_TYPES_H
-#include <asm/types.h>
+#if defined(__linux__)
+#include <linux/lustre_user.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_user.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_user.h>
 #else
-#include <lustre/types.h>
-#endif
-
-#ifdef HAVE_QUOTA_SUPPORT
-#include <linux/quota.h>
-#endif
-
-/*
- * asm-x86_64/processor.h on some SLES 9 distros seems to use
- * kernel-only typedefs.  fortunately skipping it altogether is ok
- * (for now).
- */
-#define __ASM_X86_64_PROCESSOR_H
-
-#ifdef __KERNEL__
-#include <linux/string.h>
-#else
-#include <string.h>
-#include <sys/stat.h>
+#error Unsupported operating system.
 #endif
 
 /* for statfs() */
@@ -66,6 +52,7 @@ struct obd_statfs;
 #define LL_STATFS_LOV           2
 
 #define IOC_MDC_TYPE            'i'
+#define IOC_MDC_LOOKUP          _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
 #define IOC_MDC_GETSTRIPE       _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
 #define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_mds_data *)
 
@@ -105,15 +92,6 @@ struct lov_user_md_v1 {           /* LOV EA user data (host-endian) */
         struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
 } __attribute__((packed));
 
-#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
-    defined(__craynv)
-typedef struct stat     lstat_t;
-#define HAVE_LOV_USER_MDS_DATA
-#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__)
-typedef struct stat64   lstat_t;
-#define HAVE_LOV_USER_MDS_DATA
-#endif
-
 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
  * use this.  It is unsafe to #define those values in this header as it
  * is possible the application has already #included <sys/stat.h>. */
@@ -125,37 +103,6 @@ struct lov_user_mds_data_v1 {
 } __attribute__((packed));
 #endif
 
-struct lov_user_ost_data_join {   /* per-stripe data structure */
-        __u64 l_extent_start;     /* extent start*/
-        __u64 l_extent_end;       /* extent end*/
-        __u64 l_object_id;        /* OST object ID */
-        __u64 l_object_gr;        /* OST object group (creating MDS number) */
-        __u32 l_ost_gen;          /* generation of this OST index */
-        __u32 l_ost_idx;          /* OST index in LOV */
-} __attribute__((packed));
-
-/* Identifier for a single log object */
-struct llog_logid {
-        __u64                   lgl_oid;
-        __u64                   lgl_ogr;
-        __u32                   lgl_ogen;
-} __attribute__((packed));
-
-struct lov_user_md_join {         /* LOV EA user data (host-endian) */
-        __u32 lmm_magic;          /* magic number = LOV_MAGIC_JOIN */
-        __u32 lmm_pattern;        /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
-        __u64 lmm_object_id;      /* LOV object ID */
-        __u64 lmm_object_gr;      /* LOV object group */
-        __u32 lmm_stripe_size;    /* size of stripe in bytes */
-        __u32 lmm_stripe_count;   /* num stripes in use for this object */
-        __u32 lmm_extent_count;   /* extent count of lmm*/
-        __u64 lmm_tree_id;        /* mds tree object id */
-        __u64 lmm_tree_gen;       /* mds tree object gen */
-        struct llog_logid lmm_array_id; /* mds extent desc llog object id */
-        struct lov_user_ost_data_join lmm_objects[0]; /* per-stripe data */
-} __attribute__((packed));
-
-
 struct ll_recreate_obj {
         __u64 lrc_id;
         __u32 lrc_ost_idx;
@@ -181,7 +128,8 @@ static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
         uuid->uuid[sizeof(*uuid) - 1] = '\0';
 }
 
-static inline char *obd_uuid2str(struct obd_uuid *uuid)
+/* For printfs only: makes sure uuid is NUL-terminated */
+static inline char *obd_uuid2str(struct obd_uuid *uuid) 
 {
         if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
                 /* Obviously not safe, but for printfs, no real harm done...*/
@@ -220,16 +168,6 @@ struct mds_grp_downcall_data {
         __u32           mgd_groups[0];
 };
 
-
-#ifndef __KERNEL__
-#define NEED_QUOTA_DEFS
-#else
-# include <linux/version.h>
-# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21)
-#  define NEED_QUOTA_DEFS
-# endif
-#endif
-
 #ifdef NEED_QUOTA_DEFS
 #ifndef QUOTABLOCK_BITS
 #define QUOTABLOCK_BITS 10
@@ -291,29 +229,6 @@ struct if_quotactl {
         struct obd_uuid         obd_uuid;
 };
 
-#ifndef LPU64
-/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */
-#if defined(__x86_64__) && defined(__KERNEL__)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ  "%lu"
-# define LPSSZ "%ld"
-#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ  "%u"
-# define LPSSZ "%d"
-#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
-# define LPU64 "%lu"
-# define LPD64 "%ld"
-# define LPX64 "%#lx"
-# define LPSZ  "%lu"
-# define LPSSZ "%ld"
-#endif
-#endif /* !LPU64 */
-
 #ifndef offsetof
 # define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
 #endif
index 5389d37..a5da592 100644 (file)
@@ -1,27 +1,14 @@
 #ifndef _LUSTRE_TYPES_H
 #define _LUSTRE_TYPES_H
 
-typedef unsigned short umode_t;
-
-#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
-       !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H))
-
-/*
- * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
- * header files exported to user space
- */
-
-typedef __signed__ char __s8;
-typedef unsigned char __u8;
-
-typedef __signed__ short __s16;
-typedef unsigned short __u16;
-
-typedef __signed__ int __s32;
-typedef unsigned int __u32;
-
-typedef __signed__ long long __s64;
-typedef unsigned long long __u64;
+#if defined(__linux__)
+#include <linux/lustre_types.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_types.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_types.h>
+#else
+#error Unsupported operating system.
 #endif
 
 #endif
similarity index 99%
rename from lustre/include/linux/lustre_cfg.h
rename to lustre/include/lustre_cfg.h
index 073ac8f..a04245a 100644 (file)
@@ -184,7 +184,7 @@ static inline int lustre_cfg_len(uint32_t bufcount, uint32_t *buflens)
 }
 
 
-#include <linux/obd_support.h>
+#include <obd_support.h>
 
 static inline struct lustre_cfg *lustre_cfg_new(int cmd,
                                                 struct lustre_cfg_bufs *bufs)
similarity index 96%
rename from lustre/include/linux/lustre_commit_confd.h
rename to lustre/include/lustre_commit_confd.h
index fa1cb35..40b1978 100644 (file)
@@ -9,7 +9,7 @@
 #ifndef _LUSTRE_COMMIT_CONFD_H
 #define _LUSTRE_COMMIT_CONFD_H
 
-#include <linux/lustre_log.h>
+#include <lustre_log.h>
 
 struct llog_canceld_ctxt {
         struct list_head           llcd_list;  /* free or pending struct list */
@@ -29,7 +29,7 @@ struct llog_commit_master {
         int                     lcm_thread_max;   /* <= num_osts normally */
 
         int                     lcm_flags;
-        wait_queue_head_t       lcm_waitq;
+        cfs_waitq_t             lcm_waitq;
 
         struct list_head        lcm_llcd_pending; /* llog_canceld_ctxt to send */
         struct list_head        lcm_llcd_resend;  /* try to resend this data */
diff --git a/lustre/include/lustre_debug.h b/lustre/include/lustre_debug.h
new file mode 100644 (file)
index 0000000..c6bd7ba
--- /dev/null
@@ -0,0 +1,64 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _LUSTRE_DEBUG_H
+#define _LUSTRE_DEBUG_H
+
+#include <lustre_net.h>
+
+#if defined(__linux__)
+#include <linux/lustre_debug.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_debug.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_debug.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#define ASSERT_MAX_SIZE_MB 60000ULL /* largest sane size, in MB */
+#define ASSERT_PAGE_INDEX(index, OP) /* CERROR + OP if index too big */ \
+do { if ((index) > ASSERT_MAX_SIZE_MB << (20 - CFS_PAGE_SHIFT)) {       \
+        CERROR("bad page index %lu > %Lu\n", (index),                   \
+               ASSERT_MAX_SIZE_MB << (20 - CFS_PAGE_SHIFT));            \
+        libcfs_debug = ~0UL;                                            \
+        OP;                                                             \
+}} while(0)
+
+#define ASSERT_FILE_OFFSET(offset, OP) /* CERROR + OP if too large */   \
+do { if ((offset) > ASSERT_MAX_SIZE_MB << 20) {                         \
+        CERROR("bad file offset %Lu > %Lu\n", (offset),                 \
+               ASSERT_MAX_SIZE_MB << 20);                               \
+        libcfs_debug = ~0UL;                                            \
+        OP;                                                             \
+}} while(0)
+
+/* lib/debug.c */
+int dump_lniobuf(struct niobuf_local *lnb);
+int dump_rniobuf(struct niobuf_remote *rnb);
+int dump_ioo(struct obd_ioobj *nb);
+int dump_req(struct ptlrpc_request *req);
+int dump_obdo(struct obdo *oa);
+void dump_lsm(int level, struct lov_stripe_md *lsm);
+int block_debug_setup(void *addr, int len, __u64 off, __u64 id);
+int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id);
+#endif
similarity index 94%
rename from lustre/include/linux/lustre_disk.h
rename to lustre/include/lustre_disk.h
index 820f1eb..9e0bd03 100644 (file)
 
 
 enum ldd_mount_type {
-        LDD_MT_EXT3 = 0,
+        LDD_MT_EXT3 = 0, 
         LDD_MT_LDISKFS,
-        LDD_MT_SMFS,
+        LDD_MT_SMFS,   
         LDD_MT_REISERFS,
         LDD_MT_LAST
 };
-
+       
 static inline char *mt_str(enum ldd_mount_type mt)
 {
         static char *mount_type_string[] = {
@@ -85,16 +85,16 @@ struct lustre_disk_data {
         __u32      ldd_feature_compat;  /* compatible feature flags */
         __u32      ldd_feature_rocompat;/* read-only compatible feature flags */
         __u32      ldd_feature_incompat;/* incompatible feature flags */
-
+        
         __u32      ldd_config_ver;      /* config rewrite count - not used */
         __u32      ldd_flags;           /* LDD_SV_TYPE */
-        __u32      ldd_svindex;         /* server index (0001), must match
+        __u32      ldd_svindex;         /* server index (0001), must match 
                                            svname */
         __u32      ldd_mount_type;      /* target fs type LDD_MT_* */
         char       ldd_fsname[64];      /* filesystem this server is part of */
         char       ldd_svname[64];      /* this server's name (lustre-mdt0001)*/
         __u8       ldd_uuid[40];        /* server UUID (COMPAT_146) */
-
+   
 /*200*/ __u8       ldd_padding[4096 - 200];
 /*4096*/char       ldd_mount_opts[4096]; /* target fs mount opts */
 /*8192*/char       ldd_params[4096];     /* key=value pairs */
@@ -111,7 +111,7 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs,
 {
         if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
                 sprintf(name, "%.8s-%s%04x", fs,
-                        (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
+                        (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",  
                         index);
         } else if (flags & LDD_F_SV_TYPE_MGS) {
                 sprintf(name, "MGS");
@@ -128,7 +128,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr);
 
 /****************** mount command *********************/
 
-/* The lmd is only used internally by Lustre; mount simply passes
+/* The lmd is only used internally by Lustre; mount simply passes 
    everything as string options */
 
 #define LMD_MAGIC    0xbdacbd03
@@ -141,17 +141,17 @@ struct lustre_mount_data {
         int        lmd_exclude_count;
         char      *lmd_dev;           /* device name */
         char      *lmd_profile;       /* client only */
-        char      *lmd_opts;          /* lustre mount options (as opposed to
+        char      *lmd_opts;          /* lustre mount options (as opposed to 
                                          _device_ mount options) */
         __u32     *lmd_exclude;       /* array of OSTs to ignore */
 };
 
 #define LMD_FLG_CLIENT       0x0002  /* Mounting a client only */
 #define LMD_FLG_RECOVER      0x0004  /* Allow recovery */
-#define LMD_FLG_NOSVC        0x0008  /* Only start MGS/MGC for servers,
+#define LMD_FLG_NOSVC        0x0008  /* Only start MGS/MGC for servers, 
                                         no other services */
 
-#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT)
+#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) 
 
 /****************** mkfs command *********************/
 
@@ -167,17 +167,21 @@ struct mkfs_opts {
         char  mo_loopdev[128];          /* in case a loop dev is needed */
         __u64 mo_device_sz;             /* in KB */
         int   mo_stripe_count;
-        int   mo_flags;
+        int   mo_flags; 
         int   mo_mgs_failnodes;
 };
 
+/****************** on-disk files *********************/
+
+#define LAST_RCVD    "last_rcvd"
+#define LOV_OBJID    "lov_objid"
+#define HEALTH_CHECK "health_check"
+
 /****************** last_rcvd file *********************/
 
-#define LAST_RCVD "last_rcvd"
-#define LOV_OBJID "lov_objid"
-#define LR_SERVER_SIZE    512
-#define LR_CLIENT_START   8192
-#define LR_CLIENT_SIZE    128
+#define LR_SERVER_SIZE   512
+#define LR_CLIENT_START 8192
+#define LR_CLIENT_SIZE   128
 #if LR_CLIENT_START < LR_SERVER_SIZE
 #error "Can't have LR_CLIENT_START < LR_SERVER_SIZE"
 #endif
@@ -185,6 +189,7 @@ struct mkfs_opts {
  * 2^n * PAGE_SIZE * 8 for the number of bits that fit an order-n allocation. */
 #define LR_MAX_CLIENTS (PAGE_SIZE * 8)
 
+                                                                                
 /* COMPAT_146 */
 #define OBD_COMPAT_OST          0x00000002 /* this is an OST (temporary) */
 #define OBD_COMPAT_MDT          0x00000004 /* this is an MDT (temporary) */
@@ -279,18 +284,19 @@ struct lustre_mount_info {
 /****************** prototypes *********************/
 
 #ifdef __KERNEL__
-#include <linux/obd_class.h>
+#include <obd_class.h>
 
 /* obd_mount.c */
 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb));
 int lustre_common_put_super(struct super_block *sb);
-int lustre_process_log(struct super_block *sb, char *logname,
+int lustre_process_log(struct super_block *sb, char *logname, 
                      struct config_llog_instance *cfg);
-int lustre_end_log(struct super_block *sb, char *logname,
+int lustre_end_log(struct super_block *sb, char *logname, 
                        struct config_llog_instance *cfg);
 struct lustre_mount_info *server_get_mount(const char *name);
 int server_put_mount(const char *name, struct vfsmount *mnt);
 int server_register_target(struct super_block *sb);
+struct mgs_target_info;
 int server_mti_print(char *title, struct mgs_target_info *mti);
 
 /* mgc_request.c */
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
new file mode 100644 (file)
index 0000000..4b74c90
--- /dev/null
@@ -0,0 +1,602 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * (visit-tags-table FILE)
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef _LUSTRE_DLM_H__
+#define _LUSTRE_DLM_H__
+
+#if defined(__linux__)
+#include <linux/lustre_dlm.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_dlm.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_dlm.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_handles.h>
+#include <lustre_export.h> /* for obd_export, for LDLM_DEBUG */
+
+struct obd_ops;
+struct obd_device;
+
+#define OBD_LDLM_DEVICENAME  "ldlm"
+
+#define LDLM_DEFAULT_LRU_SIZE (100 * smp_num_cpus)
+
+typedef enum {
+        ELDLM_OK = 0,
+
+        ELDLM_LOCK_CHANGED = 300,
+        ELDLM_LOCK_ABORTED = 301,
+        ELDLM_LOCK_REPLACED = 302,
+        ELDLM_NO_LOCK_DATA = 303,
+
+        ELDLM_NAMESPACE_EXISTS = 400,
+        ELDLM_BAD_NAMESPACE    = 401
+} ldlm_error_t;
+
+#define LDLM_NAMESPACE_SERVER 0
+#define LDLM_NAMESPACE_CLIENT 1
+
+#define LDLM_FL_LOCK_CHANGED   0x000001 /* extent, mode, or resource changed */
+
+/* If the server returns one of these flags, then the lock was put on that list.
+ * If the client sends one of these flags (during recovery ONLY!), it wants the
+ * lock added to the specified list, no questions asked. -p */
+#define LDLM_FL_BLOCK_GRANTED  0x000002
+#define LDLM_FL_BLOCK_CONV     0x000004
+#define LDLM_FL_BLOCK_WAIT     0x000008
+
+#define LDLM_FL_CBPENDING      0x000010 /* this lock is being destroyed */
+#define LDLM_FL_AST_SENT       0x000020 /* blocking or cancel packet was sent */
+#define LDLM_FL_WAIT_NOREPROC  0x000040 /* not a real flag, not saved in lock */
+#define LDLM_FL_CANCEL         0x000080 /* cancellation callback already run */
+
+/* Lock is being replayed.  This could probably be implied by the fact that one
+ * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. */
+#define LDLM_FL_REPLAY         0x000100
+
+#define LDLM_FL_INTENT_ONLY    0x000200 /* don't grant lock, just do intent */
+#define LDLM_FL_LOCAL_ONLY     0x000400 /* see ldlm_cli_cancel_unused */
+
+/* don't run the cancel callback under ldlm_cli_cancel_unused */
+#define LDLM_FL_FAILED         0x000800
+
+#define LDLM_FL_HAS_INTENT     0x001000 /* lock request has intent */
+#define LDLM_FL_CANCELING      0x002000 /* lock cancel has already been sent */
+#define LDLM_FL_LOCAL          0x004000 /* local lock (ie, no srv/cli split) */
+#define LDLM_FL_WARN           0x008000 /* see ldlm_cli_cancel_unused */
+#define LDLM_FL_DISCARD_DATA   0x010000 /* discard (no writeback) on cancel */
+
+#define LDLM_FL_NO_TIMEOUT     0x020000 /* Blocked by group lock - wait
+                                         * indefinitely */
+
+/* file & record locking */
+#define LDLM_FL_BLOCK_NOWAIT   0x040000 // server told not to wait if blocked
+#define LDLM_FL_TEST_LOCK      0x080000 // return blocking lock
+
+/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that
+ * the LVB filling happens _after_ the lock has been granted, so another thread
+ * can match before the LVB has been updated.  As a dirty hack, we set
+ * LDLM_FL_CAN_MATCH only after we've done the LVB poop.
+ *
+ * The proper fix is to do the granting inside of the completion AST, which can
+ * be replaced with a LVB-aware wrapping function for OSC locks.  That change is
+ * pretty high-risk, though, and would need a lot more testing. */
+#define LDLM_FL_CAN_MATCH      0x100000
+
+/* A lock contributes to the kms calculation until it has finished the part
+ * of its cancellation that performs writeback on its dirty pages.  It
+ * can remain on the granted list during this whole time.  Threads racing
+ * to update the kms after performing their writeback need to know to
+ * exclude each other's locks from the calculation as they walk the granted
+ * list. */
+#define LDLM_FL_KMS_IGNORE     0x200000
+
+/* Don't drop lock covering mmapped file in LRU */
+#define LDLM_FL_NO_LRU         0x400000
+
+/* Immediately cancel such locks when they block some other locks. Send
+   cancel notification to original lock holder, but expect no reply. */
+#define LDLM_FL_CANCEL_ON_BLOCK 0x800000
+
+/* Flags inherited from parent lock when doing intents. */
+#define LDLM_INHERIT_FLAGS     (LDLM_FL_CANCEL_ON_BLOCK)
+
+/* These are flags that are mapped into the flags and ASTs of blocking locks */
+#define LDLM_AST_DISCARD_DATA  0x80000000 /* Add FL_DISCARD to blocking ASTs */
+/* Flags sent in AST lock_flags to be mapped into the receiving lock. */
+#define LDLM_AST_FLAGS         (LDLM_FL_DISCARD_DATA)
+
+/* The blocking callback is overloaded to perform two functions.  These flags
+ * indicate which operation should be performed. */
+#define LDLM_CB_BLOCKING    1
+#define LDLM_CB_CANCELING   2
+
+/* compatibility matrix */
+#define LCK_COMPAT_EX  LCK_NL
+#define LCK_COMPAT_PW  (LCK_COMPAT_EX | LCK_CR)
+#define LCK_COMPAT_PR  (LCK_COMPAT_PW | LCK_PR)
+#define LCK_COMPAT_CW  (LCK_COMPAT_PW | LCK_CW)
+#define LCK_COMPAT_CR  (LCK_COMPAT_CW | LCK_PR | LCK_PW)
+#define LCK_COMPAT_NL  (LCK_COMPAT_CR | LCK_EX)
+#define LCK_COMPAT_GROUP  (LCK_GROUP | LCK_NL)
+
+extern ldlm_mode_t lck_compat_array[];
+
+static inline void lockmode_verify(ldlm_mode_t mode)
+{
+        LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE);
+}
+
+static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new)
+{
+        return lck_compat_array[exist] & new;
+}
+
+/*
+ *
+ * cluster name spaces
+ *
+ */
+
+#define DLM_OST_NAMESPACE 1
+#define DLM_MDS_NAMESPACE 2
+
+/* XXX
+   - do we just separate this by security domains and use a prefix for
+     multiple namespaces in the same domain?
+   -
+*/
+
+struct ldlm_lock;
+struct ldlm_resource;
+struct ldlm_namespace;
+
+typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **,
+                               void *req_cookie, ldlm_mode_t mode, int flags,
+                               void *data);
+
+struct ldlm_valblock_ops {
+        int (*lvbo_init)(struct ldlm_resource *res);
+        int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m,
+                           int buf_idx, int increase);
+};
+
+struct ldlm_namespace {
+        char                  *ns_name; /* printed as "ns:" by __LDLM_DEBUG */
+        __u32                  ns_client; /* is this a client-side lock tree? */
+        struct list_head      *ns_hash; /* hash table for ns */
+        cfs_waitq_t            ns_refcount_waitq; /* for cleanup */
+        atomic_t               ns_refcount; /* count of resources in the hash */
+        struct list_head       ns_root_list; /* all root resources in ns */
+        struct lustre_lock     ns_lock; /* protects hash, refcount, list */
+        struct list_head       ns_list_chain; /* position in global NS list */
+
+        struct list_head       ns_unused_list; /* unused locks (LRU)? verify */
+        int                    ns_nr_unused; /* entries on ns_unused_list? */
+        unsigned int           ns_max_unused;
+        cfs_time_t             ns_next_dump;   /* next debug dump, jiffies */
+
+        spinlock_t             ns_counter_lock;
+        __u64                  ns_locks;
+        ldlm_res_policy        ns_policy;
+        struct ldlm_valblock_ops *ns_lvbo;
+        void                    *ns_lvbp;
+};
+
+/*
+ *
+ * Resource hash table
+ *
+ */
+
+#define RES_HASH_BITS 10
+#define RES_HASH_SIZE (1UL << RES_HASH_BITS)
+#define RES_HASH_MASK (RES_HASH_SIZE - 1)
+
+struct ldlm_lock;
+
+typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock,
+                                      struct ldlm_lock_desc *new, void *data,
+                                      int flag);
+typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags,
+                                        void *data);
+typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data);
+
+struct ldlm_lock {
+        struct portals_handle l_handle; // must be first in the structure
+        atomic_t              l_refc;
+        struct ldlm_resource *l_resource;
+        struct ldlm_lock     *l_parent;
+        struct list_head      l_children;
+        struct list_head      l_childof;
+        struct list_head      l_lru;
+        struct list_head      l_res_link; // position in one of three res lists
+        struct list_head      l_export_chain; // per-export chain of locks
+
+        ldlm_mode_t           l_req_mode;
+        ldlm_mode_t           l_granted_mode;
+
+        ldlm_completion_callback l_completion_ast;
+        ldlm_blocking_callback   l_blocking_ast;
+        ldlm_glimpse_callback    l_glimpse_ast;
+
+        struct obd_export    *l_export;
+        struct obd_export    *l_conn_export;
+        __u32                 l_flags;
+        struct lustre_handle  l_remote_handle;
+        ldlm_policy_data_t    l_policy_data;
+
+        __u32                 l_readers;
+        __u32                 l_writers;
+        __u8                  l_destroyed;
+
+        /* If the lock is granted, a process sleeps on this waitq to learn when
+         * it's no longer in use.  If the lock is not granted, a process sleeps
+         * on this waitq to learn when it becomes granted. */
+        cfs_waitq_t           l_waitq;
+        struct timeval        l_enqueued_time;
+
+        cfs_time_t            l_last_used;      /* jiffies */
+        struct ldlm_extent    l_req_extent;
+
+        /* Client-side-only members */
+        __u32                 l_lvb_len;        /* temporary storage for */
+        void                 *l_lvb_data;       /* an LVB received during */
+        void                 *l_lvb_swabber;    /* an enqueue */
+        void                 *l_ast_data;
+
+        /* Server-side-only members */
+        struct list_head      l_pending_chain;  /* callbacks pending */
+        cfs_time_t            l_callback_timeout; /* jiffies */
+
+        __u32                 l_pid;            /* pid which created this lock */
+};
+
+struct ldlm_resource {
+        struct ldlm_namespace *lr_namespace;
+        struct list_head       lr_hash;
+        struct ldlm_resource  *lr_parent;   /* 0 for a root resource */
+        struct list_head       lr_children; /* list head for child resources */
+        struct list_head       lr_childof;  /* part of ns_root_list if root res,
+                                             * part of lr_children if child */
+
+        struct list_head       lr_granted;
+        struct list_head       lr_converting;
+        struct list_head       lr_waiting;
+        ldlm_mode_t            lr_most_restr;
+        ldlm_type_t            lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */
+        struct ldlm_resource  *lr_root;
+        struct ldlm_res_id     lr_name;
+        atomic_t               lr_refcount;
+
+        /* Server-side-only lock value block elements */
+        struct semaphore       lr_lvb_sem;
+        __u32                  lr_lvb_len;
+        void                  *lr_lvb_data;
+
+        /* lr_tmp holds a list head temporarily, during the building of a work
+         * queue.  see ldlm_add_ast_work_item and ldlm_run_ast_work */
+        void                  *lr_tmp;
+};
+
+struct ldlm_ast_work {
+        struct ldlm_lock      *w_lock;
+        int                    w_blocking;
+        struct ldlm_lock_desc  w_desc;
+        struct list_head       w_list;
+        int                    w_flags;
+        void                  *w_data;
+        int                    w_datalen;
+};
+
+extern struct obd_ops ldlm_obd_ops;
+
+extern char *ldlm_lockname[];
+extern char *ldlm_typename[];
+extern char *ldlm_it2str(int it);
+
+#define __LDLM_DEBUG(level, lock, format, a...)                               \
+do {    /* NB: "lock" is expanded unparenthesized and more than once */       \
+        if (lock->l_resource == NULL) {                                       \
+                CDEBUG(level, "### " format                                   \
+                       " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\
+                       "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "   \
+                       LPX64" expref: %d pid: %u\n" , ## a, lock,             \
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
+                break;                                                        \
+        }                                                                     \
+        if (lock->l_resource->lr_type == LDLM_EXTENT) {                       \
+                CDEBUG(level, "### " format                                   \
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\
+                       "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64     \
+                       " expref: %d pid: %u\n" , ## a,                        \
+                       lock->l_resource->lr_namespace->ns_name, lock,         \
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
+                       atomic_read(&lock->l_resource->lr_refcount),           \
+                       ldlm_typename[lock->l_resource->lr_type],              \
+                       lock->l_policy_data.l_extent.start,                    \
+                       lock->l_policy_data.l_extent.end,                      \
+                       lock->l_req_extent.start, lock->l_req_extent.end,      \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
+                break;                                                        \
+        }                                                                     \
+        if (lock->l_resource->lr_type == LDLM_FLOCK) {                        \
+                CDEBUG(level, "### " format                                   \
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "       \
+                       "["LPU64"->"LPU64"] flags: %x remote: "LPX64           \
+                       " expref: %d pid: %u\n" , ## a,                        \
+                       lock->l_resource->lr_namespace->ns_name, lock,         \
+                       lock->l_handle.h_cookie, atomic_read(&lock->l_refc),   \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
+                       atomic_read(&lock->l_resource->lr_refcount),           \
+                       ldlm_typename[lock->l_resource->lr_type],              \
+                       lock->l_policy_data.l_flock.pid,                       \
+                       lock->l_policy_data.l_flock.start,                     \
+                       lock->l_policy_data.l_flock.end,                       \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
+                break;                                                        \
+        }                                                                     \
+        if (lock->l_resource->lr_type == LDLM_IBITS) {                        \
+                CDEBUG(level, "### " format                                   \
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
+                       "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "  \
+                       "flags: %x remote: "LPX64" expref: %d "                \
+                       "pid: %u\n" , ## a,                                    \
+                       lock->l_resource->lr_namespace->ns_name,               \
+                       lock, lock->l_handle.h_cookie,                         \
+                       atomic_read (&lock->l_refc),                           \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
+                       lock->l_policy_data.l_inodebits.bits,                  \
+                       atomic_read(&lock->l_resource->lr_refcount),           \
+                       ldlm_typename[lock->l_resource->lr_type],              \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
+                break;                                                        \
+        }                                                                     \
+        {                                                                     \
+                CDEBUG(level, "### " format                                   \
+                       " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "  \
+                       "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "     \
+                       "remote: "LPX64" expref: %d pid: %u\n" , ## a,         \
+                       lock->l_resource->lr_namespace->ns_name,               \
+                       lock, lock->l_handle.h_cookie,                         \
+                       atomic_read (&lock->l_refc),                           \
+                       lock->l_readers, lock->l_writers,                      \
+                       ldlm_lockname[lock->l_granted_mode],                   \
+                       ldlm_lockname[lock->l_req_mode],                       \
+                       lock->l_resource->lr_name.name[0],                     \
+                       lock->l_resource->lr_name.name[1],                     \
+                       atomic_read(&lock->l_resource->lr_refcount),           \
+                       ldlm_typename[lock->l_resource->lr_type],              \
+                       lock->l_flags, lock->l_remote_handle.cookie,           \
+                       lock->l_export ?                                       \
+                       atomic_read(&lock->l_export->exp_refcount) : -99,      \
+                       lock->l_pid);                                          \
+        }                                                                     \
+} while (0)
+
+#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \
+                                                    format, ## a)
+#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a)
+
+#define LDLM_DEBUG_NOLOCK(format, a...)                 \
+        CDEBUG(D_DLMTRACE, "### " format "\n" , ## a)
+
+typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags,
+                                      int first_enq, ldlm_error_t *err);
+
+/*
+ * Iterators.
+ */
+
+#define LDLM_ITER_CONTINUE 1 /* keep iterating */
+#define LDLM_ITER_STOP     2 /* stop iterating */
+
+typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *);
+typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *);
+
+int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter,
+                          void *closure);
+int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
+                           void *closure);
+int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
+                               ldlm_res_iterator_t iter, void *closure);
+
+int ldlm_replay_locks(struct obd_import *imp);
+void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *,
+                        ldlm_iterator_t iter, void *data);
+
+/* ldlm_flock.c */
+int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+
+/* ldlm_extent.c */
+__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms);
+
+
+/* ldlm_lockd.c */
+int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+                             void *data, int flag);
+int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data);
+int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback,
+                        ldlm_blocking_callback, ldlm_glimpse_callback);
+int ldlm_handle_convert(struct ptlrpc_request *req);
+int ldlm_handle_cancel(struct ptlrpc_request *req);
+int ldlm_del_waiting_lock(struct ldlm_lock *lock);
+int ldlm_get_ref(void);
+void ldlm_put_ref(int force);
+
+/* ldlm_lock.c */
+ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res);
+void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg);
+void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh);
+struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags);
+void ldlm_cancel_callback(struct ldlm_lock *);
+int ldlm_lock_set_data(struct lustre_handle *, void *data);
+void ldlm_lock_remove_from_lru(struct ldlm_lock *);
+struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *,
+                                      struct lustre_handle *);
+
+static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h)
+{
+        return __ldlm_handle2lock(h, 0);
+}
+
+#define LDLM_LOCK_PUT(lock)                     \
+do {                                            \
+        /*LDLM_DEBUG((lock), "put");*/          \
+        ldlm_lock_put(lock);                    \
+} while (0)
+
+#define LDLM_LOCK_GET(lock)                     \
+({                                              \
+        __typeof__(lock) __lk = (lock); /* expand "lock" exactly once */ \
+        ldlm_lock_get(__lk);                    \
+        __lk;                                   \
+})
+
+struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock);
+void ldlm_lock_put(struct ldlm_lock *lock);
+void ldlm_lock_destroy(struct ldlm_lock *lock);
+void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc);
+void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode);
+void ldlm_lock_allow_match(struct ldlm_lock *lock);
+int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *,
+                    ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode,
+                    struct lustre_handle *);
+struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode,
+                                        int *flags);
+void ldlm_lock_cancel(struct ldlm_lock *lock);
+void ldlm_cancel_locks_for_export(struct obd_export *export);
+void ldlm_reprocess_all(struct ldlm_resource *res);
+void ldlm_reprocess_all_ns(struct ldlm_namespace *ns);
+void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos);
+void ldlm_lock_dump_handle(int level, struct lustre_handle *);
+
+/* resource.c */
+struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 local);
+int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags);
+int ldlm_namespace_free(struct ldlm_namespace *ns, int force);
+int ldlm_proc_setup(void);
+#ifdef LPROCFS
+void ldlm_proc_cleanup(void);
+#else
+static inline void ldlm_proc_cleanup(void) {}
+#endif
+
+/* resource.c - internal */
+struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns,
+                                        struct ldlm_resource *parent,
+                                        struct ldlm_res_id, ldlm_type_t type,
+                                        int create);
+struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res);
+int ldlm_resource_putref(struct ldlm_resource *res);
+void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head,
+                            struct ldlm_lock *lock);
+void ldlm_resource_unlink_lock(struct ldlm_lock *lock);
+void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc);
+void ldlm_dump_all_namespaces(int level);
+void ldlm_namespace_dump(int level, struct ldlm_namespace *);
+void ldlm_resource_dump(int level, struct ldlm_resource *);
+int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *,
+                              struct ldlm_res_id);
+
+struct ldlm_callback_suite {
+        ldlm_completion_callback lcs_completion;
+        ldlm_blocking_callback   lcs_blocking;
+        ldlm_glimpse_callback    lcs_glimpse;
+};
+
+/* ldlm_request.c */
+int ldlm_expired_completion_wait(void *data);
+int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+                      void *data, int flag);
+int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp);
+int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data);
+int ldlm_cli_enqueue(struct obd_export *exp,
+                     struct ptlrpc_request *req,
+                     struct ldlm_namespace *ns,
+                     struct ldlm_res_id,
+                     ldlm_type_t type,
+                     ldlm_policy_data_t *,
+                     ldlm_mode_t mode,
+                     int *flags,
+                     ldlm_blocking_callback blocking,
+                     ldlm_completion_callback completion,
+                     ldlm_glimpse_callback glimpse,
+                     void *data,
+                     void *lvb,
+                     __u32 lvb_len,
+                     void *lvb_swabber,
+                     struct lustre_handle *lockh);
+int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req,
+                         struct ldlm_request *dlm_req,
+                         struct ldlm_callback_suite *cbs);
+int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new,
+                    void *data, __u32 data_len);
+int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags);
+int ldlm_handle_convert0(struct ptlrpc_request *req,
+                         struct ldlm_request *dlm_req);
+int ldlm_cli_cancel(struct lustre_handle *lockh);
+int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *,
+                           int flags, void *opaque);
+int ldlm_cli_join_lru(struct ldlm_namespace *, struct ldlm_res_id *,
+                      int join);
+
+/* mds/handler.c */
+/* This has to be here because recursive inclusion sucks. */
+int intent_disposition(struct ldlm_reply *rep, int flag);
+void intent_set_disposition(struct ldlm_reply *rep, int flag);
+
+
+/* ioctls for trying requests */
+#define IOC_LDLM_TYPE                   'f'
+#define IOC_LDLM_MIN_NR                 40
+
+#define IOC_LDLM_TEST                   _IOWR(IOC_LDLM_TYPE, 40, long)
+#define IOC_LDLM_DUMP                   _IOWR(IOC_LDLM_TYPE, 41, long)
+#define IOC_LDLM_REGRESS_START          _IOWR(IOC_LDLM_TYPE, 42, long)
+#define IOC_LDLM_REGRESS_STOP           _IOWR(IOC_LDLM_TYPE, 43, long)
+#define IOC_LDLM_MAX_NR                 43
+
+#endif
similarity index 96%
rename from lustre/include/linux/lustre_export.h
rename to lustre/include/lustre_export.h
index 820426b..f6e3f36 100644 (file)
@@ -5,8 +5,8 @@
 #ifndef __EXPORT_H
 #define __EXPORT_H
 
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
 
 /* Data stored per client in the last_rcvd file.  In le32 order. */
 struct mds_client_data;
@@ -29,7 +29,7 @@ struct osc_creator {
         int                     oscc_grow_count;
         struct obdo             oscc_oa;
         int                     oscc_flags;
-        wait_queue_head_t       oscc_waitq; /* creating procs wait on this */
+        cfs_waitq_t             oscc_waitq; /* creating procs wait on this */
 };
 
 struct ldlm_export_data {
similarity index 98%
rename from lustre/include/linux/lustre_fid.h
rename to lustre/include/lustre_fid.h
index 1547c2a..6476fa7 100644 (file)
@@ -26,7 +26,7 @@
 /*
  * struct lu_fid
  */
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 
 #include <libcfs/list.h>
 #include <libcfs/kp30.h>
diff --git a/lustre/include/lustre_fsfilt.h b/lustre/include/lustre_fsfilt.h
new file mode 100644 (file)
index 0000000..41b9431
--- /dev/null
@@ -0,0 +1,38 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2004 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Filesystem interface helper.
+ *
+ */
+
+#ifndef _LUSTRE_FSFILT_H
+#define _LUSTRE_FSFILT_H
+
+#if defined(__linux__)
+#include <linux/lustre_fsfilt.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_fsfilt.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_fsfilt.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#endif
diff --git a/lustre/include/lustre_handles.h b/lustre/include/lustre_handles.h
new file mode 100644 (file)
index 0000000..bbd2fcd
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef __LUSTRE_HANDLES_H_
+#define __LUSTRE_HANDLES_H_
+
+#if defined(__linux__)
+#include <linux/lustre_handles.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_handles.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_handles.h>
+#else
+#error Unsupported operating system.
+#endif
+
+typedef void (*portals_handle_addref_cb)(void *object);
+
+/* These handles are most easily used by having them appear at the very top of
+ * whatever object that you want to make handles for.  ie:
+ *
+ * struct ldlm_lock {
+ *         struct portals_handle handle;
+ *         ...
+ * };
+ *
+ * Now you're able to assign the results of cookie2handle directly to an
+ * ldlm_lock.  If it's not at the top, you'll want to hack up a macro that
+ * uses some offsetof() magic. */
+
+/* Per-object handle header.  Objects embed one of these (see the usage notes
+ * above) so they can be registered with class_handle_hash() and found again
+ * from a 64-bit cookie via class_handle2object(). */
+struct portals_handle {
+        /* list linkage; presumably the hash-chain link -- confirm in handles.c */
+        struct list_head h_link;
+        /* 64-bit cookie; the key type taken by class_handle2object() */
+        __u64 h_cookie;
+        /* callback of type portals_handle_addref_cb, invoked with the object;
+         * NOTE(review): presumably takes a reference on lookup -- confirm */
+        portals_handle_addref_cb h_addref;
+};
+
+/* handles.c */
+
+/* Add a handle to the hash table */
+void class_handle_hash(struct portals_handle *, portals_handle_addref_cb);
+void class_handle_unhash(struct portals_handle *);
+void *class_handle2object(__u64 cookie);
+int class_handle_init(void);
+void class_handle_cleanup(void);
+
+#endif
similarity index 92%
rename from lustre/include/linux/lustre_import.h
rename to lustre/include/lustre_import.h
index 6b87e84..ff74277 100644 (file)
@@ -5,8 +5,8 @@
 #ifndef __IMPORT_H
 #define __IMPORT_H
 
-#include <linux/lustre_handles.h>
-#include <linux/lustre_idl.h>
+#include <lustre_handles.h>
+#include <lustre/lustre_idl.h>
 
 enum lustre_imp_state {
         LUSTRE_IMP_CLOSED     = 1,
@@ -45,7 +45,7 @@ struct obd_import_conn {
         struct list_head          oic_item;
         struct ptlrpc_connection *oic_conn;
         struct obd_uuid           oic_uuid;
-        unsigned long             oic_last_attempt; /* in jiffies */
+        cfs_time_t                oic_last_attempt; /* in cfs_time_t */
 };
 
 struct obd_import {
@@ -64,17 +64,19 @@ struct obd_import {
         struct list_head          imp_delayed_list;
 
         struct obd_device        *imp_obd;
-        wait_queue_head_t         imp_recovery_waitq;
-        __u64                     imp_last_replay_transno;
+        cfs_waitq_t               imp_recovery_waitq;
+
         atomic_t                  imp_inflight;
         atomic_t                  imp_replay_inflight;
         enum lustre_imp_state     imp_state;
         int                       imp_generation;
         __u32                     imp_conn_cnt;
-        __u64                     imp_max_transno;
+        int                       imp_last_generation_checked;
+        __u64                     imp_last_replay_transno;
         __u64                     imp_peer_committed_transno;
+        __u64                     imp_last_transno_checked;
         struct lustre_handle      imp_remote_handle;
-        unsigned long             imp_next_ping;   /* jiffies */
+        cfs_time_t                imp_next_ping;   /* in cfs_time_t */
 
         /* all available obd_import_conn linked here */
         struct list_head          imp_conn_list;
diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h
new file mode 100644 (file)
index 0000000..5c0f95f
--- /dev/null
@@ -0,0 +1,754 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic Lustre library routines.
+ *
+ */
+
+#ifndef _LUSTRE_LIB_H
+#define _LUSTRE_LIB_H
+
+#include <libcfs/kp30.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_cfg.h>
+#if defined(__linux__)
+#include <linux/lustre_lib.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_lib.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_lib.h>
+#else
+#error Unsupported operating system.
+#endif
+
+/* prng.c */
+unsigned int ll_rand(void);        /* returns a random 32-bit integer */
+void ll_srand(unsigned int, unsigned int);     /* seed the generator */
+
+/* target.c */
+struct ptlrpc_request;
+struct recovd_data;
+struct recovd_obd;
+struct obd_export;
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lvfs.h>
+
+int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler);
+int target_handle_disconnect(struct ptlrpc_request *req);
+void target_destroy_export(struct obd_export *exp);
+int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
+                            struct obd_uuid *cluuid);
+int target_handle_ping(struct ptlrpc_request *req);
+void target_committed_to_req(struct ptlrpc_request *req);
+
+#ifdef HAVE_QUOTA_SUPPORT
+/* quotacheck callback, dqacq/dqrel callback handler */
+int target_handle_qc_callback(struct ptlrpc_request *req);
+int target_handle_dqacq_callback(struct ptlrpc_request *req);
+#else
+#define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP)
+#define target_handle_qc_callback(req) (0)
+#endif
+
+void target_cancel_recovery_timer(struct obd_device *obd);
+
+#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) /* *waves hands* */
+void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler);
+void target_abort_recovery(void *data);
+void target_cleanup_recovery(struct obd_device *obd);
+int target_queue_recovery_request(struct ptlrpc_request *req,
+                                  struct obd_device *obd);
+int target_queue_final_reply(struct ptlrpc_request *req, int rc);
+void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id);
+
+/* client.c */
+
+int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg);
+struct client_obd *client_conn2cli(struct lustre_handle *conn);
+
+struct mdc_open_data;
+/* Client-side bookkeeping for one handle.
+ * NOTE(review): field meanings below are inferred from the types only;
+ * confirm against the users of this struct. */
+struct obd_client_handle {
+        struct lustre_handle och_fh;      /* lustre handle for this entry */
+        struct llog_cookie och_cookie;    /* associated llog cookie */
+        struct mdc_open_data *och_mod;    /* opaque here; only forward-declared */
+        /* presumably set to OBD_CLIENT_HANDLE_MAGIC (defined right after this
+         * struct) as a validity marker -- confirm */
+        __u32 och_magic;
+};
+#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed
+
+/* statfs_pack.c */
+void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs);
+void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs);
+
+/* l_lock.c */
+/* Lock object operated on by l_lock_init(), l_lock(), l_unlock() and
+ * l_has_lock(), which are implemented in l_lock.c.
+ * NOTE(review): the owner/depth pair suggests a lock the owning task may
+ * take recursively -- confirm in l_lock.c. */
+struct lustre_lock {
+        int l_depth;                 /* presumably the nesting count -- confirm */
+        cfs_task_t *l_owner;         /* task recorded as the current holder */
+        struct semaphore l_sem;      /* semaphore member; role defined in l_lock.c */
+        spinlock_t l_spin;           /* spinlock member; role defined in l_lock.c */
+};
+
+void l_lock_init(struct lustre_lock *);
+void l_lock(struct lustre_lock *);
+void l_unlock(struct lustre_lock *);
+int l_has_lock(struct lustre_lock *);
+
+
+/*
+ *   OBD IOCTLS
+ */
+#define OBD_IOCTL_VERSION 0x00010004
+
+/*
+ * Argument block passed through the OBD ioctls.
+ *
+ * obd_ioctl_pack() fills in ioc_len (from obd_ioctl_packlen()) and
+ * ioc_version (OBD_IOCTL_VERSION) and lays the struct out at the start of a
+ * flat buffer, with the inline buffers following in ioc_bulk.  The first two
+ * fields must stay in step with struct obd_ioctl_hdr, which
+ * obd_ioctl_getdata() reads before it knows the full length.
+ */
+struct obd_ioctl_data {
+        uint32_t ioc_len;
+        uint32_t ioc_version;
+
+        uint64_t ioc_cookie;
+        uint32_t ioc_conn1;
+        uint32_t ioc_conn2;
+
+        struct obdo ioc_obdo1;
+        struct obdo ioc_obdo2;
+
+        obd_size         ioc_count;
+        obd_off          ioc_offset;
+        uint32_t         ioc_dev;
+        uint32_t         ioc_command;
+
+        uint64_t ioc_nid;
+        uint32_t ioc_nal;
+        uint32_t ioc_type;
+
+        /* buffers the kernel will treat as user pointers */
+        uint32_t ioc_plen1;
+        char    *ioc_pbuf1;
+        uint32_t ioc_plen2;
+        char    *ioc_pbuf2;
+
+        /* inline buffers for various arguments */
+        uint32_t ioc_inllen1;
+        char    *ioc_inlbuf1;
+        uint32_t ioc_inllen2;
+        char    *ioc_inlbuf2;
+        uint32_t ioc_inllen3;
+        char    *ioc_inlbuf3;
+        uint32_t ioc_inllen4;
+        char    *ioc_inlbuf4;
+
+        /* start of the packed inline buffers; each one occupies
+         * size_round(ioc_inllenN) bytes, in order 1..4 */
+        char    ioc_bulk[0];
+};
+
+/* Leading two fields of struct obd_ioctl_data.  obd_ioctl_getdata() copies
+ * just this much first, to check the version and learn how large a buffer
+ * the full request needs. */
+struct obd_ioctl_hdr {
+        uint32_t ioc_len;       /* total length of the packed request */
+        uint32_t ioc_version;   /* compared against OBD_IOCTL_VERSION */
+};
+
+/* Number of bytes the packed form of @data occupies: the rounded size of the
+ * fixed struct plus each of the four inline buffers, individually rounded by
+ * size_round(). */
+static inline int obd_ioctl_packlen(struct obd_ioctl_data *data)
+{
+        return size_round(sizeof(struct obd_ioctl_data)) +
+               size_round(data->ioc_inllen1) +
+               size_round(data->ioc_inllen2) +
+               size_round(data->ioc_inllen3) +
+               size_round(data->ioc_inllen4);
+}
+
+
+/*
+ * Sanity-check a packed ioctl request.
+ *
+ * Rejects requests whose total or inline lengths exceed 1<<30, whose buffer
+ * pointers and lengths disagree (pointer without length, or, for the pbufs,
+ * length without pointer), or whose packed size does not fit in ioc_len.
+ * Each rejection is logged with CERROR.
+ *
+ * Returns 1 if the request is malformed, 0 if it passes every check.
+ */
+static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data)
+{
+        uint64_t packlen;
+
+        if (data->ioc_len > (1<<30)) {
+                CERROR("OBD ioctl: ioc_len larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen1 > (1<<30)) {
+                CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen2 > (1<<30)) {
+                CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen3 > (1<<30)) {
+                CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inllen4 > (1<<30)) {
+                CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf1 && !data->ioc_inllen1) {
+                CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
+                CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf3 && !data->ioc_inllen3) {
+                CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_inlbuf4 && !data->ioc_inllen4) {
+                CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf1 && !data->ioc_plen1) {
+                CERROR("OBD ioctl: pbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_pbuf2 && !data->ioc_plen2) {
+                CERROR("OBD ioctl: pbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (data->ioc_plen1 && !data->ioc_pbuf1) {
+                CERROR("OBD ioctl: plen1 set but NULL pointer\n");
+                return 1;
+        }
+        if (data->ioc_plen2 && !data->ioc_pbuf2) {
+                CERROR("OBD ioctl: plen2 set but NULL pointer\n");
+                return 1;
+        }
+
+        /* Each inline length was capped at 1<<30 above, so every individual
+         * size_round() result fits in an int, but four of them can add up to
+         * 1<<32.  Total them in 64 bits instead of going through the int
+         * returned by obd_ioctl_packlen(), so an overflowing sum cannot wrap
+         * round to a small value and slip past the ioc_len bound. */
+        packlen  = (uint64_t)size_round(sizeof(struct obd_ioctl_data));
+        packlen += (uint64_t)size_round(data->ioc_inllen1);
+        packlen += (uint64_t)size_round(data->ioc_inllen2);
+        packlen += (uint64_t)size_round(data->ioc_inllen3);
+        packlen += (uint64_t)size_round(data->ioc_inllen4);
+        if (packlen > data->ioc_len) {
+                CERROR("OBD ioctl: packlen exceeds ioc_len (%llu > %u)\n",
+                       (unsigned long long)packlen, data->ioc_len);
+                return 1;
+        }
+        return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Userspace helper: flatten @data and its inline buffers into one buffer.
+ *
+ * Sets data->ioc_len and data->ioc_version as a side effect.  If *pbuf is
+ * non-NULL it is used as the destination and must hold at least ioc_len
+ * bytes (checked against @max); if *pbuf is NULL a buffer of ioc_len bytes
+ * is malloc()ed and stored in *pbuf.
+ *
+ * Returns 0 on success, 1 if the caller's buffer is too small, the
+ * allocation fails, or the packed result does not pass
+ * obd_ioctl_is_invalid().  On the last of these a buffer allocated here is
+ * left in *pbuf, so the caller still owns it.
+ */
+static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf,
+                                 int max)
+{
+        char *ptr;
+        struct obd_ioctl_data *overlay;
+        data->ioc_len = obd_ioctl_packlen(data);
+        data->ioc_version = OBD_IOCTL_VERSION;
+
+        if (*pbuf && data->ioc_len > max)
+                return 1;
+        if (*pbuf == NULL) {
+                *pbuf = malloc(data->ioc_len);
+        }
+        if (!*pbuf)
+                return 1;
+        overlay = (struct obd_ioctl_data *)*pbuf;
+        memcpy(*pbuf, data, sizeof(*data));
+
+        /* NOTE(review): LOGL is defined elsewhere; presumably it copies the
+         * buffer to ptr and advances ptr by the rounded length -- confirm */
+        ptr = overlay->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+        if (data->ioc_inlbuf2)
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+        if (data->ioc_inlbuf3)
+                LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+        if (data->ioc_inlbuf4)
+                LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+        if (obd_ioctl_is_invalid(overlay))
+                return 1;
+
+        return 0;
+}
+
+/*
+ * Userspace helper: copy a packed reply in @pbuf back into @data.
+ *
+ * The fixed-size struct is copied over *data, except that data's own
+ * ioc_inlbuf1..4 pointers are kept (they are written into the packed copy
+ * first, so the memcpy carries them back).  The inline buffers are then
+ * handed to LOGU one by one.
+ *
+ * Returns 1 if @pbuf is NULL, 0 otherwise.  @max is not used.
+ */
+static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf,
+                                   int max)
+{
+        char *ptr;
+        struct obd_ioctl_data *overlay;
+
+        if (!pbuf)
+                return 1;
+        overlay = (struct obd_ioctl_data *)pbuf;
+
+        /* Preserve the caller's buffer pointers */
+        overlay->ioc_inlbuf1 = data->ioc_inlbuf1;
+        overlay->ioc_inlbuf2 = data->ioc_inlbuf2;
+        overlay->ioc_inlbuf3 = data->ioc_inlbuf3;
+        overlay->ioc_inlbuf4 = data->ioc_inlbuf4;
+
+        memcpy(data, pbuf, sizeof(*data));
+
+        /* NOTE(review): LOGU is defined elsewhere; presumably it copies from
+         * ptr into the caller's buffer and advances ptr -- confirm */
+        ptr = overlay->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+        if (data->ioc_inlbuf2)
+                LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+        if (data->ioc_inlbuf3)
+                LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+        if (data->ioc_inlbuf4)
+                LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+
+        return 0;
+}
+#endif
+
+#include <obd_support.h>
+
+#ifdef __KERNEL__
+/* function defined in lustre/obdclass/<platform>/<platform>-module.c */
+int obd_ioctl_getdata(char **buf, int *len, void *arg);
+int obd_ioctl_popdata(void *arg, void *data, int len);
+#else
+/*
+ * Fetch a packed obd_ioctl_data request from @arg into a new buffer.
+ *
+ * The buffer at @arg MUST be at least the size of obd_ioctl_hdr.  The header
+ * is read first to check the version and bound the length, then a buffer of
+ * hdr.ioc_len bytes is allocated with OBD_VMALLOC and the whole request is
+ * copied into it.  After validation the ioc_inlbufN pointers are rewritten
+ * to point at the inline data inside the new buffer.
+ *
+ * @buf  out: the allocated buffer; release it with obd_ioctl_freedata()
+ * @len  out: size of *buf in bytes
+ * @arg  source of the request
+ *
+ * Returns 0 on success.  On failure nothing is left allocated and the
+ * result is -EFAULT (copy failed), -ENOMEM (allocation failed) or -EINVAL
+ * (bad version, bad length, or malformed request).
+ */
+static inline int obd_ioctl_getdata(char **buf, int *len, void *arg)
+{
+        struct obd_ioctl_hdr hdr;
+        struct obd_ioctl_data *data;
+        int offset = 0;
+        ENTRY;
+
+        /* a non-zero copy result is not an errno; report it as -EFAULT,
+         * the same way obd_ioctl_popdata() does */
+        if (copy_from_user(&hdr, (void *)arg, sizeof(hdr)))
+                RETURN(-EFAULT);
+
+        if (hdr.ioc_version != OBD_IOCTL_VERSION) {
+                CERROR("Version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
+                CERROR("User buffer len %d exceeds %d max buffer\n",
+                       hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
+                RETURN(-EINVAL);
+        }
+
+        if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
+                CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len);
+                RETURN(-EINVAL);
+        }
+
+        /* XXX allocate this more intelligently, using kmalloc when
+         * appropriate */
+        OBD_VMALLOC(*buf, hdr.ioc_len);
+        if (*buf == NULL) {
+                CERROR("Cannot allocate control buffer of len %d\n",
+                       hdr.ioc_len);
+                RETURN(-ENOMEM);
+        }
+        *len = hdr.ioc_len;
+        data = (struct obd_ioctl_data *)*buf;
+
+        if (copy_from_user(*buf, (void *)arg, hdr.ioc_len)) {
+                OBD_VFREE(*buf, hdr.ioc_len);
+                RETURN(-EFAULT);
+        }
+
+        /* The request was read twice; the buffer was sized from the first
+         * read, so a different ioc_len in the second one must not be
+         * trusted by the bounds checks below. */
+        if (data->ioc_len != hdr.ioc_len) {
+                CERROR("ioctl length changed between reads (%u != %u)\n",
+                       data->ioc_len, hdr.ioc_len);
+                OBD_VFREE(*buf, hdr.ioc_len);
+                RETURN(-EINVAL);
+        }
+
+        if (obd_ioctl_is_invalid(data)) {
+                CERROR("ioctl not correctly formatted\n");
+                OBD_VFREE(*buf, hdr.ioc_len);
+                RETURN(-EINVAL);
+        }
+
+        /* point each non-empty inline buffer at its slot in ioc_bulk */
+        if (data->ioc_inllen1) {
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+                offset += size_round(data->ioc_inllen1);
+        }
+
+        if (data->ioc_inllen2) {
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen2);
+        }
+
+        if (data->ioc_inllen3) {
+                data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen3);
+        }
+
+        if (data->ioc_inllen4) {
+                data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
+        }
+
+        RETURN(0);
+}
+
+/* Copy @len bytes of @data out to @arg with copy_to_user().
+ * Returns 0 on success, -EFAULT if the copy reports any failure. */
+static inline int obd_ioctl_popdata(void *arg, void *data, int len)
+{
+        int rc = copy_to_user(arg, data, len);
+
+        return rc ? -EFAULT : 0;
+}
+#endif
+
+/* Release a buffer of @len bytes obtained from obd_ioctl_getdata(). */
+static inline void obd_ioctl_freedata(char *buf, int len)
+{
+        ENTRY;
+        OBD_VFREE(buf, len);
+        EXIT;
+}
+
+/*
+ * BSD ioctl description:
+ * #define IOC_V1       _IOR(g, n1, long)
+ * #define IOC_V2       _IOW(g, n2, long)
+ *
+ * ioctl(f, IOC_V1, arg);
+ * arg will be treated as a long value,
+ *
+ * ioctl(f, IOC_V2, arg)
+ * arg will be treated as a pointer, bsd will call
+ * copyin(buf, arg, sizeof(long))
+ *
+ * To make the BSD ioctl path handle arguments correctly and simply,
+ * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data
+ * for us. Does this change affect Linux?  (XXX Liang)
+ */
+#define OBD_IOC_CREATE                 _IOWR('f', 101, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DESTROY                _IOW ('f', 104, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PREALLOCATE            _IOWR('f', 105, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_SETATTR                _IOW ('f', 107, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETATTR                _IOWR ('f', 108, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ                   _IOWR('f', 109, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_WRITE                  _IOWR('f', 110, OBD_IOC_DATA_TYPE)
+
+
+#define OBD_IOC_STATFS                 _IOWR('f', 113, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SYNC                   _IOW ('f', 114, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_READ2                  _IOWR('f', 115, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_FORMAT                 _IOWR('f', 116, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARTITION              _IOWR('f', 117, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_COPY                   _IOWR('f', 120, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_MIGR                   _IOWR('f', 121, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PUNCH                  _IOWR('f', 122, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_MODULE_DEBUG           _IOWR('f', 124, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_READ               _IOWR('f', 125, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_BRW_WRITE              _IOWR('f', 126, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_NAME2DEV               _IOWR('f', 127, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_UUID2DEV               _IOWR('f', 130, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_GETNAME                _IOWR('f', 131, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_LOV_GET_CONFIG         _IOWR('f', 132, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLIENT_RECOVER         _IOW ('f', 133, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_DEC_FS_USE_COUNT       _IO  ('f', 139      )
+#define OBD_IOC_NO_TRANSNO             _IOW ('f', 140, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_SET_READONLY           _IOW ('f', 141, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ABORT_RECOVERY         _IOR ('f', 142, OBD_IOC_DATA_TYPE)
+
+#define OBD_GET_VERSION                _IOWR ('f', 144, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CLOSE_UUID             _IOWR ('f', 147, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_GETDEVICE              _IOWR ('f', 149, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_LOV_SETSTRIPE          _IOW ('f', 154, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_GETSTRIPE          _IOW ('f', 155, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LOV_SETEA              _IOW ('f', 156, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_QUOTACHECK             _IOW ('f', 160, int)
+#define OBD_IOC_POLL_QUOTACHECK        _IOR ('f', 161, struct if_quotacheck *)
+#define OBD_IOC_QUOTACTL               _IOWR('f', 162, struct if_quotactl *)
+
+#define OBD_IOC_MOUNTOPT               _IOWR('f', 170, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_RECORD                 _IOWR('f', 180, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_ENDRECORD              _IOWR('f', 181, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARSE                  _IOWR('f', 182, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DORECORD               _IOWR('f', 183, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PROCESS_CFG            _IOWR('f', 184, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_DUMP_LOG               _IOWR('f', 185, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_CLEAR_LOG              _IOWR('f', 186, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_PARAM                  _IOW ('f', 187, OBD_IOC_DATA_TYPE)
+
+#define OBD_IOC_CATLOGLIST             _IOWR('f', 190, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_INFO              _IOWR('f', 191, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_PRINT             _IOWR('f', 192, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CANCEL            _IOWR('f', 193, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_REMOVE            _IOWR('f', 194, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CHECK             _IOWR('f', 195, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_LLOG_CATINFO           _IOWR('f', 196, OBD_IOC_DATA_TYPE)
+
+#define ECHO_IOC_GET_STRIPE            _IOWR('f', 200, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_SET_STRIPE            _IOWR('f', 201, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_ENQUEUE               _IOWR('f', 202, OBD_IOC_DATA_TYPE)
+#define ECHO_IOC_CANCEL                _IOWR('f', 203, OBD_IOC_DATA_TYPE)
+
+/* XXX _IOWR('f', 250, long) has been defined in
+ * lnet/include/libcfs/kp30.h for debug, don't use it
+ */
+
+/* Until such time as we get_info the per-stripe maximum from the OST,
+ * we define this to be 2T - 4k, which is the ext3 maxbytes. */
+#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL
+
+#define POISON_BULK 0
+
+/*
+ * l_wait_event is a flexible sleeping function, permitting simple caller
+ * configuration of interrupt and timeout sensitivity along with actions to
+ * be performed in the event of either exception.
+ *
+ * The first form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler,
+ *                                           intr_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * l_wait_event() makes the current process wait on 'waitq' until 'condition'
+ * is TRUE or a "killable" signal (SIGTERM, SIGKILL, SIGINT) is pending.  It
+ * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before
+ * 'condition' becomes true, it optionally calls the specified 'intr_handler'
+ * if not NULL, and returns -EINTR.
+ *
+ * If a non-zero timeout is specified, signals are ignored until the timeout
+ * has expired.  At this time, if 'timeout_handler' is not NULL it is called.
+ * If it returns FALSE l_wait_event() continues to wait as described above with
+ * signals enabled.  Otherwise it returns -ETIMEDOUT.
+ *
+ * LWI_INTR(intr_handler, callback_data) is shorthand for
+ * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data)
+ *
+ * The second form of usage looks like this:
+ *
+ * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ *
+ * This form is the same as the first except that it COMPLETELY IGNORES
+ * SIGNALS.  The caller must therefore beware that if 'timeout' is zero, or if
+ * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that
+ * can unblock the current process is 'condition' becoming TRUE.
+ *
+ * Another form of usage is:
+ * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval,
+ *                                               timeout_handler, callback_data);
+ * rc = l_wait_event(waitq, condition, &lwi);
+ * This is the same as previous case, but condition is checked once every
+ * 'interval' jiffies (if non-zero).
+ *
+ * Subtle synchronization point: this macro does *not* necessarily take the
+ * wait-queue spin-lock before returning, and hence the following idiom is
+ * safe ONLY when the caller provides some external locking:
+ *
+ *             Thread1                            Thread2
+ *
+ *   l_wait_event(&obj->wq, ....);                                       (1)
+ *
+ *                                    wake_up(&obj->wq):                 (2)
+ *                                         spin_lock(&q->lock);          (2.1)
+ *                                         __wake_up_common(q, ...);     (2.2)
+ *                                         spin_unlock(&q->lock, flags); (2.3)
+ *
+ *   OBD_FREE_PTR(obj);                                                  (3)
+ *
+ * As l_wait_event() may "short-cut" execution and return without taking
+ * wait-queue spin-lock, some additional synchronization is necessary to
+ * guarantee that step (3) can begin only after (2.3) finishes.
+ *
+ * XXX nikita: some ptlrpc daemon threads have races of that sort.
+ *
+ */
+
+#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1))
+
+/* Wait parameters consumed by __l_wait_event(); normally built with one of
+ * the LWI_* macros below rather than filled in by hand. */
+struct l_wait_info {
+        /* total time to wait; 0 means wait with no timeout */
+        cfs_duration_t lwi_timeout;
+        /* if non-zero, wake at most this often to re-check the condition */
+        cfs_duration_t lwi_interval;
+        /* called with lwi_cb_data when the timeout expires; NULL or a
+         * non-zero return ends the wait with -ETIMEDOUT */
+        int  (*lwi_on_timeout)(void *);
+        /* NULL: signals never end the wait.  Otherwise a pending signal ends
+         * it with -EINTR, calling this with lwi_cb_data first unless it is
+         * LWI_ON_SIGNAL_NOOP */
+        void (*lwi_on_signal)(void *);
+        /* opaque argument passed to both callbacks */
+        void  *lwi_cb_data;
+};
+
+/* Build an l_wait_info that waits up to 'time', calling cb(data) on expiry.
+ * NB: LWI_TIMEOUT ignores signals completely (no signal handler is set). */
+#define LWI_TIMEOUT(time, cb, data)             \
+((struct l_wait_info) {                         \
+        .lwi_timeout    = time,                 \
+        .lwi_interval   = 0,                    \
+        .lwi_on_timeout = cb,                   \
+        .lwi_on_signal  = NULL,                 \
+        .lwi_cb_data    = data                  \
+})
+
+/* As LWI_TIMEOUT, but the wait also wakes every 'interval' to re-check the
+ * condition.  Signals are ignored (no signal handler is set). */
+#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data)  \
+((struct l_wait_info) {                                 \
+        .lwi_timeout    = time,                         \
+        .lwi_interval   = interval,                     \
+        .lwi_on_timeout = cb,                           \
+        .lwi_on_signal  = NULL,                         \
+        .lwi_cb_data    = data                          \
+})
+
+/* Build an l_wait_info that waits up to 'time' (calling time_cb(data) on
+ * expiry) and can also be ended by a signal.  A NULL sig_cb is stored as
+ * LWI_ON_SIGNAL_NOOP, so signal handling stays enabled with no callback.
+ * Arguments are parenthesised so that an expression passed as sig_cb (for
+ * example a ?: choice between two handlers) is compared and selected as a
+ * whole.  NB: sig_cb is evaluated twice. */
+#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data)                          \
+((struct l_wait_info) {                                                        \
+        .lwi_timeout    = (time),                                              \
+        .lwi_on_timeout = (time_cb),                                           \
+        .lwi_on_signal  = ((sig_cb) == NULL) ? LWI_ON_SIGNAL_NOOP : (sig_cb),  \
+        .lwi_cb_data    = (data),                                              \
+        .lwi_interval   = 0                                                    \
+})
+
+#define LWI_INTR(cb, data)  LWI_TIMEOUT_INTR(0, NULL, cb, data)
+
+#ifdef __KERNEL__
+
+/*
+ * wait for @condition to become true, but no longer than timeout, specified
+ * by @info.
+ *
+ * @wq         wait queue, passed by name (the macro takes its address)
+ * @condition  expression re-evaluated before and after every sleep
+ * @info       pointer to a struct l_wait_info
+ * @ret        lvalue that receives the result
+ * @excl       non-zero to queue with cfs_waitq_add_exclusive()
+ *
+ * @ret is set to 0 once @condition is true, to -ETIMEDOUT when the timeout
+ * runs out and lwi_on_timeout is NULL or returns non-zero, and to -EINTR
+ * when a signal is pending, lwi_on_signal is non-NULL and no timeout
+ * remains.  If lwi_on_timeout returns zero the wait carries on with no
+ * timeout.  A signal seen in any other situation is cleared and the wait
+ * continues.
+ *
+ * The signal mask returned by l_w_e_set_sigs() is saved on entry and
+ * restored with cfs_block_sigs() on the way out.  The early "condition
+ * already true" exit happens before the wait link is queued or the mask is
+ * touched.
+ */
+#define __l_wait_event(wq, condition, info, ret, excl)                         \
+do {                                                                           \
+        cfs_waitlink_t __wait;                                                 \
+        cfs_duration_t __timeout = info->lwi_timeout;                          \
+        cfs_sigset_t   __blocked;                                              \
+                                                                               \
+        ret = 0;                                                               \
+        if (condition)                                                         \
+                break;                                                         \
+                                                                               \
+        cfs_waitlink_init(&__wait);                                            \
+        if (excl)                                                              \
+                cfs_waitq_add_exclusive(&wq, &__wait);                         \
+        else                                                                   \
+                cfs_waitq_add(&wq, &__wait);                                   \
+                                                                               \
+        /* Block all signals (just the non-fatal ones if no timeout). */       \
+        if (info->lwi_on_signal != NULL && __timeout == 0)                     \
+                __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS);                 \
+        else                                                                   \
+                __blocked = l_w_e_set_sigs(0);                                 \
+                                                                               \
+        for (;;) {                                                             \
+                set_current_state(TASK_INTERRUPTIBLE);                         \
+                                                                               \
+                if (condition)                                                 \
+                        break;                                                 \
+                                                                               \
+                if (__timeout == 0) {                                          \
+                        cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);       \
+                } else {                                                       \
+                        cfs_duration_t interval = info->lwi_interval?          \
+                                             min_t(cfs_duration_t,             \
+                                                 info->lwi_interval,__timeout):\
+                                             __timeout;                        \
+                        cfs_duration_t remaining = cfs_waitq_timedwait(&__wait,\
+                                                   CFS_TASK_INTERRUPTIBLE,     \
+                                                   interval);                  \
+                        __timeout = cfs_time_sub(__timeout,                    \
+                                            cfs_time_sub(interval, remaining));\
+                        if (__timeout == 0) {                                  \
+                                if (info->lwi_on_timeout == NULL ||            \
+                                    info->lwi_on_timeout(info->lwi_cb_data)) { \
+                                        ret = -ETIMEDOUT;                      \
+                                        break;                                 \
+                                }                                              \
+                                /* Take signals after the timeout expires. */  \
+                                if (info->lwi_on_signal != NULL)               \
+                                    (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS);   \
+                        }                                                      \
+                }                                                              \
+                                                                               \
+                if (condition)                                                 \
+                        break;                                                 \
+                if (cfs_signal_pending()) {                                    \
+                        if (info->lwi_on_signal != NULL && __timeout == 0) {   \
+                                if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \
+                                        info->lwi_on_signal(info->lwi_cb_data);\
+                                ret = -EINTR;                                  \
+                                break;                                         \
+                        }                                                      \
+                        /* We have to do this here because some signals */     \
+                        /* are not blockable - ie from strace(1).       */     \
+                        /* In these cases we want to schedule_timeout() */     \
+                        /* again, because we don't want that to return  */     \
+                        /* -EINTR when the RPC actually succeeded.      */     \
+                        /* the RECALC_SIGPENDING below will deliver the */     \
+                        /* signal properly.                             */     \
+                        cfs_clear_sigpending();                                \
+                }                                                              \
+        }                                                                      \
+                                                                               \
+        cfs_block_sigs(__blocked);                                             \
+                                                                               \
+        set_current_state(TASK_RUNNING);                                       \
+        cfs_waitq_del(&wq, &__wait);                                           \
+} while (0)
+
+#else /* !__KERNEL__ */
+/*
+ * liblustre (user-space) flavour of __l_wait_event().  The wq and excl
+ * arguments are accepted for interface parity with the kernel flavour but
+ * are not referenced in this expansion.
+ *
+ * ret is set to 0 up front and the macro finishes at once when condition
+ * already holds.  Otherwise it loops on liblustre_wait_event() until
+ * condition becomes true.  Each wait is passed info->lwi_interval when that
+ * is non-zero, else the remaining timeout.  A zero info->lwi_timeout is
+ * replaced by 1000000000 as the wait argument and no countdown or timeout
+ * handling takes place; a non-zero one is counted down with time(NULL)
+ * between wakeups and clamped at 0.
+ *
+ * When liblustre_wait_event() returns zero (and the interval test does not
+ * force another pass) with a non-zero lwi_timeout, the timeout path runs
+ * once: ret becomes -ETIMEDOUT and the loop ends if lwi_on_timeout is NULL
+ * or returns non-zero; otherwise waiting continues with no further timeout
+ * handling.
+ *
+ * condition is expanded more than once, and info is expanded
+ * unparenthesised (info->...), so callers should pass a plain pointer
+ * variable for info.
+ */
+#define __l_wait_event(wq, condition, info, ret, excl)                         \
+do {                                                                    \
+        long __timeout = info->lwi_timeout;                             \
+        long __now;                                                     \
+        long __then = 0;                                                \
+        int  __timed_out = 0;                                           \
+                                                                        \
+        ret = 0;                                                        \
+        if (condition)                                                  \
+                break;                                                  \
+                                                                        \
+        if (__timeout == 0)                                             \
+                __timeout = 1000000000;                                 \
+        else                                                            \
+                __then = time(NULL);                                    \
+                                                                        \
+        while (!(condition)) {                                          \
+                if (liblustre_wait_event(info->lwi_interval?:__timeout) || \
+                    (info->lwi_interval && info->lwi_interval < __timeout)) {\
+                        if (__timeout != 0 && info->lwi_timeout != 0) { \
+                                __now = time(NULL);                     \
+                                __timeout -= __now - __then;            \
+                                if (__timeout < 0)                      \
+                                        __timeout = 0;                  \
+                                __then = __now;                         \
+                        }                                               \
+                        continue;                                       \
+                }                                                       \
+                                                                        \
+                if (info->lwi_timeout != 0 && !__timed_out) {           \
+                        __timed_out = 1;                                \
+                        if (info->lwi_on_timeout == NULL ||             \
+                            info->lwi_on_timeout(info->lwi_cb_data)) {  \
+                                ret = -ETIMEDOUT;                       \
+                                break;                                  \
+                        }                                               \
+                }                                                       \
+        }                                                               \
+} while (0)
+
+#endif /* __KERNEL__ */
+
+/*
+ * Wait until condition is true, as governed by the struct l_wait_info that
+ * info points to.  Expands to a GNU statement expression whose value is the
+ * int result set by __l_wait_event(); this form passes 0 as the final (excl)
+ * argument.  info is evaluated once, into a local pointer; condition is
+ * passed through textually to __l_wait_event().
+ */
+#define l_wait_event(wq, condition, info)                       \
+({                                                              \
+        int                 __ret;                              \
+        struct l_wait_info *__info = (info);                    \
+                                                                \
+        __l_wait_event(wq, condition, __info, __ret, 0);        \
+        __ret;                                                  \
+})
+
+/*
+ * Same expansion as l_wait_event(), except that 1 is passed as the final
+ * (excl) argument of __l_wait_event().  The value of the statement
+ * expression is the int result set by __l_wait_event().
+ */
+#define l_wait_event_exclusive(wq, condition, info)             \
+({                                                              \
+        int                 __ret;                              \
+        struct l_wait_info *__info = (info);                    \
+                                                                \
+        __l_wait_event(wq, condition, __info, __ret, 1);        \
+        __ret;                                                  \
+})
+
+/*
+ * Build-flavour flag usable in ordinary C expressions instead of #ifdef:
+ * 0 when compiled with __KERNEL__ defined, 1 otherwise.
+ */
+#ifdef __KERNEL__
+#define LIBLUSTRE_CLIENT (0)
+#else
+#define LIBLUSTRE_CLIENT (1)
+#endif
+
+#endif /* _LUSTRE_LIB_H */
+
diff --git a/lustre/include/lustre_lite.h b/lustre/include/lustre_lite.h
new file mode 100644 (file)
index 0000000..09c9e7a
--- /dev/null
@@ -0,0 +1,138 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef _LL_H
+#define _LL_H
+
+#if defined(__linux__)
+#include <linux/lustre_lite.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_lite.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_lite.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <obd_class.h>
+#include <obd_ost.h>
+#include <lustre_net.h>
+#include <lustre_mds.h>
+#include <lustre_ha.h>
+
+#ifdef __KERNEL__
+
+/*
+ * careful, this is easy to screw up: ~0UL is converted to __u64 first, so
+ * the left shift by CFS_PAGE_SHIFT is carried out in 64 bits.
+ */
+#define PAGE_CACHE_MAXBYTES ((__u64)(~0UL) << CFS_PAGE_SHIFT)
+
+#endif
+
+/*
+ * Convert cookie c to a struct ll_async_page pointer.  This is a comma
+ * expression: it first LASSERT()s that the llap_magic field equals
+ * LLAP_MAGIC, then yields the cast pointer.  Note that c is expanded twice.
+ */
+#define LLAP_FROM_COOKIE(c)                                                    \
+        (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC),     \
+         (struct ll_async_page *)(c))
+
+#define LL_MAX_BLKSIZE          (4UL * 1024 * 1024)
+
+#include <lustre/lustre_user.h>
+
+
+/*
+ * Locking and I/O parameters for one read or write; filled in by
+ * lustre_build_lock_params().
+ */
+struct lustre_rw_params {
+        int                lrp_lock_mode; /* LCK_PR, LCK_PW or LCK_NL */
+        ldlm_policy_data_t lrp_policy;    /* l_extent.start/.end are set */
+        obd_flag           lrp_brw_flags; /* 0 or OBD_BRW_SRVLOCK */
+        int                lrp_ast_flags; /* LDLM_FL_BLOCK_NOWAIT or 0 */
+};
+
+/*
+ * Fill *params with the lock mode, extent, brw flags and AST flags for a
+ * read (cmd == OBD_BRW_READ) or write of len bytes at offset pos.
+ *
+ *  - reads take LCK_PR, everything else LCK_PW;
+ *  - an O_APPEND write locks the whole object, [0, OBD_OBJECT_EOF];
+ *  - otherwise the extent is [pos, pos + len - 1] and, for liblustre only,
+ *    OBD_CONNECT_SRVLOCK in connect_flags switches to OST-side locking
+ *    (LCK_NL plus OBD_BRW_SRVLOCK);
+ *  - O_NONBLOCK selects LDLM_FL_BLOCK_NOWAIT.
+ *
+ * XXX nikita: this function lives in the header because it is shared by the
+ * llite kernel module and the liblustre library, and there is no (?) better
+ * place to put it.
+ */
+static inline void lustre_build_lock_params(int cmd, unsigned long open_flags,
+                                            __u64 connect_flags,
+                                            loff_t pos, ssize_t len,
+                                            struct lustre_rw_params *params)
+{
+        const int appending = (cmd == OBD_BRW_WRITE) &&
+                              (open_flags & O_APPEND);
+
+        if (cmd == OBD_BRW_READ)
+                params->lrp_lock_mode = LCK_PR;
+        else
+                params->lrp_lock_mode = LCK_PW;
+        params->lrp_brw_flags = 0;
+
+        if (appending) {
+                /* for now O_APPEND always takes local locks. */
+                params->lrp_policy.l_extent.start = 0;
+                params->lrp_policy.l_extent.end   = OBD_OBJECT_EOF;
+        } else {
+                params->lrp_policy.l_extent.start = pos;
+                params->lrp_policy.l_extent.end   = pos + len - 1;
+
+                if (LIBLUSTRE_CLIENT &&
+                    (connect_flags & OBD_CONNECT_SRVLOCK)) {
+                        /*
+                         * liblustre: OST-side locking for all non-O_APPEND
+                         * reads/writes.
+                         */
+                        params->lrp_lock_mode = LCK_NL;
+                        params->lrp_brw_flags = OBD_BRW_SRVLOCK;
+                }
+                /*
+                 * nothing special for the kernel. In the future llite may
+                 * use OST-side locks for small writes into highly contended
+                 * files.
+                 */
+        }
+
+        if (open_flags & O_NONBLOCK)
+                params->lrp_ast_flags = LDLM_FL_BLOCK_NOWAIT;
+        else
+                params->lrp_ast_flags = 0;
+}
+
+/*
+ * This is embedded into liblustre and llite super-blocks to keep track of
+ * the connect flags (capabilities) supported by all imports that a given
+ * mount is connected to.
+ */
+struct lustre_client_ocd {
+        /*
+         * This is the conjunction (bitwise AND) of connect_flags across all
+         * imports (LOVs) this mount is connected to. This field is updated
+         * by ll_ocd_update() under ->lco_lock.
+         */
+        __u64      lco_flags;
+        spinlock_t lco_lock;
+};
+
+/*
+ * obd_notify() listener shared by liblustre and llite; see
+ * llu_fsswop_mount() and lustre_common_fill_super().  It reacts to a change
+ * of an import's connect_flags.
+ *
+ * owner is the struct lustre_client_ocd to update.  When watched is an OSC
+ * device, its import's ocd_connect_flags are AND-ed into owner->lco_flags
+ * under lco_lock and 0 is returned.  A notification from any other device
+ * type is reported with CERROR() and yields -EINVAL.  host and ev are not
+ * used.
+ *
+ * Again, it is dumped into this header for the lack of a better place.
+ */
+static inline int ll_ocd_update(struct obd_device *host,
+                                struct obd_device *watched,
+                                enum obd_notify_event ev, void *owner)
+{
+        struct lustre_client_ocd *ocd;
+        __u64 import_flags;
+
+        ENTRY;
+        if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) {
+                CERROR("unexpected notification from %s %s!\n",
+                       watched->obd_type->typ_name,
+                       watched->obd_name);
+                RETURN(-EINVAL);
+        }
+
+        ocd = owner;
+        import_flags =
+                watched->u.cli.cl_import->imp_connect_data.ocd_connect_flags;
+        CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n",
+               ocd->lco_flags, import_flags);
+
+        spin_lock(&ocd->lco_lock);
+        ocd->lco_flags &= import_flags;
+        spin_unlock(&ocd->lco_lock);
+
+        RETURN(0);
+}
+
+#endif
diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h
new file mode 100644 (file)
index 0000000..c05ce65
--- /dev/null
@@ -0,0 +1,425 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <info@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Generic infrastructure for managing a collection of logs.
+ *
+ * These logs are used for:
+ *
+ * - orphan recovery: OST adds record on create
+ * - mtime/size consistency: the OST adds a record on first write
+ * - open/unlinked objects: OST adds a record on destroy
+ *
+ * - mds unlink log: the MDS adds an entry upon delete
+ *
+ * - raid1 replication log between OST's
+ * - MDS replication logs
+ */
+
+#ifndef _LUSTRE_LOG_H
+#define _LUSTRE_LOG_H
+
+#if defined(__linux__)
+#include <linux/lustre_log.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_log.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_log.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <obd.h>
+#include <obd_ost.h>
+#include <lustre/lustre_idl.h>
+
+/*
+ * Format "LOGS/<name>" into logname.  logname must be a real array (not a
+ * pointer), because the buffer size is taken with sizeof(logname).
+ */
+#define LOG_NAME_LIMIT(logname, name)                   \
+        snprintf(logname, sizeof(logname), "LOGS/%s", name)
+#define LLOG_EEMPTY 4711
+
+struct plain_handle_data {
+        struct list_head    phd_entry;
+        struct llog_handle *phd_cat_handle;
+        struct llog_cookie  phd_cookie; /* cookie of this log in its cat */
+        int                 phd_last_idx;
+};
+
+struct cat_handle_data {
+        struct list_head        chd_head;
+        struct llog_handle     *chd_current_log; /* currently open log */
+};
+
+/* In-memory descriptor for a log object or log catalog */
+struct llog_handle {
+        struct rw_semaphore     lgh_lock;
+        struct llog_logid       lgh_id;              /* id of this log */
+        struct llog_log_hdr    *lgh_hdr;
+        struct file            *lgh_file;
+        int                     lgh_last_idx;
+        /* context whose loc_logops serve this handle; see llog_handle2ops() */
+        struct llog_ctxt       *lgh_ctxt;
+        /*
+         * NOTE(review): going by the member type names, phd is for a plain
+         * log and chd for a catalog; the discriminator is not visible here.
+         */
+        union {
+                struct plain_handle_data phd;
+                struct cat_handle_data   chd;
+        } u;
+};
+
+/* llog.c  -  general API */
+typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *);
+typedef int (*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data);
+extern struct llog_handle *llog_alloc_handle(void);
+int llog_init_handle(struct llog_handle *handle, int flags,
+                     struct obd_uuid *uuid);
+extern void llog_free_handle(struct llog_handle *handle);
+int llog_process(struct llog_handle *loghandle, llog_cb_t cb,
+                 void *data, void *catdata);
+int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb,
+                         void *data, void *catdata);
+extern int llog_cancel_rec(struct llog_handle *loghandle, int index);
+extern int llog_close(struct llog_handle *cathandle);
+extern int llog_get_size(struct llog_handle *loghandle);
+
+/* llog_cat.c   -  catalog api */
+struct llog_process_data {
+        void *lpd_data;
+        llog_cb_t lpd_cb;
+};
+
+struct llog_process_cat_data {
+        int     first_idx;
+        int     last_idx;
+        /* to process catalog across zero record */
+};
+
+int llog_cat_put(struct llog_handle *cathandle);
+int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec,
+                     struct llog_cookie *reccookie, void *buf);
+int llog_cat_cancel_records(struct llog_handle *cathandle, int count,
+                            struct llog_cookie *cookies);
+int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data);
+int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data);
+int llog_cat_set_first_idx(struct llog_handle *cathandle, int index);
+
+/* llog_obd.c */
+int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd,
+               int count,  struct llog_logid *logid,struct llog_operations *op);
+int llog_cleanup(struct llog_ctxt *);
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
+int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
+             struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
+             int numcookies);
+int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm,
+                int count, struct llog_cookie *cookies, int flags);
+
+int llog_obd_origin_setup(struct obd_device *obd, int index,
+                          struct obd_device *disk_obd, int count,
+                          struct llog_logid *logid);
+int llog_obd_origin_cleanup(struct llog_ctxt *ctxt);
+int llog_obd_origin_add(struct llog_ctxt *ctxt,
+                        struct llog_rec_hdr *rec, struct lov_stripe_md *lsm,
+                        struct llog_cookie *logcookies, int numcookies);
+
+int llog_cat_initialize(struct obd_device *obd, int count);
+int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd,
+                  int count, struct llog_catid *logid);
+
+int obd_llog_finish(struct obd_device *obd, int count);
+
+/* llog_ioctl.c */
+int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data);
+int llog_catalog_list(struct obd_device *obd, int count,
+                      struct obd_ioctl_data *data);
+
+/* llog_net.c */
+int llog_initiator_connect(struct llog_ctxt *ctxt);
+int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp);
+int llog_origin_connect(struct llog_ctxt *ctxt, int count,
+                        struct llog_logid *logid, struct llog_gen *gen,
+                        struct obd_uuid *uuid);
+int llog_handle_connect(struct ptlrpc_request *req);
+
+/* recov_thread.c */
+int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
+                         struct lov_stripe_md *lsm, int count,
+                         struct llog_cookie *cookies, int flags);
+int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
+int llog_repl_connect(struct llog_ctxt *ctxt, int count,
+                      struct llog_logid *logid, struct llog_gen *gen,
+                      struct obd_uuid *uuid);
+
+/*
+ * Table of llog methods, reached through llog_ctxt::loc_logops.  Entries may
+ * be NULL: the inline wrappers in this header (llog_write_rec(),
+ * llog_read_header(), llog_destroy(), llog_next_block(), llog_prev_block(),
+ * llog_create() and llog_connect()) test the relevant pointer and return
+ * -EOPNOTSUPP when it is not set.
+ */
+struct llog_operations {
+        int (*lop_write_rec)(struct llog_handle *loghandle,
+                             struct llog_rec_hdr *rec,
+                             struct llog_cookie *logcookies, int numcookies,
+                             void *, int idx);
+        int (*lop_destroy)(struct llog_handle *handle);
+        int (*lop_next_block)(struct llog_handle *h, int *curr_idx,
+                              int next_idx, __u64 *offset, void *buf, int len);
+        int (*lop_prev_block)(struct llog_handle *h,
+                              int prev_idx, void *buf, int len);
+        int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **,
+                          struct llog_logid *logid, char *name);
+        int (*lop_close)(struct llog_handle *handle);
+        int (*lop_read_header)(struct llog_handle *handle);
+
+        int (*lop_setup)(struct obd_device *obd, int ctxt_idx,
+                         struct obd_device *disk_obd, int count,
+                         struct llog_logid *logid);
+        int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp);
+        int (*lop_cleanup)(struct llog_ctxt *ctxt);
+        int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
+                       struct lov_stripe_md *lsm,
+                       struct llog_cookie *logcookies, int numcookies);
+        int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm,
+                          int count, struct llog_cookie *cookies, int flags);
+        int (*lop_connect)(struct llog_ctxt *ctxt, int count,
+                           struct llog_logid *logid, struct llog_gen *gen,
+                           struct obd_uuid *uuid);
+        /* XXX add 2 more: commit callbacks and llog recovery functions */
+};
+
+/* llog_lvfs.c */
+extern struct llog_operations llog_lvfs_ops;
+int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd,
+                      char *name, int count, struct llog_catid *idarray);
+
+/* Per-obd llog context; looked up by index with llog_get_context(). */
+struct llog_ctxt {
+        int                      loc_idx; /* my index in obd's ctxt array */
+        struct llog_gen          loc_gen;
+        struct obd_device       *loc_obd; /* points back to the containing obd*/
+        struct obd_export       *loc_exp; /* parent "disk" export (e.g. MDS) */
+        struct obd_import       *loc_imp; /* to use in RPC's: can be backward
+                                             pointing import */
+        struct llog_operations  *loc_logops; /* method table, may be NULL */
+        struct llog_handle      *loc_handle;
+        struct llog_canceld_ctxt *loc_llcd;
+        struct semaphore         loc_sem; /* protects loc_llcd and loc_imp */
+        void                    *llog_proc_cb;
+};
+
+/*
+ * Seed the mount-count half of ctxt's llog generation from the device behind
+ * ctxt->loc_exp.
+ *
+ * An MDS device (type name equal to LUSTRE_MDS_NAME) supplies
+ * u.mds.mds_mount_count, a device whose type name contains LUSTRE_OST_NAME
+ * supplies u.filter.fo_mount_count, and any other device type gets 0.
+ *
+ * strstr() returns non-NULL on a match, so the OST branch must test for
+ * "!= NULL"; negating it would read the filter member of the union for
+ * devices that are not OSTs and leave real OSTs with a count of 0.
+ */
+static inline void llog_gen_init(struct llog_ctxt *ctxt)
+{
+        struct obd_device *obd = ctxt->loc_exp->exp_obd;
+        const char *type_name = obd->obd_type->typ_name;
+
+        if (strcmp(type_name, LUSTRE_MDS_NAME) == 0)
+                ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count;
+        else if (strstr(type_name, LUSTRE_OST_NAME) != NULL)
+                ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count;
+        else
+                ctxt->loc_gen.mnt_cnt = 0;
+}
+
+/*
+ * Ordering predicate on llog generations: returns 1 when a is older than b,
+ * 0 otherwise.  The mount count is compared first; the connection count
+ * only breaks ties.
+ */
+static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b)
+{
+        if (a.mnt_cnt != b.mnt_cnt)
+                return a.mnt_cnt < b.mnt_cnt;
+
+        return a.conn_cnt < b.conn_cnt;
+}
+
+/* Post-increment the conn_cnt member of generation gen (an lvalue). */
+#define LLOG_GEN_INC(gen)  ((gen).conn_cnt ++)
+#define LLOG_PROC_BREAK 0x0001
+#define LLOG_DEL_RECORD 0x0002
+
+/*
+ * Fetch the operations table of ctxt into *lop.
+ *
+ * Returns -ENOTCONN (leaving *lop untouched) when ctxt is NULL,
+ * -EOPNOTSUPP when the context has no table (*lop is then NULL), and 0 on
+ * success.
+ */
+static inline int llog_obd2ops(struct llog_ctxt *ctxt,
+                               struct llog_operations **lop)
+{
+        struct llog_operations *ops;
+
+        if (ctxt == NULL)
+                return -ENOTCONN;
+
+        ops = ctxt->loc_logops;
+        *lop = ops;
+
+        return (ops != NULL) ? 0 : -EOPNOTSUPP;
+}
+
+/*
+ * Fetch into *lop the operations table of the context that loghandle
+ * belongs to.  Returns -EINVAL for a NULL handle, otherwise the result of
+ * llog_obd2ops() on the handle's context.
+ */
+static inline int llog_handle2ops(struct llog_handle *loghandle,
+                                  struct llog_operations **lop)
+{
+        return (loghandle != NULL) ?
+                llog_obd2ops(loghandle->lgh_ctxt, lop) : -EINVAL;
+}
+
+/* Return len as rounded by size_round(). */
+static inline int llog_data_len(int len)
+{
+        return size_round(len);
+}
+
+/*
+ * Return the llog context stored at slot index of obd->obd_llog_ctxt[], or
+ * NULL when index lies outside [0, LLOG_MAX_CTXTS).
+ */
+static inline struct llog_ctxt *llog_get_context(struct obd_device *obd,
+                                                 int index)
+{
+        if (index >= 0 && index < LLOG_MAX_CTXTS)
+                return obd->obd_llog_ctxt[index];
+
+        return NULL;
+}
+
+/*
+ * Write record rec through the lop_write_rec method of handle's operations
+ * table.
+ *
+ * Before dispatching, the record length is asserted to be already rounded
+ * by size_round(): rec->lrh_len on its own when buf is NULL, or lrh_len
+ * plus the sizes of struct llog_rec_hdr and struct llog_rec_tail when a
+ * separate buf is supplied.
+ *
+ * Returns the llog_handle2ops() error if the table cannot be resolved,
+ * -EOPNOTSUPP if the method is missing, otherwise the method's result.
+ */
+static inline int llog_write_rec(struct llog_handle *handle,
+                                 struct llog_rec_hdr *rec,
+                                 struct llog_cookie *logcookies,
+                                 int numcookies, void *buf, int idx)
+{
+        struct llog_operations *ops;
+        int total_len;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(handle, &ops);
+        if (rc != 0)
+                RETURN(rc);
+        if (ops->lop_write_rec == NULL)
+                RETURN(-EOPNOTSUPP);
+
+        total_len = (buf != NULL) ?
+                rec->lrh_len + sizeof(struct llog_rec_hdr) +
+                        sizeof(struct llog_rec_tail) :
+                rec->lrh_len;
+        LASSERT(size_round(total_len) == total_len);
+
+        rc = ops->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx);
+        RETURN(rc);
+}
+
+/*
+ * Invoke the lop_read_header method of handle's operations table.
+ *
+ * Returns the llog_handle2ops() error if the table cannot be resolved,
+ * -EOPNOTSUPP if the method is missing, otherwise the method's result.
+ */
+static inline int llog_read_header(struct llog_handle *handle)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(handle, &ops);
+        if (rc == 0) {
+                if (ops->lop_read_header != NULL)
+                        rc = ops->lop_read_header(handle);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Invoke the lop_destroy method of handle's operations table.
+ *
+ * Returns the llog_handle2ops() error if the table cannot be resolved,
+ * -EOPNOTSUPP if the method is missing, otherwise the method's result.
+ */
+static inline int llog_destroy(struct llog_handle *handle)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(handle, &ops);
+        if (rc == 0) {
+                if (ops->lop_destroy != NULL)
+                        rc = ops->lop_destroy(handle);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Disabled inline variant of llog_cancel().  It is kept out of the build by
+ * "#if 0" and would not compile as written: it uses "loghandle", which is
+ * neither a parameter nor a local.  A non-inline llog_cancel() taking a
+ * struct llog_ctxt * is declared earlier in this header.
+ */
+#if 0
+static inline int llog_cancel(struct obd_export *exp,
+                              struct lov_stripe_md *lsm, int count,
+                              struct llog_cookie *cookies, int flags)
+{
+        struct llog_operations *lop;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(loghandle, &lop);
+        if (rc)
+                RETURN(rc);
+        if (lop->lop_cancel == NULL)
+                RETURN(-EOPNOTSUPP);
+
+        rc = lop->lop_cancel(exp, lsm, count, cookies, flags);
+        RETURN(rc);
+}
+#endif
+
+/*
+ * Invoke the lop_next_block method of loghandle's operations table, passing
+ * all arguments through unchanged.
+ *
+ * Returns the llog_handle2ops() error if the table cannot be resolved,
+ * -EOPNOTSUPP if the method is missing, otherwise the method's result.
+ */
+static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx,
+                                  int next_idx, __u64 *cur_offset, void *buf,
+                                  int len)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(loghandle, &ops);
+        if (rc == 0) {
+                if (ops->lop_next_block != NULL)
+                        rc = ops->lop_next_block(loghandle, cur_idx, next_idx,
+                                                 cur_offset, buf, len);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Invoke the lop_prev_block method of loghandle's operations table, passing
+ * all arguments through unchanged.
+ *
+ * Returns the llog_handle2ops() error if the table cannot be resolved,
+ * -EOPNOTSUPP if the method is missing, otherwise the method's result.
+ */
+static inline int llog_prev_block(struct llog_handle *loghandle,
+                                  int prev_idx, void *buf, int len)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_handle2ops(loghandle, &ops);
+        if (rc == 0) {
+                if (ops->lop_prev_block != NULL)
+                        rc = ops->lop_prev_block(loghandle, prev_idx, buf,
+                                                 len);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Invoke the lop_create method of ctxt's operations table, passing res,
+ * logid and name through unchanged.
+ *
+ * Returns the llog_obd2ops() error if the table cannot be resolved
+ * (-ENOTCONN for a NULL ctxt), -EOPNOTSUPP if the method is missing,
+ * otherwise the method's result.
+ */
+static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res,
+                              struct llog_logid *logid, char *name)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_obd2ops(ctxt, &ops);
+        if (rc == 0) {
+                if (ops->lop_create != NULL)
+                        rc = ops->lop_create(ctxt, res, logid, name);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+/*
+ * Invoke the lop_connect method of ctxt's operations table, passing count,
+ * logid, gen and uuid through unchanged.
+ *
+ * Returns the llog_obd2ops() error if the table cannot be resolved
+ * (-ENOTCONN for a NULL ctxt), -EOPNOTSUPP if the method is missing,
+ * otherwise the method's result.
+ */
+static inline int llog_connect(struct llog_ctxt *ctxt, int count,
+                               struct llog_logid *logid, struct llog_gen *gen,
+                               struct obd_uuid *uuid)
+{
+        struct llog_operations *ops;
+        int rc;
+        ENTRY;
+
+        rc = llog_obd2ops(ctxt, &ops);
+        if (rc == 0) {
+                if (ops->lop_connect != NULL)
+                        rc = ops->lop_connect(ctxt, count, logid, gen, uuid);
+                else
+                        rc = -EOPNOTSUPP;
+        }
+        RETURN(rc);
+}
+
+#endif
similarity index 82%
rename from lustre/include/linux/lustre_mdc.h
rename to lustre/include/lustre_mdc.h
index b2787c0..c1dfef3 100644 (file)
 # include <linux/xattr_acl.h>
 # endif
 #endif
-#include <linux/lustre_handles.h>
+#include <lustre_handles.h>
 #include <libcfs/kp30.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_export.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_export.h>
 
 struct ptlrpc_client;
 struct obd_export;
diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h
new file mode 100644 (file)
index 0000000..40795f1
--- /dev/null
@@ -0,0 +1,96 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *   This file is part of Lustre, http://www.lustre.org
+ *
+ * MDS data structures.
+ * See also lustre_idl.h for wire formats of requests.
+ */
+
+#ifndef _LUSTRE_MDS_H
+#define _LUSTRE_MDS_H
+
+#include <lustre_handles.h>
+#include <libcfs/kp30.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_export.h>
+
+#if defined(__linux__)
+#include <linux/lustre_mds.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_mds.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_mds.h>
+#else
+#error Unsupported operating system.
+#endif
+
+struct ldlm_lock_desc;
+struct mds_obd;
+struct ptlrpc_connection;
+struct ptlrpc_client;
+struct obd_export;
+struct ptlrpc_request;
+struct obd_device;
+struct ll_file_data;
+
+/*
+ * In-memory form of an MDS update request; this is the record type taken by
+ * mds_reint_rec().
+ *
+ * NOTE(review): the pointer/length pairs (ur_name/ur_namelen,
+ * ur_tgt/ur_tgtlen, ur_eadata/ur_eadatalen, ur_logcookies/ur_cookielen)
+ * presumably describe buffers owned elsewhere -- confirm against the code
+ * that fills the record.
+ */
+struct mds_update_record {
+        __u32 ur_opcode;
+        struct ll_fid *ur_fid1;
+        struct ll_fid *ur_fid2;
+        int ur_namelen;
+        char *ur_name;
+        int ur_tgtlen;
+        char *ur_tgt;
+        int ur_eadatalen;
+        void *ur_eadata;
+        int ur_cookielen;
+        struct llog_cookie *ur_logcookies;
+        struct iattr ur_iattr;
+        struct lvfs_ucred ur_uc;
+        __u64 ur_rdev;
+        __u64 ur_time;
+        __u32 ur_mode;
+        __u32 ur_flags;
+        struct lvfs_grp_hash_entry *ur_grp_entry;
+};
+
+/* file data for open files on MDS */
+struct mds_file_data {
+        struct portals_handle mfd_handle; /* must be first */
+        atomic_t              mfd_refcount;
+        struct list_head      mfd_list; /* protected by med_open_lock */
+        __u64                 mfd_xid;
+        int                   mfd_mode;
+        struct dentry        *mfd_dentry;
+};
+
+/* ACL */
+#ifdef CONFIG_FS_POSIX_ACL
+#define LUSTRE_POSIX_ACL_MAX_ENTRIES    (32)
+#define LUSTRE_POSIX_ACL_MAX_SIZE       \
+                (xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES))
+#else
+#define LUSTRE_POSIX_ACL_MAX_SIZE       0
+#endif
+
+/* mds/mds_reint.c */
+int mds_reint_rec(struct mds_update_record *r, int offset,
+                  struct ptlrpc_request *req, struct lustre_handle *);
+
+/*
+ * ioctls for trying requests.  All commands share the type character
+ * IOC_REQUEST_TYPE and use the command numbers IOC_REQUEST_MIN_NR (30)
+ * through IOC_REQUEST_MAX_NR (35).
+ */
+#define IOC_REQUEST_TYPE                   'f'
+#define IOC_REQUEST_MIN_NR                 30
+
+#define IOC_REQUEST_GETATTR             _IOWR(IOC_REQUEST_TYPE, 30, long)
+#define IOC_REQUEST_READPAGE            _IOWR(IOC_REQUEST_TYPE, 31, long)
+#define IOC_REQUEST_SETATTR             _IOWR(IOC_REQUEST_TYPE, 32, long)
+#define IOC_REQUEST_CREATE              _IOWR(IOC_REQUEST_TYPE, 33, long)
+#define IOC_REQUEST_OPEN                _IOWR(IOC_REQUEST_TYPE, 34, long)
+#define IOC_REQUEST_CLOSE               _IOWR(IOC_REQUEST_TYPE, 35, long)
+#define IOC_REQUEST_MAX_NR               35
+
+#endif
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
new file mode 100644 (file)
index 0000000..217f0c4
--- /dev/null
@@ -0,0 +1,856 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _LUSTRE_NET_H
+#define _LUSTRE_NET_H
+
+#if defined(__linux__)
+#include <linux/lustre_net.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_net.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_net.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <libcfs/kp30.h>
+// #include <obd.h>
+#include <lnet/lnet.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_ha.h>
+#include <lustre_import.h>
+#include <lprocfs_status.h>
+
+/* MD flags we _always_ use */
+#define PTLRPC_MD_OPTIONS  0
+
+/* Define maxima for bulk I/O
+ * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks)
+ * these limits are system wide and not interface-local. */
+#define PTLRPC_MAX_BRW_SIZE     LNET_MTU
+#define PTLRPC_MAX_BRW_PAGES    (PTLRPC_MAX_BRW_SIZE/CFS_PAGE_SIZE)
+
+/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */
+#ifdef __KERNEL__
+# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0)
+#  error "PTLRPC_MAX_BRW_PAGES isn't a power of two"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE))
+#  error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE"
+# endif
+# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU)
+#  error "PTLRPC_MAX_BRW_SIZE too big"
+# endif
+# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV)
+#  error "PTLRPC_MAX_BRW_PAGES too big"
+# endif
+#endif /* __KERNEL__ */
+
+/* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request
+ * buffers */
+#define SVC_BUF_VMALLOC_THRESHOLD (2 * PAGE_SIZE)
+
+/* The following constants determine how memory is used to buffer incoming
+ * service requests.
+ *
+ * ?_NBUFS              # buffers to allocate when growing the pool
+ * ?_BUFSIZE            # bytes in a single request buffer
+ * ?_MAXREQSIZE         # maximum request service will receive
+ *
+ * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk
+ * of ?_NBUFS is added to the pool.
+ *
+ * Messages larger than ?_MAXREQSIZE are dropped.  Request buffers are
+ * considered full when less than ?_MAXREQSIZE is left in them.
+ */
+
+#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64)
+#define LDLM_NBUFS      (64 * smp_num_cpus)
+#define LDLM_BUFSIZE    (8 * 1024)
+#define LDLM_MAXREQSIZE (5 * 1024)
+#define LDLM_MAXREPSIZE (1024)
+
+#define MDT_MIN_THREADS 2UL     /* lower clamp of MDT/FLD_NUM_THREADS */
+#define MDT_MAX_THREADS 32UL    /* upper clamp of MDT/FLD_NUM_THREADS */
+/* num_physpages >> (25 - PAGE_SHIFT) divides the page count by the number of
+ * pages in 1 << 25 bytes (32MB); the result is then clamped to the range
+ * [MDT_MIN_THREADS, MDT_MAX_THREADS].  The lower clamp uses the named
+ * constant instead of repeating its value. */
+#define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
+                                  num_physpages >> (25 - PAGE_SHIFT)), \
+                            MDT_MIN_THREADS)
+#define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \
+                                  num_physpages >> (25 - PAGE_SHIFT)), \
+                            MDT_MIN_THREADS)
+
+#define MDS_MAX_THREADS 512UL
+#define MDS_DEF_THREADS max(2UL, min_t(unsigned long, 32, \
+                            num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT)))
+#define MDS_NBUFS       (64 * smp_num_cpus)
+#define MDS_BUFSIZE     (8 * 1024)
+/* Assume file name length = FNAME_MAX = 256 (true for ext3).
+ *        path name length = PATH_MAX = 4096
+ *        LOV MD size max  = EA_MAX = 4000
+ * symlink:  FNAME_MAX + PATH_MAX  <- largest
+ * link:     FNAME_MAX + PATH_MAX  (mds_rec_link < mds_rec_create)
+ * rename:   FNAME_MAX + FNAME_MAX
+ * open:     FNAME_MAX + EA_MAX
+ *
+ * MDS_MAXREQSIZE ~= 4736 bytes =
+ * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX
+ * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header
+ * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem:
+ *      = 9210 bytes = lustre_msg + mds_body + 160 * (easize + cookiesize)
+ *
+ * Realistic size is about 512 bytes (20 character name + 128 char symlink),
+ * except in the open case where there are a large number of OSTs in a LOV.
+ */
+#define MDS_MAXREQSIZE  (5 * 1024)
+#define MDS_MAXREPSIZE  max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56)
+
+/* FIXME fix all constants here.  Andreas suggests dynamically adding threads. */
+#define MGS_MAX_THREADS 8UL
+#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \
+                            num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT)))
+                                  
+#define MGS_NBUFS       (64 * smp_num_cpus)
+#define MGS_BUFSIZE     (8 * 1024)
+#define MGS_MAXREQSIZE  (5 * 1024)
+#define MGS_MAXREPSIZE  (9 * 1024)
+
+#define OST_MAX_THREADS 512UL
+#define OST_DEF_THREADS max_t(unsigned long, 2, \
+                              (num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus)
+#define OST_NBUFS       (64 * smp_num_cpus)
+#define OST_BUFSIZE     (8 * 1024)
+/* OST_MAXREQSIZE ~= 4768 bytes =
+ * lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote
+ *
+ * - single object with 16 pages is 512 bytes
+ * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover
+ */
+#define OST_MAXREQSIZE  (5 * 1024)
+#define OST_MAXREPSIZE  (9 * 1024)
+
+struct ptlrpc_connection {      /* cf. ptlrpc_get_connection(peer, self, uuid) */
+        struct list_head        c_link;
+        lnet_nid_t              c_self;        /* presumably the 'self' NID - TODO confirm */
+        lnet_process_id_t       c_peer;        /* presumably the 'peer' id - TODO confirm */
+        struct obd_uuid         c_remote_uuid; /* .uuid is printed after '@' by __DEBUG_REQ */
+        atomic_t                c_refcount;    /* NOTE(review): presumably driven by
+                                                * ptlrpc_connection_addref() and
+                                                * ptlrpc_put_connection() - confirm */
+};
+
+struct ptlrpc_client {          /* cf. ptlrpc_init_client(req_portal, rep_portal, name, ...) */
+        __u32                     cli_request_portal; /* printed after ':' by __DEBUG_REQ */
+        __u32                     cli_reply_portal;
+        char                     *cli_name;
+};
+
+/* state flags of requests */
+/* XXX only ones left are those used by the bulk descs as well! */
+#define PTL_RPC_FL_INTR      (1 << 0)  /* reply wait was interrupted by user */
+#define PTL_RPC_FL_TIMEOUT   (1 << 7)  /* request timed out waiting for reply */
+
+#define REQ_MAX_ACK_LOCKS 8
+
+#define SWAB_PARANOIA 1
+#if SWAB_PARANOIA
+/* unpacking: assert idx not unpacked already */
+#define LASSERT_REQSWAB(rq, idx)                                \
+do {                                                            \
+        LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8);  \
+        LASSERT (((rq)->rq_req_swab_mask & (1 << (idx))) == 0); \
+        (rq)->rq_req_swab_mask |= (1 << (idx));                 \
+} while (0)
+
+#define LASSERT_REPSWAB(rq, idx)                                \
+do {                                                            \
+        LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8);  \
+        LASSERT (((rq)->rq_rep_swab_mask & (1 << (idx))) == 0); \
+        (rq)->rq_rep_swab_mask |= (1 << (idx));                 \
+} while (0)
+
+/* just looking: assert idx already unpacked */
+#define LASSERT_REQSWABBED(rq, idx)                     \
+LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8 && \
+         ((rq)->rq_req_swab_mask & (1 << (idx))) != 0)
+
+#define LASSERT_REPSWABBED(rq, idx)                     \
+LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8 && \
+         ((rq)->rq_rep_swab_mask & (1 << (idx))) != 0)
+#else
+#define LASSERT_REQSWAB(rq, idx)
+#define LASSERT_REPSWAB(rq, idx)
+#define LASSERT_REQSWABBED(rq, idx)
+#define LASSERT_REPSWABBED(rq, idx)
+#endif
+
+union ptlrpc_async_args {
+        /* Scratchpad for passing args to completion interpreter. Users
+         * cast to the struct of their choosing, and LASSERT that this is
+         * big enough.  For _tons_ of context, OBD_ALLOC a struct and store
+         * a pointer to it here.  The pointer_arg ensures this struct is at
+         * least big enough for that. */
+        void      *pointer_arg[9];
+        __u64      space[4];
+};
+
+struct ptlrpc_request_set;
+typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int);
+
+struct ptlrpc_request_set {     /* cf. ptlrpc_prep_set(), ptlrpc_set_add_req(), ptlrpc_set_wait() */
+        int               set_remaining; /* # uncompleted requests */
+        cfs_waitq_t       set_waitq; /* signalled by ptlrpc_wake_client_req() for
+                                      * requests whose rq_set points here */
+        cfs_waitq_t      *set_wakeup_ptr;
+        struct list_head  set_requests; /* presumably linked through
+                                         * rq_set_chain - TODO confirm */
+        set_interpreter_func    set_interpret; /* completion callback */
+        void              *set_arg; /* completion context */
+        /* locked so that any old caller can communicate requests to
+         * the set holder who can then fold them into the lock-free set */
+        spinlock_t        set_new_req_lock;
+        struct list_head  set_new_requests;
+};
+
+struct ptlrpc_bulk_desc;
+
+/*
+ * ptlrpc callback & work item stuff
+ */
+struct ptlrpc_cb_id {
+        void   (*cbid_fn)(lnet_event_t *ev);     /* specific callback fn */
+        void    *cbid_arg;                      /* additional arg */
+};
+
+#define RS_MAX_LOCKS 4
+#define RS_DEBUG     1
+
+struct ptlrpc_reply_state {     /* pointed to by rq_reply_state in struct ptlrpc_request */
+        struct ptlrpc_cb_id    rs_cb_id;
+        struct list_head       rs_list;
+        struct list_head       rs_exp_list;
+        struct list_head       rs_obd_list;
+#if RS_DEBUG
+        struct list_head       rs_debug_list; /* only present when RS_DEBUG is non-zero */
+#endif
+        /* updates to the following flags are serialised by srv_request_lock */
+        unsigned int           rs_difficult:1;     /* ACK/commit stuff */
+        unsigned int           rs_scheduled:1;     /* being handled? */
+        unsigned int           rs_scheduled_ever:1;/* any schedule attempts? */
+        unsigned int           rs_handled:1;  /* been handled yet? */
+        unsigned int           rs_on_net:1;   /* reply_out_callback pending? */
+        unsigned int           rs_prealloc:1; /* rs from prealloc list */
+
+        int                    rs_size;
+        __u64                  rs_transno;
+        __u64                  rs_xid;
+        struct obd_export     *rs_export;
+        struct ptlrpc_service *rs_service;
+        lnet_handle_md_t       rs_md_h;
+        atomic_t               rs_refcount;
+
+        /* locks awaiting client reply ACK */
+        int                    rs_nlocks; /* presumably # of used entries in the
+                                           * two arrays below - TODO confirm */
+        struct lustre_handle   rs_locks[RS_MAX_LOCKS]; /* capacity RS_MAX_LOCKS */
+        ldlm_mode_t            rs_modes[RS_MAX_LOCKS]; /* same capacity as rs_locks */
+        /* last member: variable sized reply message */
+        struct lustre_msg      rs_msg;
+};
+
+struct ptlrpc_thread;
+
+enum rq_phase {                 /* stored in rq_phase; named by ptlrpc_rqphase2str() */
+        RQ_PHASE_NEW         = 0xebc0de00, /* all values share the 0xebc0de prefix; */
+        RQ_PHASE_RPC         = 0xebc0de01, /* only the low byte differs (0 to 4)    */
+        RQ_PHASE_BULK        = 0xebc0de02,
+        RQ_PHASE_INTERPRET   = 0xebc0de03,
+        RQ_PHASE_COMPLETE    = 0xebc0de04,
+};
+
+struct ptlrpc_request_pool {    /* cf. ptlrpc_init_rq_pool() / ptlrpc_free_rq_pool() */
+        spinlock_t prp_lock;              /* presumably guards prp_req_list - TODO confirm */
+        struct list_head prp_req_list;    /* list of ptlrpc_request structs */
+        int prp_rq_size;
+        void (*prp_populate)(struct ptlrpc_request_pool *, int); /* same signature as
+                                                                  * ptlrpc_add_rqs_to_pool() */
+};
+
+struct ptlrpc_request {         /* holds both client-side and server-side state, see below */
+        int rq_type; /* one of PTL_RPC_MSG_* */
+        struct list_head rq_list;
+        struct list_head rq_history_list;       /* server-side history */
+        __u64            rq_history_seq;        /* history sequence # */
+        int rq_status;          /* printed as the first "rc" value by __DEBUG_REQ */
+        spinlock_t rq_lock;     /* taken by ptlrpc_client_receiving_reply() and
+                                 * ptlrpc_client_replied() around their flag reads */
+        /* client-side flags; DEBUG_REQ_FLAGS prints all of them except
+         * rq_receiving_reply, rq_no_delay and rq_net_err */
+        unsigned int rq_intr:1, rq_replied:1, rq_err:1,
+                rq_timedout:1, rq_resend:1, rq_restart:1,
+                /*
+                 * when ->rq_replay is set, request is kept by the client even
+                 * after server commits corresponding transaction. This is
+                 * used for operations that require sequence of multiple
+                 * requests to be replayed. The only example currently is file
+                 * open/close. When last request in such a sequence is
+                 * committed, ->rq_replay is cleared on all requests in the
+                 * sequence.
+                 */
+                rq_replay:1,
+                rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
+                rq_no_delay:1, rq_net_err:1;
+        enum rq_phase rq_phase; /* one of RQ_PHASE_* */
+        atomic_t rq_refcount;   /* client-side refcount for SENT race */
+
+        struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */
+
+        int rq_request_portal;  /* XXX FIXME bug 249 */
+        int rq_reply_portal;    /* XXX FIXME bug 249 */
+
+        int rq_nob_received; /* client-side # reply bytes actually received  */
+
+        int rq_reqlen;          /* printed with rq_replen as "lens" by __DEBUG_REQ */
+        struct lustre_msg *rq_reqmsg;
+
+        int rq_timeout;         /* time to wait for reply (seconds) */
+        int rq_replen;
+        struct lustre_msg *rq_repmsg;
+        __u64 rq_transno;
+        __u64 rq_xid;
+        struct list_head rq_replay_list;
+
+#if SWAB_PARANOIA
+        __u32 rq_req_swab_mask; /* bit idx is set by LASSERT_REQSWAB(rq, idx) */
+        __u32 rq_rep_swab_mask; /* bit idx is set by LASSERT_REPSWAB(rq, idx) */
+#endif
+
+        int rq_import_generation;
+        enum lustre_imp_state rq_send_state;
+
+        /* client+server request */
+        lnet_handle_md_t     rq_req_md_h;
+        struct ptlrpc_cb_id  rq_req_cbid;
+
+        /* server-side... */
+        struct timeval       rq_arrival_time;       /* request arrival time */
+        struct ptlrpc_reply_state *rq_reply_state;  /* separated reply state */
+        struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/
+#if CRAY_XT3
+        __u32                rq_uid;            /* peer uid, used in MDS only */
+#endif
+
+        /* client-only incoming reply */
+        lnet_handle_md_t     rq_reply_md_h;
+        cfs_waitq_t          rq_reply_waitq; /* signalled by ptlrpc_wake_client_req()
+                                              * when rq_set is NULL */
+        struct ptlrpc_cb_id  rq_reply_cbid;
+
+        lnet_nid_t           rq_self;
+        lnet_process_id_t    rq_peer;
+        struct obd_export   *rq_export;
+        struct obd_import   *rq_import;
+
+        void (*rq_replay_cb)(struct ptlrpc_request *);
+        void (*rq_commit_cb)(struct ptlrpc_request *);
+        void  *rq_cb_data;
+
+        struct ptlrpc_bulk_desc *rq_bulk;       /* client side bulk */
+        time_t rq_sent;                         /* when request sent, seconds */
+
+        /* Multi-rpc bits */
+        struct list_head rq_set_chain;
+        struct ptlrpc_request_set *rq_set;      /* when non-NULL, its set_waitq is
+                                                 * signalled by ptlrpc_wake_client_req() */
+        void *rq_interpret_reply;               /* Async completion handler */
+        union ptlrpc_async_args rq_async_args;  /* Async completion context */
+        void *rq_ptlrpcd_data;
+        struct ptlrpc_request_pool *rq_pool;    /* Pool if request from
+                                                   preallocated list */
+};
+
+/* Translate req->rq_phase into a short printable name.  Any value that is
+ * not one of the RQ_PHASE_* constants yields "?Phase?". */
+static inline const char *
+ptlrpc_rqphase2str(const struct ptlrpc_request *req)
+{
+        const char *phase_name = "?Phase?";
+
+        if (req->rq_phase == RQ_PHASE_NEW)
+                phase_name = "New";
+        else if (req->rq_phase == RQ_PHASE_RPC)
+                phase_name = "Rpc";
+        else if (req->rq_phase == RQ_PHASE_BULK)
+                phase_name = "Bulk";
+        else if (req->rq_phase == RQ_PHASE_INTERPRET)
+                phase_name = "Interpret";
+        else if (req->rq_phase == RQ_PHASE_COMPLETE)
+                phase_name = "Complete";
+
+        return phase_name;
+}
+
+/* Spare the preprocessor, spoil the bugs. */
+/* FLAG() yields str when field is non-zero and "" otherwise.  Both macro
+ * arguments are parenthesised so that any expression can be passed without
+ * precedence surprises. */
+#define FLAG(field, str) ((field) ? (str) : "")
+
+/* Argument list matching REQ_FLAGS_FMT: the phase name from
+ * ptlrpc_rqphase2str() followed by nine single-letter strings (or ""), one
+ * per printed request flag.  req is parenthesised at every use. */
+#define DEBUG_REQ_FLAGS(req)                                                    \
+        ptlrpc_rqphase2str(req),                                                \
+        FLAG((req)->rq_intr, "I"), FLAG((req)->rq_replied, "R"),                \
+        FLAG((req)->rq_err, "E"),                                               \
+        FLAG((req)->rq_timedout, "X") /* eXpired */,                            \
+        FLAG((req)->rq_resend, "S"),                                            \
+        FLAG((req)->rq_restart, "T"), FLAG((req)->rq_replay, "P"),              \
+        FLAG((req)->rq_no_resend, "N"),                                         \
+        FLAG((req)->rq_waiting, "W")
+
+/* one %s for the phase name, then one %s per flag in DEBUG_REQ_FLAGS */
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s"
+
+#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...)                       \
+CDEB_TYPE(level, "@@@ " fmt                                                    \
+       " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "         \
+       REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
+       req->rq_transno,                                                        \
+       req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
+       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "<?>",  \
+       req->rq_import ?                                                        \
+          (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "<?>",  \
+       (req->rq_import && req->rq_import->imp_client) ?                        \
+           req->rq_import->imp_client->cli_request_portal : -1,                \
+       req->rq_reqlen, req->rq_replen,                                         \
+       atomic_read(&req->rq_refcount),                                         \
+       DEBUG_REQ_FLAGS(req),                                                   \
+       req->rq_reqmsg ? req->rq_reqmsg->flags : 0,                             \
+       req->rq_repmsg ? req->rq_repmsg->flags : 0,                             \
+       req->rq_status, req->rq_repmsg ? req->rq_repmsg->status : 0)
+
+/* DEBUG_REQ / DEBUG_REQ_EX: log fmt followed by the request summary built by
+ * __DEBUG_REQ.  A level containing D_ERROR or D_WARNING goes through
+ * CDEBUG_LIMIT; any other level goes through CDEBUG (DEBUG_REQ) or CDEBUG_EX
+ * (DEBUG_REQ_EX).  For most callers (level is a constant) this is resolved
+ * at compile time.
+ * NOTE(review): DEBUG_REQ_EX passes D_ERROR, not level, to CDEBUG_LIMIT, so
+ * a D_WARNING request is logged as D_ERROR there, unlike DEBUG_REQ - confirm
+ * that this is intended. */
+#define DEBUG_REQ(level, req, fmt, args...)                                    \
+do {                                                                           \
+        if ((level) & (D_ERROR | D_WARNING))                                   \
+            __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args);               \
+        else                                                                   \
+            __DEBUG_REQ(CDEBUG, level, req, fmt, ## args);                     \
+} while (0)
+
+#define DEBUG_REQ_EX(level, req, fmt, args...)                          \
+do {                                                                    \
+        if ((level) & (D_ERROR | D_WARNING))                            \
+            __DEBUG_REQ(CDEBUG_LIMIT, D_ERROR, req, fmt, ## args);      \
+        else                                                            \
+            __DEBUG_REQ(CDEBUG_EX, level, req, fmt, ## args);           \
+} while (0)
+
+struct ptlrpc_bulk_page {
+        struct list_head bp_link;
+        int bp_buflen;
+        int bp_pageoffset;                      /* offset within a page */
+        struct page *bp_page;
+};
+
+#define BULK_GET_SOURCE   0
+#define BULK_PUT_SINK     1
+#define BULK_GET_SINK     2
+#define BULK_PUT_SOURCE   3
+
+struct ptlrpc_bulk_desc {       /* cf. ptlrpc_prep_bulk_imp() / ptlrpc_prep_bulk_exp() */
+        unsigned int bd_success:1;              /* completed successfully */
+        unsigned int bd_network_rw:1;           /* accessible to the network; read
+                                                 * under bd_lock by
+                                                 * ptlrpc_bulk_active() */
+        unsigned int bd_type:2;                 /* {put,get}{source,sink}: 2 bits
+                                                 * fit the BULK_* values 0-3 */
+        unsigned int bd_registered:1;           /* client side */
+        spinlock_t   bd_lock;                   /* serialise with callback */
+        int bd_import_generation;
+        struct obd_export *bd_export;
+        struct obd_import *bd_import;
+        __u32 bd_portal;
+        struct ptlrpc_request *bd_req;          /* associated request */
+        cfs_waitq_t            bd_waitq;        /* server side only WQ */
+        int                    bd_iov_count;    /* # entries in bd_iov */
+        int                    bd_max_iov;      /* allocated size of bd_iov */
+        int                    bd_nob;          /* # bytes covered */
+        int                    bd_nob_transferred; /* # bytes GOT/PUT */
+
+        __u64                  bd_last_xid;
+
+        struct ptlrpc_cb_id    bd_cbid;         /* network callback info */
+        lnet_handle_md_t        bd_md_h;         /* associated MD */
+
+        /* trailing zero-length array; the element type depends on whether
+         * this is a kernel build */
+#if defined(__KERNEL__)
+        lnet_kiov_t             bd_iov[0];
+#else
+        lnet_md_iovec_t         bd_iov[0];
+#endif
+};
+
+struct lu_context;
+struct ptlrpc_thread {
+
+        struct list_head t_link; /* active threads for service, from svc->srv_threads */
+
+        void *t_data;            /* thread-private data (preallocated memory) */
+        __u32 t_flags;
+
+        unsigned int t_id; /* service thread index, from ptlrpc_start_threads */
+        cfs_waitq_t t_ctl_waitq;
+        struct lu_context *t_ctx;
+};
+
+struct ptlrpc_request_buffer_desc {     /* requests point back here through rq_rqbd */
+        struct list_head       rqbd_list;
+        struct list_head       rqbd_reqs;
+        struct ptlrpc_service *rqbd_service;
+        lnet_handle_md_t       rqbd_md_h;
+        int                    rqbd_refcount;   /* plain int, not atomic_t */
+        char                  *rqbd_buffer;
+        struct ptlrpc_cb_id    rqbd_cbid;
+        struct ptlrpc_request  rqbd_req;        /* a request embedded by value */
+};
+
+typedef int (*svc_handler_t)(struct ptlrpc_request *req);
+typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *);
+
+struct ptlrpc_service {
+        struct list_head srv_list;              /* chain thru all services */
+        int              srv_max_req_size;      /* biggest request to receive */
+        int              srv_max_reply_size;    /* biggest reply to send */
+        int              srv_buf_size;          /* size of individual buffers */
+        int              srv_nbuf_per_group;    /* # buffers to allocate in 1 group */
+        int              srv_nbufs;             /* total # req buffer descs allocated */
+        int              srv_nthreads;          /* # running threads */
+        int              srv_n_difficult_replies; /* # 'difficult' replies */
+        int              srv_n_active_reqs;     /* # reqs being served */
+        cfs_duration_t   srv_rqbd_timeout;      /* timeout before re-posting reqs, in tick */
+        int              srv_watchdog_timeout; /* soft watchdog timeout, in ms */
+        int              srv_num_threads;       /* # threads to start/started */
+        unsigned         srv_cpu_affinity:1;    /* bind threads to CPUs */
+
+        __u32            srv_req_portal;
+        __u32            srv_rep_portal;
+
+        int               srv_n_queued_reqs;    /* # reqs waiting to be served */
+        struct list_head  srv_request_queue;    /* reqs waiting for service */
+
+        struct list_head  srv_request_history;  /* request history */
+        __u64             srv_request_seq;      /* next request sequence # */
+        __u64             srv_request_max_cull_seq; /* highest seq culled from history */
+        svcreq_printfn_t  srv_request_history_print_fn; /* service-specific print fn */
+
+        struct list_head  srv_idle_rqbds;       /* request buffers to be reposted */
+        struct list_head  srv_active_rqbds;     /* req buffers receiving */
+        struct list_head  srv_history_rqbds;    /* request buffer history */
+        int               srv_nrqbd_receiving;  /* # posted request buffers */
+        int               srv_n_history_rqbds;  /* # request buffers in history */
+        int               srv_max_history_rqbds; /* max # request buffers in history */
+
+        atomic_t          srv_outstanding_replies;
+        struct list_head  srv_active_replies;   /* all the active replies */
+        struct list_head  srv_reply_queue;      /* replies waiting for service */
+
+        cfs_waitq_t       srv_waitq; /* all threads sleep on this. This
+                                      * wait-queue is signalled when new
+                                      * incoming request arrives and when
+                                      * difficult reply has to be handled. */
+
+        struct list_head   srv_threads;
+        svc_handler_t      srv_handler;
+
+        char *srv_name;  /* only statically allocated strings here; we don't clean them */
+
+        spinlock_t               srv_lock;
+
+        cfs_proc_dir_entry_t    *srv_procroot;
+        struct lprocfs_stats    *srv_stats;
+
+        /* List of free reply_states */
+        struct list_head         srv_free_rs_list;
+        /* waitq to run, when adding stuff to srv_free_rs_list */
+        cfs_waitq_t              srv_free_rs_waitq;
+        
+        /*
+         * if non-NULL called during thread creation (ptlrpc_start_thread())
+         * to initialize service specific per-thread state.
+         */
+        int (*srv_init)(struct ptlrpc_thread *thread);
+        /*
+         * if non-NULL called during thread shutdown (ptlrpc_main()) to
+         * destruct state created by ->srv_init().
+         */
+        void (*srv_done)(struct ptlrpc_thread *thread);
+
+        //struct ptlrpc_srv_ni srv_interfaces[0];
+};
+
+/* ptlrpc/events.c */
+extern lnet_handle_eq_t ptlrpc_eq_h;
+extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid,
+                               lnet_process_id_t *peer, lnet_nid_t *self);
+extern void request_out_callback (lnet_event_t *ev);
+extern void reply_in_callback(lnet_event_t *ev);
+extern void client_bulk_callback (lnet_event_t *ev);
+extern void request_in_callback(lnet_event_t *ev);
+extern void reply_out_callback(lnet_event_t *ev);
+extern void server_bulk_callback (lnet_event_t *ev);
+
+/* ptlrpc/connection.c */
+void ptlrpc_dump_connections(void);
+void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *);
+struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer,
+                                                lnet_nid_t self, struct obd_uuid *uuid);
+int ptlrpc_put_connection(struct ptlrpc_connection *c);
+struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *);
+void ptlrpc_init_connection(void);
+void ptlrpc_cleanup_connection(void);
+extern lnet_pid_t ptl_get_pid(void);
+
+/* ptlrpc/niobuf.c */
+int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc);
+void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc);
+int ptlrpc_register_bulk(struct ptlrpc_request *req);
+void ptlrpc_unregister_bulk (struct ptlrpc_request *req);
+
+/* Sample desc->bd_network_rw while holding desc->bd_lock (taken with the
+ * irqsave/irqrestore lock variants) and return the sampled value. */
+static inline int ptlrpc_bulk_active(struct ptlrpc_bulk_desc *desc)
+{
+        unsigned long irq_state;
+        int           network_rw;
+
+        spin_lock_irqsave(&desc->bd_lock, irq_state);
+        network_rw = desc->bd_network_rw;
+        spin_unlock_irqrestore(&desc->bd_lock, irq_state);
+
+        return network_rw;
+}
+
+int ptlrpc_send_reply(struct ptlrpc_request *req, int);
+int ptlrpc_reply(struct ptlrpc_request *req);
+int ptlrpc_error(struct ptlrpc_request *req);
+void ptlrpc_resend_req(struct ptlrpc_request *request);
+int ptl_send_rpc(struct ptlrpc_request *request, int noreply);
+int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd);
+
+/* ptlrpc/client.c */
+void ptlrpc_init_client(int req_portal, int rep_portal, char *name,
+                        struct ptlrpc_client *);
+void ptlrpc_cleanup_client(struct obd_import *imp);
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid);
+
+/* Return the value of req->rq_receiving_reply, read while holding
+ * req->rq_lock (irqsave/irqrestore lock variants). */
+static inline int
+ptlrpc_client_receiving_reply(struct ptlrpc_request *req)
+{
+        unsigned long irq_state;
+        int           receiving;
+
+        spin_lock_irqsave(&req->rq_lock, irq_state);
+        receiving = req->rq_receiving_reply;
+        spin_unlock_irqrestore(&req->rq_lock, irq_state);
+
+        return receiving;
+}
+
+/* Return the value of req->rq_replied, read while holding req->rq_lock
+ * (irqsave/irqrestore lock variants). */
+static inline int
+ptlrpc_client_replied(struct ptlrpc_request *req)
+{
+        unsigned long irq_state;
+        int           replied;
+
+        spin_lock_irqsave(&req->rq_lock, irq_state);
+        replied = req->rq_replied;
+        spin_unlock_irqrestore(&req->rq_lock, irq_state);
+
+        return replied;
+}
+
+/* Signal the wait queue associated with req: the request's own
+ * rq_reply_waitq when it belongs to no set, otherwise the set_waitq of the
+ * set it belongs to. */
+static inline void
+ptlrpc_wake_client_req(struct ptlrpc_request *req)
+{
+        if (req->rq_set != NULL) {
+                cfs_waitq_signal(&req->rq_set->set_waitq);
+                return;
+        }
+
+        cfs_waitq_signal(&req->rq_reply_waitq);
+}
+
+int ptlrpc_queue_wait(struct ptlrpc_request *req);
+int ptlrpc_replay_req(struct ptlrpc_request *req);
+void ptlrpc_unregister_reply(struct ptlrpc_request *req);
+void ptlrpc_restart_req(struct ptlrpc_request *req);
+void ptlrpc_abort_inflight(struct obd_import *imp);
+
+struct ptlrpc_request_set *ptlrpc_prep_set(void);
+int ptlrpc_set_next_timeout(struct ptlrpc_request_set *);
+int ptlrpc_check_set(struct ptlrpc_request_set *set);
+int ptlrpc_set_wait(struct ptlrpc_request_set *);
+int ptlrpc_expired_set(void *data);
+void ptlrpc_interrupted_set(void *data);
+void ptlrpc_mark_interrupted(struct ptlrpc_request *req);
+void ptlrpc_set_destroy(struct ptlrpc_request_set *);
+void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *);
+void ptlrpc_set_add_new_req(struct ptlrpc_request_set *,
+                            struct ptlrpc_request *);
+
+void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool);
+void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq);
+struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int,
+                                                void (*populate_pool)(struct ptlrpc_request_pool *, int));
+struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version,
+                                       int opcode, int count,
+                                       int *lengths, char **bufs);
+struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version,
+                                            int opcode, int count, int *lengths,
+                                            char **bufs,
+                                            struct ptlrpc_request_pool *pool);
+void ptlrpc_free_req(struct ptlrpc_request *request);
+void ptlrpc_req_finished(struct ptlrpc_request *request);
+void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request);
+struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req);
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
+                                               int npages, int type, int portal);
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req,
+                                              int npages, int type, int portal);
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk);
+void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
+                           cfs_page_t *page, int pageoffset, int len);
+void ptlrpc_retain_replayable_request(struct ptlrpc_request *req,
+                                      struct obd_import *imp);
+__u64 ptlrpc_next_xid(void);
+__u64 ptlrpc_sample_next_xid(void);
+__u64 ptlrpc_req_xid(struct ptlrpc_request *request);
+
+struct ptlrpc_service_conf {    /* taken by ptlrpc_init_svc_conf(); the fields have
+                                 * the same names as ptlrpc_init_svc() parameters */
+        int psc_nbufs;
+        int psc_bufsize;
+        int psc_max_req_size;
+        int psc_max_reply_size;
+        int psc_req_portal;
+        int psc_rep_portal;
+        int psc_watchdog_timeout; /* in ms */
+        int psc_num_threads;
+};
+
+/* ptlrpc/service.c */
+void ptlrpc_save_lock (struct ptlrpc_request *req,
+                       struct lustre_handle *lock, int mode);
+void ptlrpc_commit_replies (struct obd_device *obd);
+void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
+struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
+                                            svc_handler_t h, char *name,
+                                            struct proc_dir_entry *proc_entry,
+                                            svcreq_printfn_t prntfn);
+
+struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size,
+                                       int max_reply_size,
+                                       int req_portal, int rep_portal,
+                                       int watchdog_timeout, /* in ms */
+                                       svc_handler_t, char *name,
+                                       cfs_proc_dir_entry_t *proc_entry,
+                                       svcreq_printfn_t, int num_threads);
+void ptlrpc_stop_all_threads(struct ptlrpc_service *svc);
+
+int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc,
+                         char *base_name);
+int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
+                        char *name, int id);
+int ptlrpc_unregister_service(struct ptlrpc_service *service);
+int liblustre_check_services (void *arg);
+void ptlrpc_daemonize(char *name);
+int ptlrpc_service_health_check(struct ptlrpc_service *);
+
+/* name + service + thread + device; presumably thread-start args (TODO confirm) */
+struct ptlrpc_svc_data {
+        char *name;
+        struct ptlrpc_service *svc;
+        struct ptlrpc_thread *thread;
+        struct obd_device *dev;
+};
+
+/* ptlrpc/import.c */
+int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid);
+int ptlrpc_init_import(struct obd_import *imp);
+int ptlrpc_disconnect_import(struct obd_import *imp);
+int ptlrpc_import_recovery_state_machine(struct obd_import *imp);
+
+/* ptlrpc/pack_generic.c */
+int lustre_msg_swabbed(struct lustre_msg *msg);
+int lustre_msg_check_version(struct lustre_msg *msg, __u32 version);
+int lustre_pack_request(struct ptlrpc_request *, int count, const int *lens,
+                        char **bufs);
+int lustre_pack_reply(struct ptlrpc_request *, int count, const int *lens,
+                      char **bufs);
+void lustre_shrink_reply(struct ptlrpc_request *req,
+                         int segment, unsigned int newlen, int move_data);
+void lustre_free_reply_state(struct ptlrpc_reply_state *rs);
+int lustre_msg_size(int count, const int *lengths);
+int lustre_unpack_msg(struct lustre_msg *m, int len);
+void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen);
+int lustre_msg_buflen(struct lustre_msg *m, int n);
+char *lustre_msg_string (struct lustre_msg *m, int n, int max_len);
+void *lustre_swab_buf(struct lustre_msg *, int n, int minlen, void *swabber);
+void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen,
+                          void *swabber);
+void *lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen,
+                          void *swabber);
+/* Take one more reference on @rs; its refcount must already be > 0 (asserted). */
+static inline void
+ptlrpc_rs_addref(struct ptlrpc_reply_state *rs)
+{
+        LASSERT(atomic_read(&rs->rs_refcount) > 0);
+        atomic_inc(&rs->rs_refcount);
+}
+/* Drop a reference on @rs; lustre_free_reply_state() runs when it hits zero. */
+static inline void
+ptlrpc_rs_decref(struct ptlrpc_reply_state *rs)
+{
+        LASSERT(atomic_read(&rs->rs_refcount) > 0);
+        if (atomic_dec_and_test(&rs->rs_refcount))
+                lustre_free_reply_state(rs);
+}
+
+/* ldlm/ldlm_lib.c */
+int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg);
+int client_obd_cleanup(struct obd_device *obddev);
+int client_connect_import(struct lustre_handle *conn, struct obd_device *obd,
+                          struct obd_uuid *cluuid, struct obd_connect_data *);
+int client_disconnect_export(struct obd_export *exp);
+int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+                           int priority);
+int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid);
+int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid);
+
+/* ptlrpc/pinger.c */
+int ptlrpc_pinger_add_import(struct obd_import *imp);
+int ptlrpc_pinger_del_import(struct obd_import *imp);
+#ifdef __KERNEL__
+void ping_evictor_start(void);
+void ping_evictor_stop(void);
+#else
+#define ping_evictor_start()    do {} while (0)
+#define ping_evictor_stop()     do {} while (0)
+#endif
+
+/* ptlrpc/ptlrpcd.c */
+void ptlrpcd_wake(struct ptlrpc_request *req);
+void ptlrpcd_add_req(struct ptlrpc_request *req);
+int ptlrpcd_addref(void);
+void ptlrpcd_decref(void);
+
+/* ptlrpc/lproc_ptlrpc.c */
+#ifdef LPROCFS
+void ptlrpc_lprocfs_register_obd(struct obd_device *obd);
+void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd);
+#else
+static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {}
+static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {}
+#endif
+
+/* ptlrpc/llog_server.c */
+int llog_origin_handle_create(struct ptlrpc_request *req);
+int llog_origin_handle_destroy(struct ptlrpc_request *req);
+int llog_origin_handle_prev_block(struct ptlrpc_request *req);
+int llog_origin_handle_next_block(struct ptlrpc_request *req);
+int llog_origin_handle_read_header(struct ptlrpc_request *req);
+int llog_origin_handle_close(struct ptlrpc_request *req);
+int llog_origin_handle_cancel(struct ptlrpc_request *req);
+int llog_catinfo(struct ptlrpc_request *req);
+
+/* ptlrpc/llog_client.c */
+extern struct llog_operations llog_client_ops;
+
+#endif
diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h
new file mode 100644 (file)
index 0000000..6516fb9
--- /dev/null
@@ -0,0 +1,399 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef _LUSTRE_QUOTA_H
+#define _LUSTRE_QUOTA_H
+
+#if defined(__linux__)
+#include <linux/lustre_quota.h>
+#elif defined(__APPLE__)
+#include <darwin/lustre_quota.h>
+#elif defined(__WINNT__)
+#include <winnt/lustre_quota.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <lvfs.h>
+
+struct obd_device;
+struct client_obd;
+
+#ifndef NR_DQHASH
+#define NR_DQHASH 45
+#endif
+
+#ifdef HAVE_QUOTA_SUPPORT
+
+#ifdef __KERNEL__
+
+/* structures to access admin quotafile */
+struct lustre_mem_dqinfo {
+        unsigned int dqi_bgrace;
+        unsigned int dqi_igrace;
+        unsigned long dqi_flags;
+        unsigned int dqi_blocks;
+        unsigned int dqi_free_blk;
+        unsigned int dqi_free_entry;
+};
+
+struct lustre_quota_info {
+        struct file *qi_files[MAXQUOTAS];
+        struct lustre_mem_dqinfo qi_info[MAXQUOTAS];
+};
+
+#define DQ_STATUS_AVAIL         0x0     /* Available dquot */
+#define DQ_STATUS_SET           0x01    /* Somebody is setting dquot */
+#define DQ_STATUS_RECOVERY      0x02    /* dquot is in recovery */
+
+struct lustre_dquot {
+        /* Hash list in memory, protected by dquot_hash_lock */
+        struct list_head dq_hash;
+        /* Protects the data in lustre_dquot */
+        struct semaphore dq_sem;
+        /* Use count */
+        int dq_refcnt;
+        /* Pointer to the quota info this dquot belongs to */
+        struct lustre_quota_info *dq_info;
+        
+        loff_t dq_off;                  /* Offset of dquot on disk */
+        unsigned int dq_id;             /* ID this applies to (uid, gid) */
+        int dq_type;                    /* Type of quota (USRQUOTA, GRPQUOTA) */
+        unsigned short dq_status;       /* See DQ_STATUS_* */
+        unsigned long dq_flags;         /* See DQ_ in quota.h */
+        struct mem_dqblk dq_dqb;        /* Diskquota usage */
+};
+
+struct dquot_id {
+        struct list_head        di_link;
+        __u32                   di_id;
+};
+
+#define QFILE_CHK               1
+#define QFILE_RD_INFO           2
+#define QFILE_WR_INFO           3
+#define QFILE_INIT_INFO         4
+#define QFILE_RD_DQUOT          5
+#define QFILE_WR_DQUOT          6
+
+/* admin quotafile operations */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+int lustre_check_quota_file(struct lustre_quota_info *lqi, int type);
+int lustre_read_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_write_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_read_dquot(struct lustre_dquot *dquot);
+int lustre_commit_dquot(struct lustre_dquot *dquot);
+int lustre_init_quota_info(struct lustre_quota_info *lqi, int type);
+int lustre_get_qids(struct file *file, struct inode *inode, int type, 
+                    struct list_head *list);
+#else
+
+#ifndef DQ_FAKE_B
+#define DQ_FAKE_B       6
+#endif
+/* < 2.5.0 kernels: stubs return 0; NOTE(review): no lustre_get_qids() stub */
+static inline int lustre_check_quota_file(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+static inline int lustre_write_quota_info(struct lustre_quota_info *lqi,
+                                          int type)
+{
+        return 0;
+}
+static inline int lustre_read_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_commit_dquot(struct lustre_dquot *dquot)
+{
+        return 0;
+}
+static inline int lustre_init_quota_info(struct lustre_quota_info *lqi,
+                                         int type)
+{
+        return 0;
+}
+#endif  /* KERNEL_VERSION(2,5,0) */
+
+#define LL_DQUOT_OFF(sb)    DQUOT_OFF(sb)
+
+typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd,
+                                int opc);
+struct lustre_quota_ctxt {
+        struct super_block *lqc_sb;     /* superblock this applies to */
+        struct obd_import *lqc_import;  /* import used to send dqacq/dqrel RPC */
+        dqacq_handler_t lqc_handler;    /* dqacq/dqrel RPC handler, only for quota master */ 
+        unsigned long lqc_recovery:1;   /* Doing recovery */ 
+        unsigned long lqc_iunit_sz;     /* Unit size of file quota */
+        unsigned long lqc_itune_sz;     /* Trigger dqacq when available file quota is less
+                                         * than this value; trigger dqrel when it is more
+                                         * than this value + 1 iunit */
+        unsigned long lqc_bunit_sz;     /* Unit size of block quota */
+        unsigned long lqc_btune_sz;     /* See comment on lqc_itune_sz */
+};
+
+#else
+
+struct lustre_quota_info {
+};
+
+struct lustre_quota_ctxt {
+};
+
+#endif  /* !__KERNEL__ */
+
+#else
+
+#define LL_DQUOT_OFF(sb) do {} while(0)
+
+struct lustre_quota_info {
+};
+
+struct lustre_quota_ctxt {
+};
+
+#endif /* !HAVE_QUOTA_SUPPORT */
+
+/* If the (quota limit < qunit * slave count), the slave which can't
+ * acquire qunit should set its local limit as MIN_QLIMIT */
+#define MIN_QLIMIT      1
+
+struct quotacheck_thread_args {
+        struct obd_export   *qta_exp;   /* obd export */
+        struct obd_quotactl  qta_oqctl; /* obd_quotactl args */
+        struct super_block  *qta_sb;    /* obd super block */
+        atomic_t            *qta_sem;   /* obt_quotachecking */
+};
+
+typedef struct {
+        int (*quota_init) (void);
+        int (*quota_exit) (void);
+        int (*quota_setup) (struct obd_device *, struct lustre_cfg *);
+        int (*quota_cleanup) (struct obd_device *);
+        /* For quota master, close admin quota files */
+        int (*quota_fs_cleanup) (struct obd_device *);
+        int (*quota_ctl) (struct obd_export *, struct obd_quotactl *);
+        int (*quota_check) (struct obd_export *, struct obd_quotactl *);
+        int (*quota_recovery) (struct obd_device *);
+        
+        /* For quota master/slave, adjust quota limit after fs operation */
+        int (*quota_adjust) (struct obd_device *, unsigned int[], 
+                             unsigned int[], int, int); 
+        
+        /* For quota slave, set import, trigger quota recovery */
+        int (*quota_setinfo) (struct obd_export *, struct obd_device *);
+        
+        /* For quota slave, set proper thread resource capability */
+        int (*quota_enforce) (struct obd_device *, unsigned int);
+        
+        /* For quota slave, check whether specified uid/gid is over quota */
+        int (*quota_getflag) (struct obd_device *, struct obdo *);
+        
+        /* For quota slave, acquire/release quota from master if needed */
+        int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int);
+        
+        /* For quota client, poll whether the quota check is done */
+        int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *);
+        
+        /* For quota client, check whether specified uid/gid is over quota */
+        int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int);
+        
+        /* For quota client, set over quota flag for specified uid/gid */
+        int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int,
+                            obd_flag, obd_flag);
+} quota_interface_t;
+
+#define Q_COPY(out, in, member) (out)->member = (in)->member
+
+#define QUOTA_OP(interface, op) interface->quota_ ## op         
+
+#define QUOTA_CHECK_OP(interface, op)                           \
+do {                                                            \
+        if (!interface)                                         \
+                RETURN(0);                                      \
+        if (!QUOTA_OP(interface, op)) {                         \
+                CERROR("no quota operation: " #op "\n");        \
+                RETURN(-EOPNOTSUPP);                            \
+        }                                                       \
+} while(0)
+
+static inline int lquota_init(quota_interface_t *interface)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_init: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, init);
+        rc = QUOTA_OP(interface, init)();
+        RETURN(rc);
+}
+
+static inline int lquota_exit(quota_interface_t *interface) 
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_exit: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, exit);
+        rc = QUOTA_OP(interface, exit)();
+        RETURN(rc);
+}
+
+static inline int lquota_setup(quota_interface_t *interface,
+                               struct obd_device *obd, 
+                               struct lustre_cfg *lcfg) 
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_setup: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, setup);
+        rc = QUOTA_OP(interface, setup)(obd, lcfg);
+        RETURN(rc);
+}
+
+static inline int lquota_cleanup(quota_interface_t *interface,
+                                 struct obd_device *obd) 
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_cleanup: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, cleanup);
+        rc = QUOTA_OP(interface, cleanup)(obd);
+        RETURN(rc);
+}
+
+static inline int lquota_fs_cleanup(quota_interface_t *interface,
+                                    struct obd_device *obd)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_fs_cleanup: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, fs_cleanup);
+        rc = QUOTA_OP(interface, fs_cleanup)(obd);
+        RETURN(rc);
+}
+
+static inline int lquota_recovery(quota_interface_t *interface,
+                                  struct obd_device *obd) 
+{        
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_recovery: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, recovery);
+        rc = QUOTA_OP(interface, recovery)(obd);
+        RETURN(rc);
+}
+
+static inline int lquota_adjust(quota_interface_t *interface,
+                                struct obd_device *obd, 
+                                unsigned int qcids[], 
+                                unsigned int qpids[], 
+                                int rc, int opc) 
+{
+        int ret;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_adjust: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, adjust);
+        ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc);
+        RETURN(ret);
+}
+
+static inline int lquota_chkdq(quota_interface_t *interface,
+                               struct client_obd *cli,
+                               unsigned int uid, unsigned int gid)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_chkdq: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, chkdq);
+        rc = QUOTA_OP(interface, chkdq)(cli, uid, gid);
+        RETURN(rc);
+}
+
+static inline int lquota_setdq(quota_interface_t *interface,
+                               struct client_obd *cli,
+                               unsigned int uid, unsigned int gid,
+                               obd_flag valid, obd_flag flags)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_setdq: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, setdq);
+        rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags);
+        RETURN(rc);
+}
+
+static inline int lquota_poll_check(quota_interface_t *interface,
+                                    struct obd_export *exp,
+                                    struct if_quotacheck *qchk)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_poll_check: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, poll_check);
+        rc = QUOTA_OP(interface, poll_check)(exp, qchk);
+        RETURN(rc);
+}
+
+       
+static inline int lquota_setinfo(quota_interface_t *interface,
+                                 struct obd_export *exp, 
+                                 struct obd_device *obd) 
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_setinfo: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, setinfo);
+        rc = QUOTA_OP(interface, setinfo)(exp, obd);
+        RETURN(rc);
+}
+
+static inline int lquota_enforce(quota_interface_t *interface, 
+                                 struct obd_device *obd,
+                                 unsigned int ignore)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_enforce: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, enforce);
+        rc = QUOTA_OP(interface, enforce)(obd, ignore);
+        RETURN(rc);
+}
+
+static inline int lquota_getflag(quota_interface_t *interface,
+                                 struct obd_device *obd, struct obdo *oa)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_getflag: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, getflag);
+        rc = QUOTA_OP(interface, getflag)(obd, oa);
+        RETURN(rc);
+}
+        
+static inline int lquota_acquire(quota_interface_t *interface,
+                                 struct obd_device *obd, 
+                                 unsigned int uid, unsigned int gid)
+{
+        int rc;
+        ENTRY;
+        /* NULL interface: return 0; unset quota_acquire: return -EOPNOTSUPP */
+        QUOTA_CHECK_OP(interface, acquire);
+        rc = QUOTA_OP(interface, acquire)(obd, uid, gid);
+        RETURN(rc);
+}
+
+#ifndef __KERNEL__
+extern quota_interface_t osc_quota_interface;
+extern quota_interface_t mdc_quota_interface;
+extern quota_interface_t lov_quota_interface;
+#endif
+
+#endif /* _LUSTRE_QUOTA_H */
similarity index 95%
rename from lustre/include/linux/lustre_req_layout.h
rename to lustre/include/lustre_req_layout.h
index e0e5eeb..89660f4 100644 (file)
  *   license text for more details.
  */
 
-#ifndef _LINUX_LUSTRE_REQ_LAYOUT_H__
-#define _LINUX_LUSTRE_REQ_LAYOUT_H__
+#ifndef _LUSTRE_REQ_LAYOUT_H__
+#define _LUSTRE_REQ_LAYOUT_H__
 
 /* struct ptlrpc_request, lustre_msg* */
-#include <linux/lustre_net.h>
+#include <lustre_net.h>
 
 struct req_msg_field;
 struct req_format;
@@ -83,4 +83,4 @@ extern const struct req_msg_field RMF_NAME;
 extern const struct req_msg_field RMF_REC_CREATE;
 
 
-#endif /* _LINUX_LUSTRE_REQ_LAYOUT_H__ */
+#endif /* _LUSTRE_REQ_LAYOUT_H__ */
similarity index 91%
rename from lustre/include/linux/lustre_ucache.h
rename to lustre/include/lustre_ucache.h
index db28cef..16b5c1a 100644 (file)
@@ -34,9 +34,9 @@ struct upcall_cache_entry {
         struct group_info      *ue_group_info;
         atomic_t                ue_refcount;
         int                     ue_flags;
-        wait_queue_head_t       ue_waitq;
-        unsigned long           ue_acquire_expire;
-        unsigned long           ue_expire;
+        cfs_waitq_t             ue_waitq;
+        cfs_time_t              ue_acquire_expire;
+        cfs_time_t              ue_expire;
 };
 
 #define UC_CACHE_HASH_SIZE        (128)
@@ -49,8 +49,8 @@ struct upcall_cache {
 
         char                    uc_name[40];            /* for upcall */
         char                    uc_upcall[UC_CACHE_UPCALL_MAXPATH];
-        unsigned long           uc_acquire_expire;      /* jiffies */
-        unsigned long           uc_entry_expire;        /* jiffies */
+        cfs_time_t              uc_acquire_expire;      /* jiffies */
+        cfs_time_t              uc_entry_expire;        /* jiffies */
 };
 
 struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash,
similarity index 96%
rename from lustre/include/linux/lustre_ver.h.in
rename to lustre/include/lustre_ver.h.in
index 4abf818..5881c4f 100644 (file)
@@ -1,8 +1,6 @@
 #ifndef _LUSTRE_VER_H_
 #define _LUSTRE_VER_H_
 
-#include <linux/lustre_idl.h>
-
 #define LUSTRE_MAJOR @AC_LUSTRE_MAJOR@
 #define LUSTRE_MINOR @AC_LUSTRE_MINOR@
 #define LUSTRE_PATCH @AC_LUSTRE_PATCH@
diff --git a/lustre/include/lvfs.h b/lustre/include/lvfs.h
new file mode 100644 (file)
index 0000000..42e8544
--- /dev/null
@@ -0,0 +1,61 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * lustre VFS/process permission interface
+ */
+
+#ifndef __LVFS_H__
+#define __LVFS_H__
+
+#define LL_FID_NAMELEN (16 + 1 + 8 + 1)
+
+#include <libcfs/kp30.h>
+#if defined(__linux__)
+#include <linux/lvfs.h>
+#elif defined(__APPLE__)
+#include <darwin/lvfs.h>
+#elif defined(__WINNT__)
+#include <winnt/lvfs.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include <lustre_ucache.h>
+
+
+#ifdef LIBLUSTRE
+#include <lvfs_user_fs.h>
+#endif
+
+/* lvfs_common.c */
+struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data);
+
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+               struct lvfs_ucred *cred);
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+              struct lvfs_ucred *cred);
+
+/* Writes "<id hex>:<gen, 8 hex digits>" to @str; needs LL_FID_NAMELEN bytes. */
+static inline int ll_fid2str(char *str, __u64 id, __u32 generation)
+{
+        return sprintf(str, "%llx:%08x", (unsigned long long)id, generation);
+}
+
+#endif
diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h
new file mode 100644 (file)
index 0000000..6ca1d9e
--- /dev/null
@@ -0,0 +1,200 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Extension of lu_object.h for metadata objects
+ *
+ *  Copyright (C) 2006 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _LUSTRE_MD_OBJECT_H
+#define _LUSTRE_MD_OBJECT_H
+
+/*
+ * Sub-class of lu_object with methods common for "meta-data" objects in MDT
+ * stack.
+ *
+ * Meta-data objects implement namespace operations: you can link, unlink
+ * them, and treat them as directories.
+ *
+ * Examples: mdt and cmm are implementations of the md interface.
+ */
+
+
+/*
+ * super-class definitions.
+ */
+#include <lu_object.h>
+
+struct md_device;
+struct md_device_operations;
+struct md_object;
+
+/*
+ * Operations implemented for each md object (both directory and leaf).
+ */
+struct md_object_operations {
+        int (*moo_attr_get)(struct lu_context *ctxt, struct md_object *dt,
+                            struct lu_attr *attr);
+        int (*moo_attr_set)(struct lu_context *ctxt, struct md_object *dt,
+                            struct lu_attr *attr);
+
+        int (*moo_xattr_get)(struct lu_context *ctxt, struct md_object *obj,
+                             void *buf, int buf_len, const char *name);
+
+        int (*moo_xattr_set)(struct lu_context *ctxt, struct md_object *obj,
+                             void *buf, int buf_len, const char *name);
+        /* part of cross-ref operation */
+        int (*moo_object_create)(struct lu_context *,
+                                 struct md_object *, struct lu_attr *);
+        int (*moo_ref_add)(struct lu_context *, struct md_object *);
+        int (*moo_ref_del)(struct lu_context *, struct md_object *);
+        int (*moo_open)(struct lu_context *, struct md_object *);
+        int (*moo_close)(struct lu_context *, struct md_object *);
+};
+
+/*
+ * Operations implemented for each directory object.
+ */
+struct md_dir_operations {
+        int (*mdo_lookup)(struct lu_context *, struct md_object *,
+                          const char *, struct lu_fid *);
+
+        int (*mdo_mkdir)(struct lu_context *, struct lu_attr *,
+                         struct md_object *, const char *,
+                         struct md_object *);
+
+        int (*mdo_rename)(struct lu_context *ctxt, struct md_object *spobj,
+                          struct md_object *tpobj, struct md_object *sobj,
+                          const char *sname, struct md_object *tobj,
+                          const char *tname);
+
+        int (*mdo_link)(struct lu_context *ctxt, struct md_object *tobj,
+                        struct md_object *sobj, const char *name);
+
+        /* partial ops for cross-ref case */
+        int (*mdo_name_insert)(struct lu_context *, struct md_object *,
+                               const char *, const struct lu_fid *,
+                               struct lu_attr *);
+        int (*mdo_name_remove)(struct lu_context *, struct md_object *,
+                               const char *, struct lu_attr *);
+};
+
+struct md_device_operations {
+        /* method for getting/setting device wide back stored config data, like
+         * last used meta-sequence, etc. */
+        int (*mdo_config) (struct lu_context *ctx,
+                           struct md_device *m, const char *name,
+                           void *buf, int size, int mode);
+
+        /* meta-data device related handlers. */
+        int (*mdo_root_get)(struct lu_context *ctx,
+                            struct md_device *m, struct lu_fid *f);
+        int (*mdo_statfs)(struct lu_context *ctx,
+                          struct md_device *m, struct kstatfs *sfs);
+
+};
+
+struct md_device {
+        struct lu_device             md_lu_dev;
+        struct md_device_operations *md_ops;
+};
+
+struct md_object {
+        struct lu_object             mo_lu;
+        struct md_object_operations *mo_ops;
+        struct md_dir_operations    *mo_dir_ops;
+};
+/* Tests the LU_DEVICE_MD tag of @d; presumably also true for NULL (see ergo). */
+static inline int lu_device_is_md(const struct lu_device *d)
+{
+        return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD);
+}
+/* Downcast @d to the md_device embedding it; asserts the device is md. */
+static inline struct md_device *lu2md_dev(const struct lu_device *d)
+{
+        LASSERT(lu_device_is_md(d));
+        return container_of0(d, struct md_device, md_lu_dev);
+}
+/* Upcast: address of the lu_device embedded in @d. */
+static inline struct lu_device *md2lu_dev(struct md_device *d)
+{
+        return &d->md_lu_dev;
+}
+/* Downcast @o to the md_object embedding it; asserts its device is md. */
+static inline struct md_object *lu2md(const struct lu_object *o)
+{
+        LASSERT(lu_device_is_md(o->lo_dev));
+        return container_of0(o, struct md_object, mo_lu);
+}
+/* md_object view of lu_object_next() applied to @obj's embedded lu_object. */
+static inline struct md_object *md_object_next(const struct md_object *obj)
+{
+        return lu2md(lu_object_next(&obj->mo_lu));
+}
+/* md_device reached through @o's mo_lu.lo_dev; asserts that device is md. */
+static inline struct md_device *md_device_get(const struct md_object *o)
+{
+        LASSERT(lu_device_is_md(o->mo_lu.lo_dev));
+        return container_of0(o->mo_lu.lo_dev, struct md_device, md_lu_dev);
+}
+/* Init the embedded lu_device with type @t; returns lu_device_init()'s result. */
+static inline int md_device_init(struct md_device *md, struct lu_device_type *t)
+{
+       return lu_device_init(&md->md_lu_dev, t);
+}
+/* Finalize the lu_device embedded in @md via lu_device_fini(). */
+static inline void md_device_fini(struct md_device *md)
+{
+       lu_device_fini(&md->md_lu_dev);
+}
+
+/* md operations: thin dispatchers via mo_ops/mo_dir_ops, no NULL checks */
+static inline int mo_attr_get(struct lu_context *cx, struct md_object *m,
+                              struct lu_attr *at)
+{
+        return m->mo_ops->moo_attr_get(cx, m, at);
+}
+/* Calls @m's moo_object_create hook; mo_ops and the hook are not NULL-checked. */
+static inline int mo_object_create(struct lu_context *cx, struct md_object *m,
+                                   struct lu_attr *at)
+{
+        return m->mo_ops->moo_object_create(cx, m, at);
+}
+/* Calls directory @p's mdo_lookup hook with @name and @f; hooks are unchecked. */
+static inline int mdo_lookup(struct lu_context *cx, struct md_object *p,
+                            const char *name, struct lu_fid *f)
+{
+        return p->mo_dir_ops->mdo_lookup(cx, p, name, f);
+}
+/* Calls parent @p's mdo_mkdir hook with @at, @name and @c; hooks are unchecked. */
+static inline int mdo_mkdir(struct lu_context *cx, struct lu_attr *at,
+                            struct md_object *p, const char *name,
+                            struct md_object *c)
+{
+        return p->mo_dir_ops->mdo_mkdir(cx, at, p, name, c);
+}
+/* Calls directory @p's mdo_name_insert hook with @name, @f, @at; unchecked. */
+static inline int mdo_name_insert(struct lu_context *cx, struct md_object *p,
+                                  const char *name, const struct lu_fid *f,
+                                  struct lu_attr *at)
+{
+        return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, at);
+}
+
+#endif /* _LUSTRE_MD_OBJECT_H */
diff --git a/lustre/include/obd.h b/lustre/include/obd.h
new file mode 100644 (file)
index 0000000..0592ab8
--- /dev/null
@@ -0,0 +1,1114 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+
+#ifndef __OBD_H
+#define __OBD_H
+
+#if defined(__linux__)
+#include <linux/obd.h>
+#elif defined(__APPLE__)
+#include <darwin/obd.h>
+#elif defined(__WINNT__)
+#include <winnt/obd.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#define IOC_OSC_TYPE         'h'
+#define IOC_OSC_MIN_NR       20
+#define IOC_OSC_SET_ACTIVE   _IOWR(IOC_OSC_TYPE, 21, struct obd_device *)
+#define IOC_OSC_MAX_NR       50
+
+#define IOC_MDC_TYPE         'i'
+#define IOC_MDC_MIN_NR       20
+/* Moved to lustre_user.h
+#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
+#define IOC_MDC_MAX_NR       50
+
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_export.h>
+#include <lustre_quota.h>
+#include <lu_object.h>
+
+/* this is really local to the OSC
+ *
+ * One set of page lists.  struct lov_oinfo embeds two of these: one for
+ * reads (loi_read_lop) and one for writes (loi_write_lop). */
+struct loi_oap_pages {
+        struct list_head        lop_pending;
+        int                     lop_num_pending; /* presumably the number of
+                                                  * entries on lop_pending --
+                                                  * confirm */
+        struct list_head        lop_urgent;
+        struct list_head        lop_pending_group;
+};
+
+struct osc_async_rc {   /* embedded as lov_oinfo::loi_ar and as
+                         * client_obd::cl_ar */
+        int     ar_rc;
+        int     ar_force_sync;
+        int     ar_min_xid;
+};
+
+struct lov_oinfo {                 /* per-stripe data structure */
+        __u64 loi_id;              /* object ID on the target OST */
+        __u64 loi_gr;              /* object group on the target OST */
+        int loi_ost_idx;           /* OST stripe index in lov_tgt_desc->tgts */
+        int loi_ost_gen;           /* generation of this loi_ost_idx */
+
+        /* used by the osc to keep track of what objects to build into rpcs;
+         * every list head from here down to loi_read_item is set up by
+         * loi_init() */
+        struct loi_oap_pages loi_read_lop;
+        struct loi_oap_pages loi_write_lop;
+        /* _cli_ is poorly named, it should be _ready_ */
+        struct list_head loi_cli_item;
+        struct list_head loi_write_item;
+        struct list_head loi_read_item;
+
+        unsigned loi_kms_valid:1;  /* presumably: loi_kms is usable -- confirm */
+        __u64 loi_kms;             /* known minimum size */
+        struct ost_lvb loi_lvb;
+        struct osc_async_rc     loi_ar;
+};
+
+/*
+ * Initialise every list head embedded in @loi: the three lists of the read
+ * and of the write loi_oap_pages, followed by the cli/write/read items.
+ * No other field of @loi is touched.
+ */
+static inline void loi_init(struct lov_oinfo *loi)
+{
+        struct loi_oap_pages *rd = &loi->loi_read_lop;
+        struct loi_oap_pages *wr = &loi->loi_write_lop;
+
+        CFS_INIT_LIST_HEAD(&rd->lop_pending);
+        CFS_INIT_LIST_HEAD(&rd->lop_urgent);
+        CFS_INIT_LIST_HEAD(&rd->lop_pending_group);
+
+        CFS_INIT_LIST_HEAD(&wr->lop_pending);
+        CFS_INIT_LIST_HEAD(&wr->lop_urgent);
+        CFS_INIT_LIST_HEAD(&wr->lop_pending_group);
+
+        CFS_INIT_LIST_HEAD(&loi->loi_cli_item);
+        CFS_INIT_LIST_HEAD(&loi->loi_write_item);
+        CFS_INIT_LIST_HEAD(&loi->loi_read_item);
+}
+
+/* Extent array item describing the extent info of a joined file.
+ * lov_array_info::lai_ext_array points at an array of these. */
+struct lov_extent {
+        __u64 le_start;            /* extent start */
+        __u64 le_len;              /* extent length */
+        int   le_loi_idx;          /* extent #1 loi's index in lsm loi array */
+        int   le_stripe_count;     /* extent stripe count */
+};
+
+/* LOV array info describing the array EA of a joined file.
+ * lov_stripe_md::lsm_array points at one of these. */
+struct lov_array_info {
+        struct llog_logid    lai_array_id;    /* MDS med llog object id */
+        unsigned             lai_ext_count; /* number of extents, presumably
+                                             * the length of lai_ext_array */
+        struct lov_extent    *lai_ext_array; /* extent desc array */
+};
+
+struct lov_stripe_md {
+        spinlock_t       lsm_lock;
+        void            *lsm_lock_owner; /* debugging */
+
+        struct {
+                /* Public members. */
+                __u64 lw_object_id;        /* lov object id */
+                __u64 lw_object_gr;        /* lov object group */
+                __u64 lw_maxbytes;         /* maximum possible file size */
+                unsigned long lw_xfersize; /* optimal transfer size */
+
+                /* LOV-private members start here -- only for use in lov/. */
+                __u32 lw_magic;
+                __u32 lw_stripe_size;      /* size of the stripe */
+                __u32 lw_pattern;          /* striping pattern (RAID0, RAID1) */
+                unsigned lw_stripe_count;  /* number of objects being striped over */
+        } lsm_wire; /* members are also reachable as lsm_<name> through the
+                     * lsm_* macros defined right after this struct; the
+                     * whole sub-struct is compared bytewise by
+                     * lov_stripe_md_cmp() */
+
+        struct lov_array_info *lsm_array; /* Only for joined file array info */
+        struct lov_oinfo lsm_oinfo[0]; /* trailing per-stripe array; its
+                                        * length is not fixed by this type */
+};
+
+#define lsm_object_id    lsm_wire.lw_object_id
+#define lsm_object_gr    lsm_wire.lw_object_gr
+#define lsm_maxbytes     lsm_wire.lw_maxbytes
+#define lsm_xfersize     lsm_wire.lw_xfersize
+#define lsm_magic        lsm_wire.lw_magic
+#define lsm_stripe_size  lsm_wire.lw_stripe_size
+#define lsm_pattern      lsm_wire.lw_pattern
+#define lsm_stripe_count lsm_wire.lw_stripe_count
+
+/*
+ * Compare the lsm_wire parts of @m1 and @m2 byte by byte.
+ *
+ * Returns the memcmp() result: 0 when the two are identical, non-zero
+ * otherwise.
+ */
+static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1,
+                                    struct lov_stripe_md *m2)
+{
+        const void *lhs = &m1->lsm_wire;
+        const void *rhs = &m2->lsm_wire;
+
+        /*
+         * ->lsm_wire contains padding, but it should be zeroed out during
+         * allocation, so a raw byte comparison is expected to be reliable.
+         */
+        return memcmp(lhs, rhs, sizeof(m1->lsm_wire));
+}
+
+void lov_stripe_lock(struct lov_stripe_md *md);
+void lov_stripe_unlock(struct lov_stripe_md *md);
+
+struct obd_type {       /* referenced by obd_device::obd_type */
+        struct list_head typ_chain;
+        struct obd_ops *typ_dt_ops;
+        struct md_ops *typ_md_ops;
+        struct proc_dir_entry *typ_procroot;
+        char *typ_name;
+        int  typ_refcnt;
+        struct lu_device_type *typ_lu;
+};
+
+struct brw_page {       /* NOTE(review): looks like one page of a bulk
+                         * read/write (offset, page, count, flags) --
+                         * confirm units against the callers */
+        obd_off  off;
+        cfs_page_t *pg;
+        int count;
+        obd_flag flag;
+};
+
+enum async_flags {      /* each value is a distinct single bit; the ap_*
+                         * names below are the callbacks declared in
+                         * struct obd_async_page_ops */
+        ASYNC_READY = 0x1, /* ap_make_ready will not be called before this
+                              page is added to an rpc */
+        ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */
+        ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called
+                                     to give the caller a chance to update
+                                     or cancel the size of the io */
+        ASYNC_GROUP_SYNC = 0x8,  /* ap_completion will not be called, instead
+                                    the page is accounted for in the
+                                    obd_io_group given to
+                                    obd_queue_group_io */
+};
+
+struct obd_async_page_ops { /* per-page callbacks; enum async_flags says
+                             * when some of them are skipped, and the
+                             * comment on client_obd::cl_loi_list_lock says
+                             * ap_make_ready/ap_completion run under that
+                             * lock */
+        int  (*ap_make_ready)(void *data, int cmd);
+        int  (*ap_refresh_count)(void *data, int cmd);
+        void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa);
+        void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
+};
+
+/* the `oig' is passed down from a caller of obd rw methods.  the callee
+ * records enough state such that the caller can sleep on the oig and
+ * be woken when all the callees have finished their work */
+struct obd_io_group {
+        spinlock_t      oig_lock;
+        atomic_t        oig_refcount;
+        int             oig_pending;
+        int             oig_rc;
+        struct list_head oig_occ_list; /* presumably links
+                                        * oig_callback_context::occ_oig_item
+                                        * entries -- confirm */
+        cfs_waitq_t     oig_waitq;
+};
+
+/* the oig callback context lets the callee of obd rw methods register
+ * for callbacks from the caller. */
+struct oig_callback_context {
+        struct list_head occ_oig_item; /* presumably the linkage onto
+                                        * obd_io_group::oig_occ_list --
+                                        * confirm */
+        /* called when the caller has received a signal while sleeping.
+         * callees of this method are encouraged to abort their state
+         * in the oig.  This may be called multiple times. */
+        void (*occ_interrupted)(struct oig_callback_context *occ);
+        unsigned int interrupted:1;
+};
+
+/* if we find more consumers this could be generalized
+ *
+ * Fixed-size histogram: OBD_HIST_MAX counters plus a spinlock. */
+#define OBD_HIST_MAX 32
+struct obd_histogram {
+        spinlock_t      oh_lock;   /* presumably protects oh_buckets --
+                                    * confirm */
+        unsigned long   oh_buckets[OBD_HIST_MAX];
+};
+
+/* Individual type definitions */
+
+struct ost_server_data;
+
+/* hold common fields for "target" device
+ *
+ * Embedded as the first member of struct filter_obd (fo_obt) and of struct
+ * mds_obd (mds_obt), and also present on its own as obd_device::u.obt. */
+struct obd_device_target {
+        struct super_block       *obt_sb;
+        atomic_t                  obt_quotachecking;
+        struct lustre_quota_ctxt  obt_qctxt;
+};
+
+#define FILTER_GROUP_LLOG 1
+#define FILTER_GROUP_ECHO 2
+
+struct filter_ext {     /* NOTE(review): looks like a [fe_start, fe_end]
+                         * extent; whether fe_end is inclusive is not
+                         * visible here -- confirm */
+        __u64                fe_start;
+        __u64                fe_end;
+};
+
+struct filter_obd {     /* stored in obd_device::u.filter */
+        /* NB this field MUST be first -- obd_device::u also has a plain
+         * obd_device_target member (u.obt), which overlays this field only
+         * while it sits at offset 0 */
+        struct obd_device_target fo_obt;
+        const char          *fo_fstype;
+        struct vfsmount     *fo_vfsmnt;
+        cfs_dentry_t        *fo_dentry_O;
+        cfs_dentry_t       **fo_dentry_O_groups;
+        cfs_dentry_t       **fo_dentry_O_sub;
+        spinlock_t           fo_objidlock;      /* protect fo_lastobjid */
+        spinlock_t           fo_translock;      /* protect fsd_last_transno */
+        struct file         *fo_rcvd_filp;
+        struct file         *fo_health_check_filp;
+        struct lr_server_data *fo_fsd;
+        unsigned long       *fo_last_rcvd_slots;
+        __u64                fo_mount_count;
+
+        int                  fo_destroy_in_progress;
+        struct semaphore     fo_create_lock;
+
+        struct list_head     fo_export_list;
+        int                  fo_subdir_count;
+
+        obd_size             fo_tot_dirty;      /* protected by obd_osfs_lock */
+        obd_size             fo_tot_granted;    /* all values in bytes */
+        obd_size             fo_tot_pending;
+
+        obd_size             fo_readcache_max_filesize;
+
+        struct obd_import   *fo_mdc_imp;
+        struct obd_uuid      fo_mdc_uuid;
+        struct lustre_handle fo_mdc_conn;
+        struct file        **fo_last_objid_files;
+        __u64               *fo_last_objids; /* last created objid for groups,
+                                              * protected by fo_objidlock */
+
+        struct semaphore     fo_alloc_lock;
+
+        spinlock_t fo_stats_lock;
+        int fo_r_in_flight; /* protected by fo_stats_lock */
+        int fo_w_in_flight; /* protected by fo_stats_lock */
+
+        /*
+         * per-filter pool of kiobuf's allocated by filter_common_setup() and
+         * torn down by filter_cleanup(). Contains OST_NUM_THREADS elements of
+         * which ->fo_iobuf_count were allocated.
+         *
+         * This pool contains kiobuf used by
+         * filter_{prep,commit}rw_{read,write}() and is shared by all OST
+         * threads.
+         *
+         * Locking: none, each OST thread uses only one element, determined by
+         * its "ordinal number", ->t_id.
+         */
+        struct filter_iobuf    **fo_iobuf_pool;
+        int                      fo_iobuf_count;
+
+        struct obd_histogram     fo_r_pages;
+        struct obd_histogram     fo_w_pages;
+        struct obd_histogram     fo_read_rpc_hist;
+        struct obd_histogram     fo_write_rpc_hist;
+        struct obd_histogram     fo_r_io_time;
+        struct obd_histogram     fo_w_io_time;
+        struct obd_histogram     fo_r_discont_pages;
+        struct obd_histogram     fo_w_discont_pages;
+        struct obd_histogram     fo_r_discont_blocks;
+        struct obd_histogram     fo_w_discont_blocks;
+        struct obd_histogram     fo_r_disk_iosize;
+        struct obd_histogram     fo_w_disk_iosize;
+
+        struct lustre_quota_ctxt fo_quota_ctxt; /* NOTE(review): fo_obt
+                                                 * already carries obt_qctxt
+                                                 * and obt_quotachecking;
+                                                 * check whether this field
+                                                 * and fo_quotachecking
+                                                 * duplicate them */
+        spinlock_t               fo_quotacheck_lock;
+        atomic_t                 fo_quotachecking;
+};
+
+#define OSC_MAX_RIF_DEFAULT       8
+#define OSC_MAX_RIF_MAX         256
+#define OSC_MAX_DIRTY_DEFAULT  (OSC_MAX_RIF_DEFAULT * 4)
+#define OSC_MAX_DIRTY_MB_MAX   2048     /* totally arbitrary */
+
+struct mdc_rpc_lock;
+struct obd_import;
+struct client_obd {     /* stored in obd_device::u.cli */
+        struct semaphore         cl_sem;
+        struct obd_uuid          cl_target_uuid;
+        struct obd_import       *cl_import; /* ptlrpc connection state */
+        int                      cl_conn_count;
+        /* max_mds_easize is purely a performance thing so we don't have to
+         * call obd_size_diskmd() all the time. */
+        int                      cl_default_mds_easize;
+        int                      cl_max_mds_easize;
+        int                      cl_max_mds_cookiesize;
+        kdev_t                   cl_sandev;
+
+        //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */
+        void                    *cl_llcd_offset;
+
+        /* the grant values are protected by loi_list_lock below */
+        long                     cl_dirty;         /* all _dirty_ in bytes */
+        long                     cl_dirty_max;     /* allowed w/o rpc */
+        long                     cl_avail_grant;   /* bytes of credit for ost */
+        long                     cl_lost_grant;    /* lost credits (trunc) */
+        struct list_head         cl_cache_waiters; /* waiting for cache/grant */
+
+        /* keep track of objects that have lois that contain pages which
+         * have been queued for async brw.  this lock also protects the
+         * lists of osc_client_pages that hang off of the loi */
+        /*
+         * ->cl_loi_list_lock protects consistency of
+         * ->cl_loi_{ready,read,write}_list. ->ap_make_ready() and
+         * ->ap_completion() call-backs are executed under this lock. As we
+         * cannot guarantee that these call-backs never block on all platforms
+         * (as a matter of fact they do block on Mac OS X), type of
+         * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux
+         * and blocking mutex on Mac OS X. (Alternative is to make this lock
+         * blocking everywhere, but we don't want to slow down fast-path of
+         * our main platform.)
+         *
+         * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together
+         * with client_obd_list_{un,}lock() and
+         * client_obd_list_lock_{init,done}() functions.
+         */
+        client_obd_lock_t        cl_loi_list_lock;
+        struct list_head         cl_loi_ready_list;
+        struct list_head         cl_loi_write_list;
+        struct list_head         cl_loi_read_list;
+        int                      cl_r_in_flight;
+        int                      cl_w_in_flight;
+        /* just a sum of the loi/lop pending numbers to be exported by /proc */
+        int                      cl_pending_w_pages;
+        int                      cl_pending_r_pages;
+        int                      cl_max_pages_per_rpc;
+        int                      cl_max_rpcs_in_flight;
+        struct obd_histogram     cl_read_rpc_hist;
+        struct obd_histogram     cl_write_rpc_hist;
+        struct obd_histogram     cl_read_page_hist;
+        struct obd_histogram     cl_write_page_hist;
+        struct obd_histogram     cl_read_offset_hist;
+        struct obd_histogram     cl_write_offset_hist;
+
+        struct mdc_rpc_lock     *cl_rpc_lock;
+        struct mdc_rpc_lock     *cl_setattr_lock;
+        struct osc_creator       cl_oscc;
+
+        /* mgc datastruct */
+        struct semaphore         cl_mgc_sem;
+        struct vfsmount         *cl_mgc_vfsmnt;
+        struct dentry           *cl_mgc_configs_dir;
+        atomic_t                 cl_mgc_refcount;
+        struct obd_export       *cl_mgc_mgsexp;
+
+        /* Flags section */
+        unsigned int             cl_checksum:1; /* debug checksums */
+
+        /* also protected by the poorly named _loi_list_lock lock above */
+        struct osc_async_rc      cl_ar;
+
+        /* used by quotacheck */
+        int                      cl_qchk_stat; /* quotacheck stat of the peer */
+
+        /* this holds last allocated fid in last obtained seq */
+        struct lu_fid            cl_fid;
+        spinlock_t               cl_fid_lock; /* presumably protects cl_fid --
+                                               * confirm */
+};
+#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid)
+
+#define CL_NOT_QUOTACHECKED 1   /* client->cl_qchk_stat init value */
+
+struct mgs_obd {        /* stored in obd_device::u.mgs */
+        struct ptlrpc_service           *mgs_service;
+        struct vfsmount                 *mgs_vfsmnt;
+        struct super_block              *mgs_sb;
+        struct dentry                   *mgs_configs_dir;
+        struct dentry                   *mgs_fid_de;
+        struct list_head                 mgs_fs_db_list;
+        struct semaphore                 mgs_sem;
+};
+
+struct mds_obd {        /* stored in obd_device::u.mds */
+        /* NB this field MUST be first -- obd_device::u also has a plain
+         * obd_device_target member (u.obt), which overlays this field only
+         * while it sits at offset 0 */
+        struct obd_device_target         mds_obt;
+        struct ptlrpc_service           *mds_service;
+        struct ptlrpc_service           *mds_setattr_service;
+        struct ptlrpc_service           *mds_readpage_service;
+        struct vfsmount                 *mds_vfsmnt;
+        cfs_dentry_t                    *mds_fid_de;
+        int                              mds_max_mdsize;
+        int                              mds_max_cookiesize;
+        struct file                     *mds_rcvd_filp;
+        spinlock_t                       mds_transno_lock;
+        __u64                            mds_last_transno;
+        __u64                            mds_mount_count;
+        __u64                            mds_io_epoch;
+        unsigned long                    mds_atime_diff;
+        struct semaphore                 mds_epoch_sem;
+        struct ll_fid                    mds_rootfid;
+        struct lr_server_data           *mds_server_data;
+        cfs_dentry_t                    *mds_pending_dir;
+        cfs_dentry_t                    *mds_logs_dir;
+        cfs_dentry_t                    *mds_objects_dir;
+        struct llog_handle              *mds_cfg_llh;
+//        struct llog_handle              *mds_catalog;
+        struct obd_device               *mds_osc_obd; /* XXX lov_obd */
+        struct obd_uuid                  mds_lov_uuid;
+        char                            *mds_profile;
+        struct obd_export               *mds_osc_exp; /* XXX lov_exp */
+        struct lov_desc                  mds_lov_desc;
+        obd_id                          *mds_lov_objids;
+        int                              mds_lov_objids_size;
+        __u32                            mds_lov_objids_in_file;
+        unsigned int                     mds_lov_objids_dirty:1;
+        int                              mds_lov_nextid_set;
+        struct file                     *mds_lov_objid_filp;
+        struct file                     *mds_health_check_filp;
+        unsigned long                   *mds_client_bitmap;
+        struct semaphore                 mds_orphan_recovery_sem;
+        struct upcall_cache             *mds_group_hash;
+
+        struct lustre_quota_info         mds_quota_info;
+        struct semaphore                 mds_qonoff_sem;
+        struct semaphore                 mds_health_sem;
+        unsigned long                    mds_lov_objids_valid:1,
+                                         mds_fl_user_xattr:1,
+                                         mds_fl_acl:1;
+};
+
+struct echo_obd {       /* stored in obd_device::u.echo */
+        struct obdo          eo_oa;
+        spinlock_t           eo_lock;
+        __u64                eo_lastino;
+        struct lustre_handle eo_nl_lock;
+        atomic_t             eo_prep;
+};
+
+struct ost_obd {        /* stored in obd_device::u.ost */
+        struct ptlrpc_service *ost_service;
+        struct ptlrpc_service *ost_create_service;
+        struct ptlrpc_service *ost_io_service;
+        struct semaphore       ost_health_sem;
+};
+
+struct echo_client_obd { /* stored in obd_device::u.echo_client */
+        struct obd_export   *ec_exp;   /* the local connection to osc/lov */
+        spinlock_t           ec_lock;
+        struct list_head     ec_objects;
+        int                  ec_nstripes;
+        __u64                ec_unique;
+};
+
+struct lov_tgt_desc {   /* element type of the lov_obd::tgts array */
+        struct obd_uuid          uuid;
+        __u32                    ltd_gen;
+        struct obd_export       *ltd_exp;
+        unsigned int             active:1, /* is this target up for requests */
+                                 reap:1;   /* should this target be deleted */
+        int                      index;  /* index of target array in lov_obd */
+        struct list_head         qos_bavail_list; /* link entry to lov_obd */
+};
+
+struct lov_obd {        /* stored in obd_device::u.lov */
+        struct semaphore lov_lock;
+        atomic_t refcount;
+        struct lov_desc desc;
+        struct obd_connect_data ocd;
+        int bufsize;
+        int connects;
+        int death_row;      /* Do we have tgts scheduled to be deleted?
+                               (Make this a linked list?) */
+        struct list_head qos_bavail_list; /* tgts list, sorted by available
+                                             space, protected by lov_lock */
+        struct lov_tgt_desc *tgts; /* target array; lov_tgt_desc::index is
+                                    * documented as an index into it */
+};
+
+struct lmv_tgt_desc {   /* element type of the lmv_obd::tgts array */
+        struct obd_uuid         uuid;
+        struct obd_export       *ltd_exp;
+        int                     active;   /* is this target up for requests */
+        int                     idx;
+};
+
+struct lmv_obd {        /* stored in obd_device::u.lmv */
+        int                     refcount;
+        spinlock_t              lmv_lock;
+        struct lmv_desc         desc;
+        struct obd_uuid         cluuid;
+        struct obd_export       *exp;
+
+        int                     connected;
+        int                     max_easize;
+        int                     max_def_easize;
+        int                     max_cookiesize;
+        int                     server_timeout;
+        struct semaphore        init_sem;
+        
+        struct lmv_tgt_desc     *tgts;
+        int                     tgts_size; /* presumably the size of the
+                                            * tgts allocation -- confirm
+                                            * whether bytes or entries */
+
+        struct obd_connect_data *datas;
+        int                     datas_size; /* presumably the size of the
+                                             * datas allocation -- confirm
+                                             * whether bytes or entries */
+
+        struct obd_connect_data conn_data;
+};
+
+struct niobuf_local {   /* NOTE(review): looks like the local description
+                         * of one I/O buffer (offset, length, flags, backing
+                         * page/dentry, result) -- confirm against users */
+        __u64 offset;
+        __u32 len;
+        __u32 flags;
+        cfs_page_t    *page;
+        cfs_dentry_t  *dentry;
+        int lnb_grant_used;
+        int rc;
+};
+
+#define LUSTRE_OPC_MKDIR     (1 << 0)
+#define LUSTRE_OPC_SYMLINK   (1 << 1)
+#define LUSTRE_OPC_MKNODE    (1 << 2)
+#define LUSTRE_OPC_CREATE    (1 << 3)
+        
+struct placement_hint { /* passed as the hint argument of
+                         * obd_ops::o_fid_alloc */
+        struct qstr *ph_pname;
+        struct qstr *ph_cname;
+        int          ph_opc;    /* presumably one of the LUSTRE_OPC_* values
+                                 * defined just above -- confirm */
+};
+
+/* device types (not names--FIXME) */
+/* FIXME all the references to these defines need to be updated */
+#define LUSTRE_MDS_NAME "mds"
+#define LUSTRE_MDT_NAME "mdt"
+
+/* new MDS layers. Prototype */
+#define LUSTRE_MDT0_NAME "mdt0"
+#define LUSTRE_CMM0_NAME "cmm0"
+#define LUSTRE_MDD0_NAME "mdd0"
+#define LUSTRE_OSD0_NAME "osd0"
+#define LUSTRE_FLD0_NAME "fld0"
+#define LUSTRE_MDC0_NAME "mdc0"
+
+#define LUSTRE_MDC_NAME  "mdc"
+#define LUSTRE_LOV_NAME  "lov"
+#define LUSTRE_LMV_NAME  "lmv"
+
+/* FIXME just the names need to be changed */
+#define LUSTRE_OSS_NAME "ost"       /* FIXME oss */
+#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost */
+#define LUSTRE_OSTSAN_NAME "sanobdfilter"
+
+#define LUSTRE_OSC_NAME "osc"
+#define LUSTRE_FILTER_NAME "filter"
+#define LUSTRE_SANOSC_NAME "sanosc"
+#define LUSTRE_SANOST_NAME "sanost"
+#define LUSTRE_MGS_NAME "mgs"
+#define LUSTRE_MGC_NAME "mgc"
+
+#define LUSTRE_ECHO_NAME        "obdecho"
+#define LUSTRE_ECHO_CLIENT_NAME "echo_client"
+
+#define LUSTRE_MGS_OBDNAME "MGS"
+#define LUSTRE_MGC_OBDNAME "MGC"
+
+/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */
+#define N_LOCAL_TEMP_PAGE 0x10000000
+
+struct obd_trans_info { /* zeroed and seeded from a request by oti_init() */
+        __u64                    oti_transno; /* oti_init() copies
+                                               * rq_repmsg->transno here when
+                                               * both rq_repmsg and rq_reqmsg
+                                               * are set */
+        __u64                   *oti_objid;
+        /* Only used on the server side for tracking acks. */
+        struct oti_req_ack_lock {
+                struct lustre_handle lock;
+                __u32                mode;
+        }                        oti_ack_locks[4];
+        void                    *oti_handle;
+        struct llog_cookie       oti_onecookie; /* inline storage that
+                                                 * oti_alloc_cookies() uses
+                                                 * when exactly one cookie is
+                                                 * requested */
+        struct llog_cookie      *oti_logcookies; /* &oti_onecookie or an
+                                                  * OBD_ALLOC()ed array;
+                                                  * released by
+                                                  * oti_free_cookies() */
+        int                      oti_numcookies; /* count recorded by
+                                                  * oti_alloc_cookies() */
+
+        /* initial thread handling transaction; oti_init() stores the
+         * request's rq_svc_thread->t_id, or -1 when there is no such
+         * thread */
+        int                      oti_thread_id;
+};
+
+/*
+ * Reset @oti to all-zero and, when @req is given, seed it from the request:
+ *  - oti_transno is taken from the reply message, but only if both the
+ *    reply and the request message are set;
+ *  - oti_thread_id is the id of the service thread attached to @req, or -1
+ *    when the request has no service thread.
+ *
+ * A NULL @oti makes the call a no-op; a NULL @req stops after the zeroing.
+ */
+static inline void oti_init(struct obd_trans_info *oti,
+                            struct ptlrpc_request *req)
+{
+        if (oti == NULL)
+                return;
+
+        memset(oti, 0, sizeof(*oti));
+        if (req == NULL)
+                return;
+
+        if (req->rq_repmsg && req->rq_reqmsg)
+                oti->oti_transno = req->rq_repmsg->transno;
+
+        if (req->rq_svc_thread)
+                oti->oti_thread_id = req->rq_svc_thread->t_id;
+        else
+                oti->oti_thread_id = -1;
+}
+
+/*
+ * Give @oti storage for @num_cookies llog cookies.
+ *
+ * A single cookie is served from the inline oti_onecookie member; any other
+ * count is allocated with OBD_ALLOC().  Callers detect allocation failure by
+ * checking oti_logcookies for NULL afterwards.
+ *
+ * oti_numcookies is only set to @num_cookies when storage is actually
+ * available.  Otherwise it is set to 0, so the count never claims cookies
+ * that do not exist (oti_free_cookies() returns early on a NULL pointer and
+ * would not reset a stale count).
+ *
+ * A NULL @oti makes the call a no-op.
+ */
+static inline void oti_alloc_cookies(struct obd_trans_info *oti,
+                                     int num_cookies)
+{
+        if (!oti)
+                return;
+
+        if (num_cookies == 1) {
+                oti->oti_logcookies = &oti->oti_onecookie;
+        } else {
+                OBD_ALLOC(oti->oti_logcookies,
+                          num_cookies * sizeof(oti->oti_onecookie));
+        }
+
+        /* assumes OBD_ALLOC() leaves the pointer NULL on failure -- TODO
+         * confirm against its definition */
+        if (oti->oti_logcookies != NULL)
+                oti->oti_numcookies = num_cookies;
+        else
+                oti->oti_numcookies = 0;
+}
+
+/*
+ * Release the cookie storage set up by oti_alloc_cookies().
+ *
+ * Storage that points at the inline oti_onecookie is not freed (the count
+ * is asserted to be 1); anything else is handed to OBD_FREE() with the size
+ * derived from oti_numcookies.  Afterwards pointer and count are cleared.
+ *
+ * Does nothing when @oti is NULL or has no cookie storage.
+ */
+static inline void oti_free_cookies(struct obd_trans_info *oti)
+{
+        if (oti == NULL || oti->oti_logcookies == NULL)
+                return;
+
+        if (oti->oti_logcookies != &oti->oti_onecookie) {
+                OBD_FREE(oti->oti_logcookies,
+                         oti->oti_numcookies * sizeof(oti->oti_onecookie));
+        } else {
+                LASSERT(oti->oti_numcookies == 1);
+        }
+
+        oti->oti_logcookies = NULL;
+        oti->oti_numcookies = 0;
+}
+
+/* llog contexts
+ *
+ * Identifiers come in ORIG/REPL pairs.  LLOG_MAX_CTXTS, being last, equals
+ * the number of identifiers and sizes obd_device::obd_llog_ctxt[]. */
+enum llog_ctxt_id {
+        LLOG_CONFIG_ORIG_CTXT  =  0,
+        LLOG_CONFIG_REPL_CTXT  =  1,
+        LLOG_MDS_OST_ORIG_CTXT =  2,
+        LLOG_MDS_OST_REPL_CTXT =  3,
+        LLOG_SIZE_ORIG_CTXT    =  4,
+        LLOG_SIZE_REPL_CTXT    =  5,
+        LLOG_MD_ORIG_CTXT      =  6,
+        LLOG_MD_REPL_CTXT      =  7,
+        LLOG_RD1_ORIG_CTXT     =  8,
+        LLOG_RD1_REPL_CTXT     =  9,
+        LLOG_TEST_ORIG_CTXT    = 10,
+        LLOG_TEST_REPL_CTXT    = 11,
+        LLOG_LOVEA_ORIG_CTXT   = 12,
+        LLOG_LOVEA_REPL_CTXT   = 13,
+        LLOG_MAX_CTXTS
+};
+
+/*
+ * Events signalled through obd_notify() upcall-chain.
+ *
+ * Values of this type are what obd_notify_upcall::onu_upcall receives as
+ * its `ev' argument.
+ */
+enum obd_notify_event {
+        /* Device activated */
+        OBD_NOTIFY_ACTIVE,
+        /* Device deactivated */
+        OBD_NOTIFY_INACTIVE,
+        /* Connect data for import were changed */
+        OBD_NOTIFY_OCD,
+        /* Sync request */
+        OBD_NOTIFY_SYNC_NONBLOCK,
+        OBD_NOTIFY_SYNC
+};
+
+/*
+ * Data structure used to pass obd_notify()-event to non-obd listeners (llite
+ * and liblustre being main examples).
+ *
+ * Embedded in struct obd_device as obd_upcall.
+ */
+struct obd_notify_upcall {
+        int (*onu_upcall)(struct obd_device *host, struct obd_device *watched,
+                          enum obd_notify_event ev, void *owner);
+        /* Opaque datum supplied by upper layer listener; presumably handed
+         * back as the `owner' argument of onu_upcall -- confirm */
+        void *onu_owner;
+};
+
+/* corresponds to one of the obd's
+ *
+ * Generic per-device state, followed by the union `u' that holds the state
+ * private to one device type (filter, mds, cli, ost, ...). */
+struct obd_device {
+        struct obd_type        *obd_type;
+        /* common and UUID name of this device */
+        char                   *obd_name;
+        struct obd_uuid         obd_uuid;
+
+        struct lu_device       *obd_lu_dev;
+
+        int                     obd_minor;
+        unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1,
+                obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1,
+                obd_no_recov:1, obd_stopping:1, obd_starting:1,
+                obd_force:1, obd_fail:1, obd_async_recov:1;
+        atomic_t obd_refcount;
+        cfs_waitq_t             obd_refcount_waitq;
+        cfs_proc_dir_entry_t  *obd_proc_entry;
+        struct list_head        obd_exports;
+        int                     obd_num_exports;
+        struct ldlm_namespace  *obd_namespace;
+        struct ptlrpc_client    obd_ldlm_client; /* XXX OST/MDS only */
+        /* a spinlock is OK for what we do now, may need a semaphore later */
+        spinlock_t              obd_dev_lock;
+        __u64                   obd_last_committed;
+        struct fsfilt_operations *obd_fsops;
+        spinlock_t              obd_osfs_lock;
+        struct obd_statfs       obd_osfs;       /* locked by obd_osfs_lock */
+        cfs_time_t              obd_osfs_age;   /* presumably when obd_osfs
+                                                 * was last refreshed --
+                                                 * confirm */
+        struct lvfs_run_ctxt    obd_lvfs_ctxt;
+        struct llog_ctxt        *obd_llog_ctxt[LLOG_MAX_CTXTS]; /* one slot
+                                                 * per enum llog_ctxt_id
+                                                 * value */
+        struct obd_device       *obd_observer;
+        struct obd_notify_upcall obd_upcall;
+        struct obd_export       *obd_self_export;
+        /* list of exports in LRU order, for ping evictor, with obd_dev_lock */
+        struct list_head        obd_exports_timed;
+        time_t                  obd_eviction_timer; /* for ping evictor */
+
+        /* XXX encapsulate all this recovery data into one struct */
+        svc_handler_t                    obd_recovery_handler;
+        int                              obd_max_recoverable_clients;
+        int                              obd_connected_clients;
+        int                              obd_recoverable_clients;
+        spinlock_t                       obd_processing_task_lock;
+        pid_t                            obd_processing_task;
+        __u64                            obd_next_recovery_transno;
+        int                              obd_replayed_requests;
+        int                              obd_requests_queued_for_recovery;
+        cfs_waitq_t                      obd_next_transno_waitq;
+        struct list_head                 obd_uncommitted_replies;
+        spinlock_t                       obd_uncommitted_replies_lock;
+        cfs_timer_t                      obd_recovery_timer;
+        struct list_head                 obd_recovery_queue;
+        struct list_head                 obd_delayed_reply_queue;
+        time_t                           obd_recovery_start;
+        time_t                           obd_recovery_end;
+
+        union {
+                struct obd_device_target obt; /* overlays the first member
+                                               * of `filter' and of `mds' */
+                struct filter_obd filter;
+                struct mds_obd mds;
+                struct client_obd cli;
+                struct ost_obd ost;
+                struct echo_client_obd echo_client;
+                struct echo_obd echo;
+                struct lov_obd lov;
+                struct lmv_obd lmv;
+                struct mgs_obd mgs;
+        } u;
+        /* Fields used by LProcFS */
+        unsigned int           obd_cntr_base;
+        struct lprocfs_stats  *obd_stats;
+
+        unsigned int           md_cntr_base;
+        struct lprocfs_stats  *md_stats;
+    
+        cfs_proc_dir_entry_t  *obd_svc_procroot;
+        struct lprocfs_stats  *obd_svc_stats;
+};
+
+#define OBD_OPT_FORCE           0x0001
+#define OBD_OPT_FAILOVER        0x0002
+
+#define OBD_LLOG_FL_SENDNOW     0x0001
+
+enum obd_cleanup_stage { /* the cleanup_stage argument of
+                          * obd_ops::o_precleanup */
+/* Special case hack for MDS LOVs */
+        OBD_CLEANUP_EARLY,
+/* Precleanup stage 1, we must make sure all exports (other than the
+   self-export) get destroyed. */
+        OBD_CLEANUP_EXPORTS,
+/* Precleanup stage 2,  do other type-specific cleanup requiring the
+   self-export. */
+        OBD_CLEANUP_SELF_EXP,
+/* FIXME we should eliminate the "precleanup" function and make them stages
+   of the "cleanup" function. */
+        OBD_CLEANUP_OBD,
+};
+
+/* get/set_info keys */
+#define KEY_MDS_CONN "mds_conn"
+#define KEY_NEXT_ID  "next_id"
+#define KEY_LOVDESC  "lovdesc"
+#define KEY_INIT_RECOV "initial_recov"
+#define KEY_INIT_RECOV_BACKUP "init_recov_bk"
+
+/*
+ * Table of data-side methods implemented by an OBD type.  OBP() in
+ * obd_class.h reaches an entry through obd_type->typ_dt_ops->o_<op>.
+ *
+ * Member order matters: OBD_COUNTER_OFFSET() turns the byte distance between
+ * o_iocontrol and a member into that member's statistics counter index, so
+ * o_owner has to stay in front of o_iocontrol and the members following
+ * o_iocontrol are expected to be function pointers of the same size.
+ */
+struct obd_ops {
+        struct module *o_owner;
+        int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len,
+                           void *karg, void *uarg);
+        int (*o_get_info)(struct obd_export *, __u32 keylen, void *key,
+                          __u32 *vallen, void *val);
+        int (*o_set_info_async)(struct obd_export *, __u32 keylen, void *key,
+                                __u32 vallen, void *val,
+                                struct ptlrpc_request_set *set);
+        int (*o_attach)(struct obd_device *dev, obd_count len, void *data);
+        int (*o_detach)(struct obd_device *dev);
+        int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg);
+        int (*o_precleanup)(struct obd_device *dev,
+                            enum obd_cleanup_stage cleanup_stage);
+        int (*o_cleanup)(struct obd_device *dev);
+        int (*o_process_config)(struct obd_device *dev, obd_count len,
+                                void *data);
+        int (*o_postrecov)(struct obd_device *dev);
+        int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid,
+                          int priority);
+        int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid);
+        /* connect to the target device with given connection
+         * data. @ocd->ocd_connect_flags is modified to reflect flags actually
+         * granted by the target, which are guaranteed to be a subset of flags
+         * asked for. If @ocd == NULL, use default parameters. */
+        int (*o_connect)(struct lustre_handle *conn, struct obd_device *src,
+                         struct obd_uuid *cluuid, struct obd_connect_data *ocd);
+        int (*o_reconnect)(struct obd_export *exp, struct obd_device *src,
+                           struct obd_uuid *cluuid,
+                           struct obd_connect_data *ocd);
+        int (*o_disconnect)(struct obd_export *exp);
+
+        /* maybe later these should be moved into a separate fid_ops table */
+        int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid,
+                           struct placement_hint *hint);
+        
+        int (*o_fid_delete)(struct obd_export *exp, struct lu_fid *fid);
+        
+        int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
+                        unsigned long max_age);
+        int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
+                        struct lov_stripe_md *mem_src);
+        int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt,
+                          struct lov_mds_md *disk_src, int disk_len);
+        int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp,
+                         struct lov_stripe_md *mem_tgt);
+        int (*o_preallocate)(struct lustre_handle *, obd_count *req,
+                             obd_id *ids);
+        int (*o_create)(struct obd_export *exp,  struct obdo *oa,
+                        struct lov_stripe_md **ea, struct obd_trans_info *oti);
+        int (*o_destroy)(struct obd_export *exp, struct obdo *oa,
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti,
+                         struct obd_export *md_exp);
+        int (*o_setattr)(struct obd_export *exp, struct obdo *oa,
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
+        int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa,
+                         struct lov_stripe_md *ea, struct obd_trans_info *oti);
+        int (*o_getattr)(struct obd_export *exp, struct obdo *oa,
+                         struct lov_stripe_md *ea);
+        int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa,
+                               struct lov_stripe_md *ea,
+                               struct ptlrpc_request_set *set);
+        int (*o_brw)(int rw, struct obd_export *exp, struct obdo *oa,
+                     struct lov_stripe_md *ea, obd_count oa_bufs,
+                     struct brw_page *pgarr, struct obd_trans_info *oti);
+        int (*o_brw_async)(int rw, struct obd_export *exp, struct obdo *oa,
+                           struct lov_stripe_md *ea, obd_count oa_bufs,
+                           struct brw_page *pgarr, struct ptlrpc_request_set *,
+                           struct obd_trans_info *oti);
+        int (*o_prep_async_page)(struct obd_export *exp,
+                                 struct lov_stripe_md *lsm,
+                                 struct lov_oinfo *loi,
+                                 struct page *page, obd_off offset,
+                                 struct obd_async_page_ops *ops, void *data,
+                                 void **res);
+        int (*o_queue_async_io)(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                struct lov_oinfo *loi, void *cookie,
+                                int cmd, obd_off off, int count,
+                                obd_flag brw_flags, obd_flag async_flags);
+        int (*o_queue_group_io)(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                struct lov_oinfo *loi,
+                                struct obd_io_group *oig,
+                                void *cookie, int cmd, obd_off off, int count,
+                                obd_flag brw_flags, obd_flag async_flags);
+        int (*o_trigger_group_io)(struct obd_export *exp,
+                                  struct lov_stripe_md *lsm,
+                                  struct lov_oinfo *loi,
+                                  struct obd_io_group *oig);
+        int (*o_set_async_flags)(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                struct lov_oinfo *loi, void *cookie,
+                                obd_flag async_flags);
+        int (*o_teardown_async_page)(struct obd_export *exp,
+                                     struct lov_stripe_md *lsm,
+                                     struct lov_oinfo *loi, void *cookie);
+        int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm,
+                           struct ost_lvb *lvb, int kms_only);
+        int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm,
+                            obd_off size, int shrink);
+        int (*o_punch)(struct obd_export *exp, struct obdo *oa,
+                       struct lov_stripe_md *ea, obd_size start,
+                       obd_size end, struct obd_trans_info *oti);
+        int (*o_sync)(struct obd_export *exp, struct obdo *oa,
+                      struct lov_stripe_md *ea, obd_size start, obd_size end);
+        int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst,
+                         struct lov_stripe_md *src, obd_size start,
+                         obd_size end, struct obd_trans_info *oti);
+        int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst,
+                      struct lustre_handle *srconn, struct lov_stripe_md *src,
+                      obd_size start, obd_size end, struct obd_trans_info *);
+        int (*o_iterate)(struct lustre_handle *conn,
+                         int (*)(obd_id, obd_gr, void *),
+                         obd_id *startid, obd_gr group, void *data);
+        int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa,
+                        int objcount, struct obd_ioobj *obj,
+                        int niocount, struct niobuf_remote *remote,
+                        struct niobuf_local *local, struct obd_trans_info *oti);
+        int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa,
+                          int objcount, struct obd_ioobj *obj,
+                          int niocount, struct niobuf_local *local,
+                          struct obd_trans_info *oti, int rc);
+        int (*o_enqueue)(struct obd_export *, struct lov_stripe_md *,
+                         __u32 type, ldlm_policy_data_t *, __u32 mode,
+                         int *flags, void *bl_cb, void *cp_cb, void *gl_cb,
+                         void *data, __u32 lvb_len, void *lvb_swabber,
+                         struct lustre_handle *lockh);
+        int (*o_match)(struct obd_export *, struct lov_stripe_md *, __u32 type,
+                       ldlm_policy_data_t *, __u32 mode, int *flags, void *data,
+                       struct lustre_handle *lockh);
+        int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
+                               ldlm_iterator_t it, void *data);
+        int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
+                        __u32 mode, struct lustre_handle *);
+        int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
+                               int flags, void *opaque);
+        int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *,
+                         int join);
+        int (*o_san_preprw)(int cmd, struct obd_export *exp,
+                            struct obdo *oa, int objcount,
+                            struct obd_ioobj *obj, int niocount,
+                            struct niobuf_remote *remote);
+        int (*o_init_export)(struct obd_export *exp);
+        int (*o_destroy_export)(struct obd_export *exp);
+
+        /* llog related obd_methods */
+        int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd,
+                           int count, struct llog_catid *logid);
+        int (*o_llog_finish)(struct obd_device *obd, int count);
+
+        /* metadata-only methods */
+        int (*o_pin)(struct obd_export *, struct lu_fid *fid,
+                     struct obd_client_handle *, int flag);
+        int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int);
+
+        int (*o_import_event)(struct obd_device *, struct obd_import *,
+                              enum obd_import_event);
+
+        int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
+                        enum obd_notify_event ev, void *data);
+
+        int (*o_health_check)(struct obd_device *);
+
+        /* quota methods */
+        int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
+        int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
+
+        /*
+         * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
+         * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
+         * Also, add a wrapper function in include/linux/obd_class.h.
+         *
+         * Also note that if you add it to the END, you also have to change
+         * the num_stats calculation.
+         *
+         */
+};
+
+/*
+ * Table of metadata methods implemented by an OBD type.  MDP() in
+ * obd_class.h reaches an entry through obd_type->typ_md_ops->m_<op>.
+ *
+ * Member order matters: MD_COUNTER_OFFSET() turns the byte distance between
+ * m_getstatus and a member into that member's statistics counter index, so
+ * m_getstatus has to stay first and all members are expected to be function
+ * pointers of the same size.
+ */
+struct md_ops {
+        int (*m_getstatus)(struct obd_export *, struct lu_fid *);
+        int (*m_change_cbdata)(struct obd_export *, struct lu_fid *,
+                               ldlm_iterator_t, void *);
+        int (*m_close)(struct obd_export *, struct md_op_data *,
+                       struct obd_client_handle *, struct ptlrpc_request **);
+        int (*m_create)(struct obd_export *, struct md_op_data *,
+                        const void *, int, int, __u32, __u32, __u32,
+                        __u64, struct ptlrpc_request **);
+        int (*m_done_writing)(struct obd_export *, struct md_op_data *);
+        int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *,
+                         int, struct md_op_data *, struct lustre_handle *,
+                         void *, int, ldlm_completion_callback,
+                         ldlm_blocking_callback, void *, int);
+        int (*m_getattr)(struct obd_export *, struct lu_fid *,
+                         obd_valid, int, struct ptlrpc_request **);
+        int (*m_getattr_name)(struct obd_export *, struct lu_fid *,
+                              const char *, int, obd_valid,
+                              int, struct ptlrpc_request **);
+        int (*m_intent_lock)(struct obd_export *, struct md_op_data *,
+                             void *, int, struct lookup_intent *, int,
+                             struct ptlrpc_request **,
+                             ldlm_blocking_callback, int);
+        int (*m_link)(struct obd_export *, struct md_op_data *,
+                      struct ptlrpc_request **);
+        int (*m_rename)(struct obd_export *, struct md_op_data *,
+                        const char *, int, const char *, int,
+                        struct ptlrpc_request **);
+        int (*m_setattr)(struct obd_export *, struct md_op_data *,
+                         struct iattr *, void *, int , void *, int,
+                         struct ptlrpc_request **);
+        int (*m_sync)(struct obd_export *, struct lu_fid *,
+                      struct ptlrpc_request **);
+        int (*m_readpage)(struct obd_export *, struct lu_fid *,
+                          __u64, struct page *, struct ptlrpc_request **);
+        int (*m_unlink)(struct obd_export *, struct md_op_data *,
+                        struct ptlrpc_request **);
+
+        int (*m_setxattr)(struct obd_export *, struct lu_fid *,
+                          obd_valid, const char *, const char *,
+                          int, int, int, struct ptlrpc_request **);
+
+        int (*m_getxattr)(struct obd_export *, struct lu_fid *,
+                          obd_valid, const char *, const char *,
+                          int, int, int, struct ptlrpc_request **);
+
+        int (*m_init_ea_size)(struct obd_export *, int, int, int);
+        
+        int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *,
+                               int, struct obd_export *, struct lustre_md *);
+        
+        int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *);
+        
+        int (*m_set_open_replay_data)(struct obd_export *,
+                                      struct obd_client_handle *,
+                                      struct ptlrpc_request *);
+        int (*m_clear_open_replay_data)(struct obd_export *,
+                                        struct obd_client_handle *);
+        int (*m_set_lock_data)(struct obd_export *, __u64 *, void *);
+        
+        int (*m_lock_match)(struct obd_export *, int, struct lu_fid *,
+                            ldlm_type_t, ldlm_policy_data_t *, ldlm_mode_t,
+                            struct lustre_handle *);
+                
+        int (*m_cancel_unused)(struct obd_export *, struct lu_fid *,
+                               int flags, void *opaque);
+
+        /*
+         * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to
+         * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a
+         * wrapper function in include/linux/obd_class.h.
+         */
+};
+
+/*
+ * Operations on a struct lov_stripe_md that differ per stripe-md format.
+ * lsm_op_find() picks the table (lsm_plain_ops or lsm_join_ops) from the
+ * magic value.
+ */
+struct lsm_operations {
+        void (*lsm_free)(struct lov_stripe_md *);
+        int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa,
+                           struct obd_export *md_exp);
+        void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *,
+                                     unsigned long *);
+        void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *,
+                                     unsigned long *);
+        obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int);
+        int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off);
+        int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd);
+        int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes,
+                               int *stripe_count);
+        int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm,
+                             struct lov_mds_md *lmm);
+};
+
+extern struct lsm_operations lsm_plain_ops;
+extern struct lsm_operations lsm_join_ops;
+/*
+ * Map a stripe-md magic value to its operation table.
+ *
+ * Returns &lsm_plain_ops for LOV_MAGIC, &lsm_join_ops for LOV_MAGIC_JOIN,
+ * and NULL (after logging an error) for any other value, so callers must
+ * check the result before dereferencing it.
+ */
+static inline struct lsm_operations *lsm_op_find(int magic)
+{
+        switch(magic) {
+        case LOV_MAGIC:
+               return &lsm_plain_ops;
+        case LOV_MAGIC_JOIN:
+               return &lsm_join_ops;
+        default:
+               /* terminate the record; without "\n" the next console
+                * message is glued onto this one */
+               CERROR("Cannot recognize lsm_magic %d\n", magic);
+               return NULL;
+        }
+}
+
+int lvfs_check_io_health(struct obd_device *obd, struct file *file);
+
+/*
+ * Commit callback for transaction @transno on @obd.
+ *
+ * A non-zero @error is logged and nothing else is done.  Otherwise the
+ * commit is traced, and if @transno is newer than obd_last_committed the
+ * mark is advanced and ptlrpc_commit_replies() is invoked.
+ */
+static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno,
+                                         int error)
+{
+        if (error != 0) {
+                CERROR("%s: transno "LPD64" commit error: %d\n",
+                       obd->obd_name, transno, error);
+        } else {
+                CDEBUG(D_HA, "%s: transno "LPD64" committed\n",
+                       obd->obd_name, transno);
+
+                /* the committed mark only ever moves forward */
+                if (obd->obd_last_committed < transno) {
+                        obd->obd_last_committed = transno;
+                        ptlrpc_commit_replies(obd);
+                }
+        }
+}
+
+/*
+ * Fill the two quota slots of @obd_ops (o_quotacheck, o_quotactl) from
+ * @interface via QUOTA_OP().  A NULL @interface leaves @obd_ops untouched;
+ * with a non-NULL @interface, @obd_ops is asserted to be non-NULL.
+ */
+static inline void init_obd_quota_ops(quota_interface_t *interface,
+                                      struct obd_ops *obd_ops)
+{
+        if (interface != NULL) {
+                LASSERT(obd_ops);
+                obd_ops->o_quotacheck = QUOTA_OP(interface, check);
+                obd_ops->o_quotactl = QUOTA_OP(interface, ctl);
+        }
+}
+
+#endif /* __OBD_H */
similarity index 83%
rename from lustre/include/linux/obd_cache.h
rename to lustre/include/obd_cache.h
index e75b9f4..c5ec326 100644 (file)
@@ -7,7 +7,5 @@
 
 #ifdef __KERNEL__
 
-#define OBD_CACHE_DEVICENAME "cobd"
-
 #endif
 #endif
diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h
new file mode 100644 (file)
index 0000000..6832fb8
--- /dev/null
@@ -0,0 +1,1698 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __CLASS_OBD_H
+#define __CLASS_OBD_H
+
+#include <obd_support.h>
+#include <lustre_import.h>
+#include <lustre_net.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lprocfs_status.h>
+
+#if defined(__linux__)
+#include <linux/obd_class.h>
+#elif defined(__APPLE__)
+#include <darwin/obd_class.h>
+#elif defined(__WINNT__)
+#include <winnt/obd_class.h>
+#else
+#error Unsupported operating system.
+#endif
+
+/* OBD Device Declarations */
+#define MAX_OBD_DEVICES 520
+extern struct obd_device obd_dev[MAX_OBD_DEVICES];
+extern spinlock_t obd_dev_lock;
+
+/* OBD Operations Declarations */
+extern struct obd_device *class_conn2obd(struct lustre_handle *);
+extern struct obd_device *class_exp2obd(struct obd_export *);
+
+struct lu_device_type;
+
+/* genops.c */
+struct obd_export *class_conn2export(struct lustre_handle *);
+int class_register_type(struct obd_ops *, struct md_ops *,
+                        struct lprocfs_vars *, const char *nm,
+                        struct lu_device_type *ldt);
+int class_unregister_type(const char *nm);
+
+struct obd_device *class_newdev(struct obd_type *type, char *name);
+void class_release_dev(struct obd_device *obd);
+
+int class_name2dev(const char *name);
+struct obd_device *class_name2obd(const char *name);
+int class_uuid2dev(struct obd_uuid *uuid);
+struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
+void class_obd_list(void);
+struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
+                                          const char * typ_name,
+                                          struct obd_uuid *grp_uuid);
+struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid,
+                                             struct obd_uuid *grp_uuid);
+struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
+                                           int *next);
+
+int oig_init(struct obd_io_group **oig);
+void oig_add_one(struct obd_io_group *oig,
+                  struct oig_callback_context *occ);
+void oig_complete_one(struct obd_io_group *oig,
+                      struct oig_callback_context *occ, int rc);
+void oig_release(struct obd_io_group *oig);
+int oig_wait(struct obd_io_group *oig);
+
+char *obd_export_nid2str(struct obd_export *exp);
+
+int obd_export_evict_by_nid(struct obd_device *obd, const char *nid);
+int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid);
+
+/* obd_config.c */
+int class_process_config(struct lustre_cfg *lcfg);
+int class_attach(struct lustre_cfg *lcfg);
+int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg);
+int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg);
+struct obd_device *class_incref(struct obd_device *obd);
+void class_decref(struct obd_device *obd);
+
+#define CFG_F_START     0x01   /* Set when we start updating from a log */
+#define CFG_F_MARKER    0x02   /* We are within a marker */
+#define CFG_F_SKIP      0x04   /* We should ignore this cfg command */
+#define CFG_F_COMPAT146 0x08   /* Translation to new obd names required */
+#define CFG_F_EXCLUDE   0x10   /* OST exclusion list */
+
+
+/* Passed as the cfg argument of class_config_parse_llog() and
+ * class_config_dump_llog(); also embedded in struct config_llog_data. */
+struct config_llog_instance {
+        char *              cfg_instance;
+        struct super_block *cfg_sb;
+        struct obd_uuid     cfg_uuid;
+        int                 cfg_last_idx; /* for partial llog processing */
+        int                 cfg_flags;    /* presumably CFG_F_* bits - confirm */
+};
+int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
+                            struct config_llog_instance *cfg);
+int class_config_dump_llog(struct llog_ctxt *ctxt, char *name,
+                           struct config_llog_instance *cfg);
+
+/* One entry in the list of active configuration logs */
+struct config_llog_data {
+        char               *cld_logname;
+        struct ldlm_res_id  cld_resid;
+        struct config_llog_instance cld_cfg;
+        struct list_head    cld_list_chain; /* presumably links the entry into
+                                             * that list - confirm */
+        atomic_t            cld_refcount;
+        unsigned int        cld_stopping:1;
+};
+
+/* Profile record as returned by class_get_profile().
+ * NOTE(review): lp_osc/lp_mdc look like the names of the OSC and MDC
+ * devices belonging to the profile - confirm against obd_config.c. */
+struct lustre_profile {
+        struct list_head lp_list;
+        char * lp_profile;
+        char * lp_osc;
+        char * lp_mdc;
+};
+
+struct lustre_profile *class_get_profile(const char * prof);
+void class_del_profile(const char *prof);
+
+/* genops.c */
+/*
+ * Take a reference on an export: increments exp_refcount, traces the new
+ * count and yields the export pointer as the value of the expression.
+ * Written as a GCC statement expression; @exp is evaluated exactly once.
+ * There is no NULL check, so @exp must be a valid export.
+ */
+#define class_export_get(exp)                                                  \
+({                                                                             \
+        struct obd_export *exp_ = exp;                                         \
+        atomic_inc(&exp_->exp_refcount);                                       \
+        CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp_,          \
+               atomic_read(&exp_->exp_refcount));                              \
+        exp_;                                                                  \
+})
+
+/*
+ * Drop a reference on an export.  Asserts that @exp is non-NULL and that
+ * its refcount is positive and below 0x5a5a5a, traces the count the export
+ * will have afterwards, then hands over to __class_export_put().
+ *
+ * @exp is expanded several times, so it must be an expression without side
+ * effects.
+ * NOTE(review): the 0x5a5a5a bound looks like a guard against reading a
+ * poisoned (already freed) export - confirm the poison pattern.
+ */
+#define class_export_put(exp)                                                  \
+do {                                                                           \
+        LASSERT((exp) != NULL);                                                \
+        CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", (exp),         \
+               atomic_read(&(exp)->exp_refcount) - 1);                         \
+        LASSERT(atomic_read(&(exp)->exp_refcount) > 0);                        \
+        LASSERT(atomic_read(&(exp)->exp_refcount) < 0x5a5a5a);                 \
+        __class_export_put(exp);                                               \
+} while (0)
+void __class_export_put(struct obd_export *);
+struct obd_export *class_new_export(struct obd_device *obddev,
+                                    struct obd_uuid *cluuid);
+void class_unlink_export(struct obd_export *exp);
+
+struct obd_import *class_import_get(struct obd_import *);
+void class_import_put(struct obd_import *);
+struct obd_import *class_new_import(struct obd_device *obd);
+void class_destroy_import(struct obd_import *exp);
+
+struct obd_type *class_search_type(const char *name);
+struct obd_type *class_get_type(const char *name);
+void class_put_type(struct obd_type *type);
+int class_connect(struct lustre_handle *conn, struct obd_device *obd,
+                  struct obd_uuid *cluuid);
+int class_disconnect(struct obd_export *exp);
+void class_fail_export(struct obd_export *exp);
+void class_disconnect_exports(struct obd_device *obddev);
+void class_disconnect_stale_exports(struct obd_device *obddev);
+int class_manual_cleanup(struct obd_device *obd);
+
+void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid);
+int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare);
+void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj);
+
+
+#define OBT(dev)        (dev)->obd_type
+#define OBP(dev, op)    (dev)->obd_type->typ_dt_ops->o_ ## op
+#define MDP(dev, op)    (dev)->obd_type->typ_md_ops->m_ ## op
+#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op
+
+/* Make the calling function return -ENODEV (through RETURN) when obd is
+   NULL.  Unlike OBD_CHECK_DEV_ACTIVE this does not look at obd_set_up or
+   obd_stopping, so it is the check to use on cleanup paths that must still
+   run while the obd is stopping. */
+#define OBD_CHECK_DEV(obd)                                      \
+do {                                                            \
+        if (!(obd)) {                                           \
+                CERROR("NULL device\n");                        \
+                RETURN(-ENODEV);                                \
+        }                                                       \
+} while (0)
+
+/* Ensure obd is non-NULL, set up (obd_set_up) and not stopping
+   (!obd_stopping); otherwise log an error and make the calling function
+   return -ENODEV through RETURN. */
+#define OBD_CHECK_DEV_ACTIVE(obd)                               \
+do {                                                            \
+        OBD_CHECK_DEV(obd);                                     \
+        if (!(obd)->obd_set_up || (obd)->obd_stopping) {        \
+                CERROR("Device %d not setup\n",                 \
+                       (obd)->obd_minor);                       \
+                RETURN(-ENODEV);                                \
+        }                                                       \
+} while (0)
+
+
+#ifdef LPROCFS
+/* Index of o_<op> among the function pointers of struct obd_ops, counted
+ * from o_iocontrol (index 0).  Relies on every member from o_iocontrol on
+ * having the size of a function pointer. */
+#define OBD_COUNTER_OFFSET(op)                                  \
+        ((offsetof(struct obd_ops, o_ ## op) -                  \
+          offsetof(struct obd_ops, o_iocontrol))                \
+         / sizeof(((struct obd_ops *)(0))->o_iocontrol))
+
+/* Bump the statistics counter of o_<op> for @obd, if the device has an
+ * obd_stats table.  @obd is parenthesized everywhere so that any pointer
+ * expression can be passed.  Expands to a bare "if" statement: do not use
+ * it as the unbraced body of an if that has an else branch. */
+#define OBD_COUNTER_INCREMENT(obd, op)                          \
+        if ((obd)->obd_stats != NULL) {                         \
+                unsigned int coffset;                           \
+                coffset = (unsigned int)(obd)->obd_cntr_base +  \
+                        OBD_COUNTER_OFFSET(op);                 \
+                LASSERT(coffset < (obd)->obd_stats->ls_num);    \
+                lprocfs_counter_incr((obd)->obd_stats, coffset);\
+        }
+
+/* Index of m_<op> among the function pointers of struct md_ops, counted
+ * from m_getstatus (index 0). */
+#define MD_COUNTER_OFFSET(op)                                  \
+        ((offsetof(struct md_ops, m_ ## op) -                  \
+          offsetof(struct md_ops, m_getstatus))                \
+         / sizeof(((struct md_ops *)(0))->m_getstatus))
+
+/* Metadata twin of OBD_COUNTER_INCREMENT, using md_stats/md_cntr_base. */
+#define MD_COUNTER_INCREMENT(obd, op)                           \
+        if ((obd)->md_stats != NULL) {                          \
+                unsigned int coffset;                           \
+                coffset = (unsigned int)(obd)->md_cntr_base +   \
+                        MD_COUNTER_OFFSET(op);                  \
+                LASSERT(coffset < (obd)->md_stats->ls_num);     \
+                lprocfs_counter_incr((obd)->md_stats, coffset); \
+        }
+
+#else
+/* Without LPROCFS all four macros expand to nothing. */
+#define OBD_COUNTER_OFFSET(op)
+#define OBD_COUNTER_INCREMENT(obd, op)
+#define MD_COUNTER_OFFSET(op)
+#define MD_COUNTER_INCREMENT(obd, op)
+#endif
+
+/* Make the calling function return @err (through RETURN) when @obd has no
+ * type or its type does not implement the metadata method m_<op>.  The
+ * error is logged only when @err is non-zero, so passing 0 turns a missing
+ * method into a silent no-op.  @obd is parenthesized on every use. */
+#define OBD_CHECK_MD_OP(obd, op, err)                           \
+do {                                                            \
+        if (!OBT(obd) || !MDP((obd), op)) {                     \
+                if (err)                                        \
+                        CERROR("md_" #op ": dev %s/%d no operation\n", \
+                               (obd)->obd_name, (obd)->obd_minor);     \
+                RETURN(err);                                    \
+        }                                                       \
+} while (0)
+
+/* Validate @exp before calling the metadata method m_<op> of its device.
+ * Makes the calling function return (through RETURN):
+ *   -ENODEV      when @exp is NULL,
+ *   -EOPNOTSUPP  when the export has no obd or the obd has no type,
+ *   -EOPNOTSUPP  when the type does not implement m_<op>.
+ * Messages carry the "md_" prefix, matching OBD_CHECK_MD_OP. */
+#define EXP_CHECK_MD_OP(exp, op)                                \
+do {                                                            \
+        if ((exp) == NULL) {                                    \
+                CERROR("md_" #op ": NULL export\n");            \
+                RETURN(-ENODEV);                                \
+        }                                                       \
+        if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) {   \
+                CERROR("md_" #op ": cleaned up obd\n");         \
+                RETURN(-EOPNOTSUPP);                            \
+        }                                                       \
+        /* obd and its type are known to be non-NULL here */    \
+        if (!MDP((exp)->exp_obd, op)) {                         \
+                CERROR("md_" #op ": dev %s/%d no operation\n",  \
+                       (exp)->exp_obd->obd_name,                \
+                       (exp)->exp_obd->obd_minor);              \
+                RETURN(-EOPNOTSUPP);                            \
+        }                                                       \
+} while (0)
+
+
+/* Make the calling function return @err (through RETURN) when @obd has no
+ * type or its type does not implement the data method o_<op>.  The error
+ * is logged only when @err is non-zero, so passing 0 turns a missing method
+ * into a silent no-op.  @obd is parenthesized on every use. */
+#define OBD_CHECK_DT_OP(obd, op, err)                           \
+do {                                                            \
+        if (!OBT(obd) || !OBP((obd), op)) {                     \
+                if (err)                                        \
+                        CERROR("obd_" #op ": dev %d no operation\n",    \
+                               (obd)->obd_minor);               \
+                RETURN(err);                                    \
+        }                                                       \
+} while (0)
+
+/* Validate @exp before calling the data method o_<op> of its device.
+ * Makes the calling function return (through RETURN):
+ *   -ENODEV      when @exp is NULL,
+ *   -EOPNOTSUPP  when the export has no obd or the obd has no type,
+ *   -EOPNOTSUPP  when the type does not implement o_<op>. */
+#define EXP_CHECK_DT_OP(exp, op)                                \
+do {                                                            \
+        if ((exp) == NULL) {                                    \
+                CERROR("obd_" #op ": NULL export\n");           \
+                RETURN(-ENODEV);                                \
+        }                                                       \
+        if (!(exp)->exp_obd || OBT((exp)->exp_obd) == NULL) {   \
+                CERROR("obd_" #op ": cleaned up obd\n");        \
+                RETURN(-EOPNOTSUPP);                            \
+        }                                                       \
+        /* the type pointer was verified just above */          \
+        if (OBP((exp)->exp_obd, op) == NULL) {                  \
+                CERROR("obd_" #op ": dev %d no operation\n",    \
+                       (exp)->exp_obd->obd_minor);              \
+                RETURN(-EOPNOTSUPP);                            \
+        }                                                       \
+} while (0)
+
+/* Make the calling function return @err (through RETURN) when the obd of
+ * llog context @ctxt has no type or the context's loc_logops does not
+ * implement lop_<op>.  The error is logged only when @err is non-zero.
+ * @ctxt is parenthesized on every use.  ctxt->loc_obd and ctxt->loc_logops
+ * are dereferenced without a NULL check. */
+#define CTXT_CHECK_OP(ctxt, op, err)                            \
+do {                                                            \
+        if (!OBT((ctxt)->loc_obd) || !CTXTP((ctxt), op)) {      \
+                if (err)                                        \
+                        CERROR("lop_" #op ": dev %d no operation\n",    \
+                               (ctxt)->loc_obd->obd_minor);     \
+                RETURN(err);                                    \
+        }                                                       \
+} while (0)
+
+/*
+ * Wrapper around the o_get_info method of the device behind @exp.
+ *
+ * EXP_CHECK_DT_OP() makes this return -ENODEV for a NULL export and
+ * -EOPNOTSUPP when the export has no usable obd or the type has no
+ * o_get_info; otherwise the method's own return code is passed through.
+ */
+static inline int obd_get_info(struct obd_export *exp, __u32 keylen,
+                               void *key, __u32 *vallen, void *val)
+{
+        int rc;
+        ENTRY;
+
+        /* must come first: everything below dereferences exp->exp_obd */
+        EXP_CHECK_DT_OP(exp, get_info);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, get_info);
+
+        rc = OBP(exp->exp_obd, get_info)(exp, keylen, key, vallen, val);
+        RETURN(rc);
+}
+
+/*
+ * Wrapper around the o_set_info_async method of the device behind @exp;
+ * @set is handed to the method unchanged.
+ *
+ * EXP_CHECK_DT_OP() makes this return -ENODEV for a NULL export and
+ * -EOPNOTSUPP when the export has no usable obd or the type has no
+ * o_set_info_async; otherwise the method's own return code is passed
+ * through.
+ */
+static inline int obd_set_info_async(struct obd_export *exp, obd_count keylen,
+                                     void *key, obd_count vallen, void *val,
+                                     struct ptlrpc_request_set *set)
+{
+        int rc;
+        ENTRY;
+
+        /* must come first: everything below dereferences exp->exp_obd */
+        EXP_CHECK_DT_OP(exp, set_info_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, set_info_async);
+
+        rc = OBP(exp->exp_obd, set_info_async)(exp, keylen, key, vallen, val, 
+                                               set);
+        RETURN(rc);
+}
+
+/* Set up @obd from configuration record @cfg.
+ *
+ * When the obd type carries an lu_device_type (typ_lu != NULL), a lu_device
+ * is allocated through ldto_device_alloc() and linked with @obd in both
+ * directions.  Otherwise the legacy setup method is called.
+ *
+ * Returns 0 on success or a negative errno. */
+static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg)
+{
+        int rc;
+        struct lu_device_type *ldt;
+        ENTRY;
+
+        ldt = obd->obd_type->typ_lu;
+        if (ldt != NULL) {
+#ifdef __KERNEL__
+                struct lu_context ctx;
+                struct lu_device *d;
+
+                rc = lu_context_init(&ctx);
+                if (rc == 0) {
+                        lu_context_enter(&ctx);
+                        d = ldt->ldt_ops->ldto_device_alloc(&ctx, ldt, cfg);
+                        /* ctx lives on this stack frame: leave and release
+                         * it before returning, as obd_cleanup() and
+                         * obd_process_config() do. */
+                        lu_context_exit(&ctx);
+                        lu_context_fini(&ctx);
+
+                        if (!IS_ERR(d)) {
+                                obd->obd_lu_dev = d;
+                                d->ld_obd = obd;
+                        } else {
+                                rc = PTR_ERR(d);
+                        }
+                }
+#else
+                /* No lu_device support is compiled in outside the kernel;
+                 * return a defined error instead of an uninitialized rc.
+                 * NOTE(review): confirm -EOPNOTSUPP is the desired code. */
+                rc = -EOPNOTSUPP;
+#endif
+        } else {
+                OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP);
+                OBD_COUNTER_INCREMENT(obd, setup);
+                rc = OBP(obd, setup)(obd, cfg);
+        }
+        RETURN(rc);
+}
+
+/* Run the precleanup method of @obd for the given @cleanup_stage.
+ * OBD_CHECK_DT_OP() is handed 0 as its fallback value. */
+static inline int obd_precleanup(struct obd_device *obd,
+                                 enum obd_cleanup_stage cleanup_stage)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(obd, precleanup, 0);
+        OBD_COUNTER_INCREMENT(obd, precleanup);
+        result = OBP(obd, precleanup)(obd, cleanup_stage);
+
+        RETURN(result);
+}
+
+static inline int obd_cleanup(struct obd_device *obd)
+{
+        int rc;
+        struct lu_device *d;
+        struct lu_device_type *ldt;
+        ENTRY;
+
+        OBD_CHECK_DEV(obd);
+
+        ldt = obd->obd_type->typ_lu;
+        d = obd->obd_lu_dev;
+        if (ldt != NULL && d != NULL) {
+#ifdef __KERNEL__
+                struct lu_context ctx;
+
+                rc = lu_context_init(&ctx);
+                if (rc == 0) {
+                        lu_context_enter(&ctx);
+                        ldt->ldt_ops->ldto_device_free(&ctx, d);
+                        lu_context_exit(&ctx);
+                        lu_context_fini(&ctx);
+                        obd->obd_lu_dev = NULL;
+                        rc = 0;
+                }
+#endif
+                
+        } else {
+                OBD_CHECK_DT_OP(obd, cleanup, 0);
+                rc = OBP(obd, cleanup)(obd);
+        }
+        OBD_COUNTER_INCREMENT(obd, cleanup);
+        RETURN(rc);
+}
+
+static inline int
+obd_process_config(struct obd_device *obd, int datalen, void *data)
+{
+        int rc;
+        struct lu_device *d;
+        struct lu_device_type *ldt;
+        ENTRY;
+
+        OBD_CHECK_DEV(obd);
+
+        ldt = obd->obd_type->typ_lu;
+        d = obd->obd_lu_dev;
+        if (ldt != NULL && d != NULL) {
+#ifdef __KERNEL__
+                struct lu_context ctx;
+
+                rc = lu_context_init(&ctx);
+                if (rc == 0) {
+                        lu_context_enter(&ctx);
+                        rc = d->ld_ops->ldo_process_config(&ctx, d, data);
+                        lu_context_exit(&ctx);
+                        lu_context_fini(&ctx);
+                }
+#endif
+        } else {
+                OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP);
+                rc = OBP(obd, process_config)(obd, datalen, data);
+        }
+        OBD_COUNTER_INCREMENT(obd, process_config);
+
+        RETURN(rc);
+}
+
+/* Convert an in-memory MD struct into its on-disk form.
+ * The result is the positive size of the packed MD (0 when freeing) or a
+ * negative error.
+ *
+ * @disk_tgt == NULL:  only the MD size is returned (the maximum size when
+ *                     @mem_src == NULL as well).
+ * @*disk_tgt != NULL and @mem_src == NULL:  @*disk_tgt is freed.
+ * @*disk_tgt == NULL:  the buffer is allocated.
+ */
+static inline int obd_packmd(struct obd_export *exp,
+                             struct lov_mds_md **disk_tgt,
+                             struct lov_stripe_md *mem_src)
+{
+        int size;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, packmd);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, packmd);
+        size = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src);
+
+        RETURN(size);
+}
+
+/* Query the packed size of @mem_src: obd_packmd() with no target buffer. */
+static inline int obd_size_diskmd(struct obd_export *exp,
+                                  struct lov_stripe_md *mem_src)
+{
+        int size = obd_packmd(exp, NULL, mem_src);
+
+        return size;
+}
+
+/* helper functions */
+/* Allocate an on-disk MD: @disk_tgt must point at a NULL pointer, which
+ * obd_packmd() is then asked to fill in (no in-memory source given). */
+static inline int obd_alloc_diskmd(struct obd_export *exp,
+                                   struct lov_mds_md **disk_tgt)
+{
+        int size;
+
+        LASSERT(disk_tgt);
+        LASSERT(*disk_tgt == NULL);
+        size = obd_packmd(exp, disk_tgt, NULL);
+        return size;
+}
+
+/* Free an on-disk MD: @disk_tgt must point at a non-NULL pointer, which
+ * is handed to obd_packmd() without an in-memory source. */
+static inline int obd_free_diskmd(struct obd_export *exp,
+                                  struct lov_mds_md **disk_tgt)
+{
+        int status;
+
+        LASSERT(disk_tgt);
+        LASSERT(*disk_tgt);
+        status = obd_packmd(exp, disk_tgt, NULL);
+        return status;
+}
+
+/* Convert an on-disk MD struct into its in-memory form.
+ * The result is the positive size of the unpacked MD (0 when freeing) or
+ * a negative error.
+ *
+ * @mem_tgt == NULL:  only the MD size is returned (the maximum size when
+ *                    @disk_src == NULL as well).
+ * @*mem_tgt != NULL and @disk_src == NULL:  @*mem_tgt is freed.
+ * @*mem_tgt == NULL:  the structure is allocated.
+ */
+static inline int obd_unpackmd(struct obd_export *exp,
+                               struct lov_stripe_md **mem_tgt,
+                               struct lov_mds_md *disk_src,
+                               int disk_len)
+{
+        int size;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, unpackmd);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd);
+        size = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len);
+
+        RETURN(size);
+}
+
+/* helper functions */
+/* Allocate an in-memory MD: @mem_tgt must point at a NULL pointer, which
+ * obd_unpackmd() is then asked to fill in (no disk source given). */
+static inline int obd_alloc_memmd(struct obd_export *exp,
+                                  struct lov_stripe_md **mem_tgt)
+{
+        int size;
+
+        LASSERT(mem_tgt);
+        LASSERT(*mem_tgt == NULL);
+        size = obd_unpackmd(exp, mem_tgt, NULL, 0);
+        return size;
+}
+
+/* Free an in-memory MD: @mem_tgt must point at a non-NULL pointer, which
+ * is handed to obd_unpackmd() without a disk source. */
+static inline int obd_free_memmd(struct obd_export *exp,
+                                 struct lov_stripe_md **mem_tgt)
+{
+        int status;
+
+        LASSERT(mem_tgt);
+        LASSERT(*mem_tgt);
+        status = obd_unpackmd(exp, mem_tgt, NULL, 0);
+        return status;
+}
+
+/* Call the checkmd method of @exp's device with @md_exp and @mem_tgt and
+ * return its result. */
+static inline int obd_checkmd(struct obd_export *exp,
+                              struct obd_export *md_exp,
+                              struct lov_stripe_md *mem_tgt)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, checkmd);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, checkmd);
+        result = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt);
+
+        RETURN(result);
+}
+
+/* Call the create method of @exp's device and return its result. */
+static inline int obd_create(struct obd_export *exp, struct obdo *obdo,
+                             struct lov_stripe_md **ea,
+                             struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, create);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, create);
+        result = OBP(exp->exp_obd, create)(exp, obdo, ea, oti);
+
+        RETURN(result);
+}
+
+/* Call the destroy method of @exp's device and return its result. */
+static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo,
+                              struct lov_stripe_md *ea,
+                              struct obd_trans_info *oti,
+                              struct obd_export *md_exp)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, destroy);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, destroy);
+        result = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp);
+
+        RETURN(result);
+}
+
+/* Call the getattr method of @exp's device and return its result. */
+static inline int obd_getattr(struct obd_export *exp, struct obdo *obdo,
+                              struct lov_stripe_md *ea)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, getattr);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr);
+        result = OBP(exp->exp_obd, getattr)(exp, obdo, ea);
+
+        RETURN(result);
+}
+
+/* Call the getattr_async method of @exp's device, passing request @set,
+ * and return its result.
+ *
+ * The presence check and the counter name getattr_async, the method that
+ * is actually invoked; checking getattr instead would let a device that
+ * lacks getattr_async be called through a NULL pointer. */
+static inline int obd_getattr_async(struct obd_export *exp,
+                                    struct obdo *obdo, struct lov_stripe_md *ea,
+                                    struct ptlrpc_request_set *set)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, getattr_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, getattr_async);
+
+        rc = OBP(exp->exp_obd, getattr_async)(exp, obdo, ea, set);
+        RETURN(rc);
+}
+
+/* Call the setattr method of @exp's device and return its result. */
+static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo,
+                              struct lov_stripe_md *ea,
+                              struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, setattr);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr);
+        result = OBP(exp->exp_obd, setattr)(exp, obdo, ea, oti);
+
+        RETURN(result);
+}
+
+/* Call the setattr_async method of @exp's device and return its result. */
+static inline int obd_setattr_async(struct obd_export *exp,
+                                    struct obdo *obdo,
+                                    struct lov_stripe_md *ea,
+                                    struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, setattr_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async);
+        result = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti);
+
+        RETURN(result);
+}
+
+/* Call the add_conn method of the device owning import @imp, passing
+ * @uuid and @priority.  -EOPNOTSUPP is the fallback value given to
+ * OBD_CHECK_DT_OP(). */
+static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
+                               int priority)
+{
+        int result;
+        struct obd_device *obd = imp->imp_obd;
+        ENTRY;
+
+        OBD_CHECK_DEV_ACTIVE(obd);
+        OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, add_conn);
+        result = OBP(obd, add_conn)(imp, uuid, priority);
+
+        RETURN(result);
+}
+
+/* Call the del_conn method of the device owning import @imp for @uuid.
+ * -EOPNOTSUPP is the fallback value given to OBD_CHECK_DT_OP(). */
+static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid)
+{
+        int result;
+        struct obd_device *obd = imp->imp_obd;
+        ENTRY;
+
+        OBD_CHECK_DEV_ACTIVE(obd);
+        OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, del_conn);
+        result = OBP(obd, del_conn)(imp, uuid);
+
+        RETURN(result);
+}
+
+/* Call the connect method of @obd.
+ * When connect data @d is supplied, the flags it held on entry are saved
+ * and, after the call, it is asserted that the flags now in @d are a
+ * subset of them. */
+static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd,
+                              struct obd_uuid *cluuid,
+                              struct obd_connect_data *d)
+{
+        /* flags as they were on entry, used by the post-condition below */
+        __u64 ocf = d ? d->ocd_connect_flags : 0;
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DEV_ACTIVE(obd);
+        OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, connect);
+
+        result = OBP(obd, connect)(conn, obd, cluuid, d);
+
+        /* only a subset of the requested flags may have been granted */
+        LASSERT(ergo(d != NULL,
+                     (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
+        RETURN(result);
+}
+
+/* Call the reconnect method of @obd for export @exp.
+ * 0 is the fallback value given to OBD_CHECK_DT_OP().  As in obd_connect(),
+ * the flags in @d after the call are asserted to be a subset of those it
+ * held on entry. */
+static inline int obd_reconnect(struct obd_export *exp,
+                                struct obd_device *obd,
+                                struct obd_uuid *cluuid,
+                                struct obd_connect_data *d)
+{
+        /* flags as they were on entry, used by the post-condition below */
+        __u64 ocf = d ? d->ocd_connect_flags : 0;
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DEV_ACTIVE(obd);
+        OBD_CHECK_DT_OP(obd, reconnect, 0);
+        OBD_COUNTER_INCREMENT(obd, reconnect);
+
+        result = OBP(obd, reconnect)(exp, obd, cluuid, d);
+
+        /* only a subset of the requested flags may have been granted */
+        LASSERT(ergo(d != NULL,
+                     (d->ocd_connect_flags & ocf) == d->ocd_connect_flags));
+        RETURN(result);
+}
+
+/* Call the disconnect method of @exp's device and return its result. */
+static inline int obd_disconnect(struct obd_export *exp)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, disconnect);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect);
+        result = OBP(exp->exp_obd, disconnect)(exp);
+
+        RETURN(result);
+}
+
+/* Call the fid_alloc method of @exp's device with @fid and @hint.
+ * Returns -ENOTSUPP when the device has no fid_alloc method.
+ * NOTE(review): unlike most wrappers here, @exp and its device are not
+ * checked for NULL, and the error code differs from the -EOPNOTSUPP used
+ * elsewhere in this file - confirm both are intended. */
+static inline int obd_fid_alloc(struct obd_export *exp,
+                                struct lu_fid *fid,
+                                struct placement_hint *hint)
+{
+        int result;
+        ENTRY;
+
+        if (OBP(exp->exp_obd, fid_alloc) == NULL)
+                RETURN(-ENOTSUPP);
+
+        OBD_COUNTER_INCREMENT(exp->exp_obd, fid_alloc);
+        result = OBP(exp->exp_obd, fid_alloc)(exp, fid, hint);
+
+        RETURN(result);
+}
+
+/* Call the fid_delete method of @exp's device for @fid.
+ * A device without that method is not an error: 0 is returned. */
+static inline int obd_fid_delete(struct obd_export *exp,
+                                 struct lu_fid *fid)
+{
+        int result;
+        ENTRY;
+
+        if (OBP(exp->exp_obd, fid_delete) == NULL)
+                RETURN(0);
+
+        OBD_COUNTER_INCREMENT(exp->exp_obd, fid_delete);
+        result = OBP(exp->exp_obd, fid_delete)(exp, fid);
+
+        RETURN(result);
+}
+
+/* Call the optional init_export method of @exp's device.
+ * Returns 0 when the export has no device, OBT() of the device is false
+ * or the method is absent; otherwise the method's result. */
+static inline int obd_init_export(struct obd_export *exp)
+{
+        int result;
+
+        ENTRY;
+        if (exp->exp_obd == NULL || !OBT(exp->exp_obd) ||
+            !OBP(exp->exp_obd, init_export))
+                RETURN(0);
+
+        result = OBP(exp->exp_obd, init_export)(exp);
+        RETURN(result);
+}
+
+/* Call the optional destroy_export method of @exp's device.
+ * The method's return value is ignored; the result is always 0. */
+static inline int obd_destroy_export(struct obd_export *exp)
+{
+        ENTRY;
+        if (exp->exp_obd == NULL || !OBT(exp->exp_obd) ||
+            !OBP(exp->exp_obd, destroy_export))
+                RETURN(0);
+
+        OBP(exp->exp_obd, destroy_export)(exp);
+        RETURN(0);
+}
+
+/* Look up a dentry through lvfs_fid2dentry(), using the lvfs context of
+ * @exp's device.  The export must have a device attached. */
+static inline struct dentry *
+obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr)
+{
+        struct obd_device *obd;
+
+        LASSERT(exp->exp_obd);
+        obd = exp->exp_obd;
+
+        return lvfs_fid2dentry(&obd->obd_lvfs_ctxt, id_ino, gen, gr, obd);
+}
+
+/* Stub: asserts that @exp has a device, logs a FIXME message and returns 0.
+ * The call to lvfs_open_llog() is compiled out with #if 0, so @id_ino and
+ * @dentry are currently unused. */
+static inline int
+obd_lvfs_open_llog(struct obd_export *exp, __u64 id_ino, struct dentry *dentry)
+{
+        LASSERT(exp->exp_obd);
+        CERROR("FIXME what's the story here?  This needs to be an obd fn?\n");
+#if 0
+        return lvfs_open_llog(&exp->exp_obd->obd_lvfs_ctxt, id_ino,
+                              dentry, exp->exp_obd);
+#endif
+        return 0;
+}
+
+/* Fallback for platforms without time_before(): true when tick count @t1
+ * is earlier than @t2.  Arguments are parenthesized, and the difference is
+ * taken in unsigned arithmetic before the signed cast so that counter
+ * wrap-around does not cause signed overflow. */
+#ifndef time_before
+#define time_before(t1, t2) \
+        ((long)((unsigned long)(t1) - (unsigned long)(t2)) < 0)
+#endif
+
+/* @max_age is the oldest time, in jiffies, at which cached statfs data is
+ * still acceptable.  If the cache is older than @max_age a new value is
+ * fetched from the target and the cache is refreshed.  Use a value of
+ * "jiffies + HZ" to guarantee freshness.
+ *
+ * Returns -EINVAL for a NULL @obd, otherwise 0 when the cache is used or
+ * the statfs method's result. */
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                             unsigned long max_age)
+{
+        int result = 0;
+        ENTRY;
+
+        if (obd == NULL)
+                RETURN(-EINVAL);
+
+        OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, statfs);
+
+        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+        if (!time_before(obd->obd_osfs_age, max_age)) {
+                /* cache is recent enough: hand out a copy of it */
+                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+                spin_lock(&obd->obd_osfs_lock);
+                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+                spin_unlock(&obd->obd_osfs_lock);
+                RETURN(result);
+        }
+
+        /* cache too old: ask the device and remember the answer */
+        result = OBP(obd, statfs)(obd, osfs, max_age);
+        if (result == 0) {
+                spin_lock(&obd->obd_osfs_lock);
+                memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+                obd->obd_osfs_age = jiffies;
+                spin_unlock(&obd->obd_osfs_lock);
+        }
+        RETURN(result);
+}
+
+/* Call the sync method of @exp's device for the range @start..@end.
+ * -EOPNOTSUPP is the fallback value given to OBD_CHECK_DT_OP(). */
+static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
+                           struct lov_stripe_md *ea, obd_size start,
+                           obd_size end)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, sync);
+        result = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end);
+
+        RETURN(result);
+}
+
+/* Call the punch method of @exp's device for the range @start..@end and
+ * return its result. */
+static inline int obd_punch(struct obd_export *exp, struct obdo *oa,
+                            struct lov_stripe_md *ea, obd_size start,
+                            obd_size end, struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, punch);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, punch);
+        result = OBP(exp->exp_obd, punch)(exp, oa, ea, start, end, oti);
+
+        RETURN(result);
+}
+
+/* Call the brw method of @exp's device.
+ * @cmd must have a bit from OBD_BRW_RWMASK or OBD_BRW_CHECK set; anything
+ * else is logged and hits LBUG(). */
+static inline int obd_brw(int cmd, struct obd_export *exp, struct obdo *oa,
+                          struct lov_stripe_md *ea, obd_count oa_bufs,
+                          struct brw_page *pg, struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, brw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, brw);
+
+        if ((cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK)) == 0) {
+                CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
+                       "or OBD_BRW_CHECK\n");
+                LBUG();
+        }
+
+        result = OBP(exp->exp_obd, brw)(cmd, exp, oa, ea, oa_bufs, pg, oti);
+        RETURN(result);
+}
+
+/* Call the brw_async method of @exp's device, passing request @set.
+ * @cmd must have a bit from OBD_BRW_RWMASK set; anything else is logged
+ * and hits LBUG().  The log message names this function, not obd_brw, so
+ * the failing caller can be identified. */
+static inline int obd_brw_async(int cmd, struct obd_export *exp,
+                                struct obdo *oa, struct lov_stripe_md *ea,
+                                obd_count oa_bufs, struct brw_page *pg,
+                                struct ptlrpc_request_set *set,
+                                struct obd_trans_info *oti)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, brw_async);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async);
+
+        if (!(cmd & OBD_BRW_RWMASK)) {
+                CERROR("obd_brw_async: cmd must be OBD_BRW_READ or "
+                       "OBD_BRW_WRITE\n");
+                LBUG();
+        }
+
+        rc = OBP(exp->exp_obd, brw_async)(cmd, exp, oa, ea, oa_bufs, pg, set,
+                                          oti);
+        RETURN(rc);
+}
+
+/* Call the prep_async_page method of @exp's device and return its result.
+ * -EOPNOTSUPP is the fallback value given to OBD_CHECK_DT_OP(). */
+static inline int obd_prep_async_page(struct obd_export *exp,
+                                      struct lov_stripe_md *lsm,
+                                      struct lov_oinfo *loi,
+                                      struct page *page, obd_off offset,
+                                      struct obd_async_page_ops *ops,
+                                      void *data, void **res)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, prep_async_page, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, prep_async_page);
+        result = OBP(exp->exp_obd, prep_async_page)(exp, lsm, loi, page,
+                                                    offset, ops, data, res);
+
+        RETURN(result);
+}
+
+/* Call the queue_async_io method of @exp's device.
+ * @cmd is asserted to have a bit from OBD_BRW_RWMASK set. */
+static inline int obd_queue_async_io(struct obd_export *exp,
+                                     struct lov_stripe_md *lsm,
+                                     struct lov_oinfo *loi, void *cookie,
+                                     int cmd, obd_off off, int count,
+                                     obd_flag brw_flags, obd_flag async_flags)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, queue_async_io, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, queue_async_io);
+        LASSERT(cmd & OBD_BRW_RWMASK);
+
+        result = OBP(exp->exp_obd, queue_async_io)(exp, lsm, loi, cookie,
+                                                   cmd, off, count, brw_flags,
+                                                   async_flags);
+        RETURN(result);
+}
+
+/* Call the set_async_flags method of @exp's device and return its result.
+ * -EOPNOTSUPP is the fallback value given to OBD_CHECK_DT_OP(). */
+static inline int obd_set_async_flags(struct obd_export *exp,
+                                      struct lov_stripe_md *lsm,
+                                      struct lov_oinfo *loi, void *cookie,
+                                      obd_flag async_flags)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, set_async_flags);
+        result = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie,
+                                                    async_flags);
+
+        RETURN(result);
+}
+
+/* Call the queue_group_io method of @exp's device for group @oig.
+ * @cmd is asserted to have a bit from OBD_BRW_RWMASK set. */
+static inline int obd_queue_group_io(struct obd_export *exp,
+                                     struct lov_stripe_md *lsm,
+                                     struct lov_oinfo *loi,
+                                     struct obd_io_group *oig,
+                                     void *cookie, int cmd, obd_off off,
+                                     int count, obd_flag brw_flags,
+                                     obd_flag async_flags)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io);
+        LASSERT(cmd & OBD_BRW_RWMASK);
+
+        result = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig,
+                                                   cookie, cmd, off, count,
+                                                   brw_flags, async_flags);
+        RETURN(result);
+}
+
+/* Call the trigger_group_io method of @exp's device for group @oig and
+ * return its result. */
+static inline int obd_trigger_group_io(struct obd_export *exp,
+                                       struct lov_stripe_md *lsm,
+                                       struct lov_oinfo *loi,
+                                       struct obd_io_group *oig)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_group_io);
+        result = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig);
+
+        RETURN(result);
+}
+
+/* Call the teardown_async_page method of @exp's device for @cookie and
+ * return its result. */
+static inline int obd_teardown_async_page(struct obd_export *exp,
+                                          struct lov_stripe_md *lsm,
+                                          struct lov_oinfo *loi, void *cookie)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, teardown_async_page, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, teardown_async_page);
+        result = OBP(exp->exp_obd, teardown_async_page)(exp, lsm, loi, cookie);
+
+        RETURN(result);
+}
+
+/* Call the preprw method of @exp's device and return its result.
+ * -EOPNOTSUPP is the fallback value given to OBD_CHECK_DT_OP(). */
+static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa,
+                             int objcount, struct obd_ioobj *obj,
+                             int niocount, struct niobuf_remote *remote,
+                             struct niobuf_local *local,
+                             struct obd_trans_info *oti)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, preprw, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, preprw);
+        result = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj,
+                                           niocount, remote, local, oti);
+
+        RETURN(result);
+}
+
+/* Call the commitrw method of @exp's device.
+ * The incoming @rc is forwarded to the method as its last argument; the
+ * method's own return value is what this function returns. */
+static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa,
+                               int objcount, struct obd_ioobj *obj,
+                               int niocount, struct niobuf_local *local,
+                               struct obd_trans_info *oti, int rc)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, commitrw, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw);
+        result = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj,
+                                             niocount, local, oti, rc);
+
+        RETURN(result);
+}
+
+/* Call the merge_lvb method of @exp's device and return its result. */
+static inline int obd_merge_lvb(struct obd_export *exp,
+                                struct lov_stripe_md *lsm,
+                                struct ost_lvb *lvb, int kms_only)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb);
+        result = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only);
+
+        RETURN(result);
+}
+
+/* Call the adjust_kms method of @exp's device and return its result. */
+static inline int obd_adjust_kms(struct obd_export *exp,
+                                 struct lov_stripe_md *lsm, obd_off size,
+                                 int shrink)
+{
+        int result;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms);
+        result = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink);
+
+        RETURN(result);
+}
+
+/* Call the iocontrol method of @exp's device for command @cmd and return
+ * its result. */
+static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp,
+                                int len, void *karg, void *uarg)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, iocontrol);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol);
+        result = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg);
+
+        RETURN(result);
+}
+
+/* Call the enqueue method of @exp's device, forwarding every argument
+ * unchanged, and return its result. */
+static inline int obd_enqueue(struct obd_export *exp, struct lov_stripe_md *ea,
+                              __u32 type, ldlm_policy_data_t *policy,
+                              __u32 mode, int *flags, void *bl_cb, void *cp_cb,
+                              void *gl_cb, void *data, __u32 lvb_len,
+                              void *lvb_swabber, struct lustre_handle *lockh)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, enqueue);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue);
+        result = OBP(exp->exp_obd, enqueue)(exp, ea, type, policy, mode,
+                                            flags, bl_cb, cp_cb, gl_cb, data,
+                                            lvb_len, lvb_swabber, lockh);
+
+        RETURN(result);
+}
+
+/* Call the match method of @exp's device and return its result. */
+static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea,
+                            __u32 type, ldlm_policy_data_t *policy, __u32 mode,
+                            int *flags, void *data, struct lustre_handle *lockh)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, match);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, match);
+        result = OBP(exp->exp_obd, match)(exp, ea, type, policy, mode, flags,
+                                          data, lockh);
+
+        RETURN(result);
+}
+
+/* Call the change_cbdata method of @exp's device with iterator @it and
+ * @data, and return its result. */
+static inline int obd_change_cbdata(struct obd_export *exp,
+                                    struct lov_stripe_md *lsm,
+                                    ldlm_iterator_t it, void *data)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, change_cbdata);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata);
+        result = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data);
+
+        RETURN(result);
+}
+
+/* Call the cancel method of @exp's device for lock handle @lockh and
+ * return its result. */
+static inline int obd_cancel(struct obd_export *exp,
+                             struct lov_stripe_md *ea, __u32 mode,
+                             struct lustre_handle *lockh)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, cancel);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel);
+        result = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh);
+
+        RETURN(result);
+}
+
+/* Call the cancel_unused method of @exp's device and return its result. */
+static inline int obd_cancel_unused(struct obd_export *exp,
+                                    struct lov_stripe_md *ea,
+                                    int flags, void *opaque)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, cancel_unused);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused);
+        result = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque);
+
+        RETURN(result);
+}
+
+/* Call the join_lru method of @exp's device and return its result. */
+static inline int obd_join_lru(struct obd_export *exp,
+                               struct lov_stripe_md *ea, int join)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, join_lru);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, join_lru);
+        result = OBP(exp->exp_obd, join_lru)(exp, ea, join);
+
+        RETURN(result);
+}
+
+/* Call the san_preprw method of @exp's device and return its result.
+ *
+ * The presence check and the counter name san_preprw, the method that is
+ * actually invoked; checking preprw instead would let a device that lacks
+ * san_preprw be called through a NULL pointer.
+ *
+ * NOTE(review): class_export_put() drops an export reference after the
+ * call but not on the early-return path of EXP_CHECK_DT_OP() - confirm
+ * who owns that reference. */
+static inline int obd_san_preprw(int cmd, struct obd_export *exp,
+                                 struct obdo *oa,
+                                 int objcount, struct obd_ioobj *obj,
+                                 int niocount, struct niobuf_remote *remote)
+{
+        int rc;
+
+        EXP_CHECK_DT_OP(exp, san_preprw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, san_preprw);
+
+        rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj,
+                                           niocount, remote);
+        class_export_put(exp);
+        return(rc);
+}
+
+/* Call the pin method of @exp's device for @fid and return its result. */
+static inline int obd_pin(struct obd_export *exp, struct lu_fid *fid,
+                          struct obd_client_handle *handle, int flag)
+{
+        int result;
+
+        EXP_CHECK_DT_OP(exp, pin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, pin);
+        result = OBP(exp->exp_obd, pin)(exp, fid, handle, flag);
+
+        return result;
+}
+
+/* Call the unpin method of @exp's device for @handle and return its
+ * result. */
+static inline int obd_unpin(struct obd_export *exp,
+                            struct obd_client_handle *handle, int flag)
+{
+        int result;
+
+        EXP_CHECK_DT_OP(exp, unpin);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, unpin);
+        result = OBP(exp->exp_obd, unpin)(exp, handle, flag);
+
+        return result;
+}
+
+
+/* Deliver import @event for @imp to @obd's import_event method.
+ * A NULL @obd is logged; a device that is not set up or has no such
+ * method is silently skipped. */
+static inline void obd_import_event(struct obd_device *obd,
+                                    struct obd_import *imp,
+                                    enum obd_import_event event)
+{
+        if (!obd) {
+                CERROR("NULL device\n");
+                EXIT;
+                return;
+        }
+
+        if (!obd->obd_set_up || !OBP(obd, import_event))
+                return;
+
+        OBD_COUNTER_INCREMENT(obd, import_event);
+        OBP(obd, import_event)(obd, imp, event);
+}
+
+/* Deliver notification @ev about @watched to @obd's notify method.
+ * Returns -EINVAL when @obd is neither set up nor flagged obd_async_recov,
+ * -ENOSYS when it has no notify method, otherwise the method's result. */
+static inline int obd_notify(struct obd_device *obd,
+                             struct obd_device *watched,
+                             enum obd_notify_event ev,
+                             void *data)
+{
+        OBD_CHECK_DEV(obd);
+
+        /* The obd_async_recov test is a complete hack: the flag is
+         * overloaded to also mean "this was called from mds_postsetup".
+         * The mds is able to handle notifies by that point, and it needs
+         * to receive them in order to execute mds_postrecov. */
+        if (!(obd->obd_set_up || obd->obd_async_recov)) {
+                CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name);
+                return -EINVAL;
+        }
+
+        if (OBP(obd, notify) == NULL) {
+                CERROR("obd %s has no notify handler\n", obd->obd_name);
+                return -ENOSYS;
+        }
+
+        OBD_COUNTER_INCREMENT(obd, notify);
+        return OBP(obd, notify)(obd, watched, ev, data);
+}
+
+/* Notify the listeners of @observer about event @ev on @observed: first
+ * its obd observer (if registered), then its non-obd upcall (if set).
+ * Both are always tried; the observer's error takes precedence over the
+ * upcall's in the return value. */
+static inline int obd_notify_observer(struct obd_device *observer,
+                                      struct obd_device *observed,
+                                      enum obd_notify_event ev,
+                                      void *data)
+{
+        struct obd_notify_upcall *onu;
+        int obd_rc = 0;
+        int upcall_rc = 0;
+
+        if (observer->obd_observer)
+                obd_rc = obd_notify(observer->obd_observer, observed, ev,
+                                    data);
+
+        /*
+         * Also, call non-obd listener, if any
+         */
+        onu = &observer->obd_upcall;
+        if (onu->onu_upcall != NULL)
+                upcall_rc = onu->onu_upcall(observer, observed, ev,
+                                            onu->onu_owner);
+
+        if (obd_rc)
+                return obd_rc;
+        return upcall_rc;
+}
+
+/* Call the quotacheck method of @exp's device and return its result. */
+static inline int obd_quotacheck(struct obd_export *exp,
+                                 struct obd_quotactl *oqctl)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, quotacheck);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck);
+        result = OBP(exp->exp_obd, quotacheck)(exp, oqctl);
+
+        RETURN(result);
+}
+
+/* Call the quotactl method of @exp's device and return its result. */
+static inline int obd_quotactl(struct obd_export *exp,
+                               struct obd_quotactl *oqctl)
+{
+        int result;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, quotactl);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl);
+        result = OBP(exp->exp_obd, quotactl)(exp, oqctl);
+
+        RETURN(result);
+}
+
+/* Ask @obd for its health status.
+ *
+ * Returns:  0 when healthy
+ *          >0 when unhealthy, with a reason code/flag; the only
+ *             supported reason is 1 right now.  Better reasons or
+ *             flags will need to be defined in the future.
+ *          <0 on error
+ */
+static inline int obd_health_check(struct obd_device *obd)
+{
+        int status;
+        ENTRY;
+
+        /* the op-check macros are not used here, because a NULL method
+         * is normal for health_check */
+        if (obd == NULL || !OBT(obd)) {
+                CERROR("cleaned up obd\n");
+                RETURN(-EOPNOTSUPP);
+        }
+
+        /* a device that is not set up, is stopping, or has no
+         * health_check method is reported as healthy */
+        if (obd->obd_stopping || !obd->obd_set_up)
+                RETURN(0);
+        if (OBP(obd, health_check) == NULL)
+                RETURN(0);
+
+        status = OBP(obd, health_check)(obd);
+        RETURN(status);
+}
+
+/*
+ * Store @observer in obd->obd_observer; passing NULL clears the field.
+ * Returns -EALREADY when an observer is already present and another non-NULL
+ * one is offered, 0 otherwise.  OBD_CHECK_DEV() (defined elsewhere) runs
+ * before @obd is dereferenced.
+ */
+static inline int obd_register_observer(struct obd_device *obd,
+                                        struct obd_device *observer)
+{
+        ENTRY;
+        OBD_CHECK_DEV(obd);
+
+        /* replacing one live observer with another is refused */
+        if (observer != NULL && obd->obd_observer != NULL)
+                RETURN(-EALREADY);
+
+        obd->obd_observer = observer;
+        RETURN(0);
+}
+
+/* metadata helpers */
+/*
+ * getstatus wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then calls the getstatus method reached through
+ * MDP(exp->exp_obd, ...) and returns its result unchanged.
+ */
+static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, getstatus);
+        MD_COUNTER_INCREMENT(exp->exp_obd, getstatus);
+        RETURN(MDP(exp->exp_obd, getstatus)(exp, fid));
+}
+
+/*
+ * getattr wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then passes every argument through to the getattr
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_getattr(struct obd_export *exp, struct lu_fid *fid,
+                             obd_valid valid, int ea_size,
+                             struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, getattr);
+        MD_COUNTER_INCREMENT(exp->exp_obd, getattr);
+        RETURN(MDP(exp->exp_obd, getattr)(exp, fid, valid, ea_size,
+                                          request));
+}
+
+/*
+ * change_cbdata wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then hands @fid, @it and @data to the
+ * change_cbdata method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_change_cbdata(struct obd_export *exp, struct lu_fid *fid,
+                                   ldlm_iterator_t it, void *data)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, change_cbdata);
+        MD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata);
+        RETURN(MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data));
+}
+
+/*
+ * close wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then calls the close method of exp->exp_obd with the
+ * caller's arguments and returns its result unchanged.
+ */
+static inline int md_close(struct obd_export *exp,
+                           struct md_op_data *op_data,
+                           struct obd_client_handle *och,
+                           struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, close);
+        MD_COUNTER_INCREMENT(exp->exp_obd, close);
+        RETURN(MDP(exp->exp_obd, close)(exp, op_data, och, request));
+}
+
+/*
+ * create wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments, in order, to the create
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_create(struct obd_export *exp, struct md_op_data *op_data,
+                            const void *data, int datalen, int mode,
+                            __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev,
+                            struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, create);
+        MD_COUNTER_INCREMENT(exp->exp_obd, create);
+        RETURN(MDP(exp->exp_obd, create)(exp, op_data, data, datalen,
+                                         mode, uid, gid, cap_effective,
+                                         rdev, request));
+}
+
+/*
+ * done_writing wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then calls the done_writing method of
+ * exp->exp_obd with @op_data and returns its result unchanged.
+ */
+static inline int md_done_writing(struct obd_export *exp,
+                                  struct md_op_data *op_data)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, done_writing);
+        MD_COUNTER_INCREMENT(exp->exp_obd, done_writing);
+        RETURN(MDP(exp->exp_obd, done_writing)(exp, op_data));
+}
+
+/*
+ * enqueue wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments, in order, to the enqueue
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_enqueue(struct obd_export *exp, int lock_type,
+                             struct lookup_intent *it, int lock_mode,
+                             struct md_op_data *op_data,
+                             struct lustre_handle *lockh,
+                             void *lmm, int lmmsize,
+                             ldlm_completion_callback cb_completion,
+                             ldlm_blocking_callback cb_blocking,
+                             void *cb_data, int extra_lock_flags)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, enqueue);
+        MD_COUNTER_INCREMENT(exp->exp_obd, enqueue);
+        RETURN(MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode,
+                                          op_data, lockh, lmm, lmmsize,
+                                          cb_completion, cb_blocking,
+                                          cb_data, extra_lock_flags));
+}
+
+/*
+ * getattr_name wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards all arguments to the getattr_name
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_getattr_name(struct obd_export *exp, struct lu_fid *fid,
+                                  const char *filename, int namelen,
+                                  obd_valid valid, int ea_size,
+                                  struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, getattr_name);
+        MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name);
+        RETURN(MDP(exp->exp_obd, getattr_name)(exp, fid, filename,
+                                               namelen, valid, ea_size,
+                                               request));
+}
+
+/*
+ * intent_lock wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards all arguments to the intent_lock
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_intent_lock(struct obd_export *exp,
+                                 struct md_op_data *op_data,
+                                 void *lmm, int lmmsize,
+                                 struct lookup_intent *it,
+                                 int flags, struct ptlrpc_request **reqp,
+                                 ldlm_blocking_callback cb_blocking,
+                                 int extra_lock_flags)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, intent_lock);
+        MD_COUNTER_INCREMENT(exp->exp_obd, intent_lock);
+        RETURN(MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize,
+                                              it, flags, reqp,
+                                              cb_blocking,
+                                              extra_lock_flags));
+}
+
+/*
+ * link wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then calls the link method of exp->exp_obd with
+ * @op_data and @request and returns its result unchanged.
+ */
+static inline int md_link(struct obd_export *exp,
+                          struct md_op_data *op_data,
+                          struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, link);
+        MD_COUNTER_INCREMENT(exp->exp_obd, link);
+        RETURN(MDP(exp->exp_obd, link)(exp, op_data, request));
+}
+
+/*
+ * rename wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments to the rename method of
+ * exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_rename(struct obd_export *exp,
+                            struct md_op_data *op_data,
+                            const char *old, int oldlen,
+                            const char *new, int newlen,
+                            struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, rename);
+        MD_COUNTER_INCREMENT(exp->exp_obd, rename);
+        RETURN(MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen,
+                                         new, newlen, request));
+}
+
+/*
+ * setattr wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments to the setattr method of
+ * exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data,
+                             struct iattr *iattr, void *ea, int ealen,
+                             void *ea2, int ea2len, struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, setattr);
+        MD_COUNTER_INCREMENT(exp->exp_obd, setattr);
+        RETURN(MDP(exp->exp_obd, setattr)(exp, op_data, iattr, ea, ealen,
+                                          ea2, ea2len, request));
+}
+
+/*
+ * sync wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then calls the sync method of exp->exp_obd with @fid
+ * and @request and returns its result unchanged.
+ */
+static inline int md_sync(struct obd_export *exp, struct lu_fid *fid,
+                          struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, sync);
+        MD_COUNTER_INCREMENT(exp->exp_obd, sync);
+        RETURN(MDP(exp->exp_obd, sync)(exp, fid, request));
+}
+
+/*
+ * readpage wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards @fid, @offset, @page and @request to the
+ * readpage method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_readpage(struct obd_export *exp, struct lu_fid *fid,
+                              __u64 offset, struct page *page,
+                              struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, readpage);
+        MD_COUNTER_INCREMENT(exp->exp_obd, readpage);
+        RETURN(MDP(exp->exp_obd, readpage)(exp, fid, offset, page,
+                                           request));
+}
+
+/*
+ * unlink wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then calls the unlink method of exp->exp_obd with
+ * @op_data and @request and returns its result unchanged.
+ */
+static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data,
+                            struct ptlrpc_request **request)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, unlink);
+        MD_COUNTER_INCREMENT(exp->exp_obd, unlink);
+        RETURN(MDP(exp->exp_obd, unlink)(exp, op_data, request));
+}
+
+/*
+ * get_lustre_md wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards all arguments to the
+ * get_lustre_md method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_get_lustre_md(struct obd_export *exp,
+                                   struct ptlrpc_request *req,
+                                   int offset, struct obd_export *dt_exp,
+                                   struct lustre_md *md)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, get_lustre_md);
+        MD_COUNTER_INCREMENT(exp->exp_obd, get_lustre_md);
+
+        rc = MDP(exp->exp_obd, get_lustre_md)(exp, req, offset, dt_exp, md);
+        RETURN(rc);
+}
+
+/*
+ * free_lustre_md wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then calls the free_lustre_md method of
+ * exp->exp_obd with @md and returns its result unchanged.
+ */
+static inline int md_free_lustre_md(struct obd_export *exp,
+                                    struct lustre_md *md)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, free_lustre_md);
+        MD_COUNTER_INCREMENT(exp->exp_obd, free_lustre_md);
+
+        rc = MDP(exp->exp_obd, free_lustre_md)(exp, md);
+        RETURN(rc);
+}
+
+/*
+ * setxattr wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments to the setxattr method of
+ * exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_setxattr(struct obd_export *exp, struct lu_fid *fid,
+                              obd_valid valid, const char *name,
+                              const char *input, int input_size,
+                              int output_size, int flags,
+                              struct ptlrpc_request **request)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, setxattr);
+        MD_COUNTER_INCREMENT(exp->exp_obd, setxattr);
+
+        rc = MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input,
+                                         input_size, output_size, flags,
+                                         request);
+        RETURN(rc);
+}
+
+/*
+ * getxattr wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT() (both
+ * defined elsewhere), then forwards all arguments to the getxattr method of
+ * exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_getxattr(struct obd_export *exp, struct lu_fid *fid,
+                              obd_valid valid, const char *name,
+                              const char *input, int input_size,
+                              int output_size, int flags,
+                              struct ptlrpc_request **request)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, getxattr);
+        MD_COUNTER_INCREMENT(exp->exp_obd, getxattr);
+
+        rc = MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input,
+                                         input_size, output_size, flags,
+                                         request);
+        RETURN(rc);
+}
+
+/*
+ * set_open_replay_data wrapper: runs EXP_CHECK_MD_OP() and
+ * MD_COUNTER_INCREMENT() (both defined elsewhere), then calls the
+ * set_open_replay_data method of exp->exp_obd with @och and @open_req and
+ * returns its result unchanged.
+ */
+static inline int md_set_open_replay_data(struct obd_export *exp,
+                                          struct obd_client_handle *och,
+                                          struct ptlrpc_request *open_req)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, set_open_replay_data);
+        MD_COUNTER_INCREMENT(exp->exp_obd, set_open_replay_data);
+
+        rc = MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req);
+        RETURN(rc);
+}
+
+/*
+ * clear_open_replay_data wrapper: runs EXP_CHECK_MD_OP() and
+ * MD_COUNTER_INCREMENT() (both defined elsewhere), then calls the
+ * clear_open_replay_data method of exp->exp_obd with @och and returns its
+ * result unchanged.
+ */
+static inline int md_clear_open_replay_data(struct obd_export *exp,
+                                            struct obd_client_handle *och)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, clear_open_replay_data);
+        MD_COUNTER_INCREMENT(exp->exp_obd, clear_open_replay_data);
+
+        rc = MDP(exp->exp_obd, clear_open_replay_data)(exp, och);
+        RETURN(rc);
+}
+
+/*
+ * set_lock_data wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then calls the set_lock_data method of
+ * exp->exp_obd with @lockh and @data and returns its result unchanged.
+ */
+static inline int md_set_lock_data(struct obd_export *exp,
+                                   __u64 *lockh, void *data)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, set_lock_data);
+        MD_COUNTER_INCREMENT(exp->exp_obd, set_lock_data);
+
+        rc = MDP(exp->exp_obd, set_lock_data)(exp, lockh, data);
+        RETURN(rc);
+}
+
+/*
+ * cancel_unused wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards @fid, @flags and @opaque to the
+ * cancel_unused method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_cancel_unused(struct obd_export *exp,
+                                   struct lu_fid *fid,
+                                   int flags, void *opaque)
+{
+        ENTRY;
+        EXP_CHECK_MD_OP(exp, cancel_unused);
+        MD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused);
+        RETURN(MDP(exp->exp_obd, cancel_unused)(exp, fid, flags, opaque));
+}
+
+/*
+ * lock_match wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards all arguments to the lock_match
+ * method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_lock_match(struct obd_export *exp, int flags,
+                                struct lu_fid *fid, ldlm_type_t type,
+                                ldlm_policy_data_t *policy, ldlm_mode_t mode,
+                                struct lustre_handle *lockh)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, lock_match);
+        MD_COUNTER_INCREMENT(exp->exp_obd, lock_match);
+
+        rc = MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, policy,
+                                           mode, lockh);
+        RETURN(rc);
+}
+
+/*
+ * init_ea_size wrapper: runs EXP_CHECK_MD_OP() and MD_COUNTER_INCREMENT()
+ * (both defined elsewhere), then forwards the three sizes to the
+ * init_ea_size method of exp->exp_obd and returns its result unchanged.
+ */
+static inline int md_init_ea_size(struct obd_export *exp,
+                                  int easize, int def_asize,
+                                  int cookiesize)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_MD_OP(exp, init_ea_size);
+        MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size);
+
+        rc = MDP(exp->exp_obd, init_ea_size)(exp, easize, def_asize,
+                                             cookiesize);
+        RETURN(rc);
+}
+
+/* OBD Metadata Support */
+extern int obd_init_caches(void);
+extern void obd_cleanup_caches(void);
+
+/* support routines */
+extern cfs_mem_cache_t *obdo_cachep;
+/*
+ * Allocate one struct obdo from obdo_cachep with CFS_ALLOC_STD.  The pointer
+ * is returned exactly as OBD_SLAB_ALLOC() left it; no NULL check is done
+ * here, so the caller has to test the result.
+ */
+static inline struct obdo *obdo_alloc(void)
+{
+        struct obdo *oa;
+        OBD_SLAB_ALLOC(oa, obdo_cachep, CFS_ALLOC_STD, sizeof(*oa));
+        return oa;
+}
+
+/* Hand @oa back to obdo_cachep through OBD_SLAB_FREE(). */
+static inline void obdo_free(struct obdo *oa)
+{
+        OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa));
+}
+
+/*
+ * Placeholder: the body is empty, so @oa is not read and @fid is not
+ * written.  TODO: implement the obdo -> fid conversion.
+ */
+static inline void obdo2fid(struct obdo *oa,
+                            struct lu_fid *fid)
+{
+        /* something here */
+}
+
+/*
+ * Placeholder: the body is empty, so @fid is not read and @oa is not
+ * written.  TODO: implement the fid -> obdo conversion.
+ */
+static inline void fid2obdo(struct lu_fid *fid,
+                            struct obdo *oa)
+{
+        /* something here */
+}
+
+/* I'm as embarrassed about this as you are.
+ *
+ * <shaver> // XXX do not look into _superhack with remaining eye
+ * <shaver> // XXX if this were any uglier, I'd get my own show on MTV */
+extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
+
+/* sysctl.c */
+extern void obd_sysctl_init (void);
+extern void obd_sysctl_clean (void);
+
+/* uuid.c  */
+typedef __u8 class_uuid_t[16];
+void class_generate_random_uuid(class_uuid_t uuid);
+void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out);
+
+/* lustre_peer.c    */
+int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index);
+int class_add_uuid(const char *uuid, __u64 nid);
+int class_del_uuid (const char *uuid);
+void class_init_uuidlist(void);
+void class_exit_uuidlist(void);
+
+/* mea.c */
+int mea_name2idx(struct lmv_stripe_md *mea, char *name, int namelen);
+int raw_name2idx(int hashtype, int count, const char *name, int namelen);
+
+#endif /* __LINUX_OBD_CLASS_H */
similarity index 91%
rename from lustre/include/linux/obd_echo.h
rename to lustre/include/obd_echo.h
index 0910041..53b0e6b 100644 (file)
@@ -5,9 +5,6 @@
 #ifndef _OBD_ECHO_H
 #define _OBD_ECHO_H
 
-#define OBD_ECHO_DEVICENAME "obdecho"
-#define OBD_ECHO_CLIENT_DEVICENAME "echo_client"
-
 /* The persistent object (i.e. actually stores stuff!) */
 #define ECHO_PERSISTENT_OBJID    1ULL
 #define ECHO_PERSISTENT_SIZE     ((__u64)(1<<20))
similarity index 86%
rename from lustre/include/linux/obd_lov.h
rename to lustre/include/obd_lov.h
index 78ac7bc..057fb9d 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef _OBD_LOV_H__
 #define _OBD_LOV_H__
 
-#define OBD_LOV_DEVICENAME "lov"
-
 static inline int lov_stripe_md_size(int stripes)
 {
         return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo);
@@ -24,4 +22,7 @@ static inline int lov_mds_md_v1_size(int stripes)
 #define IOC_LOV_SET_OSC_ACTIVE         _IOWR('g', 50, long)
 #define IOC_LOV_MAX_NR                 50
 
+#define QOS_DEFAULT_THRESHOLD           10 /* MB */
+#define QOS_DEFAULT_MAXAGE              5  /* Seconds */
+
 #endif
similarity index 95%
rename from lustre/include/linux/obd_ost.h
rename to lustre/include/obd_ost.h
index 4a2a344..50aace7 100644 (file)
@@ -4,14 +4,14 @@
  *   This file is part of Lustre, http://www.lustre.org
  *
  * Data structures for object storage targets and client: OST & OSC's
- *
+ * 
  * See also lustre_idl.h for wire formats of requests.
  */
 
 #ifndef _LUSTRE_OST_H
 #define _LUSTRE_OST_H
 
-#include <linux/obd_class.h>
+#include <obd_class.h>
 
 struct osc_brw_async_args {
         struct obdo     *aa_oa;
diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h
new file mode 100644 (file)
index 0000000..beca205
--- /dev/null
@@ -0,0 +1,398 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef _OBD_SUPPORT
+#define _OBD_SUPPORT
+
+#include <libcfs/kp30.h>
+
+/* global variables */
+extern atomic_t obd_memory;
+extern int obd_memmax;
+extern unsigned int obd_fail_loc;
+extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_timeout;          /* seconds */
+/* Both intervals derive from obd_timeout: a quarter of it but at least 1,
+ * and a tenth of it but at least 10. */
+#define PING_INTERVAL max(obd_timeout / 4, 1U)
+#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U)
+extern unsigned int ldlm_timeout;
+extern unsigned int obd_health_check_timeout;
+extern char obd_lustre_upcall[128];
+extern cfs_waitq_t obd_race_waitq;
+
+#define OBD_FAIL_MDS                     0x100
+#define OBD_FAIL_MDS_HANDLE_UNPACK       0x101
+#define OBD_FAIL_MDS_GETATTR_NET         0x102
+#define OBD_FAIL_MDS_GETATTR_PACK        0x103
+#define OBD_FAIL_MDS_READPAGE_NET        0x104
+#define OBD_FAIL_MDS_READPAGE_PACK       0x105
+#define OBD_FAIL_MDS_SENDPAGE            0x106
+#define OBD_FAIL_MDS_REINT_NET           0x107
+#define OBD_FAIL_MDS_REINT_UNPACK        0x108
+#define OBD_FAIL_MDS_REINT_SETATTR       0x109
+#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a
+#define OBD_FAIL_MDS_REINT_CREATE        0x10b
+#define OBD_FAIL_MDS_REINT_CREATE_WRITE  0x10c
+#define OBD_FAIL_MDS_REINT_UNLINK        0x10d
+#define OBD_FAIL_MDS_REINT_UNLINK_WRITE  0x10e
+#define OBD_FAIL_MDS_REINT_LINK          0x10f
+#define OBD_FAIL_MDS_REINT_LINK_WRITE    0x110
+#define OBD_FAIL_MDS_REINT_RENAME        0x111
+#define OBD_FAIL_MDS_REINT_RENAME_WRITE  0x112
+#define OBD_FAIL_MDS_OPEN_NET            0x113
+#define OBD_FAIL_MDS_OPEN_PACK           0x114
+#define OBD_FAIL_MDS_CLOSE_NET           0x115
+#define OBD_FAIL_MDS_CLOSE_PACK          0x116
+#define OBD_FAIL_MDS_CONNECT_NET         0x117
+#define OBD_FAIL_MDS_CONNECT_PACK        0x118
+#define OBD_FAIL_MDS_REINT_NET_REP       0x119
+#define OBD_FAIL_MDS_DISCONNECT_NET      0x11a
+#define OBD_FAIL_MDS_GETSTATUS_NET       0x11b
+#define OBD_FAIL_MDS_GETSTATUS_PACK      0x11c
+#define OBD_FAIL_MDS_STATFS_PACK         0x11d
+#define OBD_FAIL_MDS_STATFS_NET          0x11e
+#define OBD_FAIL_MDS_GETATTR_NAME_NET    0x11f
+#define OBD_FAIL_MDS_PIN_NET             0x120
+#define OBD_FAIL_MDS_UNPIN_NET           0x121
+#define OBD_FAIL_MDS_ALL_REPLY_NET       0x122
+#define OBD_FAIL_MDS_ALL_REQUEST_NET     0x123
+#define OBD_FAIL_MDS_SYNC_NET            0x124
+#define OBD_FAIL_MDS_SYNC_PACK           0x125
+#define OBD_FAIL_MDS_DONE_WRITING_NET    0x126
+#define OBD_FAIL_MDS_DONE_WRITING_PACK   0x127
+#define OBD_FAIL_MDS_ALLOC_OBDO          0x128
+#define OBD_FAIL_MDS_PAUSE_OPEN          0x129
+#define OBD_FAIL_MDS_STATFS_LCW_SLEEP    0x12a
+#define OBD_FAIL_MDS_OPEN_CREATE         0x12b
+#define OBD_FAIL_MDS_OST_SETATTR         0x12c
+#define OBD_FAIL_MDS_QUOTACHECK_NET      0x12d
+#define OBD_FAIL_MDS_QUOTACTL_NET        0x12e
+#define OBD_FAIL_MDS_CLIENT_ADD          0x12f
+#define OBD_FAIL_MDS_GETXATTR_NET        0x130
+#define OBD_FAIL_MDS_GETXATTR_PACK       0x131
+#define OBD_FAIL_MDS_SETXATTR_NET        0x132
+#define OBD_FAIL_MDS_SETXATTR            0x133
+#define OBD_FAIL_MDS_SETXATTR_WRITE      0x134
+
+#define OBD_FAIL_OST                     0x200
+#define OBD_FAIL_OST_CONNECT_NET         0x201
+#define OBD_FAIL_OST_DISCONNECT_NET      0x202
+#define OBD_FAIL_OST_GET_INFO_NET        0x203
+#define OBD_FAIL_OST_CREATE_NET          0x204
+#define OBD_FAIL_OST_DESTROY_NET         0x205
+#define OBD_FAIL_OST_GETATTR_NET         0x206
+#define OBD_FAIL_OST_SETATTR_NET         0x207
+#define OBD_FAIL_OST_OPEN_NET            0x208
+#define OBD_FAIL_OST_CLOSE_NET           0x209
+#define OBD_FAIL_OST_BRW_NET             0x20a
+#define OBD_FAIL_OST_PUNCH_NET           0x20b
+#define OBD_FAIL_OST_STATFS_NET          0x20c
+#define OBD_FAIL_OST_HANDLE_UNPACK       0x20d
+#define OBD_FAIL_OST_BRW_WRITE_BULK      0x20e
+#define OBD_FAIL_OST_BRW_READ_BULK       0x20f
+#define OBD_FAIL_OST_SYNC_NET            0x210
+#define OBD_FAIL_OST_ALL_REPLY_NET       0x211
+#define OBD_FAIL_OST_ALL_REQUESTS_NET    0x212
+#define OBD_FAIL_OST_LDLM_REPLY_NET      0x213
+#define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
+#define OBD_FAIL_OST_ENOSPC              0x215
+#define OBD_FAIL_OST_EROFS               0x216
+#define OBD_FAIL_OST_ENOENT              0x217
+#define OBD_FAIL_OST_QUOTACHECK_NET      0x218
+#define OBD_FAIL_OST_QUOTACTL_NET        0x219
+
+#define OBD_FAIL_LDLM                    0x300
+#define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
+#define OBD_FAIL_LDLM_ENQUEUE            0x302
+#define OBD_FAIL_LDLM_CONVERT            0x303
+#define OBD_FAIL_LDLM_CANCEL             0x304
+#define OBD_FAIL_LDLM_BL_CALLBACK        0x305
+#define OBD_FAIL_LDLM_CP_CALLBACK        0x306
+#define OBD_FAIL_LDLM_GL_CALLBACK        0x307
+#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308
+#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309
+#define OBD_FAIL_LDLM_CREATE_RESOURCE    0x30a
+#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED    0x30b
+#define OBD_FAIL_LDLM_REPLY              0x30c
+#define OBD_FAIL_LDLM_RECOV_CLIENTS      0x30d
+#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e
+
+#define OBD_FAIL_OSC                     0x400
+#define OBD_FAIL_OSC_BRW_READ_BULK       0x401
+#define OBD_FAIL_OSC_BRW_WRITE_BULK      0x402
+#define OBD_FAIL_OSC_LOCK_BL_AST         0x403
+#define OBD_FAIL_OSC_LOCK_CP_AST         0x404
+#define OBD_FAIL_OSC_MATCH               0x405
+#define OBD_FAIL_OSC_BRW_PREP_REQ        0x406
+#define OBD_FAIL_OSC_SHUTDOWN            0x407
+
+#define OBD_FAIL_PTLRPC                  0x500
+#define OBD_FAIL_PTLRPC_ACK              0x501
+#define OBD_FAIL_PTLRPC_RQBD             0x502
+#define OBD_FAIL_PTLRPC_BULK_GET_NET     0x503
+#define OBD_FAIL_PTLRPC_BULK_PUT_NET     0x504
+#define OBD_FAIL_PTLRPC_DROP_RPC         0x505
+#define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
+
+#define OBD_FAIL_OBD_PING_NET            0x600
+#define OBD_FAIL_OBD_LOG_CANCEL_NET      0x601
+#define OBD_FAIL_OBD_LOGD_NET            0x602
+#define OBD_FAIL_OBD_QC_CALLBACK_NET     0x603
+#define OBD_FAIL_OBD_DQACQ               0x604
+
+#define OBD_FAIL_TGT_REPLY_NET           0x700
+#define OBD_FAIL_TGT_CONN_RACE           0x701
+#define OBD_FAIL_TGT_FORCE_RECONNECT     0x702
+#define OBD_FAIL_TGT_DELAY_CONNECT       0x703
+#define OBD_FAIL_TGT_DELAY_RECONNECT     0x704
+
+#define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
+
+#define OBD_FAIL_MGS                     0x900
+#define OBD_FAIL_MGS_ALL_REQUEST_NET     0x901
+#define OBD_FAIL_MGS_ALL_REPLY_NET       0x902
+
+/* preparation for a more advanced failure testbed (not functional yet) */
+#define OBD_FAIL_MASK_SYS    0x0000FF00
+#define OBD_FAIL_MASK_LOC    (0x000000FF | OBD_FAIL_MASK_SYS)
+#define OBD_FAIL_ONCE        0x80000000
+#define OBD_FAILED           0x40000000
+#define OBD_FAIL_MDS_ALL_NET 0x01000000
+#define OBD_FAIL_OST_ALL_NET 0x02000000
+
+/*
+ * True when the OBD_FAIL_MASK_LOC bits of obd_fail_loc equal those of @id,
+ * unless obd_fail_loc already has both OBD_FAILED and OBD_FAIL_ONCE set.
+ * Pure test: obd_fail_loc is only read here.
+ */
+#define OBD_FAIL_CHECK(id)   (((obd_fail_loc & OBD_FAIL_MASK_LOC) ==           \
+                              ((id) & OBD_FAIL_MASK_LOC)) &&                   \
+                              ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \
+                                (OBD_FAILED | OBD_FAIL_ONCE)))
+
+/*
+ * Statement expression yielding 1 when OBD_FAIL_CHECK(id) hits, 0 otherwise.
+ * On a hit it logs through CERROR, sets OBD_FAILED in obd_fail_loc and, if
+ * @id carries OBD_FAIL_ONCE, sets that bit too -- after which
+ * OBD_FAIL_CHECK() no longer matches.  @id is expanded several times, so it
+ * must be free of side effects.  obd_fail_loc is updated with plain |=.
+ */
+#define OBD_FAIL_CHECK_ONCE(id)                                              \
+({      int _ret_ = 0;                                                       \
+        if (OBD_FAIL_CHECK(id)) {                                            \
+                CERROR("*** obd_fail_loc=%x ***\n", id);                     \
+                obd_fail_loc |= OBD_FAILED;                                  \
+                if ((id) & OBD_FAIL_ONCE)                                    \
+                        obd_fail_loc |= OBD_FAIL_ONCE;                       \
+                _ret_ = 1;                                                   \
+        }                                                                    \
+        _ret_;                                                               \
+})
+
+/* Leave the enclosing function through RETURN(ret) when
+ * OBD_FAIL_CHECK_ONCE(id) fires; otherwise do nothing. */
+#define OBD_FAIL_RETURN(id, ret)                                             \
+do {                                                                         \
+        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
+                RETURN(ret);                                                 \
+        }                                                                    \
+} while(0)
+
+/*
+ * When OBD_FAIL_CHECK_ONCE(id) fires: log, sleep through
+ * cfs_schedule_timeout(CFS_TASK_UNINT, cfs_time_seconds(secs)), then log
+ * again after waking.  @secs is printed with %d.
+ */
+#define OBD_FAIL_TIMEOUT(id, secs)                                           \
+do {                                                                         \
+        if (OBD_FAIL_CHECK_ONCE(id)) {                                       \
+                CERROR("obd_fail_timeout id %x sleeping for %d secs\n",      \
+                       (id), (secs));                                        \
+                set_current_state(TASK_UNINTERRUPTIBLE);                     \
+                cfs_schedule_timeout(CFS_TASK_UNINT,                         \
+                                    cfs_time_seconds(secs));                 \
+                set_current_state(TASK_RUNNING);                             \
+                CERROR("obd_fail_timeout id %x awake\n", (id));              \
+       }                                                                     \
+} while(0)
+
+/*
+ * OBD_RACE(id): kernel builds get the two-thread rendezvous below; every
+ * other build gets an empty statement.
+ */
+#ifdef __KERNEL__
+/* The idea here is to synchronise two threads to force a race. The
+ * first thread that calls this with a matching fail_loc is put to
+ * sleep. The next thread that calls with the same fail_loc wakes up
+ * the first and continues.
+ *
+ * The sleeper is the caller for which OBD_FAIL_CHECK_ONCE(id) fires; the
+ * waker is a caller for which it does not fire but whose location bits
+ * (OBD_FAIL_MASK_LOC) still match obd_fail_loc. */
+#define OBD_RACE(id)                                            \
+do {                                                            \
+        if  (OBD_FAIL_CHECK_ONCE(id)) {                         \
+                CERROR("obd_race id %x sleeping\n", (id));      \
+                OBD_SLEEP_ON(&obd_race_waitq);        \
+                CERROR("obd_fail_race id %x awake\n", (id));    \
+        } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) ==        \
+                    ((id) & OBD_FAIL_MASK_LOC)) {               \
+                cfs_waitq_signal(&obd_race_waitq);              \
+        }                                                       \
+} while(0)
+#else
+/* sigh.  an expedient fix until OBD_RACE is fixed up */
+#define OBD_RACE(foo) do {} while(0)
+#endif
+
+/* Emit a D_OTHER "FIXME" debug message.  The expansion deliberately has no
+ * trailing semicolon: the caller's own ';' ends the statement, so fixme()
+ * can sit in an unbraced if/else without leaving a stray empty statement. */
+#define fixme() CDEBUG(D_OTHER, "FIXME\n")
+
+extern atomic_t libcfs_kmemory;
+
+/*
+ * OBD_ALLOC_GFP(ptr, size, gfp_mask): allocate @size bytes with cfs_alloc()
+ * into @ptr and zero them.  On failure @ptr is left NULL and an error is
+ * logged; the macro itself does not bail out, so callers must test @ptr.
+ * The non-utils variant also adds @size to obd_memory and raises obd_memmax
+ * when the running total exceeds it.  @ptr and @size are expanded more than
+ * once and must be free of side effects.
+ */
+#if defined(LUSTRE_UTILS) /* this version is for utils only */
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
+do {                                                                          \
+        (ptr) = cfs_alloc(size, (gfp_mask));                                  \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+        } else {                                                              \
+                memset(ptr, 0, size);                                         \
+                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n",          \
+                       (int)(size), ptr);                                     \
+        }                                                                     \
+} while (0)
+#else /* this version is for the kernel and liblustre */
+#define OBD_ALLOC_GFP(ptr, size, gfp_mask)                                    \
+do {                                                                          \
+        (ptr) = cfs_alloc(size, (gfp_mask));                                  \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+        } else {                                                              \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
+} while (0)
+#endif
+
+/* Mask used by OBD_ALLOC(); defaults to CFS_ALLOC_IO unless OBD_ALLOC_MASK
+ * was already defined before this point. */
+#ifndef OBD_ALLOC_MASK
+# define OBD_ALLOC_MASK CFS_ALLOC_IO
+#endif
+
+/* Shorthands over OBD_ALLOC_GFP(): OBD_ALLOC uses OBD_ALLOC_MASK, the _WAIT
+ * forms use CFS_ALLOC_STD, and the _PTR forms take the size from *(ptr). */
+#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_ALLOC_MASK)
+#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, CFS_ALLOC_STD)
+#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr))
+#define OBD_ALLOC_PTR_WAIT(ptr) OBD_ALLOC_WAIT(ptr, sizeof *(ptr))
+
+/*
+ * OBD_VMALLOC(ptr, size): zeroed allocation through cfs_alloc_large(), with
+ * the same obd_memory / obd_memmax accounting and failure logging as the
+ * kernel OBD_ALLOC_GFP().  @ptr is NULL on failure.  When __arch_um__ is
+ * defined the macro simply maps to OBD_ALLOC().
+ */
+#ifdef __arch_um__
+# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size)
+#else
+# define OBD_VMALLOC(ptr, size)                                               \
+do {                                                                          \
+        (ptr) = cfs_alloc_large(size);                                        \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n",  \
+                       (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+        } else {                                                              \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
+} while (0)
+#endif
+
+/* POISON(ptr, c, s): fill @s bytes at @ptr with @c.  Expands to an empty
+ * statement when CONFIG_DEBUG_SLAB is defined. */
+#ifdef CONFIG_DEBUG_SLAB
+#define POISON(ptr, c, s) do {} while (0)
+#else
+#define POISON(ptr, c, s) memset(ptr, c, s)
+#endif
+
+/* POISON_PAGE(page, val): when POISON_BULK is non-zero, map @page with
+ * kmap(), fill PAGE_SIZE bytes with @val and unmap it; otherwise a no-op.
+ * An undefined POISON_BULK counts as 0 in the #if. */
+#if POISON_BULK
+#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE);       \
+                                    kunmap(page); } while (0)
+#else
+#define POISON_PAGE(page, val) do { } while (0)
+#endif
+
+/*
+ * OBD_FREE(ptr, size).
+ * Kernel build: asserts @ptr is non-NULL, subtracts @size from obd_memory,
+ * logs, poisons the buffer with 0x5a via POISON(), frees it with cfs_free()
+ * and finally overwrites @ptr with 0xdeadbeef (so @ptr must be an lvalue).
+ * Other builds: plain free(); @size is evaluated and discarded and @ptr is
+ * left unchanged.
+ */
+#ifdef __KERNEL__
+#define OBD_FREE(ptr, size)                                                   \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        atomic_sub(size, &obd_memory);                                        \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",           \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        POISON(ptr, 0x5a, size);                                              \
+        cfs_free(ptr);                                                        \
+        (ptr) = (void *)0xdeadbeef;                                           \
+} while (0)
+#else
+#define OBD_FREE(ptr, size) ((void)(size), free((ptr)))
+#endif
+
+/* OBD_VFREE(ptr, size): counterpart of OBD_VMALLOC().
+ *
+ * Asserts that 'ptr' is non-NULL, subtracts 'size' from obd_memory, logs the
+ * free under D_MALLOC, poisons the buffer with 0x5a via POISON(), releases
+ * it with cfs_free_large() and overwrites 'ptr' with 0xdeadbeef ('ptr' must
+ * be an lvalue).  When __arch_um__ is defined it is an alias for OBD_FREE(),
+ * mirroring the OBD_VMALLOC() -> OBD_ALLOC() aliasing. */
+#ifdef __arch_um__
+# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size)
+#else
+# define OBD_VFREE(ptr, size)                                                 \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        atomic_sub(size, &obd_memory);                                        \
+        CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n",           \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        POISON(ptr, 0x5a, size);                                              \
+        cfs_free_large(ptr);                                                  \
+        (ptr) = (void *)0xdeadbeef;                                           \
+} while (0)
+#endif
+
+/* OBD_SLAB_ALLOC(ptr, slab, type, size): allocate one object from the cache
+ * 'slab' with cfs_mem_cache_alloc(slab, type) and store it in 'ptr'.  'type'
+ * is passed through unchanged as the second argument (presumably the
+ * allocation flags -- TODO confirm).  Asserts that we are not in interrupt
+ * context.
+ *
+ * On failure 'ptr' is NULL and two CERROR messages are emitted.  On success
+ * the object is accounted in obd_memory / obd_memmax using 'size'.
+ *
+ * We memset() the slab object to 0 when the allocation succeeds, so DO NOT
+ * HAVE A CTOR THAT DOES ANYTHING: its work would be cleared here.  We'd
+ * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */
+#define OBD_SLAB_ALLOC(ptr, slab, type, size)                                 \
+do {                                                                          \
+        LASSERT(!in_interrupt());                                             \
+        (ptr) = cfs_mem_cache_alloc(slab, (type));                            \
+        if ((ptr) == NULL) {                                                  \
+                CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \
+                       (int)(size), __FILE__, __LINE__);                      \
+                CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \
+                       atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\
+        } else {                                                              \
+                memset(ptr, 0, size);                                         \
+                atomic_add(size, &obd_memory);                                \
+                if (atomic_read(&obd_memory) > obd_memmax)                    \
+                        obd_memmax = atomic_read(&obd_memory);                \
+                CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\
+                       (int)(size), ptr, atomic_read(&obd_memory));           \
+        }                                                                     \
+} while (0)
+
+/* OBD_FREE_PTR(ptr): OBD_FREE() with the size taken from the pointee,
+ * sizeof *(ptr); the natural counterpart of OBD_ALLOC_PTR(). */
+#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr))
+
+/* OBD_SLAB_FREE(ptr, slab, size): return the object 'ptr' to the cache
+ * 'slab'; counterpart of OBD_SLAB_ALLOC().
+ *
+ * Asserts that 'ptr' is non-NULL, subtracts 'size' from obd_memory, logs the
+ * free under D_MALLOC, poisons the object with 0x5a via POISON(), releases
+ * it with cfs_mem_cache_free() and overwrites 'ptr' with 0xdeadbeef ('ptr'
+ * must be an lvalue).
+ *
+ * The counter is decremented before the CDEBUG so that the "(tot %d)" value
+ * reports the total after this free, as OBD_FREE() and OBD_VFREE() do. */
+#define OBD_SLAB_FREE(ptr, slab, size)                                        \
+do {                                                                          \
+        LASSERT(ptr);                                                         \
+        atomic_sub(size, &obd_memory);                                        \
+        CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n",       \
+               (int)(size), ptr, atomic_read(&obd_memory));                   \
+        POISON(ptr, 0x5a, size);                                              \
+        cfs_mem_cache_free(slab, ptr);                                        \
+        (ptr) = (void *)0xdeadbeef;                                           \
+} while (0)
+
+/* KEY_IS(str): true when the key being examined equals the NUL-terminated
+ * string 'str' exactly (same length, same bytes).
+ *
+ * The macro is not self-contained: it reads variables named 'key' and
+ * 'keylen' from the scope in which it is expanded, so it can only be used
+ * where both are declared. */
+#define KEY_IS(str) \
+        (keylen == strlen(str) && memcmp(key, str, keylen) == 0)
+
+#if defined(__linux__)
+#include <linux/obd_support.h>
+#elif defined(__APPLE__)
+#include <darwin/obd_support.h>
+#elif defined(__WINNT__)
+#include <winnt/obd_support.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#endif
index e7685c2..0ec6b4a 100644 (file)
@@ -481,6 +481,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_QUOTA=y
+CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=y
 CONFIG_QUOTACTL=y
 CONFIG_DNOTIFY=y
index 588916f..72f5dd5 100644 (file)
@@ -179,9 +179,9 @@ Index: linux-2.4.21-rhel/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -2591,7 +2591,7 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.4.21-rhel.orig/include/linux/ext3_extents.h        2005-03-02 22:42:20.659360368 +0300
 +++ linux-2.4.21-rhel/include/linux/ext3_extents.h     2005-03-04 02:34:52.000000000 +0300
-@@ -0,0 +1,263 @@
+@@ -0,0 +1,261 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2689,7 +2689,7 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2790,15 +2790,13 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 305ef8e..940b916 100644 (file)
@@ -179,9 +179,9 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -2589,7 +2589,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.4.21-suse2.orig/include/linux/ext3_extents.h       2003-01-30 13:24:37.000000000 +0300
 +++ linux-2.4.21-suse2/include/linux/ext3_extents.h    2004-11-02 20:34:00.000000000 +0300
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,261 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2687,7 +2687,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2788,15 +2788,13 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
@@ -2853,7 +2851,6 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h
 +
 +
 +#endif /* _LINUX_EXT3_EXTENTS */
-+
 Index: linux-2.4.21-suse2/include/linux/ext3_fs_i.h
 ===================================================================
 --- linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h  2004-11-02 20:31:37.000000000 +0300
index 8e84625..571fb0f 100644 (file)
@@ -179,9 +179,9 @@ Index: linux-2.4.24/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -2577,7 +2577,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.4.24.orig/include/linux/ext3_extents.h     2003-01-30 13:24:37.000000000 +0300
 +++ linux-2.4.24/include/linux/ext3_extents.h  2004-11-02 20:32:17.000000000 +0300
-@@ -0,0 +1,263 @@
+@@ -0,0 +1,261 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2675,7 +2675,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2776,15 +2776,13 @@ Index: linux-2.4.24/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index d77d9a7..125f747 100644 (file)
@@ -179,9 +179,9 @@ Index: linux-2.4.29/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -2578,7 +2578,7 @@ Index: linux-2.4.29/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.4.29.orig/include/linux/ext3_extents.h     2005-05-03 16:52:08.724069800 +0300
 +++ linux-2.4.29/include/linux/ext3_extents.h  2005-05-03 16:52:08.819055360 +0300
-@@ -0,0 +1,263 @@
+@@ -0,0 +1,261 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2676,7 +2676,7 @@ Index: linux-2.4.29/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2777,15 +2777,13 @@ Index: linux-2.4.29/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 657ecf4..b6439e6 100644 (file)
@@ -2,7 +2,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 ===================================================================
 --- linux-2.6.12-rc6.orig/fs/ext3/extents.c    2005-06-14 16:31:25.756503133 +0200
 +++ linux-2.6.12-rc6/fs/ext3/extents.c 2005-06-14 16:31:25.836581257 +0200
-@@ -0,0 +1,2347 @@
+@@ -0,0 +1,2353 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -176,9 +176,9 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -448,8 +448,12 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -506,8 +510,10 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2644,7 +2650,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.6.12-rc6.orig/include/linux/ext3_extents.h 2005-06-14 16:31:25.780917195 +0200
 +++ linux-2.6.12-rc6/include/linux/ext3_extents.h      2005-06-14 16:31:25.932284381 +0200
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2742,7 +2748,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2843,15 +2849,13 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 0ee8d28..9e78214 100644 (file)
@@ -3,7 +3,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 ===================================================================
 --- linux-2.6.5-sles9.orig/fs/ext3/extents.c   2005-02-17 22:07:57.023609040 +0300
 +++ linux-2.6.5-sles9/fs/ext3/extents.c        2005-02-23 01:02:37.396435640 +0300
-@@ -0,0 +1,2349 @@
+@@ -0,0 +1,2355 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -177,9 +177,9 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -449,8 +449,12 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -507,8 +511,10 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2634,7 +2640,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 ===================================================================
 --- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h        2005-02-17 22:07:57.023609040 +0300
 +++ linux-2.6.5-sles9/include/linux/ext3_extents.h     2005-02-23 01:02:37.416432600 +0300
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2732,7 +2738,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2833,15 +2839,13 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 56fe653..bd95c54 100644 (file)
@@ -2,7 +2,7 @@ Index: linux-stage/fs/ext3/extents.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/extents.c 2005-02-25 15:33:48.890198160 +0200
 +++ linux-stage/fs/ext3/extents.c      2005-02-25 15:33:48.917194056 +0200
-@@ -0,0 +1,2347 @@
+@@ -0,0 +1,2353 @@
 +/*
 + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -176,9 +176,9 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree)
 +{
-+      struct ext3_extent_header *neh;
-+      neh = EXT_ROOT_HDR(tree);
-+      neh->eh_generation++;
++      struct ext3_extent_header *neh = EXT_ROOT_HDR(tree);
++      neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) |
++                           (EXT_GENERATION(neh) + 1);
 +}
 +
 +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree)
@@ -448,8 +448,12 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +      eh = EXT_ROOT_HDR(tree);
 +      EXT_ASSERT(eh);
-+      if (ext3_ext_check_header(eh))
++      if (ext3_ext_check_header(eh)) {
++              /* don't free previously allocated path
++               * -- caller should take care */
++              path = NULL;
 +              goto err;
++      }
 +
 +      i = depth = EXT_DEPTH(tree);
 +      EXT_ASSERT(eh->eh_max);
@@ -506,8 +510,10 @@ Index: linux-stage/fs/ext3/extents.c
 +
 +err:
 +      printk(KERN_ERR "EXT3-fs: header is corrupted!\n");
-+      ext3_ext_drop_refs(path);
-+      kfree(path);
++      if (path) {
++              ext3_ext_drop_refs(path);
++              kfree(path);
++      }
 +      return ERR_PTR(-EIO);
 +}
 +
@@ -2629,7 +2635,7 @@ Index: linux-stage/include/linux/ext3_extents.h
 ===================================================================
 --- linux-stage.orig/include/linux/ext3_extents.h      2005-02-25 15:33:48.891198008 +0200
 +++ linux-stage/include/linux/ext3_extents.h   2005-02-25 15:33:48.944189952 +0200
-@@ -0,0 +1,264 @@
+@@ -0,0 +1,262 @@
 +/*
 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2727,7 +2733,7 @@ Index: linux-stage/include/linux/ext3_extents.h
 +      __u16   eh_entries;     /* number of valid entries */
 +      __u16   eh_max;         /* capacity of store in entries */
 +      __u16   eh_depth;       /* has tree real underlaying blocks? */
-+      __u32   eh_generation;  /* generation of the tree */
++      __u32   eh_generation;  /* flags(8 bits) | generation of the tree */
 +};
 +
 +#define EXT3_EXT_MAGIC                0xf30a
@@ -2828,15 +2834,13 @@ Index: linux-stage/include/linux/ext3_extents.h
 +      (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1)
 +#define EXT_MAX_INDEX(__hdr__) \
 +      (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1)
++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff)
++#define EXT_FLAGS(__hdr__)    ((__hdr__)->eh_generation >> 24)
++#define EXT_FLAGS_CLR_UNKNOWN 0x7     /* Flags cleared on modification */
 +
-+#define EXT_ROOT_HDR(tree) \
-+      ((struct ext3_extent_header *) (tree)->root)
-+#define EXT_BLOCK_HDR(bh) \
-+      ((struct ext3_extent_header *) (bh)->b_data)
-+#define EXT_DEPTH(_t_)        \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_depth)
-+#define EXT_GENERATION(_t_)   \
-+      (((struct ext3_extent_header *)((_t_)->root))->eh_generation)
++#define EXT_BLOCK_HDR(__bh__)         ((struct ext3_extent_header *)(__bh__)->b_data)
++#define EXT_ROOT_HDR(__tree__)        ((struct ext3_extent_header *)(__tree__)->root)
++#define EXT_DEPTH(__tree__)   (EXT_ROOT_HDR(__tree__)->eh_depth)
 +
 +
 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
index 1d8a4af..2a64875 100644 (file)
@@ -2570,7 +2570,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_old(handle, inode, block, count);
 +      else {
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
index 0c2f445..70f4f8a 100644 (file)
@@ -2565,7 +2565,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
index 5ff3d3b..01e7387 100644 (file)
@@ -2584,7 +2584,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
diff --git a/lustre/kernel_patches/patches/iallocsem_consistency.patch b/lustre/kernel_patches/patches/iallocsem_consistency.patch
new file mode 100644 (file)
index 0000000..916ba88
--- /dev/null
@@ -0,0 +1,48 @@
+Index: linux-2.6.9/fs/attr.c
+===================================================================
+--- linux-2.6.9/fs.orig/attr.c 2006-03-10 17:20:39.000000000 +0200
++++ linux-2.6.9/fs/attr.c      2006-04-09 01:21:44.000000000 +0300
+@@ -177,6 +177,9 @@
+       if (!attr->ia_valid)
+               return 0;
++        if (ia_valid & ATTR_SIZE)
++                down_write(&dentry->d_inode->i_alloc_sem);
++
+       if (inode->i_op && inode->i_op->setattr) {
+               audit_notify_watch(inode, MAY_WRITE);
+               error = security_inode_setattr(dentry, attr);
+@@ -194,6 +197,10 @@
+                               error = inode_setattr(inode, attr);
+               }
+       }
++
++        if (ia_valid & ATTR_SIZE)
++                up_write(&dentry->d_inode->i_alloc_sem);
++
+       if (!error) {
+               unsigned long dn_mask = setattr_mask(ia_valid);
+               if (dn_mask)
+Index: linux-2.6.9/fs/open.c
+===================================================================
+--- linux-2.6.9/fs.orig/open.c 2006-04-09 01:18:08.000000000 +0300
++++ linux-2.6.9/fs/open.c      2006-04-09 01:22:29.000000000 +0300
+@@ -205,16 +205,16 @@
+       newattrs.ia_size = length;
+       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+       down(&dentry->d_inode->i_sem);
+-      down_write(&dentry->d_inode->i_alloc_sem);
+       if (called_from_open)
+               newattrs.ia_valid |= ATTR_FROM_OPEN;
+       if (op->setattr_raw) {
+               newattrs.ia_valid |= ATTR_RAW;
+               newattrs.ia_ctime = CURRENT_TIME;
++              down_write(&dentry->d_inode->i_alloc_sem);
+               err = op->setattr_raw(dentry->d_inode, &newattrs);
++              up_write(&dentry->d_inode->i_alloc_sem);
+       } else
+               err = notify_change(dentry, &newattrs);
+-      up_write(&dentry->d_inode->i_alloc_sem);
+       up(&dentry->d_inode->i_sem);
+       return err;
+ }
index 47c152c..c75d7e8 100644 (file)
@@ -1,8 +1,8 @@
-Index: uml/fs/cifs/dir.c
+Index: linux-2.6.10/fs/cifs/dir.c
 ===================================================================
---- uml.orig/fs/cifs/dir.c     2004-12-24 16:35:01.000000000 -0500
-+++ uml/fs/cifs/dir.c  2005-04-13 23:43:03.681625568 -0400
-@@ -199,23 +199,23 @@
+--- linux-2.6.10.orig/fs/cifs/dir.c
++++ linux-2.6.10/fs/cifs/dir.c
+@@ -199,23 +199,23 @@ cifs_create(struct inode *inode, struct 
        }
  
        if(nd) {
@@ -32,11 +32,11 @@ Index: uml/fs/cifs/dir.c
                        disposition = FILE_OPEN_IF;
                else {
                        cFYI(1,("Create flag not set in create function"));
-Index: uml/fs/nfs/nfs4proc.c
+Index: linux-2.6.10/fs/nfs/nfs4proc.c
 ===================================================================
---- uml.orig/fs/nfs/nfs4proc.c 2004-12-24 16:35:23.000000000 -0500
-+++ uml/fs/nfs/nfs4proc.c      2005-04-13 23:43:26.409770503 -0400
-@@ -775,17 +775,17 @@
+--- linux-2.6.10.orig/fs/nfs/nfs4proc.c
++++ linux-2.6.10/fs/nfs/nfs4proc.c
+@@ -775,17 +775,17 @@ nfs4_atomic_open(struct inode *dir, stru
        struct nfs4_state *state;
  
        if (nd->flags & LOOKUP_CREATE) {
@@ -57,11 +57,20 @@ Index: uml/fs/nfs/nfs4proc.c
        put_rpccred(cred);
        if (IS_ERR(state))
                return (struct inode *)state;
-Index: uml/fs/nfs/dir.c
+Index: linux-2.6.10/fs/nfs/dir.c
 ===================================================================
---- uml.orig/fs/nfs/dir.c      2005-04-13 23:42:21.792883770 -0400
-+++ uml/fs/nfs/dir.c   2005-04-13 23:43:03.685625066 -0400
-@@ -791,7 +791,7 @@
+--- linux-2.6.10.orig/fs/nfs/dir.c
++++ linux-2.6.10/fs/nfs/dir.c
+@@ -718,7 +718,7 @@ int nfs_is_exclusive_create(struct inode
+               return 0;
+       if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+               return 0;
+-      return (nd->intent.open.flags & O_EXCL) != 0;
++      return (nd->intent.it_flags & O_EXCL) != 0;
+ }
+ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+@@ -791,7 +791,7 @@ static int is_atomic_open(struct inode *
        if (nd->flags & LOOKUP_DIRECTORY)
                return 0;
        /* Are we trying to write to a read only partition? */
@@ -70,7 +79,7 @@ Index: uml/fs/nfs/dir.c
                return 0;
        return 1;
  }
-@@ -812,7 +812,7 @@
+@@ -812,7 +812,7 @@ static struct dentry *nfs_atomic_lookup(
        dentry->d_op = NFS_PROTO(dir)->dentry_ops;
  
        /* Let vfs_create() deal with O_EXCL */
@@ -79,7 +88,7 @@ Index: uml/fs/nfs/dir.c
                goto no_entry;
  
        /* Open the file on the server */
-@@ -820,7 +820,7 @@
+@@ -820,7 +820,7 @@ static struct dentry *nfs_atomic_lookup(
        /* Revalidate parent directory attribute cache */
        nfs_revalidate_inode(NFS_SERVER(dir), dir);
  
@@ -88,7 +97,7 @@ Index: uml/fs/nfs/dir.c
                nfs_begin_data_update(dir);
                inode = nfs4_atomic_open(dir, dentry, nd);
                nfs_end_data_update(dir);
-@@ -836,7 +836,7 @@
+@@ -836,7 +836,7 @@ static struct dentry *nfs_atomic_lookup(
                                break;
                        /* This turned out not to be a regular file */
                        case -ELOOP:
@@ -97,7 +106,7 @@ Index: uml/fs/nfs/dir.c
                                        goto no_open;
                        /* case -EISDIR: */
                        /* case -EINVAL: */
-@@ -875,7 +875,7 @@
+@@ -875,7 +875,7 @@ static int nfs_open_revalidate(struct de
        /* NFS only supports OPEN on regular files */
        if (!S_ISREG(inode->i_mode))
                goto no_open;
@@ -106,3 +115,13 @@ Index: uml/fs/nfs/dir.c
        /* We cannot do exclusive creation on a positive dentry */
        if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
                goto no_open;
+@@ -1043,7 +1043,8 @@ static int nfs_create(struct inode *dir,
+       attr.ia_valid = ATTR_MODE;
+       if (nd && (nd->flags & LOOKUP_CREATE))
+-              open_flags = nd->intent.open.flags;
++              open_flags = nd->intent.it_flags;
++
+       /*
+        * The 0 argument passed into the create function should one day
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch
new file mode 100644 (file)
index 0000000..a0245be
--- /dev/null
@@ -0,0 +1,459 @@
+diff -Nur linux-2.6.12.6-orig/include/linux/skbuff.h linux-2.6.12.6/include/linux/skbuff.h
+--- linux-2.6.12.6-orig/include/linux/skbuff.h 2006-03-14 19:40:26.000000000 +0800
++++ linux-2.6.12.6/include/linux/skbuff.h      2006-03-16 17:04:51.000000000 +0800
+@@ -128,6 +128,30 @@
+       __u16 size;
+ };
++/* Support for callback when skb data has been released */
++typedef struct zccd                            /* Zero Copy Callback Descriptor */
++{                                              /* (embed as first member of custom struct) */
++      atomic_t        zccd_count;             /* reference count */
++      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
++{
++      atomic_set (&d->zccd_count, 1);
++      d->zccd_destructor = callback;
++}
++
++static inline void zccd_get (zccd_t *d)                /* take a reference */
++{
++      atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d)                /* release a reference */
++{
++      if (atomic_dec_and_test (&d->zccd_count))
++              (d->zccd_destructor)(d);
++}
++
+ /* This data is invariant across clones and lives at
+  * the end of the header data, ie. at skb->end.
+  */
+@@ -137,6 +161,13 @@
+       unsigned short  tso_size;
+       unsigned short  tso_segs;
+       struct sk_buff  *frag_list;
++      zccd_t          *zccd;                  /* zero copy descriptor */
++      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
++      /* NB we expect zero-copy data to be at least 1 packet, so
++      * having 2 zccds means we don't unnecessarily split the packet
++      * where consecutive zero-copy sends abut.
++      */
++
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+ };
+diff -Nur linux-2.6.12.6-orig/include/net/tcp.h linux-2.6.12.6/include/net/tcp.h
+--- linux-2.6.12.6-orig/include/net/tcp.h      2005-06-18 03:48:29.000000000 +0800
++++ linux-2.6.12.6/include/net/tcp.h   2006-03-16 17:05:02.000000000 +0800
+@@ -783,6 +783,9 @@
+ extern int                    tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
+                                           struct msghdr *msg, size_t size);
+ extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t                 tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                                              int flags, zccd_t *zccd);
++
+ extern int                    tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+@@ -879,6 +882,9 @@
+                                           struct msghdr *msg,
+                                           size_t len, int nonblock, 
+                                           int flags, int *addr_len);
++extern int                     tcp_recvpackets(struct sock *sk,
++                                              struct sk_buff_head *packets,
++                                              int len, int nonblock);
+ extern int                    tcp_listen_start(struct sock *sk);
+diff -Nur linux-2.6.12.6-orig/net/core/dev.c linux-2.6.12.6/net/core/dev.c
+--- linux-2.6.12.6-orig/net/core/dev.c 2005-06-18 03:48:29.000000000 +0800
++++ linux-2.6.12.6/net/core/dev.c      2006-03-16 17:04:36.000000000 +0800
+@@ -1176,6 +1176,9 @@
+       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+       ninfo->nr_frags = 0;
+       ninfo->frag_list = NULL;
++      ninfo->zccd = NULL;             /* copied data => no user zero copy descriptor */
++      ninfo->zccd2 = NULL;
++
+       /* Offset between the two in bytes */
+       offset = data - skb->head;
+diff -Nur linux-2.6.12.6-orig/net/core/skbuff.c linux-2.6.12.6/net/core/skbuff.c
+--- linux-2.6.12.6-orig/net/core/skbuff.c      2005-06-18 03:48:29.000000000 +0800
++++ linux-2.6.12.6/net/core/skbuff.c   2006-03-16 17:04:41.000000000 +0800
+@@ -159,6 +159,9 @@
+       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->tso_segs = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
++      skb_shinfo(skb)->zccd2 = NULL;
++
+ out:
+       return skb;
+ nodata:
+@@ -247,6 +250,10 @@
+       if (!skb->cloned ||
+           !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
+                              &skb_shinfo(skb)->dataref)) {
++              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -529,6 +536,14 @@
+       n->data_len  = skb->data_len;
+       n->len       = skb->len;
++      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+@@ -571,6 +586,9 @@
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
++      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
++      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
++
+       if (skb_shared(skb))
+               BUG();
+@@ -592,6 +610,11 @@
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
++      if (zccd != NULL)                       /* user zero copy descriptor? */
++              zccd_get (zccd);                /* extra ref (pages are shared) */
++      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
++              zccd_get (zccd2);               /* extra ref (pages are shared) */
++
+       skb_release_data(skb);
+       off = (data + nhead) - skb->head;
+@@ -606,6 +629,8 @@
+       skb->cloned   = 0;
+       skb->nohdr    = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
++      skb_shinfo(skb)->zccd = zccd;
++      skb_shinfo(skb)->zccd2 = zccd2;
+       return 0;
+ nodata:
+diff -Nur linux-2.6.12.6-orig/net/ipv4/tcp.c linux-2.6.12.6/net/ipv4/tcp.c
+--- linux-2.6.12.6-orig/net/ipv4/tcp.c 2005-06-18 03:48:29.000000000 +0800
++++ linux-2.6.12.6/net/ipv4/tcp.c      2006-03-16 17:04:57.000000000 +0800
+@@ -630,8 +630,10 @@
+       }
+ }
++/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
+ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
+-                       size_t psize, int flags)
++                              size_t psize, int flags, zccd_t *zccd)
++
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       int mss_now;
+@@ -678,6 +680,17 @@
+                       copy = size;
+               i = skb_shinfo(skb)->nr_frags;
++
++              if (zccd != NULL &&             /* this is a zcc I/O */
++                              skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
++                              skb_shinfo(skb)->zccd2 != NULL &&
++                              skb_shinfo(skb)->zccd != zccd && /* not the same one */
++                              skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      tcp_mark_push (tp, skb);
++                      goto new_segment;
++              }
++
+               can_coalesce = skb_can_coalesce(skb, i, page, offset);
+               if (!can_coalesce && i >= MAX_SKB_FRAGS) {
+                       tcp_mark_push(tp, skb);
+@@ -694,6 +707,20 @@
+                       skb_fill_page_desc(skb, i, page, offset, copy);
+               }
++              if (zccd != NULL &&     /* this is a zcc I/O */
++                      skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++                      skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      zccd_get (zccd);        /* bump ref count */
++
++                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++                              skb_shinfo(skb)->zccd = zccd;
++                      else
++                              skb_shinfo(skb)->zccd2 = zccd;
++              }
++
+               skb->len += copy;
+               skb->data_len += copy;
+               skb->truesize += copy;
+@@ -762,12 +789,37 @@
+       lock_sock(sk);
+       TCP_CHECK_TIMER(sk);
+-      res = do_tcp_sendpages(sk, &page, offset, size, flags);
++      res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return res;
++}
++
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                          int flags, zccd_t *zccd)
++{
++      ssize_t res;
++      struct sock *sk = sock->sk;
++
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++
++      if (!(sk->sk_route_caps & NETIF_F_SG) ||        /* caller shouldn't waste her time */
++          !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */
++              BUG ();
++
++#undef TCP_ZC_CSUM_FLAGS
++
++      lock_sock(sk);
++      TCP_CHECK_TIMER(sk);
++
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
++
+       TCP_CHECK_TIMER(sk);
+       release_sock(sk);
+       return res;
+ }
++
+ #define TCP_PAGE(sk)  (sk->sk_sndmsg_page)
+ #define TCP_OFF(sk)   (sk->sk_sndmsg_off)
+@@ -1530,6 +1582,202 @@
+       goto out;
+ }
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++                   int len, int nonblock)
++{
++      struct tcp_sock *tp = tcp_sk(sk);
++      int copied;
++      long timeo;
++
++      BUG_TRAP (len > 0);
++      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
++
++      lock_sock(sk);
++
++      TCP_CHECK_TIMER(sk);
++
++      copied = -ENOTCONN;
++      if (sk->sk_state == TCP_LISTEN)
++              goto out;
++
++      copied = 0;
++      timeo = sock_rcvtimeo(sk, nonblock);
++
++      do {
++              struct sk_buff * skb;
++              u32 offset;
++              unsigned long used;
++              int exhausted;
++              int eaten;
++
++              /* Are we at urgent data? Stop if we have read anything. */
++              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++                      break;
++
++              /* We need to check signals first, to get correct SIGURG
++               * handling. FIXME: Need to check this doesn't impact 1003.1g
++               * and move it down to the bottom of the loop
++               */
++              if (signal_pending(current)) {
++                      if (copied)
++                              break;
++                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++                      break;
++              }
++
++              /* Next get a buffer. */
++
++              skb = skb_peek(&sk->sk_receive_queue);
++
++              if (skb == NULL)                /* nothing ready */
++              {
++                      if (copied) {
++                              if (sk->sk_err ||
++                                  sk->sk_state == TCP_CLOSE ||
++                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
++                                  !timeo ||
++                                  (0))
++                                      break;
++                      } else {
++                              if (sock_flag(sk, SOCK_DONE))
++                                      break;
++
++                              if (sk->sk_err) {
++                                      copied = sock_error(sk);
++                                      break;
++                              }
++
++                              if (sk->sk_shutdown & RCV_SHUTDOWN)
++                                      break;
++
++                              if (sk->sk_state == TCP_CLOSE) {
++                                      if (!(sock_flag(sk, SOCK_DONE))) {
++                                              /* This occurs when user tries to read
++                                               * from never connected socket.
++                                               */
++                                              copied = -ENOTCONN;
++                                              break;
++                                      }
++                                      break;
++                              }
++
++                              if (!timeo) {
++                                      copied = -EAGAIN;
++                                      break;
++                              }
++                      }
++
++                      cleanup_rbuf(sk, copied);
++                      sk_wait_data(sk, &timeo);
++                      continue;
++              }
++
++              BUG_TRAP (atomic_read (&skb->users) == 1);
++
++              exhausted = eaten = 0;
++
++              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++              if (skb->h.th->syn)
++                      offset--;
++
++              used = skb->len - offset;
++
++              if (tp->urg_data) {
++                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
++                      if (urg_offset < used) {
++                              if (!urg_offset) { /* at urgent data */
++                                      if (!(sock_flag(sk, SOCK_URGINLINE))) {
++                                              tp->copied_seq++; /* discard the single byte of urgent data */
++                                              offset++;
++                                              used--;
++                                      }
++                              } else          /* truncate read */
++                                      used = urg_offset;
++                      }
++              }
++
++              BUG_TRAP (used >= 0);
++              if (len < used)
++                      used = len;
++
++              if (used == 0)
++                      exhausted = 1;
++              else
++              {
++                      if (skb_is_nonlinear (skb))
++                      {
++                              int   rc = skb_linearize (skb, GFP_KERNEL);
++
++                              printk ("tcp_recvpackets(): linearising: %d\n", rc);
++
++                              if (rc)
++                              {
++                                      if (!copied)
++                                              copied = rc;
++                                      break;
++                              }
++                      }
++
++                      if ((offset + used) == skb->len) /* consuming the whole packet */
++                      {
++                              __skb_unlink (skb, &sk->sk_receive_queue);
++                              dst_release (skb->dst);
++                              skb_orphan (skb);
++                              __skb_pull (skb, offset);
++                              __skb_queue_tail (packets, skb);
++                              exhausted = eaten = 1;
++                      }
++                      else                    /* consuming only part of the packet */
++                      {
++                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
++
++                              if (skb2 == NULL)
++                              {
++                                      if (!copied)
++                                              copied = -ENOMEM;
++                                      break;
++                              }
++
++                              dst_release (skb2->dst);
++                              __skb_pull (skb2, offset);
++                              __skb_trim (skb2, used);
++                              __skb_queue_tail (packets, skb2);
++                      }
++
++                      tp->copied_seq += used;
++                      copied += used;
++                      len -= used;
++              }
++
++              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++                      tp->urg_data = 0;
++                      tcp_fast_path_check(sk, tp);
++              }
++
++              if (!exhausted)
++                      continue;
++
++              if (skb->h.th->fin)
++              {
++                      tp->copied_seq++;
++                      if (!eaten)
++                              sk_eat_skb (sk, skb);
++                      break;
++              }
++
++              if (!eaten)
++                      sk_eat_skb (sk, skb);
++
++      } while (len > 0);
++
++ out:
++      /* Clean up data we have read: This will do ACK frames. */
++      cleanup_rbuf(sk, copied);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return copied;
++}
++
+ /*
+  *    State processing on a close. This implements the state shift for
+  *    sending our FIN frame. Note that we only send a FIN for some
+@@ -2380,6 +2628,8 @@
+ EXPORT_SYMBOL(tcp_recvmsg);
+ EXPORT_SYMBOL(tcp_sendmsg);
+ EXPORT_SYMBOL(tcp_sendpage);
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_setsockopt);
+ EXPORT_SYMBOL(tcp_shutdown);
+ EXPORT_SYMBOL(tcp_statistics);
diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch
new file mode 100644 (file)
index 0000000..06baac2
--- /dev/null
@@ -0,0 +1,545 @@
+diff -Nur linux-2.6.5-7.244-orig/include/linux/skbuff.h linux-2.6.5-7.244/include/linux/skbuff.h
+--- linux-2.6.5-7.244-orig/include/linux/skbuff.h      2005-12-13 07:50:31.000000000 +0800
++++ linux-2.6.5-7.244/include/linux/skbuff.h   2006-03-13 16:31:30.000000000 +0800
+@@ -135,6 +135,30 @@
+       __u16 size;
+ };
++/* Support for callback when skb data has been released */
++typedef struct zccd                            /* Zero Copy Callback Descriptor */
++{                                              /* (embed as first member of custom struct) */
++      atomic_t        zccd_count;             /* reference count */
++      void           (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */
++} zccd_t;
++
++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *))
++{
++      atomic_set (&d->zccd_count, 1);
++      d->zccd_destructor = callback;
++}
++
++static inline void zccd_get (zccd_t *d)                /* take a reference */
++{
++      atomic_inc (&d->zccd_count);
++}
++
++static inline void zccd_put (zccd_t *d)                /* release a reference */
++{
++      if (atomic_dec_and_test (&d->zccd_count))
++              (d->zccd_destructor)(d);
++}
++
+ /* This data is invariant across clones and lives at
+  * the end of the header data, ie. at skb->end.
+  */
+@@ -144,6 +168,12 @@
+       unsigned short  tso_size;
+       unsigned short  tso_segs;
+       struct sk_buff  *frag_list;
++      zccd_t          *zccd;                  /* zero copy descriptor */
++      zccd_t          *zccd2;                 /* 2nd zero copy descriptor */
++      /* NB we expect zero-copy data to be at least 1 packet, so
++      * having 2 zccds means we don't unnecessarily split the packet
++      * where consecutive zero-copy sends abut.
++      */
+       skb_frag_t      frags[MAX_SKB_FRAGS];
+ };
+diff -Nur linux-2.6.5-7.244-orig/include/net/sock.h linux-2.6.5-7.244/include/net/sock.h
+--- linux-2.6.5-7.244-orig/include/net/sock.h  2005-12-13 07:50:33.000000000 +0800
++++ linux-2.6.5-7.244/include/net/sock.h       2006-03-13 16:32:36.000000000 +0800
+@@ -413,6 +413,18 @@
+       (__skb)->next = NULL;                                   \
+ } while(0)
++#define sk_wait_event(__sk, __timeo, __condition)               \
++({      int rc;                                                 \
++        release_sock(__sk);                                     \
++        rc = __condition;                                       \
++        if (!rc) {                                              \
++                *(__timeo) = schedule_timeout(*(__timeo));      \
++                rc = __condition;                               \
++        }                                                       \
++        lock_sock(__sk);                                        \
++        rc;                                                     \
++})
++
+ /* IP protocol blocks we attach to sockets.
+  * socket layer -> transport layer interface
+  * transport -> network interface is defined by struct inet_proto
+@@ -1037,6 +1049,20 @@
+               sk->sk_stamp = *stamp;
+ }
++/**
++ * sk_eat_skb - Release a skb if it is no longer needed
++ * @sk - socket to eat this skb from
++ * @skb - socket buffer to eat
++ *
++ * This routine must be called with interrupts disabled or with the socket
++ * locked so that the sk_buff queue operation is ok.
++*/
++static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
++{
++        __skb_unlink(skb, &sk->sk_receive_queue);
++        __kfree_skb(skb);
++}
++
+ extern atomic_t netstamp_needed;
+ extern void sock_enable_timestamp(struct sock *sk);
+ extern void sock_disable_timestamp(struct sock *sk);
+diff -Nur linux-2.6.5-7.244-orig/include/net/tcp.h linux-2.6.5-7.244/include/net/tcp.h
+--- linux-2.6.5-7.244-orig/include/net/tcp.h   2005-12-13 07:50:21.000000000 +0800
++++ linux-2.6.5-7.244/include/net/tcp.h        2006-03-13 16:31:37.000000000 +0800
+@@ -764,6 +764,9 @@
+ extern int                    tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
+                                           struct msghdr *msg, size_t size);
+ extern ssize_t                        tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags);
++extern ssize_t                        tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                                              int flags, zccd_t *zccd);
++
+ extern int                    tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+@@ -861,6 +864,10 @@
+                                           size_t len, int nonblock, 
+                                           int flags, int *addr_len);
++extern int                    tcp_recvpackets(struct sock *sk,
++                                              struct sk_buff_head *packets,
++                                              int len, int nonblock);
++
+ extern int                    tcp_listen_start(struct sock *sk);
+ extern void                   tcp_parse_options(struct sk_buff *skb,
+diff -Nur linux-2.6.5-7.244-orig/net/core/dev.c linux-2.6.5-7.244/net/core/dev.c
+--- linux-2.6.5-7.244-orig/net/core/dev.c      2005-12-13 07:50:38.000000000 +0800
++++ linux-2.6.5-7.244/net/core/dev.c   2006-03-13 16:31:56.000000000 +0800
+@@ -1322,6 +1322,9 @@
+       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
+       ninfo->nr_frags = 0;
+       ninfo->frag_list = NULL;
++      ninfo->zccd = NULL;             /* copied data => no user zero copy descriptor */
++      ninfo->zccd2 = NULL;
++
+       /* Offset between the two in bytes */
+       offset = data - skb->head;
+diff -Nur linux-2.6.5-7.244-orig/net/core/skbuff.c linux-2.6.5-7.244/net/core/skbuff.c
+--- linux-2.6.5-7.244-orig/net/core/skbuff.c   2004-04-04 11:37:37.000000000 +0800
++++ linux-2.6.5-7.244/net/core/skbuff.c        2006-03-13 16:31:46.000000000 +0800
+@@ -152,6 +152,9 @@
+       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->tso_segs = 0;
+       skb_shinfo(skb)->frag_list = NULL;
++      skb_shinfo(skb)->zccd = NULL;           /* skbuffs kick off with NO user zero copy descriptors */
++      skb_shinfo(skb)->zccd2 = NULL;
++
+ out:
+       return skb;
+ nodata:
+@@ -186,6 +189,10 @@
+ {
+       if (!skb->cloned ||
+           atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
++              if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd); /* release hold */
++              if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */
++                      zccd_put (skb_shinfo(skb)->zccd2); /* release hold */
+               if (skb_shinfo(skb)->nr_frags) {
+                       int i;
+                       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+@@ -449,6 +456,14 @@
+       n->data_len  = skb->data_len;
+       n->len       = skb->len;
++      if (skb_shinfo(skb)->zccd != NULL)      /* user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd;
++
++      if (skb_shinfo(skb)->zccd2 != NULL)     /* 2nd user zero copy descriptor? */
++              zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */
++      skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2;
++
+       if (skb_shinfo(skb)->nr_frags) {
+               int i;
+@@ -493,6 +508,9 @@
+       u8 *data;
+       int size = nhead + (skb->end - skb->head) + ntail;
+       long off;
++      zccd_t *zccd = skb_shinfo(skb)->zccd;   /* stash user zero copy descriptor */
++      zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */
++
+       if (skb_shared(skb))
+               BUG();
+@@ -514,6 +532,11 @@
+       if (skb_shinfo(skb)->frag_list)
+               skb_clone_fraglist(skb);
++      if (zccd != NULL)                       /* user zero copy descriptor? */
++              zccd_get (zccd);                /* extra ref (pages are shared) */
++      if (zccd2 != NULL)                      /* 2nd user zero copy descriptor? */
++              zccd_get (zccd2);               /* extra ref (pages are shared) */
++
+       skb_release_data(skb);
+       off = (data + nhead) - skb->head;
+@@ -527,6 +550,9 @@
+       skb->nh.raw  += off;
+       skb->cloned   = 0;
+       atomic_set(&skb_shinfo(skb)->dataref, 1);
++      skb_shinfo(skb)->zccd = zccd;
++      skb_shinfo(skb)->zccd2 = zccd2;
++
+       return 0;
+ nodata:
+diff -Nur linux-2.6.5-7.244-orig/net/core/sock.c linux-2.6.5-7.244/net/core/sock.c
+--- linux-2.6.5-7.244-orig/net/core/sock.c     2005-12-13 07:50:10.000000000 +0800
++++ linux-2.6.5-7.244/net/core/sock.c  2006-03-13 16:32:44.000000000 +0800
+@@ -917,6 +917,31 @@
+       } while((skb = sk->sk_backlog.head) != NULL);
+ }
++/**
++ * sk_wait_data - wait for data to arrive at sk_receive_queue
++ * sk - sock to wait on
++ * timeo - for how long
++ *
++ * Now socket state including sk->sk_err is changed only under lock,
++ * hence we may omit checks after joining wait queue.
++ * We check receive queue before schedule() only as optimization;
++ * it is very likely that release_sock() added new data.
++ */
++int sk_wait_data(struct sock *sk, long *timeo)
++{
++        int rc;
++        DEFINE_WAIT(wait);
++
++        prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
++        set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
++        rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
++        clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
++        finish_wait(sk->sk_sleep, &wait);
++        return rc;
++}
++
++EXPORT_SYMBOL(sk_wait_data);
++
+ /*
+  * Set of default routines for initialising struct proto_ops when
+  * the protocol does not support a particular function. In certain
+diff -Nur linux-2.6.5-7.244-orig/net/ipv4/tcp.c linux-2.6.5-7.244/net/ipv4/tcp.c
+--- linux-2.6.5-7.244-orig/net/ipv4/tcp.c      2005-12-13 07:50:28.000000000 +0800
++++ linux-2.6.5-7.244/net/ipv4/tcp.c   2006-03-13 16:32:04.000000000 +0800
+@@ -799,7 +799,7 @@
+ }
+ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
+-                       size_t psize, int flags);
++                       size_t psize, int flags,zccd_t *zccd);
+ static inline int can_coalesce(struct sk_buff *skb, int i, struct page *page,
+                              int off)
+@@ -881,8 +881,9 @@
+       return err;
+ }
++/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */
+ ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
+-                       size_t psize, int flags)
++                       size_t psize, int flags,zccd_t *zccd)
+ {
+       struct tcp_opt *tp = tcp_sk(sk);
+       int mss_now;
+@@ -929,6 +930,17 @@
+                       copy = size;
+               i = skb_shinfo(skb)->nr_frags;
++
++              if (zccd != NULL &&             /* this is a zcc I/O */
++                              skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */
++                              skb_shinfo(skb)->zccd2 != NULL &&
++                              skb_shinfo(skb)->zccd != zccd && /* not the same one */
++                              skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      tcp_mark_push (tp, skb);
++                      goto new_segment;
++              }
++
+               if (can_coalesce(skb, i, page, offset)) {
+                       skb_shinfo(skb)->frags[i - 1].size += copy;
+               } else if (i < MAX_SKB_FRAGS) {
+@@ -939,6 +951,20 @@
+                       goto new_segment;
+               }
++              if (zccd != NULL &&     /* this is a zcc I/O */
++                      skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */
++                      skb_shinfo(skb)->zccd2 != zccd)
++              {
++                      zccd_get (zccd);        /* bump ref count */
++
++                      BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL);
++
++                      if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */
++                              skb_shinfo(skb)->zccd = zccd;
++                      else
++                              skb_shinfo(skb)->zccd2 = zccd;
++              }
++
+               skb->len += copy;
+               skb->data_len += copy;
+               skb->ip_summed = CHECKSUM_HW;
+@@ -1003,12 +1029,36 @@
+       lock_sock(sk);
+       TCP_CHECK_TIMER(sk);
+-      res = do_tcp_sendpages(sk, &page, offset, size, flags);
++      res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL);
+       TCP_CHECK_TIMER(sk);
+       release_sock(sk);
+       return res;
+ }
++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size,
++                              int flags, zccd_t *zccd)
++{
++      ssize_t res;
++      struct sock *sk = sock->sk;
++      /* Send one page through do_tcp_sendpages() under the socket lock, passing the caller's zccd along. */
++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)
++
++      if (!(sk->sk_route_caps & NETIF_F_SG) ||        /* caller shouldn't waste her time on double mapping: */
++            !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* BUG() unless the route has SG and one of the csum capabilities */
++              BUG ();
++
++#undef TCP_ZC_CSUM_FLAGS
++
++      lock_sock(sk);
++      TCP_CHECK_TIMER(sk);
++
++      res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return res;
++}
++
++
+ #define TCP_PAGE(sk)  (inet_sk(sk)->sndmsg_page)
+ #define TCP_OFF(sk)   (inet_sk(sk)->sndmsg_off)
+@@ -1849,6 +1899,202 @@
+       err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
+       goto out;
+ }
++ 
++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets,
++int len, int nonblock)
++{
++      struct tcp_opt *tp = tcp_sk(sk);
++      int copied;
++      long timeo;
++      /* Queue up to len bytes of received data onto 'packets' as skbs (whole skbs unlinked, partial ones cloned); returns bytes queued or a negative errno. */
++      BUG_TRAP (len > 0);
++      /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/
++
++      lock_sock(sk);
++
++      TCP_CHECK_TIMER(sk);
++
++      copied = -ENOTCONN;
++      if (sk->sk_state == TCP_LISTEN)
++              goto out;
++
++      copied = 0;
++      timeo = sock_rcvtimeo(sk, nonblock);
++
++      do {
++              struct sk_buff * skb;
++              u32 offset;
++              unsigned long used;
++              int exhausted;
++              int eaten;
++
++              /* Are we at urgent data? Stop if we have read anything. */
++              if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq)
++                      break;
++
++              /* We need to check signals first, to get correct SIGURG
++               * handling. FIXME: Need to check this doesn't impact 1003.1g
++               * and move it down to the bottom of the loop
++               */
++              if (signal_pending(current)) {
++                      if (copied)
++                              break;
++                      copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
++                      break;
++              }
++
++              /* Next get a buffer. */
++
++              skb = skb_peek(&sk->sk_receive_queue);
++
++              if (skb == NULL)                /* nothing ready */
++              {
++                      if (copied) {
++                              if (sk->sk_err ||
++                                  sk->sk_state == TCP_CLOSE ||
++                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
++                                  !timeo ||
++                                  (0))
++                                      break;
++                      } else {
++                              if (sock_flag(sk, SOCK_DONE))
++                                      break;
++
++                              if (sk->sk_err) {
++                                      copied = sock_error(sk);
++                                      break;
++                              }
++
++                              if (sk->sk_shutdown & RCV_SHUTDOWN)
++                                      break;
++
++                              if (sk->sk_state == TCP_CLOSE) {
++                                      if (!(sock_flag(sk, SOCK_DONE))) {
++                                              /* This occurs when user tries to read
++                                               * from never connected socket.
++                                               */
++                                              copied = -ENOTCONN;
++                                              break;
++                                      }
++                                      break;
++                              }
++
++                              if (!timeo) {
++                                      copied = -EAGAIN;
++                                      break;
++                              }
++                      }
++
++                      cleanup_rbuf(sk, copied);
++                      sk_wait_data(sk, &timeo);
++                      continue;
++              }
++
++              BUG_TRAP (atomic_read (&skb->users) == 1);
++
++              exhausted = eaten = 0;
++
++              offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
++              if (skb->h.th->syn)
++                      offset--;
++
++              used = skb->len - offset;
++
++              if (tp->urg_data) {
++                      u32 urg_offset = tp->urg_seq - tp->copied_seq;
++                      if (urg_offset < used) {
++                              if (!urg_offset) { /* at urgent data */
++                                      if (!(sock_flag(sk, SOCK_URGINLINE))) {
++                                              tp->copied_seq++; /* discard the single byte of urgent data */
++                                              offset++;
++                                              used--;
++                                      }
++                              } else          /* truncate read */
++                                      used = urg_offset;
++                      }
++              }
++
++              BUG_TRAP (used >= 0); /* NOTE(review): 'used' is unsigned long, so this check is always true */
++              if (len < used)
++                      used = len;
++
++              if (used == 0)
++                      exhausted = 1;
++              else
++              {
++                      if (skb_is_nonlinear (skb))
++                      {
++                              int   rc = skb_linearize (skb, GFP_KERNEL);
++
++                              printk ("tcp_recvpackets(): linearising: %d\n", rc); /* NOTE(review): printed on every linearise, no KERN_ level */
++
++                              if (rc)
++                              {
++                                      if (!copied)
++                                              copied = rc;
++                                      break;
++                              }
++                      }
++
++                      if ((offset + used) == skb->len) /* consuming the whole packet */
++                      {
++                              __skb_unlink (skb, &sk->sk_receive_queue);
++                              dst_release (skb->dst); /* NOTE(review): skb->dst is left set after this release -- confirm the later skb free does not drop it again */
++                              skb_orphan (skb);
++                              __skb_pull (skb, offset);
++                              __skb_queue_tail (packets, skb);
++                              exhausted = eaten = 1;
++                      }
++                      else                    /* consuming only part of the packet */
++                      {
++                              struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL);
++
++                              if (skb2 == NULL)
++                              {
++                                      if (!copied)
++                                              copied = -ENOMEM;
++                                      break;
++                              }
++
++                              dst_release (skb2->dst); /* NOTE(review): skb2->dst is left set after this release -- confirm the later skb free does not drop it again */
++                              __skb_pull (skb2, offset);
++                              __skb_trim (skb2, used);
++                              __skb_queue_tail (packets, skb2);
++                      }
++
++                      tp->copied_seq += used;
++                      copied += used;
++                      len -= used;
++              }
++
++              if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) {
++                      tp->urg_data = 0;
++                      tcp_fast_path_check(sk, tp);
++              }
++
++              if (!exhausted)
++                      continue;
++
++              if (skb->h.th->fin)
++              {
++                      tp->copied_seq++;
++                      if (!eaten)
++                              sk_eat_skb (sk, skb);
++                      break;
++              }
++
++              if (!eaten)
++                      sk_eat_skb (sk, skb);
++
++      } while (len > 0);
++
++ out:
++      /* Clean up data we have read: This will do ACK frames. */
++      cleanup_rbuf(sk, copied);
++      TCP_CHECK_TIMER(sk);
++      release_sock(sk);
++      return copied;
++}
+ /*
+  *    State processing on a close. This implements the state shift for
+@@ -2872,6 +3118,8 @@
+ EXPORT_SYMBOL(tcp_recvmsg);
+ EXPORT_SYMBOL(tcp_sendmsg);
+ EXPORT_SYMBOL(tcp_sendpage);
++EXPORT_SYMBOL(tcp_sendpage_zccd);
++EXPORT_SYMBOL(tcp_recvpackets);
+ EXPORT_SYMBOL(tcp_setsockopt);
+ EXPORT_SYMBOL(tcp_shutdown);
+ EXPORT_SYMBOL(tcp_sockets_allocated);
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch
new file mode 100644 (file)
index 0000000..694d097
--- /dev/null
@@ -0,0 +1,773 @@
+Index: linux-2.6.10/fs/exec.c
+===================================================================
+--- linux-2.6.10.orig/fs/exec.c
++++ linux-2.6.10/fs/exec.c
+@@ -124,9 +124,10 @@ asmlinkage long sys_uselib(const char __
+       struct file * file;
+       struct nameidata nd;
+       int error;
++      intent_init(&nd.intent, IT_OPEN);
+-      nd.intent.open.flags = FMODE_READ;
+-      error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
++      nd.intent.it_flags = FMODE_READ|FMODE_EXEC;
++      error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+       if (error)
+               goto out;
+@@ -138,7 +139,7 @@ asmlinkage long sys_uselib(const char __
+       if (error)
+               goto exit;
+-      file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++      file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
+       error = PTR_ERR(file);
+       if (IS_ERR(file))
+               goto out;
+@@ -485,8 +486,9 @@ struct file *open_exec(const char *name)
+       int err;
+       struct file *file;
+-      nd.intent.open.flags = FMODE_READ;
+-      err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
++      intent_init(&nd.intent, IT_OPEN);
++      nd.intent.it_flags = FMODE_READ|FMODE_EXEC;
++      err = path_lookup(name, LOOKUP_FOLLOW, &nd);
+       file = ERR_PTR(err);
+       if (!err) {
+@@ -499,7 +501,7 @@ struct file *open_exec(const char *name)
+                               err = -EACCES;
+                       file = ERR_PTR(err);
+                       if (!err) {
+-                              file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++                              file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
+                               if (!IS_ERR(file)) {
+                                       err = deny_write_access(file);
+                                       if (err) {
+Index: linux-2.6.10/fs/inode.c
+===================================================================
+--- linux-2.6.10.orig/fs/inode.c
++++ linux-2.6.10/fs/inode.c
+@@ -233,6 +233,7 @@ void __iget(struct inode * inode)
+       inodes_stat.nr_unused--;
+ }
++EXPORT_SYMBOL(__iget);
+ /**
+  * clear_inode - clear an inode
+  * @inode: inode to clear
+Index: linux-2.6.10/fs/namei.c
+===================================================================
+--- linux-2.6.10.orig/fs/namei.c
++++ linux-2.6.10/fs/namei.c
+@@ -288,8 +288,19 @@ int deny_write_access(struct file * file
+       return 0;
+ }
++void intent_release(struct lookup_intent *it)
++{
++      if (!it)                                /* NULL intent: nothing to do */
++              return;
++      if (it->it_magic != INTENT_MAGIC)       /* magic mismatch: leave the intent untouched */
++              return;
++      if (it->it_op_release)                  /* the release hook is optional */
++              it->it_op_release(it);
++}
++
+ void path_release(struct nameidata *nd)
+ {
++      intent_release(&nd->intent);
+       dput(nd->dentry);
+       mntput(nd->mnt);
+ }
+@@ -379,7 +390,10 @@ static struct dentry * real_lookup(struc
+ {
+       struct dentry * result;
+       struct inode *dir = parent->d_inode;
++      int counter = 0;
++again:
++      counter++;
+       down(&dir->i_sem);
+       /*
+        * First re-do the cached lookup just in case it was created
+@@ -418,7 +432,10 @@ static struct dentry * real_lookup(struc
+       if (result->d_op && result->d_op->d_revalidate) {
+               if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
+                       dput(result);
+-                      result = ERR_PTR(-ENOENT);
++                      if (counter > 10)
++                              result = ERR_PTR(-ESTALE);
++                      if (!IS_ERR(result))
++                              goto again;
+               }
+       }
+       return result;
+@@ -448,7 +465,9 @@ walk_init_root(const char *name, struct 
+ static inline int __vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+       int res = 0;
++      struct lookup_intent it = nd->intent;
+       char *name;
++
+       if (IS_ERR(link))
+               goto fail;
+@@ -458,6 +477,9 @@ static inline int __vfs_follow_link(stru
+                       /* weird __emul_prefix() stuff did it */
+                       goto out;
+       }
++      intent_init(&nd->intent, it.it_op);
++      nd->intent.it_flags = it.it_flags;
++      nd->intent.it_create_mode = it.it_create_mode;
+       res = link_path_walk(link, nd);
+ out:
+       if (nd->depth || res || nd->last_type!=LAST_NORM)
+@@ -666,6 +688,33 @@ fail:
+       return PTR_ERR(dentry);
+ }
++static int revalidate_special(struct nameidata *nd)
++{
++      struct dentry *dentry = nd->dentry;
++      int err, counter = 0;
++      /* Revalidate nd->dentry; when that fails, look it up again from its parent (at most 10 rounds, then -ESTALE). */
++ revalidate_again:
++      if (!dentry->d_op || !dentry->d_op->d_revalidate)       /* no revalidate method: nothing to check */
++              return 0;
++      if (!dentry->d_op->d_revalidate(dentry, nd)) {
++              struct dentry *new;
++              if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd)))
++                      return err;
++              new = real_lookup(dentry->d_parent, &dentry->d_name, nd);
++              if (IS_ERR(new))
++                      return PTR_ERR(new);
++              d_invalidate(dentry);
++              dput(dentry);
++              nd->dentry = dentry = new;      /* carry on with the freshly looked-up dentry */
++              counter++;
++              if (counter < 10)
++                      goto revalidate_again;
++              printk("excessive revalidate_it loops\n");      /* NOTE(review): message says revalidate_it, not revalidate_special; no KERN_ level */
++              return -ESTALE;
++      }
++      return 0;
++}
++
+ /*
+  * Name resolution.
+  *
+@@ -767,8 +816,12 @@ int fastcall link_path_walk(const char *
+                       goto out_dput;
+               if (inode->i_op->follow_link) {
++                      int save_flags = nd->flags;
+                       mntget(next.mnt);
++                      nd->flags |= LOOKUP_LINK_NOTLAST;
+                       err = do_follow_link(next.dentry, nd);
++                      if (!(save_flags & LOOKUP_LINK_NOTLAST))
++                              nd->flags &= ~LOOKUP_LINK_NOTLAST;
+                       dput(next.dentry);
+                       mntput(next.mnt);
+                       if (err)
+@@ -807,14 +860,34 @@ last_component:
+                               inode = nd->dentry->d_inode;
+                               /* fallthrough */
+                       case 1:
++                              nd->flags |= LOOKUP_LAST;
++                              err = revalidate_special(nd);
++                              nd->flags &= ~LOOKUP_LAST;
++                              if (!nd->dentry->d_inode)
++                                      err = -ENOENT;
++                              if (err) {
++                                      path_release(nd);
++                                      goto return_err;
++                              }
++                              if (lookup_flags & LOOKUP_DIRECTORY) {
++                                      err = -ENOTDIR;
++                                      if (!nd->dentry->d_inode->i_op ||
++                                          !nd->dentry->d_inode->i_op->lookup){
++                                              path_release(nd);
++                                              goto return_err;
++                                      }
++                              }
+                               goto return_reval;
+               }
++
+               if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
+                       err = nd->dentry->d_op->d_hash(nd->dentry, &this);
+                       if (err < 0)
+                               break;
+               }
++              nd->flags |= LOOKUP_LAST;
+               err = do_lookup(nd, &this, &next, atomic);
++              nd->flags &= ~LOOKUP_LAST;
+               if (err)
+                       break;
+               follow_mount(&next.mnt, &next.dentry);
+@@ -1032,7 +1105,7 @@ struct dentry * lookup_hash(struct qstr 
+ }
+ /* SMP-safe */
+-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd)
+ {
+       unsigned long hash;
+       struct qstr this;
+@@ -1052,11 +1125,16 @@ struct dentry * lookup_one_len(const cha
+       }
+       this.hash = end_name_hash(hash);
+-      return lookup_hash(&this, base);
++      return __lookup_hash(&this, base, nd);
+ access:
+       return ERR_PTR(-EACCES);
+ }
++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++{
++      return lookup_one_len_it(name, base, len, NULL);        /* original entry point kept as a wrapper: no nameidata supplied */
++}
++
+ /*
+  *    namei()
+  *
+@@ -1068,7 +1146,7 @@ access:
+  * that namei follows links, while lnamei does not.
+  * SMP-safe
+  */
+-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)
+ {
+       char *tmp = getname(name);
+       int err = PTR_ERR(tmp);
+@@ -1080,6 +1158,12 @@ int fastcall __user_walk(const char __us
+       return err;
+ }
++int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++{
++      intent_init(&nd->intent, IT_LOOKUP);    /* overwrites nd->intent; callers that set their own intent call __user_walk_it() directly */
++      return __user_walk_it(name, flags, nd);
++}
++
+ /*
+  * It's inline, so penalty for filesystems that don't use sticky bit is
+  * minimal.
+@@ -1363,8 +1447,8 @@ int open_namei(const char * pathname, in
+               acc_mode |= MAY_APPEND;
+       /* Fill in the open() intent data */
+-      nd->intent.open.flags = flag;
+-      nd->intent.open.create_mode = mode;
++      nd->intent.it_flags = flag;
++      nd->intent.it_create_mode = mode;
+       /*
+        * The simplest case - just a plain lookup.
+@@ -1379,6 +1463,7 @@ int open_namei(const char * pathname, in
+       /*
+        * Create - we need to know the parent.
+        */
++      nd->intent.it_op |= IT_CREAT;
+       error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+       if (error)
+               return error;
+@@ -1395,7 +1480,9 @@ int open_namei(const char * pathname, in
+       dir = nd->dentry;
+       nd->flags &= ~LOOKUP_PARENT;
+       down(&dir->d_inode->i_sem);
++      nd->flags |= LOOKUP_LAST;
+       dentry = __lookup_hash(&nd->last, nd->dentry, nd);
++      nd->flags &= ~LOOKUP_LAST;
+ do_last:
+       error = PTR_ERR(dentry);
+@@ -1508,7 +1595,9 @@ do_link:
+       }
+       dir = nd->dentry;
+       down(&dir->d_inode->i_sem);
++      nd->flags |= LOOKUP_LAST;
+       dentry = __lookup_hash(&nd->last, nd->dentry, nd);
++      nd->flags &= ~LOOKUP_LAST;
+       putname(nd->last.name);
+       goto do_last;
+ }
+Index: linux-2.6.10/fs/namespace.c
+===================================================================
+--- linux-2.6.10.orig/fs/namespace.c
++++ linux-2.6.10/fs/namespace.c
+@@ -62,6 +62,7 @@ struct vfsmount *alloc_vfsmnt(const char
+               INIT_LIST_HEAD(&mnt->mnt_mounts);
+               INIT_LIST_HEAD(&mnt->mnt_list);
+               INIT_LIST_HEAD(&mnt->mnt_fslink);
++              INIT_LIST_HEAD(&mnt->mnt_lustre_list);
+               if (name) {
+                       int size = strlen(name)+1;
+                       char *newname = kmalloc(size, GFP_KERNEL);
+@@ -113,6 +114,7 @@ static inline int check_mnt(struct vfsmo
+ static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd)
+ {
++      memset(old_nd, 0, sizeof(*old_nd));
+       old_nd->dentry = mnt->mnt_mountpoint;
+       old_nd->mnt = mnt->mnt_parent;
+       mnt->mnt_parent = mnt;
+@@ -176,6 +178,9 @@ void __mntput(struct vfsmount *mnt)
+ {
+       struct super_block *sb = mnt->mnt_sb;
+       dput(mnt->mnt_root);
++      spin_lock(&dcache_lock);
++      list_del(&mnt->mnt_lustre_list);
++      spin_unlock(&dcache_lock);
+       free_vfsmnt(mnt);
+       deactivate_super(sb);
+ }
+@@ -402,6 +407,8 @@ static int do_umount(struct vfsmount *mn
+        */
+       lock_kernel();
++      if (sb->s_op->umount_lustre)
++              sb->s_op->umount_lustre(sb);
+       if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
+               sb->s_op->umount_begin(sb);
+       unlock_kernel();
+@@ -627,6 +634,7 @@ static int do_loopback(struct nameidata 
+               return err;
+       if (!old_name || !*old_name)
+               return -EINVAL;
++      intent_init(&old_nd.intent, IT_LOOKUP);
+       err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+       if (err)
+               return err;
+@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat
+               return -EPERM;
+       if (!old_name || !*old_name)
+               return -EINVAL;
++      intent_init(&old_nd.intent, IT_LOOKUP);
+       err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd);
+       if (err)
+               return err;
+@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di
+       int retval = 0;
+       int mnt_flags = 0;
++      intent_init(&nd.intent, IT_LOOKUP);
+       /* Discard magic */
+       if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+               flags &= ~MS_MGC_MSK;
+Index: linux-2.6.10/fs/open.c
+===================================================================
+--- linux-2.6.10.orig/fs/open.c
++++ linux-2.6.10/fs/open.c
+@@ -216,12 +216,12 @@ static inline long do_sys_truncate(const
+       struct nameidata nd;
+       struct inode * inode;
+       int error;
+-
++      intent_init(&nd.intent, IT_GETATTR);
+       error = -EINVAL;
+       if (length < 0) /* sorry, but loff_t says... */
+               goto out;
+-      error = user_path_walk(path, &nd);
++      error = user_path_walk_it(path, &nd);
+       if (error)
+               goto out;
+       inode = nd.dentry->d_inode;
+@@ -475,6 +475,7 @@ asmlinkage long sys_access(const char __
+       int old_fsuid, old_fsgid;
+       kernel_cap_t old_cap;
+       int res;
++      intent_init(&nd.intent, IT_GETATTR);
+       if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
+               return -EINVAL;
+@@ -499,13 +500,14 @@ asmlinkage long sys_access(const char __
+       else
+               current->cap_effective = current->cap_permitted;
+-      res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
++      res = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd);
+       if (!res) {
+               res = permission(nd.dentry->d_inode, mode, &nd);
+               /* SuS v2 requires we report a read only fs too */
+               if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+                  && !special_file(nd.dentry->d_inode->i_mode))
+                       res = -EROFS;
++
+               path_release(&nd);
+       }
+@@ -520,8 +522,9 @@ asmlinkage long sys_chdir(const char __u
+ {
+       struct nameidata nd;
+       int error;
++      intent_init(&nd.intent, IT_GETATTR);
+-      error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
++      error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+       if (error)
+               goto out;
+@@ -573,8 +576,9 @@ asmlinkage long sys_chroot(const char __
+ {
+       struct nameidata nd;
+       int error;
++      intent_init(&nd.intent, IT_GETATTR);
+-      error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
++      error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+       if (error)
+               goto out;
+@@ -758,8 +762,10 @@ asmlinkage long sys_fchown(unsigned int 
+ struct file *filp_open(const char * filename, int flags, int mode)
+ {
+       int namei_flags, error;
++      struct file * temp_filp;
+       struct nameidata nd;
++      intent_init(&nd.intent, IT_OPEN);
+       namei_flags = flags;
+       if ((namei_flags+1) & O_ACCMODE)
+               namei_flags++;
+@@ -767,15 +773,26 @@ struct file *filp_open(const char * file
+               namei_flags |= 2;
+       error = open_namei(filename, namei_flags, mode, &nd);
+-      if (!error)
+-              return dentry_open(nd.dentry, nd.mnt, flags);
+-
++      if (!error) {
++              temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent);
++              return temp_filp;
++      }
+       return ERR_PTR(error);
+ }
+-EXPORT_SYMBOL(filp_open);
+ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++ {
++      /* Original entry point kept as a wrapper: calls dentry_open_it() with a local IT_LOOKUP intent. */
++      struct lookup_intent it;
++      intent_init(&it, IT_LOOKUP);
++      /* NOTE(review): dentry_open_it() stores this pointer in f->f_it, but 'it' is a stack local -- confirm f_it is never read after this returns. */
++      return dentry_open_it(dentry, mnt, flags, &it);
++}
++
++EXPORT_SYMBOL(dentry_open);
++
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags,struct lookup_intent *it)
+ {
+       struct file * f;
+       struct inode *inode;
+@@ -787,6 +805,7 @@ struct file *dentry_open(struct dentry *
+               goto cleanup_dentry;
+       f->f_flags = flags;
+       f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
++      f->f_it = it;
+       inode = dentry->d_inode;
+       if (f->f_mode & FMODE_WRITE) {
+               error = get_write_access(inode);
+@@ -805,6 +824,7 @@ struct file *dentry_open(struct dentry *
+               error = f->f_op->open(inode,f);
+               if (error)
+                       goto cleanup_all;
++              intent_release(it);
+       }
+       f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+@@ -830,13 +850,12 @@ cleanup_all:
+ cleanup_file:
+       put_filp(f);
+ cleanup_dentry:
++      intent_release(it);
+       dput(dentry);
+       mntput(mnt);
+       return ERR_PTR(error);
+ }
+-EXPORT_SYMBOL(dentry_open);
+-
+ /*
+  * Find an empty file descriptor entry, and mark it busy.
+  */
+Index: linux-2.6.10/fs/stat.c
+===================================================================
+--- linux-2.6.10.orig/fs/stat.c
++++ linux-2.6.10/fs/stat.c
+@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod
+ EXPORT_SYMBOL(generic_fillattr);
+-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat)
+ {
+       struct inode *inode = dentry->d_inode;
+       int retval;
+@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st
+       if (retval)
+               return retval;
++      if (inode->i_op->getattr_it)
++              return inode->i_op->getattr_it(mnt, dentry, it, stat);
+       if (inode->i_op->getattr)
+               return inode->i_op->getattr(mnt, dentry, stat);
+@@ -63,14 +65,20 @@ int vfs_getattr(struct vfsmount *mnt, st
+ EXPORT_SYMBOL(vfs_getattr);
++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++{
++      return vfs_getattr_it(mnt, dentry, NULL, stat); /* NOTE(review): the NULL intent is handed to ->getattr_it() when that hook is set -- confirm implementations accept NULL */
++}
++
+ int vfs_stat(char __user *name, struct kstat *stat)
+ {
+       struct nameidata nd;
+       int error;
++      intent_init(&nd.intent, IT_GETATTR);
+-      error = user_path_walk(name, &nd);
++      error = user_path_walk_it(name, &nd);
+       if (!error) {
+-              error = vfs_getattr(nd.mnt, nd.dentry, stat);
++              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
+               path_release(&nd);
+       }
+       return error;
+@@ -82,10 +90,11 @@ int vfs_lstat(char __user *name, struct 
+ {
+       struct nameidata nd;
+       int error;
++      intent_init(&nd.intent, IT_GETATTR);
+-      error = user_path_walk_link(name, &nd);
++      error = user_path_walk_link_it(name, &nd);
+       if (!error) {
+-              error = vfs_getattr(nd.mnt, nd.dentry, stat);
++              error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
+               path_release(&nd);
+       }
+       return error;
+@@ -97,9 +106,12 @@ int vfs_fstat(unsigned int fd, struct ks
+ {
+       struct file *f = fget(fd);
+       int error = -EBADF;
++      struct nameidata nd;
++      intent_init(&nd.intent, IT_GETATTR);
+       if (f) {
+-              error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
++              error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
++              intent_release(&nd.intent);
+               fput(f);
+       }
+       return error;
+Index: linux-2.6.10/include/linux/dcache.h
+===================================================================
+--- linux-2.6.10.orig/include/linux/dcache.h
++++ linux-2.6.10/include/linux/dcache.h
+@@ -4,6 +4,7 @@
+ #ifdef __KERNEL__
+ #include <asm/atomic.h>
++#include <linux/string.h>
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/cache.h>
+@@ -37,6 +38,8 @@ struct qstr {
+       const unsigned char *name;
+ };
++#include <linux/namei.h>
++
+ struct dentry_stat_t {
+       int nr_dentry;
+       int nr_unused;
+Index: linux-2.6.10/include/linux/fs.h
+===================================================================
+--- linux-2.6.10.orig/include/linux/fs.h
++++ linux-2.6.10/include/linux/fs.h
+@@ -78,6 +78,7 @@ extern int dir_notify_enable;
+ #define FMODE_READ 1
+ #define FMODE_WRITE 2
++#define FMODE_EXEC 4
+ /* Internal kernel extensions */
+ #define FMODE_LSEEK   4
+@@ -262,6 +263,8 @@ typedef void (dio_iodone_t)(struct inode
+ #define ATTR_ATTR_FLAG        1024
+ #define ATTR_KILL_SUID        2048
+ #define ATTR_KILL_SGID        4096
++#define ATTR_RAW              8192    /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN        16384    /* called from open path, ie O_TRUNC */
+ /*
+  * This is the Inode Attributes structure, used for notify_change().  It
+@@ -465,6 +468,7 @@ struct inode {
+       struct block_device     *i_bdev;
+       struct cdev             *i_cdev;
+       int                     i_cindex;
++      void                    *i_filterdata;
+       __u32                   i_generation;
+@@ -600,6 +604,7 @@ struct file {
+       spinlock_t              f_ep_lock;
+ #endif /* #ifdef CONFIG_EPOLL */
+       struct address_space    *f_mapping;
++      struct lookup_intent    *f_it;
+ };
+ extern spinlock_t files_lock;
+ #define file_list_lock() spin_lock(&files_lock);
+@@ -950,7 +955,9 @@ struct inode_operations {
+       void (*truncate) (struct inode *);
+       int (*permission) (struct inode *, int, struct nameidata *);
+       int (*setattr) (struct dentry *, struct iattr *);
++      int (*setattr_raw) (struct inode *, struct iattr *);
+       int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
++      int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *);
+       int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+       ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+       ssize_t (*listxattr) (struct dentry *, char *, size_t);
+@@ -990,6 +997,7 @@ struct super_operations {
+       int (*remount_fs) (struct super_block *, int *, char *);
+       void (*clear_inode) (struct inode *);
+       void (*umount_begin) (struct super_block *);
++      void (*umount_lustre) (struct super_block *);
+       int (*show_options)(struct seq_file *, struct vfsmount *);
+ };
+@@ -1181,6 +1189,7 @@ extern int unregister_filesystem(struct 
+ extern struct vfsmount *kern_mount(struct file_system_type *);
+ extern int may_umount_tree(struct vfsmount *);
+ extern int may_umount(struct vfsmount *);
++struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
+ extern int vfs_statfs(struct super_block *, struct kstatfs *);
+@@ -1245,6 +1254,7 @@ static inline int break_lease(struct ino
+ extern int do_truncate(struct dentry *, loff_t start);
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
++extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char __user *);
+Index: linux-2.6.10/include/linux/mount.h
+===================================================================
+--- linux-2.6.10.orig/include/linux/mount.h
++++ linux-2.6.10/include/linux/mount.h
+@@ -36,6 +36,8 @@ struct vfsmount
+       struct list_head mnt_list;
+       struct list_head mnt_fslink;    /* link in fs-specific expiry list */
+       struct namespace *mnt_namespace; /* containing namespace */
++      struct list_head mnt_lustre_list; /* GNS mount list */
++      unsigned long mnt_last_used;    /* for GNS auto-umount (jiffies) */
+ };
+ static inline struct vfsmount *mntget(struct vfsmount *mnt)
+Index: linux-2.6.10/include/linux/namei.h
+===================================================================
+--- linux-2.6.10.orig/include/linux/namei.h
++++ linux-2.6.10/include/linux/namei.h
+@@ -2,14 +2,48 @@
+ #define _LINUX_NAMEI_H
+ #include <linux/linkage.h>
++#include <linux/string.h>
+ struct vfsmount;
++struct nameidata;
+-struct open_intent {
+-      int     flags;
+-      int     create_mode;
++/* intent opcodes */
++#define IT_OPEN               (1)
++#define IT_CREAT      (1<<1)
++#define IT_READDIR    (1<<2)
++#define IT_GETATTR    (1<<3)
++#define IT_LOOKUP     (1<<4)
++#define IT_UNLINK     (1<<5)
++#define IT_TRUNC      (1<<6)
++#define IT_GETXATTR   (1<<7)
++
++struct lustre_intent_data {
++      int     it_disposition;
++      int     it_status;
++      __u64   it_lock_handle;
++      void    *it_data;
++      int     it_lock_mode;
+ };
++#define INTENT_MAGIC 0x19620323
++struct lookup_intent {
++      int     it_magic;
++      void    (*it_op_release)(struct lookup_intent *);
++      int     it_op;
++      int     it_flags;
++      int     it_create_mode;
++      union {
++              struct lustre_intent_data lustre;
++      } d;
++};
++
++static inline void intent_init(struct lookup_intent *it, int op)
++{
++      memset(it, 0, sizeof(*it));
++      it->it_magic = INTENT_MAGIC;
++      it->it_op = op;
++}
++
+ enum { MAX_NESTED_LINKS = 8 };
+ struct nameidata {
+@@ -21,10 +55,7 @@ struct nameidata {
+       unsigned        depth;
+       char *saved_names[MAX_NESTED_LINKS + 1];
+-      /* Intent data */
+-      union {
+-              struct open_intent open;
+-      } intent;
++      struct lookup_intent intent;
+ };
+ /*
+@@ -47,6 +78,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+ #define LOOKUP_NOALT          32
+ #define LOOKUP_ATOMIC         64
+ #define LOOKUP_REVAL          128
++#define LOOKUP_LAST           (0x1000)
++#define LOOKUP_LINK_NOTLAST   (0x2000)
+ /*
+  * Intent data
+@@ -56,6 +89,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA
+ #define LOOKUP_ACCESS         (0x0400)
+ extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd));
++#define user_path_walk_it(name,nd) \
++      __user_walk_it(name, LOOKUP_FOLLOW, nd)
++#define user_path_walk_link_it(name,nd) \
++      __user_walk_it(name, 0, nd)
++extern void intent_release(struct lookup_intent *);
+ #define user_path_walk(name,nd) \
+       __user_walk(name, LOOKUP_FOLLOW, nd)
+ #define user_path_walk_link(name,nd) \
+@@ -68,7 +107,6 @@ extern void path_release_on_umount(struc
+ extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+-
+ extern int follow_down(struct vfsmount **, struct dentry **);
+ extern int follow_up(struct vfsmount **, struct dentry **);
index 361da69..90ada9a 100644 (file)
@@ -1,7 +1,7 @@
 uml-2.6.10-fc3.patch
 lustre_version.patch
 fc3_to_rhel4_updates.patch 
-vfs_intent-2.6-rhel4.patch
+vfs_intent-2.6-fc3.patch
 vfs_nointent-2.6-rhel4.patch
 vfs_races-2.6-fc3.patch
 ext3-wantedi-misc-2.6-suse.patch
index 0b2e845..1c08d3b 100644 (file)
@@ -21,3 +21,4 @@ compile-fixes-2.6.9-rhel4-22.patch
 vm-tunables-rhel4.patch 
 2.6-rhel4-kgdb-ga.patch
 tcp-zero-copy-2.6.9-rhel4.patch
+iallocsem_consistency.patch
index 1c5d31f..4068bed 100644 (file)
@@ -7,3 +7,5 @@ uml-exprt-clearuser.patch
 qsnet-suse-2.6.patch 
 fsprivate-2.6.patch
 dcache-qstr-api-fix-2.6-suse.patch 
+iallocsem_consistency.patch
+tcp-zero-copy-2.6.5-7.244.patch
index 9ecb127..cb41054 100644 (file)
@@ -17,3 +17,4 @@ export-show_task-2.6-vanilla.patch
 sd_iostats-2.6-rhel4.patch 
 fsprivate-2.6.patch
 export_symbol_numa.patch
+tcp-zero-copy-2.6.12.6.patch
index 15739f1..9db3f3f 100644 (file)
@@ -23,7 +23,7 @@
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
 
-#include <linux/lustre_quota.h>
+#include <lustre_quota.h>
 #include "lustre_quota_fmt.h"
 
 typedef char *dqbuf_t;
index 0e2f5f3..822ef95 100644 (file)
@@ -16,8 +16,8 @@
 #include <linux/kernel.h>
 #include <linux/random.h>
 
-#include <linux/lustre_quota.h>
-#include <linux/obd_class.h>
+#include <lustre_quota.h>
+#include <obd_class.h>
 
 #include "lustre_quota_fmt.h"
 
index 09fda38..7601bce 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LDLM
 #ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/version.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
+#include <libcfs/libcfs.h>
 #else 
 #include <liblustre.h>
 #endif
 
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_lib.h>
 
 /* invariants:
  - only the owner of the lock changes l_owner/l_depth
@@ -67,7 +49,7 @@ void l_lock(struct lustre_lock *lock)
         int owner = 0;
 
         spin_lock(&lock->l_spin);
-        if (lock->l_owner == current)
+        if (lock->l_owner == cfs_current())
                 owner = 1;
         spin_unlock(&lock->l_spin);
 
@@ -78,9 +60,9 @@ void l_lock(struct lustre_lock *lock)
         if (owner) {
                 ++lock->l_depth;
         } else {
-                down(&lock->l_sem);
+                mutex_down(&lock->l_sem);
                 spin_lock(&lock->l_spin);
-                lock->l_owner = current;
+                lock->l_owner = cfs_current();
                 lock->l_depth = 0;
                 spin_unlock(&lock->l_spin);
         }
@@ -88,15 +70,15 @@ void l_lock(struct lustre_lock *lock)
 
 void l_unlock(struct lustre_lock *lock)
 {
-        LASSERTF(lock->l_owner == current, "lock %p, current %p\n",
-                 lock->l_owner, current);
+        LASSERTF(lock->l_owner == cfs_current(), "lock %p, current %p\n",
+                 lock->l_owner, cfs_current());
         LASSERTF(lock->l_depth >= 0, "depth %d\n", lock->l_depth);
 
         spin_lock(&lock->l_spin);
         if (--lock->l_depth < 0) {
                 lock->l_owner = NULL;
                 spin_unlock(&lock->l_spin);
-                up(&lock->l_sem);
+                mutex_up(&lock->l_sem);
                 return;
         }
         spin_unlock(&lock->l_spin);
@@ -107,7 +89,7 @@ int l_has_lock(struct lustre_lock *lock)
         int depth = -1, owner = 0;
 
         spin_lock(&lock->l_spin);
-        if (lock->l_owner == current) {
+        if (lock->l_owner == cfs_current()) {
                 depth = lock->l_depth;
                 owner = 1;
         }
@@ -119,28 +101,27 @@ int l_has_lock(struct lustre_lock *lock)
 }
 
 #ifdef __KERNEL__
-#include <linux/lustre_version.h>
 void l_check_ns_lock(struct ldlm_namespace *ns)
 {
-        static unsigned long next_msg;
+        static cfs_time_t next_msg;
 
-        if (!l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) {
+        if (!l_has_lock(&ns->ns_lock) && cfs_time_after(cfs_time_current(), next_msg)) {
                 CERROR("namespace %s lock not held when it should be; tell "
                        "phil\n", ns->ns_name);
                 libcfs_debug_dumpstack(NULL);
-                next_msg = jiffies + 60 * HZ;
+                next_msg = cfs_time_shift(60);
         }
 }
 
 void l_check_no_ns_lock(struct ldlm_namespace *ns)
 {
-        static unsigned long next_msg;
+        static cfs_time_t next_msg;
 
-        if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) {
+        if (l_has_lock(&ns->ns_lock) && cfs_time_after(cfs_time_current(), next_msg)) {
                 CERROR("namespace %s lock held illegally; tell phil\n",
                        ns->ns_name);
                 libcfs_debug_dumpstack(NULL);
-                next_msg = jiffies + 60 * HZ;
+                next_msg = cfs_time_shift(60);
         }
 }
 
index f54ff12..205ff14 100644 (file)
@@ -29,9 +29,9 @@
 # include <liblustre.h>
 #endif
 
-#include <linux/lustre_dlm.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
 
 #include "ldlm_internal.h"
 
@@ -327,7 +327,7 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                              ldlm_error_t *err)
 {
         struct ldlm_resource *res = lock->l_resource;
-        struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
+        struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list);
         int rc, rc2;
         ENTRY;
 
index 6c7e259..c86ee5c 100644 (file)
 #define DEBUG_SUBSYSTEM S_LDLM
 
 #ifdef __KERNEL__
-#include <linux/lustre_dlm.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
 #include <libcfs/list.h>
 #else
 #include <liblustre.h>
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #endif
 
 #include "ldlm_internal.h"
 
 #define l_flock_waitq   l_lru
 
-static struct list_head ldlm_flock_waitq = LIST_HEAD_INIT(ldlm_flock_waitq);
+static struct list_head ldlm_flock_waitq = CFS_LIST_HEAD_INIT(ldlm_flock_waitq);
 
 int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                             void *data, int flag);
@@ -390,7 +390,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq,
                          * ldlm_reprocess_queue. */
                         if ((mode == LCK_NL) && overlaps) {
                                 struct list_head rpc_list
-                                                    = LIST_HEAD_INIT(rpc_list);
+                                                    = CFS_LIST_HEAD_INIT(rpc_list);
                                 int rc;
 restart:
                                 res->lr_tmp = &rpc_list;
@@ -451,7 +451,7 @@ int
 ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_namespace *ns;
-        struct file_lock *getlk = lock->l_ast_data;
+        cfs_flock_t *getlk = lock->l_ast_data;
         struct ldlm_flock_wait_data fwd;
         unsigned long irqflags;
         struct obd_device *obd;
@@ -512,20 +512,20 @@ granted:
                 /* fcntl(F_GETLK) request */
                 /* The old mode was saved in getlk->fl_type so that if the mode
                  * in the lock changes we can decref the approprate refcount. */
-                ldlm_flock_destroy(lock, getlk->fl_type, LDLM_FL_WAIT_NOREPROC);
+                ldlm_flock_destroy(lock, cfs_flock_type(getlk), LDLM_FL_WAIT_NOREPROC);
                 switch (lock->l_granted_mode) {
                 case LCK_PR:
-                        getlk->fl_type = F_RDLCK;
+                        cfs_flock_set_type(getlk, F_RDLCK);
                         break;
                 case LCK_PW:
-                        getlk->fl_type = F_WRLCK;
+                        cfs_flock_set_type(getlk, F_WRLCK);
                         break;
                 default:
-                        getlk->fl_type = F_UNLCK;
+                        cfs_flock_set_type(getlk, F_UNLCK);
                 }
-                getlk->fl_pid = lock->l_policy_data.l_flock.pid;
-                getlk->fl_start = lock->l_policy_data.l_flock.start;
-                getlk->fl_end = lock->l_policy_data.l_flock.end;
+                cfs_flock_set_pid(getlk, (pid_t)lock->l_policy_data.l_flock.pid);
+                cfs_flock_set_start(getlk, (off_t)lock->l_policy_data.l_flock.start);
+                cfs_flock_set_end(getlk, (off_t)lock->l_policy_data.l_flock.end);
         } else {
                 int noreproc = LDLM_FL_WAIT_NOREPROC;
 
@@ -533,7 +533,7 @@ granted:
                  * with existing locks owned by this process. */
                 ldlm_process_flock_lock(lock, &noreproc, 1, &err);
                 if (flags == 0)
-                        wake_up(&lock->l_waitq);
+                        cfs_waitq_signal(&lock->l_waitq);
         }
         l_unlock(&ns->ns_lock);
         RETURN(0);
index 8cee698..8c473dd 100644 (file)
@@ -26,9 +26,9 @@
 # include <liblustre.h>
 #endif
 
-#include <linux/lustre_dlm.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
 
 #include "ldlm_internal.h"
 
@@ -85,7 +85,7 @@ int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags,
                                 int first_enq, ldlm_error_t *err)
 {
         struct ldlm_resource *res = lock->l_resource;
-        struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
+        struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list);
         int rc;
         ENTRY;
 
index 2e247ff..f9f6c43 100644 (file)
@@ -58,7 +58,7 @@ int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags,
 void l_check_ns_lock(struct ldlm_namespace *ns);
 void l_check_no_ns_lock(struct ldlm_namespace *ns);
 
-extern struct proc_dir_entry *ldlm_svc_proc_dir;
+extern cfs_proc_dir_entry_t *ldlm_svc_proc_dir;
 
 struct ldlm_state {
         struct ptlrpc_service *ldlm_cb_service;
index 03da517..9d9f9ab 100644 (file)
 #define DEBUG_SUBSYSTEM S_LDLM
 
 #ifdef __KERNEL__
-# include <linux/module.h>
+# include <libcfs/libcfs.h>
 #else
 # include <liblustre.h>
 #endif
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_ver.h>
-#include <linux/lustre_net.h>
+#include <obd.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_net.h>
+#include <lustre_ver.h>
 
 /* @priority: if non-zero, move the selected to the list head
  * @create: if zero, only search in existed connections
@@ -186,7 +186,7 @@ out:
  * 2 - server UUID
  * 3 - inactive-on-startup
  */
-int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
+int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
 {
         struct client_obd *cli = &obddev->u.cli;
         struct obd_import *imp;
@@ -249,11 +249,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
         cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024;
         if (cli->cl_dirty_max >> PAGE_SHIFT > num_physpages / 8)
                 cli->cl_dirty_max = num_physpages << (PAGE_SHIFT - 3);
-        INIT_LIST_HEAD(&cli->cl_cache_waiters);
-        INIT_LIST_HEAD(&cli->cl_loi_ready_list);
-        INIT_LIST_HEAD(&cli->cl_loi_write_list);
-        INIT_LIST_HEAD(&cli->cl_loi_read_list);
-        spin_lock_init(&cli->cl_loi_list_lock);
+        CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters);
+        CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list);
+        CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list);
+        CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list);
+        client_obd_list_lock_init(&cli->cl_loi_list_lock);
         cli->cl_r_in_flight = 0;
         cli->cl_w_in_flight = 0;
 
@@ -293,7 +293,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg)
         imp->imp_connect_op = connect_op;
         imp->imp_initial_recov = 1;
         imp->imp_initial_recov_bk = 0;
-        INIT_LIST_HEAD(&imp->imp_pinger_chain);
+        CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain);
         memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1),
                LUSTRE_CFG_BUFLEN(lcfg, 1));
         class_import_put(imp);
@@ -334,6 +334,7 @@ err:
 
 int client_obd_cleanup(struct obd_device *obddev)
 {
+        ENTRY;
         ldlm_put_ref(obddev->obd_force);
 
         RETURN(0);
@@ -351,7 +352,7 @@ int client_connect_import(struct lustre_handle *dlm_handle,
         int rc;
         ENTRY;
 
-        down(&cli->cl_sem);
+        mutex_down(&cli->cl_sem);
         rc = class_connect(dlm_handle, obd, cluuid);
         if (rc)
                 GOTO(out_sem, rc);
@@ -410,7 +411,7 @@ out_disco:
                 class_export_put(exp);
         }
 out_sem:
-        up(&cli->cl_sem);
+        mutex_up(&cli->cl_sem);
         return rc;
 }
 
@@ -431,7 +432,7 @@ int client_disconnect_export(struct obd_export *exp)
         cli = &obd->u.cli;
         imp = cli->cl_import;
 
-        down(&cli->cl_sem);
+        mutex_down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
                        obd->obd_name);
@@ -471,7 +472,7 @@ int client_disconnect_export(struct obd_export *exp)
         if (!rc && err)
                 rc = err;
  out_sem:
-        up(&cli->cl_sem);
+        mutex_up(&cli->cl_sem);
         RETURN(rc);
 }
 
@@ -482,6 +483,7 @@ int client_disconnect_export(struct obd_export *exp)
 int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp,
                             struct obd_uuid *cluuid)
 {
+        ENTRY;
         if (exp->exp_connection && exp->exp_imp_reverse) {
                 struct lustre_handle *hdl;
                 hdl = &exp->exp_imp_reverse->imp_remote_handle;
@@ -691,7 +693,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler)
                                target->obd_name,
                                libcfs_nid2str(req->rq_peer.nid), cluuid.uuid,
                                target->obd_recoverable_clients,
-                               (target->obd_recovery_timer.expires-jiffies)/HZ);
+                               cfs_duration_sec(cfs_time_sub(cfs_timer_deadline(&target->obd_recovery_timer),
+                                                             cfs_time_current())));
                         rc = -EBUSY;
                 } else {
  dont_check_exports:
@@ -923,6 +926,7 @@ void target_abort_recovery(void *data)
 {
         struct obd_device *obd = data;
 
+        ENTRY;
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (!obd->obd_recovering) {
                 spin_unlock_bh(&obd->obd_processing_task_lock);
@@ -942,6 +946,7 @@ void target_abort_recovery(void *data)
         target_finish_recovery(obd);
 
         ptlrpc_run_recovery_over_upcall(obd);
+        EXIT;
 }
 
 static void target_recovery_expired(unsigned long castmeharder)
@@ -951,7 +956,7 @@ static void target_recovery_expired(unsigned long castmeharder)
         spin_lock_bh(&obd->obd_processing_task_lock);
         if (obd->obd_recovering)
                 obd->obd_abort_recovery = 1;
-        wake_up(&obd->obd_next_transno_waitq);
+        cfs_waitq_signal(&obd->obd_next_transno_waitq);
         spin_unlock_bh(&obd->obd_processing_task_lock);
 }
 
@@ -960,7 +965,7 @@ static void target_recovery_expired(unsigned long castmeharder)
 void target_cancel_recovery_timer(struct obd_device *obd)
 {
         CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name);
-        del_timer(&obd->obd_recovery_timer);
+        cfs_timer_disarm(&obd->obd_recovery_timer);
 }
 
 static void reset_recovery_timer(struct obd_device *obd)
@@ -970,12 +975,13 @@ static void reset_recovery_timer(struct obd_device *obd)
                 spin_unlock_bh(&obd->obd_processing_task_lock);
                 return;
         }
-        mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT);
+        cfs_timer_arm(&obd->obd_recovery_timer, 
+                      cfs_time_shift(OBD_RECOVERY_TIMEOUT));
         spin_unlock_bh(&obd->obd_processing_task_lock);
         CDEBUG(D_HA, "%s: timer will expire in %u seconds\n", obd->obd_name,
-               (int)(OBD_RECOVERY_TIMEOUT / HZ));
+               OBD_RECOVERY_TIMEOUT);
         /* Only used for lprocfs_status */
-        obd->obd_recovery_end = CURRENT_SECONDS + OBD_RECOVERY_TIMEOUT/HZ;
+        obd->obd_recovery_end = CURRENT_SECONDS + OBD_RECOVERY_TIMEOUT;
 }
 
 
@@ -988,10 +994,9 @@ void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler)
                 return;
         }
         CWARN("%s: starting recovery timer (%us)\n", obd->obd_name,
-              (int)(OBD_RECOVERY_TIMEOUT / HZ));
+              OBD_RECOVERY_TIMEOUT);
         obd->obd_recovery_handler = handler;
-        obd->obd_recovery_timer.function = target_recovery_expired;
-        obd->obd_recovery_timer.data = (unsigned long)obd;
+        cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd);
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
         reset_recovery_timer(obd);
@@ -1047,7 +1052,7 @@ static void process_recovery_queue(struct obd_device *obd)
 
         for (;;) {
                 spin_lock_bh(&obd->obd_processing_task_lock);
-                LASSERT(obd->obd_processing_task == current->pid);
+                LASSERT(obd->obd_processing_task == cfs_curproc_pid());
                 req = list_entry(obd->obd_recovery_queue.next,
                                  struct ptlrpc_request, rq_list);
 
@@ -1111,7 +1116,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
          * buffers (eg mds_body, ost_body etc) have NOT been swabbed. */
 
         if (!transno) {
-                INIT_LIST_HEAD(&req->rq_list);
+                CFS_INIT_LIST_HEAD(&req->rq_list);
                 DEBUG_REQ(D_HA, req, "not queueing");
                 return 1;
         }
@@ -1137,7 +1142,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
          * Also, a resent, replayed request that has already been
          * handled will pass through here and be processed immediately.
          */
-        if (obd->obd_processing_task == current->pid ||
+        if (obd->obd_processing_task == cfs_curproc_pid() ||
             transno < obd->obd_next_recovery_transno) {
                 /* Processing the queue right now, don't re-add. */
                 LASSERT(list_empty(&req->rq_list));
@@ -1163,7 +1168,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         req = saved_req;
         req->rq_reqmsg = reqmsg;
         class_export_get(req->rq_export);
-        INIT_LIST_HEAD(&req->rq_list);
+        CFS_INIT_LIST_HEAD(&req->rq_list);
 
         /* XXX O(n^2) */
         list_for_each(tmp, &obd->obd_recovery_queue) {
@@ -1187,7 +1192,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
                 /* Someone else is processing this queue, we'll leave it to
                  * them.
                  */
-                wake_up(&obd->obd_next_transno_waitq);
+                cfs_waitq_signal(&obd->obd_next_transno_waitq);
                 spin_unlock_bh(&obd->obd_processing_task_lock);
                 return 0;
         }
@@ -1195,7 +1200,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         /* Nobody is processing, and we know there's (at least) one to process
          * now, so we'll do the honours.
          */
-        obd->obd_processing_task = current->pid;
+        obd->obd_processing_task = cfs_curproc_pid();
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
         process_recovery_queue(obd);
@@ -1272,7 +1277,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc)
         } else {
                 CWARN("%s: %d recoverable clients remain\n",
                        obd->obd_name, obd->obd_recoverable_clients);
-                wake_up(&obd->obd_next_transno_waitq);
+                cfs_waitq_signal(&obd->obd_next_transno_waitq);
         }
 
         return 1;
@@ -1376,7 +1381,7 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
             list_empty(&rs->rs_exp_list) ||     /* completed already */
             list_empty(&rs->rs_obd_list)) {
                 list_add_tail (&rs->rs_list, &svc->srv_reply_queue);
-                wake_up (&svc->srv_waitq);
+                cfs_waitq_signal (&svc->srv_waitq);
         } else {
                 list_add (&rs->rs_list, &svc->srv_active_replies);
                 rs->rs_scheduled = 0;           /* allow notifier to schedule */
index cce090a..e3df674 100644 (file)
 #define DEBUG_SUBSYSTEM S_LDLM
 
 #ifdef __KERNEL__
-# include <linux/slab.h>
-# include <linux/module.h>
-# include <linux/lustre_dlm.h>
+# include <libcfs/libcfs.h>
 #else
 # include <liblustre.h>
 # include <libcfs/kp30.h>
 #endif
 
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #include "ldlm_internal.h"
 
 //struct lustre_lock ldlm_everything_lock;
@@ -83,7 +81,7 @@ char *ldlm_it2str(int it)
         }
 }
 
-extern kmem_cache_t *ldlm_lock_slab;
+extern cfs_mem_cache_t *ldlm_lock_slab;
 struct lustre_lock ldlm_handle_lock;
 
 static ldlm_processing_policy ldlm_processing_policy_table[] = {
@@ -249,20 +247,20 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
         if (resource == NULL)
                 LBUG();
 
-        OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock));
+        OBD_SLAB_ALLOC(lock, ldlm_lock_slab, CFS_ALLOC_IO, sizeof(*lock));
         if (lock == NULL)
                 RETURN(NULL);
 
         lock->l_resource = ldlm_resource_getref(resource);
 
         atomic_set(&lock->l_refc, 2);
-        INIT_LIST_HEAD(&lock->l_children);
-        INIT_LIST_HEAD(&lock->l_childof);
-        INIT_LIST_HEAD(&lock->l_res_link);
-        INIT_LIST_HEAD(&lock->l_lru);
-        INIT_LIST_HEAD(&lock->l_export_chain);
-        INIT_LIST_HEAD(&lock->l_pending_chain);
-        init_waitqueue_head(&lock->l_waitq);
+        CFS_INIT_LIST_HEAD(&lock->l_children);
+        CFS_INIT_LIST_HEAD(&lock->l_childof);
+        CFS_INIT_LIST_HEAD(&lock->l_res_link);
+        CFS_INIT_LIST_HEAD(&lock->l_lru);
+        CFS_INIT_LIST_HEAD(&lock->l_export_chain);
+        CFS_INIT_LIST_HEAD(&lock->l_pending_chain);
+        cfs_waitq_init(&lock->l_waitq);
 
         spin_lock(&resource->lr_namespace->ns_counter_lock);
         resource->lr_namespace->ns_locks++;
@@ -275,7 +273,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent,
                 l_unlock(&parent->l_resource->lr_namespace->ns_lock);
         }
 
-        INIT_LIST_HEAD(&lock->l_handle.h_link);
+        CFS_INIT_LIST_HEAD(&lock->l_handle.h_link);
         class_handle_hash(&lock->l_handle, lock_handle_addref);
 
         RETURN(lock);
@@ -482,7 +480,7 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode)
                 lock->l_readers++;
         if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP))
                 lock->l_writers++;
-        lock->l_last_used = jiffies;
+        lock->l_last_used = cfs_time_current();
         LDLM_LOCK_GET(lock);
         LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
@@ -669,7 +667,7 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock)
 {
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
         lock->l_flags |= LDLM_FL_CAN_MATCH;
-        wake_up(&lock->l_waitq);
+        cfs_waitq_signal(&lock->l_waitq);
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
 }
 
@@ -756,7 +754,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags,
                                 }
                         }
 
-                        lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL);
+                        lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), NULL,NULL,NULL);
 
                         /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */
                         l_wait_event(lock->l_waitq,
@@ -826,7 +824,7 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns,
         lock->l_blocking_ast = blocking;
         lock->l_completion_ast = completion;
         lock->l_glimpse_ast = glimpse;
-        lock->l_pid = current->pid;
+        lock->l_pid = cfs_curproc_pid();
 
         if (lvb_len) {
                 lock->l_lvb_len = lvb_len;
@@ -1016,6 +1014,7 @@ void ldlm_reprocess_all_ns(struct ldlm_namespace *ns)
 {
         int i, rc;
 
+        ENTRY;
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
                 struct list_head *tmp, *next;
index b5bdf80..0ec457e 100644 (file)
 #define DEBUG_SUBSYSTEM S_LDLM
 
 #ifdef __KERNEL__
-# include <linux/module.h>
-# include <linux/slab.h>
-# include <linux/init.h>
-# include <linux/wait.h>
+# include <libcfs/libcfs.h>
 #else
 # include <liblustre.h>
 #endif
 
-#include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
 #include <libcfs/list.h>
 #include "ldlm_internal.h"
 
-extern kmem_cache_t *ldlm_resource_slab;
-extern kmem_cache_t *ldlm_lock_slab;
+extern cfs_mem_cache_t *ldlm_resource_slab;
+extern cfs_mem_cache_t *ldlm_lock_slab;
 extern struct lustre_lock ldlm_handle_lock;
 extern struct list_head ldlm_namespace_list;
 
-static DECLARE_MUTEX(ldlm_ref_sem);
+extern struct semaphore ldlm_namespace_lock;
+static struct semaphore ldlm_ref_sem;
 static int ldlm_refcount;
 
 /* LDLM state */
 
 static struct ldlm_state *ldlm_state;
 
-inline unsigned long round_timeout(unsigned long timeout)
+inline cfs_time_t round_timeout(cfs_time_t timeout)
 {
-        return ((timeout / HZ) + 1) * HZ;
+        return cfs_time_seconds((int)cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1);
 }
 
 #ifdef __KERNEL__
 /* w_l_spinlock protects both waiting_locks_list and expired_lock_thread */
 static spinlock_t waiting_locks_spinlock;
 static struct list_head waiting_locks_list;
-static struct timer_list waiting_locks_timer;
+static cfs_timer_t waiting_locks_timer;
 
 static struct expired_lock_thread {
-        wait_queue_head_t         elt_waitq;
+        cfs_waitq_t               elt_waitq;
         int                       elt_state;
         int                       elt_dump;
         struct list_head          elt_expired_locks;
@@ -81,7 +79,7 @@ static struct expired_lock_thread {
 struct ldlm_bl_pool {
         spinlock_t              blp_lock;
         struct list_head        blp_list;
-        wait_queue_head_t       blp_waitq;
+        cfs_waitq_t             blp_waitq;
         atomic_t                blp_num_threads;
         struct completion       blp_comp;
 };
@@ -99,6 +97,7 @@ static inline int have_expired_locks(void)
 {
         int need_to_run;
 
+        ENTRY;
         spin_lock_bh(&waiting_locks_spinlock);
         need_to_run = !list_empty(&expired_lock_thread.elt_expired_locks);
         spin_unlock_bh(&waiting_locks_spinlock);
@@ -110,21 +109,12 @@ static int expired_lock_main(void *arg)
 {
         struct list_head *expired = &expired_lock_thread.elt_expired_locks;
         struct l_wait_info lwi = { 0 };
-        unsigned long flags;
 
         ENTRY;
-        lock_kernel();
-        libcfs_daemonize("ldlm_elt");
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        unlock_kernel();
+        cfs_daemonize("ldlm_elt");
 
         expired_lock_thread.elt_state = ELT_READY;
-        wake_up(&expired_lock_thread.elt_waitq);
+        cfs_waitq_signal(&expired_lock_thread.elt_waitq);
 
         while (1) {
                 l_wait_event(expired_lock_thread.elt_waitq,
@@ -181,7 +171,7 @@ static int expired_lock_main(void *arg)
         }
 
         expired_lock_thread.elt_state = ELT_STOPPED;
-        wake_up(&expired_lock_thread.elt_waitq);
+        cfs_waitq_signal(&expired_lock_thread.elt_waitq);
         RETURN(0);
 }
 
@@ -190,15 +180,12 @@ static void waiting_locks_callback(unsigned long unused)
 {
         struct ldlm_lock *lock, *last = NULL;
 
-        if (obd_dump_on_timeout)
-                libcfs_debug_dumplog();
-
         spin_lock_bh(&waiting_locks_spinlock);
         while (!list_empty(&waiting_locks_list)) {
                 lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
                                   l_pending_chain);
 
-                if (time_after(lock->l_callback_timeout, jiffies) ||
+                if (cfs_time_after(lock->l_callback_timeout, cfs_time_current()) ||
                     (lock->l_req_mode == LCK_GROUP))
                         break;
 
@@ -216,9 +203,8 @@ static void waiting_locks_callback(unsigned long unused)
                                lock->l_pending_chain.next,
                                lock->l_pending_chain.prev);
 
-                        INIT_LIST_HEAD(&waiting_locks_list);    /* HACK */
+                        CFS_INIT_LIST_HEAD(&waiting_locks_list);    /* HACK */
                         expired_lock_thread.elt_dump = __LINE__;
-                        spin_unlock_bh(&waiting_locks_spinlock);
 
                         /* LBUG(); */
                         CEMERG("would be an LBUG, but isn't (bug 5653)\n");
@@ -232,8 +218,13 @@ static void waiting_locks_callback(unsigned long unused)
                 list_del(&lock->l_pending_chain);
                 list_add(&lock->l_pending_chain,
                          &expired_lock_thread.elt_expired_locks);
+        }
 
-                wake_up(&expired_lock_thread.elt_waitq);
+        if (!list_empty(&expired_lock_thread.elt_expired_locks)) {
+                if (obd_dump_on_timeout)
+                        expired_lock_thread.elt_dump = __LINE__;
+
+                cfs_waitq_signal(&expired_lock_thread.elt_waitq);
         }
 
         /*
@@ -241,11 +232,11 @@ static void waiting_locks_callback(unsigned long unused)
          * left.
          */
         if (!list_empty(&waiting_locks_list)) {
-                unsigned long timeout_rounded;
+                cfs_time_t timeout_rounded;
                 lock = list_entry(waiting_locks_list.next, struct ldlm_lock,
                                   l_pending_chain);
-                timeout_rounded = round_timeout(lock->l_callback_timeout);
-                mod_timer(&waiting_locks_timer, timeout_rounded);
+                timeout_rounded = (cfs_time_t)round_timeout(lock->l_callback_timeout);
+                cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
         }
         spin_unlock_bh(&waiting_locks_spinlock);
 }
@@ -260,18 +251,18 @@ static void waiting_locks_callback(unsigned long unused)
  */
 static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
 {
-        unsigned long timeout_rounded;
+        cfs_time_t timeout_rounded;
 
         l_check_ns_lock(lock->l_resource->lr_namespace);
         LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK));
 
         spin_lock_bh(&waiting_locks_spinlock);
         if (lock->l_destroyed) {
-                static unsigned long next;
+                static cfs_time_t next;
                 spin_unlock_bh(&waiting_locks_spinlock);
                 LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)");
-                if (time_after(jiffies, next)) {
-                        next = jiffies + 14400 * HZ;
+                if (cfs_time_after(cfs_time_current(), next)) {
+                        next = cfs_time_shift(14400);
                         libcfs_debug_dumpstack(NULL);
                 }
                 return 0;
@@ -283,13 +274,14 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
                 return 0;
         }
 
-        lock->l_callback_timeout = jiffies + (obd_timeout * HZ / 2);
+        lock->l_callback_timeout =cfs_time_add(cfs_time_current(),
+                                               cfs_time_seconds(obd_timeout)/2);
 
         timeout_rounded = round_timeout(lock->l_callback_timeout);
 
-        if (time_before(timeout_rounded, waiting_locks_timer.expires) ||
-            !timer_pending(&waiting_locks_timer)) {
-                mod_timer(&waiting_locks_timer, timeout_rounded);
+        if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) ||
+            !cfs_timer_is_armed(&waiting_locks_timer)) {
+                cfs_timer_arm(&waiting_locks_timer, timeout_rounded);
         }
         list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */
         spin_unlock_bh(&waiting_locks_spinlock);
@@ -329,13 +321,13 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock)
                 /* Removing the head of the list, adjust timer. */
                 if (list_next == &waiting_locks_list) {
                         /* No more, just cancel. */
-                        del_timer(&waiting_locks_timer);
+                        cfs_timer_disarm(&waiting_locks_timer);
                 } else {
                         struct ldlm_lock *next;
                         next = list_entry(list_next, struct ldlm_lock,
                                           l_pending_chain);
-                        mod_timer(&waiting_locks_timer,
-                                  round_timeout(next->l_callback_timeout));
+                        cfs_timer_arm(&waiting_locks_timer,
+                                      round_timeout(next->l_callback_timeout));
                 }
         }
         list_del_init(&lock->l_pending_chain);
@@ -511,13 +503,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         RETURN(rc);
 }
 
-/* XXX copied from ptlrpc/service.c */
-static long timeval_sub(struct timeval *large, struct timeval *small)
-{
-        return (large->tv_sec - small->tv_sec) * 1000000 +
-                (large->tv_usec - small->tv_usec);
-}
-
 int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
@@ -530,18 +515,19 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         LASSERT(lock != NULL);
 
         do_gettimeofday(&granted_time);
-        total_enqueue_wait = timeval_sub(&granted_time,&lock->l_enqueued_time);
+        total_enqueue_wait = cfs_timeval_sub(&granted_time,
+                                             &lock->l_enqueued_time, NULL);
 
         if (total_enqueue_wait / 1000000 > obd_timeout)
                 LDLM_ERROR(lock, "enqueue wait took %luus from %lu",
                            total_enqueue_wait, lock->l_enqueued_time.tv_sec);
 
-        down(&lock->l_resource->lr_lvb_sem);
+        mutex_down(&lock->l_resource->lr_lvb_sem);
         if (lock->l_resource->lr_lvb_len) {
                 buffers = 2;
                 size[1] = lock->l_resource->lr_lvb_len;
         }
-        up(&lock->l_resource->lr_lvb_sem);
+        mutex_up(&lock->l_resource->lr_lvb_sem);
 
         req = ptlrpc_prep_req(lock->l_export->exp_imp_reverse,
                               LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK,
@@ -557,12 +543,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         if (buffers == 2) {
                 void *lvb;
 
-                down(&lock->l_resource->lr_lvb_sem);
+                mutex_down(&lock->l_resource->lr_lvb_sem);
                 lvb = lustre_msg_buf(req->rq_reqmsg, 1,
                                      lock->l_resource->lr_lvb_len);
                 memcpy(lvb, lock->l_resource->lr_lvb_data,
                        lock->l_resource->lr_lvb_len);
-                up(&lock->l_resource->lr_lvb_sem);
+                mutex_up(&lock->l_resource->lr_lvb_sem);
         }
 
         LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
@@ -626,9 +612,9 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         body->lock_handle1 = lock->l_remote_handle;
         ldlm_lock2desc(lock, &body->lock_desc);
 
-        down(&lock->l_resource->lr_lvb_sem);
+        mutex_down(&lock->l_resource->lr_lvb_sem);
         size = lock->l_resource->lr_lvb_len;
-        up(&lock->l_resource->lr_lvb_sem);
+        mutex_up(&lock->l_resource->lr_lvb_sem);
         req->rq_replen = lustre_msg_size(1, &size);
 
         req->rq_send_state = LUSTRE_IMP_FULL;
@@ -718,9 +704,9 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
         }
 
 #if 0
-        /* FIXME this makes it impossible to use plain locks -- check against
-           server's *_CONNECT_SUPPORTED flags? (I don't want to use ibits
-           for mgc/mgs) */
+        /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check 
+           against server's _CONNECT_SUPPORTED flags? (I don't want to use
+           ibits for mgc/mgs) */
 
         /* INODEBITS_INTEROP: Perform conversion from plain lock to
          * inodebits lock if client does not support them. */
@@ -781,12 +767,12 @@ existing_lock:
         } else {
                 int buffers = 1;
 
-                down(&lock->l_resource->lr_lvb_sem);
+                mutex_down(&lock->l_resource->lr_lvb_sem);
                 if (lock->l_resource->lr_lvb_len) {
                         size[1] = lock->l_resource->lr_lvb_len;
                         buffers = 2;
                 }
-                up(&lock->l_resource->lr_lvb_sem);
+                mutex_up(&lock->l_resource->lr_lvb_sem);
 
                 if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR))
                         GOTO(out, rc = -ENOMEM);
@@ -879,7 +865,7 @@ existing_lock:
                 l_unlock(&lock->l_resource->lr_namespace->ns_lock);
 
                 if (rc == 0) {
-                        down(&lock->l_resource->lr_lvb_sem);
+                        mutex_down(&lock->l_resource->lr_lvb_sem);
                         size[1] = lock->l_resource->lr_lvb_len;
                         if (size[1] > 0) {
                                 void *lvb = lustre_msg_buf(req->rq_repmsg,
@@ -890,7 +876,7 @@ existing_lock:
                                 memcpy(lvb, lock->l_resource->lr_lvb_data,
                                        size[1]);
                         }
-                        up(&lock->l_resource->lr_lvb_sem);
+                        mutex_up(&lock->l_resource->lr_lvb_sem);
                 } else {
                         ldlm_resource_unlink_lock(lock);
                         ldlm_lock_destroy(lock);
@@ -1110,7 +1096,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                                     struct ldlm_request *dlm_req,
                                     struct ldlm_lock *lock)
 {
-        LIST_HEAD(ast_list);
+        CFS_LIST_HEAD(ast_list);
         ENTRY;
 
         l_lock(&ns->ns_lock);
@@ -1196,7 +1182,8 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req,
         l_unlock(&ns->ns_lock);
         if (lock->l_granted_mode == LCK_PW &&
             !lock->l_readers && !lock->l_writers &&
-            time_after(jiffies, lock->l_last_used + 10 * HZ)) {
+            cfs_time_after(cfs_time_current(), 
+                           cfs_time_add(lock->l_last_used, cfs_time_seconds(10)))) {
                 if (ldlm_bl_to_thread(ns, NULL, lock))
                         ldlm_handle_bl_callback(ns, NULL, lock);
                 EXIT;
@@ -1237,7 +1224,7 @@ int ldlm_bl_to_thread(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
 
         spin_lock(&blp->blp_lock);
         list_add_tail(&blwi->blwi_entry, &blp->blp_list);
-        wake_up(&blp->blp_waitq);
+        cfs_waitq_signal(&blp->blp_waitq);
         spin_unlock(&blp->blp_lock);
 
         RETURN(0);
@@ -1462,20 +1449,14 @@ static int ldlm_bl_thread_main(void *arg)
 {
         struct ldlm_bl_thread_data *bltd = arg;
         struct ldlm_bl_pool *blp = bltd->bltd_blp;
-        unsigned long flags;
         ENTRY;
 
-        /* XXX boiler-plate */
         {
-                char name[sizeof(current->comm)];
+                char name[CFS_CURPROC_COMM_MAX];
                 snprintf(name, sizeof(name) - 1, "ldlm_bl_%02d",
                          bltd->bltd_num);
-                libcfs_daemonize(name);
+                cfs_daemonize(name);
         }
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
 
         atomic_inc(&blp->blp_num_threads);
         complete(&blp->blp_comp);
@@ -1509,20 +1490,22 @@ static int ldlm_cleanup(int force);
 int ldlm_get_ref(void)
 {
         int rc = 0;
-        down(&ldlm_ref_sem);
+        ENTRY;
+        mutex_down(&ldlm_ref_sem);
         if (++ldlm_refcount == 1) {
                 rc = ldlm_setup();
                 if (rc)
                         ldlm_refcount--;
         }
-        up(&ldlm_ref_sem);
+        mutex_up(&ldlm_ref_sem);
 
         RETURN(rc);
 }
 
 void ldlm_put_ref(int force)
 {
-        down(&ldlm_ref_sem);
+        ENTRY;
+        mutex_down(&ldlm_ref_sem);
         if (ldlm_refcount == 1) {
                 int rc = ldlm_cleanup(force);
                 if (rc)
@@ -1532,7 +1515,7 @@ void ldlm_put_ref(int force)
         } else {
                 ldlm_refcount--;
         }
-        up(&ldlm_ref_sem);
+        mutex_up(&ldlm_ref_sem);
 
         EXIT;
 }
@@ -1589,10 +1572,10 @@ static int ldlm_setup(void)
         ldlm_state->ldlm_bl_pool = blp;
 
         atomic_set(&blp->blp_num_threads, 0);
-        init_waitqueue_head(&blp->blp_waitq);
+        cfs_waitq_init(&blp->blp_waitq);
         spin_lock_init(&blp->blp_lock);
 
-        INIT_LIST_HEAD(&blp->blp_list);
+        CFS_INIT_LIST_HEAD(&blp->blp_list);
 
 #ifdef __KERNEL__
         for (i = 0; i < LDLM_NUM_THREADS; i++) {
@@ -1601,7 +1584,7 @@ static int ldlm_setup(void)
                         .bltd_blp = blp,
                 };
                 init_completion(&blp->blp_comp);
-                rc = kernel_thread(ldlm_bl_thread_main, &bltd, 0);
+                rc = cfs_kernel_thread(ldlm_bl_thread_main, &bltd, 0);
                 if (rc < 0) {
                         CERROR("cannot start LDLM thread #%d: rc %d\n", i, rc);
                         GOTO(out_thread, rc);
@@ -1619,17 +1602,18 @@ static int ldlm_setup(void)
         if (rc)
                 GOTO(out_thread, rc);
 
-        INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
+        CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
         expired_lock_thread.elt_state = ELT_STOPPED;
-        init_waitqueue_head(&expired_lock_thread.elt_waitq);
+        cfs_waitq_init(&expired_lock_thread.elt_waitq);
 
-        INIT_LIST_HEAD(&waiting_locks_list);
+        CFS_INIT_LIST_HEAD(&waiting_locks_list);
         spin_lock_init(&waiting_locks_spinlock);
-        waiting_locks_timer.function = waiting_locks_callback;
-        waiting_locks_timer.data = 0;
-        init_timer(&waiting_locks_timer);
+        cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0);
 
-        rc = kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
+        /* Using CLONE_FILES instead of CLONE_FS here causes failures in 
+           conf-sanity test 21.  But using CLONE_FS can cause problems
+           if the daemonize happens between push/pop_ctxt... */
+        rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
         if (rc < 0) {
                 CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
                 GOTO(out_thread, rc);
@@ -1678,7 +1662,7 @@ static int ldlm_cleanup(int force)
 
                 spin_lock(&blp->blp_lock);
                 list_add_tail(&blwi.blwi_entry, &blp->blp_list);
-                wake_up(&blp->blp_waitq);
+                cfs_waitq_signal(&blp->blp_waitq);
                 spin_unlock(&blp->blp_lock);
 
                 wait_for_completion(&blp->blp_comp);
@@ -1690,7 +1674,7 @@ static int ldlm_cleanup(int force)
         ldlm_proc_cleanup();
 
         expired_lock_thread.elt_state = ELT_TERMINATE;
-        wake_up(&expired_lock_thread.elt_waitq);
+        cfs_waitq_signal(&expired_lock_thread.elt_waitq);
         wait_event(expired_lock_thread.elt_waitq,
                    expired_lock_thread.elt_state == ELT_STOPPED);
 #else
@@ -1706,17 +1690,19 @@ static int ldlm_cleanup(int force)
 
 int __init ldlm_init(void)
 {
-        ldlm_resource_slab = kmem_cache_create("ldlm_resources",
+        init_mutex(&ldlm_ref_sem);
+        init_mutex(&ldlm_namespace_lock);
+        ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
                                                sizeof(struct ldlm_resource), 0,
-                                               SLAB_HWCACHE_ALIGN, NULL, NULL);
+                                               SLAB_HWCACHE_ALIGN);
         if (ldlm_resource_slab == NULL)
                 return -ENOMEM;
 
-        ldlm_lock_slab = kmem_cache_create("ldlm_locks",
+        ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks",
                                            sizeof(struct ldlm_lock), 0,
-                                           SLAB_HWCACHE_ALIGN, NULL, NULL);
+                                           SLAB_HWCACHE_ALIGN);
         if (ldlm_lock_slab == NULL) {
-                kmem_cache_destroy(ldlm_resource_slab);
+                cfs_mem_cache_destroy(ldlm_resource_slab);
                 return -ENOMEM;
         }
 
@@ -1727,12 +1713,14 @@ int __init ldlm_init(void)
 
 void __exit ldlm_exit(void)
 {
-        if ( ldlm_refcount )
+        int rc;
+
+        if (ldlm_refcount)
                 CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
-        LASSERTF(kmem_cache_destroy(ldlm_resource_slab) == 0,
-                 "couldn't free ldlm resource slab\n");
-        LASSERTF(kmem_cache_destroy(ldlm_lock_slab) == 0,
-                 "couldn't free ldlm lock slab\n");
+        rc = cfs_mem_cache_destroy(ldlm_resource_slab);
+        LASSERTF(rc == 0, "couldn't free ldlm resource slab\n");
+        rc = cfs_mem_cache_destroy(ldlm_lock_slab);
+        LASSERTF(rc == 0, "couldn't free ldlm lock slab\n");
 }
 
 /* ldlm_extent.c */
index 63026f5..38b0c40 100644 (file)
@@ -27,9 +27,9 @@
 #define DEBUG_SUBSYSTEM S_LDLM
 
 #ifdef __KERNEL__
-#include <linux/lustre_dlm.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
+#include <lustre_dlm.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
 #else
 #include <liblustre.h>
 #endif
@@ -81,7 +81,7 @@ int ldlm_process_plain_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                             ldlm_error_t *err)
 {
         struct ldlm_resource *res = lock->l_resource;
-        struct list_head rpc_list = LIST_HEAD_INIT(rpc_list);
+        struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list);
         int rc;
         ENTRY;
 
@@ -104,7 +104,6 @@ int ldlm_process_plain_lock(struct ldlm_lock *lock, int *flags, int first_enq,
  restart:
         LASSERT(res->lr_tmp == NULL);
         res->lr_tmp = &rpc_list;
-
         rc = ldlm_plain_compat_queue(&res->lr_granted, lock, 1);
         rc += ldlm_plain_compat_queue(&res->lr_waiting, lock, 1);
         res->lr_tmp = NULL;
index ee89422..bf2f655 100644 (file)
@@ -28,9 +28,9 @@
 #include <liblustre.h>
 #endif
 
-#include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <obd.h>
 
 #include "ldlm_internal.h"
 
@@ -50,15 +50,16 @@ int ldlm_expired_completion_wait(void *data)
         struct obd_import *imp;
         struct obd_device *obd;
 
+        ENTRY;
         if (lock->l_conn_export == NULL) {
-                static unsigned long next_dump = 0, last_dump = 0;
+                static cfs_time_t next_dump = 0, last_dump = 0;
 
                 LDLM_ERROR(lock, "lock timed out (enq %lus ago); not entering "
                            "recovery in server code, just going back to sleep",
                            lock->l_enqueued_time.tv_sec);
-                if (time_after(jiffies, next_dump)) {
+                if (cfs_time_after(cfs_time_current(), next_dump)) {
                         last_dump = next_dump;
-                        next_dump = jiffies + 300 * HZ;
+                        next_dump = cfs_time_shift(300);
                         ldlm_namespace_dump(D_DLMTRACE,
                                             lock->l_resource->lr_namespace);
                         if (last_dump == 0)
@@ -95,7 +96,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 
         if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED |
                        LDLM_FL_BLOCK_CONV))) {
-                wake_up(&lock->l_waitq);
+                cfs_waitq_signal(&lock->l_waitq);
                 RETURN(0);
         }
 
@@ -115,11 +116,10 @@ noreproc:
         lwd.lwd_lock = lock;
 
         if (unlikely(flags & LDLM_FL_NO_TIMEOUT)) {
-                LDLM_DEBUG(lock, "waiting indefinitely because CW lock was"
-                           " met\n");
+                LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT");
                 lwi = LWI_INTR(interrupted_completion_wait, &lwd);
         } else {
-                lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ,
+                lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout),
                                        ldlm_expired_completion_wait,
                                        interrupted_completion_wait, &lwd);
         }
@@ -450,9 +450,11 @@ int ldlm_cli_enqueue(struct obd_export *exp,
         /* lock enqueued on the server */
         cleanup_phase = 1;
 
+        l_lock(&ns->ns_lock);
         lock->l_remote_handle = reply->lock_handle;
         *flags = reply->lock_flags;
         lock->l_flags |= reply->lock_flags & LDLM_INHERIT_FLAGS;
+        l_unlock(&ns->ns_lock);
 
         CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n",
                lock, reply->lock_handle.cookie, *flags);
@@ -752,7 +754,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync)
 {
         struct ldlm_lock *lock, *next;
         int count, rc = 0;
-        LIST_HEAD(cblist);
+        CFS_LIST_HEAD(cblist);
         ENTRY;
 
 #ifndef __KERNEL__
@@ -818,7 +820,7 @@ static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
                                            void *opaque)
 {
         struct ldlm_resource *res;
-        struct list_head *tmp, *next, list = LIST_HEAD_INIT(list);
+        struct list_head *tmp, *next, list = CFS_LIST_HEAD_INIT(list);
         struct ldlm_ast_work *w;
         ENTRY;
 
@@ -1045,6 +1047,7 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
 {
         int i, rc = LDLM_ITER_CONTINUE;
 
+        ENTRY;
         l_lock(&ns->ns_lock);
         for (i = 0; i < RES_HASH_SIZE; i++) {
                 struct list_head *tmp, *next;
@@ -1098,7 +1101,9 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure)
         /* we use l_pending_chain here, because it's unused on clients. */
         LASSERTF(list_empty(&lock->l_pending_chain),"lock %p next %p prev %p\n",
                  lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev);
-        list_add(&lock->l_pending_chain, list);
+        /* bug 9573: don't replay locks left after eviction */
+        if (!(lock->l_flags & LDLM_FL_FAILED))
+                list_add(&lock->l_pending_chain, list);
         return LDLM_ITER_CONTINUE;
 }
 
@@ -1108,6 +1113,7 @@ static int replay_lock_interpret(struct ptlrpc_request *req,
         struct ldlm_lock *lock;
         struct ldlm_reply *reply;
 
+        ENTRY;
         atomic_dec(&req->rq_import->imp_replay_inflight);
         if (rc != ELDLM_OK)
                 GOTO(out, rc);
@@ -1141,6 +1147,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock)
         int buffers = 1;
         int size[2];
         int flags;
+        ENTRY;
 
         /* If this is reply-less callback lock, we cannot replay it, since
          * server might have long dropped it, but notification of that event was
@@ -1212,7 +1219,7 @@ int ldlm_replay_locks(struct obd_import *imp)
         int rc = 0;
 
         ENTRY;
-        INIT_LIST_HEAD(&list);
+        CFS_INIT_LIST_HEAD(&list);
 
         LASSERT(atomic_read(&imp->imp_replay_inflight) == 0);
 
index e2e8fa9..809b87f 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LDLM
 #ifdef __KERNEL__
-# include <linux/lustre_dlm.h>
+# include <lustre_dlm.h>
 #else
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #include "ldlm_internal.h"
 
-kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
+cfs_mem_cache_t *ldlm_resource_slab, *ldlm_lock_slab;
 
-DECLARE_MUTEX(ldlm_namespace_lock);
-struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list);
-struct proc_dir_entry *ldlm_type_proc_dir = NULL;
-struct proc_dir_entry *ldlm_ns_proc_dir = NULL;
-struct proc_dir_entry *ldlm_svc_proc_dir = NULL;
+struct semaphore ldlm_namespace_lock;
+struct list_head ldlm_namespace_list = CFS_LIST_HEAD_INIT(ldlm_namespace_list);
+cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
+cfs_proc_dir_entry_t *ldlm_svc_proc_dir = NULL;
 
 #ifdef LPROCFS
 static int ldlm_proc_dump_ns(struct file *file, const char *buffer,
@@ -242,9 +242,9 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
 
         strcpy(ns->ns_name, name);
 
-        INIT_LIST_HEAD(&ns->ns_root_list);
+        CFS_INIT_LIST_HEAD(&ns->ns_root_list);
         l_lock_init(&ns->ns_lock);
-        init_waitqueue_head(&ns->ns_refcount_waitq);
+        cfs_waitq_init(&ns->ns_refcount_waitq);
         atomic_set(&ns->ns_refcount, 0);
         ns->ns_client = client;
         spin_lock_init(&ns->ns_counter_lock);
@@ -252,15 +252,15 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client)
 
         for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash;
              bucket--)
-                INIT_LIST_HEAD(bucket);
+                CFS_INIT_LIST_HEAD(bucket);
 
-        INIT_LIST_HEAD(&ns->ns_unused_list);
+        CFS_INIT_LIST_HEAD(&ns->ns_unused_list);
         ns->ns_nr_unused = 0;
         ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
 
-        down(&ldlm_namespace_lock);
+        mutex_down(&ldlm_namespace_lock);
         list_add(&ns->ns_list_chain, &ldlm_namespace_list);
-        up(&ldlm_namespace_lock);
+        mutex_up(&ldlm_namespace_lock);
         ldlm_proc_namespace(ns);
         RETURN(ns);
 
@@ -374,12 +374,13 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags)
 /* Cleanup, but also free, the namespace */
 int ldlm_namespace_free(struct ldlm_namespace *ns, int force)
 {
+        ENTRY;
         if (!ns)
                 RETURN(ELDLM_OK);
 
-        down(&ldlm_namespace_lock);
+        mutex_down(&ldlm_namespace_lock);
         list_del(&ns->ns_list_chain);
-        up(&ldlm_namespace_lock);
+        mutex_up(&ldlm_namespace_lock);
 
         /* At shutdown time, don't call the cancellation callback */
         ldlm_namespace_cleanup(ns, 0);
@@ -444,17 +445,17 @@ static struct ldlm_resource *ldlm_resource_new(void)
 {
         struct ldlm_resource *res;
 
-        OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res);
+        OBD_SLAB_ALLOC(res, ldlm_resource_slab, CFS_ALLOC_IO, sizeof *res);
         if (res == NULL)
                 return NULL;
 
         memset(res, 0, sizeof(*res));
 
-        INIT_LIST_HEAD(&res->lr_children);
-        INIT_LIST_HEAD(&res->lr_childof);
-        INIT_LIST_HEAD(&res->lr_granted);
-        INIT_LIST_HEAD(&res->lr_converting);
-        INIT_LIST_HEAD(&res->lr_waiting);
+        CFS_INIT_LIST_HEAD(&res->lr_children);
+        CFS_INIT_LIST_HEAD(&res->lr_childof);
+        CFS_INIT_LIST_HEAD(&res->lr_granted);
+        CFS_INIT_LIST_HEAD(&res->lr_converting);
+        CFS_INIT_LIST_HEAD(&res->lr_waiting);
         sema_init(&res->lr_lvb_sem, 1);
         atomic_set(&res->lr_refcount, 1);
 
@@ -543,12 +544,12 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent,
                 /* Although this is technically a lock inversion risk (lvb_sem
                  * should be taken before DLM lock), this resource was just
                  * created, so nobody else can take the lvb_sem yet. -p */
-                down(&res->lr_lvb_sem);
+                mutex_down(&res->lr_lvb_sem);
                 /* Drop the dlm lock, because lvbo_init can touch the disk */
                 l_unlock(&ns->ns_lock);
                 OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2);
                 rc = ns->ns_lvbo->lvbo_init(res);
-                up(&res->lr_lvb_sem);
+                mutex_up(&res->lr_lvb_sem);
                 if (rc)
                         CERROR("lvbo_init failed for resource "LPU64"/"LPU64
                                ": rc %d\n", name.name[0], name.name[1], rc);
@@ -623,7 +624,7 @@ int ldlm_resource_putref(struct ldlm_resource *res)
 
                 if (atomic_dec_and_test(&ns->ns_refcount)) {
                         CDEBUG(D_DLMTRACE, "last ref on ns %s\n", ns->ns_name);
-                        wake_up(&ns->ns_refcount_waitq);
+                        cfs_waitq_signal(&ns->ns_refcount_waitq);
                 }
 
                 rc = 1;
@@ -695,7 +696,7 @@ void ldlm_dump_all_namespaces(int level)
 {
         struct list_head *tmp;
 
-        down(&ldlm_namespace_lock);
+        mutex_down(&ldlm_namespace_lock);
 
         list_for_each(tmp, &ldlm_namespace_list) {
                 struct ldlm_namespace *ns;
@@ -703,7 +704,7 @@ void ldlm_dump_all_namespaces(int level)
                 ldlm_namespace_dump(level, ns);
         }
 
-        up(&ldlm_namespace_lock);
+        mutex_up(&ldlm_namespace_lock);
 }
 
 void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
@@ -714,7 +715,7 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
                atomic_read(&ns->ns_refcount), ns->ns_client);
 
         l_lock(&ns->ns_lock);
-        if (time_after(jiffies, ns->ns_next_dump)) {
+        if (cfs_time_after(cfs_time_current(), ns->ns_next_dump)) {
                 list_for_each(tmp, &ns->ns_root_list) {
                         struct ldlm_resource *res;
                         res = list_entry(tmp, struct ldlm_resource, lr_childof);
@@ -723,7 +724,7 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns)
                          * really dump them recursively. */
                         ldlm_resource_dump(level, res);
                 }
-                ns->ns_next_dump = jiffies + 10 * HZ;
+                ns->ns_next_dump = cfs_time_shift(10);
         }
         l_unlock(&ns->ns_lock);
 }
index e944fe5..3ef736a 100644 (file)
@@ -46,8 +46,8 @@
 
 #undef LIST_HEAD
 
-#ifdef HAVE_LINUX_TYPES_H
-#include <linux/types.h>
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
 #elif defined(HAVE_SYS_TYPES_H)
 #include <sys/types.h>
 #endif
@@ -83,7 +83,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page)
         rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED,
                              &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh);
         if (!rc) {
-                llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
+                llu_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
                 rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, &it, LCK_CR,
                                 &op_data, &lockh, NULL, 0,
index 3047dee..a5190f4 100644 (file)
@@ -71,12 +71,12 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
         }
 }
 
-void llu_prepare_mdc_op_data(struct md_op_data *op_data,
-                             struct inode *i1,
-                             struct inode *i2,
-                             const char *name,
-                             int namelen,
-                             int mode)
+void llu_prepare_md_op_data(struct md_op_data *op_data,
+                            struct inode *i1,
+                            struct inode *i2,
+                            const char *name,
+                            int namelen,
+                            int mode)
 {
         LASSERT(i1);
 
index bdc94cc..d1c4c01 100644 (file)
@@ -48,7 +48,7 @@
 
 #include "lutil.h"
 #include "llite_lib.h"
-#include <linux/lustre_ver.h>
+#include <lustre_ver.h>
 
 /* allocates passed fid, that is assigns f_num and f_seq to the @fid */
 int llu_fid_md_alloc(struct llu_sb_info *sbi, struct lu_fid *fid)
index 5a3ad6c..cdb8f04 100644 (file)
@@ -48,7 +48,6 @@
 
 #include "lutil.h"
 #include "llite_lib.h"
-#include <linux/lustre_ver.h>
 
 static int lllib_init(void)
 {
@@ -131,9 +130,9 @@ int liblustre_process_log(struct config_llog_instance *cfg,
         ocd->ocd_version = LUSTRE_VERSION_CODE;
 
         /* Disable initial recovery on this import */
-        rc = obd_set_info(obd->obd_self_export,
-                          strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
-                          sizeof(allow_recov), &allow_recov);
+        rc = obd_set_info_async(obd->obd_self_export,
+                                strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
+                                sizeof(allow_recov), &allow_recov, NULL);
 
         rc = obd_connect(&mdc_conn, obd, &mdc_uuid, ocd);
         if (rc) {
@@ -243,17 +242,10 @@ int _sysio_lustre_init(void)
 {
         int err;
         char *timeout = NULL;
-        char *debug_mask = NULL;
-        char *debug_subsys = NULL;
 #ifndef INIT_SYSIO
         extern void __liblustre_cleanup_(void);
 #endif
 
-#if 0
-        libcfs_debug = -1;
-        libcfs_subsystem_debug = -1;
-#endif
-
         liblustre_init_random();
 
         err = lllib_init();
@@ -268,16 +260,6 @@ int _sysio_lustre_init(void)
                         obd_timeout);
         }
 
-        /* debug masks */
-        debug_mask = getenv("LIBLUSTRE_DEBUG_MASK");
-        if (debug_mask)
-                libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0);
-
-        debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS");
-        if (debug_subsys)
-                libcfs_subsystem_debug =
-                                (unsigned int) strtol(debug_subsys, NULL, 0);
-
 #ifndef INIT_SYSIO
         (void)atexit(__liblustre_cleanup_);
 #endif
index 874e6ce..f9e06b7 100644 (file)
@@ -6,10 +6,12 @@
 #define __LLU_H_
 
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_mds.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
+#include <lustre_ver.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -195,12 +197,12 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr);
 extern struct fssw_ops llu_fssw_ops;
 
 /* file.c */
-void llu_prepare_mdc_op_data(struct md_op_data *op_data,
-                             struct inode *i1,
-                             struct inode *i2,
-                             const char *name,
-                             int namelen,
-                             int mode);
+void llu_prepare_md_op_data(struct md_op_data *op_data,
+                            struct inode *i1,
+                            struct inode *i2,
+                            const char *name,
+                            int namelen,
+                            int mode);
 int llu_create(struct inode *dir, struct pnode_base *pnode, int mode);
 int llu_iop_open(struct pnode *pnode, int flags, mode_t mode);
 int llu_mdc_close(struct obd_export *md_exp, struct inode *inode);
index dc5e6e2..d235eb8 100644 (file)
@@ -23,8 +23,8 @@
 #define __LUTIL_H_
 
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <obd.h>
+#include <obd_class.h>
 
 void liblustre_init_random(void);
 int liblustre_init_current(char *comm);
index 65d2e46..a35f4b7 100644 (file)
@@ -270,7 +270,7 @@ static int llu_pb_revalidate(struct pnode *pnode, int flags,
                 it->it_op_release = ll_intent_release;
         }
 
-        llu_prepare_mdc_op_data(&op_data, pnode->p_parent->p_base->pb_ino,
+        llu_prepare_md_op_data(&op_data, pnode->p_parent->p_base->pb_ino,
                                 pb->pb_ino, pb->pb_name.name,pb->pb_name.len,0);
 
         rc = md_intent_lock(exp, &op_data, NULL, 0, it, flags,
@@ -448,9 +448,9 @@ static int llu_lookup_it(struct inode *parent, struct pnode *pnode,
                         LBUG();
                 }
         }
-        llu_prepare_mdc_op_data(&op_data, parent, NULL,
-                                pnode->p_base->pb_name.name,
-                                pnode->p_base->pb_name.len, flags);
+        llu_prepare_md_op_data(&op_data, parent, NULL,
+                               pnode->p_base->pb_name.name,
+                               pnode->p_base->pb_name.len, flags);
 
         rc = md_intent_lock(llu_i2mdcexp(parent), &op_data, NULL, 0, it,
                             flags, &req, llu_mdc_blocking_ast,
index 34a039a..0193ede 100644 (file)
@@ -51,7 +51,6 @@
 #undef LIST_HEAD
 
 #include "llite_lib.h"
-#include <linux/lustre_ver.h>
 
 #ifndef MAY_EXEC
 #define MAY_EXEC        1
@@ -667,7 +666,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr)
          * inode ourselves so we can call obdo_from_inode() always. */
         if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
                 struct lustre_md md;
-                llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
+                llu_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0);
 
                 rc = md_setattr(sbi->ll_md_exp, &op_data,
                                 attr, NULL, 0, NULL, 0, &request);
@@ -863,7 +862,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
                 CERROR("can't allocate new fid, rc %d\n", err);
                 RETURN(err);
         }
-        llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+        llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0);
         err = md_create(sbi->ll_md_exp, &op_data,
                         tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
                         current->fsuid, current->fsgid, current->cap_effective,
@@ -993,10 +992,10 @@ static int llu_iop_mknod_raw(struct pnode *pno,
                         RETURN(err);
                 }
 
-                llu_prepare_mdc_op_data(&op_data, dir, NULL,
-                                        pno->p_base->pb_name.name,
-                                        pno->p_base->pb_name.len,
-                                        0);
+                llu_prepare_md_op_data(&op_data, dir, NULL,
+                                       pno->p_base->pb_name.name,
+                                       pno->p_base->pb_name.len,
+                                       0);
                 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
                                 current->fsuid, current->fsgid,
                                 current->cap_effective, dev, &request);
@@ -1027,7 +1026,7 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
         LASSERT(dir);
 
         liblustre_wait_event(0);
-        llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
+        llu_prepare_md_op_data(&op_data, src, dir, name, namelen, 0);
         rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request);
         ptlrpc_req_finished(request);
         liblustre_wait_event(0);
@@ -1053,7 +1052,7 @@ static int llu_iop_unlink_raw(struct pnode *pno)
         LASSERT(target);
 
         liblustre_wait_event(0);
-        llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+        llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0);
         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
         if (!rc)
                 rc = llu_objects_destroy(request, dir);
@@ -1080,7 +1079,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
         LASSERT(tgt);
 
         liblustre_wait_event(0);
-        llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
+        llu_prepare_md_op_data(&op_data, src, tgt, NULL, 0, 0);
         rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data,
                        oldname, oldnamelen, newname, newnamelen,
                        &request);
@@ -1232,7 +1231,7 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
                 CERROR("can't allocate new fid, rc %d\n", err);
                 RETURN(err);
         }
-        llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
+        llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0);
         err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0, mode,
                         current->fsuid, current->fsgid, current->cap_effective,
                         0, &request);
@@ -1257,7 +1256,7 @@ static int llu_iop_rmdir_raw(struct pnode *pno)
                (long long)llu_i2stat(dir)->st_ino,
                llu_i2info(dir)->lli_st_generation, dir);
 
-        llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
+        llu_prepare_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
         ptlrpc_req_finished(request);
 
@@ -1798,8 +1797,8 @@ llu_fsswop_mount(const char *source,
                 CERROR("MDC %s: not setup or attached\n", mdc);
                 GOTO(out_free, err = -EINVAL);
         }
-        obd_set_info(obd->obd_self_export, strlen("async"), "async",
-                     sizeof(async), &async);
+        obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
+                           sizeof(async), &async, NULL);
 
         ocd.ocd_connect_flags = OBD_CONNECT_IBITS|OBD_CONNECT_VERSION;
         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
@@ -1833,8 +1832,8 @@ llu_fsswop_mount(const char *source,
                 CERROR("OSC %s: not setup or attached\n", osc);
                 GOTO(out_mdc, err = -EINVAL);
         }
-        obd_set_info(obd->obd_self_export, strlen("async"), "async",
-                     sizeof(async), &async);
+        obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
+                           sizeof(async), &async, NULL);
 
         obd->obd_upcall.onu_owner = &sbi->ll_lco;
         obd->obd_upcall.onu_upcall = ll_ocd_update;
index f24f93f..c47f052 100644 (file)
@@ -22,9 +22,9 @@
  */
 
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/obd_ost.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <obd_ost.h>
 
 #define LIBLUSTRE_TEST 1
 #include "../utils/lctl.c"
index 53e4735..ccab0c3 100644 (file)
@@ -49,6 +49,7 @@
 void *buf_alloc;
 int buf_size;
 int opt_verbose;
+struct timeval start;
 
 extern char *lustre_path;
 
@@ -64,17 +65,23 @@ extern char *lustre_path;
                         buf[80] = 0;                                    \
                 }                                                       \
                 printf("%s", buf);                                      \
+                gettimeofday(&start, NULL);                             \
         } while (0)
 
 #define LEAVE()                                                         \
         do {                                                            \
-                char buf[100];                                          \
-                int len;                                                \
-                sprintf(buf, "===== END TEST %s: successfully ",        \
-                        __FUNCTION__);                                  \
-                len = strlen(buf);                                      \
+                struct timeval stop;                                    \
+                char buf[100] = { '\0' };                               \
+                int len = sizeof(buf) - 1;                              \
+                long usec;                                              \
+                gettimeofday(&stop, NULL);                              \
+                usec = (stop.tv_sec - start.tv_sec) * 1000000 +         \
+                       (stop.tv_usec - start.tv_usec);                  \
+                len = snprintf(buf, len,                                \
+                               "===== END TEST %s: successfully (%gs)", \
+                               __FUNCTION__, (double)usec / 1000000);   \
                 if (len < 79) {                                         \
-                        memset(buf+len, '=', 100-len);                  \
+                        memset(buf+len, '=', sizeof(buf) - len);        \
                         buf[79] = '\n';                                 \
                         buf[80] = 0;                                    \
                 }                                                       \
@@ -1035,7 +1042,6 @@ int t51(char *name)
         printf("\n");
         LEAVE();
 }
-
 /*
  * check atime update during read
  */
index 96a0a49..5059937 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := llite
-llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o special.o symlink.o llite_mmap.o xattr.o
+llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o
 
 ifeq ($(PATCHLEVEL),4)
 llite-objs += rw24.o super.o
index bc781b4..c91ff99 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_version.h>
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_mdc.h>
+#include <lustre_ver.h>
 
 #include "llite_internal.h"
 
@@ -146,7 +147,6 @@ void ll_intent_release(struct lookup_intent *it)
 void ll_unhash_aliases(struct inode *inode)
 {
         struct list_head *tmp, *head;
-        struct ll_sb_info *sbi;
         ENTRY;
 
         if (inode == NULL) {
@@ -157,7 +157,6 @@ void ll_unhash_aliases(struct inode *inode)
         CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
                inode->i_ino, inode->i_generation, inode);
 
-        sbi = ll_i2sbi(inode);
         head = &inode->i_dentry;
 restart:
         spin_lock(&dcache_lock);
@@ -207,7 +206,7 @@ restart:
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                         __d_drop(dentry);
                         hlist_add_head(&dentry->d_hash,
-                                       &sbi->ll_orphan_dentry_list);
+                                       &ll_i2sbi(inode)->ll_orphan_dentry_list);
 #endif
                 }
                 unlock_dentry(dentry);
@@ -220,7 +219,6 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request,
                             int offset, struct lookup_intent *it,
                             struct dentry *de)
 {
-        struct ll_sb_info *sbi;
         int rc = 0;
         ENTRY;
 
@@ -230,8 +228,8 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request,
         if (it_disposition(it, DISP_LOOKUP_NEG))
                 RETURN(-ENOENT);
 
-        sbi = ll_i2sbi(de->d_inode);
-        rc = ll_prep_inode(&de->d_inode, request, offset, NULL);
+        rc = ll_prep_inode(&de->d_inode,
+                           request, offset, NULL);
 
         RETURN(rc);
 }
@@ -319,8 +317,8 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
                               de->d_name.len, 0);
 
         rc = md_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
-                            &req, ll_mdc_blocking_ast, 0);
-        /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
+                            &req, ll_md_blocking_ast, 0);
+        /* If req is NULL, then md_intent_lock only tried to do a lock match;
          * if all was well, it will return 1 if it found locks, 0 otherwise. */
         if (req == NULL && rc >= 0)
                 GOTO(out, rc);
@@ -338,6 +336,11 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags,
                 ll_intent_release(it);
                 GOTO(out, rc = 0);
         }
+        if ((it->it_op & IT_OPEN) && de->d_inode && 
+            !S_ISREG(de->d_inode->i_mode) && 
+            !S_ISDIR(de->d_inode->i_mode)) {
+                ll_release_openhandle(de, it);
+        }
         rc = 1;
 
         /* unfortunately ll_intent_lock may cause a callback and revoke our
index 8c788ca..1da3a5d 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_dlm.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
 #include "llite_internal.h"
 
 typedef struct ext2_dir_entry_2 ext2_dirent;
@@ -111,9 +110,8 @@ static inline unsigned long dir_pages(struct inode *inode)
 }
 
 
-static void ext2_check_page(struct page *page)
+static void ext2_check_page(struct inode *dir, struct page *page)
 {
-        struct inode *dir = page->mapping->host;
         unsigned chunk_size = ext2_chunk_size(dir);
         char *kaddr = page_address(page);
         //      u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count);
@@ -219,7 +217,7 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
 
                 rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, LDLM_IBITS, &it,
                                 LCK_CR, &op_data, &lockh, NULL, 0,
-                                ldlm_completion_ast, ll_mdc_blocking_ast, dir,
+                                ldlm_completion_ast, ll_md_blocking_ast, dir,
                                 0);
 
                 request = (struct ptlrpc_request *)it.d.lustre.it_data;
@@ -242,7 +240,7 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n)
         if (!PageUptodate(page))
                 goto fail;
         if (!PageChecked(page))
-                ext2_check_page(page);
+                ext2_check_page(dir, page);
         if (PageError(page))
                 goto fail;
 
@@ -316,7 +314,7 @@ int ll_readdir(struct file *filp, void *dirent, filldir_t filldir)
                        n, npages, inode->i_size);
                 page = ll_get_dir_page(inode, n);
 
-                /* size might have been updated by mdc_readpage */
+                /* size might have been updated by md_readpage */
                 npages = dir_pages(inode);
 
                 if (IS_ERR(page)) {
@@ -424,7 +422,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                                      filename, namelen, OBD_MD_FLID, 0,
                                      &request);
                 if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc);
+                        CDEBUG(D_INFO, "md_getattr_name: %d\n", rc);
                         GOTO(out, rc);
                 }
 
@@ -469,7 +467,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 if (rc) {
                         ptlrpc_req_finished(request);
                         if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                                CERROR("md_setattr fails: rc = %d\n", rc);
                         return rc;
                 }
                 ptlrpc_req_finished(request);
@@ -490,13 +488,13 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
                                 OBD_MD_FLDIREA, lmmsize, &request);
                 if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_getattr failed: rc = %d\n", rc);
+                        CDEBUG(D_INFO, "md_getattr failed: rc = %d\n", rc);
                         RETURN(rc);
                 }
 
                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
-                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
-                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+                LASSERT(body != NULL);         /* checked by md_getattr */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by md_getattr */
 
                 lmmsize = body->eadatasize;
                 if (lmmsize == 0)
@@ -548,14 +546,14 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                                      filename, strlen(filename) + 1,
                                      OBD_MD_FLEASIZE, lmmsize, &request);
                 if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_getattr_name failed on %s: rc %d\n",
+                        CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n",
                                filename, rc);
                         GOTO(out_name, rc);
                 }
 
                 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body));
-                LASSERT(body != NULL);         /* checked by mdc_getattr_name */
-                LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */
+                LASSERT(body != NULL);         /* checked by md_getattr_name */
+                LASSERT_REPSWABBED(request, 0);/* swabbed by md_getattr_name */
 
                 lmmsize = body->eadatasize;
 
@@ -733,13 +731,13 @@ out_free_memmd:
                 oqctl->qc_type = arg;
                 rc = obd_quotacheck(sbi->ll_md_exp, oqctl);
                 if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc);
+                        CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc);
                         error = rc;
                 }
 
                 rc = obd_quotacheck(sbi->ll_dt_exp, oqctl);
                 if (rc < 0)
-                        CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc);
+                        CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc);
 
                 OBD_FREE_PTR(oqctl);
                 return error ?: rc;
index c17c997..90407ad 100644 (file)
@@ -23,8 +23,9 @@
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_lite.h>
+#include <lustre_mdc.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
@@ -47,25 +48,14 @@ static void ll_file_data_put(struct ll_file_data *fd)
                 OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd);
 }
 
-int ll_mdc_close(struct obd_export *md_exp, struct inode *inode,
-                 struct file *file)
+static int ll_close_inode_openhandle(struct obd_export *md_exp,
+                                     struct inode *inode,
+                                     struct obd_client_handle *och)
 {
-        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+        struct md_op_data op_data = { { 0 } };
         struct ptlrpc_request *req = NULL;
-        struct obd_client_handle *och = &fd->fd_mds_och;
-        struct md_op_data op_data;
         int rc;
-        ENTRY;
-
-        /* clear group lock, if present */
-        if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
-                fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-                rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
-                                      &fd->fd_cwlockh);
-        }
 
-        memset(&op_data, 0, sizeof(op_data));
         op_data.fid1 = ll_i2info(inode)->lli_fid;
         op_data.valid = OBD_MD_FLTYPE | OBD_MD_FLMODE |
                         OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
@@ -94,15 +84,37 @@ int ll_mdc_close(struct obd_export *md_exp, struct inode *inode,
                 CERROR("inode %lu mdc close failed: rc = %d\n",
                        inode->i_ino, rc);
         }
+
         if (rc == 0) {
-                rc = ll_objects_destroy(req, file->f_dentry->d_inode);
+                rc = ll_objects_destroy(req, inode);
                 if (rc)
                         CERROR("inode %lu ll_objects destroy: rc = %d\n",
                                inode->i_ino, rc);
         }
 
         md_clear_open_replay_data(md_exp, och);
-        ptlrpc_req_finished(req);
+        ptlrpc_req_finished(req); /* This is the close request */
+
+        RETURN(rc);
+}
+
+int ll_md_close(struct obd_export *md_exp, struct inode *inode,
+                struct file *file)
+{
+        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+        struct obd_client_handle *och = &fd->fd_mds_och;
+        int rc;
+        ENTRY;
+
+        /* clear group lock, if present */
+        if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
+                struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
+                fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
+                rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP,
+                                      &fd->fd_cwlockh);
+        }
+        
+        rc = ll_close_inode_openhandle(md_exp, inode, och);
         och->och_fh.cookie = DEAD_HANDLE_MAGIC;
         LUSTRE_FPRIVATE(file) = NULL;
         ll_file_data_put(fd);
@@ -141,7 +153,7 @@ int ll_file_release(struct inode *inode, struct file *file)
                 lov_test_and_clear_async_rc(lsm);
         lli->lli_async_rc = 0;
 
-        rc = ll_mdc_close(sbi->ll_md_exp, inode, file);
+        rc = ll_md_close(sbi->ll_md_exp, inode, file);
         RETURN(rc);
 }
 
@@ -164,40 +176,54 @@ static int ll_intent_file_open(struct file *file, void *lmm,
 
         rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PW, &op_data,
                         &lockh, lmm, lmmsize, ldlm_completion_ast,
-                        ll_mdc_blocking_ast, NULL, 0);
-        if (rc < 0)
+                        ll_md_blocking_ast, NULL, 0);
+        if (rc < 0) {
                 CERROR("lock enqueue: err: %d\n", rc);
+                GOTO(out, rc);
+        }
+
+        rc = ll_prep_inode(&file->f_dentry->d_inode,
+                           (struct ptlrpc_request *)itp->d.lustre.it_data, 1,
+                            NULL);
+out:
         RETURN(rc);
 }
 
+static void ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli, 
+                        struct lookup_intent *it, struct obd_client_handle *och)
+{
+        struct ptlrpc_request *req = it->d.lustre.it_data;
+        struct mds_body *body;
+
+        LASSERT(och);
+
+        body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body));
+        LASSERT(body != NULL);                  /* reply already checked out */
+        LASSERT_REPSWABBED(req, 1);             /* and swabbed in md_enqueue */
+
+        memcpy(&och->och_fh, &body->handle, sizeof(body->handle));
+        och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        lli->lli_io_epoch = body->io_epoch;
+
+        md_set_open_replay_data(md_exp, och, it->d.lustre.it_data);
+}
+
 int ll_local_open(struct file *file, struct lookup_intent *it,
                   struct ll_file_data *fd)
 {
-        struct ptlrpc_request *req = it->d.lustre.it_data;
         struct inode *inode = file->f_dentry->d_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
-        struct ll_sb_info *sbi = ll_i2sbi(inode);
-        struct mdt_body *body;
         ENTRY;
 
-        body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body));
-        LASSERT (body != NULL);                 /* reply already checked out */
-        LASSERT_REPSWABBED (req, 1);            /* and swabbed down */
-
         LASSERT(!LUSTRE_FPRIVATE(file));
 
         LASSERT(fd != NULL);
 
-        memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle));
-        fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC;
+        ll_och_fill(ll_i2sbi(inode)->ll_md_exp,
+                    ll_i2info(inode), it, &fd->fd_mds_och);
+                    
         LUSTRE_FPRIVATE(file) = fd;
         ll_readahead_init(inode, &fd->fd_ras);
 
-        lli->lli_io_epoch = body->io_epoch;
-
-        md_set_open_replay_data(sbi->ll_md_exp, &fd->fd_mds_och,
-                                it->d.lustre.it_data);
-
         RETURN(0);
 }
 
@@ -241,6 +267,21 @@ int ll_file_open(struct inode *inode, struct file *file)
                 RETURN(-ENOMEM);
 
         if (!it || !it->d.lustre.it_disposition) {
+                /* Convert f_flags into access mode. We cannot use file->f_mode,
+                 * because everything but O_ACCMODE mask was stripped from
+                 * there */
+                if ((oit.it_flags + 1) & O_ACCMODE)
+                        oit.it_flags++;
+                if (oit.it_flags & O_TRUNC)
+                        oit.it_flags |= FMODE_WRITE;
+
+                if (oit.it_flags & O_CREAT)
+                        oit.it_flags |= MDS_OPEN_OWNEROVERRIDE;
+
+                /* We do not want O_EXCL here, presumably we opened the file
+                 * already? XXX - NFS implications? */
+                oit.it_flags &= ~O_EXCL;
+
                 it = &oit;
                 rc = ll_intent_file_open(file, NULL, 0, it);
                 if (rc) {
@@ -900,7 +941,7 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
                 /* A glimpse is necessary to determine whether we return a
                  * short read (B) or some zeroes at the end of the buffer (C) */
                 ll_inode_size_unlock(inode, 1);
-                retval = ll_glimpse_size(inode, 0);
+                retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
                 if (retval)
                         goto out;
         } else {
@@ -1032,6 +1073,10 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
         lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES,
                             count);
 
+        /* File with no objects, nothing to lock */
+        if (!lsm)
+                RETURN(generic_file_sendfile(in_file, ppos, count, actor, target));
+
         node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR);
         tree.lt_fd = LUSTRE_FPRIVATE(in_file);
         rc = ll_tree_lock(&tree, node, NULL, count,
@@ -1066,7 +1111,7 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count,
                 /* A glimpse is necessary to determine whether we return a
                  * short read (B) or some zeroes at the end of the buffer (C) */
                 ll_inode_size_unlock(inode, 1);
-                retval = ll_glimpse_size(inode, 0);
+                retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
                 if (retval)
                         goto out;
         } else {
@@ -1412,7 +1457,7 @@ static int join_file(struct inode *head_inode, struct file *head_filp,
         
         rc = md_enqueue(ll_i2mdexp(head_inode), LDLM_IBITS, &oit, LCK_PW,
                         op_data, &lockh, &tsize, 0, ldlm_completion_ast,
-                        ll_mdc_blocking_ast, &hsize, 0);
+                        ll_md_blocking_ast, &hsize, 0);
 
         if (rc < 0)
                 GOTO(out, rc);
@@ -1443,7 +1488,7 @@ out:
 static int ll_file_join(struct inode *head, struct file *filp,
                         char *filename_tail)
 {
-        struct inode *tail = NULL, *first, *second;
+        struct inode *tail = NULL, *first = NULL, *second = NULL;
         struct dentry *tail_dentry;
         struct file *tail_filp, *first_filp, *second_filp;
         struct ll_lock_tree first_tree, second_tree;
@@ -1533,6 +1578,40 @@ cleanup:
         RETURN(rc);
 }
 
+/*
+ * Close the MDS open handle carried by an open intent.
+ *
+ * Returns 0 without touching the intent when @dentry is the superblock root
+ * or when the intent does not have DISP_OPEN_OPEN set.  Otherwise a
+ * temporary obd_client_handle is allocated, filled from the intent reply by
+ * ll_och_fill() and closed with ll_close_inode_openhandle(), whose result
+ * is returned (-ENOMEM if the allocation fails).  In both of those cases
+ * the request stored in the intent is dropped with ptlrpc_req_finished().
+ */
+int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it)
+{
+        struct inode *inode = dentry->d_inode;
+        struct obd_export *md_exp;
+        struct obd_client_handle *och;
+        int rc;
+        ENTRY;
+
+        LASSERT(inode);
+
+        /* Nothing to release for the root dentry. */
+        if (inode->i_sb->s_root == dentry)
+                RETURN(0);
+
+        /* The intent holds no open handle, so there is nothing to close. */
+        if (!it_disposition(it, DISP_OPEN_OPEN))
+                RETURN(0);
+
+        OBD_ALLOC(och, sizeof(*och));
+        if (och == NULL)
+                GOTO(out, rc = -ENOMEM);
+
+        md_exp = ll_i2sbi(inode)->ll_md_exp;
+        ll_och_fill(md_exp, ll_i2info(inode), it, och);
+        rc = ll_close_inode_openhandle(md_exp, inode, och);
+
+        OBD_FREE(och, sizeof(*och));
+ out:
+        /* this one is in place of ll_file_open */
+        ptlrpc_req_finished(it->d.lustre.it_data);
+        RETURN(rc);
+}
+
 int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                   unsigned long arg)
 {
@@ -1835,7 +1914,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
         ll_prepare_md_op_data(&op_data, inode, inode, NULL, 0, 0);
         
         rc = md_intent_lock(sbi->ll_md_exp, &op_data, NULL, 0, &oit, 0,
-                            &req, ll_mdc_blocking_ast, 0);
+                            &req, ll_md_blocking_ast, 0);
 
         if (rc < 0)
                 GOTO(out, rc);
@@ -1860,8 +1939,8 @@ out:
 }
 
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int ll_getattr(struct vfsmount *mnt, struct dentry *de,
-               struct lookup_intent *it, struct kstat *stat)
+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
+                  struct lookup_intent *it, struct kstat *stat)
 {
         struct inode *inode = de->d_inode;
         int res = 0;
@@ -1891,6 +1970,12 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de,
 
         return 0;
 }
+/*
+ * Intent-less getattr entry point: builds a local IT_GETATTR intent and
+ * forwards to ll_getattr_it(), returning its result unchanged.
+ */
+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
+{
+        struct lookup_intent getattr_it = { .it_op = IT_GETATTR };
+
+        return ll_getattr_it(mnt, de, &getattr_it, stat);
+}
 #endif
 
 static
@@ -2011,7 +2096,7 @@ struct inode_operations ll_file_inode_operations = {
         .setattr        = ll_setattr,
         .truncate       = ll_truncate,
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        .getattr_it     = ll_getattr,
+        .getattr_it     = ll_getattr_it,
 #else
         .revalidate_it  = ll_inode_revalidate_it,
 #endif
index 69c98b2..32da443 100644 (file)
@@ -25,8 +25,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 
 /* record that a write is in flight */
@@ -196,18 +196,12 @@ static int ll_close_thread(void *arg)
         struct ll_close_queue *lcq = arg;
         ENTRY;
 
-        /* XXX boiler-plate */
         {
-                char name[sizeof(current->comm)];
-                unsigned long flags;
+                char name[CFS_CURPROC_COMM_MAX];
                 snprintf(name, sizeof(name) - 1, "ll_close");
-                libcfs_daemonize(name);
-                SIGNAL_MASK_LOCK(current, flags);
-                sigfillset(&current->blocked);
-                RECALC_SIGPENDING;
-                SIGNAL_MASK_UNLOCK(current, flags);
+                cfs_daemonize(name);
         }
-
+        
         complete(&lcq->lcq_comp);
 
         while (1) {
index 5b28077..e3ffc98 100644 (file)
 #include <linux/random.h>
 #include <linux/version.h>
 
-#include <linux/lustre_lite.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_ver.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_disk.h>
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_ver.h>
+#include <lustre_dlm.h>
+#include <lustre_disk.h>
 #include "llite_internal.h"
 
 static int ll_fid_alloc(struct obd_export *exp, struct lu_fid *fid,
index 0e1e113..3ec657a 100644 (file)
@@ -9,10 +9,11 @@
 # include <linux/fs.h>
 # include <linux/xattr_acl.h>
 #endif
-
-#include <linux/lustre_debug.h>
 #include <linux/lustre_version.h>
-#include <linux/lustre_disk.h>  /* for s2sbi */
+
+#include <lustre_debug.h>
+#include <lustre_ver.h>
+#include <lustre_disk.h>  /* for s2sbi */
  
 /*
 struct lustre_intent_data {
@@ -77,11 +78,6 @@ struct ll_inode_info {
         /* for writepage() only to communicate to fsync */
         int                     lli_async_rc;
 
-        struct file_operations *ll_save_ifop;
-        struct file_operations *ll_save_ffop;
-        struct file_operations *ll_save_wfop;
-        struct file_operations *ll_save_wrfop;
-
         struct posix_acl       *lli_posix_acl;
 
         struct list_head        lli_dead_list;
@@ -120,6 +116,10 @@ static inline struct ll_inode_info *ll_i2info(struct inode *inode)
 /* default to about 40meg of readahead on a given system.  That much tied
  * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */
 #define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT))
+
+/* default to read-ahead full files smaller than 2MB on the second read */
+#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT))
+
 enum ra_stat {
         RA_STAT_HIT = 0,
         RA_STAT_MISS,
@@ -139,6 +139,7 @@ enum ra_stat {
 struct ll_ra_info {
         unsigned long             ra_cur_pages;
         unsigned long             ra_max_pages;
+        unsigned long             ra_max_read_ahead_whole_pages;
         unsigned long             ra_stats[_NR_RA_STAT];
 };
 
@@ -213,7 +214,13 @@ struct ll_readahead_state {
          * case, it probably doesn't make sense to expand window to
          * PTLRPC_MAX_BRW_PAGES on the third access.
          */
-        unsigned long   ras_consecutive;
+        unsigned long   ras_consecutive_pages;
+        /*
+         * Number of read requests since the last read-ahead window reset.
+         * As the window is reset on each seek, this is effectively the
+         * number of consecutive read requests and is used to trigger
+         * read-ahead.
+         */
+        unsigned long   ras_consecutive_requests;
         /*
          * Parameters of current read-ahead window. Handled by
          * ras_update(). On the initial access to the file or after a seek,
@@ -231,6 +238,17 @@ struct ll_readahead_state {
          */
         unsigned long   ras_next_readahead;
         /*
+         * Total number of ll_file_read requests issued, reads originating
+         * due to mmap are not counted in this total.  This value is used to
+         * trigger full file read-ahead after multiple reads to a small file.
+         */
+        unsigned long   ras_requests;
+        /*
+         * Page index with respect to the current request. This value
+         * will not be accurate when dealing with reads issued via mmap.
+         */
+        unsigned long   ras_request_index;
+        /*
          * list of struct ll_ra_read's one per read(2) call current in
          * progress against this file descriptor. Used by read-ahead code,
          * protected by ->ras_lock.
@@ -339,8 +357,8 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir);
 struct inode *ll_iget(struct super_block *sb, ino_t hash,
                       struct lustre_md *lic);
 struct dentry *ll_find_alias(struct inode *, struct dentry *);
-int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
-                        void *data, int flag);
+int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
+                       void *data, int flag);
 void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1,
                            struct inode *i2, const char *name, int namelen,
                            int mode);
@@ -377,11 +395,13 @@ int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
 int ll_glimpse_size(struct inode *inode, int ast_flags);
 int ll_local_open(struct file *file,
                   struct lookup_intent *it, struct ll_file_data *fd);
-int ll_mdc_close(struct obd_export *md_exp, struct inode *inode,
-                 struct file *file);
+int ll_release_openhandle(struct dentry *, struct lookup_intent *);
+int ll_md_close(struct obd_export *md_exp, struct inode *inode,
+                struct file *file);
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-int ll_getattr(struct vfsmount *mnt, struct dentry *de,
+int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                struct lookup_intent *it, struct kstat *stat);
+int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
 #endif
 struct ll_file_data *ll_file_data_get(void);
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
@@ -434,6 +454,7 @@ int ll_obd_statfs(struct inode *inode, void *arg);
 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize);
 
 /* llite/llite_nfs.c */
+extern struct export_operations lustre_export_operations;
 __u32 get_uuid2int(const char *name, int len);
 struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len,
                                int fhtype, int parent);
index 69058a9..b37cc06 100644 (file)
 #include <linux/random.h>
 #include <linux/version.h>
 
-#include <linux/lustre_idl.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_ver.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_disk.h>
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_disk.h>
 #include "llite_internal.h"
 
 kmem_cache_t *ll_file_data_slab;
@@ -70,6 +68,8 @@ struct ll_sb_info *ll_init_sbi(void)
                 sbi->ll_async_page_max = (num_physpages / 4) * 3;
         sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8,
                                            SBI_DEFAULT_READAHEAD_MAX);
+        sbi->ll_ra_info.ra_max_read_ahead_whole_pages = 
+                                           SBI_DEFAULT_READAHEAD_WHOLE_MAX;
 
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
         INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list);
@@ -148,7 +148,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         struct obd_statfs osfs;
         struct ptlrpc_request *request = NULL;
         struct lustre_handle osc_conn = {0, };
-        struct lustre_handle mdc_conn = {0, };
+        struct lustre_handle md_conn = {0, };
         struct obd_connect_data *data = NULL;
         struct lustre_md md;
         int err;
@@ -172,7 +172,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         }
 
         /* indicate that inodebits locking is supported by this client */
-        data->ocd_connect_flags |= OBD_CONNECT_IBITS;
+        data->ocd_connect_flags |= OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH;
         data->ocd_ibits_known = MDS_INODELOCK_FULL;
 
         if (sb->s_flags & MS_RDONLY)
@@ -193,7 +193,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         /* real client */
         data->ocd_connect_flags |= OBD_CONNECT_REAL;
 
-        err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data);
+        err = obd_connect(&md_conn, obd, &sbi->ll_sb_uuid, data);
         if (err == -EBUSY) {
                 CERROR("An MDT (mdc %s) is performing recovery, of which this"
                        " client is not a part.  Please wait for recovery to "
@@ -203,7 +203,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
                 GOTO(out, err);
         }
-        sbi->ll_md_exp = class_conn2export(&mdc_conn);
+        sbi->ll_md_exp = class_conn2export(&md_conn);
 
         err = obd_statfs(obd, &osfs, jiffies - HZ);
         if (err)
@@ -247,11 +247,11 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         obd = class_name2obd(osc);
         if (!obd) {
                 CERROR("OSC %s: not setup or attached\n", osc);
-                GOTO(out_mdc, err);
+                GOTO(out_mdc, err = -ENODEV);
         }
 
         data->ocd_connect_flags =
-                OBD_CONNECT_GRANT|OBD_CONNECT_VERSION|OBD_CONNECT_REQPORTAL;
+                OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL;
 
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
@@ -307,6 +307,9 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         sbi->ll_root_fid = rootfid;
 
         sb->s_op = &lustre_super_operations;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+        sb->s_export_op = &lustre_export_operations;
+#endif
 
         /* make root inode
          * XXX: move this to after cbd setup? */
@@ -315,7 +318,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                          (sbi->ll_flags & LL_SBI_ACL ? OBD_MD_FLACL : 0),
                          0, &request);
         if (err) {
-                CERROR("mdc_getattr failed for root: rc = %d\n", err);
+                CERROR("md_getattr failed for root: rc = %d\n", err);
                 GOTO(out_osc, err);
         }
 
@@ -713,7 +716,7 @@ void ll_put_super(struct super_block *sb)
         struct lustre_sb_info *lsi = s2lsi(sb);
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         char *profilenm = get_profile_name(sb);
-        int next = 0;
+        int next;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
@@ -724,18 +727,19 @@ void ll_put_super(struct super_block *sb)
 
         obd = class_exp2obd(sbi->ll_md_exp);
         if (obd) {
-                int next = 0;
                 int force = obd->obd_no_recov;
                 /* We need to set force before the lov_disconnect in
                 lustre_common_put_super, since l_d cleans up osc's as well. */
-                while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next))
+                next = 0;
+                while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) 
                        != NULL) {
                         obd->obd_force = force;
                 }
         }
 
         client_common_put_super(sb);
-
+                
+        next = 0;
         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) {
                 class_manual_cleanup(obd);
         }
@@ -748,7 +752,7 @@ void ll_put_super(struct super_block *sb)
 
         lustre_common_put_super(sb);
 
-        CDEBUG(D_WARNING, "client umount done\n");
+        LCONSOLE_WARN("client umount complete\n");
         EXIT;
 } /* client_put_super */
 
@@ -923,7 +927,6 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
                        CURRENT_SECONDS);
 
-
         /* NB: ATTR_SIZE will only be set after this point if the size
          * resides on the MDS, ie, this file has no objects. */
         if (lsm)
@@ -941,8 +944,17 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
 
                 if (rc) {
                         ptlrpc_req_finished(request);
-                        if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                        if (rc == -ENOENT) {
+                                inode->i_nlink = 0;
+                                /* Unlinked special device node? Or just a race?
+                                 * Pretend we have done everything. */
+                                if (!S_ISREG(inode->i_mode) &&
+                                    !S_ISDIR(inode->i_mode) &&
+                                    !S_ISDIR(inode->i_mode))
+                                        rc = inode_setattr(inode, attr);
+                        } else if (rc != -EPERM && rc != -EACCES) {
+                                CERROR("mdcsetattr fails: rc = %d\n", rc);
+                        }
                         RETURN(rc);
                 }
 
@@ -1061,8 +1073,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr)
 
 int ll_setattr(struct dentry *de, struct iattr *attr)
 {
-        LBUG(); /* code is unused, but leave this in case of VFS changes */
-        RETURN(-ENOSYS);
+        return ll_setattr_raw(de->d_inode, attr);
 }
 
 int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
@@ -1075,7 +1086,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
 
         rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age);
         if (rc) {
-                CERROR("mdc_statfs fails: rc = %d\n", rc);
+                CERROR("md_statfs fails: rc = %d\n", rc);
                 RETURN(rc);
         }
 
@@ -1356,16 +1367,6 @@ void ll_read_inode2(struct inode *inode, void *opaque)
 #else
                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
 #endif
-                lli->ll_save_ifop = inode->i_fop;
-
-                if (S_ISCHR(inode->i_mode))
-                        inode->i_fop = &ll_special_chr_inode_fops;
-                else if (S_ISBLK(inode->i_mode))
-                        inode->i_fop = &ll_special_blk_inode_fops;
-                else if (S_ISFIFO(inode->i_mode))
-                        inode->i_fop = &ll_special_fifo_inode_fops;
-                else if (S_ISSOCK(inode->i_mode))
-                        inode->i_fop = &ll_special_sock_inode_fops;
                 EXIT;
         }
 }
@@ -1418,7 +1419,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
         }
         case EXT3_IOC_SETFLAGS: {
                 struct md_op_data op_data = { { 0 } };
-                struct iattr attr;
+                struct ll_iattr_struct attr;
                 struct obdo *oa;
                 struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
 
@@ -1433,10 +1434,10 @@ int ll_iocontrol(struct inode *inode, struct file *file,
 
                 memset(&attr, 0x0, sizeof(attr));
                 attr.ia_attr_flags = flags;
-                attr.ia_valid |= ATTR_ATTR_FLAG;
+                ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG;
 
                 rc = md_setattr(sbi->ll_md_exp, &op_data,
-                                &attr, NULL, 0, NULL, 0, &req);
+                                (struct iattr *)&attr, NULL, 0, NULL, 0, &req);
                 if (rc || lsm == NULL) {
                         ptlrpc_req_finished(req);
                         obdo_free(oa);
@@ -1453,7 +1454,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                 obdo_free(oa);
                 if (rc) {
                         if (rc != -EPERM && rc != -EACCES)
-                                CERROR("mdc_setattr fails: rc = %d\n", rc);
+                                CERROR("md_setattr fails: rc = %d\n", rc);
                         RETURN(rc);
                 }
 
@@ -1512,6 +1513,7 @@ void ll_umount_begin(struct super_block *sb)
                 EXIT;
                 return;
         }
+
         obd->obd_no_recov = 1;
         obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, sizeof ioc_data,
                       &ioc_data, NULL);
@@ -1533,8 +1535,9 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data)
  
         if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
                 read_only = *flags & MS_RDONLY;
-                err = obd_set_info(sbi->ll_md_exp, strlen("read-only"),
-                                   "read-only", sizeof(read_only), &read_only);
+                err = obd_set_info_async(sbi->ll_md_exp, strlen("read-only"),
+                                         "read-only", sizeof(read_only), 
+                                         &read_only, NULL);
                 if (err) {
                         CERROR("Failed to change the read-only flag during "
                                "remount: %d\n", err);
index 8254e98..f0de698 100644 (file)
@@ -44,8 +44,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
@@ -407,7 +407,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address,
 
         if (pgoff >= size) {
                 lov_stripe_unlock(lsm);
-                ll_glimpse_size(inode, 0);
+                ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED);
         } else {
                 /* XXX change inode size without ll_inode_size_lock() held!
                  *     there is a race condition with truncate path. (see
index d458ebf..fb05f98 100644 (file)
@@ -22,7 +22,7 @@
  */
 
 #define DEBUG_SUBSYSTEM S_LLITE
-#include <linux/lustre_lite.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 
 __u32 get_uuid2int(const char *name, int len)
@@ -103,7 +103,9 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb,
 {
         struct inode *inode;
         struct dentry *result;
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         struct list_head *lp;
+#endif
 
         if (fid_num(fid) == 0)
                 return ERR_PTR(-ESTALE);
@@ -120,6 +122,13 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb,
                 return ERR_PTR(-ESTALE);
         }
 
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        result = d_alloc_anon(inode);
+        if (!result) {
+                iput(inode);
+                return ERR_PTR(-ENOMEM);
+        }
+#else
         /* now to find a dentry.
          * If possible, get a well-connected one
          */
@@ -145,6 +154,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb,
         }
         result->d_flags |= DCACHE_DISCONNECTED;
 
+#endif
         ll_set_dd(result);
         result->d_op = &ll_d_ops;
         return result;
@@ -234,3 +244,59 @@ int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp,
         *lenp = 5;
         return 1;
 }
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+/*
+ * export_operations ->get_dentry hook.
+ *
+ * @data is read as an array of __u32: word 0 becomes the FID sequence and
+ * word 1 the FID object id.  The resulting FID is looked up as a regular
+ * file (S_IFREG) through ll_iget_for_nfs(), whose return value (a dentry or
+ * an ERR_PTR) is passed straight back to the caller.
+ */
+struct dentry *ll_get_dentry(struct super_block *sb, void *data)
+{
+        __u32 *inump = (__u32*)data;
+        /* FIXME: seems this is not enough */
+        /* The designated initializer zero-fills every member that is not
+         * named, so no indeterminate value reaches ll_iget_for_nfs(). */
+        struct lu_fid fid = {
+                .f_seq = inump[0],
+                .f_oid = inump[1],
+        };
+
+        return ll_iget_for_nfs(sb, &fid, S_IFREG);
+}
+
+/*
+ * export_operations ->get_parent hook.
+ *
+ * Issues md_getattr_name() for ".." relative to @dchild's inode, which must
+ * be a directory, and then obtains a dentry through ll_iget_for_nfs().
+ *
+ * Returns the dentry on success, ERR_PTR(rc) when the getattr fails, or the
+ * ERR_PTR produced by ll_iget_for_nfs().
+ */
+struct dentry *ll_get_parent(struct dentry *dchild)
+{
+        struct ptlrpc_request *req = NULL;
+        struct inode *dir = dchild->d_inode;
+        struct ll_sb_info *sbi;
+        struct dentry *result;
+        struct mds_body *body;
+        char dotdot[] = "..";
+        int  rc;
+        ENTRY;
+
+        LASSERT(dir && S_ISDIR(dir->i_mode));
+
+        sbi = ll_s2sbi(dir->i_sb);
+        rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir),
+                             dotdot, strlen(dotdot) + 1,
+                             0, 0, &req);
+        if (rc) {
+                CERROR("failure %d inode %lu get parent\n", rc, dir->i_ino);
+                /* Leave through RETURN so the ENTRY trace is balanced. */
+                RETURN(ERR_PTR(rc));
+        }
+
+        body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body));
+        /* Do not dereference the reply body before checking it exists. */
+        LASSERT(body != NULL);
+        LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID));
+
+        /* NOTE(review): the dentry is built from ll_inode2fid(dir), i.e. the
+         * FID of the child directory itself; nothing from the reply body is
+         * used apart from the assertion above.  Confirm whether the parent
+         * FID from the reply was intended here. */
+        result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), S_IFDIR);
+
+        /* result is either a valid dentry or already an ERR_PTR, so it can
+         * be handed back as is once the request has been dropped. */
+        ptlrpc_req_finished(req);
+        RETURN(result);
+}
+
+/* Export hooks for this filesystem: dentry lookup from handle data and
+ * parent lookup for a directory dentry. */
+struct export_operations lustre_export_operations = {
+        .get_dentry = ll_get_dentry,
+        .get_parent = ll_get_parent,
+};
+#endif
index 4cbf3ed..e08a466 100644 (file)
 #define DEBUG_SUBSYSTEM S_LLITE
 
 #include <linux/version.h>
-#include <linux/lustre_lite.h>
-#include <linux/lprocfs_status.h>
+#include <lustre_lite.h>
+#include <lprocfs_status.h>
 #include <linux/seq_file.h>
-#include <linux/obd_support.h>
+#include <obd_support.h>
 
 #include "llite_internal.h"
 
@@ -201,7 +201,7 @@ static int ll_wr_max_readahead_mb(struct file *file, const char *buffer,
                 return rc;
 
         if (val < 0 || val > (num_physpages >> (20 - PAGE_CACHE_SHIFT - 1))) {
-                CERROR("can't set readahead more than %lu MB\n",
+                CERROR("can't set file readahead more than %lu MB\n",
                         num_physpages >> (20 - PAGE_CACHE_SHIFT - 1));
                 return -ERANGE;
         }
@@ -213,6 +213,50 @@ static int ll_wr_max_readahead_mb(struct file *file, const char *buffer,
         return count;
 }
 
+/*
+ * procfs read handler for "max_read_ahead_whole_mb".
+ *
+ * @data is the super_block.  The whole-file read-ahead limit is sampled
+ * under sbi->ll_lock, converted from pages to megabytes and printed into
+ * @page as an unsigned decimal followed by a newline.  Returns the value
+ * returned by snprintf().
+ */
+static int ll_rd_max_read_ahead_whole_mb(char *page, char **start, off_t off,
+                                         int count, int *eof, void *data)
+{
+        struct ll_sb_info *sbi = ll_s2sbi((struct super_block *)data);
+        unsigned long whole_pages;
+
+        spin_lock(&sbi->ll_lock);
+        whole_pages = sbi->ll_ra_info.ra_max_read_ahead_whole_pages;
+        spin_unlock(&sbi->ll_lock);
+
+        return snprintf(page, count, "%u\n",
+                        (unsigned)(whole_pages >> (20 - PAGE_CACHE_SHIFT)));
+}
+
+/*
+ * procfs write handler for "max_read_ahead_whole_mb".
+ *
+ * @data is the super_block.  The value is parsed with
+ * lprocfs_write_helper(); a parse failure is returned as is.  Values that
+ * are negative or larger than the current ra_max_pages limit (expressed in
+ * megabytes) are rejected with -ERANGE.  Otherwise the value is converted
+ * to pages and stored under sbi->ll_lock, and @count is returned.
+ */
+static int ll_wr_max_read_ahead_whole_mb(struct file *file, const char *buffer,
+                                         unsigned long count, void *data)
+{
+        struct ll_sb_info *sbi = ll_s2sbi((struct super_block *)data);
+        unsigned long limit_mb;
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        /* Cap this at the current max readahead window size, the readahead
+         * algorithm does this anyway so it's pointless to set it larger. */
+        limit_mb = sbi->ll_ra_info.ra_max_pages >> (20 - PAGE_CACHE_SHIFT);
+        if (val < 0 || val > limit_mb) {
+                CERROR("can't set max_read_ahead_whole_mb more than "
+                       "max_read_ahead_mb: %lu\n", limit_mb);
+                return -ERANGE;
+        }
+
+        spin_lock(&sbi->ll_lock);
+        sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
+                val << (20 - PAGE_CACHE_SHIFT);
+        spin_unlock(&sbi->ll_lock);
+
+        return count;
+}
+
 static int ll_rd_max_cached_mb(char *page, char **start, off_t off,
                                int count, int *eof, void *data)
 {
@@ -280,8 +324,8 @@ static int ll_wr_checksum(struct file *file, const char *buffer,
         else
                 sbi->ll_flags &= ~LL_SBI_CHECKSUM;
 
-        rc = obd_set_info(sbi->ll_dt_exp, strlen("checksum"), "checksum",
-                          sizeof(val), &val);
+        rc = obd_set_info_async(sbi->ll_dt_exp, strlen("checksum"), "checksum",
+                                sizeof(val), &val, NULL);
         if (rc)
                 CWARN("Failed to set OSC checksum flags: %d\n", rc);
 
@@ -301,6 +345,8 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         //{ "filegroups",   lprocfs_rd_filegroups,  0, 0 },
         { "max_read_ahead_mb", ll_rd_max_readahead_mb,
                                ll_wr_max_readahead_mb, 0 },
+        { "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb,
+                                     ll_wr_max_read_ahead_whole_mb, 0 },
         { "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 },
         { "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 },
         { 0 }
index 8e64ae8..7a0e982 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_version.h>
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include <lustre_mdc.h>
 #include "llite_internal.h"
 
 /* methods */
@@ -97,8 +98,8 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash,
 }
 #endif
 
-int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                        void *data, int flag)
+int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
+                       void *data, int flag)
 {
         int rc;
         struct lustre_handle lockh;
@@ -402,7 +403,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
         it->it_create_mode &= ~current->fs->umask;
 
         rc = md_intent_lock(ll_i2mdexp(parent), &op_data, NULL, 0, it,
-                            lookup_flags, &req, ll_mdc_blocking_ast, 0);
+                            lookup_flags, &req, ll_md_blocking_ast, 0);
 
         if (rc < 0)
                 GOTO(out, retval = ERR_PTR(rc));
@@ -413,6 +414,11 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                 GOTO(out, retval = ERR_PTR(rc));
         }
 
+        if ((it->it_op & IT_OPEN) && dentry->d_inode &&
+            !S_ISREG(dentry->d_inode->i_mode) &&
+            !S_ISDIR(dentry->d_inode->i_mode)) {
+                ll_release_openhandle(dentry, it);
+        }
         ll_lookup_finish_locks(it, dentry);
 
         if (dentry == save)
@@ -514,13 +520,6 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
         RETURN(0);
 }
 
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
-{
-        return ll_create_it(dir, dentry, mode, &nd->intent);
-}
-#endif
-
 static void ll_update_times(struct ptlrpc_request *request, int offset,
                             struct inode *inode)
 {
@@ -539,17 +538,18 @@ static void ll_update_times(struct ptlrpc_request *request, int offset,
                 LTIME_S(inode->i_ctime) = body->ctime;
 }
 
-static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
+static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode,
+                            unsigned rdev, struct dentry *dchild)
 {
         struct ptlrpc_request *request = NULL;
-        struct inode *dir = nd->dentry->d_inode;
+        struct inode *inode = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct md_op_data op_data = { { 0 } };
         int err;
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p) mode %o dev %x\n",
-               nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir,
+               name->len, name->name, dir->i_ino, dir->i_generation, dir,
                mode, rdev);
 
         mode &= ~current->fs->umask;
@@ -562,15 +562,23 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
         case S_IFBLK:
         case S_IFIFO:
         case S_IFSOCK:
-                ll_prepare_md_op_data(&op_data, dir, NULL,
-                                       nd->last.name, nd->last.len, 0);
-
+                ll_prepare_md_op_data(&op_data, dir, NULL, name->name,
+                                      name->len, 0);
                 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
                                 current->fsuid, current->fsgid,
                                 current->cap_effective, rdev, &request);
-                if (err == 0)
-                        ll_update_times(request, 0, dir);
-                ptlrpc_req_finished(request);
+                if (err)
+                        break;
+                ll_update_times(request, 0, dir);
+
+                if (dchild) {
+                        err = ll_prep_inode(&inode, request, 0,
+                                            dchild->d_sb);
+                        if (err)
+                                break;
+
+                        d_instantiate(dchild, inode);
+                }
                 break;
         case S_IFDIR:
                 err = -EPERM;
@@ -578,79 +586,29 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
         default:
                 err = -EINVAL;
         }
+        ptlrpc_req_finished(request);
         RETURN(err);
 }
 
-static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode,
-                    ll_dev_t rdev)
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
 {
-        struct placement_hint hint = { .ph_pname = NULL,
-                                       .ph_cname = &dchild->d_name,
-                                       .ph_opc = LUSTRE_OPC_MKNODE };
-                
-        struct ptlrpc_request *request = NULL;
-        struct inode *inode = NULL;
-        struct ll_sb_info *sbi = ll_i2sbi(dir);
-        struct md_op_data op_data = { { 0 } };
-        int err;
-        ENTRY;
-
-        CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
-               dchild->d_name.len, dchild->d_name.name,
-               dir->i_ino, dir->i_generation, dir);
-
-        mode &= ~current->fs->umask;
-
-        switch (mode & S_IFMT) {
-        case 0:
-        case S_IFREG:
-                mode |= S_IFREG; /* for mode = 0 case, fallthrough */
-        case S_IFCHR:
-        case S_IFBLK:
-        case S_IFIFO:
-        case S_IFSOCK:
-                /* allocate new fid */
-                err = ll_fid_md_alloc(ll_i2sbi(dir), &op_data.fid2, &hint);
-                if (err) {
-                        CERROR("can't allocate new fid, rc %d\n", err);
-                        LBUG();
-                }
-
-                ll_prepare_md_op_data(&op_data, dir, NULL, dchild->d_name.name,
-                                       dchild->d_name.len, 0);
-
-                err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
-                                current->fsuid, current->fsgid,
-                                current->cap_effective, rdev, &request);
-                if (err)
-                        GOTO(out_err, err);
-
-                ll_update_times(request, 0, dir);
-
-                err = ll_prep_inode(&inode, request, 0, dchild->d_sb);
-                if (err)
-                        GOTO(out_err, err);
-                break;
-        case S_IFDIR:
-                RETURN(-EPERM);
-                break;
-        default:
-                RETURN(-EINVAL);
+        if (!nd || !nd->intent.d.lustre.it_disposition) {
+                /* No saved request? Just mknod the file */
+                return ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry);
         }
 
-        d_instantiate(dchild, inode);
- out_err:
-        ptlrpc_req_finished(request);
-        RETURN(err);
+        return ll_create_it(dir, dentry, mode, &nd->intent);
 }
+#endif
 
-static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
+static int ll_symlink_generic(struct inode *dir, struct qstr *name,
+                              const char *tgt)
 {
         struct placement_hint hint = { .ph_pname = NULL,
-                                       .ph_cname = &nd->dentry->d_name,
+                                       .ph_cname = name,
                                        .ph_opc = LUSTRE_OPC_SYMLINK };
                 
-        struct inode *dir = nd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct md_op_data op_data = { { 0 } };
@@ -658,7 +616,7 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
         ENTRY;
 
         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),target=%s\n",
-               nd->last.len, nd->last.name, dir->i_ino, dir->i_generation,
+               name->len, name->name, dir->i_ino, dir->i_generation,
                dir, tgt);
 
         /* allocate new fid */
@@ -669,7 +627,7 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
         }
 
         ll_prepare_md_op_data(&op_data, dir, NULL,
-                              nd->last.name, nd->last.len, 0);
+                              name->name, name->len, 0);
 
         err = md_create(sbi->ll_md_exp, &op_data,
                         tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
@@ -682,10 +640,9 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
         RETURN(err);
 }
 
-static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
+static int ll_link_generic(struct inode *src,  struct inode *dir,
+                           struct qstr *name)
 {
-        struct inode *src = srcnd->dentry->d_inode;
-        struct inode *dir = tgtnd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct md_op_data op_data = { { 0 } };
         int err;
@@ -695,11 +652,10 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
         CDEBUG(D_VFSTRACE,
                "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
                src->i_ino, src->i_generation, src, dir->i_ino,
-               dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name);
-
-        ll_prepare_md_op_data(&op_data, src, dir, tgtnd->last.name,
-                               tgtnd->last.len, 0);
+               dir->i_generation, dir, name->len, name->name);
 
+        ll_prepare_md_op_data(&op_data, src, dir, name->name,
+                              name->len, 0);
         err = md_link(sbi->ll_md_exp, &op_data, &request);
         if (err == 0)
                 ll_update_times(request, 0, dir);
@@ -709,20 +665,21 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
         RETURN(err);
 }
 
 
-static int ll_mkdir_raw(struct nameidata *nd, int mode)
+static int ll_mkdir_generic(struct inode *dir, struct qstr *name, int mode,
+                            struct dentry *dchild)
 {
         struct placement_hint hint = { .ph_pname = NULL,
-                                       .ph_cname = &nd->dentry->d_name,
+                                       .ph_cname = name,
                                        .ph_opc = LUSTRE_OPC_MKDIR };
-        struct inode *dir = nd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct md_op_data op_data = { { 0 } };
+        struct inode *inode = NULL;
         int err;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
-               nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
+               name->len, name->name, dir->i_ino, dir->i_generation, dir);
 
         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
 
@@ -734,7 +691,7 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode)
         }
 
         ll_prepare_md_op_data(&op_data, dir, NULL,
-                               nd->last.name, nd->last.len, 0);
+                              name->name, name->len, 0);
 
         err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
                         current->fsuid, current->fsgid, current->cap_effective,
@@ -742,34 +699,45 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode)
-        if (err == 0)
-                ll_update_times(request, 0, dir);
+        if (err)
+                GOTO(out, err); /* no reply to parse: skip inode setup */
 
+        ll_update_times(request, 0, dir);
+        if (dchild) {
+                err = ll_prep_inode(&inode, request, 0,
+                                    dchild->d_sb);
+                if (err)
+                        GOTO(out, err);
+                d_instantiate(dchild, inode);
+        }
+        EXIT;
+out:
         ptlrpc_req_finished(request);
-        RETURN(err);
+        return err;
 }
 }
 
-static int ll_rmdir_raw(struct nameidata *nd)
+static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
+                            struct qstr *name)
 {
-        struct inode *dir = nd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct md_op_data op_data = { { 0 } };
         struct dentry *dentry;
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
-               nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
+               name->len, name->name, dir->i_ino, dir->i_generation, dir);
 
         /* Check if we have something mounted at the dir we are going to delete
          * In such a case there would always be dentry present. */
-        dentry = d_lookup(nd->dentry, &nd->last);
-        if (dentry) {
-                int mounted = d_mountpoint(dentry);
-                dput(dentry);
-                if (mounted)
-                        RETURN(-EBUSY);
+        if (dparent) {
+                dentry = d_lookup(dparent, name);
+                if (dentry) {
+                        int mounted = d_mountpoint(dentry);
+                        dput(dentry);
+                        if (mounted)
+                                RETURN(-EBUSY);
+                }
         }
-
-        ll_prepare_md_op_data(&op_data, dir, NULL, nd->last.name,
-                               nd->last.len, S_IFDIR);
-
+                
+        ll_prepare_md_op_data(&op_data, dir, NULL, name->name,
+                              name->len, S_IFDIR);
         rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, &op_data, &request);
         if (rc == 0)
                 ll_update_times(request, 0, dir);
@@ -851,19 +819,17 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
         return rc;
 }
 
-static int ll_unlink_raw(struct nameidata *nd)
+static int ll_unlink_generic(struct inode * dir, struct qstr *name)
 {
-        struct inode *dir = nd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct md_op_data op_data = { { 0 } };
         int rc;
         ENTRY;
         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
-               nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir);
-
-        ll_prepare_md_op_data(&op_data, dir, NULL,
-                               nd->last.name, nd->last.len, 0);
+               name->len, name->name, dir->i_ino, dir->i_generation, dir);
 
+        ll_prepare_md_op_data(&op_data, dir, NULL, name->name,
+                              name->len, 0);
         rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, &op_data, &request);
         if (rc)
                 GOTO(out, rc);
@@ -876,25 +842,23 @@ static int ll_unlink_raw(struct nameidata *nd)
         RETURN(rc);
 }
 
-static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
+static int ll_rename_generic(struct inode *src, struct qstr *src_name,
+                             struct inode *tgt, struct qstr *tgt_name)
 {
-        struct inode *src = srcnd->dentry->d_inode;
-        struct inode *tgt = tgtnd->dentry->d_inode;
         struct ptlrpc_request *request = NULL;
         struct ll_sb_info *sbi = ll_i2sbi(src);
         struct md_op_data op_data = { { 0 } };
         int err;
         ENTRY;
         CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
-               "tgt_dir=%lu/%u(%p)\n", srcnd->last.len, srcnd->last.name,
-               src->i_ino, src->i_generation, src, tgtnd->last.len,
-               tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt);
+               "tgt_dir=%lu/%u(%p)\n", src_name->len, src_name->name,
+               src->i_ino, src->i_generation, src, tgt_name->len,
+               tgt_name->name, tgt->i_ino, tgt->i_generation, tgt);
 
         ll_prepare_md_op_data(&op_data, src, tgt, NULL, 0, 0);
-
         err = md_rename(sbi->ll_md_exp, &op_data,
-                        srcnd->last.name, srcnd->last.len,
-                        tgtnd->last.name, tgtnd->last.len, &request);
+                        src_name->name, src_name->len,
+                        tgt_name->name, tgt_name->len, &request);
         if (!err) {
                 ll_update_times(request, 0, src);
                 ll_update_times(request, 0, tgt);
@@ -906,6 +870,75 @@ static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
         RETURN(err);
 }
 
+static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
+{
+        return ll_mknod_generic(nd->dentry->d_inode, &nd->last, mode,rdev,NULL);
+}
+static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
+{
+        return ll_rename_generic(srcnd->dentry->d_inode, &srcnd->last,
+                                 tgtnd->dentry->d_inode, &tgtnd->last);
+}
+static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
+{
+        return ll_link_generic(srcnd->dentry->d_inode, tgtnd->dentry->d_inode,
+                               &tgtnd->last);
+}
+static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
+{
+        return ll_symlink_generic(nd->dentry->d_inode, &nd->last, tgt);
+}
+static int ll_rmdir_raw(struct nameidata *nd)
+{
+        return ll_rmdir_generic(nd->dentry->d_inode, nd->dentry, &nd->last);
+}
+static int ll_mkdir_raw(struct nameidata *nd, int mode)
+{
+        return ll_mkdir_generic(nd->dentry->d_inode, &nd->last, mode, NULL);
+}
+static int ll_unlink_raw(struct nameidata *nd)
+{
+        return ll_unlink_generic(nd->dentry->d_inode, &nd->last);
+}
+
+static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode,
+                    ll_dev_t rdev)
+{
+        return ll_mknod_generic(dir, &dchild->d_name, mode,
+                                old_encode_dev(rdev), dchild);
+}
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+static int ll_unlink(struct inode * dir, struct dentry *dentry)
+{
+        return ll_unlink_generic(dir, &dentry->d_name);
+}
+static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+        return ll_mkdir_generic(dir, &dentry->d_name, mode, dentry);
+}
+static int ll_rmdir(struct inode *dir, struct dentry *dentry)
+{
+        return ll_rmdir_generic(dir, NULL, &dentry->d_name);
+}
+static int ll_symlink(struct inode *dir, struct dentry *dentry,
+                      const char *oldname)
+{
+        return ll_symlink_generic(dir, &dentry->d_name, oldname);
+}
+static int ll_link(struct dentry *old_dentry, struct inode *dir, 
+                   struct dentry *new_dentry)
+{
+        return ll_link_generic(old_dentry->d_inode, dir, &new_dentry->d_name);
+}
+static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
+                     struct inode *new_dir, struct dentry *new_dentry)
+{
+        return ll_rename_generic(old_dir, &old_dentry->d_name, new_dir, 
+                               &new_dentry->d_name);
+}
+#endif
+
 struct inode_operations ll_dir_inode_operations = {
         .link_raw           = ll_link_raw,
         .unlink_raw         = ll_unlink_raw,
@@ -924,7 +957,16 @@ struct inode_operations ll_dir_inode_operations = {
 #else
         .lookup             = ll_lookup_nd,
         .create             = ll_create_nd,
-        .getattr_it         = ll_getattr,
+        .getattr_it         = ll_getattr_it,
+        /* NFSD calls the non-raw methods; provide them so it needs no patch. */
+        .unlink             = ll_unlink,
+        .mkdir              = ll_mkdir,
+        .rmdir              = ll_rmdir,
+        .symlink            = ll_symlink,
+        .link               = ll_link,
+        .rename             = ll_rename,
+        .setattr            = ll_setattr,
+        .getattr            = ll_getattr,
 #endif
         .permission         = ll_inode_permission,
         .setxattr           = ll_setxattr,
@@ -932,3 +974,18 @@ struct inode_operations ll_dir_inode_operations = {
         .listxattr          = ll_listxattr,
         .removexattr        = ll_removexattr,
 };
+
+struct inode_operations ll_special_inode_operations = {
+        .setattr_raw    = ll_setattr_raw,
+        .setattr        = ll_setattr,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+        .getattr_it     = ll_getattr_it,
+#else   
+        .revalidate_it  = ll_inode_revalidate_it,
+#endif
+        .permission     = ll_inode_permission,
+        .setxattr       = ll_setxattr,
+        .getxattr       = ll_getxattr,
+        .listxattr      = ll_listxattr,
+        .removexattr    = ll_removexattr,
+};
index 8c55247..0bab99e 100644 (file)
@@ -43,8 +43,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
@@ -971,10 +971,12 @@ void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping)
 }
 
 #define RAS_CDEBUG(ras) \
-        CDEBUG(D_READA, "lrp %lu c %lu ws %lu wl %lu nra %lu\n",        \
-               ras->ras_last_readpage, ras->ras_consecutive,            \
-               ras->ras_window_start, ras->ras_window_len,              \
-               ras->ras_next_readahead);
+        CDEBUG(D_READA,                                                      \
+               "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu\n", \
+               ras->ras_last_readpage, ras->ras_consecutive_requests,        \
+               ras->ras_consecutive_pages, ras->ras_window_start,            \
+               ras->ras_window_len, ras->ras_next_readahead,                 \
+               ras->ras_requests, ras->ras_request_index);
 
 static int index_in_window(unsigned long index, unsigned long point,
                            unsigned long before, unsigned long after)
@@ -1002,9 +1004,13 @@ void ll_ra_read_in(struct file *f, struct ll_ra_read *rar)
         struct ll_readahead_state *ras;
 
         ras = ll_ras_get(f);
-        rar->lrr_reader = current;
 
         spin_lock(&ras->ras_lock);
+        ras->ras_requests++;
+        ras->ras_request_index = 0;
+        ras->ras_consecutive_requests++;
+        rar->lrr_reader = current;
+
         list_add(&rar->lrr_linkage, &ras->ras_read_beads);
         spin_unlock(&ras->ras_lock);
 }
@@ -1075,34 +1081,19 @@ static int ll_readahead(struct ll_readahead_state *ras,
 
         spin_lock(&ras->ras_lock);
         bead = ll_ra_read_get_locked(ras);
-        /* reserve a part of the read-ahead window that we'll be issuing */
+        /* Enlarge the RA window to encompass the full read */
+        if (bead != NULL && ras->ras_window_start + ras->ras_window_len <
+            bead->lrr_start + bead->lrr_count) {
+                ras->ras_window_len = bead->lrr_start + bead->lrr_count -
+                                      ras->ras_window_start;
+        }
+        /* Reserve a part of the read-ahead window that we'll be issuing */
         if (ras->ras_window_len) {
                 start = ras->ras_next_readahead;
                 end = ras->ras_window_start + ras->ras_window_len - 1;
         }
-        if (bead != NULL) {
-                pgoff_t read_end;
-
-                start = max(start, bead->lrr_start);
-                read_end = bead->lrr_start + bead->lrr_count - 1;
-                if (ras->ras_consecutive > start - bead->lrr_start + 1)
-                        /*
-                         * if current read(2) is a part of larger sequential
-                         * read, make sure read-ahead is at least to the end
-                         * of the read region.
-                         *
-                         * XXX nikita: This doesn't work when some pages in
-                         * [lrr_start, start] were cached (and, as a result,
-                         * weren't counted in ->ras_consecutive).
-                         */
-                        end = max(end, read_end);
-                else
-                        /*
-                         * otherwise, clip read-ahead at the read boundary.
-                         */
-                        end = read_end;
-        }
         if (end != 0) {
+                /* Truncate RA window to end of file */
                 end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT));
                 ras->ras_next_readahead = max(end, end + 1);
                 RAS_CDEBUG(ras);
@@ -1209,10 +1200,11 @@ static void ras_set_start(struct ll_readahead_state *ras, unsigned long index)
 static void ras_reset(struct ll_readahead_state *ras, unsigned long index)
 {
         ras->ras_last_readpage = index;
-        ras->ras_consecutive = 1;
+        ras->ras_consecutive_requests = 0;
+        ras->ras_consecutive_pages = 0;
         ras->ras_window_len = 0;
         ras_set_start(ras, index);
-        ras->ras_next_readahead = ras->ras_window_start;
+        ras->ras_next_readahead = max(ras->ras_window_start, index);
 
         RAS_CDEBUG(ras);
 }
@@ -1221,11 +1213,13 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras)
 {
         spin_lock_init(&ras->ras_lock);
         ras_reset(ras, 0);
+        ras->ras_requests = 0;
         INIT_LIST_HEAD(&ras->ras_read_beads);
 }
 
-static void ras_update(struct ll_sb_info *sbi, struct ll_readahead_state *ras,
-                       unsigned long index, unsigned hit)
+static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
+                       struct ll_readahead_state *ras, unsigned long index,
+                       unsigned hit)
 {
         struct ll_ra_info *ra = &sbi->ll_ra_info;
         int zero = 0;
@@ -1252,36 +1246,62 @@ static void ras_update(struct ll_sb_info *sbi, struct ll_readahead_state *ras,
                 ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW);
         }
 
+        /* On the second access to a file smaller than the tunable
+         * ra_max_read_ahead_whole_pages trigger RA on all pages in the
+         * file up to ra_max_pages.  This is simply a best effort and
+         * only occurs once per open file.  Subsequent IO reverts to
+         * normal RA behavior.  The mmap case does not increment
+         * ras_requests and thus can never trigger this behavior. */
+        if (ras->ras_requests == 2 && !ras->ras_request_index) {
+                __u64 kms_pages;
+
+                kms_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+                CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
+                       ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
+
+                if (kms_pages &&
+                    kms_pages <= ra->ra_max_read_ahead_whole_pages) {
+                        ras->ras_window_start = 0;
+                        ras->ras_last_readpage = 0;
+                        ras->ras_next_readahead = 0;
+                        ras->ras_window_len = min(ra->ra_max_pages,
+                                ra->ra_max_read_ahead_whole_pages);
+                        GOTO(out_unlock, 0);
+                }
+        }
+
         if (zero) {
                 ras_reset(ras, index);
                 GOTO(out_unlock, 0);
         }
 
         ras->ras_last_readpage = index;
-        ras->ras_consecutive++;
+        ras->ras_consecutive_pages++;
         ras_set_start(ras, index);
         ras->ras_next_readahead = max(ras->ras_window_start,
                                       ras->ras_next_readahead);
 
-        /* wait for a few pages to arrive before issuing readahead to avoid
-         * the worst overutilization */
-        if (ras->ras_consecutive == 3) {
+        /* Trigger RA in the mmap case where ras_consecutive_requests
+         * is not incremented and thus can't be used to trigger RA */
+        if (!ras->ras_window_len && ras->ras_consecutive_pages == 3) {
                 ras->ras_window_len = PTLRPC_MAX_BRW_PAGES;
                 GOTO(out_unlock, 0);
         }
 
-        /* we need to increase the window sometimes.  we'll arbitrarily
-         * do it half-way through the pages in an rpc */
-        if ((index & (PTLRPC_MAX_BRW_PAGES - 1)) ==
-            (PTLRPC_MAX_BRW_PAGES >> 1)) {
-                ras->ras_window_len += PTLRPC_MAX_BRW_PAGES;
-                ras->ras_window_len = min(ras->ras_window_len,
+        /* The initial ras_window_len is set to the request size.  To avoid
+         * uselessly reading and discarding pages for random IO the window is
+         * only increased once per consecutive request received. */
+        if (ras->ras_consecutive_requests > 1 && !ras->ras_request_index) {
+                ras->ras_window_len = min(ras->ras_window_len +
+                                          PTLRPC_MAX_BRW_PAGES,
                                           ra->ra_max_pages);
         }
 
         EXIT;
 out_unlock:
         RAS_CDEBUG(ras);
+        ras->ras_request_index++;
         spin_unlock(&ras->ras_lock);
         spin_unlock(&sbi->ll_lock);
         return;
@@ -1357,6 +1377,17 @@ int ll_readpage(struct file *filp, struct page *page)
                (((loff_t)page->index) << PAGE_SHIFT));
         LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0);
 
+        if (!ll_i2info(inode)->lli_smd) {
+                /* File with no objects - one big hole */
+                /* ll_truncate_complete_page() stands in for the unexported
+                 * remove_from_page_cache(); we re-mark the page up to date. */
+                ll_truncate_complete_page(page);
+                clear_page(page);
+                SetPageUptodate(page);
+                unlock_page(page);
+                RETURN(0);
+        }
+
         rc = oig_init(&oig);
         if (rc < 0)
                 GOTO(out, rc);
@@ -1370,7 +1401,7 @@ int ll_readpage(struct file *filp, struct page *page)
                 GOTO(out, rc = PTR_ERR(llap));
 
         if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages)
-                ras_update(ll_i2sbi(inode), &fd->fd_ras, page->index,
+                ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index,
                            llap->llap_defer_uptodate);
 
         if (llap->llap_defer_uptodate) {
index 1f39574..98c4a4c 100644 (file)
@@ -44,8 +44,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
index 72250af..598c130 100644 (file)
@@ -46,8 +46,8 @@
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_lite.h>
+#include <lustre_mdc.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 #include <linux/lustre_compat25.h>
 
diff --git a/lustre/llite/special.c b/lustre/llite/special.c
deleted file mode 100644 (file)
index bf1d707..0000000
+++ /dev/null
@@ -1,419 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Special file handling for Lustre.
- *
- *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
- *   Author: Wang Di <wangdi@clusterfs.com>
- *   Author: Andreas Dilger <adilger@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_LLITE
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_lite.h>
-#include <linux/pagemap.h>
-#include <linux/file.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/lustre_compat25.h>
-#endif
-#include <asm/poll.h>
-#include "llite_internal.h"
-
-#define INODE_OPS 1
-#define FILE_OPS 2
-
-static struct file_operations **get_save_fops(struct file* filp, int mode)
-{
-        struct inode *inode = filp->f_dentry->d_inode;
-        struct ll_inode_info *lli = ll_i2info(inode);
-
-        if (mode == INODE_OPS) {
-                return &(lli->ll_save_ifop);
-        } else if (mode == FILE_OPS) {
-                if (S_ISFIFO(inode->i_mode)) {
-                        switch (filp->f_mode) {
-                        case 1: /*O_RDONLY*/
-                                return &(lli->ll_save_ffop);
-                        case 2: /*O_WRONLY*/
-                                return &(lli->ll_save_wfop);
-                        case 3: /* O_RDWR */
-                                return &(lli->ll_save_wrfop);
-                        default:
-                                return NULL;
-                        }
-                }
-                return &(lli->ll_save_ffop);
-        } else {
-                CERROR("invalid special file ops %d\n", mode);
-                LBUG();
-                return NULL;
-        }
-}
-
-static void save_fops(struct file *filp, struct inode *inode,
-                      struct file_operations *sfops)
-{
-        if (sfops != filp->f_op) {
-                struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-
-                *pfop = filp->f_op;
-                if (S_ISCHR(inode->i_mode))
-                        filp->f_op = &ll_special_chr_file_fops;
-                else if (S_ISFIFO(inode->i_mode))
-                        filp->f_op = &ll_special_fifo_file_fops;
-        }
-}
-
-static ssize_t ll_special_file_read(struct file *filp, char *buf,
-                                    size_t count, loff_t *ppos)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->read)
-                rc = (*pfop)->read(filp, buf, count, ppos);
-
-        RETURN(rc);
-}
-
-static ssize_t ll_special_file_write(struct file *filp, const char *buf,
-                                     size_t count, loff_t *ppos)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->write)
-                rc = (*pfop)->write(filp, buf, count, ppos);
-
-        RETURN(rc);
-}
-
-static int ll_special_file_ioctl(struct inode *inode, struct file *filp,
-                                 unsigned int cmd, unsigned long arg)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = -ENOTTY;
-
-        if (pfop && *pfop && (*pfop)->ioctl) {
-                struct file_operations *sfops = filp->f_op;
-
-                rc = (*pfop)->ioctl(inode, filp, cmd, arg);
-                save_fops(filp, inode, sfops);
-        }
-        RETURN(rc);
-}
-
-static loff_t ll_special_file_seek(struct file *filp, loff_t offset, int origin)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = 0;
-
-        if (pfop && *pfop && (*pfop)->llseek)
-                rc = (*pfop)->llseek(filp, offset, origin);
-        else
-                rc = default_llseek(filp, offset, origin);
-
-        RETURN(rc);
-}
-
-
-#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
-
-static unsigned int ll_special_file_poll(struct file *filp,
-                                         struct poll_table_struct *poll_table)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = DEFAULT_POLLMASK;
-
-        if (pfop && *pfop && (*pfop)->poll)
-                rc = (*pfop)->poll(filp, poll_table);
-
-        RETURN(rc);
-}
-
-static int ll_special_file_open(struct inode *inode, struct file *filp)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->open)
-                rc = (*pfop)->open(inode, filp);
-
-        RETURN(rc);
-}
-
-static ssize_t ll_special_read(struct file *filp, char *buf, size_t count,
-                               loff_t *ppos)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->read)
-                rc = (*pfop)->read(filp, buf, count, ppos);
-
-        RETURN(rc);
-}
-
-static ssize_t ll_special_write(struct file *filp, const char *buf,
-                                size_t count, loff_t *ppos)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->write)
-                rc = (*pfop)->write(filp, buf, count, ppos);
-
-        RETURN(rc);
-}
-
-#ifdef HAVE_UNLOCKED_IOCTL
-static long ll_special_unlocked_ioctl(struct file *filp, unsigned int cmd,
-                                     unsigned long arg)
-{
-        struct file_operations **pfop;
-        int rc = -ENOTTY;
-
-        lock_kernel();
-        pfop = get_save_fops(filp, INODE_OPS);
-        unlock_kernel();
-        if (pfop && *pfop && (*pfop)->unlocked_ioctl) {
-                struct file_operations *sfops = filp->f_op;
-
-                rc = (*pfop)->unlocked_ioctl(filp, cmd, arg);
-
-                /* sometimes, file_operations will be changed in ioctl */
-                lock_kernel();
-                save_fops(filp, filp->f_dentry->d_inode, sfops);
-                unlock_kernel();
-        }
-
-        RETURN(rc);
-}
-#endif
-
-static int ll_special_ioctl(struct inode *inode, struct file *filp,
-                            unsigned int cmd, unsigned long arg)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        int rc = -ENOTTY;
-
-        if (pfop && *pfop && (*pfop)->ioctl) {
-                struct file_operations *sfops = filp->f_op;
-
-                rc = (*pfop)->ioctl(inode, filp, cmd, arg);
-
-                /* sometimes, file_operations will be changed in ioctl */
-                save_fops(filp, inode, sfops);
-        }
-
-        RETURN(rc);
-}
-
-static int ll_special_mmap(struct file * filp, struct vm_area_struct * vma)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        int rc = -ENODEV;
-
-        if (pfop && *pfop && (*pfop)->mmap)
-                rc = (*pfop)->mmap(filp, vma);
-
-        RETURN(rc);
-}
-
-static loff_t ll_special_seek(struct file *filp, loff_t offset, int origin)
-{
-        struct file_operations** pfop = get_save_fops (filp, INODE_OPS);
-        int    rc = 0;
-
-        if (pfop && *pfop && (*pfop)->llseek)
-                rc = (*pfop)->llseek(filp, offset, origin);
-        else
-                rc = default_llseek(filp, offset, origin);
-
-        RETURN(rc);
-}
-
-static int ll_special_fsync(struct file *filp, struct dentry *dentry, int data)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->fsync)
-                rc = (*pfop)->fsync(filp, dentry, data);
-
-        RETURN(rc);
-}
-
-static int ll_special_file_fasync(int fd, struct file *filp, int on)
-{
-        struct file_operations **pfop = get_save_fops(filp, FILE_OPS);
-        int rc = -EINVAL;
-
-        if (pfop && *pfop && (*pfop)->fasync)
-                rc = (*pfop)->fasync(fd, filp, on);
-
-        RETURN(rc);
-}
-
-static int ll_special_release_internal(struct inode *inode, struct file *filp,
-                                       int mode)
-{
-       struct file_operations **pfop = get_save_fops(filp, mode);
-       struct ll_sb_info *sbi = ll_i2sbi(inode);
-       int rc = 0, err;
-       ENTRY;
-
-        if (pfop && *pfop) {
-                if ((*pfop)->release)
-                        rc = (*pfop)->release(inode, filp);
-                /* FIXME fops_put */
-        }
-
-        lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_RELEASE);
-
-        err = ll_mdc_close(sbi->ll_md_exp, inode, filp);
-        if (err && rc == 0)
-                rc = err;
-
-        RETURN(rc);
-}
-
-static int ll_special_open(struct inode *inode, struct file *filp)
-{
-        struct file_operations **pfop = get_save_fops(filp, INODE_OPS);
-        struct file_operations *sfops = filp->f_op;
-        struct ptlrpc_request *req;
-        struct lookup_intent *it;
-        struct ll_file_data *fd;
-        int rc = -EINVAL, err;
-        ENTRY;
-
-        fd = ll_file_data_get();
-        if (fd == NULL)
-                RETURN(-ENOMEM);
-
-        if (pfop && *pfop) {
-                /* FIXME fops_get */
-                if ((*pfop)->open) {
-                        rc = (*pfop)->open(inode, filp);
-
-                        /* sometimes file_operations will be changed in open */
-                        save_fops(filp, inode, sfops);
-                }
-        }
-
-        lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN);
-
-        it = filp->f_it;
-
-        err = ll_local_open(filp, it, fd);
-        if (rc != 0) {
-                CERROR("error opening special file: rc %d\n", rc);
-                ll_mdc_close(ll_i2sbi(inode)->ll_md_exp, inode, filp);
-        } else if (err) {
-                if (pfop && *pfop && (*pfop)->release)
-                        (*pfop)->release(inode, filp);
-                /* FIXME fops_put */
-                rc = err;
-        }
-
-        req = it->d.lustre.it_data;
-        if (req)
-                ptlrpc_req_finished(req);
-
-        RETURN(rc);
-}
-
-static int ll_special_release(struct inode *inode, struct file *filp)
-{
-        return ll_special_release_internal(inode, filp, INODE_OPS);
-}
-
-static int ll_special_file_release(struct inode *inode, struct file *filp)
-{
-        return ll_special_release_internal(inode, filp, FILE_OPS);
-}
-
-struct inode_operations ll_special_inode_operations = {
-        .setattr_raw    = ll_setattr_raw,
-        .setattr        = ll_setattr,
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-        .getattr_it     = ll_getattr,
-#else
-        .revalidate_it  = ll_inode_revalidate_it,
-#endif
-        .permission     = ll_inode_permission,
-        .setxattr       = ll_setxattr,
-        .getxattr       = ll_getxattr,
-        .listxattr      = ll_listxattr,
-        .removexattr    = ll_removexattr,
-};
-
-struct file_operations ll_special_chr_inode_fops = {
-        .owner          = THIS_MODULE,
-        .open           = ll_special_open,
-};
-
-struct file_operations ll_special_blk_inode_fops = {
-        .owner          = THIS_MODULE,
-        .read           = ll_special_read,
-        .write          = ll_special_write,
-        .ioctl          = ll_special_ioctl,
-#ifdef HAVE_UNLOCKED_IOCTL
-        .unlocked_ioctl = ll_special_unlocked_ioctl,
-#endif
-        .open           = ll_special_open,
-        .release        = ll_special_release,
-        .mmap           = ll_special_mmap,
-        .llseek         = ll_special_seek,
-        .fsync          = ll_special_fsync,
-};
-
-struct file_operations ll_special_fifo_inode_fops = {
-        .owner          = THIS_MODULE,
-        .open           = ll_special_open,
-};
-
-struct file_operations ll_special_sock_inode_fops = {
-        .owner          = THIS_MODULE,
-        .open           = ll_special_open
-};
-
-struct file_operations ll_special_chr_file_fops = {
-        .owner          = THIS_MODULE,
-        .llseek         = ll_special_file_seek,
-        .read           = ll_special_file_read,
-        .write          = ll_special_file_write,
-        .poll           = ll_special_file_poll,
-        .ioctl          = ll_special_file_ioctl,
-        .open           = ll_special_file_open,
-        .release        = ll_special_file_release,
-        .fasync         = ll_special_file_fasync,
-};
-
-struct file_operations ll_special_fifo_file_fops = {
-        .owner          = THIS_MODULE,
-        .llseek         = ll_special_file_seek,
-        .read           = ll_special_file_read,
-        .write          = ll_special_file_write,
-        .poll           = ll_special_file_poll,
-        .ioctl          = ll_special_file_ioctl,
-        .open           = ll_special_file_open,
-        .release        = ll_special_file_release,
-};
-
index cd1b232..eb16296 100644 (file)
 #include <linux/types.h>
 #include <linux/random.h>
 #include <linux/version.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_dlm.h>
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/random.h>
 #include <linux/cache_def.h>
-#include <linux/lprocfs_status.h>
+#include <lprocfs_status.h>
 #include "llite_internal.h"
 #include <lustre/lustre_user.h>
 
@@ -60,7 +60,7 @@ static int __init init_lustre_lite(void)
 {
         int rc, seed[2];
 
-        printk(KERN_INFO "Lustre: Lustre Lite Client File System; "
+        printk(KERN_INFO "Lustre: Lustre Client File System; "
                "info@clusterfs.com\n");
         ll_file_data_slab = kmem_cache_create("ll_file_data",
                                               sizeof(struct ll_file_data), 0,
@@ -83,15 +83,18 @@ static int __init init_lustre_lite(void)
 
 static void __exit exit_lustre_lite(void)
 {
+        int rc;
+
         lustre_register_client_fill_super(NULL);
         
         ll_unregister_cache(&ll_cache_definition);
 
-        LASSERTF(kmem_cache_destroy(ll_file_data_slab) == 0,
-                 "couldn't destroy ll_file_data slab\n");
-        if (ll_async_page_slab)
-                LASSERTF(kmem_cache_destroy(ll_async_page_slab) == 0,
-                         "couldn't destroy ll_async_page slab\n");
+        rc = kmem_cache_destroy(ll_file_data_slab);
+        LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n");
+        if (ll_async_page_slab) {
+                rc = kmem_cache_destroy(ll_async_page_slab);
+                LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n");
+        }
 
         if (proc_lustre_fs_root) {
                 lprocfs_remove(proc_lustre_fs_root);
index f927774..976fcca 100644 (file)
 #include <linux/types.h>
 #include <linux/random.h>
 #include <linux/version.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_dlm.h>
+#include <lustre_lite.h>
+#include <lustre_ha.h>
+#include <lustre_dlm.h>
 #include <linux/init.h>
 #include <linux/fs.h>
-#include <linux/lprocfs_status.h>
+#include <lprocfs_status.h>
 #include "llite_internal.h"
 
 static kmem_cache_t *ll_inode_cachep;
@@ -79,8 +79,10 @@ int ll_init_inodecache(void)
 
 void ll_destroy_inodecache(void)
 {
-        LASSERTF(kmem_cache_destroy(ll_inode_cachep) == 0,
-                 "ll_inode_cache: not all structures were freed\n");
+        int rc;
+
+        rc = kmem_cache_destroy(ll_inode_cachep);
+        LASSERTF(rc == 0, "ll_inode_cache: not all structures were freed\n");
 }
 
 /* exported operations */
@@ -100,7 +102,7 @@ struct super_operations lustre_super_operations =
 static int __init init_lustre_lite(void)
 {
         int rc, seed[2];
-        printk(KERN_INFO "Lustre: Lustre Lite Client File System; "
+        printk(KERN_INFO "Lustre: Lustre Client File System; "
                "info@clusterfs.com\n");
         rc = ll_init_inodecache();
         if (rc)
@@ -128,16 +130,19 @@ static int __init init_lustre_lite(void)
 
 static void __exit exit_lustre_lite(void)
 {
+        int rc;
+
         lustre_register_client_fill_super(NULL);
 
         ll_unregister_cache(&ll_cache_definition);
 
         ll_destroy_inodecache();
-        LASSERTF(kmem_cache_destroy(ll_file_data_slab) == 0,
-                 "couldn't destroy ll_file_data slab\n");
-        if (ll_async_page_slab)
-                LASSERTF(kmem_cache_destroy(ll_async_page_slab) == 0,
-                         "couldn't destroy ll_async_page slab\n");
+        rc = kmem_cache_destroy(ll_file_data_slab);
+        LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n");
+        if (ll_async_page_slab) {
+                rc = kmem_cache_destroy(ll_async_page_slab);
+                LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n");
+        }
 
         if (proc_lustre_fs_root) {
                 lprocfs_remove(proc_lustre_fs_root);
index 2d713bc..990a9c1 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/version.h>
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/lustre_lite.h>
+#include <lustre_lite.h>
 #include "llite_internal.h"
 
 static int ll_readlink_internal(struct inode *inode,
@@ -157,7 +157,7 @@ struct inode_operations ll_fast_symlink_inode_operations = {
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
         .revalidate_it  = ll_inode_revalidate_it,
 #else 
-        .getattr_it     = ll_getattr,
+        .getattr_it     = ll_getattr_it,
 #endif
         .permission     = ll_inode_permission,
         .setxattr       = ll_setxattr,
index 2e0e1f3..cbfc0da 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_LLITE
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lite.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_version.h>
-#include <linux/lustre_mdc.h>
+#include <obd_support.h>
+#include <lustre_lite.h>
+#include <lustre_dlm.h>
+#include <lustre_ver.h>
+#include <lustre_mdc.h>
 
 #include "llite_internal.h"
 
 #define XATTR_USER_T            (1)
 #define XATTR_TRUSTED_T         (2)
 #define XATTR_SECURITY_T        (3)
-#define XATTR_ACL_T             (4)
-#define XATTR_OTHER_T           (5)
+#define XATTR_ACL_ACCESS_T      (4)
+#define XATTR_ACL_DEFAULT_T     (5)
+#define XATTR_OTHER_T           (6)
 
 static
 int get_xattr_type(const char *name)
 {
-        if (!strcmp(name, XATTR_NAME_ACL_ACCESS) ||
-            !strcmp(name, XATTR_NAME_ACL_DEFAULT))
-                return XATTR_ACL_T;
+        if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
+                return XATTR_ACL_ACCESS_T;
+
+        if (!strcmp(name, XATTR_NAME_ACL_DEFAULT))
+                return XATTR_ACL_DEFAULT_T;
 
         if (!strncmp(name, XATTR_USER_PREFIX,
                      sizeof(XATTR_USER_PREFIX) - 1))
@@ -75,8 +78,11 @@ int get_xattr_type(const char *name)
 static
 int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type)
 {
-        if (xattr_type == XATTR_ACL_T && !(sbi->ll_flags & LL_SBI_ACL))
+        if ((xattr_type == XATTR_ACL_ACCESS_T ||
+             xattr_type == XATTR_ACL_DEFAULT_T) &&
+            !(sbi->ll_flags & LL_SBI_ACL))
                 return -EOPNOTSUPP;
+
         if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR))
                 return -EOPNOTSUPP;
         if (xattr_type == XATTR_TRUSTED_T && !capable(CAP_SYS_ADMIN))
@@ -178,6 +184,26 @@ int ll_getxattr_common(struct inode *inode, const char *name,
         if (rc)
                 RETURN(rc);
 
+        /* The POSIX ACL is protected by the LOOKUP lock. By the time we get
+         * here, path resolution to the target inode has just completed, so
+         * the cached ACL is very likely to be up to date.
+         */
+        if (xattr_type == XATTR_ACL_ACCESS_T) {
+                struct ll_inode_info *lli = ll_i2info(inode);
+                struct posix_acl *acl;
+
+                spin_lock(&lli->lli_lock);
+                acl = posix_acl_dup(lli->lli_posix_acl);
+                spin_unlock(&lli->lli_lock);
+
+                if (!acl)
+                        RETURN(-ENODATA);
+
+                rc = posix_acl_to_xattr(acl, buffer, size);
+                posix_acl_release(acl);
+                RETURN(rc);
+        }
+
 do_getxattr:
         rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid,
                          name, NULL, 0, size, 0, &req);
index 79956a5..106dd44 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include "lmv_internal.h"
 
 /* dummy function for a while */
index 2667e68..f91bd52 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_mds.h>
-#include <linux/obd_class.h>
-//#include <linux/obd_ost.h>
-#include <linux/lprocfs_status.h>
-//#include <linux/lustre_fsfilt.h>
-//#include <linux/lustre_lite.h>
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include "lmv_internal.h"
 
 static inline void lmv_drop_intent_lock(struct lookup_intent *it)
index 228c3e4..094ae72 100644 (file)
@@ -22,7 +22,7 @@
 #ifndef _LMV_INTERNAL_H_
 #define _LMV_INTERNAL_H_
 
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 
 #define LMV_MAX_TGT_COUNT 128
 
index 419320e..6b1f71f 100644 (file)
 #include <linux/namei.h>
 #else
 #include <liblustre.h>
-#include <linux/lustre_log.h>
+#include <lustre_log.h>
 #endif
 #include <linux/ext2_fs.h>
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_lite.h>
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre_lite.h>
 #include "lmv_internal.h"
 
 /* not defined for liblustre building */
@@ -240,8 +240,8 @@ static void lmv_set_timeouts(struct obd_device *obd)
                 if (tgts->ltd_exp == NULL)
                         continue;
                 
-                obd_set_info(tgts->ltd_exp, strlen("inter_mds"),
-                             "inter_mds", 0, NULL);
+                obd_set_info_async(tgts->ltd_exp, strlen("inter_mds"),
+                                   "inter_mds", 0, NULL, NULL);
         }
 }
 
@@ -1903,8 +1903,9 @@ static int lmv_get_info(struct obd_export *exp, __u32 keylen,
         RETURN(-EINVAL);
 }
 
-int lmv_set_info(struct obd_export *exp, obd_count keylen,
-                 void *key, obd_count vallen, void *val)
+int lmv_set_info_async(struct obd_export *exp, obd_count keylen,
+                       void *key, obd_count vallen, void *val,
+                       struct ptlrpc_request_set *set)
 {
         struct lmv_tgt_desc    *tgt;
         struct obd_device      *obd;
@@ -1956,7 +1957,7 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen,
                                 exp = tgt_obd->obd_self_export;
                         }
 
-                        err = obd_set_info(exp, keylen, key, vallen, val);
+                        err = obd_set_info_async(exp, keylen, key, vallen, val, set);
                         if (!rc)
                                 rc = err;
                 }
@@ -1974,8 +1975,9 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen,
                      i++, tgt++) {
                         if (!tgt->ltd_exp)
                                 continue;
-                        rc = obd_set_info(tgt->ltd_exp,
-                                          keylen, key, vallen, val);
+                        rc = obd_set_info_async(tgt->ltd_exp,
+                                                keylen, key, vallen, 
+                                                val, set);
                         if (rc)
                                 RETURN(rc);
                 }
@@ -1992,8 +1994,9 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen,
                         RETURN(rc);
 
                 i = lmv_fld_lookup(obd, fid);
-                rc = obd_set_info(lmv->tgts[i].ltd_exp, 
-                                  keylen, key, vallen, val); 
+                rc = obd_set_info_async(lmv->tgts[i].ltd_exp, 
+                                        keylen, key, vallen, val,
+                                        set); 
                 RETURN(rc);
         }
 
@@ -2378,7 +2381,7 @@ struct obd_ops lmv_obd_ops = {
         .o_llog_init            = lmv_llog_init,
         .o_llog_finish          = lmv_llog_finish,
         .o_get_info             = lmv_get_info,
-        .o_set_info             = lmv_set_info,
+        .o_set_info_async       = lmv_set_info_async,
         .o_packmd               = lmv_packmd,
         .o_unpackmd             = lmv_unpackmd,
         .o_notify               = lmv_notify,
index 8663d57..7740271 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include "lmv_internal.h"
 
 /* objects cache. */
-extern kmem_cache_t *obj_cache;
+extern cfs_mem_cache_t *obj_cache;
 extern atomic_t obj_cache_count;
 
 /* object list and its guard. */
@@ -66,7 +66,7 @@ lmv_obj_alloc(struct obd_device *obd,
         LASSERT(mea->mea_magic == MEA_MAGIC_LAST_CHAR
                 || mea->mea_magic == MEA_MAGIC_ALL_CHARS);
 
-        OBD_SLAB_ALLOC(obj, obj_cache, GFP_NOFS,
+        OBD_SLAB_ALLOC(obj, obj_cache, CFS_ALLOC_STD,
                        sizeof(*obj));
         if (!obj)
                 return NULL;
index 3c3bd76..39c62fc 100644 (file)
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
+#include <linux/seq_file.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <asm/statfs.h>
 #endif
-#include <linux/lprocfs_status.h>
-#include <linux/obd_class.h>
-#include <linux/seq_file.h>
+#include <lprocfs_status.h>
+#include <obd_class.h>
 
 #ifndef LPROCFS
 static struct lprocfs_vars lprocfs_module_vars[] = { {0} };
diff --git a/lustre/lov/Info.plist b/lustre/lov/Info.plist
new file mode 100644 (file)
index 0000000..006f794
--- /dev/null
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>lov</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.lov</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string>
+        <key>OSBundleCompatibleVersion</key>
+        <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict>
+               <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+               <key>com.clusterfs.lustre.libcfs</key>
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.lvfs</key>
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.obdclass</key>
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.ptlrpc</key>
+               <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index f925c2a..583a425 100644 (file)
@@ -11,8 +11,36 @@ liblov_a_CFLAGS = $(LLCFLAGS)
 endif
 
 if MODULES
+if LINUX
 modulefs_DATA = lov$(KMODEXT)
+endif
+
+if DARWIN
+macos_PROGRAMS := lov
+
+lov_SOURCES :=          \
+        lov_log.c       \
+        lov_obd.c       \
+        lov_pack.c      \
+        lov_request.c   \
+        lov_merge.c     \
+        lov_qos.c       \
+        lov_offset.c    \
+        lov_internal.h
+
+lov_CFLAGS := $(EXTRA_KCFLAGS)
+lov_LDFLAGS := $(EXTRA_KLDFLAGS)
+lov_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # DARWIN
+
 endif # MODULES
 
+install-data-hook: $(install_data_hook)
+
 DIST_SOURCES = $(lov-objs:.o=.c) lov_internal.h
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
index c08020d..de3dd8d 100755 (executable)
 
 #ifdef __KERNEL__
 #include <asm/div64.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_log.h>
 
 #include "lov_internal.h"
 
index 68c762f..5829fa9 100644 (file)
@@ -84,7 +84,7 @@ static inline struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm)
                 return NULL;
         atomic_set(&llh->llh_refcount, 2);
         llh->llh_stripe_count = lsm->lsm_stripe_count;
-        INIT_LIST_HEAD(&llh->llh_handle.h_link);
+        CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link);
         class_handle_hash(&llh->llh_handle, lov_llh_addref);
         return llh;
 }
@@ -132,8 +132,8 @@ int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off);
 
 /* lov_qos.c */
 void qos_shrink_lsm(struct lov_request_set *set);
-int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set,
-                    int newea);
+int qos_prep_create(struct obd_export *exp, struct lov_request_set *set);
+void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs);
 int qos_remedy_create(struct lov_request_set *set, struct lov_request *req);
 
 /* lov_request.c */
index 683b744..454b5a6 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_LOV
 #ifdef __KERNEL__
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <asm/div64.h>
-#include <linux/seq_file.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_mds.h>
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/obd_ost.h>
-#include <linux/lprocfs_status.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <obd_ost.h>
+#include <lprocfs_status.h>
 
 #include "lov_internal.h"
 
index 01c07e3..ff20962 100644 (file)
 #define DEBUG_SUBSYSTEM S_LOV
 
 #ifdef __KERNEL__
-#include <asm/div64.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
+#include <obd_class.h>
+#include <obd_lov.h>
 
 #include "lov_internal.h"
 
@@ -60,7 +60,7 @@ int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         LASSERT_SPIN_LOCKED(&lsm->lsm_lock);
 #ifdef __KERNEL__
-        LASSERT(lsm->lsm_lock_owner == current);
+        LASSERT(lsm->lsm_lock_owner == cfs_current());
 #endif
 
         for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count;
@@ -103,7 +103,7 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         LASSERT_SPIN_LOCKED(&lsm->lsm_lock);
 #ifdef __KERNEL__
-        LASSERT(lsm->lsm_lock_owner == current);
+        LASSERT(lsm->lsm_lock_owner == cfs_current());
 #endif
 
         if (shrink) {
index b78c15f..7688122 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_LOV
 #ifdef __KERNEL__
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <asm/div64.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_debug.h>
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/obd_ost.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_param.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_net.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <lustre_debug.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <obd_ost.h>
+#include <lprocfs_status.h>
+#include <lustre_param.h>
 
 #include "lov_internal.h"
 
@@ -65,9 +59,9 @@ static void lov_getref(struct obd_device *obd)
         struct lov_obd *lov = &obd->u.lov;
 
         /* nobody gets through here until lov_putref is done */
-        down(&lov->lov_lock);
+        mutex_down(&lov->lov_lock);
         atomic_inc(&lov->refcount);
-        up(&lov->lov_lock);
+        mutex_up(&lov->lov_lock);
         return;
 }
 
@@ -76,7 +70,7 @@ static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt);
 static void lov_putref(struct obd_device *obd)
 {
         struct lov_obd *lov = &obd->u.lov;
-        down(&lov->lov_lock);
+        mutex_down(&lov->lov_lock);
         /* ok to dec to 0 more than once -- ltd_exp's will be null */
         if (atomic_dec_and_test(&lov->refcount) && lov->death_row) {
                 struct lov_tgt_desc *tgt;
@@ -91,7 +85,7 @@ static void lov_putref(struct obd_device *obd)
                         lov->death_row--;
                 }
         }
-        up(&lov->lov_lock);
+        mutex_up(&lov->lov_lock);
 }
 
 #define MAX_STRING_SIZE 128
@@ -105,12 +99,11 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt,
         struct lustre_handle conn = {0, };
         struct obd_import *imp;
 #ifdef __KERNEL__
-        struct proc_dir_entry *lov_proc_dir;
+        cfs_proc_dir_entry_t *lov_proc_dir;
 #endif
         int rc;
         ENTRY;
 
-
         tgt_obd = class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME,
                                         &obd->obd_uuid);
 
@@ -176,7 +169,7 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt,
         lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
         if (lov_proc_dir) {
                 struct obd_device *osc_obd = class_conn2obd(&conn);
-                struct proc_dir_entry *osc_symlink;
+                cfs_proc_dir_entry_t *osc_symlink;
                 char name[MAX_STRING_SIZE];
 
                 LASSERT(osc_obd != NULL);
@@ -224,7 +217,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd,
 
 static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
 {
-        struct proc_dir_entry *lov_proc_dir;
+        cfs_proc_dir_entry_t *lov_proc_dir;
         struct obd_device *osc_obd = class_exp2obd(tgt->ltd_exp);
         struct lov_obd *lov = &obd->u.lov;
         int rc;
@@ -235,7 +228,7 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
 
         lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds");
         if (lov_proc_dir) {
-                struct proc_dir_entry *osc_symlink;
+                cfs_proc_dir_entry_t *osc_symlink;
 
                 osc_symlink = lprocfs_srch(lov_proc_dir, osc_obd->obd_name);
                 if (osc_symlink) {
@@ -459,9 +452,15 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
                         RETURN(-ENOMEM);
                 }
 
-                memset(tgt, 0, bufsize);
                 if (lov->tgts) {
+                        int i;
                         memcpy(tgt, lov->tgts, lov->bufsize);
+                        LASSERT(index == lov->desc.ld_tgt_count);
+                        for (i = 0; i < index; i++) {
+                                INIT_LIST_HEAD(&tgt[i].qos_bavail_list);
+                                list_splice(&lov->tgts[i].qos_bavail_list,
+                                            &tgt[i].qos_bavail_list);
+                        }
                         OBD_FREE(lov->tgts, lov->bufsize);
                 }
 
@@ -481,6 +480,8 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp,
         tgt->uuid = *uuidp;
         /* XXX - add a sanity check on the generation number. */
         tgt->ltd_gen = gen;
+        tgt->index = index;
+        INIT_LIST_HEAD(&tgt->qos_bavail_list);
 
         if (index >= lov->desc.ld_tgt_count)
                 lov->desc.ld_tgt_count = index + 1;
@@ -625,7 +626,8 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         struct lprocfs_static_vars lvars;
         struct lov_desc *desc;
         struct lov_obd *lov = &obd->u.lov;
-        int count;
+        struct lov_tgt_desc *tgts;
+        int count, i;
         ENTRY;
 
         if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
@@ -675,18 +677,22 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
                 CERROR("Out of memory\n");
                 RETURN(-EINVAL);
         }
-        memset(lov->tgts, 0, lov->bufsize);
+        for (i = 0, tgts = lov->tgts; i < max(count, 1); i++, tgts++) {
+                tgts->index = i;
+                INIT_LIST_HEAD(&tgts->qos_bavail_list);
+        }
 
         desc->ld_active_tgt_count = 0;
         lov->desc = *desc;
         sema_init(&lov->lov_lock, 1);
         atomic_set(&lov->refcount, 0);
+        INIT_LIST_HEAD(&lov->qos_bavail_list);
 
         lprocfs_init_vars(lov, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 #ifdef LPROCFS
         {
-                struct proc_dir_entry *entry;
+                cfs_proc_dir_entry_t *entry;
 
                 entry = create_proc_entry("target_obd", 0444,
                                           obd->obd_proc_entry);
@@ -947,7 +953,9 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
 {
         struct lov_obd *lov;
         struct lov_request_set *set = NULL;
-        struct list_head *pos;
+        struct obd_statfs osfs;
+        unsigned long maxage;
+        struct lov_request *req;
         int rc = 0;
         ENTRY;
 
@@ -972,14 +980,14 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa,
                  RETURN(rc);
         }
 
+        maxage = cfs_time_shift(-lov->desc.ld_qos_maxage);
+        obd_statfs(exp->exp_obd, &osfs, maxage);
+
         rc = lov_prep_create_set(exp, ea, src_oa, oti, &set);
         if (rc)
                 RETURN(rc);
 
-        list_for_each (pos, &set->set_list) {
-                struct lov_request *req =
-                        list_entry(pos, struct lov_request, rq_link);
-
+        list_for_each_entry(req, &set->set_list, rq_link) {
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 rc = obd_create(lov->tgts[req->rq_idx].ltd_exp,
                                 req->rq_oa, &req->rq_md, oti);
@@ -1521,7 +1529,7 @@ static struct obd_async_page_ops lov_async_page_ops = {
 };
 
 int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
-                           struct lov_oinfo *loi, struct page *page,
+                           struct lov_oinfo *loi, cfs_page_t *page,
                            obd_off offset, struct obd_async_page_ops *ops,
                            void *data, void **res)
 {
@@ -1958,7 +1966,7 @@ static int lov_join_lru(struct obd_export *exp,
         } while(0)
 
 static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                      unsigned long max_age)
+                      cfs_time_t max_age)
 {
         struct lov_obd *lov = &obd->u.lov;
         struct obd_statfs lov_sfs;
@@ -1983,6 +1991,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                                 rc = err;
                         continue;
                 }
+                qos_update(lov, i, &lov_sfs);
 
                 if (!set) {
                         memcpy(osfs, &lov_sfs, sizeof(lov_sfs));
@@ -2211,14 +2220,22 @@ out:
         RETURN(rc);
 }
 
-static int lov_set_info(struct obd_export *exp, obd_count keylen,
-                        void *key, obd_count vallen, void *val)
+static int lov_set_info_async(struct obd_export *exp, obd_count keylen,
+                              void *key, obd_count vallen, void *val,
+                              struct ptlrpc_request_set *set)
 {
         struct obd_device *obddev = class_exp2obd(exp);
         struct lov_obd *lov = &obddev->u.lov;
         int i, rc = 0, err;
+        int no_set = !set;
         ENTRY;
 
+        if (no_set) {
+                set = ptlrpc_prep_set();
+                if (!set)
+                        RETURN(-ENOMEM);
+        }
+
         if (KEY_IS(KEY_NEXT_ID)) {
                 if (vallen > lov->desc.ld_tgt_count)
                         RETURN(-EINVAL);
@@ -2234,8 +2251,9 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                                 continue;
 
                         /* hit all OSCs, even inactive ones */
-                        err = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
-                                           vallen, ((obd_id*)val) + i);
+                        err = obd_set_info_async(lov->tgts[i].ltd_exp, keylen,
+                                                 key, vallen,
+                                                 ((obd_id*)val) + i, set);
                         if (!rc)
                                 rc = err;
                 }
@@ -2248,8 +2266,8 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                         if (!lov->tgts[i].ltd_exp || !lov->tgts[i].active)
                                 continue;
 
-                        err = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
-                                           vallen, val);
+                        err = obd_set_info_async(lov->tgts[i].ltd_exp, keylen,
+                                                 key, vallen, val, set);
                         if (!rc)
                                 rc = err;
                 }
@@ -2274,13 +2292,19 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                 if (!val && !lov->tgts[i].active)
                         continue;
 
-                err = obd_set_info(lov->tgts[i].ltd_exp,
-                                  keylen, key, vallen, val);
+                err = obd_set_info_async(lov->tgts[i].ltd_exp,
+                                         keylen, key, vallen, val, set);
                 if (!rc)
                         rc = err;
         }
 out:
         lov_putref(obddev);
+        if (no_set) {
+                err = ptlrpc_set_wait(set);
+                if (!rc)
+                        rc = err;
+                ptlrpc_set_destroy(set);
+        }
         RETURN(rc);
 }
 
@@ -2406,16 +2430,16 @@ int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm,
 
 void lov_stripe_lock(struct lov_stripe_md *md)
 {
-        LASSERT(md->lsm_lock_owner != current);
+        LASSERT(md->lsm_lock_owner != cfs_current());
         spin_lock(&md->lsm_lock);
         LASSERT(md->lsm_lock_owner == NULL);
-        md->lsm_lock_owner = current;
+        md->lsm_lock_owner = cfs_current();
 }
 EXPORT_SYMBOL(lov_stripe_lock);
 
 void lov_stripe_unlock(struct lov_stripe_md *md)
 {
-        LASSERT(md->lsm_lock_owner == current);
+        LASSERT(md->lsm_lock_owner == cfs_current());
         md->lsm_lock_owner = NULL;
         spin_unlock(&md->lsm_lock);
 }
@@ -2459,7 +2483,7 @@ struct obd_ops lov_obd_ops = {
         .o_join_lru            = lov_join_lru,
         .o_iocontrol           = lov_iocontrol,
         .o_get_info            = lov_get_info,
-        .o_set_info            = lov_set_info,
+        .o_set_info_async      = lov_set_info_async,
         .o_llog_init           = lov_llog_init,
         .o_llog_finish         = lov_llog_finish,
         .o_notify              = lov_notify,
@@ -2480,7 +2504,7 @@ int __init lov_init(void)
         init_obd_quota_ops(quota_interface, &lov_obd_ops);
 
         rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars,
-                                 OBD_LOV_DEVICENAME, NULL);
+                                 LUSTRE_LOV_NAME, NULL);
         if (rc && quota_interface)
                 PORTAL_SYMBOL_PUT(osc_quota_interface);
 
@@ -2493,13 +2517,12 @@ static void /*__exit*/ lov_exit(void)
         if (quota_interface)
                 PORTAL_SYMBOL_PUT(lov_quota_interface);
 
-        class_unregister_type(OBD_LOV_DEVICENAME);
+        class_unregister_type(LUSTRE_LOV_NAME);
 }
 
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver");
 MODULE_LICENSE("GPL");
 
-module_init(lov_init);
-module_exit(lov_exit);
+cfs_module(lov, "1.0.0", lov_init, lov_exit);
 #endif
index d6e83c3..22af87e 100644 (file)
 #define DEBUG_SUBSYSTEM S_LOV
 
 #ifdef __KERNEL__
-#include <asm/div64.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
+#include <obd_class.h>
+#include <obd_lov.h>
 
 #include "lov_internal.h"
 
index f33e24b..7ad2745 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/lustre_net.h>
-#include <linux/obd.h>
-#include <linux/obd_lov.h>
-#include <linux/obd_class.h>
-#include <linux/obd_support.h>
+#include <lustre_net.h>
+#include <obd.h>
+#include <obd_lov.h>
+#include <obd_class.h>
+#include <obd_support.h>
 #include <lustre/lustre_user.h>
 
 #include "lov_internal.h"
@@ -148,6 +148,7 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
         RETURN(lmm_size);
 }
 
+/* Find the max stripecount we should use */
 int lov_get_stripecnt(struct lov_obd *lov, int stripe_count)
 {
         if (!stripe_count)
@@ -363,6 +364,7 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp,
         struct lov_obd *lov = &exp->exp_obd->u.lov;
         obd_id last_id = 0;
 
+        ENTRY;
         for (i = 0; i < lump->lmm_stripe_count; i++) {
                 __u32 len = sizeof(last_id);
                 oexp = lov->tgts[lump->lmm_objects[i].l_ost_idx].ltd_exp;
index bde768b..84be134 100644 (file)
 #define DEBUG_SUBSYSTEM S_LOV
 
 #ifdef __KERNEL__
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
+#include <obd_class.h>
+#include <obd_lov.h>
 
 #include "lov_internal.h"
 
@@ -99,7 +100,7 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req)
 
                 if (stripe >= lsm->lsm_stripe_count) {
                         req->rq_idx = ost_idx;
-                        rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa,
+                        rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, 
                                         &req->rq_md, set->set_oti);
                         if (!rc)
                                 break;
@@ -110,73 +111,365 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req)
 
 #define LOV_CREATE_RESEED_MULT 4
 #define LOV_CREATE_RESEED_MIN  1000
-/* FIXME use real qos data to prepare the lov create request */
-int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
+/* alloc objects on osts with round-robin algorithm */
+static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt)
 {
-        static int ost_start_idx, ost_start_count;
+        static int ost_start_count, ost_start_idx;
         unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
         unsigned ost_active_count = lov->desc.ld_active_tgt_count;
-        struct lov_stripe_md *lsm = set->set_md;
-        struct obdo *src_oa = set->set_oa;
-        int i, rc = 0;
+        int i, *idx_pos = idx_arr;
         ENTRY;
-
-        LASSERT(src_oa->o_valid & OBD_MD_FLID);
-
-        lsm->lsm_object_id = src_oa->o_id;
-        if (!lsm->lsm_stripe_size)
-                lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
-        if (!lsm->lsm_pattern) {
-                lsm->lsm_pattern = lov->desc.ld_pattern ?
-                        lov->desc.ld_pattern : LOV_PATTERN_RAID0;
+        
+        if (--ost_start_count <= 0) {
+                ost_start_idx = ll_rand();
+                ost_start_count = 
+                        (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
+                         LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
+        } else if (*stripe_cnt >= lov->desc.ld_active_tgt_count) {
+                /* If we allocate from all of the stripes, make the
+                 * next file start on the next OST. */
+                ++ost_start_idx;
         }
+        ost_idx = ost_start_idx % ost_count;
 
-        if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) {
-                if (--ost_start_count <= 0) {
-                        ost_start_idx = ll_rand();
-                        ost_start_count =
-                          (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
-                           LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
-                } else if (lsm->lsm_stripe_count >= ost_active_count) {
-                        /* If we allocate from all of the stripes, make the
-                         * next file start on the next OST. */
-                        ++ost_start_idx;
+        for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
+                ++ost_start_idx;
+                
+                if (lov->tgts[ost_idx].active == 0) {
+                        CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
+                        continue;
                 }
-                ost_idx = ost_start_idx % ost_count;
-        } else {
-                ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
+                
+                *idx_pos = ost_idx;
+                idx_pos++;
+                /* got enough ost */
+                if (idx_pos - idx_arr == *stripe_cnt)
+                        RETURN(0);
         }
+        *stripe_cnt = idx_pos - idx_arr;
+        RETURN(0);
+}
 
-        CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n",
-               lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx);
+/* alloc objects on osts with specific stripe offset */
+static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
+                          int *idx_arr)
+{
+        unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
+        int i, *idx_pos = idx_arr;
+        ENTRY;
 
+        ost_idx = lsm->lsm_oinfo[0].loi_ost_idx;
         for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) {
-                struct lov_request *req;
-
-                ++ost_start_idx;
                 if (lov->tgts[ost_idx].active == 0) {
                         CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx);
                         continue;
                 }
+                *idx_pos = ost_idx;
+                idx_pos++;
+                /* got enough ost */
+                if (idx_pos - idx_arr == lsm->lsm_stripe_count)
+                        RETURN(0);
+        }
+        /* If we were passed specific striping params, then a failure to
+         * meet those requirements is an error, since we can't reallocate
+         * that memory (it might be part of a larger array or something).
+         *
+         * We can only get here if lsm_stripe_count was originally > 1.
+         */
+        CERROR("can't lstripe objid "LPX64": have %u want %u\n",
+               lsm->lsm_object_id, idx_pos - idx_arr, lsm->lsm_stripe_count);
+        RETURN(-EFBIG);
+}
+
+/* Minimum free space, in bytes, an OST must have to remain a regular
+ * candidate for object allocation.  The configured ld_qos_threshold is
+ * scaled by 2^20 (presumably a value in MB -- confirm against the lov_desc
+ * definition); it is widened to 64 bits first so the shift cannot overflow
+ * a narrower field.  NB: expands to a reference to a local named "lov". */
+#define QOS_MIN                 ((__u64)lov->desc.ld_qos_threshold << 20)
+
+/* Free bytes (os_bavail * os_bsize) and free inodes (os_ffree), read from
+ * the obd_osfs record of the obd behind the target's export.  The argument
+ * is parenthesized so any pointer expression can be passed safely. */
+#define TGT_BAVAIL(tgt)         ((tgt)->ltd_exp->exp_obd->obd_osfs.os_bavail * \
+                                 (tgt)->ltd_exp->exp_obd->obd_osfs.os_bsize)
+#define TGT_FFREE(tgt)          ((tgt)->ltd_exp->exp_obd->obd_osfs.os_ffree)
+
+/*
+ * Allocate objects on OSTs with a free-space-weighted random algorithm.
+ *
+ * exp        - LOV export; its obd supplies the lov_obd and the aggregate
+ *              obd_osfs numbers used for the threshold check
+ * idx_arr    - output array; must hold at least *stripe_cnt entries on entry
+ * stripe_cnt - in: number of stripes wanted; out: number actually chosen
+ *              (reduced when fewer usable OSTs are found)
+ *
+ * Whenever rc is -EAGAIN at out_free the request is handed to alloc_rr():
+ * scratch allocation failure, fewer than two targets, total free space at
+ * or below QOS_MIN, an empty qos_bavail_list, nearly equal free space at
+ * both ends of the list, or a stripe count that dropped to zero.
+ *
+ * Returns 0 on success, -ENOSPC when no candidate has free space,
+ * otherwise whatever alloc_rr() returns.
+ */
+static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        unsigned ost_count = lov->desc.ld_tgt_count;
+        __u64 cur_bavail, rand, *availspace, total_bavail = 0;
+        int *indexes, nfound, good_osts, i, warn = 0, rc = 0;
+        struct lov_tgt_desc *tgt;
+        int shift;
+        static time_t last_warn = 0;
+        time_t now = cfs_time_current_sec();
+        ENTRY;
+
+        availspace = NULL;
+        indexes = NULL;
+        OBD_ALLOC(availspace, sizeof(__u64) * ost_count);
+        /* The candidate scan below records every eligible target, not just
+         * *stripe_cnt of them, so indexes[] must be as large as
+         * availspace[]; sizing it by the stripe count overflowed the
+         * buffer whenever more OSTs qualified than stripes were wanted. */
+        OBD_ALLOC(indexes, sizeof(int) * ost_count);
+        if (!availspace || !indexes)
+                GOTO(out_free, rc = -EAGAIN);
+
+        mutex_down(&lov->lov_lock);
+        /* if free space is below some threshold, just go
+         * to do round-robin allocation */
+        total_bavail = (exp->exp_obd->obd_osfs.os_bavail *
+                        exp->exp_obd->obd_osfs.os_bsize);
+        if (ost_count < 2 || total_bavail <= QOS_MIN) {
+                mutex_up(&lov->lov_lock);
+                GOTO(out_free, rc = -EAGAIN);
+        }
+
+        /* if each ost has almost same free space, go to
+         * do rr allocation for better creation performance */
+        if (!list_empty(&lov->qos_bavail_list)) {
+                __u64 max, min, val;
+                tgt = list_entry(lov->qos_bavail_list.next,
+                                 struct lov_tgt_desc, qos_bavail_list);
+                max = TGT_BAVAIL(tgt);
+                tgt = list_entry(lov->qos_bavail_list.prev,
+                                 struct lov_tgt_desc, qos_bavail_list);
+                min = TGT_BAVAIL(tgt);
+
+                val = (max >= min) ? (max - min) : (min - max);
+                min = (min * 13) >> 8;          /* less than 5% of gap */
+
+                if (val < min) {
+                        mutex_up(&lov->lov_lock);
+                        GOTO(out_free, rc = -EAGAIN);
+                }
+        } else {
+                mutex_up(&lov->lov_lock);
+                GOTO(out_free, rc = -EAGAIN);
+        }
+
+        total_bavail = 0;
+        good_osts = 0;
+        /* warn zero available space/inode every 30 min */
+        if (cfs_time_sub(now, last_warn) > 60 * 30)
+                warn = 1;
+        /* Find all the OSTs big enough to be stripe candidates */
+        list_for_each_entry(tgt, &lov->qos_bavail_list, qos_bavail_list) {
+                if (!tgt->active)
+                        continue;
+                if (!TGT_BAVAIL(tgt)) {
+                        if (warn) {
+                                CWARN("no free space on %s\n",
+                                      tgt->uuid.uuid);
+                                last_warn = now;
+                        }
+                        continue;
+                }
+                if (!TGT_FFREE(tgt)) {
+                        if (warn) {
+                                CWARN("no free inodes on %s\n",
+                                      tgt->uuid.uuid);
+                                last_warn = now;
+                        }
+                        continue;
+                }
+                /* We can stop if we have enough good osts and our osts
+                   are getting too small */
+                if ((TGT_BAVAIL(tgt) <= QOS_MIN) && (good_osts >= *stripe_cnt))
+                        break;
+                availspace[good_osts] = TGT_BAVAIL(tgt);
+                indexes[good_osts] = tgt->index;
+                total_bavail += availspace[good_osts];
+                good_osts++;
+        }
+
+        mutex_up(&lov->lov_lock);
+
+        if (!total_bavail)
+                GOTO(out_free, rc = -ENOSPC);
+
+        /* if we don't have enough good OSTs, we reduce the stripe count. */
+        if (good_osts < *stripe_cnt)
+                *stripe_cnt = good_osts;
+
+        if (!*stripe_cnt)
+                GOTO(out_free, rc = -EAGAIN);
+
+        /* The point of all this shift and rand is to choose a 64-bit
+           random number between 0 and total_bavail. Apparently '%' doesn't
+           work for 64bit numbers. */
+        nfound = shift = 0;
+        /* stop at 64: shifting a 64-bit value by its full width is
+         * undefined behaviour */
+        while (shift < 64 && (total_bavail >> shift) > 0)
+                shift++;
+        shift++;
+        /* Find enough OSTs with free space weighted random allocation */
+        while (nfound < *stripe_cnt) {
+                cur_bavail = 0;
+
+                /* If the total storage left is < 4GB, don't use random order,
+                   store in biggest OST first. (Low storage situation.)
+                   Otherwise, choose a 64bit random number... */
+                rand = (shift < 32 ? 0ULL : (__u64)ll_rand() << 32) | ll_rand();
+                /* ... mask everything above shift... */
+                if (shift < 64)
+                        rand &= ((1ULL << shift) - 1);
+                /* ... and this while should execute at most once... */
+                while (rand > total_bavail)
+                        rand -= total_bavail;
+                /* ... leaving us a 64bit number between 0 and total_bavail. */
+
+                /* Try to fit in bigger OSTs first. On average, this will
+                   fill more toward the front of the OST array */
+                for (i = 0; i < good_osts; i++) {
+                        cur_bavail += availspace[i];
+                        if (cur_bavail >= rand) {
+                                /* zero the slot so this OST cannot be
+                                 * picked a second time */
+                                total_bavail -= availspace[i];
+                                availspace[i] = 0;
+                                idx_arr[nfound] = indexes[i];
+                                nfound++;
+                                break;
+                        }
+                }
+                /* should never satisfy below condition */
+                if (cur_bavail == 0)
+                        break;
+        }
+        LASSERT(nfound == *stripe_cnt);
+
+out_free:
+        /* both scratch arrays are sized by ost_count, which never changes
+         * in this function (unlike *stripe_cnt) */
+        if (availspace)
+                OBD_FREE(availspace, sizeof(__u64) * ost_count);
+        if (indexes)
+                OBD_FREE(indexes, sizeof(int) * ost_count);
+        if (rc != -EAGAIN)
+                /* rc == 0 or err */
+                RETURN(rc);
+
+        rc = alloc_rr(lov, idx_arr, stripe_cnt);
+        RETURN(rc);
+}
 
+/*
+ * Build the array of OST indices for the stripes of @lsm.
+ *
+ * One int per lsm_stripe_count is allocated and preset to -1.  When @newea
+ * is set, or the first stripe's OST index is not below ld_tgt_count, the
+ * slots are filled by alloc_qos(); otherwise by alloc_specific().
+ *
+ * On success the array is handed back through @idx_arr, its allocated
+ * length through @arr_cnt, and the stripe count left in place by the
+ * allocator is returned.  On failure the array is freed, *arr_cnt is set
+ * to 0 and the error code is returned.
+ */
+static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, 
+                           int newea, int **idx_arr, int *arr_cnt)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        int nstripes = lsm->lsm_stripe_count;
+        int *slots = NULL;
+        int slot = 0;
+        int rc = 0;
+        ENTRY;
+
+        *arr_cnt = nstripes;
+        OBD_ALLOC(slots, *arr_cnt * sizeof(int));
+        if (slots == NULL)
+                RETURN(-ENOMEM);
+
+        /* -1 marks a slot that no allocator has filled in */
+        while (slot < *arr_cnt)
+                slots[slot++] = -1;
+
+        if (!newea &&
+            lsm->lsm_oinfo[0].loi_ost_idx < lov->desc.ld_tgt_count)
+                rc = alloc_specific(lov, lsm, slots);
+        else
+                rc = alloc_qos(exp, slots, &nstripes);
+
+        if (rc)
+                GOTO(out_arr, rc);
+
+        *idx_arr = slots;
+        RETURN(nstripes);
+out_arr:
+        OBD_FREE(slots, *arr_cnt * sizeof(int));
+        *arr_cnt = 0;
+        RETURN(rc);
+}
+
+/* Free an int index array of @arr_cnt elements; a zero count means there
+ * is nothing to release (alloc_idx_array() reports 0 after it has already
+ * freed the array itself). */
+static void free_idx_array(int *idx_arr, int arr_cnt)
+{
+        if (arr_cnt == 0)
+                return;
+
+        OBD_FREE(idx_arr, arr_cnt * sizeof(int));
+}
+
+int qos_prep_create(struct obd_export *exp, struct lov_request_set *set)
+{
+        struct lov_obd *lov = &exp->exp_obd->u.lov;
+        struct lov_stripe_md *lsm;
+        struct obdo *src_oa = set->set_oa;
+        struct obd_trans_info *oti = set->set_oti;
+        int i, stripes, rc = 0, newea = 0;
+        int *idx_arr, idx_cnt = 0;
+        ENTRY;
+
+        LASSERT(src_oa->o_valid & OBD_MD_FLID);
+        if (set->set_md == NULL) {
+                int stripe_cnt = lov_get_stripecnt(lov, 0);
+
+                /* If the MDS file was truncated up to some size, stripe over
+                 * enough OSTs to allow the file to be created at that size. 
+                 * This may mean we use more than the default # of stripes. */
+                if (src_oa->o_valid & OBD_MD_FLSIZE) {
+                        struct lov_tgt_desc *tgt;
+                        
+                        /* Find the smallest number of stripes we can use 
+                           (up to # of active osts). */
+                        stripes = 1;
+                        mutex_down(&lov->lov_lock);
+                        list_for_each_entry(tgt, &lov->qos_bavail_list, 
+                                            qos_bavail_list) {
+                                if (!tgt->active)
+                                        continue;
+                                /* All earlier tgts have at least this many 
+                                   bytes available also, since our list is
+                                   sorted by size  */
+                                if (TGT_BAVAIL(tgt) * stripes > src_oa->o_size)
+                                        break;
+                                stripes++;
+                        }
+                        mutex_up(&lov->lov_lock);
+
+                        if (stripes < stripe_cnt)
+                                stripes = stripe_cnt;
+                } else {
+                        stripes = stripe_cnt;
+                }
+
+                rc = lov_alloc_memmd(&set->set_md, stripes, 
+                                     lov->desc.ld_pattern ?
+                                     lov->desc.ld_pattern : LOV_PATTERN_RAID0,
+                                     LOV_MAGIC);
+                if (rc < 0)
+                        GOTO(out_err, rc);
+                rc = 0;
+                newea = 1;
+        }
+        lsm = set->set_md;
+       
+        lsm->lsm_object_id = src_oa->o_id;
+        if (!lsm->lsm_stripe_size)
+                lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size;
+        if (!lsm->lsm_pattern) {
+                LASSERT(lov->desc.ld_pattern);
+                lsm->lsm_pattern = lov->desc.ld_pattern;
+        }
+
+        stripes = alloc_idx_array(exp, lsm, newea, &idx_arr, &idx_cnt);
+        LASSERT(stripes <= lsm->lsm_stripe_count);
+        if (stripes <= 0)
+                GOTO(out_err, rc = stripes ? stripes : -EIO);
+        
+        for (i = 0; i < stripes; i++) {
+                struct lov_request *req;
+                int ost_idx = idx_arr[i];
+                LASSERT(ost_idx >= 0);
+                
                 OBD_ALLOC(req, sizeof(*req));
                 if (req == NULL)
-                        GOTO(out, rc = -ENOMEM);
+                        GOTO(out_err, rc = -ENOMEM);
+                lov_set_add_req(req, set);
 
                 req->rq_buflen = sizeof(*req->rq_md);
                 OBD_ALLOC(req->rq_md, req->rq_buflen);
-                if (req->rq_md == NULL) {
-                        OBD_FREE_PTR(req);
-                        GOTO(out, rc = -ENOMEM);
-                }
-
+                if (req->rq_md == NULL)
+                        GOTO(out_err, rc = -ENOMEM);
+                
                 req->rq_oa = obdo_alloc();
-                if (req->rq_oa == NULL) {
-                        OBD_FREE_PTR(req->rq_md);
-                        OBD_FREE_PTR(req);
-                        GOTO(out, rc = -ENOMEM);
-                }
-
+                if (req->rq_oa == NULL)
+                        GOTO(out_err, rc = -ENOMEM);
+                
                 req->rq_idx = ost_idx;
                 req->rq_stripe = i;
                 /* create data objects with "parent" OA */
@@ -187,41 +480,74 @@ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea)
                  *     stripe which holds the existing file size.
                  */
                 if (src_oa->o_valid & OBD_MD_FLSIZE) {
-                        if (lov_stripe_offset(lsm, src_oa->o_size, i,
-                                              &req->rq_oa->o_size) < 0 &&
-                            req->rq_oa->o_size)
-                                req->rq_oa->o_size--;
+                        req->rq_oa->o_size = 
+                                lov_size_to_stripe(lsm, src_oa->o_size, i);
 
                         CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n",
                                i, req->rq_oa->o_size, src_oa->o_size);
                 }
 
-                lov_set_add_req(req, set);
-
-                /* If we have allocated enough objects, we are OK */
-                if (set->set_count == lsm->lsm_stripe_count)
-                        GOTO(out, rc = 0);
         }
+        LASSERT(set->set_count == stripes);
 
-        if (set->set_count == 0)
-                GOTO(out, rc = -EIO);
-
-        /* If we were passed specific striping params, then a failure to
-         * meet those requirements is an error, since we can't reallocate
-         * that memory (it might be part of a larger array or something).
-         *
-         * We can only get here if lsm_stripe_count was originally > 1.
-         */
-        if (!newea) {
-                CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n",
-                       lsm->lsm_object_id, set->set_count,
-                       lsm->lsm_stripe_count, rc);
-                rc = rc ? rc : -EFBIG;
-        } else {
+        if (stripes < lsm->lsm_stripe_count)
                 qos_shrink_lsm(set);
-                rc = 0;
+
+        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
+                oti_alloc_cookies(oti, set->set_count);
+                if (!oti->oti_logcookies)
+                        GOTO(out_err, rc = -ENOMEM);
+                set->set_cookies = oti->oti_logcookies;
         }
-out:
+out_err:
+        if (newea && rc)
+                obd_free_memmd(exp, &set->set_md);
+        free_idx_array(idx_arr, idx_cnt);
+        EXIT;
+        return rc;
+}
 
-        RETURN(rc);
+/*
+ * Re-sort @tgt inside the list headed at (lov)->list_name after the key
+ * produced by value(tgt) has changed.  The list is kept in descending
+ * order of value():
+ *
+ *  - if @tgt is not linked yet (its own node is empty) it is first added
+ *    at the head of the list;
+ *  - a forward pass walks towards the tail while the following entries
+ *    have a larger value than @tgt, then re-inserts @tgt right before the
+ *    entry where the walk stopped;
+ *  - a backward pass walks towards the head while the preceding entries
+ *    have a smaller value than @tgt, then re-inserts @tgt right after the
+ *    entry where the walk stopped.
+ *
+ * value() is evaluated several times, so it must have no side effects.
+ * The macro expands to a bare { } block.  The visible caller,
+ * qos_update(), invokes it with lov->lov_lock held.
+ *
+ * Caveat: don't use list_move() on the same list here; entries are
+ * repositioned with list_del_init() followed by list_add().
+ */
+#define list_adjust(tgt, lov, list_name, value) \
+{ \
+        struct list_head *element; \
+        struct lov_tgt_desc *tmp;  \
+        if (list_empty(&(tgt)->list_name)) \
+                list_add(&(tgt)->list_name, &(lov)->list_name); \
+        /* forward pass: skip entries that still outrank tgt */ \
+        element = (tgt)->list_name.next; \
+        while((element != &(lov)->list_name) && \
+              (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \
+              (value(tgt) < value(tmp))) \
+                element = element->next; \
+        if (element != (tgt)->list_name.next) { \
+                list_del_init(&(tgt)->list_name); \
+                /* insert after element->prev, i.e. just before element */ \
+                list_add(&(tgt)->list_name, element->prev); \
+        } \
+        /* backward pass: skip entries that tgt now outranks */ \
+        element = (tgt)->list_name.prev; \
+        while ((element != &(lov)->list_name) && \
+               (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \
+               (value(tgt) > value(tmp))) \
+                element = element->prev; \
+        if (element != (tgt)->list_name.prev) { \
+                list_del_init(&(tgt)->list_name); \
+                /* element is the nearest entry with value >= tgt (or the \
+                 * list head): tgt belongs immediately after it */ \
+                list_add(&(tgt)->list_name, element); \
+        } \
+}
+
+/*
+ * Reposition OST @idx in lov->qos_bavail_list after new statfs data
+ * arrived for it.
+ *
+ * @osfs is only used here to compute the available byte count
+ * (os_bavail * os_bsize) for the warning/debug messages.  The sort key
+ * itself comes from TGT_BAVAIL(tgt), which is defined elsewhere --
+ * NOTE(review): presumably it reads the same statfs data; confirm
+ * against its definition.
+ *
+ * The list update is done under lov->lov_lock.
+ */
+void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs)
+{
+        struct lov_tgt_desc *tgt = &lov->tgts[idx];
+        __u64 bavail;
+        ENTRY;
+
+        bavail = osfs->os_bavail * osfs->os_bsize;
+        if (!bavail)
+                CWARN("ost %d has zero avail space!\n", idx);
+
+        CDEBUG(D_OTHER, "QOS: bfree now "LPU64"\n", bavail);
+
+        mutex_down(&lov->lov_lock);
+        list_adjust(tgt, lov, qos_bavail_list, TGT_BAVAIL);
+        mutex_up(&lov->lov_lock);
+
+        /* balance the ENTRY trace record above */
+        EXIT;
+}
+
index b6b4b62..a3a4372 100644 (file)
 #define DEBUG_SUBSYSTEM S_LOV
 
 #ifdef __KERNEL__
-#include <asm/div64.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/lustre_idl.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre/lustre_idl.h>
 
 #include "lov_internal.h"
 
@@ -44,7 +44,7 @@ static void lov_init_set(struct lov_request_set *set)
         set->set_count = 0;
         set->set_completes = 0;
         set->set_success = 0;
-        INIT_LIST_HEAD(&set->set_list);
+        CFS_INIT_LIST_HEAD(&set->set_list);
         atomic_set(&set->set_refcount, 1);
 }
 
@@ -591,10 +591,8 @@ int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp)
         if (set == NULL)
                 RETURN(0);
         LASSERT(set->set_exp);
-        if (set->set_completes) {
+        if (set->set_completes)
                 rc = create_done(set->set_exp, set, lsmp);
-                /* FIXME update qos data here */
-        }
 
         if (atomic_dec_and_test(&set->set_refcount))
                 lov_finish_set(set);
@@ -649,9 +647,8 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp,
                         struct obdo *src_oa, struct obd_trans_info *oti,
                         struct lov_request_set **reqset)
 {
-        struct lov_obd *lov = &exp->exp_obd->u.lov;
         struct lov_request_set *set;
-        int rc = 0, newea = 0;
+        int rc = 0;
         ENTRY;
 
         OBD_ALLOC(set, sizeof(*set));
@@ -664,51 +661,11 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp,
         set->set_oa = src_oa;
         set->set_oti = oti;
 
-        if (set->set_md == NULL) {
-                int stripes, stripe_cnt;
-                stripe_cnt = lov_get_stripecnt(lov, 0);
-
-                /* If the MDS file was truncated up to some size, stripe over
-                 * enough OSTs to allow the file to be created at that size. */
-                if (src_oa->o_valid & OBD_MD_FLSIZE) {
-                        stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1;
-                        do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12));
-
-                        if (stripes > lov->desc.ld_active_tgt_count)
-                                GOTO(out_set, rc = -EFBIG);
-                        if (stripes < stripe_cnt)
-                                stripes = stripe_cnt;
-                } else {
-                        stripes = stripe_cnt;
-                }
-
-                rc = lov_alloc_memmd(&set->set_md, stripes,
-                                     lov->desc.ld_pattern ?
-                                     lov->desc.ld_pattern : LOV_PATTERN_RAID0, 
-                                     LOV_MAGIC);
-                if (rc < 0)
-                        goto out_set;
-                newea = 1;
-        }
-
-        rc = qos_prep_create(lov, set, newea);
+        rc = qos_prep_create(exp, set);
         if (rc)
-                goto out_lsm;
-
-        if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) {
-                oti_alloc_cookies(oti, set->set_count);
-                if (!oti->oti_logcookies)
-                        goto out_lsm;
-                set->set_cookies = oti->oti_logcookies;
-        }
-        *reqset = set;
-        RETURN(rc);
-
-out_lsm:
-        if (*lsmp == NULL)
-                obd_free_memmd(exp, &set->set_md);
-out_set:
-        lov_fini_create_set(set, lsmp);
+                lov_fini_create_set(set, lsmp);
+        else
+                *reqset = set;
         RETURN(rc);
 }
 
index 12b755b..5ae9f62 100644 (file)
@@ -28,8 +28,8 @@
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <asm/statfs.h>
 #endif
-#include <linux/lprocfs_status.h>
-#include <linux/obd_class.h>
+#include <lprocfs_status.h>
+#include <obd_class.h>
 #include <linux/seq_file.h>
 
 #ifdef LPROCFS
@@ -118,6 +118,68 @@ static int lov_rd_desc_uuid(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n", lov->desc.ld_uuid.uuid);
 }
 
+/*
+ * procfs read handler for "qos_threshold": prints the LOV descriptor's
+ * ld_qos_threshold as "<n> MB" into @page and flags end-of-file.
+ * @data is the owning obd_device.
+ */
+static int lov_rd_qos_threshold(char *page, char **start, off_t off, int count,
+                                int *eof, void *data)
+{
+        struct obd_device *dev = data;
+
+        LASSERT(dev != NULL);
+        *eof = 1;
+        return snprintf(page, count, "%u MB\n",
+                        dev->u.lov.desc.ld_qos_threshold);
+}
+
+/*
+ * procfs write handler for "qos_threshold": parses an integer from
+ * @buffer via lprocfs_write_helper() and stores it in the LOV
+ * descriptor's ld_qos_threshold.
+ *
+ * Returns @count on success, the helper's error code if parsing fails,
+ * or -EINVAL when the parsed value is not strictly positive.
+ */
+static int lov_wr_qos_threshold(struct file *file, const char *buffer,
+                                unsigned long count, void *data)
+{
+        struct obd_device *dev = data;
+        int val;
+        int rc;
+
+        LASSERT(dev != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc != 0)
+                return rc;
+        if (val <= 0)
+                return -EINVAL;
+
+        dev->u.lov.desc.ld_qos_threshold = val;
+        return count;
+}
+
+/*
+ * procfs read handler for "qos_maxage": prints the LOV descriptor's
+ * ld_qos_maxage as "<n> Sec" into @page and flags end-of-file.
+ * @data is the owning obd_device.
+ */
+static int lov_rd_qos_maxage(char *page, char **start, off_t off, int count,
+                             int *eof, void *data)
+{
+        struct obd_device *dev = data;
+
+        LASSERT(dev != NULL);
+        *eof = 1;
+        return snprintf(page, count, "%u Sec\n",
+                        dev->u.lov.desc.ld_qos_maxage);
+}
+
+/*
+ * procfs write handler for "qos_maxage": parses an integer from @buffer
+ * via lprocfs_write_helper() and stores it in the LOV descriptor's
+ * ld_qos_maxage.
+ *
+ * Returns @count on success, the helper's error code if parsing fails,
+ * or -EINVAL when the parsed value is not strictly positive.
+ */
+static int lov_wr_qos_maxage(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        struct obd_device *dev = data;
+        int val;
+        int rc;
+
+        LASSERT(dev != NULL);
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc != 0)
+                return rc;
+        if (val <= 0)
+                return -EINVAL;
+
+        dev->u.lov.desc.ld_qos_maxage = val;
+        return count;
+}
+
 static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos)
 {
         struct obd_device *dev = p->private;
@@ -188,6 +250,8 @@ struct lprocfs_vars lprocfs_obd_vars[] = {
         { "kbytesfree",   lprocfs_rd_kbytesfree,  0, 0 },
         { "kbytesavail",  lprocfs_rd_kbytesavail, 0, 0 },
         { "desc_uuid",    lov_rd_desc_uuid,       0, 0 },
+        { "qos_threshold",lov_rd_qos_threshold, lov_wr_qos_threshold, 0 },
+        { "qos_maxage",   lov_rd_qos_maxage, lov_wr_qos_maxage, 0 },
         { 0 }
 };
 
diff --git a/lustre/lvfs/Info.plist b/lustre/lvfs/Info.plist
new file mode 100644 (file)
index 0000000..44439e2
--- /dev/null
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>lvfs</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.lvfs</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string> 
+       <key>OSBundleCompatibleVersion</key> 
+       <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict>
+               <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.unsupported</key>
+               <string>8.0.0b1</string>
+               <key>com.clusterfs.lustre.libcfs</key> 
+               <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index 10d8904..9185580 100644 (file)
@@ -16,6 +16,8 @@ endif
 
 if MODULES
 
+if LINUX
+
 modulefs_DATA := lvfs$(KMODEXT)
 
 if SERVER
@@ -43,13 +45,32 @@ fsfilt_ldiskfs.c: fsfilt_ext3.c
 fsfilt_ldiskfs_quota.h: fsfilt_ext3_quota.h
        sed $(strip $(ldiskfs_sed_flags)) $< > $@
 
+endif # LINUX
 
-else
+if DARWIN
+
+macos_PROGRAMS := lvfs
+
+lvfs_SOURCES := lvfs_darwin.c
+
+lvfs_CFLAGS := $(EXTRA_KCFLAGS)
+lvfs_LDFLAGS := $(EXTRA_KLDFLAGS)
+lvfs_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # DARWIN
+
+else # MODULES
 
 sources:
 
 endif # MODULES
 
+install-data-hook: $(install_data_hook)
+
 DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \
        lvfs_internal.h lvfs_linux.c lvfs_userfs.c \
        upcall_cache.c \
index d3ca4b8..6f88917 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/kmod.h>
 #include <linux/slab.h>
 #include <libcfs/kp30.h>
-#include <linux/lustre_fsfilt.h>
+#include <lustre_fsfilt.h>
 
 LIST_HEAD(fsfilt_types);
 
index ab9ba93..533f0d3 100644 (file)
 #endif
 
 #include <libcfs/kp30.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_quota.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_quota.h>
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
 #include <linux/iobuf.h>
 #endif
@@ -524,10 +524,6 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle,
 
         LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0);
 
-        if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */)
-                CWARN("setting EA on %lu/%u again... interesting\n",
-                       inode->i_ino, inode->i_generation);
-
         lock_24kernel();
         rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED,
                                    name, lmm, lmm_size, 0);
@@ -864,7 +860,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
                 return EXT_CONTINUE;
         }
 
-        tgen = EXT_GENERATION(tree);
+        tgen = EXT_GENERATION(EXT_ROOT_HDR(tree));
         count = ext3_ext_calc_credits_for_insert(tree, path);
         ext3_up_truncate_sem(inode);
 
@@ -877,7 +873,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree,
         }
 
         ext3_down_truncate_sem(inode);
-        if (tgen != EXT_GENERATION(tree)) {
+        if (tgen != EXT_GENERATION(EXT_ROOT_HDR(tree))) {
                 /* the tree has changed. so path can be invalid at moment */
                 lock_24kernel();
                 journal_stop(handle);
@@ -2015,8 +2011,11 @@ out:
 
 static void __exit fsfilt_ext3_exit(void)
 {
+        int rc;
+
         fsfilt_unregister_ops(&fsfilt_ext3_ops);
-        LASSERT(kmem_cache_destroy(fcb_cache) == 0);
+        rc = kmem_cache_destroy(fcb_cache);
+        LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n");
 }
 
 module_init(fsfilt_ext3_init);
index 68a049e..20cbb3f 100644 (file)
@@ -42,9 +42,9 @@
 #include <asm/statfs.h>
 #endif
 #include <libcfs/kp30.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <obd_class.h>
 #include <linux/module.h>
 #include <linux/init.h>
 
index 08f4f14..1834616 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
-#include <linux/lvfs.h>
+#include <lvfs.h>
 
 struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *ctxt, __u64 id,
                                __u32 gen, __u64 gr, void *data)
 {
         return ctxt->cb_ops.l_fid2dentry(id, gen, gr, data);
 }
-
-
 EXPORT_SYMBOL(lvfs_fid2dentry);
diff --git a/lustre/lvfs/lvfs_darwin.c b/lustre/lvfs/lvfs_darwin.c
new file mode 100644 (file)
index 0000000..1feb31c
--- /dev/null
@@ -0,0 +1,45 @@
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <libcfs/libcfs.h>
+#include <obd.h>
+#include <lvfs.h>
+#include <lustre_lib.h>
+
+atomic_t obd_memory;
+int obd_memmax;
+
+/*
+ * XXX: the run-context functions are not expected to be used in this
+ * build (open question from the original author).  This stub takes the
+ * usual push_ctxt() arguments but does nothing except trigger LBUG().
+ */
+void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
+              struct lvfs_ucred *cred)
+{
+       LBUG();
+}
+
+/*
+ * Stub counterpart of push_ctxt(): no context is restored here, any
+ * call simply triggers LBUG().
+ */
+void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx,
+              struct lvfs_ucred *cred)
+{
+       LBUG();
+}
+
+/* Module init hook: nothing to set up here, always reports success (0). */
+static int __init lvfs_init(void)
+{
+       ENTRY;
+       RETURN(0);
+}
+
+/*
+ * Module exit hook: reads the obd_memory counter and logs it together
+ * with obd_memmax.  A non-zero counter is reported at D_ERROR as leaked
+ * memory, otherwise the message goes out at D_INFO.
+ */
+static void __exit lvfs_exit(void)
+{
+       int leaked;
+       ENTRY;
+
+       leaked = atomic_read(&obd_memory);
+       CDEBUG(leaked ? D_ERROR : D_INFO,
+              "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
+
+       /* balance the ENTRY trace record, as lvfs_linux_exit() does */
+       EXIT;
+       return;
+}
+
+cfs_module(lvfs, "1.0.0", lvfs_init, lvfs_exit);
+
index 69c3616..3f662d7 100644 (file)
 #include <linux/quotaops.h>
 #include <linux/version.h>
 #include <libcfs/kp30.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
+#include <lustre_fsfilt.h>
+#include <obd.h>
+#include <obd_class.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/lustre_compat25.h>
-#include <linux/lvfs.h>
+#include <lvfs.h>
 #include "lvfs_internal.h"
 
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_quota.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre_quota.h>
 
 atomic_t obd_memory;
 int obd_memmax;
@@ -294,8 +294,9 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix)
 
                 /* Fixup directory permissions if necessary */
                 if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) {
-                        CWARN("fixing permissions on %s from %o to %o\n",
-                              name, old_mode, mode);
+                        CDEBUG(D_CONFIG, 
+                               "fixing permissions on %s from %o to %o\n",
+                               name, old_mode, mode);
                         dchild->d_inode->i_mode = (mode & S_IALLUGO) |
                                                   (old_mode & ~S_IALLUGO);
                         mark_inode_dirty(dchild->d_inode);
@@ -505,6 +506,7 @@ static void __exit lvfs_linux_exit(void)
         CDEBUG_EX(leaked ? D_ERROR : D_INFO,
                "obd mem max: %d leaked: %d\n", obd_memmax, leaked);
 
+        EXIT;
         return;
 }
 
index a6140d5..28afe5f 100644 (file)
  */
 
 #include <liblustre.h>
-#include <linux/lvfs.h>
+#include <lvfs.h>
 #include "lvfs_internal.h"
 
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
+#include <obd.h>
+#include <lustre_lib.h>
 
 /* XXX currently ctxt functions should not be used ?? */
 void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx,
index ea33ee7..6de1dac 100644 (file)
@@ -43,8 +43,8 @@
 #include <linux/slab.h>
 #include <asm/segment.h>
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4)
 struct group_info *groups_alloc(int ngroups)
index 195bbff..0092084 100644 (file)
@@ -26,8 +26,8 @@
 
 #include <linux/version.h>
 #include <linux/vfs.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 
 #ifdef LPROCFS
 static struct lprocfs_vars lprocfs_obd_vars[] = {
index 852046c..07e3faf 100644 (file)
@@ -25,7 +25,7 @@
 #ifndef _MDC_INTERNAL_H
 #define _MDC_INTERNAL_H
 
-#include <linux/lustre_mdc.h>
+#include <lustre_mdc.h>
 
 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
                        __u64 valid, struct lu_fid *fid, int ea_size);
index fef2e36..aa10f6a 100644 (file)
@@ -27,9 +27,8 @@
 # include <fcntl.h>
 # include <liblustre.h>
 #endif
-#include <linux/lustre_idl.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_mdc.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
 #include "mdc_internal.h"
 
 #ifndef __KERNEL__
@@ -193,7 +192,9 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset,
                 rec->sa_atime = LTIME_S(iattr->ia_atime);
                 rec->sa_mtime = LTIME_S(iattr->ia_mtime);
                 rec->sa_ctime = LTIME_S(iattr->ia_ctime);
-                rec->sa_attr_flags = iattr->ia_attr_flags;
+                rec->sa_attr_flags =
+                               ((struct ll_iattr_struct *)iattr)->ia_attr_flags;
+
                 if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid))
                         rec->sa_suppgid = iattr->ia_gid;
                 else
index a21d911..10202b1 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mdc.h>
-#include <linux/lustre_acl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h>
+#include <lprocfs_status.h>
 #include "mdc_internal.h"
 
 int it_disposition(struct lookup_intent *it, int flag)
@@ -374,7 +373,8 @@ int mdc_enqueue(struct obd_export *exp,
                 repsize[repbufcnt++] = obddev->u.cli.cl_max_mds_cookiesize;
         } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
                 obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE |
-                                  OBD_MD_FLACL | OBD_MD_FLMODEASIZE;
+                                  OBD_MD_FLACL | OBD_MD_FLMODEASIZE |
+                                  OBD_MD_FLDIREA;
                 size[req_buffers++] = sizeof(struct mdt_body);
                 size[req_buffers++] = op_data->namelen + 1;
 
index 9d05b87..e0ec343 100644 (file)
@@ -35,8 +35,7 @@
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mdc.h>
+#include <obd_class.h>
 #include "mdc_internal.h"
 
 /* mdc_setattr does its own semaphore handling */
index 6978cc6..18d7395 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mdc.h>
-#include <linux/md_object.h>
-#include <linux/lustre_acl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_mds.h> /* for LUSTRE_POSIX_ACL_MAX_SIZE */
+#include <md_object.h>
+#include <lprocfs_status.h>
 #include "mdc_internal.h"
 
+static quota_interface_t *quota_interface;
+
 #define REQUEST_MINOR 244
 
 static int mdc_cleanup(struct obd_device *obd);
@@ -662,6 +663,9 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data,
         EXIT;
         *request = req;
  out:
+        if (rc != 0 && req && req->rq_commit_cb)
+                req->rq_commit_cb(req);
+
         return rc;
 }
 
@@ -806,8 +810,9 @@ out:
         return rc;
 }
 
-int mdc_set_info(struct obd_export *exp, obd_count keylen,
-                 void *key, obd_count vallen, void *val)
+int mdc_set_info_async(struct obd_export *exp, obd_count keylen,
+                       void *key, obd_count vallen, void *val,
+                       struct ptlrpc_request_set *set)
 {
         struct obd_import *imp = class_exp2cliimp(exp);
         int rc = -EINVAL;
@@ -853,8 +858,14 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen,
                         RETURN(-ENOMEM);
 
                 req->rq_replen = lustre_msg_size(0, NULL);
-                rc = ptlrpc_queue_wait(req);
-                ptlrpc_req_finished(req);
+                if (set) {
+                        rc = 0;
+                        ptlrpc_set_add_req(set, req);
+                        ptlrpc_check_set(set);
+                } else {
+                        rc = ptlrpc_queue_wait(req);
+                        ptlrpc_req_finished(req);
+                }
                 RETURN(rc);
         }
         RETURN(rc);
@@ -892,7 +903,7 @@ out_req:
 }
 
 static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                      unsigned long max_age)
+                      cfs_time_t max_age)
 {
         struct ptlrpc_request *req;
         struct obd_statfs *msfs;
@@ -1243,7 +1254,7 @@ struct obd_ops mdc_obd_ops = {
         .o_connect          = client_connect_import,
         .o_disconnect       = client_disconnect_export,
         .o_iocontrol        = mdc_iocontrol,
-        .o_set_info         = mdc_set_info,
+        .o_set_info_async   = mdc_set_info_async,
         .o_statfs           = mdc_statfs,
         .o_pin              = mdc_pin,
         .o_unpin            = mdc_unpin,
@@ -1279,7 +1290,6 @@ struct md_ops mdc_md_ops = {
         .m_clear_open_replay_data = mdc_clear_open_replay_data
 };
 
-static quota_interface_t *quota_interface;
 extern quota_interface_t mdc_quota_interface;
 
 int __init mdc_init(void)
index 5c1ff99..a0c7c2f 100644 (file)
 
 #include <linux/module.h>
 
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_ver.h>
-#include <linux/obd_support.h>
-#include <linux/lprocfs_status.h>
-
-
-#include <linux/lu_object.h>
-#include <linux/md_object.h>
-#include <linux/dt_object.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
+#include <obd_support.h>
+#include <lprocfs_status.h>
+
+#include <lu_object.h>
+#include <md_object.h>
+#include <dt_object.h>
 
 #include "mdd_internal.h"
 
index 87b13f3..aa39e64 100644 (file)
@@ -4,9 +4,8 @@
 #ifndef _MDD_INTERNAL_H
 #define _MDD_INTERNAL_H
 
-#include <linux/md_object.h>
-
 #include <asm/semaphore.h>
+#include <md_object.h>
 
 struct dt_device;
 struct file;
index 8cc953d..68d3456 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_MDS
 
+#include <lustre_mds.h>
 #include <linux/module.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_acl.h>
-#include <linux/lustre_dlm.h>
 #include <linux/init.h>
-#include <linux/obd_class.h>
 #include <linux/random.h>
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #else
 # include <linux/locks.h>
 #endif
-#include <linux/obd_lov.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/lustre_quota.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_ver.h>
+
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <obd_lov.h>
+#include <lustre_fsfilt.h>
+#include <lprocfs_status.h>
+#include <lustre_commit_confd.h>
+#include <lustre_quota.h>
+#include <lustre_disk.h>
+#include <lustre_ver.h>
 
 #include "mds_internal.h"
 
@@ -228,9 +227,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid,
 
         if (inode->i_generation == 0 || inode->i_nlink == 0) {
                 LCONSOLE_WARN("Found inode with zero generation or link -- this"
-                              " may indicate disk corruption (inode: %lu, link:"
-                              " %lu, count: %d)\n", inode->i_ino,
-                              (unsigned long)inode->i_nlink,
+                              " may indicate disk corruption (inode: %lu/%u, "
+                              "link %lu, count %d)\n", inode->i_ino,
+                              inode->i_generation,(unsigned long)inode->i_nlink,
                               atomic_read(&inode->i_count));
                 dput(result);
                 RETURN(ERR_PTR(-ENOENT));
@@ -707,7 +706,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode,
 {
         struct mds_obd *mds = mds_req2mds(req);
         struct mds_body *body;
-        int rc, size[2] = {sizeof(*body)}, bufcount = 1;
+        int rc, size[3] = {sizeof(*body)}, bufcount = 1;
         ENTRY;
 
         body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body));
@@ -1939,7 +1938,6 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
         }
 
         label = fsfilt_get_label(obd, obd->u.obt.obt_sb);
-
         if (obd->obd_recovering) {
                 LCONSOLE_WARN("MDT %s now serving %s (%s%s%s), but will be in "
                               "recovery until %d %s reconnect, or if no clients"
@@ -1952,8 +1950,8 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
                               obd->obd_recoverable_clients,
                               (obd->obd_recoverable_clients == 1) ?
                               "client" : "clients",
-                              (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60,
-                              (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60,
+                              (int)(OBD_RECOVERY_TIMEOUT) / 60,
+                              (int)(OBD_RECOVERY_TIMEOUT) % 60,
                               obd->obd_name);
         } else {
                 LCONSOLE_INFO("MDT %s now serving %s (%s%s%s) with recovery "
@@ -2381,7 +2379,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns,
                 break;
         default:
                 CERROR("Unhandled intent "LPD64"\n", it->opc);
-                LBUG();
+                RETURN(-EFAULT);
         }
 
         /* By this point, whatever function we called above must have either
index 92d351a..1140a61 100644 (file)
@@ -28,9 +28,9 @@
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <asm/statfs.h>
 #endif
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include "mds_internal.h"
 
 #ifdef LPROCFS
@@ -52,6 +52,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
         struct obd_device *obd = data;
         struct mds_obd *mds = &obd->u.mds;
         char tmpbuf[sizeof(struct obd_uuid)];
+        struct ptlrpc_request_set *set;
         int rc;
 
         sscanf(buffer, "%40s", tmpbuf);
@@ -59,14 +60,25 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
         if (strncmp(tmpbuf, "nid:", 4) != 0)
                 return lprocfs_wr_evict_client(file, buffer, count, data);
 
-        obd_export_evict_by_nid(obd, tmpbuf+4);
+        set = ptlrpc_prep_set();
+        if (!set)
+                return -ENOMEM;
 
-        rc = obd_set_info(mds->mds_osc_exp, strlen("evict_by_nid"),
-                          "evict_by_nid", strlen(tmpbuf + 4) + 1, tmpbuf + 4);
+        rc = obd_set_info_async(mds->mds_osc_exp, strlen("evict_by_nid"),
+                                "evict_by_nid", strlen(tmpbuf + 4) + 1,
+                                 tmpbuf + 4, set);
         if (rc)
                 CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4,
                        rc);
 
+        ptlrpc_check_set(set);
+
+        obd_export_evict_by_nid(obd, tmpbuf+4);
+        rc = ptlrpc_set_wait(set);
+        if (rc)
+                CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4,
+                       rc);
+        ptlrpc_set_destroy(set);
         return count;
 }
 
index 72af624..124d178 100644 (file)
 #include <linux/kmod.h>
 #include <linux/version.h>
 #include <linux/sched.h>
-#include <linux/lustre_quota.h>
+#include <lustre_quota.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <linux/mount.h>
 #endif
-#include <linux/lustre_mds.h>
-#include <linux/obd_class.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_disk.h>
+#include <lustre_mds.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
 #include <libcfs/list.h>
 
 #include "mds_internal.h"
 
-#define HEALTH_CHECK "health_check"
 
 /* Add client data to the MDS.  We use a bitmap to locate a free space
  * in the last_rcvd file if cl_off is -1 (i.e. a new client).
@@ -237,7 +236,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
         mds->mds_server_data = lsd;
 
         if (last_rcvd_size == 0) {
-                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+                LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
 
                 memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(lsd->lsd_uuid));
                 lsd->lsd_last_transno = 0;
@@ -255,8 +254,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                         GOTO(err_msd, rc);
                 }
                 if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
-                        CERROR("OBD UUID %s does not match last_rcvd UUID %s\n",
-                               obd->obd_uuid.uuid, lsd->lsd_uuid);
+                        LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
+                                       " disk %s. Were the /dev/ assignments "
+                                       "rearranged?\n",
+                                       obd->obd_uuid.uuid, lsd->lsd_uuid);
                         GOTO(err_msd, rc = -EINVAL);
                 }
                 mount_count = le64_to_cpu(lsd->lsd_mount_count);
@@ -392,7 +393,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file)
                 obd->obd_recovery_start = CURRENT_SECONDS;
                 /* Only used for lprocfs_status */
                 obd->obd_recovery_end = obd->obd_recovery_start +
-                        OBD_RECOVERY_TIMEOUT / HZ;
+                        OBD_RECOVERY_TIMEOUT;
         }
 
         mds->mds_mount_count = mount_count + 1;
@@ -426,6 +427,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
                 RETURN(rc);
 
         mds->mds_vfsmnt = mnt;
+        /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? */
         obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb;
 
         fsfilt_setup(obd, obd->u.obt.obt_sb);
index 3c53564..f67a19f 100644 (file)
@@ -5,8 +5,8 @@
 #ifndef _MDS_INTERNAL_H
 #define _MDS_INTERNAL_H
 
-#include <linux/lustre_mds.h>
-#include <linux/lustre_disk.h>
+#include <lustre_disk.h>
+#include <lustre_mds.h>
 
 #define MDT_ROCOMPAT_SUPP       (OBD_ROCOMPAT_LOVOBJID)
 #define MDT_INCOMPAT_SUPP       (OBD_INCOMPAT_MDT | OBD_INCOMPAT_COMMON_LR)
index 5075bfb..3c17a05 100644 (file)
 #include <linux/fs.h>
 #include <linux/jbd.h>
 #include <linux/ext3_fs.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_lite.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_fsfilt.h>
+#include <lustre_lite.h>
+#include <obd_lov.h>
 #include "mds_internal.h"
-#include <linux/obd_lov.h>
 
 struct mdsea_cb_data {
     struct llog_handle     *mc_llh;
index 4bc0f1b..c136494 100644 (file)
@@ -48,8 +48,8 @@
 #include <linux/slab.h>
 #include <asm/segment.h>
 
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
 #include "mds_internal.h"
 
 void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
index 7922bbc..43a63e3 100644 (file)
 #include <linux/version.h>
 
 #include <libcfs/list.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <lustre_commit_confd.h>
+#include <lustre_log.h>
 
 #include "mds_internal.h"
 
index 57e7e09..0f95347 100644 (file)
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <linux/module.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_idl.h>
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_ver.h>
+#include <lustre_mds.h>
+#include <lustre/lustre_idl.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre_lib.h>
+#include <lustre_fsfilt.h>
+#include <lustre_ver.h>
 
 #include "mds_internal.h"
 
@@ -163,17 +163,19 @@ int mds_lov_set_nextid(struct obd_device *obd)
 
         LASSERT(mds->mds_lov_objids != NULL);
 
-        rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_NEXT_ID), KEY_NEXT_ID,
-                          mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids);
+        rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_NEXT_ID),
+                                KEY_NEXT_ID,
+                                mds->mds_lov_desc.ld_tgt_count,
+                                mds->mds_lov_objids, NULL);
         
         if (rc) 
                 CERROR ("%s: mds_lov_set_nextid failed (%d)\n", 
                         obd->obd_name, rc);
+
         RETURN(rc);
 }
 
-/* Update the lov desc for a new size lov.
-   From HEAD mds_dt_lov_update_desc (but fixed) */
+/* Update the lov desc for a new size lov. */
 static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
 {
         struct mds_obd *mds = &obd->u.mds;
@@ -388,7 +390,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
          * set_nextid().  The class driver can help us here, because
          * it can use the obd_recovering flag to determine when the
          * the OBD is full available. */
-        if (!obd->obd_recovering) 
+        if (!obd->obd_recovering)
                 rc = mds_postrecov(obd);
         RETURN(rc);
 
@@ -443,7 +445,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT),
-                                 &mds->mds_cfg_llh, NULL,  name);
+                                 &mds->mds_cfg_llh, NULL, name);
                 if (rc == 0)
                         llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN,
                                          &cfg_uuid);
@@ -586,8 +588,9 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 rc = llog_ioctl(ctxt, cmd, data);
                 pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL);
                 llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count);
-                rc2 = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
-                                   KEY_MDS_CONN, 0, NULL);
+                rc2 = obd_set_info_async(mds->mds_osc_exp,
+                                         strlen(KEY_MDS_CONN), KEY_MDS_CONN,
+                                         0, NULL, NULL);
                 if (!rc)
                         rc = rc2;
                 RETURN(rc);
@@ -623,37 +626,35 @@ struct mds_lov_sync_info {
         __u32              mlsi_index;   /* index of target */
 };
 
+/* We only sync one osc at a time, so that we don't have to hold
+   any kind of lock on the whole mds_lov_desc, which may change 
+   (grow) as a result of mds_lov_add_ost.  This also avoids any
+   kind of mismatch between the lov_desc and the mds_lov_desc, 
+   which are not in lock-step during lov_add_obd */
 static int __mds_lov_synchronize(void *data)
 {
         struct mds_lov_sync_info *mlsi = data;
         struct obd_device *obd = mlsi->mlsi_obd;
         struct obd_device *watched = mlsi->mlsi_watched;
         struct mds_obd *mds = &obd->u.mds;
-        struct obd_uuid *uuid = NULL;
+        struct obd_uuid *uuid;
         __u32  idx = mlsi->mlsi_index;
         int rc = 0;
         ENTRY;
 
-        if (watched) 
-                uuid = &watched->u.cli.cl_target_uuid;
-
         OBD_FREE(mlsi, sizeof(*mlsi));
 
         LASSERT(obd);
-
-        /* We only sync one osc at a time, so that we don't have to hold
-           any kind of lock on the whole mds_lov_desc, which may change 
-           (grow) as a result of mds_lov_add_ost.  This also avoids any
-           kind of mismatch between the lov_desc and the mds_lov_desc, 
-           which are not in lock-step during lov_add_obd */
+        LASSERT(watched);
+        uuid = &watched->u.cli.cl_target_uuid;
         LASSERT(uuid);
 
         rc = mds_lov_update_mds(obd, watched, idx);
         if (rc != 0)
                 GOTO(out, rc);
         
-        rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
-                          KEY_MDS_CONN, 0, uuid);
+        rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN),
+                                KEY_MDS_CONN, 0, uuid, NULL);
         if (rc != 0)
                 GOTO(out, rc);
 
@@ -668,7 +669,7 @@ static int __mds_lov_synchronize(void *data)
         }
 
         LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n",
-              obd->obd_name, (char *)uuid->uuid);
+              obd->obd_name, obd_uuid2str(uuid));
 
         if (obd->obd_stopping)
                 GOTO(out, rc = -ENODEV);
@@ -730,15 +731,16 @@ int mds_lov_start_synchronize(struct obd_device *obd,
 
         if (nonblock) {
                 /* Synchronize in the background */
-                rc = kernel_thread(mds_lov_synchronize, mlsi,
-                                   CLONE_VM | CLONE_FILES);
+                rc = cfs_kernel_thread(mds_lov_synchronize, mlsi,
+                                       CLONE_VM | CLONE_FILES);
                 if (rc < 0) {
                         CERROR("%s: error starting mds_lov_synchronize: %d\n",
                                obd->obd_name, rc);
                         class_decref(obd);
                 } else {
-                        CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n",
-                               obd->obd_name, rc);
+                        CDEBUG(D_HA, "%s: mds_lov_synchronize idx=%d "
+                               "thread=%d\n", obd->obd_name,
+                               mlsi->mlsi_index, rc);
                         rc = 0;
                 }
         } else {
index f70a487..993afb8 100644 (file)
 # include <linux/locks.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre_fsfilt.h>
+#include <lprocfs_status.h>
 
 #include "mds_internal.h"
 
@@ -1097,6 +1097,14 @@ found_child:
                 GOTO(cleanup, rc = -EAGAIN);
         }
 
+        if (!S_ISREG(dchild->d_inode->i_mode) &&
+            !S_ISDIR(dchild->d_inode->i_mode) &&
+            (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) {
+                /* If client supports this, do not return open handle for
+                 * special device nodes */
+                GOTO(cleanup_no_trans, rc = 0);
+        }
+
         /* Step 5: mds_open it */
         rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec,
                              rep, &parent_lockh);
index 3905d1f..43e1ba3 100644 (file)
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <linux/fs.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_ucache.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_fsfilt.h>
+#include <lustre_ucache.h>
 
 #include "mds_internal.h"
 
@@ -531,6 +531,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                 rc = mds_get_md(obd, inode, lmm, &lmm_size, need_lock);
                 if (rc < 0)
                         GOTO(cleanup, rc);
+                rc = 0;
 
                 handle = fsfilt_start_log(obd, inode, FSFILT_OP_SETATTR, NULL,
                                           le32_to_cpu(lmm->lmm_stripe_count));
@@ -1040,6 +1041,22 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id,
         RETURN(0);
 }
 
+static inline int res_eq(struct ldlm_res_id *res1, struct ldlm_res_id *res2)
+{
+        return !memcmp(res1, res2, sizeof(*res1));
+}
+
+static inline void
+try_to_aggregate_locks(struct ldlm_res_id *res1, ldlm_policy_data_t *p1,
+                        struct ldlm_res_id *res2, ldlm_policy_data_t *p2)
+{
+        if (!res_eq(res1, res2))
+                return;
+        /* XXX: any additional inodebits (to current LOOKUP and UPDATE)
+         * should be taken with great care here */
+        p1->l_inodebits.bits |= p2->l_inodebits.bits;
+}
+
 int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id,
                            struct lustre_handle *p1_lockh, int p1_lock_mode,
                            ldlm_policy_data_t *p1_policy,
@@ -1105,14 +1122,19 @@ int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id,
                 flags = 0;
                 if (res_id[i]->name[0] == 0)
                         break;
-                if (i != 0 &&
-                    memcmp(res_id[i], res_id[i-1], sizeof(*res_id[i])) == 0 &&
-                    (policies[i]->l_inodebits.bits &
-                     policies[i-1]->l_inodebits.bits)) {
+                if (i && res_eq(res_id[i], res_id[i-1])) {
                         memcpy(dlm_handles[i], dlm_handles[i-1],
                                sizeof(*(dlm_handles[i])));
                         ldlm_lock_addref(dlm_handles[i], lock_modes[i]);
                 } else {
+                        /* we need to enqueue locks with different inodebits
+                         * at once, because otherwise concurrent thread can
+                         * hit the window between these two locks and we'll
+                         * get to deadlock. see bug 10360. note also, that it
+                         * is impossible to have >2 equal res. */
+                        if (i < 3)
+                                try_to_aggregate_locks(res_id[i], policies[i],
+                                                       res_id[i+1], policies[i+1]);
                         rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace,
                                               *res_id[i], LDLM_IBITS,
                                               policies[i],
@@ -1193,8 +1215,11 @@ static int mds_verify_child(struct obd_device *obd,
                 child_res_id->name[0] = dchild->d_inode->i_ino;
                 child_res_id->name[1] = dchild->d_inode->i_generation;
 
-                if (res_gt(parent_res_id, child_res_id, NULL, NULL) ||
-                    res_gt(maxres, child_res_id, NULL, NULL)) {
+                /* Make sure that we don't try to re-enqueue a lock on the
+                 * same resource if it happens that the source is renamed to
+                 * the target by another thread (bug 9974, thanks racer :-) */
+                if (!res_gt(child_res_id, parent_res_id, NULL, NULL) ||
+                    !res_gt(child_res_id, maxres, NULL, NULL)) {
                         CDEBUG(D_DLMTRACE, "relock "LPU64"<("LPU64"|"LPU64")\n",
                                child_res_id->name[0], parent_res_id->name[0],
                                maxres->name[0]);
@@ -1634,8 +1659,8 @@ cleanup:
         rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL,
                                 handle, req, rc, 0);
         if (!rc)
-                (void)obd_set_info(mds->mds_osc_exp, strlen("unlinked"),
-                                   "unlinked", 0, NULL);
+                (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"),
+                                         "unlinked", 0, NULL, NULL);
         switch(cleanup_phase) {
         case 5: /* pending_dir semaphore */
                 UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode);
index 8d30235..ed4539b 100644 (file)
 #include <linux/version.h>
 
 #include <libcfs/list.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/lvfs.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <lustre_commit_confd.h>
+#include <lvfs.h>
 
 #include "mds_internal.h"
 
index 836f675..c3968ef 100644 (file)
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <linux/fs.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_ucache.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <obd.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_fsfilt.h>
+#include <lustre_ucache.h>
 
 #include "mds_internal.h"
 
@@ -199,6 +199,10 @@ out_ucred:
         return rc;
 }
 
+/*
+ * always return 0, and set req->rq_status as error number in case
+ * of failures.
+ */
 static
 int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body)
 {
@@ -225,20 +229,11 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body)
 
         lockpart = MDS_INODELOCK_UPDATE;
 
-        de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX,
-                                   &lockh, lockpart);
-        if (IS_ERR(de))
-                GOTO(out, rc = PTR_ERR(de));
-
-        inode = de->d_inode;
-        LASSERT(inode);
-
-        OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb);
-
+        /* various sanity checks for xattr name */
         xattr_name = lustre_msg_string(req->rq_reqmsg, 1, 0);
         if (!xattr_name) {
                 CERROR("can't extract xattr name\n");
-                GOTO(out_dput, rc = -EPROTO);
+                GOTO(out, rc = -EPROTO);
         }
 
         DEBUG_REQ(D_INODE, req, "%sxattr %s\n",
@@ -247,14 +242,27 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body)
 
         if (strncmp(xattr_name, "trusted.", 8) == 0) {
                 if (strcmp(xattr_name + 8, XATTR_LUSTRE_MDS_LOV_EA) == 0)
-                        GOTO(out_dput, rc = -EACCES);
+                        GOTO(out, rc = -EACCES);
         }
 
         if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) &&
             (strncmp(xattr_name, "user.", 5) == 0)) {
-                GOTO(out_dput, rc = -EOPNOTSUPP);
+                GOTO(out, rc = -EOPNOTSUPP);
         }
 
+        if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS))
+                lockpart |= MDS_INODELOCK_LOOKUP;
+
+        de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX,
+                                   &lockh, lockpart);
+        if (IS_ERR(de))
+                GOTO(out, rc = PTR_ERR(de));
+
+        inode = de->d_inode;
+        LASSERT(inode);
+
+        OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb);
+
         /* filter_op simply use setattr one */
         handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL);
         if (IS_ERR(handle))
index 21ae60d..11d7322 100644 (file)
 #include <linux/module.h>
 
 /* LUSTRE_VERSION_CODE */
-#include <linux/lustre_ver.h>
+#include <lustre_ver.h>
 /*
  * struct OBD_{ALLOC,FREE}*()
  * OBD_FAIL_CHECK
  */
-#include <linux/obd_support.h>
+#include <obd_support.h>
 /* struct ptlrpc_request */
-#include <linux/lustre_net.h>
+#include <lustre_net.h>
 /* struct obd_export */
-#include <linux/lustre_export.h>
+#include <lustre_export.h>
 /* struct obd_device */
-#include <linux/obd.h>
+#include <obd.h>
 /* lu2dt_dev() */
-#include <linux/dt_object.h>
-
-/*LUSTRE_POSIX_ACL_MAX_SIZE*/
-#include <linux/lustre_acl.h>
-
+#include <dt_object.h>
 /* struct mds_client_data */
 #include "../mds/mds_internal.h"
 #include "mdt_internal.h"
index 8546082..add79d5 100644 (file)
 /*
  * struct ptlrpc_client
  */
-#include <linux/lustre_net.h>
-#include <linux/obd.h>
+#include <lustre_net.h>
+#include <obd.h>
 /*
  * struct obd_connect_data
  * struct lustre_handle
  */
-#include <linux/lustre_idl.h>
-/* req_layout things */
-#include <linux/lustre_req_layout.h>
-
-#include <linux/md_object.h>
-#include <linux/lustre_fid.h>
+#include <lustre/lustre_idl.h>
+#include <md_object.h>
+#include <lustre_fid.h>
+#include <lustre_req_layout.h>
 
 struct mdt_device {
         /* super-class */
index e88e641..a56c781 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_disk.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
 
 
 int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id)
@@ -256,6 +256,8 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         /* The mgc fs exclusion sem. Only one fs can be setup at a time. */
         down(&cli->cl_mgc_sem);
 
+        cleanup_group_info();
+
         obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd));
         if (IS_ERR(obd->obd_fsops)) {
                 up(&cli->cl_mgc_sem);
@@ -265,9 +267,6 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb,
         }
 
         cli->cl_mgc_vfsmnt = mnt;
-        // FIXME which is the right SB? - filter_common_setup also 
-        CDEBUG(D_MGC, "SB's: fill=%p mnt=%p == root=%p\n", sb, mnt->mnt_sb,
-               mnt->mnt_root->d_inode->i_sb);
         fsfilt_setup(obd, mnt->mnt_sb);
 
         OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
@@ -497,8 +496,8 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 
                 /* Re-enqueue the lock in a separate thread, because we must
                    return from this fn before that lock can be taken. */
-                rc = kernel_thread(mgc_async_requeue, data,
-                                   CLONE_VM | CLONE_FS);
+                rc = cfs_kernel_thread(mgc_async_requeue, data,
+                                       CLONE_VM | CLONE_FILES);
                 if (rc < 0) {
                         CERROR("Cannot re-enqueue thread: %d\n", rc);
                 } else {
@@ -667,8 +666,9 @@ static int mgc_target_register(struct obd_export *exp,
         RETURN(rc);
 }
 
-int mgc_set_info(struct obd_export *exp, obd_count keylen,
-                 void *key, obd_count vallen, void *val)
+int mgc_set_info_async(struct obd_export *exp, obd_count keylen,
+                       void *key, obd_count vallen, void *val, 
+                       struct ptlrpc_request_set *set)
 {
         struct obd_import *imp = class_exp2cliimp(exp);
         int rc = -EINVAL;
@@ -1091,7 +1091,7 @@ struct obd_ops mgc_obd_ops = {
         //.o_enqueue      = mgc_enqueue,
         .o_cancel       = mgc_cancel,
         //.o_iocontrol    = mgc_iocontrol,
-        .o_set_info     = mgc_set_info,
+        .o_set_info_async = mgc_set_info_async,
         .o_import_event = mgc_import_event,
         .o_llog_init    = mgc_llog_init,
         .o_llog_finish  = mgc_llog_finish,
index e1f91f6..d1ce512 100644 (file)
@@ -28,9 +28,9 @@
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <asm/statfs.h>
 #endif
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include "mgs_internal.h"
 
 #ifdef LPROCFS
index 8100bbe..ce489ae 100644 (file)
 #include <linux/kmod.h>
 #include <linux/version.h>
 #include <linux/sched.h>
-#include <linux/lustre_quota.h>
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <linux/mount.h>
 #endif
-#include <linux/obd_class.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_disk.h>
+#include <lustre_lib.h>
+#include <lustre_fsfilt.h>
 #include <libcfs/list.h>
 #include "mgs_internal.h"
 
index 671529d..17411a6 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_ver.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
+#include <lustre_fsfilt.h>
+#include <lustre_commit_confd.h>
+#include <lustre_disk.h>
+#include <lustre_ver.h>
 #include "mgs_internal.h"
 
 
@@ -275,7 +275,8 @@ static int mgs_cleanup(struct obd_device *obd)
         /* Free the namespace in it's own thread, so that if the
            ldlm_cancel_handler put the last mgs obd ref, we won't
            deadlock here. */
-        kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, CLONE_VM | CLONE_FS);
+        cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, 
+                          CLONE_VM | CLONE_FILES);
 
         lvfs_clear_rdonly(save_dev);
 
@@ -460,7 +461,6 @@ int mgs_handle(struct ptlrpc_request *req)
         switch (req->rq_reqmsg->opc) {
         case MGS_CONNECT:
                 DEBUG_REQ(D_MGS, req, "connect");
-                OBD_FAIL_RETURN(OBD_FAIL_MGS_CONNECT_NET, 0);
                 rc = target_handle_connect(req, mgs_handle);
                 if (!rc && (req->rq_reqmsg->conn_cnt > 1))
                         /* Make clients trying to reconnect after a MGS restart
@@ -470,7 +470,6 @@ int mgs_handle(struct ptlrpc_request *req)
                 break;
         case MGS_DISCONNECT:
                 DEBUG_REQ(D_MGS, req, "disconnect");
-                OBD_FAIL_RETURN(OBD_FAIL_MGS_DISCONNECT_NET, 0);
                 rc = target_handle_disconnect(req);
                 req->rq_status = rc;            /* superfluous? */
                 break;
@@ -485,7 +484,6 @@ int mgs_handle(struct ptlrpc_request *req)
 
         case LDLM_ENQUEUE:
                 DEBUG_REQ(D_MGS, req, "enqueue");
-                OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0);
                 rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast,
                                          ldlm_server_blocking_ast, NULL);
                 fail = OBD_FAIL_LDLM_REPLY;
@@ -495,7 +493,6 @@ int mgs_handle(struct ptlrpc_request *req)
                 DEBUG_REQ(D_MGS, req, "callback");
                 CERROR("callbacks should not happen on MGS\n");
                 LBUG();
-                OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0);
                 break;
 
         case OBD_PING:
@@ -504,33 +501,27 @@ int mgs_handle(struct ptlrpc_request *req)
                 break;
         case OBD_LOG_CANCEL:
                 DEBUG_REQ(D_MGS, req, "log cancel\n");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0);
                 rc = -ENOTSUPP; /* la la la */
                 break;
 
         case LLOG_ORIGIN_HANDLE_CREATE:
                 DEBUG_REQ(D_MGS, req, "llog_init");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_origin_handle_create(req);
                 break;
         case LLOG_ORIGIN_HANDLE_NEXT_BLOCK:
                 DEBUG_REQ(D_MGS, req, "llog next block");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_origin_handle_next_block(req);
                 break;
         case LLOG_ORIGIN_HANDLE_READ_HEADER:
                 DEBUG_REQ(D_MGS, req, "llog read header");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_origin_handle_read_header(req);
                 break;
         case LLOG_ORIGIN_HANDLE_CLOSE:
                 DEBUG_REQ(D_MGS, req, "llog close");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_origin_handle_close(req);
                 break;
         case LLOG_CATINFO:
                 DEBUG_REQ(D_MGS, req, "llog catinfo");
-                OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0);
                 rc = llog_catinfo(req);
                 break;
         default:
index 0bd90ee..688055c 100644 (file)
@@ -7,15 +7,13 @@
 
 #ifdef __KERNEL__
 # include <linux/fs.h>
-# include <linux/dcache.h>
 #endif
-#include <linux/lustre_handles.h>
 #include <libcfs/kp30.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_export.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre_dlm.h>
+#include <lustre_log.h>
+#include <lustre_export.h>
 
 
 /* MDS has o_t * 1000 */
index 9cba6f5..d398be6 100644 (file)
 #include <linux/fs.h>
 #endif
 
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <linux/obd_ost.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
 #include <libcfs/list.h>
 #include <linux/lvfs.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_param.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
 #include "mgs_internal.h"
 
 /********************** Class fns ********************/
diff --git a/lustre/obdclass/Info.plist b/lustre/obdclass/Info.plist
new file mode 100644 (file)
index 0000000..0b7e718
--- /dev/null
@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>obdclass</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.obdclass</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string>
+        <key>OSBundleCompatibleVersion</key>
+        <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict>
+               <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.unsupported</key>
+               <string>8.0.0b1</string>
+                <key>com.clusterfs.lustre.libcfs</key>
+                <string>1.0.0</string>
+                <key>com.clusterfs.lustre.lvfs</key>
+                <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index ea93343..8c50c93 100644 (file)
@@ -1,11 +1,32 @@
 MODULES := obdclass llog_test
 
-obdclass-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o
-obdclass-objs += class_obd.o lu_object.o dt_object.o
-obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o
-obdclass-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
-obdclass-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o mea.o prng.o
+obdclass-linux-objs := linux-module.o linux-obdo.o linux-sysctl.o
+ifeq ($(PATCHLEVEL),6)
+obdclass-linux-objs := $(addprefix linux/,$(obdclass-linux-objs))
+endif
+
+default: all
+
+ifeq (@linux25@,no)
+sources:
+       @for i in $(obdclass-linux-objs:%.o=%.c) ; do \
+               echo "ln -s @srcdir@/linux/$$i ." ; \
+               ln -sf @srcdir@/linux/$$i . || exit 1 ; \
+       done
+
+else
+sources:
+
+endif
+
+obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o
+obdclass-all-objs += class_obd.o
+obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o
+obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o
+obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o
+obdclass-all-objs += lu_object.o dt_object.o
 
+obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs)
 
 ifeq ($(PATCHLEVEL),6)
 llog_test-objs := llog-test.o
index 0951147..cc7b60a 100644 (file)
@@ -1,20 +1,55 @@
+SUBDIRS := linux
+if DARWIN
+SUBDIRS += darwin
+endif
+DIST_SUBDIRS := $(SUBDIRS)
+
 if LIBLUSTRE
 
 noinst_LIBRARIES = liblustreclass.a
 liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c 
 liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c
 liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c 
-liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c 
+liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c
 liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c
 liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\"
 liblustreclass_a_CFLAGS = $(LLCFLAGS)
 
 endif
 
+
 if MODULES
+
+if LINUX
 modulefs_DATA = obdclass$(KMODEXT)
 noinst_DATA = llog_test$(KMODEXT)
+endif # LINUX
+
+if DARWIN
+macos_PROGRAMS := obdclass
+
+obdclass_SOURCES := \
+        darwin/darwin-module.c darwin/darwin-sysctl.c          \
+        class_obd.c genops.c lprocfs_status.c                  \
+        lustre_handles.c lustre_peer.c obd_config.c            \
+        obdo.c debug.c llog_ioctl.c uuid.c prng.c               \
+        llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c    \
+        mea.c lu_object.c dt_object.c
+
+obdclass_CFLAGS := $(EXTRA_KCFLAGS)
+obdclass_LDFLAGS := $(EXTRA_KLDFLAGS)
+obdclass_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # DARWIN
+
 endif # MODULES
 
+install-data-hook: $(install_data_hook)
+
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@  llog-test.c
-DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h
+MOSTLYCLEANFILES += linux/*.o darwin/*.o
+DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-all-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h
index 1f9b1e1..49f935e 100644 (file)
 #ifndef EXPORT_SYMTAB
 # define EXPORT_SYMTAB
 #endif
-#ifdef __KERNEL__
-#include <linux/config.h> /* for CONFIG_PROC_FS */
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/sched.h>
-#include <linux/lp.h>
-#include <linux/slab.h>
-#include <linux/ioport.h>
-#include <linux/fcntl.h>
-#include <linux/delay.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/fs.h>
-#include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/highmem.h>
-#include <asm/io.h>
-#include <asm/ioctls.h>
-#include <asm/system.h>
-#include <asm/poll.h>
-#include <asm/uaccess.h>
-#include <linux/miscdevice.h>
-#include <linux/smp_lock.h>
-#include <linux/seq_file.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_debug.h>
-#include <linux/lprocfs_status.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_debug.h>
+#include <lprocfs_status.h>
 #ifdef __KERNEL__
 #include <linux/lustre_build_version.h>
-#include <linux/lustre_version.h>
 #endif
 #include <libcfs/list.h>
+#include <lustre_ver.h>
 #include "llog_internal.h"
 
 #ifndef __KERNEL__
@@ -86,9 +59,7 @@ atomic_t obd_memory;
 int obd_memmax;
 #endif
 
-int proc_version;
-
-/* The following are visible and mutable through /proc/fs/lustre/. */
+/* The following are visible and mutable through /proc/sys/lustre/. */
 unsigned int obd_fail_loc;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_timeout = 100; /* seconds */
@@ -96,7 +67,7 @@ unsigned int ldlm_timeout = 20; /* seconds */
 unsigned int obd_health_check_timeout = 120; /* seconds */
 char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall  */
 
-DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq);
+cfs_waitq_t obd_race_waitq;
 
 #ifdef __KERNEL__
 unsigned int obd_print_fail_loc(void)
@@ -111,7 +82,7 @@ void obd_set_fail_loc(unsigned int fl)
 }
 
 /*  opening /dev/obd */
-static int obd_class_open(struct inode * inode, struct file * file)
+static int obd_class_open(unsigned long flags, void *args)
 {
         ENTRY;
 
@@ -120,7 +91,7 @@ static int obd_class_open(struct inode * inode, struct file * file)
 }
 
 /*  closing /dev/obd */
-static int obd_class_release(struct inode * inode, struct file * file)
+static int obd_class_release(unsigned long flags, void *args)
 {
         ENTRY;
 
@@ -147,6 +118,7 @@ int class_resolve_dev_name(uint32_t len, char *name)
         int rc;
         int dev;
 
+        ENTRY;
         if (!len || !name) {
                 CERROR("No name passed,!\n");
                 GOTO(out, rc = -EINVAL);
@@ -179,14 +151,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
         int err = 0, len = 0;
         ENTRY;
 
-#ifdef __KERNEL__
-        if (current->fsuid != 0)
-                RETURN(err = -EACCES);
-#endif
-
-        if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */
-                RETURN(err = -ENOTTY);
-
         /* only for debugging */
         if (cmd == LIBCFS_IOC_DEBUG_MASK) {
                 debug_data = (struct libcfs_debug_ioctl_data*)arg;
@@ -210,16 +174,19 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                         CERROR("No config buffer passed!\n");
                         GOTO(out, err = -EINVAL);
                 }
-
-                err = lustre_cfg_sanity_check(data->ioc_pbuf1,
-                                              data->ioc_plen1);
-                if (err)
-                        GOTO(out, err);
-
                 OBD_ALLOC(lcfg, data->ioc_plen1);
                 err = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1);
-                if (!err)
-                        err = class_process_config(lcfg);
+                if (err) {
+                        OBD_FREE(lcfg, data->ioc_plen1);
+                        GOTO(out, err);
+                }
+                err = lustre_cfg_sanity_check(lcfg, data->ioc_plen1);
+                if (err) {
+                        OBD_FREE(lcfg, data->ioc_plen1);
+                        GOTO(out, err);
+                }
+                err = class_process_config(lcfg);
+
                 OBD_FREE(lcfg, data->ioc_plen1);
                 GOTO(out, err);
         }
@@ -238,7 +205,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 memcpy(data->ioc_bulk, BUILD_VERSION,
                        strlen(BUILD_VERSION) + 1);
 
-                err = copy_to_user((void *)arg, data, len);
+                err = obd_ioctl_popdata((void *)arg, data, len);
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -255,7 +222,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (dev < 0)
                         GOTO(out, err = -EINVAL);
 
-                err = copy_to_user((void *)arg, data, sizeof(*data));
+                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -289,19 +256,55 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
                 CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1,
                        dev);
-                err = copy_to_user((void *)arg, data, sizeof(*data));
+                err = obd_ioctl_popdata((void *)arg, data, sizeof(*data));
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
         }
 
-
         case OBD_IOC_CLOSE_UUID: {
                 CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n",
                        data->ioc_inlbuf1);
                 GOTO(out, err = 0);
         }
 
+        case OBD_IOC_GETDEVICE: {
+                /*
+                 * Format a one-line status summary ("index status type name
+                 * uuid refcount") of the device selected by ioc_count into
+                 * the bulk area and copy the request back to the caller.
+                 */
+                int     index = data->ioc_count;
+                char    *status, *str;
+
+                if (!data->ioc_inlbuf1) {
+                        CERROR("No buffer passed in ioctl\n");
+                        GOTO(out, err = -EINVAL);
+                }
+                if (data->ioc_inllen1 < 128) {
+                        CERROR("ioctl buffer too small to hold version\n");
+                        GOTO(out, err = -EINVAL);
+                }
+
+                /* index is a signed int: a negative value must be rejected
+                 * too, otherwise obd_dev[] is indexed out of bounds */
+                if (index < 0 || index >= MAX_OBD_DEVICES)
+                        GOTO(out, err = -ENOENT);
+                obd = &obd_dev[index];
+                if (!obd->obd_type)
+                        GOTO(out, err = -ENOENT);
+
+                if (obd->obd_stopping)
+                        status = "ST";
+                else if (obd->obd_set_up)
+                        status = "UP";
+                else if (obd->obd_attached)
+                        status = "AT";
+                else
+                        status = "--";
+                str = (char *)data->ioc_bulk;
+                snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d",
+                         (int)index, status, obd->obd_type->typ_name,
+                         obd->obd_name, obd->obd_uuid.uuid,
+                         atomic_read(&obd->obd_refcount));
+                err = obd_ioctl_popdata((void *)arg, data, len);
+                /* report a failed copy-out the same way the other ioctl
+                 * cases in this switch do, instead of forcing success */
+                if (err)
+                        err = -EFAULT;
+                GOTO(out, err);
+        }
+
         }
 
         if (data->ioc_dev >= MAX_OBD_DEVICES) {
@@ -331,7 +334,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 if (err)
                         GOTO(out, err);
 
-                err = copy_to_user((void *)arg, data, len);
+                err = obd_ioctl_popdata((void *)arg, data, len);
                 if (err)
                         err = -EFAULT;
                 GOTO(out, err);
@@ -349,26 +352,21 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 #define OBD_MINOR 241
 #ifdef __KERNEL__
 /* to control /dev/obd */
-static int obd_class_ioctl(struct inode *inode, struct file *filp,
-                           unsigned int cmd, unsigned long arg)
+static int obd_class_ioctl (struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
 {
-        return class_handle_ioctl(cmd, arg);
+        return class_handle_ioctl(cmd, (unsigned long)arg);
 }
 
 /* declare character device */
-static struct file_operations obd_psdev_fops = {
-        .owner   = THIS_MODULE,
-        .ioctl   = obd_class_ioctl,     /* ioctl */
-        .open    = obd_class_open,      /* open */
-        .release = obd_class_release,   /* release */
+struct cfs_psdev_ops obd_psdev_ops = {
+        /* .p_open    = */ obd_class_open,      /* open */
+        /* .p_close   = */ obd_class_release,   /* release */
+        /* .p_read    = */ NULL,
+        /* .p_write   = */ NULL,
+        /* .p_ioctl   = */ obd_class_ioctl     /* ioctl */
 };
 
-/* modules setup */
-static struct miscdevice obd_psdev = {
-        .minor = OBD_MINOR,
-        .name  = "obd",
-        .fops  = &obd_psdev_fops,
-};
+extern cfs_psdev_t obd_psdev;
 #else
 void *obd_psdev = NULL;
 #endif
@@ -384,7 +382,6 @@ EXPORT_SYMBOL(obd_health_check_timeout);
 EXPORT_SYMBOL(obd_lustre_upcall);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
 
-struct proc_dir_entry *proc_lustre_root;
 EXPORT_SYMBOL(proc_lustre_root);
 
 EXPORT_SYMBOL(class_register_type);
@@ -434,183 +431,6 @@ EXPORT_SYMBOL(class_manual_cleanup);
 EXPORT_SYMBOL(mea_name2idx);
 EXPORT_SYMBOL(raw_name2idx);
 
-#ifdef LPROCFS
-int obd_proc_read_version(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
-{
-        *eof = 1;
-        return snprintf(page, count, "%s\n", BUILD_VERSION);
-}
-
-int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count,
-                                 int *eof, void *data)
-{
-        *eof = 1;
-        return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION);
-}
-
-int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
-                         int *eof, void *data)
-{
-        *eof = 1;
-        return snprintf(page, count, "%s\n",
-#ifdef ENABLE_PINGER
-                        "on"
-#else
-                        "off"
-#endif
-                       );
-}
-
-static int obd_proc_read_health(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
-{
-        int rc = 0, i;
-        *eof = 1;
-
-        if (libcfs_catastrophe)
-                rc += snprintf(page + rc, count - rc, "LBUG\n");
-
-        spin_lock(&obd_dev_lock);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd;
-
-                obd = &obd_dev[i];
-                if (obd->obd_type == NULL)
-                        continue;
-
-                if (obd->obd_stopping)
-                        continue;
-
-                class_incref(obd);
-                spin_unlock(&obd_dev_lock);
-
-                if (obd_health_check(obd)) {
-                        rc += snprintf(page + rc, count - rc,
-                                       "device %s reported unhealthy\n",
-                                       obd->obd_name);
-                }
-                class_decref(obd);
-                spin_lock(&obd_dev_lock);
-        }
-        spin_unlock(&obd_dev_lock);
-
-        if (rc == 0)
-                return snprintf(page, count, "healthy\n");
-
-        rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n");
-        return rc;
-}
-
-static int obd_proc_rd_health_timeout(char *page, char **start, off_t off,
-                                      int count, int *eof, void *data)
-{
-        *eof = 1;
-        return snprintf(page, count, "%d\n", obd_health_check_timeout);
-}
-
-static int obd_proc_wr_health_timeout(struct file *file, const char *buffer,
-                                      unsigned long count, void *data)
-{
-        int val, rc;
-
-        rc = lprocfs_write_helper(buffer, count, &val);
-        if (rc)
-                return rc;
-
-        obd_health_check_timeout = val;
-
-        return count;
-}
-
-/* Root for /proc/fs/lustre */
-struct lprocfs_vars lprocfs_base[] = {
-        { "version", obd_proc_read_version, NULL, NULL },
-        { "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
-        { "pinger", obd_proc_read_pinger, NULL, NULL },
-        { "health_check", obd_proc_read_health, NULL, NULL },
-        { "health_check_timeout", obd_proc_rd_health_timeout,
-          obd_proc_wr_health_timeout, NULL },        
-        { 0 }
-};
-#else
-#define lprocfs_base NULL
-#endif /* LPROCFS */
-
-#ifdef __KERNEL__
-static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos)
-{
-        if (*pos >= MAX_OBD_DEVICES)
-                return NULL;
-        return &obd_dev[*pos];
-}
-
-static void obd_device_list_seq_stop(struct seq_file *p, void *v)
-{
-}
-
-static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
-{
-        ++*pos;
-        if (*pos >= MAX_OBD_DEVICES)
-                return NULL;
-        return &obd_dev[*pos];
-}
-
-static int obd_device_list_seq_show(struct seq_file *p, void *v)
-{
-        struct obd_device *obd = (struct obd_device *)v;
-        int index = obd - &obd_dev[0];
-        char *status;
-
-        if (!obd->obd_type)
-                return 0;
-        if (obd->obd_stopping)
-                status = "ST";
-        else if (obd->obd_set_up)
-                status = "UP";
-        else if (obd->obd_attached)
-                status = "AT";
-        else
-                status = "--";
-
-        return seq_printf(p, "%3d %s %s %s %s %d\n",
-                          (int)index, status, obd->obd_type->typ_name,
-                          obd->obd_name, obd->obd_uuid.uuid,
-                          atomic_read(&obd->obd_refcount));
-}
-
-struct seq_operations obd_device_list_sops = {
-        .start = obd_device_list_seq_start,
-        .stop = obd_device_list_seq_stop,
-        .next = obd_device_list_seq_next,
-        .show = obd_device_list_seq_show,
-};
-
-static int obd_device_list_open(struct inode *inode, struct file *file)
-{
-        struct proc_dir_entry *dp = PDE(inode);
-        struct seq_file *seq;
-        int rc = seq_open(file, &obd_device_list_sops);
-
-        if (rc)
-                return rc;
-
-        seq = file->private_data;
-        seq->private = dp->data;
-
-        return 0;
-}
-
-struct file_operations obd_device_list_fops = {
-        .owner   = THIS_MODULE,
-        .open    = obd_device_list_open,
-        .read    = seq_read,
-        .llseek  = seq_lseek,
-        .release = seq_release,
-};
-#endif
-
 #define OBD_INIT_CHECK
 #ifdef OBD_INIT_CHECK
 int obd_init_checks(void)
@@ -673,8 +493,8 @@ int obd_init_checks(void)
                 CWARN("LPD64 wrong length! strlen(%s)=%d != 2\n", buf, len);
                 ret = -EINVAL;
         }
-        if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) {
-                CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val,PAGE_SIZE);
+        if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) {
+                CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val,CFS_PAGE_SIZE);
                 ret = -EINVAL;
         }
 
@@ -684,21 +504,22 @@ int obd_init_checks(void)
 #define obd_init_checks() do {} while(0)
 #endif
 
+extern spinlock_t obd_types_lock;
+extern spinlock_t handle_lock;
+extern int class_procfs_init(void);
+extern int class_procfs_clean(void);
+
 #ifdef __KERNEL__
 static int __init init_obdclass(void)
 #else
 int init_obdclass(void)
 #endif
 {
+        int i, err;
         struct obd_device *obd;
 #ifdef __KERNEL__
-        struct proc_dir_entry *entry;
         int lustre_register_fs(void);
-#endif
-        int err;
-        int i;
 
-#ifdef __KERNEL__
         printk(KERN_INFO "Lustre: OBD class driver Build Version: "
                BUILD_VERSION", info@clusterfs.com\n");
 #else
@@ -706,6 +527,10 @@ int init_obdclass(void)
                BUILD_VERSION", info@clusterfs.com\n");
 #endif
 
+        spin_lock_init(&obd_types_lock);
+        spin_lock_init(&handle_lock);
+        cfs_waitq_init(&obd_race_waitq);
+
         err = obd_init_checks();
         if (err == -EOVERFLOW)
                 return err;
@@ -718,7 +543,7 @@ int init_obdclass(void)
         spin_lock_init(&obd_dev_lock);
         INIT_LIST_HEAD(&obd_types);
 
-        err = misc_register(&obd_psdev);
+        err = cfs_psdev_register(&obd_psdev);
         if (err) {
                 CERROR("cannot register %d err %d\n", OBD_MINOR, err);
                 return err;
@@ -731,28 +556,12 @@ int init_obdclass(void)
         err = obd_init_caches();
         if (err)
                 return err;
-
 #ifdef __KERNEL__
-        obd_sysctl_init();
-
-        proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
-        if (!proc_lustre_root) {
-                printk(KERN_ERR
-                       "LustreError: error registering /proc/fs/lustre\n");
-                RETURN(-ENOMEM);
-        }
-        proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL);
-        entry = create_proc_entry("devices", 0444, proc_lustre_root);
-        if (entry == NULL) {
-                CERROR("error registering /proc/fs/lustre/devices\n");
-                lprocfs_remove(proc_lustre_root);
-                RETURN(-ENOMEM);
-        }
-        entry->proc_fops = &obd_device_list_fops;
-
+        err = class_procfs_init();
         lustre_register_fs();
 #endif
-        return 0;
+
+        return err;
 }
 
 /* liblustre doesn't call cleanup_obdclass, apparently.  we carry on in this
@@ -766,7 +575,7 @@ static void cleanup_obdclass(void)
 
         lustre_unregister_fs();
 
-        misc_deregister(&obd_psdev);
+        cfs_psdev_deregister(&obd_psdev);
         for (i = 0; i < MAX_OBD_DEVICES; i++) {
                 struct obd_device *obd = &obd_dev[i];
                 if (obd->obd_type && obd->obd_set_up &&
@@ -779,32 +588,16 @@ static void cleanup_obdclass(void)
         obd_cleanup_caches();
         obd_sysctl_clean();
 
-        if (proc_lustre_root) {
-                lprocfs_remove(proc_lustre_root);
-                proc_lustre_root = NULL;
-        }
+        class_procfs_clean();
 
         class_handle_cleanup();
         class_exit_uuidlist();
         EXIT;
 }
 
-
-/* Check that we're building against the appropriate version of the Lustre
- * kernel patch */
-#include <linux/lustre_version.h>
-#define LUSTRE_MIN_VERSION 37
-#define LUSTRE_MAX_VERSION 47
-#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
-# error Cannot continue: Your Lustre kernel patch is older than the sources
-#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
-# error Cannot continue: Your Lustre sources are older than the kernel patch
-#endif
-
 MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION);
 MODULE_LICENSE("GPL");
 
-module_init(init_obdclass);
-module_exit(cleanup_obdclass);
+cfs_module(obdclass, "1.0.0", init_obdclass, cleanup_obdclass);
 #endif
diff --git a/lustre/obdclass/darwin/Makefile.am b/lustre/obdclass/darwin/Makefile.am
new file mode 100644 (file)
index 0000000..75ba623
--- /dev/null
@@ -0,0 +1,3 @@
+EXTRA_DIST :=          \
+       darwin-module.c \
+       darwin-sysctl.c
diff --git a/lustre/obdclass/darwin/darwin-module.c b/lustre/obdclass/darwin/darwin-module.c
new file mode 100644 (file)
index 0000000..287d942
--- /dev/null
@@ -0,0 +1,181 @@
+#define DEBUG_SUBSYSTEM S_CLASS
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+
+#include <libcfs/libcfs.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+
+#ifndef OBD_MAX_IOCTL_BUFFER
+#define OBD_MAX_IOCTL_BUFFER 8192
+#endif
+
+/*
+ * Build a kernel-side copy of an OBD ioctl request.
+ *
+ * \a arg is dereferenced directly as a struct obd_ioctl_hdr and then as a
+ * struct obd_ioctl_data, so it MUST be at least the size of obd_ioctl_hdr.
+ * The fixed part is memcpy()ed from \a arg; the bytes that follow it are
+ * fetched with copy_from_user() from the address stored in ioc_inlbuf1.
+ * Afterwards ioc_inlbuf1..4 are rewritten to point into the copied bulk area.
+ *
+ * Returns 0 on success, in which case *buf / *len describe an
+ * OBD_VMALLOC()ed buffer now owned by the caller.  Returns a negative errno
+ * on failure; a buffer allocated here is released again and *buf is reset
+ * to NULL so that a caller cannot free it a second time.
+ */
+int obd_ioctl_getdata(char **buf, int *len, void *arg)
+{
+        struct obd_ioctl_hdr *hdr;
+        struct obd_ioctl_data *data;
+        int err = 0;
+        int offset = 0;
+        ENTRY;
+
+        hdr = (struct obd_ioctl_hdr *)arg;
+        if (hdr->ioc_version != OBD_IOCTL_VERSION) {
+                CERROR("Version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len > OBD_MAX_IOCTL_BUFFER) {
+                CERROR("User buffer len %d exceeds %d max buffer\n",
+                       hdr->ioc_len, OBD_MAX_IOCTL_BUFFER);
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len < sizeof(struct obd_ioctl_data)) {
+                CERROR("OBD: user buffer too small for ioctl (%d)\n", hdr->ioc_len);
+                RETURN(-EINVAL);
+        }
+
+        /* XXX allocate this more intelligently, using kmalloc when
+         * appropriate */
+        OBD_VMALLOC(*buf, hdr->ioc_len);
+        if (*buf == NULL) {
+                CERROR("Cannot allocate control buffer of len %d\n",
+                       hdr->ioc_len);
+                RETURN(-EINVAL);
+        }
+        *len = hdr->ioc_len;
+        data = (struct obd_ioctl_data *)*buf;
+
+        bzero(data, hdr->ioc_len);
+        memcpy(data, (void *)arg, sizeof(struct obd_ioctl_data));
+        if (data->ioc_inlbuf1)
+                err = copy_from_user(&data->ioc_bulk[0], (void *)data->ioc_inlbuf1,
+                                     hdr->ioc_len - ((void *)&data->ioc_bulk[0] - (void *)data));
+        /* a failed copy must not be processed as if the (zeroed) bulk area
+         * had been supplied by the caller */
+        if (err) {
+                CERROR("Cannot copy ioctl bulk data from user space\n");
+                GOTO(out_free, err = -EFAULT);
+        }
+
+        if (obd_ioctl_is_invalid(data)) {
+                CERROR("ioctl not correctly formatted\n");
+                GOTO(out_free, err = -EINVAL);
+        }
+
+        /* re-point the inline buffers at their copies inside the bulk area */
+        if (data->ioc_inllen1) {
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+                offset += size_round(data->ioc_inllen1);
+        }
+
+        if (data->ioc_inllen2) {
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen2);
+        }
+
+        if (data->ioc_inllen3) {
+                data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen3);
+        }
+
+        if (data->ioc_inllen4) {
+                data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
+        }
+
+        RETURN(0);
+
+out_free:
+        OBD_VFREE(*buf, hdr->ioc_len);
+        *buf = NULL;
+        RETURN(err);
+}
+
+/*
+ * Hand the result of an ioctl back to the caller.
+ *
+ * Xnu's ioctl path copies sizeof(struct obd_ioctl_data) bytes of \a arg out
+ * to user space by itself; whatever lies beyond that fixed part has to be
+ * copied out here.  Returns 0, or the result of copy_to_user() when bulk
+ * data had to be transferred.
+ */
+int obd_ioctl_popdata(void *arg, void *data, int len)
+{
+       struct obd_ioctl_data *reply = (struct obd_ioctl_data *)arg;
+       struct obd_ioctl_data *kdata = (struct obd_ioctl_data *)data;
+       int rc;
+
+       /* everything fits into the part Xnu copies out for us */
+       if (len <= sizeof(struct obd_ioctl_data)) {
+               memcpy(arg, data, len);
+               return 0;
+       }
+
+       /* reply->ioc_inlbuf1 is read before the memcpy() below overwrites
+        * the fixed part of arg with the kernel copy */
+       rc = copy_to_user((void *)reply->ioc_inlbuf1, &kdata->ioc_bulk[0],
+                          len -((void *)&kdata->ioc_bulk[0] -(void *)kdata));
+       memcpy(arg, data, sizeof(struct obd_ioctl_data));
+       return rc;
+}
+/*
+ * cfs pseudo device
+ */
+extern struct cfs_psdev_ops          obd_psdev_ops;
+
+/*
+ * cdevsw open entry: forwards to the p_open hook of obd_psdev_ops and
+ * returns its result negated; EPERM when no hook is installed.
+ */
+static int
+obd_class_open(dev_t dev, int flags, int devtype, struct proc *p)
+{
+       if (obd_psdev_ops.p_open == NULL)
+               return EPERM;
+
+       return -obd_psdev_ops.p_open(0, NULL);
+}
+
+/*
+ * closing /dev/obd: forwards to the p_close hook of obd_psdev_ops and
+ * returns its result negated; EPERM when no hook is installed.
+ */
+static int
+obd_class_release(dev_t dev, int flags, int mode, struct proc *p)
+{
+       if (obd_psdev_ops.p_close == NULL)
+               return EPERM;
+
+       return -obd_psdev_ops.p_close(0, NULL);
+}
+
+/*
+ * cdevsw ioctl entry: only callers passing is_suser() get through; the
+ * request is then forwarded to the p_ioctl hook of obd_psdev_ops and its
+ * result is returned negated.  EPERM when the caller is not privileged or
+ * no hook is installed.
+ */
+static int
+obd_class_ioctl(dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
+{
+       int rc;
+       ENTRY;
+
+       if (!is_suser())
+               RETURN (EPERM);
+
+       if (obd_psdev_ops.p_ioctl == NULL)
+               rc = EPERM;
+       else
+               rc = -obd_psdev_ops.p_ioctl(NULL, cmd, (void *)arg);
+
+       RETURN(rc);
+}
+
+/*
+ * Character-device switch for the obd pseudo device.  The three handlers
+ * defined above (open, release, ioctl) are installed; every other slot is
+ * NULL.
+ * NOTE(review): this is a positional initializer - the slot order is
+ * presumably that of struct cdevsw in <sys/conf.h>; confirm the order and
+ * whether the kernel tolerates NULL in the unused slots.
+ */
+static struct cdevsw obd_psdevsw = {
+       obd_class_open,
+       obd_class_release,
+       NULL,
+       NULL,
+       obd_class_ioctl,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+};
+
+/*
+ * Pseudo-device descriptor for "obd"; class_obd.c declares it extern and
+ * passes it to cfs_psdev_register() / cfs_psdev_deregister().
+ * NOTE(review): positional initializer - the meaning of the first two
+ * fields (-1, NULL) depends on the cfs_psdev_t layout, which is not
+ * visible here; the last two are the device name and its cdevsw.
+ */
+cfs_psdev_t obd_psdev = {
+       -1,
+       NULL,
+       "obd",
+       &obd_psdevsw
+};
+
+/* Stub: does nothing in this file and always returns 0. */
+int class_procfs_init(void)
+{
+       return 0;
+}
+
+/* Stub: does nothing in this file and always returns 0. */
+int class_procfs_clean(void)
+{
+       return 0;
+}
diff --git a/lustre/obdclass/darwin/darwin-sysctl.c b/lustre/obdclass/darwin/darwin-sysctl.c
new file mode 100644 (file)
index 0000000..59b7e45
--- /dev/null
@@ -0,0 +1,154 @@
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/unistd.h>
+#include <mach/mach_types.h>
+#include <linux/lustre_build_version.h>
+
+#define DEBUG_SUBSYSTEM S_CLASS
+                                                                                                                                                                     
+#include <libcfs/libcfs.h>
+#ifndef BUILD_VERSION  
+#define BUILD_VERSION          "Unknown"
+#endif
+#ifndef LUSTRE_KERNEL_VERSION
+#define LUSTRE_KERNEL_VERSION  "Unknown Darwin version"
+#endif
+
+cfs_sysctl_table_header_t *obd_table_header = NULL;
+
+int proc_fail_loc SYSCTL_HANDLER_ARGS;
+int proc_obd_timeout SYSCTL_HANDLER_ARGS;
+extern unsigned int obd_fail_loc;
+extern unsigned int obd_dump_on_timeout;
+extern unsigned int obd_timeout;
+extern unsigned int ldlm_timeout;
+extern char obd_lustre_upcall[128];
+extern unsigned int obd_sync_filter;
+extern atomic_t obd_memory;
+
+int read_build_version SYSCTL_HANDLER_ARGS;
+int read_lustre_kernel_version SYSCTL_HANDLER_ARGS;
+
+/*
+ * sysctl OIDs below the "lustre" node.  fail_loc, timeout and the two
+ * version entries go through handler functions defined later in this file;
+ * the remaining entries are bound directly to their variables.
+ */
+SYSCTL_NODE (,                  OID_AUTO,       lustre,            CTLFLAG_RW,
+            0,                 "lustre sysctl top");
+SYSCTL_PROC(_lustre,           OID_AUTO,       fail_loc,
+           CTLTYPE_INT | CTLFLAG_RW ,          &obd_fail_loc,
+           0,          &proc_fail_loc,         "I",    "obd_fail_loc");
+SYSCTL_PROC(_lustre,           OID_AUTO,       timeout,
+           CTLTYPE_INT | CTLFLAG_RW ,          &obd_timeout,
+           0,          &proc_obd_timeout,      "I",    "obd_timeout");
+SYSCTL_PROC(_lustre,           OID_AUTO,       build_version,
+           CTLTYPE_STRING | CTLFLAG_RD ,       NULL,
+           0,          &read_build_version,    "A",    "lustre_build_version");
+/* description used to be a copy of the build_version one */
+SYSCTL_PROC(_lustre,           OID_AUTO,       lustre_kernel_version,
+           CTLTYPE_STRING | CTLFLAG_RD ,       NULL,
+           0,          &read_lustre_kernel_version,    "A",    "lustre_kernel_version");
+SYSCTL_INT(_lustre,            OID_AUTO,       dump_on_timeout,
+          CTLTYPE_INT | CTLFLAG_RW,            &obd_dump_on_timeout,
+          0,           "lustre_dump_on_timeout");
+SYSCTL_STRING(_lustre,         OID_AUTO,       upcall,
+          CTLTYPE_STRING | CTLFLAG_RW,         obd_lustre_upcall,
+          128,         "lustre_upcall");
+SYSCTL_INT(_lustre,            OID_AUTO,       memused,
+          CTLTYPE_INT | CTLFLAG_RW,            (int *)&obd_memory.counter,
+          0,           "lustre_memory_used");
+SYSCTL_INT(_lustre,            OID_AUTO,       ldlm_timeout,
+          CTLTYPE_INT | CTLFLAG_RW,            &ldlm_timeout,
+          0,           "ldlm_timeout");
+
+/*
+ * OIDs handed to cfs_register_sysctl_table() by obd_sysctl_init().
+ *
+ * Every entry must name an OID declared by a SYSCTL_* macro in this file:
+ * the former filter_sync_on_commit entry had no such declaration, while the
+ * declared build_version / lustre_kernel_version OIDs were never listed.
+ *
+ * NOTE(review): the list is NULL-terminated on the assumption that
+ * cfs_register_sysctl_table() walks it up to a NULL sentinel (it is given
+ * no element count) - confirm against the libcfs implementation.
+ */
+static cfs_sysctl_table_t      parent_table[] = {
+       &sysctl__lustre,
+       &sysctl__lustre_fail_loc,
+       &sysctl__lustre_timeout,
+       &sysctl__lustre_build_version,
+       &sysctl__lustre_lustre_kernel_version,
+       &sysctl__lustre_dump_on_timeout,
+       &sysctl__lustre_upcall,
+       &sysctl__lustre_memused,
+       &sysctl__lustre_ldlm_timeout,
+       NULL
+};
+
+extern cfs_waitq_t obd_race_waitq;
+
+/*
+ * sysctl handler for lustre.fail_loc.
+ *
+ * A successful write that changes obd_fail_loc signals obd_race_waitq; a
+ * failed write is logged; a read request copies obd_fail_loc out.
+ * Returns the error code of the last sysctl transfer performed.
+ */
+int proc_fail_loc SYSCTL_HANDLER_ARGS
+{
+       int error = 0;
+       /* same type as obd_fail_loc, so the comparison below is exact */
+       unsigned int old_fail_loc = obd_fail_loc;
+
+       /* the OID is CTLTYPE_INT and backed by an unsigned int, so the
+        * int-sized helper is the one that matches the storage */
+       error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+       if (!error && req->newptr != USER_ADDR_NULL) {
+               if (old_fail_loc != obd_fail_loc)
+                       cfs_waitq_signal(&obd_race_waitq);
+       } else  if (req->newptr != USER_ADDR_NULL) {
+               /* Something was wrong with the write request */
+               printf ("sysctl fail loc fault: %d.\n", error);
+       } else {
+               /* Read request */
+               error = SYSCTL_OUT(req, &obd_fail_loc, sizeof obd_fail_loc);
+       }
+       return error;
+}
+
+/*
+ * sysctl handler for lustre.timeout.
+ *
+ * After a successful write, ldlm_timeout is lowered to
+ * max(obd_timeout / 3, 1) if it is not already below obd_timeout; a failed
+ * write is logged; a read request copies obd_timeout out.
+ * Returns the error code of the last sysctl transfer performed.
+ */
+int proc_obd_timeout SYSCTL_HANDLER_ARGS
+{
+       int error = 0;
+
+       /* the OID is CTLTYPE_INT and backed by an unsigned int, so the
+        * int-sized helper is the one that matches the storage */
+       error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
+       if (!error && req->newptr != USER_ADDR_NULL) {
+               if (ldlm_timeout >= obd_timeout)
+                       ldlm_timeout = max(obd_timeout / 3, 1U);
+       } else  if (req->newptr != USER_ADDR_NULL) {
+               printf ("sysctl fail obd_timeout: %d.\n", error);
+       } else {
+               /* Read request */
+               error = SYSCTL_OUT(req, &obd_timeout, sizeof obd_timeout);
+       }
+       return error;
+}
+
+/*
+ * sysctl handler for the read-only lustre.build_version string.
+ *
+ * The OID is declared with a NULL data pointer, so there is no backing
+ * variable for a generic sysctl_handle_*() helper to transfer; the string
+ * is copied out directly, including its terminating NUL.
+ * Returns EPERM for a write attempt, otherwise the SYSCTL_OUT() result.
+ */
+int read_build_version SYSCTL_HANDLER_ARGS
+{
+       if (req->newptr != USER_ADDR_NULL) {
+               printf("sysctl read_build_version is read-only!\n");
+               return EPERM;
+       }
+
+       return SYSCTL_OUT(req, BUILD_VERSION, strlen(BUILD_VERSION) + 1);
+}
+
+/*
+ * sysctl handler for the read-only lustre.lustre_kernel_version string.
+ *
+ * The OID is declared with a NULL data pointer, so there is no backing
+ * variable for a generic sysctl_handle_*() helper to transfer; the string
+ * is copied out directly, including its terminating NUL.
+ * Returns EPERM for a write attempt, otherwise the SYSCTL_OUT() result.
+ * NOTE(review): assumes LUSTRE_KERNEL_VERSION expands to a string literal,
+ * as the fallback definition in this file does - confirm for other builds.
+ */
+int read_lustre_kernel_version SYSCTL_HANDLER_ARGS
+{
+       /* newptr is compared against USER_ADDR_NULL like in the other
+        * handlers of this file, not against a pointer constant */
+       if (req->newptr != USER_ADDR_NULL) {
+               printf("sysctl lustre_kernel_version is read-only!\n");
+               return EPERM;
+       }
+
+       return SYSCTL_OUT(req, LUSTRE_KERNEL_VERSION,
+                         strlen(LUSTRE_KERNEL_VERSION) + 1);
+}
+
+/*
+ * Register parent_table with cfs_register_sysctl_table() unless a table
+ * header has already been recorded in obd_table_header.
+ */
+void obd_sysctl_init (void)
+{
+       if (obd_table_header != NULL)
+               return;
+
+       obd_table_header = cfs_register_sysctl_table(parent_table, 0);
+}
+                                                                                                                                                                     
+/*
+ * Unregister the table recorded in obd_table_header, if any, and clear the
+ * header so that obd_sysctl_init() can register again.
+ */
+void obd_sysctl_clean (void)
+{
+       if (obd_table_header != NULL)
+               cfs_unregister_sysctl_table(obd_table_header);
+
+       obd_table_header = NULL;
+}
+
index 90c57e1..763606b 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_ost.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_debug.h>
-#include <linux/lustre_net.h>
+#include <obd_ost.h>
+#include <obd_support.h>
+#include <lustre_debug.h>
+#include <lustre_net.h>
 
 int dump_ioo(struct obd_ioobj *ioo)
 {
@@ -50,7 +50,7 @@ int dump_lniobuf(struct niobuf_local *nb)
 {
         CERROR("niobuf_local: offset="LPD64", len=%d, page=%p, rc=%d\n",
                nb->offset, nb->len, nb->page, nb->rc);
-        CERROR("nb->page: index = %ld\n", nb->page ? nb->page->index : -1);
+        CERROR("nb->page: index = %ld\n", nb->page ? cfs_page_index(nb->page) : -1);
 
         return -EINVAL;
 }
index a5a20e9..f5639d5 100644 (file)
@@ -32,7 +32,7 @@
 # define EXPORT_SYMTAB
 #endif
 
-#include <linux/dt_object.h>
+#include <dt_object.h>
 #include <libcfs/list.h>
 
 void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb)
index f77ee69..4930dc1 100644 (file)
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-#include <linux/kmod.h>   /* for request_module() */
-#include <linux/module.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
-#include <linux/lustre_mds.h>
-#include <linux/obd_ost.h>
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lu_object.h>
+#include <obd_ost.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 
 extern struct list_head obd_types;
-static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t obd_types_lock;
 
-kmem_cache_t *obdo_cachep = NULL;
+cfs_mem_cache_t *obdo_cachep = NULL;
 EXPORT_SYMBOL(obdo_cachep);
-kmem_cache_t *import_cachep = NULL;
+cfs_mem_cache_t *import_cachep = NULL;
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
@@ -411,15 +406,17 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
 
 void obd_cleanup_caches(void)
 {
+        int rc;
+
         ENTRY;
         if (obdo_cachep) {
-                LASSERTF(kmem_cache_destroy(obdo_cachep) == 0,
-                         "Cannot destory ll_obdo_cache\n");
+                rc = cfs_mem_cache_destroy(obdo_cachep);
+                LASSERTF(rc == 0, "Cannot destory ll_obdo_cache\n");
                 obdo_cachep = NULL;
         }
         if (import_cachep) {
-                LASSERTF(kmem_cache_destroy(import_cachep) == 0,
-                         "Cannot destory ll_import_cache\n");
+                rc = cfs_mem_cache_destroy(import_cachep);
+                LASSERTF(rc == 0, "Cannot destory ll_import_cache\n");
                 import_cachep = NULL;
         }
         EXIT;
@@ -430,15 +427,15 @@ int obd_init_caches(void)
         ENTRY;
 
         LASSERT(obdo_cachep == NULL);
-        obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
-                                        0, 0, NULL, NULL);
+        obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo),
+                                        0, 0);
         if (!obdo_cachep)
                 GOTO(out, -ENOMEM);
 
         LASSERT(import_cachep == NULL);
-        import_cachep = kmem_cache_create("ll_import_cache",
+        import_cachep = cfs_mem_cache_create("ll_import_cache",
                                           sizeof(struct obd_import),
-                                          0, 0, NULL, NULL);
+                                          0, 0);
         if (!import_cachep)
                 GOTO(out, -ENOMEM);
 
@@ -549,11 +546,11 @@ struct obd_export *class_new_export(struct obd_device *obd,
         export->exp_conn_cnt = 0;
         atomic_set(&export->exp_refcount, 2);
         export->exp_obd = obd;
-        INIT_LIST_HEAD(&export->exp_outstanding_replies);
+        CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
         /* XXX this should be in LDLM init */
-        INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks);
+        CFS_INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks);
 
-        INIT_LIST_HEAD(&export->exp_handle.h_link);
+        CFS_INIT_LIST_HEAD(&export->exp_handle.h_link);
         class_handle_hash(&export->exp_handle, export_handle_addref);
         export->exp_last_request_time = CURRENT_SECONDS;
         spin_lock_init(&export->exp_lock);
@@ -660,19 +657,19 @@ struct obd_import *class_new_import(struct obd_device *obd)
         if (imp == NULL)
                 return NULL;
 
-        INIT_LIST_HEAD(&imp->imp_replay_list);
-        INIT_LIST_HEAD(&imp->imp_sending_list);
-        INIT_LIST_HEAD(&imp->imp_delayed_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_replay_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_sending_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_delayed_list);
         spin_lock_init(&imp->imp_lock);
         imp->imp_state = LUSTRE_IMP_NEW;
         imp->imp_obd = class_incref(obd);
-        init_waitqueue_head(&imp->imp_recovery_waitq);
+        cfs_waitq_init(&imp->imp_recovery_waitq);
 
         atomic_set(&imp->imp_refcount, 2);
         atomic_set(&imp->imp_inflight, 0);
         atomic_set(&imp->imp_replay_inflight, 0);
-        INIT_LIST_HEAD(&imp->imp_conn_list);
-        INIT_LIST_HEAD(&imp->imp_handle.h_link);
+        CFS_INIT_LIST_HEAD(&imp->imp_conn_list);
+        CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link);
         class_handle_hash(&imp->imp_handle, import_handle_addref);
 
         return imp;
@@ -830,7 +827,7 @@ void class_disconnect_stale_exports(struct obd_device *obd)
         int cnt = 0;
         ENTRY;
 
-        INIT_LIST_HEAD(&work_list);
+        CFS_INIT_LIST_HEAD(&work_list);
         spin_lock(&obd->obd_dev_lock);
         list_for_each_safe(pos, n, &obd->obd_exports) {
                 exp = list_entry(pos, struct obd_export, exp_obd_chain);
@@ -862,8 +859,8 @@ int oig_init(struct obd_io_group **oig_out)
         oig->oig_rc = 0;
         oig->oig_pending = 0;
         atomic_set(&oig->oig_refcount, 1);
-        init_waitqueue_head(&oig->oig_waitq);
-        INIT_LIST_HEAD(&oig->oig_occ_list);
+        cfs_waitq_init(&oig->oig_waitq);
+        CFS_INIT_LIST_HEAD(&oig->oig_occ_list);
 
         *oig_out = oig;
         RETURN(0);
@@ -899,7 +896,7 @@ void oig_complete_one(struct obd_io_group *oig,
                       struct oig_callback_context *occ, int rc)
 {
         unsigned long flags;
-        wait_queue_head_t *wake = NULL;
+        cfs_waitq_t *wake = NULL;
         int old_rc;
 
         spin_lock_irqsave(&oig->oig_lock, flags);
@@ -920,7 +917,7 @@ void oig_complete_one(struct obd_io_group *oig,
                         "pending (racey)\n", oig, old_rc, oig->oig_rc, rc,
                         oig->oig_pending);
         if (wake)
-                wake_up(wake);
+                cfs_waitq_signal(wake);
         oig_release(oig);
 }
 EXPORT_SYMBOL(oig_complete_one);
diff --git a/lustre/obdclass/linux/Makefile.am b/lustre/obdclass/linux/Makefile.am
new file mode 100644 (file)
index 0000000..bf95892
--- /dev/null
@@ -0,0 +1,4 @@
+EXTRA_DIST :=                   \
+        linux-module.c         \
+       linux-sysctl.c          \
+       linux-obdo.c
diff --git a/lustre/obdclass/linux/linux-module.c b/lustre/obdclass/linux/linux-module.c
new file mode 100644 (file)
index 0000000..517035c
--- /dev/null
@@ -0,0 +1,441 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Object Devices Class Driver
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+#define DEBUG_SUBSYSTEM S_CLASS
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifdef __KERNEL__
+#include <linux/config.h> /* for CONFIG_PROC_FS */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/lp.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/fcntl.h>
+#include <linux/delay.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <asm/io.h>
+#include <asm/ioctls.h>
+#include <asm/system.h>
+#include <asm/poll.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/smp_lock.h>
+#include <linux/seq_file.h>
+#else
+# include <liblustre.h>
+#endif
+
+#include <libcfs/libcfs.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#ifdef __KERNEL__
+#include <linux/lustre_build_version.h>
+#include <linux/lustre_version.h>
+
+int proc_version;
+
+/*
+ * Copy an obd ioctl request from user space into a newly allocated buffer.
+ *
+ * buf: out; receives the kernel copy of the request.  On success the caller
+ *      owns it and must release it with OBD_VFREE(*buf, *len).  On failure
+ *      after allocation the buffer is freed here and *buf is reset to NULL.
+ * len: out; receives the request length taken from the user header.
+ * arg: user-space address of the request, which starts with a
+ *      struct obd_ioctl_hdr (the user buffer MUST be at least that large).
+ *
+ * Returns 0 on success, -EFAULT if user memory cannot be read, -EINVAL for a
+ * version mismatch, an out-of-range length or a malformed request, and
+ * -ENOMEM if the buffer cannot be allocated.
+ */
+int obd_ioctl_getdata(char **buf, int *len, void *arg)
+{
+        struct obd_ioctl_hdr hdr;
+        struct obd_ioctl_data *data;
+        int rc;
+        int offset = 0;
+        ENTRY;
+
+        /* copy_from_user() returns the number of bytes it could NOT copy,
+         * which is not an errno value; report a fault instead. */
+        if (copy_from_user(&hdr, (void *)arg, sizeof(hdr)))
+                RETURN(-EFAULT);
+
+        if (hdr.ioc_version != OBD_IOCTL_VERSION) {
+                CERROR("Version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) {
+                CERROR("User buffer len %d exceeds %d max buffer\n",
+                       hdr.ioc_len, OBD_MAX_IOCTL_BUFFER);
+                RETURN(-EINVAL);
+        }
+
+        if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) {
+                CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len);
+                RETURN(-EINVAL);
+        }
+
+        /* XXX allocate this more intelligently, using kmalloc when
+         * appropriate */
+        OBD_VMALLOC(*buf, hdr.ioc_len);
+        if (*buf == NULL) {
+                CERROR("Cannot allocate control buffer of len %d\n",
+                       hdr.ioc_len);
+                RETURN(-ENOMEM);
+        }
+        *len = hdr.ioc_len;
+        data = (struct obd_ioctl_data *)*buf;
+
+        if (copy_from_user(*buf, (void *)arg, hdr.ioc_len))
+                GOTO(out_free, rc = -EFAULT);
+
+        if (obd_ioctl_is_invalid(data)) {
+                CERROR("ioctl not correctly formatted\n");
+                GOTO(out_free, rc = -EINVAL);
+        }
+
+        /* Point each inline buffer at its slice of ioc_bulk; the slices are
+         * laid out back to back, each rounded up with size_round(). */
+        if (data->ioc_inllen1) {
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+                offset += size_round(data->ioc_inllen1);
+        }
+
+        if (data->ioc_inllen2) {
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen2);
+        }
+
+        if (data->ioc_inllen3) {
+                data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset;
+                offset += size_round(data->ioc_inllen3);
+        }
+
+        if (data->ioc_inllen4) {
+                data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset;
+        }
+
+        EXIT;
+        return 0;
+
+out_free:
+        OBD_VFREE(*buf, hdr.ioc_len);
+        /* do not hand a dangling pointer back to the caller */
+        *buf = NULL;
+        return rc;
+}
+
+/*
+ * Copy len bytes of ioctl reply from the kernel buffer data to the user
+ * address arg.  Returns 0 on success or -EFAULT if any byte was not copied.
+ */
+int obd_ioctl_popdata(void *arg, void *data, int len)
+{
+        if (copy_to_user(arg, data, len) != 0)
+                return -EFAULT;
+
+        return 0;
+}
+
+EXPORT_SYMBOL(obd_ioctl_getdata);
+EXPORT_SYMBOL(obd_ioctl_popdata);
+
+#define OBD_MINOR 241
+extern struct cfs_psdev_ops          obd_psdev_ops;
+
+/* open() on /dev/obd: forward to the p_open hook, or -EPERM if none is set */
+static int obd_class_open(struct inode * inode, struct file * file)
+{
+       if (obd_psdev_ops.p_open == NULL)
+               return -EPERM;
+
+       return obd_psdev_ops.p_open(0, NULL);
+}
+
+/* close() on /dev/obd: forward to the p_close hook, or -EPERM if none is set */
+static int obd_class_release(struct inode * inode, struct file * file)
+{
+       if (obd_psdev_ops.p_close == NULL)
+               return -EPERM;
+
+       return obd_psdev_ops.p_close(0, NULL);
+}
+
+/*
+ * ioctl() on /dev/obd.  Only callers with fsuid 0 are accepted and tty
+ * ioctls are rejected; everything else goes to the p_ioctl hook, with
+ * -EPERM returned when no hook is installed.
+ */
+static int obd_class_ioctl(struct inode *inode, struct file *filp,
+                          unsigned int cmd, unsigned long arg)
+{
+       int rc;
+       ENTRY;
+
+       if (current->fsuid != 0)
+               RETURN(-EACCES);
+
+       /* ignore all tty ioctls */
+       if ((cmd & 0xffffff00) == (((int)'T') << 8))
+               RETURN(-ENOTTY);
+
+       if (obd_psdev_ops.p_ioctl == NULL)
+               rc = -EPERM;
+       else
+               rc = obd_psdev_ops.p_ioctl(NULL, cmd, (void *)arg);
+
+       RETURN(rc);
+}
+
+/* File operations of the obd character device: only ioctl, open and release
+ * are provided, each implemented by the obd_class_* wrapper named below. */
+static struct file_operations obd_psdev_fops = {
+        .owner   = THIS_MODULE,
+        .ioctl   = obd_class_ioctl,     /* ioctl */
+        .open    = obd_class_open,      /* open */
+        .release = obd_class_release,   /* release */
+};
+
+/* Device descriptor for the obd control device: minor OBD_MINOR, name
+ * "obd_psdev", served by obd_psdev_fops.
+ * NOTE(review): registration happens outside this file -- confirm where. */
+cfs_psdev_t obd_psdev = {
+        .minor = OBD_MINOR,
+        .name  = "obd_psdev",
+        .fops  = &obd_psdev_fops,
+};
+
+#endif
+
+#ifdef LPROCFS
+/* /proc read handler: print BUILD_VERSION followed by a newline. */
+int obd_proc_read_version(char *page, char **start, off_t off, int count,
+                          int *eof, void *data)
+{
+        int written = snprintf(page, count, "%s\n", BUILD_VERSION);
+
+        /* the whole value fits in a single read */
+        *eof = 1;
+        return written;
+}
+
+/* /proc read handler: print LUSTRE_KERNEL_VERSION as an unsigned number. */
+int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count,
+                                 int *eof, void *data)
+{
+        int written = snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION);
+
+        /* the whole value fits in a single read */
+        *eof = 1;
+        return written;
+}
+
+/* /proc read handler: print "on" when built with ENABLE_PINGER, else "off". */
+int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
+                         int *eof, void *data)
+{
+#ifdef ENABLE_PINGER
+        const char *state = "on";
+#else
+        const char *state = "off";
+#endif
+
+        *eof = 1;
+        return snprintf(page, count, "%s\n", state);
+}
+
+/*
+ * /proc read handler for the health check.
+ *
+ * Prints "LBUG" if libcfs_catastrophe is set and one line for every device
+ * in obd_dev[] whose obd_health_check() returns non-zero, followed by
+ * "NOT HEALTHY"; prints "healthy" when nothing was reported.
+ *
+ * snprintf() returns the length the output would have had, so after every
+ * call rc is clamped to what actually fits in the count-byte page.  Without
+ * that, a truncated line would push page + rc past the buffer and make
+ * count - rc negative for the following calls.
+ *
+ * Returns the number of characters stored in page (excluding the NUL).
+ */
+static int obd_proc_read_health(char *page, char **start, off_t off,
+                                int count, int *eof, void *data)
+{
+        int rc = 0, i;
+        int healthy = 1;
+        *eof = 1;
+
+        /* no room for even the terminating NUL */
+        if (count <= 0)
+                return 0;
+
+        if (libcfs_catastrophe) {
+                healthy = 0;
+                rc += snprintf(page + rc, count - rc, "LBUG\n");
+                if (rc >= count)
+                        rc = count - 1;
+        }
+
+        spin_lock(&obd_dev_lock);
+        for (i = 0; i < MAX_OBD_DEVICES; i++) {
+                struct obd_device *obd;
+
+                obd = &obd_dev[i];
+                if (obd->obd_type == NULL)
+                        continue;
+
+                /* hold a reference so the lock can be dropped around the
+                 * health check itself */
+                class_incref(obd);
+                spin_unlock(&obd_dev_lock);
+
+                if (obd_health_check(obd)) {
+                        healthy = 0;
+                        rc += snprintf(page + rc, count - rc,
+                                       "device %s reported unhealthy\n",
+                                       obd->obd_name);
+                        if (rc >= count)
+                                rc = count - 1;
+                }
+                class_decref(obd);
+                spin_lock(&obd_dev_lock);
+        }
+        spin_unlock(&obd_dev_lock);
+
+        if (healthy)
+                rc = snprintf(page, count, "healthy\n");
+        else
+                rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n");
+
+        if (rc >= count)
+                rc = count - 1;
+        return rc;
+}
+
+/* /proc read handler: print the current obd_health_check_timeout value. */
+static int obd_proc_rd_health_timeout(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data)
+{
+        int written = snprintf(page, count, "%d\n", obd_health_check_timeout);
+
+        *eof = 1;
+        return written;
+}
+
+/*
+ * /proc write handler: parse the user buffer with lprocfs_write_helper() and
+ * store the result in obd_health_check_timeout.  Returns the helper's error
+ * if parsing fails, otherwise count.
+ */
+static int obd_proc_wr_health_timeout(struct file *file, const char *buffer,
+                                      unsigned long count, void *data)
+{
+        int timeout;
+        int rc = lprocfs_write_helper(buffer, count, &timeout);
+
+        if (rc != 0)
+                return rc;
+
+        obd_health_check_timeout = timeout;
+        return count;
+}
+
+/* Root for /proc/fs/lustre */
+struct proc_dir_entry *proc_lustre_root = NULL;
+
+/* Entries that class_procfs_init() adds under proc_lustre_root via
+ * lprocfs_add_vars().  Each row is: entry name, read handler, write handler
+ * (NULL for read-only entries), and a fourth field left NULL here.
+ * NOTE(review): the { 0 } row is presumably the list terminator -- confirm
+ * against lprocfs_add_vars(). */
+struct lprocfs_vars lprocfs_base[] = {
+        { "version", obd_proc_read_version, NULL, NULL },
+        { "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
+        { "pinger", obd_proc_read_pinger, NULL, NULL },
+        { "health_check", obd_proc_read_health, NULL, NULL },
+        { "health_check_timeout", obd_proc_rd_health_timeout,
+           obd_proc_wr_health_timeout, NULL },
+        { 0 }
+};
+#else
+#define lprocfs_base NULL
+#endif /* LPROCFS */
+
+#ifdef __KERNEL__
+/* seq_file start: return the obd_dev[] slot at *pos, or NULL past the end. */
+static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos)
+{
+        if (*pos < MAX_OBD_DEVICES)
+                return &obd_dev[*pos];
+
+        return NULL;
+}
+
+/* seq_file stop: intentionally empty, this iterator has nothing to undo. */
+static void obd_device_list_seq_stop(struct seq_file *p, void *v)
+{
+}
+
+/* seq_file next: advance *pos and return that obd_dev[] slot, or NULL once
+ * the position reaches MAX_OBD_DEVICES. */
+static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
+{
+        *pos += 1;
+        if (*pos < MAX_OBD_DEVICES)
+                return &obd_dev[*pos];
+
+        return NULL;
+}
+
+/*
+ * seq_file show: print one line for the device slot v -- index, state, type
+ * name, device name, uuid and reference count.  Slots without a type are
+ * skipped.  The state is "ST" (stopping), "UP" (set up), "AT" (attached) or
+ * "--", tested in that order.
+ */
+static int obd_device_list_seq_show(struct seq_file *p, void *v)
+{
+        struct obd_device *dev = (struct obd_device *)v;
+        int slot = dev - &obd_dev[0];
+        char *state;
+
+        if (dev->obd_type == NULL)
+                return 0;
+
+        state = dev->obd_stopping ? "ST" :
+                dev->obd_set_up   ? "UP" :
+                dev->obd_attached ? "AT" : "--";
+
+        return seq_printf(p, "%3d %s %s %s %s %d\n",
+                          slot, state, dev->obd_type->typ_name,
+                          dev->obd_name, dev->obd_uuid.uuid,
+                          atomic_read(&dev->obd_refcount));
+}
+
+/* seq_file iterator over obd_dev[], used by obd_device_list_open(). */
+struct seq_operations obd_device_list_sops = {
+        .start = obd_device_list_seq_start,
+        .stop = obd_device_list_seq_stop,
+        .next = obd_device_list_seq_next,
+        .show = obd_device_list_seq_show,
+};
+
+/*
+ * open() for the "devices" proc entry: start a seq_file session over
+ * obd_device_list_sops and store the proc entry's data pointer in the
+ * seq_file's private field.  Returns seq_open()'s error, or 0.
+ */
+static int obd_device_list_open(struct inode *inode, struct file *file)
+{
+        struct seq_file *seq;
+        int rc;
+
+        rc = seq_open(file, &obd_device_list_sops);
+        if (rc != 0)
+                return rc;
+
+        seq = file->private_data;
+        seq->private = PDE(inode)->data;
+
+        return 0;
+}
+
+/* File operations installed on the "devices" proc entry by
+ * class_procfs_init(); everything except open is the stock seq_file code. */
+struct file_operations obd_device_list_fops = {
+        .owner   = THIS_MODULE,
+        .open    = obd_device_list_open,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .release = seq_release,
+};
+#endif
+
+/*
+ * Set up the obdclass /proc interface (kernel builds only).
+ *
+ * Registers the sysctl table, creates the "lustre" directory under
+ * proc_root_fs, populates it from lprocfs_base and adds the "devices" entry
+ * backed by obd_device_list_fops.  Outside the kernel this is a no-op.
+ *
+ * Returns 0 on success or -ENOMEM if a proc entry cannot be created.  On
+ * failure proc_lustre_root is left NULL, so a later class_procfs_clean()
+ * does not remove the same tree a second time.
+ */
+int class_procfs_init(void)
+{
+#ifdef __KERNEL__
+        struct proc_dir_entry *entry;
+        ENTRY;
+
+        obd_sysctl_init();
+        proc_lustre_root = proc_mkdir("lustre", proc_root_fs);
+        if (!proc_lustre_root) {
+                printk(KERN_ERR
+                       "LustreError: error registering /proc/fs/lustre\n");
+                RETURN(-ENOMEM);
+        }
+        proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL);
+        entry = create_proc_entry("devices", 0444, proc_lustre_root);
+        if (entry == NULL) {
+                CERROR("error registering /proc/fs/lustre/devices\n");
+                lprocfs_remove(proc_lustre_root);
+                /* the tree is gone; do not leave a stale root behind */
+                proc_lustre_root = NULL;
+                RETURN(-ENOMEM);
+        }
+        entry->proc_fops = &obd_device_list_fops;
+#else
+        ENTRY;
+#endif
+        RETURN(0);
+}
+
+#ifdef __KERNEL__
+/* Remove the /proc tree rooted at proc_lustre_root, if any.  Always
+ * returns 0. */
+int class_procfs_clean(void)
+{
+        ENTRY;
+
+        if (proc_lustre_root != NULL) {
+                lprocfs_remove(proc_lustre_root);
+                proc_lustre_root = NULL;
+        }
+
+        RETURN(0);
+}
+
+
+/* Check that we're building against the appropriate version of the Lustre
+ * kernel patch */
+#include <linux/lustre_version.h>
+#define LUSTRE_MIN_VERSION 37
+#define LUSTRE_MAX_VERSION 47
+#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION)
+# error Cannot continue: Your Lustre kernel patch is older than the sources
+#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION)
+# error Cannot continue: Your Lustre sources are older than the kernel patch
+#endif
+#endif
diff --git a/lustre/obdclass/linux/linux-obdo.c b/lustre/obdclass/linux/linux-obdo.c
new file mode 100644 (file)
index 0000000..b5db22d
--- /dev/null
@@ -0,0 +1,268 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Object Devices Class Driver
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of the Lustre file system, http://www.lustre.org
+ *   Lustre is a trademark of Cluster File Systems, Inc.
+ *
+ *   You may have signed or agreed to another license before downloading
+ *   this software.  If so, you are bound by the terms and conditions
+ *   of that agreement, and the following does not apply to you.  See the
+ *   LICENSE file included with this distribution for more information.
+ *
+ *   If you did not agree to a different license, then this copy of Lustre
+ *   is open source software; you can redistribute it and/or modify it
+ *   under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   In either case, Lustre is distributed in the hope that it will be
+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   license text for more details.
+ *
+ * These are the only exported functions, they provide some generic
+ * infrastructure for managing object devices
+ */
+
+#define DEBUG_SUBSYSTEM S_CLASS
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef __KERNEL__
+#include <liblustre.h>
+#else
+#include <linux/module.h>
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
+
+/*
+ * Copy the attributes selected by ia_valid from the iattr into the obdo and
+ * set the matching OBD_MD_FL* bits in oa->o_valid.  Fields whose ATTR_* bit
+ * is not set in ia_valid are left untouched.
+ */
+void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid)
+{
+        if (ia_valid & ATTR_ATIME) {
+                oa->o_atime = LTIME_S(attr->ia_atime);
+                oa->o_valid |= OBD_MD_FLATIME;
+        }
+        if (ia_valid & ATTR_MTIME) {
+                oa->o_mtime = LTIME_S(attr->ia_mtime);
+                oa->o_valid |= OBD_MD_FLMTIME;
+        }
+        if (ia_valid & ATTR_CTIME) {
+                oa->o_ctime = LTIME_S(attr->ia_ctime);
+                oa->o_valid |= OBD_MD_FLCTIME;
+        }
+        if (ia_valid & ATTR_SIZE) {
+                oa->o_size = attr->ia_size;
+                oa->o_valid |= OBD_MD_FLSIZE;
+        }
+        if (ia_valid & ATTR_MODE) {
+                oa->o_mode = attr->ia_mode;
+                oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
+                /* Drop setgid unless the caller is in the group or has
+                 * CAP_FSETID.  NOTE(review): this tests the o_gid already
+                 * in the obdo; the ATTR_GID branch below runs afterwards,
+                 * so a gid changed in the same call is not the one checked
+                 * here -- confirm this ordering is intended. */
+                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
+                        oa->o_mode &= ~S_ISGID;
+        }
+        if (ia_valid & ATTR_UID) {
+                oa->o_uid = attr->ia_uid;
+                oa->o_valid |= OBD_MD_FLUID;
+        }
+        if (ia_valid & ATTR_GID) {
+                oa->o_gid = attr->ia_gid;
+                oa->o_valid |= OBD_MD_FLGID;
+        }
+}
+EXPORT_SYMBOL(obdo_from_iattr);
+
+/*
+ * Fill an iattr from an obdo.  Only fields that are both requested in valid
+ * and marked in oa->o_valid are copied; attr->ia_valid is rebuilt from
+ * scratch to name exactly the fields that were copied.
+ */
+void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid)
+{
+        /* restrict the request to what the obdo actually carries */
+        valid &= oa->o_valid;
+
+        if ((valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) != 0)
+                CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n",
+                       oa->o_valid, oa->o_mtime, oa->o_ctime);
+
+        attr->ia_valid = 0;
+
+        if ((valid & OBD_MD_FLATIME) != 0) {
+                attr->ia_valid |= ATTR_ATIME;
+                LTIME_S(attr->ia_atime) = oa->o_atime;
+        }
+        if ((valid & OBD_MD_FLMTIME) != 0) {
+                attr->ia_valid |= ATTR_MTIME;
+                LTIME_S(attr->ia_mtime) = oa->o_mtime;
+        }
+        if ((valid & OBD_MD_FLCTIME) != 0) {
+                attr->ia_valid |= ATTR_CTIME;
+                LTIME_S(attr->ia_ctime) = oa->o_ctime;
+        }
+        if ((valid & OBD_MD_FLSIZE) != 0) {
+                attr->ia_valid |= ATTR_SIZE;
+                attr->ia_size = oa->o_size;
+        }
+#if 0   /* you shouldn't be able to change a file's type with setattr */
+        if (valid & OBD_MD_FLTYPE) {
+                attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
+                attr->ia_valid |= ATTR_MODE;
+        }
+#endif
+        if ((valid & OBD_MD_FLMODE) != 0) {
+                /* keep the existing file type, take the permission bits */
+                attr->ia_mode = (attr->ia_mode & S_IFMT) |
+                                (oa->o_mode & ~S_IFMT);
+                attr->ia_valid |= ATTR_MODE;
+                /* setgid survives only for group members or CAP_FSETID */
+                if (!(in_group_p(oa->o_gid) || capable(CAP_FSETID)))
+                        attr->ia_mode &= ~S_ISGID;
+        }
+        if ((valid & OBD_MD_FLUID) != 0) {
+                attr->ia_valid |= ATTR_UID;
+                attr->ia_uid = oa->o_uid;
+        }
+        if ((valid & OBD_MD_FLGID) != 0) {
+                attr->ia_valid |= ATTR_GID;
+                attr->ia_gid = oa->o_gid;
+        }
+}
+EXPORT_SYMBOL(iattr_from_obdo);
+
+/*
+ * Copy the inode fields selected by valid into the obdo and OR the matching
+ * OBD_MD_FL* bits into dst->o_valid.
+ *
+ * WARNING: the file systems must take care not to tinker with
+ * attributes they don't manage (such as blocks).
+ */
+void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
+{
+        obd_flag copied = 0;
+
+        if ((valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) != 0)
+                CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
+                       valid, LTIME_S(src->i_mtime),
+                       LTIME_S(src->i_ctime));
+
+        if ((valid & OBD_MD_FLATIME) != 0) {
+                copied |= OBD_MD_FLATIME;
+                dst->o_atime = LTIME_S(src->i_atime);
+        }
+        if ((valid & OBD_MD_FLMTIME) != 0) {
+                copied |= OBD_MD_FLMTIME;
+                dst->o_mtime = LTIME_S(src->i_mtime);
+        }
+        if ((valid & OBD_MD_FLCTIME) != 0) {
+                copied |= OBD_MD_FLCTIME;
+                dst->o_ctime = LTIME_S(src->i_ctime);
+        }
+        if ((valid & OBD_MD_FLSIZE) != 0) {
+                copied |= OBD_MD_FLSIZE;
+                dst->o_size = src->i_size;
+        }
+        /* allocation of space (x512 bytes) */
+        if ((valid & OBD_MD_FLBLOCKS) != 0) {
+                copied |= OBD_MD_FLBLOCKS;
+                dst->o_blocks = src->i_blocks;
+        }
+        /* optimal block size */
+        if ((valid & OBD_MD_FLBLKSZ) != 0) {
+                copied |= OBD_MD_FLBLKSZ;
+                dst->o_blksize = src->i_blksize;
+        }
+        /* type and permission bits are merged into o_mode separately, each
+         * step preserving the other half of the mode word */
+        if ((valid & OBD_MD_FLTYPE) != 0) {
+                copied |= OBD_MD_FLTYPE;
+                dst->o_mode = (dst->o_mode & S_IALLUGO) |
+                              (src->i_mode & S_IFMT);
+        }
+        if ((valid & OBD_MD_FLMODE) != 0) {
+                copied |= OBD_MD_FLMODE;
+                dst->o_mode = (dst->o_mode & S_IFMT) |
+                              (src->i_mode & S_IALLUGO);
+        }
+        if ((valid & OBD_MD_FLUID) != 0) {
+                copied |= OBD_MD_FLUID;
+                dst->o_uid = src->i_uid;
+        }
+        if ((valid & OBD_MD_FLGID) != 0) {
+                copied |= OBD_MD_FLGID;
+                dst->o_gid = src->i_gid;
+        }
+        if ((valid & OBD_MD_FLFLAGS) != 0) {
+                copied |= OBD_MD_FLFLAGS;
+                dst->o_flags = src->i_flags;
+        }
+        if ((valid & OBD_MD_FLGENER) != 0) {
+                copied |= OBD_MD_FLGENER;
+                dst->o_generation = src->i_generation;
+        }
+        if ((valid & OBD_MD_FLFID) != 0) {
+                copied |= OBD_MD_FLFID;
+                dst->o_fid = src->i_ino;
+        }
+
+        dst->o_valid |= copied;
+}
+EXPORT_SYMBOL(obdo_from_inode);
+
+/*
+ * Refresh an inode from an obdo without moving anything backwards: the
+ * times, block size and block count are taken only when the obdo's value is
+ * larger than the inode's.  The size is copied unconditionally when it is
+ * valid, and i_blksize is raised to at least PAGE_CACHE_SIZE.
+ */
+void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+        /* restrict the request to what the obdo actually carries */
+        valid &= src->o_valid;
+
+        if ((valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) != 0)
+                CDEBUG(D_INODE,
+                       "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
+                       src->o_valid, LTIME_S(dst->i_mtime),
+                       LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
+
+        if ((valid & OBD_MD_FLATIME) && src->o_atime > LTIME_S(dst->i_atime))
+                LTIME_S(dst->i_atime) = src->o_atime;
+
+        if ((valid & OBD_MD_FLMTIME) && src->o_mtime > LTIME_S(dst->i_mtime))
+                LTIME_S(dst->i_mtime) = src->o_mtime;
+
+        if ((valid & OBD_MD_FLCTIME) && src->o_ctime > LTIME_S(dst->i_ctime))
+                LTIME_S(dst->i_ctime) = src->o_ctime;
+
+        if ((valid & OBD_MD_FLSIZE) != 0)
+                dst->i_size = src->o_size;
+
+        /* optimum IO size, never below one page */
+        if ((valid & OBD_MD_FLBLKSZ) && src->o_blksize > dst->i_blksize)
+                dst->i_blksize = src->o_blksize;
+        if (dst->i_blksize < PAGE_CACHE_SIZE)
+                dst->i_blksize = PAGE_CACHE_SIZE;
+
+        /* allocation of space */
+        if ((valid & OBD_MD_FLBLOCKS) && src->o_blocks > dst->i_blocks)
+                dst->i_blocks = src->o_blocks;
+}
+EXPORT_SYMBOL(obdo_refresh_inode);
+
+/*
+ * Copy the obdo fields that are both requested in valid and marked in
+ * src->o_valid into the inode.  Unlike the other fields, ctime is only ever
+ * moved forward.
+ */
+void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
+{
+        /* restrict the request to what the obdo actually carries */
+        valid &= src->o_valid;
+
+        if ((valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) != 0)
+                CDEBUG(D_INODE,
+                       "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
+                       src->o_valid, LTIME_S(dst->i_mtime),
+                       LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
+
+        if ((valid & OBD_MD_FLATIME) != 0)
+                LTIME_S(dst->i_atime) = src->o_atime;
+
+        if ((valid & OBD_MD_FLMTIME) != 0)
+                LTIME_S(dst->i_mtime) = src->o_mtime;
+
+        if ((valid & OBD_MD_FLCTIME) && src->o_ctime > LTIME_S(dst->i_ctime))
+                LTIME_S(dst->i_ctime) = src->o_ctime;
+
+        if ((valid & OBD_MD_FLSIZE) != 0)
+                dst->i_size = src->o_size;
+
+        /* allocation of space */
+        if ((valid & OBD_MD_FLBLOCKS) != 0) {
+                dst->i_blocks = src->o_blocks;
+                /* the stored value came out smaller than the source:
+                 * overflow, so saturate */
+                if (dst->i_blocks < src->o_blocks)
+                        dst->i_blocks = -1;
+        }
+
+        if ((valid & OBD_MD_FLBLKSZ) != 0)
+                dst->i_blksize = src->o_blksize;
+
+        /* file type and permission bits are replaced independently */
+        if ((valid & OBD_MD_FLTYPE) != 0)
+                dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
+
+        if ((valid & OBD_MD_FLMODE) != 0)
+                dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
+
+        if ((valid & OBD_MD_FLUID) != 0)
+                dst->i_uid = src->o_uid;
+
+        if ((valid & OBD_MD_FLGID) != 0)
+                dst->i_gid = src->o_gid;
+
+        if ((valid & OBD_MD_FLFLAGS) != 0)
+                dst->i_flags = src->o_flags;
+
+        if ((valid & OBD_MD_FLGENER) != 0)
+                dst->i_generation = src->o_generation;
+}
+EXPORT_SYMBOL(obdo_to_inode);
+#endif
+
similarity index 99%
rename from lustre/obdclass/sysctl.c
rename to lustre/obdclass/linux/linux-sysctl.c
index 73db087..169aecb 100644 (file)
@@ -44,7 +44,7 @@
 
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/obd_support.h>
+#include <obd_support.h>
 
 struct ctl_table_header *obd_table_header = NULL;
 
index 18e8c00..2af0105 100644 (file)
 #define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
 
 /* Allocate a new log or catalog handle */
@@ -170,10 +168,10 @@ int llog_init_handle(struct llog_handle *handle, int flags,
 
 out:
         if (flags & LLOG_F_IS_CAT) {
-                INIT_LIST_HEAD(&handle->u.chd.chd_head);
+                CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head);
                 llh->llh_size = sizeof(struct llog_logid_rec);
         } else if (flags & LLOG_F_IS_PLAIN) {
-                INIT_LIST_HEAD(&handle->u.phd.phd_entry);
+                CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry);
         } else {
                 CERROR("Unknown flags: %#x (Expected %#x or %#x\n",
                        flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN);
index 396c2ca..55039cc 100644 (file)
 #define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
 
 /* Create a new log handle and add it to the open list.
@@ -343,6 +341,7 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec,
         struct llog_handle *llh;
         int rc;
 
+        ENTRY;
         if (rec->lrh_type != LLOG_LOGID_MAGIC) {
                 CERROR("invalid record in catalog\n");
                 RETURN(-EINVAL);
index ce3f0a1..9bdea74 100644 (file)
@@ -28,9 +28,8 @@
 #define EXPORT_SYMTAB
 #endif
 
-#include <linux/fs.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
 #include "llog_internal.h"
 
@@ -38,6 +37,7 @@ static int str2logid(struct llog_logid *logid, char *str, int len)
 {
         char *start, *end, *endp;
 
+        ENTRY;
         start = str;
         if (*start != '#')
                 RETURN(-EINVAL);
@@ -85,6 +85,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
         char *endp;
         int cur_index, rc = 0;
 
+        ENTRY;
         cur_index = rec->lrh_index;
 
         if (ioc_data && (ioc_data->ioc_inllen1)) {
@@ -128,7 +129,6 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
                 case MDS_UNLINK_REC:
                 case MDS_SETATTR_REC:
                 case OBD_CFG_REC:
-                case PTL_CFG_REC:               /* obsolete */
                 case LLOG_HDR_MAGIC: {
                          l = snprintf(out, remains, "[index]: %05d  [type]: "
                                       "%02x  [len]: %04d ok\n",
@@ -169,6 +169,7 @@ static int llog_print_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
         char *endp;
         int cur_index;
 
+        ENTRY;
         if (ioc_data->ioc_inllen1) {
                 l = 0;
                 remains = ioc_data->ioc_inllen4 +
@@ -222,6 +223,7 @@ static int llog_remove_log(struct llog_handle *cat, struct llog_logid *logid)
         struct llog_handle *log;
         int rc, index = 0;
 
+        ENTRY;
         down_write(&cat->lgh_lock);
         rc = llog_cat_id2handle(cat, &log, logid);
         if (rc) {
@@ -252,8 +254,9 @@ static int llog_delete_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
         struct  llog_logid_rec *lir = (struct llog_logid_rec*)rec;
         int     rc;
 
+        ENTRY;
         if (rec->lrh_type != LLOG_LOGID_MAGIC)
-              return (-EINVAL);
+              RETURN (-EINVAL);
         rc = llog_remove_log(handle, &lir->lid_id);
 
         RETURN(rc);
@@ -266,6 +269,7 @@ int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data)
         int err = 0;
         struct llog_handle *handle = NULL;
 
+        ENTRY;
         if (*data->ioc_inlbuf1 == '#') {
                 err = str2logid(&logid, data->ioc_inlbuf1, data->ioc_inllen1);
                 if (err)
@@ -406,6 +410,7 @@ int llog_catalog_list(struct obd_device *obd, int count,
         char *out;
         int l, remains, rc = 0;
 
+        ENTRY;
         size = sizeof(*idarray) * count;
 
         OBD_ALLOC(idarray, size);
index f9beaa9..2eedc32 100644 (file)
 #define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
 
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <linux/obd_ost.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <obd_ost.h>
 #include <libcfs/list.h>
-#include <linux/lvfs.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_disk.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
 #include "llog_internal.h"
 
-#ifdef __KERNEL__
+#if defined(__KERNEL__) && defined(LLOG_LVFS)
 
 static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file,
                                 int len, int index)
index 23d84b4..a1829bc 100644 (file)
 #define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
 #include "llog_internal.h"
 
index 87f2883..fd138af 100644 (file)
@@ -32,7 +32,7 @@
 #include <liblustre.h>
 #endif
 
-#include <linux/lustre_log.h>
+#include <lustre_log.h>
 
 static void print_llogd_body(struct llogd_body *d)
 {
index 7f25981..b214d1d 100644 (file)
@@ -33,9 +33,8 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_mds.h> /* for LUSTRE_MDC_NAME */
+#include <obd_class.h>
+#include <lustre_log.h>
 
 static int llog_test_rand;
 static struct obd_uuid uuid = { .uuid = "test_uuid" };
index 2951d48..0c3aeb8 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#ifdef __KERNEL__
-# include <linux/config.h>
-# include <linux/module.h>
-# include <linux/version.h>
-# include <linux/slab.h>
-# include <linux/types.h>
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#  include <asm/statfs.h>
-# endif
-# include <linux/seq_file.h>
-#else /* __KERNEL__ */
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
+#include <lustre_fsfilt.h>
 
 #if defined(LPROCFS)
 
@@ -371,6 +361,8 @@ static const char *obd_connect_names[] = {
         "initial_transno",
         "inode_bit_locks",
         "join_file",
+        "",
+        "no_oh_for_devices",
         NULL
 };
 
@@ -662,7 +654,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
 
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info);
-        LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup);
@@ -727,7 +719,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         for (i = num_private_stats; i < num_stats; i++) {
                 /* If this LBUGs, it is likely that an obd
                  * operation was added to struct obd_ops in
-                 * <linux/obd.h>, and that the corresponding line item
+                 * <obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
                 if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
index 23896b1..fae1341 100644 (file)
 
 #include <linux/seq_file.h>
 #include <linux/module.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_disk.h>
-
-#include <linux/lu_object.h>
+#include <obd_support.h>
+#include <lustre_disk.h>
+#include <lu_object.h>
 #include <libcfs/list.h>
 
 static void lu_object_free(struct lu_context *ctx, struct lu_object *o);
index ef7639d..21d6f50 100644 (file)
  */
 
 #define DEBUG_SUBSYSTEM S_CLASS
-#ifdef __KERNEL__
-# include <linux/types.h>
-# include <linux/random.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_handles.h>
+#include <obd_support.h>
+#include <lustre_handles.h>
 
-static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t handle_lock;
 static __u64 handle_base;
 #define HANDLE_INCR 7
 static struct list_head *handle_hash = NULL;
@@ -146,7 +143,7 @@ int class_handle_init(void)
 
         for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash;
              bucket--)
-                INIT_LIST_HEAD(bucket);
+                CFS_INIT_LIST_HEAD(bucket);
 
         get_random_bytes(&handle_base, sizeof(handle_base));
         LASSERT(handle_base != 0ULL);
index 8a601c1..be6efef 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RPC
 
-#ifdef __KERNEL__
-# include <linux/module.h>
-# include <linux/init.h>
-# include <linux/list.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
-#include <linux/obd.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_net.h>
-#include <linux/lprocfs_status.h>
+#include <obd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lprocfs_status.h>
 
 struct uuid_nid_data {
         struct list_head un_list;
@@ -52,7 +48,7 @@ static spinlock_t       g_uuid_lock;
 
 void class_init_uuidlist(void)
 {
-        INIT_LIST_HEAD(&g_uuid_list);
+        CFS_INIT_LIST_HEAD(&g_uuid_list);
         spin_lock_init(&g_uuid_lock);
 }
 
@@ -128,7 +124,7 @@ int class_del_uuid(const char *uuid)
         struct list_head *n;
         struct uuid_nid_data *data;
 
-        INIT_LIST_HEAD (&deathrow);
+        CFS_INIT_LIST_HEAD (&deathrow);
 
         spin_lock (&g_uuid_lock);
 
index 1042f09..2de1de6 100644 (file)
 #ifdef __KERNEL__
 #include <linux/kmod.h>   /* for request_module() */
 #include <linux/module.h>
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #else 
 #include <liblustre.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
+#include <obd_class.h>
+#include <obd.h>
 #endif
-#include <linux/lprocfs_status.h>
+#include <lprocfs_status.h>
 
 static int mea_last_char_hash(int count, char *name, int namelen)
 {
index 630079a..c1665cb 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_CLASS
 #ifdef __KERNEL__
-#include <linux/kmod.h>   /* for request_module() */
-#include <linux/module.h>
-#include <linux/obd_class.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
+#include <obd_class.h>
 #else
 #include <liblustre.h>
-#include <linux/obd_class.h>
-#include <linux/obd.h>
+#include <obd_class.h>
+#include <obd.h>
 #endif
-#include <linux/lustre_log.h>
-#include <linux/lprocfs_status.h>
+#include <lustre_log.h>
+#include <lprocfs_status.h>
 #include <libcfs/list.h>
 
 
@@ -54,6 +49,7 @@ int class_attach(struct lustre_cfg *lcfg)
         struct obd_device *obd = NULL;
         char *typename, *name, *namecopy, *uuid;
         int rc, len, cleanup_phase = 0;
+        ENTRY;
 
         if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) {
                 CERROR("No type passed!\n");
@@ -119,22 +115,22 @@ int class_attach(struct lustre_cfg *lcfg)
         }
         cleanup_phase = 3;  /* class_release_dev */
 
-        INIT_LIST_HEAD(&obd->obd_exports);
-        INIT_LIST_HEAD(&obd->obd_exports_timed);
+        CFS_INIT_LIST_HEAD(&obd->obd_exports);
+        CFS_INIT_LIST_HEAD(&obd->obd_exports_timed);
         spin_lock_init(&obd->obd_dev_lock);
         spin_lock_init(&obd->obd_osfs_lock);
-        obd->obd_osfs_age = jiffies - 1000 * HZ;
+        obd->obd_osfs_age = cfs_time_shift(-1000);
 
         /* XXX belongs in setup not attach  */
         /* recovery data */
-        init_timer(&obd->obd_recovery_timer);
+        cfs_init_timer(&obd->obd_recovery_timer);
         spin_lock_init(&obd->obd_processing_task_lock);
-        init_waitqueue_head(&obd->obd_next_transno_waitq);
-        INIT_LIST_HEAD(&obd->obd_recovery_queue);
-        INIT_LIST_HEAD(&obd->obd_delayed_reply_queue);
+        cfs_waitq_init(&obd->obd_next_transno_waitq);
+        CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue);
+        CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue);
 
         spin_lock_init(&obd->obd_uncommitted_replies_lock);
-        INIT_LIST_HEAD(&obd->obd_uncommitted_replies);
+        CFS_INIT_LIST_HEAD(&obd->obd_uncommitted_replies);
 
         len = strlen(uuid);
         if (len >= sizeof(obd->obd_uuid)) {
@@ -513,12 +509,13 @@ int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg)
         RETURN(rc);
 }
 
-static LIST_HEAD(lustre_profile_list);
+CFS_LIST_HEAD(lustre_profile_list);
 
 struct lustre_profile *class_get_profile(const char * prof)
 {
         struct lustre_profile *lprof;
 
+        ENTRY;
         list_for_each_entry(lprof, &lustre_profile_list, lp_list) {
                 if (!strcmp(lprof->lp_profile, prof)) {
                         RETURN(lprof);
@@ -533,10 +530,11 @@ int class_add_profile(int proflen, char *prof, int osclen, char *osc,
         struct lustre_profile *lprof;
         int err = 0;
 
+        ENTRY;
         OBD_ALLOC(lprof, sizeof(*lprof));
         if (lprof == NULL)
                 RETURN(-ENOMEM);
-        INIT_LIST_HEAD(&lprof->lp_list);
+        CFS_INIT_LIST_HEAD(&lprof->lp_list);
 
         LASSERT(proflen == (strlen(prof) + 1));
         OBD_ALLOC(lprof->lp_profile, proflen);
@@ -568,7 +566,7 @@ out:
                 OBD_FREE(lprof->lp_osc, osclen);
         if (lprof->lp_profile)
                 OBD_FREE(lprof->lp_profile, proflen);
-        OBD_FREE(lprof, sizeof(*lprof));
+        OBD_FREE(lprof, sizeof(*lprof));        
         RETURN(err);
 }
 
@@ -846,10 +844,6 @@ static int class_config_llog_handler(struct llog_handle * handle,
                         OBD_FREE(inst_name, inst_len);
                 break;
         }
-        case PTL_CFG_REC: {
-                CWARN("Ignoring obsolete portals config\n");
-                break;
-        }
         default:
                 CERROR("Unknown llog record type %#x encountered\n",
                        rec->lrh_type);
@@ -887,8 +881,8 @@ int class_config_parse_llog(struct llog_ctxt *ctxt, char *name,
 
         rc = llog_process(llh, class_config_llog_handler, cfg, &cd);
 
-        // FIXME remove warning
-        CDEBUG(D_CONFIG|D_WARNING, "Processed log %s gen %d-%d (%d)\n", name,
+        /* FIXME remove warning */
+        CDEBUG(D_CONFIG|D_WARNING, "Processed log %s gen %d-%d (rc=%d)\n", name, 
                cd.first_idx + 1, cd.last_idx, rc);
         if (cfg)
                 cfg->cfg_last_idx = cd.last_idx;
@@ -952,8 +946,6 @@ int class_config_dump_handler(struct llog_handle * handle,
                         }
                 }
                 LCONSOLE(D_WARNING, "   %s\n", outstr);
-        } else if (rec->lrh_type == PTL_CFG_REC) {
-                LCONSOLE(D_WARNING, "Obsolete pcfg command\n");
         } else {
                 LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type);
                 rc = -EINVAL;
index 877f4f3..beb700c 100644 (file)
 #define PRINT_CMD LCONSOLE
 #define PRINT_MASK D_SUPER
 
-#include <linux/obd.h>
-#include <linux/lvfs.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/obd_class.h>
+#include <obd.h>
+#include <lvfs.h>
+#include <lustre_fsfilt.h>
+#include <obd_class.h>
 #include <lustre/lustre_user.h>
-#include <linux/version.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_param.h>
-#include <linux/lustre_ver.h>
-
+#include <linux/version.h> 
+#include <lustre_log.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
+#include <lustre_ver.h>
+                      
 static int (*client_fill_super)(struct super_block *sb) = NULL;
 
 /*********** string parsing utils *********/
@@ -463,7 +463,9 @@ int lustre_process_log(struct super_block *sb, char *logname,
 
         if (rc)
                 LCONSOLE_ERROR("%s: The configuration '%s' could not be read "
-                               "(%d), mount will fail.\n",
+                               "from the MGS (%d).  This may be the result of "
+                               "communication errors between this node and "
+                               "the MGS, or the MGS may not be running.\n",
                                mgc->obd_name, logname, rc);
 
         class_obd_list();
@@ -641,10 +643,10 @@ static int lustre_start_mgc(struct super_block *sb)
                    if at all possible. */
                 recov_bk++;
                 CDEBUG(D_MOUNT, "Set MGS reconnect %d\n", recov_bk);
-                rc = obd_set_info(obd->obd_self_export,
-                                  strlen(KEY_INIT_RECOV_BACKUP),
-                                  KEY_INIT_RECOV_BACKUP,
-                                  sizeof(recov_bk), &recov_bk);
+                rc = obd_set_info_async(obd->obd_self_export,
+                                        strlen(KEY_INIT_RECOV_BACKUP),
+                                        KEY_INIT_RECOV_BACKUP,
+                                        sizeof(recov_bk), &recov_bk, NULL);
                 GOTO(out, rc = 0);
         }
 
@@ -735,10 +737,11 @@ static int lustre_start_mgc(struct super_block *sb)
 
         /* Try all connections, but only once. */
         recov_bk = 1;
-        rc = obd_set_info(obd->obd_self_export,
-                          strlen(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP,
-                          sizeof(recov_bk), &recov_bk);
-        if (rc)
+        rc = obd_set_info_async(obd->obd_self_export,
+                                strlen(KEY_INIT_RECOV_BACKUP), 
+                                KEY_INIT_RECOV_BACKUP,
+                                sizeof(recov_bk), &recov_bk, NULL);
+        if (rc) 
                 /* nonfatal */
                 CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc);
 
@@ -827,9 +830,9 @@ static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb)
         CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev);
 
         /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */
-        rc = obd_set_info(mgc->obd_self_export,
-                          strlen("set_fs"), "set_fs",
-                          sizeof(*sb), sb);
+        rc = obd_set_info_async(mgc->obd_self_export,
+                                strlen("set_fs"), "set_fs",
+                                sizeof(*sb), sb, NULL);
         if (rc) {
                 CERROR("can't set_fs %d\n", rc);
         }
@@ -843,9 +846,10 @@ static int server_mgc_clear_fs(struct obd_device *mgc)
         ENTRY;
 
         CDEBUG(D_MOUNT, "Unassign mgc disk\n");
-
-        rc = obd_set_info(mgc->obd_self_export,
-                          strlen("clear_fs"), "clear_fs", 0, NULL);
+        
+        rc = obd_set_info_async(mgc->obd_self_export,
+                                strlen("clear_fs"), "clear_fs",
+                                0, NULL, NULL);
         RETURN(rc);
 }
 
@@ -964,9 +968,9 @@ int server_register_target(struct super_block *sb)
 
         /* Register the target */
         /* FIXME use mdc_process_config instead */
-        rc = obd_set_info(mgc->u.cli.cl_mgc_mgsexp,
-                          strlen("register_target"), "register_target",
-                          sizeof(*mti), mti);
+        rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp,
+                                strlen("register_target"), "register_target",
+                                sizeof(*mti), mti, NULL);
         if (rc) {
                 CERROR("registration with the MGS failed (%d)\n", rc);
                 GOTO(out, rc);
@@ -1358,7 +1362,7 @@ static void server_put_super(struct super_block *sb)
            is right. */
         server_stop_servers(lddflags, lsiflags);
 
-        CDEBUG(D_MOUNT|D_WARNING, "server umount %s done\n", tmpname);
+        LCONSOLE_WARN("server umount %s complete\n", tmpname);
         OBD_FREE(tmpname, tmpname_sz);
         EXIT;
 }
@@ -1831,8 +1835,7 @@ out:
                 CERROR("Unable to mount %s\n",
                        s2lsi(sb) ? lmd->lmd_dev : "");
         } else {
-                CDEBUG(D_MOUNT|D_WARNING, "Successfully mounted %s\n", 
-                       lmd->lmd_dev);
+                LCONSOLE_WARN("mount %s complete\n", lmd->lmd_dev);
         }
         RETURN(rc);
 }
index 97812a9..94e70bb 100644 (file)
 #ifndef __KERNEL__
 #include <liblustre.h>
 #else
-#include <linux/module.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_idl.h>
-#endif
-
-#ifdef __KERNEL__
-#include <linux/fs.h>
-#include <linux/pagemap.h> /* for PAGE_CACHE_SIZE */
-
-void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid)
-{
-        if (ia_valid & ATTR_ATIME) {
-                oa->o_atime = LTIME_S(attr->ia_atime);
-                oa->o_valid |= OBD_MD_FLATIME;
-        }
-        if (ia_valid & ATTR_MTIME) {
-                oa->o_mtime = LTIME_S(attr->ia_mtime);
-                oa->o_valid |= OBD_MD_FLMTIME;
-        }
-        if (ia_valid & ATTR_CTIME) {
-                oa->o_ctime = LTIME_S(attr->ia_ctime);
-                oa->o_valid |= OBD_MD_FLCTIME;
-        }
-        if (ia_valid & ATTR_SIZE) {
-                oa->o_size = attr->ia_size;
-                oa->o_valid |= OBD_MD_FLSIZE;
-        }
-        if (ia_valid & ATTR_MODE) {
-                oa->o_mode = attr->ia_mode;
-                oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        oa->o_mode &= ~S_ISGID;
-        }
-        if (ia_valid & ATTR_UID) {
-                oa->o_uid = attr->ia_uid;
-                oa->o_valid |= OBD_MD_FLUID;
-        }
-        if (ia_valid & ATTR_GID) {
-                oa->o_gid = attr->ia_gid;
-                oa->o_valid |= OBD_MD_FLGID;
-        }
-}
-EXPORT_SYMBOL(obdo_from_iattr);
-
-void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid)
-{
-        valid &= oa->o_valid;
-
-        if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-                CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n",
-                       oa->o_valid, oa->o_mtime, oa->o_ctime);
-
-        attr->ia_valid = 0;
-        if (valid & OBD_MD_FLATIME) {
-                LTIME_S(attr->ia_atime) = oa->o_atime;
-                attr->ia_valid |= ATTR_ATIME;
-        }
-        if (valid & OBD_MD_FLMTIME) {
-                LTIME_S(attr->ia_mtime) = oa->o_mtime;
-                attr->ia_valid |= ATTR_MTIME;
-        }
-        if (valid & OBD_MD_FLCTIME) {
-                LTIME_S(attr->ia_ctime) = oa->o_ctime;
-                attr->ia_valid |= ATTR_CTIME;
-        }
-        if (valid & OBD_MD_FLSIZE) {
-                attr->ia_size = oa->o_size;
-                attr->ia_valid |= ATTR_SIZE;
-        }
-#if 0   /* you shouldn't be able to change a file's type with setattr */
-        if (valid & OBD_MD_FLTYPE) {
-                attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-        }
-#endif
-        if (valid & OBD_MD_FLMODE) {
-                attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT);
-                attr->ia_valid |= ATTR_MODE;
-                if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID))
-                        attr->ia_mode &= ~S_ISGID;
-        }
-        if (valid & OBD_MD_FLUID) {
-                attr->ia_uid = oa->o_uid;
-                attr->ia_valid |= ATTR_UID;
-        }
-        if (valid & OBD_MD_FLGID) {
-                attr->ia_gid = oa->o_gid;
-                attr->ia_valid |= ATTR_GID;
-        }
-
-        if (valid & OBD_MD_FLFLAGS) {
-                attr->ia_attr_flags = oa->o_flags;
-                attr->ia_valid |= ATTR_ATTR_FLAG;
-        }
-}
-EXPORT_SYMBOL(iattr_from_obdo);
-
-/* WARNING: the file systems must take care not to tinker with
-   attributes they don't manage (such as blocks). */
-void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
-{
-        obd_flag newvalid = 0;
-
-        if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-                CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
-                       valid, LTIME_S(src->i_mtime), 
-                       LTIME_S(src->i_ctime));
-
-        if (valid & OBD_MD_FLATIME) {
-                dst->o_atime = LTIME_S(src->i_atime);
-                newvalid |= OBD_MD_FLATIME;
-        }
-        if (valid & OBD_MD_FLMTIME) {
-                dst->o_mtime = LTIME_S(src->i_mtime);
-                newvalid |= OBD_MD_FLMTIME;
-        }
-        if (valid & OBD_MD_FLCTIME) {
-                dst->o_ctime = LTIME_S(src->i_ctime);
-                newvalid |= OBD_MD_FLCTIME;
-        }
-        if (valid & OBD_MD_FLSIZE) {
-                dst->o_size = src->i_size;
-                newvalid |= OBD_MD_FLSIZE;
-        }
-        if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
-                dst->o_blocks = src->i_blocks;
-                newvalid |= OBD_MD_FLBLOCKS;
-        }
-        if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
-                dst->o_blksize = src->i_blksize;
-                newvalid |= OBD_MD_FLBLKSZ;
-        }
-        if (valid & OBD_MD_FLTYPE) {
-                dst->o_mode = (dst->o_mode & S_IALLUGO)|(src->i_mode & S_IFMT);
-                newvalid |= OBD_MD_FLTYPE;
-        }
-        if (valid & OBD_MD_FLMODE) {
-                dst->o_mode = (dst->o_mode & S_IFMT)|(src->i_mode & S_IALLUGO);
-                newvalid |= OBD_MD_FLMODE;
-        }
-        if (valid & OBD_MD_FLUID) {
-                dst->o_uid = src->i_uid;
-                newvalid |= OBD_MD_FLUID;
-        }
-        if (valid & OBD_MD_FLGID) {
-                dst->o_gid = src->i_gid;
-                newvalid |= OBD_MD_FLGID;
-        }
-        if (valid & OBD_MD_FLFLAGS) {
-                dst->o_flags = src->i_flags;
-                newvalid |= OBD_MD_FLFLAGS;
-        }
-        if (valid & OBD_MD_FLGENER) {
-                dst->o_generation = src->i_generation;
-                newvalid |= OBD_MD_FLGENER;
-        }
-        if (valid & OBD_MD_FLFID) {
-                dst->o_fid = src->i_ino;
-                newvalid |= OBD_MD_FLFID;
-        }
-
-        dst->o_valid |= newvalid;
-}
-EXPORT_SYMBOL(obdo_from_inode);
-
-void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-                CDEBUG(D_INODE,
-                       "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
-                       src->o_valid, LTIME_S(dst->i_mtime),
-                       LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
-
-        if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime))
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime))
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE) 
-                dst->i_size = src->o_size;
-        /* optimum IO size */
-        if (valid & OBD_MD_FLBLKSZ && src->o_blksize > dst->i_blksize)
-                dst->i_blksize = src->o_blksize;
-        if (dst->i_blksize < PAGE_CACHE_SIZE)
-                dst->i_blksize = PAGE_CACHE_SIZE;
-        /* allocation of space */
-        if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks)
-                dst->i_blocks = src->o_blocks;
-}
-EXPORT_SYMBOL(obdo_refresh_inode);
-
-void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
-{
-        valid &= src->o_valid;
-
-        if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
-                CDEBUG(D_INODE,
-                       "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n",
-                       src->o_valid, LTIME_S(dst->i_mtime),
-                       LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime);
-
-        if (valid & OBD_MD_FLATIME)
-                LTIME_S(dst->i_atime) = src->o_atime;
-        if (valid & OBD_MD_FLMTIME)
-                LTIME_S(dst->i_mtime) = src->o_mtime;
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime))
-                LTIME_S(dst->i_ctime) = src->o_ctime;
-        if (valid & OBD_MD_FLSIZE)
-                dst->i_size = src->o_size;
-        if (valid & OBD_MD_FLBLOCKS) { /* allocation of space */
-                dst->i_blocks = src->o_blocks;
-                if (dst->i_blocks < src->o_blocks) /* overflow */
-                        dst->i_blocks = -1;
-        }
-        if (valid & OBD_MD_FLBLKSZ)
-                dst->i_blksize = src->o_blksize;
-        if (valid & OBD_MD_FLTYPE)
-                dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
-        if (valid & OBD_MD_FLMODE)
-                dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
-        if (valid & OBD_MD_FLUID)
-                dst->i_uid = src->o_uid;
-        if (valid & OBD_MD_FLGID)
-                dst->i_gid = src->o_gid;
-        if (valid & OBD_MD_FLFLAGS)
-                dst->i_flags = src->o_flags;
-        if (valid & OBD_MD_FLGENER)
-                dst->i_generation = src->o_generation;
-}
-EXPORT_SYMBOL(obdo_to_inode);
+#include <obd_class.h>
+#include <lustre/lustre_idl.h>
 #endif
 
 void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid)
index 909e311..b3c2a75 100644 (file)
 # define EXPORT_SYMTAB
 #endif
 
-#ifdef __KERNEL__
-#include <linux/module.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
+#include <obd_class.h>
 
 /*
 From: George Marsaglia <geo@stat.fsu.edu>
index 616bdfa..8e20f85 100644 (file)
 #endif
 #ifndef __KERNEL__
 #include <liblustre.h>
-#else
-#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/statfs.h>
-#endif
 #endif
 
-#include <linux/lustre_export.h>
-#include <linux/lustre_net.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
+#include <lustre_export.h>
+#include <lustre_net.h>
+#include <obd_support.h>
+#include <obd_class.h>
 
 void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs)
 {
index f9235dd..09302bd 100644 (file)
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#ifdef __KERNEL__
-# include <linux/ctype.h>
-# include <linux/kernel.h>
-# include <linux/sched.h>
-# include <linux/smp_lock.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd_ost.h> /* for LUSTRE_OST_NAME */
-#include <linux/lustre_mds.h> /* for LUSTRE_MDC_NAME */
+#include <obd_support.h>
+#include <obd_class.h>
 
 struct uuid {
         __u32   time_low;
diff --git a/lustre/obdecho/Info.plist b/lustre/obdecho/Info.plist
new file mode 100644 (file)
index 0000000..19a6096
--- /dev/null
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>obdecho</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.obdecho</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string> 
+       <key>OSBundleCompatibleVersion</key> 
+       <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict> 
+               <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+                <key>com.apple.kpi.unsupported</key>
+               <string>8.0.0b1</string>
+               <key>com.clusterfs.lustre.libcfs</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.lvfs</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.obdclass</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.ptlrpc</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.osc</key> 
+               <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index 834b082..d08aa57 100644 (file)
@@ -11,8 +11,30 @@ libobdecho_a_CFLAGS = $(LLCFLAGS)
 endif
 
 if MODULES
+if LINUX
 modulefs_DATA = obdecho$(KMODEXT)
+endif
+
+if DARWIN
+macos_PROGRAMS := obdecho
+obdecho_SOURCES := \
+        lproc_echo.c \
+        echo.c \
+        echo_client.c
+
+obdecho_CFLAGS := $(EXTRA_KCFLAGS)
+obdecho_LDFLAGS := $(EXTRA_KLDFLAGS)
+obdecho_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # darwin
+
 endif # MODULES
 
+install-data-hook: $(install_data_hook)
+
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
 DIST_SOURCES = $(obdecho-objs:%.o=%.c)
index beceb66..a923d63 100644 (file)
 # define EXPORT_SYMTAB
 #endif
 
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <asm/unistd.h>
-
 #define DEBUG_SUBSYSTEM S_ECHO
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd_echo.h>
-#include <linux/lustre_debug.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lprocfs_status.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <obd_echo.h>
+#include <lustre_debug.h>
+#include <lustre_dlm.h>
+#include <lprocfs_status.h>
 
 #define ECHO_INIT_OBJID      0x1000000000000000ULL
 #define ECHO_HANDLE_MAGIC    0xabcd0123fedc9876ULL
 
-#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE/PAGE_SIZE)
-static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES];
+#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE/CFS_PAGE_SIZE)
+static cfs_page_t *echo_persistent_pages[ECHO_PERSISTENT_PAGES];
 
 enum {
         LPROC_ECHO_READ_BYTES = 1,
@@ -147,6 +135,7 @@ int echo_destroy(struct obd_export *exp, struct obdo *oa,
 {
         struct obd_device *obd = class_exp2obd(exp);
 
+        ENTRY;
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n",
                        exp->exp_handle.h_cookie);
@@ -163,7 +152,7 @@ int echo_destroy(struct obd_export *exp, struct obdo *oa,
                 RETURN(-EINVAL);
         }
 
-        return 0;
+        RETURN(0);
 }
 
 static int echo_getattr(struct obd_export *exp, struct obdo *oa,
@@ -172,6 +161,7 @@ static int echo_getattr(struct obd_export *exp, struct obdo *oa,
         struct obd_device *obd = class_exp2obd(exp);
         obd_id id = oa->o_id;
 
+        ENTRY;
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n",
                        exp->exp_handle.h_cookie);
@@ -186,7 +176,7 @@ static int echo_getattr(struct obd_export *exp, struct obdo *oa,
         obdo_cpy_md(oa, &obd->u.echo.eo_oa, oa->o_valid);
         oa->o_id = id;
 
-        return 0;
+        RETURN(0);
 }
 
 static int echo_setattr(struct obd_export *exp, struct obdo *oa,
@@ -194,6 +184,7 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa,
 {
         struct obd_device *obd = class_exp2obd(exp);
 
+        ENTRY;
         if (!obd) {
                 CERROR("invalid client cookie "LPX64"\n",
                        exp->exp_handle.h_cookie);
@@ -214,15 +205,15 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa,
                 oti->oti_ack_locks[0].lock = obd->u.echo.eo_nl_lock;
         }
 
-        return 0;
+        RETURN(0);
 }
 
 static void
-echo_page_debug_setup(struct page *page, int rw, obd_id id,
+echo_page_debug_setup(cfs_page_t *page, int rw, obd_id id,
                       __u64 offset, int len)
 {
         int   page_offset = offset & (PAGE_SIZE - 1);
-        char *addr        = ((char *)kmap(page)) + page_offset;
+        char *addr        = ((char *)cfs_kmap(page)) + page_offset;
 
         if (len % OBD_ECHO_BLOCK_SIZE != 0)
                 CERROR("Unexpected block size %d\n", len);
@@ -241,15 +232,15 @@ echo_page_debug_setup(struct page *page, int rw, obd_id id,
                 len    -= OBD_ECHO_BLOCK_SIZE;
         }
 
-        kunmap(page);
+        cfs_kunmap(page);
 }
 
 static int
-echo_page_debug_check(struct page *page, obd_id id,
+echo_page_debug_check(cfs_page_t *page, obd_id id,
                       __u64 offset, int len)
 {
         int   page_offset = offset & (PAGE_SIZE - 1);
-        char *addr        = ((char *)kmap(page)) + page_offset;
+        char *addr        = ((char *)cfs_kmap(page)) + page_offset;
         int   rc          = 0;
         int   rc2;
 
@@ -268,7 +259,7 @@ echo_page_debug_check(struct page *page, obd_id id,
                 len    -= OBD_ECHO_BLOCK_SIZE;
         }
 
-        kunmap(page);
+        cfs_kunmap(page);
 
         return (rc);
 }
@@ -304,7 +295,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                 oti->oti_handle = (void *)DESC_PRIV;
 
         for (i = 0; i < objcount; i++, obj++) {
-                int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL;
+                int gfp_mask = (obj->ioo_id & 1) ? CFS_ALLOC_HIGHUSER : CFS_ALLOC_STD;
                 int ispersistent = obj->ioo_id == ECHO_PERSISTENT_OBJID;
                 int debug_setup = (!ispersistent &&
                                    (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
@@ -318,9 +309,9 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
                                 r->page = echo_persistent_pages[nb->offset >>
                                                                 PAGE_SHIFT];
                                 /* Take extra ref so __free_pages() can be called OK */
-                                get_page (r->page);
+                                cfs_get_page (r->page);
                         } else {
-                                r->page = alloc_pages(gfp_mask, 0);
+                                r->page = cfs_alloc_page(gfp_mask);
                                 if (r->page == NULL) {
                                         CERROR("can't get page %u/%u for id "
                                                LPU64"\n",
@@ -335,7 +326,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa,
 
                         r->offset = nb->offset;
                         r->len = nb->len;
-                        LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE);
+                        LASSERT((r->offset & ~CFS_PAGE_MASK) + r->len <= CFS_PAGE_SIZE);
 
                         CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n",
                                r->page, r->offset, r->len);
@@ -368,10 +359,10 @@ preprw_cleanup:
          */
         CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount);
         while (r-- > res) {
-                kunmap(r->page);
+                cfs_kunmap(r->page);
                 /* NB if this is a persistent page, __free_pages will just
                  * lose the extra ref gained above */
-                __free_pages(r->page, 0);
+                cfs_free_page(r->page);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
         memset(res, 0, sizeof(*res) * niocount);
@@ -418,7 +409,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
                 int j;
 
                 for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) {
-                        struct page *page = r->page;
+                        cfs_page_t *page = r->page;
                         void *addr;
 
                         if (page == NULL) {
@@ -427,7 +418,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
                                 GOTO(commitrw_cleanup, rc = -EFAULT);
                         }
 
-                        addr = kmap(page);
+                        addr = cfs_kmap(page);
 
                         CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n",
                                r->page, addr, r->offset);
@@ -440,9 +431,9 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa,
                                         rc = vrc;
                         }
 
-                        kunmap(page);
+                        cfs_kunmap(page);
                         /* NB see comment above regarding persistent pages */
-                        __free_pages(page, 0);
+                        cfs_free_page(page);
                         atomic_dec(&obd->u.echo.eo_prep);
                 }
         }
@@ -454,10 +445,10 @@ commitrw_cleanup:
         CERROR("cleaning up %ld pages (%d obdos)\n",
                niocount - (long)(r - res) - 1, objcount);
         while (++r < res + niocount) {
-                struct page *page = r->page;
+                cfs_page_t *page = r->page;
 
                 /* NB see comment above regarding persistent pages */
-                __free_pages(page, 0);
+                cfs_free_page(page);
                 atomic_dec(&obd->u.echo.eo_prep);
         }
         return rc;
@@ -516,7 +507,7 @@ static int echo_cleanup(struct obd_device *obd)
         /* XXX Bug 3413; wait for a bit to ensure the BL callback has
          * happened before calling ldlm_namespace_free() */
         set_current_state (TASK_UNINTERRUPTIBLE);
-        schedule_timeout (HZ);
+        cfs_schedule_timeout (CFS_TASK_UNINT, cfs_time_seconds(1));
 
         ldlm_namespace_free(obd->obd_namespace, obd->obd_force);
 
@@ -552,7 +543,7 @@ echo_persistent_pages_fini (void)
 
         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++)
                 if (echo_persistent_pages[i] != NULL) {
-                        __free_pages (echo_persistent_pages[i], 0);
+                        cfs_free_page (echo_persistent_pages[i]);
                         echo_persistent_pages[i] = NULL;
                 }
 }
@@ -560,21 +551,21 @@ echo_persistent_pages_fini (void)
 static int
 echo_persistent_pages_init (void)
 {
-        struct page *pg;
+        cfs_page_t *pg;
         int          i;
 
         for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) {
                 int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ?
-                        GFP_KERNEL : GFP_HIGHUSER;
+                        CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER;
 
-                pg = alloc_pages (gfp_mask, 0);
+                pg = cfs_alloc_page (gfp_mask);
                 if (pg == NULL) {
                         echo_persistent_pages_fini ();
                         return (-ENOMEM);
                 }
 
-                memset (kmap (pg), 0, PAGE_SIZE);
-                kunmap (pg);
+                memset (cfs_kmap (pg), 0, CFS_PAGE_SIZE);
+                cfs_kunmap (pg);
 
                 echo_persistent_pages[i] = pg;
         }
@@ -587,9 +578,10 @@ static int __init obdecho_init(void)
         struct lprocfs_static_vars lvars;
         int rc;
 
+        ENTRY;
         printk(KERN_INFO "Lustre: Echo OBD driver; info@clusterfs.com\n");
 
-        LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
+        LASSERT(CFS_PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
 
         lprocfs_init_vars(echo, &lvars);
 
@@ -598,7 +590,7 @@ static int __init obdecho_init(void)
                 goto failed_0;
 
         rc = class_register_type(&echo_obd_ops, NULL, lvars.module_vars,
-                                 OBD_ECHO_DEVICENAME, NULL);
+                                 LUSTRE_ECHO_NAME, NULL);
         if (rc != 0)
                 goto failed_1;
 
@@ -606,7 +598,7 @@ static int __init obdecho_init(void)
         if (rc == 0)
                 RETURN (0);
 
-        class_unregister_type(OBD_ECHO_DEVICENAME);
+        class_unregister_type(LUSTRE_ECHO_NAME);
  failed_1:
         echo_persistent_pages_fini ();
  failed_0:
@@ -616,7 +608,7 @@ static int __init obdecho_init(void)
 static void /*__exit*/ obdecho_exit(void)
 {
         echo_client_exit();
-        class_unregister_type(OBD_ECHO_DEVICENAME);
+        class_unregister_type(LUSTRE_ECHO_NAME);
         echo_persistent_pages_fini ();
 }
 
@@ -624,5 +616,4 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
 MODULE_LICENSE("GPL");
 
-module_init(obdecho_init);
-module_exit(obdecho_exit);
+cfs_module(obdecho, "1.0.0", obdecho_init, obdecho_exit);
index 3927d9d..c24533e 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_ECHO
 #ifdef __KERNEL__
-#include <linux/version.h>
-#include <linux/module.h>
-#include <linux/fs.h>
-#include <linux/completion.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/iobuf.h>
-#endif
-#include <asm/div64.h>
-#include <linux/smp_lock.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/obd_echo.h>
-#include <linux/lustre_ver.h>
-#include <linux/lustre_debug.h>
-#include <linux/lprocfs_status.h>
+#include <obd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <obd_echo.h>
+#include <lustre_ver.h>
+#include <lustre_debug.h>
+#include <lprocfs_status.h>
 
 static obd_id last_object_id;
 
@@ -116,7 +108,7 @@ echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm,
         if (ulsm_nob < nob ||
             lsm->lsm_stripe_count > ec->ec_nstripes ||
             lsm->lsm_magic != LOV_MAGIC ||
-            (lsm->lsm_stripe_size & (PAGE_SIZE - 1)) != 0 ||
+            (lsm->lsm_stripe_size & (CFS_PAGE_SIZE - 1)) != 0 ||
             ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
                 return (-EINVAL);
 
@@ -201,7 +193,7 @@ static int echo_create_object(struct obd_device *obd, int on_target,
                         lsm->lsm_stripe_count = ec->ec_nstripes;
 
                 if (lsm->lsm_stripe_size == 0)
-                        lsm->lsm_stripe_size = PAGE_SIZE;
+                        lsm->lsm_stripe_size = CFS_PAGE_SIZE;
 
                 idx = ll_rand();
 
@@ -289,11 +281,11 @@ echo_get_object (struct ec_object **ecop, struct obd_device *obd,
         spin_lock (&ec->ec_lock);
         eco = echo_find_object_locked (obd, oa->o_id);
         if (eco != NULL) {
-                if (eco->eco_deleted) {            /* being deleted */
-                        spin_unlock(&ec->ec_lock); /* (see comment in cleanup) */
+                if (eco->eco_deleted) {           /* being deleted */
+                        spin_unlock(&ec->ec_lock);/* (see comment in cleanup) */
                         return (-EAGAIN);
                 }
-                
+
                 eco->eco_refcount++;
                 spin_unlock (&ec->ec_lock);
                 *ecop = eco;
@@ -431,9 +423,9 @@ echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
         *offp = offset * stripe_size + woffset % stripe_size;
 }
 
-static void
-echo_client_page_debug_setup(struct lov_stripe_md *lsm,
-                             struct page *page, int rw, obd_id id,
+static void 
+echo_client_page_debug_setup(struct lov_stripe_md *lsm, 
+                             cfs_page_t *page, int rw, obd_id id, 
                              obd_off offset, obd_off count)
 {
         char    *addr;
@@ -442,11 +434,11 @@ echo_client_page_debug_setup(struct lov_stripe_md *lsm,
         int      delta;
 
         /* no partial pages on the client */
-        LASSERT(count == PAGE_SIZE);
+        LASSERT(count == CFS_PAGE_SIZE);
 
-        addr = kmap(page);
+        addr = cfs_kmap(page);
 
-        for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+        for (delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
                 if (rw == OBD_BRW_WRITE) {
                         stripe_off = offset + delta;
                         stripe_id = id;
@@ -459,13 +451,12 @@ echo_client_page_debug_setup(struct lov_stripe_md *lsm,
                                   stripe_off, stripe_id);
         }
 
-        kunmap(page);
+        cfs_kunmap(page);
 }
 
-static int
-echo_client_page_debug_check(struct lov_stripe_md *lsm,
-                             struct page *page, obd_id id,
-                             obd_off offset, obd_off count)
+static int echo_client_page_debug_check(struct lov_stripe_md *lsm, 
+                                        cfs_page_t *page, obd_id id, 
+                                        obd_off offset, obd_off count)
 {
         obd_off stripe_off;
         obd_id  stripe_id;
@@ -475,11 +466,11 @@ echo_client_page_debug_check(struct lov_stripe_md *lsm,
         int     rc2;
 
         /* no partial pages on the client */
-        LASSERT(count == PAGE_SIZE);
+        LASSERT(count == CFS_PAGE_SIZE);
 
-        addr = kmap(page);
+        addr = cfs_kmap(page);
 
-        for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
+        for (rc = delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
                 stripe_off = offset + delta;
                 stripe_id = id;
                 echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
@@ -493,7 +484,7 @@ echo_client_page_debug_check(struct lov_stripe_md *lsm,
                 }
         }
 
-        kunmap(page);
+        cfs_kunmap(page);
         return rc;
 }
 
@@ -515,18 +506,18 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
                   (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
                   (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
 
-        gfp_mask = ((oa->o_id & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER;
+        gfp_mask = ((oa->o_id & 2) == 0) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER;
 
         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
         LASSERT(lsm != NULL);
         LASSERT(lsm->lsm_object_id == oa->o_id);
 
         if (count <= 0 ||
-            (count & (PAGE_SIZE - 1)) != 0)
+            (count & (CFS_PAGE_SIZE - 1)) != 0)
                 return (-EINVAL);
 
         /* XXX think again with misaligned I/O */
-        npages = count >> PAGE_SHIFT;
+        npages = count >> CFS_PAGE_SHIFT;
 
         OBD_ALLOC(pga, npages * sizeof(*pga));
         if (pga == NULL)
@@ -534,16 +525,16 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
 
         for (i = 0, pgp = pga, off = offset;
              i < npages;
-             i++, pgp++, off += PAGE_SIZE) {
+             i++, pgp++, off += CFS_PAGE_SIZE) {
 
                 LASSERT (pgp->pg == NULL);      /* for cleanup */
 
                 rc = -ENOMEM;
-                pgp->pg = alloc_pages (gfp_mask, 0);
+                pgp->pg = cfs_alloc_page (gfp_mask);
                 if (pgp->pg == NULL)
                         goto out;
 
-                pgp->count = PAGE_SIZE;
+                pgp->count = CFS_PAGE_SIZE;
                 pgp->off = off;
                 pgp->flag = 0;
 
@@ -569,7 +560,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa,
                         if (vrc != 0 && rc == 0)
                                 rc = vrc;
                 }
-                __free_pages(pgp->pg, 0);
+                cfs_free_page(pgp->pg);
         }
         OBD_FREE(pga, npages * sizeof(*pga));
         return (rc);
@@ -597,13 +588,13 @@ static int echo_client_ubrw(struct obd_device *obd, int rw,
         /* NB: for now, only whole pages, page aligned */
 
         if (count <= 0 ||
-            ((long)buffer & (PAGE_SIZE - 1)) != 0 ||
-            (count & (PAGE_SIZE - 1)) != 0 ||
+            ((long)buffer & (CFS_PAGE_SIZE - 1)) != 0 ||
+            (count & (CFS_PAGE_SIZE - 1)) != 0 ||
             (lsm != NULL && lsm->lsm_object_id != oa->o_id))
                 return (-EINVAL);
 
         /* XXX think again with misaligned I/O */
-        npages = count >> PAGE_SHIFT;
+        npages = count >> CFS_PAGE_SHIFT;
 
         OBD_ALLOC(pga, npages * sizeof(*pga));
         if (pga == NULL)
@@ -623,10 +614,10 @@ static int echo_client_ubrw(struct obd_device *obd, int rw,
 
         for (i = 0, off = offset, pgp = pga;
              i < npages;
-             i++, off += PAGE_SIZE, pgp++) {
+             i++, off += CFS_PAGE_SIZE, pgp++) {
                 pgp->off = off;
                 pgp->pg = kiobuf->maplist[i];
-                pgp->count = PAGE_SIZE;
+                pgp->count = CFS_PAGE_SIZE;
                 pgp->flag = 0;
         }
 
@@ -660,7 +651,7 @@ struct echo_async_state;
 #define EAP_MAGIC 79277927
 struct echo_async_page {
         int                     eap_magic;
-        struct page             *eap_page;
+        cfs_page_t             *eap_page;
         void                    *eap_cookie;
         obd_off                 eap_off;
         struct echo_async_state *eap_eas;
@@ -677,7 +668,7 @@ struct echo_async_state {
         obd_off                 eas_end_offset;
         int                     eas_in_flight;
         int                     eas_rc;
-        wait_queue_head_t       eas_waitq;
+        cfs_waitq_t             eas_waitq;
         struct list_head        eas_avail;
         struct obdo             eas_oa;
         struct lov_stripe_md    *eas_lsm;
@@ -704,7 +695,7 @@ static int ec_ap_refresh_count(void *data, int cmd)
 {
         /* our pages are issued with a stable count */
         LBUG();
-        return PAGE_SIZE;
+        return CFS_PAGE_SIZE;
 }
 static void ec_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
 {
@@ -727,14 +718,14 @@ static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
             (eas->eas_oa.o_flags & OBD_FL_DEBUG_CHECK) != 0)
                 echo_client_page_debug_check(eas->eas_lsm, eap->eap_page,
                                              eas->eas_oa.o_id, eap->eap_off,
-                                             PAGE_SIZE);
+                                             CFS_PAGE_SIZE);
 
         spin_lock_irqsave(&eas->eas_lock, flags);
         if (rc && !eas->eas_rc)
                 eas->eas_rc = rc;
         eas->eas_in_flight--;
         list_add(&eap->eap_item, &eas->eas_avail);
-        wake_up(&eas->eas_waitq);
+        cfs_waitq_signal(&eas->eas_waitq);
         spin_unlock_irqrestore(&eas->eas_lock, flags);
 }
 
@@ -753,10 +744,11 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
         obd_count npages, i;
         struct echo_async_page *eap;
         struct echo_async_state eas;
-        struct list_head *pos, *n;
         int rc = 0;
         unsigned long flags;
-        LIST_HEAD(pages);
+        struct echo_async_page **aps = NULL;
+
+        ENTRY;
 #if 0
         int                     verify;
         int                     gfp_mask;
@@ -771,43 +763,46 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
 
         if (count <= 0 ||
-            (count & (PAGE_SIZE - 1)) != 0 ||
+            (count & (CFS_PAGE_SIZE - 1)) != 0 ||
             (lsm != NULL &&
              lsm->lsm_object_id != oa->o_id))
                 return (-EINVAL);
 
         /* XXX think again with misaligned I/O */
-        npages = batching >> PAGE_SHIFT;
+        npages = batching >> CFS_PAGE_SHIFT;
 
         memcpy(&eas.eas_oa, oa, sizeof(*oa));
         eas.eas_next_offset = offset;
         eas.eas_end_offset = offset + count;
         spin_lock_init(&eas.eas_lock);
-        init_waitqueue_head(&eas.eas_waitq);
+        cfs_waitq_init(&eas.eas_waitq);
         eas.eas_in_flight = 0;
         eas.eas_rc = 0;
         eas.eas_lsm = lsm;
-        INIT_LIST_HEAD(&eas.eas_avail);
+        CFS_INIT_LIST_HEAD(&eas.eas_avail);
+
+        OBD_ALLOC(aps, npages * sizeof aps[0]);
+        if (aps == NULL)
+                return (-ENOMEM);
 
         /* prepare the group of pages that we're going to be keeping
          * in flight */
         for (i = 0; i < npages; i++) {
-                struct page *page = alloc_page(GFP_KERNEL);
+                cfs_page_t *page = cfs_alloc_page(CFS_ALLOC_STD);
                 if (page == NULL)
                         GOTO(out, rc = -ENOMEM);
 
-                set_page_private(page, 0);
-                list_add_tail(&PAGE_LIST(page), &pages);
-
                 OBD_ALLOC(eap, sizeof(*eap));
-                if (eap == NULL)
+                if (eap == NULL) {
+                        cfs_free_page(page);
                         GOTO(out, rc = -ENOMEM);
+                }
 
                 eap->eap_magic = EAP_MAGIC;
                 eap->eap_page = page;
                 eap->eap_eas = &eas;
-                set_page_private(page, (unsigned long)eap);
                 list_add_tail(&eap->eap_item, &eas.eas_avail);
+                aps[i] = eap;
         }
 
         /* first we spin queueing io and being woken by its completion */
@@ -838,7 +833,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
                         eap->eap_cookie = NULL;
                 }
 
-                eas.eas_next_offset += PAGE_SIZE;
+                eas.eas_next_offset += CFS_PAGE_SIZE;
                 eap->eap_off = eas.eas_next_offset;
 
                 rc = obd_prep_async_page(exp, lsm, NULL, eap->eap_page,
@@ -853,13 +848,13 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
                 if (oa->o_id != ECHO_PERSISTENT_OBJID &&
                     (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
                     (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0)
-                        echo_client_page_debug_setup(lsm, eap->eap_page, rw,
-                                                     oa->o_id,
-                                                     eap->eap_off, PAGE_SIZE);
+                        echo_client_page_debug_setup(lsm, eap->eap_page, rw, 
+                                                     oa->o_id, 
+                                                     eap->eap_off, CFS_PAGE_SIZE);
 
                 /* always asserts urgent, which isn't quite right */
                 rc = obd_queue_async_io(exp, lsm, NULL, eap->eap_cookie,
-                                        rw, 0, PAGE_SIZE, 0,
+                                        rw, 0, CFS_PAGE_SIZE, 0,
                                         ASYNC_READY | ASYNC_URGENT |
                                         ASYNC_COUNT_STABLE);
                 spin_lock_irqsave(&eas.eas_lock, flags);
@@ -884,19 +879,22 @@ static int echo_client_async_page(struct obd_export *exp, int rw,
         spin_unlock_irqrestore(&eas.eas_lock, flags);
 
 out:
-        list_for_each_safe(pos, n, &pages) {
-                struct page *page = list_entry(pos, struct page,
-                                               PAGE_LIST_ENTRY);
+        if (aps != NULL) {
+                /* aps[] is filled front to back by the setup loop and is
+                 * expected to come back zeroed from OBD_ALLOC(), so the first
+                 * NULL slot marks where a failed setup stopped; end there. */
+                for (i = 0; i < npages && aps[i] != NULL; i++) {
+                        cfs_page_t *page;
 
-                list_del(&PAGE_LIST(page));
-                if (page_private(page) != 0) {
-                        eap = (struct echo_async_page *)page_private(page);
+                        eap = aps[i];
+                        page = eap->eap_page;
                         if (eap->eap_cookie != NULL)
                                 obd_teardown_async_page(exp, lsm, NULL,
                                                         eap->eap_cookie);
                         OBD_FREE(eap, sizeof(*eap));
+                        cfs_free_page(page);
                 }
-                __free_page(page);
+                OBD_FREE(aps, npages * sizeof aps[0]);
         }
 
         RETURN(rc);
@@ -915,12 +910,12 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw,
         int i, ret = 0;
         ENTRY;
 
-        if (count <= 0 || (count & (PAGE_SIZE - 1)) != 0 ||
+        if (count <= 0 || (count & (CFS_PAGE_SIZE - 1)) != 0 ||
             (lsm != NULL && lsm->lsm_object_id != oa->o_id))
                 RETURN(-EINVAL);
 
-        npages = batch >> PAGE_SHIFT;
-        tot_pages = count >> PAGE_SHIFT;
+        npages = batch >> CFS_PAGE_SHIFT;
+        tot_pages = count >> CFS_PAGE_SHIFT;
 
         OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local));
         OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote));
@@ -936,9 +931,9 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw,
                 if (tot_pages < npages)
                         npages = tot_pages;
 
-                for (i = 0; i < npages; i++, off += PAGE_SIZE) {
+                for (i = 0; i < npages; i++, off += CFS_PAGE_SIZE) {
                         rnb[i].offset = off;
-                        rnb[i].len = PAGE_SIZE;
+                        rnb[i].len = CFS_PAGE_SIZE;
                 }
 
                 ioo.ioo_bufcnt = npages;
@@ -949,7 +944,7 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw,
                         GOTO(out, ret);
 
                 for (i = 0; i < npages; i++) {
-                        struct page *page = lnb[i].page;
+                        cfs_page_t *page = lnb[i].page;
 
                         /* read past eof? */
                         if (page == NULL && lnb[i].rc == 0)
@@ -1096,8 +1091,8 @@ echo_client_enqueue(struct obd_export *exp, struct obdo *oa,
         if (!(mode == LCK_PR || mode == LCK_PW))
                 return -EINVAL;
 
-        if ((offset & (PAGE_SIZE - 1)) != 0 ||
-            (nob & (PAGE_SIZE - 1)) != 0)
+        if ((offset & (CFS_PAGE_SIZE - 1)) != 0 ||
+            (nob & (CFS_PAGE_SIZE - 1)) != 0)
                 return -EINVAL;
 
         rc = echo_get_object (&eco, obd, oa);
@@ -1346,7 +1341,7 @@ static int echo_client_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
         }
 
         spin_lock_init (&ec->ec_lock);
-        INIT_LIST_HEAD (&ec->ec_objects);
+        CFS_INIT_LIST_HEAD (&ec->ec_objects);
         ec->ec_unique = 0;
 
         OBD_ALLOC(ocd, sizeof(*ocd));
@@ -1411,10 +1406,11 @@ static int echo_client_connect(struct lustre_handle *conn,
         struct obd_export *exp;
         int                rc;
 
+        ENTRY;
         rc = class_connect(conn, src, cluuid);
         if (rc == 0) {
                 exp = class_conn2export(conn);
-                INIT_LIST_HEAD(&exp->exp_ec_data.eced_locks);
+                CFS_INIT_LIST_HEAD(&exp->exp_ec_data.eced_locks);
                 class_export_put(exp);
         }
 
@@ -1472,10 +1468,10 @@ int echo_client_init(void)
 
         lprocfs_init_vars(echo, &lvars);
         return class_register_type(&echo_obd_ops, NULL, lvars.module_vars,
-                                   OBD_ECHO_CLIENT_DEVICENAME, NULL);
+                                   LUSTRE_ECHO_CLIENT_NAME, NULL);
 }
 
 void echo_client_exit(void)
 {
-        class_unregister_type(OBD_ECHO_CLIENT_DEVICENAME);
+        class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
 }
index 3418691..c816ca0 100644 (file)
@@ -24,8 +24,8 @@
  */
 #define DEBUG_SUBSYSTEM S_ECHO
 
-#include <linux/lprocfs_status.h>
-#include <linux/obd_class.h>
+#include <lprocfs_status.h>
+#include <obd_class.h>
 
 #ifdef LPROCFS
 static struct lprocfs_vars lprocfs_obd_vars[] = {
index 3521135..02438ba 100644 (file)
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 # include <linux/mount.h>
 # include <linux/buffer_head.h>
-# include <linux/security.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_ver.h>
-#include <linux/lustre_commit_confd.h>
+#include <obd_class.h>
+#include <obd_lov.h>
+#include <lustre_dlm.h>
+#include <lustre_fsfilt.h>
+#include <lprocfs_status.h>
+#include <lustre_log.h>
+#include <lustre_commit_confd.h>
 #include <libcfs/list.h>
-#include <linux/lustre_disk.h>
-#include <linux/lustre_quota.h>
-#include <linux/quotaops.h>
+#include <lustre_disk.h>
+#include <lustre_quota.h>
+#include <lustre_ver.h>
 
 #include "filter_internal.h"
 
@@ -397,7 +395,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
         }
 
         if (last_rcvd_size == 0) {
-                CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
+                LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
 
                 memcpy(fsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->lsd_uuid));
                 fsd->lsd_last_transno = 0;
@@ -416,8 +414,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                         GOTO(err_fsd, rc);
                 }
                 if (strcmp(fsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
-                        CERROR("OBD UUID %s does not match last_rcvd UUID %s\n",
-                               obd->obd_uuid.uuid, fsd->lsd_uuid);
+                        LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
+                                       " disk %s. Were the /dev/ assignments "
+                                       "rearranged?\n",
+                                       obd->obd_uuid.uuid, fsd->lsd_uuid);
                         GOTO(err_fsd, rc = -EINVAL);
                 }
                 mount_count = le64_to_cpu(fsd->lsd_mount_count);
@@ -538,7 +538,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp)
                 obd->obd_recovery_start = CURRENT_SECONDS;
                 /* Only used for lprocfs_status */
                 obd->obd_recovery_end = obd->obd_recovery_start +
-                        OBD_RECOVERY_TIMEOUT / HZ;
+                        OBD_RECOVERY_TIMEOUT;
         }
 
 out:
@@ -1528,8 +1528,8 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg,
                               obd->obd_recoverable_clients,
                               (obd->obd_recoverable_clients == 1)
                               ? "client" : "clients",
-                              (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60,
-                              (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60,
+                              (int)(OBD_RECOVERY_TIMEOUT) / 60,
+                              (int)(OBD_RECOVERY_TIMEOUT) % 60,
                               obd->obd_name);
         } else {
                 LCONSOLE_INFO("OST %s now serving %s (%s%s%s) with recovery "
@@ -2705,7 +2705,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         unsigned int qcids[MAXQUOTAS] = {0, 0};
         struct obd_device *obd;
         struct filter_obd *filter;
-        struct dentry *dchild = NULL, *dparent;
+        struct dentry *dchild = NULL, *dparent = NULL;
         struct lvfs_run_ctxt saved;
         void *handle = NULL;
         struct llog_cookie *fcc = NULL;
@@ -2947,8 +2947,9 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen,
         RETURN(-EINVAL);
 }
 
-static int filter_set_info(struct obd_export *exp, __u32 keylen,
-                           void *key, __u32 vallen, void *val)
+static int filter_set_info_async(struct obd_export *exp, __u32 keylen,
+                                 void *key, __u32 vallen, void *val,
+                                 struct ptlrpc_request_set *set)
 {
         struct obd_device *obd;
         struct llog_ctxt *ctxt;
@@ -2965,8 +2966,8 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen,
             memcmp(key, KEY_MDS_CONN, keylen) != 0)
                 RETURN(-EINVAL);
 
-        CWARN("%s: received MDS connection from %s\n", obd->obd_name,
-              obd_export_nid2str(exp));
+        LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name,
+                      obd_export_nid2str(exp));
         obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie;
 
         /* setup llog imports */
@@ -3077,7 +3078,7 @@ static struct lvfs_callback_ops filter_lvfs_ops = {
 static struct obd_ops filter_obd_ops = {
         .o_owner          = THIS_MODULE,
         .o_get_info       = filter_get_info,
-        .o_set_info       = filter_set_info,
+        .o_set_info_async = filter_set_info_async,
         .o_setup          = filter_setup,
         .o_precleanup     = filter_precleanup,
         .o_cleanup        = filter_cleanup,
@@ -3106,7 +3107,7 @@ static struct obd_ops filter_obd_ops = {
 static struct obd_ops filter_sanobd_ops = {
         .o_owner          = THIS_MODULE,
         .o_get_info       = filter_get_info,
-        .o_set_info       = filter_set_info,
+        .o_set_info_async = filter_set_info_async,
         .o_setup          = filter_san_setup,
         .o_precleanup     = filter_precleanup,
         .o_cleanup        = filter_cleanup,
index 9006728..4c68b34 100644 (file)
@@ -8,12 +8,13 @@
 #ifdef __KERNEL__
 # include <linux/spinlock.h>
 #endif
-#include <linux/lustre_handles.h>
-#include <linux/lustre_debug.h>
-#include <linux/obd.h>
-#include <linux/lustre_disk.h>
+#include <lustre_disk.h>
+#include <lustre_handles.h>
+#include <lustre_debug.h>
+#include <obd.h>
+
+#define FILTER_LAYOUT_VERSION "2"
 
-#define HEALTH_CHECK "health_check"
 #define FILTER_INIT_OBJID 0
 
 #define FILTER_SUBDIR_COUNT      32            /* set to zero for no subdirs */
index cf7140d..fdc0492 100644 (file)
@@ -34,8 +34,8 @@
 #include <linux/pagemap.h> // XXX kill me soon
 #include <linux/version.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
 #include "filter_internal.h"
 
 int *obdfilter_created_scratchpad;
index be6e550..b10a83a 100644 (file)
 #include <linux/module.h>
 #include <linux/pagemap.h> // XXX kill me soon
 #include <linux/version.h>
+#include <linux/lustre_version.h>
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
 #include <linux/iobuf.h>
 #include <linux/locks.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
 #include "filter_internal.h"
 
 /* Bug 2254 -- this is better done in ext3_map_inode_page, but this
index 1bd4995..1a07a95 100644 (file)
@@ -35,9 +35,9 @@
 
 #define DEBUG_SUBSYSTEM S_FILTER
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_quota.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_quota.h>
 #include "filter_internal.h"
 
 /* 512byte block min */
@@ -390,11 +390,15 @@ static int filter_clear_page_cache(struct inode *inode,
         rc = generic_osync_inode(inode, inode->i_mapping,
                                  OSYNC_DATA|OSYNC_METADATA);
          */
+        down(&inode->i_sem);
+        current->flags |= PF_SYNCWRITE;
         rc = filemap_fdatawrite(inode->i_mapping);
         rc2 = sync_mapping_buffers(inode->i_mapping);
         if (rc == 0)
                 rc = rc2;
         rc2 = filemap_fdatawait(inode->i_mapping);
+        current->flags &= ~PF_SYNCWRITE;
+        up(&inode->i_sem);
         if (rc == 0)
                 rc = rc2;
         if (rc != 0)
index a2ce350..c61be24 100644 (file)
@@ -34,9 +34,9 @@
 #include <linux/version.h>
 
 #include <libcfs/list.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_commit_confd.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_commit_confd.h>
 
 #include "filter_internal.h"
 
index acdd457..06946fe 100644 (file)
@@ -34,8 +34,8 @@
 #include <linux/version.h>
 
 #include <libcfs/list.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_dlm.h>
+#include <obd_class.h>
+#include <lustre_dlm.h>
 
 #include "filter_internal.h"
 
index 7f83977..c679b3e 100644 (file)
@@ -33,8 +33,8 @@
 #include <linux/pagemap.h> // XXX kill me soon
 #include <linux/version.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
 #include "filter_internal.h"
 
 /* sanobd setup methods - use a specific mount option */
index 88da264..935ae6f 100644 (file)
@@ -25,8 +25,8 @@
 #define DEBUG_SUBSYSTEM S_CLASS
 
 #include <linux/version.h>
-#include <linux/lprocfs_status.h>
-#include <linux/obd.h>
+#include <lprocfs_status.h>
+#include <obd.h>
 #include <linux/seq_file.h>
 #include <linux/version.h>
 
diff --git a/lustre/osc/Info.plist b/lustre/osc/Info.plist
new file mode 100644 (file)
index 0000000..727980d
--- /dev/null
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>osc</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.osc</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string> 
+       <key>OSBundleCompatibleVersion</key> 
+       <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict> 
+                <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.unsupported</key>
+               <string>8.0.0b1</string>
+               <key>com.clusterfs.lustre.libcfs</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.lvfs</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.obdclass</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.ptlrpc</key> 
+               <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index af0649d..c9f2fbb 100644 (file)
@@ -11,8 +11,31 @@ libosc_a_CFLAGS = $(LLCFLAGS)
 endif
 
 if MODULES
+
+if LINUX
 modulefs_DATA = osc$(KMODEXT)
 endif
 
+if DARWIN
+macos_PROGRAMS := osc
+
+osc_SOURCES := \
+        osc_create.c \
+        osc_request.c
+
+osc_CFLAGS := $(EXTRA_KCFLAGS)
+osc_LDFLAGS := $(EXTRA_KLDFLAGS)
+osc_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # Darwin
+
+endif
+
+install-data-hook: $(install_data_hook)
+
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ 
 DIST_SOURCES = $(osc-objs:%.o=%.c) osc_internal.h
index 05e2567..764c55c 100644 (file)
@@ -28,8 +28,8 @@
 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #include <asm/statfs.h>
 #endif
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include <linux/seq_file.h>
 #include "osc_internal.h"
 
@@ -41,9 +41,9 @@ static int osc_rd_max_pages_per_rpc(char *page, char **start, off_t off,
         struct client_obd *cli = &dev->u.cli;
         int rc;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         rc = snprintf(page, count, "%d\n", cli->cl_max_pages_per_rpc);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         return rc;
 }
 
@@ -61,9 +61,9 @@ static int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer,
         if (val < 1 || val > PTLRPC_MAX_BRW_PAGES)
                 return -ERANGE;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         cli->cl_max_pages_per_rpc = val;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         return count;
 }
@@ -75,9 +75,9 @@ static int osc_rd_max_rpcs_in_flight(char *page, char **start, off_t off,
         struct client_obd *cli = &dev->u.cli;
         int rc;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         rc = snprintf(page, count, "%u\n", cli->cl_max_rpcs_in_flight);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         return rc;
 }
 
@@ -99,9 +99,9 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer,
         if (pool && val > cli->cl_max_rpcs_in_flight)
                 pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight);
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         cli->cl_max_rpcs_in_flight = val;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         return count;
 }
@@ -113,9 +113,9 @@ static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count,
         struct client_obd *cli = &dev->u.cli;
         unsigned val;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         val = cli->cl_dirty_max >> 20;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         return snprintf(page, count, "%u\n", val);
 }
@@ -135,10 +135,10 @@ static int osc_wr_max_dirty_mb(struct file *file, const char *buffer,
             val > num_physpages >> (20 - PAGE_SHIFT - 2)) /* 1/4 of RAM */
                 return -ERANGE;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         cli->cl_dirty_max = (obd_count)val * 1024 * 1024;
         osc_wake_cache_waiters(cli);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         return count;
 }
@@ -150,9 +150,9 @@ static int osc_rd_cur_dirty_bytes(char *page, char **start, off_t off,
         struct client_obd *cli = &dev->u.cli;
         int rc;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         rc = snprintf(page, count, "%lu\n", cli->cl_dirty);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         return rc;
 }
 
@@ -163,9 +163,9 @@ static int osc_rd_cur_grant_bytes(char *page, char **start, off_t off,
         struct client_obd *cli = &dev->u.cli;
         int rc;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         rc = snprintf(page, count, "%lu\n", cli->cl_avail_grant);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         return rc;
 }
 
@@ -297,13 +297,12 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
         struct timeval now;
         struct obd_device *dev = seq->private;
         struct client_obd *cli = &dev->u.cli;
-        unsigned long flags;
         unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum;
         int i;
 
         do_gettimeofday(&now);
 
-        spin_lock_irqsave(&cli->cl_loi_list_lock, flags);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         seq_printf(seq, "snapshot_time:         %lu.%lu (secs.usecs)\n",
                    now.tv_sec, now.tv_usec);
@@ -384,7 +383,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v)
                         break;
         }
 
-        spin_unlock_irqrestore(&cli->cl_loi_list_lock, flags);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         return 0;
 }
index b98cac1..d21c3e8 100644 (file)
 #define DEBUG_SUBSYSTEM S_OSC
 
 #ifdef __KERNEL__
-# include <linux/version.h>
-# include <linux/module.h>
-# include <linux/mm.h>
-# include <linux/highmem.h>
-# include <linux/ctype.h>
-# include <linux/init.h>
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#  include <linux/workqueue.h>
-#  include <linux/smp_lock.h>
-# else
-#  include <linux/locks.h>
-# endif
+# include <libcfs/libcfs.h>
 #else /* __KERNEL__ */
 # include <liblustre.h>
 #endif
@@ -55,8 +44,8 @@
 # include <ctype.h>
 #endif
 
-# include <linux/lustre_dlm.h>
-#include <linux/obd_class.h>
+# include <lustre_dlm.h>
+#include <obd_class.h>
 #include "osc_internal.h"
 
 static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc)
@@ -109,7 +98,7 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc)
         CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n",
                oscc->oscc_last_id, oscc->oscc_next_id);
 
-        wake_up(&oscc->oscc_waitq);
+        cfs_waitq_signal(&oscc->oscc_waitq);
         RETURN(rc);
 }
 
@@ -288,7 +277,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                         CDEBUG(D_HA, "%s: oscc recovery finished, last_id: "
                                LPU64", rc: %d\n", oscc->oscc_obd->obd_name,
                                oscc->oscc_last_id, rc);
-                        wake_up(&oscc->oscc_waitq);
+                        cfs_waitq_signal(&oscc->oscc_waitq);
                 } else {
                         CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n",
                                oscc->oscc_obd->obd_name, rc);
@@ -315,7 +304,8 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                         CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n",
                                oscc);
 
-                        lwi = LWI_TIMEOUT(MAX(obd_timeout*HZ/4, 1), NULL, NULL);
+                        lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout/4)),
+                                          NULL, NULL);
                         rc = l_wait_event(oscc->oscc_waitq,
                                           !oscc_recovering(oscc), &lwi);
                         LASSERT(rc == 0 || rc == -ETIMEDOUT);
@@ -373,8 +363,8 @@ void oscc_init(struct obd_device *obd)
         oscc = &obd->u.cli.cl_oscc;
 
         memset(oscc, 0, sizeof(*oscc));
-        INIT_LIST_HEAD(&oscc->oscc_list);
-        init_waitqueue_head(&oscc->oscc_waitq);
+        CFS_INIT_LIST_HEAD(&oscc->oscc_list);
+        cfs_waitq_init(&oscc->oscc_waitq);
         spin_lock_init(&oscc->oscc_lock);
         oscc->oscc_obd = obd;
         oscc->oscc_grow_count = OST_MIN_PRECREATE;
index 82db660..667da17 100644 (file)
@@ -22,7 +22,7 @@ struct osc_async_page {
 
         unsigned long           oap_interrupted:1;
         struct oig_callback_context oap_occ;
-        struct page             *oap_page;
+        cfs_page_t              *oap_page;
         struct obd_io_group     *oap_oig;
         struct ptlrpc_request   *oap_request;
         struct client_obd       *oap_cli;
@@ -38,7 +38,7 @@ struct osc_async_page {
 
 struct osc_cache_waiter {
         struct list_head        ocw_entry;
-        wait_queue_head_t       ocw_waitq;
+        cfs_waitq_t             ocw_waitq;
         struct osc_async_page   *ocw_oap;
         int                     ocw_rc;
 };
@@ -57,10 +57,6 @@ int osc_real_create(struct obd_export *exp, struct obdo *oa,
 void oscc_init(struct obd_device *obd);
 void osc_wake_cache_waiters(struct client_obd *cli);
 
-
-/* Quota stuff */
-extern quota_interface_t *quota_interface;
-
 #ifdef LPROCFS
 int lproc_osc_attach_seqstat(struct obd_device *dev);
 #else
index 569132b..39bd2f8 100644 (file)
 
 #ifdef __KERNEL__
 # include <linux/module.h>
-# include <linux/obd.h>
-# include <linux/obd_ost.h>
-# include <linux/lustre_net.h>
-# include <linux/lustre_dlm.h>
-# include <linux/lustre_lib.h>
+# include <obd.h>
+# include <obd_ost.h>
+# include <lustre_net.h>
+# include <lustre_dlm.h>
+# include <lustre_lib.h>
 # include <linux/lustre_compat25.h>
 
 /* convert a pathname into a kdev_t */
index 5b1c9ff..fa9e292 100644 (file)
 #define DEBUG_SUBSYSTEM S_OSC
 
 #ifdef __KERNEL__
-# include <linux/version.h>
-# include <linux/module.h>
-# include <linux/mm.h>
-# include <linux/highmem.h>
-# include <linux/ctype.h>
-# include <linux/init.h>
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#  include <linux/workqueue.h>
-#  include <linux/smp_lock.h>
-# else
-#  include <linux/locks.h>
-# endif
+# include <libcfs/libcfs.h>
 #else /* __KERNEL__ */
 # include <liblustre.h>
 #endif
 
-# include <linux/lustre_dlm.h>
+# include <lustre_dlm.h>
 #include <libcfs/kp30.h>
-#include <linux/lustre_net.h>
+#include <lustre_net.h>
 #include <lustre/lustre_user.h>
-#include <linux/obd_ost.h>
-#include <linux/obd_lov.h>
+#include <obd_ost.h>
+#include <obd_lov.h>
 
 #ifdef  __CYGWIN__
 # include <ctype.h>
 #endif
 
-#include <linux/lustre_ha.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_debug.h>
+#include <lustre_ha.h>
+#include <lprocfs_status.h>
+#include <lustre_log.h>
+#include <lustre_debug.h>
 #include "osc_internal.h"
 
+static quota_interface_t *quota_interface = NULL;
+extern quota_interface_t osc_quota_interface;
+
 /* Pack OSC object metadata for disk storage (LE byte order). */
 static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
                       struct lov_stripe_md *lsm)
@@ -548,7 +540,7 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
         LASSERT(!(oa->o_valid & bits));
 
         oa->o_valid |= bits;
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         oa->o_dirty = cli->cl_dirty;
         if (cli->cl_dirty > cli->cl_dirty_max) {
                 CERROR("dirty %lu > dirty_max %lu\n",
@@ -559,14 +551,14 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
                        cli->cl_dirty, cli->cl_dirty_max);
                 oa->o_undirty = 0;
         } else {
-                long max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT)*
+                long max_in_flight = (cli->cl_max_pages_per_rpc << CFS_PAGE_SHIFT)*
                                 (cli->cl_max_rpcs_in_flight + 1);
                 oa->o_undirty = max(cli->cl_dirty_max, max_in_flight);
         }
         oa->o_grant = cli->cl_avail_grant;
         oa->o_dropped = cli->cl_lost_grant;
         cli->cl_lost_grant = 0;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         CDEBUG(D_CACHE,"dirty: "LPU64" undirty: %u dropped %u grant: "LPU64"\n",
                oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant);
 }
@@ -575,10 +567,10 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa,
 static void osc_consume_write_grant(struct client_obd *cli,
                                     struct osc_async_page *oap)
 {
-        cli->cl_dirty += PAGE_SIZE;
-        cli->cl_avail_grant -= PAGE_SIZE;
+        cli->cl_dirty += CFS_PAGE_SIZE;
+        cli->cl_avail_grant -= CFS_PAGE_SIZE;
         oap->oap_brw_flags |= OBD_BRW_FROM_GRANT;
-        CDEBUG(D_CACHE, "using %lu grant credits for oap %p\n", PAGE_SIZE, oap);
+        CDEBUG(D_CACHE, "using %lu grant credits for oap %p\n", CFS_PAGE_SIZE, oap);
         LASSERT(cli->cl_avail_grant >= 0);
 }
 
@@ -593,9 +585,10 @@ void osc_wake_cache_waiters(struct client_obd *cli)
         struct list_head *l, *tmp;
         struct osc_cache_waiter *ocw;
 
+        ENTRY;
         list_for_each_safe(l, tmp, &cli->cl_cache_waiters) {
                 /* if we can't dirty more, we must wait until some is written */
-                if (cli->cl_dirty + PAGE_SIZE > cli->cl_dirty_max) {
+                if (cli->cl_dirty + CFS_PAGE_SIZE > cli->cl_dirty_max) {
                         CDEBUG(D_CACHE, "no dirty room: dirty: %ld max %ld\n",
                                cli->cl_dirty, cli->cl_dirty_max);
                         return;
@@ -603,7 +596,7 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
                 /* if still dirty cache but no grant wait for pending RPCs that
                  * may yet return us some grant before doing sync writes */
-                if (cli->cl_w_in_flight && cli->cl_avail_grant < PAGE_SIZE) {
+                if (cli->cl_w_in_flight && cli->cl_avail_grant < CFS_PAGE_SIZE) {
                         CDEBUG(D_CACHE, "%u BRW writes in flight, no grant\n",
                                cli->cl_w_in_flight);
                         return;
@@ -611,7 +604,7 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
                 ocw = list_entry(l, struct osc_cache_waiter, ocw_entry);
                 list_del_init(&ocw->ocw_entry);
-                if (cli->cl_avail_grant < PAGE_SIZE) {
+                if (cli->cl_avail_grant < CFS_PAGE_SIZE) {
                         /* no more RPCs in flight to return grant, do sync IO */
                         ocw->ocw_rc = -EDQUOT;
                         CDEBUG(D_INODE, "wake oap %p for sync\n", ocw->ocw_oap);
@@ -619,7 +612,7 @@ void osc_wake_cache_waiters(struct client_obd *cli)
                         osc_consume_write_grant(cli, ocw->ocw_oap);
                 }
 
-                wake_up(&ocw->ocw_waitq);
+                cfs_waitq_signal(&ocw->ocw_waitq);
         }
 
         EXIT;
@@ -627,9 +620,9 @@ void osc_wake_cache_waiters(struct client_obd *cli)
 
 static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 {
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         cli->cl_avail_grant = ocd->ocd_grant;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         CDEBUG(D_CACHE, "setting cl_avail_grant: %ld cl_lost_grant: %ld\n",
                cli->cl_avail_grant, cli->cl_lost_grant);
@@ -638,11 +631,11 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd)
 
 static void osc_update_grant(struct client_obd *cli, struct ost_body *body)
 {
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant);
         cli->cl_avail_grant += body->oa.o_grant;
         /* waiters are woken in brw_interpret_oap */
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 }
 
 /* We assume that the reason this OSC got a short read is because it read
@@ -660,9 +653,9 @@ static void handle_short_read(int nob_read, obd_count page_count,
 
                 if (pga->count > nob_read) {
                         /* EOF inside this page */
-                        ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK);
+                        ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK);
                         memset(ptr + nob_read, 0, pga->count - nob_read);
-                        kunmap(pga->pg);
+                        cfs_kunmap(pga->pg);
                         page_count--;
                         pga++;
                         break;
@@ -675,9 +668,9 @@ static void handle_short_read(int nob_read, obd_count page_count,
 
         /* zero remaining pages */
         while (page_count-- > 0) {
-                ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK);
+                ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK);
                 memset(ptr, 0, pga->count);
-                kunmap(pga->pg);
+                cfs_kunmap(pga->pg);
                 pga++;
         }
 }
@@ -742,12 +735,12 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count,
 
         LASSERT (pg_count > 0);
         while (nob > 0 && pg_count > 0) {
-                char *ptr = kmap(pga->pg);
-                int off = pga->off & ~PAGE_MASK;
+                char *ptr = cfs_kmap(pga->pg);
+                int off = pga->off & ~CFS_PAGE_MASK;
                 int count = pga->count > nob ? nob : pga->count;
 
                 cksum = crc32_le(cksum, ptr + off, count);
-                kunmap(pga->pg);
+                cfs_kunmap(pga->pg);
                 LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n",
                                off, cksum);
 
@@ -778,6 +771,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         int                      rc;
         struct ptlrpc_request_pool *pool;
 
+        ENTRY;
         opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
         pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL;
 
@@ -793,7 +787,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 3,
                                    size, NULL, pool);
         if (req == NULL)
-                return (-ENOMEM);
+                RETURN (-ENOMEM);
 
         /* FIXME bug 249. Also see bug 7198 */
         if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL)
@@ -824,9 +818,10 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                 struct brw_page *pg_prev = pg - 1;
 
                 LASSERT(pg->count > 0);
-                LASSERTF((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE,
+                LASSERTF((pg->off & ~CFS_PAGE_MASK) + pg->count <= CFS_PAGE_SIZE,
                          "i: %d pg: %p off: "LPU64", count: %u\n", i, pg,
                          pg->off, pg->count);
+#ifdef __LINUX__
                 LASSERTF(i == 0 || pg->off > pg_prev->off,
                          "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64
                          " prev_pg %p [pri %lu ind %lu] off "LPU64"\n",
@@ -834,10 +829,14 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
                          pg->pg, page_private(pg->pg), pg->pg->index, pg->off,
                          pg_prev->pg, page_private(pg_prev->pg),
                          pg_prev->pg->index, pg_prev->off);
+#else
+                LASSERTF(i == 0 || pg->off > pg_prev->off,
+                         "i %d p_c %u\n", i, page_count);
+#endif
                 LASSERT((pga[0].flag & OBD_BRW_SRVLOCK) ==
                         (pg->flag & OBD_BRW_SRVLOCK));
 
-                ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK,
+                ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~CFS_PAGE_MASK,
                                       pg->count);
                 requested_nob += pg->count;
 
@@ -880,11 +879,11 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         *niocountp = niocount;
         *requested_nobp = requested_nob;
         *reqp = req;
-        return (0);
+        RETURN (0);
 
  out:
         ptlrpc_req_finished (req);
-        return (rc);
+        RETURN (rc);
 }
 
 static void check_write_csum(__u32 cli, __u32 srv, int requested_nob,
@@ -1145,21 +1144,21 @@ static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages)
         int count = 1;
         int offset;
 
-        LASSERT (pages > 0);
-        offset = pg->off & (PAGE_SIZE - 1);
+       LASSERT (pages > 0);
+        offset = pg->off & (CFS_PAGE_SIZE - 1);
 
         for (;;) {
                 pages--;
                 if (pages == 0)         /* that's all */
                         return count;
 
-                if (offset + pg->count < PAGE_SIZE)
-                        return count;   /* doesn't end on page boundary */
+                if (offset + pg->count < CFS_PAGE_SIZE)
+                       return count;   /* doesn't end on page boundary */
 
-                pg++;
-                offset = pg->off & (PAGE_SIZE - 1);
-                if (offset != 0)        /* doesn't start on page boundary */
-                        return count;
+               pg++;
+                offset = pg->off & (CFS_PAGE_SIZE - 1);
+               if (offset != 0)        /* doesn't start on page boundary */
+                       return count;
 
                 count++;
         }
@@ -1201,11 +1200,9 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
                         *oa = *saved_oa;
                 } else if (page_count > pages_per_brw) {
                         /* save a copy of oa (brw will clobber it) */
-                        OBD_ALLOC(saved_oa, sizeof(*saved_oa));
-                        if (saved_oa == NULL) {
-                                CERROR("Can't save oa (ENOMEM)\n");
+                        saved_oa = obdo_alloc();
+                        if (saved_oa == NULL)
                                 RETURN(-ENOMEM);
-                        }
                         *saved_oa = *oa;
                 }
 
@@ -1219,7 +1216,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa,
         }
 
         if (saved_oa != NULL)
-                OBD_FREE(saved_oa, sizeof(*saved_oa));
+                obdo_free(saved_oa);
 
         RETURN(rc);
 }
@@ -1364,7 +1361,7 @@ static void osc_occ_interrupted(struct oig_callback_context *occ)
         /* XXX member_of() */
         oap = list_entry(occ, struct osc_async_page, oap_occ);
 
-        spin_lock(&oap->oap_cli->cl_loi_list_lock);
+        client_obd_list_lock(&oap->oap_cli->cl_loi_list_lock);
 
         oap->oap_interrupted = 1;
 
@@ -1392,7 +1389,7 @@ static void osc_occ_interrupted(struct oig_callback_context *occ)
         }
 
 unlock:
-        spin_unlock(&oap->oap_cli->cl_loi_list_lock);
+        client_obd_list_unlock(&oap->oap_cli->cl_loi_list_lock);
 }
 
 /* this is trying to propogate async writeback errors back up to the
@@ -1422,6 +1419,7 @@ static void osc_process_ar(struct osc_async_rc *ar, struct ptlrpc_request *req,
 static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
                               struct osc_async_page *oap, int sent, int rc)
 {
+        ENTRY;
         osc_exit_cache(cli, oap, sent);
         oap->oap_async_flags = 0;
         oap->oap_interrupted = 0;
@@ -1456,6 +1454,7 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa,
 
         oap->oap_caller_ops->ap_completion(oap->oap_caller_data, oap->oap_cmd,
                                            oa, rc);
+        EXIT;
 }
 
 static int brw_interpret_oap(struct ptlrpc_request *request,
@@ -1474,7 +1473,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request,
 
         cli = aa->aa_cli;
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters
          * is called so we know whether to go to sync BRWs or wait for more
@@ -1499,7 +1498,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request,
         osc_wake_cache_waiters(cli);
         osc_check_rpcs(cli);
 
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         obdo_free(aa->aa_oa);
         OBD_FREE(aa->aa_pga, aa->aa_page_count * sizeof(struct brw_page));
@@ -1521,6 +1520,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
         struct list_head *pos;
         int i, rc;
 
+        ENTRY;
         LASSERT(!list_empty(rpc_list));
 
         OBD_ALLOC(pga, sizeof(*pga) * page_count);
@@ -1545,7 +1545,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
                 pga[i].count = oap->oap_count;
                 pga[i].flag = oap->oap_brw_flags;
                 CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n",
-                       pga[i].pg, oap->oap_page->index, oap, pga[i].flag);
+                       pga[i].pg, cfs_page_index(oap->oap_page), oap, pga[i].flag);
                 i++;
         }
 
@@ -1591,7 +1591,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
         struct osc_async_page *oap = NULL;
         struct osc_brw_async_args *aa;
         struct obd_async_page_ops *ops;
-        LIST_HEAD(rpc_list);
+        CFS_LIST_HEAD(rpc_list);
         unsigned int ending_offset;
         unsigned  starting_offset = 0;
         ENTRY;
@@ -1653,7 +1653,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
                  * XXX nikita: this assertion should be adjusted when lustre
                  * starts using PG_writeback for pages being written out.
                  */
-#if defined(__KERNEL__)
+#if defined(__KERNEL__) && defined(__LINUX__)
                 LASSERT(PageLocked(oap->oap_page));
 #endif
                 /* If there is a gap at the start of this page, it can't merge
@@ -1699,7 +1699,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
                 /* If there is a gap at the end of this page, it can't merge
                  * with any subsequent pages, so we'll hand the network a
                  * "fragmented" page array that it can't transfer in 1 RDMA */
-                if (oap->oap_page_off + oap->oap_count < PAGE_SIZE)
+                if (oap->oap_page_off + oap->oap_count < CFS_PAGE_SIZE)
                         break;
         }
 
@@ -1710,13 +1710,13 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
 
         loi_list_maint(cli, loi);
 
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         request = osc_build_req(cli, &rpc_list, page_count, cmd);
         if (IS_ERR(request)) {
                 /* this should happen rarely and is pretty bad, it makes the
                  * pending list not follow the dirty order */
-                spin_lock(&cli->cl_loi_list_lock);
+                client_obd_list_lock(&cli->cl_loi_list_lock);
                 list_for_each_safe(pos, tmp, &rpc_list) {
                         oap = list_entry(pos, struct osc_async_page,
                                          oap_rpc_item);
@@ -1746,24 +1746,24 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi,
 
         LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args));
         aa = (struct osc_brw_async_args *)&request->rq_async_args;
-        INIT_LIST_HEAD(&aa->aa_oaps);
+        CFS_INIT_LIST_HEAD(&aa->aa_oaps);
         list_splice(&rpc_list, &aa->aa_oaps);
-        INIT_LIST_HEAD(&rpc_list);
+        CFS_INIT_LIST_HEAD(&rpc_list);
 
         if (cmd == OBD_BRW_READ) {
                 lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count);
                 lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight);
                 lprocfs_oh_tally_log2(&cli->cl_read_offset_hist,
-                                      starting_offset/PAGE_SIZE + 1);
+                                      starting_offset/CFS_PAGE_SIZE + 1);
         } else {
                 lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count);
                 lprocfs_oh_tally(&cli->cl_write_rpc_hist,
                                  cli->cl_w_in_flight);
                 lprocfs_oh_tally_log2(&cli->cl_write_offset_hist,
-                                      starting_offset/PAGE_SIZE + 1);
+                                      starting_offset/CFS_PAGE_SIZE + 1);
         }
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         if (cmd == OBD_BRW_READ)
                 cli->cl_r_in_flight++;
@@ -1908,9 +1908,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw)
 {
         int rc;
         ENTRY;
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
         rc = list_empty(&ocw->ocw_entry) || rpcs_in_flight(cli) == 0;
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         RETURN(rc);
 };
 
@@ -1922,22 +1922,23 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi,
         struct osc_cache_waiter ocw;
         struct l_wait_info lwi = { 0 };
 
+        ENTRY;
         CDEBUG(D_CACHE, "dirty: %ld dirty_max: %ld dropped: %lu grant: %lu\n",
                cli->cl_dirty, cli->cl_dirty_max, cli->cl_lost_grant,
                cli->cl_avail_grant);
 
         /* force the caller to try sync io.  this can jump the list
          * of queued writes and create a discontiguous rpc stream */
-        if (cli->cl_dirty_max < PAGE_SIZE || cli->cl_ar.ar_force_sync ||
+        if (cli->cl_dirty_max < CFS_PAGE_SIZE || cli->cl_ar.ar_force_sync ||
             loi->loi_ar.ar_force_sync)
-                return(-EDQUOT);
+                RETURN(-EDQUOT);
 
         /* Hopefully normal case - cache space and write credits available */
-        if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max &&
-            cli->cl_avail_grant >= PAGE_SIZE) {
+        if (cli->cl_dirty + CFS_PAGE_SIZE <= cli->cl_dirty_max &&
+            cli->cl_avail_grant >= CFS_PAGE_SIZE) {
                 /* account for ourselves */
                 osc_consume_write_grant(cli, oap);
-                return(0);
+                RETURN(0);
         }
 
         /* Make sure that there are write rpcs in flight to wait for.  This
@@ -1945,18 +1946,18 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi,
          * other objects sure might. */
         if (cli->cl_w_in_flight) {
                 list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters);
-                init_waitqueue_head(&ocw.ocw_waitq);
+                cfs_waitq_init(&ocw.ocw_waitq);
                 ocw.ocw_oap = oap;
                 ocw.ocw_rc = 0;
 
                 loi_list_maint(cli, loi);
                 osc_check_rpcs(cli);
-                spin_unlock(&cli->cl_loi_list_lock);
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
 
                 CDEBUG(D_CACHE, "sleeping for cache space\n");
                 l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi);
 
-                spin_lock(&cli->cl_loi_list_lock);
+                client_obd_list_lock(&cli->cl_loi_list_lock);
                 if (!list_empty(&ocw.ocw_entry)) {
                         list_del(&ocw.ocw_entry);
                         RETURN(-EINTR);
@@ -1982,24 +1983,24 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap,
         }
 
         oap->oap_brw_flags &= ~OBD_BRW_FROM_GRANT;
-        cli->cl_dirty -= PAGE_SIZE;
+        cli->cl_dirty -= CFS_PAGE_SIZE;
         if (!sent) {
-                cli->cl_lost_grant += PAGE_SIZE;
+                cli->cl_lost_grant += CFS_PAGE_SIZE;
                 CDEBUG(D_CACHE, "lost grant: %lu avail grant: %lu dirty: %lu\n",
                        cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty);
-        } else if (PAGE_SIZE != blocksize && oap->oap_count != PAGE_SIZE) {
+        } else if (CFS_PAGE_SIZE != blocksize && oap->oap_count != CFS_PAGE_SIZE) {
                 /* For short writes we shouldn't count parts of pages that
                  * span a whole block on the OST side, or our accounting goes
                  * wrong.  Should match the code in filter_grant_check. */
-                int offset = (oap->oap_obj_off +oap->oap_page_off) & ~PAGE_MASK;
+                int offset = (oap->oap_obj_off +oap->oap_page_off) & ~CFS_PAGE_MASK;
                 int count = oap->oap_count + (offset & (blocksize - 1));
                 int end = (offset + oap->oap_count) & (blocksize - 1);
                 if (end)
                         count += blocksize - end;
 
-                cli->cl_lost_grant += PAGE_SIZE - count;
+                cli->cl_lost_grant += CFS_PAGE_SIZE - count;
                 CDEBUG(D_CACHE, "lost %lu grant: %lu avail: %lu dirty: %lu\n",
-                       PAGE_SIZE - count, cli->cl_lost_grant,
+                       CFS_PAGE_SIZE - count, cli->cl_lost_grant,
                        cli->cl_avail_grant, cli->cl_dirty);
         }
 
@@ -2007,7 +2008,7 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap,
 }
 
 int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
-                        struct lov_oinfo *loi, struct page *page,
+                        struct lov_oinfo *loi, cfs_page_t *page,
                         obd_off offset, struct obd_async_page_ops *ops,
                         void *data, void **res)
 {
@@ -2028,9 +2029,9 @@ int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm,
         oap->oap_page = page;
         oap->oap_obj_off = offset;
 
-        INIT_LIST_HEAD(&oap->oap_pending_item);
-        INIT_LIST_HEAD(&oap->oap_urgent_item);
-        INIT_LIST_HEAD(&oap->oap_rpc_item);
+        CFS_INIT_LIST_HEAD(&oap->oap_pending_item);
+        CFS_INIT_LIST_HEAD(&oap->oap_urgent_item);
+        CFS_INIT_LIST_HEAD(&oap->oap_rpc_item);
 
         oap->oap_occ.occ_interrupted = osc_occ_interrupted;
 
@@ -2094,7 +2095,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
         if (loi == NULL)
                 loi = &lsm->lsm_oinfo[0];
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         oap->oap_cmd = cmd;
         oap->oap_page_off = off;
@@ -2105,7 +2106,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
         if (cmd & OBD_BRW_WRITE) {
                 rc = osc_enter_cache(cli, loi, oap);
                 if (rc) {
-                        spin_unlock(&cli->cl_loi_list_lock);
+                        client_obd_list_unlock(&cli->cl_loi_list_lock);
                         RETURN(rc);
                 }
                 lop = &loi->loi_write_lop;
@@ -2124,7 +2125,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm,
                   cmd);
 
         osc_check_rpcs(cli);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         RETURN(0);
 }
@@ -2169,7 +2170,7 @@ static int osc_set_async_flags(struct obd_export *exp,
                 lop = &loi->loi_read_lop;
         }
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         if (list_empty(&oap->oap_pending_item))
                 GOTO(out, rc = -EINVAL);
@@ -2191,7 +2192,7 @@ static int osc_set_async_flags(struct obd_export *exp,
                         oap->oap_async_flags);
 out:
         osc_check_rpcs(cli);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         RETURN(rc);
 }
 
@@ -2222,7 +2223,7 @@ static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm,
         if (loi == NULL)
                 loi = &lsm->lsm_oinfo[0];
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         oap->oap_cmd = cmd;
         oap->oap_page_off = off;
@@ -2243,7 +2244,7 @@ static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         LOI_DEBUG(loi, "oap %p page %p on group pending\n", oap, oap->oap_page);
 
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         RETURN(0);
 }
@@ -2276,13 +2277,13 @@ static int osc_trigger_group_io(struct obd_export *exp,
         if (loi == NULL)
                 loi = &lsm->lsm_oinfo[0];
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         osc_group_to_pending(cli, loi, &loi->loi_write_lop, OBD_BRW_WRITE);
         osc_group_to_pending(cli, loi, &loi->loi_read_lop, OBD_BRW_READ);
 
         osc_check_rpcs(cli);
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
 
         RETURN(0);
 }
@@ -2310,7 +2311,7 @@ static int osc_teardown_async_page(struct obd_export *exp,
                 lop = &loi->loi_read_lop;
         }
 
-        spin_lock(&cli->cl_loi_list_lock);
+        client_obd_list_lock(&cli->cl_loi_list_lock);
 
         if (!list_empty(&oap->oap_rpc_item))
                 GOTO(out, rc = -EBUSY);
@@ -2330,7 +2331,7 @@ static int osc_teardown_async_page(struct obd_export *exp,
 
         LOI_DEBUG(loi, "oap %p page %p torn down\n", oap, oap->oap_page);
 out:
-        spin_unlock(&cli->cl_loi_list_lock);
+        client_obd_list_unlock(&cli->cl_loi_list_lock);
         RETURN(rc);
 }
 
@@ -2425,12 +2426,12 @@ static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa,
                         CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n",
                                         page->mapping->host->i_ino,
                                         page->index);
-                        memset(page_address(page), 0, PAGE_SIZE);
+                        memset(page_address(page), 0, CFS_PAGE_SIZE);
                         continue;
                 }
 
                 if (!page->buffers) {
-                        create_empty_buffers(page, dev, PAGE_SIZE);
+                        create_empty_buffers(page, dev, CFS_PAGE_SIZE);
                         bh = page->buffers;
 
                         clear_bit(BH_New, &bh->b_state);
@@ -2548,7 +2549,7 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa,
                 dev = exp->exp_obd->u.cli.cl_sandev;
 
                 if (!page->buffers) {
-                        create_empty_buffers(page, dev, PAGE_SIZE);
+                        create_empty_buffers(page, dev, CFS_PAGE_SIZE);
                 } else {
                         /* checking */
                         LASSERT(!test_bit(BH_New, &page->buffers->b_state));
@@ -2633,6 +2634,8 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data,
         }
         l_lock(&lock->l_resource->lr_namespace->ns_lock);
 #ifdef __KERNEL__
+#ifdef __LINUX__
+        /* Liang XXX: Darwin and Winnt checking should be added */
         if (lock->l_ast_data && lock->l_ast_data != data) {
                 struct inode *new_inode = data;
                 struct inode *old_inode = lock->l_ast_data;
@@ -2646,6 +2649,7 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data,
                          new_inode, new_inode->i_ino, new_inode->i_generation);
         }
 #endif
+#endif
         lock->l_ast_data = data;
         lock->l_flags |= (flags & LDLM_FL_NO_LRU);
         l_unlock(&lock->l_resource->lr_namespace->ns_lock);
@@ -2678,15 +2682,15 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         /* Filesystem lock extents are extended to page boundaries so that
          * dealing with the page cache is a little smoother.  */
-        policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
-        policy->l_extent.end |= ~PAGE_MASK;
+        policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
+        policy->l_extent.end |= ~CFS_PAGE_MASK;
 
         if (lsm->lsm_oinfo->loi_kms_valid == 0)
                 goto no_match;
 
         /* Next, search for already existing extent locks that will cover us */
-        rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, policy, mode,
-                             lockh);
+        rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, policy,
+                             mode, lockh);
         if (rc == 1) {
                 osc_set_data_with_check(lockh, data, *flags);
                 if (*flags & LDLM_FL_HAS_INTENT) {
@@ -2711,7 +2715,7 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
          * locks out from other users right now, too. */
 
         if (mode == LCK_PR) {
-                rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type,
+                rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
                                      policy, LCK_PW, lockh);
                 if (rc == 1) {
                         /* FIXME: This is not incredibly elegant, but it might
@@ -2738,6 +2742,9 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm,
                 req->rq_replen = lustre_msg_size(2, size);
         }
 
+        /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */
+        *flags &= ~LDLM_FL_BLOCK_GRANTED;
+
         rc = ldlm_cli_enqueue(exp, req, obd->obd_namespace, res_id, type,
                               policy, mode, flags, bl_cb, cp_cb, gl_cb, data,
                               &lvb, sizeof(lvb), lustre_swab_ost_lvb, lockh);
@@ -2776,8 +2783,8 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm,
 
         /* Filesystem lock extents are extended to page boundaries so that
          * dealing with the page cache is a little smoother */
-        policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK;
-        policy->l_extent.end |= ~PAGE_MASK;
+        policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK;
+        policy->l_extent.end |= ~CFS_PAGE_MASK;
 
         /* Next, search for already existing extent locks that will cover us */
         rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type,
@@ -2839,7 +2846,7 @@ static int osc_join_lru(struct obd_export *exp,
 }
 
 static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                      unsigned long max_age)
+                      cfs_time_t max_age)
 {
         struct obd_statfs *msfs;
         struct ptlrpc_request *request;
@@ -3005,7 +3012,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 GOTO(out, err);
         default:
                 CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n",
-                       cmd, current->comm);
+                       cmd, cfs_curproc_comm());
                 GOTO(out, err = -ENOTTY);
         }
 out:
@@ -3059,14 +3066,40 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen,
         RETURN(-EINVAL);
 }
 
-static int osc_set_info(struct obd_export *exp, obd_count keylen,
-                        void *key, obd_count vallen, void *val)
+/* Reply interpreter for the async "mds_conn" OST_SET_INFO request: a failed
+ * RPC (rc != 0) is returned as is; otherwise the LLOG_MDS_OST_ORIG_CTXT llog
+ * context, if present, is connected and the import is marked pingable with
+ * imp_server_timeout set.  Returns rc or llog_initiator_connect()'s result. */
+static int osc_setinfo_mds_conn_interpret(struct ptlrpc_request *req,
+                                          void *aa, int rc)
+{
+        struct llog_ctxt *ctxt;
+        struct obd_import *imp = req->rq_import;
+        ENTRY;
+
+        if (rc != 0)
+                RETURN(rc);
+
+        /* rc is known to be 0 from here on, so no error branch is needed */
+        ctxt = llog_get_context(imp->imp_obd, LLOG_MDS_OST_ORIG_CTXT);
+        if (ctxt)
+                rc = llog_initiator_connect(ctxt);
+
+        imp->imp_server_timeout = 1;
+        CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd));
+        imp->imp_pingable = 1;
+
+        RETURN(rc);
+}
+
+static int osc_set_info_async(struct obd_export *exp, obd_count keylen,
+                              void *key, obd_count vallen, void *val,
+                              struct ptlrpc_request_set *set)
 {
         struct ptlrpc_request *req;
         struct obd_device  *obd = exp->exp_obd;
         struct obd_import *imp = class_exp2cliimp(exp);
-        struct llog_ctxt *ctxt;
-        int rc, size[2] = {keylen, vallen};
+        int size[2] = {keylen, vallen};
         char *bufs[2] = {key, val};
         ENTRY;
 
@@ -3108,9 +3141,15 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(0);
         }
 
-        if (!KEY_IS(KEY_MDS_CONN) && !KEY_IS("evict_by_nid"))
+        if (!set)
                 RETURN(-EINVAL);
 
+        /* We pass all other commands directly to OST. Since nobody calls osc
+           methods directly and everybody is supposed to go through LOV, we
+           assume lov checked invalid values for us.
+           The only recognised values so far are evict_by_nid and mds_conn.
+           Even if something bad goes through, we'd get a -EINVAL from OST
+           anyway. */
 
         req = ptlrpc_prep_req(imp, LUSTRE_OST_VERSION, OST_SET_INFO,
                               2, size, bufs);
@@ -3118,23 +3157,13 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen,
                 RETURN(-ENOMEM);
 
         req->rq_replen = lustre_msg_size(0, NULL);
-        rc = ptlrpc_queue_wait(req);
-        ptlrpc_req_finished(req);
 
-        ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_ORIG_CTXT);
-        if (ctxt) {
-                if (rc == 0)
-                        rc = llog_initiator_connect(ctxt);
-                else
-                        CERROR("cannot establish connection for ctxt %p: %d\n",
-                               ctxt, rc);
-        }
-
-        imp->imp_server_timeout = 1;
-        CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(exp->exp_obd));
-        imp->imp_pingable = 1;
+        if (KEY_IS("mds_conn"))
+                req->rq_interpret_reply = osc_setinfo_mds_conn_interpret;
+        ptlrpc_set_add_req(set, req);
+        ptlrpc_check_set(set);
 
-        RETURN(rc);
+        RETURN(0);
 }
 
 
@@ -3193,12 +3222,12 @@ static int osc_reconnect(struct obd_export *exp, struct obd_device *obd,
         if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) {
                 long lost_grant;
 
-                spin_lock(&cli->cl_loi_list_lock);
+                client_obd_list_lock(&cli->cl_loi_list_lock);
                 data->ocd_grant = cli->cl_avail_grant ?:
                                 2 * cli->cl_max_pages_per_rpc << PAGE_SHIFT;
                 lost_grant = cli->cl_lost_grant;
                 cli->cl_lost_grant = 0;
-                spin_unlock(&cli->cl_loi_list_lock);
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
 
                 CDEBUG(D_CACHE, "request ocd_grant: %d cl_avail_grant: %ld "
                        "cl_lost_grant: %ld\n", data->ocd_grant,
@@ -3232,6 +3261,7 @@ static int osc_import_event(struct obd_device *obd,
         struct client_obd *cli;
         int rc = 0;
 
+        ENTRY;
         LASSERT(imp->imp_obd == obd);
 
         switch (event) {
@@ -3256,12 +3286,12 @@ static int osc_import_event(struct obd_device *obd,
 
                 /* Reset grants */
                 cli = &obd->u.cli;
-                spin_lock(&cli->cl_loi_list_lock);
+                client_obd_list_lock(&cli->cl_loi_list_lock);
                 cli->cl_avail_grant = 0;
                 cli->cl_lost_grant = 0;
                 /* all pages go to failing rpcs due to the invalid import */
                 osc_check_rpcs(cli);
-                spin_unlock(&cli->cl_loi_list_lock);
+                client_obd_list_unlock(&cli->cl_loi_list_lock);
 
                 ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY);
 
@@ -3304,9 +3334,9 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         int rc;
         ENTRY;
 
         rc = ptlrpcd_addref();
         if (rc)
-                return rc;
+                RETURN(rc);
 
         rc = client_obd_setup(obd, lcfg);
         if (rc) {
@@ -3368,6 +3399,7 @@ int osc_cleanup(struct obd_device *obd)
         struct osc_creator *oscc = &obd->u.cli.cl_oscc;
         int rc;
 
+        ENTRY;
         ptlrpc_lprocfs_unregister_obd(obd);
         lprocfs_obd_cleanup(obd);
 
@@ -3423,7 +3455,7 @@ struct obd_ops osc_obd_ops = {
         .o_join_lru             = osc_join_lru,
         .o_iocontrol            = osc_iocontrol,
         .o_get_info             = osc_get_info,
-        .o_set_info             = osc_set_info,
+        .o_set_info_async       = osc_set_info_async,
         .o_import_event         = osc_import_event,
         .o_llog_init            = osc_llog_init,
         .o_llog_finish          = osc_llog_finish,
@@ -3464,7 +3496,6 @@ struct obd_ops sanosc_obd_ops = {
 };
 #endif
 
-static quota_interface_t *quota_interface;
 extern quota_interface_t osc_quota_interface;
 
 int __init osc_init(void)
@@ -3524,6 +3555,5 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)");
 MODULE_LICENSE("GPL");
 
-module_init(osc_init);
-module_exit(osc_exit);
+cfs_module(osc, "1.0.0", osc_init, osc_exit);
 #endif
index 40dc85b..4f2832f 100644 (file)
@@ -34,7 +34,7 @@
 #include <linux/module.h>
 
 /* LUSTRE_VERSION_CODE */
-#include <linux/lustre_ver.h>
+#include <lustre_ver.h>
 /*
  * XXX temporary stuff: direct access to ldiskfs/jdb. Interface between osd
  * and file system is not yet specified.
 /* LDISKFS_SB() */
 #include <linux/ldiskfs_fs.h>
 /* simple_mkdir() */
-#include <linux/lvfs.h>
+#include <lvfs.h>
 
 /*
  * struct OBD_{ALLOC,FREE}*()
  * OBD_FAIL_CHECK
  */
-#include <linux/obd_support.h>
+#include <obd_support.h>
 /* struct ptlrpc_thread */
-#include <linux/lustre_net.h>
+#include <lustre_net.h>
 /* LUSTRE_OSD0_NAME */
-#include <linux/obd.h>
+#include <obd.h>
 /* class_register_type(), class_unregister_type(), class_get_type() */
-#include <linux/obd_class.h>
-#include <linux/lustre_disk.h>
+#include <obd_class.h>
+#include <lustre_disk.h>
 
 /* fid_is_local() */
-#include <linux/lustre_fid.h>
+#include <lustre_fid.h>
 #include <linux/lustre_iam.h>
 
 #include "osd_internal.h"
index 56c384f..091628d 100644 (file)
@@ -34,8 +34,7 @@
 /* struct rw_semaphore */
 #include <linux/rwsem.h>
 
-#include <linux/dt_object.h>
-
+#include <dt_object.h>
 #include "osd_oi.h"
 
 struct inode;
index 8963ace..a3f6c24 100644 (file)
 #include <linux/module.h>
 
 /* LUSTRE_VERSION_CODE */
-#include <linux/lustre_ver.h>
+#include <lustre_ver.h>
 /*
  * struct OBD_{ALLOC,FREE}*()
  * OBD_FAIL_CHECK
  */
-#include <linux/obd_support.h>
+#include <obd_support.h>
 
 /* fid_is_local() */
-#include <linux/lustre_fid.h>
+#include <lustre_fid.h>
 
 #include "osd_oi.h"
 /* osd_lookup(), struct osd_thread_info */
index 0b92534..ec1b1d1 100644 (file)
@@ -33,8 +33,7 @@
 
 /* struct rw_semaphore */
 #include <linux/rwsem.h>
-
-#include <linux/lu_object.h>
+#include <lu_object.h>
 
 struct dentry;
 struct lu_fid;
index 0210d29..7b8e790 100644 (file)
@@ -24,8 +24,8 @@
  */
 #define DEBUG_SUBSYSTEM S_OST
 
-#include <linux/obd_class.h>
-#include <linux/lprocfs_status.h>
+#include <obd_class.h>
+#include <lprocfs_status.h>
 #include <linux/seq_file.h>
 #include "ost_internal.h"
 
index 05de28d..0bb8c64 100644 (file)
 #define DEBUG_SUBSYSTEM S_OST
 
 #include <linux/module.h>
-#include <linux/obd_ost.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_export.h>
-#include <linux/lustre_debug.h>
+#include <obd_ost.h>
+#include <lustre_net.h>
+#include <lustre_dlm.h>
+#include <lustre_export.h>
+#include <lustre_debug.h>
 #include <linux/init.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_commit_confd.h>
+#include <lprocfs_status.h>
+#include <lustre_commit_confd.h>
 #include <libcfs/list.h>
-#include <linux/lustre_quota.h>
+#include <lustre_quota.h>
 #include "ost_internal.h"
 
 static int ost_num_threads;
@@ -1149,7 +1149,7 @@ static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req)
                 GOTO(out, rc = 0);
         }
 
-        rc = obd_set_info(exp, keylen, key, vallen, val);
+        rc = obd_set_info_async(exp, keylen, key, vallen, val, NULL);
 out:
         req->rq_repmsg->status = 0;
         RETURN(rc);
diff --git a/lustre/ptlrpc/Info.plist b/lustre/ptlrpc/Info.plist
new file mode 100644 (file)
index 0000000..2b7d35f
--- /dev/null
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>ptlrpc</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.ptlrpc</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string> 
+       <key>OSBundleCompatibleVersion</key> 
+       <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict> 
+               <key>com.clusterfs.lustre.libcfs</key> 
+               <string>1.0.0</string> 
+               <key>com.clusterfs.lustre.lnet</key> 
+               <string>1.0.0</string>
+               <key>com.clusterfs.lustre.obdclass</key> 
+               <string>1.0.0</string>
+       </dict>
+</dict>
+</plist>
index 09dc09b..9a2c3db 100644 (file)
@@ -30,8 +30,46 @@ libptlrpc_a_CFLAGS = $(LLCFLAGS)
 endif
 
 if MODULES
+
+if LINUX
 modulefs_DATA = ptlrpc$(KMODEXT)
+endif #LINUX
+
+if DARWIN
+macos_PROGRAMS := ptlrpc
+
+ptlrpc_SOURCES := \
+        ptlrpc_module.c \
+        client.c \
+        connection.c \
+        events.c \
+        import.c \
+        llog_client.c \
+        llog_net.c \
+        llog_server.c \
+        lproc_ptlrpc.c \
+        niobuf.c \
+        pack_generic.c \
+        pers.c \
+        pinger.c \
+        ptlrpcd.c \
+        recover.c \
+        recov_thread.c \
+        service.c \
+        $(LDLM_COMM_SOURCES)
+
+ptlrpc_CFLAGS := $(EXTRA_KCFLAGS)
+ptlrpc_LDFLAGS := $(EXTRA_KLDFLAGS)
+ptlrpc_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif # DARWIN
+
 endif # MODULES
 
+install-data-hook: $(install_data_hook)
 DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h
 MOSTLYCLEANFILES := @MOSTLYCLEANFILES@  ldlm_*.c l_lock.c
index 24a312b..2732e53 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_import.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_import.h>
 
 #include "ptlrpc_internal.h"
 
@@ -97,7 +97,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal
                 return NULL;
 
         spin_lock_init(&desc->bd_lock);
-        init_waitqueue_head(&desc->bd_waitq);
+        cfs_waitq_init(&desc->bd_waitq);
         desc->bd_max_iov = npages;
         desc->bd_iov_count = 0;
         desc->bd_md_h = LNET_INVALID_HANDLE;
@@ -113,6 +113,7 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req,
         struct obd_import *imp = req->rq_import;
         struct ptlrpc_bulk_desc *desc;
 
+        ENTRY;
         LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE);
         desc = new_bulk(npages, type, portal);
         if (desc == NULL)
@@ -137,6 +138,7 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
         struct obd_export *exp = req->rq_export;
         struct ptlrpc_bulk_desc *desc;
 
+        ENTRY;
         LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK);
 
         desc = new_bulk(npages, type, portal);
@@ -156,13 +158,13 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req,
 }
 
 void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc,
-                           struct page *page, int pageoffset, int len)
+                           cfs_page_t *page, int pageoffset, int len)
 {
         LASSERT(desc->bd_iov_count < desc->bd_max_iov);
         LASSERT(page != NULL);
         LASSERT(pageoffset >= 0);
         LASSERT(len > 0);
-        LASSERT(pageoffset + len <= PAGE_SIZE);
+        LASSERT(pageoffset + len <= CFS_PAGE_SIZE);
 
         desc->bd_nob += len;
 
@@ -222,19 +224,21 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq)
         for (i = 0; i < num_rq; i++) {
                 struct ptlrpc_request *req;
                 struct lustre_msg *msg;
+
+                spin_unlock(&pool->prp_lock);
                 OBD_ALLOC(req, sizeof(struct ptlrpc_request));
                 if (!req)
-                        goto out;
-                OBD_ALLOC_GFP(msg, size, GFP_KERNEL);
+                        return;
+                OBD_ALLOC_GFP(msg, size, CFS_ALLOC_STD);
                 if (!msg) {
                         OBD_FREE(req, sizeof(struct ptlrpc_request));
-                        goto out;
+                        return;
                 }
                 req->rq_reqmsg = msg;
                 req->rq_pool = pool;
+                spin_lock(&pool->prp_lock);
                 list_add_tail(&req->rq_list, &pool->prp_req_list);
         }
-out:
         spin_unlock(&pool->prp_lock);
         return;
 }
@@ -252,7 +256,7 @@ struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int num_rq, int msgsize,
            kernel would do exactly this */
 
         spin_lock_init(&pool->prp_lock);
-        INIT_LIST_HEAD(&pool->prp_req_list);
+        CFS_INIT_LIST_HEAD(&pool->prp_req_list);
         pool->prp_rq_size = msgsize;
         pool->prp_populate = populate_pool;
 
@@ -360,10 +364,10 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
         request->rq_reply_portal = imp->imp_client->cli_reply_portal;
 
         spin_lock_init(&request->rq_lock);
-        INIT_LIST_HEAD(&request->rq_list);
-        INIT_LIST_HEAD(&request->rq_replay_list);
-        INIT_LIST_HEAD(&request->rq_set_chain);
-        init_waitqueue_head(&request->rq_reply_waitq);
+        CFS_INIT_LIST_HEAD(&request->rq_list);
+        CFS_INIT_LIST_HEAD(&request->rq_replay_list);
+        CFS_INIT_LIST_HEAD(&request->rq_set_chain);
+        cfs_waitq_init(&request->rq_reply_waitq);
         request->rq_xid = ptlrpc_next_xid();
         atomic_set(&request->rq_refcount, 1);
 
@@ -385,14 +389,15 @@ struct ptlrpc_request_set *ptlrpc_prep_set(void)
 {
         struct ptlrpc_request_set *set;
 
+        ENTRY;
         OBD_ALLOC(set, sizeof *set);
         if (!set)
                 RETURN(NULL);
-        INIT_LIST_HEAD(&set->set_requests);
-        init_waitqueue_head(&set->set_waitq);
+        CFS_INIT_LIST_HEAD(&set->set_requests);
+        cfs_waitq_init(&set->set_waitq);
         set->set_remaining = 0;
         spin_lock_init(&set->set_new_req_lock);
-        INIT_LIST_HEAD(&set->set_new_requests);
+        CFS_INIT_LIST_HEAD(&set->set_new_requests);
 
         RETURN(set);
 }
@@ -648,9 +653,6 @@ static int after_reply(struct ptlrpc_request *req)
                         spin_lock_irqsave(&imp->imp_lock, flags);
                 }
 
-                if (req->rq_transno > imp->imp_max_transno)
-                        imp->imp_max_transno = req->rq_transno;
-
                 /* Replay-enabled imports return commit-status information. */
                 if (req->rq_repmsg->last_committed)
                         imp->imp_peer_committed_transno =
@@ -706,9 +708,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
         list_add_tail(&req->rq_list, &imp->imp_sending_list);
         spin_unlock_irqrestore(&imp->imp_lock, flags);
 
-        req->rq_reqmsg->status = current->pid;
+        req->rq_reqmsg->status = cfs_curproc_pid();
         CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc"
-               " %s:%s:%d:"LPU64":%s:%d\n", current->comm,
+               " %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
                req->rq_xid,
                libcfs_nid2str(imp->imp_connection->c_peer.nid),
@@ -926,7 +928,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 }
 
                 CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:"
-                       "opc %s:%s:%d:"LPU64":%s:%d\n", current->comm,
+                       "opc %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                        imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status,
                        req->rq_xid,
                        libcfs_nid2str(imp->imp_connection->c_peer.nid),
@@ -935,7 +937,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 set->set_remaining--;
 
                 atomic_dec(&imp->imp_inflight);
-                wake_up(&imp->imp_recovery_waitq);
+                cfs_waitq_signal(&imp->imp_recovery_waitq);
         }
 
         /* If we hit an error, we want to recover promptly. */
@@ -1089,7 +1091,9 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
         int                    rc, timeout;
         ENTRY;
 
-        LASSERT(!list_empty(&set->set_requests));
+        if (list_empty(&set->set_requests))
+                RETURN(0);
+
         list_for_each(tmp, &set->set_requests) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                 if (req->rq_phase == RQ_PHASE_NEW)
@@ -1103,7 +1107,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                  * req times out */
                 CDEBUG(D_HA, "set %p going to sleep for %d seconds\n",
                        set, timeout);
-                lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ,
+                lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout ? timeout : 1),
                                        ptlrpc_expired_set,
                                        ptlrpc_interrupted_set, set);
                 rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi);
@@ -1262,7 +1266,7 @@ EXPORT_SYMBOL(ptlrpc_req_xid);
 void ptlrpc_unregister_reply (struct ptlrpc_request *request)
 {
         int                rc;
-        wait_queue_head_t *wq;
+        cfs_waitq_t       *wq;
         struct l_wait_info lwi;
 
         LASSERT(!in_interrupt ());             /* might sleep */
@@ -1283,7 +1287,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
         for (;;) {
                 /* Network access will complete in finite time but the HUGE
                  * timeout lets us CWARN for visibility of sluggish NALs */
-                lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT(cfs_time_seconds(300), NULL, NULL);
                 rc = l_wait_event (*wq, !ptlrpc_client_receiving_reply(request), &lwi);
                 if (rc == 0)
                         return;
@@ -1305,8 +1309,19 @@ void ptlrpc_free_committed(struct obd_import *imp)
 
         LASSERT_SPIN_LOCKED(&imp->imp_lock);
 
-        CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n",
-               imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+
+        if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked &&
+            imp->imp_generation == imp->imp_last_generation_checked) {
+                CDEBUG(D_HA, "%s: skip recheck for last_committed "LPU64"\n",
+                       imp->imp_obd->obd_name, imp->imp_peer_committed_transno);
+                return;
+        }
+        
+        CDEBUG(D_HA, "%s: committing for last_committed "LPU64" gen %d\n",
+               imp->imp_obd->obd_name, imp->imp_peer_committed_transno,
+               imp->imp_generation);
+        imp->imp_last_transno_checked = imp->imp_peer_committed_transno;
+        imp->imp_last_generation_checked = imp->imp_generation;
 
         list_for_each_safe(tmp, saved, &imp->imp_replay_list) {
                 req = list_entry(tmp, struct ptlrpc_request, rq_replay_list);
@@ -1469,7 +1484,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
         struct l_wait_info lwi;
         struct obd_import *imp = req->rq_import;
         unsigned long flags;
-        int timeout = 0;
+        cfs_duration_t timeout = 0;
         ENTRY;
 
         LASSERT(req->rq_set == NULL);
@@ -1477,10 +1492,10 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
         atomic_inc(&imp->imp_inflight);
 
         /* for distributed debugging */
-        req->rq_reqmsg->status = current->pid;
+        req->rq_reqmsg->status = cfs_curproc_pid();
         LASSERT(imp->imp_obd != NULL);
         CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc "
-               "%s:%s:%d:"LPU64":%s:%d\n", current->comm,
+               "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
                libcfs_nid2str(imp->imp_connection->c_peer.nid),
@@ -1499,7 +1514,7 @@ restart:
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
 
                 DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%s != %s)",
-                          current->comm,
+                          cfs_curproc_comm(),
                           ptlrpc_import_state_name(req->rq_send_state),
                           ptlrpc_import_state_name(imp->imp_state));
                 lwi = LWI_INTR(interrupted_request, req);
@@ -1508,7 +1523,7 @@ restart:
                                    req->rq_err || req->rq_intr),
                                   &lwi);
                 DEBUG_REQ(D_HA, req, "\"%s\" awake: (%s == %s or %d/%d == 1)",
-                          current->comm,
+                          cfs_curproc_comm(),
                           ptlrpc_import_state_name(imp->imp_state),
                           ptlrpc_import_state_name(req->rq_send_state),
                           req->rq_err, req->rq_intr);
@@ -1565,10 +1580,11 @@ restart:
         rc = ptl_send_rpc(req, 0);
         if (rc) {
                 DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc);
-                timeout = 1;
+                timeout = CFS_TICK;
         } else {
-                timeout = MAX(req->rq_timeout * HZ, 1);
-                DEBUG_REQ(D_NET, req, "-- sleeping for %d jiffies", timeout);
+                timeout = cfs_timeout_cap(cfs_time_seconds(req->rq_timeout));
+                DEBUG_REQ(D_NET, req, 
+                          "-- sleeping for "CFS_DURATION_T" jiffies", timeout);
         }
         lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request,
                                req);
@@ -1576,7 +1592,7 @@ restart:
         DEBUG_REQ(D_NET, req, "-- done sleeping");
 
         CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc "
-               "%s:%s:%d:"LPU64":%s:%d\n", current->comm,
+               "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                imp->imp_obd->obd_uuid.uuid,
                req->rq_reqmsg->status, req->rq_xid,
                libcfs_nid2str(imp->imp_connection->c_peer.nid),
@@ -1659,7 +1675,7 @@ restart:
         req->rq_phase = RQ_PHASE_INTERPRET;
 
         atomic_dec(&imp->imp_inflight);
-        wake_up(&imp->imp_recovery_waitq);
+        cfs_waitq_signal(&imp->imp_recovery_waitq);
         RETURN(rc);
 }
 
@@ -1675,6 +1691,7 @@ static int ptlrpc_replay_interpret(struct ptlrpc_request *req,
         struct obd_import *imp = req->rq_import;
         unsigned long flags;
 
+        ENTRY;
         atomic_dec(&imp->imp_replay_inflight);
 
         if (!req->rq_replied) {
@@ -1817,7 +1834,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp)
 }
 
 static __u64 ptlrpc_last_xid = 0;
-static spinlock_t ptlrpc_last_xid_lock = SPIN_LOCK_UNLOCKED;
+spinlock_t ptlrpc_last_xid_lock;
 
 __u64 ptlrpc_next_xid(void)
 {
index fc55e25..1d2e228 100644 (file)
@@ -25,9 +25,9 @@
 
 #define DEBUG_SUBSYSTEM S_RPC
 #ifdef __KERNEL__
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
 #else
 #include <liblustre.h>
 #endif
@@ -167,9 +167,9 @@ struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c)
 
 void ptlrpc_init_connection(void)
 {
-        INIT_LIST_HEAD(&conn_list);
-        INIT_LIST_HEAD(&conn_unused_list);
-        conn_lock = SPIN_LOCK_UNLOCKED;
+        CFS_INIT_LIST_HEAD(&conn_list);
+        CFS_INIT_LIST_HEAD(&conn_unused_list);
+        spin_lock_init(&conn_lock);
 }
 
 void ptlrpc_cleanup_connection(void)
index 2222df9..08b1af5 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RPC
 
-#ifdef __KERNEL__
-#include <linux/module.h>
-#else
+#ifndef __KERNEL__
 #include <liblustre.h>
 #endif
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <obd_class.h>
+#include <lustre_net.h>
 #include "ptlrpc_internal.h"
 
 lnet_handle_eq_t   ptlrpc_eq_h;
@@ -185,7 +183,7 @@ void request_in_callback(lnet_event_t *ev)
                         /* We moaned above already... */
                         return;
                 }
-                OBD_ALLOC_GFP(req, sizeof(*req), GFP_ATOMIC);
+                OBD_ALLOC_GFP(req, sizeof(*req), CFS_ALLOC_ATOMIC_TRY);
                 if (req == NULL) {
                         CERROR("Can't allocate incoming request descriptor: "
                                "Dropping %s RPC from %s\n",
@@ -239,7 +237,7 @@ void request_in_callback(lnet_event_t *ev)
 
         /* NB everything can disappear under us once the request
          * has been queued and we unlock, so do the wake now... */
-        wake_up(&service->srv_waitq);
+        cfs_waitq_signal(&service->srv_waitq);
 
         spin_unlock_irqrestore(&service->srv_lock, flags);
         EXIT;
@@ -320,7 +318,7 @@ void server_bulk_callback (lnet_event_t *ev)
         if (ev->unlinked) {
                 /* This is the last callback no matter what... */
                 desc->bd_network_rw = 0;
-                wake_up(&desc->bd_waitq);
+                cfs_waitq_signal(&desc->bd_waitq);
         }
 
         spin_unlock_irqrestore (&desc->bd_lock, flags);
@@ -402,7 +400,7 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid,
 
 void ptlrpc_ni_fini(void)
 {
-        wait_queue_head_t   waitq;
+        cfs_waitq_t         waitq;
         struct l_wait_info  lwi;
         int                 rc;
         int                 retries;
@@ -427,8 +425,8 @@ void ptlrpc_ni_fini(void)
                                 CWARN("Event queue still busy\n");
 
                         /* Wait for a bit */
-                        init_waitqueue_head(&waitq);
-                        lwi = LWI_TIMEOUT(2*HZ, NULL, NULL);
+                        cfs_waitq_init(&waitq);
+                        lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL);
                         l_wait_event(waitq, 0, &lwi);
                         break;
                 }
@@ -486,7 +484,7 @@ int ptlrpc_ni_init(void)
 }
 
 #ifndef __KERNEL__
-LIST_HEAD(liblustre_wait_callbacks);
+CFS_LIST_HEAD(liblustre_wait_callbacks);
 void *liblustre_services_callback;
 
 void *
index 7afd850..bb9fd86 100644 (file)
  */
 
 #define DEBUG_SUBSYSTEM S_RPC
-#ifdef __KERNEL__
-# include <linux/config.h>
-# include <linux/module.h>
-# include <linux/kmod.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_import.h>
-#include <linux/lustre_export.h>
-#include <linux/obd.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_ver.h>
+#include <obd_support.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_export.h>
+#include <obd.h>
+#include <obd_class.h>
+#include <lustre_ver.h>
 
 #include "ptlrpc_internal.h"
 
@@ -143,11 +139,12 @@ int ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt)
                                imp->imp_replayable ?
                                       "wait for recovery to complete" : "fail");
 
+                IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
+                spin_unlock_irqrestore(&imp->imp_lock, flags);
+    
                 if (obd_dump_on_timeout)
                         libcfs_debug_dumplog();
 
-                IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON);
-                spin_unlock_irqrestore(&imp->imp_lock, flags);
                 obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON);
                 rc = 1;
         } else {
@@ -199,8 +196,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
         LASSERT(imp->imp_invalid);
 
         /* wait for all requests to error out and call completion callbacks */
-        lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL,
-                               NULL, NULL);
+        lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
+                               NULL, NULL, NULL);
         rc = l_wait_event(imp->imp_recovery_waitq,
                           (atomic_read(&imp->imp_inflight) == 0),
                           &lwi);
@@ -322,6 +319,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
         struct ptlrpc_connect_async_args *aa;
         unsigned long flags;
 
+        ENTRY;
         spin_lock_irqsave(&imp->imp_lock, flags);
         if (imp->imp_state == LUSTRE_IMP_CLOSED) {
                 spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -369,9 +367,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
                        imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd));
                 /* Don't retry if connect fails */
                 rc = 0;
-                obd_set_info(obd->obd_self_export,
-                             strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
-                             sizeof(rc), &rc);
+                obd_set_info_async(obd->obd_self_export,
+                                   strlen(KEY_INIT_RECOV), KEY_INIT_RECOV,
+                                   sizeof(rc), &rc, NULL);
         }
 
         rc = obd_reconnect(imp->imp_obd->obd_self_export, obd,
@@ -683,13 +681,14 @@ finish:
                        (char *)imp->imp_connection->c_remote_uuid.uuid, rc);
         }
 
-        wake_up(&imp->imp_recovery_waitq);
+        cfs_waitq_signal(&imp->imp_recovery_waitq);
         RETURN(rc);
 }
 
 static int completed_replay_interpret(struct ptlrpc_request *req,
                                     void * data, int rc)
 {
+        ENTRY;
         atomic_dec(&req->rq_import->imp_replay_inflight);
         if (req->rq_status == 0) {
                 ptlrpc_import_recovery_state_machine(req->rq_import);
@@ -736,7 +735,7 @@ static int ptlrpc_invalidate_import_thread(void *data)
         ENTRY;
 
         ptlrpc_daemonize("ll_imp_inval");
-
+        
         CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n",
                imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd),
                imp->imp_connection->c_remote_uuid.uuid);
@@ -757,6 +756,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
         char *target_start;
         int target_len;
 
+        ENTRY;
         if (imp->imp_state == LUSTRE_IMP_EVICTED) {
                 deuuidify(obd2cli_tgt(imp->imp_obd), NULL,
                           &target_start, &target_len);
@@ -768,7 +768,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
                        imp->imp_connection->c_remote_uuid.uuid);
 
 #ifdef __KERNEL__
-                rc = kernel_thread(ptlrpc_invalidate_import_thread, imp,
+                rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp,
                                    CLONE_VM | CLONE_FILES);
                 if (rc < 0)
                         CERROR("error starting invalidate thread: %d\n", rc);
@@ -832,7 +832,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp)
         }
 
         if (imp->imp_state == LUSTRE_IMP_FULL) {
-                wake_up(&imp->imp_recovery_waitq);
+                cfs_waitq_signal(&imp->imp_recovery_waitq);
                 ptlrpc_wake_delayed(imp);
         }
 
@@ -865,8 +865,8 @@ int ptlrpc_disconnect_import(struct obd_import *imp)
 
         if (ptlrpc_import_in_recovery(imp)) {
                 struct l_wait_info lwi;
-                lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep,
-                                       NULL, NULL);
+                lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
+                                       back_to_sleep, NULL, NULL);
                 rc = l_wait_event(imp->imp_recovery_waitq,
                                   !ptlrpc_import_in_recovery(imp), &lwi);
 
index 5f2186c..1a65430 100644 (file)
 #endif
 
 /* LUSTRE_VERSION_CODE */
-#include <linux/lustre_ver.h>
+#include <lustre_ver.h>
 
-#include <linux/obd_support.h>
+#include <obd_support.h>
 /* lustre_swab_mdt_body */
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 /* obd2cli_tgt() (required by DEBUG_REQ()) */
-#include <linux/obd.h>
+#include <obd.h>
 
 /* struct ptlrpc_request, lustre_msg* */
-#include <linux/lustre_req_layout.h>
+#include <lustre_req_layout.h>
 
 static const struct req_msg_field *empty[] = {}; /* none */
 
index 1342d1a..bcab551 100644 (file)
 #endif
 
 #ifdef __KERNEL__
-#include <linux/fs.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_net.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <lustre_net.h>
 #include <libcfs/list.h>
 
 /* This is a callback from the llog_* functions.
@@ -309,11 +309,10 @@ out:
 
 static int llog_client_close(struct llog_handle *handle)
 {
-        int rc = 0;
         /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because
            the servers all close the file at the end of every
            other LLOG_ RPC. */
-        RETURN(rc);
+        return(0);
 }
 
 
index 877c0c7..735ed31 100644 (file)
 #endif
 
 #ifdef __KERNEL__
-#include <linux/fs.h>
+#include <libcfs/libcfs.h>
 #else
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
+#include <obd_class.h>
+#include <lustre_log.h>
 #include <libcfs/list.h>
-#include <linux/lvfs.h>
+#include <lvfs.h>
 
 #ifdef __KERNEL__
 int llog_origin_connect(struct llog_ctxt *ctxt, int count,
index ee7e161..cf588d3 100644 (file)
 
 #ifndef __KERNEL__
 #include <liblustre.h>
-#else
-#include <linux/fs.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_log.h>
-#include <linux/lustre_net.h>
+#include <obd_class.h>
+#include <lustre_log.h>
+#include <lustre_net.h>
 #include <libcfs/list.h>
-#include <linux/lustre_fsfilt.h>
+#include <lustre_fsfilt.h>
 
-#ifdef __KERNEL__
+#if defined(__KERNEL__) && defined(LUSTRE_LOG_SERVER)
 
 int llog_origin_handle_create(struct ptlrpc_request *req)
 {
index a2eec28..70266b5 100644 (file)
  */
 #define DEBUG_SUBSYSTEM S_CLASS
 
-#include <linux/obd_support.h>
-#include <linux/obd.h>
-#include <linux/lprocfs_status.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_net.h>
-#include <linux/obd_class.h>
-#include <linux/seq_file.h>
+#include <obd_support.h>
+#include <obd.h>
+#include <lprocfs_status.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_net.h>
+#include <obd_class.h>
 #include "ptlrpc_internal.h"
 
 
index 6ddf32b..b05c5a3 100644 (file)
 #ifndef __KERNEL__
 #include <liblustre.h>
 #endif
-#include <linux/obd_support.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_lib.h>
-#include <linux/obd.h>
+#include <obd_support.h>
+#include <lustre_net.h>
+#include <lustre_lib.h>
+#include <obd.h>
 #include "ptlrpc_internal.h"
 
 static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len,
@@ -177,7 +177,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc)
         for (;;) {
                 /* Network access will complete in finite time but the HUGE
                  * timeout lets us CWARN for visibility of sluggish NALs */
-                lwi = LWI_TIMEOUT (300 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT (cfs_time_seconds(300), NULL, NULL);
                 rc = l_wait_event(desc->bd_waitq, 
                                   !ptlrpc_bulk_active(desc), &lwi);
                 if (rc == 0)
@@ -266,7 +266,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req)
         /* Disconnect a bulk desc from the network. Idempotent. Not
          * thread-safe (i.e. only interlocks with completion callback). */
         struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-        wait_queue_head_t       *wq;
+        cfs_waitq_t             *wq;
         struct l_wait_info       lwi;
         int                      rc;
 
@@ -292,7 +292,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req)
         for (;;) {
                 /* Network access will complete in finite time but the HUGE
                  * timeout lets us CWARN for visibility of sluggish NALs */
-                lwi = LWI_TIMEOUT (300 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT (cfs_time_seconds(300), NULL, NULL);
                 rc = l_wait_event(*wq, !ptlrpc_bulk_active(desc), &lwi);
                 if (rc == 0)
                         return;
index 22be02c..bad1387 100644 (file)
@@ -33,9 +33,9 @@
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
 
 
 #define HDR_SIZE(count) \
@@ -108,8 +108,8 @@ int lustre_pack_request (struct ptlrpc_request *req,
 }
 
 #if RS_DEBUG
-LIST_HEAD(ptlrpc_rs_debug_lru);
-spinlock_t ptlrpc_rs_debug_lock = SPIN_LOCK_UNLOCKED;
+CFS_LIST_HEAD(ptlrpc_rs_debug_lru);
+spinlock_t ptlrpc_rs_debug_lock;
 
 #define PTLRPC_RS_DEBUG_LRU_ADD(rs)                                     \
 do {                                                                    \
@@ -147,7 +147,7 @@ static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc
                 spin_unlock_irqrestore(&svc->srv_lock, flags);
                 /* If we cannot get anything for some long time, we better
                    bail out instead of waiting infinitely */
-                lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
                 rc = l_wait_event(svc->srv_free_rs_waitq,
                                   !list_empty(&svc->srv_free_rs_list), &lwi);
                 if (rc)
@@ -192,8 +192,8 @@ int lustre_pack_reply (struct ptlrpc_request *req,
         rs->rs_cb_id.cbid_arg = rs;
         rs->rs_service = req->rq_rqbd->rqbd_service;
         rs->rs_size = size;
-        INIT_LIST_HEAD(&rs->rs_exp_list);
-        INIT_LIST_HEAD(&rs->rs_obd_list);
+        CFS_INIT_LIST_HEAD(&rs->rs_exp_list);
+        CFS_INIT_LIST_HEAD(&rs->rs_obd_list);
 
         req->rq_replen = msg_len;
         req->rq_reply_state = rs;
@@ -279,7 +279,7 @@ void lustre_free_reply_state (struct ptlrpc_reply_state *rs)
                 list_add(&rs->rs_list,
                          &svc->srv_free_rs_list);
                 spin_unlock_irqrestore(&svc->srv_lock, flags);
-                wake_up(&svc->srv_free_rs_waitq);
+                cfs_waitq_signal(&svc->srv_free_rs_waitq);
         } else {
                 OBD_FREE(rs, rs->rs_size);
         }
@@ -915,6 +915,8 @@ void lustre_swab_lov_desc (struct lov_desc *ld)
         __swab64s (&ld->ld_default_stripe_size);
         __swab64s (&ld->ld_default_stripe_offset);
         __swab32s (&ld->ld_pattern);
+        __swab32s (&ld->ld_qos_threshold);
+        __swab32s (&ld->ld_qos_maxage);
         /* uuid endian insensitive */
 }
 
@@ -1102,7 +1104,6 @@ void lustre_assert_wire_constants(void)
          * running on Linux tau 2.6.15-dirty #13 SMP Sat Feb 11 18:30:54 MSK 2006 i686 i686 i386 GNU/
          * with gcc version 3.3.3 (SuSE Linux) */
 
-
         /* Constants... */
         LASSERTF(PTLRPC_MSG_MAGIC == 0x0BD00BD0," found %lld\n",
                  (long long)PTLRPC_MSG_MAGIC);
@@ -2243,26 +2244,22 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
-        LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n",
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_threshold));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
+        LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_padding_1));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n",
+        LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_padding_2));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_padding_3));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_padding_4));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4));
         LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_uuid));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n",
index 5dfbe85..865dcf0 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_import.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_lib.h>
+#include <lustre_ha.h>
+#include <lustre_import.h>
 
 #include "ptlrpc_internal.h"
 
@@ -49,7 +49,7 @@ void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc)
         md->length = desc->bd_iov_count;
 }
 
-void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page,
                           int pageoffset, int len)
 {
         lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count];
@@ -67,9 +67,9 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc)
         
         for (i = 0; i < desc->bd_iov_count ; i++) {
                 lnet_kiov_t *kiov = &desc->bd_iov[i];
-                memset(kmap(kiov->kiov_page)+kiov->kiov_offset, 0xab,
-                                    kiov->kiov_len);
-                kunmap(kiov->kiov_page);
+                memset(cfs_kmap(kiov->kiov_page)+kiov->kiov_offset, 0xab,
+                       kiov->kiov_len);
+                cfs_kunmap(kiov->kiov_page);
         }
 }
 
@@ -102,7 +102,7 @@ static int can_merge_iovs(lnet_md_iovec_t *existing, lnet_md_iovec_t *candidate)
         return 0;
 }
 
-void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, 
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, 
                           int pageoffset, int len)
 {
         lnet_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count];
index cfdce8d..53335f7 100644 (file)
 #ifndef __KERNEL__
 #include <liblustre.h>
 #else
-#include <linux/version.h>
-#include <asm/semaphore.h>
 #define DEBUG_SUBSYSTEM S_RPC
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
+#include <obd_support.h>
+#include <obd_class.h>
 #include "ptlrpc_internal.h"
 
-static DECLARE_MUTEX(pinger_sem);
-static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports);
+struct semaphore pinger_sem;
+static struct list_head pinger_imports = CFS_LIST_HEAD_INIT(pinger_imports);
 
 int ptlrpc_ping(struct obd_import *imp)
 {
@@ -67,14 +65,14 @@ int ptlrpc_ping(struct obd_import *imp)
 
 static void ptlrpc_update_next_ping(struct obd_import *imp)
 {
-        imp->imp_next_ping = jiffies + HZ *
-                (imp->imp_state == LUSTRE_IMP_DISCON ? RECONNECT_INTERVAL :
-                                                       PING_INTERVAL);
+        imp->imp_next_ping = cfs_time_shift( /* replaces "jiffies + HZ * interval" */
+                                (imp->imp_state == LUSTRE_IMP_DISCON ? /* disconnected imports use RECONNECT_INTERVAL */
+                                 RECONNECT_INTERVAL : PING_INTERVAL));
 }
 
 void ptlrpc_ping_import_soon(struct obd_import *imp)
 {
-        imp->imp_next_ping = jiffies;
+        imp->imp_next_ping = cfs_time_current(); /* deadline = now, so the import is already due */
 }
 
 #ifdef __KERNEL__
@@ -84,20 +82,20 @@ static int ptlrpc_pinger_main(void *arg)
         struct ptlrpc_thread *thread = data->thread;
         ENTRY;
 
-        ptlrpc_daemonize(data->name);
+        cfs_daemonize(data->name);
 
         /* Record that the thread is running */
         thread->t_flags = SVC_RUNNING;
-        wake_up(&thread->t_ctl_waitq);
+        cfs_waitq_signal(&thread->t_ctl_waitq);
 
         /* And now, loop forever, pinging as needed. */
         while (1) {
-                unsigned long this_ping = jiffies;
-                long time_to_next_ping = 0;
+                cfs_time_t this_ping = cfs_time_current();
                 struct l_wait_info lwi;
+                cfs_duration_t time_to_next_ping;
                 struct list_head *iter;
 
-                down(&pinger_sem);
+                mutex_down(&pinger_sem);
                 list_for_each(iter, &pinger_imports) {
                         struct obd_import *imp =
                                 list_entry(iter, struct obd_import,
@@ -119,13 +117,13 @@ static int ptlrpc_pinger_main(void *arg)
                         if (force ||
                             /* if the next ping is within, say, 5 jiffies from
                                now, go ahead and ping. See note below. */
-                            time_after_eq(this_ping, imp->imp_next_ping - 5)) {
+                            cfs_time_aftereq(this_ping, 
+                                             imp->imp_next_ping - 5 * CFS_TICK)) {
                                 if (level == LUSTRE_IMP_DISCON &&
                                     !imp->imp_deactive) {
                                         /* wait at least a timeout before
                                            trying recovery again. */
-                                        imp->imp_next_ping = jiffies +
-                                                obd_timeout * HZ;
+                                        imp->imp_next_ping = cfs_time_shift(obd_timeout);
                                         ptlrpc_initiate_recovery(imp);
                                 } else if (level != LUSTRE_IMP_FULL ||
                                          imp->imp_obd->obd_no_recov ||
@@ -144,20 +142,24 @@ static int ptlrpc_pinger_main(void *arg)
                                 if (!imp->imp_pingable)
                                         continue;
                                 CDEBUG(D_INFO,
-                                       "don't need to ping %s (%lu > %lu)\n",
+                                       "don't need to ping %s ("CFS_TIME_T
+                                       " > "CFS_TIME_T")\n",
                                        obd2cli_tgt(imp->imp_obd),
                                        imp->imp_next_ping, this_ping);
                         }
 
                         /* obd_timeout might have changed */
-                        if (time_after(imp->imp_next_ping,
-                                       this_ping + PING_INTERVAL * HZ))
+                        if (cfs_time_after(imp->imp_next_ping,
+                                           cfs_time_add(this_ping, 
+                                                        cfs_time_seconds(PING_INTERVAL))))
                                 ptlrpc_update_next_ping(imp);
                 }
-                up(&pinger_sem);
+                mutex_up(&pinger_sem);
 
                 /* Wait until the next ping time, or until we're stopped. */
-                time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies;
+                time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, 
+                                                              cfs_time_seconds(PING_INTERVAL)), 
+                                                 cfs_time_current());
 
                 /* The ping sent by ptlrpc_send_rpc may get sent out
                    say .01 second after this.
@@ -165,10 +167,11 @@ static int ptlrpc_pinger_main(void *arg)
                    next ping time to next_ping + .01 sec, which means
                    we will SKIP the next ping at next_ping, and the
                    ping will get sent 2 timeouts from now!  Beware. */
-                CDEBUG(D_INFO, "next ping in %lu (%lu)\n", time_to_next_ping,
-                       this_ping + PING_INTERVAL * HZ);
+                CDEBUG(D_INFO, "next ping in "CFS_DURATION_T" ("CFS_TIME_T")\n", 
+                               time_to_next_ping, 
+                               cfs_time_add(this_ping, cfs_time_seconds(PING_INTERVAL)));
                 if (time_to_next_ping > 0) {
-                        lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ),
+                        lwi = LWI_TIMEOUT(max_t(cfs_duration_t, time_to_next_ping, cfs_time_seconds(1)),
                                           NULL, NULL);
                         l_wait_event(thread->t_ctl_waitq,
                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
@@ -185,9 +188,9 @@ static int ptlrpc_pinger_main(void *arg)
         }
 
         thread->t_flags = SVC_STOPPED;
-        wake_up(&thread->t_ctl_waitq);
+        cfs_waitq_signal(&thread->t_ctl_waitq);
 
-        CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid);
+        CDEBUG(D_NET, "pinger thread exiting, process %d\n", cfs_curproc_pid());
         return 0;
 }
 
@@ -209,14 +212,14 @@ int ptlrpc_start_pinger(void)
         OBD_ALLOC(pinger_thread, sizeof(*pinger_thread));
         if (pinger_thread == NULL)
                 RETURN(-ENOMEM);
-        init_waitqueue_head(&pinger_thread->t_ctl_waitq);
+        cfs_waitq_init(&pinger_thread->t_ctl_waitq);
 
         d.name = "ll_ping";
         d.thread = pinger_thread;
 
         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
          * just drop the VM and FILES in ptlrpc_daemonize() right away. */
-        rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
+        rc = cfs_kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 CERROR("cannot start thread: %d\n", rc);
                 OBD_FREE(pinger_thread, sizeof(*pinger_thread));
@@ -240,10 +243,10 @@ int ptlrpc_stop_pinger(void)
 
         if (pinger_thread == NULL)
                 RETURN(-EALREADY);
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         pinger_thread->t_flags = SVC_STOPPING;
-        wake_up(&pinger_thread->t_ctl_waitq);
-        up(&pinger_sem);
+        cfs_waitq_signal(&pinger_thread->t_ctl_waitq);
+        mutex_up(&pinger_sem);
 
         l_wait_event(pinger_thread->t_ctl_waitq,
                      (pinger_thread->t_flags & SVC_STOPPED), &lwi);
@@ -264,7 +267,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
         if (!list_empty(&imp->imp_pinger_chain))
                 RETURN(-EALREADY);
 
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         CDEBUG(D_HA, "adding pingable import %s->%s\n",
                imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         ptlrpc_update_next_ping(imp);
@@ -273,7 +276,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
         class_import_get(imp);
 
         ptlrpc_pinger_wake_up();
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
 
         RETURN(0);
 }
@@ -284,12 +287,12 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
         if (list_empty(&imp->imp_pinger_chain))
                 RETURN(-ENOENT);
 
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         list_del_init(&imp->imp_pinger_chain);
         CDEBUG(D_HA, "removing pingable import %s->%s\n",
                imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         class_import_put(imp);
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
         RETURN(0);
 }
 
@@ -297,7 +300,7 @@ void ptlrpc_pinger_wake_up()
 {
 #ifdef ENABLE_PINGER
         pinger_thread->t_flags |= SVC_EVENT;
-        wake_up(&pinger_thread->t_ctl_waitq);
+        cfs_waitq_signal(&pinger_thread->t_ctl_waitq);
 #endif
 }
 
@@ -415,7 +418,7 @@ void ping_evictor_start(void)
 
         init_waitqueue_head(&pet_waitq);
 
-        rc = kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FS);
+        rc = cfs_kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 pet_refcount--;
                 CERROR("Cannot start ping evictor thread: %d\n", rc);
@@ -441,14 +444,14 @@ EXPORT_SYMBOL(ping_evictor_stop);
 #ifdef ENABLE_PINGER
 static struct pinger_data {
         int             pd_recursion;
-        unsigned long   pd_this_ping;   /* jiffies */
-        unsigned long   pd_next_ping;   /* jiffies */
+        cfs_time_t      pd_this_ping;   /* cfs_time_current() stamp of the current round (was jiffies) */
+        cfs_time_t      pd_next_ping;   /* when the next round is due, same units (was jiffies) */
         struct ptlrpc_request_set *pd_set;
 } pinger_args;
 
 static int pinger_check_rpcs(void *arg)
 {
-        unsigned long curtime = jiffies;
+        cfs_time_t curtime = cfs_time_current();
         struct ptlrpc_request *req;
         struct ptlrpc_request_set *set;
         struct list_head *iter;
@@ -483,14 +486,15 @@ static int pinger_check_rpcs(void *arg)
         set = pd->pd_set;
 
         /* add rpcs into set */
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         list_for_each(iter, &pinger_imports) {
                 struct obd_import *imp =
                         list_entry(iter, struct obd_import, imp_pinger_chain);
                 int generation, level;
                 unsigned long flags;
 
-                if (time_after_eq(pd->pd_this_ping, imp->imp_next_ping - 5)) {
+                if (cfs_time_aftereq(pd->pd_this_ping, 
+                                     imp->imp_next_ping - 5 * CFS_TICK)) {
                         /* Add a ping. */
                         spin_lock_irqsave(&imp->imp_lock, flags);
                         generation = imp->imp_generation;
@@ -517,13 +521,13 @@ static int pinger_check_rpcs(void *arg)
                         req->rq_import_generation = generation;
                         ptlrpc_set_add_req(set, req);
                 } else {
-                        CDEBUG(D_HA, "don't need to ping %s (%lu > "
-                               "%lu)\n", obd2cli_tgt(imp->imp_obd),
+                        CDEBUG(D_HA, "don't need to ping %s ("CFS_TIME_T" > "
+                               CFS_TIME_T")\n", obd2cli_tgt(imp->imp_obd),
                                imp->imp_next_ping, pd->pd_this_ping);
                 }
         }
         pd->pd_this_ping = curtime;
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
 
         /* Might be empty, that's OK. */
         if (set->set_remaining == 0)
@@ -543,14 +547,16 @@ do_check_set:
         rc = ptlrpc_check_set(set);
 
         /* not finished, and we are not expired, simply return */
-        if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)){
+        if (!rc && cfs_time_before(curtime, 
+                                   cfs_time_add(pd->pd_this_ping, 
+                                                cfs_time_seconds(PING_INTERVAL)))) {
                 CDEBUG(D_HA, "not finished, but also not expired\n");
                 pd->pd_recursion--;
                 return 0;
         }
 
         /* Expire all the requests that didn't come back. */
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         list_for_each(iter, &set->set_requests) {
                 req = list_entry(iter, struct ptlrpc_request,
                                  rq_set_chain);
@@ -569,13 +575,14 @@ do_check_set:
                 CDEBUG(D_HA, "pinger initiate expire_one_request\n");
                 ptlrpc_expire_one_request(req);
         }
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
 
         ptlrpc_set_destroy(set);
         pd->pd_set = NULL;
 
 out:
-        pd->pd_next_ping = pd->pd_this_ping + PING_INTERVAL * HZ;
+        pd->pd_next_ping = cfs_time_add(pd->pd_this_ping,
+                                        cfs_time_seconds(PING_INTERVAL));
         pd->pd_this_ping = 0; /* XXX for debug */
 
         CDEBUG(D_HA, "finished a round ping\n");
@@ -608,15 +615,15 @@ int ptlrpc_stop_pinger(void)
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 {
 #ifdef ENABLE_PINGER
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         ptlrpc_update_next_ping(imp);
         if (pinger_args.pd_set == NULL &&
             time_before(imp->imp_next_ping, pinger_args.pd_next_ping)) {
-                CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n",
-                        imp->imp_next_ping, jiffies);
+                CDEBUG(D_HA, "set next ping to "CFS_TIME_T"(cur "CFS_TIME_T")\n",
+                        imp->imp_next_ping, cfs_time_current());
                 pinger_args.pd_next_ping = imp->imp_next_ping;
         }
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
 #endif
 }
 
@@ -630,10 +637,10 @@ int ptlrpc_pinger_add_import(struct obd_import *imp)
                imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         ptlrpc_pinger_sending_on_import(imp);
 
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         list_add_tail(&imp->imp_pinger_chain, &pinger_imports);
         class_import_get(imp);
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
 
         RETURN(0);
 }
@@ -644,12 +651,12 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
         if (list_empty(&imp->imp_pinger_chain))
                 RETURN(-ENOENT);
 
-        down(&pinger_sem);
+        mutex_down(&pinger_sem);
         list_del_init(&imp->imp_pinger_chain);
         CDEBUG(D_HA, "removing pingable import %s->%s\n",
                imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd));
         class_import_put(imp);
-        up(&pinger_sem);
+        mutex_up(&pinger_sem);
         RETURN(0);
 }
 
index 9ff8e2b..dc780d7 100644 (file)
@@ -112,7 +112,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req);
 
 /* pers.c */
 void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc);
-void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page,
+void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, 
                           int pageoffset, int len);
 void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc);
 
index 408ec6c..2b40c0b 100644 (file)
 #endif
 #define DEBUG_SUBSYSTEM S_RPC
 
-#ifdef __KERNEL__
-# include <linux/module.h>
-# include <linux/init.h>
-#else
+#ifndef __KERNEL__
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_req_layout.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lustre_req_layout.h>
 
 #include "ptlrpc_internal.h"
 
+extern spinlock_t ptlrpc_last_xid_lock;
+extern spinlock_t ptlrpc_rs_debug_lock;
+extern spinlock_t ptlrpc_all_services_lock;
+extern struct semaphore pinger_sem;
+extern struct semaphore ptlrpcd_sem;
 extern int ptlrpc_init_portals(void);
 extern void ptlrpc_exit_portals(void);
 
@@ -51,6 +53,11 @@ __init int ptlrpc_init(void)
         ENTRY;
 
         lustre_assert_wire_constants();
+        spin_lock_init(&ptlrpc_last_xid_lock);
+        spin_lock_init(&ptlrpc_rs_debug_lock);
+        spin_lock_init(&ptlrpc_all_services_lock);
+        init_mutex(&pinger_sem);
+        init_mutex(&ptlrpcd_sem);
 
         rc = req_layout_init();
         if (rc)
@@ -266,6 +273,5 @@ MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
 MODULE_DESCRIPTION("Lustre Request Processor and Lock Management");
 MODULE_LICENSE("GPL");
 
-module_init(ptlrpc_init);
-module_exit(ptlrpc_exit);
+cfs_module(ptlrpc, "1.0.0", ptlrpc_init, ptlrpc_exit);
 #endif
index 2f28528..fa315eb 100644 (file)
 #define DEBUG_SUBSYSTEM S_RPC
 
 #ifdef __KERNEL__
-# include <linux/version.h>
-# include <linux/module.h>
-# include <linux/mm.h>
-# include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#  include <linux/workqueue.h>
-#  include <linux/smp_lock.h>
-# else
-#  include <linux/locks.h>
-# endif
-# include <linux/ctype.h>
-# include <linux/init.h>
+# include <libcfs/libcfs.h>
 #else /* __KERNEL__ */
 # include <liblustre.h>
 # include <ctype.h>
 #endif
 
 #include <libcfs/kp30.h>
-#include <linux/lustre_net.h>
+#include <lustre_net.h>
+# include <lustre_lib.h>
 
-#include <linux/lustre_ha.h>
-#include <linux/obd_support.h> /* for OBD_FAIL_CHECK */
-#include <linux/lprocfs_status.h>
+#include <lustre_ha.h>
+#include <obd_support.h> /* for OBD_FAIL_CHECK */
+#include <lprocfs_status.h>
 
 #define LIOD_STOP 0
 struct ptlrpcd_ctl {
@@ -59,7 +48,7 @@ struct ptlrpcd_ctl {
         struct completion         pc_starting;
         struct completion         pc_finishing;
         struct list_head          pc_req_list;
-        wait_queue_head_t         pc_waitq;
+        cfs_waitq_t               pc_waitq;
         struct ptlrpc_request_set *pc_set;
         char                      pc_name[16];
 #ifndef __KERNEL__
@@ -71,7 +60,7 @@ struct ptlrpcd_ctl {
 static struct ptlrpcd_ctl ptlrpcd_pc;
 static struct ptlrpcd_ctl ptlrpcd_recovery_pc;
 
-static DECLARE_MUTEX(ptlrpcd_sem);
+struct semaphore ptlrpcd_sem;
 static int ptlrpcd_users = 0;
 
 void ptlrpcd_wake(struct ptlrpc_request *req)
@@ -80,7 +69,7 @@ void ptlrpcd_wake(struct ptlrpc_request *req)
 
         LASSERT(pc != NULL);
 
-        wake_up(&pc->pc_waitq);
+        cfs_waitq_signal(&pc->pc_waitq);
 }
 
 /* requests that are added to the ptlrpcd queue are sent via
@@ -153,15 +142,9 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc)
 static int ptlrpcd(void *arg)
 {
         struct ptlrpcd_ctl *pc = arg;
-        unsigned long flags;
         ENTRY;
 
-        libcfs_daemonize(pc->pc_name);
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
+        cfs_daemonize(pc->pc_name);
 
         complete(&pc->pc_starting);
 
@@ -171,18 +154,19 @@ static int ptlrpcd(void *arg)
          * on the set's new_req_list and ptlrpcd_check moves them into
          * the set. */
         while (1) {
-                wait_queue_t set_wait;
+                cfs_waitlink_t set_wait;
                 struct l_wait_info lwi;
-                int timeout;
+                cfs_duration_t timeout;
 
-                timeout = ptlrpc_set_next_timeout(pc->pc_set) * HZ;
+                timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set));
                 lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set);
 
                 /* ala the pinger, wait on pc's waitqueue and the set's */
-                init_waitqueue_entry(&set_wait, current);
-                add_wait_queue(&pc->pc_set->set_waitq, &set_wait);
+                cfs_waitlink_init(&set_wait);
+                cfs_waitq_add(&pc->pc_set->set_waitq, &set_wait);
+                cfs_waitq_forward(&set_wait, &pc->pc_waitq);
                 l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi);
-                remove_wait_queue(&pc->pc_set->set_waitq, &set_wait);
+                cfs_waitq_del(&pc->pc_set->set_waitq, &set_wait);
 
                 if (test_bit(LIOD_STOP, &pc->pc_flags))
                         break;
@@ -218,13 +202,14 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
 {
         int rc;
 
+        ENTRY;
         memset(pc, 0, sizeof(*pc));
         init_completion(&pc->pc_starting);
         init_completion(&pc->pc_finishing);
-        init_waitqueue_head(&pc->pc_waitq);
+        cfs_waitq_init(&pc->pc_waitq);
         pc->pc_flags = 0;
         spin_lock_init(&pc->pc_lock);
-        INIT_LIST_HEAD(&pc->pc_req_list);
+        CFS_INIT_LIST_HEAD(&pc->pc_req_list);
         snprintf (pc->pc_name, sizeof (pc->pc_name), name);
 
         pc->pc_set = ptlrpc_prep_set();
@@ -232,7 +217,7 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
                 RETURN(-ENOMEM);
 
 #ifdef __KERNEL__
-        rc = kernel_thread(ptlrpcd, pc, 0);
+        rc = cfs_kernel_thread(ptlrpcd, pc, 0);
         if (rc < 0)  {
                 ptlrpc_set_destroy(pc->pc_set);
                 RETURN(rc);
@@ -250,7 +235,7 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc)
 static void ptlrpcd_stop(struct ptlrpcd_ctl *pc)
 {
         set_bit(LIOD_STOP, &pc->pc_flags);
-        wake_up(&pc->pc_waitq);
+        cfs_waitq_signal(&pc->pc_waitq);
 #ifdef __KERNEL__
         wait_for_completion(&pc->pc_finishing);
 #else
@@ -264,7 +249,7 @@ int ptlrpcd_addref(void)
         int rc = 0;
         ENTRY;
 
-        down(&ptlrpcd_sem);
+        mutex_down(&ptlrpcd_sem);
         if (++ptlrpcd_users != 1)
                 GOTO(out, rc);
 
@@ -281,16 +266,16 @@ int ptlrpcd_addref(void)
                 GOTO(out, rc);
         }
 out:
-        up(&ptlrpcd_sem);
+        mutex_up(&ptlrpcd_sem);
         RETURN(rc);
 }
 
 void ptlrpcd_decref(void)
 {
-        down(&ptlrpcd_sem);
+        mutex_down(&ptlrpcd_sem); /* same lock ptlrpcd_addref() takes around ptlrpcd_users */
         if (--ptlrpcd_users == 0) {
                 ptlrpcd_stop(&ptlrpcd_pc);
                 ptlrpcd_stop(&ptlrpcd_recovery_pc);
         }
-        up(&ptlrpcd_sem);
+        mutex_up(&ptlrpcd_sem); /* released only after both ptlrpcd_stop() calls return */
 }
index c9684fe..8ad20c6 100644 (file)
 #endif
 
 #ifdef __KERNEL__
-#include <linux/fs.h>
+# include <libcfs/libcfs.h>
 #else
 # include <libcfs/list.h>
 # include <liblustre.h>
 #endif
 
 #include <libcfs/kp30.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_commit_confd.h>
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <obd_class.h>
+#include <lustre_commit_confd.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
 #include <lnet/types.h>
 #include <libcfs/list.h>
-#include <linux/lustre_log.h>
+#include <lustre_log.h>
 #include "ptlrpc_internal.h"
 
 #ifdef __KERNEL__
@@ -132,7 +132,7 @@ void llcd_send(struct llog_canceld_ctxt *llcd)
         list_add_tail(&llcd->llcd_list, &llcd->llcd_lcm->lcm_llcd_pending);
         spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock);
 
-        wake_up_nr(&llcd->llcd_lcm->lcm_waitq, 1);
+        cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1);
 }
 EXPORT_SYMBOL(llcd_send);
 
@@ -150,7 +150,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
 
         LASSERT(ctxt);
 
-        down(&ctxt->loc_sem);
+        mutex_down(&ctxt->loc_sem);
         if (ctxt->loc_imp == NULL) {
                 CWARN("no import for ctxt %p\n", ctxt);
                 GOTO(out, rc = 0);
@@ -188,7 +188,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
                 llcd_send(llcd);
         }
 out:
-        up(&ctxt->loc_sem);
+        mutex_up(&ctxt->loc_sem);
         return rc;
 }
 EXPORT_SYMBOL(llog_obd_repl_cancel);
@@ -201,13 +201,13 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
         if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) {
                 CDEBUG(D_HA, "reverse import disconnected, put llcd %p:%p\n",
                        ctxt->loc_llcd, ctxt);
-                down(&ctxt->loc_sem);
+                mutex_down(&ctxt->loc_sem);
                 if (ctxt->loc_llcd != NULL) {
                         llcd_put(ctxt->loc_llcd);
                         ctxt->loc_llcd = NULL;
                 }
                 ctxt->loc_imp = NULL;
-                up(&ctxt->loc_sem);
+                mutex_up(&ctxt->loc_sem);
         } else {
                 rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
         }
@@ -221,7 +221,6 @@ static int log_commit_thread(void *arg)
         struct llog_commit_master *lcm = arg;
         struct llog_commit_daemon *lcd;
         struct llog_canceld_ctxt *llcd, *n;
-        char name[24];
         ENTRY;
 
         OBD_ALLOC(lcd, sizeof(*lcd));
@@ -229,18 +228,18 @@ static int log_commit_thread(void *arg)
                 RETURN(-ENOMEM);
 
         spin_lock(&lcm->lcm_thread_lock);
-        THREAD_NAME(name, sizeof(name) - 1,
+        THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1,
                     "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total));
         atomic_inc(&lcm->lcm_thread_total);
         spin_unlock(&lcm->lcm_thread_lock);
 
-        ptlrpc_daemonize(name); /* thread never needs to do IO */
+        ptlrpc_daemonize(cfs_curproc_comm()); /* thread never needs to do IO */
 
-        INIT_LIST_HEAD(&lcd->lcd_lcm_list);
-        INIT_LIST_HEAD(&lcd->lcd_llcd_list);
+        CFS_INIT_LIST_HEAD(&lcd->lcd_lcm_list);
+        CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list);
         lcd->lcd_lcm = lcm;
 
-        CDEBUG(D_HA, "%s started\n", current->comm);
+        CDEBUG(D_HA, "%s started\n", cfs_curproc_comm());
         do {
                 struct ptlrpc_request *request;
                 struct obd_import *import = NULL;
@@ -331,15 +330,15 @@ static int log_commit_thread(void *arg)
                                 continue;
                         }
 
-                        down(&llcd->llcd_ctxt->loc_sem);
+                        mutex_down(&llcd->llcd_ctxt->loc_sem);
                         if (llcd->llcd_ctxt->loc_imp == NULL) {
-                                up(&llcd->llcd_ctxt->loc_sem);
+                                mutex_up(&llcd->llcd_ctxt->loc_sem);
                                 CWARN("import will be destroyed, put "
                                       "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
                                 llcd_put(llcd);
                                 continue;
                         }
-                        up(&llcd->llcd_ctxt->loc_sem);
+                        mutex_up(&llcd->llcd_ctxt->loc_sem);
 
                         if (!import || (import == LP_POISON) ||
                             (import->imp_client == LP_POISON)) {
@@ -360,7 +359,7 @@ static int log_commit_thread(void *arg)
                                 spin_lock(&lcm->lcm_llcd_lock);
                                 list_splice(&lcd->lcd_llcd_list,
                                             &lcm->lcm_llcd_resend);
-                                INIT_LIST_HEAD(&lcd->lcd_llcd_list);
+                                CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list);
                                 spin_unlock(&lcm->lcm_llcd_lock);
                                 break;
                         }
@@ -370,16 +369,16 @@ static int log_commit_thread(void *arg)
                         request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL;
 
                         request->rq_replen = lustre_msg_size(0, NULL);
-                        down(&llcd->llcd_ctxt->loc_sem);
+                        mutex_down(&llcd->llcd_ctxt->loc_sem);
                         if (llcd->llcd_ctxt->loc_imp == NULL) {
-                                up(&llcd->llcd_ctxt->loc_sem);
+                                mutex_up(&llcd->llcd_ctxt->loc_sem);
                                 CWARN("import will be destroyed, put "
                                       "llcd %p:%p\n", llcd, llcd->llcd_ctxt);
                                 llcd_put(llcd);
                                 ptlrpc_req_finished(request);
                                 continue;
                         }
-                        up(&llcd->llcd_ctxt->loc_sem);
+                        mutex_up(&llcd->llcd_ctxt->loc_sem);
                         rc = ptlrpc_queue_wait(request);
                         ptlrpc_req_finished(request);
 
@@ -421,12 +420,12 @@ static int log_commit_thread(void *arg)
         spin_unlock(&lcm->lcm_thread_lock);
         OBD_FREE(lcd, sizeof(*lcd));
 
-        CDEBUG(D_HA, "%s exiting\n", current->comm);
+        CDEBUG(D_HA, "%s exiting\n", cfs_curproc_comm());
 
         spin_lock(&lcm->lcm_thread_lock);
         atomic_dec(&lcm->lcm_thread_total);
         spin_unlock(&lcm->lcm_thread_lock);
-        wake_up(&lcm->lcm_waitq);
+        cfs_waitq_signal(&lcm->lcm_waitq);
 
         return 0;
 }
@@ -439,7 +438,7 @@ int llog_start_commit_thread(void)
         if (atomic_read(&lcm->lcm_thread_total) >= lcm->lcm_thread_max)
                 RETURN(0);
 
-        rc = kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
+        rc = cfs_kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 CERROR("error starting thread #%d: %d\n",
                        atomic_read(&lcm->lcm_thread_total), rc);
@@ -459,14 +458,14 @@ static struct llog_process_args {
 
 int llog_init_commit_master(void)
 {
-        INIT_LIST_HEAD(&lcm->lcm_thread_busy);
-        INIT_LIST_HEAD(&lcm->lcm_thread_idle);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle);
         spin_lock_init(&lcm->lcm_thread_lock);
         atomic_set(&lcm->lcm_thread_numidle, 0);
-        init_waitqueue_head(&lcm->lcm_waitq);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
-        INIT_LIST_HEAD(&lcm->lcm_llcd_free);
+        cfs_waitq_init(&lcm->lcm_waitq);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_pending);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_resend);
+        CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_free);
         spin_lock_init(&lcm->lcm_llcd_lock);
         atomic_set(&lcm->lcm_llcd_numfree, 0);
         lcm->lcm_llcd_minfree = 0;
@@ -481,7 +480,7 @@ int llog_cleanup_commit_master(int force)
         lcm->lcm_flags |= LLOG_LCM_FL_EXIT;
         if (force)
                 lcm->lcm_flags |= LLOG_LCM_FL_EXIT_FORCE;
-        wake_up(&lcm->lcm_waitq);
+        cfs_waitq_signal(&lcm->lcm_waitq);
 
         wait_event_interruptible(lcm->lcm_waitq,
                                  atomic_read(&lcm->lcm_thread_total) == 0);
@@ -498,7 +497,7 @@ static int log_process_thread(void *args)
         int rc;
         ENTRY;
 
-        up(&data->llpa_sem);
+        mutex_up(&data->llpa_sem);
         ptlrpc_daemonize("llog_process");     /* thread does IO to log files */
 
         rc = llog_create(ctxt, &llh, &logid, NULL);
@@ -536,12 +535,12 @@ static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg)
         int rc;
         ENTRY;
 
-        down(&llpa.llpa_sem);
+        mutex_down(&llpa.llpa_sem);
         llpa.llpa_ctxt = ctxt;
         llpa.llpa_cb = handle;
         llpa.llpa_arg = arg;
 
-        rc = kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
+        rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES);
         if (rc < 0)
                 CERROR("error starting log_process_thread: %d\n", rc);
         else {
@@ -566,17 +565,17 @@ int llog_repl_connect(struct llog_ctxt *ctxt, int count,
                 llog_sync(ctxt, NULL);
         }
 
-        down(&ctxt->loc_sem);
+        mutex_down(&ctxt->loc_sem);
         ctxt->loc_gen = *gen;
         llcd = llcd_grab();
         if (llcd == NULL) {
                 CERROR("couldn't get an llcd\n");
-                up(&ctxt->loc_sem);
+                mutex_up(&ctxt->loc_sem);
                 RETURN(-ENOMEM);
         }
         llcd->llcd_ctxt = ctxt;
         ctxt->loc_llcd = llcd;
-        up(&ctxt->loc_sem);
+        mutex_up(&ctxt->loc_sem);
 
         rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid);
         if (rc != 0)
index 30f6aa7..cbbed63 100644 (file)
 
 #define DEBUG_SUBSYSTEM S_RPC
 #ifdef __KERNEL__
-# include <linux/config.h>
-# include <linux/module.h>
-# include <linux/kmod.h>
-# include <linux/list.h>
+# include <libcfs/libcfs.h>
 #else
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_support.h>
-#include <linux/lustre_ha.h>
-#include <linux/lustre_net.h>
-#include <linux/lustre_import.h>
-#include <linux/lustre_export.h>
-#include <linux/obd.h>
-#include <linux/obd_ost.h>
-#include <linux/obd_class.h>
-#include <linux/obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
+#include <obd_support.h>
+#include <lustre_ha.h>
+#include <lustre_net.h>
+#include <lustre_import.h>
+#include <lustre_export.h>
+#include <obd.h>
+#include <obd_ost.h>
+#include <obd_class.h>
+#include <obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */
 #include <libcfs/list.h>
 
 #include "ptlrpc_internal.h"
@@ -164,6 +161,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight)
          * get rid of them now.
          */
         spin_lock_irqsave(&imp->imp_lock, flags);
+        imp->imp_last_transno_checked = 0;
         ptlrpc_free_committed(imp);
         last_transno = imp->imp_last_replay_transno;
         spin_unlock_irqrestore(&imp->imp_lock, flags);
@@ -314,6 +312,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active)
         struct obd_device *obd = imp->imp_obd;
         int rc = 0;
 
+        ENTRY;
         LASSERT(obd);
 
         /* When deactivating, mark import invalid, and abort in-flight
@@ -390,7 +389,8 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *imp,
         CDEBUG(D_HA, "%s: recovery started, waiting\n",
                obd2cli_tgt(imp->imp_obd));
 
-        lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL);
+        lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), 
+                          NULL, NULL);
         rc = l_wait_event(imp->imp_recovery_waitq,
                           !ptlrpc_import_in_recovery(imp), &lwi);
         CDEBUG(D_HA, "%s: recovery finished\n",
index 9b3fb47..d0fcb3a 100644 (file)
 #include <liblustre.h>
 #include <libcfs/kp30.h>
 #endif
-#include <linux/obd_support.h>
-#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <obd_support.h>
+#include <obd_class.h>
+#include <lustre_net.h>
+#include <lu_object.h>
 #include <lnet/types.h>
 #include "ptlrpc_internal.h"
 
-#include <linux/lu_object.h>
-
 /* forward ref */
 static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc);
 
-static LIST_HEAD (ptlrpc_all_services);
-static spinlock_t ptlrpc_all_services_lock = SPIN_LOCK_UNLOCKED;
+static CFS_LIST_HEAD (ptlrpc_all_services);
+spinlock_t ptlrpc_all_services_lock;
 
 static char *
 ptlrpc_alloc_request_buffer (int size)
@@ -78,7 +77,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_service *svc)
         rqbd->rqbd_refcount = 0;
         rqbd->rqbd_cbid.cbid_fn = request_in_callback;
         rqbd->rqbd_cbid.cbid_arg = rqbd;
-        INIT_LIST_HEAD(&rqbd->rqbd_reqs);
+        CFS_INIT_LIST_HEAD(&rqbd->rqbd_reqs);
         rqbd->rqbd_buffer = ptlrpc_alloc_request_buffer(svc->srv_buf_size);
 
         if (rqbd->rqbd_buffer == NULL) {
@@ -170,7 +169,7 @@ ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs)
         rs->rs_scheduled = 1;
         list_del (&rs->rs_list);
         list_add (&rs->rs_list, &svc->srv_reply_queue);
-        wake_up (&svc->srv_waitq);
+        cfs_waitq_signal (&svc->srv_waitq);
 }
 
 void
@@ -205,13 +204,6 @@ ptlrpc_commit_replies (struct obd_device *obd)
         spin_unlock_irqrestore (&obd->obd_uncommitted_replies_lock, flags);
 }
 
-static long
-timeval_sub(struct timeval *large, struct timeval *small)
-{
-        return (large->tv_sec - small->tv_sec) * 1000000 +
-                (large->tv_usec - small->tv_usec);
-}
-
 static int
 ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc)
 {
@@ -283,7 +275,7 @@ struct ptlrpc_service *
 ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
                 int req_portal, int rep_portal, int watchdog_timeout,
                 svc_handler_t handler, char *name,
-                struct proc_dir_entry *proc_entry,
+                cfs_proc_dir_entry_t *proc_entry,
                 svcreq_printfn_t svcreq_printfn, int num_threads)
 {
         int                    rc;
@@ -301,8 +293,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
 
         service->srv_name = name;
         spin_lock_init(&service->srv_lock);
-        INIT_LIST_HEAD(&service->srv_threads);
-        init_waitqueue_head(&service->srv_waitq);
+        CFS_INIT_LIST_HEAD(&service->srv_threads);
+        cfs_waitq_init(&service->srv_waitq);
 
         service->srv_nbuf_per_group = nbufs;
         service->srv_max_req_size = max_req_size;
@@ -316,15 +308,15 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size,
         service->srv_request_max_cull_seq = 0;
         service->srv_num_threads = num_threads;
 
-        INIT_LIST_HEAD(&service->srv_request_queue);
-        INIT_LIST_HEAD(&service->srv_idle_rqbds);
-        INIT_LIST_HEAD(&service->srv_active_rqbds);
-        INIT_LIST_HEAD(&service->srv_history_rqbds);
-        INIT_LIST_HEAD(&service->srv_request_history);
-        INIT_LIST_HEAD(&service->srv_active_replies);
-        INIT_LIST_HEAD(&service->srv_reply_queue);
-        INIT_LIST_HEAD(&service->srv_free_rs_list);
-        init_waitqueue_head(&service->srv_free_rs_waitq);
+        CFS_INIT_LIST_HEAD(&service->srv_request_queue);
+        CFS_INIT_LIST_HEAD(&service->srv_idle_rqbds);
+        CFS_INIT_LIST_HEAD(&service->srv_active_rqbds);
+        CFS_INIT_LIST_HEAD(&service->srv_history_rqbds);
+        CFS_INIT_LIST_HEAD(&service->srv_request_history);
+        CFS_INIT_LIST_HEAD(&service->srv_active_replies);
+        CFS_INIT_LIST_HEAD(&service->srv_reply_queue);
+        CFS_INIT_LIST_HEAD(&service->srv_free_rs_list);
+        cfs_waitq_init(&service->srv_free_rs_waitq);
 
         spin_lock (&ptlrpc_all_services_lock);
         list_add (&service->srv_list, &ptlrpc_all_services);
@@ -559,7 +551,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         spin_unlock_irqrestore (&svc->srv_lock, flags);
 
         do_gettimeofday(&work_start);
-        timediff = timeval_sub(&work_start, &request->rq_arrival_time);
+        timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL);
         if (svc->srv_stats != NULL) {
                 lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR,
                                     timediff);
@@ -631,7 +623,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         request->rq_phase = RQ_PHASE_INTERPRET;
 
         CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc "
-               "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm,
+               "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                (request->rq_export ?
                 (char *)request->rq_export->exp_client_uuid.uuid : "0"),
                (request->rq_export ?
@@ -645,7 +637,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc,
         request->rq_phase = RQ_PHASE_COMPLETE;
 
         CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc "
-               "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm,
+               "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(),
                (request->rq_export ?
                 (char *)request->rq_export->exp_client_uuid.uuid : "0"),
                (request->rq_export ?
@@ -661,15 +653,15 @@ put_conn:
  out:
         do_gettimeofday(&work_end);
 
-        timediff = timeval_sub(&work_end, &work_start);
+        timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
 
         if (timediff / 1000000 > (long)obd_timeout)
                 CERROR("request "LPU64" opc %u from %s processed in %lds "
                        "trans "LPU64" rc %d/%d\n",
                        request->rq_xid, request->rq_reqmsg->opc,
                        libcfs_id2str(request->rq_peer),
-                       timeval_sub(&work_end,
-                                   &request->rq_arrival_time) / 1000000,
+                       cfs_timeval_sub(&work_end, &request->rq_arrival_time,
+                                       NULL) / 1000000,
                        request->rq_repmsg ? request->rq_repmsg->transno :
                        request->rq_transno, request->rq_status,
                        request->rq_repmsg ? request->rq_repmsg->status : -999);
@@ -678,7 +670,8 @@ put_conn:
                        "%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
                        request->rq_xid, request->rq_reqmsg->opc,
                        libcfs_id2str(request->rq_peer), timediff,
-                       timeval_sub(&work_end, &request->rq_arrival_time),
+                       cfs_timeval_sub(&work_end, &request->rq_arrival_time,
+                                       NULL),
                        request->rq_transno, request->rq_status,
                        request->rq_repmsg ? request->rq_repmsg->status : -999);
 
@@ -841,9 +834,10 @@ void ptlrpc_daemonize(char *name)
         struct fs_struct *fs = current->fs;
 
         atomic_inc(&fs->count);
-        libcfs_daemonize(name);
-        exit_fs(current);
+        cfs_daemonize(name);
+        exit_fs(cfs_current());
         current->fs = fs;
+        set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd);
 }
 
 static void
@@ -884,7 +878,7 @@ static int ptlrpc_main(void *arg)
         struct ptlrpc_reply_state *rs;
         struct lc_watchdog     *watchdog;
         unsigned long           flags;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+#ifdef WITH_GROUP_INFO
         struct group_info *ginfo = NULL;
 #endif
         struct lu_context ctx;
@@ -906,11 +900,11 @@ static int ptlrpc_main(void *arg)
                                 break;
                         num_cpu++;
                 }
-                set_cpus_allowed(current, node_to_cpumask(cpu_to_node(cpu)));
+                set_cpus_allowed(cfs_current(), node_to_cpumask(cpu_to_node(cpu)));
         }
 #endif
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+#ifdef WITH_GROUP_INFO
         ginfo = groups_alloc(0);
         if (!ginfo) {
                 rc = -ENOMEM;
@@ -935,7 +929,7 @@ static int ptlrpc_main(void *arg)
         ctx.lc_thread = thread;
 
         /* Alloc reply state structure for this one */
-        OBD_ALLOC_GFP(rs, svc->srv_max_reply_size, GFP_KERNEL);
+        OBD_ALLOC_GFP(rs, svc->srv_max_reply_size, CFS_ALLOC_STD);
         if (!rs) {
                 rc = -ENOMEM;
                 goto out_srv_init;
@@ -947,7 +941,7 @@ static int ptlrpc_main(void *arg)
          * wake up our creator. Note: @data is invalid after this point,
          * because it's allocated on ptlrpc_start_thread() stack.
          */
-        wake_up(&thread->t_ctl_waitq);
+        cfs_waitq_signal(&thread->t_ctl_waitq);
 
         watchdog = lc_watchdog_add(svc->srv_watchdog_timeout,
                                    LC_WATCHDOG_DEFAULT_CB, NULL);
@@ -956,7 +950,7 @@ static int ptlrpc_main(void *arg)
         svc->srv_nthreads++;
         list_add(&rs->rs_list, &svc->srv_free_rs_list);
         spin_unlock_irqrestore(&svc->srv_lock, flags);
-        wake_up(&svc->srv_free_rs_waitq);
+        cfs_waitq_signal(&svc->srv_free_rs_waitq);
 
         CDEBUG(D_NET, "service thread %d started\n", thread->t_id);
 
@@ -1002,7 +996,7 @@ static int ptlrpc_main(void *arg)
                         /* I just failed to repost request buffers.  Wait
                          * for a timeout (unless something else happens)
                          * before I try again */
-                        svc->srv_rqbd_timeout = HZ/10;
+                        svc->srv_rqbd_timeout = cfs_time_seconds(1)/10;
                 }
         }
 
@@ -1025,7 +1019,7 @@ out:
         thread->t_id = rc;
         thread->t_flags = SVC_STOPPED;
 
-        wake_up(&thread->t_ctl_waitq);
+        cfs_waitq_signal(&thread->t_ctl_waitq);
         spin_unlock_irqrestore(&svc->srv_lock, flags);
 
         return rc;
@@ -1041,7 +1035,7 @@ static void ptlrpc_stop_thread(struct ptlrpc_service *svc,
         thread->t_flags = SVC_STOPPING;
         spin_unlock_irqrestore(&svc->srv_lock, flags);
 
-        wake_up_all(&svc->srv_waitq);
+        cfs_waitq_broadcast(&svc->srv_waitq);
         l_wait_event(thread->t_ctl_waitq, (thread->t_flags & SVC_STOPPED),
                      &lwi);
 
@@ -1104,7 +1098,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
         OBD_ALLOC(thread, sizeof(*thread));
         if (thread == NULL)
                 RETURN(-ENOMEM);
-        init_waitqueue_head(&thread->t_ctl_waitq);
+        cfs_waitq_init(&thread->t_ctl_waitq);
         thread->t_id = id;
 
         spin_lock_irqsave(&svc->srv_lock, flags);
@@ -1119,7 +1113,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
         /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
          * just drop the VM and FILES in ptlrpc_daemonize() right away.
          */
-        rc = kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES);
+        rc = cfs_kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES);
         if (rc < 0) {
                 CERROR("cannot start thread '%s': rc %d\n", name, rc);
 
@@ -1184,7 +1178,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
 
                 /* Network access will complete in finite time but the HUGE
                  * timeout lets us CWARN for visibility of sluggish NALs */
-                lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL);
+                lwi = LWI_TIMEOUT(cfs_time_seconds(300), NULL, NULL);
                 rc = l_wait_event(service->srv_waitq,
                                   service->srv_nrqbd_receiving == 0,
                                   &lwi);
@@ -1237,7 +1231,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
         /* wait for all outstanding replies to complete (they were
          * scheduled having been flagged to abort above) */
         while (atomic_read(&service->srv_outstanding_replies) != 0) {
-                struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL);
+                struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL);
 
                 rc = l_wait_event(service->srv_waitq,
                                   !list_empty(&service->srv_reply_queue), &lwi);
@@ -1284,7 +1278,7 @@ int ptlrpc_service_health_check(struct ptlrpc_service *svc)
                              struct ptlrpc_request, rq_list);
 
         do_gettimeofday(&right_now);
-        timediff = timeval_sub(&right_now, &request->rq_arrival_time);
+        timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL);
 
         cutoff = obd_health_check_timeout;
 
index 9fd7910..6086088 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_cfg.h>
-#include <linux/obd_ost.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_quota.h>
+#include <obd_class.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_cfg.h>
+#include <obd_ost.h>
+#include <lustre_fsfilt.h>
+#include <lustre_quota.h>
 #include "quota_internal.h"
 
 #ifdef __KERNEL__
@@ -201,10 +201,11 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk)
                 rc = -EINTR;
 
         qchk->obd_uuid = cli->cl_target_uuid;
+        /* FIXME change strncmp to strcmp and save the strlen op */
         if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME,
             strlen(LUSTRE_OSC_NAME)))
-                memcpy(qchk->obd_type, LUSTRE_FILTER_NAME,
-                       strlen(LUSTRE_FILTER_NAME));
+                memcpy(qchk->obd_type, LUSTRE_OST_NAME,
+                       strlen(LUSTRE_OST_NAME));
         else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME,
                  strlen(LUSTRE_MDC_NAME)))
                 memcpy(qchk->obd_type, LUSTRE_MDS_NAME,
index 3aab55c..00b97db 100644 (file)
@@ -26,9 +26,9 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_quota.h>
-#include <linux/lustre_fsfilt.h>
+#include <obd_class.h>
+#include <lustre_quota.h>
+#include <lustre_fsfilt.h>
 #include "quota_internal.h"
 
 unsigned long default_bunit_sz = 100 * 1024 * 1024;       /* 100M bytes */
@@ -62,7 +62,7 @@ void qunit_cache_cleanup(void)
         if (qunit_cachep) {
                 int rc;
                 rc = kmem_cache_destroy(qunit_cachep);
-                LASSERT(rc == 0);
+                LASSERTF(rc == 0, "couldn't destory qunit_cache slab\n");
                 qunit_cachep = NULL;
         }
         EXIT;
@@ -269,7 +269,7 @@ static void remove_qunit_nolock(struct lustre_qunit *qunit)
 
 struct qunit_waiter {
         struct list_head qw_entry;
-        wait_queue_head_t qw_waitq;
+        cfs_waitq_t      qw_waitq;
         int qw_rc;
 };
 
index bd78c00..a8c4317 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_cfg.h>
-#include <linux/obd_ost.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_quota.h>
+#include <obd_class.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_cfg.h>
+#include <obd_ost.h>
+#include <lustre_fsfilt.h>
+#include <lustre_quota.h>
 #include "quota_internal.h"
 
 #ifdef __KERNEL__
@@ -91,6 +91,7 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
 int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
 {
         struct obd_device *obd = exp->exp_obd;
+        struct obd_device_target *obt = &obd->u.obt;
         struct lvfs_run_ctxt saved;
         int rc = 0;
         ENTRY;
@@ -98,6 +99,12 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
         switch (oqctl->qc_cmd) {
         case Q_QUOTAON:
         case Q_QUOTAOFF:
+                if (!atomic_dec_and_test(&obt->obt_quotachecking)) {
+                        CDEBUG(D_INFO, "other people are doing quotacheck\n");
+                        atomic_inc(&obt->obt_quotachecking);
+                        rc = -EBUSY;
+                        break;
+                }
         case Q_GETOINFO:
         case Q_GETOQUOTA:
         case Q_GETQUOTA:
@@ -113,6 +120,9 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
                 rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl);
                 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
+                if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF)
+                        atomic_inc(&obt->obt_quotachecking);
                 break;
         case Q_INITQUOTA:
                 {
index 5edd982..35a7f4a 100644 (file)
 # include <liblustre.h>
 #endif
 
-#include <linux/obd_class.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_cfg.h>
-#include <linux/obd_ost.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_quota.h>
+#include <obd_class.h>
+#include <lustre_mds.h>
+#include <lustre_dlm.h>
+#include <lustre_cfg.h>
+#include <obd_ost.h>
+#include <lustre_fsfilt.h>
+#include <lustre_quota.h>
 #include "quota_internal.h"
 
 
@@ -410,7 +410,7 @@ spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED;
 
 static struct list_head qinfo_hash[NR_DQHASH];
 /* SLAB cache for client quota context */
-kmem_cache_t *qinfo_cachep = NULL;
+cfs_mem_cache_t *qinfo_cachep = NULL;
 
 static inline int const hashfn(struct client_obd *cli,
                                unsigned long id,
@@ -460,7 +460,7 @@ static struct osc_quota_info *alloc_qinfo(struct client_obd *cli,
         struct osc_quota_info *oqi;
         ENTRY;
 
-        OBD_SLAB_ALLOC(oqi, qinfo_cachep, SLAB_KERNEL, sizeof(*oqi));
+        OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi));
         if(!oqi)
                 RETURN(NULL);
 
@@ -574,9 +574,9 @@ int osc_quota_init(void)
         ENTRY;
 
         LASSERT(qinfo_cachep == NULL);
-        qinfo_cachep = kmem_cache_create("osc_quota_info",
+        qinfo_cachep = cfs_mem_cache_create("osc_quota_info",
                                          sizeof(struct osc_quota_info),
-                                         0, 0, NULL, NULL);
+                                         0, 0);
         if (!qinfo_cachep)
                 RETURN(-ENOMEM);
 
@@ -601,8 +601,10 @@ int osc_quota_exit(void)
         }
         spin_unlock(&qinfo_list_lock);
 
-        rc = kmem_cache_destroy(qinfo_cachep);
-        LASSERT(rc == 0);
+        rc = cfs_mem_cache_destroy(qinfo_cachep);
+        LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n");
+        qinfo_cachep = NULL;
+
         RETURN(0);
 }
 
index 0389734..6e8438e 100644 (file)
@@ -14,7 +14,7 @@
 #ifndef __QUOTA_INTERNAL_H
 #define __QUOTA_INTERNAL_H
 
-#include <linux/lustre_quota.h>
+#include <lustre_quota.h>
 
 /* QUSG covnert bytes to blocks when counting block quota */
 #define QUSG(count, isblk)      (isblk ? toqb(count) : count)
index 9eb3cf3..7332669 100644 (file)
 #include <linux/init.h>
 #include <linux/quota.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_quota.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_mds.h>
+#include <obd_class.h>
+#include <lustre_quota.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
 
 #include "quota_internal.h"
 
@@ -71,7 +71,7 @@ void lustre_dquot_exit(void)
         if (lustre_dquot_cachep) {
                 int rc;
                 rc = kmem_cache_destroy(lustre_dquot_cachep);
-                LASSERT(rc == 0);
+                LASSERTF(rc == 0,"couldn't destroy lustre_dquot_cachep slab\n");
                 lustre_dquot_cachep = NULL;
         }
         EXIT;
@@ -528,10 +528,17 @@ static int mds_admin_quota_off(struct obd_device *obd,
 int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
+        struct obd_device_target *obt = &obd->u.obt;
         struct lvfs_run_ctxt saved;
         int rc;
         ENTRY;
 
+        if (!atomic_dec_and_test(&obt->obt_quotachecking)) {
+                CDEBUG(D_INFO, "other people are doing quotacheck\n");
+                atomic_inc(&obt->obt_quotachecking);
+                RETURN(-EBUSY);
+        }
+
         down(&mds->mds_qonoff_sem);
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         rc = mds_admin_quota_on(obd, oqctl);
@@ -546,16 +553,24 @@ int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl)
 out:
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         up(&mds->mds_qonoff_sem);
+        atomic_inc(&obt->obt_quotachecking);
         RETURN(rc);
 }
 
 int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl)
 {
         struct mds_obd *mds = &obd->u.mds;
+        struct obd_device_target *obt = &obd->u.obt;
         struct lvfs_run_ctxt saved;
         int rc, rc2;
         ENTRY;
 
+        if (!atomic_dec_and_test(&obt->obt_quotachecking)) {
+                CDEBUG(D_INFO, "other people are doing quotacheck\n");
+                atomic_inc(&obt->obt_quotachecking);
+                RETURN(-EBUSY);
+        }
+
         down(&mds->mds_qonoff_sem);
         /* close admin quota files */
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
@@ -566,6 +581,8 @@ int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl)
 
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         up(&mds->mds_qonoff_sem);
+        atomic_inc(&obt->obt_quotachecking);
+
         RETURN(rc ?: rc2);
 }
 
@@ -1068,13 +1085,13 @@ int mds_quota_recovery(struct obd_device *obd)
         int rc = 0;
         ENTRY;
 
-        down(&lov->lov_lock);
+        mutex_down(&lov->lov_lock);
         if (lov->desc.ld_tgt_count != lov->desc.ld_active_tgt_count) {
                 CWARN("Not all osts are active, abort quota recovery\n");
-                up(&lov->lov_lock);
+                mutex_up(&lov->lov_lock);
                 RETURN(rc);
         }
-        up(&lov->lov_lock);
+        mutex_up(&lov->lov_lock);
 
         data.obd = obd;
         init_completion(&data.comp);
index 6d49bcd..c2c2040 100644 (file)
 #include <linux/version.h>
 #include <linux/bitops.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_mds.h>
-#include <linux/obd_ost.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <obd_ost.h>
 
 char *test_quotafile[] = {"aquotacheck.user", "aquotacheck.group"};
 
index 11c86b4..cffb646 100644 (file)
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include <linux/obd_class.h>
-#include <linux/lustre_fsfilt.h>
-#include <linux/lustre_mds.h>
-#include <linux/obd_ost.h>
+#include <obd_class.h>
+#include <lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <obd_ost.h>
 
 static struct obd_quotactl oqctl;
 
index 89edc5b..3b6b640 100755 (executable)
@@ -46,25 +46,28 @@ LOCK=/var/lock/subsys/$SERVICE
 
 # Source function library.
 if [ -f /etc/init.d/functions ] ; then
-   . /etc/init.d/functions
+       . /etc/init.d/functions
 fi
 
 # Source networking configuration.
 if [ -f /etc/sysconfig/network ] ; then
-   . /etc/sysconfig/network
+       . /etc/sysconfig/network
 fi
 
 check_start_stop() {
-       # Check that networking is up.
-       [ "${NETWORKING}" = "no" ] && exit 0
+       # Exit codes now LSB compliant
+       # Check that networking is up. - exit 'not running'
+       [ "${NETWORKING}" = "no" ] && exit 7 
 
-       [ -x ${LCONF} -a -x ${LCTL} ] || exit 0
+       # exit 'not installed' 
+       [ -x ${LCONF} -a -x ${LCTL} ] || exit 5
 
        if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then
-                if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then
-                    echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
-                    exit 0
-                fi
+                       if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then
+                       echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping."
+                       # exit 'not configured'
+                       exit 6
+               fi
        fi
 
        # Create /var/lustre directory 
@@ -77,7 +80,7 @@ check_start_stop() {
 
 start() {
        if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then
-           if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
+               if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then
                cat >&2 <<EOF
 This script was run directly, which can be dangerous if you are using
 clumanager to manage Lustre services.
@@ -87,7 +90,7 @@ command to have this script start Lustre instead:
 
 touch /etc/lustre/start-despite-clumanager
 EOF
-               RETVAL=1
+               RETVAL=6  # program not configured
                return
            fi
        fi
@@ -95,7 +98,7 @@ EOF
        echo -n "Starting $SERVICE: "
        if [ $UID -ne 0 ]; then
                echo "Lustre should be started as root"
-               RETVAL=1
+               RETVAL=4 # insufficient privileges
                return
        fi
        ${LCONF} ${LCONF_START_ARGS}
@@ -114,7 +117,7 @@ stop() {
        echo -n "Shutting down $SERVICE: "
        if [ $UID -ne 0 ]; then
                echo "Lustre should be stopped as root"
-               RETVAL=1
+               RETVAL=4 # insufficient privileges
                return
        fi
        ${LCONF} ${LCONF_STOP_ARGS}
@@ -135,14 +138,19 @@ restart() {
 
 status() {
        STATE="stopped"
-       RETVAL=1
+       # LSB compliance - return 3 if service is not running
+       # Lustre-specific returns
+       # 150 - partial startup
+       # 151 - health_check unhealthy
+       # 152 - LBUG
+       RETVAL=3
        egrep -q "libcfs|lvfs|portals" /proc/modules && STATE="loaded"
 
        # check for any routes - on a portals router this is the only thing
        [ "`cat /proc/sys/lnet/routes 2> /dev/null`" ] && STATE="running" && RETVAL=0
        
        # check for any configured devices (may indicate partial startup)
-       [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=1
+       [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150
 
        # check for either a server or a client filesystem
        MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`"
@@ -159,14 +167,17 @@ status() {
 
        # check for error in health_check
        HEALTH="/proc/fs/lustre/health_check"
-       [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=2
+       [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151
 
        # check for LBUG
-       [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=3
+       [ -f  "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152
 
-       # Check if the service really exists
-       DUMMY=`lctl dl | grep $SERVICE`
-       [ $? -ne 0 ] && STATE="not_found" && RETVAL=5
+       # If Lustre is up, check if the service really exists
+        # Skip this if we are not checking a specific service
+       if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then
+               DUMMY=`lctl dl | grep $SERVICE`
+               [ $? -ne 0 ] && STATE="not_found" && RETVAL=3
+       fi
 
        echo $STATE
 }
index 3a921be..ca7258e 100644 (file)
@@ -43,7 +43,7 @@ MOUNTOPT=""
     MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES"
 [ "x$STRIPES_PER_OBJ" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ"
-MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE $MKFSOPT $MOUNTOPT $MDSOPT"
+MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT"
 
 MKFSOPT=""
 MOUNTOPT=""
@@ -53,8 +53,8 @@ MOUNTOPT=""
     MKFSOPT="--mkfsoptions=\"$MKFSOPT\""
 [ "x$ostfailover_HOST" != "x" ] &&
     MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`"
-OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
-OST2_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT"
+OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT"
+OST2_MKFS_OPTS=${OST2_MKFS_OPTS:-${OST_MKFS_OPTS}}
 
 MDS_MOUNT_OPTS="-o loop"
 OST_MOUNT_OPTS="-o loop"
index 711aa35..23ea6ac 100644 (file)
@@ -212,11 +212,22 @@ test_5() {
        # if all the modules have unloaded.
        umount -d $MOUNT &
        UMOUNT_PID=$!
-       sleep 2
+       sleep 6
        echo "killing umount"
        kill -TERM $UMOUNT_PID
        echo "waiting for umount to finish"
        wait $UMOUNT_PID
+       if grep " $MOUNT " /etc/mtab; then
+               echo "test 5: mtab after failed umount"
+               umount $MOUNT &
+               UMOUNT_PID=$!
+               sleep 2
+               echo "killing umount"
+               kill -TERM $UMOUNT_PID
+               echo "waiting for umount to finish"
+               wait $UMOUNT_PID
+               grep " $MOUNT " /etc/mtab && echo "test 5: mtab after second umount" && return 11
+       fi
 
        manual_umount_client
        # stop_mds is a no-op here, and should not fail
@@ -232,8 +243,9 @@ run_test 5 "force cleanup mds, then cleanup"
 test_5b() {
        start_ost
        [ -d $MOUNT ] || mkdir -p $MOUNT
+       grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10
        mount_client $MOUNT && return 1
-
+       grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11
        umount_client $MOUNT    
        # stop_mds is a no-op here, and should not fail
        cleanup_nocli || return $?
@@ -245,8 +257,9 @@ test_5c() {
        start_ost
        start_mds
        [ -d $MOUNT ] || mkdir -p $MOUNT
-       # Bad nid might still work if mgs is on 0@lo
-       mount -t lustre 1.2.3.4@tcp:/wrong.$FSNAME $MOUNT || :
+       grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10
+       mount -t lustre `facet_nid mgs`:/wrong.$FSNAME $MOUNT || :
+       grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11
        umount_client $MOUNT
        cleanup_nocli  || return $?
 }
@@ -256,18 +269,27 @@ test_5d() {
        start_ost
        start_mds
        stop_ost -f
+       grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10
        mount_client $MOUNT || return 1
        cleanup  || return $?
+       grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11
+       return 0
 }
 run_test 5d "mount with ost down"
 
 test_5e() {
        start_ost
        start_mds
+        # give MDS a chance to connect to OSTs (bz 10476)
+       sleep 5 
+
 #define OBD_FAIL_PTLRPC_DELAY_SEND       0x506
        do_facet client "sysctl -w lustre.fail_loc=0x80000506"
+       grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10
        mount_client $MOUNT || echo "mount failed (not fatal)"
        cleanup  || return $?
+       grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11
+       return 0
 }
 run_test 5e "delayed connect, don't crash (bug 10268)"
 
@@ -772,6 +794,7 @@ test_21() {
        echo Client mount with a running ost
        start_ost
        mount_client $MOUNT
+       sleep 5 #bz10476
        check_mount || return 41
        pass
 
index 933c988..fb9c99b 100644 (file)
@@ -53,8 +53,8 @@ int main(int argc, char **argv)
                 return 1;
         }
 
-        if (argc == 6)
-                st.st_blksize = strtoul(argv[4], 0, 0);
+        if (argc >= 6)
+                st.st_blksize = strtoul(argv[5], 0, 0);
         else if (fstat64(fd, &st) < 0) {
                 printf("Cannot stat %s:  %s\n", argv[1], strerror(errno));
                 return 1;
index 7cb8212..aa1ed8c 100644 (file)
 #include <dirent.h>
 
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
+#include <obd.h>
+#include <lustre_lib.h>
 #include <lustre/lustre_user.h>
-#include <linux/obd_lov.h>
+#include <obd_lov.h>
 
 #include <lnet/lnetctl.h>
 
index 425a26d..0d71f75 100755 (executable)
 #init_test_env $@
 
 mcstopall() {
+    # make sure we are using the primary server, so test-framework will
+    # be able to clean up properly.
+    activemds=`facet_active mds`
+    if [ $activemds != "mds" ]; then
+        fail mds
+    fi
+
     grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $*
     stop ost -f
     stop ost2 -f
index 8250f96..9335eda 100644 (file)
@@ -100,10 +100,14 @@ int main(int argc, char **argv)
                 exit(1);
         }
 
+#if 0
+        /* We cannot do this any longer, we do not store open special nodes
+         * on MDS after unlink */
         if (st1.st_mode != st2.st_mode) {  // can we do this?
                 fprintf(stderr, "fstat different value on %s and %s\n",                                 dname1, dname2);
                 exit(1);
         }
+#endif
 
         fprintf(stderr, "Ok, everything goes well.\n");
         return 0;
diff --git a/lustre/tests/qos.sh b/lustre/tests/qos.sh
new file mode 100644 (file)
index 0000000..572bef0
--- /dev/null
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+set -e
+
+export PATH=`dirname $0`/../utils:$PATH
+
+LFS=${LFS:-lfs}
+LCTL=${LCTL:-lctl}
+MOUNT=${MOUNT:-/mnt/lustre}
+MAXAGE=${MAXAGE:-1}
+
+QOSFILE=$MOUNT/qos_file
+TAB='--'
+
+echo "remove all files on $MOUNT..."
+rm -fr $MOUNT/*
+sleep 1                # to ensure we get up-to-date statfs info
+
+set_qos() {
+       for i in `ls /proc/fs/lustre/lov/*/qos_threshold`; do
+               echo $(($1/1024)) > $i 
+       done
+       for i in `ls /proc/fs/lustre/lov/*/qos_maxage`; do
+               echo $2 > $i
+       done
+}
+
+# assume all OSTs have the same free space
+OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1`
+TOTALAVAIL=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1`
+SINGLEAVAIL=$(($TOTALAVAIL/$OSTCOUNT))
+MINFREE=$((1024 * 4))  # 4M
+TOTALFFREE=`cat /proc/fs/lustre/llite/*/filesfree | head -n 1`
+
+if [ $SINGLEAVAIL -lt $MINFREE ]; then
+       echo "ERROR: single ost free size($SINGLEAVAIL kb) is too low!"
+       exit 1;
+fi
+if [ $OSTCOUNT -lt 3 ]; then
+       echo "WARN: ost count($OSTCOUNT) must be greater than 2!"
+       exit 0;
+fi
+
+qos_test_1() {
+       echo "[qos test 1]: creation skip almost full OST (avail space < threshold)"
+
+       # set qos_threshold as half ost size
+       THRESHOLD=$(($SINGLEAVAIL/2))
+       set_qos $THRESHOLD $MAXAGE
+
+       # set stripe number to 1
+       $LFS setstripe $QOSFILE 65536 -1 1
+       FULLOST=`$LFS find -q $QOSFILE | awk '/\s*\d*/ {print $1}'`
+       
+       # floodfill the FULLOST
+       echo "$TAB fill the OST $FULLOST to almost fullness..."
+       dd if=/dev/zero of=$QOSFILE count=$(($SINGLEAVAIL - $THRESHOLD + 1500)) bs=1k > /dev/null 2>&1 || return 1
+       echo "$TAB done"
+       
+       sleep $(($MAXAGE * 2))
+       echo "$TAB create 10 files with 1 stripe"
+       for i in `seq 10`; do
+               rm -f $MOUNT/file-$i
+               $LFS setstripe $MOUNT/file-$i 65536 -1 1
+               idx=`$LFS find -q $MOUNT/file-$i | awk '/\s*\d*/ {print $1}'`
+               if [ $idx -eq $FULLOST ]; then
+                       echo "$TAB ERROR: create object on full OST $FULLOST"
+                       return 1
+               fi
+       done
+       echo "$TAB no object created on OST $FULLOST"
+
+       # cleanup
+       for i in `seq 10`; do
+               rm -f $MOUNT/file-$i
+       done
+       rm -f $QOSFILE
+       # set threshold and maxage to normal value
+       set_qos 10240 1
+       
+       sleep 1
+       return 0
+}
+
+qos_test_2 () {
+       echo "[qos test 2]: creation balancing over all OSTs by free space"
+
+       if [ $OSTCOUNT -lt 3 ]; then
+               echo "$TAB WARN: OST count < 3, test skipped"
+               return 0
+       fi
+       
+       WADSZ=$(($SINGLEAVAIL * 3 / 4))
+       TOTALSZ=$(($WADSZ * $OSTCOUNT - 1))
+
+       # fill OST 0 to 3/4 fullness
+       $LFS setstripe $QOSFILE 65536 0 1
+       echo "$TAB fill the OST 0 to 3/4 fulness..."
+       dd if=/dev/zero of=$QOSFILE count=$WADSZ bs=1k > /dev/null 2>&1 || return 1
+       echo "$TAB done"
+
+       # write 2 stripe files to fill up other OSTs
+       LOOPCNT=500
+       echo "$TAB create $LOOPCNT files with 2 stripe..."
+       for i in `seq $LOOPCNT`; do
+               rm -f $MOUNT/file-$i
+               $LFS setstripe $MOUNT/file-$i 65536 -1 2
+       done
+       echo "$TAB done"
+
+       # the objects created on OST 0 should be 1/4 of on other OSTs'
+       CNT0=`$LFS find -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c 0`
+       CNT0=$(($CNT0 - 1))
+       echo "$TAB object created on OST 0: $CNT0"
+       
+       # the object count of other osts must be greater than 2 times 
+       CNT0=$(($CNT0 * 2))
+       for i in `seq $(($OSTCOUNT - 1))`; do
+               CNT=`$LFS find -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c $i`
+               echo "$TAB object created on OST $i: $CNT"
+               if [ $CNT0 -gt $CNT ] ; then
+                       echo "$TAB ERROR: too much objects created on OST 0"
+                       return 1
+               fi
+       done
+       echo "$TAB objects created on OST 0 is about 1/4 of others'"
+       
+       # cleanup
+       for i in `seq $LOOPCNT`; do
+               rm -f $MOUNT/file-$i
+       done
+       rm -f $QOSFILE
+       return 0
+}
+       
+
+# run tests
+for j in `seq 2`; do
+       qos_test_$j
+       [ $? -ne 0 ] && exit 1 
+done
+exit 0
index 9702681..36e90f3 100755 (executable)
@@ -7,8 +7,7 @@ ALWAYS_EXCEPT="20b  24   27 $RECOVERY_SMALL_EXCEPT"
 
 # Tests that always fail with mountconf -- FIXME
 # 16 fails with 1, not evicted
-# 18a,b there is still data in page cache
-EXCEPT="$EXCEPT 16 18a 18b"
+EXCEPT="$EXCEPT 16"
 
 
 LUSTRE=${LUSTRE:-`dirname $0`/..}
@@ -244,13 +243,13 @@ test_18a() {
 
     do_facet client cp /etc/termcap $f
     sync
-    local osc2_dev=`awk '(/OST0001-osc-/){print $4}' $LPROC/devices`
-    $LCTL --device %$osc2_dev deactivate
+    local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'`
+    $LCTL --device $osc2dev deactivate || return 3
     # my understanding is that there should be nothing in the page
     # cache after the client reconnects?     
     rc=0
     pgcache_empty || rc=2
-    $LCTL --device %$osc2_dev activate
+    $LCTL --device $osc2dev activate
     rm -f $f
     return $rc
 }
@@ -368,7 +367,7 @@ test_24() { # bug 2248 - eviction fails writeback but app doesn't see it
 }
 run_test 24 "fsync error (should return error)"
 
-test_26() {      # bug 5921 - evict dead exports 
+test_26() {      # bug 5921 - evict dead exports by pinger
 # this test can only run from a client on a separate node.
        [ "`lsmod | grep obdfilter`" ] && \
            echo "skipping test 26 (local OST)" && return
index cae66d0..513766c 100755 (executable)
@@ -13,6 +13,7 @@ LUSTRE=${LUSTRE:-`dirname $0`/..}
 init_test_env $@
 
 . ${CONFIG:=$LUSTRE/tests/cfg/local.sh}
+. mountconf.sh 
 
 # Skip these tests
 # bug number: 2766
@@ -20,22 +21,8 @@ ALWAYS_EXCEPT="0b   $REPLAY_SINGLE_EXCEPT"
 
 build_test_filter
 
-cleanup() {
-    # make sure we are using the primary server, so test-framework will
-    # be able to clean up properly.
-    activemds=`facet_active mds`
-    if [ $activemds != "mds" ]; then
-        fail mds
-    fi
-
-    zconf_umount `hostname` $MOUNT
-    stop ost -f
-    stop ost2 -f
-    stop mds -f
-}
-
 SETUP=${SETUP:-"setup"}
-CLEANUP=${CLEANUP:-"cleanup"}
+CLEANUP=${CLEANUP:-"mcstopall"}
 
 if [ "$ONLY" == "cleanup" ]; then
     sysctl -w lnet.debug=0 || true
@@ -44,15 +31,8 @@ if [ "$ONLY" == "cleanup" ]; then
 fi
 
 setup() {
-    cleanup
-    add mds $MDS_MKFS_OPTS --reformat $MDSDEV
-    add ost $OST_MKFS_OPTS --reformat $OSTDEV
-    add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2
-    start mds $MDSDEV $MDS_MOUNT_OPTS
-    start ost $OSTDEV $OST_MOUNT_OPTS
-    start ost2 $OSTDEV2 $OST2_MOUNT_OPTS
-    [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE
-    grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT
+    mcformat
+    mcsetup
 }
 
 $SETUP
@@ -91,20 +71,20 @@ test_1a() {
     do_facet ost "sysctl -w lustre.fail_loc=0"
 
     rm -fr $DIR/$tfile
-    local old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    local old_last_id=`cat $LPROC/obdfilter/*/last_id`
     touch -o $DIR/$tfile 1
     sync
-    local new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    local new_last_id=`cat $LPROC/obdfilter/*/last_id`
     
     test "$old_last_id" = "$new_last_id" || {
        echo "OST object create is caused by MDS"
        return 1
     }
     
-    old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    old_last_id=`cat $LPROC/obdfilter/*/last_id`
     echo "data" > $DIR/$tfile
     sync
-    new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    new_last_id=`cat $LPROC/obdfilter/*/last_id`
     test "$old_last_id" = "$new_last_id "&& {
        echo "CROW does not work on write"
        return 1
@@ -116,10 +96,10 @@ test_1a() {
     do_facet ost "sysctl -w lustre.fail_loc=0x80000801"
 
     rm -fr $DIR/1a1
-    old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    old_last_id=`cat $LPROC/obdfilter/*/last_id`
     echo "data" > $DIR/1a1
     sync
-    new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id`
+    new_last_id=`cat $LPROC/obdfilter/*/last_id`
     test "$old_last_id" = "$new_last_id" || {
        echo "CROW does work with fail_loc=0x80000801"
        return 1
@@ -802,8 +782,7 @@ test_39() { # bug 4176
 run_test 39 "test recovery from unlink llog (test llog_gen_rec) "
 
 count_ost_writes() {
-        cat /proc/fs/lustre/osc/*/stats |
-            awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }'
+    awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' $LPROC/osc/*/stats
 }
 
 #b=2477,2532
@@ -856,11 +835,11 @@ test_41() {
     do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3
     cancel_lru_locks osc
     # fail ost2 and read from ost1
-    local osc2_dev=`$LCTL device_list | \
-               awk '(/ost2.*client_facet/){print $4}' `
-    $LCTL --device %$osc2_dev deactivate
+    local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'`
+    [ "$osc2dev" ] || return 4
+    $LCTL --device $osc2dev deactivate || return 1
     do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3
-    $LCTL --device %$osc2_dev activate
+    $LCTL --device $osc2dev activate || return 2
     return 0
 }
 run_test 41 "read from a valid osc while other oscs are invalid"
@@ -901,8 +880,10 @@ test_43() { # bug 2530
 run_test 43 "mds osc import failure during recovery; don't LBUG"
 
 test_44() {
-    mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+    mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+    [ "$mdcdev" ] || exit 2
     for i in `seq 1 10`; do
+       echo iteration $i
         #define OBD_FAIL_TGT_CONN_RACE     0x701
         do_facet mds "sysctl -w lustre.fail_loc=0x80000701"
         $LCTL --device $mdcdev recover
@@ -914,8 +895,10 @@ test_44() {
 run_test 44 "race in target handle connect"
 
 test_44b() {
-    mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+    mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+    [ "$mdcdev" ] || exit 2
     for i in `seq 1 10`; do
+       echo iteration $i
         #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704
         do_facet mds "sysctl -w lustre.fail_loc=0x80000704"
         $LCTL --device $mdcdev recover
@@ -928,7 +911,8 @@ run_test 44b "race in target handle connect"
 
 # Handle failed close
 test_45() {
-    mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices`
+    mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices`
+    [ "$mdcdev" ] || exit 2
     $LCTL --device $mdcdev recover
 
     multiop $DIR/$tfile O_c &
@@ -937,13 +921,13 @@ test_45() {
 
     # This will cause the CLOSE to fail before even 
     # allocating a reply buffer
-    $LCTL --device $mdcdev deactivate
+    $LCTL --device $mdcdev deactivate || return 4
 
     # try the close
     kill -USR1 $pid
     wait $pid || return 1
 
-    $LCTL --device $mdcdev activate
+    $LCTL --device $mdcdev activate || return 5
     sleep 1
 
     $CHECKSTAT -t file $DIR/$tfile || return 2
@@ -1002,9 +986,9 @@ test_48() {
 run_test 48 "MDS->OSC failure during precreate cleanup (2824)"
 
 test_50() {
-    local osc_dev=`$LCTL device_list | \
-               awk '(/ost_svc_mds_svc/){print $4}' `
-    $LCTL --device %$osc_dev recover &&  $LCTL --device %$osc_dev recover
+    local oscdev=`grep ${ost_svc}-osc- $LPROC/devices | awk '{print $1}'`
+    [ "$oscdev" ] || return 1
+    $LCTL --device $oscdev recover &&  $LCTL --device $oscdev recover
     # give the mds_lov_sync threads a chance to run
     sleep 5
 }
index 09a0549..fe80594 100755 (executable)
@@ -8,7 +8,7 @@ SRC=${SRC:-/usr/lib/dbench/client.txt}
 [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
 SRC=/usr/lib/dbench/client_plain.txt
 [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT
-[ ! -s $TGT ] && echo "$TGT doesn't exist" && exit 1
+[ ! -s $TGT ] && echo "$0: $TGT doesn't exist (SRC=$SRC)" && exit 1
 cd $DIR
 echo "running 'dbench $@' on $PWD at `date`"
 dbench -c client.txt $@
index 0c4f5fd..b68cb58 100644 (file)
@@ -11,14 +11,13 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42b  42c  42d  45   68"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
-[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 101"
+[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 77 101"
 # Tests that fail on uml
-[ "$UML" = "no" ] && EXCEPT="$EXCEPT 31d"
+[ "$UML" = "true" ] && EXCEPT="$EXCEPT 31d"
 
 # Tests that always fail with mountconf -- FIXME
 # 48a moving the working dir succeeds
-# 104 something is out of sync with b1_4? 'lfs df' needs an arg
-EXCEPT="$EXCEPT 48a 104"
+EXCEPT="$EXCEPT 48a"
 
 case `uname -r` in
 2.4*) FSTYPE=${FSTYPE:-ext3};    ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;;
@@ -2724,6 +2723,11 @@ test_76() { # bug 1443
 }
 run_test 76 "destroy duplicate inodes in client inode cache"
 
+test_77() {
+       sh qos.sh
+}
+run_test 77 "qos test ============================================"
+
 # on the LLNL clusters, runas will still pick up root's $TMP settings,
 # which will not be writable for the runas user, and then you get a CVS
 # error message with a corrupt path string (CVS bug) and panic.
@@ -2817,39 +2821,49 @@ function get_named_value()
     done
 }
 
+export CACHE_MAX=`cat /proc/fs/lustre/llite/*/max_cached_mb | head -n 1`
+cleanup_101() {
+       for s in $LPROC/llite/*/max_cached_mb; do
+               echo $CACHE_MAX > $s
+       done
+       trap 0
+}
+
 test_101() {
        local s
        local discard
-       local nreads
+       local nreads=10000
+       local cache_limit=32
 
-       for s in $LPROC/osc/*-osc*/rpc_stats ;do
+       for s in $LPROC/osc/*-osc*/rpc_statsdo
                echo 0 > $s
        done
-       for s in $LPROC/llite/*/read_ahead_stats ;do
-               echo 0 > $s
+       trap cleanup_101 EXIT
+       for s in $LPROC/llite/fs*; do
+               echo 0 > $s/read_ahead_stats
+               echo $cache_limit > $s/max_cached_mb
        done
 
        #
-       # randomly read 10000 of 64K chunks from file 3x RAM size
+       # randomly read 10000 of 64K chunks from file 3x 32MB in size
        #
-       nreads=10000
-       s=$(($(awk '/MemTotal/ { print $2 }' /proc/meminfo) * 3))
-       echo "nreads: $nreads file size: ${s}kB"
-       $RANDOM_READS -f $DIR/f101 -s${s}000 -b65536 -C -n$nreads -t 180
+       echo "nreads: $nreads file size: $((cache_limit * 3))MB"
+       $RANDOM_READS -f $DIR/$tfile -s$((cache_limit * 3192 * 1024)) -b65536 -C -n$nreads -t 180
 
        discard=0
-       for s in $LPROC/llite/*/read_ahead_stats ;do
-               discard=$(($discard + $(cat $s | get_named_value 'read but discarded')))
+       for s in $LPROC/llite/fs*; do
+               discard=$(($discard + $(cat $s/read_ahead_stats | get_named_value 'read but discarded')))
        done
+       cleanup_101
 
        if [ $(($discard * 10)) -gt $nreads ] ;then
                cat $LPROC/osc/*-osc*/rpc_stats
                cat $LPROC/llite/*/read_ahead_stats
                error "too many ($discard) discarded pages" 
        fi
-       rm -f $DIR/f101 || true
+       rm -f $DIR/$tfile || true
 }
-run_test 101 "check read-ahead for random reads ==========="
+run_test 101 "check read-ahead for random reads ================"
 
 test_102() {
        local testfile=$DIR/xattr_testfile
@@ -2858,7 +2872,7 @@ test_102() {
         touch $testfile
 
        [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return
-       [ -z "`grep \<xattr\> $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
+       [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return
        echo "set/get xattr..."
         setfattr -n trusted.name1 -v value1 $testfile || error
         [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \
@@ -2894,7 +2908,7 @@ test_102() {
 
        rm -f $testfile
 }
-run_test 102 "user xattr test ====================="
+run_test 102 "user xattr test =================================="
 
 run_acl_subtest()
 {
@@ -2934,14 +2948,14 @@ test_103 () {
     cd $SAVED_PWD
     umask $SAVE_UMASK
 }
-run_test 103 "==============acl test ============="
+run_test 103 "acl test ========================================="
 
 test_104() {
        touch $DIR/$tfile
        lfs df || error "lfs df failed"
        lfs df -ih || error "lfs df -ih failed"
-       lfs df $DIR || error "lfs df $DIR failed"
-       lfs df -ih $DIR || error "lfs df -ih $DIR failed"
+       lfs df -h $DIR || error "lfs df -h $DIR failed"
+       lfs df -i $DIR || error "lfs df -i $DIR failed"
        lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed"
        lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed"
        
@@ -2951,7 +2965,7 @@ test_104() {
        lctl --device %$OSC recover
        lfs df || error "lfs df with reactivated OSC failed"
 }
-run_test 104 "lfs>df [-ih] [path] test ============"
+run_test 104 "lfs df [-ih] [path] test ========================="
 
 TMPDIR=$OLDTMPDIR
 TMP=$OLDTMP
index b10a0e1..a32f2b6 100644 (file)
@@ -7,6 +7,9 @@ ONLY=${ONLY:-"$*"}
 ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b  14c"}
 # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
 
+# Tests that fail on uml
+[ "$UML" = "true" ] && EXCEPT="$EXCEPT 7"
+
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
@@ -559,6 +562,45 @@ test_23() { # Bug 5972
 }
 run_test 23 " others should see updated atime while another read===="
 
+test_24() {
+       touch $DIR1/$tfile
+       lfs df || error "lfs df failed"
+       lfs df -ih || error "lfs df -ih failed"
+       lfs df -h $DIR1 || error "lfs df -h $DIR1 failed"
+       lfs df -i $DIR2 || error "lfs df -i $DIR2 failed"
+       lfs df $DIR1/$tfile || error "lfs df $DIR1/$tfile failed"
+       lfs df -ih $DIR2/$tfile || error "lfs df -ih $DIR2/$tfile failed"
+       
+       OSC=`lctl dl | awk '/OSC.*MNT/ {print $4}' | head -n 1`
+       lctl --device %$OSC deactivate
+       lfs df -i || error "lfs df -i with deactivated OSC failed"
+       lctl --device %$OSC recover
+       lfs df || error "lfs df with reactivated OSC failed"
+}
+run_test 24 "lfs df [-ih] [path] test ========================="
+
+test_25() {
+       [ -z "`mount | grep " $DIR1 .*\<acl\>"`" ] && echo "skipping $TESTNAME ($DIR1 must have acl)" && return
+       [ -z "`mount | grep " $DIR2 .*\<acl\>"`" ] && echo "skipping $TESTNAME ($DIR2 must have acl)" && return
+
+       mkdir $DIR1/d25 || error
+       touch $DIR1/d25/f1 || error
+       chmod 0755 $DIR1/d25/f1 || error
+
+       $RUNAS checkstat $DIR2/d25/f1 || error
+       setfacl -m u:$RUNAS_ID:--- $DIR1/d25 || error
+       $RUNAS checkstat $DIR2/d25/f1 && error
+       setfacl -m u:$RUNAS_ID:r-x $DIR1/d25 || error
+       $RUNAS checkstat $DIR2/d25/f1 || error
+       setfacl -m u:$RUNAS_ID:--- $DIR1/d25 || error
+       $RUNAS checkstat $DIR2/d25/f1 && error
+       setfacl -x u:$RUNAS_ID: $DIR1/d25 || error
+       $RUNAS checkstat $DIR2/d25/f1 || error
+
+       rm -rf $DIR1/d25
+}
+run_test 25 "change ACL on one mountpoint be seen on another ==="
+
 log "cleanup: ======================================================"
 rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true
 if [ "$I_MOUNTED" = "yes" ]; then
index ea3c411..63a13ad 100644 (file)
@@ -15,8 +15,8 @@
 #include <linux/ldiskfs_fs.h>
 #endif
 #include <liblustre.h>
-#include <linux/lustre_lib.h>
-#include <linux/obd.h>
+#include <lustre_lib.h>
+#include <obd.h>
 
 struct option longopts[] = {
        {"ea", 0, 0, 'e'},
index 5250984..cf2b153 100644 (file)
@@ -5,8 +5,8 @@
 #include <fcntl.h>
 
 #include <liblustre.h>
-#include <linux/lustre_lib.h>
-#include <linux/obd.h>
+#include <lustre_lib.h>
+#include <obd.h>
 
 int main(int argc, char **argv)
 {
index 774398d..0b19a44 100644 (file)
@@ -34,7 +34,7 @@
 #undef _GNU_SOURCE
 
 #include <liblustre.h>
-#include <linux/lustre_mds.h>
+#include <lustre_mds.h>
 
 static void usage(char *argv0, int status)
 {
index 78fd71b..7c78924 100644 (file)
@@ -11,8 +11,8 @@
 #include <limits.h>
 #include <sys/ioctl.h>
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
+#include <obd.h>
+#include <lustre_lib.h>
 
 static int usage(char *prog, FILE *out)
 {
index 066abd3..eb43617 100644 (file)
@@ -8,7 +8,7 @@ AM_LDFLAGS := -L$(top_builddir)/lnet/utils
 
 LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a
 
-sbin_scripts = llanalyze llstat.pl llobdstat.pl lactive        lrun 
+sbin_scripts = llanalyze llstat.pl llobdstat.pl lactive lrun 
 bin_scripts = lfind lstripe
 
 if UTILS
@@ -16,51 +16,52 @@ if UTILS
 rootsbin_PROGRAMS = mount.lustre
 sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
        mount_lustre mkfs_lustre mkfs.lustre \
-       tunefs_lustre tunefs.lustre l_getgroups llog_reader
-bin_PROGRAMS = lfs
+       tunefs_lustre tunefs.lustre l_getgroups
+bin_PROGRAMS = lfs llog_reader
 lib_LIBRARIES = liblustreapi.a
 sbin_SCRIPTS = $(sbin_scripts)
 bin_SCRIPTS = $(bin_scripts)
 endif # UTILS
 
+lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h platform.h
+lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL) 
+lctl_DEPENDENCIES := $(LIBPTLCTL) 
+
+lfs_SOURCES = lfs.c parser.c obd.c
+lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
+lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a 
+
+lload_SOURCES = lload.c 
+lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
+lload_DEPENDENCIES := $(LIBPTLCTL)
+
 liblustreapi_a_SOURCES = liblustreapi.c
 
 wirecheck_SOURCES = wirecheck.c
 wirecheck_CPPFLAGS = -DCC="\"$(CC)\""
-wiretest_SOURCES = wiretest.c
 
-lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-lctl_DEPENDENCIES := $(LIBPTLCTL)
-lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h
-
-lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-lload_DEPENDENCIES := $(LIBPTLCTL)
-lload_SOURCES = lload.c 
+wiretest_SOURCES = wiretest.c
 
 obdio_SOURCES = obdio.c obdiolib.c obdiolib.h
 obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h
 
-lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL)
-lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a
-lfs_SOURCES = lfs.c parser.c obd.c
-
-llog_reader_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-llog_reader_DEPENDENCIES := $(LIBPTLCTL)
 llog_reader_SOURCES = llog_reader.c
+llog_reader_LDADD := $(LIBPTLCTL)
+llog_reader_DEPENDENCIES := $(LIBPTLCTL)
 
-mount_lustre_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-mount_lustre_DEPENDENCIES := $(LIBPTLCTL)
 mount_lustre_SOURCES = mount_lustre.c 
+mount_lustre_LDADD := $(LIBPTLCTL)
+mount_lustre_DEPENDENCIES := $(LIBPTLCTL)
 
-mkfs_lustre_LDADD := $(LIBREADLINE) $(LIBPTLCTL)
-mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL)
-mkfs_lustre_SOURCES = parser.c obd.c lustre_cfg.c mkfs_lustre.c parser.h obdctl.h
+mkfs_lustre_SOURCES = mkfs_lustre.c
 mkfs_lustre_CPPFLAGS = -UTUNEFS $(AM_CPPFLAGS)
+mkfs_lustre_LDADD := $(LIBPTLCTL)
+mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL)
 
-tunefs_lustre_LDADD := $(mkfs_lustre_LDADD)
-tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES)
 tunefs_lustre_SOURCES = $(mkfs_lustre_SOURCES)
 tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS)
+tunefs_lustre_LDADD := $(mkfs_lustre_LDADD)
+tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES)
 
 EXTRA_DIST = $(bin_scripts) $(sbin_scripts)
 
index 61c87ee..de4bac0 100644 (file)
@@ -128,14 +128,15 @@ int main(int argc, char **argv)
         else
                 progname++;
 
-        if (strcmp(argv[1], "-d") == 0)
-                debug = 1;
-
         if (argc != 3) {
                 fprintf(stderr, "%s: bad parameter count\n", progname);
                 usage(stderr);
                 return EINVAL;
         }
+
+        if (strcmp(argv[1], "-d") == 0)
+                debug = 1;
+
         param->mgd_uid = strtoul(argv[2], &end, 0);
         if (*end) {
                 fprintf(stderr, "%s: invalid uid '%s'\n", progname, argv[2]);
index 82bdbdc..fa92ab1 100755 (executable)
@@ -38,6 +38,15 @@ else:
     from fcntl import F_GETFL, F_SETFL
 
 PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"]
+PLATFORM = ''
+KEXTPATH = ''
+if string.find(sys.platform, 'linux') != -1:
+    PLATFORM='LINUX'
+elif string.find(sys.platform, 'darwin') != -1:
+    PLATFORM='DARWIN'
+    KEXTPATH='/System/Library/Extensions/'
+else:
+    PLATFORM='Unsupported'
 
 def development_mode():
     base = os.path.dirname(sys.argv[0])
@@ -456,15 +465,25 @@ class LCTLInterface:
 
     # get list of devices
     def device_list(self):
-        devices = '/proc/fs/lustre/devices'
         ret = []
-        if os.access(devices, os.R_OK):
-            try:
-                fp = open(devices, 'r')
-                ret =  fp.readlines()
-                fp.close()
-            except IOError, e:
-                log(e)
+        if PLATFORM == 'LINUX':
+            devices = '/proc/fs/lustre/devices'
+            if os.access(devices, os.R_OK):
+                try:
+                    fp = open(devices, 'r')
+                    ret =  fp.readlines()
+                    fp.close()
+                except IOError, e:
+                    log(e)
+        elif PLATFORM == 'DARWIN':
+            rc, out = self.run("device_list")
+            ret = out.split("\n")
+            if len(ret) == 0:
+                return ret
+            tail = ret[-1]
+            if not tail:
+                # remove the last empty line
+                ret = ret[:-1]
         return ret
 
     # get lustre version
@@ -862,15 +881,24 @@ def sys_get_branch():
 
 def mod_loaded(modname):
     """Check if a module is already loaded. Look in /proc/modules for it."""
-    try:
-        fp = open('/proc/modules')
-        lines = fp.readlines()
-        fp.close()
-        # please forgive my tired fingers for this one
-        ret = filter(lambda word, mod=modname: word == mod,
-                     map(lambda line: string.split(line)[0], lines))
-        return ret
-    except Exception, e:
+    if PLATFORM == 'LINUX':
+        try:
+            fp = open('/proc/modules')
+            lines = fp.readlines()
+            fp.close()
+            # please forgive my tired fingers for this one
+            ret = filter(lambda word, mod=modname: word == mod,
+                         map(lambda line: string.split(line)[0], lines))
+            return ret
+        except Exception, e:
+            return 0
+    elif PLATFORM == 'DARWIN':
+        ret, out = run('/usr/sbin/kextstat | /usr/bin/grep', modname)
+        if ret == 0:
+                return 1
+        else:
+                return 0
+    else:
         return 0
 
 # XXX: instead of device_list, ask for $name and see what we get
@@ -934,28 +962,31 @@ class kmod:
             if mod_loaded(mod) and not config.noexec:
                 continue
             log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir)
-            options = ''
-            if mod == 'lnet':
-                #For LNET we really need modprobe to load defined LNDs
-                run('/sbin/modprobe lnet')
-                #But if that fails, try insmod anyhow with dev option
-                #accept=all for dev liblustre testing
-                options = 'accept=all'
-            if src_dir:
-                module = find_module(src_dir, dev_dir, mod)
-                if not module:
-                    panic('module not found:', mod)
-                (rc, out) = run('/sbin/insmod', module, options)
-                if rc and not mod_loaded(mod):
-                    if rc == 1:
-                        print("Bad module options? Check dmesg.") 
-                    raise CommandError('insmod', out, rc)
-            else:
-                (rc, out) = run('/sbin/modprobe', mod)
-                if rc and not mod_loaded(mod):
-                    if rc == 1:
-                        print("Bad module options? Check dmesg.") 
-                    raise CommandError('modprobe', out, rc)
+            if PLATFORM == 'LINUX':
+                options = ''
+                if mod == 'lnet':
+                    #For LNET we really need modprobe to load defined LNDs
+                    run('/sbin/modprobe lnet')
+                    #But if that fails, try insmod anyhow with dev option
+                    #accept=all for dev liblustre testing
+                    options = 'accept=all'
+                if src_dir:
+                    module = find_module(src_dir, dev_dir, mod)
+                    if not module:
+                        panic('module not found:', mod)
+                    (rc, out) = run('/sbin/insmod', module, options)
+                    if rc and not mod_loaded(mod):
+                        if rc == 1:
+                            print("Bad module options? Check dmesg.") 
+                        raise CommandError('insmod', out, rc)
+                else:
+                    (rc, out) = run('/sbin/modprobe', mod)
+                    if rc and not mod_loaded(mod):
+                        if rc == 1:
+                            print("Bad module options? Check dmesg.") 
+                        raise CommandError('modprobe', out, rc)
+            elif PLATFORM == 'DARWIN':
+                run('/sbin/kextload', KEXTPATH + mod + '.kext');
 
     def cleanup_module(self):
         """Unload the modules in the list in reverse order."""
@@ -979,7 +1010,10 @@ class kmod:
                 log('unloading the network')
                 lctl.unconfigure_network()
                 if mod_loaded("ksocklnd"):
-                    run('/sbin/rmmod ksocklnd')
+                    if PLATFORM == 'LINUX':
+                        run('/sbin/rmmod ksocklnd')
+                    elif PLATFORM == 'DARWIN':
+                        run('/sbin/kextunload', KEXTPATH+'ksocklnd.kext')
                 if mod_loaded("kqswlnd"):
                     run('/sbin/rmmod kqswlnd')
                 if mod_loaded("kgmlnd"):
@@ -994,7 +1028,10 @@ class kmod:
                     run('/sbin/rmmod kralnd')
                 if mod_loaded("kptllnd"):
                     run('/sbin/rmmod kptllnd')
-            (rc, out) = run('/sbin/rmmod', mod)
+            if PLATFORM == 'LINUX':
+                (rc, out) = run('/sbin/rmmod', mod)
+            elif PLATFORM == 'DARWIN':
+                (rc, out) = run('/sbin/kextunload', KEXTPATH+mod+'.kext');
             if rc:
                 log('! unable to unload module:', mod)
                 logall(out)
@@ -1076,7 +1113,12 @@ class Network(Module):
             sys_optimize_elan()
 
     def safe_to_clean(self):
-        return not is_network_prepared()
+        if PLATFORM == 'LINUX':
+            return not is_network_prepared()
+        elif PLATFORM == 'DARWIN':
+            # XXX always assume it's safe to clean 
+            return 1    
+        return 1
 
     def cleanup(self):
         self.info(self.net_type, self.nid)
@@ -1471,7 +1513,9 @@ class OSD(Module):
         self.journal_size = self.db.get_val_int('journalsize', 0)
 
         # now as we store fids in EA on OST we need to make inode bigger
-        self.inode_size = self.db.get_val_int('inodesize', 256)
+        self.inode_size = self.db.get_val_int('inodesize', 0)
+        if self.inode_size == 0:
+                self.inode_size = 256
         self.mkfsoptions = self.db.get_val('mkfsoptions', '')
         # Allocate fewer inodes on large OST devices.  Most filesystems
         # can be much more aggressive than this, but by default we can't.
@@ -1631,7 +1675,7 @@ class Client(Module):
             self.name = self_name
         self.uuid = uuid
         self.lookup_server(self.tgt_dev_uuid)
-       self.lookup_backup_targets()
+        self.lookup_backup_targets()
         self.fs_name = fs_name
         if not module_dir:
             module_dir = module
@@ -1697,7 +1741,6 @@ class Client(Module):
             else:
                 for srv in this_nets:
                     lctl.connect(srv)
-                    break
             if srv:
                  lctl.add_conn(self.name, srv.nid_uuid);
 
@@ -2252,14 +2295,17 @@ def doHost(lustreDB, hosts):
             for_each_profile(node_db, prof_list, doSetup)
             return
 
-        sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
-        sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
+        if PLATFORM == 'LINUX':
+            sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF)
+            sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF)
 
         for_each_profile(node_db, prof_list, doModules)
 
-        sys_set_debug_path()
-        sys_set_ptldebug(ptldebug)
-        sys_set_subsystem(subsystem)
+        if PLATFORM == 'LINUX':
+            # XXX need to be fixed for Darwin
+            sys_set_debug_path()
+            sys_set_ptldebug(ptldebug)
+            sys_set_subsystem(subsystem)
         script = config.gdb_script
         run(lctl.lctl, ' modules >', script)
         if config.gdb:
index 77237d4..349c703 100644 (file)
@@ -41,7 +41,7 @@
 #include <lnet/lnetctl.h>
 
 #include <liblustre.h>
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 #include <lustre/liblustreapi.h>
 #include <lustre/lustre_user.h>
 
@@ -322,7 +322,7 @@ static int lfs_osts(int argc, char **argv)
         } else {
                 mnt = getmntent(fp);
                 while (feof(fp) == 0 && ferror(fp) ==0) {
-                        if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+                        if (llapi_is_lustre_mnttype(mnt)) {
                                 rc = llapi_find(mnt->mnt_dir, obduuid, 0, 0, 0);
                                 if (rc)
                                         fprintf(stderr,
@@ -370,7 +370,7 @@ static int path2mnt(char *path, FILE *fp, char *mntdir, int dir_len)
         len = 0;
         mnt = getmntent(fp);
         while (feof(fp) == 0 && ferror(fp) == 0) {
-                if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+                if (llapi_is_lustre_mnttype(mnt)) {
                         len = strlen(mnt->mnt_dir);
                         if (len > out_len &&
                             !strncmp(rpath, mnt->mnt_dir, len)) {
@@ -384,42 +384,36 @@ static int path2mnt(char *path, FILE *fp, char *mntdir, int dir_len)
 
         if (out_len > 0)
                 return 0;
-        
+
         fprintf(stderr, "error: lfs df: %s isn't mounted on lustre\n", path);
         return -EINVAL;
 }
 
 static int showdf(char *mntdir, struct obd_statfs *stat,
-                  struct obd_uuid *uuid, int ishow, int cooked,
+                  char *uuid, int ishow, int cooked,
                   char *type, int index, int rc)
 {
         __u64 avail, used, total;
         double ratio = 0;
-        int obd_type;
         char *suffix = "KMGTPEZY";
         char tbuf[10], ubuf[10], abuf[10], rbuf[10];
 
-        if (!uuid || !stat || !type)
-                return -EINVAL;
-        if (!strncmp(type, "MDT", 3)) {
-                obd_type = 0;
-        } else if(!strncmp(type, "OST", 3)){
-                obd_type = 1;
-        } else {
-                fprintf(stderr, "error: lfs df: invalid type '%s'\n", type);
+        if (!uuid || !stat)
                 return -EINVAL;
-        }
 
-        if (rc == 0) {
+        switch (rc) {
+        case 0:
                 if (ishow) {
                         avail = stat->os_ffree;
                         used = stat->os_files - stat->os_ffree;
                         total = stat->os_files;
                 } else {
-                        avail = stat->os_bavail * stat->os_bsize / 1024;
+                        int shift = cooked ? 0 : 10;
+
+                        avail = (stat->os_bavail * stat->os_bsize) >> shift;
                         used = stat->os_blocks - stat->os_bavail;
-                        used = used * stat->os_bsize / 1024;
-                        total = stat->os_blocks * stat->os_bsize / 1024;
+                        used = (used * stat->os_bsize) >> shift;
+                        total = (stat->os_blocks * stat->os_bsize) >> shift;
                 }
 
                 if (total > 0)
@@ -427,26 +421,26 @@ static int showdf(char *mntdir, struct obd_statfs *stat,
 
                 if (cooked) {
                         int i;
-                        double total_d, used_d, avail_d;
-                        
-                        total_d = (double)total;
-                        i = COOK(total_d);
+                        double cook_val;
+
+                        cook_val = (double)total;
+                        i = COOK(cook_val);
                         if (i > 0)
-                                sprintf(tbuf, HDF"%c", total_d, suffix[i - 1]);
+                                sprintf(tbuf, HDF"%c", cook_val, suffix[i - 1]);
                         else
                                 sprintf(tbuf, CDF, total);
 
-                        used_d = (double)used;
-                        i = COOK(used_d);
+                        cook_val = (double)used;
+                        i = COOK(cook_val);
                         if (i > 0)
-                                sprintf(ubuf, HDF"%c", used_d, suffix[i - 1]);
+                                sprintf(ubuf, HDF"%c", cook_val, suffix[i - 1]);
                         else
                                 sprintf(ubuf, CDF, used);
 
-                        avail_d = (double)avail;
-                        i = COOK(avail_d);
+                        cook_val = (double)avail;
+                        i = COOK(cook_val);
                         if (i > 0)
-                                sprintf(abuf, HDF"%c", avail_d, suffix[i - 1]);
+                                sprintf(abuf, HDF"%c", cook_val, suffix[i - 1]);
                         else
                                 sprintf(abuf, CDF, avail);
                 } else {
@@ -456,23 +450,19 @@ static int showdf(char *mntdir, struct obd_statfs *stat,
                 }
 
                 sprintf(rbuf, RDF, (int)(ratio * 100));
-                if (obd_type == 0)
-                        printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s[MDT:%d]\n",
-                               (char *)uuid, tbuf, ubuf, abuf, rbuf,
-                               mntdir, index);
+                printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s",
+                       uuid, tbuf, ubuf, abuf, rbuf, mntdir);
+                if (type)
+                        printf("[%s:%d]\n", type, index);
                 else
-                        printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s[OST:%d]\n",
-                               (char *)uuid, tbuf, ubuf, abuf, rbuf,
-                               mntdir, index);
+                        printf("\n");
 
-                return 0;
-        }
-        switch (rc) {
+                break;
         case -ENODATA:
-                printf(UUF": inactive OST\n", (char *)uuid);
+                printf(UUF": inactive device\n", uuid);
                 break;
         default:
-                printf(UUF": %s\n", (char *)uuid, strerror(-rc));
+                printf(UUF": %s\n", uuid, strerror(-rc));
                 break;
         }
 
@@ -481,12 +471,9 @@ static int showdf(char *mntdir, struct obd_statfs *stat,
 
 static int mntdf(char *mntdir, int ishow, int cooked)
 {
-        struct obd_statfs stat_buf;
+        struct obd_statfs stat_buf, sum = { .os_bsize = 1 };
         struct obd_uuid uuid_buf;
         __u32 index;
-        __u64 avail_sum, used_sum, total_sum;
-        char tbuf[10], ubuf[10], abuf[10], rbuf[10];        
-        double ratio_sum = 0;
         int rc;
 
         if (ishow)
@@ -495,10 +482,9 @@ static int mntdf(char *mntdir, int ishow, int cooked)
                        "IUse%", "Mounted on");
         else
                 printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
-                       "UUID", "1K-blocks", "Used", "Available",
-                       "Use%", "Mounted on");
+                       "UUID", cooked ? "bytes" : "1K-blocks",
+                       "Used", "Available", "Use%", "Mounted on");
 
-        avail_sum = total_sum = 0; 
         for (index = 0; ; index++) {
                 memset(&stat_buf, 0, sizeof(struct obd_statfs));
                 memset(&uuid_buf, 0, sizeof(struct obd_uuid));
@@ -509,7 +495,7 @@ static int mntdf(char *mntdir, int ishow, int cooked)
 
                 if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
                     rc == -ENODATA || rc == 0) {
-                        showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked,
+                        showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked,
                                "MDT", index, rc);
                 } else {
                         fprintf(stderr,
@@ -517,13 +503,13 @@ static int mntdf(char *mntdir, int ishow, int cooked)
                                 uuid_buf.uuid, strerror(-rc), rc);
                         return rc;
                 }
-                if (!rc && ishow) {
-                        avail_sum += stat_buf.os_ffree;
-                        total_sum += stat_buf.os_files;
+                if (rc == 0) {
+                        sum.os_ffree += stat_buf.os_ffree;
+                        sum.os_files += stat_buf.os_files;
                 }
         }
 
-        for (index = 0;;index++) {
+        for (index = 0; ; index++) {
                 memset(&stat_buf, 0, sizeof(struct obd_statfs));
                 memset(&uuid_buf, 0, sizeof(struct obd_uuid));
                 rc = llapi_obd_statfs(mntdir, LL_STATFS_LOV, index,
@@ -533,7 +519,7 @@ static int mntdf(char *mntdir, int ishow, int cooked)
 
                 if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO ||
                     rc == -ENODATA || rc == 0) {
-                        showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked,
+                        showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked,
                                "OST", index, rc);
                 } else {
                         fprintf(stderr,
@@ -541,55 +527,15 @@ static int mntdf(char *mntdir, int ishow, int cooked)
                                 strerror(-rc), rc);
                         return rc;
                 }
-                if (!rc && !ishow) {
-                        __u64 avail, total;
-                        avail = stat_buf.os_bavail * stat_buf.os_bsize;
-                        avail /= 1024;
-                        total = stat_buf.os_blocks * stat_buf.os_bsize;
-                        total /= 1024;
-                        
-                        avail_sum += avail;
-                        total_sum += total;
+                if (rc == 0) {
+                        sum.os_blocks += stat_buf.os_blocks * stat_buf.os_bsize;
+                        sum.os_bfree  += stat_buf.os_bfree * stat_buf.os_bsize;
+                        sum.os_bavail += stat_buf.os_bavail * stat_buf.os_bsize;
                 }
         }
 
-        used_sum = total_sum - avail_sum;
-        if (total_sum > 0)
-                ratio_sum = (double)(total_sum - avail_sum) / (double)total_sum;
-        sprintf(rbuf, RDF, (int)(ratio_sum * 100));
-        if (cooked) {
-                int i;
-                char *suffix = "KMGTPEZY";
-                double total_sum_d, used_sum_d, avail_sum_d;
-
-                total_sum_d = (double)total_sum;
-                i = COOK(total_sum_d);
-                if (i > 0)
-                        sprintf(tbuf, HDF"%c", total_sum_d, suffix[i - 1]);
-                else
-                        sprintf(tbuf, CDF, total_sum);
-                
-                used_sum_d = (double)used_sum;
-                i = COOK(used_sum_d);
-                if (i > 0)
-                        sprintf(ubuf, HDF"%c", used_sum_d, suffix[i - 1]);
-                else
-                        sprintf(ubuf, CDF, used_sum);
-                        
-                avail_sum_d = (double)avail_sum;
-                i = COOK(avail_sum_d);
-                if (i > 0)
-                        sprintf(abuf, HDF"%c", avail_sum_d, suffix[i - 1]);
-                else
-                        sprintf(abuf, CDF, avail_sum);
-        } else {
-                sprintf(tbuf, CDF, total_sum);
-                sprintf(ubuf, CDF, used_sum);
-                sprintf(abuf, CDF, avail_sum);
-        }
-       
-        printf("\n"UUF" "CSF" "CSF" "CSF" "RSF" %-s\n",
-               "filesystem summary:", tbuf, ubuf, abuf, rbuf, mntdir);
+        printf("\n");
+        showdf(mntdir, &sum, "filesystem summary:", ishow, cooked, NULL, 0,0);
 
         return 0;
 }
@@ -639,7 +585,7 @@ static int lfs_df(int argc, char **argv)
         } else {
                 mnt = getmntent(fp);
                 while (feof(fp) == 0 && ferror(fp) == 0) {
-                        if (llapi_is_lustre_mnttype(mnt->mnt_type)) {
+                        if (llapi_is_lustre_mnttype(mnt)) {
                                 rc = mntdf(mnt->mnt_dir, ishow, cooked);
                                 if (rc)
                                         break;
@@ -690,7 +636,7 @@ static int lfs_check(int argc, char **argv)
         } else {
                 mnt = getmntent(fp);
                 while (feof(fp) == 0 && ferror(fp) ==0) {
-                        if (llapi_is_lustre_mnttype(mnt->mnt_type))
+                        if (llapi_is_lustre_mnttype(mnt))
                                 break;
                         mnt = getmntent(fp);
                 }
@@ -731,7 +677,7 @@ static int lfs_catinfo(int argc, char **argv)
         } else {
                 mnt = getmntent(fp);
                 while (feof(fp) == 0 && ferror(fp) == 0) {
-                        if (llapi_is_lustre_mnttype(mnt->mnt_type))
+                        if (llapi_is_lustre_mnttype(mnt))
                                 break;
                         mnt = getmntent(fp);
                 }
index 5c4c68a..c320aed 100644 (file)
@@ -40,8 +40,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
-#ifdef HAVE_LINUX_TYPES_H
-#include <linux/types.h>
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
 #endif
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #include <lnet/lnetctl.h>
 
 #include <liblustre.h>
-#include <linux/obd.h>
-#include <linux/lustre_lib.h>
+#include <obd.h>
+#include <lustre_lib.h>
 #include <lustre/liblustreapi.h>
-#include <linux/obd_lov.h>
+#include <obd_lov.h>
 #include <lustre/liblustreapi.h>
 
 static void err_msg(char *fmt, ...)
@@ -889,9 +889,12 @@ int llapi_catinfo(char *dir, char *keyword, char *node_name)
         return rc;
 }
 
-int llapi_is_lustre_mnttype(char *type)
+/* Is this a lustre client fs? */
+int llapi_is_lustre_mnttype(struct mntent *mnt)
 {
-        return (strcmp(type,"lustre") == 0 || strcmp(type,"lustre_lite") == 0);
+        char *type = mnt->mnt_type;
+        return ((strcmp(type, "lustre") == 0 || strcmp(type,"lustre_lite") == 0)
+                && (strstr(mnt->mnt_fsname, ":/") != NULL));
 }
 
 int llapi_quotacheck(char *mnt, int check_type)
index fc75f21..1c10faa 100644 (file)
@@ -41,7 +41,9 @@ int          verbose;
 int          nomtab;
 int          fake;
 int          force;
+int          retry;
 static char *progname = NULL;
+#define MAX_RETRIES 99
 
 void usage(FILE *out)
 {
@@ -59,6 +61,7 @@ void usage(FILE *out)
                 "\t-v|--verbose: print verbose config settings\n"
                 "\t-o: filesystem mount options:\n"
                 "\t\tflock/noflock: enable/disable flock support\n"
+                "\t\troute=<gw>[-<gw>]:<low>[-<high>]: portal route to MDS\n"
                 "\t\tuser_xattr/nouser_xattr: enable/disable user extended "
                 "attributes\n"
                 );
@@ -115,6 +118,9 @@ update_mtab_entry(char *spec, char *mtpt, char *type, char *opts,
                         fprintf(stderr, "%s: addmntent: %s:",
                                 progname, strerror (errno));
                         rc = 16;
+                } else if (verbose > 1) {
+                        fprintf(stderr, "%s: added %s on %s to %s\n",
+                                progname, spec, mtpt, MOUNTED);
                 }
                 endmntent(fp);
         }
@@ -141,6 +147,7 @@ print_options(FILE *out, struct lustre_mount_data *lmd, const char *options)
         fprintf(out, "mds name:        %s\n", lmd->lmd_mds);
         fprintf(out, "profile:         %s\n", lmd->lmd_profile);
         fprintf(out, "options:         %s\n", options);
+        fprintf(out, "retry:           %d\n", retry);
 
         return 0;
 }
@@ -243,8 +250,11 @@ int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp)
                 if ((opteq = strchr(opt, '='))) {
                         val = atoi(opteq + 1);
                         *opteq = '\0';
-                        if (0) {
-                                /* All the network options have gone :)) */
+                        if (!strcmp(opt, "retry")) {
+                                if (val >= 0 && val < MAX_RETRIES)
+                                        retry = val;
+                                else /* out of range: do not retry */
+                                        retry = 0;
                         } else {
                                 fprintf(stderr, "%s: unknown option '%s'. "
                                         "Ignoring.\n", progname, opt);
@@ -353,12 +363,14 @@ int main(int argc, char *const argv[])
                 switch (opt) {
                 case 1:
                         ++force;
-                        printf("force: %d\n", force);
+                        if (verbose)
+                                printf("force: %d\n", force);
                         nargs++;
                         break;
                 case 'f':
                         ++fake;
-                        printf("fake: %d\n", fake);
+                        if (verbose)
+                                printf("fake: %d\n", fake);
                         nargs++;
                         break;
                 case 'h':
@@ -366,7 +378,8 @@ int main(int argc, char *const argv[])
                         break;
                 case 'n':
                         ++nomtab;
-                        printf("nomtab: %d\n", nomtab);
+                        if (verbose)
+                                printf("nomtab: %d\n", nomtab);
                         nargs++;
                         break;
                 case 'o':
@@ -428,15 +441,29 @@ int main(int argc, char *const argv[])
                 return 1;
         }
 
-        if (!fake)
-                rc = mount(source, target, "lustre", flags, (void *)&lmd);
+        if (!fake) {
+                FILE *modpipe = popen("/sbin/modprobe -q llite", "r");
+                if (modpipe != NULL)
+                        pclose(modpipe);
+                /* use <= to include the initial mount before we retry */
+                for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++)
+                        rc = mount(source, target, "lustre", flags, &lmd);
+        }
         if (rc) {
                 fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname,
                         source, target, strerror(errno));
                 print_options(stderr, &lmd, options);
-                if (errno == ENODEV)
+                if (errno == ENODEV) {
+                        struct utsname unamebuf;
+                        char *modfile = "/etc/modutils.conf";
+
+                        if (uname(&unamebuf) == 0 &&
+                            strncmp(unamebuf.release, "2.4", 3) == 0)
+                                modfile = "/etc/modules.conf";
+
                         fprintf(stderr, "Are the lustre modules loaded?\n"
-                             "Check /etc/modules.conf and /proc/filesystems\n");
+                                "Check %s and /proc/filesystems\n", modfile);
+                }
                 rc = 32;
         } else if (!nomtab) {
                 rc = update_mtab_entry(source, target, "lustre", options,0,0,0);
index 0a7ea24..9bcf577 100644 (file)
@@ -28,7 +28,7 @@
 
 #include <time.h>
 #include <liblustre.h>
-#include <linux/lustre_idl.h>
+#include <lustre/lustre_idl.h>
 
 int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number);
 
index 2a50cf2..342a4da 100644 (file)
 #ifndef __KERNEL__
 #include <liblustre.h>
 #endif
-#include <linux/lustre_lib.h>
-#include <linux/lustre_cfg.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/obd.h>          /* for struct lov_stripe_md */
+#include <lustre_lib.h>
+#include <lustre_cfg.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <obd.h>          /* for struct lov_stripe_md */
+#include <obd_lov.h>
 #include <linux/lustre_build_version.h>
 
 #include <unistd.h>
@@ -388,6 +389,8 @@ int jt_lcfg_lov_setup(int argc, char **argv)
                         jt_cmdname(argv[0]), argv[5]);
                 return CMD_HELP;
         }
+        desc.ld_qos_threshold = QOS_DEFAULT_THRESHOLD;
+        desc.ld_qos_maxage = QOS_DEFAULT_MAXAGE;
 
         if (argc == 7) {
                 desc.ld_tgt_count = strtoul(argv[6], &end, 0);
index b00e2ed..6f35a32 100644 (file)
@@ -28,6 +28,7 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdarg.h>
+#include <mntent.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <getopt.h>
 
 #include <linux/types.h>
-#define NO_SYS_VFS 1
+//#define HAVE_SYS_VFS_H 1
 #include <linux/fs.h> // for BLKGETSIZE64
-#include <linux/lustre_disk.h>
-#include <linux/lustre_param.h>
+#include <lustre_disk.h>
+#include <lustre_param.h>
 #include <lnet/lnetctl.h>
-#include <linux/lustre_ver.h>
-#include "obdctl.h"
+#include <lustre_ver.h>
 
-/* So obd.o will link */
-#include "parser.h"
-command_t cmdlist[] = {
-        { 0, 0, 0, NULL }
-};
 
 #define MAX_LOOP_DEVICES 16
 #define L_BLOCK_SIZE 4096
@@ -180,6 +175,29 @@ int run_command(char *cmd)
         return rc;
 }                                                       
 
+static int check_mtab_entry(char *spec, char *type)
+{
+        FILE *fp;
+        struct mntent *mnt;
+
+        fp = setmntent(MOUNTED, "r");
+        if (fp == NULL)
+                return(0);
+
+        while ((mnt = getmntent(fp)) != NULL) {
+                if (strcmp(mnt->mnt_fsname, spec) == 0 &&
+                        strcmp(mnt->mnt_type, type) == 0) {
+                        fprintf(stderr, "%s: according to %s %s is "
+                                "already mounted on %s\n",
+                                progname, MOUNTED, spec, mnt->mnt_dir);
+                        endmntent(fp); /* only after mnt is last used */
+                        return(EEXIST);
+                }
+        }
+        endmntent(fp);
+
+        return(0);
+}
 
 /*============ disk dev functions ===================*/
 
@@ -341,17 +359,14 @@ static int file_in_dev(char *file_name, char *dev_name)
         return 0;
 }
 
-/* Check whether the device has already been fomatted by mkfs.lustre */
+/* Check whether the device has already been used with lustre */
 static int is_lustre_target(struct mkfs_opts *mop)
 {
         int rc;
-        /* Check whether there exist MOUNT_DATA_FILE,
-           LAST_RCVD or CATLIST in the device. */
         vprint("checking for existing Lustre data\n");
         
         if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))
-            || (rc = file_in_dev(LAST_RCVD, mop->mo_device))
-            || (rc = file_in_dev(CATLIST, mop->mo_device))) { 
+            || (rc = file_in_dev(LAST_RCVD, mop->mo_device))) { 
                 vprint("found Lustre data\n");
                 /* in the -1 case, 'extents' means this really IS a lustre
                    target */
@@ -461,6 +476,10 @@ int make_lustre_backfs(struct mkfs_opts *mop)
                         strcat(mop->mo_mkfsopts, " -O dir_index");
                 }
 
+                /* Allow reformat of full devices (as opposed to 
+                   partitions.)  We already checked for mounted dev. */
+                strcat(mop->mo_mkfsopts, " -F");
+
                 sprintf(mkfs_cmd, "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE,
                         mop->mo_ldd.ldd_svname);
 
@@ -479,7 +498,7 @@ int make_lustre_backfs(struct mkfs_opts *mop)
                 return EINVAL;
         }
 
-        /* Loop device? */
+        /* For loop device format the dev, not the filename */
         dev = mop->mo_device;
         if (mop->mo_flags & MO_IS_LOOP) 
                 dev = mop->mo_loopdev;
@@ -1079,6 +1098,10 @@ int main(int argc, char *const argv[])
 
         /* device is last arg */
         strcpy(mop.mo_device, argv[argc - 1]);
+
+        if (check_mtab_entry(mop.mo_device, "lustre"))
+                return(EEXIST);
+
         /* Are we using a loop device? */
         ret = is_block(mop.mo_device);
         if (ret < 0) 
index 8631dc1..be8ebdf 100644 (file)
@@ -34,8 +34,8 @@
 #include <mntent.h>
 #include <getopt.h>
 #include <sys/utsname.h>
-#include <linux/lustre_ver.h>
 #include "obdctl.h"
+#include <lustre_ver.h>
 
 int          verbose = 0;
 int          nomtab = 0;
@@ -83,10 +83,11 @@ static int check_mtab_entry(char *spec, char *mtpt, char *type)
                 if (strcmp(mnt->mnt_fsname, spec) == 0 &&
                         strcmp(mnt->mnt_dir, mtpt) == 0 &&
                         strcmp(mnt->mnt_type, type) == 0) {
+                        endmntent(fp);
                         fprintf(stderr, "%s: according to %s %s is "
                                 "already mounted on %s\n",
                                 progname, MOUNTED, spec, mtpt);
-                        return(1); /* or should we return an error? */
+                        return(EEXIST); 
                 }
         }
         endmntent(fp);
index 1184ffd..85ee351 100644 (file)
@@ -38,7 +38,7 @@
 
 #include "obdctl.h"
 
-#include <linux/obd.h>          /* for struct lov_stripe_md */
+#include <obd.h>          /* for struct lov_stripe_md */
 #include <linux/lustre_build_version.h>
 
 #include <unistd.h>
 #include <asm/page.h>           /* needed for PAGE_SIZE - rread */
 #endif
 
-#include <linux/obd_class.h>
+#include <obd_class.h>
 #include <lnet/lnetctl.h>
 #include "parser.h"
+#include "platform.h"
 #include <stdio.h>
 
 #define MAX_STRING_SIZE 128
@@ -72,8 +73,8 @@ struct shared_data {
         __u64 offsets[MAX_THREADS];
         int   running;
         int   barrier;
-        pthread_mutex_t mutex;
-        pthread_cond_t  cond;
+        l_mutex_t mutex;
+        l_cond_t  cond;
 };
 
 static struct shared_data *shared_data;
@@ -486,12 +487,12 @@ static void shmem_setup(void)
 
 static inline void shmem_lock(void)
 { 
-        pthread_mutex_lock(&shared_data->mutex);
+        l_mutex_lock(&shared_data->mutex);
 }
 
 static inline void shmem_unlock(void)
 { 
-        pthread_mutex_unlock(&shared_data->mutex);
+        l_mutex_unlock(&shared_data->mutex);
 }
 
 static inline void shmem_reset(int total_threads)
@@ -500,8 +501,8 @@ static inline void shmem_reset(int total_threads)
                 return;
 
         memset(shared_data, 0, sizeof(*shared_data));
-        pthread_mutex_init(&shared_data->mutex, NULL);
-        pthread_cond_init(&shared_data->cond, NULL);
+        l_mutex_init(&shared_data->mutex);
+        l_cond_init(&shared_data->cond);
         memset(counter_snapshot, 0, sizeof(counter_snapshot));
         prev_valid = 0;
         shared_data->barrier = total_threads;
@@ -875,6 +876,7 @@ int jt_get_version(int argc, char **argv)
         memset(buf, 0, sizeof(buf));
         data->ioc_version = OBD_IOCTL_VERSION;
         data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+        data->ioc_inlbuf1 = buf + size_round(sizeof(*data));
         data->ioc_len = obd_ioctl_packlen(data);
 
         rc = l2_ioctl(OBD_DEV_ID, OBD_GET_VERSION, buf);
@@ -892,6 +894,7 @@ int jt_get_version(int argc, char **argv)
 int jt_obd_list(int argc, char **argv)
 {
         int rc;
+#if HAVE_PROC_FS
         char buf[MAX_STRING_SIZE];
         FILE *fp = fopen(DEVICES_LIST, "r");
 
@@ -908,8 +911,40 @@ int jt_obd_list(int argc, char **argv)
                 printf("%s", buf);
 
         fclose(fp);
-
         return 0;
+#else
+        /* No /proc filesystem, get device list by ioctl */
+        int index;
+        char buf[8192];
+        struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf;
+
+        if (argc != 1)
+                return CMD_HELP;
+
+        for (index = 0;; index++) {
+                memset(buf, 0, sizeof(buf));
+                data->ioc_version = OBD_IOCTL_VERSION;
+                data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data));
+                data->ioc_inlbuf1 = buf + size_round(sizeof(*data));
+                data->ioc_len = obd_ioctl_packlen(data);
+                data->ioc_count = index;
+
+                rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_GETDEVICE, buf);
+                if (rc != 0)
+                        break;
+                printf("%s\n", (char *)data->ioc_bulk);
+        }
+        if (rc != 0) {
+                if (errno == ENOENT) 
+                        /* no device or the last device */
+                        rc = 0;
+                else 
+                        fprintf(stderr, "Error getting device list: %s: "
+                                        "check dmesg.\n",
+                                        strerror(errno));
+        }
+        return rc;
+#endif
 }
 
 /* Get echo client's stripe meta-data for the given object
@@ -1585,9 +1620,9 @@ int jt_obd_test_brw(int argc, char **argv)
 
                 shared_data->barrier--;
                 if (shared_data->barrier == 0)
-                        pthread_cond_broadcast(&shared_data->cond);
+                        l_cond_broadcast(&shared_data->cond);
                 else
-                        pthread_cond_wait(&shared_data->cond,
+                        l_cond_wait(&shared_data->cond,
                                           &shared_data->mutex);
 
                 shmem_unlock ();
index 430bcdc..afbfb52 100644 (file)
 #include <liblustre.h>
 #endif
 
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_dlm.h>
-#include <linux/lustre_cfg.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <lustre_dlm.h>
+#include <lustre_cfg.h>
 
 /* obd.c */
 int do_disconnect(char *func, int verbose);
index e6fa3b9..8813de4 100644 (file)
@@ -11,9 +11,9 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/obd_class.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
+#include <obd_class.h>
 
 struct obdio_conn {
         int                    oc_fd;
index 9c23e77..2cb518d 100644 (file)
 #include <sys/param.h>
 #include <assert.h>
 
-#ifdef HAVE_LIBREADLINE
-#define READLINE_LIBRARY
-#include <readline/readline.h>
-
-/* completion_matches() is #if 0-ed out in modern glibc */
-#ifndef completion_matches
-#  define completion_matches rl_completion_matches
-#endif
-extern void using_history(void);
-extern void stifle_history(int);
-extern void add_history(char *);
-#endif
-
+#include "platform.h"
 #include "parser.h"
 
 static command_t * top_level;           /* Top level of commands, initialized by
@@ -345,6 +333,7 @@ char * readline(char * prompt)
         char *line = malloc(size);
         char *ptr = line;
         int c;
+        int eof = 0;
 
         if (line == NULL)
                 return NULL;
@@ -370,6 +359,7 @@ char * readline(char * prompt)
                                 line = tmp;
                         }
                 } else {
+                        eof = 1;
                         if (ferror(stdin))
                                 goto outfree;
                         goto out;
@@ -377,6 +367,10 @@ char * readline(char * prompt)
         }
 out:
         *ptr = 0;
+        if (eof && (strlen(line) == 0)) {
+                free(line);
+                line = NULL;
+        }
         return line;
 outfree:
         free(line);
diff --git a/lustre/utils/platform.h b/lustre/utils/platform.h
new file mode 100644 (file)
index 0000000..4f5b5c9
--- /dev/null
@@ -0,0 +1,248 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __LUSTRE_UTILS_PLATFORM_H
+#define __LUSTRE_UTILS_PLATFORM_H
+
+#ifdef __linux__
+
+#ifdef HAVE_LIBREADLINE
+#define READLINE_LIBRARY
+#include <readline/readline.h>
+
+/* completion_matches() is #if 0-ed out in modern glibc */
+
+#ifndef completion_matches
+#  define completion_matches rl_completion_matches
+#endif
+extern void using_history(void);
+extern void stifle_history(int);
+extern void add_history(char *);
+#endif /* HAVE_LIBREADLINE */
+
+#include <errno.h>
+#include <string.h>
+#if HAVE_LIBPTHREAD
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <pthread.h>
+
+typedef pthread_mutex_t        l_mutex_t;
+typedef pthread_cond_t l_cond_t;
+#define l_mutex_init(s)                pthread_mutex_init(s, NULL)
+#define l_mutex_lock(s)                pthread_mutex_lock(s)
+#define l_mutex_unlock(s)      pthread_mutex_unlock(s)
+#define l_cond_init(c)         pthread_cond_init(c, NULL)
+#define l_cond_broadcast(c)    pthread_cond_broadcast(c)
+#define l_cond_wait(c, s)      pthread_cond_wait(c, s)
+#endif
+
+#elif __APPLE__
+
+#ifdef HAVE_LIBREADLINE
+#define READLINE_LIBRARY
+#include <readline/readline.h>
+typedef VFunction       rl_vintfunc_t;
+typedef VFunction       rl_voidfunc_t;
+#endif /* HAVE_LIBREADLINE */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/shm.h>
+#include <sys/semaphore.h>
+
+/*
+ * POSIX-compliant inter-process synchronization is not well supported
+ * on Darwin: pthread_mutex_t and pthread_cond_t only work for
+ * inter-thread synchronization and do not work across processes even
+ * when placed in shared memory.  PTHREAD_PROCESS_SHARED is not
+ * supported by Darwin either (pthread_mutexattr_setpshared() with the
+ * PTHREAD_PROCESS_SHARED attribute will return EINVAL).
+ *
+ * The only inter-process synchronization mechanisms that can be used on
+ * Darwin are POSIX named semaphores and file locks; here we use named
+ * semaphores to implement the mutex and the condition variable.
+ *
+ * XXX Liang:
+ * These are just prototypes for now; more tests are needed.
+ */
+#define L_LOCK_DEBUG           (0)             
+
+#define L_SEM_NAMESIZE         32
+
+typedef struct {                /* named-semaphore handle, the basic lock primitive here */
+       sem_t           *s_sem; /* handle stored by l_sem_init() from sem_open() */
+#if L_LOCK_DEBUG
+       char            s_name[L_SEM_NAMESIZE]; /* semaphore name, kept for the debug printfs */
+#endif
+} l_sem_t;
+
+typedef l_sem_t         l_mutex_t;
+
+typedef struct {                /* condition variable emulated with semaphores */
+       l_mutex_t       c_guard;        /* taken around every update of c_count */
+       int             c_count;        /* decremented per waiter: negative while waiters are pending */
+       l_sem_t         c_waiter;       /* waiters block on this; l_cond_broadcast() posts it */
+} l_cond_t;
+
+/*
+ * Create the named semaphore backing @sem with initial count @val.
+ *
+ * The name is built from the caller's pid and the address of @sem.  Once
+ * the semaphore is open the name is unlinked again, so nothing is left
+ * behind in the system-wide semaphore namespace; the handle in
+ * sem->s_sem stays usable until it is closed.
+ *
+ * Returns 0 on success, or -1 (after printing a message to stderr) when
+ * sem_open() fails.
+ */
+static inline int l_sem_init(l_sem_t *sem, int val)
+{
+       char *s_name;
+#if L_LOCK_DEBUG
+       s_name = sem->s_name;
+#else
+       char buf[L_SEM_NAMESIZE];
+       s_name = buf;
+#endif
+       /* get an unique name for named semaphore */
+       snprintf(s_name, L_SEM_NAMESIZE, "%d-%p", (int)getpid(), (void *)sem);
+       sem->s_sem = sem_open(s_name, O_CREAT, 0600, val);
+       /* SEM_FAILED is a pointer value: compare it as one instead of
+        * truncating the handle to int first. */
+       if (sem->s_sem == SEM_FAILED) {
+               fprintf(stderr, "lock %s creating fail: %p, %d!\n",
+                               s_name, (void *)sem->s_sem, errno);
+               return -1;
+       }
+       /* Nobody opens this semaphore by name again, so drop the name now;
+        * otherwise it would outlive the process and a later run producing
+        * the same name would reopen it with a stale count. */
+       sem_unlink(s_name);
+#if L_LOCK_DEBUG
+       printf("open lock: %s\n", s_name);
+#endif
+       return 0;
+}
+
+static inline void l_sem_done(l_sem_t *sem)    /* release the handle held in @sem */
+{
+#if L_LOCK_DEBUG
+       printf("close lock: %s.\n", sem->s_name);
+#endif
+       sem_close(sem->s_sem);  /* only closes the handle; nothing is unlinked here */
+}
+
+/* Take (decrement) @sem, blocking until it becomes available. */
+static inline void l_sem_down(l_sem_t *sem)
+{
+#if L_LOCK_DEBUG
+       printf("sem down :%s\n", sem->s_name);
+#endif
+       /* sem_wait() fails with EINTR when a signal is delivered; retry so
+        * that we never return without actually holding the semaphore. */
+       while (sem_wait(sem->s_sem) == -1 && errno == EINTR)
+               continue;
+}
+
+static inline void l_sem_up(l_sem_t *sem)      /* post (increment) @sem */
+{
+#if L_LOCK_DEBUG
+       printf("sem up  :%s\n", sem->s_name);
+#endif
+       sem_post(sem->s_sem);   /* return value is not checked */
+}
+
+/* Initialize @mutex unlocked: it is a semaphore whose count starts at 1. */
+static inline void l_mutex_init(l_mutex_t *mutex)
+{
+       /* l_mutex_t is a typedef of l_sem_t, so no cast is needed; the
+        * result of l_sem_init() is deliberately not propagated. */
+       (void)l_sem_init(mutex, 1);
+}
+
+/* Initialize @mutex already held: its semaphore count starts at 0. */
+static inline void l_mutex_init_locked(l_mutex_t *mutex)
+{
+       /* l_mutex_t is a typedef of l_sem_t, so no cast is needed; the
+        * result of l_sem_init() is deliberately not propagated. */
+       (void)l_sem_init(mutex, 0);
+}
+
+/* Dispose of @mutex by closing the semaphore behind it. */
+static inline void l_mutex_done(l_mutex_t *mutex)
+{
+       /* l_mutex_t is a typedef of l_sem_t, so it can be passed as is */
+       l_sem_done(mutex);
+}
+
+/* Acquire @mutex, blocking until it is free. */
+static inline void l_mutex_lock(l_mutex_t *mutex)
+{
+#if L_LOCK_DEBUG
+       printf("lock mutex  :%s\n", mutex->s_name);
+#endif
+       /* sem_wait() fails with EINTR when a signal is delivered; retry so
+        * that the caller never proceeds without owning the mutex. */
+       while (sem_wait(mutex->s_sem) == -1 && errno == EINTR)
+               continue;
+}
+
+static inline void l_mutex_unlock(l_mutex_t *mutex)    /* release @mutex */
+{
+#if L_LOCK_DEBUG
+       printf("unlock mutex: %s\n", mutex->s_name);
+#endif
+       sem_post(mutex->s_sem); /* posts the semaphore; return value is not checked */
+}
+
+/*
+ * Prepare @cond for use: no registered waiters, an unlocked guard mutex
+ * and a waiter semaphore whose count is 0.
+ */
+static inline void l_cond_init(l_cond_t *cond)
+{
+       cond->c_count = 0;
+       l_mutex_init(&cond->c_guard);
+       l_sem_init(&cond->c_waiter, 0);
+}
+
+/*
+ * Tear down @cond.  A warning goes to stderr when the waiter count is
+ * not zero; the guard mutex and waiter semaphore are closed either way.
+ */
+static inline void l_cond_done(l_cond_t *cond)
+{
+       int pending = cond->c_count;
+
+       if (pending != 0)
+               fprintf(stderr, "your waiter list is not empty: %d!\n", pending);
+       l_mutex_done(&cond->c_guard);
+       l_sem_done(&cond->c_waiter);
+}
+
+static inline void l_cond_wait(l_cond_t *cond, l_mutex_t *lock)
+{      /* register as a waiter, release @lock, block, then take @lock again */
+       l_mutex_lock(&cond->c_guard);
+       cond->c_count --;               /* one more waiter: the count goes further negative */
+       l_mutex_unlock(&cond->c_guard);
+       l_mutex_unlock(lock);           /* @lock is not held while blocked below */
+       l_sem_down(&cond->c_waiter);    /* blocks until l_cond_broadcast() posts c_waiter */
+       l_mutex_lock(lock);             /* return with @lock held again */
+}
+
+/* Wake every waiter that is currently registered on @cond. */
+static inline void l_cond_broadcast(l_cond_t *cond)
+{
+       l_mutex_lock(&cond->c_guard);
+       /* c_count is negative by the number of waiters: post the waiter
+        * semaphore once for each of them while counting back up to 0. */
+       for (; cond->c_count < 0; cond->c_count++)
+               l_sem_up(&cond->c_waiter);
+       l_mutex_unlock(&cond->c_guard);
+}
+
+#else /* other platform */
+
+#ifdef HAVE_LIBREADLINE
+#define READLINE_LIBRARY
+#include <readline/readline.h>
+#endif /* HAVE_LIBREADLINE */
+#include <errno.h>
+#include <string.h>
+#if HAVE_LIBPTHREAD
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <pthread.h>
+
+typedef pthread_mutex_t        l_mutex_t;
+typedef pthread_cond_t l_cond_t;
+#define l_mutex_init(s)                pthread_mutex_init(s, NULL)
+#define l_mutex_lock(s)                pthread_mutex_lock(s)
+#define l_mutex_unlock(s)      pthread_mutex_unlock(s)
+#define l_cond_init(c)         pthread_cond_init(c, NULL)
+#define l_cond_broadcast(c)    pthread_cond_broadcast(c)
+#define l_cond_wait(c, s)      pthread_cond_wait(c, s)
+#endif /* HAVE_LIBPTHREAD */
+
+#endif /* __linux__  */
+
+#endif
index 9ae82bb..0e1726f 100755 (executable)
@@ -3,6 +3,6 @@
 SRCDIR=`dirname $0`
 PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH
 
-lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 
+lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1
 # do it again, in case we tried to unload ksocklnd too early
-lctl modules | awk '{ print $2 }' | xargs rmmod
+lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod
index 3b781cc..5a1f55a 100644 (file)
@@ -5,8 +5,8 @@
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <liblustre.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
 
 #define BLANK_LINE()                                            \
 do {                                                            \
@@ -494,11 +494,10 @@ check_lov_desc(void)
         CHECK_MEMBER(lov_desc, ld_pattern);
         CHECK_MEMBER(lov_desc, ld_default_stripe_size);
         CHECK_MEMBER(lov_desc, ld_default_stripe_offset);
-        CHECK_MEMBER(lov_desc, ld_default_stripe_offset);
+        CHECK_MEMBER(lov_desc, ld_qos_threshold);
+        CHECK_MEMBER(lov_desc, ld_qos_maxage);
         CHECK_MEMBER(lov_desc, ld_padding_1);
         CHECK_MEMBER(lov_desc, ld_padding_2);
-        CHECK_MEMBER(lov_desc, ld_padding_3);
-        CHECK_MEMBER(lov_desc, ld_padding_4);
         CHECK_MEMBER(lov_desc, ld_uuid);
 }
 
index 0b65ac1..95de9db 100644 (file)
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <liblustre.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
 
 #undef LASSERT
 #undef LASSERTF
index 5a594d0..fd6c2f2 100644 (file)
@@ -1,7 +1,7 @@
 #include <stdio.h>
 #include <liblustre.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
+#include <lustre_lib.h>
+#include <lustre/lustre_idl.h>
 
 #undef LASSERT
 #undef LASSERTF
@@ -30,7 +30,6 @@ void lustre_assert_wire_constants(void)
          * running on Linux tau 2.6.15-dirty #13 SMP Sat Feb 11 18:30:54 MSK 2006 i686 i686 i386 GNU/
          * with gcc version 3.3.3 (SuSE Linux) */
 
-
         /* Constants... */
         LASSERTF(PTLRPC_MSG_MAGIC == 0x0BD00BD0," found %lld\n",
                  (long long)PTLRPC_MSG_MAGIC);
@@ -1171,26 +1170,22 @@ void lustre_assert_wire_constants(void)
                  (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
-        LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n",
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_threshold));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold));
+        LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct lov_desc, ld_qos_maxage));
+        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage));
+        LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_padding_1));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n",
+        LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_padding_2));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_padding_3));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3));
-        LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n",
-                 (long long)(int)offsetof(struct lov_desc, ld_padding_4));
-        LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n",
-                 (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4));
         LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n",
                  (long long)(int)offsetof(struct lov_desc, ld_uuid));
         LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n",