From: yury Date: Fri, 19 May 2006 11:29:53 +0000 (+0000) Subject: - update from b1_4_mountconf X-Git-Tag: v1_8_0_110~486^2~1792 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=d23cfc57a5b5022f6bdf9adf447231b4b6ef06a5;p=fs%2Flustre-release.git - update from b1_4_mountconf --- diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch index 657ecf4..b6439e6 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.12.patch @@ -2,7 +2,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c =================================================================== --- linux-2.6.12-rc6.orig/fs/ext3/extents.c 2005-06-14 16:31:25.756503133 +0200 +++ linux-2.6.12-rc6/fs/ext3/extents.c 2005-06-14 16:31:25.836581257 +0200 -@@ -0,0 +1,2347 @@ +@@ -0,0 +1,2353 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -176,9 +176,9 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -448,8 +448,12 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -506,8 +510,10 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if 
(path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2644,7 +2650,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h =================================================================== --- linux-2.6.12-rc6.orig/include/linux/ext3_extents.h 2005-06-14 16:31:25.780917195 +0200 +++ linux-2.6.12-rc6/include/linux/ext3_extents.h 2005-06-14 16:31:25.932284381 +0200 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2742,7 +2748,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2843,15 +2849,13 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff 
--git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch index 0ee8d28..9e78214 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch +++ b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -3,7 +3,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== --- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2005-02-17 22:07:57.023609040 +0300 +++ linux-2.6.5-sles9/fs/ext3/extents.c 2005-02-23 01:02:37.396435640 +0300 -@@ -0,0 +1,2349 @@ +@@ -0,0 +1,2355 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -177,9 +177,9 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -449,8 +449,12 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -507,8 +511,10 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2634,7 +2640,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h =================================================================== --- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 
2005-02-17 22:07:57.023609040 +0300 +++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2005-02-23 01:02:37.416432600 +0300 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2732,7 +2738,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2833,15 +2839,13 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch index 56fe653..bd95c54 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch +++ 
b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch @@ -2,7 +2,7 @@ Index: linux-stage/fs/ext3/extents.c =================================================================== --- linux-stage.orig/fs/ext3/extents.c 2005-02-25 15:33:48.890198160 +0200 +++ linux-stage/fs/ext3/extents.c 2005-02-25 15:33:48.917194056 +0200 -@@ -0,0 +1,2347 @@ +@@ -0,0 +1,2353 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -176,9 +176,9 @@ Index: linux-stage/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -448,8 +448,12 @@ Index: linux-stage/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -506,8 +510,10 @@ Index: linux-stage/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2629,7 +2635,7 @@ Index: linux-stage/include/linux/ext3_extents.h =================================================================== --- linux-stage.orig/include/linux/ext3_extents.h 2005-02-25 15:33:48.891198008 +0200 +++ linux-stage/include/linux/ext3_extents.h 2005-02-25 15:33:48.944189952 +0200 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ 
-2727,7 +2733,7 @@ Index: linux-stage/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2828,15 +2834,13 @@ Index: linux-stage/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 1d8a4af..2a64875 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -2570,7 +2570,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_old(handle, inode, block, count); + else { + 
ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch index 0c2f445..70f4f8a 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch @@ -2565,7 +2565,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch index 5ff3d3b..01e7387 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch @@ -2584,7 +2584,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6fad0a6..727f180 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -121,6 +121,57 @@ Description: write_conf for zeroconf mount queried LDAP incorrectly for client Details : LDAP apparently contains 'lustreName' attributes instead of 'name'. A simple remapping of the name is sufficient. 
+Severity : major +Frequency : rare (only with non-default dump_on_timeout debug enabled) +Bugzilla : 10397 +Description: waiting_locks_callback trips kernel BUG if client is evicted +Details : Running with the dump_on_timeout debug flag turned on makes + it possible that the waiting_locks_callback() can try to dump + the Lustre kernel debug logs from an interrupt handler. Defer + this log dumping to the expired_lock_main() thread. + +Severity : enhancement +Bugzilla : 10420 +Description: Support NFS exporting on 2.6 kernels. +Details : Implement non-rawops metadata methods for NFS server to use without + changing NFS server code. + +Severity : medium +Frequency : very rare (synthetic metadata workload only) +Bugzilla : 9974 +Description: two racing renames might cause an MDS thread to deadlock +Details : Running the "racer" program may cause one MDS thread to rename + a file from being the source of a rename to being the target of + a rename at exactly the same time that another thread is doing + so, and the second thread has already enqueued these locks after + doing a lookup of the target and is trying to relock them in + order. Ensure that we don't try to re-lock the same resource. + +Severity : major +Frequency : only very large systems with liblustre clients +Bugzilla : 7304 +Description: slow eviction of liblustre clients with the "evict_by_nid" RPC +Details : Use asynchronous set_info RPCs to send the "evict_by_nid" to + all OSTs in parallel. This allows the eviction of stale liblustre + clients to proceed much faster than if they were done in series, + and also offers similar improvements for other set_info RPCs. + +Severity : minor +Bugzilla : 10265 +Description: excessive CPU usage during initial read phase on client +Details : During the initial read phase on a client, it would aggressively + retry readahead on the file, consuming too much CPU and impacting + performance (since 1.4.5.8).
Improve the readahead algorithm + to avoid this, and also improve some other common cases (read + of small files in particular, where "small" is files smaller than + /proc/fs/lustre/llite/*/max_read_ahead_whole_mb, 2MB by default). + +Severity : minor +Bugzilla : 10450 +Description: MDS crash when receiving packet with unknown intent. +Details : Do not LBUG in unknown intent case, just return -EFAULT + + ------------------------------------------------------------------------------ 02-14-2006 Cluster File Systems, Inc. diff --git a/lustre/autoMakefile.am b/lustre/autoMakefile.am index 272cb89d..c9e56a5 100644 --- a/lustre/autoMakefile.am +++ b/lustre/autoMakefile.am @@ -51,8 +51,10 @@ endif lvfs-sources: $(MAKE) sources -C lvfs +obdclass-sources: + $(MAKE) sources -C obdclass -sources: $(LDISKFS) lvfs-sources lustre_build_version +sources: $(LDISKFS) lvfs-sources obdclass-sources lustre_build_version all-recursive: lustre_build_version diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index a1fe35d..47a58f5 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -38,7 +38,7 @@ AC_SUBST(pymoddir) # AC_DEFUN([LC_TARGET_SUPPORTED], [case $target_os in - linux*) + linux* | darwin*) $1 ;; *) @@ -602,7 +602,7 @@ AC_DEFUN([LC_CONFIGURE], [LC_CONFIG_OBD_BUFFER_SIZE # include/liblustre.h -AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h]) +AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h]) # include/lustre/lustre_user.h # See note there re: __ASM_X86_64_PROCESSOR_H @@ -621,6 +621,10 @@ AC_CHECK_FUNCS([inet_ntoa]) # llite/xattr.c AC_CHECK_HEADERS([linux/xattr_acl.h]) +# use universal lustre headers +# i.e: include/obd.h instead of include/linux/obd.h +AC_CHECK_FILE($PWD/lustre/include/obd.h, [AC_DEFINE(UNIV_LUSTRE_HEADERS, 1, [Use universal lustre headers])]) + # Super safe df AC_ARG_ENABLE([mindf], AC_HELP_STRING([--enable-mindf], @@ -661,8 +665,8 @@ lustre/autoconf/Makefile lustre/conf/Makefile 
lustre/doc/Makefile lustre/include/Makefile +lustre/include/lustre_ver.h lustre/include/linux/Makefile -lustre/include/linux/lustre_ver.h lustre/include/lustre/Makefile lustre/kernel_patches/targets/2.6-suse.target lustre/kernel_patches/targets/2.6-vanilla.target @@ -701,6 +705,7 @@ lustre/fld/Makefile lustre/fld/autoMakefile lustre/obdclass/Makefile lustre/obdclass/autoMakefile +lustre/obdclass/linux/Makefile lustre/obdecho/Makefile lustre/obdecho/autoMakefile lustre/obdfilter/Makefile @@ -725,4 +730,10 @@ lustre/tests/Makefile lustre/utils/Lustre/Makefile lustre/utils/Makefile ]) +case $lb_target_os in + darwin) + AC_CONFIG_FILES([ lustre/obdclass/darwin/Makefile ]) + ;; +esac + ]) diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac index 85b2de6..5a2d2a0 100644 --- a/lustre/autoconf/lustre-version.ac +++ b/lustre/autoconf/lustre-version.ac @@ -1,7 +1,7 @@ m4_define([LUSTRE_MAJOR],[1]) -m4_define([LUSTRE_MINOR],[4]) -m4_define([LUSTRE_PATCH],[6]) -m4_define([LUSTRE_FIX],[90]) +m4_define([LUSTRE_MINOR],[5]) +m4_define([LUSTRE_PATCH],[1]) +m4_define([LUSTRE_FIX],[0]) dnl # 288 stands for 0.0.1.32 , next version with fixes is ok, but next after dnl # next release candidate/beta would spill this warning already. 
diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c index 95bb99c..e466501 100644 --- a/lustre/cmm/cmm_device.c +++ b/lustre/cmm/cmm_device.c @@ -33,15 +33,13 @@ #include -#include -#include - +#include +#include +#include +#include #include "cmm_internal.h" #include "mdc_internal.h" -#include -#include - static struct obd_ops cmm_obd_device_ops = { .o_owner = THIS_MODULE }; diff --git a/lustre/cmm/cmm_internal.h b/lustre/cmm/cmm_internal.h index bceb019..2f5d630 100644 --- a/lustre/cmm/cmm_internal.h +++ b/lustre/cmm/cmm_internal.h @@ -25,8 +25,8 @@ #if defined(__KERNEL__) -#include -#include +#include +#include struct cmm_device { struct md_device cmm_md_dev; diff --git a/lustre/cmm/mdc_device.c b/lustre/cmm/mdc_device.c index 8e6b364..805546c 100644 --- a/lustre/cmm/mdc_device.c +++ b/lustre/cmm/mdc_device.c @@ -31,14 +31,12 @@ #endif #define DEBUG_SUBSYSTEM S_MDS -#include -#include - +#include +#include +#include +#include #include "mdc_internal.h" -#include -#include - static struct lu_device_operations mdc_lu_ops; static inline int lu_device_is_mdc(struct lu_device *ld) diff --git a/lustre/cmm/mdc_internal.h b/lustre/cmm/mdc_internal.h index 668fed1..01b360f 100644 --- a/lustre/cmm/mdc_internal.h +++ b/lustre/cmm/mdc_internal.h @@ -25,8 +25,8 @@ #if defined(__KERNEL__) -#include -#include +#include +#include struct mdc_cli_desc { struct obd_connect_data cl_conn_data; diff --git a/lustre/cmm/mdc_object.c b/lustre/cmm/mdc_object.c index c3a73b3..a0f35c7 100644 --- a/lustre/cmm/mdc_object.c +++ b/lustre/cmm/mdc_object.c @@ -31,11 +31,11 @@ #endif #define DEBUG_SUBSYSTEM S_MDS -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "mdc_internal.h" static struct md_object_operations mdc_mo_ops; diff --git a/lustre/fid/fid_misc.c b/lustre/fid/fid_misc.c index 5d3bfa67..0488b30 100644 --- a/lustre/fid/fid_misc.c +++ b/lustre/fid/fid_misc.c @@ -27,10 +27,10 @@ */ #include +#include -#include -#include 
-#include +#include +#include void fid_to_le(struct lu_fid *dst, const struct lu_fid *src) { diff --git a/lustre/fid/fid_seq.c b/lustre/fid/fid_seq.c index 6e1e59f..3f5706e 100644 --- a/lustre/fid/fid_seq.c +++ b/lustre/fid/fid_seq.c @@ -28,9 +28,9 @@ #include -#include -#include -#include +#include +#include +#include /* sequence manager initialization/finalization stuff */ struct lu_seq_mgr *seq_mgr_init(struct lu_seq_mgr_ops *ops, diff --git a/lustre/fld/Makefile.in b/lustre/fld/Makefile.in index ff381b3..0283a4a 100644 --- a/lustre/fld/Makefile.in +++ b/lustre/fld/Makefile.in @@ -1,5 +1,6 @@ MODULES := fld fld-objs := fld_handle.o fld_iam.o -EXTRA_PRE_CFLAGS := -I@LUSTRE@/ldiskfs/ +EXTRA_PRE_CFLAGS := -I@LUSTRE@ -I@LUSTRE@/ldiskfs + @INCLUDE_RULES@ diff --git a/lustre/fld/fld_handle.c b/lustre/fld/fld_handle.c index cf74f84..7b395af 100644 --- a/lustre/fld/fld_handle.c +++ b/lustre/fld/fld_handle.c @@ -30,18 +30,18 @@ #define DEBUG_SUBSYSTEM S_LLITE #include - -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include #include "fld_internal.h" static int fld_handle(struct lu_context *ctx, diff --git a/lustre/fld/fld_iam.c b/lustre/fld/fld_iam.c index ec51a8b..d69c6ed 100644 --- a/lustre/fld/fld_iam.c +++ b/lustre/fld/fld_iam.c @@ -30,18 +30,18 @@ #define DEBUG_SUBSYSTEM S_LLITE #include - -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include #include #include "fld_internal.h" @@ -96,9 +96,9 @@ int fld_handle_delete(struct lu_context *ctx, struct fld *fld, int fld_handle_lookup(struct lu_context *ctx, struct fld *fld, fidseq_t seq_num, mdsno_t *mds_num) { +#if 0 int size; -#if 0 size = fld_param.id_rec_size; return fld->fld_dt->dd_ops->dt_iam_lookup(&lctx, fld->fld_dt, fld->fld_info->fi_container, 
diff --git a/lustre/include/.cvsignore b/lustre/include/.cvsignore index a8dd680..e2f4176 100644 --- a/lustre/include/.cvsignore +++ b/lustre/include/.cvsignore @@ -11,3 +11,4 @@ Makefile Makefile.in .deps TAGS +lustre_ver.h diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am index 00c4db7..794ae12 100644 --- a/lustre/include/Makefile.am +++ b/lustre/include/Makefile.am @@ -5,5 +5,15 @@ # See the file COPYING in this distribution SUBDIRS = linux lustre -EXTRA_DIST = ioctl.h liblustre.h + +EXTRA_DIST = ioctl.h liblustre.h lprocfs_status.h lustre_cfg.h \ + lustre_commit_confd.h lustre_debug.h lustre_dlm.h \ + lustre_export.h lustre_fsfilt.h lustre_ha.h \ + lustre_handles.h lustre_import.h lustre_lib.h \ + lustre_lite.h lustre_log.h lustre_mds.h lustre_mdc.h \ + lustre_net.h lustre_quota.h lustre_ucache.h lvfs.h \ + obd_cache.h obd_class.h obd_echo.h obd.h obd_lov.h \ + obd_ost.h obd_support.h lustre_ver.h lu_object.h \ + md_object.h dt_object.h lustre_param.h lustre_disk.h \ + lustre_fid.h lustre_req_layout.h diff --git a/lustre/include/darwin/lprocfs_status.h b/lustre/include/darwin/lprocfs_status.h new file mode 100644 index 0000000..dc17b9f --- /dev/null +++ b/lustre/include/darwin/lprocfs_status.h @@ -0,0 +1,57 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Top level header file for LProc SNMP + * Author: Hariharan Thantry thantry@users.sourceforge.net + */ +#ifndef _DARWIN_LPROCFS_SNMP_H +#define _DARWIN_LPROCFS_SNMP_H + +#ifndef _LPROCFS_SNMP_H +#error Do not #include this file directly. #include instead +#endif + +#ifdef LPROCFS +#undef LPROCFS +#endif + +#include +#define kstatfs statfs + +/* + * XXX nikita: temporary! Stubs for naked procfs calls made by Lustre + * code. Should be replaced with our own procfs-like API. + */ + +static inline cfs_proc_dir_entry_t *proc_symlink(const char *name, + cfs_proc_dir_entry_t *parent, + const char *dest) +{ + return NULL; +} + +static inline cfs_proc_dir_entry_t *create_proc_entry(const char *name, + mode_t mode, + cfs_proc_dir_entry_t *p) +{ + return NULL; +} + +#endif /* XNU_LPROCFS_SNMP_H */ diff --git a/lustre/include/darwin/lustre_compat.h b/lustre/include/darwin/lustre_compat.h new file mode 100644 index 0000000..d11c8d6 --- /dev/null +++ b/lustre/include/darwin/lustre_compat.h @@ -0,0 +1,75 @@ +#ifndef __DARWIN_LUSTRE_COMPAT_H__ +#define __DARWIN_LUSTRE_COMPAT_H__ + +#include + +#ifdef __KERNEL__ + +#ifndef HLIST_HEAD +#define hlist_entry list_entry +#define hlist_head list_head +#define hlist_node list_head +#define hlist_del_init list_del_init +#define hlist_add_head list_add +#define hlist_for_each_safe list_for_each_safe + +/* XXX */ +#define LOOKUP_COBD 4096 +#define CURRENT_SECONDS cfs_unix_seconds() + +#endif + +struct module; +static inline int try_module_get(struct module *module) +{ + return 1; +} + +static inline void module_put(struct module *module) +{ +} + +#define THIS_MODULE NULL + +static inline void lustre_daemonize_helper(void) +{ + return; +} + +static inline int32_t ext2_set_bit(int nr, void *a) +{ + int32_t old = test_bit(nr, a); + set_bit(nr, a); 
+ return old; +} + +static inline int32_t ext2_clear_bit(int nr, void *a) +{ + int32_t old = test_bit(nr, a); + clear_bit(nr, a); + return old; +} + +struct nameidata; + +#if !defined(__DARWIN8__) +static inline int ll_path_lookup(const char *path, unsigned int flags, struct nameidata *nd) +{ + int ret = 0; + NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)path, current_proc()); + if (ret = namei(nd)){ + CERROR("ll_path_lookup fail!\n"); + } + return ret; +} +#endif + +#define to_kdev_t(dev) (dev) +#define kdev_t_to_nr(dev) (dev) +#define val_to_kdev(dev) (dev) + +#define ext2_test_bit test_bit + +#endif /* __KERNEL__ */ + +#endif diff --git a/lustre/include/darwin/lustre_debug.h b/lustre/include/darwin/lustre_debug.h new file mode 100644 index 0000000..b2b72f6 --- /dev/null +++ b/lustre/include/darwin/lustre_debug.h @@ -0,0 +1,36 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _DARWIN_LUSTRE_DEBUG_H +#define _DARWIN_LUSTRE_DEBUG_H + +#ifndef _LUSTRE_DEBUG_H +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) 
do {} while (0) +#else +#define LL_CDEBUG_PAGE(mask, page, fmt, arg...) do {} while (0) +#endif + +#endif diff --git a/lustre/include/darwin/lustre_dlm.h b/lustre/include/darwin/lustre_dlm.h new file mode 100644 index 0000000..98587f3 --- /dev/null +++ b/lustre/include/darwin/lustre_dlm.h @@ -0,0 +1,25 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * (visit-tags-table FILE) + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef _DARWIN_LUSTRE_DLM_H__ +#define _DARWIN_LUSTRE_DLM_H__ + +#ifndef _LUSTRE_DLM_H__ +#error Do not #include this file directly. #include instead +#endif + +#define IT_OPEN 0x0001 +#define IT_CREAT 0x0002 +#define IT_READDIR 0x0004 +#define IT_GETATTR 0x0008 +#define IT_LOOKUP 0x0010 +#define IT_UNLINK 0x0020 +#define IT_GETXATTR 0x0040 +#define IT_EXEC 0x0080 +#define IT_PIN 0x0100 +#define IT_CHDIR 0x0200 + + +#endif diff --git a/lustre/include/darwin/lustre_fsfilt.h b/lustre/include/darwin/lustre_fsfilt.h new file mode 100644 index 0000000..e3d9a7e --- /dev/null +++ b/lustre/include/darwin/lustre_fsfilt.h @@ -0,0 +1,32 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Filesystem interface helper. 
+ * + */ + +#ifndef _DARWIN_LUSTRE_FSFILT_H +#define _DARWIN_LUSTRE_FSFILT_H + +#ifndef _LUSTRE_FSFILT_H +#error Do not #include this file directly. #include instead +#endif + +#endif diff --git a/lustre/include/darwin/lustre_handles.h b/lustre/include/darwin/lustre_handles.h new file mode 100644 index 0000000..341a25b --- /dev/null +++ b/lustre/include/darwin/lustre_handles.h @@ -0,0 +1,12 @@ +#ifndef __DARWIN_LUSTRE_HANDLES_H_ +#define __DARWIN_LUSTR_HANDLES_H_ + +#ifndef __LUSTRE_HANDLES_H_ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#endif + diff --git a/lustre/include/darwin/lustre_lib.h b/lustre/include/darwin/lustre_lib.h new file mode 100644 index 0000000..5adadae --- /dev/null +++ b/lustre/include/darwin/lustre_lib.h @@ -0,0 +1,76 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic Lustre library routines. + * + */ + +#ifndef _DARWIN_LUSTRE_LIB_H +#define _DARWIN_LUSTRE_LIB_H + +#ifndef _LUSTRE_LIB_H +#error Do not #include this file directly. 
#include instead +#endif + +#include +#include +#include + +#ifndef LP_POISON +#define LI_POISON ((int)0x5a5a5a5a) +#define LL_POISON ((long)0x5a5a5a5a) +#define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif + +#ifndef LPU64 +#define LPU64 "%llu" +#define LPD64 "%lld" +#define LPX64 "%llx" +#endif + +struct obd_ioctl_data; +#define OBD_IOC_DATA_TYPE struct obd_ioctl_data + +#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ + sigmask(SIGTERM) | sigmask(SIGQUIT) | \ + sigmask(SIGALRM) | sigmask(SIGHUP)) + +#ifdef __KERNEL__ +static inline sigset_t l_w_e_set_sigs(sigset_t sigs) +{ + sigset_t old = 0; + + /* XXX Liang: how to change sigmask in Darwin8.x? + * there is syscall like pthread_sigmask() but we cannot + * use in kernel */ +#if !defined(__DARWIN8__) + struct proc *p = current_proc(); + extern int block_procsigmask(struct proc *p, int bit); + old = cfs_current()->uu_sigmask; + block_procsigmask(p, ~sigs); +#endif + + return old; +} +#endif + +#endif + + diff --git a/lustre/include/darwin/lustre_lite.h b/lustre/include/darwin/lustre_lite.h new file mode 100644 index 0000000..2fcfb96 --- /dev/null +++ b/lustre/include/darwin/lustre_lite.h @@ -0,0 +1,86 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * lustre lite cluster file system + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + * + * Copyright (C) 2002 Cluster File Systems, Inc. + */ + + + +#ifndef _DARWIN_LL_H +#define _DARWIN_LL_H + +#ifndef _LL_H +#error Do not #include this file directly. #include instead +#endif + +#include + +#ifdef __KERNEL__ + +struct iattr { + unsigned int ia_valid; + umode_t ia_mode; + uid_t ia_uid; + gid_t ia_gid; + loff_t ia_size; + time_t ia_atime; + time_t ia_mtime; + time_t ia_ctime; + unsigned int ia_attr_flags; +}; + +/* + * intent data-structured. 
For Linux they are defined in + * linux/include/linux/dcache.h + */ +#define IT_OPEN 0x0001 +#define IT_CREAT 0x0002 +#define IT_READDIR 0x0004 +#define IT_GETATTR 0x0008 +#define IT_LOOKUP 0x0010 +#define IT_UNLINK 0x0020 +#define IT_GETXATTR 0x0040 +#define IT_EXEC 0x0080 +#define IT_PIN 0x0100 + +#define IT_FL_LOCKED 0x0001 +#define IT_FL_FOLLOWED 0x0002 /* set by vfs_follow_link */ + +#define INTENT_MAGIC 0x19620323 /* Happy birthday! */ + +struct lustre_intent_data { + int it_disposition; + int it_status; + __u64 it_lock_handle; + void *it_data; + int it_lock_mode; +}; + +/* + * Liang: We keep the old lookup_intent struct in XNU + * to avoid unnecessary allocate/free. + */ +#define LUSTRE_IT(it) ((struct lustre_intent_data *)(&(it)->d.lustre)) + +struct lookup_intent { + int it_magic; + void (*it_op_release)(struct lookup_intent *); + int it_op; + int it_flags; + int it_create_mode; + union { + struct lustre_intent_data lustre; + void *fs_data; + } d; +}; + +struct super_operations{ +}; +#endif + +#endif diff --git a/lustre/include/darwin/lustre_log.h b/lustre/include/darwin/lustre_log.h new file mode 100644 index 0000000..d777465 --- /dev/null +++ b/lustre/include/darwin/lustre_log.h @@ -0,0 +1,11 @@ +#ifndef _DARWIN_LUSTRE_LOG_H +#define _DARWIN_LUSTRE_LOG_H + +#ifndef _LUSTRE_LOG_H +#error Do not #include this file directly. #include instead +#endif + +#undef LUSTRE_LOG_SERVER +#include + +#endif diff --git a/lustre/include/darwin/lustre_mds.h b/lustre/include/darwin/lustre_mds.h new file mode 100644 index 0000000..7fd8549 --- /dev/null +++ b/lustre/include/darwin/lustre_mds.h @@ -0,0 +1,32 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _DARWIN_LUSTRE_MDS_H +#define _DARWIN_LUSTRE_MDS_H + +#ifndef _LUSTRE_MDS_H +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#endif diff --git a/lustre/include/darwin/lustre_net.h b/lustre/include/darwin/lustre_net.h new file mode 100644 index 0000000..f028545 --- /dev/null +++ b/lustre/include/darwin/lustre_net.h @@ -0,0 +1,34 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#ifndef _DARWIN_LUSTRE_NET_H +#define _DARWIN_LUSTRE_NET_H + +#ifndef _LUSTRE_NET_H +#error Do not #include this file directly. #include instead +#endif + +#include + +#undef WITH_GROUP_INFO + +#endif diff --git a/lustre/include/darwin/lustre_quota.h b/lustre/include/darwin/lustre_quota.h new file mode 100644 index 0000000..5d0864f --- /dev/null +++ b/lustre/include/darwin/lustre_quota.h @@ -0,0 +1,16 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _DARWIN_LUSTRE_QUOTA_H +#define _DARWIN_LUSTRE_QUOTA_H + +#ifndef _LUSTRE_QUOTA_H +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include +#endif + + +#endif /* _DARWIN_LUSTRE_QUOTA_H */ diff --git a/lustre/include/darwin/lustre_types.h b/lustre/include/darwin/lustre_types.h new file mode 100644 index 0000000..651cf2d --- /dev/null +++ b/lustre/include/darwin/lustre_types.h @@ -0,0 +1,7 @@ +#ifndef _LUSTRE_DARWIN_TYPES_H +#define _LUSTRE_DARWIN_TYPES_H + +#include +#include + +#endif diff --git a/lustre/include/darwin/lustre_user.h b/lustre/include/darwin/lustre_user.h new file mode 100644 index 0000000..a495e60 --- /dev/null +++ b/lustre/include/darwin/lustre_user.h @@ -0,0 +1,47 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * Lustre public user-space interface definitions. 
+ */ + +#ifndef _DARWIN_LUSTRE_USER_H +#define _DARWIN_LUSTRE_USER_H + +#include + +#ifndef __KERNEL__ +/* for llmount */ +# define _GNU_SOURCE +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif + +typedef struct stat lstat_t; +#define HAVE_LOV_USER_MDS_DATA + +#ifndef LPU64 +#if (BITS_PER_LONG == 32 || __WORDSIZE == 32) +# define LPU64 "%llu" +# define LPD64 "%lld" +# define LPX64 "%#llx" +# define LPSZ "%u" +# define LPSSZ "%d" +#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) +# define LPU64 "%lu" +# define LPD64 "%ld" +# define LPX64 "%#lx" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#endif +#endif /* !LPU64 */ + +#endif /* _DARWIN_LUSTRE_USER_H */ diff --git a/lustre/include/darwin/lvfs.h b/lustre/include/darwin/lvfs.h new file mode 100644 index 0000000..d271854 --- /dev/null +++ b/lustre/include/darwin/lvfs.h @@ -0,0 +1,24 @@ +#ifndef __DARWIN_LVFS_H__ +#define __DARWIN_LVFS_H__ + +#ifndef __LVFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef LLOG_LVFS +#undef LLOG_LVFS +#endif + +struct lvfs_ucred { + __u32 luc_fsuid; + __u32 luc_fsgid; + __u32 luc_cap; + __u32 luc_uid; + __u32 luc_umask; +}; + +struct lvfs_run_ctxt { + int pid; +}; + +#endif diff --git a/lustre/include/darwin/obd.h b/lustre/include/darwin/obd.h new file mode 100644 index 0000000..175758e --- /dev/null +++ b/lustre/include/darwin/obd.h @@ -0,0 +1,39 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This code is issued under the GNU General Public License. + * See the file COPYING in this distribution + */ + +#ifndef __DARWIN_OBD_H +#define __DARWIN_OBD_H + +#ifndef __OBD_H +#error Do not #include this file directly. 
#include instead +#endif + +#include + +typedef struct semaphore client_obd_lock_t; + +static inline void client_obd_list_lock_init(client_obd_lock_t *lock) +{ + sema_init(lock, 1); +} + +static inline void client_obd_list_lock_done(client_obd_lock_t *lock) +{} + +static inline void client_obd_list_lock(client_obd_lock_t *lock) +{ + mutex_down(lock); +} + +static inline void client_obd_list_unlock(client_obd_lock_t *lock) +{ + mutex_up(lock); +} + +#endif /* __DARWIN_OBD_H */ diff --git a/lustre/include/darwin/obd_class.h b/lustre/include/darwin/obd_class.h new file mode 100644 index 0000000..833da61 --- /dev/null +++ b/lustre/include/darwin/obd_class.h @@ -0,0 +1,34 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __DARWIN_CLASS_OBD_H +#define __DARWIN_CLASS_OBD_H + +#ifndef __CLASS_OBD_H +#error Do not #include this file directly. 
#include instead +#endif + +#if !defined(__KERNEL__) +#define to_kdev_t(dev) (dev) +#endif + +#endif /* __DARWIN_CLASS_OBD_H */ diff --git a/lustre/include/darwin/obd_support.h b/lustre/include/darwin/obd_support.h new file mode 100644 index 0000000..8ff7200 --- /dev/null +++ b/lustre/include/darwin/obd_support.h @@ -0,0 +1,58 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _DARWIN_OBD_SUPPORT +#define _DARWIN_OBD_SUPPORT + +#ifndef _OBD_SUPPORT +#error Do not #include this file directly. #include instead +#endif + +#include + +#define CRCPOLY_LE 0xedb88320 +/** + * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 + * @crc - seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p - pointer to buffer over which CRC is run + * @len - length of buffer @p + */ +static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? 
CRCPOLY_LE : 0); + } + return crc; +} + +#define OBD_SLEEP_ON(wq) sleep_on(wq) + +/* for obd_class.h */ +# ifndef ERR_PTR +# define ERR_PTR(a) ((void *)(a)) +# endif + +#endif diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h new file mode 100644 index 0000000..95d0251 --- /dev/null +++ b/lustre/include/dt_object.h @@ -0,0 +1,329 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LUSTRE_DT_OBJECT_H +#define __LUSTRE_DT_OBJECT_H + +/* + * Sub-class of lu_object with methods common for "data" objects in OST stack. + * + * Data objects behave like regular files: you can read/write them, get and + * set their attributes. Implementation of dt interface is supposed to + * implement some form of garbage collection, normally reference counting + * (nlink) based one. + * + * Examples: osd (lustre/osd) is an implementation of dt interface. + */ + + +/* + * super-class definitions. + */ +#include + +#include +#include + +struct seq_file; +struct proc_dir_entry; +struct lustre_cfg; + +struct thandle; +struct txn_param; +struct dt_device; +struct dt_object; + +/* + * Lock mode for DT objects. 
+ */ +enum dt_lock_mode { + DT_WRITE_LOCK = 1, + DT_READ_LOCK = 2, +}; + +/* + * Operations on dt device. + */ +struct dt_device_operations { + /* + * Method for getting/setting device wide back stored config data, + * like last used meta-sequence, etc. + * + * XXX this is ioctl()-like interface we want to get rid of. + */ + int (*dt_config) (struct lu_context *ctx, + struct dt_device *dev, const char *name, + void *buf, int size, int mode); + /* + * Return device-wide statistics. + */ + int (*dt_statfs)(struct lu_context *ctx, + struct dt_device *dev, struct kstatfs *sfs); + /* + * Start transaction, described by @param. + */ + struct thandle *(*dt_trans_start)(struct lu_context *ctx, + struct dt_device *dev, + struct txn_param *param); + /* + * Finish previously started transaction. + */ + void (*dt_trans_stop)(struct lu_context *ctx, struct thandle *th); + /* + * Return fid of root index object. + */ + int (*dt_root_get)(struct lu_context *ctx, + struct dt_device *dev, struct lu_fid *f); +}; + +/* + * Per-dt-object operations. + */ +struct dt_object_operations { + void (*do_object_lock)(struct lu_context *ctx, + struct dt_object *dt, enum dt_lock_mode mode); + void (*do_object_unlock)(struct lu_context *ctx, + struct dt_object *dt, enum dt_lock_mode mode); + /* + * Note: following ->do_{x,}attr_{set,get}() operations are very + * similar to ->moo_{x,}attr_{set,get}() operations in struct + * md_object_operations (see md_object.h). These operations are not in + * lu_object_operations, because ->do_{x,}attr_set() versions take + * transaction handle as an argument (this transaction is started by + * caller). We might factor ->do_{x,}attr_get() into + * lu_object_operations, but that would break existing symmetry. + */ + + /* + * Return standard attributes. + * + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*do_attr_get)(struct lu_context *ctxt, struct dt_object *dt, + struct lu_attr *attr); + /* + * Set standard attributes. 
+ * + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*do_attr_set)(struct lu_context *ctxt, struct dt_object *dt, + struct lu_attr *attr, struct thandle *handle); + /* + * Return a value of an extended attribute. + * + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*do_xattr_get)(struct lu_context *ctxt, struct dt_object *dt, + void *buf, int buf_len, const char *name); + /* + * Set value of an extended attribute. + * + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*do_xattr_set)(struct lu_context *ctxt, struct dt_object *dt, + void *buf, int buf_len, const char *name, + struct thandle *handle); + /* + * Create new object on this device. + * + * precondition: !lu_object_exists(ctxt, &dt->do_lu); + * postcondition: ergo(result == 0, lu_object_exists(ctxt, &dt->do_lu)); + */ + int (*do_object_create)(struct lu_context *ctxt, struct dt_object *dt, + struct lu_attr *attr, struct thandle *th); + /* + * Destroy existing object. + * + * precondition: lu_object_exists(ctxt, &dt->do_lu); + * postcondition: ergo(result == 0, + * !lu_object_exists(ctxt, &dt->do_lu)); + */ + int (*do_object_destroy)(struct lu_context *ctxt, + struct dt_object *dt, struct thandle *th); +}; + +/* + * Per-dt-object operations on "file body". + */ +struct dt_body_operations { + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dbo_read)(struct lu_context *ctxt, struct dt_object *dt, ...); + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dbo_write)(struct lu_context *ctxt, struct dt_object *dt, ...); + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dbo_truncate)(struct lu_context *ctxt, struct dt_object *dt, ...); +}; + +/* + * Incomplete type of index record. + */ +struct dt_rec; + +/* + * Incomplete type of index key. 
+ */ +struct dt_key; + +struct dt_index_features { + /* required feature flags from enum dt_index_flags */ + __u32 dif_flags; + /* minimal required key size */ + size_t dif_keysize_min; + /* maximal required key size, 0 if no limit */ + size_t dif_keysize_max; + /* minimal required record size */ + size_t dif_recsize_min; + /* maximal required record size, 0 if no limit */ + size_t dif_recsize_max; +}; + +enum dt_index_flags { + /* index supports variable sized keys */ + DT_IND_VARKEY = 1 << 0, + /* index supports variable sized records */ + DT_IND_VARREC = 1 << 1, + /* index can be modified */ + DT_IND_UPDATE = 1 << 2, + /* index supports records with non-unique (duplicate) keys */ + DT_IND_NONUNQ = 1 << 3 +}; + +/* + * Features, required from index to support file system directories (mapping + * names to fids). + */ +extern const struct dt_index_features dt_directory_features; + +/* + * Per-dt-object operations on object as index. + */ +struct dt_index_operations { + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dio_lookup)(struct lu_context *ctxt, struct dt_object *dt, + struct dt_rec *rec, const struct dt_key *key); + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dio_insert)(struct lu_context *ctxt, struct dt_object *dt, + const struct dt_rec *rec, const struct dt_key *key, + struct thandle *handle); + /* + * precondition: lu_object_exists(ctxt, &dt->do_lu); + */ + int (*dio_delete)(struct lu_context *ctxt, struct dt_object *dt, + const struct dt_rec *rec, const struct dt_key *key, + struct thandle *handle); + + /* + * Features probing. Returns 1 if this index supports all features in + * @feat, -ve on error, 0 otherwise. + */ + int (*dio_probe)(struct lu_context *ctxt, struct dt_object *dt, + const struct dt_index_features *feat); +}; + +struct dt_device { + struct lu_device dd_lu_dev; + struct dt_device_operations *dd_ops; + /* + * List of dt_txn_callback (see below). 
This is not protected in any + * way, because callbacks are supposed to be added/deleted only during + * single-threaded start-up shut-down procedures. + */ + struct list_head dd_txn_callbacks; +}; + +int dt_device_init(struct dt_device *dev, struct lu_device_type *t); +void dt_device_fini(struct dt_device *dev); + +static inline int lu_device_is_dt(const struct lu_device *d) +{ + return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_DT); +} + +static inline struct dt_device * lu2dt_dev(struct lu_device *l) +{ + LASSERT(lu_device_is_dt(l)); + return container_of0(l, struct dt_device, dd_lu_dev); +} + +struct dt_object { + struct lu_object do_lu; + struct dt_object_operations *do_ops; + struct dt_body_operations *do_body_ops; + struct dt_index_operations *do_index_ops; +}; + +int dt_object_init(struct dt_object *obj, + struct lu_object_header *h, struct lu_device *d); + +void dt_object_fini(struct dt_object *obj); + +struct txn_param { + unsigned int tp_credits; +}; + +struct thandle { + struct dt_device *th_dev; +}; + +/* + * Transaction call-backs. + * + * These are invoked by osd (or underlying transaction engine) when + * transaction changes state. + * + * Call-backs are used by upper layers to modify transaction parameters and to + * perform some actions on for each transaction state transition. Typical + * example is mdt registering call-back to write into last-received file + * before each transaction commit. 
+ */ +struct dt_txn_callback { + int (*dtc_txn_start)(struct lu_context *ctx, struct dt_device *dev, + struct txn_param *param, void *cookie); + int (*dtc_txn_stop)(struct lu_context *ctx, struct dt_device *dev, + struct thandle *txn, void *cookie); + int (*dtc_txn_commit)(struct lu_context *ctx, struct dt_device *dev, + struct thandle *txn, void *cookie); + void *dtc_cookie; + struct list_head dtc_linkage; +}; + +void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb); +void dt_txn_callback_del(struct dt_device *dev, struct dt_txn_callback *cb); + +int dt_txn_hook_start(struct lu_context *ctx, + struct dt_device *dev, struct txn_param *param); +int dt_txn_hook_stop(struct lu_context *ctx, + struct dt_device *dev, struct thandle *txn); +int dt_txn_hook_commit(struct lu_context *ctx, + struct dt_device *dev, struct thandle *txn); + +#endif /* __LUSTRE_DT_OBJECT_H */ diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h index b0f21cb..2bc0dcd 100644 --- a/lustre/include/liblustre.h +++ b/lustre/include/liblustre.h @@ -57,8 +57,7 @@ #include #include #include -/* Hack for mkfs_lustre.c */ -#ifndef NO_SYS_VFS +#ifdef HAVE_SYS_VFS_H # include #endif #include @@ -403,9 +402,9 @@ static inline int kmem_cache_destroy(kmem_cache_t *a) #define kmap(page) (page)->addr #define kunmap(a) do {} while (0) -static inline struct page *alloc_pages(int mask, unsigned long order) +static inline cfs_page_t *alloc_pages(int mask, unsigned long order) { - struct page *pg = malloc(sizeof(*pg)); + cfs_page_t *pg = malloc(sizeof(*pg)); if (!pg) return NULL; @@ -424,7 +423,7 @@ static inline struct page *alloc_pages(int mask, unsigned long order) #define alloc_page(mask) alloc_pages((mask), 0) -static inline void __free_pages(struct page *pg, int what) +static inline void __free_pages(cfs_page_t *pg, int what) { #if 0 //#ifdef MAP_ANONYMOUS munmap(pg->addr, PAGE_SIZE); @@ -437,9 +436,9 @@ static inline void __free_pages(struct page *pg, int what) #define 
__free_page(page) __free_pages((page), 0) #define free_page(page) __free_page(page) -static inline struct page* __grab_cache_page(unsigned long index) +static inline cfs_page_t* __grab_cache_page(unsigned long index) { - struct page *pg = alloc_pages(0, 0); + cfs_page_t *pg = alloc_pages(0, 0); if (pg) pg->index = index; @@ -485,6 +484,7 @@ struct iattr { time_t ia_ctime; unsigned int ia_attr_flags; }; +#define ll_iattr_struct iattr #define IT_OPEN 0x0001 #define IT_CREAT 0x0002 @@ -549,7 +549,9 @@ struct semaphore { /* use the macro's argument to avoid unused warnings */ #define down(a) do { (void)a; } while (0) +#define mutex_down(a) down(a) #define up(a) do { (void)a; } while (0) +#define mutex_up(a) up(a) #define down_read(a) do { (void)a; } while (0) #define up_read(a) do { (void)a; } while (0) #define down_write(a) do { (void)a; } while (0) @@ -563,6 +565,7 @@ static inline void init_MUTEX (struct semaphore *sem) sema_init(sem, 1); } +#define init_mutex(s) init_MUTEX(s) typedef struct { struct list_head sleepers; @@ -590,6 +593,11 @@ struct task_struct { __u32 cap_effective; }; +typedef struct task_struct cfs_task_t; +#define cfs_current() current +#define cfs_curproc_pid() (current->pid) +#define cfs_curproc_comm() (current->comm) + extern struct task_struct *current; int in_group_p(gid_t gid); static inline int capable(int cap) @@ -748,7 +756,7 @@ static inline void libcfs_run_lbug_upcall(char *file, const char *fn, /* completion */ struct completion { unsigned int done; - wait_queue_head_t wait; + cfs_waitq_t wait; }; #define COMPLETION_INITIALIZER(work) \ @@ -782,13 +790,13 @@ struct nfs_lock_info { void *host; }; -struct file_lock { +typedef struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ struct list_head fl_link; /* doubly linked list of all locks */ struct list_head fl_block; /* circular list of blocked processes */ void *fl_owner; unsigned int fl_pid; - wait_queue_head_t fl_wait; + cfs_waitq_t fl_wait; struct file 
*fl_file; unsigned char fl_flags; unsigned char fl_type; @@ -805,7 +813,16 @@ struct file_lock { union { struct nfs_lock_info nfs_fl; } fl_u; -}; +} cfs_flock_t; + +#define cfs_flock_type(fl) ((fl)->fl_type) +#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) +#define cfs_flock_pid(fl) ((fl)->fl_pid) +#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) +#define cfs_flock_start(fl) ((fl)->fl_start) +#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) +#define cfs_flock_end(fl) ((fl)->fl_end) +#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) #ifndef OFFSET_MAX #define INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) @@ -820,13 +837,6 @@ struct file_lock { #define QUOTA_OK 0 #define NO_QUOTA 1 -/* proc */ -#define proc_symlink(...) \ -({ \ - void *result = NULL; \ - result; \ -}) - /* ACL */ struct posix_acl_entry { short e_tag; @@ -877,11 +887,11 @@ void posix_acl_release(struct posix_acl *acl) #define ENOTSUPP ENOTSUP #endif -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #endif diff --git a/lustre/include/linux/.cvsignore b/lustre/include/linux/.cvsignore index ee57167..b731c89 100644 --- a/lustre/include/linux/.cvsignore +++ b/lustre/include/linux/.cvsignore @@ -13,4 +13,3 @@ extN_jbd.h extN_xattr.h xattr.h lustre_build_version.h -lustre_ver.h diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am index e9068bb..3d8313f 100644 --- a/lustre/include/linux/Makefile.am +++ b/lustre/include/linux/Makefile.am @@ -6,13 +6,11 @@ linuxdir = $(includedir)/linux if UTILS -linux_HEADERS = lustre_idl.h +linux_HEADERS = lustre_types.h lustre_user.h endif -EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \ - obd_cache.h obd_lov.h lustre_dlm.h lustre_handles.h lustre_disk.h \ - lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \ - lustre_export.h 
lustre_log.h obd_echo.h \ - lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h lustre_mdc.h \ - lustre_acl.h obd.h lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h lustre_idl.h \ - lustre_quota.h lustre_ucache.h lustre_ver.h.in lustre_param.h lu_object.h lustre_fid.h md_object.h lustre_req_layout.h +EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_lib.h lustre_dlm.h \ + lustre_handles.h lustre_net.h obd_class.h obd_support.h \ + lustre_log.h lustre_compat25.h lustre_fsfilt.h lustre_mds.h \ + obd.h lvfs.h lvfs_linux.h lustre_lite.h lustre_quota.h \ + lustre_user.h lustre_types.h diff --git a/lustre/include/linux/dt_object.h b/lustre/include/linux/dt_object.h index 8af5b89..95d0251 100644 --- a/lustre/include/linux/dt_object.h +++ b/lustre/include/linux/dt_object.h @@ -20,8 +20,8 @@ * */ -#ifndef __LINUX_DT_OBJECT_H -#define __LINUX_DT_OBJECT_H +#ifndef __LUSTRE_DT_OBJECT_H +#define __LUSTRE_DT_OBJECT_H /* * Sub-class of lu_object with methods common for "data" objects in OST stack. @@ -38,7 +38,7 @@ /* * super-class definitions. 
*/ -#include +#include #include #include @@ -283,6 +283,7 @@ struct dt_object { int dt_object_init(struct dt_object *obj, struct lu_object_header *h, struct lu_device *d); + void dt_object_fini(struct dt_object *obj); struct txn_param { @@ -325,4 +326,4 @@ int dt_txn_hook_stop(struct lu_context *ctx, int dt_txn_hook_commit(struct lu_context *ctx, struct dt_device *dev, struct thandle *txn); -#endif /* __LINUX_DT_OBJECT_H */ +#endif /* __LUSTRE_DT_OBJECT_H */ diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h index 541975c..e3bf664 100644 --- a/lustre/include/linux/lprocfs_status.h +++ b/lustre/include/linux/lprocfs_status.h @@ -21,14 +21,18 @@ * Top level header file for LProc SNMP * Author: Hariharan Thantry thantry@users.sourceforge.net */ -#ifndef _LPROCFS_SNMP_H -#define _LPROCFS_SNMP_H +#ifndef _LINUX_LPROCFS_SNMP_H +#define _LINUX_LPROCFS_SNMP_H +#ifndef _LPROCFS_SNMP_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ #include #include #include +#include #include #include #include @@ -43,345 +47,4 @@ # define kstatfs statfs #endif - -#undef LPROCFS -#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS)) -# define LPROCFS -#endif - -struct lprocfs_vars { - const char *name; - read_proc_t *read_fptr; - write_proc_t *write_fptr; - void *data; -}; - -struct lprocfs_static_vars { - struct lprocfs_vars *module_vars; - struct lprocfs_vars *obd_vars; -}; - -/* An lprocfs counter can be configured using the enum bit masks below. - * - * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already - * protects this counter from concurrent updates. If not specified, - * lprocfs an internal per-counter lock variable. External locks are - * not used to protect counter increments, but are used to protect - * counter readout and resets. - * - * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples, - * (i.e. counter can be incremented by more than "1"). 
When specified, - * the counter maintains min, max and sum in addition to a simple - * invocation count. This allows averages to be be computed. - * If not specified, the counter is an increment-by-1 counter. - * min, max, sum, etc. are not maintained. - * - * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of - * squares (for multi-valued counter samples only). This allows - * external computation of standard deviation, but involves a 64-bit - * multiply per counter increment. - */ - -enum { - LPROCFS_CNTR_EXTERNALLOCK = 0x0001, - LPROCFS_CNTR_AVGMINMAX = 0x0002, - LPROCFS_CNTR_STDDEV = 0x0004, - - /* counter data type */ - LPROCFS_TYPE_REGS = 0x0100, - LPROCFS_TYPE_BYTES = 0x0200, - LPROCFS_TYPE_PAGES = 0x0400, - LPROCFS_TYPE_CYCLE = 0x0800, -}; - -struct lprocfs_atomic { - atomic_t la_entry; - atomic_t la_exit; -}; - -struct lprocfs_counter { - struct lprocfs_atomic lc_cntl; /* may need to move to per set */ - unsigned int lc_config; - __u64 lc_count; - __u64 lc_sum; - __u64 lc_min; - __u64 lc_max; - __u64 lc_sumsquare; - const char *lc_name; /* must be static */ - const char *lc_units; /* must be static */ -}; - -struct lprocfs_percpu { - struct lprocfs_counter lp_cntr[0]; -}; - - -struct lprocfs_stats { - unsigned int ls_num; /* # of counters */ - unsigned int ls_percpu_size; - struct lprocfs_percpu *ls_percpu[0]; -}; - - -/* class_obd.c */ -extern struct proc_dir_entry *proc_lustre_root; - -struct obd_device; -struct file; -struct obd_histogram; - -#ifdef LPROCFS - -/* Two optimized LPROCFS counter increment functions are provided: - * lprocfs_counter_incr(cntr, value) - optimized for by-one counters - * lprocfs_counter_add(cntr) - use for multi-valued counters - * Counter data layout allows config flag, counter lock and the - * count itself to reside within a single cache line. 
- */ - -static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, - long amount) -{ - struct lprocfs_counter *percpu_cntr; - - LASSERT(stats != NULL); - percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); - atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; - - if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { - percpu_cntr->lc_sum += amount; - if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV) - percpu_cntr->lc_sumsquare += (__u64)amount * amount; - if (amount < percpu_cntr->lc_min) - percpu_cntr->lc_min = amount; - if (amount > percpu_cntr->lc_max) - percpu_cntr->lc_max = amount; - } - atomic_inc(&percpu_cntr->lc_cntl.la_exit); -} - -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) -{ - struct lprocfs_counter *percpu_cntr; - - LASSERT(stats != NULL); - percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); - atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; - atomic_inc(&percpu_cntr->lc_cntl.la_exit); -} - -extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num); -extern void lprocfs_free_stats(struct lprocfs_stats *stats); -extern int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats); -extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, - const char *units); -extern void lprocfs_free_obd_stats(struct obd_device *obddev); -extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, - struct lprocfs_stats *stats); - -#define LPROCFS_INIT_VARS(name, vclass, vinstance) \ -void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \ -{ \ - x->module_vars = vclass; \ - x->obd_vars = vinstance; \ -} \ - -#define lprocfs_init_vars(NAME, VAR) \ -do { \ - extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \ - lprocfs_##NAME##_init_vars(VAR); \ -} while (0) -/* lprocfs_status.c */ -extern int 
lprocfs_add_vars(struct proc_dir_entry *root, - struct lprocfs_vars *var, - void *data); - -extern struct proc_dir_entry *lprocfs_register(const char *name, - struct proc_dir_entry *parent, - struct lprocfs_vars *list, - void *data); - -extern void lprocfs_remove(struct proc_dir_entry *root); - -extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root, - const char *name); - -extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list); -extern int lprocfs_obd_cleanup(struct obd_device *obd); - -/* Generic callbacks */ - -extern int lprocfs_rd_u64(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_atomic(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_name(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_fstype(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_wr_evict_client(struct file *file, const char *buffer, - unsigned long count, void *data); -extern int lprocfs_wr_ping(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* Statfs helpers */ -extern int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_kbytestotal(char *page, char 
**start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, void *data); - -extern int lprocfs_write_helper(const char *buffer, unsigned long count, - int *val); -extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count, - __u64 *val); -int lprocfs_obd_seq_create(struct obd_device *dev, char *name, mode_t mode, - struct file_operations *seq_fops, void *data); -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); -void lprocfs_oh_clear(struct obd_histogram *oh); -unsigned long lprocfs_oh_sum(struct obd_histogram *oh); - -/* lprocfs_status.c: counter read/write functions */ -extern int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data); - -/* lprocfs_status.c: recovery status */ -int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, - int count, int *eof, void *data); -#else -/* LPROCFS is not defined */ -static inline void lprocfs_counter_add(struct lprocfs_stats *stats, - int index, long amount) { return; } -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, - int index) { return; } -static inline void lprocfs_counter_init(struct lprocfs_stats *stats, - int index, unsigned conf, - const char *name, const char *units) -{ return; } - -static inline struct 
lprocfs_stats* lprocfs_alloc_stats(unsigned int num) -{ return NULL; } -static inline void lprocfs_free_stats(struct lprocfs_stats *stats) -{ return; } - -static inline int lprocfs_register_stats(struct proc_dir_entry *root, - const char *name, - struct lprocfs_stats *stats) -{ return 0; } -static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats) -{ return 0; } -static inline void lprocfs_free_obd_stats(struct obd_device *obddev) -{ return; } - -static inline struct proc_dir_entry * -lprocfs_register(const char *name, struct proc_dir_entry *parent, - struct lprocfs_vars *list, void *data) { return NULL; } -#define LPROCFS_INIT_VARS(name, vclass, vinstance) -#define lprocfs_init_vars(...) do {} while (0) -static inline int lprocfs_add_vars(struct proc_dir_entry *root, - struct lprocfs_vars *var, - void *data) { return 0; } -static inline void lprocfs_remove(struct proc_dir_entry *root) {}; -static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, - const char *name) {return 0;} -static inline int lprocfs_obd_setup(struct obd_device *dev, - struct lprocfs_vars *list) { return 0; } -static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; } -static inline int lprocfs_rd_u64(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_name(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ return 0; } -static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ return 0; } -static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ return 0; } - -static 
inline int lprocfs_rd_num_exports(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ return 0; } -static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ return 0; } -static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer, - unsigned long count, void *data) -{ return 0; } -static inline int lprocfs_wr_ping(struct file *file, const char *buffer, - unsigned long count, void *data) -{ return 0; } - - -/* Statfs helpers */ -static inline -int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {} -static inline -void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {} -static inline -void lprocfs_oh_clear(struct obd_histogram *oh) {} -static inline -unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; } -static inline -int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data) { 
return 0; } -#endif /* LPROCFS */ - #endif /* LPROCFS_SNMP_H */ diff --git a/lustre/include/linux/lu_object.h b/lustre/include/linux/lu_object.h index a691224..7ae835c 100644 --- a/lustre/include/linux/lu_object.h +++ b/lustre/include/linux/lu_object.h @@ -20,13 +20,13 @@ * */ -#ifndef __LINUX_LU_OBJECT_H -#define __LINUX_LU_OBJECT_H +#ifndef __LUSTRE_LU_OBJECT_H +#define __LUSTRE_LU_OBJECT_H /* * struct lu_fid */ -#include +#include #include #include @@ -759,4 +759,4 @@ void lu_context_enter(struct lu_context *ctx); void lu_context_exit(struct lu_context *ctx); -#endif /* __LINUX_LU_OBJECT_H */ +#endif /* __LUSTRE_LU_OBJECT_H */ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 5d804c8..066cc20 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -20,8 +20,8 @@ * */ -#ifndef _COMPAT25_H -#define _COMPAT25_H +#ifndef _LINUX_COMPAT25_H +#define _LINUX_COMPAT25_H #ifdef __KERNEL__ @@ -31,6 +31,15 @@ #include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) +struct ll_iattr_struct { + struct iattr iattr; + unsigned int ia_attr_flags; +}; +#else +#define ll_iattr_struct iattr +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) #define UNLOCK_INODE_MUTEX(inode) do {mutex_unlock(&(inode)->i_mutex); } while(0) #define LOCK_INODE_MUTEX(inode) do {mutex_lock(&(inode)->i_mutex); } while(0) @@ -181,6 +190,7 @@ static inline int cleanup_group_info(void) #define ILOOKUP(sb, ino, test, data) ilookup4(sb, ino, test, data); #define DCACHE_DISCONNECTED DCACHE_NFSD_DISCONNECTED #define ll_dev_t int +#define old_encode_dev(dev) (dev) /* 2.5 uses hlists for some things, like the d_hash. we'll treat them * as 2.5 and let macros drop back.. 
*/ diff --git a/lustre/include/linux/lustre_debug.h b/lustre/include/linux/lustre_debug.h index 7d76d8d..db872a9 100644 --- a/lustre/include/linux/lustre_debug.h +++ b/lustre/include/linux/lustre_debug.h @@ -20,27 +20,12 @@ * */ -#ifndef _LUSTRE_DEBUG_H -#define _LUSTRE_DEBUG_H - -#include - -#define ASSERT_MAX_SIZE_MB 60000ULL -#define ASSERT_PAGE_INDEX(index, OP) \ -do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)) { \ - CERROR("bad page index %lu > %Lu\n", index, \ - ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)); \ - libcfs_debug = ~0UL; \ - OP; \ -}} while(0) +#ifndef _LINUX_LUSTRE_DEBUG_H +#define _LINUX_LUSTRE_DEBUG_H -#define ASSERT_FILE_OFFSET(offset, OP) \ -do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \ - CERROR("bad file offset %Lu > %Lu\n", offset, \ - ASSERT_MAX_SIZE_MB << 20); \ - libcfs_debug = ~0UL; \ - OP; \ -}} while(0) +#ifndef _LUSTRE_DEBUG_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ #define LL_CDEBUG_PAGE(mask, page, fmt, arg...) \ @@ -53,13 +38,4 @@ do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \ fmt, page, page->index, page_private(page), ## arg) #endif -/* lib/debug.c */ -int dump_lniobuf(struct niobuf_local *lnb); -int dump_rniobuf(struct niobuf_remote *rnb); -int dump_ioo(struct obd_ioobj *nb); -int dump_req(struct ptlrpc_request *req); -int dump_obdo(struct obdo *oa); -void dump_lsm(int level, struct lov_stripe_md *lsm); -int block_debug_setup(void *addr, int len, __u64 off, __u64 id); -int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); #endif diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 8b198eb..a96c161 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -3,593 +3,15 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _LINUX_LUSTRE_DLM_H__ +#define _LINUX_LUSTRE_DLM_H__ + #ifndef _LUSTRE_DLM_H__ -#define _LUSTRE_DLM_H__ +#error Do not #include this file directly. 
#include instead +#endif #ifdef __KERNEL__ # include #endif -#include -#include -#include -#include -#include /* for obd_export, for LDLM_DEBUG */ - -struct obd_ops; -struct obd_device; - -#define OBD_LDLM_DEVICENAME "ldlm" - -#define LDLM_DEFAULT_LRU_SIZE (100 * smp_num_cpus) - -typedef enum { - ELDLM_OK = 0, - - ELDLM_LOCK_CHANGED = 300, - ELDLM_LOCK_ABORTED = 301, - ELDLM_LOCK_REPLACED = 302, - ELDLM_NO_LOCK_DATA = 303, - - ELDLM_NAMESPACE_EXISTS = 400, - ELDLM_BAD_NAMESPACE = 401 -} ldlm_error_t; - -#define LDLM_NAMESPACE_SERVER 0 -#define LDLM_NAMESPACE_CLIENT 1 - -#define LDLM_FL_LOCK_CHANGED 0x000001 /* extent, mode, or resource changed */ - -/* If the server returns one of these flags, then the lock was put on that list. - * If the client sends one of these flags (during recovery ONLY!), it wants the - * lock added to the specified list, no questions asked. -p */ -#define LDLM_FL_BLOCK_GRANTED 0x000002 -#define LDLM_FL_BLOCK_CONV 0x000004 -#define LDLM_FL_BLOCK_WAIT 0x000008 - -#define LDLM_FL_CBPENDING 0x000010 /* this lock is being destroyed */ -#define LDLM_FL_AST_SENT 0x000020 /* blocking or cancel packet was sent */ -#define LDLM_FL_WAIT_NOREPROC 0x000040 /* not a real flag, not saved in lock */ -#define LDLM_FL_CANCEL 0x000080 /* cancellation callback already run */ - -/* Lock is being replayed. This could probably be implied by the fact that one - * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. 
*/ -#define LDLM_FL_REPLAY 0x000100 - -#define LDLM_FL_INTENT_ONLY 0x000200 /* don't grant lock, just do intent */ -#define LDLM_FL_LOCAL_ONLY 0x000400 /* see ldlm_cli_cancel_unused */ - -/* don't run the cancel callback under ldlm_cli_cancel_unused */ -#define LDLM_FL_FAILED 0x000800 - -#define LDLM_FL_HAS_INTENT 0x001000 /* lock request has intent */ -#define LDLM_FL_CANCELING 0x002000 /* lock cancel has already been sent */ -#define LDLM_FL_LOCAL 0x004000 /* local lock (ie, no srv/cli split) */ -#define LDLM_FL_WARN 0x008000 /* see ldlm_cli_cancel_unused */ -#define LDLM_FL_DISCARD_DATA 0x010000 /* discard (no writeback) on cancel */ - -#define LDLM_FL_NO_TIMEOUT 0x020000 /* Blocked by group lock - wait - * indefinitely */ - -/* file & record locking */ -#define LDLM_FL_BLOCK_NOWAIT 0x040000 // server told not to wait if blocked -#define LDLM_FL_TEST_LOCK 0x080000 // return blocking lock - -/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that - * the LVB filling happens _after_ the lock has been granted, so another thread - * can match before the LVB has been updated. As a dirty hack, we set - * LDLM_FL_CAN_MATCH only after we've done the LVB poop. - * - * The proper fix is to do the granting inside of the completion AST, which can - * be replaced with a LVB-aware wrapping function for OSC locks. That change is - * pretty high-risk, though, and would need a lot more testing. */ -#define LDLM_FL_CAN_MATCH 0x100000 - -/* A lock contributes to the kms calculation until it has finished the part - * of it's cancelation that performs write back on its dirty pages. It - * can remain on the granted list during this whole time. Threads racing - * to update the kms after performing their writeback need to know to - * exclude each others locks from the calculation as they walk the granted - * list. 
*/ -#define LDLM_FL_KMS_IGNORE 0x200000 - -/* Don't drop lock covering mmapped file in LRU */ -#define LDLM_FL_NO_LRU 0x400000 - -/* Immediatelly cancel such locks when they block some other locks. Send - cancel notification to original lock holder, but expect no reply. */ -#define LDLM_FL_CANCEL_ON_BLOCK 0x800000 - -/* Flags flags inherited from parent lock when doing intents. */ -#define LDLM_INHERIT_FLAGS (LDLM_FL_CANCEL_ON_BLOCK) - -/* These are flags that are mapped into the flags and ASTs of blocking locks */ -#define LDLM_AST_DISCARD_DATA 0x80000000 /* Add FL_DISCARD to blocking ASTs */ -/* Flags sent in AST lock_flags to be mapped into the receiving lock. */ -#define LDLM_AST_FLAGS (LDLM_FL_DISCARD_DATA) - -/* The blocking callback is overloaded to perform two functions. These flags - * indicate which operation should be performed. */ -#define LDLM_CB_BLOCKING 1 -#define LDLM_CB_CANCELING 2 - -/* compatibility matrix */ -#define LCK_COMPAT_EX LCK_NL -#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR) -#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR) -#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW) -#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW) -#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX) -#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) - -extern ldlm_mode_t lck_compat_array[]; - -static inline void lockmode_verify(ldlm_mode_t mode) -{ - LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); -} - -static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new) -{ - return (lck_compat_array[exist] & new); -} - -/* - * - * cluster name spaces - * - */ - -#define DLM_OST_NAMESPACE 1 -#define DLM_MDS_NAMESPACE 2 - -/* XXX - - do we just separate this by security domains and use a prefix for - multiple namespaces in the same domain? 
- - -*/ - -struct ldlm_lock; -struct ldlm_resource; -struct ldlm_namespace; - -typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, - void *req_cookie, ldlm_mode_t mode, int flags, - void *data); - -struct ldlm_valblock_ops { - int (*lvbo_init)(struct ldlm_resource *res); - int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m, - int buf_idx, int increase); -}; - -struct ldlm_namespace { - char *ns_name; - __u32 ns_client; /* is this a client-side lock tree? */ - struct list_head *ns_hash; /* hash table for ns */ - wait_queue_head_t ns_refcount_waitq; /* for cleanup */ - atomic_t ns_refcount; /* count of resources in the hash */ - struct list_head ns_root_list; /* all root resources in ns */ - struct lustre_lock ns_lock; /* protects hash, refcount, list */ - struct list_head ns_list_chain; /* position in global NS list */ - - struct list_head ns_unused_list; /* all root resources in ns */ - int ns_nr_unused; - unsigned int ns_max_unused; - unsigned long ns_next_dump; /* next debug dump, jiffies */ - - spinlock_t ns_counter_lock; - __u64 ns_locks; - ldlm_res_policy ns_policy; - struct ldlm_valblock_ops *ns_lvbo; - void *ns_lvbp; -}; - -/* - * - * Resource hash table - * - */ - -#define RES_HASH_BITS 10 -#define RES_HASH_SIZE (1UL << RES_HASH_BITS) -#define RES_HASH_MASK (RES_HASH_SIZE - 1) - -struct ldlm_lock; - -typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, void *data, - int flag); -typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags, - void *data); -typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); - -struct ldlm_lock { - struct portals_handle l_handle; // must be first in the structure - atomic_t l_refc; - struct ldlm_resource *l_resource; - struct ldlm_lock *l_parent; - struct list_head l_children; - struct list_head l_childof; - struct list_head l_lru; - struct list_head l_res_link; // position in one of three res lists - struct 
list_head l_export_chain; // per-export chain of locks - - ldlm_mode_t l_req_mode; - ldlm_mode_t l_granted_mode; - - ldlm_completion_callback l_completion_ast; - ldlm_blocking_callback l_blocking_ast; - ldlm_glimpse_callback l_glimpse_ast; - - struct obd_export *l_export; - struct obd_export *l_conn_export; - __u32 l_flags; - struct lustre_handle l_remote_handle; - ldlm_policy_data_t l_policy_data; - - __u32 l_readers; - __u32 l_writers; - __u8 l_destroyed; - - /* If the lock is granted, a process sleeps on this waitq to learn when - * it's no longer in use. If the lock is not granted, a process sleeps - * on this waitq to learn when it becomes granted. */ - wait_queue_head_t l_waitq; - struct timeval l_enqueued_time; - - unsigned long l_last_used; /* jiffies */ - struct ldlm_extent l_req_extent; - - /* Client-side-only members */ - __u32 l_lvb_len; /* temporary storage for */ - void *l_lvb_data; /* an LVB received during */ - void *l_lvb_swabber; /* an enqueue */ - void *l_ast_data; - - /* Server-side-only members */ - struct list_head l_pending_chain; /* callbacks pending */ - unsigned long l_callback_timeout; /* jiffies */ - - __u32 l_pid; /* pid which created this lock */ -}; - -struct ldlm_resource { - struct ldlm_namespace *lr_namespace; - struct list_head lr_hash; - struct ldlm_resource *lr_parent; /* 0 for a root resource */ - struct list_head lr_children; /* list head for child resources */ - struct list_head lr_childof; /* part of ns_root_list if root res, - * part of lr_children if child */ - - struct list_head lr_granted; - struct list_head lr_converting; - struct list_head lr_waiting; - ldlm_mode_t lr_most_restr; - ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */ - struct ldlm_resource *lr_root; - struct ldlm_res_id lr_name; - atomic_t lr_refcount; - - /* Server-side-only lock value block elements */ - struct semaphore lr_lvb_sem; - __u32 lr_lvb_len; - void *lr_lvb_data; - - /* lr_tmp holds a list head temporarily, during the building of a work - * 
queue. see ldlm_add_ast_work_item and ldlm_run_ast_work */ - void *lr_tmp; -}; - -struct ldlm_ast_work { - struct ldlm_lock *w_lock; - int w_blocking; - struct ldlm_lock_desc w_desc; - struct list_head w_list; - int w_flags; - void *w_data; - int w_datalen; -}; - -extern struct obd_ops ldlm_obd_ops; - -extern char *ldlm_lockname[]; -extern char *ldlm_typename[]; -extern char *ldlm_it2str(int it); - -#define __LDLM_DEBUG(level, lock, format, a...) \ -do { \ - if (lock->l_resource == NULL) { \ - CDEBUG(level, "### " format \ - " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\ - "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \ - LPX64" expref: %d pid: %u\n" , ## a, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_EXTENT) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\ - "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 \ - " expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_policy_data.l_extent.start, \ - lock->l_policy_data.l_extent.end, \ - lock->l_req_extent.start, lock->l_req_extent.end, \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_FLOCK) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " \ - "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \ - " expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_policy_data.l_flock.pid, \ - lock->l_policy_data.l_flock.start, \ - lock->l_policy_data.l_flock.end, \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_IBITS) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " \ - "flags: %x remote: "LPX64" expref: %d " \ - "pid %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, \ - lock, lock->l_handle.h_cookie, \ - atomic_read (&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - lock->l_policy_data.l_inodebits.bits, \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? 
\ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - break; \ - } \ - { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " \ - "remote: "LPX64" expref: %d pid: %u\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, \ - lock, lock->l_handle.h_cookie, \ - atomic_read (&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_flags, lock->l_remote_handle.cookie, \ - lock->l_export ? \ - atomic_read(&lock->l_export->exp_refcount) : -99, \ - lock->l_pid); \ - } \ -} while (0) - -#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \ - format, ## a) -#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a) - -#define LDLM_DEBUG_NOLOCK(format, a...) \ - CDEBUG(D_DLMTRACE, "### " format "\n" , ## a) - -typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags, - int first_enq, ldlm_error_t *err); - -/* - * Iterators. 
- */ - -#define LDLM_ITER_CONTINUE 1 /* keep iterating */ -#define LDLM_ITER_STOP 2 /* stop iterating */ - -typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *); -typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *); - -int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter, - void *closure); -int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter, - void *closure); -int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, - ldlm_res_iterator_t iter, void *closure); - -int ldlm_replay_locks(struct obd_import *imp); -void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *, - ldlm_iterator_t iter, void *data); - -/* ldlm_flock.c */ -int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data); - -/* ldlm_extent.c */ -__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); - -struct ldlm_callback_suite { - ldlm_completion_callback lcs_completion; - ldlm_blocking_callback lcs_blocking; - ldlm_glimpse_callback lcs_glimpse; -}; - -/* ldlm_lockd.c */ -int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); -int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data); -int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data); -int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback, - ldlm_blocking_callback, ldlm_glimpse_callback); -int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req, - struct ldlm_request *dlm_req, - struct ldlm_callback_suite *cbs); -int ldlm_handle_convert(struct ptlrpc_request *req); -int ldlm_handle_convert0(struct ptlrpc_request *req, - struct ldlm_request *dlm_req); -int ldlm_handle_cancel(struct ptlrpc_request *req); -int ldlm_del_waiting_lock(struct ldlm_lock *lock); -int ldlm_get_ref(void); -void ldlm_put_ref(int force); - -/* ldlm_lock.c */ -ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); -void 
ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg); -void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); -struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags); -void ldlm_cancel_callback(struct ldlm_lock *); -int ldlm_lock_set_data(struct lustre_handle *, void *data); -void ldlm_lock_remove_from_lru(struct ldlm_lock *); -struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *, - struct lustre_handle *); - -static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h) -{ - return __ldlm_handle2lock(h, 0); -} - -#define LDLM_LOCK_PUT(lock) \ -do { \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - -#define LDLM_LOCK_GET(lock) \ -({ \ - ldlm_lock_get(lock); \ - /*LDLM_DEBUG((lock), "get");*/ \ - lock; \ -}) - -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); -void ldlm_lock_put(struct ldlm_lock *lock); -void ldlm_lock_destroy(struct ldlm_lock *lock); -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_allow_match(struct ldlm_lock *lock); -int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, - ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode, - struct lustre_handle *); -struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, - int *flags); -void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_cancel_locks_for_export(struct obd_export *export); -void ldlm_reprocess_all(struct ldlm_resource *res); -void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); -void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos); -void ldlm_lock_dump_handle(int level, struct lustre_handle *); - -/* resource.c */ -struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 
local); -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags); -int ldlm_namespace_free(struct ldlm_namespace *ns, int force); -int ldlm_proc_setup(void); -#ifdef LPROCFS -void ldlm_proc_cleanup(void); -#else -static inline void ldlm_proc_cleanup(void) {} -#endif - -/* resource.c - internal */ -struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, - struct ldlm_resource *parent, - struct ldlm_res_id, ldlm_type_t type, - int create); -struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); -int ldlm_resource_putref(struct ldlm_resource *res); -void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, - struct ldlm_lock *lock); -void ldlm_resource_unlink_lock(struct ldlm_lock *lock); -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(int level); -void ldlm_namespace_dump(int level, struct ldlm_namespace *); -void ldlm_resource_dump(int level, struct ldlm_resource *); -int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, - struct ldlm_res_id); - -/* ldlm_request.c */ -int ldlm_expired_completion_wait(void *data); -int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag); -int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp); -int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data); -int ldlm_cli_enqueue(struct obd_export *exp, - struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_res_id, - ldlm_type_t type, - ldlm_policy_data_t *, - ldlm_mode_t mode, - int *flags, - ldlm_blocking_callback blocking, - ldlm_completion_callback completion, - ldlm_glimpse_callback glimpse, - void *data, - void *lvb, - __u32 lvb_len, - void *lvb_swabber, - struct lustre_handle *lockh); -int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, - void *data, __u32 data_len); -int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags); -int 
ldlm_cli_cancel(struct lustre_handle *lockh); -int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *, - int flags, void *opaque); -int ldlm_cli_join_lru(struct ldlm_namespace *, struct ldlm_res_id *, - int join); - -/* mds/handler.c */ -/* This has to be here because recursive inclusion sucks. */ -int intent_disposition(struct ldlm_reply *rep, int flag); -void intent_set_disposition(struct ldlm_reply *rep, int flag); - - -/* ioctls for trying requests */ -#define IOC_LDLM_TYPE 'f' -#define IOC_LDLM_MIN_NR 40 - -#define IOC_LDLM_TEST _IOWR('f', 40, long) -#define IOC_LDLM_DUMP _IOWR('f', 41, long) -#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long) -#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) -#define IOC_LDLM_MAX_NR 43 - #endif diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index bc831e5..84e9af9 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -22,13 +22,17 @@ * */ +#ifndef _LINUX_LUSTRE_FSFILT_H +#define _LINUX_LUSTRE_FSFILT_H + #ifndef _LUSTRE_FSFILT_H -#define _LUSTRE_FSFILT_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ -#include -#include +#include +#include typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, void *data, int error); diff --git a/lustre/include/linux/lustre_handles.h b/lustre/include/linux/lustre_handles.h index f644cf1..21eb047 100644 --- a/lustre/include/linux/lustre_handles.h +++ b/lustre/include/linux/lustre_handles.h @@ -1,39 +1,15 @@ -#ifndef __LINUX_HANDLES_H_ -#define __LINUX_HANDLES_H_ +#ifndef __LINUX_LUSTRE_HANDLES_H_ +#define __LINUX_LUSTRE_HANDLES_H_ + +#ifndef __LUSTRE_HANDLES_H_ +#error Do not #include this file directly. 
#include instead +#endif #ifdef __KERNEL__ #include #include #include +#include #endif -typedef void (*portals_handle_addref_cb)(void *object); - -/* These handles are most easily used by having them appear at the very top of - * whatever object that you want to make handles for. ie: - * - * struct ldlm_lock { - * struct portals_handle handle; - * ... - * }; - * - * Now you're able to assign the results of cookie2handle directly to an - * ldlm_lock. If it's not at the top, you'll want to hack up a macro that - * uses some offsetof() magic. */ - -struct portals_handle { - struct list_head h_link; - __u64 h_cookie; - portals_handle_addref_cb h_addref; -}; - -/* handles.c */ - -/* Add a handle to the hash table */ -void class_handle_hash(struct portals_handle *, portals_handle_addref_cb); -void class_handle_unhash(struct portals_handle *); -void *class_handle2object(__u64 cookie); -int class_handle_init(void); -void class_handle_cleanup(void); - #endif diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h index 730d1b0..e5be7d6 100644 --- a/lustre/include/linux/lustre_lib.h +++ b/lustre/include/linux/lustre_lib.h @@ -22,8 +22,12 @@ * */ +#ifndef _LINUX_LUSTRE_LIB_H +#define _LINUX_LUSTRE_LIB_H + #ifndef _LUSTRE_LIB_H -#define _LUSTRE_LIB_H +#error Do not #include this file directly. 
#include instead +#endif #ifndef __KERNEL__ # include @@ -35,9 +39,7 @@ # include # include #endif -#include -#include -#include +#include #ifndef LP_POISON #if BITS_PER_LONG > 32 @@ -51,538 +53,7 @@ #endif #endif -/* prng.c */ -unsigned int ll_rand(void); /* returns a random 32-bit integer */ -void ll_srand(unsigned int, unsigned int); /* seed the generator */ - -/* target.c */ -struct ptlrpc_request; -struct recovd_data; -struct recovd_obd; -struct obd_export; -#include -#include -#include -#include - -int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler); -int target_handle_disconnect(struct ptlrpc_request *req); -void target_destroy_export(struct obd_export *exp); -int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid); -int target_handle_ping(struct ptlrpc_request *req); -void target_committed_to_req(struct ptlrpc_request *req); - -#ifdef HAVE_QUOTA_SUPPORT -/* quotacheck callback, dqacq/dqrel callback handler */ -int target_handle_qc_callback(struct ptlrpc_request *req); -int target_handle_dqacq_callback(struct ptlrpc_request *req); -#else -#define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP) -#define target_handle_qc_callback(req) (0) -#endif - -void target_cancel_recovery_timer(struct obd_device *obd); - -#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ -void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler); -void target_abort_recovery(void *data); -void target_cleanup_recovery(struct obd_device *obd); -int target_queue_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd); -int target_queue_final_reply(struct ptlrpc_request *req, int rc); -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); - -/* client.c */ - -int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); -struct client_obd *client_conn2cli(struct lustre_handle *conn); - -struct 
mdc_open_data; -struct obd_client_handle { - struct lustre_handle och_fh; - struct llog_cookie och_cookie; - struct mdc_open_data *och_mod; - __u32 och_magic; -}; -#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed - -/* statfs_pack.c */ -struct obd_statfs; -struct kstatfs; -void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs); -void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs); - -/* l_lock.c */ -struct lustre_lock { - int l_depth; - struct task_struct *l_owner; - struct semaphore l_sem; - spinlock_t l_spin; -}; - -void l_lock_init(struct lustre_lock *); -void l_lock(struct lustre_lock *); -void l_unlock(struct lustre_lock *); -int l_has_lock(struct lustre_lock *); - - -/* - * OBD IOCTLS - */ -#define OBD_IOCTL_VERSION 0x00010004 - -struct obd_ioctl_data { - uint32_t ioc_len; - uint32_t ioc_version; - - uint64_t ioc_cookie; - uint32_t ioc_conn1; - uint32_t ioc_conn2; - - struct obdo ioc_obdo1; - struct obdo ioc_obdo2; - - obd_size ioc_count; - obd_off ioc_offset; - uint32_t ioc_dev; - uint32_t ioc_command; - - uint64_t ioc_nid; - uint32_t ioc_nal; - uint32_t ioc_type; - - /* buffers the kernel will treat as user pointers */ - uint32_t ioc_plen1; - char *ioc_pbuf1; - uint32_t ioc_plen2; - char *ioc_pbuf2; - - /* inline buffers for various arguments */ - uint32_t ioc_inllen1; - char *ioc_inlbuf1; - uint32_t ioc_inllen2; - char *ioc_inlbuf2; - uint32_t ioc_inllen3; - char *ioc_inlbuf3; - uint32_t ioc_inllen4; - char *ioc_inlbuf4; - - char ioc_bulk[0]; -}; - -struct obd_ioctl_hdr { - uint32_t ioc_len; - uint32_t ioc_version; -}; - -static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) -{ - int len = size_round(sizeof(struct obd_ioctl_data)); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - len += size_round(data->ioc_inllen3); - len += size_round(data->ioc_inllen4); - return len; -} - - -static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR("OBD 
ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen3 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen4 > (1<<30)) { - CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf3 && !data->ioc_inllen3) { - CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf4 && !data->ioc_inllen4) { - CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR("OBD ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR("OBD ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR("OBD ioctl: plen1 set but NULL pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR("OBD ioctl: plen2 set but NULL pointer\n"); - return 1; - } - if (obd_ioctl_packlen(data) > data->ioc_len) { - CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n", - obd_ioctl_packlen(data), data->ioc_len); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - data->ioc_len = obd_ioctl_packlen(data); - data->ioc_version = OBD_IOCTL_VERSION; - - if (*pbuf && data->ioc_len > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = 
(struct obd_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - if (obd_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} - -static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - - if (!pbuf) - return 1; - overlay = (struct obd_ioctl_data *)pbuf; - - /* Preserve the caller's buffer pointers */ - overlay->ioc_inlbuf1 = data->ioc_inlbuf1; - overlay->ioc_inlbuf2 = data->ioc_inlbuf2; - overlay->ioc_inlbuf3 = data->ioc_inlbuf3; - overlay->ioc_inlbuf4 = data->ioc_inlbuf4; - - memcpy(data, pbuf, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - - return 0; -} -#endif - -#include - -/* buffer MUST be at least the size of obd_ioctl_hdr */ -static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) -{ - struct obd_ioctl_hdr hdr; - struct obd_ioctl_data *data; - int err; - int offset = 0; - ENTRY; - - err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); - if (err) - RETURN(err); - - if (hdr.ioc_version != OBD_IOCTL_VERSION) { - CERROR("Version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { - CERROR("User buffer len %d exceeds %d max buffer\n", - hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); - RETURN(-EINVAL); - } - - if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { - CERROR("user buffer too small 
for ioctl (%d)\n", hdr.ioc_len); - RETURN(-EINVAL); - } - - /* XXX allocate this more intelligently, using kmalloc when - * appropriate */ - OBD_VMALLOC(*buf, hdr.ioc_len); - if (*buf == NULL) { - CERROR("Cannot allocate control buffer of len %d\n", - hdr.ioc_len); - RETURN(-EINVAL); - } - *len = hdr.ioc_len; - data = (struct obd_ioctl_data *)*buf; - - err = copy_from_user(*buf, (void *)arg, hdr.ioc_len); - if (err) { - OBD_VFREE(*buf, hdr.ioc_len); - RETURN(err); - } - - if (obd_ioctl_is_invalid(data)) { - CERROR("ioctl not correctly formatted\n"); - OBD_VFREE(*buf, hdr.ioc_len); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - offset += size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; - offset += size_round(data->ioc_inllen2); - } - - if (data->ioc_inllen3) { - data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; - offset += size_round(data->ioc_inllen3); - } - - if (data->ioc_inllen4) { - data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; - } - - RETURN(0); -} - -static inline void obd_ioctl_freedata(char *buf, int len) -{ - ENTRY; - - OBD_VFREE(buf, len); - EXIT; - return; -} - -#define OBD_IOC_CREATE _IOR ('f', 101, long) -#define OBD_IOC_DESTROY _IOW ('f', 104, long) -#define OBD_IOC_PREALLOCATE _IOWR('f', 105, long) - -#define OBD_IOC_SETATTR _IOW ('f', 107, long) -#define OBD_IOC_GETATTR _IOR ('f', 108, long) -#define OBD_IOC_READ _IOWR('f', 109, long) -#define OBD_IOC_WRITE _IOWR('f', 110, long) - - -#define OBD_IOC_STATFS _IOWR('f', 113, long) -#define OBD_IOC_SYNC _IOW ('f', 114, long) -#define OBD_IOC_READ2 _IOWR('f', 115, long) -#define OBD_IOC_FORMAT _IOWR('f', 116, long) -#define OBD_IOC_PARTITION _IOWR('f', 117, long) -#define OBD_IOC_COPY _IOWR('f', 120, long) -#define OBD_IOC_MIGR _IOWR('f', 121, long) -#define OBD_IOC_PUNCH _IOWR('f', 122, long) - -#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, long) -#define OBD_IOC_BRW_READ _IOWR('f', 125, 
long) -#define OBD_IOC_BRW_WRITE _IOWR('f', 126, long) -#define OBD_IOC_NAME2DEV _IOWR('f', 127, long) -#define OBD_IOC_UUID2DEV _IOWR('f', 130, long) -#define OBD_IOC_GETNAME _IOR ('f', 131, long) - -#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long) -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long) - -#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 ) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, long) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long) - -#define OBD_GET_VERSION _IOWR ('f', 144, long) - -#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long) - -#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) -#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) -#define OBD_IOC_LOV_SETEA _IOW ('f', 156, long) - -#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) -#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) -#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) - -#define OBD_IOC_MOUNTOPT _IOWR('f', 170, long) - -#define OBD_IOC_RECORD _IOWR('f', 180, long) -#define OBD_IOC_ENDRECORD _IOWR('f', 181, long) -#define OBD_IOC_PARSE _IOWR('f', 182, long) -#define OBD_IOC_DORECORD _IOWR('f', 183, long) -#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, long) -#define OBD_IOC_DUMP_LOG _IOWR('f', 185, long) -#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, long) -#define OBD_IOC_PARAM _IOW ('f', 187, long) - -#define OBD_IOC_CATLOGLIST _IOWR('f', 190, long) -#define OBD_IOC_LLOG_INFO _IOWR('f', 191, long) -#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, long) -#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, long) -#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, long) -#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, long) -#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, long) - -#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long) -#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long) -#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long) -#define ECHO_IOC_CANCEL _IOWR('f', 203, long) - -/* XXX _IOWR('f', 250, long) has been defined 
in - * lnet/include/libcfs/kp30.h for debug, don't use it - */ - -/* Until such time as we get_info the per-stripe maximum from the OST, - * we define this to be 2T - 4k, which is the ext3 maxbytes. */ -#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL - -#define POISON_BULK 0 - -/* - * l_wait_event is a flexible sleeping function, permitting simple caller - * configuration of interrupt and timeout sensitivity along with actions to - * be performed in the event of either exception. - * - * The first form of usage looks like this: - * - * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler, - * intr_handler, callback_data); - * rc = l_wait_event(waitq, condition, &lwi); - * - * l_wait_event() makes the current process wait on 'waitq' until 'condition' - * is TRUE or a "killable" signal (SIGTERM, SIKGILL, SIGINT) is pending. It - * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before - * 'condition' becomes true, it optionally calls the specified 'intr_handler' - * if not NULL, and returns -EINTR. - * - * If a non-zero timeout is specified, signals are ignored until the timeout - * has expired. At this time, if 'timeout_handler' is not NULL it is called. - * If it returns FALSE l_wait_event() continues to wait as described above with - * signals enabled. Otherwise it returns -ETIMEDOUT. - * - * LWI_INTR(intr_handler, callback_data) is shorthand for - * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data) - * - * The second form of usage looks like this: - * - * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler); - * rc = l_wait_event(waitq, condition, &lwi); - * - * This form is the same as the first except that it COMPLETELY IGNORES - * SIGNALS. The caller must therefore beware that if 'timeout' is zero, or if - * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that - * can unblock the current process is 'condition' becoming TRUE. 
- * - * Another form of usage is: - * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval, - * timeout_handler); - * rc = l_wait_event(waitq, condition, &lwi); - * This is the same as previous case, but condition is checked once every - * 'interval' jiffies (if non-zero). - * - * Subtle synchronization point: this macro does *not* necessary takes - * wait-queue spin-lock before returning, and, hence, following idiom is safe - * ONLY when caller provides some external locking: - * - * Thread1 Thread2 - * - * l_wait_event(&obj->wq, ....); (1) - * - * wake_up(&obj->wq): (2) - * spin_lock(&q->lock); (2.1) - * __wake_up_common(q, ...); (2.2) - * spin_unlock(&q->lock, flags); (2.3) - * - * OBD_FREE_PTR(obj); (3) - * - * As l_wait_event() may "short-cut" execution and return without taking - * wait-queue spin-lock, some additional synchronization is necessary to - * guarantee that step (3) can begin only after (2.3) finishes. - * - * XXX nikita: some ptlrpc daemon threads have races of that sort. - * - */ - -#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1)) - -struct l_wait_info { - long lwi_timeout; - long lwi_interval; - int (*lwi_on_timeout)(void *); - void (*lwi_on_signal)(void *); - void *lwi_cb_data; -}; - -/* NB: LWI_TIMEOUT ignores signals completely */ -#define LWI_TIMEOUT(time, cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = cb, \ - .lwi_cb_data = data, \ - .lwi_interval = 0 \ -}) - -#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = cb, \ - .lwi_cb_data = data, \ - .lwi_interval = interval \ -}) - - -#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \ -((struct l_wait_info) { \ - .lwi_timeout = time, \ - .lwi_on_timeout = time_cb, \ - .lwi_on_signal = (sig_cb == NULL) ? 
LWI_ON_SIGNAL_NOOP : sig_cb, \ - .lwi_cb_data = data, \ - .lwi_interval = 0 \ -}) - -#define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) +#define OBD_IOC_DATA_TYPE long #define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ sigmask(SIGTERM) | sigmask(SIGQUIT) | \ @@ -602,152 +73,7 @@ static inline sigset_t l_w_e_set_sigs(int sigs) return old; } - -/* - * wait for @condition to become true, but no longer than timeout, specified - * by @info. - */ -#define __l_wait_event(wq, condition, info, ret, excl) \ -do { \ - wait_queue_t __wait; \ - unsigned long __timeout = info->lwi_timeout; \ - unsigned long __irqflags; \ - sigset_t __blocked; \ - \ - ret = 0; \ - if (condition) \ - break; \ - \ - init_waitqueue_entry(&__wait, current); \ - if (excl) \ - add_wait_queue_exclusive(&wq, &__wait); \ - else \ - add_wait_queue(&wq, &__wait); \ - \ - /* Block all signals (just the non-fatal ones if no timeout). */ \ - if (info->lwi_on_signal != NULL && __timeout == 0) \ - __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - else \ - __blocked = l_w_e_set_sigs(0); \ - \ - for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - \ - if (condition) \ - break; \ - \ - if (__timeout == 0) { \ - schedule(); \ - } else { \ - unsigned long interval = info->lwi_interval? \ - min_t(unsigned long, \ - info->lwi_interval,__timeout):\ - __timeout; \ - __timeout -= interval - schedule_timeout(interval); \ - if (__timeout == 0) { \ - if (info->lwi_on_timeout == NULL || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - /* Take signals after the timeout expires. 
*/ \ - if (info->lwi_on_signal != NULL) \ - (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - } \ - } \ - \ - if (condition) \ - break; \ - \ - if (signal_pending(current)) { \ - if (info->lwi_on_signal != NULL && __timeout == 0) { \ - if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ - info->lwi_on_signal(info->lwi_cb_data);\ - ret = -EINTR; \ - break; \ - } \ - /* We have to do this here because some signals */ \ - /* are not blockable - ie from strace(1). */ \ - /* In these cases we want to schedule_timeout() */ \ - /* again, because we don't want that to return */ \ - /* -EINTR when the RPC actually succeeded. */ \ - /* the RECALC_SIGPENDING below will deliver the */ \ - /* signal properly. */ \ - SIGNAL_MASK_LOCK(current, __irqflags); \ - CLEAR_SIGPENDING; \ - SIGNAL_MASK_UNLOCK(current, __irqflags); \ - } \ - } \ - \ - SIGNAL_MASK_LOCK(current, __irqflags); \ - current->blocked = __blocked; \ - RECALC_SIGPENDING; \ - SIGNAL_MASK_UNLOCK(current, __irqflags); \ - \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while(0) - -#else /* !__KERNEL__ */ -#define __l_wait_event(wq, condition, info, ret, excl) \ -do { \ - long __timeout = info->lwi_timeout; \ - long __now; \ - long __then = 0; \ - int __timed_out = 0; \ - \ - ret = 0; \ - if (condition) \ - break; \ - \ - if (__timeout == 0) \ - __timeout = 1000000000; \ - else \ - __then = time(NULL); \ - \ - while (!(condition)) { \ - if (liblustre_wait_event(info->lwi_interval?:__timeout) || \ - (info->lwi_interval && info->lwi_interval < __timeout)) {\ - if (__timeout != 0 && info->lwi_timeout != 0) { \ - __now = time(NULL); \ - __timeout -= __now - __then; \ - if (__timeout < 0) \ - __timeout = 0; \ - __then = __now; \ - } \ - continue; \ - } \ - \ - if (info->lwi_timeout != 0 && !__timed_out) { \ - __timed_out = 1; \ - if (info->lwi_on_timeout == NULL || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - } \ - } \ -} while (0) - -#endif /* __KERNEL__ */ 
- -#define l_wait_event(wq, condition, info) \ -({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, __ret, 0); \ - __ret; \ -}) - -#define l_wait_event_exclusive(wq, condition, info) \ -({ \ - int __ret; \ - struct l_wait_info *__info = (info); \ - \ - __l_wait_event(wq, condition, __info, __ret, 1); \ - __ret; \ -}) +#endif #ifdef __KERNEL__ /* initialize ost_lvb according to inode */ @@ -763,11 +89,5 @@ static inline void inode_init_lvb(struct inode *inode, struct ost_lvb *lvb) /* defined in liblustre/llite_lib.h */ #endif -#ifdef __KERNEL__ -#define LIBLUSTRE_CLIENT (0) -#else -#define LIBLUSTRE_CLIENT (1) -#endif - #endif /* _LUSTRE_LIB_H */ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index 578a7c0..17f4546 100644 --- a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -2,8 +2,12 @@ * vim:expandtab:shiftwidth=8:tabstop=8: +#ifndef _LINUX_LL_H +#define _LINUX_LL_H + #ifndef _LL_H -#define _LL_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ @@ -19,18 +23,15 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include -/* careful, this is easy to screw up */ -#define PAGE_CACHE_MAXBYTES ((__u64)(~0UL) << PAGE_CACHE_SHIFT) - /* lprocfs.c */ enum { LPROC_LL_DIRTY_HITS = 0, @@ -68,114 +69,7 @@ enum { }; #else -#include +#include #endif /* __KERNEL__ */ -#define LLAP_FROM_COOKIE(c) \ - (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \ - (struct ll_async_page *)(c)) - -#define LL_MAX_BLKSIZE (4UL * 1024 * 1024) - -#include - #endif - -struct lustre_rw_params { - int lrp_lock_mode; - ldlm_policy_data_t lrp_policy; - obd_flag lrp_brw_flags; - int lrp_ast_flags; -}; - -/* - * XXX nikita: this function lives in the header because it is used by both - * llite kernel module and liblustre library, and there is no (?) 
better place - * to put it in. - */ -static inline void lustre_build_lock_params(int cmd, unsigned long open_flags, - __u64 connect_flags, - loff_t pos, ssize_t len, - struct lustre_rw_params *params) -{ - params->lrp_lock_mode = (cmd == OBD_BRW_READ) ? LCK_PR : LCK_PW; - params->lrp_brw_flags = 0; - - params->lrp_policy.l_extent.start = pos; - params->lrp_policy.l_extent.end = pos + len - 1; - /* - * for now O_APPEND always takes local locks. - */ - if (cmd == OBD_BRW_WRITE && (open_flags & O_APPEND)) { - params->lrp_policy.l_extent.start = 0; - params->lrp_policy.l_extent.end = OBD_OBJECT_EOF; - } else if (LIBLUSTRE_CLIENT && (connect_flags & OBD_CONNECT_SRVLOCK)) { - /* - * liblustre: OST-side locking for all non-O_APPEND - * reads/writes. - */ - params->lrp_lock_mode = LCK_NL; - params->lrp_brw_flags = OBD_BRW_SRVLOCK; - } else { - /* - * nothing special for the kernel. In the future llite may use - * OST-side locks for small writes into highly contended - * files. - */ - } - params->lrp_ast_flags = (open_flags & O_NONBLOCK) ? - LDLM_FL_BLOCK_NOWAIT : 0; -} - -/* - * This is embedded into liblustre and llite super-blocks to keep track of - * connect flags (capabilities) supported by all imports given mount is - * connected to. - */ -struct lustre_client_ocd { - /* - * This is conjunction of connect_flags across all imports (LOVs) this - * mount is connected to. This field is updated by ll_ocd_update() - * under ->lco_lock. - */ - __u64 lco_flags; - spinlock_t lco_lock; -}; - -/* - * This function is used as an upcall-callback hooked by liblustre and llite - * clients into obd_notify() listeners chain to handle notifications about - * change of import connect_flags. See llu_fsswop_mount() and - * lustre_common_fill_super(). - * - * Again, it is dumped into this header for the lack of a better place. 
- */ -static inline int ll_ocd_update(struct obd_device *host, - struct obd_device *watched, - enum obd_notify_event ev, void *owner) -{ - struct lustre_client_ocd *lco; - struct client_obd *cli; - __u64 flags; - int result; - - ENTRY; - if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { - cli = &watched->u.cli; - lco = owner; - flags = cli->cl_import->imp_connect_data.ocd_connect_flags; - CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n", - lco->lco_flags, flags); - spin_lock(&lco->lco_lock); - lco->lco_flags &= flags; - spin_unlock(&lco->lco_lock); - result = 0; - } else { - CERROR("unexpected notification from %s %s!\n", - watched->obd_type->typ_name, - watched->obd_name); - result = -EINVAL; - } - RETURN(result); -} - diff --git a/lustre/include/linux/lustre_log.h b/lustre/include/linux/lustre_log.h index 2055a0f..65e1c51 100644 --- a/lustre/include/linux/lustre_log.h +++ b/lustre/include/linux/lustre_log.h @@ -32,383 +32,13 @@ * - MDS replication logs */ -#ifndef _LUSTRE_LOG_H -#define _LUSTRE_LOG_H - -#include -#include - -#define LOG_NAME_LIMIT(logname, name) \ - snprintf(logname, sizeof(logname), "LOGS/%s", name) -#define LLOG_EEMPTY 4711 - -struct plain_handle_data { - struct list_head phd_entry; - struct llog_handle *phd_cat_handle; - struct llog_cookie phd_cookie; /* cookie of this log in its cat */ - int phd_last_idx; -}; - -struct cat_handle_data { - struct list_head chd_head; - struct llog_handle *chd_current_log; /* currently open log */ -}; - -/* In-memory descriptor for a log object or log catalog */ -struct llog_handle { - struct rw_semaphore lgh_lock; - struct llog_logid lgh_id; /* id of this log */ - struct llog_log_hdr *lgh_hdr; - struct file *lgh_file; - int lgh_last_idx; - struct llog_ctxt *lgh_ctxt; - union { - struct plain_handle_data phd; - struct cat_handle_data chd; - } u; -}; - -/* llog.c - general API */ -typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); -typedef int 
(*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data); -extern struct llog_handle *llog_alloc_handle(void); -int llog_init_handle(struct llog_handle *handle, int flags, - struct obd_uuid *uuid); -extern void llog_free_handle(struct llog_handle *handle); -int llog_process(struct llog_handle *loghandle, llog_cb_t cb, - void *data, void *catdata); -int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, - void *data, void *catdata); -extern int llog_cancel_rec(struct llog_handle *loghandle, int index); -extern int llog_close(struct llog_handle *cathandle); -extern int llog_get_size(struct llog_handle *loghandle); - -/* llog_cat.c - catalog api */ -struct llog_process_data { - void *lpd_data; - llog_cb_t lpd_cb; -}; - -struct llog_process_cat_data { - int first_idx; - int last_idx; - /* to process catalog across zero record */ -}; - -int llog_cat_put(struct llog_handle *cathandle); -int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, - struct llog_cookie *reccookie, void *buf); -int llog_cat_cancel_records(struct llog_handle *cathandle, int count, - struct llog_cookie *cookies); -int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); -int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); -int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); - -/* llog_obd.c */ -int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, - int count, struct llog_logid *logid,struct llog_operations *op); -int llog_cleanup(struct llog_ctxt *); -int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); -int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, struct llog_cookie *logcookies, - int numcookies); -int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); - -int llog_obd_origin_setup(struct obd_device *obd, int index, - struct obd_device *disk_obd, 
int count, - struct llog_logid *logid); -int llog_obd_origin_cleanup(struct llog_ctxt *ctxt); -int llog_obd_origin_add(struct llog_ctxt *ctxt, - struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); - -int llog_cat_initialize(struct obd_device *obd, int count); -int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid); - -int obd_llog_finish(struct obd_device *obd, int count); - -/* llog_ioctl.c */ -int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data); -int llog_catalog_list(struct obd_device *obd, int count, - struct obd_ioctl_data *data); - -/* llog_net.c */ -int llog_initiator_connect(struct llog_ctxt *ctxt); -int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp); -int llog_origin_connect(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, struct llog_gen *gen, - struct obd_uuid *uuid); -int llog_handle_connect(struct ptlrpc_request *req); - -/* recov_thread.c */ -int llog_obd_repl_cancel(struct llog_ctxt *ctxt, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags); -int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); -int llog_repl_connect(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, struct llog_gen *gen, - struct obd_uuid *uuid); - -struct llog_operations { - int (*lop_write_rec)(struct llog_handle *loghandle, - struct llog_rec_hdr *rec, - struct llog_cookie *logcookies, int numcookies, - void *, int idx); - int (*lop_destroy)(struct llog_handle *handle); - int (*lop_next_block)(struct llog_handle *h, int *curr_idx, - int next_idx, __u64 *offset, void *buf, int len); - int (*lop_prev_block)(struct llog_handle *h, - int prev_idx, void *buf, int len); - int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, - struct llog_logid *logid, char *name); - int (*lop_close)(struct llog_handle *handle); - int (*lop_read_header)(struct 
llog_handle *handle); - - int (*lop_setup)(struct obd_device *obd, int ctxt_idx, - struct obd_device *disk_obd, int count, - struct llog_logid *logid); - int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); - int (*lop_cleanup)(struct llog_ctxt *ctxt); - int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, - struct lov_stripe_md *lsm, - struct llog_cookie *logcookies, int numcookies); - int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, - int count, struct llog_cookie *cookies, int flags); - int (*lop_connect)(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, struct llog_gen *gen, - struct obd_uuid *uuid); - /* XXX add 2 more: commit callbacks and llog recovery functions */ -}; - -/* llog_lvfs.c */ -extern struct llog_operations llog_lvfs_ops; -int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray); - -struct llog_ctxt { - int loc_idx; /* my index the obd array of ctxt's */ - struct llog_gen loc_gen; - struct obd_device *loc_obd; /* points back to the containing obd*/ - struct obd_export *loc_exp; /* parent "disk" export (e.g. 
MDS) */ - struct obd_import *loc_imp; /* to use in RPC's: can be backward - pointing import */ - struct llog_operations *loc_logops; - struct llog_handle *loc_handle; - struct llog_canceld_ctxt *loc_llcd; - struct semaphore loc_sem; /* protects loc_llcd and loc_imp */ - void *llog_proc_cb; -}; - -static inline void llog_gen_init(struct llog_ctxt *ctxt) -{ - struct obd_device *obd = ctxt->loc_exp->exp_obd; - - if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) - ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count; - else if (!strstr(obd->obd_type->typ_name, LUSTRE_FILTER_NAME)) - ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count; - else - ctxt->loc_gen.mnt_cnt = 0; -} - -static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b) -{ - if (a.mnt_cnt < b.mnt_cnt) - return 1; - if (a.mnt_cnt > b.mnt_cnt) - return 0; - return(a.conn_cnt < b.conn_cnt ? 1 : 0); -} - -#define LLOG_GEN_INC(gen) ((gen).conn_cnt ++) -#define LLOG_PROC_BREAK 0x0001 -#define LLOG_DEL_RECORD 0x0002 - -static inline int llog_obd2ops(struct llog_ctxt *ctxt, - struct llog_operations **lop) -{ - if (ctxt == NULL) - return -ENOTCONN; - - *lop = ctxt->loc_logops; - if (*lop == NULL) - return -EOPNOTSUPP; - - return 0; -} - -static inline int llog_handle2ops(struct llog_handle *loghandle, - struct llog_operations **lop) -{ - if (loghandle == NULL) - return -EINVAL; +#ifndef _LINUX_LUSTRE_LOG_H +#define _LINUX_LUSTRE_LOG_H - return llog_obd2ops(loghandle->lgh_ctxt, lop); -} - -static inline int llog_data_len(int len) -{ - return size_round(len); -} - -static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, - int index) -{ - if (index < 0 || index >= LLOG_MAX_CTXTS) - return NULL; - - return obd->obd_llog_ctxt[index]; -} - -static inline int llog_write_rec(struct llog_handle *handle, - struct llog_rec_hdr *rec, - struct llog_cookie *logcookies, - int numcookies, void *buf, int idx) -{ - struct llog_operations *lop; - int rc, buflen; - ENTRY; - - rc = llog_handle2ops(handle, 
&lop); - if (rc) - RETURN(rc); - if (lop->lop_write_rec == NULL) - RETURN(-EOPNOTSUPP); - - if (buf) - buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) - + sizeof(struct llog_rec_tail); - else - buflen = rec->lrh_len; - LASSERT(size_round(buflen) == buflen); - - rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); - RETURN(rc); -} - -static inline int llog_read_header(struct llog_handle *handle) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_handle2ops(handle, &lop); - if (rc) - RETURN(rc); - if (lop->lop_read_header == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_read_header(handle); - RETURN(rc); -} - -static inline int llog_destroy(struct llog_handle *handle) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_handle2ops(handle, &lop); - if (rc) - RETURN(rc); - if (lop->lop_destroy == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_destroy(handle); - RETURN(rc); -} - -#if 0 -static inline int llog_cancel(struct obd_export *exp, - struct lov_stripe_md *lsm, int count, - struct llog_cookie *cookies, int flags) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - RETURN(rc); - if (lop->lop_cancel == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_cancel(exp, lsm, count, cookies, flags); - RETURN(rc); -} +#ifndef _LUSTRE_LOG_H +#error Do not #include this file directly. 
#include instead #endif -static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, - int next_idx, __u64 *cur_offset, void *buf, - int len) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - RETURN(rc); - if (lop->lop_next_block == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_next_block(loghandle, cur_idx, next_idx, cur_offset, buf, - len); - RETURN(rc); -} - -static inline int llog_prev_block(struct llog_handle *loghandle, - int prev_idx, void *buf, int len) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_handle2ops(loghandle, &lop); - if (rc) - RETURN(rc); - if (lop->lop_prev_block == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_prev_block(loghandle, prev_idx, buf, len); - RETURN(rc); -} - -static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, - struct llog_logid *logid, char *name) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_obd2ops(ctxt, &lop); - if (rc) - RETURN(rc); - if (lop->lop_create == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_create(ctxt, res, logid, name); - RETURN(rc); -} - -static inline int llog_connect(struct llog_ctxt *ctxt, int count, - struct llog_logid *logid, struct llog_gen *gen, - struct obd_uuid *uuid) -{ - struct llog_operations *lop; - int rc; - ENTRY; - - rc = llog_obd2ops(ctxt, &lop); - if (rc) - RETURN(rc); - if (lop->lop_connect == NULL) - RETURN(-EOPNOTSUPP); - - rc = lop->lop_connect(ctxt, count, logid, gen, uuid); - RETURN(rc); -} +#define LUSTRE_LOG_SERVER #endif diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index 97f089c..b305234 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -7,59 +7,23 @@ * See also lustre_idl.h for wire formats of requests. */ +#ifndef _LINUX_LUSTRE_MDS_H +#define _LINUX_LUSTRE_MDS_H + #ifndef _LUSTRE_MDS_H -#define _LUSTRE_MDS_H +#error Do not #include this file directly. 
#include instead +#endif #ifdef __KERNEL__ # include # include +# include #endif -#include -#include -#include -#include -#include -#include -#include struct mds_obd; struct ptlrpc_request; struct obd_device; - -struct mds_update_record { - __u32 ur_opcode; - struct ll_fid *ur_fid1; - struct ll_fid *ur_fid2; - int ur_namelen; - char *ur_name; - int ur_tgtlen; - char *ur_tgt; - int ur_eadatalen; - void *ur_eadata; - int ur_cookielen; - struct llog_cookie *ur_logcookies; - struct iattr ur_iattr; - struct lvfs_ucred ur_uc; - __u64 ur_rdev; - __u64 ur_time; - __u32 ur_mode; - __u32 ur_flags; - struct lvfs_grp_hash_entry *ur_grp_entry; -}; - -/* file data for open files on MDS */ -struct mds_file_data { - struct portals_handle mfd_handle; /* must be first */ - atomic_t mfd_refcount; - struct list_head mfd_list; /* protected by med_open_lock */ - __u64 mfd_xid; - int mfd_mode; - struct dentry *mfd_dentry; -}; - -/* mds/mds_reint.c */ -int mds_reint_rec(struct mds_update_record *r, int offset, - struct ptlrpc_request *req, struct lustre_handle *); +struct ll_file_data; /* mds/handler.c */ #ifdef __KERNEL__ @@ -76,16 +40,4 @@ int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt); int mds_fs_cleanup(struct obd_device *obddev); #endif -/* ioctls for trying requests */ -#define IOC_REQUEST_TYPE 'f' -#define IOC_REQUEST_MIN_NR 30 - -#define IOC_REQUEST_GETATTR _IOWR('f', 30, long) -#define IOC_REQUEST_READPAGE _IOWR('f', 31, long) -#define IOC_REQUEST_SETATTR _IOWR('f', 32, long) -#define IOC_REQUEST_CREATE _IOWR('f', 33, long) -#define IOC_REQUEST_OPEN _IOWR('f', 34, long) -#define IOC_REQUEST_CLOSE _IOWR('f', 35, long) -#define IOC_REQUEST_MAX_NR 35 - #endif diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index caee8db..1d2f17e 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -20,8 +20,12 @@ * */ +#ifndef _LINUX_LUSTRE_NET_H +#define _LINUX_LUSTRE_NET_H + #ifndef _LUSTRE_NET_H -#define 
_LUSTRE_NET_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ #include @@ -32,821 +36,11 @@ #endif #endif -#include -// #include -#include -#include -#include -#include -#include - -/* MD flags we _always_ use */ -#define PTLRPC_MD_OPTIONS 0 - -/* Define maxima for bulk I/O - * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) - * these limits are system wide and not interface-local. */ -#define PTLRPC_MAX_BRW_SIZE LNET_MTU -#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE/PAGE_SIZE) - -/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! */ -#ifdef __KERNEL__ -# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) -# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" -# endif -# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * PAGE_SIZE)) -# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * PAGE_SIZE" -# endif -# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU) -# error "PTLRPC_MAX_BRW_SIZE too big" -# endif -# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV) -# error "PTLRPC_MAX_BRW_PAGES too big" -# endif -#endif /* __KERNEL__ */ - -/* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request - * buffers */ -#define SVC_BUF_VMALLOC_THRESHOLD (2 * PAGE_SIZE) - -/* The following constants determine how memory is used to buffer incoming - * service requests. - * - * ?_NBUFS # buffers to allocate when growing the pool - * ?_BUFSIZE # bytes in a single request buffer - * ?_MAXREQSIZE # maximum request service will receive - * - * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk - * of ?_NBUFS is added to the pool. - * - * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are - * considered full when less than ?_MAXREQSIZE is left in them. 
- */ - -#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64) -#define LDLM_NBUFS (64 * smp_num_cpus) -#define LDLM_BUFSIZE (8 * 1024) -#define LDLM_MAXREQSIZE (5 * 1024) -#define LDLM_MAXREPSIZE (1024) - -#define MDT_MIN_THREADS 2UL -#define MDT_MAX_THREADS 32UL -#define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ - num_physpages >> (25 - PAGE_SHIFT)), 2UL) -#define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ - num_physpages >> (25 - PAGE_SHIFT)), 2UL) - -#define MDS_NBUFS (64 * smp_num_cpus) -#define MDS_BUFSIZE (8 * 1024) -/* Assume file name length = FNAME_MAX = 256 (true for ext3). - * path name length = PATH_MAX = 4096 - * LOV MD size max = EA_MAX = 4000 - * symlink: FNAME_MAX + PATH_MAX <- largest - * link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create) - * rename: FNAME_MAX + FNAME_MAX - * open: FNAME_MAX + EA_MAX - * - * MDS_MAXREQSIZE ~= 4736 bytes = - * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX - * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header - * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem: - * = 9210 bytes = lustre_msg + mds_body + 160 * (easize + cookiesize) - * - * Realistic size is about 512 bytes (20 character name + 128 char symlink), - * except in the open case where there are a large number of OSTs in a LOV. 
- */ -#define MDS_MAXREQSIZE (5 * 1024) -#define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) - -/* FIXME fix all constants here */ -#define MGS_MAX_THREADS 8UL -#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \ - num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT))) - -#define MGS_NBUFS (64 * smp_num_cpus) -#define MGS_BUFSIZE (8 * 1024) -#define MGS_MAXREQSIZE (5 * 1024) -#define MGS_MAXREPSIZE (9 * 1024) - -#define OST_MAX_THREADS 512UL -#define OST_DEF_THREADS max_t(unsigned long, 2, \ - (num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus) -#define OST_NBUFS (64 * smp_num_cpus) -#define OST_BUFSIZE (8 * 1024) -/* OST_MAXREQSIZE ~= 4768 bytes = - * lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote - * - * - single object with 16 pages is 512 bytes - * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover - */ -#define OST_MAXREQSIZE (5 * 1024) -#define OST_MAXREPSIZE (9 * 1024) - -struct ptlrpc_connection { - struct list_head c_link; - lnet_nid_t c_self; - lnet_process_id_t c_peer; - struct obd_uuid c_remote_uuid; - atomic_t c_refcount; -}; - -struct ptlrpc_client { - __u32 cli_request_portal; - __u32 cli_reply_portal; - char *cli_name; -}; - -/* state flags of requests */ -/* XXX only ones left are those used by the bulk descs as well! 
*/ -#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ -#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ - -#define REQ_MAX_ACK_LOCKS 8 - -#define SWAB_PARANOIA 1 -#if SWAB_PARANOIA -/* unpacking: assert idx not unpacked already */ -#define LASSERT_REQSWAB(rq, idx) \ -do { \ - LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8); \ - LASSERT (((rq)->rq_req_swab_mask & (1 << (idx))) == 0); \ - (rq)->rq_req_swab_mask |= (1 << (idx)); \ -} while (0) - -#define LASSERT_REPSWAB(rq, idx) \ -do { \ - LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8); \ - LASSERT (((rq)->rq_rep_swab_mask & (1 << (idx))) == 0); \ - (rq)->rq_rep_swab_mask |= (1 << (idx)); \ -} while (0) - -/* just looking: assert idx already unpacked */ -#define LASSERT_REQSWABBED(rq, idx) \ -LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8 && \ - ((rq)->rq_req_swab_mask & (1 << (idx))) != 0) - -#define LASSERT_REPSWABBED(rq, idx) \ -LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8 && \ - ((rq)->rq_rep_swab_mask & (1 << (idx))) != 0) -#else -#define LASSERT_REQSWAB(rq, idx) -#define LASSERT_REPSWAB(rq, idx) -#define LASSERT_REQSWABBED(rq, idx) -#define LASSERT_REPSWABBED(rq, idx) -#endif - -union ptlrpc_async_args { - /* Scratchpad for passing args to completion interpreter. Users - * cast to the struct of their choosing, and LASSERT that this is - * big enough. For _tons_ of context, OBD_ALLOC a struct and store - * a pointer to it here. The pointer_arg ensures this struct is at - * least big enough for that. 
*/ - void *pointer_arg[9]; - __u64 space[4]; -}; - -struct ptlrpc_request_set; -typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); - -struct ptlrpc_request_set { - int set_remaining; /* # uncompleted requests */ - wait_queue_head_t set_waitq; - wait_queue_head_t *set_wakeup_ptr; - struct list_head set_requests; - set_interpreter_func set_interpret; /* completion callback */ - void *set_arg; /* completion context */ - /* locked so that any old caller can communicate requests to - * the set holder who can then fold them into the lock-free set */ - spinlock_t set_new_req_lock; - struct list_head set_new_requests; -}; - -struct ptlrpc_bulk_desc; - -/* - * ptlrpc callback & work item stuff - */ -struct ptlrpc_cb_id { - void (*cbid_fn)(lnet_event_t *ev); /* specific callback fn */ - void *cbid_arg; /* additional arg */ -}; - -#define RS_MAX_LOCKS 4 -#define RS_DEBUG 1 - -struct ptlrpc_reply_state { - struct ptlrpc_cb_id rs_cb_id; - struct list_head rs_list; - struct list_head rs_exp_list; - struct list_head rs_obd_list; -#if RS_DEBUG - struct list_head rs_debug_list; -#endif - /* updates to following flag serialised by srv_request_lock */ - unsigned int rs_difficult:1; /* ACK/commit stuff */ - unsigned int rs_scheduled:1; /* being handled? */ - unsigned int rs_scheduled_ever:1;/* any schedule attempts? */ - unsigned int rs_handled:1; /* been handled yet? */ - unsigned int rs_on_net:1; /* reply_out_callback pending? 
*/ - unsigned int rs_prealloc:1; /* rs from prealloc list */ - - int rs_size; - __u64 rs_transno; - __u64 rs_xid; - struct obd_export *rs_export; - struct ptlrpc_service *rs_service; - lnet_handle_md_t rs_md_h; - atomic_t rs_refcount; - - /* locks awaiting client reply ACK */ - int rs_nlocks; - struct lustre_handle rs_locks[RS_MAX_LOCKS]; - ldlm_mode_t rs_modes[RS_MAX_LOCKS]; - /* last member: variable sized reply message */ - struct lustre_msg rs_msg; -}; - -struct ptlrpc_thread; - -enum rq_phase { - RQ_PHASE_NEW = 0xebc0de00, - RQ_PHASE_RPC = 0xebc0de01, - RQ_PHASE_BULK = 0xebc0de02, - RQ_PHASE_INTERPRET = 0xebc0de03, - RQ_PHASE_COMPLETE = 0xebc0de04, -}; - -struct ptlrpc_request_pool { - spinlock_t prp_lock; - struct list_head prp_req_list; /* list of ptlrpc_request structs */ - int prp_rq_size; - void (*prp_populate)(struct ptlrpc_request_pool *, int); -}; - -struct ptlrpc_request { - int rq_type; /* one of PTL_RPC_MSG_* */ - struct list_head rq_list; - struct list_head rq_history_list; /* server-side history */ - __u64 rq_history_seq; /* history sequence # */ - int rq_status; - spinlock_t rq_lock; - /* client-side flags */ - unsigned int rq_intr:1, rq_replied:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, - /* - * when ->rq_replay is set, request is kept by the client even - * after server commits corresponding transaction. This is - * used for operations that require sequence of multiple - * requests to be replayed. The only example currently is file - * open/close. When last request in such a sequence is - * committed, ->rq_replay is cleared on all requests in the - * sequence. 
- */ - rq_replay:1, - rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, - rq_no_delay:1, rq_net_err:1; - enum rq_phase rq_phase; /* one of RQ_PHASE_* */ - atomic_t rq_refcount; /* client-side refcount for SENT race */ - - struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */ - - int rq_request_portal; /* XXX FIXME bug 249 */ - int rq_reply_portal; /* XXX FIXME bug 249 */ - - int rq_nob_received; /* client-side # reply bytes actually received */ - - int rq_reqlen; - struct lustre_msg *rq_reqmsg; - - int rq_timeout; /* time to wait for reply (seconds) */ - int rq_replen; - struct lustre_msg *rq_repmsg; - __u64 rq_transno; - __u64 rq_xid; - struct list_head rq_replay_list; - -#if SWAB_PARANOIA - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; +/* XXX Liang: should be moved to other header instead of here */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) +#ifndef WITH_GROUP_INFO +#define WITH_GROUP_INFO #endif - - int rq_import_generation; - enum lustre_imp_state rq_send_state; - - /* client+server request */ - lnet_handle_md_t rq_req_md_h; - struct ptlrpc_cb_id rq_req_cbid; - - /* server-side... 
*/ - struct timeval rq_arrival_time; /* request arrival time */ - struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ - struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ -#if CRAY_XT3 - __u32 rq_uid; /* peer uid, used in MDS only */ #endif - /* client-only incoming reply */ - lnet_handle_md_t rq_reply_md_h; - wait_queue_head_t rq_reply_waitq; - struct ptlrpc_cb_id rq_reply_cbid; - - lnet_nid_t rq_self; - lnet_process_id_t rq_peer; - struct obd_export *rq_export; - struct obd_import *rq_import; - - void (*rq_replay_cb)(struct ptlrpc_request *); - void (*rq_commit_cb)(struct ptlrpc_request *); - void *rq_cb_data; - - struct ptlrpc_bulk_desc *rq_bulk; /* client side bulk */ - time_t rq_sent; /* when request sent, seconds */ - - /* Multi-rpc bits */ - struct list_head rq_set_chain; - struct ptlrpc_request_set *rq_set; - void *rq_interpret_reply; /* Async completion handler */ - union ptlrpc_async_args rq_async_args; /* Async completion context */ - void *rq_ptlrpcd_data; - struct ptlrpc_request_pool *rq_pool; /* Pool if request from - preallocated list */ -}; - -static inline const char * -ptlrpc_rqphase2str(const struct ptlrpc_request *req) -{ - switch (req->rq_phase) { - case RQ_PHASE_NEW: - return "New"; - case RQ_PHASE_RPC: - return "Rpc"; - case RQ_PHASE_BULK: - return "Bulk"; - case RQ_PHASE_INTERPRET: - return "Interpret"; - case RQ_PHASE_COMPLETE: - return "Complete"; - default: - return "?Phase?"; - } -} - -/* Spare the preprocessor, spoil the bugs. */ -#define FLAG(field, str) (field ? 
str : "") - -#define DEBUG_REQ_FLAGS(req) \ - ptlrpc_rqphase2str(req), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), \ - FLAG(req->rq_waiting, "W") - -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s" - -#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) \ -CDEB_TYPE(level, "@@@ " fmt \ - " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \ - REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \ - req->rq_transno, \ - req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \ - req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "", \ - req->rq_import ? \ - (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "", \ - (req->rq_import && req->rq_import->imp_client) ? \ - req->rq_import->imp_client->cli_request_portal : -1, \ - req->rq_reqlen, req->rq_replen, \ - atomic_read(&req->rq_refcount), \ - DEBUG_REQ_FLAGS(req), \ - req->rq_reqmsg ? req->rq_reqmsg->flags : 0, \ - req->rq_repmsg ? req->rq_repmsg->flags : 0, \ - req->rq_status, req->rq_repmsg ? req->rq_repmsg->status : 0) - -/* for most callers (level is a constant) this is resolved at compile time */ -#define DEBUG_REQ(level, req, fmt, args...) \ -do { \ - if ((level) & (D_ERROR | D_WARNING)) \ - __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args); \ - else \ - __DEBUG_REQ(CDEBUG, level, req, fmt, ## args); \ -} while (0) - -#define DEBUG_REQ_EX(level, req, fmt, args...) 
\ -do { \ - if ((level) & (D_ERROR | D_WARNING)) \ - __DEBUG_REQ(CDEBUG_LIMIT, D_ERROR, req, fmt, ## args); \ - else \ - __DEBUG_REQ(CDEBUG_EX, level, req, fmt, ## args); \ -} while (0) - -struct ptlrpc_bulk_page { - struct list_head bp_link; - int bp_buflen; - int bp_pageoffset; /* offset within a page */ - struct page *bp_page; -}; - -#define BULK_GET_SOURCE 0 -#define BULK_PUT_SINK 1 -#define BULK_GET_SINK 2 -#define BULK_PUT_SOURCE 3 - -struct ptlrpc_bulk_desc { - unsigned int bd_success:1; /* completed successfully */ - unsigned int bd_network_rw:1; /* accessible to the network */ - unsigned int bd_type:2; /* {put,get}{source,sink} */ - unsigned int bd_registered:1; /* client side */ - spinlock_t bd_lock; /* serialise with callback */ - int bd_import_generation; - struct obd_export *bd_export; - struct obd_import *bd_import; - __u32 bd_portal; - struct ptlrpc_request *bd_req; /* associated request */ - wait_queue_head_t bd_waitq; /* server side only WQ */ - int bd_iov_count; /* # entries in bd_iov */ - int bd_max_iov; /* allocated size of bd_iov */ - int bd_nob; /* # bytes covered */ - int bd_nob_transferred; /* # bytes GOT/PUT */ - - __u64 bd_last_xid; - - struct ptlrpc_cb_id bd_cbid; /* network callback info */ - lnet_handle_md_t bd_md_h; /* associated MD */ - -#if defined(__KERNEL__) - lnet_kiov_t bd_iov[0]; -#else - lnet_md_iovec_t bd_iov[0]; -#endif -}; - -struct lu_context; -struct ptlrpc_thread { - - struct list_head t_link; /* active threads for service, from svc->srv_threads */ - - void *t_data; /* thread-private data (preallocated memory) */ - __u32 t_flags; - - unsigned int t_id; /* service thread index, from ptlrpc_start_threads */ - wait_queue_head_t t_ctl_waitq; - struct lu_context *t_ctx; -}; - -struct ptlrpc_request_buffer_desc { - struct list_head rqbd_list; - struct list_head rqbd_reqs; - struct ptlrpc_service *rqbd_service; - lnet_handle_md_t rqbd_md_h; - int rqbd_refcount; - char *rqbd_buffer; - struct ptlrpc_cb_id rqbd_cbid; - struct 
ptlrpc_request rqbd_req; -}; - -typedef int (*svc_handler_t)(struct ptlrpc_request *req); -typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *); - -struct ptlrpc_service { - struct list_head srv_list; /* chain thru all services */ - int srv_max_req_size; /* biggest request to receive */ - int srv_max_reply_size; /* biggest reply to send */ - int srv_buf_size; /* size of individual buffers */ - int srv_nbuf_per_group; /* # buffers to allocate in 1 group */ - int srv_nbufs; /* total # req buffer descs allocated */ - int srv_nthreads; /* # running threads */ - int srv_n_difficult_replies; /* # 'difficult' replies */ - int srv_n_active_reqs; /* # reqs being served */ - int srv_rqbd_timeout; /* timeout before re-posting reqs */ - int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ - int srv_num_threads; /* # threads to start/started */ - unsigned srv_cpu_affinity:1; /* bind threads to CPUs */ - - __u32 srv_req_portal; - __u32 srv_rep_portal; - - int srv_n_queued_reqs; /* # reqs waiting to be served */ - struct list_head srv_request_queue; /* reqs waiting for service */ - - struct list_head srv_request_history; /* request history */ - __u64 srv_request_seq; /* next request sequence # */ - __u64 srv_request_max_cull_seq; /* highest seq culled from history */ - svcreq_printfn_t srv_request_history_print_fn; /* service-specific print fn */ - - struct list_head srv_idle_rqbds; /* request buffers to be reposted */ - struct list_head srv_active_rqbds; /* req buffers receiving */ - struct list_head srv_history_rqbds; /* request buffer history */ - int srv_nrqbd_receiving; /* # posted request buffers */ - int srv_n_history_rqbds; /* # request buffers in history */ - int srv_max_history_rqbds; /* max # request buffers in history */ - - atomic_t srv_outstanding_replies; - struct list_head srv_active_replies; /* all the active replies */ - struct list_head srv_reply_queue; /* replies waiting for service */ - - wait_queue_head_t srv_waitq; /* all threads sleep 
on this. This - * wait-queue is signalled when new - * incoming request arrives and when - * difficult reply has to be handled. */ - - struct list_head srv_threads; - svc_handler_t srv_handler; - - char *srv_name; /* only statically allocated strings here; we don't clean them */ - - spinlock_t srv_lock; - - struct proc_dir_entry *srv_procroot; - struct lprocfs_stats *srv_stats; - - /* List of free reply_states */ - struct list_head srv_free_rs_list; - /* waitq to run, when adding stuff to srv_free_rs_list */ - wait_queue_head_t srv_free_rs_waitq; - - /* - * if non-NULL called during thread creation (ptlrpc_start_thread()) - * to initialize service specific per-thread state. - */ - int (*srv_init)(struct ptlrpc_thread *thread); - /* - * if non-NULL called during thread shutdown (ptlrpc_main()) to - * destruct state created by ->srv_init(). - */ - void (*srv_done)(struct ptlrpc_thread *thread); - - //struct ptlrpc_srv_ni srv_interfaces[0]; -}; - -/* ptlrpc/events.c */ -extern lnet_handle_eq_t ptlrpc_eq_h; -extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, - lnet_process_id_t *peer, lnet_nid_t *self); -extern void request_out_callback (lnet_event_t *ev); -extern void reply_in_callback(lnet_event_t *ev); -extern void client_bulk_callback (lnet_event_t *ev); -extern void request_in_callback(lnet_event_t *ev); -extern void reply_out_callback(lnet_event_t *ev); -extern void server_bulk_callback (lnet_event_t *ev); - -/* ptlrpc/connection.c */ -void ptlrpc_dump_connections(void); -void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *); -struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, - lnet_nid_t self, struct obd_uuid *uuid); -int ptlrpc_put_connection(struct ptlrpc_connection *c); -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -void ptlrpc_init_connection(void); -void ptlrpc_cleanup_connection(void); -extern lnet_pid_t ptl_get_pid(void); - -/* ptlrpc/niobuf.c */ -int 
ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc); -void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc); -int ptlrpc_register_bulk(struct ptlrpc_request *req); -void ptlrpc_unregister_bulk (struct ptlrpc_request *req); - -static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave (&desc->bd_lock, flags); - rc = desc->bd_network_rw; - spin_unlock_irqrestore (&desc->bd_lock, flags); - return (rc); -} - -int ptlrpc_send_reply(struct ptlrpc_request *req, int); -int ptlrpc_reply(struct ptlrpc_request *req); -int ptlrpc_error(struct ptlrpc_request *req); -void ptlrpc_resend_req(struct ptlrpc_request *request); -int ptl_send_rpc(struct ptlrpc_request *request, int noreply); -int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd); - -/* ptlrpc/client.c */ -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *); -void ptlrpc_cleanup_client(struct obd_import *imp); -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); - -static inline int -ptlrpc_client_receiving_reply (struct ptlrpc_request *req) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&req->rq_lock, flags); - rc = req->rq_receiving_reply; - spin_unlock_irqrestore(&req->rq_lock, flags); - return (rc); -} - -static inline int -ptlrpc_client_replied (struct ptlrpc_request *req) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave(&req->rq_lock, flags); - rc = req->rq_replied; - spin_unlock_irqrestore(&req->rq_lock, flags); - return (rc); -} - -static inline void -ptlrpc_wake_client_req (struct ptlrpc_request *req) -{ - if (req->rq_set == NULL) - wake_up(&req->rq_reply_waitq); - else - wake_up(&req->rq_set->set_waitq); -} - -int ptlrpc_queue_wait(struct ptlrpc_request *req); -int ptlrpc_replay_req(struct ptlrpc_request *req); -void ptlrpc_unregister_reply(struct ptlrpc_request *req); -void ptlrpc_restart_req(struct ptlrpc_request *req); -void 
ptlrpc_abort_inflight(struct obd_import *imp); - -struct ptlrpc_request_set *ptlrpc_prep_set(void); -int ptlrpc_set_next_timeout(struct ptlrpc_request_set *); -int ptlrpc_check_set(struct ptlrpc_request_set *set); -int ptlrpc_set_wait(struct ptlrpc_request_set *); -int ptlrpc_expired_set(void *data); -void ptlrpc_interrupted_set(void *data); -void ptlrpc_mark_interrupted(struct ptlrpc_request *req); -void ptlrpc_set_destroy(struct ptlrpc_request_set *); -void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); -void ptlrpc_set_add_new_req(struct ptlrpc_request_set *, - struct ptlrpc_request *); - -void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); -void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); -struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int, - void (*populate_pool)(struct ptlrpc_request_pool *, int)); -struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, int opcode, - int count, int *lengths, char **bufs); -struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, - int count, int *lengths, char **bufs, - struct ptlrpc_request_pool *pool); -void ptlrpc_free_req(struct ptlrpc_request *request); -void ptlrpc_req_finished(struct ptlrpc_request *request); -void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request); -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, - int npages, int type, int portal); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req, - int npages, int type, int portal); -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); -void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len); -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp); -__u64 ptlrpc_next_xid(void); -__u64 ptlrpc_sample_next_xid(void); 
-__u64 ptlrpc_req_xid(struct ptlrpc_request *request); - -struct ptlrpc_service_conf { - int psc_nbufs; - int psc_bufsize; - int psc_max_req_size; - int psc_max_reply_size; - int psc_req_portal; - int psc_rep_portal; - int psc_watchdog_timeout; /* in ms */ - int psc_num_threads; -}; - - -/* ptlrpc/service.c */ -void ptlrpc_save_lock (struct ptlrpc_request *req, - struct lustre_handle *lock, int mode); -void ptlrpc_commit_replies (struct obd_device *obd); -void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); - -struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, - svc_handler_t h, char *name, - struct proc_dir_entry *proc_entry, - svcreq_printfn_t prntfn); - -struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, - int max_reply_size, - int req_portal, int rep_portal, - int watchdog_timeout, /* in ms */ - svc_handler_t, char *name, - struct proc_dir_entry *proc_entry, - svcreq_printfn_t, int num_threads); -void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); - -int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc, - char *base_name); -int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, - char *name, int id); -int ptlrpc_unregister_service(struct ptlrpc_service *service); -int liblustre_check_services (void *arg); -void ptlrpc_daemonize(char *name); -int ptlrpc_service_health_check(struct ptlrpc_service *); - - -struct ptlrpc_svc_data { - char *name; - struct ptlrpc_service *svc; - struct ptlrpc_thread *thread; - struct obd_device *dev; -}; - -/* ptlrpc/import.c */ -int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid); -int ptlrpc_init_import(struct obd_import *imp); -int ptlrpc_disconnect_import(struct obd_import *imp); -int ptlrpc_import_recovery_state_machine(struct obd_import *imp); - -/* ptlrpc/pack_generic.c */ -int lustre_msg_swabbed(struct lustre_msg *msg); -int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); 
-int lustre_pack_request(struct ptlrpc_request *, int count, const int *lens, - char **bufs); -int lustre_pack_reply(struct ptlrpc_request *, int count, const int *lens, - char **bufs); -void lustre_shrink_reply(struct ptlrpc_request *req, - int segment, unsigned int newlen, int move_data); -void lustre_free_reply_state(struct ptlrpc_reply_state *rs); -int lustre_msg_size(int count, const int *lengths); -int lustre_unpack_msg(struct lustre_msg *m, int len); -void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); -int lustre_msg_buflen(struct lustre_msg *m, int n); -char *lustre_msg_string (struct lustre_msg *m, int n, int max_len); -void *lustre_swab_buf(struct lustre_msg *, int n, int minlen, void *swabber); -void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen, - void *swabber); -void *lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen, - void *swabber); - -static inline void -ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - atomic_inc(&rs->rs_refcount); -} - -static inline void -ptlrpc_rs_decref(struct ptlrpc_reply_state *rs) -{ - LASSERT(atomic_read(&rs->rs_refcount) > 0); - if (atomic_dec_and_test(&rs->rs_refcount)) - lustre_free_reply_state(rs); -} - -/* ldlm/ldlm_lib.c */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); -int client_obd_cleanup(struct obd_device * obddev); -int client_connect_import(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid, struct obd_connect_data *); -int client_disconnect_export(struct obd_export *exp); -int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority); -int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); -int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); - -/* ptlrpc/pinger.c */ -int ptlrpc_pinger_add_import(struct obd_import *imp); -int ptlrpc_pinger_del_import(struct obd_import *imp); -#ifdef 
__KERNEL__ -void ping_evictor_start(void); -void ping_evictor_stop(void); -#else -#define ping_evictor_start() do {} while (0) -#define ping_evictor_stop() do {} while (0) -#endif - -/* ptlrpc/ptlrpcd.c */ -void ptlrpcd_wake(struct ptlrpc_request *req); -void ptlrpcd_add_req(struct ptlrpc_request *req); -int ptlrpcd_addref(void); -void ptlrpcd_decref(void); - -/* ptlrpc/lproc_ptlrpc.c */ -#ifdef LPROCFS -void ptlrpc_lprocfs_register_obd(struct obd_device *obd); -void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); -#else -static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {} -static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {} -#endif - -/* ptlrpc/llog_server.c */ -int llog_origin_handle_create(struct ptlrpc_request *req); -int llog_origin_handle_destroy(struct ptlrpc_request *req); -int llog_origin_handle_prev_block(struct ptlrpc_request *req); -int llog_origin_handle_next_block(struct ptlrpc_request *req); -int llog_origin_handle_read_header(struct ptlrpc_request *req); -int llog_origin_handle_close(struct ptlrpc_request *req); -int llog_origin_handle_cancel(struct ptlrpc_request *req); -int llog_catinfo(struct ptlrpc_request *req); - -/* ptlrpc/llog_client.c */ -extern struct llog_operations llog_client_ops; - #endif diff --git a/lustre/include/linux/lustre_quota.h b/lustre/include/linux/lustre_quota.h index c597b7a..248bd5d 100644 --- a/lustre/include/linux/lustre_quota.h +++ b/lustre/include/linux/lustre_quota.h @@ -1,8 +1,12 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ +#ifndef _LINUX_LUSTRE_QUOTA_H +#define _LINUX_LUSTRE_QUOTA_H + #ifndef _LUSTRE_QUOTA_H -#define _LUSTRE_QUOTA_H +#error Do not #include this file directly. 
#include instead +#endif #ifdef __KERNEL__ #include @@ -10,386 +14,5 @@ #include #include #endif -#include -#include -#include - -struct obd_device; -struct client_obd; - -#ifndef NR_DQHASH -#define NR_DQHASH 45 -#endif - -#ifdef HAVE_QUOTA_SUPPORT - -#ifdef __KERNEL__ - -/* structures to access admin quotafile */ -struct lustre_mem_dqinfo { - unsigned int dqi_bgrace; - unsigned int dqi_igrace; - unsigned long dqi_flags; - unsigned int dqi_blocks; - unsigned int dqi_free_blk; - unsigned int dqi_free_entry; -}; - -struct lustre_quota_info { - struct file *qi_files[MAXQUOTAS]; - struct lustre_mem_dqinfo qi_info[MAXQUOTAS]; -}; - -#define DQ_STATUS_AVAIL 0x0 /* Available dquot */ -#define DQ_STATUS_SET 0x01 /* Sombody is setting dquot */ -#define DQ_STATUS_RECOVERY 0x02 /* dquot is in recovery */ - -struct lustre_dquot { - /* Hash list in memory, protect by dquot_hash_lock */ - struct list_head dq_hash; - /* Protect the data in lustre_dquot */ - struct semaphore dq_sem; - /* Use count */ - int dq_refcnt; - /* Pointer of quota info it belongs to */ - struct lustre_quota_info *dq_info; - - loff_t dq_off; /* Offset of dquot on disk */ - unsigned int dq_id; /* ID this applies to (uid, gid) */ - int dq_type; /* Type fo quota (USRQUOTA, GRPQUOUTA) */ - unsigned short dq_status; /* See DQ_STATUS_ */ - unsigned long dq_flags; /* See DQ_ in quota.h */ - struct mem_dqblk dq_dqb; /* Diskquota usage */ -}; - -struct dquot_id { - struct list_head di_link; - __u32 di_id; -}; - -#define QFILE_CHK 1 -#define QFILE_RD_INFO 2 -#define QFILE_WR_INFO 3 -#define QFILE_INIT_INFO 4 -#define QFILE_RD_DQUOT 5 -#define QFILE_WR_DQUOT 6 - -/* admin quotafile operations */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -int lustre_check_quota_file(struct lustre_quota_info *lqi, int type); -int lustre_read_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_read_dquot(struct lustre_dquot *dquot); -int 
lustre_commit_dquot(struct lustre_dquot *dquot); -int lustre_init_quota_info(struct lustre_quota_info *lqi, int type); -int lustre_get_qids(struct file *file, struct inode *inode, int type, - struct list_head *list); -#else - -#ifndef DQ_FAKE_B -#define DQ_FAKE_B 6 -#endif - -static inline int lustre_check_quota_file(struct lustre_quota_info *lqi, - int type) -{ - return 0; -} -static inline int lustre_read_quota_info(struct lustre_quota_info *lqi, - int type) -{ - return 0; -} -static inline int lustre_write_quota_info(struct lustre_quota_info *lqi, - int type) -{ - return 0; -} -static inline int lustre_read_dquot(struct lustre_dquot *dquot) -{ - return 0; -} -static inline int lustre_commit_dquot(struct lustre_dquot *dquot) -{ - return 0; -} -static inline int lustre_init_quota_info(struct lustre_quota_info *lqi, - int type) -{ - return 0; -} -#endif /* KERNEL_VERSION(2,5,0) */ - -#define LL_DQUOT_OFF(sb) DQUOT_OFF(sb) - -typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd, - int opc); -struct lustre_quota_ctxt { - struct super_block *lqc_sb; /* superblock this applies to */ - struct obd_import *lqc_import; /* import used to send dqacq/dqrel RPC */ - dqacq_handler_t lqc_handler; /* dqacq/dqrel RPC handler, only for quota master */ - unsigned long lqc_recovery:1; /* Doing recovery */ - unsigned long lqc_iunit_sz; /* Unit size of file quota */ - unsigned long lqc_itune_sz; /* Trigger dqacq when available file quota less than - * this value, trigger dqrel when available file quota - * more than this value + 1 iunit */ - unsigned long lqc_bunit_sz; /* Unit size of block quota */ - unsigned long lqc_btune_sz; /* See comment of lqc_itune_sz */ -}; - -#else - -struct lustre_quota_info { -}; - -struct lustre_quota_ctxt { -}; - -#endif /* !__KERNEL__ */ - -#else - -#define LL_DQUOT_OFF(sb) do {} while(0) - -struct lustre_quota_info { -}; - -struct lustre_quota_ctxt { -}; - -#endif /* !HAVE_QUOTA_SUPPORT */ - -/* If the (quota limit < qunit * 
slave count), the slave which can't - * acquire qunit should set it's local limit as MIN_QLIMIT */ -#define MIN_QLIMIT 1 - -struct quotacheck_thread_args { - struct obd_export *qta_exp; /* obd export */ - struct obd_quotactl qta_oqctl; /* obd_quotactl args */ - struct super_block *qta_sb; /* obd super block */ - atomic_t *qta_sem; /* obt_quotachecking */ -}; - -typedef struct { - int (*quota_init) (void); - int (*quota_exit) (void); - int (*quota_setup) (struct obd_device *, struct lustre_cfg *); - int (*quota_cleanup) (struct obd_device *); - /* For quota master, close admin quota files */ - int (*quota_fs_cleanup) (struct obd_device *); - int (*quota_ctl) (struct obd_export *, struct obd_quotactl *); - int (*quota_check) (struct obd_export *, struct obd_quotactl *); - int (*quota_recovery) (struct obd_device *); - - /* For quota master/slave, adjust quota limit after fs operation */ - int (*quota_adjust) (struct obd_device *, unsigned int[], - unsigned int[], int, int); - - /* For quota slave, set import, trigger quota recovery */ - int (*quota_setinfo) (struct obd_export *, struct obd_device *); - - /* For quota slave, set proper thread resoure capability */ - int (*quota_enforce) (struct obd_device *, unsigned int); - - /* For quota slave, check whether specified uid/gid is over quota */ - int (*quota_getflag) (struct obd_device *, struct obdo *); - - /* For quota slave, acquire/release quota from master if needed */ - int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int); - - /* For quota client, poll if the quota check done */ - int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *); - - /* For quota client, check whether specified uid/gid is over quota */ - int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int); - - /* For quota client, set over quota flag for specifed uid/gid */ - int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int, - obd_flag, obd_flag); -} quota_interface_t; - -#define 
Q_COPY(out, in, member) (out)->member = (in)->member - -#define QUOTA_OP(interface, op) interface->quota_ ## op - -#define QUOTA_CHECK_OP(interface, op) \ -do { \ - if (!interface) \ - RETURN(0); \ - if (!QUOTA_OP(interface, op)) { \ - CERROR("no quota operation: " #op "\n"); \ - RETURN(-EOPNOTSUPP); \ - } \ -} while(0) - -static inline int lquota_init(quota_interface_t *interface) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, init); - rc = QUOTA_OP(interface, init)(); - RETURN(rc); -} - -static inline int lquota_exit(quota_interface_t *interface) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, exit); - rc = QUOTA_OP(interface, exit)(); - RETURN(rc); -} - -static inline int lquota_setup(quota_interface_t *interface, - struct obd_device *obd, - struct lustre_cfg *lcfg) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, setup); - rc = QUOTA_OP(interface, setup)(obd, lcfg); - RETURN(rc); -} - -static inline int lquota_cleanup(quota_interface_t *interface, - struct obd_device *obd) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, cleanup); - rc = QUOTA_OP(interface, cleanup)(obd); - RETURN(rc); -} - -static inline int lquota_fs_cleanup(quota_interface_t *interface, - struct obd_device *obd) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, fs_cleanup); - rc = QUOTA_OP(interface, fs_cleanup)(obd); - RETURN(rc); -} - -static inline int lquota_recovery(quota_interface_t *interface, - struct obd_device *obd) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, recovery); - rc = QUOTA_OP(interface, recovery)(obd); - RETURN(rc); -} - -static inline int lquota_adjust(quota_interface_t *interface, - struct obd_device *obd, - unsigned int qcids[], - unsigned int qpids[], - int rc, int opc) -{ - int ret; - ENTRY; - - QUOTA_CHECK_OP(interface, adjust); - ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc); - RETURN(ret); -} - -static inline int lquota_chkdq(quota_interface_t *interface, - struct client_obd *cli, - unsigned int uid, unsigned int gid) -{ - 
int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, chkdq); - rc = QUOTA_OP(interface, chkdq)(cli, uid, gid); - RETURN(rc); -} - -static inline int lquota_setdq(quota_interface_t *interface, - struct client_obd *cli, - unsigned int uid, unsigned int gid, - obd_flag valid, obd_flag flags) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, setdq); - rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags); - RETURN(rc); -} - -static inline int lquota_poll_check(quota_interface_t *interface, - struct obd_export *exp, - struct if_quotacheck *qchk) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, poll_check); - rc = QUOTA_OP(interface, poll_check)(exp, qchk); - RETURN(rc); -} - - -static inline int lquota_setinfo(quota_interface_t *interface, - struct obd_export *exp, - struct obd_device *obd) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, setinfo); - rc = QUOTA_OP(interface, setinfo)(exp, obd); - RETURN(rc); -} - -static inline int lquota_enforce(quota_interface_t *interface, - struct obd_device *obd, - unsigned int ignore) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, enforce); - rc = QUOTA_OP(interface, enforce)(obd, ignore); - RETURN(rc); -} - -static inline int lquota_getflag(quota_interface_t *interface, - struct obd_device *obd, struct obdo *oa) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, getflag); - rc = QUOTA_OP(interface, getflag)(obd, oa); - RETURN(rc); -} - -static inline int lquota_acquire(quota_interface_t *interface, - struct obd_device *obd, - unsigned int uid, unsigned int gid) -{ - int rc; - ENTRY; - - QUOTA_CHECK_OP(interface, acquire); - rc = QUOTA_OP(interface, acquire)(obd, uid, gid); - RETURN(rc); -} - -#ifndef __KERNEL__ -extern quota_interface_t osc_quota_interface; -extern quota_interface_t mdc_quota_interface; -extern quota_interface_t lov_quota_interface; -#endif #endif /* _LUSTRE_QUOTA_H */ diff --git a/lustre/include/linux/lustre_types.h b/lustre/include/linux/lustre_types.h new file mode 100644 index 0000000..8f724c8 
--- /dev/null +++ b/lustre/include/linux/lustre_types.h @@ -0,0 +1,44 @@ +#ifndef _LUSTRE_LINUX_TYPES_H +#define _LUSTRE_LINUX_TYPES_H + +#ifdef HAVE_ASM_TYPES_H +#include +#endif + +#ifdef __KERNEL__ +# include +# include /* to check for FMODE_EXEC, dev_t, lest we redefine */ +#else +#ifdef __CYGWIN__ +# include +#elif defined(_AIX) +# include +#else +# include +#endif +#endif + +#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \ + !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H) && \ + !defined(_X86_64_TYPES_H)) + +typedef unsigned short umode_t; +/* + * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the + * header files exported to user space + */ + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long long __s64; +typedef unsigned long long __u64; +#endif + +#endif diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h new file mode 100644 index 0000000..7bbcca7 --- /dev/null +++ b/lustre/include/linux/lustre_user.h @@ -0,0 +1,80 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * Lustre public user-space interface definitions. + */ + +#ifndef _LINUX_LUSTRE_USER_H +#define _LINUX_LUSTRE_USER_H + +#ifdef HAVE_ASM_TYPES_H +#include +#else +#include +#endif + + +#ifndef __KERNEL__ +# define NEED_QUOTA_DEFS +# ifdef HAVE_QUOTA_SUPPORT +# include +# endif +#else +# include +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) +# define NEED_QUOTA_DEFS +# endif +# ifdef HAVE_QUOTA_SUPPORT +# include +# endif +#endif + +/* + * asm-x86_64/processor.h on some SLES 9 distros seems to use + * kernel-only typedefs. fortunately skipping it altogether is ok + * (for now). 
+ */ +#define __ASM_X86_64_PROCESSOR_H + +#ifdef __KERNEL__ +#include +#else +#include +#include +#endif + +#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \ + defined(__craynv) +typedef struct stat lstat_t; +#define HAVE_LOV_USER_MDS_DATA +#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__) +typedef struct stat64 lstat_t; +#define HAVE_LOV_USER_MDS_DATA +#endif + +#ifndef LPU64 +/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */ +#if defined(__x86_64__) && defined(__KERNEL__) +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPSZ "%u" +# define LPSSZ "%d" +#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) +# define LPU64 "%lu" +# define LPD64 "%ld" +# define LPX64 "%#lx" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#endif +#endif /* !LPU64 */ + +#endif /* _LUSTRE_USER_H */ diff --git a/lustre/include/linux/lvfs.h b/lustre/include/linux/lvfs.h index c90a78f..816925a 100644 --- a/lustre/include/linux/lvfs.h +++ b/lustre/include/linux/lvfs.h @@ -21,13 +21,12 @@ * lustre VFS/process permission interface */ -#ifndef __LVFS_H__ -#define __LVFS_H__ - -#include -#include +#ifndef __LINUX_LVFS_H__ +#define __LINUX_LVFS_H__ -#define LL_FID_NAMELEN (16 + 1 + 8 + 1) +#ifndef __LVFS_H__ +#error Do not #include this file directly. 
#include instead +#endif #if defined __KERNEL__ #include @@ -36,9 +35,7 @@ struct group_info { /* unused */ }; #endif -#ifdef LIBLUSTRE -#include -#endif +#define LLOG_LVFS /* simple.c */ @@ -81,14 +78,6 @@ struct lvfs_run_ctxt { #define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0) #endif -/* lvfs_common.c */ -struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data); - -void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, - struct lvfs_ucred *cred); -void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, - struct lvfs_ucred *cred); - #ifdef __KERNEL__ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix); diff --git a/lustre/include/linux/lvfs_linux.h b/lustre/include/linux/lvfs_linux.h index 7c31b31..0ea6104 100644 --- a/lustre/include/linux/lvfs_linux.h +++ b/lustre/include/linux/lvfs_linux.h @@ -9,7 +9,7 @@ #include #include -#include +#include #define l_file file #define l_dentry dentry diff --git a/lustre/include/linux/md_object.h b/lustre/include/linux/md_object.h index fd90061..6ca1d9e 100644 --- a/lustre/include/linux/md_object.h +++ b/lustre/include/linux/md_object.h @@ -22,8 +22,8 @@ * */ -#ifndef _LINUX_MD_OBJECT_H -#define _LINUX_MD_OBJECT_H +#ifndef _LUSTRE_MD_OBJECT_H +#define _LUSTRE_MD_OBJECT_H /* * Sub-class of lu_object with methods common for "meta-data" objects in MDT @@ -39,7 +39,7 @@ /* * super-class definitions. 
*/ -#include +#include struct md_device; struct md_device_operations; diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index 0926854..ba0d89e 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -2,20 +2,12 @@ * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef __OBD_H -#define __OBD_H - -#define IOC_OSC_TYPE 'h' -#define IOC_OSC_MIN_NR 20 -#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *) -#define IOC_OSC_MAX_NR 50 +#ifndef __LINUX_OBD_H +#define __LINUX_OBD_H -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_MIN_NR 20 -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -/* Moved to lustre_user.h -#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */ -#define IOC_MDC_MAX_NR 50 +#ifndef __OBD_H +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ # include @@ -27,1066 +19,24 @@ # include #endif -#include -#include -#include -#include - -#include - -/* this is really local to the OSC */ -struct loi_oap_pages { - struct list_head lop_pending; - int lop_num_pending; - struct list_head lop_urgent; - struct list_head lop_pending_group; -}; - -struct osc_async_rc { - int ar_rc; - int ar_force_sync; - int ar_min_xid; -}; - -struct lov_oinfo { /* per-stripe data structure */ - __u64 loi_id; /* object ID on the target OST */ - __u64 loi_gr; /* object group on the target OST */ - int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */ - int loi_ost_gen; /* generation of this loi_ost_idx */ - - /* used by the osc to keep track of what objects to build into rpcs */ - struct loi_oap_pages loi_read_lop; - struct loi_oap_pages loi_write_lop; - /* _cli_ is poorly named, it should be _ready_ */ - struct list_head loi_cli_item; - struct list_head loi_write_item; - struct list_head loi_read_item; - - unsigned loi_kms_valid:1; - __u64 loi_kms; /* known minimum size */ - struct ost_lvb loi_lvb; - struct osc_async_rc loi_ar; -}; - -static inline void 
loi_init(struct lov_oinfo *loi) -{ - INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending); - INIT_LIST_HEAD(&loi->loi_read_lop.lop_urgent); - INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending_group); - INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending); - INIT_LIST_HEAD(&loi->loi_write_lop.lop_urgent); - INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group); - INIT_LIST_HEAD(&loi->loi_cli_item); - INIT_LIST_HEAD(&loi->loi_write_item); - INIT_LIST_HEAD(&loi->loi_read_item); -} -/*extent array item for describing the joined file extent info*/ -struct lov_extent { - __u64 le_start; /* extent start */ - __u64 le_len; /* extent length */ - int le_loi_idx; /* extent #1 loi's index in lsm loi array */ - int le_stripe_count; /* extent stripe count*/ -}; - -/*Lov array info for describing joined file array EA info*/ -struct lov_array_info { - struct llog_logid lai_array_id; /* MDS med llog object id */ - unsigned lai_ext_count; /* number of extent count */ - struct lov_extent *lai_ext_array; /* extent desc array */ -}; - -struct lov_stripe_md { - spinlock_t lsm_lock; - void *lsm_lock_owner; /* debugging */ - - struct { - /* Public members. */ - __u64 lw_object_id; /* lov object id */ - __u64 lw_object_gr; /* lov object group */ - __u64 lw_maxbytes; /* maximum possible file size */ - unsigned long lw_xfersize; /* optimal transfer size */ - - /* LOV-private members start here -- only for use in lov/. 
*/ - __u32 lw_magic; - __u32 lw_stripe_size; /* size of the stripe */ - __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ - unsigned lw_stripe_count; /* number of objects being striped over */ - } lsm_wire; - - struct lov_array_info *lsm_array; /*Only for joined file array info*/ - struct lov_oinfo lsm_oinfo[0]; -}; - -#define lsm_object_id lsm_wire.lw_object_id -#define lsm_object_gr lsm_wire.lw_object_gr -#define lsm_maxbytes lsm_wire.lw_maxbytes -#define lsm_xfersize lsm_wire.lw_xfersize -#define lsm_magic lsm_wire.lw_magic -#define lsm_stripe_size lsm_wire.lw_stripe_size -#define lsm_pattern lsm_wire.lw_pattern -#define lsm_stripe_count lsm_wire.lw_stripe_count - -/* compare all relevant fields. */ -static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1, - struct lov_stripe_md *m2) -{ - /* - * ->lsm_wire contains padding, but it should be zeroed out during - * allocation. - */ - return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire); -} - -void lov_stripe_lock(struct lov_stripe_md *md); -void lov_stripe_unlock(struct lov_stripe_md *md); - -struct obd_type { - struct list_head typ_chain; - struct obd_ops *typ_dt_ops; - struct md_ops *typ_md_ops; - struct proc_dir_entry *typ_procroot; - char *typ_name; - int typ_refcnt; - struct lu_device_type *typ_lu; -}; - -struct brw_page { - obd_off off; - struct page *pg; - int count; - obd_flag flag; -}; - -enum async_flags { - ASYNC_READY = 0x1, /* ap_make_ready will not be called before this - page is added to an rpc */ - ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ - ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called - to give the caller a chance to update - or cancel the size of the io */ - ASYNC_GROUP_SYNC = 0x8, /* ap_completion will not be called, instead - the page is accounted for in the - obd_io_group given to - obd_queue_group_io */ -}; - -struct obd_async_page_ops { - int (*ap_make_ready)(void *data, int cmd); - int (*ap_refresh_count)(void *data, int 
cmd); - void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); - void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); -}; - -/* the `oig' is passed down from a caller of obd rw methods. the callee - * records enough state such that the caller can sleep on the oig and - * be woken when all the callees have finished their work */ -struct obd_io_group { - spinlock_t oig_lock; - atomic_t oig_refcount; - int oig_pending; - int oig_rc; - struct list_head oig_occ_list; - wait_queue_head_t oig_waitq; -}; - -/* the oig callback context lets the callee of obd rw methods register - * for callbacks from the caller. */ -struct oig_callback_context { - struct list_head occ_oig_item; - /* called when the caller has received a signal while sleeping. - * callees of this method are encouraged to abort their state - * in the oig. This may be called multiple times. */ - void (*occ_interrupted)(struct oig_callback_context *occ); - unsigned int interrupted:1; -}; - -/* if we find more consumers this could be generalized */ -#define OBD_HIST_MAX 32 -struct obd_histogram { - spinlock_t oh_lock; - unsigned long oh_buckets[OBD_HIST_MAX]; -}; - -/* Individual type definitions */ - -struct ost_server_data; - -/* hold common fields for "target" device */ -struct obd_device_target { - struct super_block *obt_sb; - atomic_t obt_quotachecking; - struct lustre_quota_ctxt obt_qctxt; -}; - -#define FILTER_GROUP_LLOG 1 -#define FILTER_GROUP_ECHO 2 - -struct filter_ext { - __u64 fe_start; - __u64 fe_end; -}; - -struct filter_obd { - /* NB this field MUST be first */ - struct obd_device_target fo_obt; - const char *fo_fstype; - struct vfsmount *fo_vfsmnt; - struct dentry *fo_dentry_O; - struct dentry **fo_dentry_O_groups; - struct dentry **fo_dentry_O_sub; - spinlock_t fo_objidlock; /* protect fo_lastobjid */ - spinlock_t fo_translock; /* protect fsd_last_transno */ - struct file *fo_rcvd_filp; - struct file *fo_health_check_filp; - struct lr_server_data *fo_fsd; - unsigned long 
*fo_last_rcvd_slots; - __u64 fo_mount_count; - - int fo_destroy_in_progress; - struct semaphore fo_create_lock; - - struct list_head fo_export_list; - int fo_subdir_count; - - obd_size fo_tot_dirty; /* protected by obd_osfs_lock */ - obd_size fo_tot_granted; /* all values in bytes */ - obd_size fo_tot_pending; - - obd_size fo_readcache_max_filesize; - - struct obd_import *fo_mdc_imp; - struct obd_uuid fo_mdc_uuid; - struct lustre_handle fo_mdc_conn; - struct file **fo_last_objid_files; - __u64 *fo_last_objids; /* last created objid for groups, - * protected by fo_objidlock */ - - struct semaphore fo_alloc_lock; - - spinlock_t fo_stats_lock; - int fo_r_in_flight; /* protected by fo_stats_lock */ - int fo_w_in_flight; /* protected by fo_stats_lock */ - - /* - * per-filter pool of kiobuf's allocated by filter_common_setup() and - * torn down by filter_cleanup(). Contains OST_NUM_THREADS elements of - * which ->fo_iobuf_count were allocated. - * - * This pool contains kiobuf used by - * filter_{prep,commit}rw_{read,write}() and is shared by all OST - * threads. - * - * Locking: none, each OST thread uses only one element, determined by - * its "ordinal number", ->t_id. 
- */ - struct filter_iobuf **fo_iobuf_pool; - int fo_iobuf_count; - - struct obd_histogram fo_r_pages; - struct obd_histogram fo_w_pages; - struct obd_histogram fo_read_rpc_hist; - struct obd_histogram fo_write_rpc_hist; - struct obd_histogram fo_r_io_time; - struct obd_histogram fo_w_io_time; - struct obd_histogram fo_r_discont_pages; - struct obd_histogram fo_w_discont_pages; - struct obd_histogram fo_r_discont_blocks; - struct obd_histogram fo_w_discont_blocks; - struct obd_histogram fo_r_disk_iosize; - struct obd_histogram fo_w_disk_iosize; - - struct lustre_quota_ctxt fo_quota_ctxt; - spinlock_t fo_quotacheck_lock; - atomic_t fo_quotachecking; -}; - -#define OSC_MAX_RIF_DEFAULT 8 -#define OSC_MAX_RIF_MAX 256 -#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4) -#define OSC_MAX_DIRTY_MB_MAX 2048 /* totally arbitrary */ - -struct mdc_rpc_lock; -struct obd_import; -struct client_obd { - struct obd_uuid cl_target_uuid; - struct obd_import *cl_import; /* ptlrpc connection state */ - struct semaphore cl_sem; - int cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_diskmd() all the time. */ - int cl_default_mds_easize; - int cl_max_mds_easize; - int cl_max_mds_cookiesize; - kdev_t cl_sandev; - - //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ - void *cl_llcd_offset; - - /* the grant values are protected by loi_list_lock below */ - long cl_dirty; /* all _dirty_ in bytes */ - long cl_dirty_max; /* allowed w/o rpc */ - long cl_avail_grant; /* bytes of credit for ost */ - long cl_lost_grant; /* lost credits (trunc) */ - struct list_head cl_cache_waiters; /* waiting for cache/grant */ - - /* keep track of objects that have lois that contain pages which - * have been queued for async brw. 
this lock also protects the - * lists of osc_client_pages that hang off of the loi */ - spinlock_t cl_loi_list_lock; - struct list_head cl_loi_ready_list; - struct list_head cl_loi_write_list; - struct list_head cl_loi_read_list; - int cl_r_in_flight; - int cl_w_in_flight; - /* just a sum of the loi/lop pending numbers to be exported by /proc */ - int cl_pending_w_pages; - int cl_pending_r_pages; - int cl_max_pages_per_rpc; - int cl_max_rpcs_in_flight; - struct obd_histogram cl_read_rpc_hist; - struct obd_histogram cl_write_rpc_hist; - struct obd_histogram cl_read_page_hist; - struct obd_histogram cl_write_page_hist; - struct obd_histogram cl_read_offset_hist; - struct obd_histogram cl_write_offset_hist; - - struct mdc_rpc_lock *cl_rpc_lock; - struct mdc_rpc_lock *cl_setattr_lock; - struct osc_creator cl_oscc; - - /* mgc datastruct */ - struct semaphore cl_mgc_sem; - struct vfsmount *cl_mgc_vfsmnt; - struct dentry *cl_mgc_configs_dir; - atomic_t cl_mgc_refcount; - struct obd_export *cl_mgc_mgsexp; - - /* Flags section */ - unsigned int cl_checksum:1; /* debug checksums */ - - /* also protected by the poorly named _loi_list_lock lock above */ - struct osc_async_rc cl_ar; - - /* used by quotacheck */ - int cl_qchk_stat; /* quotacheck stat of the peer */ - - struct lu_fid cl_fid; - spinlock_t cl_fid_lock; -}; -#define obd2cli_tgt(obd) ((char *)(obd)->u.cli.cl_target_uuid.uuid) - -#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ - -struct mgs_obd { - struct ptlrpc_service *mgs_service; - struct vfsmount *mgs_vfsmnt; - struct super_block *mgs_sb; - struct dentry *mgs_configs_dir; - struct dentry *mgs_fid_de; - struct list_head mgs_fs_db_list; - struct semaphore mgs_sem; -}; - -struct mds_obd { - /* NB this field MUST be first */ - struct obd_device_target mds_obt; - struct ptlrpc_service *mds_service; - struct ptlrpc_service *mds_setattr_service; - struct ptlrpc_service *mds_readpage_service; - struct vfsmount *mds_vfsmnt; - struct dentry *mds_fid_de; 
- int mds_max_mdsize; - int mds_max_cookiesize; - struct file *mds_rcvd_filp; - spinlock_t mds_transno_lock; - __u64 mds_last_transno; - __u64 mds_mount_count; - __u64 mds_io_epoch; - unsigned long mds_atime_diff; - struct semaphore mds_epoch_sem; - struct ll_fid mds_rootfid; - struct lr_server_data *mds_server_data; - struct dentry *mds_pending_dir; - struct dentry *mds_logs_dir; - struct dentry *mds_objects_dir; - struct llog_handle *mds_cfg_llh; -// struct llog_handle *mds_catalog; - struct obd_device *mds_osc_obd; /* XXX lov_obd */ - struct obd_uuid mds_lov_uuid; - char *mds_profile; - struct obd_export *mds_osc_exp; /* XXX lov_exp */ - struct lov_desc mds_lov_desc; - obd_id *mds_lov_objids; - int mds_lov_objids_size; - __u32 mds_lov_objids_in_file; - unsigned int mds_lov_objids_dirty:1; - int mds_lov_nextid_set; - struct file *mds_lov_objid_filp; - struct file *mds_health_check_filp; - unsigned long *mds_client_bitmap; - struct semaphore mds_orphan_recovery_sem; - struct upcall_cache *mds_group_hash; - - struct lustre_quota_info mds_quota_info; - struct semaphore mds_qonoff_sem; - struct semaphore mds_health_sem; - unsigned long mds_lov_objids_valid:1, - mds_fl_user_xattr:1, - mds_fl_acl:1; -}; - -struct echo_obd { - struct obdo eo_oa; - spinlock_t eo_lock; - __u64 eo_lastino; - struct lustre_handle eo_nl_lock; - atomic_t eo_prep; -}; - -struct ost_obd { - struct ptlrpc_service *ost_service; - struct ptlrpc_service *ost_create_service; - struct ptlrpc_service *ost_io_service; - struct semaphore ost_health_sem; -}; - -struct echo_client_obd { - struct obd_export *ec_exp; /* the local connection to osc/lov */ - spinlock_t ec_lock; - struct list_head ec_objects; - int ec_nstripes; - __u64 ec_unique; -}; - -struct lov_tgt_desc { - struct obd_uuid uuid; - __u32 ltd_gen; - struct obd_export *ltd_exp; - unsigned int active:1, /* is this target up for requests */ - reap:1; /* should this target be deleted */ -}; +typedef spinlock_t client_obd_lock_t; -struct lov_obd { 
- struct semaphore lov_lock; - atomic_t refcount; - struct lov_desc desc; - struct obd_connect_data ocd; - int bufsize; - int connects; - int death_row; /* Do we have tgts scheduled to be deleted? - (Make this a linked list?) */ - unsigned int lo_catalog_loaded:1; - struct lov_tgt_desc *tgts; -}; - -struct lmv_tgt_desc { - struct obd_uuid uuid; - struct obd_export *ltd_exp; - int active; /* is this target up for requests */ - int idx; -}; - -struct lmv_obd { - int refcount; - spinlock_t lmv_lock; - struct lmv_desc desc; - struct obd_uuid cluuid; - struct obd_export *exp; - - int connected; - int max_easize; - int max_def_easize; - int max_cookiesize; - int server_timeout; - struct semaphore init_sem; - - struct lmv_tgt_desc *tgts; - int tgts_size; - - struct obd_connect_data *datas; - int datas_size; - - struct obd_connect_data conn_data; -}; - -struct niobuf_local { - __u64 offset; - __u32 len; - __u32 flags; - struct page *page; - struct dentry *dentry; - int lnb_grant_used; - int rc; -}; - -#define LUSTRE_OPC_MKDIR (1 << 0) -#define LUSTRE_OPC_SYMLINK (1 << 1) -#define LUSTRE_OPC_MKNODE (1 << 2) -#define LUSTRE_OPC_CREATE (1 << 3) - -struct placement_hint { - struct qstr *ph_pname; - struct qstr *ph_cname; - int ph_opc; -}; - -/* device types (not names--FIXME) */ -/* FIXME all the references to these defines need to be updated */ -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" -/* new MDS layers. 
Prototype */ -#define LUSTRE_MDT0_NAME "mdt0" -#define LUSTRE_CMM0_NAME "cmm0" -#define LUSTRE_MDD0_NAME "mdd0" -#define LUSTRE_OSD0_NAME "osd0" -#define LUSTRE_FLD0_NAME "fld0" -#define LUSTRE_MDC0_NAME "mdc0" - -#define LUSTRE_MDC_NAME "mdc" -#define LUSTRE_LMV_NAME "lmv" - -/* FIXME just the names need to be changed */ -#define LUSTRE_OSS_NAME "ost" /*FIXME oss*/ -#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost*/ -#define LUSTRE_OSTSAN_NAME "sanobdfilter" - -#define LUSTRE_OSC_NAME "osc" -#define LUSTRE_FILTER_NAME "filter" -#define LUSTRE_SANOSC_NAME "sanosc" -#define LUSTRE_SANOST_NAME "sanost" -#define LUSTRE_MGS_NAME "mgs" -#define LUSTRE_MGC_NAME "mgc" - -#define LUSTRE_MGS_OBDNAME "MGS" -#define LUSTRE_MGC_OBDNAME "MGC" - -/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ -#define N_LOCAL_TEMP_PAGE 0x10000000 - -struct obd_trans_info { - __u64 oti_transno; - __u64 *oti_objid; - /* Only used on the server side for tracking acks. */ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; - void *oti_handle; - struct llog_cookie oti_onecookie; - struct llog_cookie *oti_logcookies; - int oti_numcookies; - - /* initial thread handling transaction */ - int oti_thread_id; -}; - -static inline void oti_init(struct obd_trans_info *oti, - struct ptlrpc_request *req) +static inline void client_obd_list_lock_init(client_obd_lock_t *lock) { - if (oti == NULL) - return; - memset(oti, 0, sizeof *oti); - - if (req == NULL) - return; - - if (req->rq_repmsg && req->rq_reqmsg != 0) - oti->oti_transno = req->rq_repmsg->transno; - oti->oti_thread_id = req->rq_svc_thread ? 
req->rq_svc_thread->t_id : -1; + spin_lock_init(lock); } -static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) -{ - if (!oti) - return; - - if (num_cookies == 1) - oti->oti_logcookies = &oti->oti_onecookie; - else - OBD_ALLOC(oti->oti_logcookies, - num_cookies * sizeof(oti->oti_onecookie)); - - oti->oti_numcookies = num_cookies; -} +static inline void client_obd_list_lock_done(client_obd_lock_t *lock) +{} -static inline void oti_free_cookies(struct obd_trans_info *oti) +static inline void client_obd_list_lock(client_obd_lock_t *lock) { - if (!oti || !oti->oti_logcookies) - return; - - if (oti->oti_logcookies == &oti->oti_onecookie) - LASSERT(oti->oti_numcookies == 1); - else - OBD_FREE(oti->oti_logcookies, - oti->oti_numcookies * sizeof(oti->oti_onecookie)); - oti->oti_logcookies = NULL; - oti->oti_numcookies = 0; + spin_lock(lock); } -/* llog contexts */ -enum llog_ctxt_id { - LLOG_CONFIG_ORIG_CTXT = 0, - LLOG_CONFIG_REPL_CTXT = 1, - LLOG_MDS_OST_ORIG_CTXT = 2, - LLOG_MDS_OST_REPL_CTXT = 3, - LLOG_SIZE_ORIG_CTXT = 4, - LLOG_SIZE_REPL_CTXT = 5, - LLOG_MD_ORIG_CTXT = 6, - LLOG_MD_REPL_CTXT = 7, - LLOG_RD1_ORIG_CTXT = 8, - LLOG_RD1_REPL_CTXT = 9, - LLOG_TEST_ORIG_CTXT = 10, - LLOG_TEST_REPL_CTXT = 11, - LLOG_LOVEA_ORIG_CTXT = 12, - LLOG_LOVEA_REPL_CTXT = 13, - LLOG_MAX_CTXTS -}; - -/* - * Events signalled through obd_notify() upcall-chain. - */ -enum obd_notify_event { - /* Device activated */ - OBD_NOTIFY_ACTIVE, - /* Device deactivated */ - OBD_NOTIFY_INACTIVE, - /* Connect data for import were changed */ - OBD_NOTIFY_OCD, - /* Sync request */ - OBD_NOTIFY_SYNC_NONBLOCK, - OBD_NOTIFY_SYNC -}; - -/* - * Data structure used to pass obd_notify()-event to non-obd listeners (llite - * and liblustre being main examples). 
- */ -struct obd_notify_upcall { - int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, - enum obd_notify_event ev, void *owner); - /* Opaque datum supplied by upper layer listener */ - void *onu_owner; -}; - - -/* corresponds to one of the obd's */ -struct obd_device { - struct obd_type *obd_type; - /* common and UUID name of this device */ - char *obd_name; - struct obd_uuid obd_uuid; - - struct lu_device *obd_lu_dev; - - int obd_minor; - unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1, - obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, - obd_no_recov:1, obd_stopping:1, obd_starting:1, - obd_force:1, obd_fail:1, obd_async_recov:1; - atomic_t obd_refcount; - wait_queue_head_t obd_refcount_waitq; - struct proc_dir_entry *obd_proc_entry; - struct list_head obd_exports; - int obd_num_exports; - struct ldlm_namespace *obd_namespace; - struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ - /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; - __u64 obd_last_committed; - struct fsfilt_operations *obd_fsops; - spinlock_t obd_osfs_lock; - struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ - unsigned long obd_osfs_age; /* jiffies */ - struct lvfs_run_ctxt obd_lvfs_ctxt; - struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; - struct obd_device *obd_observer; - struct obd_notify_upcall obd_upcall; - struct obd_export *obd_self_export; - /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ - struct list_head obd_exports_timed; - time_t obd_eviction_timer; /* for ping evictor */ - - /* XXX encapsulate all this recovery data into one struct */ - svc_handler_t obd_recovery_handler; - int obd_max_recoverable_clients; - int obd_connected_clients; - int obd_recoverable_clients; - spinlock_t obd_processing_task_lock; - pid_t obd_processing_task; - __u64 obd_next_recovery_transno; - int obd_replayed_requests; - int obd_requests_queued_for_recovery; - wait_queue_head_t 
obd_next_transno_waitq; - struct list_head obd_uncommitted_replies; - spinlock_t obd_uncommitted_replies_lock; - struct timer_list obd_recovery_timer; - struct list_head obd_recovery_queue; - struct list_head obd_delayed_reply_queue; - time_t obd_recovery_start; - time_t obd_recovery_end; - - union { - struct obd_device_target obt; - struct filter_obd filter; - struct mds_obd mds; - struct client_obd cli; - struct ost_obd ost; - struct echo_client_obd echo_client; - struct echo_obd echo; - struct lov_obd lov; - struct lmv_obd lmv; - struct mgs_obd mgs; - } u; - - /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; - - unsigned int md_cntr_base; - struct lprocfs_stats *md_stats; - - struct proc_dir_entry *obd_svc_procroot; - struct lprocfs_stats *obd_svc_stats; -}; - -#define OBD_OPT_FORCE 0x0001 -#define OBD_OPT_FAILOVER 0x0002 - -#define OBD_LLOG_FL_SENDNOW 0x0001 - - -enum obd_cleanup_stage { -/* Special case hack for MDS LOVs */ - OBD_CLEANUP_EARLY, -/* Precleanup stage 1, we must make sure all exports (other than the - self-export) get destroyed. */ - OBD_CLEANUP_EXPORTS, -/* Precleanup stage 2, do other type-specific cleanup requiring the - self-export. */ - OBD_CLEANUP_SELF_EXP, -/* FIXME we should eliminate the "precleanup" function and make them stages - of the "cleanup" function. 
*/ - OBD_CLEANUP_OBD, -}; - -struct obd_ops { - struct module *o_owner; - int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, - void *karg, void *uarg); - int (*o_get_info)(struct obd_export *, __u32 keylen, void *key, - __u32 *vallen, void *val); - int (*o_set_info)(struct obd_export *, __u32 keylen, void *key, - __u32 vallen, void *val); - int (*o_attach)(struct obd_device *dev, obd_count len, void *data); - int (*o_detach)(struct obd_device *dev); - int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); - int (*o_precleanup)(struct obd_device *dev, - enum obd_cleanup_stage cleanup_stage); - int (*o_cleanup)(struct obd_device *dev); - int (*o_process_config)(struct obd_device *dev, obd_count len, - void *data); - int (*o_postrecov)(struct obd_device *dev); - int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, - int priority); - int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); - /* connect to the target device with given connection - * data. @ocd->ocd_connect_flags is modified to reflect flags actually - * granted by the target, which are guaranteed to be a subset of flags - * asked for. If @ocd == NULL, use default parameters. 
*/ - int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, - struct obd_uuid *cluuid, struct obd_connect_data *ocd); - int (*o_reconnect)(struct obd_export *exp, struct obd_device *src, - struct obd_uuid *cluuid, - struct obd_connect_data *ocd); - int (*o_disconnect)(struct obd_export *exp); - - /* may be later these should be moved into separate fid_ops */ - int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid, - struct placement_hint *hint); - - int (*o_fid_delete)(struct obd_export *exp, struct lu_fid *fid); - - int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age); - int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); - int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp, - struct lov_stripe_md *mem_tgt); - int (*o_preallocate)(struct lustre_handle *, obd_count *req, - obd_id *ids); - int (*o_create)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); - int (*o_destroy)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_export *md_exp); - int (*o_setattr)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_getattr)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea); - int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, - struct ptlrpc_request_set *set); - int (*o_brw)(int rw, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pgarr, struct obd_trans_info *oti); - int (*o_brw_async)(int rw, struct obd_export *exp, struct obdo 
*oa, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pgarr, struct ptlrpc_request_set *, - struct obd_trans_info *oti); - int (*o_prep_async_page)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct page *page, obd_off offset, - struct obd_async_page_ops *ops, void *data, - void **res); - int (*o_queue_async_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, - obd_flag brw_flags, obd_flag async_flags); - int (*o_queue_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig, - void *cookie, int cmd, obd_off off, int count, - obd_flag brw_flags, obd_flag async_flags); - int (*o_trigger_group_io)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig); - int (*o_set_async_flags)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - obd_flag async_flags); - int (*o_teardown_async_page)(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie); - int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm, - struct ost_lvb *lvb, int kms_only); - int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm, - obd_off size, int shrink); - int (*o_punch)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, - obd_size end, struct obd_trans_info *oti); - int (*o_sync)(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, obd_size end); - int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, - struct lov_stripe_md *src, obd_size start, - obd_size end, struct obd_trans_info *oti); - int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst, - struct lustre_handle *srconn, struct lov_stripe_md *src, - obd_size start, obd_size end, struct obd_trans_info *); - 
int (*o_iterate)(struct lustre_handle *conn, - int (*)(obd_id, obd_gr, void *), - obd_id *startid, obd_gr group, void *data); - int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, struct obd_trans_info *oti); - int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - struct obd_trans_info *oti, int rc); - int (*o_enqueue)(struct obd_export *, struct lov_stripe_md *, - __u32 type, ldlm_policy_data_t *, __u32 mode, - int *flags, void *bl_cb, void *cp_cb, void *gl_cb, - void *data, __u32 lvb_len, void *lvb_swabber, - struct lustre_handle *lockh); - int (*o_match)(struct obd_export *, struct lov_stripe_md *, __u32 type, - ldlm_policy_data_t *, __u32 mode, int *flags, void *data, - struct lustre_handle *lockh); - int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); - int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md, - __u32 mode, struct lustre_handle *); - int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *, - int flags, void *opaque); - int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, - int join); - int (*o_san_preprw)(int cmd, struct obd_export *exp, - struct obdo *oa, int objcount, - struct obd_ioobj *obj, int niocount, - struct niobuf_remote *remote); - int (*o_init_export)(struct obd_export *exp); - int (*o_destroy_export)(struct obd_export *exp); - - /* llog related obd_methods */ - int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd, - int count, struct llog_catid *logid); - int (*o_llog_finish)(struct obd_device *obd, int count); - - /* metadata-only methods */ - int (*o_pin)(struct obd_export *, struct lu_fid *fid, - struct obd_client_handle *, int flag); - int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int); - - 
int (*o_import_event)(struct obd_device *, struct obd_import *, - enum obd_import_event); - - int (*o_notify)(struct obd_device *obd, struct obd_device *watched, - enum obd_notify_event ev, void *data); - - int (*o_health_check)(struct obd_device *); - - /* quota methods */ - int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); - int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); - - /* - * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line - * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. - * Also, add a wrapper function in include/linux/obd_class.h. - * - * Also note that if you add it to the END, you also have to change - * the num_stats calculation. - * - */ -}; - -struct md_ops { - int (*m_getstatus)(struct obd_export *, struct lu_fid *); - int (*m_change_cbdata)(struct obd_export *, struct lu_fid *, - ldlm_iterator_t, void *); - int (*m_close)(struct obd_export *, struct md_op_data *, - struct obd_client_handle *, struct ptlrpc_request **); - int (*m_create)(struct obd_export *, struct md_op_data *, - const void *, int, int, __u32, __u32, __u32, - __u64, struct ptlrpc_request **); - int (*m_done_writing)(struct obd_export *, struct md_op_data *); - int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *, - int, struct md_op_data *, struct lustre_handle *, - void *, int, ldlm_completion_callback, - ldlm_blocking_callback, void *, int); - int (*m_getattr)(struct obd_export *, struct lu_fid *, - obd_valid, int, struct ptlrpc_request **); - int (*m_getattr_name)(struct obd_export *, struct lu_fid *, - const char *, int, obd_valid, - int, struct ptlrpc_request **); - int (*m_intent_lock)(struct obd_export *, struct md_op_data *, - void *, int, struct lookup_intent *, int, - struct ptlrpc_request **, - ldlm_blocking_callback, int); - int (*m_link)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - int (*m_rename)(struct obd_export *, struct md_op_data *, - const char *, int, const char *, 
int, - struct ptlrpc_request **); - int (*m_setattr)(struct obd_export *, struct md_op_data *, - struct iattr *, void *, int , void *, int, - struct ptlrpc_request **); - int (*m_sync)(struct obd_export *, struct lu_fid *, - struct ptlrpc_request **); - int (*m_readpage)(struct obd_export *, struct lu_fid *, - __u64, struct page *, struct ptlrpc_request **); - int (*m_unlink)(struct obd_export *, struct md_op_data *, - struct ptlrpc_request **); - - int (*m_setxattr)(struct obd_export *, struct lu_fid *, - obd_valid, const char *, const char *, - int, int, int, struct ptlrpc_request **); - - int (*m_getxattr)(struct obd_export *, struct lu_fid *, - obd_valid, const char *, const char *, - int, int, int, struct ptlrpc_request **); - - int (*m_init_ea_size)(struct obd_export *, int, int, int); - - int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, - int, struct obd_export *, struct lustre_md *); - - int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *); - - int (*m_set_open_replay_data)(struct obd_export *, - struct obd_client_handle *, - struct ptlrpc_request *); - int (*m_clear_open_replay_data)(struct obd_export *, - struct obd_client_handle *); - int (*m_set_lock_data)(struct obd_export *, __u64 *, void *); - - int (*m_lock_match)(struct obd_export *, int, struct lu_fid *, - ldlm_type_t, ldlm_policy_data_t *, ldlm_mode_t, - struct lustre_handle *); - - int (*m_cancel_unused)(struct obd_export *, struct lu_fid *, - int flags, void *opaque); - - /* - * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to - * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a - * wrapper function in include/linux/obd_class.h. 
- */ -}; - -struct lsm_operations { - void (*lsm_free)(struct lov_stripe_md *); - int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, - struct obd_export *md_exp); - void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, - unsigned long *); - void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, - unsigned long *); - obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int); - int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off); - int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd); - int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, - int *stripe_count); - int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, - struct lov_mds_md *lmm); -}; - -extern struct lsm_operations lsm_plain_ops; -extern struct lsm_operations lsm_join_ops; -static inline struct lsm_operations *lsm_op_find(int magic) -{ - switch(magic) { - case LOV_MAGIC: - return &lsm_plain_ops; - case LOV_MAGIC_JOIN: - return &lsm_join_ops; - default: - CERROR("Cannot recognize lsm_magic %d", magic); - return NULL; - } -} - -int lvfs_check_io_health(struct obd_device *obd, struct file *file); - -static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, - int error) -{ - if (error) { - CERROR("%s: transno "LPD64" commit error: %d\n", - obd->obd_name, transno, error); - return; - } - CDEBUG(D_HA, "%s: transno "LPD64" committed\n", - obd->obd_name, transno); - if (transno > obd->obd_last_committed) { - obd->obd_last_committed = transno; - ptlrpc_commit_replies (obd); - } -} - -static inline void init_obd_quota_ops(quota_interface_t *interface, - struct obd_ops *obd_ops) +static inline void client_obd_list_unlock(client_obd_lock_t *lock) { - if (!interface) - return; - - LASSERT(obd_ops); - obd_ops->o_quotacheck = QUOTA_OP(interface, check); - obd_ops->o_quotactl = QUOTA_OP(interface, ctl); + spin_unlock(lock); } -/* get/set_info keys */ -#define KEY_MDS_CONN "mds_conn" -#define 
KEY_NEXT_ID "next_id" -#define KEY_LOVDESC "lovdesc" -#define KEY_INIT_RECOV "initial_recov" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" - -#endif /* __OBD_H */ +#endif /* __LINUX_OBD_H */ diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index b614bb6..3e59fa6 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -23,6 +23,10 @@ #ifndef __LINUX_CLASS_OBD_H #define __LINUX_CLASS_OBD_H +#ifndef __CLASS_OBD_H +#error Do not #include this file directly. #include <obd_class.h> instead +#endif + #ifndef __KERNEL__ #include #include @@ -31,155 +35,10 @@ #include #include #include -#include #include #include #endif -#include -#include -#include -#include -#include -#include -#include - -/* OBD Device Declarations */ -#define MAX_OBD_DEVICES 520 -extern struct obd_device obd_dev[MAX_OBD_DEVICES]; -extern spinlock_t obd_dev_lock; - -/* OBD Operations Declarations */ -extern struct obd_device *class_conn2obd(struct lustre_handle *); -extern struct obd_device *class_exp2obd(struct obd_export *); - -struct lu_device_type; - -/* genops.c */ -struct obd_export *class_conn2export(struct lustre_handle *); -int class_register_type(struct obd_ops *, struct md_ops *, - struct lprocfs_vars *, const char *nm, - struct lu_device_type *ldt); -int class_unregister_type(const char *nm); - -struct obd_device *class_newdev(struct obd_type *type, char *name); -void class_release_dev(struct obd_device *obd); - -int class_name2dev(const char *name); -struct obd_device *class_name2obd(const char *name); -int class_uuid2dev(struct obd_uuid *uuid); -struct obd_device *class_uuid2obd(struct obd_uuid *uuid); -void class_obd_list(void); -struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, - const char * typ_name, - struct obd_uuid *grp_uuid); -struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid, - struct obd_uuid *grp_uuid); -struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, - int *next); - 
-int oig_init(struct obd_io_group **oig); -void oig_add_one(struct obd_io_group *oig, - struct oig_callback_context *occ); -void oig_complete_one(struct obd_io_group *oig, - struct oig_callback_context *occ, int rc); -void oig_release(struct obd_io_group *oig); -int oig_wait(struct obd_io_group *oig); - -char *obd_export_nid2str(struct obd_export *exp); - -int obd_export_evict_by_nid(struct obd_device *obd, const char *nid); -int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid); - -/* obd_config.c */ -int class_process_config(struct lustre_cfg *lcfg); -int class_attach(struct lustre_cfg *lcfg); -int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg); -int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg); -int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg); -struct obd_device *class_incref(struct obd_device *obd); -void class_decref(struct obd_device *obd); - -#define CFG_F_START 0x01 /* Set when we start updating from a log */ -#define CFG_F_MARKER 0x02 /* We are within a maker */ -#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */ -#define CFG_F_COMPAT146 0x08 /* Translation to new obd names required */ -#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */ - - -/* Passed as data param to class_config_parse_llog */ -struct config_llog_instance { - char * cfg_instance; - struct super_block *cfg_sb; - struct obd_uuid cfg_uuid; - int cfg_last_idx; /* for partial llog processing */ - int cfg_flags; -}; -int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, - struct config_llog_instance *cfg); -int class_config_dump_llog(struct llog_ctxt *ctxt, char *name, - struct config_llog_instance *cfg); - -/* list of active configuration logs */ -struct config_llog_data { - char *cld_logname; - struct ldlm_res_id cld_resid; - struct config_llog_instance cld_cfg; - struct list_head cld_list_chain; - atomic_t cld_refcount; - unsigned int cld_stopping:1; -}; - -struct lustre_profile { - struct list_head 
lp_list; - char * lp_profile; - char * lp_osc; - char * lp_mdc; -}; - -struct lustre_profile *class_get_profile(const char * prof); -void class_del_profile(const char *prof); - -/* genops.c */ -#define class_export_get(exp) \ -({ \ - struct obd_export *exp_ = exp; \ - atomic_inc(&exp_->exp_refcount); \ - CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp_, \ - atomic_read(&exp_->exp_refcount)); \ - exp_; \ -}) - -#define class_export_put(exp) \ -do { \ - LASSERT((exp) != NULL); \ - CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", (exp), \ - atomic_read(&(exp)->exp_refcount) - 1); \ - LASSERT(atomic_read(&(exp)->exp_refcount) > 0); \ - LASSERT(atomic_read(&(exp)->exp_refcount) < 0x5a5a5a); \ - __class_export_put(exp); \ -} while (0) -void __class_export_put(struct obd_export *); -struct obd_export *class_new_export(struct obd_device *obddev, - struct obd_uuid *cluuid); -void class_unlink_export(struct obd_export *exp); - -struct obd_import *class_import_get(struct obd_import *); -void class_import_put(struct obd_import *); -struct obd_import *class_new_import(struct obd_device *obd); -void class_destroy_import(struct obd_import *exp); - -struct obd_type *class_search_type(const char *name); -struct obd_type *class_get_type(const char *name); -void class_put_type(struct obd_type *type); -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid); -int class_disconnect(struct obd_export *exp); -void class_fail_export(struct obd_export *exp); -void class_disconnect_exports(struct obd_device *obddev); -void class_disconnect_stale_exports(struct obd_device *obddev); -int class_manual_cleanup(struct obd_device *obd); - /* obdo.c */ #ifdef __KERNEL__ void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned ia_valid); @@ -188,1526 +47,10 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid); void 
obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); #endif -void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid); -int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare); -void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj); - - -#define OBT(dev) (dev)->obd_type -#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->o_ ## op -#define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op -#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op - -/* Ensure obd_setup: used for cleanup which must be called - while obd is stopping */ -#define OBD_CHECK_DEV(obd) \ -do { \ - if (!(obd)) { \ - CERROR("NULL device\n"); \ - RETURN(-ENODEV); \ - } \ -} while (0) - -/* ensure obd_setup and !obd_stopping */ -#define OBD_CHECK_DEV_ACTIVE(obd) \ -do { \ - OBD_CHECK_DEV(obd); \ - if (!(obd)->obd_set_up || (obd)->obd_stopping) { \ - CERROR("Device %d not setup\n", \ - (obd)->obd_minor); \ - RETURN(-ENODEV); \ - } \ -} while (0) - - -#ifdef LPROCFS -#define OBD_COUNTER_OFFSET(op) \ - ((offsetof(struct obd_ops, o_ ## op) - \ - offsetof(struct obd_ops, o_iocontrol)) \ - / sizeof(((struct obd_ops *)(0))->o_iocontrol)) - -#define OBD_COUNTER_INCREMENT(obd, op) \ - if ((obd)->obd_stats != NULL) { \ - unsigned int coffset; \ - coffset = (unsigned int)(obd)->obd_cntr_base + \ - OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < obd->obd_stats->ls_num); \ - lprocfs_counter_incr(obd->obd_stats, coffset); \ - } - -#define MD_COUNTER_OFFSET(op) \ - ((offsetof(struct md_ops, m_ ## op) - \ - offsetof(struct md_ops, m_getstatus)) \ - / sizeof(((struct md_ops *)(0))->m_getstatus)) - -#define MD_COUNTER_INCREMENT(obd, op) \ - if ((obd)->md_stats != NULL) { \ - unsigned int coffset; \ - coffset = (unsigned int)(obd)->md_cntr_base + \ - MD_COUNTER_OFFSET(op); \ - LASSERT(coffset < (obd)->md_stats->ls_num); \ - lprocfs_counter_incr((obd)->md_stats, coffset); \ - } - -#else -#define OBD_COUNTER_OFFSET(op) -#define OBD_COUNTER_INCREMENT(obd, op) -#define 
MD_COUNTER_INCREMENT(obd, op) -#endif - -#define OBD_CHECK_MD_OP(obd, op, err) \ -do { \ - if (!OBT(obd) || !MDP((obd), op)) { \ - if (err) \ - CERROR("md_" #op ": dev %s/%d no operation\n", \ - obd->obd_name, obd->obd_minor); \ - RETURN(err); \ - } \ -} while (0) - -#define EXP_CHECK_MD_OP(exp, op) \ -do { \ - if ((exp) == NULL) { \ - CERROR("obd_" #op ": NULL export\n"); \ - RETURN(-ENODEV); \ - } \ - if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \ - CERROR("obd_" #op ": cleaned up obd\n"); \ - RETURN(-EOPNOTSUPP); \ - } \ - if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %s/%d no operation\n", \ - (exp)->exp_obd->obd_name, \ - (exp)->exp_obd->obd_minor); \ - RETURN(-EOPNOTSUPP); \ - } \ -} while (0) - - -#define OBD_CHECK_DT_OP(obd, op, err) \ -do { \ - if (!OBT(obd) || !OBP((obd), op)) { \ - if (err) \ - CERROR("obd_" #op ": dev %d no operation\n", \ - obd->obd_minor); \ - RETURN(err); \ - } \ -} while (0) - -#define EXP_CHECK_DT_OP(exp, op) \ -do { \ - if ((exp) == NULL) { \ - CERROR("obd_" #op ": NULL export\n"); \ - RETURN(-ENODEV); \ - } \ - if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \ - CERROR("obd_" #op ": cleaned up obd\n"); \ - RETURN(-EOPNOTSUPP); \ - } \ - if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - (exp)->exp_obd->obd_minor); \ - RETURN(-EOPNOTSUPP); \ - } \ -} while (0) - -#define CTXT_CHECK_OP(ctxt, op, err) \ -do { \ - if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \ - if (err) \ - CERROR("lop_" #op ": dev %d no operation\n", \ - ctxt->loc_obd->obd_minor); \ - RETURN(err); \ - } \ -} while (0) - -static inline int obd_get_info(struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, get_info); - OBD_COUNTER_INCREMENT(exp->exp_obd, get_info); - - rc = OBP(exp->exp_obd, get_info)(exp, keylen, key, vallen, val); - RETURN(rc); -} - -static inline int 
obd_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, set_info); - OBD_COUNTER_INCREMENT(exp->exp_obd, set_info); - - rc = OBP(exp->exp_obd, set_info)(exp, keylen, key, vallen, val); - RETURN(rc); -} - -static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) -{ - int rc; - struct lu_device_type *ldt; - ENTRY; - - ldt = obd->obd_type->typ_lu; - if (ldt != NULL) { -#ifdef __KERNEL__ - struct lu_context ctx; - struct lu_device *d; - - rc = lu_context_init(&ctx); - if (rc == 0) { - lu_context_enter(&ctx); - - d = ldt->ldt_ops->ldto_device_alloc(&ctx, ldt, cfg); - if (!IS_ERR(d)) { - obd->obd_lu_dev = d; - d->ld_obd = obd; - rc = 0; - } else - rc = PTR_ERR(d); - } -#endif - } else { - OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, setup); - rc = OBP(obd, setup)(obd, cfg); - } - RETURN(rc); -} - -static inline int obd_precleanup(struct obd_device *obd, - enum obd_cleanup_stage cleanup_stage) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(obd, precleanup, 0); - OBD_COUNTER_INCREMENT(obd, precleanup); - - rc = OBP(obd, precleanup)(obd, cleanup_stage); - RETURN(rc); -} - -static inline int obd_cleanup(struct obd_device *obd) -{ - int rc; - struct lu_device *d; - struct lu_device_type *ldt; - ENTRY; - - OBD_CHECK_DEV(obd); - - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt != NULL && d != NULL) { -#ifdef __KERNEL__ - struct lu_context ctx; - - rc = lu_context_init(&ctx); - if (rc == 0) { - lu_context_enter(&ctx); - ldt->ldt_ops->ldto_device_free(&ctx, d); - lu_context_exit(&ctx); - lu_context_fini(&ctx); - obd->obd_lu_dev = NULL; - rc = 0; - } -#endif - } else { - OBD_CHECK_DT_OP(obd, cleanup, 0); - rc = OBP(obd, cleanup)(obd); - } - OBD_COUNTER_INCREMENT(obd, cleanup); - RETURN(rc); -} - -static inline int -obd_process_config(struct obd_device *obd, int datalen, void *data) -{ - int rc; - struct lu_device *d; - struct lu_device_type 
*ldt; - ENTRY; - - OBD_CHECK_DEV(obd); - - ldt = obd->obd_type->typ_lu; - d = obd->obd_lu_dev; - if (ldt != NULL && d != NULL) { -#ifdef __KERNEL__ - struct lu_context ctx; - - rc = lu_context_init(&ctx); - if (rc == 0) { - lu_context_enter(&ctx); - rc = d->ld_ops->ldo_process_config(&ctx, d, data); - lu_context_exit(&ctx); - lu_context_fini(&ctx); - } -#endif - } else { - OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP); - rc = OBP(obd, process_config)(obd, datalen, data); - } - OBD_COUNTER_INCREMENT(obd, process_config); - - RETURN(rc); -} - -/* Pack an in-memory MD struct for storage on disk. - * Returns +ve size of packed MD (0 for free), or -ve error. - * - * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL). - * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed. - * If @*disk_tgt == NULL, it will be allocated - */ -static inline int obd_packmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, packmd); - OBD_COUNTER_INCREMENT(exp->exp_obd, packmd); - - rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src); - RETURN(rc); -} - -static inline int obd_size_diskmd(struct obd_export *exp, - struct lov_stripe_md *mem_src) -{ - return obd_packmd(exp, NULL, mem_src); -} - -/* helper functions */ -static inline int obd_alloc_diskmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt == NULL); - return obd_packmd(exp, disk_tgt, NULL); -} - -static inline int obd_free_diskmd(struct obd_export *exp, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt); - return obd_packmd(exp, disk_tgt, NULL); -} - -/* Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL). - * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed. 
- * If @*mem_tgt == NULL, it will be allocated - */ -static inline int obd_unpackmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, - int disk_len) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, unpackmd); - OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd); - - rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len); - RETURN(rc); -} - -/* helper functions */ -static inline int obd_alloc_memmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt) -{ - LASSERT(mem_tgt); - LASSERT(*mem_tgt == NULL); - return obd_unpackmd(exp, mem_tgt, NULL, 0); -} - -static inline int obd_free_memmd(struct obd_export *exp, - struct lov_stripe_md **mem_tgt) -{ - LASSERT(mem_tgt); - LASSERT(*mem_tgt); - return obd_unpackmd(exp, mem_tgt, NULL, 0); -} - -static inline int obd_checkmd(struct obd_export *exp, - struct obd_export *md_exp, - struct lov_stripe_md *mem_tgt) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, checkmd); - OBD_COUNTER_INCREMENT(exp->exp_obd, checkmd); - - rc = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt); - RETURN(rc); -} - -static inline int obd_create(struct obd_export *exp, struct obdo *obdo, - struct lov_stripe_md **ea, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, create); - OBD_COUNTER_INCREMENT(exp->exp_obd, create); - - rc = OBP(exp->exp_obd, create)(exp, obdo, ea, oti); - RETURN(rc); -} - -static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti, - struct obd_export *md_exp) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, destroy); - OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); - - rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); - RETURN(rc); -} - -static inline int obd_getattr(struct obd_export *exp, struct obdo *obdo, - struct lov_stripe_md *ea) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, getattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); - - rc = OBP(exp->exp_obd, 
getattr)(exp, obdo, ea); - RETURN(rc); -} - -static inline int obd_getattr_async(struct obd_export *exp, - struct obdo *obdo, struct lov_stripe_md *ea, - struct ptlrpc_request_set *set) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, getattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); - - rc = OBP(exp->exp_obd, getattr_async)(exp, obdo, ea, set); - RETURN(rc); -} - -static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, setattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, setattr); - - rc = OBP(exp->exp_obd, setattr)(exp, obdo, ea, oti); - RETURN(rc); -} - -static inline int obd_setattr_async(struct obd_export *exp, - struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, setattr_async); - OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async); - - rc = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti); - RETURN(rc); -} - -static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, - int priority) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - ENTRY; - - OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, add_conn); - - rc = OBP(obd, add_conn)(imp, uuid, priority); - RETURN(rc); -} - -static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) -{ - struct obd_device *obd = imp->imp_obd; - int rc; - ENTRY; - - OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, del_conn); - - rc = OBP(obd, del_conn)(imp, uuid); - RETURN(rc); -} - -static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *d) -{ - int rc; - __u64 ocf = d ? 
d->ocd_connect_flags : 0; /* for post-condition check */ - ENTRY; - - OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, connect); - - rc = OBP(obd, connect)(conn, obd, cluuid, d); - /* check that only subset is granted */ - LASSERT(ergo(d != NULL, - (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); - RETURN(rc); -} - -static inline int obd_reconnect(struct obd_export *exp, - struct obd_device *obd, - struct obd_uuid *cluuid, - struct obd_connect_data *d) -{ - int rc; - __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ - ENTRY; - - OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_DT_OP(obd, reconnect, 0); - OBD_COUNTER_INCREMENT(obd, reconnect); - - rc = OBP(obd, reconnect)(exp, obd, cluuid, d); - /* check that only subset is granted */ - LASSERT(ergo(d != NULL, - (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); - RETURN(rc); -} - -static inline int obd_disconnect(struct obd_export *exp) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, disconnect); - OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect); - - rc = OBP(exp->exp_obd, disconnect)(exp); - RETURN(rc); -} - -static inline int obd_fid_alloc(struct obd_export *exp, - struct lu_fid *fid, - struct placement_hint *hint) -{ - int rc; - ENTRY; - - if (OBP(exp->exp_obd, fid_alloc) == NULL) - RETURN(-ENOTSUPP); - - OBD_COUNTER_INCREMENT(exp->exp_obd, fid_alloc); - - rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, hint); - RETURN(rc); -} - -static inline int obd_fid_delete(struct obd_export *exp, - struct lu_fid *fid) -{ - int rc; - ENTRY; - - if (OBP(exp->exp_obd, fid_delete) == NULL) - RETURN(0); - - OBD_COUNTER_INCREMENT(exp->exp_obd, fid_delete); - rc = OBP(exp->exp_obd, fid_delete)(exp, fid); - RETURN(rc); -} - -static inline int obd_init_export(struct obd_export *exp) -{ - int rc = 0; - - ENTRY; - if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, init_export)) - rc = OBP(exp->exp_obd, init_export)(exp); - RETURN(rc); -} 
- -static inline int obd_destroy_export(struct obd_export *exp) -{ - ENTRY; - if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) && - OBP((exp)->exp_obd, destroy_export)) - OBP(exp->exp_obd, destroy_export)(exp); - RETURN(0); -} - -static inline struct dentry * -obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr) -{ - LASSERT(exp->exp_obd); - - return lvfs_fid2dentry(&exp->exp_obd->obd_lvfs_ctxt, id_ino, gen, gr, - exp->exp_obd); -} - -static inline int -obd_lvfs_open_llog(struct obd_export *exp, __u64 id_ino, struct dentry *dentry) -{ - LASSERT(exp->exp_obd); - CERROR("FIXME what's the story here? This needs to be an obd fn?\n"); -#if 0 - return lvfs_open_llog(&exp->exp_obd->obd_lvfs_ctxt, id_ino, - dentry, exp->exp_obd); -#endif - return 0; -} - -#ifndef time_before -#define time_before(t1, t2) ((long)t2 - (long)t1 > 0) -#endif - -/* @max_age is the oldest time in jiffies that we accept using a cached data. - * If the cache is older than @max_age we will get a new value from the - * target. Use a value of "jiffies + HZ" to guarantee freshness. 
*/ -static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age) -{ - int rc = 0; - ENTRY; - - if (obd == NULL) - RETURN(-EINVAL); - - OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(obd, statfs); - - CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age); - if (time_before(obd->obd_osfs_age, max_age)) { - rc = OBP(obd, statfs)(obd, osfs, max_age); - if (rc == 0) { - spin_lock(&obd->obd_osfs_lock); - memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs)); - obd->obd_osfs_age = jiffies; - spin_unlock(&obd->obd_osfs_lock); - } - } else { - CDEBUG(D_SUPER, "using cached obd_statfs data\n"); - spin_lock(&obd->obd_osfs_lock); - memcpy(osfs, &obd->obd_osfs, sizeof(*osfs)); - spin_unlock(&obd->obd_osfs_lock); - } - RETURN(rc); -} - -static inline int obd_sync(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, - obd_size end) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, sync); - - rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end); - RETURN(rc); -} - -static inline int obd_punch(struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, - obd_size end, struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, punch); - OBD_COUNTER_INCREMENT(exp->exp_obd, punch); - - rc = OBP(exp->exp_obd, punch)(exp, oa, ea, start, end, oti); - RETURN(rc); -} - -static inline int obd_brw(int cmd, struct obd_export *exp, struct obdo *oa, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pg, struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, brw); - OBD_COUNTER_INCREMENT(exp->exp_obd, brw); - - if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) { - CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, " - "or OBD_BRW_CHECK\n"); - LBUG(); - } - - rc = OBP(exp->exp_obd, brw)(cmd, exp, oa, ea, oa_bufs, pg, oti); - RETURN(rc); -} - 
-static inline int obd_brw_async(int cmd, struct obd_export *exp, - struct obdo *oa, struct lov_stripe_md *ea, - obd_count oa_bufs, struct brw_page *pg, - struct ptlrpc_request_set *set, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, brw_async); - OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async); - - if (!(cmd & OBD_BRW_RWMASK)) { - CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n"); - LBUG(); - } - - rc = OBP(exp->exp_obd, brw_async)(cmd, exp, oa, ea, oa_bufs, pg, set, - oti); - RETURN(rc); -} - -static inline int obd_prep_async_page(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct page *page, obd_off offset, - struct obd_async_page_ops *ops, - void *data, void **res) -{ - int ret; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, prep_async_page, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, prep_async_page); - - ret = OBP(exp->exp_obd, prep_async_page)(exp, lsm, loi, page, offset, - ops, data, res); - RETURN(ret); -} - -static inline int obd_queue_async_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - int cmd, obd_off off, int count, - obd_flag brw_flags, obd_flag async_flags) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, queue_async_io, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, queue_async_io); - LASSERT(cmd & OBD_BRW_RWMASK); - - rc = OBP(exp->exp_obd, queue_async_io)(exp, lsm, loi, cookie, cmd, off, - count, brw_flags, async_flags); - RETURN(rc); -} - -static inline int obd_set_async_flags(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie, - obd_flag async_flags) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, set_async_flags); - - rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie, - async_flags); - RETURN(rc); -} - -static inline int obd_queue_group_io(struct obd_export *exp, - struct 
lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig, - void *cookie, int cmd, obd_off off, - int count, obd_flag brw_flags, - obd_flag async_flags) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io); - LASSERT(cmd & OBD_BRW_RWMASK); - - rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie, - cmd, off, count, brw_flags, - async_flags); - RETURN(rc); -} - -static inline int obd_trigger_group_io(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, - struct obd_io_group *oig) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_group_io); - - rc = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig); - RETURN(rc); -} - -static inline int obd_teardown_async_page(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct lov_oinfo *loi, void *cookie) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, teardown_async_page, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, teardown_async_page); - - rc = OBP(exp->exp_obd, teardown_async_page)(exp, lsm, loi, cookie); - RETURN(rc); -} - -static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, preprw, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - - rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount, - remote, local, oti); - RETURN(rc); -} - -static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - struct obd_trans_info *oti, int rc) -{ - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, commitrw, -EOPNOTSUPP); - 
OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw); - - rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount, - local, oti, rc); - RETURN(rc); -} - -static inline int obd_merge_lvb(struct obd_export *exp, - struct lov_stripe_md *lsm, - struct ost_lvb *lvb, int kms_only) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb); - - rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only); - RETURN(rc); -} - -static inline int obd_adjust_kms(struct obd_export *exp, - struct lov_stripe_md *lsm, obd_off size, - int shrink) -{ - int rc; - ENTRY; - - OBD_CHECK_DT_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP); - OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms); - - rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink); - RETURN(rc); -} - -static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, - int len, void *karg, void *uarg) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, iocontrol); - OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol); - - rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg); - RETURN(rc); -} - -static inline int obd_enqueue(struct obd_export *exp, struct lov_stripe_md *ea, - __u32 type, ldlm_policy_data_t *policy, - __u32 mode, int *flags, void *bl_cb, void *cp_cb, - void *gl_cb, void *data, __u32 lvb_len, - void *lvb_swabber, struct lustre_handle *lockh) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, enqueue); - OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue); - - rc = OBP(exp->exp_obd, enqueue)(exp, ea, type, policy, mode, flags, - bl_cb, cp_cb, gl_cb, data, lvb_len, - lvb_swabber, lockh); - RETURN(rc); -} - -static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea, - __u32 type, ldlm_policy_data_t *policy, __u32 mode, - int *flags, void *data, struct lustre_handle *lockh) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, match); - OBD_COUNTER_INCREMENT(exp->exp_obd, match); - - rc = OBP(exp->exp_obd, match)(exp, ea, type, 
policy, mode, flags, data, - lockh); - RETURN(rc); -} - -static inline int obd_change_cbdata(struct obd_export *exp, - struct lov_stripe_md *lsm, - ldlm_iterator_t it, void *data) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, change_cbdata); - OBD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata); - - rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data); - RETURN(rc); -} - -static inline int obd_cancel(struct obd_export *exp, - struct lov_stripe_md *ea, __u32 mode, - struct lustre_handle *lockh) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, cancel); - OBD_COUNTER_INCREMENT(exp->exp_obd, cancel); - - rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh); - RETURN(rc); -} - -static inline int obd_cancel_unused(struct obd_export *exp, - struct lov_stripe_md *ea, - int flags, void *opaque) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, cancel_unused); - OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); - - rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque); - RETURN(rc); -} - -static inline int obd_join_lru(struct obd_export *exp, - struct lov_stripe_md *ea, int join) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, join_lru); - OBD_COUNTER_INCREMENT(exp->exp_obd, join_lru); - - rc = OBP(exp->exp_obd, join_lru)(exp, ea, join); - RETURN(rc); -} - -static inline int obd_san_preprw(int cmd, struct obd_export *exp, - struct obdo *oa, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote) -{ - int rc; - - EXP_CHECK_DT_OP(exp, preprw); - OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - - rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj, - niocount, remote); - class_export_put(exp); - return(rc); -} - -static inline int obd_pin(struct obd_export *exp, struct lu_fid *fid, - struct obd_client_handle *handle, int flag) -{ - int rc; - - EXP_CHECK_DT_OP(exp, pin); - OBD_COUNTER_INCREMENT(exp->exp_obd, pin); - - rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag); - return(rc); -} - -static inline int obd_unpin(struct 
obd_export *exp, - struct obd_client_handle *handle, int flag) -{ - int rc; - - EXP_CHECK_DT_OP(exp, unpin); - OBD_COUNTER_INCREMENT(exp->exp_obd, unpin); - - rc = OBP(exp->exp_obd, unpin)(exp, handle, flag); - return(rc); -} - - -static inline void obd_import_event(struct obd_device *obd, - struct obd_import *imp, - enum obd_import_event event) -{ - if (!obd) { - CERROR("NULL device\n"); - EXIT; - return; - } - if (obd->obd_set_up && OBP(obd, import_event)) { - OBD_COUNTER_INCREMENT(obd, import_event); - OBP(obd, import_event)(obd, imp, event); - } -} - -static inline int obd_notify(struct obd_device *obd, - struct obd_device *watched, - enum obd_notify_event ev, - void *data) -{ - OBD_CHECK_DEV(obd); - - /* the check for async_recov is a complete hack - I'm hereby - overloading the meaning to also mean "this was called from - mds_postsetup". I know that my mds is able to handle notifies - by this point, and it needs to get them to execute mds_postrecov. */ - if (!obd->obd_set_up && !obd->obd_async_recov) { - CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name); - return -EINVAL; - } - - if (!OBP(obd, notify)) { - CERROR("obd %s has no notify handler\n", obd->obd_name); - return -ENOSYS; - } - - OBD_COUNTER_INCREMENT(obd, notify); - return OBP(obd, notify)(obd, watched, ev, data); -} - -static inline int obd_notify_observer(struct obd_device *observer, - struct obd_device *observed, - enum obd_notify_event ev, - void *data) -{ - int rc1; - int rc2; - - struct obd_notify_upcall *onu; - - if (observer->obd_observer) - rc1 = obd_notify(observer->obd_observer, observed, ev, data); - else - rc1 = 0; - /* - * Also, call non-obd listener, if any - */ - onu = &observer->obd_upcall; - if (onu->onu_upcall != NULL) - rc2 = onu->onu_upcall(observer, observed, ev, onu->onu_owner); - else - rc2 = 0; - - return rc1 ? 
rc1 : rc2; -} - -static inline int obd_quotacheck(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, quotacheck); - OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck); - - rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl); - RETURN(rc); -} - -static inline int obd_quotactl(struct obd_export *exp, - struct obd_quotactl *oqctl) -{ - int rc; - ENTRY; - - EXP_CHECK_DT_OP(exp, quotactl); - OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl); - - rc = OBP(exp->exp_obd, quotactl)(exp, oqctl); - RETURN(rc); -} - -static inline int obd_health_check(struct obd_device *obd) -{ - /* returns: 0 on healthy - * >0 on unhealthy + reason code/flag - * however the only suppored reason == 1 right now - * We'll need to define some better reasons - * or flags in the future. - * <0 on error - */ - int rc; - ENTRY; - - /* don't use EXP_CHECK_OP, because NULL method is normal here */ - if (obd == NULL || !OBT(obd)) { - CERROR("cleaned up obd\n"); - RETURN(-EOPNOTSUPP); - } - if (!obd->obd_set_up || obd->obd_stopping) - RETURN(0); - if (!OBP(obd, health_check)) - RETURN(0); - - rc = OBP(obd, health_check)(obd); - RETURN(rc); -} - -static inline int obd_register_observer(struct obd_device *obd, - struct obd_device *observer) -{ - ENTRY; - OBD_CHECK_DEV(obd); - if (obd->obd_observer && observer) - RETURN(-EALREADY); - obd->obd_observer = observer; - RETURN(0); -} - -/* metadata helpers */ -static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid) -{ - int rc; - ENTRY; - - EXP_CHECK_MD_OP(exp, getstatus); - MD_COUNTER_INCREMENT(exp->exp_obd, getstatus); - rc = MDP(exp->exp_obd, getstatus)(exp, fid); - RETURN(rc); -} - -static inline int md_getattr(struct obd_export *exp, struct lu_fid *fid, - obd_valid valid, int ea_size, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, getattr); - MD_COUNTER_INCREMENT(exp->exp_obd, getattr); - rc = MDP(exp->exp_obd, getattr)(exp, fid, valid, - ea_size, request); - 
RETURN(rc); -} - -static inline int md_change_cbdata(struct obd_export *exp, struct lu_fid *fid, - ldlm_iterator_t it, void *data) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, change_cbdata); - MD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata); - rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data); - RETURN(rc); -} - -static inline int md_close(struct obd_export *exp, - struct md_op_data *op_data, - struct obd_client_handle *och, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, close); - MD_COUNTER_INCREMENT(exp->exp_obd, close); - rc = MDP(exp->exp_obd, close)(exp, op_data, och, request); - RETURN(rc); -} - -static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, - const void *data, int datalen, int mode, - __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, create); - MD_COUNTER_INCREMENT(exp->exp_obd, create); - rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode, - uid, gid, cap_effective, rdev, request); - RETURN(rc); -} - -static inline int md_done_writing(struct obd_export *exp, - struct md_op_data *op_data) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, done_writing); - MD_COUNTER_INCREMENT(exp->exp_obd, done_writing); - rc = MDP(exp->exp_obd, done_writing)(exp, op_data); - RETURN(rc); -} - -static inline int md_enqueue(struct obd_export *exp, int lock_type, - struct lookup_intent *it, int lock_mode, - struct md_op_data *op_data, - struct lustre_handle *lockh, - void *lmm, int lmmsize, - ldlm_completion_callback cb_completion, - ldlm_blocking_callback cb_blocking, - void *cb_data, int extra_lock_flags) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, enqueue); - MD_COUNTER_INCREMENT(exp->exp_obd, enqueue); - rc = MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode, - op_data, lockh, lmm, lmmsize, - cb_completion, cb_blocking, - cb_data, extra_lock_flags); - RETURN(rc); -} - -static inline int 
md_getattr_name(struct obd_export *exp, struct lu_fid *fid, - const char *filename, int namelen, - obd_valid valid, int ea_size, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, getattr_name); - MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name); - rc = MDP(exp->exp_obd, getattr_name)(exp, fid, filename, namelen, - valid, ea_size, request); - RETURN(rc); -} - -static inline int md_intent_lock(struct obd_export *exp, - struct md_op_data *op_data, - void *lmm, int lmmsize, - struct lookup_intent *it, - int flags, struct ptlrpc_request **reqp, - ldlm_blocking_callback cb_blocking, - int extra_lock_flags) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, intent_lock); - MD_COUNTER_INCREMENT(exp->exp_obd, intent_lock); - rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize, - it, flags, reqp, cb_blocking, - extra_lock_flags); - RETURN(rc); -} - -static inline int md_link(struct obd_export *exp, - struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, link); - MD_COUNTER_INCREMENT(exp->exp_obd, link); - rc = MDP(exp->exp_obd, link)(exp, op_data, request); - RETURN(rc); -} - -static inline int md_rename(struct obd_export *exp, - struct md_op_data *op_data, - const char *old, int oldlen, - const char *new, int newlen, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, rename); - MD_COUNTER_INCREMENT(exp->exp_obd, rename); - rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new, - newlen, request); - RETURN(rc); -} - -static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, - struct iattr *iattr, void *ea, int ealen, - void *ea2, int ea2len, struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, setattr); - MD_COUNTER_INCREMENT(exp->exp_obd, setattr); - rc = MDP(exp->exp_obd, setattr)(exp, op_data, iattr, ea, ealen, - ea2, ea2len, request); - RETURN(rc); -} - -static inline int md_sync(struct obd_export 
*exp, struct lu_fid *fid, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, sync); - MD_COUNTER_INCREMENT(exp->exp_obd, sync); - rc = MDP(exp->exp_obd, sync)(exp, fid, request); - RETURN(rc); -} - -static inline int md_readpage(struct obd_export *exp, struct lu_fid *fid, - __u64 offset, struct page *page, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, readpage); - MD_COUNTER_INCREMENT(exp->exp_obd, readpage); - rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request); - RETURN(rc); -} - -static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data, - struct ptlrpc_request **request) -{ - int rc; - ENTRY; - EXP_CHECK_MD_OP(exp, unlink); - MD_COUNTER_INCREMENT(exp->exp_obd, unlink); - rc = MDP(exp->exp_obd, unlink)(exp, op_data, request); - RETURN(rc); -} - -static inline int md_get_lustre_md(struct obd_export *exp, - struct ptlrpc_request *req, - int offset, struct obd_export *dt_exp, - struct lustre_md *md) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, get_lustre_md); - MD_COUNTER_INCREMENT(exp->exp_obd, get_lustre_md); - RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, offset, - dt_exp, md)); -} - -static inline int md_free_lustre_md(struct obd_export *exp, - struct lustre_md *md) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, free_lustre_md); - MD_COUNTER_INCREMENT(exp->exp_obd, free_lustre_md); - RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md)); -} - -static inline int md_setxattr(struct obd_export *exp, struct lu_fid *fid, - obd_valid valid, const char *name, - const char *input, int input_size, - int output_size, int flags, - struct ptlrpc_request **request) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, setxattr); - MD_COUNTER_INCREMENT(exp->exp_obd, setxattr); - RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input, - input_size, output_size, flags, - request)); -} - -static inline int md_getxattr(struct obd_export *exp, struct lu_fid *fid, - obd_valid valid, const char *name, - const char 
*input, int input_size, - int output_size, int flags, - struct ptlrpc_request **request) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, getxattr); - MD_COUNTER_INCREMENT(exp->exp_obd, getxattr); - RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input, - input_size, output_size, flags, - request)); -} - -static inline int md_set_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och, - struct ptlrpc_request *open_req) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, set_open_replay_data); - MD_COUNTER_INCREMENT(exp->exp_obd, set_open_replay_data); - RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req)); -} - -static inline int md_clear_open_replay_data(struct obd_export *exp, - struct obd_client_handle *och) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, clear_open_replay_data); - MD_COUNTER_INCREMENT(exp->exp_obd, clear_open_replay_data); - RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och)); -} - -static inline int md_set_lock_data(struct obd_export *exp, - __u64 *lockh, void *data) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, set_lock_data); - MD_COUNTER_INCREMENT(exp->exp_obd, set_lock_data); - RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data)); -} - -static inline int md_cancel_unused(struct obd_export *exp, - struct lu_fid *fid, - int flags, void *opaque) -{ - int rc; - ENTRY; - - EXP_CHECK_MD_OP(exp, cancel_unused); - MD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); - - rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, flags, opaque); - RETURN(rc); -} - -static inline int md_lock_match(struct obd_export *exp, int flags, - struct lu_fid *fid, ldlm_type_t type, - ldlm_policy_data_t *policy, ldlm_mode_t mode, - struct lustre_handle *lockh) -{ - ENTRY; - EXP_CHECK_MD_OP(exp, lock_match); - MD_COUNTER_INCREMENT(exp->exp_obd, lock_match); - RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, - policy, mode, lockh)); -} - -static inline int md_init_ea_size(struct obd_export *exp, - int easize, int def_asize, - int cookiesize) -{ - ENTRY; - 
EXP_CHECK_MD_OP(exp, init_ea_size); - MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size); - RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, - def_asize, - cookiesize)); -} - -/* OBD Metadata Support */ -extern int obd_init_caches(void); -extern void obd_cleanup_caches(void); - -/* support routines */ -extern kmem_cache_t *obdo_cachep; -static inline struct obdo *obdo_alloc(void) -{ - struct obdo *oa; - - OBD_SLAB_ALLOC(oa, obdo_cachep, SLAB_KERNEL, sizeof(*oa)); - - return oa; -} - -static inline void obdo_free(struct obdo *oa) -{ - OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); -} - -static inline void obdo2fid(struct obdo *oa, - struct lu_fid *fid) -{ - /* something here */ -} - -static inline void fid2obdo(struct lu_fid *fid, - struct obdo *oa) -{ - /* something here */ -} #if !defined(__KERNEL__) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #define to_kdev_t(dev) dev #define kdev_t_to_nr(dev) dev #endif -/* I'm as embarrassed about this as you are. - * - * // XXX do not look into _superhack with remaining eye - * // XXX if this were any uglier, I'd get my own show on MTV */ -extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); - -/* sysctl.c */ -extern void obd_sysctl_init (void); -extern void obd_sysctl_clean (void); - -/* uuid.c */ -typedef __u8 class_uuid_t[16]; -void class_generate_random_uuid(class_uuid_t uuid); -void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); - -/* lustre_peer.c */ -int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index); -int class_add_uuid(const char *uuid, __u64 nid); -int class_del_uuid (const char *uuid); -void class_init_uuidlist(void); -void class_exit_uuidlist(void); - -/* mea.c */ -int mea_name2idx(struct lmv_stripe_md *mea, char *name, int namelen); -int raw_name2idx(int hashtype, int count, const char *name, int namelen); - #endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index f096703..bb188e7 
100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -20,8 +20,12 @@ * */ +#ifndef _LINUX_OBD_SUPPORT +#define _LINUX_OBD_SUPPORT + #ifndef _OBD_SUPPORT -#define _OBD_SUPPORT +#error Do not #include this file directly. #include instead +#endif #ifdef __KERNEL__ #include @@ -32,205 +36,6 @@ #include #include -/* global variables */ -extern atomic_t obd_memory; -extern int obd_memmax; -extern unsigned int obd_fail_loc; -extern unsigned int obd_dump_on_timeout; -extern unsigned int obd_timeout; /* seconds */ -#define PING_INTERVAL max(obd_timeout / 4, 1U) -#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) -extern unsigned int ldlm_timeout; -extern unsigned int obd_health_check_timeout; -extern char obd_lustre_upcall[128]; -extern unsigned int obd_sync_filter; -extern wait_queue_head_t obd_race_waitq; - -#define OBD_FAIL_MDS 0x100 -#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 -#define OBD_FAIL_MDS_GETATTR_NET 0x102 -#define OBD_FAIL_MDS_GETATTR_PACK 0x103 -#define OBD_FAIL_MDS_READPAGE_NET 0x104 -#define OBD_FAIL_MDS_READPAGE_PACK 0x105 -#define OBD_FAIL_MDS_SENDPAGE 0x106 -#define OBD_FAIL_MDS_REINT_NET 0x107 -#define OBD_FAIL_MDS_REINT_UNPACK 0x108 -#define OBD_FAIL_MDS_REINT_SETATTR 0x109 -#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a -#define OBD_FAIL_MDS_REINT_CREATE 0x10b -#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c -#define OBD_FAIL_MDS_REINT_UNLINK 0x10d -#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e -#define OBD_FAIL_MDS_REINT_LINK 0x10f -#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110 -#define OBD_FAIL_MDS_REINT_RENAME 0x111 -#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112 -#define OBD_FAIL_MDS_OPEN_NET 0x113 -#define OBD_FAIL_MDS_OPEN_PACK 0x114 -#define OBD_FAIL_MDS_CLOSE_NET 0x115 -#define OBD_FAIL_MDS_CLOSE_PACK 0x116 -#define OBD_FAIL_MDS_CONNECT_NET 0x117 -#define OBD_FAIL_MDS_CONNECT_PACK 0x118 -#define OBD_FAIL_MDS_REINT_NET_REP 0x119 -#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a -#define OBD_FAIL_MDS_GETSTATUS_NET 
0x11b -#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c -#define OBD_FAIL_MDS_STATFS_PACK 0x11d -#define OBD_FAIL_MDS_STATFS_NET 0x11e -#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f -#define OBD_FAIL_MDS_PIN_NET 0x120 -#define OBD_FAIL_MDS_UNPIN_NET 0x121 -#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122 -#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 -#define OBD_FAIL_MDS_SYNC_NET 0x124 -#define OBD_FAIL_MDS_SYNC_PACK 0x125 -#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 -#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 -#define OBD_FAIL_MDS_ALLOC_OBDO 0x128 -#define OBD_FAIL_MDS_PAUSE_OPEN 0x129 -#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a -#define OBD_FAIL_MDS_OPEN_CREATE 0x12b -#define OBD_FAIL_MDS_OST_SETATTR 0x12c -#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d -#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e -#define OBD_FAIL_MDS_CLIENT_ADD 0x12f -#define OBD_FAIL_MDS_GETXATTR_NET 0x130 -#define OBD_FAIL_MDS_GETXATTR_PACK 0x131 -#define OBD_FAIL_MDS_SETXATTR_NET 0x132 -#define OBD_FAIL_MDS_SETXATTR 0x133 -#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 -#define OBD_FAIL_MDS_SET_INFO_NET 0x135 -#define OBD_FAIL_MDS_SET_INFO_PACK 0x136 -#define OBD_FAIL_MDS_FLD_NET 0x137 -#define OBD_FAIL_MDS_FLD_PACK 0x138 - -#define OBD_FAIL_OST 0x200 -#define OBD_FAIL_OST_CONNECT_NET 0x201 -#define OBD_FAIL_OST_DISCONNECT_NET 0x202 -#define OBD_FAIL_OST_GET_INFO_NET 0x203 -#define OBD_FAIL_OST_CREATE_NET 0x204 -#define OBD_FAIL_OST_DESTROY_NET 0x205 -#define OBD_FAIL_OST_GETATTR_NET 0x206 -#define OBD_FAIL_OST_SETATTR_NET 0x207 -#define OBD_FAIL_OST_OPEN_NET 0x208 -#define OBD_FAIL_OST_CLOSE_NET 0x209 -#define OBD_FAIL_OST_BRW_NET 0x20a -#define OBD_FAIL_OST_PUNCH_NET 0x20b -#define OBD_FAIL_OST_STATFS_NET 0x20c -#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d -#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e -#define OBD_FAIL_OST_BRW_READ_BULK 0x20f -#define OBD_FAIL_OST_SYNC_NET 0x210 -#define OBD_FAIL_OST_ALL_REPLY_NET 0x211 -#define OBD_FAIL_OST_ALL_REQUESTS_NET 0x212 -#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 -#define 
OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 -#define OBD_FAIL_OST_ENOSPC 0x215 -#define OBD_FAIL_OST_EROFS 0x216 -#define OBD_FAIL_OST_ENOENT 0x217 -#define OBD_FAIL_OST_QUOTACHECK_NET 0x218 -#define OBD_FAIL_OST_QUOTACTL_NET 0x219 - -#define OBD_FAIL_LDLM 0x300 -#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 -#define OBD_FAIL_LDLM_ENQUEUE 0x302 -#define OBD_FAIL_LDLM_CONVERT 0x303 -#define OBD_FAIL_LDLM_CANCEL 0x304 -#define OBD_FAIL_LDLM_BL_CALLBACK 0x305 -#define OBD_FAIL_LDLM_CP_CALLBACK 0x306 -#define OBD_FAIL_LDLM_GL_CALLBACK 0x307 -#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 -#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 -#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a -#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b -#define OBD_FAIL_LDLM_REPLY 0x30c -#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d -#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e - -#define OBD_FAIL_OSC 0x400 -#define OBD_FAIL_OSC_BRW_READ_BULK 0x401 -#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402 -#define OBD_FAIL_OSC_LOCK_BL_AST 0x403 -#define OBD_FAIL_OSC_LOCK_CP_AST 0x404 -#define OBD_FAIL_OSC_MATCH 0x405 -#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 -#define OBD_FAIL_OSC_SHUTDOWN 0x407 - -#define OBD_FAIL_PTLRPC 0x500 -#define OBD_FAIL_PTLRPC_ACK 0x501 -#define OBD_FAIL_PTLRPC_RQBD 0x502 -#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503 -#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 -#define OBD_FAIL_PTLRPC_DROP_RPC 0x505 -#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 - -#define OBD_FAIL_OBD_PING_NET 0x600 -#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 -#define OBD_FAIL_OBD_LOGD_NET 0x602 -#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 -#define OBD_FAIL_OBD_DQACQ 0x604 - -#define OBD_FAIL_TGT_REPLY_NET 0x700 -#define OBD_FAIL_TGT_CONN_RACE 0x701 -#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702 -#define OBD_FAIL_TGT_DELAY_CONNECT 0x703 -#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 - -#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 - - -#define OBD_FAIL_MGS 0x900 -#define OBD_FAIL_MGS_FIRST_CONNECT 0x901 -#define OBD_FAIL_MGS_CONNECT_NET 0x117 -#define 
OBD_FAIL_MGS_DISCONNECT_NET 0x11a -#define OBD_FAIL_MGS_ALL_REPLY_NET 0x122 -#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x123 - -/* preparation for a more advanced failure testbed (not functional yet) */ -#define OBD_FAIL_MASK_SYS 0x0000FF00 -#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) -#define OBD_FAIL_ONCE 0x80000000 -#define OBD_FAILED 0x40000000 -#define OBD_FAIL_MDS_ALL_NET 0x01000000 -#define OBD_FAIL_OST_ALL_NET 0x02000000 -#define OBD_FAIL_MGS_ALL_NET 0x01000000 - -#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) && \ - ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \ - (OBD_FAILED | OBD_FAIL_ONCE))) - -#define OBD_FAIL_CHECK_ONCE(id) \ -({ int _ret_ = 0; \ - if (OBD_FAIL_CHECK(id)) { \ - CERROR("*** obd_fail_loc=%x ***\n", id); \ - obd_fail_loc |= OBD_FAILED; \ - if ((id) & OBD_FAIL_ONCE) \ - obd_fail_loc |= OBD_FAIL_ONCE; \ - _ret_ = 1; \ - } \ - _ret_; \ -}) - -#define OBD_FAIL_RETURN(id, ret) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - RETURN(ret); \ - } \ -} while(0) - -#define OBD_FAIL_TIMEOUT(id, secs) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - CERROR("obd_fail_timeout id %x sleeping for %d secs\n", \ - (id), (secs)); \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout((secs) * HZ); \ - set_current_state(TASK_RUNNING); \ - CERROR("obd_fail_timeout id %x awake\n", (id)); \ - } \ -} while(0) - /* Prefer the kernel's version, if it exports it, because it might be * optimized for this CPU. */ #if defined(__KERNEL__) && (defined(CONFIG_CRC32) || defined(CONFIG_CRC32_MODULE)) @@ -264,32 +69,9 @@ static inline __u32 crc32_le(__u32 crc, unsigned char const *p, size_t len) #endif #ifdef __KERNEL__ -/* The idea here is to synchronise two threads to force a race. The - * first thread that calls this with a matching fail_loc is put to - * sleep. The next thread that calls with the same fail_loc wakes up - * the first and continues. 
*/ -#define OBD_RACE(id) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - CERROR("obd_race id %x sleeping\n", (id)); \ - interruptible_sleep_on(&obd_race_waitq); \ - CERROR("obd_fail_race id %x awake\n", (id)); \ - } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) { \ - wake_up(&obd_race_waitq); \ - } \ -} while(0) -#else -/* sigh. an expedient fix until OBD_RACE is fixed up */ -#define OBD_RACE(foo) do {} while(0) -#endif - -#define fixme() CDEBUG(D_OTHER, "FIXME\n"); - -#ifdef __KERNEL__ # include # include -# include +# include static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) { @@ -302,6 +84,10 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; } } + +#define OBD_SLEEP_ON(wq) interruptible_sleep_on(wq) + + #else /* !__KERNEL__ */ # define LTIME_S(time) (time) /* for obd_class.h */ @@ -310,155 +96,4 @@ static inline void OBD_FAIL_WRITE(int id, struct super_block *sb) # endif #endif /* __KERNEL__ */ -extern atomic_t libcfs_kmemory; - -#if defined(LUSTRE_UTILS) /* this version is for utils only */ -#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ -({ \ - typeof(ptr) __ptr; \ - __ptr = kmalloc(size, (gfp_mask)); \ - if (__ptr == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - } else { \ - memset(__ptr, 0, size); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n", \ - (int)(size), __ptr); \ - } \ - (ptr) = __ptr; \ -}) -#else /* this version is for the kernel and liblustre */ -#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ -({ \ - typeof(ptr) __ptr; \ - __ptr = kmalloc(size, (gfp_mask)); \ - if (__ptr == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ - } else { \ - memset(__ptr, 0, size); \ - 
atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), __ptr, atomic_read(&obd_memory)); \ - } \ - (ptr) = __ptr; \ -}) -#endif - -#ifndef OBD_GFP_MASK -# define OBD_GFP_MASK GFP_NOFS -#endif - -#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_GFP_MASK) -#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, GFP_KERNEL) -#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr)) -#define OBD_ALLOC_PTR_WAIT(ptr) OBD_ALLOC_WAIT(ptr, sizeof *(ptr)) - -#ifdef __arch_um__ -# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) -#else -# define OBD_VMALLOC(ptr, size) \ -do { \ - (ptr) = vmalloc(size); \ - if ((ptr) == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ -} while (0) -#endif - -#ifdef CONFIG_DEBUG_SLAB -#define POISON(ptr, c, s) do {} while (0) -#else -#define POISON(ptr, c, s) memset(ptr, c, s) -#endif - -#if POISON_BULK -#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \ - kunmap(page); } while (0) -#else -#define POISON_PAGE(page, val) do { } while (0) -#endif - -#ifdef __KERNEL__ -#define OBD_FREE(ptr, size) \ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ - kfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) -#else -#define OBD_FREE(ptr, size) 
((void)(size), free((ptr))) -#endif - -#ifdef __arch_um__ -# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) -#else -# define OBD_VFREE(ptr, size) \ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ - vfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) -#endif - -/* we memset() the slab object to 0 when allocation succeeds, so DO NOT - * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. we'd - * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ -#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ -do { \ - LASSERT(!in_interrupt()); \ - (ptr) = kmem_cache_alloc(slab, (type)); \ - if ((ptr) == NULL) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ - atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ -} while (0) - -#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr)) - -#define OBD_SLAB_FREE(ptr, slab, size) \ -do { \ - LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ - POISON(ptr, 0x5a, size); \ - kmem_cache_free(slab, ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) - -#define KEY_IS(str) \ - (keylen == strlen(str) && memcmp(key, str, keylen) == 0) - #endif diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h new file mode 100644 index 0000000..c6b8005 --- /dev/null +++ b/lustre/include/lprocfs_status.h @@ -0,0 +1,376 @@ 
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Top level header file for LProc SNMP + * Author: Hariharan Thantry thantry@users.sourceforge.net + */ +#ifndef _LPROCFS_SNMP_H +#define _LPROCFS_SNMP_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#undef LPROCFS +#if (defined(__KERNEL__) && defined(CONFIG_PROC_FS)) +# define LPROCFS +#endif + +struct lprocfs_vars { + const char *name; + cfs_read_proc_t *read_fptr; + cfs_write_proc_t *write_fptr; + void *data; +}; + +struct lprocfs_static_vars { + struct lprocfs_vars *module_vars; + struct lprocfs_vars *obd_vars; +}; + +/* An lprocfs counter can be configured using the enum bit masks below. + * + * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already + * protects this counter from concurrent updates. If not specified, + * lprocfs uses an internal per-counter lock variable. External locks are + * not used to protect counter increments, but are used to protect + * counter readout and resets. + * + * LPROCFS_CNTR_AVGMINMAX indicates multi-valued counter samples + * (i.e.
counter can be incremented by more than "1"). When specified, + * the counter maintains min, max and sum in addition to a simple + * invocation count. This allows averages to be computed. + * If not specified, the counter is an increment-by-1 counter. + * min, max, sum, etc. are not maintained. + * + * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of + * squares (for multi-valued counter samples only). This allows + * external computation of standard deviation, but involves a 64-bit + * multiply per counter increment. + */ + +enum { + LPROCFS_CNTR_EXTERNALLOCK = 0x0001, + LPROCFS_CNTR_AVGMINMAX = 0x0002, + LPROCFS_CNTR_STDDEV = 0x0004, + + /* counter data type */ + LPROCFS_TYPE_REGS = 0x0100, + LPROCFS_TYPE_BYTES = 0x0200, + LPROCFS_TYPE_PAGES = 0x0400, + LPROCFS_TYPE_CYCLE = 0x0800, +}; + +struct lprocfs_atomic { + atomic_t la_entry; + atomic_t la_exit; +}; + +struct lprocfs_counter { + struct lprocfs_atomic lc_cntl; /* may need to move to per set */ + unsigned int lc_config; + __u64 lc_count; + __u64 lc_sum; + __u64 lc_min; + __u64 lc_max; + __u64 lc_sumsquare; + const char *lc_name; /* must be static */ + const char *lc_units; /* must be static */ +}; + +struct lprocfs_percpu { + struct lprocfs_counter lp_cntr[0]; +}; + + +struct lprocfs_stats { + unsigned int ls_num; /* # of counters */ + unsigned int ls_percpu_size; + struct lprocfs_percpu *ls_percpu[0]; +}; + + +/* class_obd.c */ +extern cfs_proc_dir_entry_t *proc_lustre_root; + +struct obd_device; +struct file; +struct obd_histogram; + +#ifdef LPROCFS + +/* Two optimized LPROCFS counter increment functions are provided: + * lprocfs_counter_incr(stats, idx) - optimized for by-one counters + * lprocfs_counter_add(stats, idx, amount) - use for multi-valued counters + * Counter data layout allows config flag, counter lock and the + * count itself to reside within a single cache line.
+ */ + +static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, + long amount) +{ + struct lprocfs_counter *percpu_cntr; + + LASSERT(stats != NULL); + percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); + atomic_inc(&percpu_cntr->lc_cntl.la_entry); + percpu_cntr->lc_count++; + + if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { + percpu_cntr->lc_sum += amount; + if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV) + percpu_cntr->lc_sumsquare += (__u64)amount * amount; + if (amount < percpu_cntr->lc_min) + percpu_cntr->lc_min = amount; + if (amount > percpu_cntr->lc_max) + percpu_cntr->lc_max = amount; + } + atomic_inc(&percpu_cntr->lc_cntl.la_exit); +} + +static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) +{ + struct lprocfs_counter *percpu_cntr; + + LASSERT(stats != NULL); + percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); + atomic_inc(&percpu_cntr->lc_cntl.la_entry); + percpu_cntr->lc_count++; + atomic_inc(&percpu_cntr->lc_cntl.la_exit); +} + +extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num); +extern void lprocfs_free_stats(struct lprocfs_stats *stats); +extern int lprocfs_alloc_obd_stats(struct obd_device *obddev, + unsigned int num_private_stats); +extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index, + unsigned conf, const char *name, + const char *units); +extern void lprocfs_free_obd_stats(struct obd_device *obddev); +extern int lprocfs_register_stats(cfs_proc_dir_entry_t *root, const char *name, + struct lprocfs_stats *stats); + +#define LPROCFS_INIT_VARS(name, vclass, vinstance) \ +void lprocfs_##name##_init_vars(struct lprocfs_static_vars *x) \ +{ \ + x->module_vars = vclass; \ + x->obd_vars = vinstance; \ +} \ + +#define lprocfs_init_vars(NAME, VAR) \ +do { \ + extern void lprocfs_##NAME##_init_vars(struct lprocfs_static_vars *); \ + lprocfs_##NAME##_init_vars(VAR); \ +} while (0) +/* lprocfs_status.c */ +extern int 
lprocfs_add_vars(cfs_proc_dir_entry_t *root, + struct lprocfs_vars *var, + void *data); + +extern cfs_proc_dir_entry_t *lprocfs_register(const char *name, + cfs_proc_dir_entry_t *parent, + struct lprocfs_vars *list, + void *data); + +extern void lprocfs_remove(cfs_proc_dir_entry_t *root); + +extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root, + const char *name); + +extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list); +extern int lprocfs_obd_cleanup(struct obd_device *obd); + +/* Generic callbacks */ + +extern int lprocfs_rd_u64(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_atomic(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_name(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_fstype(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_wr_evict_client(struct file *file, const char *buffer, + unsigned long count, void *data); +extern int lprocfs_wr_ping(struct file *file, const char *buffer, + unsigned long count, void *data); + +/* Statfs helpers */ +extern int lprocfs_rd_blksize(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_kbytestotal(char *page, char 
**start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_filestotal(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, + int count, int *eof, void *data); + +extern int lprocfs_write_helper(const char *buffer, unsigned long count, + int *val); +extern int lprocfs_write_u64_helper(const char *buffer, unsigned long count, + __u64 *val); +int lprocfs_obd_seq_create(struct obd_device *dev, char *name, mode_t mode, + struct file_operations *seq_fops, void *data); +void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value); +void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); +void lprocfs_oh_clear(struct obd_histogram *oh); +unsigned long lprocfs_oh_sum(struct obd_histogram *oh); + +/* lprocfs_status.c: counter read/write functions */ +extern int lprocfs_counter_read(char *page, char **start, off_t off, + int count, int *eof, void *data); +extern int lprocfs_counter_write(struct file *file, const char *buffer, + unsigned long count, void *data); + +/* lprocfs_status.c: recovery status */ +int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, + int count, int *eof, void *data); +#else +/* LPROCFS is not defined */ +static inline void lprocfs_counter_add(struct lprocfs_stats *stats, + int index, long amount) { return; } +static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, + int index) { return; } +static inline void lprocfs_counter_init(struct lprocfs_stats *stats, + int index, unsigned conf, + const char *name, const char *units) +{ return; } + +static inline struct 
lprocfs_stats* lprocfs_alloc_stats(unsigned int num) +{ return NULL; } +static inline void lprocfs_free_stats(struct lprocfs_stats *stats) +{ return; } + +static inline int lprocfs_register_stats(cfs_proc_dir_entry_t *root, + const char *name, + struct lprocfs_stats *stats) +{ return 0; } +static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, + unsigned int num_private_stats) +{ return 0; } +static inline void lprocfs_free_obd_stats(struct obd_device *obddev) +{ return; } + +static inline cfs_proc_dir_entry_t * +lprocfs_register(const char *name, cfs_proc_dir_entry_t *parent, + struct lprocfs_vars *list, void *data) { return NULL; } +#define LPROCFS_INIT_VARS(name, vclass, vinstance) +#define lprocfs_init_vars(...) do {} while (0) +static inline int lprocfs_add_vars(cfs_proc_dir_entry_t *root, + struct lprocfs_vars *var, + void *data) { return 0; } +static inline void lprocfs_remove(cfs_proc_dir_entry_t *root) {}; +static inline cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *head, + const char *name) {return 0;} +static inline int lprocfs_obd_setup(struct obd_device *dev, + struct lprocfs_vars *list) { return 0; } +static inline int lprocfs_obd_cleanup(struct obd_device *dev) { return 0; } +static inline int lprocfs_rd_u64(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline int lprocfs_rd_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline int lprocfs_rd_name(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_rd_connect_flags(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline 
int lprocfs_rd_num_exports(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_wr_evict_client(struct file *file, const char *buffer, + unsigned long count, void *data) +{ return 0; } +static inline int lprocfs_wr_ping(struct file *file, const char *buffer, + unsigned long count, void *data) +{ return 0; } + + +/* Statfs helpers */ +static inline +int lprocfs_rd_blksize(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_kbytesavail(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_filestotal(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_filesfree(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_rd_filegroups(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +void lprocfs_oh_tally(struct obd_histogram *oh, unsigned int value) {} +static inline +void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value) {} +static inline +void lprocfs_oh_clear(struct obd_histogram *oh) {} +static inline +unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; } +static inline +int lprocfs_counter_read(char *page, char **start, off_t off, + int count, int *eof, void *data) { return 0; } +static inline +int lprocfs_counter_write(struct file *file, const char *buffer, + unsigned long count, void *data) { return 
0; } +#endif /* LPROCFS */ + +#endif /* LPROCFS_SNMP_H */ diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h new file mode 100644 index 0000000..7ae835c --- /dev/null +++ b/lustre/include/lu_object.h @@ -0,0 +1,762 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LUSTRE_LU_OBJECT_H +#define __LUSTRE_LU_OBJECT_H + +/* + * struct lu_fid + */ +#include + +#include +#include + +/* + * Layered objects support for CMD3/C5. + */ + + +struct seq_file; +struct proc_dir_entry; +struct lustre_cfg; + +/* + * lu_* data-types represent server-side entities shared by data and meta-data + * stacks. + * + * Design goals: + * + * 0. support for layering. + * + * Server side object is split into layers, one per device in the + * corresponding device stack. Individual layer is represented by struct + * lu_object. Compound layered object --- by struct lu_object_header. Most + * interface functions take lu_object as an argument and operate on the + * whole compound object. 
This decision was made due to the following + * reasons: + * + * - it's envisaged that lu_object will be used much more often than + * lu_object_header; + * + * - we want lower (non-top) layers to be able to initiate operations + * on the whole object. + * + * Generic code supports layering more complex than simple stacking, e.g., + * it is possible that at some layer object "spawns" multiple sub-objects + * on the lower layer. + * + * 1. fid-based identification. + * + * Compound object is uniquely identified by its fid. Objects are indexed + * by their fids (hash table is used for index). + * + * 2. caching and life-cycle management. + * + * Object's life-time is controlled by reference counting. When reference + * count drops to 0, object is returned to cache. Cached objects still + * retain their identity (i.e., fid), and can be recovered from cache. + * + * Objects are kept in the global LRU list, and lu_site_purge() function + * can be used to reclaim given number of unused objects from the tail of + * the LRU. + * + * 3. avoiding recursion. + * + * Generic code tries to replace recursion through layers by iterations + * where possible. Additionally to the end of reducing stack consumption, + * data, when practically possible, are allocated through lu_context_key + * interface rather than on stack. + * + */ + +struct lu_site; +struct lu_object; +struct lu_device; +struct lu_object_header; +struct lu_context; +/* + * Operations common for data and meta-data devices. + */ +struct lu_device_operations { + /* + * Object creation protocol. + * + * Due to design goal of avoiding recursion, object creation (see + * lu_object_alloc()) is somewhat involved: + * + * - first, ->ldo_object_alloc() method of the top-level device + * in the stack is called. It should allocate top level object + * (including lu_object_header), but without any lower-layer + * sub-object(s). + * + * - then lu_object_alloc() sets fid in the header of newly created + * object. 
+ * + * - then ->loo_object_init() (a method from struct + * lu_object_operations) is called. It has to allocate lower-layer + * object(s). To do this, ->loo_object_init() calls + * ldo_object_alloc() of the lower-layer device(s). + * + * - for all new objects allocated by ->loo_object_init() (and + * inserted into object stack), ->loo_object_init() is called again + * repeatedly, until no new objects are created. + * + */ + + /* + * Allocate object for the given device (without lower-layer + * parts). This is called by ->loo_object_init() from the parent + * layer, and should set up at least ->lo_dev and ->lo_ops fields of + * resulting lu_object. + * + * postcondition: ergo(!IS_ERR(result), result->lo_dev == d && + * result->lo_ops != NULL); + */ + struct lu_object *(*ldo_object_alloc)(struct lu_context *ctx, + struct lu_device *d); + /* + * Dual to ->ldo_object_alloc(). Called when object is removed from + * memory. + */ + void (*ldo_object_free)(struct lu_context *ctx, struct lu_object *o); + + /* + * process config specific for device + */ + int (*ldo_process_config)(struct lu_context *ctx, + struct lu_device *, struct lustre_cfg *); +}; + +/* + * Operations specific for particular lu_object. + */ +struct lu_object_operations { + + /* + * Allocate lower-layer parts of the object by calling + * ->ldo_object_alloc() of the corresponding underlying device. + * + * This method is called once for each object inserted into object + * stack. It is the responsibility of this method to insert lower-layer + * object(s) it creates into appropriate places of object stack. + */ + int (*loo_object_init)(struct lu_context *ctx, struct lu_object *o); + /* + * Called before ->ldo_object_free() to signal that object is being + * destroyed. Dual to ->loo_object_init(). + */ + void (*loo_object_delete)(struct lu_context *ctx, struct lu_object *o); + + /* + * Called when last active reference to the object is released (and + * object returns to the cache).
+ */ + void (*loo_object_release)(struct lu_context *ctx, struct lu_object *o); + + /* + * Return true iff object @o exists on storage. + */ + int (*loo_object_exists)(struct lu_context *ctx, struct lu_object *o); + /* + * Debugging helper. Print given object. + */ + int (*loo_object_print)(struct lu_context *ctx, + struct seq_file *f, const struct lu_object *o); +}; + +/* + * Type of lu_device. + */ +struct lu_device_type; + +/* + * Device: a layer in the server side abstraction stacking. + */ +struct lu_device { + /* + * reference count. This is incremented, in particular, on each object + * created at this layer. + * + * XXX which means that atomic_t is probably too small. + */ + atomic_t ld_ref; + /* + * Pointer to device type. Never modified once set. + */ + struct lu_device_type *ld_type; + /* + * Operation vector for this device. + */ + struct lu_device_operations *ld_ops; + /* + * Stack this device belongs to. + */ + struct lu_site *ld_site; + struct proc_dir_entry *ld_proc_entry; + + /* XXX: temporary back pointer into obd. */ + struct obd_device *ld_obd; +}; + +struct lu_device_type_operations; + +/* + * Tag bits for device type. They are used to distinguish certain groups of + * device types. + */ +enum lu_device_tag { + /* this is meta-data device */ + LU_DEVICE_MD = (1 << 0), + /* this is data device */ + LU_DEVICE_DT = (1 << 1) +}; + +/* + * Type of device. + */ +struct lu_device_type { + /* + * Tag bits. Taken from enum lu_device_tag. Never modified once set. + */ + __u32 ldt_tags; + /* + * Name of this class. Unique system-wide. Never modified once set. + */ + char *ldt_name; + /* + * Operations for this type. + */ + struct lu_device_type_operations *ldt_ops; + /* + * XXX: temporary pointer to associated obd_type. + */ + struct obd_type *ldt_obd_type; +}; + +/* + * Operations on a device type. + */ +struct lu_device_type_operations { + /* + * Allocate new device.
+ */ + struct lu_device *(*ldto_device_alloc)(struct lu_context *ctx, + struct lu_device_type *t, + struct lustre_cfg *lcfg); + /* + * Free device. Dual to ->ldto_device_alloc(). + */ + void (*ldto_device_free)(struct lu_context *ctx, struct lu_device *d); + + /* + * Initialize the devices after allocation + */ + int (*ldto_device_init)(struct lu_context *ctx, + struct lu_device *, struct lu_device *); + /* + * Finalize device. Dual to ->ldto_device_init(). Returns pointer to + * the next device in the stack. + */ + struct lu_device *(*ldto_device_fini)(struct lu_context *ctx, + struct lu_device *); + + /* + * Initialize device type. This is called on module load. + */ + int (*ldto_init)(struct lu_device_type *t); + /* + * Finalize device type. Dual to ->ldto_init(). Called on module + * unload. + */ + void (*ldto_fini)(struct lu_device_type *t); +}; + +/* + * Flags for the object layers. + */ +enum lu_object_flags { + /* + * this flag is set if ->loo_object_init() has been called for this + * layer. Used by lu_object_alloc(). + */ + LU_OBJECT_ALLOCATED = (1 << 0) +}; + +/* + * Common object attributes. + */ +struct lu_attr { + __u64 la_size; /* size in bytes */ + __u64 la_mtime; /* modification time in seconds since Epoch */ + __u64 la_atime; /* access time in seconds since Epoch */ + __u64 la_ctime; /* change time in seconds since Epoch */ + __u64 la_blocks; /* 512-byte blocks allocated to object */ + __u32 la_mode; /* permission bits and file type */ + __u32 la_uid; /* owner id */ + __u32 la_gid; /* group id */ + __u32 la_flags; /* object flags */ + __u32 la_nlink; /* number of persistent references to this + * object */ +}; + + +/* + * Layer in the layered object. + */ +struct lu_object { + /* + * Header for this object. + */ + struct lu_object_header *lo_header; + /* + * Device for this layer. + */ + struct lu_device *lo_dev; + /* + * Operations for this object. + */ + struct lu_object_operations *lo_ops; + /* + * Linkage into list of all layers.
+ */ + struct list_head lo_linkage; + /* + * Depth. Top level layer depth is 0. + */ + int lo_depth; + /* + * Flags from enum lu_object_flags. + */ + unsigned long lo_flags; +}; + +enum lu_object_header_flags { + /* + * Don't keep this object in cache. Object will be destroyed as soon + * as last reference to it is released. This flag cannot be cleared + * once set. + */ + LU_OBJECT_HEARD_BANSHEE = 0, +}; + +/* + * "Compound" object, consisting of multiple layers. + * + * Compound object with given fid is unique within given lu_site. + * + * Note, that object does *not* necessarily correspond to the real object in the + * persistent storage: object is an anchor for locking and method calling, so + * it is created for things like not-yet-existing child created by mkdir or + * create calls. ->loo_object_exists() can be used to check whether object is backed + * by persistent storage entity. + */ +struct lu_object_header { + /* + * Object flags from enum lu_object_header_flags. Set and checked + * atomically. + */ + unsigned long loh_flags; + /* + * Object reference count. Protected by site guard lock. + */ + int loh_ref; + /* + * Fid, uniquely identifying this object. + */ + struct lu_fid loh_fid; + /* + * Linkage into per-site hash table. Protected by site guard lock. + */ + struct hlist_node loh_hash; + /* + * Linkage into per-site LRU list. Protected by site guard lock. + */ + struct list_head loh_lru; + /* + * Linkage into list of layers. Never modified once set (except lately + * during object destruction). No locking is necessary. + */ + struct list_head loh_layers; +}; + +struct fld; +/* + * lu_site is a "compartment" within which objects are unique, and LRU + * discipline is maintained. + * + * lu_site exists so that multiple layered stacks can co-exist in the same + * address space. + * + * lu_site has the same relation to lu_device as lu_object_header to + * lu_object.
+ */ +struct lu_site { + /* + * lock protecting: + * + * - ->ls_hash hash table (and its linkages in objects); + * + * - ->ls_lru list (and its linkages in objects); + * + * - 0/1 transitions of object ->loh_ref reference count; + * + * yes, it's heavy. + */ + spinlock_t ls_guard; + /* + * Hash-table where objects are indexed by fid. + */ + struct hlist_head *ls_hash; + /* + * Bit-mask for hash-table size. + */ + int ls_hash_mask; + + + /* + * LRU list, updated on each access to object. Protected by + * ->ls_guard. + * + * "Cold" end of LRU is ->ls_lru.next. Accessed object are moved to + * the ->ls_lru.prev (this is due to the non-existence of + * list_for_each_entry_safe_reverse()). + */ + struct list_head ls_lru; + /* + * Total number of objects in this site. Protected by ->ls_guard. + */ + unsigned ls_total; + /* + * Total number of objects in this site with reference counter greater + * than 0. Protected by ->ls_guard. + */ + unsigned ls_busy; + + /* + * Top-level device for this stack. + */ + struct lu_device *ls_top_dev; + /* current server index */ + __u32 ls_node_id; + /* + * Fid location database + */ + struct fld *ls_fld; + + /* statistical counters. Protected by nothing, races are accepted. */ + struct { + __u32 s_created; + __u32 s_cache_hit; + __u32 s_cache_miss; + /* + * Number of hash-table entry checks made. + * + * ->s_cache_check / (->s_cache_miss + ->s_cache_hit) + * + * is an average number of hash slots inspected during single + * lookup. + */ + __u32 s_cache_check; + /* raced cache insertions */ + __u32 s_cache_race; + __u32 s_lru_purged; + } ls_stats; +}; + +/* + * Constructors/destructors. + */ + +/* + * Initialize site @s, with @d as the top level device. + */ +int lu_site_init(struct lu_site *s, struct lu_device *d); +/* + * Finalize @s and release its resources. 
+ */ +void lu_site_fini(struct lu_site *s); + +/* + * Acquire additional reference on device @d + */ +void lu_device_get(struct lu_device *d); +/* + * Release reference on device @d. + */ +void lu_device_put(struct lu_device *d); + +/* + * Initialize device @d of type @t. + */ +int lu_device_init(struct lu_device *d, struct lu_device_type *t); +/* + * Finalize device @d. + */ +void lu_device_fini(struct lu_device *d); + +/* + * Initialize compound object. + */ +int lu_object_header_init(struct lu_object_header *h); +/* + * Finalize compound object. + */ +void lu_object_header_fini(struct lu_object_header *h); + +/* + * Initialize object @o that is part of compound object @h and was created by + * device @d. + */ +int lu_object_init(struct lu_object *o, + struct lu_object_header *h, struct lu_device *d); +/* + * Finalize object and release its resources. + */ +void lu_object_fini(struct lu_object *o); +/* + * Add object @o as first layer of compound object @h. + * + * This is typically called by the ->ldo_object_alloc() method of top-level + * device. + */ +void lu_object_add_top(struct lu_object_header *h, struct lu_object *o); +/* + * Add object @o as a layer of compound object, going after @before. + * + * This is typically called by the ->ldo_object_alloc() method of + * @before->lo_dev. + */ +void lu_object_add(struct lu_object *before, struct lu_object *o); + +/* + * Caching and reference counting. + */ + +/* + * Acquire additional reference to the given object. This function is used to + * attain additional reference. To acquire initial reference use + * lu_object_find(). + */ +static inline void lu_object_get(struct lu_object *o) +{ + LASSERT(o->lo_header->loh_ref > 0); + spin_lock(&o->lo_dev->ld_site->ls_guard); + o->lo_header->loh_ref ++; + spin_unlock(&o->lo_dev->ld_site->ls_guard); +} + +/* + * Return true of object will not be cached after last reference to it is + * released.
+ */ +static inline int lu_object_is_dying(struct lu_object_header *h) +{ + return test_bit(LU_OBJECT_HEARD_BANSHEE, &h->loh_flags); +} + +/* + * Decrease reference counter on object. If last reference is freed, return + * object to the cache, unless lu_object_is_dying(o) holds. In the latter + * case, free object immediately. + */ +void lu_object_put(struct lu_context *ctxt, struct lu_object *o); + +/* + * Free @nr objects from the cold end of the site LRU list. + */ +void lu_site_purge(struct lu_context *ctx, struct lu_site *s, int nr); + +/* + * Search cache for an object with the fid @f. If such object is found, return + * it. Otherwise, create new object, insert it into cache and return it. In + * any case, additional reference is acquired on the returned object. + */ +struct lu_object *lu_object_find(struct lu_context *ctxt, + struct lu_site *s, const struct lu_fid *f); + +/* + * Helpers. + */ + +/* + * First (topmost) sub-object of given compound object + */ +static inline struct lu_object *lu_object_top(struct lu_object_header *h) +{ + LASSERT(!list_empty(&h->loh_layers)); + return container_of0(h->loh_layers.next, struct lu_object, lo_linkage); +} + +/* + * Next sub-object in the layering + */ +static inline struct lu_object *lu_object_next(const struct lu_object *o) +{ + return container_of0(o->lo_linkage.next, struct lu_object, lo_linkage); +} + +/* + * Pointer to the fid of this object. + */ +static inline const struct lu_fid *lu_object_fid(const struct lu_object *o) +{ + return &o->lo_header->loh_fid; +} + +/* + * return device operations vector for this object + */ +static inline struct lu_device_operations * +lu_object_ops(const struct lu_object *o) +{ + return o->lo_dev->ld_ops; +} + +/* + * Given a compound object, find its slice, corresponding to the device type + * @dtype. + */ +struct lu_object *lu_object_locate(struct lu_object_header *h, + struct lu_device_type *dtype); + +/* + * Print human readable representation of the @o to the @f.
+ */ +int lu_object_print(struct lu_context *ctxt, + struct seq_file *f, const struct lu_object *o); + +/* + * Returns true iff object @o exists on the stable storage. + */ +static inline int lu_object_exists(struct lu_context *ctx, struct lu_object *o) +{ + return o->lo_ops->loo_object_exists(ctx, o); +} + +/* + * lu_context. Execution context for lu_object methods. Currently associated + * with thread. + * + * All lu_object methods, except device and device type methods (called during + * system initialization and shutdown) are executed "within" some + * lu_context. This means, that pointer to some "current" lu_context is passed + * as an argument to all methods. + * + * All service ptlrpc threads create lu_context as part of their + * initialization. It is possible to create "stand-alone" context for other + * execution environments (like system calls). + * + * lu_object methods mainly use lu_context through lu_context_key interface + * that allows each layer to associate arbitrary pieces of data with each + * context (see pthread_key_create(3) for similar interface). + * + */ +struct lu_context { + /* + * Theoretically we'd want to use lu_objects and lu_contexts on the + * client side too. On the other hand, we don't want to allocate + * values of server-side keys for the client contexts and vice versa. + * + * To achieve this, set of tags is introduced. Contexts and keys are + * marked with tags. Key values are created only for context whose set + * of tags has non-empty intersection with one for key. NOT YET + * IMPLEMENTED. + */ + __u32 lc_tags; + /* + * Pointer to the home service thread. NULL for other execution + * contexts. + */ + struct ptlrpc_thread *lc_thread; + /* + * Pointer to an array with key values. Internal implementation + * detail. + */ + void **lc_value; +}; + +/* + * lu_context_key interface. Similar to pthread_key. + */ + + +/* + * Key. Represents per-context value slot. + */ +struct lu_context_key { + /* + * Value constructor.
This is called when new value is created for a + * context. Returns pointer to new value or error pointer. + */ + void *(*lct_init)(struct lu_context *ctx); + /* + * Value destructor. Called when context with previously allocated + * value of this slot is destroyed. @data is a value that was returned + * by a matching call to ->lct_init(). + */ + void (*lct_fini)(struct lu_context *ctx, void *data); + /* + * Internal implementation detail: index within ->lc_value[] reserved + * for this key. + */ + int lct_index; + /* + * Internal implementation detail: number of values created for this + * key. + */ + unsigned lct_used; +}; + +/* + * Register new key. + */ +int lu_context_key_register(struct lu_context_key *key); +/* + * Deregister key. + */ +void lu_context_key_degister(struct lu_context_key *key); +/* + * Return value associated with key @key in context @ctx. + */ +void *lu_context_key_get(struct lu_context *ctx, struct lu_context_key *key); + +/* + * Initialize context data-structure. Create values for all keys. + */ +int lu_context_init(struct lu_context *ctx); +/* + * Finalize context data-structure. Destroy key values. + */ +void lu_context_fini(struct lu_context *ctx); + +/* + * Called before entering context.
+ */ +void lu_context_enter(struct lu_context *ctx); +/* + * Called after exiting from @ctx + */ +void lu_context_exit(struct lu_context *ctx); + + +#endif /* __LUSTRE_LU_OBJECT_H */ diff --git a/lustre/include/lustre/Makefile.am b/lustre/include/lustre/Makefile.am index 1580bde..0acd90f 100644 --- a/lustre/include/lustre/Makefile.am +++ b/lustre/include/lustre/Makefile.am @@ -4,7 +4,7 @@ # See the file COPYING in this distribution if UTILS -pkginclude_HEADERS = lustre_user.h liblustreapi.h types.h +pkginclude_HEADERS = lustre_idl.h lustre_user.h liblustreapi.h types.h endif -EXTRA_DIST = lustre_user.h liblustreapi.h types.h +EXTRA_DIST = lustre_idl.h lustre_user.h liblustreapi.h types.h diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index 557c3ab..08f8786 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -23,7 +23,7 @@ extern int llapi_ping(char *obd_type, char *obd_name); extern int llapi_target_check(int num_types, char **obd_types, char *dir); extern int llapi_catinfo(char *dir, char *keyword, char *node_name); extern int llapi_lov_get_uuids(int fd, struct obd_uuid *uuidp, int *ost_count); -extern int llapi_is_lustre_mnttype(char *type); +extern int llapi_is_lustre_mnttype(struct mntent *mnt); extern int llapi_quotachown(char *path, int flag); extern int llapi_quotacheck(char *mnt, int check_type); extern int llapi_poll_quotacheck(char *mnt, struct if_quotacheck *qchk); diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/lustre/lustre_idl.h similarity index 90% rename from lustre/include/linux/lustre_idl.h rename to lustre/include/lustre/lustre_idl.h index 56ffc5e..7f591fe 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -33,38 +33,26 @@ #ifndef _LUSTRE_IDL_H_ #define _LUSTRE_IDL_H_ -#ifdef HAVE_ASM_TYPES_H -#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include 
#else -#include -#endif - -#ifdef __KERNEL__ -# include -# include /* to check for FMODE_EXEC, dev_t, lest we redefine */ -# ifdef CONFIG_FS_POSIX_ACL -# include -# endif -#else -#ifdef __CYGWIN__ -# include -#elif defined(_AIX) -# include -#else -# include -#endif +#error Unsupported operating system. #endif #include /* for lnet_nid_t */ -/* Defn's shared with user-space. */ -#include - /* * CLASSERT() */ #include +/* Defn's shared with user-space. */ +#include + /* * this file contains all data structures used in Lustre interfaces: * - obdo and obd_request records @@ -83,19 +71,28 @@ #define CONNMGR_REQUEST_PORTAL 1 #define CONNMGR_REPLY_PORTAL 2 +//#define OSC_REQUEST_PORTAL 3 #define OSC_REPLY_PORTAL 4 +//#define OSC_BULK_PORTAL 5 #define OST_IO_PORTAL 6 #define OST_CREATE_PORTAL 7 #define OST_BULK_PORTAL 8 +//#define MDC_REQUEST_PORTAL 9 #define MDC_REPLY_PORTAL 10 +//#define MDC_BULK_PORTAL 11 #define MDS_REQUEST_PORTAL 12 +//#define MDS_REPLY_PORTAL 13 #define MDS_BULK_PORTAL 14 #define LDLM_CB_REQUEST_PORTAL 15 #define LDLM_CB_REPLY_PORTAL 16 #define LDLM_CANCEL_REQUEST_PORTAL 17 #define LDLM_CANCEL_REPLY_PORTAL 18 +//#define PTLBD_REQUEST_PORTAL 19 +//#define PTLBD_REPLY_PORTAL 20 +//#define PTLBD_BULK_PORTAL 21 #define MDS_SETATTR_PORTAL 22 #define MDS_READPAGE_PORTAL 23 + #define MGC_REPLY_PORTAL 25 #define MGS_REQUEST_PORTAL 26 #define MGS_REPLY_PORTAL 27 @@ -227,6 +224,7 @@ struct lmv_stripe_md { struct lu_fid mea_ids[0]; }; + struct lustre_handle { __u64 cookie; }; @@ -340,14 +338,16 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define OBD_CONNECT_TRANSNO 0x800ULL /* replay is sending initial transno */ #define OBD_CONNECT_IBITS 0x1000ULL /* support for inodebits locks */ #define OBD_CONNECT_JOIN 0x2000ULL /* files can be concatenated */ -#define OBD_CONNECT_REAL 0x4000ULL /* show MD stack that real connect is - * performed */ +#define OBD_CONNECT_REAL 0x4000ULL +#define OBD_CONNECT_NODEVOH 0x8000ULL /* No open 
handle for special nodes */ #define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/ + /* also update obd_connect_names[] for lprocfs_rd_connect_flags() */ #define MDS_CONNECT_SUPPORTED (OBD_CONNECT_RDONLY | OBD_CONNECT_VERSION | \ OBD_CONNECT_ACL | OBD_CONNECT_XATTR | \ - OBD_CONNECT_IBITS | OBD_CONNECT_JOIN) + OBD_CONNECT_IBITS | OBD_CONNECT_JOIN | \ + OBD_CONNECT_NODEVOH) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX) @@ -365,6 +365,26 @@ static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) #define OBD_OCD_VERSION_PATCH(version) ((int)((version)>>8)&255) #define OBD_OCD_VERSION_FIX(version) ((int)(version)&255) +/* This structure is used for both request and reply. + * + * If we eventually have separate connect data for different types, which we + * almost certainly will, then perhaps we stick a union in here. */ +struct obd_connect_data { + __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ + __u32 ocd_version; /* lustre release version number */ + __u32 ocd_grant; /* initial cache grant amount (bytes) */ + __u32 ocd_index; /* LOV index to connect to */ + __u32 ocd_unused; + __u64 ocd_ibits_known; /* inode bits this client understands */ + __u64 ocd_seq; /* sequence info for client */ + __u64 padding2; /* also fix lustre_swab_connect */ + __u64 padding3; /* also fix lustre_swab_connect */ + __u64 padding4; /* also fix lustre_swab_connect */ + __u64 padding5; /* also fix lustre_swab_connect */ +}; + +extern void lustre_swab_connect(struct obd_connect_data *ocd); + /* * OST requests: OBDO & OBD request records */ @@ -428,7 +448,7 @@ typedef uint32_t obd_count; #define OBD_FL_TRUNCLOCK (0x00000800) /* this should be not smaller than sizeof(struct lustre_handle) + sizeof(struct - * llog_cookie) + sizeof(lu_fid). Nevertheless struct lu_fid is not longer + * llog_cookie) + sizeof(ll_fid). 
Nevertheless struct ll_fid is not longer * stored in o_inline, we keep this just for case. */ #define OBD_INLINESZ 80 @@ -520,37 +540,39 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ struct lov_ost_data_v1 lmm_objects[0]; /* per-stripe data */ }; -#define OBD_MD_FLID (0x0000000000000001ULL) /* object ID */ -#define OBD_MD_FLATIME (0x0000000000000002ULL) /* access time */ -#define OBD_MD_FLMTIME (0x0000000000000004ULL) /* data modification time */ -#define OBD_MD_FLCTIME (0x0000000000000008ULL) /* change time */ -#define OBD_MD_FLSIZE (0x0000000000000010ULL) /* size */ -#define OBD_MD_FLBLOCKS (0x0000000000000020ULL) /* allocated blocks count */ -#define OBD_MD_FLBLKSZ (0x0000000000000040ULL) /* block size */ -#define OBD_MD_FLMODE (0x0000000000000080ULL) /* access bits (mode & ~S_IFMT) */ -#define OBD_MD_FLTYPE (0x0000000000000100ULL) /* object type (mode & S_IFMT) */ -#define OBD_MD_FLUID (0x0000000000000200ULL) /* user ID */ -#define OBD_MD_FLGID (0x0000000000000400ULL) /* group ID */ -#define OBD_MD_FLFLAGS (0x0000000000000800ULL) /* flags word */ -#define OBD_MD_FLNLINK (0x0000000000002000ULL) /* link count */ -#define OBD_MD_FLGENER (0x0000000000004000ULL) /* generation number */ -#define OBD_MD_FLINLINE (0x0000000000008000ULL) /* inline data */ -#define OBD_MD_FLRDEV (0x0000000000010000ULL) /* device number */ -#define OBD_MD_FLEASIZE (0x0000000000020000ULL) /* extended attribute data */ -#define OBD_MD_LINKNAME (0x0000000000040000ULL) /* symbolic link target */ -#define OBD_MD_FLHANDLE (0x0000000000080000ULL) /* file handle */ -#define OBD_MD_FLCKSUM (0x0000000000100000ULL) /* bulk data checksum */ -#define OBD_MD_FLQOS (0x0000000000200000ULL) /* quality of service stats */ -#define OBD_MD_FLOSCOPQ (0x0000000000400000ULL) /* osc opaque data */ -#define OBD_MD_FLCOOKIE (0x0000000000800000ULL) /* log cancellation cookie */ -#define OBD_MD_FLGROUP (0x0000000001000000ULL) /* group */ -#define OBD_MD_FLFID (0x0000000002000000ULL) /* ->ost 
write inline fid */ -#define OBD_MD_FLEPOCH (0x0000000004000000ULL) /* ->ost write easize is epoch */ -#define OBD_MD_FLGRANT (0x0000000008000000ULL) /* ost preallocation space grant */ -#define OBD_MD_FLDIREA (0x0000000010000000ULL) /* dir's extended attribute data */ -#define OBD_MD_FLUSRQUOTA (0x0000000020000000ULL) /* over quota flags sent from ost */ -#define OBD_MD_FLGRPQUOTA (0x0000000040000000ULL) /* over quota flags sent from ost */ -#define OBD_MD_FLMODEASIZE (0x0000000080000000ULL) /* EA size will be changed */ + +#define OBD_MD_FLID (0x00000001ULL) /* object ID */ +#define OBD_MD_FLATIME (0x00000002ULL) /* access time */ +#define OBD_MD_FLMTIME (0x00000004ULL) /* data modification time */ +#define OBD_MD_FLCTIME (0x00000008ULL) /* change time */ +#define OBD_MD_FLSIZE (0x00000010ULL) /* size */ +#define OBD_MD_FLBLOCKS (0x00000020ULL) /* allocated blocks count */ +#define OBD_MD_FLBLKSZ (0x00000040ULL) /* block size */ +#define OBD_MD_FLMODE (0x00000080ULL) /* access bits (mode & ~S_IFMT) */ +#define OBD_MD_FLTYPE (0x00000100ULL) /* object type (mode & S_IFMT) */ +#define OBD_MD_FLUID (0x00000200ULL) /* user ID */ +#define OBD_MD_FLGID (0x00000400ULL) /* group ID */ +#define OBD_MD_FLFLAGS (0x00000800ULL) /* flags word */ +#define OBD_MD_FLNLINK (0x00002000ULL) /* link count */ +#define OBD_MD_FLGENER (0x00004000ULL) /* generation number */ +#define OBD_MD_FLINLINE (0x00008000ULL) /* inline data */ +#define OBD_MD_FLRDEV (0x00010000ULL) /* device number */ +#define OBD_MD_FLEASIZE (0x00020000ULL) /* extended attribute data */ +#define OBD_MD_LINKNAME (0x00040000ULL) /* symbolic link target */ +#define OBD_MD_FLHANDLE (0x00080000ULL) /* file handle */ +#define OBD_MD_FLCKSUM (0x00100000ULL) /* bulk data checksum */ +#define OBD_MD_FLQOS (0x00200000ULL) /* quality of service stats */ +#define OBD_MD_FLOSCOPQ (0x00400000ULL) /* osc opaque data */ +#define OBD_MD_FLCOOKIE (0x00800000ULL) /* log cancellation cookie */ +#define OBD_MD_FLGROUP (0x01000000ULL) 
/* group */ +#define OBD_MD_FLFID (0x02000000ULL) /* ->ost write inline fid */ +#define OBD_MD_FLEPOCH (0x04000000ULL) /* ->ost write easize is epoch */ +#define OBD_MD_FLGRANT (0x08000000ULL) /* ost preallocation space grant */ +#define OBD_MD_FLDIREA (0x10000000ULL) /* dir's extended attribute data */ +#define OBD_MD_FLUSRQUOTA (0x20000000ULL) /* over quota flags sent from ost */ +#define OBD_MD_FLGRPQUOTA (0x40000000ULL) /* over quota flags sent from ost */ +#define OBD_MD_FLMODEASIZE (0x80000000ULL) /* EA size will be changed */ + #define OBD_MD_MDS (0x0000000100000000ULL) /* where an inode lives on */ #define OBD_MD_REINT (0x0000000200000000ULL) /* reintegrate oa */ #define OBD_MD_MEA (0x0000000400000000ULL) /* CMD EA */ @@ -713,7 +735,7 @@ typedef enum { // REINT_CLOSE = 7, // REINT_WRITE = 8, REINT_MAX -} mds_reint_t,mdt_reint_t; +} mds_reint_t, mdt_reint_t; /* the disposition of the intent outlines what was executed */ #define DISP_IT_EXECD 0x01 @@ -740,7 +762,6 @@ typedef enum { #define LUSTRE_CONFIG_METASEQ "metaseq" #define LUSTRE_CONFIG_TRANSNO "transno" -/* temporary stuff for compatibility */ struct ll_fid { __u64 id; /* holds object id */ __u32 generation; /* holds object generation */ @@ -749,27 +770,7 @@ struct ll_fid { * OST for saving into EA. */ }; - extern void lustre_swab_ll_fid (struct ll_fid *fid); -/* This structure is used for both request and reply. - * - * If we eventually have separate connect data for different types, which we - * almost certainly will, then perhaps we stick a union in here. 
*/ -struct obd_connect_data { - __u64 ocd_connect_flags; /* OBD_CONNECT_* per above */ - __u32 ocd_version; /* lustre release version number */ - __u32 ocd_grant; /* initial cache grant amount (bytes) */ - __u32 ocd_index; /* LOV index to connect to */ - __u32 ocd_unused; - __u64 ocd_ibits_known; /* inode bits this client understands */ - __u64 ocd_seq; /* sequence info for client */ - __u64 padding2; /* also fix lustre_swab_connect */ - __u64 padding3; /* also fix lustre_swab_connect */ - __u64 padding4; /* also fix lustre_swab_connect */ - __u64 padding5; /* also fix lustre_swab_connect */ -}; - -extern void lustre_swab_connect(struct obd_connect_data *ocd); #define MDS_STATUS_CONN 1 #define MDS_STATUS_LOV 2 @@ -1103,6 +1104,29 @@ struct mdt_rec_rename { extern void lustre_swab_mdt_rec_rename (struct mdt_rec_rename *rn); +/* begin adding MDT by huanghua@clusterfs.com */ +struct lmv_desc { + __u32 ld_tgt_count; /* how many MDS's */ + __u32 ld_active_tgt_count; /* how many active */ + struct obd_uuid ld_uuid; +}; + +extern void lustre_swab_lmv_desc (struct lmv_desc *ld); +/* end adding MDT by huanghua@clusterfs.com */ + +struct md_fld { + __u64 mf_seq; + __u64 mf_mds; +}; + +extern void lustre_swab_md_fld (struct md_fld *mf); + +enum fld_rpc_opc { + FLD_QUERY = 600, + FLD_LAST_OPC, + FLD_FIRST_OPC = FLD_QUERY +}; + /* * LOV data structures */ @@ -1124,30 +1148,15 @@ struct lov_desc { __u32 ld_pattern; /* PATTERN_RAID0, PATTERN_RAID1 */ __u64 ld_default_stripe_size; /* in bytes */ __u64 ld_default_stripe_offset; /* in bytes */ + __u32 ld_qos_threshold; /* in MB */ + __u32 ld_qos_maxage; /* in second */ __u32 ld_padding_1; /* also fix lustre_swab_lov_desc */ __u32 ld_padding_2; /* also fix lustre_swab_lov_desc */ - __u32 ld_padding_3; /* also fix lustre_swab_lov_desc */ - __u32 ld_padding_4; /* also fix lustre_swab_lov_desc */ struct obd_uuid ld_uuid; }; #define ld_magic ld_active_tgt_count /* for swabbing from llogs */ -/*begin adding MDT by huanghua@clusterfs.com*/ 
-struct lmv_desc { - __u32 ld_tgt_count; /* how many MDS's */ - __u32 ld_active_tgt_count; /* how many active */ - struct obd_uuid ld_uuid; -}; - -extern void lustre_swab_lmv_desc (struct lmv_desc *ld); -/*end adding MDT by huanghua@clusterfs.com*/ - -struct md_fld { - __u64 mf_seq; - __u64 mf_mds; -}; -extern void lustre_swab_md_fld (struct md_fld *mf); extern void lustre_swab_lov_desc (struct lov_desc *ld); /* @@ -1336,6 +1345,13 @@ typedef enum { /* catalog of log objects */ +/* Identifier for a single log object */ +struct llog_logid { + __u64 lgl_oid; + __u64 lgl_ogr; + __u32 lgl_ogen; +} __attribute__((packed)); + /* Records written to the CATALOGS list */ #define CATLIST "CATALOGS" struct llog_catid { @@ -1345,7 +1361,6 @@ struct llog_catid { __u32 lci_padding3; } __attribute__((packed)); - /*join file lov mds md*/ struct lov_mds_md_join { struct lov_mds_md lmmj_md; @@ -1532,12 +1547,6 @@ enum llogd_rpc_ops { LLOG_LAST_OPC }; -enum fld_rpc_opc { - FLD_QUERY = 600, - FLD_LAST_OPC, - FLD_FIRST_OPC = FLD_QUERY -}; - struct llogd_body { struct llog_logid lgd_logid; __u32 lgd_ctxt_idx; @@ -1554,6 +1563,29 @@ struct llogd_conn_body { __u32 lgdc_ctxt_idx; } __attribute__((packed)); +struct lov_user_ost_data_join { /* per-stripe data structure */ + __u64 l_extent_start; /* extent start*/ + __u64 l_extent_end; /* extent end*/ + __u64 l_object_id; /* OST object ID */ + __u64 l_object_gr; /* OST object group (creating MDS number) */ + __u32 l_ost_gen; /* generation of this OST index */ + __u32 l_ost_idx; /* OST index in LOV */ +} __attribute__((packed)); + +struct lov_user_md_join { /* LOV EA user data (host-endian) */ + __u32 lmm_magic; /* magic number = LOV_MAGIC_JOIN */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + __u64 lmm_object_id; /* LOV object ID */ + __u64 lmm_object_gr; /* LOV object group */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + __u32 lmm_stripe_count; /* num stripes in use for this object */ + __u32 lmm_extent_count; 
/* extent count of lmm*/ + __u64 lmm_tree_id; /* mds tree object id */ + __u64 lmm_tree_gen; /* mds tree object gen */ + struct llog_logid lmm_array_id; /* mds extent desc llog object id */ + struct lov_user_ost_data_join lmm_objects[0]; /* per-stripe data */ +} __attribute__((packed)); + extern void lustre_swab_lov_user_md(struct lov_user_md *lum); extern void lustre_swab_lov_user_md_objects(struct lov_user_md *lum); extern void lustre_swab_lov_user_md_join(struct lov_user_md_join *lumj); diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 8df4aea..4afa860 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -9,28 +9,14 @@ #ifndef _LUSTRE_USER_H #define _LUSTRE_USER_H -#ifdef HAVE_ASM_TYPES_H -#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include #else -#include -#endif - -#ifdef HAVE_QUOTA_SUPPORT -#include -#endif - -/* - * asm-x86_64/processor.h on some SLES 9 distros seems to use - * kernel-only typedefs. fortunately skipping it altogether is ok - * (for now). - */ -#define __ASM_X86_64_PROCESSOR_H - -#ifdef __KERNEL__ -#include -#else -#include -#include +#error Unsupported operating system. 
#endif /* for statfs() */ @@ -66,6 +52,7 @@ struct obd_statfs; #define LL_STATFS_LOV 2 #define IOC_MDC_TYPE 'i' +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) #define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) #define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_mds_data *) @@ -105,15 +92,6 @@ struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ } __attribute__((packed)); -#if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \ - defined(__craynv) -typedef struct stat lstat_t; -#define HAVE_LOV_USER_MDS_DATA -#elif defined(__USE_LARGEFILE64) || defined(__KERNEL__) -typedef struct stat64 lstat_t; -#define HAVE_LOV_USER_MDS_DATA -#endif - /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to * use this. It is unsafe to #define those values in this header as it * is possible the application has already #included . */ @@ -125,37 +103,6 @@ struct lov_user_mds_data_v1 { } __attribute__((packed)); #endif -struct lov_user_ost_data_join { /* per-stripe data structure */ - __u64 l_extent_start; /* extent start*/ - __u64 l_extent_end; /* extent end*/ - __u64 l_object_id; /* OST object ID */ - __u64 l_object_gr; /* OST object group (creating MDS number) */ - __u32 l_ost_gen; /* generation of this OST index */ - __u32 l_ost_idx; /* OST index in LOV */ -} __attribute__((packed)); - -/* Identifier for a single log object */ -struct llog_logid { - __u64 lgl_oid; - __u64 lgl_ogr; - __u32 lgl_ogen; -} __attribute__((packed)); - -struct lov_user_md_join { /* LOV EA user data (host-endian) */ - __u32 lmm_magic; /* magic number = LOV_MAGIC_JOIN */ - __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ - __u64 lmm_object_id; /* LOV object ID */ - __u64 lmm_object_gr; /* LOV object group */ - __u32 lmm_stripe_size; /* size of stripe in bytes */ - __u32 lmm_stripe_count; /* num stripes in use for this object */ - __u32 
lmm_extent_count; /* extent count of lmm*/ - __u64 lmm_tree_id; /* mds tree object id */ - __u64 lmm_tree_gen; /* mds tree object gen */ - struct llog_logid lmm_array_id; /* mds extent desc llog object id */ - struct lov_user_ost_data_join lmm_objects[0]; /* per-stripe data */ -} __attribute__((packed)); - - struct ll_recreate_obj { __u64 lrc_id; __u32 lrc_ost_idx; @@ -181,7 +128,8 @@ static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp) uuid->uuid[sizeof(*uuid) - 1] = '\0'; } -static inline char *obd_uuid2str(struct obd_uuid *uuid) +/* For printf's only, make sure uuid is terminated */ +static inline char *obd_uuid2str(struct obd_uuid *uuid) { if (uuid->uuid[sizeof(*uuid) - 1] != '\0') { /* Obviously not safe, but for printfs, no real harm done...*/ @@ -220,16 +168,6 @@ struct mds_grp_downcall_data { __u32 mgd_groups[0]; }; - -#ifndef __KERNEL__ -#define NEED_QUOTA_DEFS -#else -# include -# if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) -# define NEED_QUOTA_DEFS -# endif -#endif - #ifdef NEED_QUOTA_DEFS #ifndef QUOTABLOCK_BITS #define QUOTABLOCK_BITS 10 @@ -291,29 +229,6 @@ struct if_quotactl { struct obd_uuid obd_uuid; }; -#ifndef LPU64 -/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */ -#if defined(__x86_64__) && defined(__KERNEL__) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#endif -#endif /* !LPU64 */ - #ifndef offsetof # define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) #endif diff --git a/lustre/include/lustre/types.h b/lustre/include/lustre/types.h index 5389d37..a5da5923 100644 --- 
a/lustre/include/lustre/types.h +++ b/lustre/include/lustre/types.h @@ -1,27 +1,14 @@ #ifndef _LUSTRE_TYPES_H #define _LUSTRE_TYPES_H -typedef unsigned short umode_t; - -#if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \ - !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H)) - -/* - * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the - * header files exported to user space - */ - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long long __s64; -typedef unsigned long long __u64; +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. #endif #endif diff --git a/lustre/include/linux/lustre_cfg.h b/lustre/include/lustre_cfg.h similarity index 99% rename from lustre/include/linux/lustre_cfg.h rename to lustre/include/lustre_cfg.h index 073ac8f..a04245a 100644 --- a/lustre/include/linux/lustre_cfg.h +++ b/lustre/include/lustre_cfg.h @@ -184,7 +184,7 @@ static inline int lustre_cfg_len(uint32_t bufcount, uint32_t *buflens) } -#include +#include static inline struct lustre_cfg *lustre_cfg_new(int cmd, struct lustre_cfg_bufs *bufs) diff --git a/lustre/include/linux/lustre_commit_confd.h b/lustre/include/lustre_commit_confd.h similarity index 96% rename from lustre/include/linux/lustre_commit_confd.h rename to lustre/include/lustre_commit_confd.h index fa1cb35..40b1978 100644 --- a/lustre/include/linux/lustre_commit_confd.h +++ b/lustre/include/lustre_commit_confd.h @@ -9,7 +9,7 @@ #ifndef _LUSTRE_COMMIT_CONFD_H #define _LUSTRE_COMMIT_CONFD_H -#include +#include struct llog_canceld_ctxt { struct list_head llcd_list; /* free or pending struct list */ @@ -29,7 +29,7 @@ struct llog_commit_master { int lcm_thread_max; /* <= num_osts normally */ int lcm_flags; - wait_queue_head_t lcm_waitq; + 
cfs_waitq_t lcm_waitq; struct list_head lcm_llcd_pending; /* llog_canceld_ctxt to send */ struct list_head lcm_llcd_resend; /* try to resend this data */ diff --git a/lustre/include/lustre_debug.h b/lustre/include/lustre_debug.h new file mode 100644 index 0000000..c6bd7ba --- /dev/null +++ b/lustre/include/lustre_debug.h @@ -0,0 +1,64 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _LUSTRE_DEBUG_H +#define _LUSTRE_DEBUG_H + +#include + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#define ASSERT_MAX_SIZE_MB 60000ULL /* sanity bound in MB; << 20 yields bytes */ +#define ASSERT_PAGE_INDEX(index, OP) \ +do { if ((index) > (ASSERT_MAX_SIZE_MB << (20 - CFS_PAGE_SHIFT))) { \ + CERROR("bad page index %lu > %Lu\n", (index), \ + ASSERT_MAX_SIZE_MB << (20 - CFS_PAGE_SHIFT)); \ + libcfs_debug = ~0UL; \ + OP; \ +}} while(0) + +#define ASSERT_FILE_OFFSET(offset, OP) \ +do { if ((offset) > (ASSERT_MAX_SIZE_MB << 20)) { \ + CERROR("bad file offset %Lu > %Lu\n", (offset), \ + ASSERT_MAX_SIZE_MB << 20); \ + libcfs_debug = ~0UL; \ + OP; \ +}} while(0) + +/* lib/debug.c */ +int dump_lniobuf(struct niobuf_local *lnb); +int dump_rniobuf(struct niobuf_remote *rnb); +int dump_ioo(struct obd_ioobj *nb); +int dump_req(struct ptlrpc_request *req); +int dump_obdo(struct obdo *oa); +void dump_lsm(int level, struct lov_stripe_md *lsm); +int block_debug_setup(void *addr, int len, __u64 off, __u64 id); +int block_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); +#endif diff --git a/lustre/include/linux/lustre_disk.h b/lustre/include/lustre_disk.h similarity index 94% rename from lustre/include/linux/lustre_disk.h rename to lustre/include/lustre_disk.h index 820f1eb..9e0bd03 100644 --- a/lustre/include/linux/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -51,13 +51,13 @@ enum ldd_mount_type { - LDD_MT_EXT3 = 0, + LDD_MT_EXT3 = 0, LDD_MT_LDISKFS, - LDD_MT_SMFS, + LDD_MT_SMFS, LDD_MT_REISERFS, LDD_MT_LAST }; - + static inline char *mt_str(enum ldd_mount_type mt) { static char *mount_type_string[] = { @@ -85,16 +85,16 @@ struct lustre_disk_data { __u32 ldd_feature_compat; /* compatible feature flags */ __u32 ldd_feature_rocompat;/* read-only compatible feature flags */ __u32 ldd_feature_incompat;/* incompatible feature flags */ - + __u32 ldd_config_ver; /* config rewrite count - not used */ __u32 ldd_flags; /* LDD_SV_TYPE */ - __u32 ldd_svindex; /* server index (0001), must match + __u32 ldd_svindex; /* server index (0001), must match svname */ __u32 ldd_mount_type; /* target fs type LDD_MT_* */
char ldd_fsname[64]; /* filesystem this server is part of */ char ldd_svname[64]; /* this server's name (lustre-mdt0001)*/ __u8 ldd_uuid[40]; /* server UUID (COMPAT_146) */ - + /*200*/ __u8 ldd_padding[4096 - 200]; /*4096*/char ldd_mount_opts[4096]; /* target fs mount opts */ /*8192*/char ldd_params[4096]; /* key=value pairs */ @@ -111,7 +111,7 @@ static inline int server_make_name(__u32 flags, __u16 index, char *fs, { if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) { sprintf(name, "%.8s-%s%04x", fs, - (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", + (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST", index); } else if (flags & LDD_F_SV_TYPE_MGS) { sprintf(name, "MGS"); @@ -128,7 +128,7 @@ int server_name2index(char *svname, __u32 *idx, char **endptr); /****************** mount command *********************/ -/* The lmd is only used internally by Lustre; mount simply passes +/* The lmd is only used internally by Lustre; mount simply passes everything as string options */ #define LMD_MAGIC 0xbdacbd03 @@ -141,17 +141,17 @@ struct lustre_mount_data { int lmd_exclude_count; char *lmd_dev; /* device name */ char *lmd_profile; /* client only */ - char *lmd_opts; /* lustre mount options (as opposed to + char *lmd_opts; /* lustre mount options (as opposed to _device_ mount options) */ __u32 *lmd_exclude; /* array of OSTs to ignore */ }; #define LMD_FLG_CLIENT 0x0002 /* Mounting a client only */ #define LMD_FLG_RECOVER 0x0004 /* Allow recovery */ -#define LMD_FLG_NOSVC 0x0008 /* Only start MGS/MGC for servers, +#define LMD_FLG_NOSVC 0x0008 /* Only start MGS/MGC for servers, no other services */ -#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) +#define lmd_is_client(x) ((x)->lmd_flags & LMD_FLG_CLIENT) /****************** mkfs command *********************/ @@ -167,17 +167,21 @@ struct mkfs_opts { char mo_loopdev[128]; /* in case a loop dev is needed */ __u64 mo_device_sz; /* in KB */ int mo_stripe_count; - int mo_flags; + int mo_flags; int mo_mgs_failnodes; }; 
+/****************** on-disk files *********************/ + +#define LAST_RCVD "last_rcvd" +#define LOV_OBJID "lov_objid" +#define HEALTH_CHECK "health_check" + /****************** last_rcvd file *********************/ -#define LAST_RCVD "last_rcvd" -#define LOV_OBJID "lov_objid" -#define LR_SERVER_SIZE 512 -#define LR_CLIENT_START 8192 -#define LR_CLIENT_SIZE 128 +#define LR_SERVER_SIZE 512 +#define LR_CLIENT_START 8192 +#define LR_CLIENT_SIZE 128 #if LR_CLIENT_START < LR_SERVER_SIZE #error "Can't have LR_CLIENT_START < LR_SERVER_SIZE" #endif @@ -185,6 +189,7 @@ struct mkfs_opts { * 2^n * PAGE_SIZE * 8 for the number of bits that fit an order-n allocation. */ #define LR_MAX_CLIENTS (PAGE_SIZE * 8) + /* COMPAT_146 */ #define OBD_COMPAT_OST 0x00000002 /* this is an OST (temporary) */ #define OBD_COMPAT_MDT 0x00000004 /* this is an MDT (temporary) */ @@ -279,18 +284,19 @@ struct lustre_mount_info { /****************** prototypes *********************/ #ifdef __KERNEL__ -#include +#include /* obd_mount.c */ void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb)); int lustre_common_put_super(struct super_block *sb); -int lustre_process_log(struct super_block *sb, char *logname, +int lustre_process_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); -int lustre_end_log(struct super_block *sb, char *logname, +int lustre_end_log(struct super_block *sb, char *logname, struct config_llog_instance *cfg); struct lustre_mount_info *server_get_mount(const char *name); int server_put_mount(const char *name, struct vfsmount *mnt); int server_register_target(struct super_block *sb); +struct mgs_target_info; int server_mti_print(char *title, struct mgs_target_info *mti); /* mgc_request.c */ diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h new file mode 100644 index 0000000..4b74c90 --- /dev/null +++ b/lustre/include/lustre_dlm.h @@ -0,0 +1,602 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * 
(visit-tags-table FILE) + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef _LUSTRE_DLM_H__ +#define _LUSTRE_DLM_H__ + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#include +#include +#include +#include +#include /* for obd_export, for LDLM_DEBUG */ + +struct obd_ops; +struct obd_device; + +#define OBD_LDLM_DEVICENAME "ldlm" + +#define LDLM_DEFAULT_LRU_SIZE (100 * smp_num_cpus) + +typedef enum { + ELDLM_OK = 0, + + ELDLM_LOCK_CHANGED = 300, + ELDLM_LOCK_ABORTED = 301, + ELDLM_LOCK_REPLACED = 302, + ELDLM_NO_LOCK_DATA = 303, + + ELDLM_NAMESPACE_EXISTS = 400, + ELDLM_BAD_NAMESPACE = 401 +} ldlm_error_t; + +#define LDLM_NAMESPACE_SERVER 0 +#define LDLM_NAMESPACE_CLIENT 1 + +#define LDLM_FL_LOCK_CHANGED 0x000001 /* extent, mode, or resource changed */ + +/* If the server returns one of these flags, then the lock was put on that list. + * If the client sends one of these flags (during recovery ONLY!), it wants the + * lock added to the specified list, no questions asked. -p */ +#define LDLM_FL_BLOCK_GRANTED 0x000002 +#define LDLM_FL_BLOCK_CONV 0x000004 +#define LDLM_FL_BLOCK_WAIT 0x000008 + +#define LDLM_FL_CBPENDING 0x000010 /* this lock is being destroyed */ +#define LDLM_FL_AST_SENT 0x000020 /* blocking or cancel packet was sent */ +#define LDLM_FL_WAIT_NOREPROC 0x000040 /* not a real flag, not saved in lock */ +#define LDLM_FL_CANCEL 0x000080 /* cancellation callback already run */ + +/* Lock is being replayed. This could probably be implied by the fact that one + * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. 
*/ +#define LDLM_FL_REPLAY 0x000100 + +#define LDLM_FL_INTENT_ONLY 0x000200 /* don't grant lock, just do intent */ +#define LDLM_FL_LOCAL_ONLY 0x000400 /* see ldlm_cli_cancel_unused */ + +/* don't run the cancel callback under ldlm_cli_cancel_unused */ +#define LDLM_FL_FAILED 0x000800 + +#define LDLM_FL_HAS_INTENT 0x001000 /* lock request has intent */ +#define LDLM_FL_CANCELING 0x002000 /* lock cancel has already been sent */ +#define LDLM_FL_LOCAL 0x004000 /* local lock (ie, no srv/cli split) */ +#define LDLM_FL_WARN 0x008000 /* see ldlm_cli_cancel_unused */ +#define LDLM_FL_DISCARD_DATA 0x010000 /* discard (no writeback) on cancel */ + +#define LDLM_FL_NO_TIMEOUT 0x020000 /* Blocked by group lock - wait + * indefinitely */ + +/* file & record locking */ +#define LDLM_FL_BLOCK_NOWAIT 0x040000 // server told not to wait if blocked +#define LDLM_FL_TEST_LOCK 0x080000 // return blocking lock + +/* XXX FIXME: This is being added to b_size as a low-risk fix to the fact that + * the LVB filling happens _after_ the lock has been granted, so another thread + * can match before the LVB has been updated. As a dirty hack, we set + * LDLM_FL_CAN_MATCH only after we've done the LVB poop. + * + * The proper fix is to do the granting inside of the completion AST, which can + * be replaced with a LVB-aware wrapping function for OSC locks. That change is + * pretty high-risk, though, and would need a lot more testing. */ +#define LDLM_FL_CAN_MATCH 0x100000 + +/* A lock contributes to the kms calculation until it has finished the part + * of its cancellation that performs write back on its dirty pages. It + * can remain on the granted list during this whole time. Threads racing + * to update the kms after performing their writeback need to know to + * exclude each other's locks from the calculation as they walk the granted + * list.
*/ +#define LDLM_FL_KMS_IGNORE 0x200000 + +/* Don't drop lock covering mmapped file in LRU */ +#define LDLM_FL_NO_LRU 0x400000 + +/* Immediately cancel such locks when they block some other locks. Send + cancel notification to original lock holder, but expect no reply. */ +#define LDLM_FL_CANCEL_ON_BLOCK 0x800000 + +/* Flags inherited from parent lock when doing intents. */ +#define LDLM_INHERIT_FLAGS (LDLM_FL_CANCEL_ON_BLOCK) + +/* These are flags that are mapped into the flags and ASTs of blocking locks */ +#define LDLM_AST_DISCARD_DATA 0x80000000 /* Add FL_DISCARD to blocking ASTs */ +/* Flags sent in AST lock_flags to be mapped into the receiving lock. */ +#define LDLM_AST_FLAGS (LDLM_FL_DISCARD_DATA) + +/* The blocking callback is overloaded to perform two functions. These flags + * indicate which operation should be performed. */ +#define LDLM_CB_BLOCKING 1 +#define LDLM_CB_CANCELING 2 + +/* compatibility matrix */ +#define LCK_COMPAT_EX LCK_NL +#define LCK_COMPAT_PW (LCK_COMPAT_EX | LCK_CR) +#define LCK_COMPAT_PR (LCK_COMPAT_PW | LCK_PR) +#define LCK_COMPAT_CW (LCK_COMPAT_PW | LCK_CW) +#define LCK_COMPAT_CR (LCK_COMPAT_CW | LCK_PR | LCK_PW) +#define LCK_COMPAT_NL (LCK_COMPAT_CR | LCK_EX) +#define LCK_COMPAT_GROUP (LCK_GROUP | LCK_NL) + +extern ldlm_mode_t lck_compat_array[]; + +static inline void lockmode_verify(ldlm_mode_t mode) +{ + LASSERT(mode > LCK_MINMODE && mode < LCK_MAXMODE); +} + +static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new) +{ + return (lck_compat_array[exist] & new); +} + +/* + * + * cluster name spaces + * + */ + +#define DLM_OST_NAMESPACE 1 +#define DLM_MDS_NAMESPACE 2 + +/* XXX + - do we just separate this by security domains and use a prefix for + multiple namespaces in the same domain?
+ - +*/ + +struct ldlm_lock; +struct ldlm_resource; +struct ldlm_namespace; + +typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, + void *req_cookie, ldlm_mode_t mode, int flags, + void *data); + +struct ldlm_valblock_ops { + int (*lvbo_init)(struct ldlm_resource *res); + int (*lvbo_update)(struct ldlm_resource *res, struct lustre_msg *m, + int buf_idx, int increase); +}; + +struct ldlm_namespace { + char *ns_name; + __u32 ns_client; /* is this a client-side lock tree? */ + struct list_head *ns_hash; /* hash table for ns */ + cfs_waitq_t ns_refcount_waitq; /* for cleanup */ + atomic_t ns_refcount; /* count of resources in the hash */ + struct list_head ns_root_list; /* all root resources in ns */ + struct lustre_lock ns_lock; /* protects hash, refcount, list */ + struct list_head ns_list_chain; /* position in global NS list */ + + struct list_head ns_unused_list; /* NOTE(review): presumably the LRU of unused locks, not root resources -- confirm */ + int ns_nr_unused; + unsigned int ns_max_unused; + cfs_time_t ns_next_dump; /* next debug dump, jiffies */ + + spinlock_t ns_counter_lock; + __u64 ns_locks; + ldlm_res_policy ns_policy; + struct ldlm_valblock_ops *ns_lvbo; + void *ns_lvbp; +}; + +/* + * + * Resource hash table + * + */ + +#define RES_HASH_BITS 10 +#define RES_HASH_SIZE (1UL << RES_HASH_BITS) +#define RES_HASH_MASK (RES_HASH_SIZE - 1) + +struct ldlm_lock; + +typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock, + struct ldlm_lock_desc *new, void *data, + int flag); +typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags, + void *data); +typedef int (*ldlm_glimpse_callback)(struct ldlm_lock *lock, void *data); + +struct ldlm_lock { + struct portals_handle l_handle; // must be first in the structure + atomic_t l_refc; + struct ldlm_resource *l_resource; + struct ldlm_lock *l_parent; + struct list_head l_children; + struct list_head l_childof; + struct list_head l_lru; + struct list_head
l_export_chain; // per-export chain of locks + + ldlm_mode_t l_req_mode; + ldlm_mode_t l_granted_mode; + + ldlm_completion_callback l_completion_ast; + ldlm_blocking_callback l_blocking_ast; + ldlm_glimpse_callback l_glimpse_ast; + + struct obd_export *l_export; + struct obd_export *l_conn_export; + __u32 l_flags; + struct lustre_handle l_remote_handle; + ldlm_policy_data_t l_policy_data; + + __u32 l_readers; + __u32 l_writers; + __u8 l_destroyed; + + /* If the lock is granted, a process sleeps on this waitq to learn when + * it's no longer in use. If the lock is not granted, a process sleeps + * on this waitq to learn when it becomes granted. */ + cfs_waitq_t l_waitq; + struct timeval l_enqueued_time; + + cfs_time_t l_last_used; /* jiffies */ + struct ldlm_extent l_req_extent; + + /* Client-side-only members */ + __u32 l_lvb_len; /* temporary storage for */ + void *l_lvb_data; /* an LVB received during */ + void *l_lvb_swabber; /* an enqueue */ + void *l_ast_data; + + /* Server-side-only members */ + struct list_head l_pending_chain; /* callbacks pending */ + cfs_time_t l_callback_timeout; /* jiffies */ + + __u32 l_pid; /* pid which created this lock */ +}; + +struct ldlm_resource { + struct ldlm_namespace *lr_namespace; + struct list_head lr_hash; + struct ldlm_resource *lr_parent; /* 0 for a root resource */ + struct list_head lr_children; /* list head for child resources */ + struct list_head lr_childof; /* part of ns_root_list if root res, + * part of lr_children if child */ + + struct list_head lr_granted; + struct list_head lr_converting; + struct list_head lr_waiting; + ldlm_mode_t lr_most_restr; + ldlm_type_t lr_type; /* LDLM_{PLAIN,EXTENT,FLOCK} */ + struct ldlm_resource *lr_root; + struct ldlm_res_id lr_name; + atomic_t lr_refcount; + + /* Server-side-only lock value block elements */ + struct semaphore lr_lvb_sem; + __u32 lr_lvb_len; + void *lr_lvb_data; + + /* lr_tmp holds a list head temporarily, during the building of a work + * queue. 
see ldlm_add_ast_work_item and ldlm_run_ast_work */ + void *lr_tmp; +}; + +struct ldlm_ast_work { + struct ldlm_lock *w_lock; + int w_blocking; + struct ldlm_lock_desc w_desc; + struct list_head w_list; + int w_flags; + void *w_data; + int w_datalen; +}; + +extern struct obd_ops ldlm_obd_ops; + +extern char *ldlm_lockname[]; +extern char *ldlm_typename[]; +extern char *ldlm_it2str(int it); + +#define __LDLM_DEBUG(level, lock, format, a...) \ +do { \ + if (lock->l_resource == NULL) { \ + CDEBUG(level, "### " format \ + " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\ + "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: " \ + LPX64" expref: %d pid: %u\n" , ## a, lock, \ + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ + lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_flags, lock->l_remote_handle.cookie, \ + lock->l_export ? \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ + break; \ + } \ + if (lock->l_resource->lr_type == LDLM_EXTENT) { \ + CDEBUG(level, "### " format \ + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ + "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\ + "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64 \ + " expref: %d pid: %u\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, lock, \ + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ + lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name.name[0], \ + lock->l_resource->lr_name.name[1], \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_policy_data.l_extent.start, \ + lock->l_policy_data.l_extent.end, \ + lock->l_req_extent.start, lock->l_req_extent.end, \ + lock->l_flags, lock->l_remote_handle.cookie, \ + lock->l_export ? 
\ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ + break; \ + } \ + if (lock->l_resource->lr_type == LDLM_FLOCK) { \ + CDEBUG(level, "### " format \ + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ + "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d " \ + "["LPU64"->"LPU64"] flags: %x remote: "LPX64 \ + " expref: %d pid: %u\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, lock, \ + lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ + lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name.name[0], \ + lock->l_resource->lr_name.name[1], \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_policy_data.l_flock.pid, \ + lock->l_policy_data.l_flock.start, \ + lock->l_policy_data.l_flock.end, \ + lock->l_flags, lock->l_remote_handle.cookie, \ + lock->l_export ? \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ + break; \ + } \ + if (lock->l_resource->lr_type == LDLM_IBITS) { \ + CDEBUG(level, "### " format \ + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ + "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s " \ + "flags: %x remote: "LPX64" expref: %d " \ + "pid %u\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, \ + lock, lock->l_handle.h_cookie, \ + atomic_read (&lock->l_refc), \ + lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name.name[0], \ + lock->l_resource->lr_name.name[1], \ + lock->l_policy_data.l_inodebits.bits, \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_flags, lock->l_remote_handle.cookie, \ + lock->l_export ? 
\ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ + break; \ + } \ + { \ + CDEBUG(level, "### " format \ + " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ + "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x " \ + "remote: "LPX64" expref: %d pid: %u\n" , ## a, \ + lock->l_resource->lr_namespace->ns_name, \ + lock, lock->l_handle.h_cookie, \ + atomic_read (&lock->l_refc), \ + lock->l_readers, lock->l_writers, \ + ldlm_lockname[lock->l_granted_mode], \ + ldlm_lockname[lock->l_req_mode], \ + lock->l_resource->lr_name.name[0], \ + lock->l_resource->lr_name.name[1], \ + atomic_read(&lock->l_resource->lr_refcount), \ + ldlm_typename[lock->l_resource->lr_type], \ + lock->l_flags, lock->l_remote_handle.cookie, \ + lock->l_export ? \ + atomic_read(&lock->l_export->exp_refcount) : -99, \ + lock->l_pid); \ + } \ +} while (0) + +#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \ + format, ## a) +#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a) + +#define LDLM_DEBUG_NOLOCK(format, a...) \ + CDEBUG(D_DLMTRACE, "### " format "\n" , ## a) + +typedef int (*ldlm_processing_policy)(struct ldlm_lock *lock, int *flags, + int first_enq, ldlm_error_t *err); + +/* + * Iterators. 
+ */ + +#define LDLM_ITER_CONTINUE 1 /* keep iterating */ +#define LDLM_ITER_STOP 2 /* stop iterating */ + +typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *); +typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *); + +int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter, + void *closure); +int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter, + void *closure); +int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, + ldlm_res_iterator_t iter, void *closure); + +int ldlm_replay_locks(struct obd_import *imp); +void ldlm_change_cbdata(struct ldlm_namespace *, struct ldlm_res_id *, + ldlm_iterator_t iter, void *data); + +/* ldlm_flock.c */ +int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data); + +/* ldlm_extent.c */ +__u64 ldlm_extent_shift_kms(struct ldlm_lock *lock, __u64 old_kms); + + +/* ldlm_lockd.c */ +int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, + void *data, int flag); +int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data); +int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data); +int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback, + ldlm_blocking_callback, ldlm_glimpse_callback); +int ldlm_handle_convert(struct ptlrpc_request *req); +int ldlm_handle_cancel(struct ptlrpc_request *req); +int ldlm_del_waiting_lock(struct ldlm_lock *lock); +int ldlm_get_ref(void); +void ldlm_put_ref(int force); + +/* ldlm_lock.c */ +ldlm_processing_policy ldlm_get_processing_policy(struct ldlm_resource *res); +void ldlm_register_intent(struct ldlm_namespace *ns, ldlm_res_policy arg); +void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); +struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags); +void ldlm_cancel_callback(struct ldlm_lock *); +int ldlm_lock_set_data(struct lustre_handle *, void *data); +void ldlm_lock_remove_from_lru(struct ldlm_lock *); +struct 
ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *, + struct lustre_handle *); + +static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h) +{ + return __ldlm_handle2lock(h, 0); +} + +#define LDLM_LOCK_PUT(lock) \ +do { \ + /*LDLM_DEBUG((lock), "put");*/ \ + ldlm_lock_put(lock); \ +} while (0) + +#define LDLM_LOCK_GET(lock) \ +({ \ + ldlm_lock_get(lock); \ + /*LDLM_DEBUG((lock), "get");*/ \ + lock; \ +}) + +struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); +void ldlm_lock_put(struct ldlm_lock *lock); +void ldlm_lock_destroy(struct ldlm_lock *lock); +void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); +void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); +void ldlm_lock_allow_match(struct ldlm_lock *lock); +int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, + ldlm_type_t type, ldlm_policy_data_t *, ldlm_mode_t mode, + struct lustre_handle *); +struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, + int *flags); +void ldlm_lock_cancel(struct ldlm_lock *lock); +void ldlm_cancel_locks_for_export(struct obd_export *export); +void ldlm_reprocess_all(struct ldlm_resource *res); +void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); +void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos); +void ldlm_lock_dump_handle(int level, struct lustre_handle *); + +/* resource.c */ +struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 local); +int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags); +int ldlm_namespace_free(struct ldlm_namespace *ns, int force); +int ldlm_proc_setup(void); +#ifdef LPROCFS +void ldlm_proc_cleanup(void); +#else +static inline void ldlm_proc_cleanup(void) {} +#endif + +/* resource.c - internal */ +struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, + struct 
ldlm_resource *parent, + struct ldlm_res_id, ldlm_type_t type, + int create); +struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); +int ldlm_resource_putref(struct ldlm_resource *res); +void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, + struct ldlm_lock *lock); +void ldlm_resource_unlink_lock(struct ldlm_lock *lock); +void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); +void ldlm_dump_all_namespaces(int level); +void ldlm_namespace_dump(int level, struct ldlm_namespace *); +void ldlm_resource_dump(int level, struct ldlm_resource *); +int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, + struct ldlm_res_id); + +struct ldlm_callback_suite { + ldlm_completion_callback lcs_completion; + ldlm_blocking_callback lcs_blocking; + ldlm_glimpse_callback lcs_glimpse; +}; + +/* ldlm_request.c */ +int ldlm_expired_completion_wait(void *data); +int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag); +int ldlm_glimpse_ast(struct ldlm_lock *lock, void *reqp); +int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data); +int ldlm_cli_enqueue(struct obd_export *exp, + struct ptlrpc_request *req, + struct ldlm_namespace *ns, + struct ldlm_res_id, + ldlm_type_t type, + ldlm_policy_data_t *, + ldlm_mode_t mode, + int *flags, + ldlm_blocking_callback blocking, + ldlm_completion_callback completion, + ldlm_glimpse_callback glimpse, + void *data, + void *lvb, + __u32 lvb_len, + void *lvb_swabber, + struct lustre_handle *lockh); +int ldlm_handle_enqueue0(struct ldlm_namespace *ns, struct ptlrpc_request *req, + struct ldlm_request *dlm_req, + struct ldlm_callback_suite *cbs); +int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, + void *data, __u32 data_len); +int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags); +int ldlm_handle_convert0(struct ptlrpc_request *req, + struct ldlm_request 
*dlm_req); +int ldlm_cli_cancel(struct lustre_handle *lockh); +int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *, + int flags, void *opaque); +int ldlm_cli_join_lru(struct ldlm_namespace *, struct ldlm_res_id *, + int join); + +/* mds/handler.c */ +/* This has to be here because recursive inclusion sucks. */ +int intent_disposition(struct ldlm_reply *rep, int flag); +void intent_set_disposition(struct ldlm_reply *rep, int flag); + + +/* ioctls for trying requests */ +#define IOC_LDLM_TYPE 'f' +#define IOC_LDLM_MIN_NR 40 + +#define IOC_LDLM_TEST _IOWR('f', 40, long) +#define IOC_LDLM_DUMP _IOWR('f', 41, long) +#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long) +#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) +#define IOC_LDLM_MAX_NR 43 + +#endif diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/lustre_export.h similarity index 96% rename from lustre/include/linux/lustre_export.h rename to lustre/include/lustre_export.h index 820426b..f6e3f36 100644 --- a/lustre/include/linux/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -5,8 +5,8 @@ #ifndef __EXPORT_H #define __EXPORT_H -#include -#include +#include +#include /* Data stored per client in the last_rcvd file. In le32 order. 
*/ struct mds_client_data; @@ -29,7 +29,7 @@ struct osc_creator { int oscc_grow_count; struct obdo oscc_oa; int oscc_flags; - wait_queue_head_t oscc_waitq; /* creating procs wait on this */ + cfs_waitq_t oscc_waitq; /* creating procs wait on this */ }; struct ldlm_export_data { diff --git a/lustre/include/linux/lustre_fid.h b/lustre/include/lustre_fid.h similarity index 98% rename from lustre/include/linux/lustre_fid.h rename to lustre/include/lustre_fid.h index 1547c2a..6476fa7 100644 --- a/lustre/include/linux/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -26,7 +26,7 @@ /* * struct lu_fid */ -#include +#include #include #include diff --git a/lustre/include/lustre_fsfilt.h b/lustre/include/lustre_fsfilt.h new file mode 100644 index 0000000..41b9431 --- /dev/null +++ b/lustre/include/lustre_fsfilt.h @@ -0,0 +1,38 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Filesystem interface helper. + * + */ + +#ifndef _LUSTRE_FSFILT_H +#define _LUSTRE_FSFILT_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#endif diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/lustre_ha.h similarity index 100% rename from lustre/include/linux/lustre_ha.h rename to lustre/include/lustre_ha.h diff --git a/lustre/include/lustre_handles.h b/lustre/include/lustre_handles.h new file mode 100644 index 0000000..bbd2fcd --- /dev/null +++ b/lustre/include/lustre_handles.h @@ -0,0 +1,43 @@ +#ifndef __LUSTRE_HANDLES_H_ +#define __LUSTRE_HANDLES_H_ + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +typedef void (*portals_handle_addref_cb)(void *object); + +/* These handles are most easily used by having them appear at the very top of + * whatever object that you want to make handles for. ie: + * + * struct ldlm_lock { + * struct portals_handle handle; + * ... + * }; + * + * Now you're able to assign the results of cookie2handle directly to an + * ldlm_lock. If it's not at the top, you'll want to hack up a macro that + * uses some offsetof() magic. 
*/ + +struct portals_handle { + struct list_head h_link; + __u64 h_cookie; + portals_handle_addref_cb h_addref; +}; + +/* handles.c */ + +/* Add a handle to the hash table */ +void class_handle_hash(struct portals_handle *, portals_handle_addref_cb); +void class_handle_unhash(struct portals_handle *); +void *class_handle2object(__u64 cookie); +int class_handle_init(void); +void class_handle_cleanup(void); + +#endif diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/lustre_import.h similarity index 92% rename from lustre/include/linux/lustre_import.h rename to lustre/include/lustre_import.h index 6b87e84..ff74277 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -5,8 +5,8 @@ #ifndef __IMPORT_H #define __IMPORT_H -#include -#include +#include +#include enum lustre_imp_state { LUSTRE_IMP_CLOSED = 1, @@ -45,7 +45,7 @@ struct obd_import_conn { struct list_head oic_item; struct ptlrpc_connection *oic_conn; struct obd_uuid oic_uuid; - unsigned long oic_last_attempt; /* in jiffies */ + cfs_time_t oic_last_attempt; /* in cfs_time_t */ }; struct obd_import { @@ -64,17 +64,19 @@ struct obd_import { struct list_head imp_delayed_list; struct obd_device *imp_obd; - wait_queue_head_t imp_recovery_waitq; - __u64 imp_last_replay_transno; + cfs_waitq_t imp_recovery_waitq; + atomic_t imp_inflight; atomic_t imp_replay_inflight; enum lustre_imp_state imp_state; int imp_generation; __u32 imp_conn_cnt; - __u64 imp_max_transno; + int imp_last_generation_checked; + __u64 imp_last_replay_transno; __u64 imp_peer_committed_transno; + __u64 imp_last_transno_checked; struct lustre_handle imp_remote_handle; - unsigned long imp_next_ping; /* jiffies */ + cfs_time_t imp_next_ping; /* jiffies */ /* all available obd_import_conn linked here */ struct list_head imp_conn_list; diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h new file mode 100644 index 0000000..5c0f95f7 --- /dev/null +++ b/lustre/include/lustre_lib.h @@ -0,0 
+1,754 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic Lustre library routines. + * + */ + +#ifndef _LUSTRE_LIB_H +#define _LUSTRE_LIB_H + +#include +#include +#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +/* prng.c */ +unsigned int ll_rand(void); /* returns a random 32-bit integer */ +void ll_srand(unsigned int, unsigned int); /* seed the generator */ + +/* target.c */ +struct ptlrpc_request; +struct recovd_data; +struct recovd_obd; +struct obd_export; +#include +#include +#include + +int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler); +int target_handle_disconnect(struct ptlrpc_request *req); +void target_destroy_export(struct obd_export *exp); +int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, + struct obd_uuid *cluuid); +int target_handle_ping(struct ptlrpc_request *req); +void target_committed_to_req(struct ptlrpc_request *req); + +#ifdef HAVE_QUOTA_SUPPORT +/* quotacheck callback, dqacq/dqrel callback handler */ +int target_handle_qc_callback(struct ptlrpc_request *req); +int target_handle_dqacq_callback(struct ptlrpc_request *req); +#else +#define target_handle_dqacq_callback(req) ldlm_callback_reply(req, -ENOTSUPP) +#define target_handle_qc_callback(req) (0) +#endif + +void target_cancel_recovery_timer(struct obd_device *obd); + +#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) /* *waves hands* */ +void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler); +void target_abort_recovery(void *data); +void target_cleanup_recovery(struct obd_device *obd); +int target_queue_recovery_request(struct ptlrpc_request *req, + struct obd_device *obd); +int target_queue_final_reply(struct ptlrpc_request *req, int rc); +void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); + +/* client.c */ + +int client_sanobd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg); +struct client_obd *client_conn2cli(struct lustre_handle *conn); + +struct mdc_open_data; +struct obd_client_handle { + struct lustre_handle och_fh; + struct llog_cookie och_cookie; + struct mdc_open_data *och_mod; + __u32 och_magic; +}; +#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed + +/* statfs_pack.c 
*/ +void statfs_pack(struct obd_statfs *osfs, struct kstatfs *sfs); +void statfs_unpack(struct kstatfs *sfs, struct obd_statfs *osfs); + +/* l_lock.c */ +struct lustre_lock { + int l_depth; + cfs_task_t *l_owner; + struct semaphore l_sem; + spinlock_t l_spin; +}; + +void l_lock_init(struct lustre_lock *); +void l_lock(struct lustre_lock *); +void l_unlock(struct lustre_lock *); +int l_has_lock(struct lustre_lock *); + + +/* + * OBD IOCTLS + */ +#define OBD_IOCTL_VERSION 0x00010004 + +struct obd_ioctl_data { + uint32_t ioc_len; + uint32_t ioc_version; + + uint64_t ioc_cookie; + uint32_t ioc_conn1; + uint32_t ioc_conn2; + + struct obdo ioc_obdo1; + struct obdo ioc_obdo2; + + obd_size ioc_count; + obd_off ioc_offset; + uint32_t ioc_dev; + uint32_t ioc_command; + + uint64_t ioc_nid; + uint32_t ioc_nal; + uint32_t ioc_type; + + /* buffers the kernel will treat as user pointers */ + uint32_t ioc_plen1; + char *ioc_pbuf1; + uint32_t ioc_plen2; + char *ioc_pbuf2; + + /* inline buffers for various arguments */ + uint32_t ioc_inllen1; + char *ioc_inlbuf1; + uint32_t ioc_inllen2; + char *ioc_inlbuf2; + uint32_t ioc_inllen3; + char *ioc_inlbuf3; + uint32_t ioc_inllen4; + char *ioc_inlbuf4; + + char ioc_bulk[0]; +}; + +struct obd_ioctl_hdr { + uint32_t ioc_len; + uint32_t ioc_version; +}; + +static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) +{ + int len = size_round(sizeof(struct obd_ioctl_data)); + len += size_round(data->ioc_inllen1); + len += size_round(data->ioc_inllen2); + len += size_round(data->ioc_inllen3); + len += size_round(data->ioc_inllen4); + return len; +} + + +static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) +{ + if (data->ioc_len > (1<<30)) { + CERROR("OBD ioctl: ioc_len larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen1 > (1<<30)) { + CERROR("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen2 > (1<<30)) { + CERROR("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); + return 1; + } + if 
(data->ioc_inllen3 > (1<<30)) { + CERROR("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen4 > (1<<30)) { + CERROR("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inlbuf1 && !data->ioc_inllen1) { + CERROR("OBD ioctl: inlbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf2 && !data->ioc_inllen2) { + CERROR("OBD ioctl: inlbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf3 && !data->ioc_inllen3) { + CERROR("OBD ioctl: inlbuf3 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf4 && !data->ioc_inllen4) { + CERROR("OBD ioctl: inlbuf4 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf1 && !data->ioc_plen1) { + CERROR("OBD ioctl: pbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf2 && !data->ioc_plen2) { + CERROR("OBD ioctl: pbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_plen1 && !data->ioc_pbuf1) { + CERROR("OBD ioctl: plen1 set but NULL pointer\n"); + return 1; + } + if (data->ioc_plen2 && !data->ioc_pbuf2) { + CERROR("OBD ioctl: plen2 set but NULL pointer\n"); + return 1; + } + if (obd_ioctl_packlen(data) > data->ioc_len) { + CERROR("OBD ioctl: packlen exceeds ioc_len (%d > %d)\n", + obd_ioctl_packlen(data), data->ioc_len); + return 1; + } + return 0; +} + +#ifndef __KERNEL__ +static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, + int max) +{ + char *ptr; + struct obd_ioctl_data *overlay; + data->ioc_len = obd_ioctl_packlen(data); + data->ioc_version = OBD_IOCTL_VERSION; + + if (*pbuf && data->ioc_len > max) + return 1; + if (*pbuf == NULL) { + *pbuf = malloc(data->ioc_len); + } + if (!*pbuf) + return 1; + overlay = (struct obd_ioctl_data *)*pbuf; + memcpy(*pbuf, data, sizeof(*data)); + + ptr = overlay->ioc_bulk; + if (data->ioc_inlbuf1) + LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); + if (data->ioc_inlbuf2) + LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); + if (data->ioc_inlbuf3) + 
LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); + if (data->ioc_inlbuf4) + LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); + if (obd_ioctl_is_invalid(overlay)) + return 1; + + return 0; +} + +static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, + int max) +{ + char *ptr; + struct obd_ioctl_data *overlay; + + if (!pbuf) + return 1; + overlay = (struct obd_ioctl_data *)pbuf; + + /* Preserve the caller's buffer pointers */ + overlay->ioc_inlbuf1 = data->ioc_inlbuf1; + overlay->ioc_inlbuf2 = data->ioc_inlbuf2; + overlay->ioc_inlbuf3 = data->ioc_inlbuf3; + overlay->ioc_inlbuf4 = data->ioc_inlbuf4; + + memcpy(data, pbuf, sizeof(*data)); + + ptr = overlay->ioc_bulk; + if (data->ioc_inlbuf1) + LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr); + if (data->ioc_inlbuf2) + LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr); + if (data->ioc_inlbuf3) + LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr); + if (data->ioc_inlbuf4) + LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr); + + return 0; +} +#endif + +#include + +#ifdef __KERNEL__ +/* function defined in lustre/obdclass//-module.c */ +int obd_ioctl_getdata(char **buf, int *len, void *arg); +int obd_ioctl_popdata(void *arg, void *data, int len); +#else +/* buffer MUST be at least the size of obd_ioctl_hdr */ +static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) +{ + struct obd_ioctl_hdr hdr; + struct obd_ioctl_data *data; + int err; + int offset = 0; + ENTRY; + + err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); + if (err) + RETURN(err); + + if (hdr.ioc_version != OBD_IOCTL_VERSION) { + CERROR("Version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { + CERROR("User buffer len %d exceeds %d max buffer\n", + hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); + RETURN(-EINVAL); + } + + if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { + CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len); + RETURN(-EINVAL); + } + + /* XXX allocate this more 
intelligently, using kmalloc when + * appropriate */ + OBD_VMALLOC(*buf, hdr.ioc_len); + if (*buf == NULL) { + CERROR("Cannot allocate control buffer of len %d\n", + hdr.ioc_len); + RETURN(-EINVAL); + } + *len = hdr.ioc_len; + data = (struct obd_ioctl_data *)*buf; + + err = copy_from_user(*buf, (void *)arg, hdr.ioc_len); + if (err) { + OBD_VFREE(*buf, hdr.ioc_len); + RETURN(err); + } + + if (obd_ioctl_is_invalid(data)) { + CERROR("ioctl not correctly formatted\n"); + OBD_VFREE(*buf, hdr.ioc_len); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1) { + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + offset += size_round(data->ioc_inllen1); + } + + if (data->ioc_inllen2) { + data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen2); + } + + if (data->ioc_inllen3) { + data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen3); + } + + if (data->ioc_inllen4) { + data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; + } + + RETURN(0); +} + +static inline int obd_ioctl_popdata(void *arg, void *data, int len) +{ + int err = copy_to_user(arg, data, len); + if (err) + err = -EFAULT; + return err; +} +#endif + +static inline void obd_ioctl_freedata(char *buf, int len) +{ + ENTRY; + + OBD_VFREE(buf, len); + EXIT; + return; +} + +/* + * BSD ioctl description: + * #define IOC_V1 _IOR(g, n1, long) + * #define IOC_V2 _IOW(g, n2, long) + * + * ioctl(f, IOC_V1, arg); + * arg will be treated as a long value, + * + * ioctl(f, IOC_V2, arg) + * arg will be treated as a pointer, bsd will call + * copyin(buf, arg, sizeof(long)) + * + * To make BSD ioctl handles argument correctly and simplely, + * we change _IOR to _IOWR so BSD will copyin obd_ioctl_data + * for us. Does this change affect Linux? 
(XXX Liang) + */ +#define OBD_IOC_CREATE _IOWR('f', 101, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DESTROY _IOW ('f', 104, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PREALLOCATE _IOWR('f', 105, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_SETATTR _IOW ('f', 107, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETATTR _IOWR ('f', 108, OBD_IOC_DATA_TYPE) +#define OBD_IOC_READ _IOWR('f', 109, OBD_IOC_DATA_TYPE) +#define OBD_IOC_WRITE _IOWR('f', 110, OBD_IOC_DATA_TYPE) + + +#define OBD_IOC_STATFS _IOWR('f', 113, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SYNC _IOW ('f', 114, OBD_IOC_DATA_TYPE) +#define OBD_IOC_READ2 _IOWR('f', 115, OBD_IOC_DATA_TYPE) +#define OBD_IOC_FORMAT _IOWR('f', 116, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARTITION _IOWR('f', 117, OBD_IOC_DATA_TYPE) +#define OBD_IOC_COPY _IOWR('f', 120, OBD_IOC_DATA_TYPE) +#define OBD_IOC_MIGR _IOWR('f', 121, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PUNCH _IOWR('f', 122, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, OBD_IOC_DATA_TYPE) +#define OBD_IOC_BRW_READ _IOWR('f', 125, OBD_IOC_DATA_TYPE) +#define OBD_IOC_BRW_WRITE _IOWR('f', 126, OBD_IOC_DATA_TYPE) +#define OBD_IOC_NAME2DEV _IOWR('f', 127, OBD_IOC_DATA_TYPE) +#define OBD_IOC_UUID2DEV _IOWR('f', 130, OBD_IOC_DATA_TYPE) +#define OBD_IOC_GETNAME _IOWR('f', 131, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 ) +#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, OBD_IOC_DATA_TYPE) +#define OBD_IOC_SET_READONLY _IOW ('f', 141, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, OBD_IOC_DATA_TYPE) + +#define OBD_GET_VERSION _IOWR ('f', 144, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) 
+#define OBD_IOC_LOV_SETEA _IOW ('f', 156, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_QUOTACHECK _IOW ('f', 160, int) +#define OBD_IOC_POLL_QUOTACHECK _IOR ('f', 161, struct if_quotacheck *) +#define OBD_IOC_QUOTACTL _IOWR('f', 162, struct if_quotactl *) + +#define OBD_IOC_MOUNTOPT _IOWR('f', 170, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_RECORD _IOWR('f', 180, OBD_IOC_DATA_TYPE) +#define OBD_IOC_ENDRECORD _IOWR('f', 181, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARSE _IOWR('f', 182, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DORECORD _IOWR('f', 183, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PROCESS_CFG _IOWR('f', 184, OBD_IOC_DATA_TYPE) +#define OBD_IOC_DUMP_LOG _IOWR('f', 185, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CLEAR_LOG _IOWR('f', 186, OBD_IOC_DATA_TYPE) +#define OBD_IOC_PARAM _IOW ('f', 187, OBD_IOC_DATA_TYPE) + +#define OBD_IOC_CATLOGLIST _IOWR('f', 190, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_INFO _IOWR('f', 191, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_PRINT _IOWR('f', 192, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_CANCEL _IOWR('f', 193, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_REMOVE _IOWR('f', 194, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_CHECK _IOWR('f', 195, OBD_IOC_DATA_TYPE) +#define OBD_IOC_LLOG_CATINFO _IOWR('f', 196, OBD_IOC_DATA_TYPE) + +#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, OBD_IOC_DATA_TYPE) +#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, OBD_IOC_DATA_TYPE) +#define ECHO_IOC_ENQUEUE _IOWR('f', 202, OBD_IOC_DATA_TYPE) +#define ECHO_IOC_CANCEL _IOWR('f', 203, OBD_IOC_DATA_TYPE) + +/* XXX _IOWR('f', 250, long) has been defined in + * lnet/include/libcfs/kp30.h for debug, don't use it + */ + +/* Until such time as we get_info the per-stripe maximum from the OST, + * we define this to be 2T - 4k, which is the ext3 maxbytes. 
*/ +#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL + +#define POISON_BULK 0 + +/* + * l_wait_event is a flexible sleeping function, permitting simple caller + * configuration of interrupt and timeout sensitivity along with actions to + * be performed in the event of either exception. + * + * The first form of usage looks like this: + * + * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler, + * intr_handler, callback_data); + * rc = l_wait_event(waitq, condition, &lwi); + * + * l_wait_event() makes the current process wait on 'waitq' until 'condition' + * is TRUE or a "killable" signal (SIGTERM, SIGKILL, SIGINT) is pending. It + * returns 0 to signify 'condition' is TRUE, but if a signal wakes it before + * 'condition' becomes true, it optionally calls the specified 'intr_handler' + * if not NULL, and returns -EINTR. + * + * If a non-zero timeout is specified, signals are ignored until the timeout + * has expired. At this time, if 'timeout_handler' is not NULL it is called. + * If it returns FALSE l_wait_event() continues to wait as described above with + * signals enabled. Otherwise it returns -ETIMEDOUT. + * + * LWI_INTR(intr_handler, callback_data) is shorthand for + * LWI_TIMEOUT_INTR(0, NULL, intr_handler, callback_data) + * + * The second form of usage looks like this: + * + * struct l_wait_info lwi = LWI_TIMEOUT(timeout, timeout_handler, callback_data); + * rc = l_wait_event(waitq, condition, &lwi); + * + * This form is the same as the first except that it COMPLETELY IGNORES + * SIGNALS. The caller must therefore beware that if 'timeout' is zero, or if + * 'timeout_handler' is not NULL and returns FALSE, then the ONLY thing that + * can unblock the current process is 'condition' becoming TRUE. 
+ * + * Another form of usage is: + * struct l_wait_info lwi = LWI_TIMEOUT_INTERVAL(timeout, interval, + * timeout_handler, callback_data); + * rc = l_wait_event(waitq, condition, &lwi); + * This is the same as the previous case, but condition is checked once every + * 'interval' jiffies (if non-zero). + * + * Subtle synchronization point: this macro does *not* necessarily take the + * wait-queue spin-lock before returning, and, hence, the following idiom is safe + * ONLY when caller provides some external locking: + * + * Thread1 Thread2 + * + * l_wait_event(&obj->wq, ....); (1) + * + * wake_up(&obj->wq): (2) + * spin_lock(&q->lock); (2.1) + * __wake_up_common(q, ...); (2.2) + * spin_unlock(&q->lock); (2.3) + * + * OBD_FREE_PTR(obj); (3) + * + * As l_wait_event() may "short-cut" execution and return without taking + * wait-queue spin-lock, some additional synchronization is necessary to + * guarantee that step (3) can begin only after (2.3) finishes. + * + * XXX nikita: some ptlrpc daemon threads have races of that sort. + * + */ + +#define LWI_ON_SIGNAL_NOOP ((void (*)(void *))(-1)) + +struct l_wait_info { + cfs_duration_t lwi_timeout; + cfs_duration_t lwi_interval; + int (*lwi_on_timeout)(void *); + void (*lwi_on_signal)(void *); + void *lwi_cb_data; +}; + +/* NB: LWI_TIMEOUT ignores signals completely */ +#define LWI_TIMEOUT(time, cb, data) \ +((struct l_wait_info) { \ + .lwi_timeout = time, \ + .lwi_on_timeout = cb, \ + .lwi_cb_data = data, \ + .lwi_interval = 0 \ +}) + +#define LWI_TIMEOUT_INTERVAL(time, interval, cb, data) \ +((struct l_wait_info) { \ + .lwi_timeout = time, \ + .lwi_on_timeout = cb, \ + .lwi_cb_data = data, \ + .lwi_interval = interval \ +}) + +#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \ +((struct l_wait_info) { \ + .lwi_timeout = time, \ + .lwi_on_timeout = time_cb, \ + .lwi_on_signal = (sig_cb == NULL) ? 
LWI_ON_SIGNAL_NOOP : sig_cb, \ + .lwi_cb_data = data, \ + .lwi_interval = 0 \ +}) + +#define LWI_INTR(cb, data) LWI_TIMEOUT_INTR(0, NULL, cb, data) + +#ifdef __KERNEL__ + +/* + * wait for @condition to become true, but no longer than timeout, specified + * by @info. + */ +#define __l_wait_event(wq, condition, info, ret, excl) \ +do { \ + cfs_waitlink_t __wait; \ + cfs_duration_t __timeout = info->lwi_timeout; \ + cfs_sigset_t __blocked; \ + \ + ret = 0; \ + if (condition) \ + break; \ + \ + cfs_waitlink_init(&__wait); \ + if (excl) \ + cfs_waitq_add_exclusive(&wq, &__wait); \ + else \ + cfs_waitq_add(&wq, &__wait); \ + \ + /* Block all signals (just the non-fatal ones if no timeout). */ \ + if (info->lwi_on_signal != NULL && __timeout == 0) \ + __blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ + else \ + __blocked = l_w_e_set_sigs(0); \ + \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + \ + if (condition) \ + break; \ + \ + if (__timeout == 0) { \ + cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ + } else { \ + cfs_duration_t interval = info->lwi_interval? \ + min_t(cfs_duration_t, \ + info->lwi_interval,__timeout):\ + __timeout; \ + cfs_duration_t remaining = cfs_waitq_timedwait(&__wait,\ + CFS_TASK_INTERRUPTIBLE, \ + interval); \ + __timeout = cfs_time_sub(__timeout, \ + cfs_time_sub(interval, remaining));\ + if (__timeout == 0) { \ + if (info->lwi_on_timeout == NULL || \ + info->lwi_on_timeout(info->lwi_cb_data)) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + /* Take signals after the timeout expires. */ \ + if (info->lwi_on_signal != NULL) \ + (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ + } \ + } \ + \ + if (condition) \ + break; \ + if (cfs_signal_pending()) { \ + if (info->lwi_on_signal != NULL && __timeout == 0) { \ + if (info->lwi_on_signal != LWI_ON_SIGNAL_NOOP) \ + info->lwi_on_signal(info->lwi_cb_data);\ + ret = -EINTR; \ + break; \ + } \ + /* We have to do this here because some signals */ \ + /* are not blockable - ie from strace(1). 
*/ \ + /* In these cases we want to schedule_timeout() */ \ + /* again, because we don't want that to return */ \ + /* -EINTR when the RPC actually succeeded. */ \ + /* the RECALC_SIGPENDING below will deliver the */ \ + /* signal properly. */ \ + cfs_clear_sigpending(); \ + } \ + } \ + \ + cfs_block_sigs(__blocked); \ + \ + set_current_state(TASK_RUNNING); \ + cfs_waitq_del(&wq, &__wait); \ +} while (0) + +#else /* !__KERNEL__ */ +#define __l_wait_event(wq, condition, info, ret, excl) \ +do { \ + long __timeout = info->lwi_timeout; \ + long __now; \ + long __then = 0; \ + int __timed_out = 0; \ + \ + ret = 0; \ + if (condition) \ + break; \ + \ + if (__timeout == 0) \ + __timeout = 1000000000; \ + else \ + __then = time(NULL); \ + \ + while (!(condition)) { \ + if (liblustre_wait_event(info->lwi_interval?:__timeout) || \ + (info->lwi_interval && info->lwi_interval < __timeout)) {\ + if (__timeout != 0 && info->lwi_timeout != 0) { \ + __now = time(NULL); \ + __timeout -= __now - __then; \ + if (__timeout < 0) \ + __timeout = 0; \ + __then = __now; \ + } \ + continue; \ + } \ + \ + if (info->lwi_timeout != 0 && !__timed_out) { \ + __timed_out = 1; \ + if (info->lwi_on_timeout == NULL || \ + info->lwi_on_timeout(info->lwi_cb_data)) { \ + ret = -ETIMEDOUT; \ + break; \ + } \ + } \ + } \ +} while (0) + +#endif /* __KERNEL__ */ + +#define l_wait_event(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, __ret, 0); \ + __ret; \ +}) + +#define l_wait_event_exclusive(wq, condition, info) \ +({ \ + int __ret; \ + struct l_wait_info *__info = (info); \ + \ + __l_wait_event(wq, condition, __info, __ret, 1); \ + __ret; \ +}) + +#ifdef __KERNEL__ +#define LIBLUSTRE_CLIENT (0) +#else +#define LIBLUSTRE_CLIENT (1) +#endif + +#endif /* _LUSTRE_LIB_H */ + diff --git a/lustre/include/lustre_lite.h b/lustre/include/lustre_lite.h new file mode 100644 index 0000000..09c9e7a --- /dev/null +++ 
b/lustre/include/lustre_lite.h @@ -0,0 +1,138 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef _LL_H +#define _LL_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#include +#include +#include +#include +#include + +#ifdef __KERNEL__ + +/* careful, this is easy to screw up */ +#define PAGE_CACHE_MAXBYTES ((__u64)(~0UL) << CFS_PAGE_SHIFT) + +#endif + +#define LLAP_FROM_COOKIE(c) \ + (LASSERT(((struct ll_async_page *)(c))->llap_magic == LLAP_MAGIC), \ + (struct ll_async_page *)(c)) + +#define LL_MAX_BLKSIZE (4UL * 1024 * 1024) + +#include + + +struct lustre_rw_params { + int lrp_lock_mode; + ldlm_policy_data_t lrp_policy; + obd_flag lrp_brw_flags; + int lrp_ast_flags; +}; + +/* + * XXX nikita: this function lives in the header because it is used by both + * llite kernel module and liblustre library, and there is no (?) better place + * to put it in. + */ +static inline void lustre_build_lock_params(int cmd, unsigned long open_flags, + __u64 connect_flags, + loff_t pos, ssize_t len, + struct lustre_rw_params *params) +{ + params->lrp_lock_mode = (cmd == OBD_BRW_READ) ? LCK_PR : LCK_PW; + params->lrp_brw_flags = 0; + + params->lrp_policy.l_extent.start = pos; + params->lrp_policy.l_extent.end = pos + len - 1; + /* + * for now O_APPEND always takes local locks. + */ + if (cmd == OBD_BRW_WRITE && (open_flags & O_APPEND)) { + params->lrp_policy.l_extent.start = 0; + params->lrp_policy.l_extent.end = OBD_OBJECT_EOF; + } else if (LIBLUSTRE_CLIENT && (connect_flags & OBD_CONNECT_SRVLOCK)) { + /* + * liblustre: OST-side locking for all non-O_APPEND + * reads/writes. + */ + params->lrp_lock_mode = LCK_NL; + params->lrp_brw_flags = OBD_BRW_SRVLOCK; + } else { + /* + * nothing special for the kernel. 
In the future llite may use + * OST-side locks for small writes into highly contended + * files. + */ + } + params->lrp_ast_flags = (open_flags & O_NONBLOCK) ? + LDLM_FL_BLOCK_NOWAIT : 0; +} + +/* + * This is embedded into liblustre and llite super-blocks to keep track of + * connect flags (capabilities) supported by all imports given mount is + * connected to. + */ +struct lustre_client_ocd { + /* + * This is conjunction of connect_flags across all imports (LOVs) this + * mount is connected to. This field is updated by ll_ocd_update() + * under ->lco_lock. + */ + __u64 lco_flags; + spinlock_t lco_lock; +}; + +/* + * This function is used as an upcall-callback hooked by liblustre and llite + * clients into obd_notify() listeners chain to handle notifications about + * change of import connect_flags. See llu_fsswop_mount() and + * lustre_common_fill_super(). + * + * Again, it is dumped into this header for the lack of a better place. + */ +static inline int ll_ocd_update(struct obd_device *host, + struct obd_device *watched, + enum obd_notify_event ev, void *owner) +{ + struct lustre_client_ocd *lco; + struct client_obd *cli; + __u64 flags; + int result; + + ENTRY; + if (!strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME)) { + cli = &watched->u.cli; + lco = owner; + flags = cli->cl_import->imp_connect_data.ocd_connect_flags; + CDEBUG(D_SUPER, "Changing connect_flags: "LPX64" -> "LPX64"\n", + lco->lco_flags, flags); + spin_lock(&lco->lco_lock); + lco->lco_flags &= flags; + spin_unlock(&lco->lco_lock); + result = 0; + } else { + CERROR("unexpected notification from %s %s!\n", + watched->obd_type->typ_name, + watched->obd_name); + result = -EINVAL; + } + RETURN(result); +} + +#endif diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h new file mode 100644 index 0000000..c05ce65 --- /dev/null +++ b/lustre/include/lustre_log.h @@ -0,0 +1,425 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * 
+ * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Generic infrastructure for managing a collection of logs. + * + * These logs are used for: + * + * - orphan recovery: OST adds record on create + * - mtime/size consistency: the OST adds a record on first write + * - open/unlinked objects: OST adds a record on destroy + * + * - mds unlink log: the MDS adds an entry upon delete + * + * - raid1 replication log between OST's + * - MDS replication logs + */ + +#ifndef _LUSTRE_LOG_H +#define _LUSTRE_LOG_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#include +#include +#include + +#define LOG_NAME_LIMIT(logname, name) \ + snprintf(logname, sizeof(logname), "LOGS/%s", name) +#define LLOG_EEMPTY 4711 + +struct plain_handle_data { + struct list_head phd_entry; + struct llog_handle *phd_cat_handle; + struct llog_cookie phd_cookie; /* cookie of this log in its cat */ + int phd_last_idx; +}; + +struct cat_handle_data { + struct list_head chd_head; + struct llog_handle *chd_current_log; /* currently open log */ +}; + +/* In-memory descriptor for a log object or log catalog */ +struct llog_handle { + struct rw_semaphore lgh_lock; + struct llog_logid lgh_id; /* id of this log */ + struct llog_log_hdr *lgh_hdr; + struct file *lgh_file; + int lgh_last_idx; + struct llog_ctxt *lgh_ctxt; + union { + struct plain_handle_data phd; + struct cat_handle_data chd; + } u; +}; + +/* llog.c - general API */ +typedef int (*llog_cb_t)(struct llog_handle *, struct llog_rec_hdr *, void *); +typedef int (*llog_fill_rec_cb_t)(struct llog_rec_hdr *rec, void *data); +extern struct llog_handle *llog_alloc_handle(void); +int llog_init_handle(struct llog_handle *handle, int flags, + struct obd_uuid *uuid); +extern void llog_free_handle(struct llog_handle *handle); +int llog_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata); +int llog_reverse_process(struct llog_handle *loghandle, llog_cb_t cb, + void *data, void *catdata); +extern int llog_cancel_rec(struct llog_handle *loghandle, int index); +extern int llog_close(struct llog_handle *cathandle); +extern int llog_get_size(struct llog_handle *loghandle); + +/* llog_cat.c - catalog api */ +struct llog_process_data { + void *lpd_data; + llog_cb_t lpd_cb; +}; + +struct llog_process_cat_data { + int first_idx; + int last_idx; + /* to process catalog across zero record */ +}; + +int llog_cat_put(struct llog_handle *cathandle); +int llog_cat_add_rec(struct llog_handle *cathandle, struct llog_rec_hdr *rec, + struct llog_cookie *reccookie, void *buf); +int 
llog_cat_cancel_records(struct llog_handle *cathandle, int count, + struct llog_cookie *cookies); +int llog_cat_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_reverse_process(struct llog_handle *cat_llh, llog_cb_t cb, void *data); +int llog_cat_set_first_idx(struct llog_handle *cathandle, int index); + +/* llog_obd.c */ +int llog_setup(struct obd_device *obd, int index, struct obd_device *disk_obd, + int count, struct llog_logid *logid,struct llog_operations *op); +int llog_cleanup(struct llog_ctxt *); +int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp); +int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + struct lov_stripe_md *lsm, struct llog_cookie *logcookies, + int numcookies); +int llog_cancel(struct llog_ctxt *, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags); + +int llog_obd_origin_setup(struct obd_device *obd, int index, + struct obd_device *disk_obd, int count, + struct llog_logid *logid); +int llog_obd_origin_cleanup(struct llog_ctxt *ctxt); +int llog_obd_origin_add(struct llog_ctxt *ctxt, + struct llog_rec_hdr *rec, struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); + +int llog_cat_initialize(struct obd_device *obd, int count); +int obd_llog_init(struct obd_device *obd, struct obd_device *disk_obd, + int count, struct llog_catid *logid); + +int obd_llog_finish(struct obd_device *obd, int count); + +/* llog_ioctl.c */ +int llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data); +int llog_catalog_list(struct obd_device *obd, int count, + struct obd_ioctl_data *data); + +/* llog_net.c */ +int llog_initiator_connect(struct llog_ctxt *ctxt); +int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp); +int llog_origin_connect(struct llog_ctxt *ctxt, int count, + struct llog_logid *logid, struct llog_gen *gen, + struct obd_uuid *uuid); +int llog_handle_connect(struct ptlrpc_request *req); + +/* recov_thread.c */ 
+int llog_obd_repl_cancel(struct llog_ctxt *ctxt, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags); +int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp); +int llog_repl_connect(struct llog_ctxt *ctxt, int count, + struct llog_logid *logid, struct llog_gen *gen, + struct obd_uuid *uuid); + +struct llog_operations { + int (*lop_write_rec)(struct llog_handle *loghandle, + struct llog_rec_hdr *rec, + struct llog_cookie *logcookies, int numcookies, + void *, int idx); + int (*lop_destroy)(struct llog_handle *handle); + int (*lop_next_block)(struct llog_handle *h, int *curr_idx, + int next_idx, __u64 *offset, void *buf, int len); + int (*lop_prev_block)(struct llog_handle *h, + int prev_idx, void *buf, int len); + int (*lop_create)(struct llog_ctxt *ctxt, struct llog_handle **, + struct llog_logid *logid, char *name); + int (*lop_close)(struct llog_handle *handle); + int (*lop_read_header)(struct llog_handle *handle); + + int (*lop_setup)(struct obd_device *obd, int ctxt_idx, + struct obd_device *disk_obd, int count, + struct llog_logid *logid); + int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp); + int (*lop_cleanup)(struct llog_ctxt *ctxt); + int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec, + struct lov_stripe_md *lsm, + struct llog_cookie *logcookies, int numcookies); + int (*lop_cancel)(struct llog_ctxt *ctxt, struct lov_stripe_md *lsm, + int count, struct llog_cookie *cookies, int flags); + int (*lop_connect)(struct llog_ctxt *ctxt, int count, + struct llog_logid *logid, struct llog_gen *gen, + struct obd_uuid *uuid); + /* XXX add 2 more: commit callbacks and llog recovery functions */ +}; + +/* llog_lvfs.c */ +extern struct llog_operations llog_lvfs_ops; +int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, + char *name, int count, struct llog_catid *idarray); + +struct llog_ctxt { + int loc_idx; /* my index the obd array of ctxt's */ + struct llog_gen loc_gen; + 
struct obd_device *loc_obd; /* points back to the containing obd */ + struct obd_export *loc_exp; /* parent "disk" export (e.g. MDS) */ + struct obd_import *loc_imp; /* to use in RPC's: can be backward + pointing import */ + struct llog_operations *loc_logops; + struct llog_handle *loc_handle; + struct llog_canceld_ctxt *loc_llcd; + struct semaphore loc_sem; /* protects loc_llcd and loc_imp */ + void *llog_proc_cb; +}; + +static inline void llog_gen_init(struct llog_ctxt *ctxt) +{ /* sets ctxt->loc_gen.mnt_cnt from the mount count of the obd behind ctxt->loc_exp, selected by the obd type name */ + struct obd_device *obd = ctxt->loc_exp->exp_obd; + + if (!strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME)) + ctxt->loc_gen.mnt_cnt = obd->u.mds.mds_mount_count; + else if (!strstr(obd->obd_type->typ_name, LUSTRE_OST_NAME)) /* NOTE(review): this branch is taken when typ_name does NOT contain LUSTRE_OST_NAME -- confirm the negation is intended before relying on u.filter here */ + ctxt->loc_gen.mnt_cnt = obd->u.filter.fo_mount_count; + else + ctxt->loc_gen.mnt_cnt = 0; +} + +static inline int llog_gen_lt(struct llog_gen a, struct llog_gen b) +{ /* returns 1 when a orders before b, else 0: mnt_cnt is compared first, conn_cnt breaks ties */ + if (a.mnt_cnt < b.mnt_cnt) + return 1; + if (a.mnt_cnt > b.mnt_cnt) + return 0; + return(a.conn_cnt < b.conn_cnt ? 1 : 0); +} + +#define LLOG_GEN_INC(gen) ((gen).conn_cnt ++) +#define LLOG_PROC_BREAK 0x0001 +#define LLOG_DEL_RECORD 0x0002 + +static inline int llog_obd2ops(struct llog_ctxt *ctxt, + struct llog_operations **lop) +{ /* stores ctxt->loc_logops in *lop; returns -ENOTCONN for a NULL ctxt (*lop untouched), -EOPNOTSUPP when no ops table is set, 0 otherwise */ + if (ctxt == NULL) + return -ENOTCONN; + + *lop = ctxt->loc_logops; + if (*lop == NULL) + return -EOPNOTSUPP; + + return 0; +} + +static inline int llog_handle2ops(struct llog_handle *loghandle, + struct llog_operations **lop) +{ /* returns -EINVAL for a NULL handle, otherwise resolves the ops of loghandle->lgh_ctxt via llog_obd2ops() */ + if (loghandle == NULL) + return -EINVAL; + + return llog_obd2ops(loghandle->lgh_ctxt, lop); +} + +static inline int llog_data_len(int len) +{ /* len passed through size_round() */ + return size_round(len); +} + +static inline struct llog_ctxt *llog_get_context(struct obd_device *obd, + int index) +{ /* returns NULL when index is outside [0, LLOG_MAX_CTXTS); obd itself is not checked */ + if (index < 0 || index >= LLOG_MAX_CTXTS) + return NULL; + + return obd->obd_llog_ctxt[index]; +} + +static inline int llog_write_rec(struct llog_handle *handle, + struct llog_rec_hdr *rec, + struct llog_cookie *logcookies, + int numcookies, void *buf, int idx) +{ + struct 
llog_operations *lop; + int rc, buflen; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_write_rec == NULL) + RETURN(-EOPNOTSUPP); + + if (buf) + buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) + + sizeof(struct llog_rec_tail); + else + buflen = rec->lrh_len; + LASSERT(size_round(buflen) == buflen); + + rc = lop->lop_write_rec(handle, rec, logcookies, numcookies, buf, idx); + RETURN(rc); +} + +static inline int llog_read_header(struct llog_handle *handle) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_read_header == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_read_header(handle); + RETURN(rc); +} + +static inline int llog_destroy(struct llog_handle *handle) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(handle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_destroy == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_destroy(handle); + RETURN(rc); +} + +#if 0 +static inline int llog_cancel(struct obd_export *exp, + struct lov_stripe_md *lsm, int count, + struct llog_cookie *cookies, int flags) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(loghandle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_cancel == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_cancel(exp, lsm, count, cookies, flags); + RETURN(rc); +} +#endif + +static inline int llog_next_block(struct llog_handle *loghandle, int *cur_idx, + int next_idx, __u64 *cur_offset, void *buf, + int len) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_handle2ops(loghandle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_next_block == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_next_block(loghandle, cur_idx, next_idx, cur_offset, buf, + len); + RETURN(rc); +} + +static inline int llog_prev_block(struct llog_handle *loghandle, + int prev_idx, void *buf, int len) +{ + struct llog_operations *lop; + int rc; + ENTRY; 
+ + rc = llog_handle2ops(loghandle, &lop); + if (rc) + RETURN(rc); + if (lop->lop_prev_block == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_prev_block(loghandle, prev_idx, buf, len); + RETURN(rc); +} + +static inline int llog_create(struct llog_ctxt *ctxt, struct llog_handle **res, + struct llog_logid *logid, char *name) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_obd2ops(ctxt, &lop); + if (rc) + RETURN(rc); + if (lop->lop_create == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_create(ctxt, res, logid, name); + RETURN(rc); +} + +static inline int llog_connect(struct llog_ctxt *ctxt, int count, + struct llog_logid *logid, struct llog_gen *gen, + struct obd_uuid *uuid) +{ + struct llog_operations *lop; + int rc; + ENTRY; + + rc = llog_obd2ops(ctxt, &lop); + if (rc) + RETURN(rc); + if (lop->lop_connect == NULL) + RETURN(-EOPNOTSUPP); + + rc = lop->lop_connect(ctxt, count, logid, gen, uuid); + RETURN(rc); +} + +#endif diff --git a/lustre/include/linux/lustre_mdc.h b/lustre/include/lustre_mdc.h similarity index 82% rename from lustre/include/linux/lustre_mdc.h rename to lustre/include/lustre_mdc.h index b2787c0..c1dfef3 100644 --- a/lustre/include/linux/lustre_mdc.h +++ b/lustre/include/lustre_mdc.h @@ -17,13 +17,13 @@ # include # endif #endif -#include +#include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include struct ptlrpc_client; struct obd_export; diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h new file mode 100644 index 0000000..40795f1 --- /dev/null +++ b/lustre/include/lustre_mds.h @@ -0,0 +1,96 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * This file is part of Lustre, http://www.lustre.org + * + * MDS data structures. + * See also lustre_idl.h for wire formats of requests. 
+ */ + +#ifndef _LUSTRE_MDS_H +#define _LUSTRE_MDS_H + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +struct ldlm_lock_desc; +struct mds_obd; +struct ptlrpc_connection; +struct ptlrpc_client; +struct obd_export; +struct ptlrpc_request; +struct obd_device; +struct ll_file_data; + +struct mds_update_record { + __u32 ur_opcode; + struct ll_fid *ur_fid1; + struct ll_fid *ur_fid2; + int ur_namelen; + char *ur_name; + int ur_tgtlen; + char *ur_tgt; + int ur_eadatalen; + void *ur_eadata; + int ur_cookielen; + struct llog_cookie *ur_logcookies; + struct iattr ur_iattr; + struct lvfs_ucred ur_uc; + __u64 ur_rdev; + __u64 ur_time; + __u32 ur_mode; + __u32 ur_flags; + struct lvfs_grp_hash_entry *ur_grp_entry; +}; + +/* file data for open files on MDS */ +struct mds_file_data { + struct portals_handle mfd_handle; /* must be first */ + atomic_t mfd_refcount; + struct list_head mfd_list; /* protected by med_open_lock */ + __u64 mfd_xid; + int mfd_mode; + struct dentry *mfd_dentry; +}; + +/* ACL */ +#ifdef CONFIG_FS_POSIX_ACL +#define LUSTRE_POSIX_ACL_MAX_ENTRIES (32) +#define LUSTRE_POSIX_ACL_MAX_SIZE \ + (xattr_acl_size(LUSTRE_POSIX_ACL_MAX_ENTRIES)) +#else +#define LUSTRE_POSIX_ACL_MAX_SIZE 0 +#endif + +/* mds/mds_reint.c */ +int mds_reint_rec(struct mds_update_record *r, int offset, + struct ptlrpc_request *req, struct lustre_handle *); + +/* ioctls for trying requests */ +#define IOC_REQUEST_TYPE 'f' +#define IOC_REQUEST_MIN_NR 30 + +#define IOC_REQUEST_GETATTR _IOWR('f', 30, long) +#define IOC_REQUEST_READPAGE _IOWR('f', 31, long) +#define IOC_REQUEST_SETATTR _IOWR('f', 32, long) +#define IOC_REQUEST_CREATE _IOWR('f', 33, long) +#define IOC_REQUEST_OPEN _IOWR('f', 34, long) +#define IOC_REQUEST_CLOSE _IOWR('f', 35, long) +#define IOC_REQUEST_MAX_NR 35 + +#endif diff --git 
a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h new file mode 100644 index 0000000..217f0c4 --- /dev/null +++ b/lustre/include/lustre_net.h @@ -0,0 +1,856 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _LUSTRE_NET_H +#define _LUSTRE_NET_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#include +// #include +#include +#include +#include +#include +#include + +/* MD flags we _always_ use */ +#define PTLRPC_MD_OPTIONS 0 + +/* Define maxima for bulk I/O + * CAVEAT EMPTOR, with multinet (i.e. routers forwarding between networks) + * these limits are system wide and not interface-local. */ +#define PTLRPC_MAX_BRW_SIZE LNET_MTU +#define PTLRPC_MAX_BRW_PAGES (PTLRPC_MAX_BRW_SIZE/CFS_PAGE_SIZE) + +/* When PAGE_SIZE is a constant, we can check our arithmetic here with cpp! 
*/ +#ifdef __KERNEL__ +# if ((PTLRPC_MAX_BRW_PAGES & (PTLRPC_MAX_BRW_PAGES - 1)) != 0) +# error "PTLRPC_MAX_BRW_PAGES isn't a power of two" +# endif +# if (PTLRPC_MAX_BRW_SIZE != (PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE)) +# error "PTLRPC_MAX_BRW_SIZE isn't PTLRPC_MAX_BRW_PAGES * CFS_PAGE_SIZE" +# endif +# if (PTLRPC_MAX_BRW_SIZE > LNET_MTU) +# error "PTLRPC_MAX_BRW_SIZE too big" +# endif +# if (PTLRPC_MAX_BRW_PAGES > LNET_MAX_IOV) +# error "PTLRPC_MAX_BRW_PAGES too big" +# endif +#endif /* __KERNEL__ */ + +/* Size over which to OBD_VMALLOC() rather than OBD_ALLOC() service request + * buffers */ +#define SVC_BUF_VMALLOC_THRESHOLD (2 * PAGE_SIZE) + +/* The following constants determine how memory is used to buffer incoming + * service requests. + * + * ?_NBUFS # buffers to allocate when growing the pool + * ?_BUFSIZE # bytes in a single request buffer + * ?_MAXREQSIZE # maximum request service will receive + * + * When fewer than ?_NBUFS/2 buffers are posted for receive, another chunk + * of ?_NBUFS is added to the pool. + * + * Messages larger than ?_MAXREQSIZE are dropped. Request buffers are + * considered full when less than ?_MAXREQSIZE is left in them. + */ + +#define LDLM_NUM_THREADS min((int)(smp_num_cpus * smp_num_cpus * 8), 64) +#define LDLM_NBUFS (64 * smp_num_cpus) +#define LDLM_BUFSIZE (8 * 1024) +#define LDLM_MAXREQSIZE (5 * 1024) +#define LDLM_MAXREPSIZE (1024) + +#define MDT_MIN_THREADS 2UL +#define MDT_MAX_THREADS 32UL +#define MDT_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ + num_physpages >> (25 - PAGE_SHIFT)), 2UL) +#define FLD_NUM_THREADS max(min_t(unsigned long, MDT_MAX_THREADS, \ + num_physpages >> (25 - PAGE_SHIFT)), 2UL) + +#define MDS_MAX_THREADS 512UL +#define MDS_DEF_THREADS max(2UL, min_t(unsigned long, 32, \ + num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT))) +#define MDS_NBUFS (64 * smp_num_cpus) +#define MDS_BUFSIZE (8 * 1024) +/* Assume file name length = FNAME_MAX = 256 (true for ext3). 
+ * path name length = PATH_MAX = 4096 + * LOV MD size max = EA_MAX = 4000 + * symlink: FNAME_MAX + PATH_MAX <- largest + * link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create) + * rename: FNAME_MAX + FNAME_MAX + * open: FNAME_MAX + EA_MAX + * + * MDS_MAXREQSIZE ~= 4736 bytes = + * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX + * MDS_MAXREPSIZE ~= 8300 bytes = lustre_msg + llog_header + * or, for mds_close() and mds_reint_unlink() on a many-OST filesystem: + * = 9210 bytes = lustre_msg + mds_body + 160 * (easize + cookiesize) + * + * Realistic size is about 512 bytes (20 character name + 128 char symlink), + * except in the open case where there are a large number of OSTs in a LOV. + */ +#define MDS_MAXREQSIZE (5 * 1024) +#define MDS_MAXREPSIZE max(9 * 1024, 280 + LOV_MAX_STRIPE_COUNT * 56) + +/* FIXME fix all constants here. Andreas suggests dynamically adding threads. */ +#define MGS_MAX_THREADS 8UL +#define MGS_NUM_THREADS max(2UL, min_t(unsigned long, MGS_MAX_THREADS, \ + num_physpages * smp_num_cpus >> (26 - PAGE_SHIFT))) + +#define MGS_NBUFS (64 * smp_num_cpus) +#define MGS_BUFSIZE (8 * 1024) +#define MGS_MAXREQSIZE (5 * 1024) +#define MGS_MAXREPSIZE (9 * 1024) + +#define OST_MAX_THREADS 512UL +#define OST_DEF_THREADS max_t(unsigned long, 2, \ + (num_physpages >> (26-PAGE_SHIFT)) * smp_num_cpus) +#define OST_NBUFS (64 * smp_num_cpus) +#define OST_BUFSIZE (8 * 1024) +/* OST_MAXREQSIZE ~= 4768 bytes = + * lustre_msg + obdo + 16 * obd_ioobj + 256 * niobuf_remote + * + * - single object with 16 pages is 512 bytes + * - OST_MAXREQSIZE must be at least 1 page of cookies plus some spillover + */ +#define OST_MAXREQSIZE (5 * 1024) +#define OST_MAXREPSIZE (9 * 1024) + +struct ptlrpc_connection { + struct list_head c_link; + lnet_nid_t c_self; + lnet_process_id_t c_peer; + struct obd_uuid c_remote_uuid; + atomic_t c_refcount; +}; + +struct ptlrpc_client { + __u32 cli_request_portal; + __u32 cli_reply_portal; + char *cli_name; +}; + 
+/* state flags of requests */ +/* XXX only ones left are those used by the bulk descs as well! */ +#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ +#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ + +#define REQ_MAX_ACK_LOCKS 8 + +#define SWAB_PARANOIA 1 +#if SWAB_PARANOIA +/* unpacking: assert idx is within the swab mask and not unpacked already, then mark it. The bit is built with 1U so that idx == 31 does not shift into the sign bit of an int. */ +#define LASSERT_REQSWAB(rq, idx) \ +do { \ + LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8); \ + LASSERT (((rq)->rq_req_swab_mask & (1U << (idx))) == 0); \ + (rq)->rq_req_swab_mask |= (1U << (idx)); \ +} while (0) + +#define LASSERT_REPSWAB(rq, idx) \ +do { \ + LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8); \ + LASSERT (((rq)->rq_rep_swab_mask & (1U << (idx))) == 0); \ + (rq)->rq_rep_swab_mask |= (1U << (idx)); \ +} while (0) + +/* just looking: assert idx is within the swab mask and already unpacked (bit built with 1U for the same reason) */ +#define LASSERT_REQSWABBED(rq, idx) \ +LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8 && \ + ((rq)->rq_req_swab_mask & (1U << (idx))) != 0) + +#define LASSERT_REPSWABBED(rq, idx) \ +LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8 && \ + ((rq)->rq_rep_swab_mask & (1U << (idx))) != 0) +#else +#define LASSERT_REQSWAB(rq, idx) +#define LASSERT_REPSWAB(rq, idx) +#define LASSERT_REQSWABBED(rq, idx) +#define LASSERT_REPSWABBED(rq, idx) +#endif + +union ptlrpc_async_args { + /* Scratchpad for passing args to completion interpreter. Users + * cast to the struct of their choosing, and LASSERT that this is + * big enough. For _tons_ of context, OBD_ALLOC a struct and store + * a pointer to it here. The pointer_arg ensures this struct is at + * least big enough for that. 
*/ + void *pointer_arg[9]; + __u64 space[4]; +}; + +struct ptlrpc_request_set; +typedef int (*set_interpreter_func)(struct ptlrpc_request_set *, void *, int); + +struct ptlrpc_request_set { + int set_remaining; /* # uncompleted requests */ + cfs_waitq_t set_waitq; + cfs_waitq_t *set_wakeup_ptr; + struct list_head set_requests; + set_interpreter_func set_interpret; /* completion callback */ + void *set_arg; /* completion context */ + /* locked so that any old caller can communicate requests to + * the set holder who can then fold them into the lock-free set */ + spinlock_t set_new_req_lock; + struct list_head set_new_requests; +}; + +struct ptlrpc_bulk_desc; + +/* + * ptlrpc callback & work item stuff + */ +struct ptlrpc_cb_id { + void (*cbid_fn)(lnet_event_t *ev); /* specific callback fn */ + void *cbid_arg; /* additional arg */ +}; + +#define RS_MAX_LOCKS 4 +#define RS_DEBUG 1 + +struct ptlrpc_reply_state { + struct ptlrpc_cb_id rs_cb_id; + struct list_head rs_list; + struct list_head rs_exp_list; + struct list_head rs_obd_list; +#if RS_DEBUG + struct list_head rs_debug_list; +#endif + /* updates to following flag serialised by srv_request_lock */ + unsigned int rs_difficult:1; /* ACK/commit stuff */ + unsigned int rs_scheduled:1; /* being handled? */ + unsigned int rs_scheduled_ever:1;/* any schedule attempts? */ + unsigned int rs_handled:1; /* been handled yet? */ + unsigned int rs_on_net:1; /* reply_out_callback pending? 
*/ + unsigned int rs_prealloc:1; /* rs from prealloc list */ + + int rs_size; + __u64 rs_transno; + __u64 rs_xid; + struct obd_export *rs_export; + struct ptlrpc_service *rs_service; + lnet_handle_md_t rs_md_h; + atomic_t rs_refcount; + + /* locks awaiting client reply ACK */ + int rs_nlocks; + struct lustre_handle rs_locks[RS_MAX_LOCKS]; + ldlm_mode_t rs_modes[RS_MAX_LOCKS]; + /* last member: variable sized reply message */ + struct lustre_msg rs_msg; +}; + +struct ptlrpc_thread; + +enum rq_phase { + RQ_PHASE_NEW = 0xebc0de00, + RQ_PHASE_RPC = 0xebc0de01, + RQ_PHASE_BULK = 0xebc0de02, + RQ_PHASE_INTERPRET = 0xebc0de03, + RQ_PHASE_COMPLETE = 0xebc0de04, +}; + +struct ptlrpc_request_pool { + spinlock_t prp_lock; + struct list_head prp_req_list; /* list of ptlrpc_request structs */ + int prp_rq_size; + void (*prp_populate)(struct ptlrpc_request_pool *, int); +}; + +struct ptlrpc_request { + int rq_type; /* one of PTL_RPC_MSG_* */ + struct list_head rq_list; + struct list_head rq_history_list; /* server-side history */ + __u64 rq_history_seq; /* history sequence # */ + int rq_status; + spinlock_t rq_lock; + /* client-side flags */ + unsigned int rq_intr:1, rq_replied:1, rq_err:1, + rq_timedout:1, rq_resend:1, rq_restart:1, + /* + * when ->rq_replay is set, request is kept by the client even + * after server commits corresponding transaction. This is + * used for operations that require sequence of multiple + * requests to be replayed. The only example currently is file + * open/close. When last request in such a sequence is + * committed, ->rq_replay is cleared on all requests in the + * sequence. 
+ */ + rq_replay:1, + rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, + rq_no_delay:1, rq_net_err:1; + enum rq_phase rq_phase; /* one of RQ_PHASE_* */ + atomic_t rq_refcount; /* client-side refcount for SENT race */ + + struct ptlrpc_thread *rq_svc_thread; /* initial thread servicing req */ + + int rq_request_portal; /* XXX FIXME bug 249 */ + int rq_reply_portal; /* XXX FIXME bug 249 */ + + int rq_nob_received; /* client-side # reply bytes actually received */ + + int rq_reqlen; + struct lustre_msg *rq_reqmsg; + + int rq_timeout; /* time to wait for reply (seconds) */ + int rq_replen; + struct lustre_msg *rq_repmsg; + __u64 rq_transno; + __u64 rq_xid; + struct list_head rq_replay_list; + +#if SWAB_PARANOIA + __u32 rq_req_swab_mask; + __u32 rq_rep_swab_mask; +#endif + + int rq_import_generation; + enum lustre_imp_state rq_send_state; + + /* client+server request */ + lnet_handle_md_t rq_req_md_h; + struct ptlrpc_cb_id rq_req_cbid; + + /* server-side... */ + struct timeval rq_arrival_time; /* request arrival time */ + struct ptlrpc_reply_state *rq_reply_state; /* separated reply state */ + struct ptlrpc_request_buffer_desc *rq_rqbd; /* incoming request buffer*/ +#if CRAY_XT3 + __u32 rq_uid; /* peer uid, used in MDS only */ +#endif + + /* client-only incoming reply */ + lnet_handle_md_t rq_reply_md_h; + cfs_waitq_t rq_reply_waitq; + struct ptlrpc_cb_id rq_reply_cbid; + + lnet_nid_t rq_self; + lnet_process_id_t rq_peer; + struct obd_export *rq_export; + struct obd_import *rq_import; + + void (*rq_replay_cb)(struct ptlrpc_request *); + void (*rq_commit_cb)(struct ptlrpc_request *); + void *rq_cb_data; + + struct ptlrpc_bulk_desc *rq_bulk; /* client side bulk */ + time_t rq_sent; /* when request sent, seconds */ + + /* Multi-rpc bits */ + struct list_head rq_set_chain; + struct ptlrpc_request_set *rq_set; + void *rq_interpret_reply; /* Async completion handler */ + union ptlrpc_async_args rq_async_args; /* Async completion context */ + void *rq_ptlrpcd_data; + struct 
ptlrpc_request_pool *rq_pool; /* Pool if request from + preallocated list */ +}; + +static inline const char * +ptlrpc_rqphase2str(const struct ptlrpc_request *req) +{ + switch (req->rq_phase) { + case RQ_PHASE_NEW: + return "New"; + case RQ_PHASE_RPC: + return "Rpc"; + case RQ_PHASE_BULK: + return "Bulk"; + case RQ_PHASE_INTERPRET: + return "Interpret"; + case RQ_PHASE_COMPLETE: + return "Complete"; + default: + return "?Phase?"; + } +} + +/* Spare the preprocessor, spoil the bugs. */ +#define FLAG(field, str) (field ? str : "") + +#define DEBUG_REQ_FLAGS(req) \ + ptlrpc_rqphase2str(req), \ + FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ + FLAG(req->rq_err, "E"), \ + FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ + FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ + FLAG(req->rq_no_resend, "N"), \ + FLAG(req->rq_waiting, "W") + +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s" + +#define __DEBUG_REQ(CDEB_TYPE, level, req, fmt, args...) \ +CDEB_TYPE(level, "@@@ " fmt \ + " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \ + REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid, \ + req->rq_transno, \ + req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \ + req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "", \ + req->rq_import ? \ + (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "", \ + (req->rq_import && req->rq_import->imp_client) ? \ + req->rq_import->imp_client->cli_request_portal : -1, \ + req->rq_reqlen, req->rq_replen, \ + atomic_read(&req->rq_refcount), \ + DEBUG_REQ_FLAGS(req), \ + req->rq_reqmsg ? req->rq_reqmsg->flags : 0, \ + req->rq_repmsg ? req->rq_repmsg->flags : 0, \ + req->rq_status, req->rq_repmsg ? req->rq_repmsg->status : 0) + +/* for most callers (level is a constant) this is resolved at compile time */ +#define DEBUG_REQ(level, req, fmt, args...) 
\ +do { \ + if ((level) & (D_ERROR | D_WARNING)) \ + __DEBUG_REQ(CDEBUG_LIMIT, level, req, fmt, ## args); \ + else \ + __DEBUG_REQ(CDEBUG, level, req, fmt, ## args); \ +} while (0) + +#define DEBUG_REQ_EX(level, req, fmt, args...) \ +do { \ + if ((level) & (D_ERROR | D_WARNING)) \ + __DEBUG_REQ(CDEBUG_LIMIT, D_ERROR, req, fmt, ## args); \ + else \ + __DEBUG_REQ(CDEBUG_EX, level, req, fmt, ## args); \ +} while (0) + +struct ptlrpc_bulk_page { + struct list_head bp_link; + int bp_buflen; + int bp_pageoffset; /* offset within a page */ + struct page *bp_page; +}; + +#define BULK_GET_SOURCE 0 +#define BULK_PUT_SINK 1 +#define BULK_GET_SINK 2 +#define BULK_PUT_SOURCE 3 + +struct ptlrpc_bulk_desc { + unsigned int bd_success:1; /* completed successfully */ + unsigned int bd_network_rw:1; /* accessible to the network */ + unsigned int bd_type:2; /* {put,get}{source,sink} */ + unsigned int bd_registered:1; /* client side */ + spinlock_t bd_lock; /* serialise with callback */ + int bd_import_generation; + struct obd_export *bd_export; + struct obd_import *bd_import; + __u32 bd_portal; + struct ptlrpc_request *bd_req; /* associated request */ + cfs_waitq_t bd_waitq; /* server side only WQ */ + int bd_iov_count; /* # entries in bd_iov */ + int bd_max_iov; /* allocated size of bd_iov */ + int bd_nob; /* # bytes covered */ + int bd_nob_transferred; /* # bytes GOT/PUT */ + + __u64 bd_last_xid; + + struct ptlrpc_cb_id bd_cbid; /* network callback info */ + lnet_handle_md_t bd_md_h; /* associated MD */ + +#if defined(__KERNEL__) + lnet_kiov_t bd_iov[0]; +#else + lnet_md_iovec_t bd_iov[0]; +#endif +}; + +struct lu_context; +struct ptlrpc_thread { + + struct list_head t_link; /* active threads for service, from svc->srv_threads */ + + void *t_data; /* thread-private data (preallocated memory) */ + __u32 t_flags; + + unsigned int t_id; /* service thread index, from ptlrpc_start_threads */ + cfs_waitq_t t_ctl_waitq; + struct lu_context *t_ctx; +}; + +struct ptlrpc_request_buffer_desc { 
+ struct list_head rqbd_list; + struct list_head rqbd_reqs; + struct ptlrpc_service *rqbd_service; + lnet_handle_md_t rqbd_md_h; + int rqbd_refcount; + char *rqbd_buffer; + struct ptlrpc_cb_id rqbd_cbid; + struct ptlrpc_request rqbd_req; +}; + +typedef int (*svc_handler_t)(struct ptlrpc_request *req); +typedef void (*svcreq_printfn_t)(void *, struct ptlrpc_request *); + +struct ptlrpc_service { + struct list_head srv_list; /* chain thru all services */ + int srv_max_req_size; /* biggest request to receive */ + int srv_max_reply_size; /* biggest reply to send */ + int srv_buf_size; /* size of individual buffers */ + int srv_nbuf_per_group; /* # buffers to allocate in 1 group */ + int srv_nbufs; /* total # req buffer descs allocated */ + int srv_nthreads; /* # running threads */ + int srv_n_difficult_replies; /* # 'difficult' replies */ + int srv_n_active_reqs; /* # reqs being served */ + cfs_duration_t srv_rqbd_timeout; /* timeout before re-posting reqs, in tick */ + int srv_watchdog_timeout; /* soft watchdog timeout, in ms */ + int srv_num_threads; /* # threads to start/started */ + unsigned srv_cpu_affinity:1; /* bind threads to CPUs */ + + __u32 srv_req_portal; + __u32 srv_rep_portal; + + int srv_n_queued_reqs; /* # reqs waiting to be served */ + struct list_head srv_request_queue; /* reqs waiting for service */ + + struct list_head srv_request_history; /* request history */ + __u64 srv_request_seq; /* next request sequence # */ + __u64 srv_request_max_cull_seq; /* highest seq culled from history */ + svcreq_printfn_t srv_request_history_print_fn; /* service-specific print fn */ + + struct list_head srv_idle_rqbds; /* request buffers to be reposted */ + struct list_head srv_active_rqbds; /* req buffers receiving */ + struct list_head srv_history_rqbds; /* request buffer history */ + int srv_nrqbd_receiving; /* # posted request buffers */ + int srv_n_history_rqbds; /* # request buffers in history */ + int srv_max_history_rqbds; /* max # request buffers in history 
*/ + + atomic_t srv_outstanding_replies; + struct list_head srv_active_replies; /* all the active replies */ + struct list_head srv_reply_queue; /* replies waiting for service */ + + cfs_waitq_t srv_waitq; /* all threads sleep on this. This + * wait-queue is signalled when new + * incoming request arrives and when + * difficult reply has to be handled. */ + + struct list_head srv_threads; + svc_handler_t srv_handler; + + char *srv_name; /* only statically allocated strings here; we don't clean them */ + + spinlock_t srv_lock; + + cfs_proc_dir_entry_t *srv_procroot; + struct lprocfs_stats *srv_stats; + + /* List of free reply_states */ + struct list_head srv_free_rs_list; + /* waitq to run, when adding stuff to srv_free_rs_list */ + cfs_waitq_t srv_free_rs_waitq; + + /* + * if non-NULL called during thread creation (ptlrpc_start_thread()) + * to initialize service specific per-thread state. + */ + int (*srv_init)(struct ptlrpc_thread *thread); + /* + * if non-NULL called during thread shutdown (ptlrpc_main()) to + * destruct state created by ->srv_init(). 
+ */ + void (*srv_done)(struct ptlrpc_thread *thread); + + //struct ptlrpc_srv_ni srv_interfaces[0]; +}; + +/* ptlrpc/events.c */ +extern lnet_handle_eq_t ptlrpc_eq_h; +extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, + lnet_process_id_t *peer, lnet_nid_t *self); +extern void request_out_callback (lnet_event_t *ev); +extern void reply_in_callback(lnet_event_t *ev); +extern void client_bulk_callback (lnet_event_t *ev); +extern void request_in_callback(lnet_event_t *ev); +extern void reply_out_callback(lnet_event_t *ev); +extern void server_bulk_callback (lnet_event_t *ev); + +/* ptlrpc/connection.c */ +void ptlrpc_dump_connections(void); +void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *); +struct ptlrpc_connection *ptlrpc_get_connection(lnet_process_id_t peer, + lnet_nid_t self, struct obd_uuid *uuid); +int ptlrpc_put_connection(struct ptlrpc_connection *c); +struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); +void ptlrpc_init_connection(void); +void ptlrpc_cleanup_connection(void); +extern lnet_pid_t ptl_get_pid(void); + +/* ptlrpc/niobuf.c */ +int ptlrpc_start_bulk_transfer(struct ptlrpc_bulk_desc *desc); +void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc); +int ptlrpc_register_bulk(struct ptlrpc_request *req); +void ptlrpc_unregister_bulk (struct ptlrpc_request *req); + +static inline int ptlrpc_bulk_active (struct ptlrpc_bulk_desc *desc) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave (&desc->bd_lock, flags); + rc = desc->bd_network_rw; + spin_unlock_irqrestore (&desc->bd_lock, flags); + return (rc); +} + +int ptlrpc_send_reply(struct ptlrpc_request *req, int); +int ptlrpc_reply(struct ptlrpc_request *req); +int ptlrpc_error(struct ptlrpc_request *req); +void ptlrpc_resend_req(struct ptlrpc_request *request); +int ptl_send_rpc(struct ptlrpc_request *request, int noreply); +int ptlrpc_register_rqbd (struct ptlrpc_request_buffer_desc *rqbd); + +/* ptlrpc/client.c */ +void 
ptlrpc_init_client(int req_portal, int rep_portal, char *name, + struct ptlrpc_client *); +void ptlrpc_cleanup_client(struct obd_import *imp); +struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); + +static inline int +ptlrpc_client_receiving_reply (struct ptlrpc_request *req) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&req->rq_lock, flags); + rc = req->rq_receiving_reply; + spin_unlock_irqrestore(&req->rq_lock, flags); + return (rc); +} + +static inline int +ptlrpc_client_replied (struct ptlrpc_request *req) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&req->rq_lock, flags); + rc = req->rq_replied; + spin_unlock_irqrestore(&req->rq_lock, flags); + return (rc); +} + +static inline void +ptlrpc_wake_client_req (struct ptlrpc_request *req) +{ + if (req->rq_set == NULL) + cfs_waitq_signal(&req->rq_reply_waitq); + else + cfs_waitq_signal(&req->rq_set->set_waitq); +} + +int ptlrpc_queue_wait(struct ptlrpc_request *req); +int ptlrpc_replay_req(struct ptlrpc_request *req); +void ptlrpc_unregister_reply(struct ptlrpc_request *req); +void ptlrpc_restart_req(struct ptlrpc_request *req); +void ptlrpc_abort_inflight(struct obd_import *imp); + +struct ptlrpc_request_set *ptlrpc_prep_set(void); +int ptlrpc_set_next_timeout(struct ptlrpc_request_set *); +int ptlrpc_check_set(struct ptlrpc_request_set *set); +int ptlrpc_set_wait(struct ptlrpc_request_set *); +int ptlrpc_expired_set(void *data); +void ptlrpc_interrupted_set(void *data); +void ptlrpc_mark_interrupted(struct ptlrpc_request *req); +void ptlrpc_set_destroy(struct ptlrpc_request_set *); +void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); +void ptlrpc_set_add_new_req(struct ptlrpc_request_set *, + struct ptlrpc_request *); + +void ptlrpc_free_rq_pool(struct ptlrpc_request_pool *pool); +void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq); +struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int, int, + void 
(*populate_pool)(struct ptlrpc_request_pool *, int)); +struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, __u32 version, + int opcode, int count, + int *lengths, char **bufs); +struct ptlrpc_request *ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, + int opcode, int count, int *lengths, + char **bufs, + struct ptlrpc_request_pool *pool); +void ptlrpc_free_req(struct ptlrpc_request *request); +void ptlrpc_req_finished(struct ptlrpc_request *request); +void ptlrpc_req_finished_with_imp_lock(struct ptlrpc_request *request); +struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); +struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, + int npages, int type, int portal); +struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req, + int npages, int type, int portal); +void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); +void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, + cfs_page_t *page, int pageoffset, int len); +void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, + struct obd_import *imp); +__u64 ptlrpc_next_xid(void); +__u64 ptlrpc_sample_next_xid(void); +__u64 ptlrpc_req_xid(struct ptlrpc_request *request); + +struct ptlrpc_service_conf { + int psc_nbufs; + int psc_bufsize; + int psc_max_req_size; + int psc_max_reply_size; + int psc_req_portal; + int psc_rep_portal; + int psc_watchdog_timeout; /* in ms */ + int psc_num_threads; +}; + +/* ptlrpc/service.c */ +void ptlrpc_save_lock (struct ptlrpc_request *req, + struct lustre_handle *lock, int mode); +void ptlrpc_commit_replies (struct obd_device *obd); +void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs); +struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c, + svc_handler_t h, char *name, + struct proc_dir_entry *proc_entry, + svcreq_printfn_t prntfn); + +struct ptlrpc_service *ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, + int max_reply_size, + int req_portal, int 
rep_portal, + int watchdog_timeout, /* in ms */ + svc_handler_t, char *name, + cfs_proc_dir_entry_t *proc_entry, + svcreq_printfn_t, int num_threads); +void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); + +int ptlrpc_start_threads(struct obd_device *dev, struct ptlrpc_service *svc, + char *base_name); +int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, + char *name, int id); +int ptlrpc_unregister_service(struct ptlrpc_service *service); +int liblustre_check_services (void *arg); +void ptlrpc_daemonize(char *name); +int ptlrpc_service_health_check(struct ptlrpc_service *); + + +struct ptlrpc_svc_data { + char *name; + struct ptlrpc_service *svc; + struct ptlrpc_thread *thread; + struct obd_device *dev; +}; + +/* ptlrpc/import.c */ +int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid); +int ptlrpc_init_import(struct obd_import *imp); +int ptlrpc_disconnect_import(struct obd_import *imp); +int ptlrpc_import_recovery_state_machine(struct obd_import *imp); + +/* ptlrpc/pack_generic.c */ +int lustre_msg_swabbed(struct lustre_msg *msg); +int lustre_msg_check_version(struct lustre_msg *msg, __u32 version); +int lustre_pack_request(struct ptlrpc_request *, int count, const int *lens, + char **bufs); +int lustre_pack_reply(struct ptlrpc_request *, int count, const int *lens, + char **bufs); +void lustre_shrink_reply(struct ptlrpc_request *req, + int segment, unsigned int newlen, int move_data); +void lustre_free_reply_state(struct ptlrpc_reply_state *rs); +int lustre_msg_size(int count, const int *lengths); +int lustre_unpack_msg(struct lustre_msg *m, int len); +void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); +int lustre_msg_buflen(struct lustre_msg *m, int n); +char *lustre_msg_string (struct lustre_msg *m, int n, int max_len); +void *lustre_swab_buf(struct lustre_msg *, int n, int minlen, void *swabber); +void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen, + void *swabber); +void 
*lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen, + void *swabber); + +static inline void +ptlrpc_rs_addref(struct ptlrpc_reply_state *rs) +{ + LASSERT(atomic_read(&rs->rs_refcount) > 0); + atomic_inc(&rs->rs_refcount); +} + +static inline void +ptlrpc_rs_decref(struct ptlrpc_reply_state *rs) +{ + LASSERT(atomic_read(&rs->rs_refcount) > 0); + if (atomic_dec_and_test(&rs->rs_refcount)) + lustre_free_reply_state(rs); +} + +/* ldlm/ldlm_lib.c */ +int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg); +int client_obd_cleanup(struct obd_device *obddev); +int client_connect_import(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid, struct obd_connect_data *); +int client_disconnect_export(struct obd_export *exp); +int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority); +int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); +int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); + +/* ptlrpc/pinger.c */ +int ptlrpc_pinger_add_import(struct obd_import *imp); +int ptlrpc_pinger_del_import(struct obd_import *imp); +#ifdef __KERNEL__ +void ping_evictor_start(void); +void ping_evictor_stop(void); +#else +#define ping_evictor_start() do {} while (0) +#define ping_evictor_stop() do {} while (0) +#endif + +/* ptlrpc/ptlrpcd.c */ +void ptlrpcd_wake(struct ptlrpc_request *req); +void ptlrpcd_add_req(struct ptlrpc_request *req); +int ptlrpcd_addref(void); +void ptlrpcd_decref(void); + +/* ptlrpc/lproc_ptlrpc.c */ +#ifdef LPROCFS +void ptlrpc_lprocfs_register_obd(struct obd_device *obd); +void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd); +#else +static inline void ptlrpc_lprocfs_register_obd(struct obd_device *obd) {} +static inline void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) {} +#endif + +/* ptlrpc/llog_server.c */ +int llog_origin_handle_create(struct ptlrpc_request *req); +int llog_origin_handle_destroy(struct 
ptlrpc_request *req); +int llog_origin_handle_prev_block(struct ptlrpc_request *req); +int llog_origin_handle_next_block(struct ptlrpc_request *req); +int llog_origin_handle_read_header(struct ptlrpc_request *req); +int llog_origin_handle_close(struct ptlrpc_request *req); +int llog_origin_handle_cancel(struct ptlrpc_request *req); +int llog_catinfo(struct ptlrpc_request *req); + +/* ptlrpc/llog_client.c */ +extern struct llog_operations llog_client_ops; + +#endif diff --git a/lustre/include/linux/lustre_param.h b/lustre/include/lustre_param.h similarity index 100% rename from lustre/include/linux/lustre_param.h rename to lustre/include/lustre_param.h diff --git a/lustre/include/lustre_quota.h b/lustre/include/lustre_quota.h new file mode 100644 index 0000000..6516fb9 --- /dev/null +++ b/lustre/include/lustre_quota.h @@ -0,0 +1,399 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _LUSTRE_QUOTA_H +#define _LUSTRE_QUOTA_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#include +#include +#include + +struct obd_device; +struct client_obd; + +#ifndef NR_DQHASH +#define NR_DQHASH 45 +#endif + +#ifdef HAVE_QUOTA_SUPPORT + +#ifdef __KERNEL__ + +/* structures to access admin quotafile */ +struct lustre_mem_dqinfo { + unsigned int dqi_bgrace; + unsigned int dqi_igrace; + unsigned long dqi_flags; + unsigned int dqi_blocks; + unsigned int dqi_free_blk; + unsigned int dqi_free_entry; +}; + +struct lustre_quota_info { + struct file *qi_files[MAXQUOTAS]; + struct lustre_mem_dqinfo qi_info[MAXQUOTAS]; +}; + +#define DQ_STATUS_AVAIL 0x0 /* Available dquot */ +#define DQ_STATUS_SET 0x01 /* Somebody is setting dquot */ +#define DQ_STATUS_RECOVERY 0x02 /* dquot is in recovery */ + +struct lustre_dquot { + /* Hash list in memory, protected by dquot_hash_lock */ + struct list_head dq_hash; + /* Protect the data in lustre_dquot */ + struct semaphore dq_sem; + /* Use count */ + int dq_refcnt; + /* Pointer of quota info it belongs to */ + struct lustre_quota_info *dq_info; + + loff_t dq_off; /* Offset of dquot on disk */ + unsigned int dq_id; /* ID this applies to (uid, gid) */ + int dq_type; /* Type of quota (USRQUOTA, GRPQUOTA) */ + unsigned short dq_status; /* See DQ_STATUS_ */ + unsigned long dq_flags; /* See DQ_ in quota.h */ + struct mem_dqblk dq_dqb; /* Diskquota usage */ +}; + +struct dquot_id { + struct list_head di_link; + __u32 di_id; +}; + +#define QFILE_CHK 1 +#define QFILE_RD_INFO 2 +#define QFILE_WR_INFO 3 +#define QFILE_INIT_INFO 4 +#define QFILE_RD_DQUOT 5 +#define QFILE_WR_DQUOT 6 + +/* admin quotafile operations */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +int lustre_check_quota_file(struct lustre_quota_info *lqi, int type); +int lustre_read_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_write_quota_info(struct lustre_quota_info *lqi, int type); +int lustre_read_dquot(struct lustre_dquot *dquot); +int lustre_commit_dquot(struct lustre_dquot *dquot); +int lustre_init_quota_info(struct
lustre_quota_info *lqi, int type); +int lustre_get_qids(struct file *file, struct inode *inode, int type, + struct list_head *list); +#else + +#ifndef DQ_FAKE_B +#define DQ_FAKE_B 6 +#endif + +static inline int lustre_check_quota_file(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_read_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_write_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +static inline int lustre_read_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int lustre_commit_dquot(struct lustre_dquot *dquot) +{ + return 0; +} +static inline int lustre_init_quota_info(struct lustre_quota_info *lqi, + int type) +{ + return 0; +} +#endif /* KERNEL_VERSION(2,5,0) */ + +#define LL_DQUOT_OFF(sb) DQUOT_OFF(sb) + +typedef int (*dqacq_handler_t) (struct obd_device * obd, struct qunit_data * qd, + int opc); +struct lustre_quota_ctxt { + struct super_block *lqc_sb; /* superblock this applies to */ + struct obd_import *lqc_import; /* import used to send dqacq/dqrel RPC */ + dqacq_handler_t lqc_handler; /* dqacq/dqrel RPC handler, only for quota master */ + unsigned long lqc_recovery:1; /* Doing recovery */ + unsigned long lqc_iunit_sz; /* Unit size of file quota */ + unsigned long lqc_itune_sz; /* Trigger dqacq when available file quota less than + * this value, trigger dqrel when available file quota + * more than this value + 1 iunit */ + unsigned long lqc_bunit_sz; /* Unit size of block quota */ + unsigned long lqc_btune_sz; /* See comment of lqc_itune_sz */ +}; + +#else + +struct lustre_quota_info { +}; + +struct lustre_quota_ctxt { +}; + +#endif /* !__KERNEL__ */ + +#else + +#define LL_DQUOT_OFF(sb) do {} while(0) + +struct lustre_quota_info { +}; + +struct lustre_quota_ctxt { +}; + +#endif /* !HAVE_QUOTA_SUPPORT */ + +/* If the (quota limit < qunit * slave count), the slave which can't + * acquire qunit should set it's local limit as 
MIN_QLIMIT */ +#define MIN_QLIMIT 1 + +struct quotacheck_thread_args { + struct obd_export *qta_exp; /* obd export */ + struct obd_quotactl qta_oqctl; /* obd_quotactl args */ + struct super_block *qta_sb; /* obd super block */ + atomic_t *qta_sem; /* obt_quotachecking */ +}; + +typedef struct { + int (*quota_init) (void); + int (*quota_exit) (void); + int (*quota_setup) (struct obd_device *, struct lustre_cfg *); + int (*quota_cleanup) (struct obd_device *); + /* For quota master, close admin quota files */ + int (*quota_fs_cleanup) (struct obd_device *); + int (*quota_ctl) (struct obd_export *, struct obd_quotactl *); + int (*quota_check) (struct obd_export *, struct obd_quotactl *); + int (*quota_recovery) (struct obd_device *); + + /* For quota master/slave, adjust quota limit after fs operation */ + int (*quota_adjust) (struct obd_device *, unsigned int[], + unsigned int[], int, int); + + /* For quota slave, set import, trigger quota recovery */ + int (*quota_setinfo) (struct obd_export *, struct obd_device *); + + /* For quota slave, set proper thread resource capability */ + int (*quota_enforce) (struct obd_device *, unsigned int); + + /* For quota slave, check whether specified uid/gid is over quota */ + int (*quota_getflag) (struct obd_device *, struct obdo *); + + /* For quota slave, acquire/release quota from master if needed */ + int (*quota_acquire) (struct obd_device *, unsigned int, unsigned int); + + /* For quota client, poll whether the quota check is done */ + int (*quota_poll_check) (struct obd_export *, struct if_quotacheck *); + + /* For quota client, check whether specified uid/gid is over quota */ + int (*quota_chkdq) (struct client_obd *, unsigned int, unsigned int); + + /* For quota client, set over quota flag for specified uid/gid */ + int (*quota_setdq) (struct client_obd *, unsigned int, unsigned int, + obd_flag, obd_flag); +} quota_interface_t; + +#define Q_COPY(out, in, member) (out)->member = (in)->member + +#define QUOTA_OP(interface, op)
interface->quota_ ## op + +#define QUOTA_CHECK_OP(interface, op) \ +do { \ + if (!interface) \ + RETURN(0); \ + if (!QUOTA_OP(interface, op)) { \ + CERROR("no quota operation: " #op "\n"); \ + RETURN(-EOPNOTSUPP); \ + } \ +} while(0) + +static inline int lquota_init(quota_interface_t *interface) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, init); + rc = QUOTA_OP(interface, init)(); + RETURN(rc); +} + +static inline int lquota_exit(quota_interface_t *interface) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, exit); + rc = QUOTA_OP(interface, exit)(); + RETURN(rc); +} + +static inline int lquota_setup(quota_interface_t *interface, + struct obd_device *obd, + struct lustre_cfg *lcfg) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, setup); + rc = QUOTA_OP(interface, setup)(obd, lcfg); + RETURN(rc); +} + +static inline int lquota_cleanup(quota_interface_t *interface, + struct obd_device *obd) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, cleanup); + rc = QUOTA_OP(interface, cleanup)(obd); + RETURN(rc); +} + +static inline int lquota_fs_cleanup(quota_interface_t *interface, + struct obd_device *obd) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, fs_cleanup); + rc = QUOTA_OP(interface, fs_cleanup)(obd); + RETURN(rc); +} + +static inline int lquota_recovery(quota_interface_t *interface, + struct obd_device *obd) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, recovery); + rc = QUOTA_OP(interface, recovery)(obd); + RETURN(rc); +} + +static inline int lquota_adjust(quota_interface_t *interface, + struct obd_device *obd, + unsigned int qcids[], + unsigned int qpids[], + int rc, int opc) +{ + int ret; + ENTRY; + + QUOTA_CHECK_OP(interface, adjust); + ret = QUOTA_OP(interface, adjust)(obd, qcids, qpids, rc, opc); + RETURN(ret); +} + +static inline int lquota_chkdq(quota_interface_t *interface, + struct client_obd *cli, + unsigned int uid, unsigned int gid) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, chkdq); + rc = QUOTA_OP(interface, 
chkdq)(cli, uid, gid); + RETURN(rc); +} + +static inline int lquota_setdq(quota_interface_t *interface, + struct client_obd *cli, + unsigned int uid, unsigned int gid, + obd_flag valid, obd_flag flags) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, setdq); + rc = QUOTA_OP(interface, setdq)(cli, uid, gid, valid, flags); + RETURN(rc); +} + +static inline int lquota_poll_check(quota_interface_t *interface, + struct obd_export *exp, + struct if_quotacheck *qchk) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, poll_check); + rc = QUOTA_OP(interface, poll_check)(exp, qchk); + RETURN(rc); +} + + +static inline int lquota_setinfo(quota_interface_t *interface, + struct obd_export *exp, + struct obd_device *obd) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, setinfo); + rc = QUOTA_OP(interface, setinfo)(exp, obd); + RETURN(rc); +} + +static inline int lquota_enforce(quota_interface_t *interface, + struct obd_device *obd, + unsigned int ignore) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, enforce); + rc = QUOTA_OP(interface, enforce)(obd, ignore); + RETURN(rc); +} + +static inline int lquota_getflag(quota_interface_t *interface, + struct obd_device *obd, struct obdo *oa) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, getflag); + rc = QUOTA_OP(interface, getflag)(obd, oa); + RETURN(rc); +} + +static inline int lquota_acquire(quota_interface_t *interface, + struct obd_device *obd, + unsigned int uid, unsigned int gid) +{ + int rc; + ENTRY; + + QUOTA_CHECK_OP(interface, acquire); + rc = QUOTA_OP(interface, acquire)(obd, uid, gid); + RETURN(rc); +} + +#ifndef __KERNEL__ +extern quota_interface_t osc_quota_interface; +extern quota_interface_t mdc_quota_interface; +extern quota_interface_t lov_quota_interface; +#endif + +#endif /* _LUSTRE_QUOTA_H */ diff --git a/lustre/include/linux/lustre_req_layout.h b/lustre/include/lustre_req_layout.h similarity index 95% rename from lustre/include/linux/lustre_req_layout.h rename to lustre/include/lustre_req_layout.h 
index e0e5eeb..89660f4 100644 --- a/lustre/include/linux/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -26,11 +26,11 @@ * license text for more details. */ -#ifndef _LINUX_LUSTRE_REQ_LAYOUT_H__ -#define _LINUX_LUSTRE_REQ_LAYOUT_H__ +#ifndef _LUSTRE_REQ_LAYOUT_H__ +#define _LUSTRE_REQ_LAYOUT_H__ /* struct ptlrpc_request, lustre_msg* */ -#include +#include struct req_msg_field; struct req_format; @@ -83,4 +83,4 @@ extern const struct req_msg_field RMF_NAME; extern const struct req_msg_field RMF_REC_CREATE; -#endif /* _LINUX_LUSTRE_REQ_LAYOUT_H__ */ +#endif /* _LUSTRE_REQ_LAYOUT_H__ */ diff --git a/lustre/include/linux/lustre_ucache.h b/lustre/include/lustre_ucache.h similarity index 91% rename from lustre/include/linux/lustre_ucache.h rename to lustre/include/lustre_ucache.h index db28cef..16b5c1a 100644 --- a/lustre/include/linux/lustre_ucache.h +++ b/lustre/include/lustre_ucache.h @@ -34,9 +34,9 @@ struct upcall_cache_entry { struct group_info *ue_group_info; atomic_t ue_refcount; int ue_flags; - wait_queue_head_t ue_waitq; - unsigned long ue_acquire_expire; - unsigned long ue_expire; + cfs_waitq_t ue_waitq; + cfs_time_t ue_acquire_expire; + cfs_time_t ue_expire; }; #define UC_CACHE_HASH_SIZE (128) @@ -49,8 +49,8 @@ struct upcall_cache { char uc_name[40]; /* for upcall */ char uc_upcall[UC_CACHE_UPCALL_MAXPATH]; - unsigned long uc_acquire_expire; /* jiffies */ - unsigned long uc_entry_expire; /* jiffies */ + cfs_time_t uc_acquire_expire; /* jiffies */ + cfs_time_t uc_entry_expire; /* jiffies */ }; struct upcall_cache_entry *upcall_cache_get_entry(struct upcall_cache *hash, diff --git a/lustre/include/linux/lustre_ver.h.in b/lustre/include/lustre_ver.h.in similarity index 96% rename from lustre/include/linux/lustre_ver.h.in rename to lustre/include/lustre_ver.h.in index 4abf818..5881c4f 100644 --- a/lustre/include/linux/lustre_ver.h.in +++ b/lustre/include/lustre_ver.h.in @@ -1,8 +1,6 @@ #ifndef _LUSTRE_VER_H_ #define _LUSTRE_VER_H_ -#include - 
#define LUSTRE_MAJOR @AC_LUSTRE_MAJOR@ #define LUSTRE_MINOR @AC_LUSTRE_MINOR@ #define LUSTRE_PATCH @AC_LUSTRE_PATCH@ diff --git a/lustre/include/lvfs.h b/lustre/include/lvfs.h new file mode 100644 index 0000000..42e8544 --- /dev/null +++ b/lustre/include/lvfs.h @@ -0,0 +1,61 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * lustre VFS/process permission interface + */ + +#ifndef __LVFS_H__ +#define __LVFS_H__ + +#define LL_FID_NAMELEN (16 + 1 + 8 + 1) + +#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#include + + +#ifdef LIBLUSTRE +#include +#endif + +/* lvfs_common.c */ +struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *, __u64, __u32, __u64 ,void *data); + +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred); + + +static inline int ll_fid2str(char *str, __u64 id, __u32 generation) +{ + return sprintf(str, "%llx:%08x", (unsigned long long)id, generation); +} + +#endif diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h new file mode 100644 index 0000000..6ca1d9e --- /dev/null +++ b/lustre/include/md_object.h @@ -0,0 +1,200 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Extension of lu_object.h for metadata objects + * + * Copyright (C) 2006 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _LUSTRE_MD_OBJECT_H +#define _LUSTRE_MD_OBJECT_H + +/* + * Sub-class of lu_object with methods common for "meta-data" objects in MDT + * stack. + * + * Meta-data objects implement namespace operations: you can link, unlink + * them, and treat them as directories. + * + * Examples: mdt, cmm, and mdt are implementations of md interface.
+ */ + + +/* + * super-class definitions. + */ +#include + +struct md_device; +struct md_device_operations; +struct md_object; + +/* + * Operations implemented for each md object (both directory and leaf). + */ +struct md_object_operations { + int (*moo_attr_get)(struct lu_context *ctxt, struct md_object *dt, + struct lu_attr *attr); + int (*moo_attr_set)(struct lu_context *ctxt, struct md_object *dt, + struct lu_attr *attr); + + int (*moo_xattr_get)(struct lu_context *ctxt, struct md_object *obj, + void *buf, int buf_len, const char *name); + + int (*moo_xattr_set)(struct lu_context *ctxt, struct md_object *obj, + void *buf, int buf_len, const char *name); + /* part of cross-ref operation */ + int (*moo_object_create)(struct lu_context *, + struct md_object *, struct lu_attr *); + int (*moo_ref_add)(struct lu_context *, struct md_object *); + int (*moo_ref_del)(struct lu_context *, struct md_object *); + int (*moo_open)(struct lu_context *, struct md_object *); + int (*moo_close)(struct lu_context *, struct md_object *); +}; + +/* + * Operations implemented for each directory object. 
+ */ +struct md_dir_operations { + int (*mdo_lookup)(struct lu_context *, struct md_object *, + const char *, struct lu_fid *); + + int (*mdo_mkdir)(struct lu_context *, struct lu_attr *, + struct md_object *, const char *, + struct md_object *); + + int (*mdo_rename)(struct lu_context *ctxt, struct md_object *spobj, + struct md_object *tpobj, struct md_object *sobj, + const char *sname, struct md_object *tobj, + const char *tname); + + int (*mdo_link)(struct lu_context *ctxt, struct md_object *tobj, + struct md_object *sobj, const char *name); + + /* partial ops for cross-ref case */ + int (*mdo_name_insert)(struct lu_context *, struct md_object *, + const char *, const struct lu_fid *, + struct lu_attr *); + int (*mdo_name_remove)(struct lu_context *, struct md_object *, + const char *, struct lu_attr *); +}; + +struct md_device_operations { + /* method for getting/setting device wide back stored config data, like + * last used meta-sequence, etc. */ + int (*mdo_config) (struct lu_context *ctx, + struct md_device *m, const char *name, + void *buf, int size, int mode); + + /* meta-data device related handlers. 
*/ + int (*mdo_root_get)(struct lu_context *ctx, + struct md_device *m, struct lu_fid *f); + int (*mdo_statfs)(struct lu_context *ctx, + struct md_device *m, struct kstatfs *sfs); + +}; + +struct md_device { + struct lu_device md_lu_dev; + struct md_device_operations *md_ops; +}; + +struct md_object { + struct lu_object mo_lu; + struct md_object_operations *mo_ops; + struct md_dir_operations *mo_dir_ops; +}; + +static inline int lu_device_is_md(const struct lu_device *d) +{ + return ergo(d != NULL, d->ld_type->ldt_tags & LU_DEVICE_MD); +} + +static inline struct md_device *lu2md_dev(const struct lu_device *d) +{ + LASSERT(lu_device_is_md(d)); + return container_of0(d, struct md_device, md_lu_dev); +} + +static inline struct lu_device *md2lu_dev(struct md_device *d) +{ + return &d->md_lu_dev; +} + +static inline struct md_object *lu2md(const struct lu_object *o) +{ + LASSERT(lu_device_is_md(o->lo_dev)); + return container_of0(o, struct md_object, mo_lu); +} + +static inline struct md_object *md_object_next(const struct md_object *obj) +{ + return lu2md(lu_object_next(&obj->mo_lu)); +} + +static inline struct md_device *md_device_get(const struct md_object *o) +{ + LASSERT(lu_device_is_md(o->mo_lu.lo_dev)); + return container_of0(o->mo_lu.lo_dev, struct md_device, md_lu_dev); +} + +static inline int md_device_init(struct md_device *md, struct lu_device_type *t) +{ + return lu_device_init(&md->md_lu_dev, t); +} + +static inline void md_device_fini(struct md_device *md) +{ + lu_device_fini(&md->md_lu_dev); +} + +/* md operations */ +static inline int mo_attr_get(struct lu_context *cx, struct md_object *m, + struct lu_attr *at) +{ + return m->mo_ops->moo_attr_get(cx, m, at); +} + +static inline int mo_object_create(struct lu_context *cx, struct md_object *m, + struct lu_attr *at) +{ + return m->mo_ops->moo_object_create(cx, m, at); +} + +static inline int mdo_lookup(struct lu_context *cx, struct md_object *p, + const char *name, struct lu_fid *f) +{ + return 
p->mo_dir_ops->mdo_lookup(cx, p, name, f); +} + +static inline int mdo_mkdir(struct lu_context *cx, struct lu_attr *at, + struct md_object *p, const char *name, + struct md_object *c) +{ + return p->mo_dir_ops->mdo_mkdir(cx, at, p, name, c); +} + +static inline int mdo_name_insert(struct lu_context *cx, struct md_object *p, + const char *name, const struct lu_fid *f, + struct lu_attr *at) +{ + return p->mo_dir_ops->mdo_name_insert(cx, p, name, f, at); +} + +#endif /* _LUSTRE_MD_OBJECT_H */ diff --git a/lustre/include/obd.h b/lustre/include/obd.h new file mode 100644 index 0000000..0592ab8 --- /dev/null +++ b/lustre/include/obd.h @@ -0,0 +1,1114 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ + +#ifndef __OBD_H +#define __OBD_H + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#define IOC_OSC_TYPE 'h' +#define IOC_OSC_MIN_NR 20 +#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *) +#define IOC_OSC_MAX_NR 50 + +#define IOC_MDC_TYPE 'i' +#define IOC_MDC_MIN_NR 20 +/* Moved to lustre_user.h +#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) +#define IOC_MDC_GETSTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */ +#define IOC_MDC_MAX_NR 50 + +#include +#include +#include +#include +#include + +/* this is really local to the OSC */ +struct loi_oap_pages { + struct list_head lop_pending; + int lop_num_pending; + struct list_head lop_urgent; + struct list_head lop_pending_group; +}; + +struct osc_async_rc { + int ar_rc; + int ar_force_sync; + int ar_min_xid; +}; + +struct lov_oinfo { /* per-stripe data structure */ + __u64 loi_id; /* object ID on the target OST */ + __u64 loi_gr; /* object group on the target OST */ + int loi_ost_idx; /* OST stripe index in lov_tgt_desc->tgts */ + int loi_ost_gen; /* generation of this loi_ost_idx */ + + /* used by the osc to
keep track of what objects to build into rpcs */ + struct loi_oap_pages loi_read_lop; + struct loi_oap_pages loi_write_lop; + /* _cli_ is poorly named, it should be _ready_ */ + struct list_head loi_cli_item; + struct list_head loi_write_item; + struct list_head loi_read_item; + + unsigned loi_kms_valid:1; + __u64 loi_kms; /* known minimum size */ + struct ost_lvb loi_lvb; + struct osc_async_rc loi_ar; +}; + +static inline void loi_init(struct lov_oinfo *loi) +{ + CFS_INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending); + CFS_INIT_LIST_HEAD(&loi->loi_read_lop.lop_urgent); + CFS_INIT_LIST_HEAD(&loi->loi_read_lop.lop_pending_group); + CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending); + CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_urgent); + CFS_INIT_LIST_HEAD(&loi->loi_write_lop.lop_pending_group); + CFS_INIT_LIST_HEAD(&loi->loi_cli_item); + CFS_INIT_LIST_HEAD(&loi->loi_write_item); + CFS_INIT_LIST_HEAD(&loi->loi_read_item); +} + +/*extent array item for describing the joined file extent info*/ +struct lov_extent { + __u64 le_start; /* extent start */ + __u64 le_len; /* extent length */ + int le_loi_idx; /* extent #1 loi's index in lsm loi array */ + int le_stripe_count; /* extent stripe count*/ +}; + +/*Lov array info for describing joined file array EA info*/ +struct lov_array_info { + struct llog_logid lai_array_id; /* MDS med llog object id */ + unsigned lai_ext_count; /* number of extent count */ + struct lov_extent *lai_ext_array; /* extent desc array */ +}; + +struct lov_stripe_md { + spinlock_t lsm_lock; + void *lsm_lock_owner; /* debugging */ + + struct { + /* Public members. */ + __u64 lw_object_id; /* lov object id */ + __u64 lw_object_gr; /* lov object group */ + __u64 lw_maxbytes; /* maximum possible file size */ + unsigned long lw_xfersize; /* optimal transfer size */ + + /* LOV-private members start here -- only for use in lov/. 
*/ + __u32 lw_magic; + __u32 lw_stripe_size; /* size of the stripe */ + __u32 lw_pattern; /* striping pattern (RAID0, RAID1) */ + unsigned lw_stripe_count; /* number of objects being striped over */ + } lsm_wire; + + struct lov_array_info *lsm_array; /*Only for joined file array info*/ + struct lov_oinfo lsm_oinfo[0]; +}; + +#define lsm_object_id lsm_wire.lw_object_id +#define lsm_object_gr lsm_wire.lw_object_gr +#define lsm_maxbytes lsm_wire.lw_maxbytes +#define lsm_xfersize lsm_wire.lw_xfersize +#define lsm_magic lsm_wire.lw_magic +#define lsm_stripe_size lsm_wire.lw_stripe_size +#define lsm_pattern lsm_wire.lw_pattern +#define lsm_stripe_count lsm_wire.lw_stripe_count + +/* compare all relevant fields. */ +static inline int lov_stripe_md_cmp(struct lov_stripe_md *m1, + struct lov_stripe_md *m2) +{ + /* + * ->lsm_wire contains padding, but it should be zeroed out during + * allocation. + */ + return memcmp(&m1->lsm_wire, &m2->lsm_wire, sizeof m1->lsm_wire); +} + +void lov_stripe_lock(struct lov_stripe_md *md); +void lov_stripe_unlock(struct lov_stripe_md *md); + +struct obd_type { + struct list_head typ_chain; + struct obd_ops *typ_dt_ops; + struct md_ops *typ_md_ops; + struct proc_dir_entry *typ_procroot; + char *typ_name; + int typ_refcnt; + struct lu_device_type *typ_lu; +}; + +struct brw_page { + obd_off off; + cfs_page_t *pg; + int count; + obd_flag flag; +}; + +enum async_flags { + ASYNC_READY = 0x1, /* ap_make_ready will not be called before this + page is added to an rpc */ + ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ + ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called + to give the caller a chance to update + or cancel the size of the io */ + ASYNC_GROUP_SYNC = 0x8, /* ap_completion will not be called, instead + the page is accounted for in the + obd_io_group given to + obd_queue_group_io */ +}; + +struct obd_async_page_ops { + int (*ap_make_ready)(void *data, int cmd); + int (*ap_refresh_count)(void *data, int 
cmd); + void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa); + void (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc); +}; + +/* the `oig' is passed down from a caller of obd rw methods. the callee + * records enough state such that the caller can sleep on the oig and + * be woken when all the callees have finished their work */ +struct obd_io_group { + spinlock_t oig_lock; + atomic_t oig_refcount; + int oig_pending; + int oig_rc; + struct list_head oig_occ_list; + cfs_waitq_t oig_waitq; +}; + +/* the oig callback context lets the callee of obd rw methods register + * for callbacks from the caller. */ +struct oig_callback_context { + struct list_head occ_oig_item; + /* called when the caller has received a signal while sleeping. + * callees of this method are encouraged to abort their state + * in the oig. This may be called multiple times. */ + void (*occ_interrupted)(struct oig_callback_context *occ); + unsigned int interrupted:1; +}; + +/* if we find more consumers this could be generalized */ +#define OBD_HIST_MAX 32 +struct obd_histogram { + spinlock_t oh_lock; + unsigned long oh_buckets[OBD_HIST_MAX]; +}; + +/* Individual type definitions */ + +struct ost_server_data; + +/* hold common fields for "target" device */ +struct obd_device_target { + struct super_block *obt_sb; + atomic_t obt_quotachecking; + struct lustre_quota_ctxt obt_qctxt; +}; + +#define FILTER_GROUP_LLOG 1 +#define FILTER_GROUP_ECHO 2 + +struct filter_ext { + __u64 fe_start; + __u64 fe_end; +}; + +struct filter_obd { + /* NB this field MUST be first */ + struct obd_device_target fo_obt; + const char *fo_fstype; + struct vfsmount *fo_vfsmnt; + cfs_dentry_t *fo_dentry_O; + cfs_dentry_t **fo_dentry_O_groups; + cfs_dentry_t **fo_dentry_O_sub; + spinlock_t fo_objidlock; /* protect fo_lastobjid */ + spinlock_t fo_translock; /* protect fsd_last_transno */ + struct file *fo_rcvd_filp; + struct file *fo_health_check_filp; + struct lr_server_data *fo_fsd; + unsigned long 
*fo_last_rcvd_slots; + __u64 fo_mount_count; + + int fo_destroy_in_progress; + struct semaphore fo_create_lock; + + struct list_head fo_export_list; + int fo_subdir_count; + + obd_size fo_tot_dirty; /* protected by obd_osfs_lock */ + obd_size fo_tot_granted; /* all values in bytes */ + obd_size fo_tot_pending; + + obd_size fo_readcache_max_filesize; + + struct obd_import *fo_mdc_imp; + struct obd_uuid fo_mdc_uuid; + struct lustre_handle fo_mdc_conn; + struct file **fo_last_objid_files; + __u64 *fo_last_objids; /* last created objid for groups, + * protected by fo_objidlock */ + + struct semaphore fo_alloc_lock; + + spinlock_t fo_stats_lock; + int fo_r_in_flight; /* protected by fo_stats_lock */ + int fo_w_in_flight; /* protected by fo_stats_lock */ + + /* + * per-filter pool of kiobuf's allocated by filter_common_setup() and + * torn down by filter_cleanup(). Contains OST_NUM_THREADS elements of + * which ->fo_iobuf_count were allocated. + * + * This pool contains kiobuf used by + * filter_{prep,commit}rw_{read,write}() and is shared by all OST + * threads. + * + * Locking: none, each OST thread uses only one element, determined by + * its "ordinal number", ->t_id. 
+ */ + struct filter_iobuf **fo_iobuf_pool; + int fo_iobuf_count; + + struct obd_histogram fo_r_pages; + struct obd_histogram fo_w_pages; + struct obd_histogram fo_read_rpc_hist; + struct obd_histogram fo_write_rpc_hist; + struct obd_histogram fo_r_io_time; + struct obd_histogram fo_w_io_time; + struct obd_histogram fo_r_discont_pages; + struct obd_histogram fo_w_discont_pages; + struct obd_histogram fo_r_discont_blocks; + struct obd_histogram fo_w_discont_blocks; + struct obd_histogram fo_r_disk_iosize; + struct obd_histogram fo_w_disk_iosize; + + struct lustre_quota_ctxt fo_quota_ctxt; + spinlock_t fo_quotacheck_lock; + atomic_t fo_quotachecking; +}; + +#define OSC_MAX_RIF_DEFAULT 8 +#define OSC_MAX_RIF_MAX 256 +#define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4) +#define OSC_MAX_DIRTY_MB_MAX 2048 /* totally arbitrary */ + +struct mdc_rpc_lock; +struct obd_import; +struct client_obd { + struct semaphore cl_sem; + struct obd_uuid cl_target_uuid; + struct obd_import *cl_import; /* ptlrpc connection state */ + int cl_conn_count; + /* max_mds_easize is purely a performance thing so we don't have to + * call obd_size_diskmd() all the time. */ + int cl_default_mds_easize; + int cl_max_mds_easize; + int cl_max_mds_cookiesize; + kdev_t cl_sandev; + + //struct llog_canceld_ctxt *cl_llcd; /* it's included by obd_llog_ctxt */ + void *cl_llcd_offset; + + /* the grant values are protected by loi_list_lock below */ + long cl_dirty; /* all _dirty_ in bytes */ + long cl_dirty_max; /* allowed w/o rpc */ + long cl_avail_grant; /* bytes of credit for ost */ + long cl_lost_grant; /* lost credits (trunc) */ + struct list_head cl_cache_waiters; /* waiting for cache/grant */ + + /* keep track of objects that have lois that contain pages which + * have been queued for async brw. this lock also protects the + * lists of osc_client_pages that hang off of the loi */ + /* + * ->cl_loi_list_lock protects consistency of + * ->cl_loi_{ready,read,write}_list. 
->ap_make_ready() and + * ->ap_completion() call-backs are executed under this lock. As we + * cannot guarantee that these call-backs never block on all platforms + * (as a matter of fact they do block on Mac OS X), type of + * ->cl_loi_list_lock is platform dependent: it's a spin-lock on Linux + * and blocking mutex on Mac OS X. (Alternative is to make this lock + * blocking everywhere, but we don't want to slow down fast-path of + * our main platform.) + * + * Exact type of ->cl_loi_list_lock is defined in arch/obd.h together + * with client_obd_list_{un,}lock() and + * client_obd_list_lock_{init,done}() functions. + */ + client_obd_lock_t cl_loi_list_lock; + struct list_head cl_loi_ready_list; + struct list_head cl_loi_write_list; + struct list_head cl_loi_read_list; + int cl_r_in_flight; + int cl_w_in_flight; + /* just a sum of the loi/lop pending numbers to be exported by /proc */ + int cl_pending_w_pages; + int cl_pending_r_pages; + int cl_max_pages_per_rpc; + int cl_max_rpcs_in_flight; + struct obd_histogram cl_read_rpc_hist; + struct obd_histogram cl_write_rpc_hist; + struct obd_histogram cl_read_page_hist; + struct obd_histogram cl_write_page_hist; + struct obd_histogram cl_read_offset_hist; + struct obd_histogram cl_write_offset_hist; + + struct mdc_rpc_lock *cl_rpc_lock; + struct mdc_rpc_lock *cl_setattr_lock; + struct osc_creator cl_oscc; + + /* mgc datastruct */ + struct semaphore cl_mgc_sem; + struct vfsmount *cl_mgc_vfsmnt; + struct dentry *cl_mgc_configs_dir; + atomic_t cl_mgc_refcount; + struct obd_export *cl_mgc_mgsexp; + + /* Flags section */ + unsigned int cl_checksum:1; /* debug checksums */ + + /* also protected by the poorly named _loi_list_lock lock above */ + struct osc_async_rc cl_ar; + + /* used by quotacheck */ + int cl_qchk_stat; /* quotacheck stat of the peer */ + + /* this holds last allocated fid in last obtained seq */ + struct lu_fid cl_fid; + spinlock_t cl_fid_lock; +}; +#define obd2cli_tgt(obd) ((char 
*)(obd)->u.cli.cl_target_uuid.uuid) + +#define CL_NOT_QUOTACHECKED 1 /* client->cl_qchk_stat init value */ + +struct mgs_obd { + struct ptlrpc_service *mgs_service; + struct vfsmount *mgs_vfsmnt; + struct super_block *mgs_sb; + struct dentry *mgs_configs_dir; + struct dentry *mgs_fid_de; + struct list_head mgs_fs_db_list; + struct semaphore mgs_sem; +}; + +struct mds_obd { + /* NB this field MUST be first */ + struct obd_device_target mds_obt; + struct ptlrpc_service *mds_service; + struct ptlrpc_service *mds_setattr_service; + struct ptlrpc_service *mds_readpage_service; + struct vfsmount *mds_vfsmnt; + cfs_dentry_t *mds_fid_de; + int mds_max_mdsize; + int mds_max_cookiesize; + struct file *mds_rcvd_filp; + spinlock_t mds_transno_lock; + __u64 mds_last_transno; + __u64 mds_mount_count; + __u64 mds_io_epoch; + unsigned long mds_atime_diff; + struct semaphore mds_epoch_sem; + struct ll_fid mds_rootfid; + struct lr_server_data *mds_server_data; + cfs_dentry_t *mds_pending_dir; + cfs_dentry_t *mds_logs_dir; + cfs_dentry_t *mds_objects_dir; + struct llog_handle *mds_cfg_llh; +// struct llog_handle *mds_catalog; + struct obd_device *mds_osc_obd; /* XXX lov_obd */ + struct obd_uuid mds_lov_uuid; + char *mds_profile; + struct obd_export *mds_osc_exp; /* XXX lov_exp */ + struct lov_desc mds_lov_desc; + obd_id *mds_lov_objids; + int mds_lov_objids_size; + __u32 mds_lov_objids_in_file; + unsigned int mds_lov_objids_dirty:1; + int mds_lov_nextid_set; + struct file *mds_lov_objid_filp; + struct file *mds_health_check_filp; + unsigned long *mds_client_bitmap; + struct semaphore mds_orphan_recovery_sem; + struct upcall_cache *mds_group_hash; + + struct lustre_quota_info mds_quota_info; + struct semaphore mds_qonoff_sem; + struct semaphore mds_health_sem; + unsigned long mds_lov_objids_valid:1, + mds_fl_user_xattr:1, + mds_fl_acl:1; +}; + +struct echo_obd { + struct obdo eo_oa; + spinlock_t eo_lock; + __u64 eo_lastino; + struct lustre_handle eo_nl_lock; + atomic_t eo_prep; +}; + 
+struct ost_obd { + struct ptlrpc_service *ost_service; + struct ptlrpc_service *ost_create_service; + struct ptlrpc_service *ost_io_service; + struct semaphore ost_health_sem; +}; + +struct echo_client_obd { + struct obd_export *ec_exp; /* the local connection to osc/lov */ + spinlock_t ec_lock; + struct list_head ec_objects; + int ec_nstripes; + __u64 ec_unique; +}; + +struct lov_tgt_desc { + struct obd_uuid uuid; + __u32 ltd_gen; + struct obd_export *ltd_exp; + unsigned int active:1, /* is this target up for requests */ + reap:1; /* should this target be deleted */ + int index; /* index of target array in lov_obd */ + struct list_head qos_bavail_list; /* link entry to lov_obd */ +}; + +struct lov_obd { + struct semaphore lov_lock; + atomic_t refcount; + struct lov_desc desc; + struct obd_connect_data ocd; + int bufsize; + int connects; + int death_row; /* Do we have tgts scheduled to be deleted? + (Make this a linked list?) */ + struct list_head qos_bavail_list; /* tgts list, sorted by available + space, protected by lov_lock */ + struct lov_tgt_desc *tgts; +}; + +struct lmv_tgt_desc { + struct obd_uuid uuid; + struct obd_export *ltd_exp; + int active; /* is this target up for requests */ + int idx; +}; + +struct lmv_obd { + int refcount; + spinlock_t lmv_lock; + struct lmv_desc desc; + struct obd_uuid cluuid; + struct obd_export *exp; + + int connected; + int max_easize; + int max_def_easize; + int max_cookiesize; + int server_timeout; + struct semaphore init_sem; + + struct lmv_tgt_desc *tgts; + int tgts_size; + + struct obd_connect_data *datas; + int datas_size; + + struct obd_connect_data conn_data; +}; + +struct niobuf_local { + __u64 offset; + __u32 len; + __u32 flags; + cfs_page_t *page; + cfs_dentry_t *dentry; + int lnb_grant_used; + int rc; +}; + +#define LUSTRE_OPC_MKDIR (1 << 0) +#define LUSTRE_OPC_SYMLINK (1 << 1) +#define LUSTRE_OPC_MKNODE (1 << 2) +#define LUSTRE_OPC_CREATE (1 << 3) + +struct placement_hint { + struct qstr *ph_pname; + struct qstr 
*ph_cname; + int ph_opc; +}; + +/* device types (not names--FIXME) */ +/* FIXME all the references to these defines need to be updated */ +#define LUSTRE_MDS_NAME "mds" +#define LUSTRE_MDT_NAME "mdt" + +/* new MDS layers. Prototype */ +#define LUSTRE_MDT0_NAME "mdt0" +#define LUSTRE_CMM0_NAME "cmm0" +#define LUSTRE_MDD0_NAME "mdd0" +#define LUSTRE_OSD0_NAME "osd0" +#define LUSTRE_FLD0_NAME "fld0" +#define LUSTRE_MDC0_NAME "mdc0" + +#define LUSTRE_MDC_NAME "mdc" +#define LUSTRE_LOV_NAME "lov" +#define LUSTRE_LMV_NAME "lmv" + +/* FIXME just the names need to be changed */ +#define LUSTRE_OSS_NAME "ost" /* FIXME oss */ +#define LUSTRE_OST_NAME "obdfilter" /* FIXME ost */ +#define LUSTRE_OSTSAN_NAME "sanobdfilter" + +#define LUSTRE_OSC_NAME "osc" +#define LUSTRE_FILTER_NAME "filter" +#define LUSTRE_SANOSC_NAME "sanosc" +#define LUSTRE_SANOST_NAME "sanost" +#define LUSTRE_MGS_NAME "mgs" +#define LUSTRE_MGC_NAME "mgc" + +#define LUSTRE_ECHO_NAME "obdecho" +#define LUSTRE_ECHO_CLIENT_NAME "echo_client" + +#define LUSTRE_MGS_OBDNAME "MGS" +#define LUSTRE_MGC_OBDNAME "MGC" + +/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ +#define N_LOCAL_TEMP_PAGE 0x10000000 + +struct obd_trans_info { + __u64 oti_transno; + __u64 *oti_objid; + /* Only used on the server side for tracking acks. */ + struct oti_req_ack_lock { + struct lustre_handle lock; + __u32 mode; + } oti_ack_locks[4]; + void *oti_handle; + struct llog_cookie oti_onecookie; + struct llog_cookie *oti_logcookies; + int oti_numcookies; + + /* initial thread handling transaction */ + int oti_thread_id; +}; + +static inline void oti_init(struct obd_trans_info *oti, + struct ptlrpc_request *req) +{ + if (oti == NULL) + return; + memset(oti, 0, sizeof *oti); + + if (req == NULL) + return; + + if (req->rq_repmsg && req->rq_reqmsg != 0) + oti->oti_transno = req->rq_repmsg->transno; + oti->oti_thread_id = req->rq_svc_thread ? 
req->rq_svc_thread->t_id : -1; +} + +static inline void oti_alloc_cookies(struct obd_trans_info *oti,int num_cookies) +{ + if (!oti) + return; + + if (num_cookies == 1) + oti->oti_logcookies = &oti->oti_onecookie; + else + OBD_ALLOC(oti->oti_logcookies, + num_cookies * sizeof(oti->oti_onecookie)); + + oti->oti_numcookies = num_cookies; +} + +static inline void oti_free_cookies(struct obd_trans_info *oti) +{ + if (!oti || !oti->oti_logcookies) + return; + + if (oti->oti_logcookies == &oti->oti_onecookie) + LASSERT(oti->oti_numcookies == 1); + else + OBD_FREE(oti->oti_logcookies, + oti->oti_numcookies * sizeof(oti->oti_onecookie)); + oti->oti_logcookies = NULL; + oti->oti_numcookies = 0; +} + +/* llog contexts */ +enum llog_ctxt_id { + LLOG_CONFIG_ORIG_CTXT = 0, + LLOG_CONFIG_REPL_CTXT = 1, + LLOG_MDS_OST_ORIG_CTXT = 2, + LLOG_MDS_OST_REPL_CTXT = 3, + LLOG_SIZE_ORIG_CTXT = 4, + LLOG_SIZE_REPL_CTXT = 5, + LLOG_MD_ORIG_CTXT = 6, + LLOG_MD_REPL_CTXT = 7, + LLOG_RD1_ORIG_CTXT = 8, + LLOG_RD1_REPL_CTXT = 9, + LLOG_TEST_ORIG_CTXT = 10, + LLOG_TEST_REPL_CTXT = 11, + LLOG_LOVEA_ORIG_CTXT = 12, + LLOG_LOVEA_REPL_CTXT = 13, + LLOG_MAX_CTXTS +}; + +/* + * Events signalled through obd_notify() upcall-chain. + */ +enum obd_notify_event { + /* Device activated */ + OBD_NOTIFY_ACTIVE, + /* Device deactivated */ + OBD_NOTIFY_INACTIVE, + /* Connect data for import were changed */ + OBD_NOTIFY_OCD, + /* Sync request */ + OBD_NOTIFY_SYNC_NONBLOCK, + OBD_NOTIFY_SYNC +}; + +/* + * Data structure used to pass obd_notify()-event to non-obd listeners (llite + * and liblustre being main examples). 
+ */ +struct obd_notify_upcall { + int (*onu_upcall)(struct obd_device *host, struct obd_device *watched, + enum obd_notify_event ev, void *owner); + /* Opaque datum supplied by upper layer listener */ + void *onu_owner; +}; + +/* corresponds to one of the obd's */ +struct obd_device { + struct obd_type *obd_type; + /* common and UUID name of this device */ + char *obd_name; + struct obd_uuid obd_uuid; + + struct lu_device *obd_lu_dev; + + int obd_minor; + unsigned int obd_attached:1, obd_set_up:1, obd_recovering:1, + obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, + obd_no_recov:1, obd_stopping:1, obd_starting:1, + obd_force:1, obd_fail:1, obd_async_recov:1; + atomic_t obd_refcount; + cfs_waitq_t obd_refcount_waitq; + cfs_proc_dir_entry_t *obd_proc_entry; + struct list_head obd_exports; + int obd_num_exports; + struct ldlm_namespace *obd_namespace; + struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ + /* a spinlock is OK for what we do now, may need a semaphore later */ + spinlock_t obd_dev_lock; + __u64 obd_last_committed; + struct fsfilt_operations *obd_fsops; + spinlock_t obd_osfs_lock; + struct obd_statfs obd_osfs; /* locked by obd_osfs_lock */ + cfs_time_t obd_osfs_age; + struct lvfs_run_ctxt obd_lvfs_ctxt; + struct llog_ctxt *obd_llog_ctxt[LLOG_MAX_CTXTS]; + struct obd_device *obd_observer; + struct obd_notify_upcall obd_upcall; + struct obd_export *obd_self_export; + /* list of exports in LRU order, for ping evictor, with obd_dev_lock */ + struct list_head obd_exports_timed; + time_t obd_eviction_timer; /* for ping evictor */ + + /* XXX encapsulate all this recovery data into one struct */ + svc_handler_t obd_recovery_handler; + int obd_max_recoverable_clients; + int obd_connected_clients; + int obd_recoverable_clients; + spinlock_t obd_processing_task_lock; + pid_t obd_processing_task; + __u64 obd_next_recovery_transno; + int obd_replayed_requests; + int obd_requests_queued_for_recovery; + cfs_waitq_t obd_next_transno_waitq; + struct 
list_head obd_uncommitted_replies; + spinlock_t obd_uncommitted_replies_lock; + cfs_timer_t obd_recovery_timer; + struct list_head obd_recovery_queue; + struct list_head obd_delayed_reply_queue; + time_t obd_recovery_start; + time_t obd_recovery_end; + + union { + struct obd_device_target obt; + struct filter_obd filter; + struct mds_obd mds; + struct client_obd cli; + struct ost_obd ost; + struct echo_client_obd echo_client; + struct echo_obd echo; + struct lov_obd lov; + struct lmv_obd lmv; + struct mgs_obd mgs; + } u; + /* Fields used by LProcFS */ + unsigned int obd_cntr_base; + struct lprocfs_stats *obd_stats; + + unsigned int md_cntr_base; + struct lprocfs_stats *md_stats; + + cfs_proc_dir_entry_t *obd_svc_procroot; + struct lprocfs_stats *obd_svc_stats; +}; + +#define OBD_OPT_FORCE 0x0001 +#define OBD_OPT_FAILOVER 0x0002 + +#define OBD_LLOG_FL_SENDNOW 0x0001 + +enum obd_cleanup_stage { +/* Special case hack for MDS LOVs */ + OBD_CLEANUP_EARLY, +/* Precleanup stage 1, we must make sure all exports (other than the + self-export) get destroyed. */ + OBD_CLEANUP_EXPORTS, +/* Precleanup stage 2, do other type-specific cleanup requiring the + self-export. */ + OBD_CLEANUP_SELF_EXP, +/* FIXME we should eliminate the "precleanup" function and make them stages + of the "cleanup" function. 
*/ + OBD_CLEANUP_OBD, +}; + +/* get/set_info keys */ +#define KEY_MDS_CONN "mds_conn" +#define KEY_NEXT_ID "next_id" +#define KEY_LOVDESC "lovdesc" +#define KEY_INIT_RECOV "initial_recov" +#define KEY_INIT_RECOV_BACKUP "init_recov_bk" + +struct obd_ops { + struct module *o_owner; + int (*o_iocontrol)(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg); + int (*o_get_info)(struct obd_export *, __u32 keylen, void *key, + __u32 *vallen, void *val); + int (*o_set_info_async)(struct obd_export *, __u32 keylen, void *key, + __u32 vallen, void *val, + struct ptlrpc_request_set *set); + int (*o_attach)(struct obd_device *dev, obd_count len, void *data); + int (*o_detach)(struct obd_device *dev); + int (*o_setup) (struct obd_device *dev, struct lustre_cfg *cfg); + int (*o_precleanup)(struct obd_device *dev, + enum obd_cleanup_stage cleanup_stage); + int (*o_cleanup)(struct obd_device *dev); + int (*o_process_config)(struct obd_device *dev, obd_count len, + void *data); + int (*o_postrecov)(struct obd_device *dev); + int (*o_add_conn)(struct obd_import *imp, struct obd_uuid *uuid, + int priority); + int (*o_del_conn)(struct obd_import *imp, struct obd_uuid *uuid); + /* connect to the target device with given connection + * data. @ocd->ocd_connect_flags is modified to reflect flags actually + * granted by the target, which are guaranteed to be a subset of flags + * asked for. If @ocd == NULL, use default parameters. 
*/ + int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, + struct obd_uuid *cluuid, struct obd_connect_data *ocd); + int (*o_reconnect)(struct obd_export *exp, struct obd_device *src, + struct obd_uuid *cluuid, + struct obd_connect_data *ocd); + int (*o_disconnect)(struct obd_export *exp); + + /* may be later these should be moved into separate fid_ops */ + int (*o_fid_alloc)(struct obd_export *exp, struct lu_fid *fid, + struct placement_hint *hint); + + int (*o_fid_delete)(struct obd_export *exp, struct lu_fid *fid); + + int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs, + unsigned long max_age); + int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt, + struct lov_stripe_md *mem_src); + int (*o_unpackmd)(struct obd_export *exp,struct lov_stripe_md **mem_tgt, + struct lov_mds_md *disk_src, int disk_len); + int (*o_checkmd)(struct obd_export *exp, struct obd_export *md_exp, + struct lov_stripe_md *mem_tgt); + int (*o_preallocate)(struct lustre_handle *, obd_count *req, + obd_id *ids); + int (*o_create)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md **ea, struct obd_trans_info *oti); + int (*o_destroy)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, struct obd_trans_info *oti, + struct obd_export *md_exp); + int (*o_setattr)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, struct obd_trans_info *oti); + int (*o_setattr_async)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, struct obd_trans_info *oti); + int (*o_getattr)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea); + int (*o_getattr_async)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, + struct ptlrpc_request_set *set); + int (*o_brw)(int rw, struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_count oa_bufs, + struct brw_page *pgarr, struct obd_trans_info *oti); + int (*o_brw_async)(int rw, struct obd_export *exp, struct obdo 
*oa, + struct lov_stripe_md *ea, obd_count oa_bufs, + struct brw_page *pgarr, struct ptlrpc_request_set *, + struct obd_trans_info *oti); + int (*o_prep_async_page)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct page *page, obd_off offset, + struct obd_async_page_ops *ops, void *data, + void **res); + int (*o_queue_async_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags); + int (*o_queue_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags); + int (*o_trigger_group_io)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig); + int (*o_set_async_flags)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + obd_flag async_flags); + int (*o_teardown_async_page)(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie); + int (*o_merge_lvb)(struct obd_export *exp, struct lov_stripe_md *lsm, + struct ost_lvb *lvb, int kms_only); + int (*o_adjust_kms)(struct obd_export *exp, struct lov_stripe_md *lsm, + obd_off size, int shrink); + int (*o_punch)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_size start, + obd_size end, struct obd_trans_info *oti); + int (*o_sync)(struct obd_export *exp, struct obdo *oa, + struct lov_stripe_md *ea, obd_size start, obd_size end); + int (*o_migrate)(struct lustre_handle *conn, struct lov_stripe_md *dst, + struct lov_stripe_md *src, obd_size start, + obd_size end, struct obd_trans_info *oti); + int (*o_copy)(struct lustre_handle *dstconn, struct lov_stripe_md *dst, + struct lustre_handle *srconn, struct lov_stripe_md *src, + obd_size start, obd_size end, struct obd_trans_info *); + 
int (*o_iterate)(struct lustre_handle *conn, + int (*)(obd_id, obd_gr, void *), + obd_id *startid, obd_gr group, void *data); + int (*o_preprw)(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_remote *remote, + struct niobuf_local *local, struct obd_trans_info *oti); + int (*o_commitrw)(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_local *local, + struct obd_trans_info *oti, int rc); + int (*o_enqueue)(struct obd_export *, struct lov_stripe_md *, + __u32 type, ldlm_policy_data_t *, __u32 mode, + int *flags, void *bl_cb, void *cp_cb, void *gl_cb, + void *data, __u32 lvb_len, void *lvb_swabber, + struct lustre_handle *lockh); + int (*o_match)(struct obd_export *, struct lov_stripe_md *, __u32 type, + ldlm_policy_data_t *, __u32 mode, int *flags, void *data, + struct lustre_handle *lockh); + int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, + ldlm_iterator_t it, void *data); + int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md, + __u32 mode, struct lustre_handle *); + int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *, + int flags, void *opaque); + int (*o_join_lru)(struct obd_export *, struct lov_stripe_md *, + int join); + int (*o_san_preprw)(int cmd, struct obd_export *exp, + struct obdo *oa, int objcount, + struct obd_ioobj *obj, int niocount, + struct niobuf_remote *remote); + int (*o_init_export)(struct obd_export *exp); + int (*o_destroy_export)(struct obd_export *exp); + + /* llog related obd_methods */ + int (*o_llog_init)(struct obd_device *obd, struct obd_device *disk_obd, + int count, struct llog_catid *logid); + int (*o_llog_finish)(struct obd_device *obd, int count); + + /* metadata-only methods */ + int (*o_pin)(struct obd_export *, struct lu_fid *fid, + struct obd_client_handle *, int flag); + int (*o_unpin)(struct obd_export *, struct obd_client_handle *, int); + + 
int (*o_import_event)(struct obd_device *, struct obd_import *, + enum obd_import_event); + + int (*o_notify)(struct obd_device *obd, struct obd_device *watched, + enum obd_notify_event ev, void *data); + + int (*o_health_check)(struct obd_device *); + + /* quota methods */ + int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *); + int (*o_quotactl)(struct obd_export *, struct obd_quotactl *); + + /* + * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line + * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c. + * Also, add a wrapper function in include/linux/obd_class.h. + * + * Also note that if you add it to the END, you also have to change + * the num_stats calculation. + * + */ +}; + +struct md_ops { + int (*m_getstatus)(struct obd_export *, struct lu_fid *); + int (*m_change_cbdata)(struct obd_export *, struct lu_fid *, + ldlm_iterator_t, void *); + int (*m_close)(struct obd_export *, struct md_op_data *, + struct obd_client_handle *, struct ptlrpc_request **); + int (*m_create)(struct obd_export *, struct md_op_data *, + const void *, int, int, __u32, __u32, __u32, + __u64, struct ptlrpc_request **); + int (*m_done_writing)(struct obd_export *, struct md_op_data *); + int (*m_enqueue)(struct obd_export *, int, struct lookup_intent *, + int, struct md_op_data *, struct lustre_handle *, + void *, int, ldlm_completion_callback, + ldlm_blocking_callback, void *, int); + int (*m_getattr)(struct obd_export *, struct lu_fid *, + obd_valid, int, struct ptlrpc_request **); + int (*m_getattr_name)(struct obd_export *, struct lu_fid *, + const char *, int, obd_valid, + int, struct ptlrpc_request **); + int (*m_intent_lock)(struct obd_export *, struct md_op_data *, + void *, int, struct lookup_intent *, int, + struct ptlrpc_request **, + ldlm_blocking_callback, int); + int (*m_link)(struct obd_export *, struct md_op_data *, + struct ptlrpc_request **); + int (*m_rename)(struct obd_export *, struct md_op_data *, + const char *, int, const char *, 
int, + struct ptlrpc_request **); + int (*m_setattr)(struct obd_export *, struct md_op_data *, + struct iattr *, void *, int , void *, int, + struct ptlrpc_request **); + int (*m_sync)(struct obd_export *, struct lu_fid *, + struct ptlrpc_request **); + int (*m_readpage)(struct obd_export *, struct lu_fid *, + __u64, struct page *, struct ptlrpc_request **); + int (*m_unlink)(struct obd_export *, struct md_op_data *, + struct ptlrpc_request **); + + int (*m_setxattr)(struct obd_export *, struct lu_fid *, + obd_valid, const char *, const char *, + int, int, int, struct ptlrpc_request **); + + int (*m_getxattr)(struct obd_export *, struct lu_fid *, + obd_valid, const char *, const char *, + int, int, int, struct ptlrpc_request **); + + int (*m_init_ea_size)(struct obd_export *, int, int, int); + + int (*m_get_lustre_md)(struct obd_export *, struct ptlrpc_request *, + int, struct obd_export *, struct lustre_md *); + + int (*m_free_lustre_md)(struct obd_export *, struct lustre_md *); + + int (*m_set_open_replay_data)(struct obd_export *, + struct obd_client_handle *, + struct ptlrpc_request *); + int (*m_clear_open_replay_data)(struct obd_export *, + struct obd_client_handle *); + int (*m_set_lock_data)(struct obd_export *, __u64 *, void *); + + int (*m_lock_match)(struct obd_export *, int, struct lu_fid *, + ldlm_type_t, ldlm_policy_data_t *, ldlm_mode_t, + struct lustre_handle *); + + int (*m_cancel_unused)(struct obd_export *, struct lu_fid *, + int flags, void *opaque); + + /* + * NOTE: If adding ops, add another LPROCFS_MD_OP_INIT() line to + * lprocfs_alloc_md_stats() in obdclass/lprocfs_status.c. Also, add a + * wrapper function in include/linux/obd_class.h. 
+ */ +}; + +struct lsm_operations { + void (*lsm_free)(struct lov_stripe_md *); + int (*lsm_destroy)(struct lov_stripe_md *, struct obdo *oa, + struct obd_export *md_exp); + void (*lsm_stripe_by_index)(struct lov_stripe_md *, int *, obd_off *, + unsigned long *); + void (*lsm_stripe_by_offset)(struct lov_stripe_md *, int *, obd_off *, + unsigned long *); + obd_off (*lsm_stripe_offset_by_index)(struct lov_stripe_md *, int); + int (*lsm_stripe_index_by_offset)(struct lov_stripe_md *, obd_off); + int (*lsm_revalidate) (struct lov_stripe_md *, struct obd_device *obd); + int (*lsm_lmm_verify) (struct lov_mds_md *lmm, int lmm_bytes, + int *stripe_count); + int (*lsm_unpackmd) (struct lov_obd *lov, struct lov_stripe_md *lsm, + struct lov_mds_md *lmm); +}; + +extern struct lsm_operations lsm_plain_ops; +extern struct lsm_operations lsm_join_ops; +static inline struct lsm_operations *lsm_op_find(int magic) +{ + switch(magic) { + case LOV_MAGIC: + return &lsm_plain_ops; + case LOV_MAGIC_JOIN: + return &lsm_join_ops; + default: + CERROR("Cannot recognize lsm_magic %d", magic); + return NULL; + } +} + +int lvfs_check_io_health(struct obd_device *obd, struct file *file); + +static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, + int error) +{ + if (error) { + CERROR("%s: transno "LPD64" commit error: %d\n", + obd->obd_name, transno, error); + return; + } + CDEBUG(D_HA, "%s: transno "LPD64" committed\n", + obd->obd_name, transno); + if (transno > obd->obd_last_committed) { + obd->obd_last_committed = transno; + ptlrpc_commit_replies (obd); + } +} + +static inline void init_obd_quota_ops(quota_interface_t *interface, + struct obd_ops *obd_ops) +{ + if (!interface) + return; + + LASSERT(obd_ops); + obd_ops->o_quotacheck = QUOTA_OP(interface, check); + obd_ops->o_quotactl = QUOTA_OP(interface, ctl); +} + +#endif /* __OBD_H */ diff --git a/lustre/include/linux/obd_cache.h b/lustre/include/obd_cache.h similarity index 83% rename from 
lustre/include/linux/obd_cache.h rename to lustre/include/obd_cache.h index e75b9f4..c5ec326 100644 --- a/lustre/include/linux/obd_cache.h +++ b/lustre/include/obd_cache.h @@ -7,7 +7,5 @@ #ifdef __KERNEL__ -#define OBD_CACHE_DEVICENAME "cobd" - #endif #endif diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h new file mode 100644 index 0000000..6832fb8 --- /dev/null +++ b/lustre/include/obd_class.h @@ -0,0 +1,1698 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __CLASS_OBD_H +#define __CLASS_OBD_H + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +/* OBD Device Declarations */ +#define MAX_OBD_DEVICES 520 +extern struct obd_device obd_dev[MAX_OBD_DEVICES]; +extern spinlock_t obd_dev_lock; + +/* OBD Operations Declarations */ +extern struct obd_device *class_conn2obd(struct lustre_handle *); +extern struct obd_device *class_exp2obd(struct obd_export *); + +struct lu_device_type; + +/* genops.c */ +struct obd_export *class_conn2export(struct lustre_handle *); +int class_register_type(struct obd_ops *, struct md_ops *, + struct lprocfs_vars *, const char *nm, + struct lu_device_type *ldt); +int class_unregister_type(const char *nm); + +struct obd_device *class_newdev(struct obd_type *type, char *name); +void class_release_dev(struct obd_device *obd); + +int class_name2dev(const char *name); +struct obd_device *class_name2obd(const char *name); +int class_uuid2dev(struct obd_uuid *uuid); +struct obd_device *class_uuid2obd(struct obd_uuid *uuid); +void class_obd_list(void); +struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid, + const char * typ_name, + struct obd_uuid *grp_uuid); +struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid, + struct obd_uuid *grp_uuid); +struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, + int *next); + +int oig_init(struct obd_io_group **oig); +void oig_add_one(struct obd_io_group *oig, + struct oig_callback_context *occ); +void oig_complete_one(struct obd_io_group *oig, + struct oig_callback_context *occ, int rc); +void oig_release(struct obd_io_group *oig); +int oig_wait(struct obd_io_group *oig); + +char *obd_export_nid2str(struct obd_export *exp); + +int obd_export_evict_by_nid(struct obd_device *obd, const char *nid); +int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid); + +/* obd_config.c */ +int class_process_config(struct lustre_cfg *lcfg); +int class_attach(struct lustre_cfg *lcfg); +int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg); +int class_cleanup(struct obd_device *obd, 
struct lustre_cfg *lcfg); +int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg); +struct obd_device *class_incref(struct obd_device *obd); +void class_decref(struct obd_device *obd); + +#define CFG_F_START 0x01 /* Set when we start updating from a log */ +#define CFG_F_MARKER 0x02 /* We are within a maker */ +#define CFG_F_SKIP 0x04 /* We should ignore this cfg command */ +#define CFG_F_COMPAT146 0x08 /* Translation to new obd names required */ +#define CFG_F_EXCLUDE 0x10 /* OST exclusion list */ + + +/* Passed as data param to class_config_parse_llog */ +struct config_llog_instance { + char * cfg_instance; + struct super_block *cfg_sb; + struct obd_uuid cfg_uuid; + int cfg_last_idx; /* for partial llog processing */ + int cfg_flags; +}; +int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, + struct config_llog_instance *cfg); +int class_config_dump_llog(struct llog_ctxt *ctxt, char *name, + struct config_llog_instance *cfg); + +/* list of active configuration logs */ +struct config_llog_data { + char *cld_logname; + struct ldlm_res_id cld_resid; + struct config_llog_instance cld_cfg; + struct list_head cld_list_chain; + atomic_t cld_refcount; + unsigned int cld_stopping:1; +}; + +struct lustre_profile { + struct list_head lp_list; + char * lp_profile; + char * lp_osc; + char * lp_mdc; +}; + +struct lustre_profile *class_get_profile(const char * prof); +void class_del_profile(const char *prof); + +/* genops.c */ +#define class_export_get(exp) \ +({ \ + struct obd_export *exp_ = exp; \ + atomic_inc(&exp_->exp_refcount); \ + CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp_, \ + atomic_read(&exp_->exp_refcount)); \ + exp_; \ +}) + +#define class_export_put(exp) \ +do { \ + LASSERT((exp) != NULL); \ + CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", (exp), \ + atomic_read(&(exp)->exp_refcount) - 1); \ + LASSERT(atomic_read(&(exp)->exp_refcount) > 0); \ + LASSERT(atomic_read(&(exp)->exp_refcount) < 0x5a5a5a); \ + 
__class_export_put(exp); \ +} while (0) +void __class_export_put(struct obd_export *); +struct obd_export *class_new_export(struct obd_device *obddev, + struct obd_uuid *cluuid); +void class_unlink_export(struct obd_export *exp); + +struct obd_import *class_import_get(struct obd_import *); +void class_import_put(struct obd_import *); +struct obd_import *class_new_import(struct obd_device *obd); +void class_destroy_import(struct obd_import *exp); + +struct obd_type *class_search_type(const char *name); +struct obd_type *class_get_type(const char *name); +void class_put_type(struct obd_type *type); +int class_connect(struct lustre_handle *conn, struct obd_device *obd, + struct obd_uuid *cluuid); +int class_disconnect(struct obd_export *exp); +void class_fail_export(struct obd_export *exp); +void class_disconnect_exports(struct obd_device *obddev); +void class_disconnect_stale_exports(struct obd_device *obddev); +int class_manual_cleanup(struct obd_device *obd); + +void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid); +int obdo_cmp_md(struct obdo *dst, struct obdo *src, obd_flag compare); +void obdo_to_ioobj(struct obdo *oa, struct obd_ioobj *ioobj); + + +#define OBT(dev) (dev)->obd_type +#define OBP(dev, op) (dev)->obd_type->typ_dt_ops->o_ ## op +#define MDP(dev, op) (dev)->obd_type->typ_md_ops->m_ ## op +#define CTXTP(ctxt, op) (ctxt)->loc_logops->lop_##op + +/* Ensure obd_setup: used for cleanup which must be called + while obd is stopping */ +#define OBD_CHECK_DEV(obd) \ +do { \ + if (!(obd)) { \ + CERROR("NULL device\n"); \ + RETURN(-ENODEV); \ + } \ +} while (0) + +/* ensure obd_setup and !obd_stopping */ +#define OBD_CHECK_DEV_ACTIVE(obd) \ +do { \ + OBD_CHECK_DEV(obd); \ + if (!(obd)->obd_set_up || (obd)->obd_stopping) { \ + CERROR("Device %d not setup\n", \ + (obd)->obd_minor); \ + RETURN(-ENODEV); \ + } \ +} while (0) + + +#ifdef LPROCFS +#define OBD_COUNTER_OFFSET(op) \ + ((offsetof(struct obd_ops, o_ ## op) - \ + offsetof(struct obd_ops, 
o_iocontrol)) \ + / sizeof(((struct obd_ops *)(0))->o_iocontrol)) + +#define OBD_COUNTER_INCREMENT(obd, op) \ + if ((obd)->obd_stats != NULL) { \ + unsigned int coffset; \ + coffset = (unsigned int)(obd)->obd_cntr_base + \ + OBD_COUNTER_OFFSET(op); \ + LASSERT(coffset < obd->obd_stats->ls_num); \ + lprocfs_counter_incr(obd->obd_stats, coffset); \ + } + +#define MD_COUNTER_OFFSET(op) \ + ((offsetof(struct md_ops, m_ ## op) - \ + offsetof(struct md_ops, m_getstatus)) \ + / sizeof(((struct md_ops *)(0))->m_getstatus)) + +#define MD_COUNTER_INCREMENT(obd, op) \ + if ((obd)->md_stats != NULL) { \ + unsigned int coffset; \ + coffset = (unsigned int)(obd)->md_cntr_base + \ + MD_COUNTER_OFFSET(op); \ + LASSERT(coffset < (obd)->md_stats->ls_num); \ + lprocfs_counter_incr((obd)->md_stats, coffset); \ + } + +#else +#define OBD_COUNTER_OFFSET(op) +#define OBD_COUNTER_INCREMENT(obd, op) +#define MD_COUNTER_INCREMENT(obd, op) +#endif + +#define OBD_CHECK_MD_OP(obd, op, err) \ +do { \ + if (!OBT(obd) || !MDP((obd), op)) { \ + if (err) \ + CERROR("md_" #op ": dev %s/%d no operation\n", \ + obd->obd_name, obd->obd_minor); \ + RETURN(err); \ + } \ +} while (0) + +#define EXP_CHECK_MD_OP(exp, op) \ +do { \ + if ((exp) == NULL) { \ + CERROR("obd_" #op ": NULL export\n"); \ + RETURN(-ENODEV); \ + } \ + if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \ + CERROR("obd_" #op ": cleaned up obd\n"); \ + RETURN(-EOPNOTSUPP); \ + } \ + if (!OBT((exp)->exp_obd) || !MDP((exp)->exp_obd, op)) { \ + CERROR("obd_" #op ": dev %s/%d no operation\n", \ + (exp)->exp_obd->obd_name, \ + (exp)->exp_obd->obd_minor); \ + RETURN(-EOPNOTSUPP); \ + } \ +} while (0) + + +#define OBD_CHECK_DT_OP(obd, op, err) \ +do { \ + if (!OBT(obd) || !OBP((obd), op)) { \ + if (err) \ + CERROR("obd_" #op ": dev %d no operation\n", \ + obd->obd_minor); \ + RETURN(err); \ + } \ +} while (0) + +#define EXP_CHECK_DT_OP(exp, op) \ +do { \ + if ((exp) == NULL) { \ + CERROR("obd_" #op ": NULL export\n"); \ + RETURN(-ENODEV); \ 
+ } \ + if ((exp)->exp_obd == NULL || !OBT((exp)->exp_obd)) { \ + CERROR("obd_" #op ": cleaned up obd\n"); \ + RETURN(-EOPNOTSUPP); \ + } \ + if (!OBT((exp)->exp_obd) || !OBP((exp)->exp_obd, op)) { \ + CERROR("obd_" #op ": dev %d no operation\n", \ + (exp)->exp_obd->obd_minor); \ + RETURN(-EOPNOTSUPP); \ + } \ +} while (0) + +#define CTXT_CHECK_OP(ctxt, op, err) \ +do { \ + if (!OBT(ctxt->loc_obd) || !CTXTP((ctxt), op)) { \ + if (err) \ + CERROR("lop_" #op ": dev %d no operation\n", \ + ctxt->loc_obd->obd_minor); \ + RETURN(err); \ + } \ +} while (0) + +static inline int obd_get_info(struct obd_export *exp, __u32 keylen, + void *key, __u32 *vallen, void *val) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, get_info); + OBD_COUNTER_INCREMENT(exp->exp_obd, get_info); + + rc = OBP(exp->exp_obd, get_info)(exp, keylen, key, vallen, val); + RETURN(rc); +} + +static inline int obd_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, set_info_async); + OBD_COUNTER_INCREMENT(exp->exp_obd, set_info_async); + + rc = OBP(exp->exp_obd, set_info_async)(exp, keylen, key, vallen, val, + set); + RETURN(rc); +} + +static inline int obd_setup(struct obd_device *obd, struct lustre_cfg *cfg) +{ + int rc; + struct lu_device_type *ldt; + ENTRY; + + ldt = obd->obd_type->typ_lu; + if (ldt != NULL) { +#ifdef __KERNEL__ + struct lu_context ctx; + struct lu_device *d; + + rc = lu_context_init(&ctx); + if (rc == 0) { + lu_context_enter(&ctx); + + d = ldt->ldt_ops->ldto_device_alloc(&ctx, ldt, cfg); + if (!IS_ERR(d)) { + obd->obd_lu_dev = d; + d->ld_obd = obd; + rc = 0; + } else + rc = PTR_ERR(d); + } +#endif + } else { + OBD_CHECK_DT_OP(obd, setup, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, setup); + rc = OBP(obd, setup)(obd, cfg); + } + RETURN(rc); +} + +static inline int obd_precleanup(struct obd_device *obd, + enum obd_cleanup_stage cleanup_stage) +{ + int rc; + 
ENTRY; + + OBD_CHECK_DT_OP(obd, precleanup, 0); + OBD_COUNTER_INCREMENT(obd, precleanup); + + rc = OBP(obd, precleanup)(obd, cleanup_stage); + RETURN(rc); +} + +static inline int obd_cleanup(struct obd_device *obd) +{ + int rc; + struct lu_device *d; + struct lu_device_type *ldt; + ENTRY; + + OBD_CHECK_DEV(obd); + + ldt = obd->obd_type->typ_lu; + d = obd->obd_lu_dev; + if (ldt != NULL && d != NULL) { +#ifdef __KERNEL__ + struct lu_context ctx; + + rc = lu_context_init(&ctx); + if (rc == 0) { + lu_context_enter(&ctx); + ldt->ldt_ops->ldto_device_free(&ctx, d); + lu_context_exit(&ctx); + lu_context_fini(&ctx); + obd->obd_lu_dev = NULL; + rc = 0; + } +#endif + + } else { + OBD_CHECK_DT_OP(obd, cleanup, 0); + rc = OBP(obd, cleanup)(obd); + } + OBD_COUNTER_INCREMENT(obd, cleanup); + RETURN(rc); +} + +static inline int +obd_process_config(struct obd_device *obd, int datalen, void *data) +{ + int rc; + struct lu_device *d; + struct lu_device_type *ldt; + ENTRY; + + OBD_CHECK_DEV(obd); + + ldt = obd->obd_type->typ_lu; + d = obd->obd_lu_dev; + if (ldt != NULL && d != NULL) { +#ifdef __KERNEL__ + struct lu_context ctx; + + rc = lu_context_init(&ctx); + if (rc == 0) { + lu_context_enter(&ctx); + rc = d->ld_ops->ldo_process_config(&ctx, d, data); + lu_context_exit(&ctx); + lu_context_fini(&ctx); + } +#endif + } else { + OBD_CHECK_DT_OP(obd, process_config, -EOPNOTSUPP); + rc = OBP(obd, process_config)(obd, datalen, data); + } + OBD_COUNTER_INCREMENT(obd, process_config); + + RETURN(rc); +} + +/* Pack an in-memory MD struct for storage on disk. + * Returns +ve size of packed MD (0 for free), or -ve error. + * + * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL). + * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed. 
+ * If @*disk_tgt == NULL, it will be allocated + */ +static inline int obd_packmd(struct obd_export *exp, + struct lov_mds_md **disk_tgt, + struct lov_stripe_md *mem_src) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, packmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, packmd); + + rc = OBP(exp->exp_obd, packmd)(exp, disk_tgt, mem_src); + RETURN(rc); +} + +static inline int obd_size_diskmd(struct obd_export *exp, + struct lov_stripe_md *mem_src) +{ + return obd_packmd(exp, NULL, mem_src); +} + +/* helper functions */ +static inline int obd_alloc_diskmd(struct obd_export *exp, + struct lov_mds_md **disk_tgt) +{ + LASSERT(disk_tgt); + LASSERT(*disk_tgt == NULL); + return obd_packmd(exp, disk_tgt, NULL); +} + +static inline int obd_free_diskmd(struct obd_export *exp, + struct lov_mds_md **disk_tgt) +{ + LASSERT(disk_tgt); + LASSERT(*disk_tgt); + return obd_packmd(exp, disk_tgt, NULL); +} + +/* Unpack an MD struct from disk to in-memory format. + * Returns +ve size of unpacked MD (0 for free), or -ve error. + * + * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL). + * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed. 
+ * If @*mem_tgt == NULL, it will be allocated + */ +static inline int obd_unpackmd(struct obd_export *exp, + struct lov_stripe_md **mem_tgt, + struct lov_mds_md *disk_src, + int disk_len) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, unpackmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd); + + rc = OBP(exp->exp_obd, unpackmd)(exp, mem_tgt, disk_src, disk_len); + RETURN(rc); +} + +/* helper functions */ +static inline int obd_alloc_memmd(struct obd_export *exp, + struct lov_stripe_md **mem_tgt) +{ + LASSERT(mem_tgt); + LASSERT(*mem_tgt == NULL); + return obd_unpackmd(exp, mem_tgt, NULL, 0); +} + +static inline int obd_free_memmd(struct obd_export *exp, + struct lov_stripe_md **mem_tgt) +{ + LASSERT(mem_tgt); + LASSERT(*mem_tgt); + return obd_unpackmd(exp, mem_tgt, NULL, 0); +} + +static inline int obd_checkmd(struct obd_export *exp, + struct obd_export *md_exp, + struct lov_stripe_md *mem_tgt) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, checkmd); + OBD_COUNTER_INCREMENT(exp->exp_obd, checkmd); + + rc = OBP(exp->exp_obd, checkmd)(exp, md_exp, mem_tgt); + RETURN(rc); +} + +static inline int obd_create(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md **ea, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, create); + OBD_COUNTER_INCREMENT(exp->exp_obd, create); + + rc = OBP(exp->exp_obd, create)(exp, obdo, ea, oti); + RETURN(rc); +} + +static inline int obd_destroy(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti, + struct obd_export *md_exp) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, destroy); + OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); + + rc = OBP(exp->exp_obd, destroy)(exp, obdo, ea, oti, md_exp); + RETURN(rc); +} + +static inline int obd_getattr(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md *ea) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, getattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); + + rc = OBP(exp->exp_obd, 
getattr)(exp, obdo, ea); + RETURN(rc); +} + +static inline int obd_getattr_async(struct obd_export *exp, + struct obdo *obdo, struct lov_stripe_md *ea, + struct ptlrpc_request_set *set) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, getattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); + + rc = OBP(exp->exp_obd, getattr_async)(exp, obdo, ea, set); + RETURN(rc); +} + +static inline int obd_setattr(struct obd_export *exp, struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, setattr); + OBD_COUNTER_INCREMENT(exp->exp_obd, setattr); + + rc = OBP(exp->exp_obd, setattr)(exp, obdo, ea, oti); + RETURN(rc); +} + +static inline int obd_setattr_async(struct obd_export *exp, + struct obdo *obdo, + struct lov_stripe_md *ea, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, setattr_async); + OBD_COUNTER_INCREMENT(exp->exp_obd, setattr_async); + + rc = OBP(exp->exp_obd, setattr_async)(exp, obdo, ea, oti); + RETURN(rc); +} + +static inline int obd_add_conn(struct obd_import *imp, struct obd_uuid *uuid, + int priority) +{ + struct obd_device *obd = imp->imp_obd; + int rc; + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_DT_OP(obd, add_conn, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, add_conn); + + rc = OBP(obd, add_conn)(imp, uuid, priority); + RETURN(rc); +} + +static inline int obd_del_conn(struct obd_import *imp, struct obd_uuid *uuid) +{ + struct obd_device *obd = imp->imp_obd; + int rc; + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_DT_OP(obd, del_conn, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, del_conn); + + rc = OBP(obd, del_conn)(imp, uuid); + RETURN(rc); +} + +static inline int obd_connect(struct lustre_handle *conn,struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *d) +{ + int rc; + __u64 ocf = d ? 
d->ocd_connect_flags : 0; /* for post-condition check */ + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_DT_OP(obd, connect, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(obd, connect); + + rc = OBP(obd, connect)(conn, obd, cluuid, d); + /* check that only subset is granted */ + LASSERT(ergo(d != NULL, + (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); + RETURN(rc); +} + +static inline int obd_reconnect(struct obd_export *exp, + struct obd_device *obd, + struct obd_uuid *cluuid, + struct obd_connect_data *d) +{ + int rc; + __u64 ocf = d ? d->ocd_connect_flags : 0; /* for post-condition check */ + ENTRY; + + OBD_CHECK_DEV_ACTIVE(obd); + OBD_CHECK_DT_OP(obd, reconnect, 0); + OBD_COUNTER_INCREMENT(obd, reconnect); + + rc = OBP(obd, reconnect)(exp, obd, cluuid, d); + /* check that only subset is granted */ + LASSERT(ergo(d != NULL, + (d->ocd_connect_flags & ocf) == d->ocd_connect_flags)); + RETURN(rc); +} + +static inline int obd_disconnect(struct obd_export *exp) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, disconnect); + OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect); + + rc = OBP(exp->exp_obd, disconnect)(exp); + RETURN(rc); +} + +static inline int obd_fid_alloc(struct obd_export *exp, + struct lu_fid *fid, + struct placement_hint *hint) +{ + int rc; + ENTRY; + + if (OBP(exp->exp_obd, fid_alloc) == NULL) + RETURN(-ENOTSUPP); + + OBD_COUNTER_INCREMENT(exp->exp_obd, fid_alloc); + + rc = OBP(exp->exp_obd, fid_alloc)(exp, fid, hint); + RETURN(rc); +} + +static inline int obd_fid_delete(struct obd_export *exp, + struct lu_fid *fid) +{ + int rc; + ENTRY; + + if (OBP(exp->exp_obd, fid_delete) == NULL) + RETURN(0); + + OBD_COUNTER_INCREMENT(exp->exp_obd, fid_delete); + rc = OBP(exp->exp_obd, fid_delete)(exp, fid); + RETURN(rc); +} + +static inline int obd_init_export(struct obd_export *exp) +{ + int rc = 0; + + ENTRY; + if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) && + OBP((exp)->exp_obd, init_export)) + rc = OBP(exp->exp_obd, init_export)(exp); + RETURN(rc); +} 
+
+/* Give the device type a chance to release per-export state.
+ *
+ * The destroy_export method is optional: it is invoked only when the export
+ * still has a device, that device still has a type, and the type provides
+ * the method.  The method's return value is discarded; this wrapper always
+ * returns 0.
+ */
+static inline int obd_destroy_export(struct obd_export *exp)
+{
+        ENTRY;
+        if ((exp)->exp_obd != NULL && OBT((exp)->exp_obd) &&
+            OBP((exp)->exp_obd, destroy_export))
+                OBP(exp->exp_obd, destroy_export)(exp);
+        RETURN(0);
+}
+
+/* Look up a dentry by inode number/generation/group in the lvfs context of
+ * the export's device.  The export must have a device attached (asserted).
+ * Returns whatever lvfs_fid2dentry() returns.
+ */
+static inline struct dentry *
+obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr)
+{
+        LASSERT(exp->exp_obd);
+
+        return lvfs_fid2dentry(&exp->exp_obd->obd_lvfs_ctxt, id_ino, gen, gr,
+                               exp->exp_obd);
+}
+
+/* Placeholder: the real lvfs_open_llog() call is compiled out below, so this
+ * only asserts that the export has a device, logs an error and returns 0.
+ * @id_ino and @dentry are currently unused.
+ */
+static inline int
+obd_lvfs_open_llog(struct obd_export *exp, __u64 id_ino, struct dentry *dentry)
+{
+        LASSERT(exp->exp_obd);
+        CERROR("FIXME what's the story here? This needs to be an obd fn?\n");
+#if 0
+        return lvfs_open_llog(&exp->exp_obd->obd_lvfs_ctxt, id_ino,
+                              dentry, exp->exp_obd);
+#endif
+        return 0;
+}
+
+/* Fallback for platforms that do not provide time_before(): true when @t1 is
+ * earlier than @t2, decided by the sign of their difference taken as long.
+ * Both parameters are parenthesized so that expression arguments such as
+ * "jiffies + HZ" are cast as a whole rather than only their first operand.
+ */
+#ifndef time_before
+#define time_before(t1, t2) ((long)(t2) - (long)(t1) > 0)
+#endif
+
+/* Get filesystem statistics for @obd, using the per-device cache if possible.
+ *
+ * @max_age is the oldest time in jiffies for which cached data is accepted.
+ * If the cache is older than @max_age we will get a new value from the
+ * target.  Use a value of "jiffies + HZ" to guarantee freshness.
+ *
+ * On a successful refresh the result is copied into obd->obd_osfs and
+ * obd->obd_osfs_age is set to the current jiffies, both under
+ * obd->obd_osfs_lock.  Otherwise @osfs is filled from the cached copy under
+ * the same lock.
+ *
+ * Returns -EINVAL for a NULL @obd, -EOPNOTSUPP if the device has no statfs
+ * method, the method's return code when it is called, and 0 when the cached
+ * data is used.
+ */
+static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
+                             unsigned long max_age)
+{
+        int rc = 0;
+        ENTRY;
+
+        if (obd == NULL)
+                RETURN(-EINVAL);
+
+        OBD_CHECK_DT_OP(obd, statfs, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(obd, statfs);
+
+        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
+        if (time_before(obd->obd_osfs_age, max_age)) {
+                /* cache is too old for the caller: ask the target */
+                rc = OBP(obd, statfs)(obd, osfs, max_age);
+                if (rc == 0) {
+                        spin_lock(&obd->obd_osfs_lock);
+                        memcpy(&obd->obd_osfs, osfs, sizeof(obd->obd_osfs));
+                        obd->obd_osfs_age = jiffies;
+                        spin_unlock(&obd->obd_osfs_lock);
+                }
+        } else {
+                CDEBUG(D_SUPER, "using cached obd_statfs data\n");
+                spin_lock(&obd->obd_osfs_lock);
+                memcpy(osfs, &obd->obd_osfs, sizeof(*osfs));
+                spin_unlock(&obd->obd_osfs_lock);
+        }
+        RETURN(rc);
+}
+
+/* Dispatch to the device's sync method for the range [@start, @end] of the
+ * object described by @oa/@ea.  Returns -EOPNOTSUPP if the device has no
+ * type or no sync method, otherwise the method's return code.
+ *
+ * NOTE(review): unlike obd_punch()/obd_brw() this uses OBD_CHECK_DT_OP on
+ * exp->exp_obd, so @exp itself is dereferenced without a NULL check.
+ */
+static inline int obd_sync(struct obd_export *exp, struct obdo *oa,
+                           struct lov_stripe_md *ea, obd_size start,
+                           obd_size end)
+{
+        int rc;
+        ENTRY;
+
+        OBD_CHECK_DT_OP(exp->exp_obd, sync, -EOPNOTSUPP);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, sync);
+
+        rc = OBP(exp->exp_obd, sync)(exp, oa, ea, start, end);
+        RETURN(rc);
+}
+
+/* Dispatch to the device's punch method for the range [@start, @end].
+ * Returns -ENODEV for a NULL export, -EOPNOTSUPP if the device is gone or
+ * has no punch method, otherwise the method's return code.
+ */
+static inline int obd_punch(struct obd_export *exp, struct obdo *oa,
+                            struct lov_stripe_md *ea, obd_size start,
+                            obd_size end, struct obd_trans_info *oti)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, punch);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, punch);
+
+        rc = OBP(exp->exp_obd, punch)(exp, oa, ea, start, end, oti);
+        RETURN(rc);
+}
+
+/* Dispatch a bulk read/write of @oa_bufs pages in @pg to the device's brw
+ * method.  @cmd must have at least one OBD_BRW_RWMASK or OBD_BRW_CHECK bit
+ * set; anything else is treated as a caller bug (LBUG).
+ * Returns -ENODEV for a NULL export, -EOPNOTSUPP if the device is gone or
+ * has no brw method, otherwise the method's return code.
+ */
+static inline int obd_brw(int cmd, struct obd_export *exp, struct obdo *oa,
+                          struct lov_stripe_md *ea, obd_count oa_bufs,
+                          struct brw_page *pg, struct obd_trans_info *oti)
+{
+        int rc;
+        ENTRY;
+
+        EXP_CHECK_DT_OP(exp, brw);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, brw);
+
+        if (!(cmd & (OBD_BRW_RWMASK | OBD_BRW_CHECK))) {
+                CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, "
+                       "or OBD_BRW_CHECK\n");
+                LBUG();
+        }
+
+        rc = OBP(exp->exp_obd, brw)(cmd, exp, oa, ea, oa_bufs, pg, oti);
+        RETURN(rc);
+}
+
+static inline int obd_brw_async(int cmd, struct obd_export *exp, + struct obdo *oa, struct lov_stripe_md *ea, + obd_count oa_bufs, struct brw_page *pg, + struct ptlrpc_request_set *set, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, brw_async); + OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async); + + if (!(cmd & OBD_BRW_RWMASK)) { + CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n"); + LBUG(); + } + + rc = OBP(exp->exp_obd, brw_async)(cmd, exp, oa, ea, oa_bufs, pg, set, + oti); + RETURN(rc); +} + +static inline int obd_prep_async_page(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct page *page, obd_off offset, + struct obd_async_page_ops *ops, + void *data, void **res) +{ + int ret; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, prep_async_page, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, prep_async_page); + + ret = OBP(exp->exp_obd, prep_async_page)(exp, lsm, loi, page, offset, + ops, data, res); + RETURN(ret); +} + +static inline int obd_queue_async_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + int cmd, obd_off off, int count, + obd_flag brw_flags, obd_flag async_flags) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, queue_async_io, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, queue_async_io); + LASSERT(cmd & OBD_BRW_RWMASK); + + rc = OBP(exp->exp_obd, queue_async_io)(exp, lsm, loi, cookie, cmd, off, + count, brw_flags, async_flags); + RETURN(rc); +} + +static inline int obd_set_async_flags(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie, + obd_flag async_flags) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, set_async_flags, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, set_async_flags); + + rc = OBP(exp->exp_obd, set_async_flags)(exp, lsm, loi, cookie, + async_flags); + RETURN(rc); +} + +static inline int obd_queue_group_io(struct obd_export *exp, + struct 
lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig, + void *cookie, int cmd, obd_off off, + int count, obd_flag brw_flags, + obd_flag async_flags) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, queue_group_io, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, queue_group_io); + LASSERT(cmd & OBD_BRW_RWMASK); + + rc = OBP(exp->exp_obd, queue_group_io)(exp, lsm, loi, oig, cookie, + cmd, off, count, brw_flags, + async_flags); + RETURN(rc); +} + +static inline int obd_trigger_group_io(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, + struct obd_io_group *oig) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, trigger_group_io, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, trigger_group_io); + + rc = OBP(exp->exp_obd, trigger_group_io)(exp, lsm, loi, oig); + RETURN(rc); +} + +static inline int obd_teardown_async_page(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct lov_oinfo *loi, void *cookie) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, teardown_async_page, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, teardown_async_page); + + rc = OBP(exp->exp_obd, teardown_async_page)(exp, lsm, loi, cookie); + RETURN(rc); +} + +static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_remote *remote, + struct niobuf_local *local, + struct obd_trans_info *oti) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, preprw, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); + + rc = OBP(exp->exp_obd, preprw)(cmd, exp, oa, objcount, obj, niocount, + remote, local, oti); + RETURN(rc); +} + +static inline int obd_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_local *local, + struct obd_trans_info *oti, int rc) +{ + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, commitrw, -EOPNOTSUPP); + 
OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw); + + rc = OBP(exp->exp_obd, commitrw)(cmd, exp, oa, objcount, obj, niocount, + local, oti, rc); + RETURN(rc); +} + +static inline int obd_merge_lvb(struct obd_export *exp, + struct lov_stripe_md *lsm, + struct ost_lvb *lvb, int kms_only) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, merge_lvb, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, merge_lvb); + + rc = OBP(exp->exp_obd, merge_lvb)(exp, lsm, lvb, kms_only); + RETURN(rc); +} + +static inline int obd_adjust_kms(struct obd_export *exp, + struct lov_stripe_md *lsm, obd_off size, + int shrink) +{ + int rc; + ENTRY; + + OBD_CHECK_DT_OP(exp->exp_obd, adjust_kms, -EOPNOTSUPP); + OBD_COUNTER_INCREMENT(exp->exp_obd, adjust_kms); + + rc = OBP(exp->exp_obd, adjust_kms)(exp, lsm, size, shrink); + RETURN(rc); +} + +static inline int obd_iocontrol(unsigned int cmd, struct obd_export *exp, + int len, void *karg, void *uarg) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, iocontrol); + OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol); + + rc = OBP(exp->exp_obd, iocontrol)(cmd, exp, len, karg, uarg); + RETURN(rc); +} + +static inline int obd_enqueue(struct obd_export *exp, struct lov_stripe_md *ea, + __u32 type, ldlm_policy_data_t *policy, + __u32 mode, int *flags, void *bl_cb, void *cp_cb, + void *gl_cb, void *data, __u32 lvb_len, + void *lvb_swabber, struct lustre_handle *lockh) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, enqueue); + OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue); + + rc = OBP(exp->exp_obd, enqueue)(exp, ea, type, policy, mode, flags, + bl_cb, cp_cb, gl_cb, data, lvb_len, + lvb_swabber, lockh); + RETURN(rc); +} + +static inline int obd_match(struct obd_export *exp, struct lov_stripe_md *ea, + __u32 type, ldlm_policy_data_t *policy, __u32 mode, + int *flags, void *data, struct lustre_handle *lockh) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, match); + OBD_COUNTER_INCREMENT(exp->exp_obd, match); + + rc = OBP(exp->exp_obd, match)(exp, ea, type, 
policy, mode, flags, data, + lockh); + RETURN(rc); +} + +static inline int obd_change_cbdata(struct obd_export *exp, + struct lov_stripe_md *lsm, + ldlm_iterator_t it, void *data) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, change_cbdata); + OBD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata); + + rc = OBP(exp->exp_obd, change_cbdata)(exp, lsm, it, data); + RETURN(rc); +} + +static inline int obd_cancel(struct obd_export *exp, + struct lov_stripe_md *ea, __u32 mode, + struct lustre_handle *lockh) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, cancel); + OBD_COUNTER_INCREMENT(exp->exp_obd, cancel); + + rc = OBP(exp->exp_obd, cancel)(exp, ea, mode, lockh); + RETURN(rc); +} + +static inline int obd_cancel_unused(struct obd_export *exp, + struct lov_stripe_md *ea, + int flags, void *opaque) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, cancel_unused); + OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); + + rc = OBP(exp->exp_obd, cancel_unused)(exp, ea, flags, opaque); + RETURN(rc); +} + +static inline int obd_join_lru(struct obd_export *exp, + struct lov_stripe_md *ea, int join) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, join_lru); + OBD_COUNTER_INCREMENT(exp->exp_obd, join_lru); + + rc = OBP(exp->exp_obd, join_lru)(exp, ea, join); + RETURN(rc); +} + +static inline int obd_san_preprw(int cmd, struct obd_export *exp, + struct obdo *oa, + int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_remote *remote) +{ + int rc; + + EXP_CHECK_DT_OP(exp, preprw); + OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); + + rc = OBP(exp->exp_obd, san_preprw)(cmd, exp, oa, objcount, obj, + niocount, remote); + class_export_put(exp); + return(rc); +} + +static inline int obd_pin(struct obd_export *exp, struct lu_fid *fid, + struct obd_client_handle *handle, int flag) +{ + int rc; + + EXP_CHECK_DT_OP(exp, pin); + OBD_COUNTER_INCREMENT(exp->exp_obd, pin); + + rc = OBP(exp->exp_obd, pin)(exp, fid, handle, flag); + return(rc); +} + +static inline int obd_unpin(struct 
obd_export *exp, + struct obd_client_handle *handle, int flag) +{ + int rc; + + EXP_CHECK_DT_OP(exp, unpin); + OBD_COUNTER_INCREMENT(exp->exp_obd, unpin); + + rc = OBP(exp->exp_obd, unpin)(exp, handle, flag); + return(rc); +} + + +static inline void obd_import_event(struct obd_device *obd, + struct obd_import *imp, + enum obd_import_event event) +{ + if (!obd) { + CERROR("NULL device\n"); + EXIT; + return; + } + if (obd->obd_set_up && OBP(obd, import_event)) { + OBD_COUNTER_INCREMENT(obd, import_event); + OBP(obd, import_event)(obd, imp, event); + } +} + +static inline int obd_notify(struct obd_device *obd, + struct obd_device *watched, + enum obd_notify_event ev, + void *data) +{ + OBD_CHECK_DEV(obd); + + /* the check for async_recov is a complete hack - I'm hereby + overloading the meaning to also mean "this was called from + mds_postsetup". I know that my mds is able to handle notifies + by this point, and it needs to get them to execute mds_postrecov. */ + if (!obd->obd_set_up && !obd->obd_async_recov) { + CDEBUG(D_HA, "obd %s not set up\n", obd->obd_name); + return -EINVAL; + } + + if (!OBP(obd, notify)) { + CERROR("obd %s has no notify handler\n", obd->obd_name); + return -ENOSYS; + } + + OBD_COUNTER_INCREMENT(obd, notify); + return OBP(obd, notify)(obd, watched, ev, data); +} + +static inline int obd_notify_observer(struct obd_device *observer, + struct obd_device *observed, + enum obd_notify_event ev, + void *data) +{ + int rc1; + int rc2; + + struct obd_notify_upcall *onu; + + if (observer->obd_observer) + rc1 = obd_notify(observer->obd_observer, observed, ev, data); + else + rc1 = 0; + /* + * Also, call non-obd listener, if any + */ + onu = &observer->obd_upcall; + if (onu->onu_upcall != NULL) + rc2 = onu->onu_upcall(observer, observed, ev, onu->onu_owner); + else + rc2 = 0; + + return rc1 ? 
rc1 : rc2; +} + +static inline int obd_quotacheck(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, quotacheck); + OBD_COUNTER_INCREMENT(exp->exp_obd, quotacheck); + + rc = OBP(exp->exp_obd, quotacheck)(exp, oqctl); + RETURN(rc); +} + +static inline int obd_quotactl(struct obd_export *exp, + struct obd_quotactl *oqctl) +{ + int rc; + ENTRY; + + EXP_CHECK_DT_OP(exp, quotactl); + OBD_COUNTER_INCREMENT(exp->exp_obd, quotactl); + + rc = OBP(exp->exp_obd, quotactl)(exp, oqctl); + RETURN(rc); +} + +static inline int obd_health_check(struct obd_device *obd) +{ + /* returns: 0 on healthy + * >0 on unhealthy + reason code/flag + * however the only suppored reason == 1 right now + * We'll need to define some better reasons + * or flags in the future. + * <0 on error + */ + int rc; + ENTRY; + + /* don't use EXP_CHECK_OP, because NULL method is normal here */ + if (obd == NULL || !OBT(obd)) { + CERROR("cleaned up obd\n"); + RETURN(-EOPNOTSUPP); + } + if (!obd->obd_set_up || obd->obd_stopping) + RETURN(0); + if (!OBP(obd, health_check)) + RETURN(0); + + rc = OBP(obd, health_check)(obd); + RETURN(rc); +} + +static inline int obd_register_observer(struct obd_device *obd, + struct obd_device *observer) +{ + ENTRY; + OBD_CHECK_DEV(obd); + if (obd->obd_observer && observer) + RETURN(-EALREADY); + obd->obd_observer = observer; + RETURN(0); +} + +/* metadata helpers */ +static inline int md_getstatus(struct obd_export *exp, struct lu_fid *fid) +{ + int rc; + ENTRY; + + EXP_CHECK_MD_OP(exp, getstatus); + MD_COUNTER_INCREMENT(exp->exp_obd, getstatus); + rc = MDP(exp->exp_obd, getstatus)(exp, fid); + RETURN(rc); +} + +static inline int md_getattr(struct obd_export *exp, struct lu_fid *fid, + obd_valid valid, int ea_size, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, getattr); + MD_COUNTER_INCREMENT(exp->exp_obd, getattr); + rc = MDP(exp->exp_obd, getattr)(exp, fid, valid, + ea_size, request); + 
RETURN(rc); +} + +static inline int md_change_cbdata(struct obd_export *exp, struct lu_fid *fid, + ldlm_iterator_t it, void *data) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, change_cbdata); + MD_COUNTER_INCREMENT(exp->exp_obd, change_cbdata); + rc = MDP(exp->exp_obd, change_cbdata)(exp, fid, it, data); + RETURN(rc); +} + +static inline int md_close(struct obd_export *exp, + struct md_op_data *op_data, + struct obd_client_handle *och, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, close); + MD_COUNTER_INCREMENT(exp->exp_obd, close); + rc = MDP(exp->exp_obd, close)(exp, op_data, och, request); + RETURN(rc); +} + +static inline int md_create(struct obd_export *exp, struct md_op_data *op_data, + const void *data, int datalen, int mode, + __u32 uid, __u32 gid, __u32 cap_effective, __u64 rdev, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, create); + MD_COUNTER_INCREMENT(exp->exp_obd, create); + rc = MDP(exp->exp_obd, create)(exp, op_data, data, datalen, mode, + uid, gid, cap_effective, rdev, request); + RETURN(rc); +} + +static inline int md_done_writing(struct obd_export *exp, + struct md_op_data *op_data) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, done_writing); + MD_COUNTER_INCREMENT(exp->exp_obd, done_writing); + rc = MDP(exp->exp_obd, done_writing)(exp, op_data); + RETURN(rc); +} + +static inline int md_enqueue(struct obd_export *exp, int lock_type, + struct lookup_intent *it, int lock_mode, + struct md_op_data *op_data, + struct lustre_handle *lockh, + void *lmm, int lmmsize, + ldlm_completion_callback cb_completion, + ldlm_blocking_callback cb_blocking, + void *cb_data, int extra_lock_flags) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, enqueue); + MD_COUNTER_INCREMENT(exp->exp_obd, enqueue); + rc = MDP(exp->exp_obd, enqueue)(exp, lock_type, it, lock_mode, + op_data, lockh, lmm, lmmsize, + cb_completion, cb_blocking, + cb_data, extra_lock_flags); + RETURN(rc); +} + +static inline int 
md_getattr_name(struct obd_export *exp, struct lu_fid *fid, + const char *filename, int namelen, + obd_valid valid, int ea_size, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, getattr_name); + MD_COUNTER_INCREMENT(exp->exp_obd, getattr_name); + rc = MDP(exp->exp_obd, getattr_name)(exp, fid, filename, namelen, + valid, ea_size, request); + RETURN(rc); +} + +static inline int md_intent_lock(struct obd_export *exp, + struct md_op_data *op_data, + void *lmm, int lmmsize, + struct lookup_intent *it, + int flags, struct ptlrpc_request **reqp, + ldlm_blocking_callback cb_blocking, + int extra_lock_flags) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, intent_lock); + MD_COUNTER_INCREMENT(exp->exp_obd, intent_lock); + rc = MDP(exp->exp_obd, intent_lock)(exp, op_data, lmm, lmmsize, + it, flags, reqp, cb_blocking, + extra_lock_flags); + RETURN(rc); +} + +static inline int md_link(struct obd_export *exp, + struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, link); + MD_COUNTER_INCREMENT(exp->exp_obd, link); + rc = MDP(exp->exp_obd, link)(exp, op_data, request); + RETURN(rc); +} + +static inline int md_rename(struct obd_export *exp, + struct md_op_data *op_data, + const char *old, int oldlen, + const char *new, int newlen, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, rename); + MD_COUNTER_INCREMENT(exp->exp_obd, rename); + rc = MDP(exp->exp_obd, rename)(exp, op_data, old, oldlen, new, + newlen, request); + RETURN(rc); +} + +static inline int md_setattr(struct obd_export *exp, struct md_op_data *op_data, + struct iattr *iattr, void *ea, int ealen, + void *ea2, int ea2len, struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, setattr); + MD_COUNTER_INCREMENT(exp->exp_obd, setattr); + rc = MDP(exp->exp_obd, setattr)(exp, op_data, iattr, ea, ealen, + ea2, ea2len, request); + RETURN(rc); +} + +static inline int md_sync(struct obd_export 
*exp, struct lu_fid *fid, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, sync); + MD_COUNTER_INCREMENT(exp->exp_obd, sync); + rc = MDP(exp->exp_obd, sync)(exp, fid, request); + RETURN(rc); +} + +static inline int md_readpage(struct obd_export *exp, struct lu_fid *fid, + __u64 offset, struct page *page, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, readpage); + MD_COUNTER_INCREMENT(exp->exp_obd, readpage); + rc = MDP(exp->exp_obd, readpage)(exp, fid, offset, page, request); + RETURN(rc); +} + +static inline int md_unlink(struct obd_export *exp, struct md_op_data *op_data, + struct ptlrpc_request **request) +{ + int rc; + ENTRY; + EXP_CHECK_MD_OP(exp, unlink); + MD_COUNTER_INCREMENT(exp->exp_obd, unlink); + rc = MDP(exp->exp_obd, unlink)(exp, op_data, request); + RETURN(rc); +} + +static inline int md_get_lustre_md(struct obd_export *exp, + struct ptlrpc_request *req, + int offset, struct obd_export *dt_exp, + struct lustre_md *md) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, get_lustre_md); + MD_COUNTER_INCREMENT(exp->exp_obd, get_lustre_md); + RETURN(MDP(exp->exp_obd, get_lustre_md)(exp, req, offset, + dt_exp, md)); +} + +static inline int md_free_lustre_md(struct obd_export *exp, + struct lustre_md *md) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, free_lustre_md); + MD_COUNTER_INCREMENT(exp->exp_obd, free_lustre_md); + RETURN(MDP(exp->exp_obd, free_lustre_md)(exp, md)); +} + +static inline int md_setxattr(struct obd_export *exp, struct lu_fid *fid, + obd_valid valid, const char *name, + const char *input, int input_size, + int output_size, int flags, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, setxattr); + MD_COUNTER_INCREMENT(exp->exp_obd, setxattr); + RETURN(MDP(exp->exp_obd, setxattr)(exp, fid, valid, name, input, + input_size, output_size, flags, + request)); +} + +static inline int md_getxattr(struct obd_export *exp, struct lu_fid *fid, + obd_valid valid, const char *name, + const char 
*input, int input_size, + int output_size, int flags, + struct ptlrpc_request **request) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, getxattr); + MD_COUNTER_INCREMENT(exp->exp_obd, getxattr); + RETURN(MDP(exp->exp_obd, getxattr)(exp, fid, valid, name, input, + input_size, output_size, flags, + request)); +} + +static inline int md_set_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och, + struct ptlrpc_request *open_req) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_open_replay_data); + MD_COUNTER_INCREMENT(exp->exp_obd, set_open_replay_data); + RETURN(MDP(exp->exp_obd, set_open_replay_data)(exp, och, open_req)); +} + +static inline int md_clear_open_replay_data(struct obd_export *exp, + struct obd_client_handle *och) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, clear_open_replay_data); + MD_COUNTER_INCREMENT(exp->exp_obd, clear_open_replay_data); + RETURN(MDP(exp->exp_obd, clear_open_replay_data)(exp, och)); +} + +static inline int md_set_lock_data(struct obd_export *exp, + __u64 *lockh, void *data) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, set_lock_data); + MD_COUNTER_INCREMENT(exp->exp_obd, set_lock_data); + RETURN(MDP(exp->exp_obd, set_lock_data)(exp, lockh, data)); +} + +static inline int md_cancel_unused(struct obd_export *exp, + struct lu_fid *fid, + int flags, void *opaque) +{ + int rc; + ENTRY; + + EXP_CHECK_MD_OP(exp, cancel_unused); + MD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); + + rc = MDP(exp->exp_obd, cancel_unused)(exp, fid, flags, opaque); + RETURN(rc); +} + +static inline int md_lock_match(struct obd_export *exp, int flags, + struct lu_fid *fid, ldlm_type_t type, + ldlm_policy_data_t *policy, ldlm_mode_t mode, + struct lustre_handle *lockh) +{ + ENTRY; + EXP_CHECK_MD_OP(exp, lock_match); + MD_COUNTER_INCREMENT(exp->exp_obd, lock_match); + RETURN(MDP(exp->exp_obd, lock_match)(exp, flags, fid, type, + policy, mode, lockh)); +} + +static inline int md_init_ea_size(struct obd_export *exp, + int easize, int def_asize, + int cookiesize) +{ + ENTRY; + 
EXP_CHECK_MD_OP(exp, init_ea_size); + MD_COUNTER_INCREMENT(exp->exp_obd, init_ea_size); + RETURN(MDP(exp->exp_obd, init_ea_size)(exp, easize, + def_asize, + cookiesize)); +} + +/* OBD Metadata Support */ +extern int obd_init_caches(void); +extern void obd_cleanup_caches(void); + +/* support routines */ +extern cfs_mem_cache_t *obdo_cachep; +static inline struct obdo *obdo_alloc(void) +{ + struct obdo *oa; + OBD_SLAB_ALLOC(oa, obdo_cachep, CFS_ALLOC_STD, sizeof(*oa)); + return oa; +} + +static inline void obdo_free(struct obdo *oa) +{ + OBD_SLAB_FREE(oa, obdo_cachep, sizeof(*oa)); +} + +static inline void obdo2fid(struct obdo *oa, + struct lu_fid *fid) +{ + /* something here */ +} + +static inline void fid2obdo(struct lu_fid *fid, + struct obdo *oa) +{ + /* something here */ +} + +/* I'm as embarrassed about this as you are. + * + * // XXX do not look into _superhack with remaining eye + * // XXX if this were any uglier, I'd get my own show on MTV */ +extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); + +/* sysctl.c */ +extern void obd_sysctl_init (void); +extern void obd_sysctl_clean (void); + +/* uuid.c */ +typedef __u8 class_uuid_t[16]; +void class_generate_random_uuid(class_uuid_t uuid); +void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); + +/* lustre_peer.c */ +int lustre_uuid_to_peer(const char *uuid, lnet_nid_t *peer_nid, int index); +int class_add_uuid(const char *uuid, __u64 nid); +int class_del_uuid (const char *uuid); +void class_init_uuidlist(void); +void class_exit_uuidlist(void); + +/* mea.c */ +int mea_name2idx(struct lmv_stripe_md *mea, char *name, int namelen); +int raw_name2idx(int hashtype, int count, const char *name, int namelen); + +#endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/obd_echo.h b/lustre/include/obd_echo.h similarity index 91% rename from lustre/include/linux/obd_echo.h rename to lustre/include/obd_echo.h index 0910041..53b0e6b 100644 --- a/lustre/include/linux/obd_echo.h +++ 
b/lustre/include/obd_echo.h @@ -5,9 +5,6 @@ #ifndef _OBD_ECHO_H #define _OBD_ECHO_H -#define OBD_ECHO_DEVICENAME "obdecho" -#define OBD_ECHO_CLIENT_DEVICENAME "echo_client" - /* The persistent object (i.e. actually stores stuff!) */ #define ECHO_PERSISTENT_OBJID 1ULL #define ECHO_PERSISTENT_SIZE ((__u64)(1<<20)) diff --git a/lustre/include/linux/obd_lov.h b/lustre/include/obd_lov.h similarity index 86% rename from lustre/include/linux/obd_lov.h rename to lustre/include/obd_lov.h index 78ac7bc..057fb9d 100644 --- a/lustre/include/linux/obd_lov.h +++ b/lustre/include/obd_lov.h @@ -5,8 +5,6 @@ #ifndef _OBD_LOV_H__ #define _OBD_LOV_H__ -#define OBD_LOV_DEVICENAME "lov" - static inline int lov_stripe_md_size(int stripes) { return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo); @@ -24,4 +22,7 @@ static inline int lov_mds_md_v1_size(int stripes) #define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long) #define IOC_LOV_MAX_NR 50 +#define QOS_DEFAULT_THRESHOLD 10 /* MB */ +#define QOS_DEFAULT_MAXAGE 5 /* Seconds */ + #endif diff --git a/lustre/include/linux/obd_ost.h b/lustre/include/obd_ost.h similarity index 95% rename from lustre/include/linux/obd_ost.h rename to lustre/include/obd_ost.h index 4a2a344..50aace7 100644 --- a/lustre/include/linux/obd_ost.h +++ b/lustre/include/obd_ost.h @@ -4,14 +4,14 @@ * This file is part of Lustre, http://www.lustre.org * * Data structures for object storage targets and client: OST & OSC's - * + * * See also lustre_idl.h for wire formats of requests. */ #ifndef _LUSTRE_OST_H #define _LUSTRE_OST_H -#include +#include struct osc_brw_async_args { struct obdo *aa_oa; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h new file mode 100644 index 0000000..beca205 --- /dev/null +++ b/lustre/include/obd_support.h @@ -0,0 +1,398 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef _OBD_SUPPORT +#define _OBD_SUPPORT + +#include + +/* global variables */ +extern atomic_t obd_memory; +extern int obd_memmax; +extern unsigned int obd_fail_loc; +extern unsigned int obd_dump_on_timeout; +extern unsigned int obd_timeout; /* seconds */ +#define PING_INTERVAL max(obd_timeout / 4, 1U) +#define RECONNECT_INTERVAL max(obd_timeout / 10, 10U) +extern unsigned int ldlm_timeout; +extern unsigned int obd_health_check_timeout; +extern char obd_lustre_upcall[128]; +extern cfs_waitq_t obd_race_waitq; + +#define OBD_FAIL_MDS 0x100 +#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 +#define OBD_FAIL_MDS_GETATTR_NET 0x102 +#define OBD_FAIL_MDS_GETATTR_PACK 0x103 +#define OBD_FAIL_MDS_READPAGE_NET 0x104 +#define OBD_FAIL_MDS_READPAGE_PACK 0x105 +#define OBD_FAIL_MDS_SENDPAGE 0x106 +#define OBD_FAIL_MDS_REINT_NET 0x107 +#define OBD_FAIL_MDS_REINT_UNPACK 0x108 +#define OBD_FAIL_MDS_REINT_SETATTR 0x109 +#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a +#define OBD_FAIL_MDS_REINT_CREATE 0x10b +#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c +#define OBD_FAIL_MDS_REINT_UNLINK 0x10d +#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e +#define OBD_FAIL_MDS_REINT_LINK 0x10f +#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110 +#define OBD_FAIL_MDS_REINT_RENAME 0x111 +#define 
OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112 +#define OBD_FAIL_MDS_OPEN_NET 0x113 +#define OBD_FAIL_MDS_OPEN_PACK 0x114 +#define OBD_FAIL_MDS_CLOSE_NET 0x115 +#define OBD_FAIL_MDS_CLOSE_PACK 0x116 +#define OBD_FAIL_MDS_CONNECT_NET 0x117 +#define OBD_FAIL_MDS_CONNECT_PACK 0x118 +#define OBD_FAIL_MDS_REINT_NET_REP 0x119 +#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a +#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b +#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c +#define OBD_FAIL_MDS_STATFS_PACK 0x11d +#define OBD_FAIL_MDS_STATFS_NET 0x11e +#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f +#define OBD_FAIL_MDS_PIN_NET 0x120 +#define OBD_FAIL_MDS_UNPIN_NET 0x121 +#define OBD_FAIL_MDS_ALL_REPLY_NET 0x122 +#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x123 +#define OBD_FAIL_MDS_SYNC_NET 0x124 +#define OBD_FAIL_MDS_SYNC_PACK 0x125 +#define OBD_FAIL_MDS_DONE_WRITING_NET 0x126 +#define OBD_FAIL_MDS_DONE_WRITING_PACK 0x127 +#define OBD_FAIL_MDS_ALLOC_OBDO 0x128 +#define OBD_FAIL_MDS_PAUSE_OPEN 0x129 +#define OBD_FAIL_MDS_STATFS_LCW_SLEEP 0x12a +#define OBD_FAIL_MDS_OPEN_CREATE 0x12b +#define OBD_FAIL_MDS_OST_SETATTR 0x12c +#define OBD_FAIL_MDS_QUOTACHECK_NET 0x12d +#define OBD_FAIL_MDS_QUOTACTL_NET 0x12e +#define OBD_FAIL_MDS_CLIENT_ADD 0x12f +#define OBD_FAIL_MDS_GETXATTR_NET 0x130 +#define OBD_FAIL_MDS_GETXATTR_PACK 0x131 +#define OBD_FAIL_MDS_SETXATTR_NET 0x132 +#define OBD_FAIL_MDS_SETXATTR 0x133 +#define OBD_FAIL_MDS_SETXATTR_WRITE 0x134 + +#define OBD_FAIL_OST 0x200 +#define OBD_FAIL_OST_CONNECT_NET 0x201 +#define OBD_FAIL_OST_DISCONNECT_NET 0x202 +#define OBD_FAIL_OST_GET_INFO_NET 0x203 +#define OBD_FAIL_OST_CREATE_NET 0x204 +#define OBD_FAIL_OST_DESTROY_NET 0x205 +#define OBD_FAIL_OST_GETATTR_NET 0x206 +#define OBD_FAIL_OST_SETATTR_NET 0x207 +#define OBD_FAIL_OST_OPEN_NET 0x208 +#define OBD_FAIL_OST_CLOSE_NET 0x209 +#define OBD_FAIL_OST_BRW_NET 0x20a +#define OBD_FAIL_OST_PUNCH_NET 0x20b +#define OBD_FAIL_OST_STATFS_NET 0x20c +#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d +#define 
OBD_FAIL_OST_BRW_WRITE_BULK 0x20e +#define OBD_FAIL_OST_BRW_READ_BULK 0x20f +#define OBD_FAIL_OST_SYNC_NET 0x210 +#define OBD_FAIL_OST_ALL_REPLY_NET 0x211 +#define OBD_FAIL_OST_ALL_REQUESTS_NET 0x212 +#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 +#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 +#define OBD_FAIL_OST_ENOSPC 0x215 +#define OBD_FAIL_OST_EROFS 0x216 +#define OBD_FAIL_OST_ENOENT 0x217 +#define OBD_FAIL_OST_QUOTACHECK_NET 0x218 +#define OBD_FAIL_OST_QUOTACTL_NET 0x219 + +#define OBD_FAIL_LDLM 0x300 +#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 +#define OBD_FAIL_LDLM_ENQUEUE 0x302 +#define OBD_FAIL_LDLM_CONVERT 0x303 +#define OBD_FAIL_LDLM_CANCEL 0x304 +#define OBD_FAIL_LDLM_BL_CALLBACK 0x305 +#define OBD_FAIL_LDLM_CP_CALLBACK 0x306 +#define OBD_FAIL_LDLM_GL_CALLBACK 0x307 +#define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 +#define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 +#define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a +#define OBD_FAIL_LDLM_ENQUEUE_BLOCKED 0x30b +#define OBD_FAIL_LDLM_REPLY 0x30c +#define OBD_FAIL_LDLM_RECOV_CLIENTS 0x30d +#define OBD_FAIL_LDLM_ENQUEUE_OLD_EXPORT 0x30e + +#define OBD_FAIL_OSC 0x400 +#define OBD_FAIL_OSC_BRW_READ_BULK 0x401 +#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402 +#define OBD_FAIL_OSC_LOCK_BL_AST 0x403 +#define OBD_FAIL_OSC_LOCK_CP_AST 0x404 +#define OBD_FAIL_OSC_MATCH 0x405 +#define OBD_FAIL_OSC_BRW_PREP_REQ 0x406 +#define OBD_FAIL_OSC_SHUTDOWN 0x407 + +#define OBD_FAIL_PTLRPC 0x500 +#define OBD_FAIL_PTLRPC_ACK 0x501 +#define OBD_FAIL_PTLRPC_RQBD 0x502 +#define OBD_FAIL_PTLRPC_BULK_GET_NET 0x503 +#define OBD_FAIL_PTLRPC_BULK_PUT_NET 0x504 +#define OBD_FAIL_PTLRPC_DROP_RPC 0x505 +#define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 + +#define OBD_FAIL_OBD_PING_NET 0x600 +#define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 +#define OBD_FAIL_OBD_LOGD_NET 0x602 +#define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 +#define OBD_FAIL_OBD_DQACQ 0x604 + +#define OBD_FAIL_TGT_REPLY_NET 0x700 +#define OBD_FAIL_TGT_CONN_RACE 0x701 +#define OBD_FAIL_TGT_FORCE_RECONNECT 0x702 
+#define OBD_FAIL_TGT_DELAY_CONNECT 0x703 +#define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 + +#define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 + +#define OBD_FAIL_MGS 0x900 +#define OBD_FAIL_MGS_ALL_REQUEST_NET 0x901 +#define OBD_FAIL_MGS_ALL_REPLY_NET 0x902 + +/* preparation for a more advanced failure testbed (not functional yet) */ +#define OBD_FAIL_MASK_SYS 0x0000FF00 +#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) +#define OBD_FAIL_ONCE 0x80000000 +#define OBD_FAILED 0x40000000 +#define OBD_FAIL_MDS_ALL_NET 0x01000000 +#define OBD_FAIL_OST_ALL_NET 0x02000000 + +#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ + ((id) & OBD_FAIL_MASK_LOC)) && \ + ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \ + (OBD_FAILED | OBD_FAIL_ONCE))) + +#define OBD_FAIL_CHECK_ONCE(id) \ +({ int _ret_ = 0; \ + if (OBD_FAIL_CHECK(id)) { \ + CERROR("*** obd_fail_loc=%x ***\n", id); \ + obd_fail_loc |= OBD_FAILED; \ + if ((id) & OBD_FAIL_ONCE) \ + obd_fail_loc |= OBD_FAIL_ONCE; \ + _ret_ = 1; \ + } \ + _ret_; \ +}) + +#define OBD_FAIL_RETURN(id, ret) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + RETURN(ret); \ + } \ +} while(0) + +#define OBD_FAIL_TIMEOUT(id, secs) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + CERROR("obd_fail_timeout id %x sleeping for %d secs\n", \ + (id), (secs)); \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + cfs_schedule_timeout(CFS_TASK_UNINT, \ + cfs_time_seconds(secs)); \ + set_current_state(TASK_RUNNING); \ + CERROR("obd_fail_timeout id %x awake\n", (id)); \ + } \ +} while(0) + +#ifdef __KERNEL__ +/* The idea here is to synchronise two threads to force a race. The + * first thread that calls this with a matching fail_loc is put to + * sleep. The next thread that calls with the same fail_loc wakes up + * the first and continues. 
*/ +#define OBD_RACE(id) \ +do { \ + if (OBD_FAIL_CHECK_ONCE(id)) { \ + CERROR("obd_race id %x sleeping\n", (id)); \ + OBD_SLEEP_ON(&obd_race_waitq); \ + CERROR("obd_fail_race id %x awake\n", (id)); \ + } else if ((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ + ((id) & OBD_FAIL_MASK_LOC)) { \ + cfs_waitq_signal(&obd_race_waitq); \ + } \ +} while(0) +#else +/* sigh. an expedient fix until OBD_RACE is fixed up */ +#define OBD_RACE(foo) do {} while(0) +#endif + +#define fixme() CDEBUG(D_OTHER, "FIXME\n"); + +extern atomic_t libcfs_kmemory; + +#if defined(LUSTRE_UTILS) /* this version is for utils only */ +#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ +do { \ + (ptr) = cfs_alloc(size, (gfp_mask)); \ + if ((ptr) == NULL) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + } else { \ + memset(ptr, 0, size); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p\n", \ + (int)(size), ptr); \ + } \ +} while (0) +#else /* this version is for the kernel and liblustre */ +#define OBD_ALLOC_GFP(ptr, size, gfp_mask) \ +do { \ + (ptr) = cfs_alloc(size, (gfp_mask)); \ + if ((ptr) == NULL) { \ + CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ +} while (0) +#endif + +#ifndef OBD_ALLOC_MASK +# define OBD_ALLOC_MASK CFS_ALLOC_IO +#endif + +#define OBD_ALLOC(ptr, size) OBD_ALLOC_GFP(ptr, size, OBD_ALLOC_MASK) +#define OBD_ALLOC_WAIT(ptr, size) OBD_ALLOC_GFP(ptr, size, CFS_ALLOC_STD) +#define OBD_ALLOC_PTR(ptr) OBD_ALLOC(ptr, sizeof *(ptr)) +#define OBD_ALLOC_PTR_WAIT(ptr) 
OBD_ALLOC_WAIT(ptr, sizeof *(ptr)) + +#ifdef __arch_um__ +# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) +#else +# define OBD_VMALLOC(ptr, size) \ +do { \ + (ptr) = cfs_alloc_large(size); \ + if ((ptr) == NULL) { \ + CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ +} while (0) +#endif + +#ifdef CONFIG_DEBUG_SLAB +#define POISON(ptr, c, s) do {} while (0) +#else +#define POISON(ptr, c, s) memset(ptr, c, s) +#endif + +#if POISON_BULK +#define POISON_PAGE(page, val) do { memset(kmap(page), val, PAGE_SIZE); \ + kunmap(page); } while (0) +#else +#define POISON_PAGE(page, val) do { } while (0) +#endif + +#ifdef __KERNEL__ +#define OBD_FREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + cfs_free(ptr); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) +#else +#define OBD_FREE(ptr, size) ((void)(size), free((ptr))) +#endif + +#ifdef __arch_um__ +# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) +#else +# define OBD_VFREE(ptr, size) \ +do { \ + LASSERT(ptr); \ + atomic_sub(size, &obd_memory); \ + CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + POISON(ptr, 0x5a, size); \ + cfs_free_large(ptr); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) +#endif + +/* we memset() the slab object to 0 when allocation succeeds, so DO NOT + * HAVE A CTOR THAT DOES ANYTHING. its work will be cleared here. 
we'd + * love to assert on that, but slab.c keeps kmem_cache_s all to itself. */ +#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ +do { \ + LASSERT(!in_interrupt()); \ + (ptr) = cfs_mem_cache_alloc(slab, (type)); \ + if ((ptr) == NULL) { \ + CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ + (int)(size), __FILE__, __LINE__); \ + CERROR("%d total bytes allocated by Lustre, %d by Portals\n", \ + atomic_read(&obd_memory), atomic_read(&libcfs_kmemory));\ + } else { \ + memset(ptr, 0, size); \ + atomic_add(size, &obd_memory); \ + if (atomic_read(&obd_memory) > obd_memmax) \ + obd_memmax = atomic_read(&obd_memory); \ + CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ + (int)(size), ptr, atomic_read(&obd_memory)); \ + } \ +} while (0) + +#define OBD_FREE_PTR(ptr) OBD_FREE(ptr, sizeof *(ptr)) + +#define OBD_SLAB_FREE(ptr, slab, size) \ +do { \ + LASSERT(ptr); \ + CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), ptr, atomic_read(&obd_memory)); \ + atomic_sub(size, &obd_memory); \ + POISON(ptr, 0x5a, size); \ + cfs_mem_cache_free(slab, ptr); \ + (ptr) = (void *)0xdeadbeef; \ +} while (0) + +#define KEY_IS(str) \ + (keylen == strlen(str) && memcmp(key, str, keylen) == 0) + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. 
+#endif + +#endif diff --git a/lustre/kernel_patches/kernel_configs/uml-2.6.10-fc3.config b/lustre/kernel_patches/kernel_configs/uml-2.6.10-fc3.config index e7685c2..0ec6b4a 100644 --- a/lustre/kernel_patches/kernel_configs/uml-2.6.10-fc3.config +++ b/lustre/kernel_patches/kernel_configs/uml-2.6.10-fc3.config @@ -481,6 +481,7 @@ CONFIG_FS_POSIX_ACL=y # CONFIG_MINIX_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_QUOTA=y +CONFIG_QFMT_V1=m CONFIG_QFMT_V2=y CONFIG_QUOTACTL=y CONFIG_DNOTIFY=y diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch index 588916f..72f5dd5 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch @@ -179,9 +179,9 @@ Index: linux-2.4.21-rhel/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2591,7 +2591,7 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h =================================================================== --- linux-2.4.21-rhel.orig/include/linux/ext3_extents.h 2005-03-02 22:42:20.659360368 +0300 +++ linux-2.4.21-rhel/include/linux/ext3_extents.h 2005-03-04 02:34:52.000000000 +0300 -@@ -0,0 +1,263 @@ +@@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2689,7 +2689,7 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? 
*/ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2790,15 +2790,13 @@ Index: linux-2.4.21-rhel/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch index 305ef8e..940b916 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch @@ -179,9 +179,9 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ 
-2589,7 +2589,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h =================================================================== --- linux-2.4.21-suse2.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 +++ linux-2.4.21-suse2/include/linux/ext3_extents.h 2004-11-02 20:34:00.000000000 +0300 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2687,7 +2687,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2788,15 +2788,13 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); @@ -2853,7 +2851,6 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + + +#endif 
/* _LINUX_EXT3_EXTENTS */ -+ Index: linux-2.4.21-suse2/include/linux/ext3_fs_i.h =================================================================== --- linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h 2004-11-02 20:31:37.000000000 +0300 diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch index 8e84625..571fb0f 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch @@ -179,9 +179,9 @@ Index: linux-2.4.24/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2577,7 +2577,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h =================================================================== --- linux-2.4.24.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 +++ linux-2.4.24/include/linux/ext3_extents.h 2004-11-02 20:32:17.000000000 +0300 -@@ -0,0 +1,263 @@ +@@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2675,7 +2675,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? 
*/ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2776,15 +2776,13 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch index d77d9a7..125f747 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.29.patch @@ -179,9 +179,9 @@ Index: linux-2.4.29/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -2578,7 +2578,7 @@ Index: 
linux-2.4.29/include/linux/ext3_extents.h =================================================================== --- linux-2.4.29.orig/include/linux/ext3_extents.h 2005-05-03 16:52:08.724069800 +0300 +++ linux-2.4.29/include/linux/ext3_extents.h 2005-05-03 16:52:08.819055360 +0300 -@@ -0,0 +1,263 @@ +@@ -0,0 +1,261 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2676,7 +2676,7 @@ Index: linux-2.4.29/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2777,15 +2777,13 @@ Index: linux-2.4.29/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch index 
657ecf4..b6439e6 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.12.patch @@ -2,7 +2,7 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c =================================================================== --- linux-2.6.12-rc6.orig/fs/ext3/extents.c 2005-06-14 16:31:25.756503133 +0200 +++ linux-2.6.12-rc6/fs/ext3/extents.c 2005-06-14 16:31:25.836581257 +0200 -@@ -0,0 +1,2347 @@ +@@ -0,0 +1,2353 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -176,9 +176,9 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -448,8 +448,12 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -506,8 +510,10 @@ Index: linux-2.6.12-rc6/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2644,7 +2650,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h =================================================================== --- linux-2.6.12-rc6.orig/include/linux/ext3_extents.h 2005-06-14 16:31:25.780917195 +0200 +++ linux-2.6.12-rc6/include/linux/ext3_extents.h 2005-06-14 16:31:25.932284381 +0200 -@@ -0,0 +1,264 
@@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2742,7 +2748,7 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2843,15 +2849,13 @@ Index: linux-2.6.12-rc6/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch index 0ee8d28..9e78214 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -3,7 +3,7 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== --- 
linux-2.6.5-sles9.orig/fs/ext3/extents.c 2005-02-17 22:07:57.023609040 +0300 +++ linux-2.6.5-sles9/fs/ext3/extents.c 2005-02-23 01:02:37.396435640 +0300 -@@ -0,0 +1,2349 @@ +@@ -0,0 +1,2355 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -177,9 +177,9 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -449,8 +449,12 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -507,8 +511,10 @@ Index: linux-2.6.5-sles9/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2634,7 +2640,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h =================================================================== --- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2005-02-17 22:07:57.023609040 +0300 +++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2005-02-23 01:02:37.416432600 +0300 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2732,7 +2738,7 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* 
capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? */ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2833,15 +2839,13 @@ Index: linux-2.6.5-sles9/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch index 56fe653..bd95c54 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.9-rhel4.patch @@ -2,7 +2,7 @@ Index: linux-stage/fs/ext3/extents.c =================================================================== --- linux-stage.orig/fs/ext3/extents.c 2005-02-25 15:33:48.890198160 +0200 +++ linux-stage/fs/ext3/extents.c 2005-02-25 15:33:48.917194056 +0200 -@@ -0,0 +1,2347 @@ +@@ -0,0 +1,2353 @@ +/* + * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * 
Written by Alex Tomas @@ -176,9 +176,9 @@ Index: linux-stage/fs/ext3/extents.c + +static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) +{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; ++ struct ext3_extent_header *neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation = ((EXT_FLAGS(neh) & ~EXT_FLAGS_CLR_UNKNOWN) << 24) | ++ (EXT_GENERATION(neh) + 1); +} + +static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) @@ -448,8 +448,12 @@ Index: linux-stage/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); -+ if (ext3_ext_check_header(eh)) ++ if (ext3_ext_check_header(eh)) { ++ /* don't free previously allocated path ++ * -- caller should take care */ ++ path = NULL; + goto err; ++ } + + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); @@ -506,8 +510,10 @@ Index: linux-stage/fs/ext3/extents.c + +err: + printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); -+ ext3_ext_drop_refs(path); -+ kfree(path); ++ if (path) { ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ } + return ERR_PTR(-EIO); +} + @@ -2629,7 +2635,7 @@ Index: linux-stage/include/linux/ext3_extents.h =================================================================== --- linux-stage.orig/include/linux/ext3_extents.h 2005-02-25 15:33:48.891198008 +0200 +++ linux-stage/include/linux/ext3_extents.h 2005-02-25 15:33:48.944189952 +0200 -@@ -0,0 +1,264 @@ +@@ -0,0 +1,262 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2727,7 +2733,7 @@ Index: linux-stage/include/linux/ext3_extents.h + __u16 eh_entries; /* number of valid entries */ + __u16 eh_max; /* capacity of store in entries */ + __u16 eh_depth; /* has tree real underlaying blocks? 
*/ -+ __u32 eh_generation; /* generation of the tree */ ++ __u32 eh_generation; /* flags(8 bits) | generation of the tree */ +}; + +#define EXT3_EXT_MAGIC 0xf30a @@ -2828,15 +2834,13 @@ Index: linux-stage/include/linux/ext3_extents.h + (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_GENERATION(__hdr__) ((__hdr__)->eh_generation & 0x00ffffff) ++#define EXT_FLAGS(__hdr__) ((__hdr__)->eh_generation >> 24) ++#define EXT_FLAGS_CLR_UNKNOWN 0x7 /* Flags cleared on modification */ + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++#define EXT_BLOCK_HDR(__bh__) ((struct ext3_extent_header *)(__bh__)->b_data) ++#define EXT_ROOT_HDR(__tree__) ((struct ext3_extent_header *)(__tree__)->root) ++#define EXT_DEPTH(__tree__) (EXT_ROOT_HDR(__tree__)->eh_depth) + + +#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 1d8a4af..2a64875 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -2570,7 +2570,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_old(handle, inode, block, count); + else { + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch index 0c2f445..70f4f8a 100644 --- 
a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch @@ -2565,7 +2565,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch index 5ff3d3b..01e7387 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch @@ -2584,7 +2584,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c + int freed; + + sb = inode->i_sb; -+ if (!test_opt(sb, MBALLOC)) ++ if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info) + ext3_free_blocks_sb(handle, sb, block, count, &freed); + else + ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed); diff --git a/lustre/kernel_patches/patches/iallocsem_consistency.patch b/lustre/kernel_patches/patches/iallocsem_consistency.patch new file mode 100644 index 0000000..916ba88 --- /dev/null +++ b/lustre/kernel_patches/patches/iallocsem_consistency.patch @@ -0,0 +1,48 @@ +Index: linux-2.6.9/fs/attr.c +=================================================================== +--- linux-2.6.9/fs.orig/attr.c 2006-03-10 17:20:39.000000000 +0200 ++++ linux-2.6.9/fs/attr.c 2006-04-09 01:21:44.000000000 +0300 +@@ -177,6 +177,9 @@ + if (!attr->ia_valid) + return 0; + ++ if (ia_valid & ATTR_SIZE) ++ down_write(&dentry->d_inode->i_alloc_sem); ++ + if (inode->i_op && inode->i_op->setattr) { + audit_notify_watch(inode, MAY_WRITE); + error = security_inode_setattr(dentry, attr); +@@ -194,6 +197,10 @@ + error = inode_setattr(inode, attr); + } + } ++ ++ if (ia_valid & ATTR_SIZE) ++ up_write(&dentry->d_inode->i_alloc_sem); ++ + if 
(!error) { + unsigned long dn_mask = setattr_mask(ia_valid); + if (dn_mask) +Index: linux-2.6.9/fs/open.c +=================================================================== +--- linux-2.6.9/fs.orig/open.c 2006-04-09 01:18:08.000000000 +0300 ++++ linux-2.6.9/fs/open.c 2006-04-09 01:22:29.000000000 +0300 +@@ -205,16 +205,16 @@ + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&dentry->d_inode->i_sem); +- down_write(&dentry->d_inode->i_alloc_sem); + if (called_from_open) + newattrs.ia_valid |= ATTR_FROM_OPEN; + if (op->setattr_raw) { + newattrs.ia_valid |= ATTR_RAW; + newattrs.ia_ctime = CURRENT_TIME; ++ down_write(&dentry->d_inode->i_alloc_sem); + err = op->setattr_raw(dentry->d_inode, &newattrs); ++ up_write(&dentry->d_inode->i_alloc_sem); + } else + err = notify_change(dentry, &newattrs); +- up_write(&dentry->d_inode->i_alloc_sem); + up(&dentry->d_inode->i_sem); + return err; + } diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-fc3.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-fc3.patch index 47c152c..c75d7e8 100644 --- a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-fc3.patch +++ b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6-fc3.patch @@ -1,8 +1,8 @@ -Index: uml/fs/cifs/dir.c +Index: linux-2.6.10/fs/cifs/dir.c =================================================================== ---- uml.orig/fs/cifs/dir.c 2004-12-24 16:35:01.000000000 -0500 -+++ uml/fs/cifs/dir.c 2005-04-13 23:43:03.681625568 -0400 -@@ -199,23 +199,23 @@ +--- linux-2.6.10.orig/fs/cifs/dir.c ++++ linux-2.6.10/fs/cifs/dir.c +@@ -199,23 +199,23 @@ cifs_create(struct inode *inode, struct } if(nd) { @@ -32,11 +32,11 @@ Index: uml/fs/cifs/dir.c disposition = FILE_OPEN_IF; else { cFYI(1,("Create flag not set in create function")); -Index: uml/fs/nfs/nfs4proc.c +Index: linux-2.6.10/fs/nfs/nfs4proc.c =================================================================== ---- uml.orig/fs/nfs/nfs4proc.c 2004-12-24 16:35:23.000000000 -0500 -+++ 
uml/fs/nfs/nfs4proc.c 2005-04-13 23:43:26.409770503 -0400 -@@ -775,17 +775,17 @@ +--- linux-2.6.10.orig/fs/nfs/nfs4proc.c ++++ linux-2.6.10/fs/nfs/nfs4proc.c +@@ -775,17 +775,17 @@ nfs4_atomic_open(struct inode *dir, stru struct nfs4_state *state; if (nd->flags & LOOKUP_CREATE) { @@ -57,11 +57,20 @@ Index: uml/fs/nfs/nfs4proc.c put_rpccred(cred); if (IS_ERR(state)) return (struct inode *)state; -Index: uml/fs/nfs/dir.c +Index: linux-2.6.10/fs/nfs/dir.c =================================================================== ---- uml.orig/fs/nfs/dir.c 2005-04-13 23:42:21.792883770 -0400 -+++ uml/fs/nfs/dir.c 2005-04-13 23:43:03.685625066 -0400 -@@ -791,7 +791,7 @@ +--- linux-2.6.10.orig/fs/nfs/dir.c ++++ linux-2.6.10/fs/nfs/dir.c +@@ -718,7 +718,7 @@ int nfs_is_exclusive_create(struct inode + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; +- return (nd->intent.open.flags & O_EXCL) != 0; ++ return (nd->intent.it_flags & O_EXCL) != 0; + } + + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +@@ -791,7 +791,7 @@ static int is_atomic_open(struct inode * if (nd->flags & LOOKUP_DIRECTORY) return 0; /* Are we trying to write to a read only partition? 
*/ @@ -70,7 +79,7 @@ Index: uml/fs/nfs/dir.c return 0; return 1; } -@@ -812,7 +812,7 @@ +@@ -812,7 +812,7 @@ static struct dentry *nfs_atomic_lookup( dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ @@ -79,7 +88,7 @@ Index: uml/fs/nfs/dir.c goto no_entry; /* Open the file on the server */ -@@ -820,7 +820,7 @@ +@@ -820,7 +820,7 @@ static struct dentry *nfs_atomic_lookup( /* Revalidate parent directory attribute cache */ nfs_revalidate_inode(NFS_SERVER(dir), dir); @@ -88,7 +97,7 @@ Index: uml/fs/nfs/dir.c nfs_begin_data_update(dir); inode = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); -@@ -836,7 +836,7 @@ +@@ -836,7 +836,7 @@ static struct dentry *nfs_atomic_lookup( break; /* This turned out not to be a regular file */ case -ELOOP: @@ -97,7 +106,7 @@ Index: uml/fs/nfs/dir.c goto no_open; /* case -EISDIR: */ /* case -EINVAL: */ -@@ -875,7 +875,7 @@ +@@ -875,7 +875,7 @@ static int nfs_open_revalidate(struct de /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open; @@ -106,3 +115,13 @@ Index: uml/fs/nfs/dir.c /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open; +@@ -1043,7 +1043,8 @@ static int nfs_create(struct inode *dir, + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) +- open_flags = nd->intent.open.flags; ++ open_flags = nd->intent.it_flags; ++ + + /* + * The 0 argument passed into the create function should one day diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch new file mode 100644 index 0000000..a0245be --- /dev/null +++ b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.12.6.patch @@ -0,0 +1,459 @@ +diff -Nur linux-2.6.12.6-orig/include/linux/skbuff.h linux-2.6.12.6/include/linux/skbuff.h +--- linux-2.6.12.6-orig/include/linux/skbuff.h 2006-03-14 19:40:26.000000000 +0800 ++++ 
linux-2.6.12.6/include/linux/skbuff.h 2006-03-16 17:04:51.000000000 +0800 +@@ -128,6 +128,30 @@ + __u16 size; + }; + ++/* Support for callback when skb data has been released */ ++typedef struct zccd /* Zero Copy Callback Descriptor */ ++{ /* (embed as first member of custom struct) */ ++ atomic_t zccd_count; /* reference count */ ++ void (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */ ++} zccd_t; ++ ++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *)) ++{ ++ atomic_set (&d->zccd_count, 1); ++ d->zccd_destructor = callback; ++} ++ ++static inline void zccd_get (zccd_t *d) /* take a reference */ ++{ ++ atomic_inc (&d->zccd_count); ++} ++ ++static inline void zccd_put (zccd_t *d) /* release a reference */ ++{ ++ if (atomic_dec_and_test (&d->zccd_count)) ++ (d->zccd_destructor)(d); ++} ++ + /* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +@@ -137,6 +161,13 @@ + unsigned short tso_size; + unsigned short tso_segs; + struct sk_buff *frag_list; ++ zccd_t *zccd; /* zero copy descriptor */ ++ zccd_t *zccd2; /* 2nd zero copy descriptor */ ++ /* NB we expect zero-copy data to be at least 1 packet, so ++ * having 2 zccds means we don't unnecessarily split the packet ++ * where consecutive zero-copy sends abut. 
++ */ ++ + skb_frag_t frags[MAX_SKB_FRAGS]; + }; + +diff -Nur linux-2.6.12.6-orig/include/net/tcp.h linux-2.6.12.6/include/net/tcp.h +--- linux-2.6.12.6-orig/include/net/tcp.h 2005-06-18 03:48:29.000000000 +0800 ++++ linux-2.6.12.6/include/net/tcp.h 2006-03-16 17:05:02.000000000 +0800 +@@ -783,6 +783,9 @@ + extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size); + extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); ++extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, ++ int flags, zccd_t *zccd); ++ + + extern int tcp_ioctl(struct sock *sk, + int cmd, +@@ -879,6 +882,9 @@ + struct msghdr *msg, + size_t len, int nonblock, + int flags, int *addr_len); ++extern int tcp_recvpackets(struct sock *sk, ++ struct sk_buff_head *packets, ++ int len, int nonblock); + + extern int tcp_listen_start(struct sock *sk); + +diff -Nur linux-2.6.12.6-orig/net/core/dev.c linux-2.6.12.6/net/core/dev.c +--- linux-2.6.12.6-orig/net/core/dev.c 2005-06-18 03:48:29.000000000 +0800 ++++ linux-2.6.12.6/net/core/dev.c 2006-03-16 17:04:36.000000000 +0800 +@@ -1176,6 +1176,9 @@ + ninfo->tso_segs = skb_shinfo(skb)->tso_segs; + ninfo->nr_frags = 0; + ninfo->frag_list = NULL; ++ ninfo->zccd = NULL; /* copied data => no user zero copy descriptor */ ++ ninfo->zccd2 = NULL; ++ + + /* Offset between the two in bytes */ + offset = data - skb->head; +diff -Nur linux-2.6.12.6-orig/net/core/skbuff.c linux-2.6.12.6/net/core/skbuff.c +--- linux-2.6.12.6-orig/net/core/skbuff.c 2005-06-18 03:48:29.000000000 +0800 ++++ linux-2.6.12.6/net/core/skbuff.c 2006-03-16 17:04:41.000000000 +0800 +@@ -159,6 +159,9 @@ + skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->frag_list = NULL; ++ skb_shinfo(skb)->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */ ++ skb_shinfo(skb)->zccd2 = NULL; ++ + out: + return skb; + nodata: +@@ -247,6 
+250,10 @@ + if (!skb->cloned || + !atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, + &skb_shinfo(skb)->dataref)) { ++ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */ ++ zccd_put (skb_shinfo(skb)->zccd); /* release hold */ ++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */ ++ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */ + if (skb_shinfo(skb)->nr_frags) { + int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) +@@ -529,6 +536,14 @@ + n->data_len = skb->data_len; + n->len = skb->len; + ++ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */ ++ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */ ++ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd; ++ ++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */ ++ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */ ++ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; ++ + if (skb_shinfo(skb)->nr_frags) { + int i; + +@@ -571,6 +586,9 @@ + u8 *data; + int size = nhead + (skb->end - skb->head) + ntail; + long off; ++ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */ ++ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */ ++ + + if (skb_shared(skb)) + BUG(); +@@ -592,6 +610,11 @@ + if (skb_shinfo(skb)->frag_list) + skb_clone_fraglist(skb); + ++ if (zccd != NULL) /* user zero copy descriptor? */ ++ zccd_get (zccd); /* extra ref (pages are shared) */ ++ if (zccd2 != NULL) /* 2nd user zero copy descriptor? 
*/ ++ zccd_get (zccd2); /* extra ref (pages are shared) */ ++ + skb_release_data(skb); + + off = (data + nhead) - skb->head; +@@ -606,6 +629,8 @@ + skb->cloned = 0; + skb->nohdr = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); ++ skb_shinfo(skb)->zccd = zccd; ++ skb_shinfo(skb)->zccd2 = zccd2; + return 0; + + nodata: +diff -Nur linux-2.6.12.6-orig/net/ipv4/tcp.c linux-2.6.12.6/net/ipv4/tcp.c +--- linux-2.6.12.6-orig/net/ipv4/tcp.c 2005-06-18 03:48:29.000000000 +0800 ++++ linux-2.6.12.6/net/ipv4/tcp.c 2006-03-16 17:04:57.000000000 +0800 +@@ -630,8 +630,10 @@ + } + } + ++/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */ + static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, +- size_t psize, int flags) ++ size_t psize, int flags, zccd_t *zccd) ++ + { + struct tcp_sock *tp = tcp_sk(sk); + int mss_now; +@@ -678,6 +680,17 @@ + copy = size; + + i = skb_shinfo(skb)->nr_frags; ++ ++ if (zccd != NULL && /* this is a zcc I/O */ ++ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */ ++ skb_shinfo(skb)->zccd2 != NULL && ++ skb_shinfo(skb)->zccd != zccd && /* not the same one */ ++ skb_shinfo(skb)->zccd2 != zccd) ++ { ++ tcp_mark_push (tp, skb); ++ goto new_segment; ++ } ++ + can_coalesce = skb_can_coalesce(skb, i, page, offset); + if (!can_coalesce && i >= MAX_SKB_FRAGS) { + tcp_mark_push(tp, skb); +@@ -694,6 +707,20 @@ + skb_fill_page_desc(skb, i, page, offset, copy); + } + ++ if (zccd != NULL && /* this is a zcc I/O */ ++ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */ ++ skb_shinfo(skb)->zccd2 != zccd) ++ { ++ zccd_get (zccd); /* bump ref count */ ++ ++ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); ++ ++ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */ ++ skb_shinfo(skb)->zccd = zccd; ++ else ++ skb_shinfo(skb)->zccd2 = zccd; ++ } ++ + skb->len += copy; + skb->data_len += copy; + skb->truesize += copy; +@@ -762,12 +789,37 @@ + + lock_sock(sk); + TCP_CHECK_TIMER(sk); +- res 
= do_tcp_sendpages(sk, &page, offset, size, flags); ++ res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL); ++ TCP_CHECK_TIMER(sk); ++ release_sock(sk); ++ return res; ++} ++ ++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, ++ int flags, zccd_t *zccd) ++{ ++ ssize_t res; ++ struct sock *sk = sock->sk; ++ ++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) ++ ++ if (!(sk->sk_route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */ ++ !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */ ++ BUG (); ++ ++#undef TCP_ZC_CSUM_FLAGS ++ ++ lock_sock(sk); ++ TCP_CHECK_TIMER(sk); ++ ++ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); ++ + TCP_CHECK_TIMER(sk); + release_sock(sk); + return res; + } + ++ + #define TCP_PAGE(sk) (sk->sk_sndmsg_page) + #define TCP_OFF(sk) (sk->sk_sndmsg_off) + +@@ -1530,6 +1582,202 @@ + goto out; + } + ++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets, ++ int len, int nonblock) ++{ ++ struct tcp_sock *tp = tcp_sk(sk); ++ int copied; ++ long timeo; ++ ++ BUG_TRAP (len > 0); ++ /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/ ++ ++ lock_sock(sk); ++ ++ TCP_CHECK_TIMER(sk); ++ ++ copied = -ENOTCONN; ++ if (sk->sk_state == TCP_LISTEN) ++ goto out; ++ ++ copied = 0; ++ timeo = sock_rcvtimeo(sk, nonblock); ++ ++ do { ++ struct sk_buff * skb; ++ u32 offset; ++ unsigned long used; ++ int exhausted; ++ int eaten; ++ ++ /* Are we at urgent data? Stop if we have read anything. */ ++ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq) ++ break; ++ ++ /* We need to check signals first, to get correct SIGURG ++ * handling. FIXME: Need to check this doesnt impact 1003.1g ++ * and move it down to the bottom of the loop ++ */ ++ if (signal_pending(current)) { ++ if (copied) ++ break; ++ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; ++ break; ++ } ++ ++ /* Next get a buffer. 
*/ ++ ++ skb = skb_peek(&sk->sk_receive_queue); ++ ++ if (skb == NULL) /* nothing ready */ ++ { ++ if (copied) { ++ if (sk->sk_err || ++ sk->sk_state == TCP_CLOSE || ++ (sk->sk_shutdown & RCV_SHUTDOWN) || ++ !timeo || ++ (0)) ++ break; ++ } else { ++ if (sock_flag(sk, SOCK_DONE)) ++ break; ++ ++ if (sk->sk_err) { ++ copied = sock_error(sk); ++ break; ++ } ++ ++ if (sk->sk_shutdown & RCV_SHUTDOWN) ++ break; ++ ++ if (sk->sk_state == TCP_CLOSE) { ++ if (!(sock_flag(sk, SOCK_DONE))) { ++ /* This occurs when user tries to read ++ * from never connected socket. ++ */ ++ copied = -ENOTCONN; ++ break; ++ } ++ break; ++ } ++ ++ if (!timeo) { ++ copied = -EAGAIN; ++ break; ++ } ++ } ++ ++ cleanup_rbuf(sk, copied); ++ sk_wait_data(sk, &timeo); ++ continue; ++ } ++ ++ BUG_TRAP (atomic_read (&skb->users) == 1); ++ ++ exhausted = eaten = 0; ++ ++ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; ++ if (skb->h.th->syn) ++ offset--; ++ ++ used = skb->len - offset; ++ ++ if (tp->urg_data) { ++ u32 urg_offset = tp->urg_seq - tp->copied_seq; ++ if (urg_offset < used) { ++ if (!urg_offset) { /* at urgent data */ ++ if (!(sock_flag(sk, SOCK_URGINLINE))) { ++ tp->copied_seq++; /* discard the single byte of urgent data */ ++ offset++; ++ used--; ++ } ++ } else /* truncate read */ ++ used = urg_offset; ++ } ++ } ++ ++ BUG_TRAP (used >= 0); ++ if (len < used) ++ used = len; ++ ++ if (used == 0) ++ exhausted = 1; ++ else ++ { ++ if (skb_is_nonlinear (skb)) ++ { ++ int rc = skb_linearize (skb, GFP_KERNEL); ++ ++ printk ("tcp_recvpackets(): linearising: %d\n", rc); ++ ++ if (rc) ++ { ++ if (!copied) ++ copied = rc; ++ break; ++ } ++ } ++ ++ if ((offset + used) == skb->len) /* consuming the whole packet */ ++ { ++ __skb_unlink (skb, &sk->sk_receive_queue); ++ dst_release (skb->dst); ++ skb_orphan (skb); ++ __skb_pull (skb, offset); ++ __skb_queue_tail (packets, skb); ++ exhausted = eaten = 1; ++ } ++ else /* consuming only part of the packet */ ++ { ++ struct sk_buff *skb2 = skb_clone (skb, 
GFP_KERNEL); ++ ++ if (skb2 == NULL) ++ { ++ if (!copied) ++ copied = -ENOMEM; ++ break; ++ } ++ ++ dst_release (skb2->dst); ++ __skb_pull (skb2, offset); ++ __skb_trim (skb2, used); ++ __skb_queue_tail (packets, skb2); ++ } ++ ++ tp->copied_seq += used; ++ copied += used; ++ len -= used; ++ } ++ ++ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) { ++ tp->urg_data = 0; ++ tcp_fast_path_check(sk, tp); ++ } ++ ++ if (!exhausted) ++ continue; ++ ++ if (skb->h.th->fin) ++ { ++ tp->copied_seq++; ++ if (!eaten) ++ sk_eat_skb (sk, skb); ++ break; ++ } ++ ++ if (!eaten) ++ sk_eat_skb (sk, skb); ++ ++ } while (len > 0); ++ ++ out: ++ /* Clean up data we have read: This will do ACK frames. */ ++ cleanup_rbuf(sk, copied); ++ TCP_CHECK_TIMER(sk); ++ release_sock(sk); ++ return copied; ++} ++ + /* + * State processing on a close. This implements the state shift for + * sending our FIN frame. Note that we only send a FIN for some +@@ -2380,6 +2628,8 @@ + EXPORT_SYMBOL(tcp_recvmsg); + EXPORT_SYMBOL(tcp_sendmsg); + EXPORT_SYMBOL(tcp_sendpage); ++EXPORT_SYMBOL(tcp_sendpage_zccd); ++EXPORT_SYMBOL(tcp_recvpackets); + EXPORT_SYMBOL(tcp_setsockopt); + EXPORT_SYMBOL(tcp_shutdown); + EXPORT_SYMBOL(tcp_statistics); diff --git a/lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch new file mode 100644 index 0000000..06baac2 --- /dev/null +++ b/lustre/kernel_patches/patches/tcp-zero-copy-2.6.5-7.244.patch @@ -0,0 +1,545 @@ +diff -Nur linux-2.6.5-7.244-orig/include/linux/skbuff.h linux-2.6.5-7.244/include/linux/skbuff.h +--- linux-2.6.5-7.244-orig/include/linux/skbuff.h 2005-12-13 07:50:31.000000000 +0800 ++++ linux-2.6.5-7.244/include/linux/skbuff.h 2006-03-13 16:31:30.000000000 +0800 +@@ -135,6 +135,30 @@ + __u16 size; + }; + ++/* Support for callback when skb data has been released */ ++typedef struct zccd /* Zero Copy Callback Descriptor */ ++{ /* (embed as first member of custom struct) */ ++ atomic_t 
zccd_count; /* reference count */ ++ void (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */ ++} zccd_t; ++ ++static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *)) ++{ ++ atomic_set (&d->zccd_count, 1); ++ d->zccd_destructor = callback; ++} ++ ++static inline void zccd_get (zccd_t *d) /* take a reference */ ++{ ++ atomic_inc (&d->zccd_count); ++} ++ ++static inline void zccd_put (zccd_t *d) /* release a reference */ ++{ ++ if (atomic_dec_and_test (&d->zccd_count)) ++ (d->zccd_destructor)(d); ++} ++ + /* This data is invariant across clones and lives at + * the end of the header data, ie. at skb->end. + */ +@@ -144,6 +168,12 @@ + unsigned short tso_size; + unsigned short tso_segs; + struct sk_buff *frag_list; ++ zccd_t *zccd; /* zero copy descriptor */ ++ zccd_t *zccd2; /* 2nd zero copy descriptor */ ++ /* NB we expect zero-copy data to be at least 1 packet, so ++ * having 2 zccds means we don't unneccessarily split the packet ++ * where consecutive zero-copy sends abutt. ++ */ + skb_frag_t frags[MAX_SKB_FRAGS]; + }; + +diff -Nur linux-2.6.5-7.244-orig/include/net/sock.h linux-2.6.5-7.244/include/net/sock.h +--- linux-2.6.5-7.244-orig/include/net/sock.h 2005-12-13 07:50:33.000000000 +0800 ++++ linux-2.6.5-7.244/include/net/sock.h 2006-03-13 16:32:36.000000000 +0800 +@@ -413,6 +413,18 @@ + (__skb)->next = NULL; \ + } while(0) + ++#define sk_wait_event(__sk, __timeo, __condition) \ ++({ int rc; \ ++ release_sock(__sk); \ ++ rc = __condition; \ ++ if (!rc) { \ ++ *(__timeo) = schedule_timeout(*(__timeo)); \ ++ rc = __condition; \ ++ } \ ++ lock_sock(__sk); \ ++ rc; \ ++}) ++ + /* IP protocol blocks we attach to sockets. 
+ * socket layer -> transport layer interface + * transport -> network interface is defined by struct inet_proto +@@ -1037,6 +1049,20 @@ + sk->sk_stamp = *stamp; + } + ++/** ++ * sk_eat_skb - Release a skb if it is no longer needed ++ * @sk - socket to eat this skb from ++ * @skb - socket buffer to eat ++ * ++ * This routine must be called with interrupts disabled or with the socket ++ * locked so that the sk_buff queue operation is ok. ++*/ ++static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb) ++{ ++ __skb_unlink(skb, &sk->sk_receive_queue); ++ __kfree_skb(skb); ++} ++ + extern atomic_t netstamp_needed; + extern void sock_enable_timestamp(struct sock *sk); + extern void sock_disable_timestamp(struct sock *sk); +diff -Nur linux-2.6.5-7.244-orig/include/net/tcp.h linux-2.6.5-7.244/include/net/tcp.h +--- linux-2.6.5-7.244-orig/include/net/tcp.h 2005-12-13 07:50:21.000000000 +0800 ++++ linux-2.6.5-7.244/include/net/tcp.h 2006-03-13 16:31:37.000000000 +0800 +@@ -764,6 +764,9 @@ + extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t size); + extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); ++extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, ++ int flags, zccd_t *zccd); ++ + + extern int tcp_ioctl(struct sock *sk, + int cmd, +@@ -861,6 +864,10 @@ + size_t len, int nonblock, + int flags, int *addr_len); + ++extern int tcp_recvpackets(struct sock *sk, ++ struct sk_buff_head *packets, ++ int len, int nonblock); ++ + extern int tcp_listen_start(struct sock *sk); + + extern void tcp_parse_options(struct sk_buff *skb, +diff -Nur linux-2.6.5-7.244-orig/net/core/dev.c linux-2.6.5-7.244/net/core/dev.c +--- linux-2.6.5-7.244-orig/net/core/dev.c 2005-12-13 07:50:38.000000000 +0800 ++++ linux-2.6.5-7.244/net/core/dev.c 2006-03-13 16:31:56.000000000 +0800 +@@ -1322,6 +1322,9 @@ + ninfo->tso_segs = skb_shinfo(skb)->tso_segs; + 
ninfo->nr_frags = 0; + ninfo->frag_list = NULL; ++ ninfo->zccd = NULL; /* copied data => no user zero copy descriptor */ ++ ninfo->zccd2 = NULL; ++ + + /* Offset between the two in bytes */ + offset = data - skb->head; +diff -Nur linux-2.6.5-7.244-orig/net/core/skbuff.c linux-2.6.5-7.244/net/core/skbuff.c +--- linux-2.6.5-7.244-orig/net/core/skbuff.c 2004-04-04 11:37:37.000000000 +0800 ++++ linux-2.6.5-7.244/net/core/skbuff.c 2006-03-13 16:31:46.000000000 +0800 +@@ -152,6 +152,9 @@ + skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->frag_list = NULL; ++ skb_shinfo(skb)->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */ ++ skb_shinfo(skb)->zccd2 = NULL; ++ + out: + return skb; + nodata: +@@ -186,6 +189,10 @@ + { + if (!skb->cloned || + atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { ++ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */ ++ zccd_put (skb_shinfo(skb)->zccd); /* release hold */ ++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? */ ++ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */ + if (skb_shinfo(skb)->nr_frags) { + int i; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) +@@ -449,6 +456,14 @@ + n->data_len = skb->data_len; + n->len = skb->len; + ++ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */ ++ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */ ++ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd; ++ ++ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? 
*/ ++ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */ ++ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; ++ + if (skb_shinfo(skb)->nr_frags) { + int i; + +@@ -493,6 +508,9 @@ + u8 *data; + int size = nhead + (skb->end - skb->head) + ntail; + long off; ++ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */ ++ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */ ++ + + if (skb_shared(skb)) + BUG(); +@@ -514,6 +532,11 @@ + if (skb_shinfo(skb)->frag_list) + skb_clone_fraglist(skb); + ++ if (zccd != NULL) /* user zero copy descriptor? */ ++ zccd_get (zccd); /* extra ref (pages are shared) */ ++ if (zccd2 != NULL) /* 2nd user zero copy descriptor? */ ++ zccd_get (zccd2); /* extra ref (pages are shared) */ ++ + skb_release_data(skb); + + off = (data + nhead) - skb->head; +@@ -527,6 +550,9 @@ + skb->nh.raw += off; + skb->cloned = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); ++ skb_shinfo(skb)->zccd = zccd; ++ skb_shinfo(skb)->zccd2 = zccd2; ++ + return 0; + + nodata: +diff -Nur linux-2.6.5-7.244-orig/net/core/sock.c linux-2.6.5-7.244/net/core/sock.c +--- linux-2.6.5-7.244-orig/net/core/sock.c 2005-12-13 07:50:10.000000000 +0800 ++++ linux-2.6.5-7.244/net/core/sock.c 2006-03-13 16:32:44.000000000 +0800 +@@ -917,6 +917,31 @@ + } while((skb = sk->sk_backlog.head) != NULL); + } + ++/** ++ * sk_wait_data - wait for data to arrive at sk_receive_queue ++ * sk - sock to wait on ++ * timeo - for how long ++ * ++ * Now socket state including sk->sk_err is changed only under lock, ++ * hence we may omit checks after joining wait queue. ++ * We check receive queue before schedule() only as optimization; ++ * it is very likely that release_sock() added new data. 
++ */ ++int sk_wait_data(struct sock *sk, long *timeo) ++{ ++ int rc; ++ DEFINE_WAIT(wait); ++ ++ prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); ++ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); ++ rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); ++ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); ++ finish_wait(sk->sk_sleep, &wait); ++ return rc; ++} ++ ++EXPORT_SYMBOL(sk_wait_data); ++ + /* + * Set of default routines for initialising struct proto_ops when + * the protocol does not support a particular function. In certain +diff -Nur linux-2.6.5-7.244-orig/net/ipv4/tcp.c linux-2.6.5-7.244/net/ipv4/tcp.c +--- linux-2.6.5-7.244-orig/net/ipv4/tcp.c 2005-12-13 07:50:28.000000000 +0800 ++++ linux-2.6.5-7.244/net/ipv4/tcp.c 2006-03-13 16:32:04.000000000 +0800 +@@ -799,7 +799,7 @@ + } + + ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, +- size_t psize, int flags); ++ size_t psize, int flags,zccd_t *zccd); + + static inline int can_coalesce(struct sk_buff *skb, int i, struct page *page, + int off) +@@ -881,8 +881,9 @@ + return err; + } + ++/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */ + ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, +- size_t psize, int flags) ++ size_t psize, int flags,zccd_t *zccd) + { + struct tcp_opt *tp = tcp_sk(sk); + int mss_now; +@@ -929,6 +930,17 @@ + copy = size; + + i = skb_shinfo(skb)->nr_frags; ++ ++ if (zccd != NULL && /* this is a zcc I/O */ ++ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */ ++ skb_shinfo(skb)->zccd2 != NULL && ++ skb_shinfo(skb)->zccd != zccd && /* not the same one */ ++ skb_shinfo(skb)->zccd2 != zccd) ++ { ++ tcp_mark_push (tp, skb); ++ goto new_segment; ++ } ++ + if (can_coalesce(skb, i, page, offset)) { + skb_shinfo(skb)->frags[i - 1].size += copy; + } else if (i < MAX_SKB_FRAGS) { +@@ -939,6 +951,20 @@ + goto new_segment; + } + ++ if (zccd != NULL && /* this is a zcc I/O 
*/ ++ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */ ++ skb_shinfo(skb)->zccd2 != zccd) ++ { ++ zccd_get (zccd); /* bump ref count */ ++ ++ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); ++ ++ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */ ++ skb_shinfo(skb)->zccd = zccd; ++ else ++ skb_shinfo(skb)->zccd2 = zccd; ++ } ++ + skb->len += copy; + skb->data_len += copy; + skb->ip_summed = CHECKSUM_HW; +@@ -1003,12 +1029,36 @@ + + lock_sock(sk); + TCP_CHECK_TIMER(sk); +- res = do_tcp_sendpages(sk, &page, offset, size, flags); ++ res = do_tcp_sendpages(sk, &page, offset, size, flags,NULL); + TCP_CHECK_TIMER(sk); + release_sock(sk); + return res; + } + ++ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, ++ int flags, zccd_t *zccd) ++{ ++ ssize_t res; ++ struct sock *sk = sock->sk; ++ ++#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) ++ ++ if (!(sk->sk_route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */ ++ !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */ ++ BUG (); ++ ++#undef TCP_ZC_CSUM_FLAGS ++ ++ lock_sock(sk); ++ TCP_CHECK_TIMER(sk); ++ ++ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); ++ TCP_CHECK_TIMER(sk); ++ release_sock(sk); ++ return res; ++} ++ ++ + #define TCP_PAGE(sk) (inet_sk(sk)->sndmsg_page) + #define TCP_OFF(sk) (inet_sk(sk)->sndmsg_off) + +@@ -1849,6 +1899,202 @@ + err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); + goto out; + } ++ ++int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets, ++int len, int nonblock) ++{ ++ struct tcp_opt *tp = tcp_sk(sk); ++ int copied; ++ long timeo; ++ ++ BUG_TRAP (len > 0); ++ /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/ ++ ++ lock_sock(sk); ++ ++ TCP_CHECK_TIMER(sk); ++ ++ copied = -ENOTCONN; ++ if (sk->sk_state == TCP_LISTEN) ++ goto out; ++ ++ copied = 0; ++ timeo = sock_rcvtimeo(sk, nonblock); ++ ++ do { ++ struct sk_buff * skb; 
++ u32 offset; ++ unsigned long used; ++ int exhausted; ++ int eaten; ++ ++ /* Are we at urgent data? Stop if we have read anything. */ ++ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq) ++ break; ++ ++ /* We need to check signals first, to get correct SIGURG ++ * handling. FIXME: Need to check this doesnt impact 1003.1g ++ * and move it down to the bottom of the loop ++ */ ++ if (signal_pending(current)) { ++ if (copied) ++ break; ++ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; ++ break; ++ } ++ ++ /* Next get a buffer. */ ++ ++ skb = skb_peek(&sk->sk_receive_queue); ++ ++ if (skb == NULL) /* nothing ready */ ++ { ++ if (copied) { ++ if (sk->sk_err || ++ sk->sk_state == TCP_CLOSE || ++ (sk->sk_shutdown & RCV_SHUTDOWN) || ++ !timeo || ++ (0)) ++ break; ++ } else { ++ if (sock_flag(sk, SOCK_DONE)) ++ break; ++ ++ if (sk->sk_err) { ++ copied = sock_error(sk); ++ break; ++ } ++ ++ if (sk->sk_shutdown & RCV_SHUTDOWN) ++ break; ++ ++ if (sk->sk_state == TCP_CLOSE) { ++ if (!(sock_flag(sk, SOCK_DONE))) { ++ /* This occurs when user tries to read ++ * from never connected socket. 
++ */ ++ copied = -ENOTCONN; ++ break; ++ } ++ break; ++ } ++ ++ if (!timeo) { ++ copied = -EAGAIN; ++ break; ++ } ++ } ++ ++ cleanup_rbuf(sk, copied); ++ sk_wait_data(sk, &timeo); ++ continue; ++ } ++ ++ BUG_TRAP (atomic_read (&skb->users) == 1); ++ ++ exhausted = eaten = 0; ++ ++ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; ++ if (skb->h.th->syn) ++ offset--; ++ ++ used = skb->len - offset; ++ ++ if (tp->urg_data) { ++ u32 urg_offset = tp->urg_seq - tp->copied_seq; ++ if (urg_offset < used) { ++ if (!urg_offset) { /* at urgent date */ ++ if (!(sock_flag(sk, SOCK_URGINLINE))) { ++ tp->copied_seq++; /* discard the single byte of urgent data */ ++ offset++; ++ used--; ++ } ++ } else /* truncate read */ ++ used = urg_offset; ++ } ++ } ++ ++ BUG_TRAP (used >= 0); ++ if (len < used) ++ used = len; ++ ++ if (used == 0) ++ exhausted = 1; ++ else ++ { ++ if (skb_is_nonlinear (skb)) ++ { ++ int rc = skb_linearize (skb, GFP_KERNEL); ++ ++ printk ("tcp_recvpackets(): linearising: %d\n", rc); ++ ++ if (rc) ++ { ++ if (!copied) ++ copied = rc; ++ break; ++ } ++ } ++ ++ if ((offset + used) == skb->len) /* consuming the whole packet */ ++ { ++ __skb_unlink (skb, &sk->sk_receive_queue); ++ dst_release (skb->dst); ++ skb_orphan (skb); ++ __skb_pull (skb, offset); ++ __skb_queue_tail (packets, skb); ++ exhausted = eaten = 1; ++ } ++ else /* consuming only part of the packet */ ++ { ++ struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL); ++ ++ if (skb2 == NULL) ++ { ++ if (!copied) ++ copied = -ENOMEM; ++ break; ++ } ++ ++ dst_release (skb2->dst); ++ __skb_pull (skb2, offset); ++ __skb_trim (skb2, used); ++ __skb_queue_tail (packets, skb2); ++ } ++ ++ tp->copied_seq += used; ++ copied += used; ++ len -= used; ++ } ++ ++ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) { ++ tp->urg_data = 0; ++ tcp_fast_path_check(sk, tp); ++ } ++ ++ if (!exhausted) ++ continue; ++ ++ if (skb->h.th->fin) ++ { ++ tp->copied_seq++; ++ if (!eaten) ++ sk_eat_skb (sk, skb); ++ break; ++ } ++ ++ if 
(!eaten) ++ sk_eat_skb (sk, skb); ++ ++ } while (len > 0); ++ ++ out: ++ /* Clean up data we have read: This will do ACK frames. */ ++ cleanup_rbuf(sk, copied); ++ TCP_CHECK_TIMER(sk); ++ release_sock(sk); ++ return copied; ++} + + /* + * State processing on a close. This implements the state shift for +@@ -2872,6 +3118,8 @@ + EXPORT_SYMBOL(tcp_recvmsg); + EXPORT_SYMBOL(tcp_sendmsg); + EXPORT_SYMBOL(tcp_sendpage); ++EXPORT_SYMBOL(tcp_sendpage_zccd); ++EXPORT_SYMBOL(tcp_recvpackets); + EXPORT_SYMBOL(tcp_setsockopt); + EXPORT_SYMBOL(tcp_shutdown); + EXPORT_SYMBOL(tcp_sockets_allocated); diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch b/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch new file mode 100644 index 0000000..694d097 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_intent-2.6-fc3.patch @@ -0,0 +1,773 @@ +Index: linux-2.6.10/fs/exec.c +=================================================================== +--- linux-2.6.10.orig/fs/exec.c ++++ linux-2.6.10/fs/exec.c +@@ -124,9 +124,10 @@ asmlinkage long sys_uselib(const char __ + struct file * file; + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_OPEN); + +- nd.intent.open.flags = FMODE_READ; +- error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; ++ error = __user_walk_it(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + if (error) + goto out; + +@@ -138,7 +139,7 @@ asmlinkage long sys_uselib(const char __ + if (error) + goto exit; + +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; +@@ -485,8 +486,9 @@ struct file *open_exec(const char *name) + int err; + struct file *file; + +- nd.intent.open.flags = FMODE_READ; +- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ intent_init(&nd.intent, IT_OPEN); ++ nd.intent.it_flags = FMODE_READ|FMODE_EXEC; ++ err = path_lookup(name, 
LOOKUP_FOLLOW, &nd); + file = ERR_PTR(err); + + if (!err) { +@@ -499,7 +501,7 @@ struct file *open_exec(const char *name) + err = -EACCES; + file = ERR_PTR(err); + if (!err) { +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { +Index: linux-2.6.10/fs/inode.c +=================================================================== +--- linux-2.6.10.orig/fs/inode.c ++++ linux-2.6.10/fs/inode.c +@@ -233,6 +233,7 @@ void __iget(struct inode * inode) + inodes_stat.nr_unused--; + } + ++EXPORT_SYMBOL(__iget); + /** + * clear_inode - clear an inode + * @inode: inode to clear +Index: linux-2.6.10/fs/namei.c +=================================================================== +--- linux-2.6.10.orig/fs/namei.c ++++ linux-2.6.10/fs/namei.c +@@ -288,8 +288,19 @@ int deny_write_access(struct file * file + return 0; + } + ++void intent_release(struct lookup_intent *it) ++{ ++ if (!it) ++ return; ++ if (it->it_magic != INTENT_MAGIC) ++ return; ++ if (it->it_op_release) ++ it->it_op_release(it); ++} ++ + void path_release(struct nameidata *nd) + { ++ intent_release(&nd->intent); + dput(nd->dentry); + mntput(nd->mnt); + } +@@ -379,7 +390,10 @@ static struct dentry * real_lookup(struc + { + struct dentry * result; + struct inode *dir = parent->d_inode; ++ int counter = 0; + ++again: ++ counter++; + down(&dir->i_sem); + /* + * First re-do the cached lookup just in case it was created +@@ -418,7 +432,10 @@ static struct dentry * real_lookup(struc + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { + dput(result); +- result = ERR_PTR(-ENOENT); ++ if (counter > 10) ++ result = ERR_PTR(-ESTALE); ++ if (!IS_ERR(result)) ++ goto again; + } + } + return result; +@@ -448,7 +465,9 @@ walk_init_root(const char *name, struct + static inline int __vfs_follow_link(struct nameidata *nd, const 
char *link) + { + int res = 0; ++ struct lookup_intent it = nd->intent; + char *name; ++ + if (IS_ERR(link)) + goto fail; + +@@ -458,6 +477,9 @@ static inline int __vfs_follow_link(stru + /* weird __emul_prefix() stuff did it */ + goto out; + } ++ intent_init(&nd->intent, it.it_op); ++ nd->intent.it_flags = it.it_flags; ++ nd->intent.it_create_mode = it.it_create_mode; + res = link_path_walk(link, nd); + out: + if (nd->depth || res || nd->last_type!=LAST_NORM) +@@ -666,6 +688,33 @@ fail: + return PTR_ERR(dentry); + } + ++static int revalidate_special(struct nameidata *nd) ++{ ++ struct dentry *dentry = nd->dentry; ++ int err, counter = 0; ++ ++ revalidate_again: ++ if (!dentry->d_op || !dentry->d_op->d_revalidate) ++ return 0; ++ if (!dentry->d_op->d_revalidate(dentry, nd)) { ++ struct dentry *new; ++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC, nd))) ++ return err; ++ new = real_lookup(dentry->d_parent, &dentry->d_name, nd); ++ if (IS_ERR(new)) ++ return PTR_ERR(new); ++ d_invalidate(dentry); ++ dput(dentry); ++ nd->dentry = dentry = new; ++ counter++; ++ if (counter < 10) ++ goto revalidate_again; ++ printk("excessive revalidate_it loops\n"); ++ return -ESTALE; ++ } ++ return 0; ++} ++ + /* + * Name resolution. 
+ * +@@ -767,8 +816,12 @@ int fastcall link_path_walk(const char * + goto out_dput; + + if (inode->i_op->follow_link) { ++ int save_flags = nd->flags; + mntget(next.mnt); ++ nd->flags |= LOOKUP_LINK_NOTLAST; + err = do_follow_link(next.dentry, nd); ++ if (!(save_flags & LOOKUP_LINK_NOTLAST)) ++ nd->flags &= ~LOOKUP_LINK_NOTLAST; + dput(next.dentry); + mntput(next.mnt); + if (err) +@@ -807,14 +860,34 @@ last_component: + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: ++ nd->flags |= LOOKUP_LAST; ++ err = revalidate_special(nd); ++ nd->flags &= ~LOOKUP_LAST; ++ if (!nd->dentry->d_inode) ++ err = -ENOENT; ++ if (err) { ++ path_release(nd); ++ goto return_err; ++ } ++ if (lookup_flags & LOOKUP_DIRECTORY) { ++ err = -ENOTDIR; ++ if (!nd->dentry->d_inode->i_op || ++ !nd->dentry->d_inode->i_op->lookup){ ++ path_release(nd); ++ goto return_err; ++ } ++ } + goto return_reval; + } ++ + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) + break; + } ++ nd->flags |= LOOKUP_LAST; + err = do_lookup(nd, &this, &next, atomic); ++ nd->flags &= ~LOOKUP_LAST; + if (err) + break; + follow_mount(&next.mnt, &next.dentry); +@@ -1032,7 +1105,7 @@ struct dentry * lookup_hash(struct qstr + } + + /* SMP-safe */ +-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) + { + unsigned long hash; + struct qstr this; +@@ -1052,11 +1125,16 @@ struct dentry * lookup_one_len(const cha + } + this.hash = end_name_hash(hash); + +- return lookup_hash(&this, base); ++ return __lookup_hash(&this, base, nd); + access: + return ERR_PTR(-EACCES); + } + ++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++{ ++ return lookup_one_len_it(name, base, len, NULL); ++} ++ + /* + * namei() + * +@@ -1068,7 +1146,7 @@ access: + * that namei follows links, while lnamei does 
not. + * SMP-safe + */ +-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) + { + char *tmp = getname(name); + int err = PTR_ERR(tmp); +@@ -1080,6 +1158,12 @@ int fastcall __user_walk(const char __us + return err; + } + ++int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_LOOKUP); ++ return __user_walk_it(name, flags, nd); ++} ++ + /* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. +@@ -1363,8 +1447,8 @@ int open_namei(const char * pathname, in + acc_mode |= MAY_APPEND; + + /* Fill in the open() intent data */ +- nd->intent.open.flags = flag; +- nd->intent.open.create_mode = mode; ++ nd->intent.it_flags = flag; ++ nd->intent.it_create_mode = mode; + + /* + * The simplest case - just a plain lookup. +@@ -1379,6 +1463,7 @@ int open_namei(const char * pathname, in + /* + * Create - we need to know the parent. 
+ */ ++ nd->intent.it_op |= IT_CREAT; + error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + if (error) + return error; +@@ -1395,7 +1480,9 @@ int open_namei(const char * pathname, in + dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + + do_last: + error = PTR_ERR(dentry); +@@ -1508,7 +1595,9 @@ do_link: + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + putname(nd->last.name); + goto do_last; + } +Index: linux-2.6.10/fs/namespace.c +=================================================================== +--- linux-2.6.10.orig/fs/namespace.c ++++ linux-2.6.10/fs/namespace.c +@@ -62,6 +62,7 @@ struct vfsmount *alloc_vfsmnt(const char + INIT_LIST_HEAD(&mnt->mnt_mounts); + INIT_LIST_HEAD(&mnt->mnt_list); + INIT_LIST_HEAD(&mnt->mnt_fslink); ++ INIT_LIST_HEAD(&mnt->mnt_lustre_list); + if (name) { + int size = strlen(name)+1; + char *newname = kmalloc(size, GFP_KERNEL); +@@ -113,6 +114,7 @@ static inline int check_mnt(struct vfsmo + + static void detach_mnt(struct vfsmount *mnt, struct nameidata *old_nd) + { ++ memset(old_nd, 0, sizeof(*old_nd)); + old_nd->dentry = mnt->mnt_mountpoint; + old_nd->mnt = mnt->mnt_parent; + mnt->mnt_parent = mnt; +@@ -176,6 +178,9 @@ void __mntput(struct vfsmount *mnt) + { + struct super_block *sb = mnt->mnt_sb; + dput(mnt->mnt_root); ++ spin_lock(&dcache_lock); ++ list_del(&mnt->mnt_lustre_list); ++ spin_unlock(&dcache_lock); + free_vfsmnt(mnt); + deactivate_super(sb); + } +@@ -402,6 +407,8 @@ static int do_umount(struct vfsmount *mn + */ + + lock_kernel(); ++ if (sb->s_op->umount_lustre) ++ sb->s_op->umount_lustre(sb); + if( (flags&MNT_FORCE) && sb->s_op->umount_begin) + sb->s_op->umount_begin(sb); + unlock_kernel(); +@@ -627,6 +634,7 @@ static int 
do_loopback(struct nameidata + return err; + if (!old_name || !*old_name) + return -EINVAL; ++ intent_init(&old_nd.intent, IT_LOOKUP); + err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + if (err) + return err; +@@ -701,6 +709,7 @@ static int do_move_mount(struct nameidat + return -EPERM; + if (!old_name || !*old_name) + return -EINVAL; ++ intent_init(&old_nd.intent, IT_LOOKUP); + err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + if (err) + return err; +@@ -1012,6 +1021,7 @@ long do_mount(char * dev_name, char * di + int retval = 0; + int mnt_flags = 0; + ++ intent_init(&nd.intent, IT_LOOKUP); + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; +Index: linux-2.6.10/fs/open.c +=================================================================== +--- linux-2.6.10.orig/fs/open.c ++++ linux-2.6.10/fs/open.c +@@ -216,12 +216,12 @@ static inline long do_sys_truncate(const + struct nameidata nd; + struct inode * inode; + int error; +- ++ intent_init(&nd.intent, IT_GETATTR); + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; + +- error = user_path_walk(path, &nd); ++ error = user_path_walk_it(path, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; +@@ -475,6 +475,7 @@ asmlinkage long sys_access(const char __ + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; ++ intent_init(&nd.intent, IT_GETATTR); + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ + return -EINVAL; +@@ -499,13 +500,14 @@ asmlinkage long sys_access(const char __ + else + current->cap_effective = current->cap_permitted; + +- res = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); ++ res = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + if (!res) { + res = permission(nd.dentry->d_inode, mode, &nd); + /* SuS v2 requires we report a read only fs too */ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) + res = -EROFS; ++ + path_release(&nd); + } + +@@ -520,8 +522,9 @@ asmlinkage long sys_chdir(const char __u + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + if (error) + goto out; + +@@ -573,8 +576,9 @@ asmlinkage long sys_chroot(const char __ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); ++ error = __user_walk_it(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + if (error) + goto out; + +@@ -758,8 +762,10 @@ asmlinkage long sys_fchown(unsigned int + struct file *filp_open(const char * filename, int flags, int mode) + { + int namei_flags, error; ++ struct file * temp_filp; + struct nameidata nd; + ++ intent_init(&nd.intent, IT_OPEN); + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; +@@ -767,15 +773,26 @@ struct file *filp_open(const char * file + namei_flags |= 2; + + error = open_namei(filename, namei_flags, mode, &nd); +- if (!error) +- return dentry_open(nd.dentry, nd.mnt, flags); +- ++ if (!error) { ++ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); ++ return temp_filp; ++ } + return ERR_PTR(error); + } + +-EXPORT_SYMBOL(filp_open); + + struct file *dentry_open(struct dentry *dentry, struct vfsmount 
*mnt, int flags) ++ { ++ ++ struct lookup_intent it; ++ intent_init(&it, IT_LOOKUP); ++ ++ return dentry_open_it(dentry, mnt, flags, &it); ++} ++ ++EXPORT_SYMBOL(dentry_open); ++ ++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags,struct lookup_intent *it) + { + struct file * f; + struct inode *inode; +@@ -787,6 +805,7 @@ struct file *dentry_open(struct dentry * + goto cleanup_dentry; + f->f_flags = flags; + f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; ++ f->f_it = it; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); +@@ -805,6 +824,7 @@ struct file *dentry_open(struct dentry * + error = f->f_op->open(inode,f); + if (error) + goto cleanup_all; ++ intent_release(it); + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + +@@ -830,13 +850,12 @@ cleanup_all: + cleanup_file: + put_filp(f); + cleanup_dentry: ++ intent_release(it); + dput(dentry); + mntput(mnt); + return ERR_PTR(error); + } + +-EXPORT_SYMBOL(dentry_open); +- + /* + * Find an empty file descriptor entry, and mark it busy. 
+ */ +Index: linux-2.6.10/fs/stat.c +=================================================================== +--- linux-2.6.10.orig/fs/stat.c ++++ linux-2.6.10/fs/stat.c +@@ -38,7 +38,7 @@ void generic_fillattr(struct inode *inod + + EXPORT_SYMBOL(generic_fillattr); + +-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; + int retval; +@@ -47,6 +47,8 @@ int vfs_getattr(struct vfsmount *mnt, st + if (retval) + return retval; + ++ if (inode->i_op->getattr_it) ++ return inode->i_op->getattr_it(mnt, dentry, it, stat); + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +@@ -63,14 +65,20 @@ int vfs_getattr(struct vfsmount *mnt, st + + EXPORT_SYMBOL(vfs_getattr); + ++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++{ ++ return vfs_getattr_it(mnt, dentry, NULL, stat); ++} ++ + int vfs_stat(char __user *name, struct kstat *stat) + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk(name, &nd); ++ error = user_path_walk_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -82,10 +90,11 @@ int vfs_lstat(char __user *name, struct + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk_link(name, &nd); ++ error = user_path_walk_link_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -97,9 +106,12 @@ int vfs_fstat(unsigned int fd, struct ks + { + struct file *f = fget(fd); + int error = -EBADF; ++ struct nameidata nd; ++ intent_init(&nd.intent, IT_GETATTR); + + if 
(f) { +- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); ++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); ++ intent_release(&nd.intent); + fput(f); + } + return error; +Index: linux-2.6.10/include/linux/dcache.h +=================================================================== +--- linux-2.6.10.orig/include/linux/dcache.h ++++ linux-2.6.10/include/linux/dcache.h +@@ -4,6 +4,7 @@ + #ifdef __KERNEL__ + + #include ++#include + #include + #include + #include +@@ -37,6 +38,8 @@ struct qstr { + const unsigned char *name; + }; + ++#include ++ + struct dentry_stat_t { + int nr_dentry; + int nr_unused; +Index: linux-2.6.10/include/linux/fs.h +=================================================================== +--- linux-2.6.10.orig/include/linux/fs.h ++++ linux-2.6.10/include/linux/fs.h +@@ -78,6 +78,7 @@ extern int dir_notify_enable; + + #define FMODE_READ 1 + #define FMODE_WRITE 2 ++#define FMODE_EXEC 4 + + /* Internal kernel extensions */ + #define FMODE_LSEEK 4 +@@ -262,6 +263,8 @@ typedef void (dio_iodone_t)(struct inode + #define ATTR_ATTR_FLAG 1024 + #define ATTR_KILL_SUID 2048 + #define ATTR_KILL_SGID 4096 ++#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ + + /* + * This is the Inode Attributes structure, used for notify_change(). 
It +@@ -465,6 +468,7 @@ struct inode { + struct block_device *i_bdev; + struct cdev *i_cdev; + int i_cindex; ++ void *i_filterdata; + + __u32 i_generation; + +@@ -600,6 +604,7 @@ struct file { + spinlock_t f_ep_lock; + #endif /* #ifdef CONFIG_EPOLL */ + struct address_space *f_mapping; ++ struct lookup_intent *f_it; + }; + extern spinlock_t files_lock; + #define file_list_lock() spin_lock(&files_lock); +@@ -950,7 +955,9 @@ struct inode_operations { + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int, struct nameidata *); + int (*setattr) (struct dentry *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ++ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); +@@ -990,6 +997,7 @@ struct super_operations { + int (*remount_fs) (struct super_block *, int *, char *); + void (*clear_inode) (struct inode *); + void (*umount_begin) (struct super_block *); ++ void (*umount_lustre) (struct super_block *); + + int (*show_options)(struct seq_file *, struct vfsmount *); + }; +@@ -1181,6 +1189,7 @@ extern int unregister_filesystem(struct + extern struct vfsmount *kern_mount(struct file_system_type *); + extern int may_umount_tree(struct vfsmount *); + extern int may_umount(struct vfsmount *); ++struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); + extern long do_mount(char *, char *, char *, unsigned long, void *); + + extern int vfs_statfs(struct super_block *, struct kstatfs *); +@@ -1245,6 +1254,7 @@ static inline int break_lease(struct ino + extern int do_truncate(struct dentry *, loff_t start); + extern struct file *filp_open(const char *, int, int); + extern struct file 
* dentry_open(struct dentry *, struct vfsmount *, int); ++extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); + extern int filp_close(struct file *, fl_owner_t id); + extern char * getname(const char __user *); + +Index: linux-2.6.10/include/linux/mount.h +=================================================================== +--- linux-2.6.10.orig/include/linux/mount.h ++++ linux-2.6.10/include/linux/mount.h +@@ -36,6 +36,8 @@ struct vfsmount + struct list_head mnt_list; + struct list_head mnt_fslink; /* link in fs-specific expiry list */ + struct namespace *mnt_namespace; /* containing namespace */ ++ struct list_head mnt_lustre_list; /* GNS mount list */ ++ unsigned long mnt_last_used; /* for GNS auto-umount (jiffies) */ + }; + + static inline struct vfsmount *mntget(struct vfsmount *mnt) +Index: linux-2.6.10/include/linux/namei.h +=================================================================== +--- linux-2.6.10.orig/include/linux/namei.h ++++ linux-2.6.10/include/linux/namei.h +@@ -2,14 +2,48 @@ + #define _LINUX_NAMEI_H + + #include ++#include + + struct vfsmount; ++struct nameidata; + +-struct open_intent { +- int flags; +- int create_mode; ++/* intent opcodes */ ++#define IT_OPEN (1) ++#define IT_CREAT (1<<1) ++#define IT_READDIR (1<<2) ++#define IT_GETATTR (1<<3) ++#define IT_LOOKUP (1<<4) ++#define IT_UNLINK (1<<5) ++#define IT_TRUNC (1<<6) ++#define IT_GETXATTR (1<<7) ++ ++struct lustre_intent_data { ++ int it_disposition; ++ int it_status; ++ __u64 it_lock_handle; ++ void *it_data; ++ int it_lock_mode; + }; + ++#define INTENT_MAGIC 0x19620323 ++struct lookup_intent { ++ int it_magic; ++ void (*it_op_release)(struct lookup_intent *); ++ int it_op; ++ int it_flags; ++ int it_create_mode; ++ union { ++ struct lustre_intent_data lustre; ++ } d; ++}; ++ ++static inline void intent_init(struct lookup_intent *it, int op) ++{ ++ memset(it, 0, sizeof(*it)); ++ it->it_magic = INTENT_MAGIC; ++ it->it_op = op; ++} ++ 
+ enum { MAX_NESTED_LINKS = 8 }; + + struct nameidata { +@@ -21,10 +55,7 @@ struct nameidata { + unsigned depth; + char *saved_names[MAX_NESTED_LINKS + 1]; + +- /* Intent data */ +- union { +- struct open_intent open; +- } intent; ++ struct lookup_intent intent; + }; + + /* +@@ -47,6 +78,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA + #define LOOKUP_NOALT 32 + #define LOOKUP_ATOMIC 64 + #define LOOKUP_REVAL 128 ++#define LOOKUP_LAST (0x1000) ++#define LOOKUP_LINK_NOTLAST (0x2000) + + /* + * Intent data +@@ -56,6 +89,12 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA + #define LOOKUP_ACCESS (0x0400) + + extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); ++extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); ++#define user_path_walk_it(name,nd) \ ++ __user_walk_it(name, LOOKUP_FOLLOW, nd) ++#define user_path_walk_link_it(name,nd) \ ++ __user_walk_it(name, 0, nd) ++extern void intent_release(struct lookup_intent *); + #define user_path_walk(name,nd) \ + __user_walk(name, LOOKUP_FOLLOW, nd) + #define user_path_walk_link(name,nd) \ +@@ -68,7 +107,6 @@ extern void path_release_on_umount(struc + + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); + extern struct dentry * lookup_hash(struct qstr *, struct dentry *); +- + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); + diff --git a/lustre/kernel_patches/series/2.6-fc3.series b/lustre/kernel_patches/series/2.6-fc3.series index 361da69..90ada9a 100644 --- a/lustre/kernel_patches/series/2.6-fc3.series +++ b/lustre/kernel_patches/series/2.6-fc3.series @@ -1,7 +1,7 @@ uml-2.6.10-fc3.patch lustre_version.patch fc3_to_rhel4_updates.patch -vfs_intent-2.6-rhel4.patch +vfs_intent-2.6-fc3.patch vfs_nointent-2.6-rhel4.patch vfs_races-2.6-fc3.patch ext3-wantedi-misc-2.6-suse.patch diff --git a/lustre/kernel_patches/series/2.6-rhel4.series 
b/lustre/kernel_patches/series/2.6-rhel4.series index 0b2e845..1c08d3b 100644 --- a/lustre/kernel_patches/series/2.6-rhel4.series +++ b/lustre/kernel_patches/series/2.6-rhel4.series @@ -21,3 +21,4 @@ compile-fixes-2.6.9-rhel4-22.patch vm-tunables-rhel4.patch 2.6-rhel4-kgdb-ga.patch tcp-zero-copy-2.6.9-rhel4.patch +iallocsem_consistency.patch diff --git a/lustre/kernel_patches/series/2.6-suse-newer.series b/lustre/kernel_patches/series/2.6-suse-newer.series index 1c5d31f..4068bed 100644 --- a/lustre/kernel_patches/series/2.6-suse-newer.series +++ b/lustre/kernel_patches/series/2.6-suse-newer.series @@ -7,3 +7,5 @@ uml-exprt-clearuser.patch qsnet-suse-2.6.patch fsprivate-2.6.patch dcache-qstr-api-fix-2.6-suse.patch +iallocsem_consistency.patch +tcp-zero-copy-2.6.5-7.244.patch diff --git a/lustre/kernel_patches/series/2.6.12-vanilla.series b/lustre/kernel_patches/series/2.6.12-vanilla.series index 9ecb127..cb41054 100644 --- a/lustre/kernel_patches/series/2.6.12-vanilla.series +++ b/lustre/kernel_patches/series/2.6.12-vanilla.series @@ -17,3 +17,4 @@ export-show_task-2.6-vanilla.patch sd_iostats-2.6-rhel4.patch fsprivate-2.6.patch export_symbol_numa.patch +tcp-zero-copy-2.6.12.6.patch diff --git a/lustre/ldiskfs/lustre_quota_fmt.c b/lustre/ldiskfs/lustre_quota_fmt.c index 15739f1..9db3f3f 100644 --- a/lustre/ldiskfs/lustre_quota_fmt.c +++ b/lustre/ldiskfs/lustre_quota_fmt.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include "lustre_quota_fmt.h" typedef char *dqbuf_t; diff --git a/lustre/ldiskfs/quotafmt_test.c b/lustre/ldiskfs/quotafmt_test.c index 0e2f5f3..822ef95 100644 --- a/lustre/ldiskfs/quotafmt_test.c +++ b/lustre/ldiskfs/quotafmt_test.c @@ -16,8 +16,8 @@ #include #include -#include -#include +#include +#include #include "lustre_quota_fmt.h" diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c index 09fda38..7601bce 100644 --- a/lustre/ldlm/l_lock.c +++ b/lustre/ldlm/l_lock.c @@ -25,31 +25,13 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ 
-#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include #else #include #endif -#include -#include +#include +#include /* invariants: - only the owner of the lock changes l_owner/l_depth @@ -67,7 +49,7 @@ void l_lock(struct lustre_lock *lock) int owner = 0; spin_lock(&lock->l_spin); - if (lock->l_owner == current) + if (lock->l_owner == cfs_current()) owner = 1; spin_unlock(&lock->l_spin); @@ -78,9 +60,9 @@ void l_lock(struct lustre_lock *lock) if (owner) { ++lock->l_depth; } else { - down(&lock->l_sem); + mutex_down(&lock->l_sem); spin_lock(&lock->l_spin); - lock->l_owner = current; + lock->l_owner = cfs_current(); lock->l_depth = 0; spin_unlock(&lock->l_spin); } @@ -88,15 +70,15 @@ void l_lock(struct lustre_lock *lock) void l_unlock(struct lustre_lock *lock) { - LASSERTF(lock->l_owner == current, "lock %p, current %p\n", - lock->l_owner, current); + LASSERTF(lock->l_owner == cfs_current(), "lock %p, current %p\n", + lock->l_owner, cfs_current()); LASSERTF(lock->l_depth >= 0, "depth %d\n", lock->l_depth); spin_lock(&lock->l_spin); if (--lock->l_depth < 0) { lock->l_owner = NULL; spin_unlock(&lock->l_spin); - up(&lock->l_sem); + mutex_up(&lock->l_sem); return; } spin_unlock(&lock->l_spin); @@ -107,7 +89,7 @@ int l_has_lock(struct lustre_lock *lock) int depth = -1, owner = 0; spin_lock(&lock->l_spin); - if (lock->l_owner == current) { + if (lock->l_owner == cfs_current()) { depth = lock->l_depth; owner = 1; } @@ -119,28 +101,27 @@ int l_has_lock(struct lustre_lock *lock) } #ifdef __KERNEL__ -#include void l_check_ns_lock(struct ldlm_namespace *ns) { - static unsigned long next_msg; + static cfs_time_t next_msg; - if (!l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { + if (!l_has_lock(&ns->ns_lock) && cfs_time_after(cfs_time_current(), next_msg)) { CERROR("namespace %s lock not held when it should be; tell " "phil\n", 
ns->ns_name); libcfs_debug_dumpstack(NULL); - next_msg = jiffies + 60 * HZ; + next_msg = cfs_time_shift(60); } } void l_check_no_ns_lock(struct ldlm_namespace *ns) { - static unsigned long next_msg; + static cfs_time_t next_msg; - if (l_has_lock(&ns->ns_lock) && time_after(jiffies, next_msg)) { + if (l_has_lock(&ns->ns_lock) && cfs_time_after(cfs_time_current(), next_msg)) { CERROR("namespace %s lock held illegally; tell phil\n", ns->ns_name); libcfs_debug_dumpstack(NULL); - next_msg = jiffies + 60 * HZ; + next_msg = cfs_time_shift(60); } } diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c index f54ff12..205ff14 100644 --- a/lustre/ldlm/ldlm_extent.c +++ b/lustre/ldlm/ldlm_extent.c @@ -29,9 +29,9 @@ # include #endif -#include -#include -#include +#include +#include +#include #include "ldlm_internal.h" @@ -327,7 +327,7 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq, ldlm_error_t *err) { struct ldlm_resource *res = lock->l_resource; - struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); + struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list); int rc, rc2; ENTRY; diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 6c7e259..c86ee5c 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -27,21 +27,21 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -#include -#include -#include -#include +#include +#include +#include +#include #include #else #include -#include +#include #endif #include "ldlm_internal.h" #define l_flock_waitq l_lru -static struct list_head ldlm_flock_waitq = LIST_HEAD_INIT(ldlm_flock_waitq); +static struct list_head ldlm_flock_waitq = CFS_LIST_HEAD_INIT(ldlm_flock_waitq); int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); @@ -390,7 +390,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, * ldlm_reprocess_queue. 
*/ if ((mode == LCK_NL) && overlaps) { struct list_head rpc_list - = LIST_HEAD_INIT(rpc_list); + = CFS_LIST_HEAD_INIT(rpc_list); int rc; restart: res->lr_tmp = &rpc_list; @@ -451,7 +451,7 @@ int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data) { struct ldlm_namespace *ns; - struct file_lock *getlk = lock->l_ast_data; + cfs_flock_t *getlk = lock->l_ast_data; struct ldlm_flock_wait_data fwd; unsigned long irqflags; struct obd_device *obd; @@ -512,20 +512,20 @@ granted: /* fcntl(F_GETLK) request */ /* The old mode was saved in getlk->fl_type so that if the mode * in the lock changes we can decref the approprate refcount. */ - ldlm_flock_destroy(lock, getlk->fl_type, LDLM_FL_WAIT_NOREPROC); + ldlm_flock_destroy(lock, cfs_flock_type(getlk), LDLM_FL_WAIT_NOREPROC); switch (lock->l_granted_mode) { case LCK_PR: - getlk->fl_type = F_RDLCK; + cfs_flock_set_type(getlk, F_RDLCK); break; case LCK_PW: - getlk->fl_type = F_WRLCK; + cfs_flock_set_type(getlk, F_WRLCK); break; default: - getlk->fl_type = F_UNLCK; + cfs_flock_set_type(getlk, F_UNLCK); } - getlk->fl_pid = lock->l_policy_data.l_flock.pid; - getlk->fl_start = lock->l_policy_data.l_flock.start; - getlk->fl_end = lock->l_policy_data.l_flock.end; + cfs_flock_set_pid(getlk, (pid_t)lock->l_policy_data.l_flock.pid); + cfs_flock_set_start(getlk, (off_t)lock->l_policy_data.l_flock.start); + cfs_flock_set_end(getlk, (off_t)lock->l_policy_data.l_flock.end); } else { int noreproc = LDLM_FL_WAIT_NOREPROC; @@ -533,7 +533,7 @@ granted: * with existing locks owned by this process. 
*/ ldlm_process_flock_lock(lock, &noreproc, 1, &err); if (flags == 0) - wake_up(&lock->l_waitq); + cfs_waitq_signal(&lock->l_waitq); } l_unlock(&ns->ns_lock); RETURN(0); diff --git a/lustre/ldlm/ldlm_inodebits.c b/lustre/ldlm/ldlm_inodebits.c index 8cee698..8c473dd 100644 --- a/lustre/ldlm/ldlm_inodebits.c +++ b/lustre/ldlm/ldlm_inodebits.c @@ -26,9 +26,9 @@ # include #endif -#include -#include -#include +#include +#include +#include #include "ldlm_internal.h" @@ -85,7 +85,7 @@ int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags, int first_enq, ldlm_error_t *err) { struct ldlm_resource *res = lock->l_resource; - struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); + struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list); int rc; ENTRY; diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 2e247ff..f9f6c43 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -58,7 +58,7 @@ int ldlm_process_inodebits_lock(struct ldlm_lock *lock, int *flags, void l_check_ns_lock(struct ldlm_namespace *ns); void l_check_no_ns_lock(struct ldlm_namespace *ns); -extern struct proc_dir_entry *ldlm_svc_proc_dir; +extern cfs_proc_dir_entry_t *ldlm_svc_proc_dir; struct ldlm_state { struct ptlrpc_service *ldlm_cb_service; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 03da517..9d9f9ab 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -28,15 +28,15 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -# include +# include #else # include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include /* @priority: if non-zero, move the selected to the list head * @create: if zero, only search in existed connections @@ -186,7 +186,7 @@ out: * 2 - server UUID * 3 - inactive-on-startup */ -int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg) +int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) { struct client_obd 
*cli = &obddev->u.cli; struct obd_import *imp; @@ -249,11 +249,11 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg) cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; if (cli->cl_dirty_max >> PAGE_SHIFT > num_physpages / 8) cli->cl_dirty_max = num_physpages << (PAGE_SHIFT - 3); - INIT_LIST_HEAD(&cli->cl_cache_waiters); - INIT_LIST_HEAD(&cli->cl_loi_ready_list); - INIT_LIST_HEAD(&cli->cl_loi_write_list); - INIT_LIST_HEAD(&cli->cl_loi_read_list); - spin_lock_init(&cli->cl_loi_list_lock); + CFS_INIT_LIST_HEAD(&cli->cl_cache_waiters); + CFS_INIT_LIST_HEAD(&cli->cl_loi_ready_list); + CFS_INIT_LIST_HEAD(&cli->cl_loi_write_list); + CFS_INIT_LIST_HEAD(&cli->cl_loi_read_list); + client_obd_list_lock_init(&cli->cl_loi_list_lock); cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; @@ -293,7 +293,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg* lcfg) imp->imp_connect_op = connect_op; imp->imp_initial_recov = 1; imp->imp_initial_recov_bk = 0; - INIT_LIST_HEAD(&imp->imp_pinger_chain); + CFS_INIT_LIST_HEAD(&imp->imp_pinger_chain); memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), LUSTRE_CFG_BUFLEN(lcfg, 1)); class_import_put(imp); @@ -334,6 +334,7 @@ err: int client_obd_cleanup(struct obd_device *obddev) { + ENTRY; ldlm_put_ref(obddev->obd_force); RETURN(0); @@ -351,7 +352,7 @@ int client_connect_import(struct lustre_handle *dlm_handle, int rc; ENTRY; - down(&cli->cl_sem); + mutex_down(&cli->cl_sem); rc = class_connect(dlm_handle, obd, cluuid); if (rc) GOTO(out_sem, rc); @@ -410,7 +411,7 @@ out_disco: class_export_put(exp); } out_sem: - up(&cli->cl_sem); + mutex_up(&cli->cl_sem); return rc; } @@ -431,7 +432,7 @@ int client_disconnect_export(struct obd_export *exp) cli = &obd->u.cli; imp = cli->cl_import; - down(&cli->cl_sem); + mutex_down(&cli->cl_sem); if (!cli->cl_conn_count) { CERROR("disconnecting disconnected device (%s)\n", obd->obd_name); @@ -471,7 +472,7 @@ int client_disconnect_export(struct obd_export *exp) 
if (!rc && err) rc = err; out_sem: - up(&cli->cl_sem); + mutex_up(&cli->cl_sem); RETURN(rc); } @@ -482,6 +483,7 @@ int client_disconnect_export(struct obd_export *exp) int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, struct obd_uuid *cluuid) { + ENTRY; if (exp->exp_connection && exp->exp_imp_reverse) { struct lustre_handle *hdl; hdl = &exp->exp_imp_reverse->imp_remote_handle; @@ -691,7 +693,8 @@ int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) target->obd_name, libcfs_nid2str(req->rq_peer.nid), cluuid.uuid, target->obd_recoverable_clients, - (target->obd_recovery_timer.expires-jiffies)/HZ); + cfs_duration_sec(cfs_time_sub(cfs_timer_deadline(&target->obd_recovery_timer), + cfs_time_current()))); rc = -EBUSY; } else { dont_check_exports: @@ -923,6 +926,7 @@ void target_abort_recovery(void *data) { struct obd_device *obd = data; + ENTRY; spin_lock_bh(&obd->obd_processing_task_lock); if (!obd->obd_recovering) { spin_unlock_bh(&obd->obd_processing_task_lock); @@ -942,6 +946,7 @@ void target_abort_recovery(void *data) target_finish_recovery(obd); ptlrpc_run_recovery_over_upcall(obd); + EXIT; } static void target_recovery_expired(unsigned long castmeharder) @@ -951,7 +956,7 @@ static void target_recovery_expired(unsigned long castmeharder) spin_lock_bh(&obd->obd_processing_task_lock); if (obd->obd_recovering) obd->obd_abort_recovery = 1; - wake_up(&obd->obd_next_transno_waitq); + cfs_waitq_signal(&obd->obd_next_transno_waitq); spin_unlock_bh(&obd->obd_processing_task_lock); } @@ -960,7 +965,7 @@ static void target_recovery_expired(unsigned long castmeharder) void target_cancel_recovery_timer(struct obd_device *obd) { CDEBUG(D_HA, "%s: cancel recovery timer\n", obd->obd_name); - del_timer(&obd->obd_recovery_timer); + cfs_timer_disarm(&obd->obd_recovery_timer); } static void reset_recovery_timer(struct obd_device *obd) @@ -970,12 +975,13 @@ static void reset_recovery_timer(struct obd_device *obd) 
spin_unlock_bh(&obd->obd_processing_task_lock); return; } - mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT); + cfs_timer_arm(&obd->obd_recovery_timer, + cfs_time_shift(OBD_RECOVERY_TIMEOUT)); spin_unlock_bh(&obd->obd_processing_task_lock); CDEBUG(D_HA, "%s: timer will expire in %u seconds\n", obd->obd_name, - (int)(OBD_RECOVERY_TIMEOUT / HZ)); + OBD_RECOVERY_TIMEOUT); /* Only used for lprocfs_status */ - obd->obd_recovery_end = CURRENT_SECONDS + OBD_RECOVERY_TIMEOUT/HZ; + obd->obd_recovery_end = CURRENT_SECONDS + OBD_RECOVERY_TIMEOUT; } @@ -988,10 +994,9 @@ void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler) return; } CWARN("%s: starting recovery timer (%us)\n", obd->obd_name, - (int)(OBD_RECOVERY_TIMEOUT / HZ)); + OBD_RECOVERY_TIMEOUT); obd->obd_recovery_handler = handler; - obd->obd_recovery_timer.function = target_recovery_expired; - obd->obd_recovery_timer.data = (unsigned long)obd; + cfs_timer_init(&obd->obd_recovery_timer, target_recovery_expired, obd); spin_unlock_bh(&obd->obd_processing_task_lock); reset_recovery_timer(obd); @@ -1047,7 +1052,7 @@ static void process_recovery_queue(struct obd_device *obd) for (;;) { spin_lock_bh(&obd->obd_processing_task_lock); - LASSERT(obd->obd_processing_task == current->pid); + LASSERT(obd->obd_processing_task == cfs_curproc_pid()); req = list_entry(obd->obd_recovery_queue.next, struct ptlrpc_request, rq_list); @@ -1111,7 +1116,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, * buffers (eg mds_body, ost_body etc) have NOT been swabbed. */ if (!transno) { - INIT_LIST_HEAD(&req->rq_list); + CFS_INIT_LIST_HEAD(&req->rq_list); DEBUG_REQ(D_HA, req, "not queueing"); return 1; } @@ -1137,7 +1142,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, * Also, a resent, replayed request that has already been * handled will pass through here and be processed immediately. 
*/ - if (obd->obd_processing_task == current->pid || + if (obd->obd_processing_task == cfs_curproc_pid() || transno < obd->obd_next_recovery_transno) { /* Processing the queue right now, don't re-add. */ LASSERT(list_empty(&req->rq_list)); @@ -1163,7 +1168,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, req = saved_req; req->rq_reqmsg = reqmsg; class_export_get(req->rq_export); - INIT_LIST_HEAD(&req->rq_list); + CFS_INIT_LIST_HEAD(&req->rq_list); /* XXX O(n^2) */ list_for_each(tmp, &obd->obd_recovery_queue) { @@ -1187,7 +1192,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, /* Someone else is processing this queue, we'll leave it to * them. */ - wake_up(&obd->obd_next_transno_waitq); + cfs_waitq_signal(&obd->obd_next_transno_waitq); spin_unlock_bh(&obd->obd_processing_task_lock); return 0; } @@ -1195,7 +1200,7 @@ int target_queue_recovery_request(struct ptlrpc_request *req, /* Nobody is processing, and we know there's (at least) one to process * now, so we'll do the honours. 
*/ - obd->obd_processing_task = current->pid; + obd->obd_processing_task = cfs_curproc_pid(); spin_unlock_bh(&obd->obd_processing_task_lock); process_recovery_queue(obd); @@ -1272,7 +1277,7 @@ int target_queue_final_reply(struct ptlrpc_request *req, int rc) } else { CWARN("%s: %d recoverable clients remain\n", obd->obd_name, obd->obd_recoverable_clients); - wake_up(&obd->obd_next_transno_waitq); + cfs_waitq_signal(&obd->obd_next_transno_waitq); } return 1; @@ -1376,7 +1381,7 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) list_empty(&rs->rs_exp_list) || /* completed already */ list_empty(&rs->rs_obd_list)) { list_add_tail (&rs->rs_list, &svc->srv_reply_queue); - wake_up (&svc->srv_waitq); + cfs_waitq_signal (&svc->srv_waitq); } else { list_add (&rs->rs_list, &svc->srv_active_replies); rs->rs_scheduled = 0; /* allow notifier to schedule */ diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index cce090a..e3df674 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -27,15 +27,13 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -# include -# include -# include +# include #else # include # include #endif -#include +#include #include "ldlm_internal.h" //struct lustre_lock ldlm_everything_lock; @@ -83,7 +81,7 @@ char *ldlm_it2str(int it) } } -extern kmem_cache_t *ldlm_lock_slab; +extern cfs_mem_cache_t *ldlm_lock_slab; struct lustre_lock ldlm_handle_lock; static ldlm_processing_policy ldlm_processing_policy_table[] = { @@ -249,20 +247,20 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, if (resource == NULL) LBUG(); - OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_NOFS, sizeof(*lock)); + OBD_SLAB_ALLOC(lock, ldlm_lock_slab, CFS_ALLOC_IO, sizeof(*lock)); if (lock == NULL) RETURN(NULL); lock->l_resource = ldlm_resource_getref(resource); atomic_set(&lock->l_refc, 2); - INIT_LIST_HEAD(&lock->l_children); - INIT_LIST_HEAD(&lock->l_childof); - INIT_LIST_HEAD(&lock->l_res_link); - INIT_LIST_HEAD(&lock->l_lru); - 
INIT_LIST_HEAD(&lock->l_export_chain); - INIT_LIST_HEAD(&lock->l_pending_chain); - init_waitqueue_head(&lock->l_waitq); + CFS_INIT_LIST_HEAD(&lock->l_children); + CFS_INIT_LIST_HEAD(&lock->l_childof); + CFS_INIT_LIST_HEAD(&lock->l_res_link); + CFS_INIT_LIST_HEAD(&lock->l_lru); + CFS_INIT_LIST_HEAD(&lock->l_export_chain); + CFS_INIT_LIST_HEAD(&lock->l_pending_chain); + cfs_waitq_init(&lock->l_waitq); spin_lock(&resource->lr_namespace->ns_counter_lock); resource->lr_namespace->ns_locks++; @@ -275,7 +273,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, l_unlock(&parent->l_resource->lr_namespace->ns_lock); } - INIT_LIST_HEAD(&lock->l_handle.h_link); + CFS_INIT_LIST_HEAD(&lock->l_handle.h_link); class_handle_hash(&lock->l_handle, lock_handle_addref); RETURN(lock); @@ -482,7 +480,7 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) lock->l_readers++; if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP)) lock->l_writers++; - lock->l_last_used = jiffies; + lock->l_last_used = cfs_time_current(); LDLM_LOCK_GET(lock); LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -669,7 +667,7 @@ void ldlm_lock_allow_match(struct ldlm_lock *lock) { l_lock(&lock->l_resource->lr_namespace->ns_lock); lock->l_flags |= LDLM_FL_CAN_MATCH; - wake_up(&lock->l_waitq); + cfs_waitq_signal(&lock->l_waitq); l_unlock(&lock->l_resource->lr_namespace->ns_lock); } @@ -756,7 +754,7 @@ int ldlm_lock_match(struct ldlm_namespace *ns, int flags, } } - lwi = LWI_TIMEOUT_INTR(obd_timeout*HZ, NULL,NULL,NULL); + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), NULL,NULL,NULL); /* XXX FIXME see comment on CAN_MATCH in lustre_dlm.h */ l_wait_event(lock->l_waitq, @@ -826,7 +824,7 @@ struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, lock->l_blocking_ast = blocking; lock->l_completion_ast = completion; lock->l_glimpse_ast = glimpse; - lock->l_pid = current->pid; + lock->l_pid = cfs_curproc_pid(); 
if (lvb_len) { lock->l_lvb_len = lvb_len; @@ -1016,6 +1014,7 @@ void ldlm_reprocess_all_ns(struct ldlm_namespace *ns) { int i, rc; + ENTRY; l_lock(&ns->ns_lock); for (i = 0; i < RES_HASH_SIZE; i++) { struct list_head *tmp, *next; diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index b5bdf80..0ec457e 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -30,44 +30,42 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -# include -# include -# include -# include +# include #else # include #endif -#include -#include +#include +#include #include #include "ldlm_internal.h" -extern kmem_cache_t *ldlm_resource_slab; -extern kmem_cache_t *ldlm_lock_slab; +extern cfs_mem_cache_t *ldlm_resource_slab; +extern cfs_mem_cache_t *ldlm_lock_slab; extern struct lustre_lock ldlm_handle_lock; extern struct list_head ldlm_namespace_list; -static DECLARE_MUTEX(ldlm_ref_sem); +extern struct semaphore ldlm_namespace_lock; +static struct semaphore ldlm_ref_sem; static int ldlm_refcount; /* LDLM state */ static struct ldlm_state *ldlm_state; -inline unsigned long round_timeout(unsigned long timeout) +inline cfs_time_t round_timeout(cfs_time_t timeout) { - return ((timeout / HZ) + 1) * HZ; + return cfs_time_seconds((int)cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1); } #ifdef __KERNEL__ /* w_l_spinlock protects both waiting_locks_list and expired_lock_thread */ static spinlock_t waiting_locks_spinlock; static struct list_head waiting_locks_list; -static struct timer_list waiting_locks_timer; +static cfs_timer_t waiting_locks_timer; static struct expired_lock_thread { - wait_queue_head_t elt_waitq; + cfs_waitq_t elt_waitq; int elt_state; int elt_dump; struct list_head elt_expired_locks; @@ -81,7 +79,7 @@ static struct expired_lock_thread { struct ldlm_bl_pool { spinlock_t blp_lock; struct list_head blp_list; - wait_queue_head_t blp_waitq; + cfs_waitq_t blp_waitq; atomic_t blp_num_threads; struct completion blp_comp; }; @@ -99,6 +97,7 @@ static inline int 
have_expired_locks(void) { int need_to_run; + ENTRY; spin_lock_bh(&waiting_locks_spinlock); need_to_run = !list_empty(&expired_lock_thread.elt_expired_locks); spin_unlock_bh(&waiting_locks_spinlock); @@ -110,21 +109,12 @@ static int expired_lock_main(void *arg) { struct list_head *expired = &expired_lock_thread.elt_expired_locks; struct l_wait_info lwi = { 0 }; - unsigned long flags; ENTRY; - lock_kernel(); - libcfs_daemonize("ldlm_elt"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - unlock_kernel(); + cfs_daemonize("ldlm_elt"); expired_lock_thread.elt_state = ELT_READY; - wake_up(&expired_lock_thread.elt_waitq); + cfs_waitq_signal(&expired_lock_thread.elt_waitq); while (1) { l_wait_event(expired_lock_thread.elt_waitq, @@ -181,7 +171,7 @@ static int expired_lock_main(void *arg) } expired_lock_thread.elt_state = ELT_STOPPED; - wake_up(&expired_lock_thread.elt_waitq); + cfs_waitq_signal(&expired_lock_thread.elt_waitq); RETURN(0); } @@ -190,15 +180,12 @@ static void waiting_locks_callback(unsigned long unused) { struct ldlm_lock *lock, *last = NULL; - if (obd_dump_on_timeout) - libcfs_debug_dumplog(); - spin_lock_bh(&waiting_locks_spinlock); while (!list_empty(&waiting_locks_list)) { lock = list_entry(waiting_locks_list.next, struct ldlm_lock, l_pending_chain); - if (time_after(lock->l_callback_timeout, jiffies) || + if (cfs_time_after(lock->l_callback_timeout, cfs_time_current()) || (lock->l_req_mode == LCK_GROUP)) break; @@ -216,9 +203,8 @@ static void waiting_locks_callback(unsigned long unused) lock->l_pending_chain.next, lock->l_pending_chain.prev); - INIT_LIST_HEAD(&waiting_locks_list); /* HACK */ + CFS_INIT_LIST_HEAD(&waiting_locks_list); /* HACK */ expired_lock_thread.elt_dump = __LINE__; - spin_unlock_bh(&waiting_locks_spinlock); /* LBUG(); */ CEMERG("would be an LBUG, but isn't (bug 5653)\n"); @@ -232,8 +218,13 @@ static void waiting_locks_callback(unsigned long unused) 
list_del(&lock->l_pending_chain); list_add(&lock->l_pending_chain, &expired_lock_thread.elt_expired_locks); + } - wake_up(&expired_lock_thread.elt_waitq); + if (!list_empty(&expired_lock_thread.elt_expired_locks)) { + if (obd_dump_on_timeout) + expired_lock_thread.elt_dump = __LINE__; + + cfs_waitq_signal(&expired_lock_thread.elt_waitq); } /* @@ -241,11 +232,11 @@ static void waiting_locks_callback(unsigned long unused) * left. */ if (!list_empty(&waiting_locks_list)) { - unsigned long timeout_rounded; + cfs_time_t timeout_rounded; lock = list_entry(waiting_locks_list.next, struct ldlm_lock, l_pending_chain); - timeout_rounded = round_timeout(lock->l_callback_timeout); - mod_timer(&waiting_locks_timer, timeout_rounded); + timeout_rounded = (cfs_time_t)round_timeout(lock->l_callback_timeout); + cfs_timer_arm(&waiting_locks_timer, timeout_rounded); } spin_unlock_bh(&waiting_locks_spinlock); } @@ -260,18 +251,18 @@ static void waiting_locks_callback(unsigned long unused) */ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) { - unsigned long timeout_rounded; + cfs_time_t timeout_rounded; l_check_ns_lock(lock->l_resource->lr_namespace); LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)); spin_lock_bh(&waiting_locks_spinlock); if (lock->l_destroyed) { - static unsigned long next; + static cfs_time_t next; spin_unlock_bh(&waiting_locks_spinlock); LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)"); - if (time_after(jiffies, next)) { - next = jiffies + 14400 * HZ; + if (cfs_time_after(cfs_time_current(), next)) { + next = cfs_time_shift(14400); libcfs_debug_dumpstack(NULL); } return 0; @@ -283,13 +274,14 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) return 0; } - lock->l_callback_timeout = jiffies + (obd_timeout * HZ / 2); + lock->l_callback_timeout =cfs_time_add(cfs_time_current(), + cfs_time_seconds(obd_timeout)/2); timeout_rounded = round_timeout(lock->l_callback_timeout); - if (time_before(timeout_rounded, waiting_locks_timer.expires) || 
- !timer_pending(&waiting_locks_timer)) { - mod_timer(&waiting_locks_timer, timeout_rounded); + if (cfs_time_before(timeout_rounded, cfs_timer_deadline(&waiting_locks_timer)) || + !cfs_timer_is_armed(&waiting_locks_timer)) { + cfs_timer_arm(&waiting_locks_timer, timeout_rounded); } list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */ spin_unlock_bh(&waiting_locks_spinlock); @@ -329,13 +321,13 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock) /* Removing the head of the list, adjust timer. */ if (list_next == &waiting_locks_list) { /* No more, just cancel. */ - del_timer(&waiting_locks_timer); + cfs_timer_disarm(&waiting_locks_timer); } else { struct ldlm_lock *next; next = list_entry(list_next, struct ldlm_lock, l_pending_chain); - mod_timer(&waiting_locks_timer, - round_timeout(next->l_callback_timeout)); + cfs_timer_arm(&waiting_locks_timer, + round_timeout(next->l_callback_timeout)); } } list_del_init(&lock->l_pending_chain); @@ -511,13 +503,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, RETURN(rc); } -/* XXX copied from ptlrpc/service.c */ -static long timeval_sub(struct timeval *large, struct timeval *small) -{ - return (large->tv_sec - small->tv_sec) * 1000000 + - (large->tv_usec - small->tv_usec); -} - int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) { struct ldlm_request *body; @@ -530,18 +515,19 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) LASSERT(lock != NULL); do_gettimeofday(&granted_time); - total_enqueue_wait = timeval_sub(&granted_time,&lock->l_enqueued_time); + total_enqueue_wait = cfs_timeval_sub(&granted_time, + &lock->l_enqueued_time, NULL); if (total_enqueue_wait / 1000000 > obd_timeout) LDLM_ERROR(lock, "enqueue wait took %luus from %lu", total_enqueue_wait, lock->l_enqueued_time.tv_sec); - down(&lock->l_resource->lr_lvb_sem); + mutex_down(&lock->l_resource->lr_lvb_sem); if (lock->l_resource->lr_lvb_len) { buffers = 2; size[1] = 
lock->l_resource->lr_lvb_len; } - up(&lock->l_resource->lr_lvb_sem); + mutex_up(&lock->l_resource->lr_lvb_sem); req = ptlrpc_prep_req(lock->l_export->exp_imp_reverse, LUSTRE_DLM_VERSION, LDLM_CP_CALLBACK, @@ -557,12 +543,12 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) if (buffers == 2) { void *lvb; - down(&lock->l_resource->lr_lvb_sem); + mutex_down(&lock->l_resource->lr_lvb_sem); lvb = lustre_msg_buf(req->rq_reqmsg, 1, lock->l_resource->lr_lvb_len); memcpy(lvb, lock->l_resource->lr_lvb_data, lock->l_resource->lr_lvb_len); - up(&lock->l_resource->lr_lvb_sem); + mutex_up(&lock->l_resource->lr_lvb_sem); } LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)", @@ -626,9 +612,9 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data) body->lock_handle1 = lock->l_remote_handle; ldlm_lock2desc(lock, &body->lock_desc); - down(&lock->l_resource->lr_lvb_sem); + mutex_down(&lock->l_resource->lr_lvb_sem); size = lock->l_resource->lr_lvb_len; - up(&lock->l_resource->lr_lvb_sem); + mutex_up(&lock->l_resource->lr_lvb_sem); req->rq_replen = lustre_msg_size(1, &size); req->rq_send_state = LUSTRE_IMP_FULL; @@ -718,9 +704,9 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, } #if 0 - /* FIXME this makes it impossible to use plain locks -- check against - server's *_CONNECT_SUPPORTED flags? (I don't want to use ibits - for mgc/mgs) */ + /* FIXME this makes it impossible to use LDLM_PLAIN locks -- check + against server's _CONNECT_SUPPORTED flags? (I don't want to use + ibits for mgc/mgs) */ /* INODEBITS_INTEROP: Perform conversion from plain lock to * inodebits lock if client does not support them. 
*/ @@ -781,12 +767,12 @@ existing_lock: } else { int buffers = 1; - down(&lock->l_resource->lr_lvb_sem); + mutex_down(&lock->l_resource->lr_lvb_sem); if (lock->l_resource->lr_lvb_len) { size[1] = lock->l_resource->lr_lvb_len; buffers = 2; } - up(&lock->l_resource->lr_lvb_sem); + mutex_up(&lock->l_resource->lr_lvb_sem); if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR)) GOTO(out, rc = -ENOMEM); @@ -879,7 +865,7 @@ existing_lock: l_unlock(&lock->l_resource->lr_namespace->ns_lock); if (rc == 0) { - down(&lock->l_resource->lr_lvb_sem); + mutex_down(&lock->l_resource->lr_lvb_sem); size[1] = lock->l_resource->lr_lvb_len; if (size[1] > 0) { void *lvb = lustre_msg_buf(req->rq_repmsg, @@ -890,7 +876,7 @@ existing_lock: memcpy(lvb, lock->l_resource->lr_lvb_data, size[1]); } - up(&lock->l_resource->lr_lvb_sem); + mutex_up(&lock->l_resource->lr_lvb_sem); } else { ldlm_resource_unlink_lock(lock); ldlm_lock_destroy(lock); @@ -1110,7 +1096,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req, struct ldlm_request *dlm_req, struct ldlm_lock *lock) { - LIST_HEAD(ast_list); + CFS_LIST_HEAD(ast_list); ENTRY; l_lock(&ns->ns_lock); @@ -1196,7 +1182,8 @@ static void ldlm_handle_gl_callback(struct ptlrpc_request *req, l_unlock(&ns->ns_lock); if (lock->l_granted_mode == LCK_PW && !lock->l_readers && !lock->l_writers && - time_after(jiffies, lock->l_last_used + 10 * HZ)) { + cfs_time_after(cfs_time_current(), + cfs_time_add(lock->l_last_used, cfs_time_seconds(10)))) { if (ldlm_bl_to_thread(ns, NULL, lock)) ldlm_handle_bl_callback(ns, NULL, lock); EXIT; @@ -1237,7 +1224,7 @@ int ldlm_bl_to_thread(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld, spin_lock(&blp->blp_lock); list_add_tail(&blwi->blwi_entry, &blp->blp_list); - wake_up(&blp->blp_waitq); + cfs_waitq_signal(&blp->blp_waitq); spin_unlock(&blp->blp_lock); RETURN(0); @@ -1462,20 +1449,14 @@ static int ldlm_bl_thread_main(void *arg) { struct ldlm_bl_thread_data *bltd = arg; struct ldlm_bl_pool *blp = 
bltd->bltd_blp; - unsigned long flags; ENTRY; - /* XXX boiler-plate */ { - char name[sizeof(current->comm)]; + char name[CFS_CURPROC_COMM_MAX]; snprintf(name, sizeof(name) - 1, "ldlm_bl_%02d", bltd->bltd_num); - libcfs_daemonize(name); + cfs_daemonize(name); } - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); atomic_inc(&blp->blp_num_threads); complete(&blp->blp_comp); @@ -1509,20 +1490,22 @@ static int ldlm_cleanup(int force); int ldlm_get_ref(void) { int rc = 0; - down(&ldlm_ref_sem); + ENTRY; + mutex_down(&ldlm_ref_sem); if (++ldlm_refcount == 1) { rc = ldlm_setup(); if (rc) ldlm_refcount--; } - up(&ldlm_ref_sem); + mutex_up(&ldlm_ref_sem); RETURN(rc); } void ldlm_put_ref(int force) { - down(&ldlm_ref_sem); + ENTRY; + mutex_down(&ldlm_ref_sem); if (ldlm_refcount == 1) { int rc = ldlm_cleanup(force); if (rc) @@ -1532,7 +1515,7 @@ void ldlm_put_ref(int force) } else { ldlm_refcount--; } - up(&ldlm_ref_sem); + mutex_up(&ldlm_ref_sem); EXIT; } @@ -1589,10 +1572,10 @@ static int ldlm_setup(void) ldlm_state->ldlm_bl_pool = blp; atomic_set(&blp->blp_num_threads, 0); - init_waitqueue_head(&blp->blp_waitq); + cfs_waitq_init(&blp->blp_waitq); spin_lock_init(&blp->blp_lock); - INIT_LIST_HEAD(&blp->blp_list); + CFS_INIT_LIST_HEAD(&blp->blp_list); #ifdef __KERNEL__ for (i = 0; i < LDLM_NUM_THREADS; i++) { @@ -1601,7 +1584,7 @@ static int ldlm_setup(void) .bltd_blp = blp, }; init_completion(&blp->blp_comp); - rc = kernel_thread(ldlm_bl_thread_main, &bltd, 0); + rc = cfs_kernel_thread(ldlm_bl_thread_main, &bltd, 0); if (rc < 0) { CERROR("cannot start LDLM thread #%d: rc %d\n", i, rc); GOTO(out_thread, rc); @@ -1619,17 +1602,18 @@ static int ldlm_setup(void) if (rc) GOTO(out_thread, rc); - INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks); + CFS_INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks); expired_lock_thread.elt_state = ELT_STOPPED; - init_waitqueue_head(&expired_lock_thread.elt_waitq); + 
cfs_waitq_init(&expired_lock_thread.elt_waitq); - INIT_LIST_HEAD(&waiting_locks_list); + CFS_INIT_LIST_HEAD(&waiting_locks_list); spin_lock_init(&waiting_locks_spinlock); - waiting_locks_timer.function = waiting_locks_callback; - waiting_locks_timer.data = 0; - init_timer(&waiting_locks_timer); + cfs_timer_init(&waiting_locks_timer, waiting_locks_callback, 0); - rc = kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS); + /* Using CLONE_FILES instead of CLONE_FS here causes failures in + conf-sanity test 21. But using CLONE_FS can cause problems + if the daemonize happens between push/pop_ctxt... */ + rc = cfs_kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS); if (rc < 0) { CERROR("Cannot start ldlm expired-lock thread: %d\n", rc); GOTO(out_thread, rc); @@ -1678,7 +1662,7 @@ static int ldlm_cleanup(int force) spin_lock(&blp->blp_lock); list_add_tail(&blwi.blwi_entry, &blp->blp_list); - wake_up(&blp->blp_waitq); + cfs_waitq_signal(&blp->blp_waitq); spin_unlock(&blp->blp_lock); wait_for_completion(&blp->blp_comp); @@ -1690,7 +1674,7 @@ static int ldlm_cleanup(int force) ldlm_proc_cleanup(); expired_lock_thread.elt_state = ELT_TERMINATE; - wake_up(&expired_lock_thread.elt_waitq); + cfs_waitq_signal(&expired_lock_thread.elt_waitq); wait_event(expired_lock_thread.elt_waitq, expired_lock_thread.elt_state == ELT_STOPPED); #else @@ -1706,17 +1690,19 @@ static int ldlm_cleanup(int force) int __init ldlm_init(void) { - ldlm_resource_slab = kmem_cache_create("ldlm_resources", + init_mutex(&ldlm_ref_sem); + init_mutex(&ldlm_namespace_lock); + ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources", sizeof(struct ldlm_resource), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN); if (ldlm_resource_slab == NULL) return -ENOMEM; - ldlm_lock_slab = kmem_cache_create("ldlm_locks", + ldlm_lock_slab = cfs_mem_cache_create("ldlm_locks", sizeof(struct ldlm_lock), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN); if (ldlm_lock_slab == NULL) { - 
kmem_cache_destroy(ldlm_resource_slab); + cfs_mem_cache_destroy(ldlm_resource_slab); return -ENOMEM; } @@ -1727,12 +1713,14 @@ int __init ldlm_init(void) void __exit ldlm_exit(void) { - if ( ldlm_refcount ) + int rc; + + if (ldlm_refcount) CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount); - LASSERTF(kmem_cache_destroy(ldlm_resource_slab) == 0, - "couldn't free ldlm resource slab\n"); - LASSERTF(kmem_cache_destroy(ldlm_lock_slab) == 0, - "couldn't free ldlm lock slab\n"); + rc = cfs_mem_cache_destroy(ldlm_resource_slab); + LASSERTF(rc == 0, "couldn't free ldlm resource slab\n"); + rc = cfs_mem_cache_destroy(ldlm_lock_slab); + LASSERTF(rc == 0, "couldn't free ldlm lock slab\n"); } /* ldlm_extent.c */ diff --git a/lustre/ldlm/ldlm_plain.c b/lustre/ldlm/ldlm_plain.c index 63026f5..38b0c40 100644 --- a/lustre/ldlm/ldlm_plain.c +++ b/lustre/ldlm/ldlm_plain.c @@ -27,9 +27,9 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -#include -#include -#include +#include +#include +#include #else #include #endif @@ -81,7 +81,7 @@ int ldlm_process_plain_lock(struct ldlm_lock *lock, int *flags, int first_enq, ldlm_error_t *err) { struct ldlm_resource *res = lock->l_resource; - struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); + struct list_head rpc_list = CFS_LIST_HEAD_INIT(rpc_list); int rc; ENTRY; @@ -104,7 +104,6 @@ int ldlm_process_plain_lock(struct ldlm_lock *lock, int *flags, int first_enq, restart: LASSERT(res->lr_tmp == NULL); res->lr_tmp = &rpc_list; - rc = ldlm_plain_compat_queue(&res->lr_granted, lock, 1); rc += ldlm_plain_compat_queue(&res->lr_waiting, lock, 1); res->lr_tmp = NULL; diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index ee89422..bf2f655 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -28,9 +28,9 @@ #include #endif -#include -#include -#include +#include +#include +#include #include "ldlm_internal.h" @@ -50,15 +50,16 @@ int ldlm_expired_completion_wait(void *data) struct obd_import *imp; 
struct obd_device *obd; + ENTRY; if (lock->l_conn_export == NULL) { - static unsigned long next_dump = 0, last_dump = 0; + static cfs_time_t next_dump = 0, last_dump = 0; LDLM_ERROR(lock, "lock timed out (enq %lus ago); not entering " "recovery in server code, just going back to sleep", lock->l_enqueued_time.tv_sec); - if (time_after(jiffies, next_dump)) { + if (cfs_time_after(cfs_time_current(), next_dump)) { last_dump = next_dump; - next_dump = jiffies + 300 * HZ; + next_dump = cfs_time_shift(300); ldlm_namespace_dump(D_DLMTRACE, lock->l_resource->lr_namespace); if (last_dump == 0) @@ -95,7 +96,7 @@ int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV))) { - wake_up(&lock->l_waitq); + cfs_waitq_signal(&lock->l_waitq); RETURN(0); } @@ -115,11 +116,10 @@ noreproc: lwd.lwd_lock = lock; if (unlikely(flags & LDLM_FL_NO_TIMEOUT)) { - LDLM_DEBUG(lock, "waiting indefinitely because CW lock was" - " met\n"); + LDLM_DEBUG(lock, "waiting indefinitely because of NO_TIMEOUT"); lwi = LWI_INTR(interrupted_completion_wait, &lwd); } else { - lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(obd_timeout), ldlm_expired_completion_wait, interrupted_completion_wait, &lwd); } @@ -450,9 +450,11 @@ int ldlm_cli_enqueue(struct obd_export *exp, /* lock enqueued on the server */ cleanup_phase = 1; + l_lock(&ns->ns_lock); lock->l_remote_handle = reply->lock_handle; *flags = reply->lock_flags; lock->l_flags |= reply->lock_flags & LDLM_INHERIT_FLAGS; + l_unlock(&ns->ns_lock); CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: 0x%x\n", lock, reply->lock_handle.cookie, *flags); @@ -752,7 +754,7 @@ int ldlm_cancel_lru(struct ldlm_namespace *ns, ldlm_sync_t sync) { struct ldlm_lock *lock, *next; int count, rc = 0; - LIST_HEAD(cblist); + CFS_LIST_HEAD(cblist); ENTRY; #ifndef __KERNEL__ @@ -818,7 +820,7 @@ static int ldlm_cli_cancel_unused_resource(struct 
ldlm_namespace *ns, void *opaque) { struct ldlm_resource *res; - struct list_head *tmp, *next, list = LIST_HEAD_INIT(list); + struct list_head *tmp, *next, list = CFS_LIST_HEAD_INIT(list); struct ldlm_ast_work *w; ENTRY; @@ -1045,6 +1047,7 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, { int i, rc = LDLM_ITER_CONTINUE; + ENTRY; l_lock(&ns->ns_lock); for (i = 0; i < RES_HASH_SIZE; i++) { struct list_head *tmp, *next; @@ -1098,7 +1101,9 @@ static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) /* we use l_pending_chain here, because it's unused on clients. */ LASSERTF(list_empty(&lock->l_pending_chain),"lock %p next %p prev %p\n", lock, &lock->l_pending_chain.next,&lock->l_pending_chain.prev); - list_add(&lock->l_pending_chain, list); + /* bug 9573: don't replay locks left after eviction */ + if (!(lock->l_flags & LDLM_FL_FAILED)) + list_add(&lock->l_pending_chain, list); return LDLM_ITER_CONTINUE; } @@ -1108,6 +1113,7 @@ static int replay_lock_interpret(struct ptlrpc_request *req, struct ldlm_lock *lock; struct ldlm_reply *reply; + ENTRY; atomic_dec(&req->rq_import->imp_replay_inflight); if (rc != ELDLM_OK) GOTO(out, rc); @@ -1141,6 +1147,7 @@ static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) int buffers = 1; int size[2]; int flags; + ENTRY; /* If this is reply-less callback lock, we cannot replay it, since * server might have long dropped it, but notification of that event was @@ -1212,7 +1219,7 @@ int ldlm_replay_locks(struct obd_import *imp) int rc = 0; ENTRY; - INIT_LIST_HEAD(&list); + CFS_INIT_LIST_HEAD(&list); LASSERT(atomic_read(&imp->imp_replay_inflight) == 0); diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index e2e8fa9..809b87f 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -26,21 +26,21 @@ #define DEBUG_SUBSYSTEM S_LDLM #ifdef __KERNEL__ -# include +# include #else # include #endif -#include +#include #include "ldlm_internal.h" -kmem_cache_t 
*ldlm_resource_slab, *ldlm_lock_slab; +cfs_mem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; -DECLARE_MUTEX(ldlm_namespace_lock); -struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list); -struct proc_dir_entry *ldlm_type_proc_dir = NULL; -struct proc_dir_entry *ldlm_ns_proc_dir = NULL; -struct proc_dir_entry *ldlm_svc_proc_dir = NULL; +struct semaphore ldlm_namespace_lock; +struct list_head ldlm_namespace_list = CFS_LIST_HEAD_INIT(ldlm_namespace_list); +cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL; +cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL; +cfs_proc_dir_entry_t *ldlm_svc_proc_dir = NULL; #ifdef LPROCFS static int ldlm_proc_dump_ns(struct file *file, const char *buffer, @@ -242,9 +242,9 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) strcpy(ns->ns_name, name); - INIT_LIST_HEAD(&ns->ns_root_list); + CFS_INIT_LIST_HEAD(&ns->ns_root_list); l_lock_init(&ns->ns_lock); - init_waitqueue_head(&ns->ns_refcount_waitq); + cfs_waitq_init(&ns->ns_refcount_waitq); atomic_set(&ns->ns_refcount, 0); ns->ns_client = client; spin_lock_init(&ns->ns_counter_lock); @@ -252,15 +252,15 @@ struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash; bucket--) - INIT_LIST_HEAD(bucket); + CFS_INIT_LIST_HEAD(bucket); - INIT_LIST_HEAD(&ns->ns_unused_list); + CFS_INIT_LIST_HEAD(&ns->ns_unused_list); ns->ns_nr_unused = 0; ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE; - down(&ldlm_namespace_lock); + mutex_down(&ldlm_namespace_lock); list_add(&ns->ns_list_chain, &ldlm_namespace_list); - up(&ldlm_namespace_lock); + mutex_up(&ldlm_namespace_lock); ldlm_proc_namespace(ns); RETURN(ns); @@ -374,12 +374,13 @@ int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int flags) /* Cleanup, but also free, the namespace */ int ldlm_namespace_free(struct ldlm_namespace *ns, int force) { + ENTRY; if (!ns) RETURN(ELDLM_OK); - down(&ldlm_namespace_lock); + mutex_down(&ldlm_namespace_lock); 
list_del(&ns->ns_list_chain); - up(&ldlm_namespace_lock); + mutex_up(&ldlm_namespace_lock); /* At shutdown time, don't call the cancellation callback */ ldlm_namespace_cleanup(ns, 0); @@ -444,17 +445,17 @@ static struct ldlm_resource *ldlm_resource_new(void) { struct ldlm_resource *res; - OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_NOFS, sizeof *res); + OBD_SLAB_ALLOC(res, ldlm_resource_slab, CFS_ALLOC_IO, sizeof *res); if (res == NULL) return NULL; memset(res, 0, sizeof(*res)); - INIT_LIST_HEAD(&res->lr_children); - INIT_LIST_HEAD(&res->lr_childof); - INIT_LIST_HEAD(&res->lr_granted); - INIT_LIST_HEAD(&res->lr_converting); - INIT_LIST_HEAD(&res->lr_waiting); + CFS_INIT_LIST_HEAD(&res->lr_children); + CFS_INIT_LIST_HEAD(&res->lr_childof); + CFS_INIT_LIST_HEAD(&res->lr_granted); + CFS_INIT_LIST_HEAD(&res->lr_converting); + CFS_INIT_LIST_HEAD(&res->lr_waiting); sema_init(&res->lr_lvb_sem, 1); atomic_set(&res->lr_refcount, 1); @@ -543,12 +544,12 @@ ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, /* Although this is technically a lock inversion risk (lvb_sem * should be taken before DLM lock), this resource was just * created, so nobody else can take the lvb_sem yet. 
-p */ - down(&res->lr_lvb_sem); + mutex_down(&res->lr_lvb_sem); /* Drop the dlm lock, because lvbo_init can touch the disk */ l_unlock(&ns->ns_lock); OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CREATE_RESOURCE, 2); rc = ns->ns_lvbo->lvbo_init(res); - up(&res->lr_lvb_sem); + mutex_up(&res->lr_lvb_sem); if (rc) CERROR("lvbo_init failed for resource "LPU64"/"LPU64 ": rc %d\n", name.name[0], name.name[1], rc); @@ -623,7 +624,7 @@ int ldlm_resource_putref(struct ldlm_resource *res) if (atomic_dec_and_test(&ns->ns_refcount)) { CDEBUG(D_DLMTRACE, "last ref on ns %s\n", ns->ns_name); - wake_up(&ns->ns_refcount_waitq); + cfs_waitq_signal(&ns->ns_refcount_waitq); } rc = 1; @@ -695,7 +696,7 @@ void ldlm_dump_all_namespaces(int level) { struct list_head *tmp; - down(&ldlm_namespace_lock); + mutex_down(&ldlm_namespace_lock); list_for_each(tmp, &ldlm_namespace_list) { struct ldlm_namespace *ns; @@ -703,7 +704,7 @@ void ldlm_dump_all_namespaces(int level) ldlm_namespace_dump(level, ns); } - up(&ldlm_namespace_lock); + mutex_up(&ldlm_namespace_lock); } void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) @@ -714,7 +715,7 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) atomic_read(&ns->ns_refcount), ns->ns_client); l_lock(&ns->ns_lock); - if (time_after(jiffies, ns->ns_next_dump)) { + if (cfs_time_after(cfs_time_current(), ns->ns_next_dump)) { list_for_each(tmp, &ns->ns_root_list) { struct ldlm_resource *res; res = list_entry(tmp, struct ldlm_resource, lr_childof); @@ -723,7 +724,7 @@ void ldlm_namespace_dump(int level, struct ldlm_namespace *ns) * really dump them recursively. 
*/ ldlm_resource_dump(level, res); } - ns->ns_next_dump = jiffies + 10 * HZ; + ns->ns_next_dump = cfs_time_shift(10); } l_unlock(&ns->ns_lock); } diff --git a/lustre/liblustre/dir.c b/lustre/liblustre/dir.c index e944fe5..3ef736a 100644 --- a/lustre/liblustre/dir.c +++ b/lustre/liblustre/dir.c @@ -46,8 +46,8 @@ #undef LIST_HEAD -#ifdef HAVE_LINUX_TYPES_H -#include +#ifdef HAVE_ASM_TYPES_H +#include #elif defined(HAVE_SYS_TYPES_H) #include #endif @@ -83,7 +83,7 @@ static int llu_dir_do_readpage(struct inode *inode, struct page *page) rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_BLOCK_GRANTED, &res_id, LDLM_IBITS, &policy, LCK_CR, &lockh); if (!rc) { - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); + llu_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0); rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, &it, LCK_CR, &op_data, &lockh, NULL, 0, diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 3047dee..a5190f4 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -71,12 +71,12 @@ void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) } } -void llu_prepare_mdc_op_data(struct md_op_data *op_data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode) +void llu_prepare_md_op_data(struct md_op_data *op_data, + struct inode *i1, + struct inode *i2, + const char *name, + int namelen, + int mode) { LASSERT(i1); diff --git a/lustre/liblustre/llite_fid.c b/lustre/liblustre/llite_fid.c index bdc94cc..d1c4c01 100644 --- a/lustre/liblustre/llite_fid.c +++ b/lustre/liblustre/llite_fid.c @@ -48,7 +48,7 @@ #include "lutil.h" #include "llite_lib.h" -#include +#include /* allocates passed fid, that is assigns f_num and f_seq to the @fid */ int llu_fid_md_alloc(struct llu_sb_info *sbi, struct lu_fid *fid) diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c index 5a3ad6c..cdb8f04 100644 --- a/lustre/liblustre/llite_lib.c +++ b/lustre/liblustre/llite_lib.c @@ -48,7 +48,6 
@@ #include "lutil.h" #include "llite_lib.h" -#include static int lllib_init(void) { @@ -131,9 +130,9 @@ int liblustre_process_log(struct config_llog_instance *cfg, ocd->ocd_version = LUSTRE_VERSION_CODE; /* Disable initial recovery on this import */ - rc = obd_set_info(obd->obd_self_export, - strlen(KEY_INIT_RECOV), KEY_INIT_RECOV, - sizeof(allow_recov), &allow_recov); + rc = obd_set_info_async(obd->obd_self_export, + strlen(KEY_INIT_RECOV), KEY_INIT_RECOV, + sizeof(allow_recov), &allow_recov, NULL); rc = obd_connect(&mdc_conn, obd, &mdc_uuid, ocd); if (rc) { @@ -243,17 +242,10 @@ int _sysio_lustre_init(void) { int err; char *timeout = NULL; - char *debug_mask = NULL; - char *debug_subsys = NULL; #ifndef INIT_SYSIO extern void __liblustre_cleanup_(void); #endif -#if 0 - libcfs_debug = -1; - libcfs_subsystem_debug = -1; -#endif - liblustre_init_random(); err = lllib_init(); @@ -268,16 +260,6 @@ int _sysio_lustre_init(void) obd_timeout); } - /* debug masks */ - debug_mask = getenv("LIBLUSTRE_DEBUG_MASK"); - if (debug_mask) - libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0); - - debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS"); - if (debug_subsys) - libcfs_subsystem_debug = - (unsigned int) strtol(debug_subsys, NULL, 0); - #ifndef INIT_SYSIO (void)atexit(__liblustre_cleanup_); #endif diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h index 874e6ce..f9e06b7 100644 --- a/lustre/liblustre/llite_lib.h +++ b/lustre/liblustre/llite_lib.h @@ -6,10 +6,12 @@ #define __LLU_H_ #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include @@ -195,12 +197,12 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr); extern struct fssw_ops llu_fssw_ops; /* file.c */ -void llu_prepare_mdc_op_data(struct md_op_data *op_data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode); +void llu_prepare_md_op_data(struct md_op_data *op_data, + struct inode 
*i1, + struct inode *i2, + const char *name, + int namelen, + int mode); int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); int llu_mdc_close(struct obd_export *md_exp, struct inode *inode); diff --git a/lustre/liblustre/lutil.h b/lustre/liblustre/lutil.h index dc5e6e2..d235eb8 100644 --- a/lustre/liblustre/lutil.h +++ b/lustre/liblustre/lutil.h @@ -23,8 +23,8 @@ #define __LUTIL_H_ #include -#include -#include +#include +#include void liblustre_init_random(void); int liblustre_init_current(char *comm); diff --git a/lustre/liblustre/namei.c b/lustre/liblustre/namei.c index 65d2e46..a35f4b7 100644 --- a/lustre/liblustre/namei.c +++ b/lustre/liblustre/namei.c @@ -270,7 +270,7 @@ static int llu_pb_revalidate(struct pnode *pnode, int flags, it->it_op_release = ll_intent_release; } - llu_prepare_mdc_op_data(&op_data, pnode->p_parent->p_base->pb_ino, + llu_prepare_md_op_data(&op_data, pnode->p_parent->p_base->pb_ino, pb->pb_ino, pb->pb_name.name,pb->pb_name.len,0); rc = md_intent_lock(exp, &op_data, NULL, 0, it, flags, @@ -448,9 +448,9 @@ static int llu_lookup_it(struct inode *parent, struct pnode *pnode, LBUG(); } } - llu_prepare_mdc_op_data(&op_data, parent, NULL, - pnode->p_base->pb_name.name, - pnode->p_base->pb_name.len, flags); + llu_prepare_md_op_data(&op_data, parent, NULL, + pnode->p_base->pb_name.name, + pnode->p_base->pb_name.len, flags); rc = md_intent_lock(llu_i2mdcexp(parent), &op_data, NULL, 0, it, flags, &req, llu_mdc_blocking_ast, diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 34a039a..0193ede 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -51,7 +51,6 @@ #undef LIST_HEAD #include "llite_lib.h" -#include #ifndef MAY_EXEC #define MAY_EXEC 1 @@ -667,7 +666,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) * inode ourselves so we can call obdo_from_inode() always. */ if (ia_valid & (lsm ? 
~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { struct lustre_md md; - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); + llu_prepare_md_op_data(&op_data, inode, NULL, NULL, 0, 0); rc = md_setattr(sbi->ll_md_exp, &op_data, attr, NULL, 0, NULL, 0, &request); @@ -863,7 +862,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) CERROR("can't allocate new fid, rc %d\n", err); RETURN(err); } - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0); err = md_create(sbi->ll_md_exp, &op_data, tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid, current->cap_effective, @@ -993,10 +992,10 @@ static int llu_iop_mknod_raw(struct pnode *pno, RETURN(err); } - llu_prepare_mdc_op_data(&op_data, dir, NULL, - pno->p_base->pb_name.name, - pno->p_base->pb_name.len, - 0); + llu_prepare_md_op_data(&op_data, dir, NULL, + pno->p_base->pb_name.name, + pno->p_base->pb_name.len, + 0); err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, current->cap_effective, dev, &request); @@ -1027,7 +1026,7 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) LASSERT(dir); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0); + llu_prepare_md_op_data(&op_data, src, dir, name, namelen, 0); rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); @@ -1053,7 +1052,7 @@ static int llu_iop_unlink_raw(struct pnode *pno) LASSERT(target); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0); rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); if (!rc) rc = llu_objects_destroy(request, dir); @@ -1080,7 +1079,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) LASSERT(tgt); liblustre_wait_event(0); - 
llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); + llu_prepare_md_op_data(&op_data, src, tgt, NULL, 0, 0); rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data, oldname, oldnamelen, newname, newnamelen, &request); @@ -1232,7 +1231,7 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) CERROR("can't allocate new fid, rc %d\n", err); RETURN(err); } - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); + llu_prepare_md_op_data(&op_data, dir, NULL, name, len, 0); err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, current->cap_effective, 0, &request); @@ -1257,7 +1256,7 @@ static int llu_iop_rmdir_raw(struct pnode *pno) (long long)llu_i2stat(dir)->st_ino, llu_i2info(dir)->lli_st_generation, dir); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR); + llu_prepare_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR); rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); @@ -1798,8 +1797,8 @@ llu_fsswop_mount(const char *source, CERROR("MDC %s: not setup or attached\n", mdc); GOTO(out_free, err = -EINVAL); } - obd_set_info(obd->obd_self_export, strlen("async"), "async", - sizeof(async), &async); + obd_set_info_async(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async, NULL); ocd.ocd_connect_flags = OBD_CONNECT_IBITS|OBD_CONNECT_VERSION; ocd.ocd_ibits_known = MDS_INODELOCK_FULL; @@ -1833,8 +1832,8 @@ llu_fsswop_mount(const char *source, CERROR("OSC %s: not setup or attached\n", osc); GOTO(out_mdc, err = -EINVAL); } - obd_set_info(obd->obd_self_export, strlen("async"), "async", - sizeof(async), &async); + obd_set_info_async(obd->obd_self_export, strlen("async"), "async", + sizeof(async), &async, NULL); obd->obd_upcall.onu_owner = &sbi->ll_lco; obd->obd_upcall.onu_upcall = ll_ocd_update; diff --git a/lustre/liblustre/tests/echo_test.c b/lustre/liblustre/tests/echo_test.c index f24f93f..c47f052 100644 --- 
a/lustre/liblustre/tests/echo_test.c +++ b/lustre/liblustre/tests/echo_test.c @@ -22,9 +22,9 @@ */ #include -#include -#include -#include +#include +#include +#include #define LIBLUSTRE_TEST 1 #include "../utils/lctl.c" diff --git a/lustre/liblustre/tests/sanity.c b/lustre/liblustre/tests/sanity.c index 53e4735..ccab0c3 100644 --- a/lustre/liblustre/tests/sanity.c +++ b/lustre/liblustre/tests/sanity.c @@ -49,6 +49,7 @@ void *buf_alloc; int buf_size; int opt_verbose; +struct timeval start; extern char *lustre_path; @@ -64,17 +65,23 @@ extern char *lustre_path; buf[80] = 0; \ } \ printf("%s", buf); \ + gettimeofday(&start, NULL); \ } while (0) #define LEAVE() \ do { \ - char buf[100]; \ - int len; \ - sprintf(buf, "===== END TEST %s: successfully ", \ - __FUNCTION__); \ - len = strlen(buf); \ + struct timeval stop; \ + char buf[100] = { '\0' }; \ + int len = sizeof(buf) - 1; \ + long usec; \ + gettimeofday(&stop, NULL); \ + usec = (stop.tv_sec - start.tv_sec) * 1000000 + \ + (stop.tv_usec - start.tv_usec); \ + len = snprintf(buf, len, \ + "===== END TEST %s: successfully (%gs)", \ + __FUNCTION__, (double)usec / 1000000); \ if (len < 79) { \ - memset(buf+len, '=', 100-len); \ + memset(buf+len, '=', sizeof(buf) - len); \ buf[79] = '\n'; \ buf[80] = 0; \ } \ @@ -1035,7 +1042,6 @@ int t51(char *name) printf("\n"); LEAVE(); } - /* * check atime update during read */ diff --git a/lustre/llite/Makefile.in b/lustre/llite/Makefile.in index 96a0a49..5059937 100644 --- a/lustre/llite/Makefile.in +++ b/lustre/llite/Makefile.in @@ -1,5 +1,5 @@ MODULES := llite -llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o special.o symlink.o llite_mmap.o xattr.o +llite-objs := dcache.o dir.o file.o llite_close.o llite_lib.o llite_nfs.o llite_fid.o rw.o lproc_llite.o namei.o symlink.o llite_mmap.o xattr.o ifeq ($(PATCHLEVEL),4) llite-objs += rw24.o super.o diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 
bc781b4..c91ff99 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -26,11 +26,12 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include "llite_internal.h" @@ -146,7 +147,6 @@ void ll_intent_release(struct lookup_intent *it) void ll_unhash_aliases(struct inode *inode) { struct list_head *tmp, *head; - struct ll_sb_info *sbi; ENTRY; if (inode == NULL) { @@ -157,7 +157,6 @@ void ll_unhash_aliases(struct inode *inode) CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n", inode->i_ino, inode->i_generation, inode); - sbi = ll_i2sbi(inode); head = &inode->i_dentry; restart: spin_lock(&dcache_lock); @@ -207,7 +206,7 @@ restart: #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) __d_drop(dentry); hlist_add_head(&dentry->d_hash, - &sbi->ll_orphan_dentry_list); + &ll_i2sbi(inode)->ll_orphan_dentry_list); #endif } unlock_dentry(dentry); @@ -220,7 +219,6 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request, int offset, struct lookup_intent *it, struct dentry *de) { - struct ll_sb_info *sbi; int rc = 0; ENTRY; @@ -230,8 +228,8 @@ int ll_revalidate_it_finish(struct ptlrpc_request *request, if (it_disposition(it, DISP_LOOKUP_NEG)) RETURN(-ENOENT); - sbi = ll_i2sbi(de->d_inode); - rc = ll_prep_inode(&de->d_inode, request, offset, NULL); + rc = ll_prep_inode(&de->d_inode, + request, offset, NULL); RETURN(rc); } @@ -319,8 +317,8 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, de->d_name.len, 0); rc = md_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags, - &req, ll_mdc_blocking_ast, 0); - /* If req is NULL, then mdc_intent_lock only tried to do a lock match; + &req, ll_md_blocking_ast, 0); + /* If req is NULL, then md_intent_lock only tried to do a lock match; * if all was well, it will return 1 if it found locks, 0 otherwise. 
*/ if (req == NULL && rc >= 0) GOTO(out, rc); @@ -338,6 +336,11 @@ int ll_revalidate_it(struct dentry *de, int lookup_flags, ll_intent_release(it); GOTO(out, rc = 0); } + if ((it->it_op & IT_OPEN) && de->d_inode && + !S_ISREG(de->d_inode->i_mode) && + !S_ISDIR(de->d_inode->i_mode)) { + ll_release_openhandle(de, it); + } rc = 1; /* unfortunately ll_intent_lock may cause a callback and revoke our diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index 8c788ca..1da3a5d 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -41,13 +41,12 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include "llite_internal.h" typedef struct ext2_dir_entry_2 ext2_dirent; @@ -111,9 +110,8 @@ static inline unsigned long dir_pages(struct inode *inode) } -static void ext2_check_page(struct page *page) +static void ext2_check_page(struct inode *dir, struct page *page) { - struct inode *dir = page->mapping->host; unsigned chunk_size = ext2_chunk_size(dir); char *kaddr = page_address(page); // u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count); @@ -219,7 +217,7 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) rc = md_enqueue(ll_i2sbi(dir)->ll_md_exp, LDLM_IBITS, &it, LCK_CR, &op_data, &lockh, NULL, 0, - ldlm_completion_ast, ll_mdc_blocking_ast, dir, + ldlm_completion_ast, ll_md_blocking_ast, dir, 0); request = (struct ptlrpc_request *)it.d.lustre.it_data; @@ -242,7 +240,7 @@ static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) if (!PageUptodate(page)) goto fail; if (!PageChecked(page)) - ext2_check_page(page); + ext2_check_page(dir, page); if (PageError(page)) goto fail; @@ -316,7 +314,7 @@ int ll_readdir(struct file *filp, void *dirent, filldir_t filldir) n, npages, inode->i_size); page = ll_get_dir_page(inode, n); - /* size might have been updated by mdc_readpage */ + /* size might have been updated by 
md_readpage */ npages = dir_pages(inode); if (IS_ERR(page)) { @@ -424,7 +422,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, filename, namelen, OBD_MD_FLID, 0, &request); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr_name: %d\n", rc); + CDEBUG(D_INFO, "md_getattr_name: %d\n", rc); GOTO(out, rc); } @@ -469,7 +467,7 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, if (rc) { ptlrpc_req_finished(request); if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); + CERROR("md_setattr fails: rc = %d\n", rc); return rc; } ptlrpc_req_finished(request); @@ -490,13 +488,13 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), OBD_MD_FLDIREA, lmmsize, &request); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr failed: rc = %d\n", rc); + CDEBUG(D_INFO, "md_getattr failed: rc = %d\n", rc); RETURN(rc); } body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); - LASSERT(body != NULL); /* checked by mdc_getattr_name */ - LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */ + LASSERT(body != NULL); /* checked by md_getattr_name */ + LASSERT_REPSWABBED(request, 0);/* swabbed by md_getattr_name */ lmmsize = body->eadatasize; if (lmmsize == 0) @@ -548,14 +546,14 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file, filename, strlen(filename) + 1, OBD_MD_FLEASIZE, lmmsize, &request); if (rc < 0) { - CDEBUG(D_INFO, "mdc_getattr_name failed on %s: rc %d\n", + CDEBUG(D_INFO, "md_getattr_name failed on %s: rc %d\n", filename, rc); GOTO(out_name, rc); } body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); - LASSERT(body != NULL); /* checked by mdc_getattr_name */ - LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */ + LASSERT(body != NULL); /* checked by md_getattr_name */ + LASSERT_REPSWABBED(request, 0);/* swabbed by md_getattr_name */ lmmsize = body->eadatasize; @@ -733,13 +731,13 @@ out_free_memmd: oqctl->qc_type = 
arg; rc = obd_quotacheck(sbi->ll_md_exp, oqctl); if (rc < 0) { - CDEBUG(D_INFO, "mdc_quotacheck failed: rc %d\n", rc); + CDEBUG(D_INFO, "md_quotacheck failed: rc %d\n", rc); error = rc; } rc = obd_quotacheck(sbi->ll_dt_exp, oqctl); if (rc < 0) - CDEBUG(D_INFO, "osc_quotacheck failed: rc %d\n", rc); + CDEBUG(D_INFO, "obd_quotacheck failed: rc %d\n", rc); OBD_FREE_PTR(oqctl); return error ?: rc; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index c17c997..90407ad 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -23,8 +23,9 @@ */ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include +#include #include #include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) @@ -47,25 +48,14 @@ static void ll_file_data_put(struct ll_file_data *fd) OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd); } -int ll_mdc_close(struct obd_export *md_exp, struct inode *inode, - struct file *file) +static int ll_close_inode_openhandle(struct obd_export *md_exp, + struct inode *inode, + struct obd_client_handle *och) { - struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct md_op_data op_data = { { 0 } }; struct ptlrpc_request *req = NULL; - struct obd_client_handle *och = &fd->fd_mds_och; - struct md_op_data op_data; int rc; - ENTRY; - - /* clear group lock, if present */ - if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); - rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, - &fd->fd_cwlockh); - } - memset(&op_data, 0, sizeof(op_data)); op_data.fid1 = ll_i2info(inode)->lli_fid; op_data.valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | @@ -94,15 +84,37 @@ int ll_mdc_close(struct obd_export *md_exp, struct inode *inode, CERROR("inode %lu mdc close failed: rc = %d\n", inode->i_ino, rc); } + if (rc == 0) { - rc = ll_objects_destroy(req, file->f_dentry->d_inode); + rc = ll_objects_destroy(req, inode); if (rc) 
CERROR("inode %lu ll_objects destroy: rc = %d\n", inode->i_ino, rc); } md_clear_open_replay_data(md_exp, och); - ptlrpc_req_finished(req); + ptlrpc_req_finished(req); /* This is close request */ + + RETURN(rc); +} + +int ll_md_close(struct obd_export *md_exp, struct inode *inode, + struct file *file) +{ + struct ll_file_data *fd = LUSTRE_FPRIVATE(file); + struct obd_client_handle *och = &fd->fd_mds_och; + int rc; + ENTRY; + + /* clear group lock, if present */ + if (unlikely(fd->fd_flags & LL_FILE_GROUP_LOCKED)) { + struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + rc = ll_extent_unlock(fd, inode, lsm, LCK_GROUP, + &fd->fd_cwlockh); + } + + rc = ll_close_inode_openhandle(md_exp, inode, och); och->och_fh.cookie = DEAD_HANDLE_MAGIC; LUSTRE_FPRIVATE(file) = NULL; ll_file_data_put(fd); @@ -141,7 +153,7 @@ int ll_file_release(struct inode *inode, struct file *file) lov_test_and_clear_async_rc(lsm); lli->lli_async_rc = 0; - rc = ll_mdc_close(sbi->ll_md_exp, inode, file); + rc = ll_md_close(sbi->ll_md_exp, inode, file); RETURN(rc); } @@ -164,40 +176,54 @@ static int ll_intent_file_open(struct file *file, void *lmm, rc = md_enqueue(sbi->ll_md_exp, LDLM_IBITS, itp, LCK_PW, &op_data, &lockh, lmm, lmmsize, ldlm_completion_ast, - ll_mdc_blocking_ast, NULL, 0); - if (rc < 0) + ll_md_blocking_ast, NULL, 0); + if (rc < 0) { CERROR("lock enqueue: err: %d\n", rc); + GOTO(out, rc); + } + + rc = ll_prep_inode(&file->f_dentry->d_inode, + (struct ptlrpc_request *)itp->d.lustre.it_data, 1, + NULL); +out: RETURN(rc); } +static void ll_och_fill(struct obd_export *md_exp, struct ll_inode_info *lli, + struct lookup_intent *it, struct obd_client_handle *och) +{ + struct ptlrpc_request *req = it->d.lustre.it_data; + struct mds_body *body; + + LASSERT(och); + + body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*body)); + LASSERT(body != NULL); /* reply already checked out */ + LASSERT_REPSWABBED(req, 1); /* and swabbed in 
md_enqueue */ + + memcpy(&och->och_fh, &body->handle, sizeof(body->handle)); + och->och_magic = OBD_CLIENT_HANDLE_MAGIC; + lli->lli_io_epoch = body->io_epoch; + + md_set_open_replay_data(md_exp, och, it->d.lustre.it_data); +} + int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd) { - struct ptlrpc_request *req = it->d.lustre.it_data; struct inode *inode = file->f_dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdt_body *body; ENTRY; - body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED (req, 1); /* and swabbed down */ - LASSERT(!LUSTRE_FPRIVATE(file)); LASSERT(fd != NULL); - memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); - fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; + ll_och_fill(ll_i2sbi(inode)->ll_md_exp, + ll_i2info(inode), it, &fd->fd_mds_och); + LUSTRE_FPRIVATE(file) = fd; ll_readahead_init(inode, &fd->fd_ras); - lli->lli_io_epoch = body->io_epoch; - - md_set_open_replay_data(sbi->ll_md_exp, &fd->fd_mds_och, - it->d.lustre.it_data); - RETURN(0); } @@ -241,6 +267,21 @@ int ll_file_open(struct inode *inode, struct file *file) RETURN(-ENOMEM); if (!it || !it->d.lustre.it_disposition) { + /* Convert f_flags into access mode. We cannot use file->f_mode, + * because everything but O_ACCMODE mask was stripped from + * there */ + if ((oit.it_flags + 1) & O_ACCMODE) + oit.it_flags++; + if (oit.it_flags & O_TRUNC) + oit.it_flags |= FMODE_WRITE; + + if (oit.it_flags & O_CREAT) + oit.it_flags |= MDS_OPEN_OWNEROVERRIDE; + + /* We do not want O_EXCL here, presumably we opened the file + * already? XXX - NFS implications? 
*/ + oit.it_flags &= ~O_EXCL; + it = &oit; rc = ll_intent_file_open(file, NULL, 0, it); if (rc) { @@ -900,7 +941,7 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count, /* A glimpse is necessary to determine whether we return a * short read (B) or some zeroes at the end of the buffer (C) */ ll_inode_size_unlock(inode, 1); - retval = ll_glimpse_size(inode, 0); + retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); if (retval) goto out; } else { @@ -1032,6 +1073,10 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, count); + /* File with no objects, nothing to lock */ + if (!lsm) + RETURN(generic_file_sendfile(in_file, ppos, count, actor, target)); + node = ll_node_from_inode(inode, *ppos, *ppos + count - 1, LCK_PR); tree.lt_fd = LUSTRE_FPRIVATE(in_file); rc = ll_tree_lock(&tree, node, NULL, count, @@ -1066,7 +1111,7 @@ static ssize_t ll_file_sendfile(struct file *in_file, loff_t *ppos,size_t count, /* A glimpse is necessary to determine whether we return a * short read (B) or some zeroes at the end of the buffer (C) */ ll_inode_size_unlock(inode, 1); - retval = ll_glimpse_size(inode, 0); + retval = ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); if (retval) goto out; } else { @@ -1412,7 +1457,7 @@ static int join_file(struct inode *head_inode, struct file *head_filp, rc = md_enqueue(ll_i2mdexp(head_inode), LDLM_IBITS, &oit, LCK_PW, op_data, &lockh, &tsize, 0, ldlm_completion_ast, - ll_mdc_blocking_ast, &hsize, 0); + ll_md_blocking_ast, &hsize, 0); if (rc < 0) GOTO(out, rc); @@ -1443,7 +1488,7 @@ out: static int ll_file_join(struct inode *head, struct file *filp, char *filename_tail) { - struct inode *tail = NULL, *first, *second; + struct inode *tail = NULL, *first = NULL, *second = NULL; struct dentry *tail_dentry; struct file *tail_filp, *first_filp, *second_filp; struct ll_lock_tree first_tree, second_tree; @@ -1533,6 +1578,40 @@ cleanup: 
RETURN(rc); } +int ll_release_openhandle(struct dentry *dentry, struct lookup_intent *it) +{ + struct inode *inode = dentry->d_inode; + struct obd_client_handle *och; + int rc; + ENTRY; + + LASSERT(inode); + + /* Root ? Do nothing. */ + if (dentry->d_inode->i_sb->s_root == dentry) + RETURN(0); + + /* No open handle to close? Move away */ + if (!it_disposition(it, DISP_OPEN_OPEN)) + RETURN(0); + + OBD_ALLOC(och, sizeof(*och)); + if (!och) + GOTO(out, rc = -ENOMEM); + + ll_och_fill(ll_i2sbi(inode)->ll_md_exp, + ll_i2info(inode), it, och); + + rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, + inode, och); + + OBD_FREE(och, sizeof(*och)); + out: + /* this one is in place of ll_file_open */ + ptlrpc_req_finished(it->d.lustre.it_data); + RETURN(rc); +} + int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -1835,7 +1914,7 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it) ll_prepare_md_op_data(&op_data, inode, inode, NULL, 0, 0); rc = md_intent_lock(sbi->ll_md_exp, &op_data, NULL, 0, &oit, 0, - &req, ll_mdc_blocking_ast, 0); + &req, ll_md_blocking_ast, 0); if (rc < 0) GOTO(out, rc); @@ -1860,8 +1939,8 @@ out: } #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int ll_getattr(struct vfsmount *mnt, struct dentry *de, - struct lookup_intent *it, struct kstat *stat) +int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, + struct lookup_intent *it, struct kstat *stat) { struct inode *inode = de->d_inode; int res = 0; @@ -1891,6 +1970,12 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, return 0; } +int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) +{ + struct lookup_intent it = { .it_op = IT_GETATTR }; + + return ll_getattr_it(mnt, de, &it, stat); +} #endif static @@ -2011,7 +2096,7 @@ struct inode_operations ll_file_inode_operations = { .setattr = ll_setattr, .truncate = ll_truncate, #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - .getattr_it = 
ll_getattr, + .getattr_it = ll_getattr_it, #else .revalidate_it = ll_inode_revalidate_it, #endif diff --git a/lustre/llite/llite_close.c b/lustre/llite/llite_close.c index 69c98b2..32da443 100644 --- a/lustre/llite/llite_close.c +++ b/lustre/llite/llite_close.c @@ -25,8 +25,8 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include #include "llite_internal.h" /* record that a write is in flight */ @@ -196,18 +196,12 @@ static int ll_close_thread(void *arg) struct ll_close_queue *lcq = arg; ENTRY; - /* XXX boiler-plate */ { - char name[sizeof(current->comm)]; - unsigned long flags; + char name[CFS_CURPROC_COMM_MAX]; snprintf(name, sizeof(name) - 1, "ll_close"); - libcfs_daemonize(name); - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); + cfs_daemonize(name); } - + complete(&lcq->lcq_comp); while (1) { diff --git a/lustre/llite/llite_fid.c b/lustre/llite/llite_fid.c index 5b28077..e3ffc98 100644 --- a/lustre/llite/llite_fid.c +++ b/lustre/llite/llite_fid.c @@ -28,11 +28,11 @@ #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "llite_internal.h" static int ll_fid_alloc(struct obd_export *exp, struct lu_fid *fid, diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 0e1e113..3ec657a 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -9,10 +9,11 @@ # include # include #endif - -#include #include -#include /* for s2sbi */ + +#include +#include +#include /* for s2sbi */ /* struct lustre_intent_data { @@ -77,11 +78,6 @@ struct ll_inode_info { /* for writepage() only to communicate to fsync */ int lli_async_rc; - struct file_operations *ll_save_ifop; - struct file_operations *ll_save_ffop; - struct file_operations *ll_save_wfop; - struct file_operations *ll_save_wrfop; - struct posix_acl *lli_posix_acl; struct list_head lli_dead_list; @@ -120,6 +116,10
@@ static inline struct ll_inode_info *ll_i2info(struct inode *inode) /* default to about 40meg of readahead on a given system. That much tied * up in 512k readahead requests serviced at 40ms each is about 1GB/s. */ #define SBI_DEFAULT_READAHEAD_MAX (40UL << (20 - PAGE_CACHE_SHIFT)) + +/* default to read-ahead full files smaller than 2MB on the second read */ +#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_CACHE_SHIFT)) + enum ra_stat { RA_STAT_HIT = 0, RA_STAT_MISS, @@ -139,6 +139,7 @@ enum ra_stat { struct ll_ra_info { unsigned long ra_cur_pages; unsigned long ra_max_pages; + unsigned long ra_max_read_ahead_whole_pages; unsigned long ra_stats[_NR_RA_STAT]; }; @@ -213,7 +214,13 @@ struct ll_readahead_state { * case, it probably doesn't make sense to expand window to * PTLRPC_MAX_BRW_PAGES on the third access. */ - unsigned long ras_consecutive; + unsigned long ras_consecutive_pages; + /* + * number of read requests after the last read-ahead window reset + * As window is reset on each seek, this is effectively the number + * on consecutive read request and is used to trigger read-ahead. + */ + unsigned long ras_consecutive_requests; /* * Parameters of current read-ahead window. Handled by * ras_update(). On the initial access to the file or after a seek, @@ -231,6 +238,17 @@ struct ll_readahead_state { */ unsigned long ras_next_readahead; /* + * Total number of ll_file_read requests issued, reads originating + * due to mmap are not counted in this total. This value is used to + * trigger full file read-ahead after multiple reads to a small file. + */ + unsigned long ras_requests; + /* + * Page index with respect to the current request, these value + * will not be accurate when dealing with reads issued via mmap. + */ + unsigned long ras_request_index; + /* * list of struct ll_ra_read's one per read(2) call current in * progress against this file descriptor. Used by read-ahead code, * protected by ->ras_lock. 
@@ -339,8 +357,8 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir); struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *lic); struct dentry *ll_find_alias(struct inode *, struct dentry *); -int ll_mdc_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); +int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, + void *data, int flag); void ll_prepare_md_op_data(struct md_op_data *op_data, struct inode *i1, struct inode *i2, const char *name, int namelen, int mode); @@ -377,11 +395,13 @@ int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *); int ll_glimpse_size(struct inode *inode, int ast_flags); int ll_local_open(struct file *file, struct lookup_intent *it, struct ll_file_data *fd); -int ll_mdc_close(struct obd_export *md_exp, struct inode *inode, - struct file *file); +int ll_release_openhandle(struct dentry *, struct lookup_intent *); +int ll_md_close(struct obd_export *md_exp, struct inode *inode, + struct file *file); #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int ll_getattr(struct vfsmount *mnt, struct dentry *de, +int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); +int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); #endif struct ll_file_data *ll_file_data_get(void); #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) @@ -434,6 +454,7 @@ int ll_obd_statfs(struct inode *inode, void *arg); int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); /* llite/llite_nfs.c */ +extern struct export_operations lustre_export_operations; __u32 get_uuid2int(const char *name, int len); struct dentry *ll_fh_to_dentry(struct super_block *sb, __u32 *data, int len, int fhtype, int parent); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 69058a9..b37cc06 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -28,13 +28,11 @@ #include 
#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "llite_internal.h" kmem_cache_t *ll_file_data_slab; @@ -70,6 +68,8 @@ struct ll_sb_info *ll_init_sbi(void) sbi->ll_async_page_max = (num_physpages / 4) * 3; sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8, SBI_DEFAULT_READAHEAD_MAX); + sbi->ll_ra_info.ra_max_read_ahead_whole_pages = + SBI_DEFAULT_READAHEAD_WHOLE_MAX; INIT_LIST_HEAD(&sbi->ll_conn_chain); INIT_HLIST_HEAD(&sbi->ll_orphan_dentry_list); @@ -148,7 +148,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) struct obd_statfs osfs; struct ptlrpc_request *request = NULL; struct lustre_handle osc_conn = {0, }; - struct lustre_handle mdc_conn = {0, }; + struct lustre_handle md_conn = {0, }; struct obd_connect_data *data = NULL; struct lustre_md md; int err; @@ -172,7 +172,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) } /* indicate that inodebits locking is supported by this client */ - data->ocd_connect_flags |= OBD_CONNECT_IBITS; + data->ocd_connect_flags |= OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH; data->ocd_ibits_known = MDS_INODELOCK_FULL; if (sb->s_flags & MS_RDONLY) @@ -193,7 +193,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) /* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; - err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data); + err = obd_connect(&md_conn, obd, &sbi->ll_sb_uuid, data); if (err == -EBUSY) { CERROR("An MDT (mdc %s) is performing recovery, of which this" " client is not a part. 
Please wait for recovery to " @@ -203,7 +203,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) CERROR("cannot connect to %s: rc = %d\n", mdc, err); GOTO(out, err); } - sbi->ll_md_exp = class_conn2export(&mdc_conn); + sbi->ll_md_exp = class_conn2export(&md_conn); err = obd_statfs(obd, &osfs, jiffies - HZ); if (err) @@ -247,11 +247,11 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) obd = class_name2obd(osc); if (!obd) { CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, err); + GOTO(out_mdc, err = -ENODEV); } data->ocd_connect_flags = - OBD_CONNECT_GRANT|OBD_CONNECT_VERSION|OBD_CONNECT_REQPORTAL; + OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL; CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " "ocd_grant: %d\n", data->ocd_connect_flags, @@ -307,6 +307,9 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) sbi->ll_root_fid = rootfid; sb->s_op = &lustre_super_operations; +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + sb->s_export_op = &lustre_export_operations; +#endif /* make root inode * XXX: move this to after cbd setup? */ @@ -315,7 +318,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc) (sbi->ll_flags & LL_SBI_ACL ? 
OBD_MD_FLACL : 0), 0, &request); if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); + CERROR("md_getattr failed for root: rc = %d\n", err); GOTO(out_osc, err); } @@ -713,7 +716,7 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int next = 0; + int next; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); @@ -724,18 +727,19 @@ void ll_put_super(struct super_block *sb) obd = class_exp2obd(sbi->ll_md_exp); if (obd) { - int next = 0; int force = obd->obd_no_recov; /* We need to set force before the lov_disconnect in lustre_common_put_super, since l_d cleans up osc's as well. */ - while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) + next = 0; + while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) { obd->obd_force = force; } } client_common_put_super(sb); - + + next = 0; while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { class_manual_cleanup(obd); } @@ -748,7 +752,7 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); - CDEBUG(D_WARNING, "client umount done\n"); + LCONSOLE_WARN("client umount complete\n"); EXIT; } /* client_put_super */ @@ -923,7 +927,6 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), CURRENT_SECONDS); - /* NB: ATTR_SIZE will only be set after this point if the size * resides on the MDS, ie, this file has no objects. */ if (lsm) @@ -941,8 +944,17 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) if (rc) { ptlrpc_req_finished(request); - if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); + if (rc == -ENOENT) { + inode->i_nlink = 0; + /* Unlinked special device node? Or just a race? + * Pretend we done everything. 
*/ + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode) && + !S_ISDIR(inode->i_mode)) + rc = inode_setattr(inode, attr); + } else if (rc != -EPERM && rc != -EACCES) { + CERROR("mdcsetattr fails: rc = %d\n", rc); + } RETURN(rc); } @@ -1061,8 +1073,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) int ll_setattr(struct dentry *de, struct iattr *attr) { - LBUG(); /* code is unused, but leave this in case of VFS changes */ - RETURN(-ENOSYS); + return ll_setattr_raw(de->d_inode, attr); } int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, @@ -1075,7 +1086,7 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age); if (rc) { - CERROR("mdc_statfs fails: rc = %d\n", rc); + CERROR("md_statfs fails: rc = %d\n", rc); RETURN(rc); } @@ -1356,16 +1367,6 @@ void ll_read_inode2(struct inode *inode, void *opaque) #else init_special_inode(inode, inode->i_mode, inode->i_rdev); #endif - lli->ll_save_ifop = inode->i_fop; - - if (S_ISCHR(inode->i_mode)) - inode->i_fop = &ll_special_chr_inode_fops; - else if (S_ISBLK(inode->i_mode)) - inode->i_fop = &ll_special_blk_inode_fops; - else if (S_ISFIFO(inode->i_mode)) - inode->i_fop = &ll_special_fifo_inode_fops; - else if (S_ISSOCK(inode->i_mode)) - inode->i_fop = &ll_special_sock_inode_fops; EXIT; } } @@ -1418,7 +1419,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, } case EXT3_IOC_SETFLAGS: { struct md_op_data op_data = { { 0 } }; - struct iattr attr; + struct ll_iattr_struct attr; struct obdo *oa; struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; @@ -1433,10 +1434,10 @@ int ll_iocontrol(struct inode *inode, struct file *file, memset(&attr, 0x0, sizeof(attr)); attr.ia_attr_flags = flags; - attr.ia_valid |= ATTR_ATTR_FLAG; + ((struct iattr *)&attr)->ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, &op_data, - &attr, NULL, 0, NULL, 0, &req); + (struct iattr *)&attr, NULL, 0, NULL, 0, &req); if 
(rc || lsm == NULL) { ptlrpc_req_finished(req); obdo_free(oa); @@ -1453,7 +1454,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, obdo_free(oa); if (rc) { if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); + CERROR("md_setattr fails: rc = %d\n", rc); RETURN(rc); } @@ -1512,6 +1513,7 @@ void ll_umount_begin(struct super_block *sb) EXIT; return; } + obd->obd_no_recov = 1; obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, sizeof ioc_data, &ioc_data, NULL); @@ -1533,8 +1535,9 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { read_only = *flags & MS_RDONLY; - err = obd_set_info(sbi->ll_md_exp, strlen("read-only"), - "read-only", sizeof(read_only), &read_only); + err = obd_set_info_async(sbi->ll_md_exp, strlen("read-only"), + "read-only", sizeof(read_only), + &read_only, NULL); if (err) { CERROR("Failed to change the read-only flag during " "remount: %d\n", err); diff --git a/lustre/llite/llite_mmap.c b/lustre/llite/llite_mmap.c index 8254e98..f0de698 100644 --- a/lustre/llite/llite_mmap.c +++ b/lustre/llite/llite_mmap.c @@ -44,8 +44,8 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include #include "llite_internal.h" #include @@ -407,7 +407,7 @@ struct page *ll_nopage(struct vm_area_struct *vma, unsigned long address, if (pgoff >= size) { lov_stripe_unlock(lsm); - ll_glimpse_size(inode, 0); + ll_glimpse_size(inode, LDLM_FL_BLOCK_GRANTED); } else { /* XXX change inode size without ll_inode_size_lock() held! * there is a race condition with truncate path. 
(see diff --git a/lustre/llite/llite_nfs.c b/lustre/llite/llite_nfs.c index d458ebf02..fb05f98 100644 --- a/lustre/llite/llite_nfs.c +++ b/lustre/llite/llite_nfs.c @@ -22,7 +22,7 @@ */ #define DEBUG_SUBSYSTEM S_LLITE -#include +#include #include "llite_internal.h" __u32 get_uuid2int(const char *name, int len) @@ -103,7 +103,9 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, { struct inode *inode; struct dentry *result; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) struct list_head *lp; +#endif if (fid_num(fid) == 0) return ERR_PTR(-ESTALE); @@ -120,6 +122,13 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, return ERR_PTR(-ESTALE); } +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } +#else /* now to find a dentry. * If possible, get a well-connected one */ @@ -145,6 +154,7 @@ static struct dentry *ll_iget_for_nfs(struct super_block *sb, } result->d_flags |= DCACHE_DISCONNECTED; +#endif ll_set_dd(result); result->d_op = &ll_d_ops; return result; @@ -234,3 +244,59 @@ int ll_dentry_to_fh(struct dentry *dentry, __u32 *datap, int *lenp, *lenp = 5; return 1; } + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +struct dentry *ll_get_dentry(struct super_block *sb, void *data) +{ + __u32 *inump = (__u32*)data; + struct lu_fid fid; + + /* FIXME: seems this is not enough */ + fid.f_seq = inump[0]; + fid.f_oid = inump[1]; + + return ll_iget_for_nfs(sb, &fid, S_IFREG); +} + +struct dentry *ll_get_parent(struct dentry *dchild) +{ + struct ptlrpc_request *req = NULL; + struct inode *dir = dchild->d_inode; + struct ll_sb_info *sbi; + struct dentry *result = NULL; + struct mds_body *body; + char dotdot[] = ".."; + int rc = 0; + ENTRY; + + LASSERT(dir && S_ISDIR(dir->i_mode)); + + sbi = ll_s2sbi(dir->i_sb); + + rc = md_getattr_name(sbi->ll_md_exp, ll_inode2fid(dir), + dotdot, strlen(dotdot) + 1, + 0, 0, &req); + if (rc) { + CERROR("failure %d inode %lu 
get parent\n", rc, dir->i_ino); + return ERR_PTR(rc); + } + body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); + + LASSERT((body->valid & OBD_MD_FLGENER) && (body->valid & OBD_MD_FLID)); + + result = ll_iget_for_nfs(dir->i_sb, ll_inode2fid(dir), S_IFDIR); + + if (IS_ERR(result)) + rc = PTR_ERR(result); + + ptlrpc_req_finished(req); + if (rc) + return ERR_PTR(rc); + RETURN(result); +} + +struct export_operations lustre_export_operations = { + .get_parent = ll_get_parent, + .get_dentry = ll_get_dentry, +}; +#endif diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 4cbf3ed..e08a466 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -22,10 +22,10 @@ #define DEBUG_SUBSYSTEM S_LLITE #include -#include -#include +#include +#include #include -#include +#include #include "llite_internal.h" @@ -201,7 +201,7 @@ static int ll_wr_max_readahead_mb(struct file *file, const char *buffer, return rc; if (val < 0 || val > (num_physpages >> (20 - PAGE_CACHE_SHIFT - 1))) { - CERROR("can't set readahead more than %lu MB\n", + CERROR("can't set file readahead more than %lu MB\n", num_physpages >> (20 - PAGE_CACHE_SHIFT - 1)); return -ERANGE; } @@ -213,6 +213,50 @@ static int ll_wr_max_readahead_mb(struct file *file, const char *buffer, return count; } +static int ll_rd_max_read_ahead_whole_mb(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct super_block *sb = data; + struct ll_sb_info *sbi = ll_s2sbi(sb); + unsigned val; + + spin_lock(&sbi->ll_lock); + val = sbi->ll_ra_info.ra_max_read_ahead_whole_pages >> + (20 - PAGE_CACHE_SHIFT); + spin_unlock(&sbi->ll_lock); + + return snprintf(page, count, "%u\n", val); +} + +static int ll_wr_max_read_ahead_whole_mb(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct super_block *sb = data; + struct ll_sb_info *sbi = ll_s2sbi(sb); + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + /* 
Cap this at the current max readahead window size, the readahead + * algorithm does this anyway so it's pointless to set it larger. */ + if (val < 0 || + val > (sbi->ll_ra_info.ra_max_pages >> (20 - PAGE_CACHE_SHIFT))) { + CERROR("can't set max_read_ahead_whole_mb more than " + "max_read_ahead_mb: %lu\n", + sbi->ll_ra_info.ra_max_pages >> (20 - PAGE_CACHE_SHIFT)); + return -ERANGE; + } + + spin_lock(&sbi->ll_lock); + sbi->ll_ra_info.ra_max_read_ahead_whole_pages = + val << (20 - PAGE_CACHE_SHIFT); + spin_unlock(&sbi->ll_lock); + + return count; +} + static int ll_rd_max_cached_mb(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -280,8 +324,8 @@ static int ll_wr_checksum(struct file *file, const char *buffer, else sbi->ll_flags &= ~LL_SBI_CHECKSUM; - rc = obd_set_info(sbi->ll_dt_exp, strlen("checksum"), "checksum", - sizeof(val), &val); + rc = obd_set_info_async(sbi->ll_dt_exp, strlen("checksum"), "checksum", + sizeof(val), &val, NULL); if (rc) CWARN("Failed to set OSC checksum flags: %d\n", rc); @@ -301,6 +345,8 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { //{ "filegroups", lprocfs_rd_filegroups, 0, 0 }, { "max_read_ahead_mb", ll_rd_max_readahead_mb, ll_wr_max_readahead_mb, 0 }, + { "max_read_ahead_whole_mb", ll_rd_max_read_ahead_whole_mb, + ll_wr_max_read_ahead_whole_mb, 0 }, { "max_cached_mb", ll_rd_max_cached_mb, ll_wr_max_cached_mb, 0 }, { "checksum_pages", ll_rd_checksum, ll_wr_checksum, 0 }, { 0 } diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 8e64ae8..7a0e982 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -29,10 +29,11 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "llite_internal.h" /* methods */ @@ -97,8 +98,8 @@ struct inode *ll_iget(struct super_block *sb, ino_t hash, } #endif -int ll_mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) +int 
ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, int flag) { int rc; struct lustre_handle lockh; @@ -402,7 +403,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, it->it_create_mode &= ~current->fs->umask; rc = md_intent_lock(ll_i2mdexp(parent), &op_data, NULL, 0, it, - lookup_flags, &req, ll_mdc_blocking_ast, 0); + lookup_flags, &req, ll_md_blocking_ast, 0); if (rc < 0) GOTO(out, retval = ERR_PTR(rc)); @@ -413,6 +414,11 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, GOTO(out, retval = ERR_PTR(rc)); } + if ((it->it_op & IT_OPEN) && dentry->d_inode && + !S_ISREG(dentry->d_inode->i_mode) && + !S_ISDIR(dentry->d_inode->i_mode)) { + ll_release_openhandle(dentry, it); + } ll_lookup_finish_locks(it, dentry); if (dentry == save) @@ -514,13 +520,6 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, RETURN(0); } -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) -{ - return ll_create_it(dir, dentry, mode, &nd->intent); -} -#endif - static void ll_update_times(struct ptlrpc_request *request, int offset, struct inode *inode) { @@ -539,17 +538,18 @@ static void ll_update_times(struct ptlrpc_request *request, int offset, LTIME_S(inode->i_ctime) = body->ctime; } -static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) +static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode, + unsigned rdev, struct dentry *dchild) { struct ptlrpc_request *request = NULL; - struct inode *dir = nd->dentry->d_inode; + struct inode *inode = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct md_op_data op_data = { { 0 } }; int err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p) mode %o dev %x\n", - nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir, + name->len, name->name, dir->i_ino, dir->i_generation, dir, mode, 
rdev); mode &= ~current->fs->umask; @@ -562,15 +562,23 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) case S_IFBLK: case S_IFIFO: case S_IFSOCK: - ll_prepare_md_op_data(&op_data, dir, NULL, - nd->last.name, nd->last.len, 0); - + ll_prepare_md_op_data(&op_data, dir, NULL, name->name, + name->len, 0); err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, current->fsuid, current->fsgid, current->cap_effective, rdev, &request); - if (err == 0) - ll_update_times(request, 0, dir); - ptlrpc_req_finished(request); + if (err) + break; + ll_update_times(request, 0, dir); + + if (dchild) { + err = ll_prep_inode(&inode, request, 0, + dchild->d_sb); + if (err) + break; + + d_instantiate(dchild, inode); + } break; case S_IFDIR: err = -EPERM; @@ -578,79 +586,29 @@ static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) default: err = -EINVAL; } + ptlrpc_req_finished(request); RETURN(err); } -static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode, - ll_dev_t rdev) +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static int ll_create_nd(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - struct placement_hint hint = { .ph_pname = NULL, - .ph_cname = &dchild->d_name, - .ph_opc = LUSTRE_OPC_MKNODE }; - - struct ptlrpc_request *request = NULL; - struct inode *inode = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct md_op_data op_data = { { 0 } }; - int err; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", - dchild->d_name.len, dchild->d_name.name, - dir->i_ino, dir->i_generation, dir); - - mode &= ~current->fs->umask; - - switch (mode & S_IFMT) { - case 0: - case S_IFREG: - mode |= S_IFREG; /* for mode = 0 case, fallthrough */ - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: - /* allocate new fid */ - err = ll_fid_md_alloc(ll_i2sbi(dir), &op_data.fid2, &hint); - if (err) { - CERROR("can't allocate new fid, rc %d\n", err); - LBUG(); - } - - 
ll_prepare_md_op_data(&op_data, dir, NULL, dchild->d_name.name, - dchild->d_name.len, 0); - - err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - current->cap_effective, rdev, &request); - if (err) - GOTO(out_err, err); - - ll_update_times(request, 0, dir); - - err = ll_prep_inode(&inode, request, 0, dchild->d_sb); - if (err) - GOTO(out_err, err); - break; - case S_IFDIR: - RETURN(-EPERM); - break; - default: - RETURN(-EINVAL); + if (!nd || !nd->intent.d.lustre.it_disposition) { + /* No saved request? Just mknod the file */ + return ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry); } - d_instantiate(dchild, inode); - out_err: - ptlrpc_req_finished(request); - RETURN(err); + return ll_create_it(dir, dentry, mode, &nd->intent); } +#endif -static int ll_symlink_raw(struct nameidata *nd, const char *tgt) +static int ll_symlink_generic(struct inode *dir, struct qstr *name, + const char *tgt) { struct placement_hint hint = { .ph_pname = NULL, - .ph_cname = &nd->dentry->d_name, + .ph_cname = name, .ph_opc = LUSTRE_OPC_SYMLINK }; - struct inode *dir = nd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct md_op_data op_data = { { 0 } }; @@ -658,7 +616,7 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt) ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),target=%s\n", - nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, + name->len, name->name, dir->i_ino, dir->i_generation, dir, tgt); /* allocate new fid */ @@ -669,7 +627,7 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt) } ll_prepare_md_op_data(&op_data, dir, NULL, - nd->last.name, nd->last.len, 0); + name->name, name->len, 0); err = md_create(sbi->ll_md_exp, &op_data, tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, @@ -682,10 +640,9 @@ static int ll_symlink_raw(struct nameidata *nd, const char *tgt) RETURN(err); } -static int ll_link_raw(struct nameidata *srcnd, struct 
nameidata *tgtnd) +static int ll_link_generic(struct inode *src, struct inode *dir, + struct qstr *name) { - struct inode *src = srcnd->dentry->d_inode; - struct inode *dir = tgtnd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct md_op_data op_data = { { 0 } }; int err; @@ -695,11 +652,10 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) CDEBUG(D_VFSTRACE, "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n", src->i_ino, src->i_generation, src, dir->i_ino, - dir->i_generation, dir, tgtnd->last.len, tgtnd->last.name); - - ll_prepare_md_op_data(&op_data, src, dir, tgtnd->last.name, - tgtnd->last.len, 0); + dir->i_generation, dir, name->len, name->name); + ll_prepare_md_op_data(&op_data, src, dir, name->name, + name->len, 0); err = md_link(sbi->ll_md_exp, &op_data, &request); if (err == 0) ll_update_times(request, 0, dir); @@ -709,20 +665,21 @@ static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) RETURN(err); } +static int ll_mkdir_generic(struct inode *dir, struct qstr *name, int mode, + struct dentry *dchild) -static int ll_mkdir_raw(struct nameidata *nd, int mode) { struct placement_hint hint = { .ph_pname = NULL, - .ph_cname = &nd->dentry->d_name, + .ph_cname = name, .ph_opc = LUSTRE_OPC_MKDIR }; - struct inode *dir = nd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(dir); struct md_op_data op_data = { { 0 } }; + struct inode *inode = NULL; int err; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", - nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); + name->len, name->name, dir->i_ino, dir->i_generation, dir); mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; @@ -734,7 +691,7 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode) } ll_prepare_md_op_data(&op_data, dir, NULL, - nd->last.name, nd->last.len, 0); + name->name, name->len, 0); err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, 
current->fsuid, current->fsgid, current->cap_effective, @@ -742,34 +699,45 @@ static int ll_mkdir_raw(struct nameidata *nd, int mode) if (err == 0) ll_update_times(request, 0, dir); + ll_update_times(request, 0, dir); + if (dchild) { + err = ll_prep_inode(&inode, request, 0, + dchild->d_sb); + if (err) + GOTO(out, err); + d_instantiate(dchild, inode); + } + EXIT; +out: ptlrpc_req_finished(request); - RETURN(err); + return err; } -static int ll_rmdir_raw(struct nameidata *nd) +static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent, + struct qstr *name) { - struct inode *dir = nd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct md_op_data op_data = { { 0 } }; struct dentry *dentry; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", - nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); + name->len, name->name, dir->i_ino, dir->i_generation, dir); /* Check if we have something mounted at the dir we are going to delete * In such a case there would always be dentry present. 
*/ - dentry = d_lookup(nd->dentry, &nd->last); - if (dentry) { - int mounted = d_mountpoint(dentry); - dput(dentry); - if (mounted) - RETURN(-EBUSY); + if (dparent) { + dentry = d_lookup(dparent, name); + if (dentry) { + int mounted = d_mountpoint(dentry); + dput(dentry); + if (mounted) + RETURN(-EBUSY); + } } - - ll_prepare_md_op_data(&op_data, dir, NULL, nd->last.name, - nd->last.len, S_IFDIR); - + + ll_prepare_md_op_data(&op_data, dir, NULL, name->name, + name->len, S_IFDIR); rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, &op_data, &request); if (rc == 0) ll_update_times(request, 0, dir); @@ -851,19 +819,17 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) return rc; } -static int ll_unlink_raw(struct nameidata *nd) +static int ll_unlink_generic(struct inode * dir, struct qstr *name) { - struct inode *dir = nd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct md_op_data op_data = { { 0 } }; int rc; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", - nd->last.len, nd->last.name, dir->i_ino, dir->i_generation, dir); - - ll_prepare_md_op_data(&op_data, dir, NULL, - nd->last.name, nd->last.len, 0); + name->len, name->name, dir->i_ino, dir->i_generation, dir); + ll_prepare_md_op_data(&op_data, dir, NULL, name->name, + name->len, 0); rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, &op_data, &request); if (rc) GOTO(out, rc); @@ -876,25 +842,23 @@ static int ll_unlink_raw(struct nameidata *nd) RETURN(rc); } -static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) +static int ll_rename_generic(struct inode *src, struct qstr *src_name, + struct inode *tgt, struct qstr *tgt_name) { - struct inode *src = srcnd->dentry->d_inode; - struct inode *tgt = tgtnd->dentry->d_inode; struct ptlrpc_request *request = NULL; struct ll_sb_info *sbi = ll_i2sbi(src); struct md_op_data op_data = { { 0 } }; int err; ENTRY; CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s," - "tgt_dir=%lu/%u(%p)\n", 
srcnd->last.len, srcnd->last.name, - src->i_ino, src->i_generation, src, tgtnd->last.len, - tgtnd->last.name, tgt->i_ino, tgt->i_generation, tgt); + "tgt_dir=%lu/%u(%p)\n", src_name->len, src_name->name, + src->i_ino, src->i_generation, src, tgt_name->len, + tgt_name->name, tgt->i_ino, tgt->i_generation, tgt); ll_prepare_md_op_data(&op_data, src, tgt, NULL, 0, 0); - err = md_rename(sbi->ll_md_exp, &op_data, - srcnd->last.name, srcnd->last.len, - tgtnd->last.name, tgtnd->last.len, &request); + src_name->name, src_name->len, + tgt_name->name, tgt_name->len, &request); if (!err) { ll_update_times(request, 0, src); ll_update_times(request, 0, tgt); @@ -906,6 +870,75 @@ static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) RETURN(err); } +static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev) +{ + return ll_mknod_generic(nd->dentry->d_inode, &nd->last, mode,rdev,NULL); +} +static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd) +{ + return ll_rename_generic(srcnd->dentry->d_inode, &srcnd->last, + tgtnd->dentry->d_inode, &tgtnd->last); +} +static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd) +{ + return ll_link_generic(srcnd->dentry->d_inode, tgtnd->dentry->d_inode, + &tgtnd->last); +} +static int ll_symlink_raw(struct nameidata *nd, const char *tgt) +{ + return ll_symlink_generic(nd->dentry->d_inode, &nd->last, tgt); +} +static int ll_rmdir_raw(struct nameidata *nd) +{ + return ll_rmdir_generic(nd->dentry->d_inode, nd->dentry, &nd->last); +} +static int ll_mkdir_raw(struct nameidata *nd, int mode) +{ + return ll_mkdir_generic(nd->dentry->d_inode, &nd->last, mode, NULL); +} +static int ll_unlink_raw(struct nameidata *nd) +{ + return ll_unlink_generic(nd->dentry->d_inode, &nd->last); +} + +static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode, + ll_dev_t rdev) +{ + return ll_mknod_generic(dir, &dchild->d_name, mode, + old_encode_dev(rdev), dchild); +} + +#if (LINUX_VERSION_CODE > 
KERNEL_VERSION(2,5,0)) +static int ll_unlink(struct inode * dir, struct dentry *dentry) +{ + return ll_unlink_generic(dir, &dentry->d_name); +} +static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + return ll_mkdir_generic(dir, &dentry->d_name, mode, dentry); +} +static int ll_rmdir(struct inode *dir, struct dentry *dentry) +{ + return ll_rmdir_generic(dir, NULL, &dentry->d_name); +} +static int ll_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname) +{ + return ll_symlink_generic(dir, &dentry->d_name, oldname); +} +static int ll_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + return ll_link_generic(old_dentry->d_inode, dir, &new_dentry->d_name); +} +static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + return ll_rename_generic(old_dir, &old_dentry->d_name, new_dir, + &new_dentry->d_name); +} +#endif + struct inode_operations ll_dir_inode_operations = { .link_raw = ll_link_raw, .unlink_raw = ll_unlink_raw, @@ -924,7 +957,16 @@ struct inode_operations ll_dir_inode_operations = { #else .lookup = ll_lookup_nd, .create = ll_create_nd, - .getattr_it = ll_getattr, + .getattr_it = ll_getattr_it, + /* We need all these non-raw things for NFSD, to not patch it. 
*/ + .unlink = ll_unlink, + .mkdir = ll_mkdir, + .rmdir = ll_rmdir, + .symlink = ll_symlink, + .link = ll_link, + .rename = ll_rename, + .setattr = ll_setattr, + .getattr = ll_getattr, #endif .permission = ll_inode_permission, .setxattr = ll_setxattr, @@ -932,3 +974,18 @@ struct inode_operations ll_dir_inode_operations = { .listxattr = ll_listxattr, .removexattr = ll_removexattr, }; + +struct inode_operations ll_special_inode_operations = { + .setattr_raw = ll_setattr_raw, + .setattr = ll_setattr, +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + .getattr_it = ll_getattr_it, +#else + .revalidate_it = ll_inode_revalidate_it, +#endif + .permission = ll_inode_permission, + .setxattr = ll_setxattr, + .getxattr = ll_getxattr, + .listxattr = ll_listxattr, + .removexattr = ll_removexattr, +}; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 8c55247..0bab99e 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -43,8 +43,8 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include #include "llite_internal.h" #include @@ -971,10 +971,12 @@ void ll_ra_accounting(struct ll_async_page *llap, struct address_space *mapping) } #define RAS_CDEBUG(ras) \ - CDEBUG(D_READA, "lrp %lu c %lu ws %lu wl %lu nra %lu\n", \ - ras->ras_last_readpage, ras->ras_consecutive, \ - ras->ras_window_start, ras->ras_window_len, \ - ras->ras_next_readahead); + CDEBUG(D_READA, \ + "lrp %lu cr %lu cp %lu ws %lu wl %lu nra %lu r %lu ri %lu\n", \ + ras->ras_last_readpage, ras->ras_consecutive_requests, \ + ras->ras_consecutive_pages, ras->ras_window_start, \ + ras->ras_window_len, ras->ras_next_readahead, \ + ras->ras_requests, ras->ras_request_index); static int index_in_window(unsigned long index, unsigned long point, unsigned long before, unsigned long after) @@ -1002,9 +1004,13 @@ void ll_ra_read_in(struct file *f, struct ll_ra_read *rar) struct ll_readahead_state *ras; ras = ll_ras_get(f); - rar->lrr_reader = current; spin_lock(&ras->ras_lock); + ras->ras_requests++; + 
ras->ras_request_index = 0; + ras->ras_consecutive_requests++; + rar->lrr_reader = current; + list_add(&rar->lrr_linkage, &ras->ras_read_beads); spin_unlock(&ras->ras_lock); } @@ -1075,34 +1081,19 @@ static int ll_readahead(struct ll_readahead_state *ras, spin_lock(&ras->ras_lock); bead = ll_ra_read_get_locked(ras); - /* reserve a part of the read-ahead window that we'll be issuing */ + /* Enlarge the RA window to encompass the full read */ + if (bead != NULL && ras->ras_window_start + ras->ras_window_len < + bead->lrr_start + bead->lrr_count) { + ras->ras_window_len = bead->lrr_start + bead->lrr_count - + ras->ras_window_start; + } + /* Reserve a part of the read-ahead window that we'll be issuing */ if (ras->ras_window_len) { start = ras->ras_next_readahead; end = ras->ras_window_start + ras->ras_window_len - 1; } - if (bead != NULL) { - pgoff_t read_end; - - start = max(start, bead->lrr_start); - read_end = bead->lrr_start + bead->lrr_count - 1; - if (ras->ras_consecutive > start - bead->lrr_start + 1) - /* - * if current read(2) is a part of larger sequential - * read, make sure read-ahead is at least to the end - * of the read region. - * - * XXX nikita: This doesn't work when some pages in - * [lrr_start, start] were cached (and, as a result, - * weren't counted in ->ras_consecutive). - */ - end = max(end, read_end); - else - /* - * otherwise, clip read-ahead at the read boundary. 
- */ - end = read_end; - } if (end != 0) { + /* Truncate RA window to end of file */ end = min(end, (unsigned long)((kms - 1) >> PAGE_CACHE_SHIFT)); ras->ras_next_readahead = max(end, end + 1); RAS_CDEBUG(ras); @@ -1209,10 +1200,11 @@ static void ras_set_start(struct ll_readahead_state *ras, unsigned long index) static void ras_reset(struct ll_readahead_state *ras, unsigned long index) { ras->ras_last_readpage = index; - ras->ras_consecutive = 1; + ras->ras_consecutive_requests = 0; + ras->ras_consecutive_pages = 0; ras->ras_window_len = 0; ras_set_start(ras, index); - ras->ras_next_readahead = ras->ras_window_start; + ras->ras_next_readahead = max(ras->ras_window_start, index); RAS_CDEBUG(ras); } @@ -1221,11 +1213,13 @@ void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras) { spin_lock_init(&ras->ras_lock); ras_reset(ras, 0); + ras->ras_requests = 0; INIT_LIST_HEAD(&ras->ras_read_beads); } -static void ras_update(struct ll_sb_info *sbi, struct ll_readahead_state *ras, - unsigned long index, unsigned hit) +static void ras_update(struct ll_sb_info *sbi, struct inode *inode, + struct ll_readahead_state *ras, unsigned long index, + unsigned hit) { struct ll_ra_info *ra = &sbi->ll_ra_info; int zero = 0; @@ -1252,36 +1246,62 @@ static void ras_update(struct ll_sb_info *sbi, struct ll_readahead_state *ras, ll_ra_stats_inc_unlocked(ra, RA_STAT_MISS_IN_WINDOW); } + /* On the second access to a file smaller than the tunable + * ra_max_read_ahead_whole_pages trigger RA on all pages in the + * file up to ra_max_pages. This is simply a best effort and + * only occurs once per open file. Normal RA behavior is reverted + * to for subsequent IO. The mmap case does not increment + * ras_requests and thus can never trigger this behavior. 
*/ + if (ras->ras_requests == 2 && !ras->ras_request_index) { + __u64 kms_pages; + + kms_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages, + ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages); + + if (kms_pages && + kms_pages <= ra->ra_max_read_ahead_whole_pages) { + ras->ras_window_start = 0; + ras->ras_last_readpage = 0; + ras->ras_next_readahead = 0; + ras->ras_window_len = min(ra->ra_max_pages, + ra->ra_max_read_ahead_whole_pages); + GOTO(out_unlock, 0); + } + } + if (zero) { ras_reset(ras, index); GOTO(out_unlock, 0); } ras->ras_last_readpage = index; - ras->ras_consecutive++; + ras->ras_consecutive_pages++; ras_set_start(ras, index); ras->ras_next_readahead = max(ras->ras_window_start, ras->ras_next_readahead); - /* wait for a few pages to arrive before issuing readahead to avoid - * the worst overutilization */ - if (ras->ras_consecutive == 3) { + /* Trigger RA in the mmap case where ras_consecutive_requests + * is not incremented and thus can't be used to trigger RA */ + if (!ras->ras_window_len && ras->ras_consecutive_pages == 3) { ras->ras_window_len = PTLRPC_MAX_BRW_PAGES; GOTO(out_unlock, 0); } - /* we need to increase the window sometimes. we'll arbitrarily - * do it half-way through the pages in an rpc */ - if ((index & (PTLRPC_MAX_BRW_PAGES - 1)) == - (PTLRPC_MAX_BRW_PAGES >> 1)) { - ras->ras_window_len += PTLRPC_MAX_BRW_PAGES; - ras->ras_window_len = min(ras->ras_window_len, + /* The initial ras_window_len is set to the request size. To avoid + * uselessly reading and discarding pages for random IO the window is + * only increased once per consecutive request received. 
*/ + if (ras->ras_consecutive_requests > 1 && !ras->ras_request_index) { + ras->ras_window_len = min(ras->ras_window_len + + PTLRPC_MAX_BRW_PAGES, ra->ra_max_pages); } EXIT; out_unlock: RAS_CDEBUG(ras); + ras->ras_request_index++; spin_unlock(&ras->ras_lock); spin_unlock(&sbi->ll_lock); return; @@ -1357,6 +1377,17 @@ int ll_readpage(struct file *filp, struct page *page) (((loff_t)page->index) << PAGE_SHIFT)); LASSERT(atomic_read(&filp->f_dentry->d_inode->i_count) > 0); + if (!ll_i2info(inode)->lli_smd) { + /* File with no objects - one big hole */ + /* We use this just for remove_from_page_cache that is not + * exported, we'd make page back up to date. */ + ll_truncate_complete_page(page); + clear_page(page); + SetPageUptodate(page); + unlock_page(page); + RETURN(0); + } + rc = oig_init(&oig); if (rc < 0) GOTO(out, rc); @@ -1370,7 +1401,7 @@ int ll_readpage(struct file *filp, struct page *page) GOTO(out, rc = PTR_ERR(llap)); if (ll_i2sbi(inode)->ll_ra_info.ra_max_pages) - ras_update(ll_i2sbi(inode), &fd->fd_ras, page->index, + ras_update(ll_i2sbi(inode), inode, &fd->fd_ras, page->index, llap->llap_defer_uptodate); if (llap->llap_defer_uptodate) { diff --git a/lustre/llite/rw24.c b/lustre/llite/rw24.c index 1f39574..98c4a4c 100644 --- a/lustre/llite/rw24.c +++ b/lustre/llite/rw24.c @@ -44,8 +44,8 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include #include "llite_internal.h" #include diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 72250af..598c130 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -46,8 +46,8 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include +#include +#include #include "llite_internal.h" #include diff --git a/lustre/llite/special.c b/lustre/llite/special.c deleted file mode 100644 index bf1d707..0000000 --- a/lustre/llite/special.c +++ /dev/null @@ -1,419 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Special file handling 
for Lustre. - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Wang Di - * Author: Andreas Dilger - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif -#include -#include "llite_internal.h" - -#define INODE_OPS 1 -#define FILE_OPS 2 - -static struct file_operations **get_save_fops(struct file* filp, int mode) -{ - struct inode *inode = filp->f_dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - - if (mode == INODE_OPS) { - return &(lli->ll_save_ifop); - } else if (mode == FILE_OPS) { - if (S_ISFIFO(inode->i_mode)) { - switch (filp->f_mode) { - case 1: /*O_RDONLY*/ - return &(lli->ll_save_ffop); - case 2: /*O_WRONLY*/ - return &(lli->ll_save_wfop); - case 3: /* O_RDWR */ - return &(lli->ll_save_wrfop); - default: - return NULL; - } - } - return &(lli->ll_save_ffop); - } else { - CERROR("invalid special file ops %d\n", mode); - LBUG(); - return NULL; - } -} - -static void save_fops(struct file *filp, struct inode *inode, - struct file_operations *sfops) -{ - if (sfops != filp->f_op) { - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - - *pfop = filp->f_op; - if (S_ISCHR(inode->i_mode)) - filp->f_op = &ll_special_chr_file_fops; - else if 
(S_ISFIFO(inode->i_mode)) - filp->f_op = &ll_special_fifo_file_fops; - } -} - -static ssize_t ll_special_file_read(struct file *filp, char *buf, - size_t count, loff_t *ppos) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->read) - rc = (*pfop)->read(filp, buf, count, ppos); - - RETURN(rc); -} - -static ssize_t ll_special_file_write(struct file *filp, const char *buf, - size_t count, loff_t *ppos) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->write) - rc = (*pfop)->write(filp, buf, count, ppos); - - RETURN(rc); -} - -static int ll_special_file_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = -ENOTTY; - - if (pfop && *pfop && (*pfop)->ioctl) { - struct file_operations *sfops = filp->f_op; - - rc = (*pfop)->ioctl(inode, filp, cmd, arg); - save_fops(filp, inode, sfops); - } - RETURN(rc); -} - -static loff_t ll_special_file_seek(struct file *filp, loff_t offset, int origin) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = 0; - - if (pfop && *pfop && (*pfop)->llseek) - rc = (*pfop)->llseek(filp, offset, origin); - else - rc = default_llseek(filp, offset, origin); - - RETURN(rc); -} - - -#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) - -static unsigned int ll_special_file_poll(struct file *filp, - struct poll_table_struct *poll_table) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = DEFAULT_POLLMASK; - - if (pfop && *pfop && (*pfop)->poll) - rc = (*pfop)->poll(filp, poll_table); - - RETURN(rc); -} - -static int ll_special_file_open(struct inode *inode, struct file *filp) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->open) - rc = (*pfop)->open(inode, filp); 
- - RETURN(rc); -} - -static ssize_t ll_special_read(struct file *filp, char *buf, size_t count, - loff_t *ppos) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->read) - rc = (*pfop)->read(filp, buf, count, ppos); - - RETURN(rc); -} - -static ssize_t ll_special_write(struct file *filp, const char *buf, - size_t count, loff_t *ppos) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->write) - rc = (*pfop)->write(filp, buf, count, ppos); - - RETURN(rc); -} - -#ifdef HAVE_UNLOCKED_IOCTL -static long ll_special_unlocked_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) -{ - struct file_operations **pfop; - int rc = -ENOTTY; - - lock_kernel(); - pfop = get_save_fops(filp, INODE_OPS); - unlock_kernel(); - if (pfop && *pfop && (*pfop)->unlocked_ioctl) { - struct file_operations *sfops = filp->f_op; - - rc = (*pfop)->unlocked_ioctl(filp, cmd, arg); - - /* sometimes, file_operations will be changed in ioctl */ - lock_kernel(); - save_fops(filp, filp->f_dentry->d_inode, sfops); - unlock_kernel(); - } - - RETURN(rc); -} -#endif - -static int ll_special_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - int rc = -ENOTTY; - - if (pfop && *pfop && (*pfop)->ioctl) { - struct file_operations *sfops = filp->f_op; - - rc = (*pfop)->ioctl(inode, filp, cmd, arg); - - /* sometimes, file_operations will be changed in ioctl */ - save_fops(filp, inode, sfops); - } - - RETURN(rc); -} - -static int ll_special_mmap(struct file * filp, struct vm_area_struct * vma) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - int rc = -ENODEV; - - if (pfop && *pfop && (*pfop)->mmap) - rc = (*pfop)->mmap(filp, vma); - - RETURN(rc); -} - -static loff_t ll_special_seek(struct file *filp, loff_t offset, int origin) -{ - struct 
file_operations** pfop = get_save_fops (filp, INODE_OPS); - int rc = 0; - - if (pfop && *pfop && (*pfop)->llseek) - rc = (*pfop)->llseek(filp, offset, origin); - else - rc = default_llseek(filp, offset, origin); - - RETURN(rc); -} - -static int ll_special_fsync(struct file *filp, struct dentry *dentry, int data) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->fsync) - rc = (*pfop)->fsync(filp, dentry, data); - - RETURN(rc); -} - -static int ll_special_file_fasync(int fd, struct file *filp, int on) -{ - struct file_operations **pfop = get_save_fops(filp, FILE_OPS); - int rc = -EINVAL; - - if (pfop && *pfop && (*pfop)->fasync) - rc = (*pfop)->fasync(fd, filp, on); - - RETURN(rc); -} - -static int ll_special_release_internal(struct inode *inode, struct file *filp, - int mode) -{ - struct file_operations **pfop = get_save_fops(filp, mode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc = 0, err; - ENTRY; - - if (pfop && *pfop) { - if ((*pfop)->release) - rc = (*pfop)->release(inode, filp); - /* FIXME fops_put */ - } - - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_RELEASE); - - err = ll_mdc_close(sbi->ll_md_exp, inode, filp); - if (err && rc == 0) - rc = err; - - RETURN(rc); -} - -static int ll_special_open(struct inode *inode, struct file *filp) -{ - struct file_operations **pfop = get_save_fops(filp, INODE_OPS); - struct file_operations *sfops = filp->f_op; - struct ptlrpc_request *req; - struct lookup_intent *it; - struct ll_file_data *fd; - int rc = -EINVAL, err; - ENTRY; - - fd = ll_file_data_get(); - if (fd == NULL) - RETURN(-ENOMEM); - - if (pfop && *pfop) { - /* FIXME fops_get */ - if ((*pfop)->open) { - rc = (*pfop)->open(inode, filp); - - /* sometimes file_operations will be changed in open */ - save_fops(filp, inode, sfops); - } - } - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - - it = filp->f_it; - - err = ll_local_open(filp, it, fd); - if (rc != 0) { - 
CERROR("error opening special file: rc %d\n", rc); - ll_mdc_close(ll_i2sbi(inode)->ll_md_exp, inode, filp); - } else if (err) { - if (pfop && *pfop && (*pfop)->release) - (*pfop)->release(inode, filp); - /* FIXME fops_put */ - rc = err; - } - - req = it->d.lustre.it_data; - if (req) - ptlrpc_req_finished(req); - - RETURN(rc); -} - -static int ll_special_release(struct inode *inode, struct file *filp) -{ - return ll_special_release_internal(inode, filp, INODE_OPS); -} - -static int ll_special_file_release(struct inode *inode, struct file *filp) -{ - return ll_special_release_internal(inode, filp, FILE_OPS); -} - -struct inode_operations ll_special_inode_operations = { - .setattr_raw = ll_setattr_raw, - .setattr = ll_setattr, -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - .getattr_it = ll_getattr, -#else - .revalidate_it = ll_inode_revalidate_it, -#endif - .permission = ll_inode_permission, - .setxattr = ll_setxattr, - .getxattr = ll_getxattr, - .listxattr = ll_listxattr, - .removexattr = ll_removexattr, -}; - -struct file_operations ll_special_chr_inode_fops = { - .owner = THIS_MODULE, - .open = ll_special_open, -}; - -struct file_operations ll_special_blk_inode_fops = { - .owner = THIS_MODULE, - .read = ll_special_read, - .write = ll_special_write, - .ioctl = ll_special_ioctl, -#ifdef HAVE_UNLOCKED_IOCTL - .unlocked_ioctl = ll_special_unlocked_ioctl, -#endif - .open = ll_special_open, - .release = ll_special_release, - .mmap = ll_special_mmap, - .llseek = ll_special_seek, - .fsync = ll_special_fsync, -}; - -struct file_operations ll_special_fifo_inode_fops = { - .owner = THIS_MODULE, - .open = ll_special_open, -}; - -struct file_operations ll_special_sock_inode_fops = { - .owner = THIS_MODULE, - .open = ll_special_open -}; - -struct file_operations ll_special_chr_file_fops = { - .owner = THIS_MODULE, - .llseek = ll_special_file_seek, - .read = ll_special_file_read, - .write = ll_special_file_write, - .poll = ll_special_file_poll, - .ioctl = 
ll_special_file_ioctl, - .open = ll_special_file_open, - .release = ll_special_file_release, - .fasync = ll_special_file_fasync, -}; - -struct file_operations ll_special_fifo_file_fops = { - .owner = THIS_MODULE, - .llseek = ll_special_file_seek, - .read = ll_special_file_read, - .write = ll_special_file_write, - .poll = ll_special_file_poll, - .ioctl = ll_special_file_ioctl, - .open = ll_special_file_open, - .release = ll_special_file_release, -}; - diff --git a/lustre/llite/super.c b/lustre/llite/super.c index cd1b232..eb16296 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -27,14 +27,14 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include #include -#include +#include #include "llite_internal.h" #include @@ -60,7 +60,7 @@ static int __init init_lustre_lite(void) { int rc, seed[2]; - printk(KERN_INFO "Lustre: Lustre Lite Client File System; " + printk(KERN_INFO "Lustre: Lustre Client File System; " "info@clusterfs.com\n"); ll_file_data_slab = kmem_cache_create("ll_file_data", sizeof(struct ll_file_data), 0, @@ -83,15 +83,18 @@ static int __init init_lustre_lite(void) static void __exit exit_lustre_lite(void) { + int rc; + lustre_register_client_fill_super(NULL); ll_unregister_cache(&ll_cache_definition); - LASSERTF(kmem_cache_destroy(ll_file_data_slab) == 0, - "couldn't destroy ll_file_data slab\n"); - if (ll_async_page_slab) - LASSERTF(kmem_cache_destroy(ll_async_page_slab) == 0, - "couldn't destroy ll_async_page slab\n"); + rc = kmem_cache_destroy(ll_file_data_slab); + LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); + if (ll_async_page_slab) { + rc = kmem_cache_destroy(ll_async_page_slab); + LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n"); + } if (proc_lustre_fs_root) { lprocfs_remove(proc_lustre_fs_root); diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index f927774..976fcca 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -27,12 
+27,12 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include -#include +#include #include "llite_internal.h" static kmem_cache_t *ll_inode_cachep; @@ -79,8 +79,10 @@ int ll_init_inodecache(void) void ll_destroy_inodecache(void) { - LASSERTF(kmem_cache_destroy(ll_inode_cachep) == 0, - "ll_inode_cache: not all structures were freed\n"); + int rc; + + rc = kmem_cache_destroy(ll_inode_cachep); + LASSERTF(rc == 0, "ll_inode_cache: not all structures were freed\n"); } /* exported operations */ @@ -100,7 +102,7 @@ struct super_operations lustre_super_operations = static int __init init_lustre_lite(void) { int rc, seed[2]; - printk(KERN_INFO "Lustre: Lustre Lite Client File System; " + printk(KERN_INFO "Lustre: Lustre Client File System; " "info@clusterfs.com\n"); rc = ll_init_inodecache(); if (rc) @@ -128,16 +130,19 @@ static int __init init_lustre_lite(void) static void __exit exit_lustre_lite(void) { + int rc; + lustre_register_client_fill_super(NULL); ll_unregister_cache(&ll_cache_definition); ll_destroy_inodecache(); - LASSERTF(kmem_cache_destroy(ll_file_data_slab) == 0, - "couldn't destroy ll_file_data slab\n"); - if (ll_async_page_slab) - LASSERTF(kmem_cache_destroy(ll_async_page_slab) == 0, - "couldn't destroy ll_async_page slab\n"); + rc = kmem_cache_destroy(ll_file_data_slab); + LASSERTF(rc == 0, "couldn't destroy ll_file_data slab\n"); + if (ll_async_page_slab) { + rc = kmem_cache_destroy(ll_async_page_slab); + LASSERTF(rc == 0, "couldn't destroy ll_async_page slab\n"); + } if (proc_lustre_fs_root) { lprocfs_remove(proc_lustre_fs_root); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index 2d713bc..990a9c1 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -26,7 +26,7 @@ #include #define DEBUG_SUBSYSTEM S_LLITE -#include +#include #include "llite_internal.h" static int ll_readlink_internal(struct inode *inode, @@ -157,7 +157,7 @@ struct inode_operations 
ll_fast_symlink_inode_operations = { #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) .revalidate_it = ll_inode_revalidate_it, #else - .getattr_it = ll_getattr, + .getattr_it = ll_getattr_it, #endif .permission = ll_inode_permission, .setxattr = ll_setxattr, diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 2e0e1f3..cbfc0da 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -32,11 +32,11 @@ #define DEBUG_SUBSYSTEM S_LLITE -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "llite_internal.h" @@ -47,15 +47,18 @@ #define XATTR_USER_T (1) #define XATTR_TRUSTED_T (2) #define XATTR_SECURITY_T (3) -#define XATTR_ACL_T (4) -#define XATTR_OTHER_T (5) +#define XATTR_ACL_ACCESS_T (4) +#define XATTR_ACL_DEFAULT_T (5) +#define XATTR_OTHER_T (6) static int get_xattr_type(const char *name) { - if (!strcmp(name, XATTR_NAME_ACL_ACCESS) || - !strcmp(name, XATTR_NAME_ACL_DEFAULT)) - return XATTR_ACL_T; + if (!strcmp(name, XATTR_NAME_ACL_ACCESS)) + return XATTR_ACL_ACCESS_T; + + if (!strcmp(name, XATTR_NAME_ACL_DEFAULT)) + return XATTR_ACL_DEFAULT_T; if (!strncmp(name, XATTR_USER_PREFIX, sizeof(XATTR_USER_PREFIX) - 1)) @@ -75,8 +78,11 @@ int get_xattr_type(const char *name) static int xattr_type_filter(struct ll_sb_info *sbi, int xattr_type) { - if (xattr_type == XATTR_ACL_T && !(sbi->ll_flags & LL_SBI_ACL)) + if ((xattr_type == XATTR_ACL_ACCESS_T || + xattr_type == XATTR_ACL_DEFAULT_T) && + !(sbi->ll_flags & LL_SBI_ACL)) return -EOPNOTSUPP; + if (xattr_type == XATTR_USER_T && !(sbi->ll_flags & LL_SBI_USER_XATTR)) return -EOPNOTSUPP; if (xattr_type == XATTR_TRUSTED_T && !capable(CAP_SYS_ADMIN)) @@ -178,6 +184,26 @@ int ll_getxattr_common(struct inode *inode, const char *name, if (rc) RETURN(rc); + /* posix acl is under protection of LOOKUP lock. when calling to this, + * we just have path resolution to the target inode, so we have great + * chance that cached ACL is uptodate. 
+ */ + if (xattr_type == XATTR_ACL_ACCESS_T) { + struct ll_inode_info *lli = ll_i2info(inode); + struct posix_acl *acl; + + spin_lock(&lli->lli_lock); + acl = posix_acl_dup(lli->lli_posix_acl); + spin_unlock(&lli->lli_lock); + + if (!acl) + RETURN(-ENODATA); + + rc = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + RETURN(rc); + } + do_getxattr: rc = md_getxattr(sbi->ll_md_exp, ll_inode2fid(inode), valid, name, NULL, 0, size, 0, &req); diff --git a/lustre/lmv/lmv_fld.c b/lustre/lmv/lmv_fld.c index 79956a5..106dd44 100644 --- a/lustre/lmv/lmv_fld.c +++ b/lustre/lmv/lmv_fld.c @@ -35,13 +35,13 @@ #include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "lmv_internal.h" /* dummy function for a while */ diff --git a/lustre/lmv/lmv_intent.c b/lustre/lmv/lmv_intent.c index 2667e68..f91bd52 100644 --- a/lustre/lmv/lmv_intent.c +++ b/lustre/lmv/lmv_intent.c @@ -36,17 +36,14 @@ #include #endif -#include -#include -#include -#include -#include -#include -#include -//#include -#include -//#include -//#include +#include +#include +#include +#include +#include +#include +#include +#include #include "lmv_internal.h" static inline void lmv_drop_intent_lock(struct lookup_intent *it) diff --git a/lustre/lmv/lmv_internal.h b/lustre/lmv/lmv_internal.h index 228c3e4..094ae72 100644 --- a/lustre/lmv/lmv_internal.h +++ b/lustre/lmv/lmv_internal.h @@ -22,7 +22,7 @@ #ifndef _LMV_INTERNAL_H_ #define _LMV_INTERNAL_H_ -#include +#include #define LMV_MAX_TGT_COUNT 128 diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index 419320e..6b1f71f 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -34,17 +34,17 @@ #include #else #include -#include +#include #endif #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "lmv_internal.h" /* not defined for 
liblustre building */ @@ -240,8 +240,8 @@ static void lmv_set_timeouts(struct obd_device *obd) if (tgts->ltd_exp == NULL) continue; - obd_set_info(tgts->ltd_exp, strlen("inter_mds"), - "inter_mds", 0, NULL); + obd_set_info_async(tgts->ltd_exp, strlen("inter_mds"), + "inter_mds", 0, NULL, NULL); } } @@ -1903,8 +1903,9 @@ static int lmv_get_info(struct obd_export *exp, __u32 keylen, RETURN(-EINVAL); } -int lmv_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) +int lmv_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) { struct lmv_tgt_desc *tgt; struct obd_device *obd; @@ -1956,7 +1957,7 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen, exp = tgt_obd->obd_self_export; } - err = obd_set_info(exp, keylen, key, vallen, val); + err = obd_set_info_async(exp, keylen, key, vallen, val, set); if (!rc) rc = err; } @@ -1974,8 +1975,9 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen, i++, tgt++) { if (!tgt->ltd_exp) continue; - rc = obd_set_info(tgt->ltd_exp, - keylen, key, vallen, val); + rc = obd_set_info_async(tgt->ltd_exp, + keylen, key, vallen, + val, set); if (rc) RETURN(rc); } @@ -1992,8 +1994,9 @@ int lmv_set_info(struct obd_export *exp, obd_count keylen, RETURN(rc); i = lmv_fld_lookup(obd, fid); - rc = obd_set_info(lmv->tgts[i].ltd_exp, - keylen, key, vallen, val); + rc = obd_set_info_async(lmv->tgts[i].ltd_exp, + keylen, key, vallen, val, + set); RETURN(rc); } @@ -2378,7 +2381,7 @@ struct obd_ops lmv_obd_ops = { .o_llog_init = lmv_llog_init, .o_llog_finish = lmv_llog_finish, .o_get_info = lmv_get_info, - .o_set_info = lmv_set_info, + .o_set_info_async = lmv_set_info_async, .o_packmd = lmv_packmd, .o_unpackmd = lmv_unpackmd, .o_notify = lmv_notify, diff --git a/lustre/lmv/lmv_object.c b/lustre/lmv/lmv_object.c index 8663d57..7740271 100644 --- a/lustre/lmv/lmv_object.c +++ b/lustre/lmv/lmv_object.c @@ -35,17 +35,17 @@ 
#include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "lmv_internal.h" /* objects cache. */ -extern kmem_cache_t *obj_cache; +extern cfs_mem_cache_t *obj_cache; extern atomic_t obj_cache_count; /* object list and its guard. */ @@ -66,7 +66,7 @@ lmv_obj_alloc(struct obd_device *obd, LASSERT(mea->mea_magic == MEA_MAGIC_LAST_CHAR || mea->mea_magic == MEA_MAGIC_ALL_CHARS); - OBD_SLAB_ALLOC(obj, obj_cache, GFP_NOFS, + OBD_SLAB_ALLOC(obj, obj_cache, CFS_ALLOC_STD, sizeof(*obj)); if (!obj) return NULL; diff --git a/lustre/lmv/lproc_lmv.c b/lustre/lmv/lproc_lmv.c index 3c3bd76..39c62fc 100644 --- a/lustre/lmv/lproc_lmv.c +++ b/lustre/lmv/lproc_lmv.c @@ -22,12 +22,12 @@ #define DEBUG_SUBSYSTEM S_CLASS #include +#include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include -#include +#include +#include #ifndef LPROCFS static struct lprocfs_vars lprocfs_module_vars[] = { {0} }; diff --git a/lustre/lov/Info.plist b/lustre/lov/Info.plist new file mode 100644 index 0000000..006f794 --- /dev/null +++ b/lustre/lov/Info.plist @@ -0,0 +1,41 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + lov + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.lov + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.lvfs + 1.0.0 + com.clusterfs.lustre.obdclass + 1.0.0 + com.clusterfs.lustre.ptlrpc + 1.0.0 + + + diff --git a/lustre/lov/autoMakefile.am b/lustre/lov/autoMakefile.am index f925c2a..583a425 100644 --- a/lustre/lov/autoMakefile.am +++ b/lustre/lov/autoMakefile.am @@ -11,8 +11,36 @@ liblov_a_CFLAGS = $(LLCFLAGS) endif if MODULES +if LINUX modulefs_DATA = lov$(KMODEXT) +endif + +if DARWIN +macos_PROGRAMS := lov + +lov_SOURCES := \ + lov_log.c \ + lov_obd.c \ + lov_pack.c \ + lov_request.c \ + lov_merge.c \ + lov_qos.c \ + lov_offset.c \ + lov_internal.h + +lov_CFLAGS := $(EXTRA_KCFLAGS) +lov_LDFLAGS := $(EXTRA_KLDFLAGS) +lov_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif # DARWIN + endif # MODULES +install-data-hook: $(install_data_hook) + DIST_SOURCES = $(lov-objs:.o=.c) lov_internal.h MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index c08020d..de3dd8d 100755 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -30,14 +30,15 @@ #ifdef __KERNEL__ #include +#include #else #include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "lov_internal.h" diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h index 68c762f..5829fa9 100644 --- a/lustre/lov/lov_internal.h +++ b/lustre/lov/lov_internal.h @@ -84,7 +84,7 @@ static inline struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm) return NULL; atomic_set(&llh->llh_refcount, 2); llh->llh_stripe_count = lsm->lsm_stripe_count; - INIT_LIST_HEAD(&llh->llh_handle.h_link); + CFS_INIT_LIST_HEAD(&llh->llh_handle.h_link); class_handle_hash(&llh->llh_handle, lov_llh_addref); return llh; } @@ -132,8 +132,8 @@ int 
lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off); /* lov_qos.c */ void qos_shrink_lsm(struct lov_request_set *set); -int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, - int newea); +int qos_prep_create(struct obd_export *exp, struct lov_request_set *set); +void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs); int qos_remedy_create(struct lov_request_set *set, struct lov_request *req); /* lov_request.c */ diff --git a/lustre/lov/lov_log.c b/lustre/lov/lov_log.c index 683b744..454b5a6 100644 --- a/lustre/lov/lov_log.c +++ b/lustre/lov/lov_log.c @@ -30,27 +30,21 @@ #endif #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ -#include -#include -#include -#include -#include -#include -#include +#include #else #include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "lov_internal.h" diff --git a/lustre/lov/lov_merge.c b/lustre/lov/lov_merge.c index 01c07e3..ff20962 100644 --- a/lustre/lov/lov_merge.c +++ b/lustre/lov/lov_merge.c @@ -28,13 +28,13 @@ #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ -#include +#include #else #include #endif -#include -#include +#include +#include #include "lov_internal.h" @@ -60,7 +60,7 @@ int lov_merge_lvb(struct obd_export *exp, struct lov_stripe_md *lsm, LASSERT_SPIN_LOCKED(&lsm->lsm_lock); #ifdef __KERNEL__ - LASSERT(lsm->lsm_lock_owner == current); + LASSERT(lsm->lsm_lock_owner == cfs_current()); #endif for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; @@ -103,7 +103,7 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, LASSERT_SPIN_LOCKED(&lsm->lsm_lock); #ifdef __KERNEL__ - LASSERT(lsm->lsm_lock_owner == current); + LASSERT(lsm->lsm_lock_owner == cfs_current()); #endif if (shrink) { diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index b78c15f..7688122 100644 --- 
a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -30,29 +30,23 @@ #endif #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ -#include -#include -#include -#include -#include -#include -#include +#include #else #include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "lov_internal.h" @@ -65,9 +59,9 @@ static void lov_getref(struct obd_device *obd) struct lov_obd *lov = &obd->u.lov; /* nobody gets through here until lov_putref is done */ - down(&lov->lov_lock); + mutex_down(&lov->lov_lock); atomic_inc(&lov->refcount); - up(&lov->lov_lock); + mutex_up(&lov->lov_lock); return; } @@ -76,7 +70,7 @@ static void __lov_del_obd(struct obd_device *obd, struct lov_tgt_desc *tgt); static void lov_putref(struct obd_device *obd) { struct lov_obd *lov = &obd->u.lov; - down(&lov->lov_lock); + mutex_down(&lov->lov_lock); /* ok to dec to 0 more than once -- ltd_exp's will be null */ if (atomic_dec_and_test(&lov->refcount) && lov->death_row) { struct lov_tgt_desc *tgt; @@ -91,7 +85,7 @@ static void lov_putref(struct obd_device *obd) lov->death_row--; } } - up(&lov->lov_lock); + mutex_up(&lov->lov_lock); } #define MAX_STRING_SIZE 128 @@ -105,12 +99,11 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt, struct lustre_handle conn = {0, }; struct obd_import *imp; #ifdef __KERNEL__ - struct proc_dir_entry *lov_proc_dir; + cfs_proc_dir_entry_t *lov_proc_dir; #endif int rc; ENTRY; - tgt_obd = class_find_client_obd(tgt_uuid, LUSTRE_OSC_NAME, &obd->obd_uuid); @@ -176,7 +169,7 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt, lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); if (lov_proc_dir) { struct obd_device *osc_obd = class_conn2obd(&conn); - struct proc_dir_entry *osc_symlink; + cfs_proc_dir_entry_t 
*osc_symlink; char name[MAX_STRING_SIZE]; LASSERT(osc_obd != NULL); @@ -224,7 +217,7 @@ static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) { - struct proc_dir_entry *lov_proc_dir; + cfs_proc_dir_entry_t *lov_proc_dir; struct obd_device *osc_obd = class_exp2obd(tgt->ltd_exp); struct lov_obd *lov = &obd->u.lov; int rc; @@ -235,7 +228,7 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt) lov_proc_dir = lprocfs_srch(obd->obd_proc_entry, "target_obds"); if (lov_proc_dir) { - struct proc_dir_entry *osc_symlink; + cfs_proc_dir_entry_t *osc_symlink; osc_symlink = lprocfs_srch(lov_proc_dir, osc_obd->obd_name); if (osc_symlink) { @@ -459,9 +452,15 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, RETURN(-ENOMEM); } - memset(tgt, 0, bufsize); if (lov->tgts) { + int i; memcpy(tgt, lov->tgts, lov->bufsize); + LASSERT(index == lov->desc.ld_tgt_count); + for (i = 0; i < index; i++) { + INIT_LIST_HEAD(&tgt[i].qos_bavail_list); + list_splice(&lov->tgts[i].qos_bavail_list, + &tgt[i].qos_bavail_list); + } OBD_FREE(lov->tgts, lov->bufsize); } @@ -481,6 +480,8 @@ static int lov_add_target(struct obd_device *obd, struct obd_uuid *uuidp, tgt->uuid = *uuidp; /* XXX - add a sanity check on the generation number. 
*/ tgt->ltd_gen = gen; + tgt->index = index; + INIT_LIST_HEAD(&tgt->qos_bavail_list); if (index >= lov->desc.ld_tgt_count) lov->desc.ld_tgt_count = index + 1; @@ -625,7 +626,8 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) struct lprocfs_static_vars lvars; struct lov_desc *desc; struct lov_obd *lov = &obd->u.lov; - int count; + struct lov_tgt_desc *tgts; + int count, i; ENTRY; if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { @@ -675,18 +677,22 @@ static int lov_setup(struct obd_device *obd, struct lustre_cfg *lcfg) CERROR("Out of memory\n"); RETURN(-EINVAL); } - memset(lov->tgts, 0, lov->bufsize); + for (i = 0, tgts = lov->tgts; i < max(count, 1); i++, tgts++) { + tgts->index = i; + INIT_LIST_HEAD(&tgts->qos_bavail_list); + } desc->ld_active_tgt_count = 0; lov->desc = *desc; sema_init(&lov->lov_lock, 1); atomic_set(&lov->refcount, 0); + INIT_LIST_HEAD(&lov->qos_bavail_list); lprocfs_init_vars(lov, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); #ifdef LPROCFS { - struct proc_dir_entry *entry; + cfs_proc_dir_entry_t *entry; entry = create_proc_entry("target_obd", 0444, obd->obd_proc_entry); @@ -947,7 +953,9 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, { struct lov_obd *lov; struct lov_request_set *set = NULL; - struct list_head *pos; + struct obd_statfs osfs; + unsigned long maxage; + struct lov_request *req; int rc = 0; ENTRY; @@ -972,14 +980,14 @@ static int lov_create(struct obd_export *exp, struct obdo *src_oa, RETURN(rc); } + maxage = cfs_time_shift(-lov->desc.ld_qos_maxage); + obd_statfs(exp->exp_obd, &osfs, maxage); + rc = lov_prep_create_set(exp, ea, src_oa, oti, &set); if (rc) RETURN(rc); - list_for_each (pos, &set->set_list) { - struct lov_request *req = - list_entry(pos, struct lov_request, rq_link); - + list_for_each_entry(req, &set->set_list, rq_link) { /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ rc = obd_create(lov->tgts[req->rq_idx].ltd_exp, req->rq_oa, &req->rq_md, oti); @@ -1521,7 +1529,7 @@ static 
struct obd_async_page_ops lov_async_page_ops = { }; int lov_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, struct page *page, + struct lov_oinfo *loi, cfs_page_t *page, obd_off offset, struct obd_async_page_ops *ops, void *data, void **res) { @@ -1958,7 +1966,7 @@ static int lov_join_lru(struct obd_export *exp, } while(0) static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age) + cfs_time_t max_age) { struct lov_obd *lov = &obd->u.lov; struct obd_statfs lov_sfs; @@ -1983,6 +1991,7 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs, rc = err; continue; } + qos_update(lov, i, &lov_sfs); if (!set) { memcpy(osfs, &lov_sfs, sizeof(lov_sfs)); @@ -2211,14 +2220,22 @@ out: RETURN(rc); } -static int lov_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) +static int lov_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) { struct obd_device *obddev = class_exp2obd(exp); struct lov_obd *lov = &obddev->u.lov; int i, rc = 0, err; + int no_set = !set; ENTRY; + if (no_set) { + set = ptlrpc_prep_set(); + if (!set) + RETURN(-ENOMEM); + } + if (KEY_IS(KEY_NEXT_ID)) { if (vallen > lov->desc.ld_tgt_count) RETURN(-EINVAL); @@ -2234,8 +2251,9 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, continue; /* hit all OSCs, even inactive ones */ - err = obd_set_info(lov->tgts[i].ltd_exp, keylen, key, - vallen, ((obd_id*)val) + i); + err = obd_set_info_async(lov->tgts[i].ltd_exp, keylen, + key, vallen, + ((obd_id*)val) + i, set); if (!rc) rc = err; } @@ -2248,8 +2266,8 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (!lov->tgts[i].ltd_exp || !lov->tgts[i].active) continue; - err = obd_set_info(lov->tgts[i].ltd_exp, keylen, key, - vallen, val); + err = obd_set_info_async(lov->tgts[i].ltd_exp, keylen, + key, vallen, val, set); 
if (!rc) rc = err; } @@ -2274,13 +2292,19 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen, if (!val && !lov->tgts[i].active) continue; - err = obd_set_info(lov->tgts[i].ltd_exp, - keylen, key, vallen, val); + err = obd_set_info_async(lov->tgts[i].ltd_exp, + keylen, key, vallen, val, set); if (!rc) rc = err; } out: lov_putref(obddev); + if (no_set) { + err = ptlrpc_set_wait(set); + if (!rc) + rc = err; + ptlrpc_set_destroy(set); + } RETURN(rc); } @@ -2406,16 +2430,16 @@ int lov_complete_many(struct obd_export *exp, struct lov_stripe_md *lsm, void lov_stripe_lock(struct lov_stripe_md *md) { - LASSERT(md->lsm_lock_owner != current); + LASSERT(md->lsm_lock_owner != cfs_current()); spin_lock(&md->lsm_lock); LASSERT(md->lsm_lock_owner == NULL); - md->lsm_lock_owner = current; + md->lsm_lock_owner = cfs_current(); } EXPORT_SYMBOL(lov_stripe_lock); void lov_stripe_unlock(struct lov_stripe_md *md) { - LASSERT(md->lsm_lock_owner == current); + LASSERT(md->lsm_lock_owner == cfs_current()); md->lsm_lock_owner = NULL; spin_unlock(&md->lsm_lock); } @@ -2459,7 +2483,7 @@ struct obd_ops lov_obd_ops = { .o_join_lru = lov_join_lru, .o_iocontrol = lov_iocontrol, .o_get_info = lov_get_info, - .o_set_info = lov_set_info, + .o_set_info_async = lov_set_info_async, .o_llog_init = lov_llog_init, .o_llog_finish = lov_llog_finish, .o_notify = lov_notify, @@ -2480,7 +2504,7 @@ int __init lov_init(void) init_obd_quota_ops(quota_interface, &lov_obd_ops); rc = class_register_type(&lov_obd_ops, NULL, lvars.module_vars, - OBD_LOV_DEVICENAME, NULL); + LUSTRE_LOV_NAME, NULL); if (rc && quota_interface) PORTAL_SYMBOL_PUT(osc_quota_interface); @@ -2493,13 +2517,12 @@ static void /*__exit*/ lov_exit(void) if (quota_interface) PORTAL_SYMBOL_PUT(lov_quota_interface); - class_unregister_type(OBD_LOV_DEVICENAME); + class_unregister_type(LUSTRE_LOV_NAME); } MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver"); MODULE_LICENSE("GPL"); -module_init(lov_init); -module_exit(lov_exit); +cfs_module(lov, "1.0.0", lov_init, lov_exit); #endif diff --git a/lustre/lov/lov_offset.c b/lustre/lov/lov_offset.c index d6e83c3..22af87e 100644 --- a/lustre/lov/lov_offset.c +++ b/lustre/lov/lov_offset.c @@ -28,13 +28,13 @@ #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ -#include +#include #else #include #endif -#include -#include +#include +#include #include "lov_internal.h" diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index f33e24b..7ad2745 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -28,11 +28,11 @@ #include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include "lov_internal.h" @@ -148,6 +148,7 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, RETURN(lmm_size); } +/* Find the max stripecount we should use */ int lov_get_stripecnt(struct lov_obd *lov, int stripe_count) { if (!stripe_count) @@ -363,6 +364,7 @@ int lov_setea(struct obd_export *exp, struct lov_stripe_md **lsmp, struct lov_obd *lov = &exp->exp_obd->u.lov; obd_id last_id = 0; + ENTRY; for (i = 0; i < lump->lmm_stripe_count; i++) { __u32 len = sizeof(last_id); oexp = lov->tgts[lump->lmm_objects[i].l_ost_idx].ltd_exp; diff --git a/lustre/lov/lov_qos.c b/lustre/lov/lov_qos.c index bde768b..84be134 100644 --- a/lustre/lov/lov_qos.c +++ b/lustre/lov/lov_qos.c @@ -28,12 +28,13 @@ #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ +#include #else #include #endif -#include -#include +#include +#include #include "lov_internal.h" @@ -99,7 +100,7 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) if (stripe >= lsm->lsm_stripe_count) { req->rq_idx = ost_idx; - rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, + rc = obd_create(lov->tgts[ost_idx].ltd_exp, req->rq_oa, &req->rq_md, set->set_oti); if (!rc) break; @@ -110,73 
+111,365 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req) #define LOV_CREATE_RESEED_MULT 4 #define LOV_CREATE_RESEED_MIN 1000 -/* FIXME use real qos data to prepare the lov create request */ -int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea) +/* alloc objects on osts with round-robin algorithm */ +static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt) { - static int ost_start_idx, ost_start_count; + static int ost_start_count, ost_start_idx; unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; unsigned ost_active_count = lov->desc.ld_active_tgt_count; - struct lov_stripe_md *lsm = set->set_md; - struct obdo *src_oa = set->set_oa; - int i, rc = 0; + int i, *idx_pos = idx_arr; ENTRY; - - LASSERT(src_oa->o_valid & OBD_MD_FLID); - - lsm->lsm_object_id = src_oa->o_id; - if (!lsm->lsm_stripe_size) - lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; - if (!lsm->lsm_pattern) { - lsm->lsm_pattern = lov->desc.ld_pattern ? - lov->desc.ld_pattern : LOV_PATTERN_RAID0; + + if (--ost_start_count <= 0) { + ost_start_idx = ll_rand(); + ost_start_count = + (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + + LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); + } else if (*stripe_cnt >= lov->desc.ld_active_tgt_count) { + /* If we allocate from all of the stripes, make the + * next file start on the next OST. */ + ++ost_start_idx; } + ost_idx = ost_start_idx % ost_count; - if (newea || lsm->lsm_oinfo[0].loi_ost_idx >= ost_count) { - if (--ost_start_count <= 0) { - ost_start_idx = ll_rand(); - ost_start_count = - (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) + - LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U); - } else if (lsm->lsm_stripe_count >= ost_active_count) { - /* If we allocate from all of the stripes, make the - * next file start on the next OST. 
*/ - ++ost_start_idx; + for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { + ++ost_start_idx; + + if (lov->tgts[ost_idx].active == 0) { + CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); + continue; } - ost_idx = ost_start_idx % ost_count; - } else { - ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; + + *idx_pos = ost_idx; + idx_pos++; + /* got enough ost */ + if (idx_pos - idx_arr == *stripe_cnt) + RETURN(0); } + *stripe_cnt = idx_pos - idx_arr; + RETURN(0); +} - CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", - lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); +/* alloc objects on osts with specific stripe offset */ +static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm, + int *idx_arr) +{ + unsigned ost_idx, ost_count = lov->desc.ld_tgt_count; + int i, *idx_pos = idx_arr; + ENTRY; + ost_idx = lsm->lsm_oinfo[0].loi_ost_idx; for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - struct lov_request *req; - - ++ost_start_idx; if (lov->tgts[ost_idx].active == 0) { CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); continue; } + *idx_pos = ost_idx; + idx_pos++; + /* got enough ost */ + if (idx_pos - idx_arr == lsm->lsm_stripe_count) + RETURN(0); + } + /* If we were passed specific striping params, then a failure to + * meet those requirements is an error, since we can't reallocate + * that memory (it might be part of a larger array or something). + * + * We can only get here if lsm_stripe_count was originally > 1. + */ + CERROR("can't lstripe objid "LPX64": have %u want %u\n", + lsm->lsm_object_id, idx_pos - idx_arr, lsm->lsm_stripe_count); + RETURN(-EFBIG); +} + +/* free space OST must have to be used for object allocation. 
*/ +#define QOS_MIN (lov->desc.ld_qos_threshold << 20) + +#define TGT_BAVAIL(tgt) (tgt->ltd_exp->exp_obd->obd_osfs.os_bavail * \ + tgt->ltd_exp->exp_obd->obd_osfs.os_bsize) +#define TGT_FFREE(tgt) (tgt->ltd_exp->exp_obd->obd_osfs.os_ffree) + +/* alloc objects on osts with free space weighted algorithm */ +static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + unsigned ost_count = lov->desc.ld_tgt_count; + __u64 cur_bavail, rand, *availspace, total_bavail = 0; + int *indexes, nfound, good_osts, i, warn = 0, rc = 0; + struct lov_tgt_desc *tgt; + int shift, require_stripes = *stripe_cnt; + static time_t last_warn = 0; + time_t now = cfs_time_current_sec(); + ENTRY; + + availspace = NULL; + indexes = NULL; + OBD_ALLOC(availspace, sizeof(__u64) * ost_count); + OBD_ALLOC(indexes, sizeof(int) * require_stripes); + if (!availspace || !indexes) + GOTO(out_free, rc = -EAGAIN); + + mutex_down(&lov->lov_lock); + + /* if free space is below some threshold, just go + * to do round-robin allocation */ + total_bavail = (exp->exp_obd->obd_osfs.os_bavail * \ + exp->exp_obd->obd_osfs.os_bsize); + if (ost_count < 2 || total_bavail <= QOS_MIN) { + mutex_up(&lov->lov_lock); + GOTO(out_free, rc = -EAGAIN); + } + + /* if each ost has almost same free space, go to + * do rr allocation for better creation performance */ + if (!list_empty(&lov->qos_bavail_list)) { + __u64 max, min, val; + tgt = list_entry(lov->qos_bavail_list.next, + struct lov_tgt_desc, qos_bavail_list); + max = TGT_BAVAIL(tgt); + tgt = list_entry(lov->qos_bavail_list.prev, + struct lov_tgt_desc, qos_bavail_list); + min = TGT_BAVAIL(tgt); + + val = (max >= min) ? 
(max - min) : (min - max); + min = (min * 13) >> 8; /* less than 5% of gap */ + + if (val < min) { + mutex_up(&lov->lov_lock); + GOTO(out_free, rc = -EAGAIN); + } + } else { + mutex_up(&lov->lov_lock); + GOTO(out_free, rc = -EAGAIN); + } + + total_bavail = 0; + good_osts = 0; + /* warn zero available space/inode every 30 min */ + if (cfs_time_sub(now, last_warn) > 60 * 30) + warn = 1; + /* Find all the OSTs big enough to be stripe candidates */ + list_for_each_entry(tgt, &lov->qos_bavail_list, qos_bavail_list) { + if (!tgt->active) + continue; + if (!TGT_BAVAIL(tgt)) { + if (warn) { + CWARN("no free space on %s\n", + tgt->uuid.uuid); + last_warn = now; + } + continue; + } + if (!TGT_FFREE(tgt)) { + if (warn) { + CWARN("no free inodes on %s\n", + tgt->uuid.uuid); + last_warn = now; + } + continue; + } + /* We can stop if we have enough good osts and our osts + are getting too small */ + if ((TGT_BAVAIL(tgt) <= QOS_MIN) && (good_osts >= *stripe_cnt)) + break; + availspace[good_osts] = TGT_BAVAIL(tgt); + indexes[good_osts] = tgt->index; + total_bavail += availspace[good_osts]; + good_osts++; + } + + mutex_up(&lov->lov_lock); + + if (!total_bavail) + GOTO(out_free, rc = -ENOSPC); + + /* if we don't have enough good OSTs, we reduce the stripe count. */ + if (good_osts < *stripe_cnt) + *stripe_cnt = good_osts; + + if (!*stripe_cnt) + GOTO(out_free, rc = -EAGAIN); + + /* The point of all this shift and rand is to choose a 64-bit + random number between 0 and total_bavail. Apparently '%' doesn't + work for 64bit numbers. */ + nfound = shift = 0; + while ((total_bavail >> shift) > 0) + shift++; + shift++; + /* Find enough OSTs with free space weighted random allocation */ + while (nfound < *stripe_cnt) { + cur_bavail = 0; + + /* If the total storage left is < 4GB, don't use random order, + store in biggest OST first. (Low storage situation.) + Otherwise, choose a 64bit random number... */ + rand = (shift < 32 ? 0ULL : (__u64)ll_rand() << 32) | ll_rand(); + /* ... 
mask everything above shift... */ + if (shift < 64) + rand &= ((1ULL << shift) - 1); + /* ... and this while should execute at most once... */ + while (rand > total_bavail) + rand -= total_bavail; + /* ... leaving us a 64bit number between 0 and total_bavail. */ + + /* Try to fit in bigger OSTs first. On average, this will + fill more toward the front of the OST array */ + for (i = 0; i < good_osts; i++) { + cur_bavail += availspace[i]; + if (cur_bavail >= rand) { + total_bavail -= availspace[i]; + availspace[i] = 0; + idx_arr[nfound] = indexes[i]; + nfound++; + break; + } + } + /* should never satisfy below condition */ + if (cur_bavail == 0) + break; + } + LASSERT(nfound == *stripe_cnt); + +out_free: + if (availspace) + OBD_FREE(availspace, sizeof(__u64) * ost_count); + if (indexes) + OBD_FREE(indexes, sizeof(int) * require_stripes); + if (rc != -EAGAIN) + /* rc == 0 or err */ + RETURN(rc); + + rc = alloc_rr(lov, idx_arr, stripe_cnt); + RETURN(rc); +} +/* return new alloced stripe count on success */ +static int alloc_idx_array(struct obd_export *exp, struct lov_stripe_md *lsm, + int newea, int **idx_arr, int *arr_cnt) +{ + struct lov_obd *lov = &exp->exp_obd->u.lov; + int stripe_cnt = lsm->lsm_stripe_count; + int i, rc = 0; + int *tmp_arr = NULL; + ENTRY; + + *arr_cnt = stripe_cnt; + OBD_ALLOC(tmp_arr, *arr_cnt * sizeof(int)); + if (tmp_arr == NULL) + RETURN(-ENOMEM); + for (i = 0; i < *arr_cnt; i++) + tmp_arr[i] = -1; + + if (newea || + lsm->lsm_oinfo[0].loi_ost_idx >= lov->desc.ld_tgt_count) + rc = alloc_qos(exp, tmp_arr, &stripe_cnt); + else + rc = alloc_specific(lov, lsm, tmp_arr); + + if (rc) + GOTO(out_arr, rc); + + *idx_arr = tmp_arr; + RETURN(stripe_cnt); +out_arr: + OBD_FREE(tmp_arr, *arr_cnt * sizeof(int)); + *arr_cnt = 0; + RETURN(rc); +} + +static void free_idx_array(int *idx_arr, int arr_cnt) +{ + if (arr_cnt) + OBD_FREE(idx_arr, arr_cnt * sizeof(int)); +} + +int qos_prep_create(struct obd_export *exp, struct lov_request_set *set) +{ + struct 
lov_obd *lov = &exp->exp_obd->u.lov; + struct lov_stripe_md *lsm; + struct obdo *src_oa = set->set_oa; + struct obd_trans_info *oti = set->set_oti; + int i, stripes, rc = 0, newea = 0; + int *idx_arr, idx_cnt = 0; + ENTRY; + + LASSERT(src_oa->o_valid & OBD_MD_FLID); + + if (set->set_md == NULL) { + int stripe_cnt = lov_get_stripecnt(lov, 0); + + /* If the MDS file was truncated up to some size, stripe over + * enough OSTs to allow the file to be created at that size. + * This may mean we use more than the default # of stripes. */ + if (src_oa->o_valid & OBD_MD_FLSIZE) { + struct lov_tgt_desc *tgt; + + /* Find the smallest number of stripes we can use + (up to # of active osts). */ + stripes = 1; + mutex_down(&lov->lov_lock); + list_for_each_entry(tgt, &lov->qos_bavail_list, + qos_bavail_list) { + if (!tgt->active) + continue; + /* All earlier tgts have at least this many + bytes available also, since our list is + sorted by size */ + if (TGT_BAVAIL(tgt) * stripes > src_oa->o_size) + break; + stripes++; + } + mutex_up(&lov->lov_lock); + + if (stripes < stripe_cnt) + stripes = stripe_cnt; + } else { + stripes = stripe_cnt; + } + + rc = lov_alloc_memmd(&set->set_md, stripes, + lov->desc.ld_pattern ? + lov->desc.ld_pattern : LOV_PATTERN_RAID0, + LOV_MAGIC); + if (rc < 0) + GOTO(out_err, rc); + rc = 0; + newea = 1; + } + lsm = set->set_md; + + lsm->lsm_object_id = src_oa->o_id; + if (!lsm->lsm_stripe_size) + lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; + if (!lsm->lsm_pattern) { + LASSERT(lov->desc.ld_pattern); + lsm->lsm_pattern = lov->desc.ld_pattern; + } + + stripes = alloc_idx_array(exp, lsm, newea, &idx_arr, &idx_cnt); + LASSERT(stripes <= lsm->lsm_stripe_count); + if (stripes <= 0) + GOTO(out_err, rc = stripes ? 
stripes : -EIO); + + for (i = 0; i < stripes; i++) { + struct lov_request *req; + int ost_idx = idx_arr[i]; + LASSERT(ost_idx >= 0); + OBD_ALLOC(req, sizeof(*req)); if (req == NULL) - GOTO(out, rc = -ENOMEM); + GOTO(out_err, rc = -ENOMEM); + lov_set_add_req(req, set); req->rq_buflen = sizeof(*req->rq_md); OBD_ALLOC(req->rq_md, req->rq_buflen); - if (req->rq_md == NULL) { - OBD_FREE_PTR(req); - GOTO(out, rc = -ENOMEM); - } - + if (req->rq_md == NULL) + GOTO(out_err, rc = -ENOMEM); + req->rq_oa = obdo_alloc(); - if (req->rq_oa == NULL) { - OBD_FREE_PTR(req->rq_md); - OBD_FREE_PTR(req); - GOTO(out, rc = -ENOMEM); - } - + if (req->rq_oa == NULL) + GOTO(out_err, rc = -ENOMEM); + req->rq_idx = ost_idx; req->rq_stripe = i; /* create data objects with "parent" OA */ @@ -187,41 +480,74 @@ int qos_prep_create(struct lov_obd *lov, struct lov_request_set *set, int newea) * stripe which holds the existing file size. */ if (src_oa->o_valid & OBD_MD_FLSIZE) { - if (lov_stripe_offset(lsm, src_oa->o_size, i, - &req->rq_oa->o_size) < 0 && - req->rq_oa->o_size) - req->rq_oa->o_size--; + req->rq_oa->o_size = + lov_size_to_stripe(lsm, src_oa->o_size, i); CDEBUG(D_INODE, "stripe %d has size "LPU64"/"LPU64"\n", i, req->rq_oa->o_size, src_oa->o_size); } - lov_set_add_req(req, set); - - /* If we have allocated enough objects, we are OK */ - if (set->set_count == lsm->lsm_stripe_count) - GOTO(out, rc = 0); } + LASSERT(set->set_count == stripes); - if (set->set_count == 0) - GOTO(out, rc = -EIO); - - /* If we were passed specific striping params, then a failure to - * meet those requirements is an error, since we can't reallocate - * that memory (it might be part of a larger array or something). - * - * We can only get here if lsm_stripe_count was originally > 1. - */ - if (!newea) { - CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", - lsm->lsm_object_id, set->set_count, - lsm->lsm_stripe_count, rc); - rc = rc ? 
rc : -EFBIG; - } else { + if (stripes < lsm->lsm_stripe_count) qos_shrink_lsm(set); - rc = 0; + + if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { + oti_alloc_cookies(oti, set->set_count); + if (!oti->oti_logcookies) + GOTO(out_err, rc = -ENOMEM); + set->set_cookies = oti->oti_logcookies; } -out: +out_err: + if (newea && rc) + obd_free_memmd(exp, &set->set_md); + free_idx_array(idx_arr, idx_cnt); + EXIT; + return rc; +} - RETURN(rc); +/* An caveat here is don't use list_move() on same list */ +#define list_adjust(tgt, lov, list_name, value) \ +{ \ + struct list_head *element; \ + struct lov_tgt_desc *tmp; \ + if (list_empty(&(tgt)->list_name)) \ + list_add(&(tgt)->list_name, &(lov)->list_name); \ + element = (tgt)->list_name.next; \ + while((element != &(lov)->list_name) && \ + (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \ + (value(tgt) < value(tmp))) \ + element = element->next; \ + if (element != (tgt)->list_name.next) { \ + list_del_init(&(tgt)->list_name); \ + list_add(&(tgt)->list_name, element->prev); \ + } \ + element = (tgt)->list_name.prev; \ + while ((element != &(lov)->list_name) && \ + (tmp = list_entry(element, struct lov_tgt_desc, list_name)) && \ + (value(tgt) > value(tmp))) \ + element = element->prev; \ + if (element != (tgt)->list_name.prev) { \ + list_del_init(&(tgt)->list_name); \ + list_add_tail(&(tgt)->list_name, element->prev); \ + } \ } + +void qos_update(struct lov_obd *lov, int idx, struct obd_statfs *osfs) +{ + struct lov_tgt_desc *tgt = &lov->tgts[idx]; + __u64 bavail; + ENTRY; + + bavail = osfs->os_bavail * osfs->os_bsize; + if (!bavail) + CWARN("ost %d has zero avail space!\n", idx); + + CDEBUG(D_OTHER, "QOS: bfree now "LPU64"\n", bavail); + + mutex_down(&lov->lov_lock); + list_adjust(tgt, lov, qos_bavail_list, TGT_BAVAIL); + mutex_up(&lov->lov_lock); +} + diff --git a/lustre/lov/lov_request.c b/lustre/lov/lov_request.c index b6b4b62..a3a4372 100644 --- a/lustre/lov/lov_request.c +++ b/lustre/lov/lov_request.c @@ 
-28,14 +28,14 @@ #define DEBUG_SUBSYSTEM S_LOV #ifdef __KERNEL__ -#include +#include #else #include #endif -#include -#include -#include +#include +#include +#include #include "lov_internal.h" @@ -44,7 +44,7 @@ static void lov_init_set(struct lov_request_set *set) set->set_count = 0; set->set_completes = 0; set->set_success = 0; - INIT_LIST_HEAD(&set->set_list); + CFS_INIT_LIST_HEAD(&set->set_list); atomic_set(&set->set_refcount, 1); } @@ -591,10 +591,8 @@ int lov_fini_create_set(struct lov_request_set *set,struct lov_stripe_md **lsmp) if (set == NULL) RETURN(0); LASSERT(set->set_exp); - if (set->set_completes) { + if (set->set_completes) rc = create_done(set->set_exp, set, lsmp); - /* FIXME update qos data here */ - } if (atomic_dec_and_test(&set->set_refcount)) lov_finish_set(set); @@ -649,9 +647,8 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp, struct obdo *src_oa, struct obd_trans_info *oti, struct lov_request_set **reqset) { - struct lov_obd *lov = &exp->exp_obd->u.lov; struct lov_request_set *set; - int rc = 0, newea = 0; + int rc = 0; ENTRY; OBD_ALLOC(set, sizeof(*set)); @@ -664,51 +661,11 @@ int lov_prep_create_set(struct obd_export *exp, struct lov_stripe_md **lsmp, set->set_oa = src_oa; set->set_oti = oti; - if (set->set_md == NULL) { - int stripes, stripe_cnt; - stripe_cnt = lov_get_stripecnt(lov, 0); - - /* If the MDS file was truncated up to some size, stripe over - * enough OSTs to allow the file to be created at that size. */ - if (src_oa->o_valid & OBD_MD_FLSIZE) { - stripes=((src_oa->o_size+LUSTRE_STRIPE_MAXBYTES)>>12)-1; - do_div(stripes, (__u32)(LUSTRE_STRIPE_MAXBYTES >> 12)); - - if (stripes > lov->desc.ld_active_tgt_count) - GOTO(out_set, rc = -EFBIG); - if (stripes < stripe_cnt) - stripes = stripe_cnt; - } else { - stripes = stripe_cnt; - } - - rc = lov_alloc_memmd(&set->set_md, stripes, - lov->desc.ld_pattern ? 
- lov->desc.ld_pattern : LOV_PATTERN_RAID0, - LOV_MAGIC); - if (rc < 0) - goto out_set; - newea = 1; - } - - rc = qos_prep_create(lov, set, newea); + rc = qos_prep_create(exp, set); if (rc) - goto out_lsm; - - if (oti && (src_oa->o_valid & OBD_MD_FLCOOKIE)) { - oti_alloc_cookies(oti, set->set_count); - if (!oti->oti_logcookies) - goto out_lsm; - set->set_cookies = oti->oti_logcookies; - } - *reqset = set; - RETURN(rc); - -out_lsm: - if (*lsmp == NULL) - obd_free_memmd(exp, &set->set_md); -out_set: - lov_fini_create_set(set, lsmp); + lov_fini_create_set(set, lsmp); + else + *reqset = set; RETURN(rc); } diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c index 12b755be..5ae9f62 100644 --- a/lustre/lov/lproc_lov.c +++ b/lustre/lov/lproc_lov.c @@ -28,8 +28,8 @@ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include +#include +#include #include #ifdef LPROCFS @@ -118,6 +118,68 @@ static int lov_rd_desc_uuid(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n", lov->desc.ld_uuid.uuid); } +static int lov_rd_qos_threshold(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *dev = (struct obd_device*) data; + struct lov_obd *lov; + + LASSERT(dev != NULL); + lov = &dev->u.lov; + *eof = 1; + return snprintf(page, count, "%u MB\n", lov->desc.ld_qos_threshold); +} + +static int lov_wr_qos_threshold(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct lov_obd *lov; + int val, rc; + LASSERT(dev != NULL); + + lov = &dev->u.lov; + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 0) + return -EINVAL; + lov->desc.ld_qos_threshold = val; + return count; +} + +static int lov_rd_qos_maxage(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *dev = (struct obd_device*) data; + struct lov_obd *lov; + + 
LASSERT(dev != NULL); + lov = &dev->u.lov; + *eof = 1; + return snprintf(page, count, "%u Sec\n", lov->desc.ld_qos_maxage); +} + +static int lov_wr_qos_maxage(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *dev = (struct obd_device *)data; + struct lov_obd *lov; + int val, rc; + LASSERT(dev != NULL); + + lov = &dev->u.lov; + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + if (val <= 0) + return -EINVAL; + lov->desc.ld_qos_maxage = val; + return count; +} + static void *lov_tgt_seq_start(struct seq_file *p, loff_t *pos) { struct obd_device *dev = p->private; @@ -188,6 +250,8 @@ struct lprocfs_vars lprocfs_obd_vars[] = { { "kbytesfree", lprocfs_rd_kbytesfree, 0, 0 }, { "kbytesavail", lprocfs_rd_kbytesavail, 0, 0 }, { "desc_uuid", lov_rd_desc_uuid, 0, 0 }, + { "qos_threshold",lov_rd_qos_threshold, lov_wr_qos_threshold, 0 }, + { "qos_maxage", lov_rd_qos_maxage, lov_wr_qos_maxage, 0 }, { 0 } }; diff --git a/lustre/lvfs/Info.plist b/lustre/lvfs/Info.plist new file mode 100644 index 0000000..44439e2 --- /dev/null +++ b/lustre/lvfs/Info.plist @@ -0,0 +1,37 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + lvfs + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.lvfs + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.apple.kpi.unsupported + 8.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + + + diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am index 10d8904..9185580 100644 --- a/lustre/lvfs/autoMakefile.am +++ b/lustre/lvfs/autoMakefile.am @@ -16,6 +16,8 @@ endif if MODULES +if LINUX + modulefs_DATA := lvfs$(KMODEXT) if SERVER @@ -43,13 +45,32 @@ fsfilt_ldiskfs.c: fsfilt_ext3.c fsfilt_ldiskfs_quota.h: fsfilt_ext3_quota.h sed $(strip $(ldiskfs_sed_flags)) $< > $@ +endif # LINUX -else +if DARWIN + +macos_PROGRAMS := lvfs + +lvfs_SOURCES := lvfs_darwin.c + +lvfs_CFLAGS := $(EXTRA_KCFLAGS) +lvfs_LDFLAGS := $(EXTRA_KLDFLAGS) +lvfs_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif # DARWIN + +else # MODULES sources: endif # MODULES +install-data-hook: $(install_data_hook) + DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_reiserfs.c lvfs_common.c \ lvfs_internal.h lvfs_linux.c lvfs_userfs.c \ upcall_cache.c \ diff --git a/lustre/lvfs/fsfilt.c b/lustre/lvfs/fsfilt.c index d3ca4b8..6f88917 100644 --- a/lustre/lvfs/fsfilt.c +++ b/lustre/lvfs/fsfilt.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include LIST_HEAD(fsfilt_types); diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index ab9ba93..533f0d3 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -46,10 +46,10 @@ #endif #include -#include -#include -#include -#include +#include +#include +#include +#include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #include #endif @@ -524,10 +524,6 @@ static int fsfilt_ext3_set_md(struct inode *inode, void *handle, LASSERT(TRYLOCK_INODE_MUTEX(inode) == 0); - if (EXT3_I(inode)->i_file_acl /* || large inode EA flag */) - CWARN("setting EA on %lu/%u again... 
interesting\n", - inode->i_ino, inode->i_generation); - lock_24kernel(); rc = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_TRUSTED, name, lmm, lmm_size, 0); @@ -864,7 +860,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, return EXT_CONTINUE; } - tgen = EXT_GENERATION(tree); + tgen = EXT_GENERATION(EXT_ROOT_HDR(tree)); count = ext3_ext_calc_credits_for_insert(tree, path); ext3_up_truncate_sem(inode); @@ -877,7 +873,7 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, } ext3_down_truncate_sem(inode); - if (tgen != EXT_GENERATION(tree)) { + if (tgen != EXT_GENERATION(EXT_ROOT_HDR(tree))) { /* the tree has changed. so path can be invalid at moment */ lock_24kernel(); journal_stop(handle); @@ -2015,8 +2011,11 @@ out: static void __exit fsfilt_ext3_exit(void) { + int rc; + fsfilt_unregister_ops(&fsfilt_ext3_ops); - LASSERT(kmem_cache_destroy(fcb_cache) == 0); + rc = kmem_cache_destroy(fcb_cache); + LASSERTF(rc == 0, "couldn't destroy fcb_cache slab\n"); } module_init(fsfilt_ext3_init); diff --git a/lustre/lvfs/fsfilt_reiserfs.c b/lustre/lvfs/fsfilt_reiserfs.c index 68a049e..20cbb3f 100644 --- a/lustre/lvfs/fsfilt_reiserfs.c +++ b/lustre/lvfs/fsfilt_reiserfs.c @@ -42,9 +42,9 @@ #include #endif #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/lustre/lvfs/lvfs_common.c b/lustre/lvfs/lvfs_common.c index 08f4f14..1834616 100644 --- a/lustre/lvfs/lvfs_common.c +++ b/lustre/lvfs/lvfs_common.c @@ -25,13 +25,11 @@ #define DEBUG_SUBSYSTEM S_FILTER -#include +#include struct dentry *lvfs_fid2dentry(struct lvfs_run_ctxt *ctxt, __u64 id, __u32 gen, __u64 gr, void *data) { return ctxt->cb_ops.l_fid2dentry(id, gen, gr, data); } - - EXPORT_SYMBOL(lvfs_fid2dentry); diff --git a/lustre/lvfs/lvfs_darwin.c b/lustre/lvfs/lvfs_darwin.c new file mode 100644 index 0000000..1feb31c --- /dev/null +++ b/lustre/lvfs/lvfs_darwin.c @@ -0,0 +1,45 @@ +#define DEBUG_SUBSYSTEM S_FILTER + +#include 
+#include +#include +#include + +atomic_t obd_memory; +int obd_memmax; + +/* XXX currently ctxt functions should not be used ?? */ +void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred) +{ + LBUG(); +} + +void pop_ctxt(struct lvfs_run_ctxt *saved, struct lvfs_run_ctxt *new_ctx, + struct lvfs_ucred *cred) +{ + LBUG(); +} + +static int __init lvfs_init(void) +{ + int ret = 0; + ENTRY; + + RETURN(ret); +} + +static void __exit lvfs_exit(void) +{ + int leaked; + ENTRY; + + leaked = atomic_read(&obd_memory); + CDEBUG(leaked ? D_ERROR : D_INFO, + "obd mem max: %d leaked: %d\n", obd_memmax, leaked); + + return; +} + +cfs_module(lvfs, "1.0.0", lvfs_init, lvfs_exit); + diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 69c3616..3f662d7 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -37,18 +37,18 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include #include #include -#include +#include #include "lvfs_internal.h" -#include -#include -#include +#include +#include +#include atomic_t obd_memory; int obd_memmax; @@ -294,8 +294,9 @@ struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode, int fix) /* Fixup directory permissions if necessary */ if (fix && (old_mode & S_IALLUGO) != (mode & S_IALLUGO)) { - CWARN("fixing permissions on %s from %o to %o\n", - name, old_mode, mode); + CDEBUG(D_CONFIG, + "fixing permissions on %s from %o to %o\n", + name, old_mode, mode); dchild->d_inode->i_mode = (mode & S_IALLUGO) | (old_mode & ~S_IALLUGO); mark_inode_dirty(dchild->d_inode); @@ -505,6 +506,7 @@ static void __exit lvfs_linux_exit(void) CDEBUG_EX(leaked ? 
D_ERROR : D_INFO, "obd mem max: %d leaked: %d\n", obd_memmax, leaked); + EXIT; return; } diff --git a/lustre/lvfs/lvfs_userfs.c b/lustre/lvfs/lvfs_userfs.c index a6140d5..28afe5f 100644 --- a/lustre/lvfs/lvfs_userfs.c +++ b/lustre/lvfs/lvfs_userfs.c @@ -24,11 +24,11 @@ */ #include -#include +#include #include "lvfs_internal.h" -#include -#include +#include +#include /* XXX currently ctxt functions should not be used ?? */ void push_ctxt(struct lvfs_run_ctxt *save, struct lvfs_run_ctxt *new_ctx, diff --git a/lustre/lvfs/upcall_cache.c b/lustre/lvfs/upcall_cache.c index ea33ee7..6de1dac 100644 --- a/lustre/lvfs/upcall_cache.c +++ b/lustre/lvfs/upcall_cache.c @@ -43,8 +43,8 @@ #include #include -#include -#include +#include +#include #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) struct group_info *groups_alloc(int ngroups) diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index 195bbff..0092084 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -26,8 +26,8 @@ #include #include -#include -#include +#include +#include #ifdef LPROCFS static struct lprocfs_vars lprocfs_obd_vars[] = { diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 852046c..07e3faf 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -25,7 +25,7 @@ #ifndef _MDC_INTERNAL_H #define _MDC_INTERNAL_H -#include +#include void mdc_pack_req_body(struct ptlrpc_request *req, int offset, __u64 valid, struct lu_fid *fid, int ea_size); diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c index fef2e36..aa10f6a 100644 --- a/lustre/mdc/mdc_lib.c +++ b/lustre/mdc/mdc_lib.c @@ -27,9 +27,8 @@ # include # include #endif -#include -#include -#include +#include +#include #include "mdc_internal.h" #ifndef __KERNEL__ @@ -193,7 +192,9 @@ void mdc_setattr_pack(struct ptlrpc_request *req, int offset, rec->sa_atime = LTIME_S(iattr->ia_atime); rec->sa_mtime = LTIME_S(iattr->ia_mtime); rec->sa_ctime = LTIME_S(iattr->ia_ctime); - rec->sa_attr_flags = 
iattr->ia_attr_flags; + rec->sa_attr_flags = + ((struct ll_iattr_struct *)iattr)->ia_attr_flags; + if ((iattr->ia_valid & ATTR_GID) && in_group_p(iattr->ia_gid)) rec->sa_suppgid = iattr->ia_gid; else diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index a21d911..10202b1 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -36,11 +36,10 @@ # include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include #include "mdc_internal.h" int it_disposition(struct lookup_intent *it, int flag) @@ -374,7 +373,8 @@ int mdc_enqueue(struct obd_export *exp, repsize[repbufcnt++] = obddev->u.cli.cl_max_mds_cookiesize; } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { obd_valid valid = OBD_MD_FLGETATTR | OBD_MD_FLEASIZE | - OBD_MD_FLACL | OBD_MD_FLMODEASIZE; + OBD_MD_FLACL | OBD_MD_FLMODEASIZE | + OBD_MD_FLDIREA; size[req_buffers++] = sizeof(struct mdt_body); size[req_buffers++] = op_data->namelen + 1; diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 9d05b87..e0ec343 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -35,8 +35,7 @@ # include #endif -#include -#include +#include #include "mdc_internal.h" /* mdc_setattr does its own semaphore handling */ diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 6978cc6..18d7395 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -36,14 +36,15 @@ # include #endif -#include -#include -#include -#include -#include -#include +#include +#include +#include /* for LUSTRE_POSIX_ACL_MAX_SIZE */ +#include +#include #include "mdc_internal.h" +static quota_interface_t *quota_interface; + #define REQUEST_MINOR 244 static int mdc_cleanup(struct obd_device *obd); @@ -662,6 +663,9 @@ int mdc_close(struct obd_export *exp, struct md_op_data *op_data, EXIT; *request = req; out: + if (rc != 0 && req && req->rq_commit_cb) + req->rq_commit_cb(req); + return rc; } @@ -806,8 +810,9 @@ out: return rc; } -int 
mdc_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) +int mdc_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) { struct obd_import *imp = class_exp2cliimp(exp); int rc = -EINVAL; @@ -853,8 +858,14 @@ int mdc_set_info(struct obd_export *exp, obd_count keylen, RETURN(-ENOMEM); req->rq_replen = lustre_msg_size(0, NULL); - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); + if (set) { + rc = 0; + ptlrpc_set_add_req(set, req); + ptlrpc_check_set(set); + } else { + rc = ptlrpc_queue_wait(req); + ptlrpc_req_finished(req); + } RETURN(rc); } RETURN(rc); @@ -892,7 +903,7 @@ out_req: } static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age) + cfs_time_t max_age) { struct ptlrpc_request *req; struct obd_statfs *msfs; @@ -1243,7 +1254,7 @@ struct obd_ops mdc_obd_ops = { .o_connect = client_connect_import, .o_disconnect = client_disconnect_export, .o_iocontrol = mdc_iocontrol, - .o_set_info = mdc_set_info, + .o_set_info_async = mdc_set_info_async, .o_statfs = mdc_statfs, .o_pin = mdc_pin, .o_unpin = mdc_unpin, @@ -1279,7 +1290,6 @@ struct md_ops mdc_md_ops = { .m_clear_open_replay_data = mdc_clear_open_replay_data }; -static quota_interface_t *quota_interface; extern quota_interface_t mdc_quota_interface; int __init mdc_init(void) diff --git a/lustre/mdd/mdd_handler.c b/lustre/mdd/mdd_handler.c index 5c1ff99..a0c7c2f 100644 --- a/lustre/mdd/mdd_handler.c +++ b/lustre/mdd/mdd_handler.c @@ -32,16 +32,15 @@ #include -#include -#include -#include -#include -#include - - -#include -#include -#include +#include +#include +#include +#include +#include + +#include +#include +#include #include "mdd_internal.h" diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 87b13f3..aa39e64 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -4,9 +4,8 @@ #ifndef _MDD_INTERNAL_H #define 
_MDD_INTERNAL_H -#include - #include +#include struct dt_device; struct file; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 8cc953d..68d3456 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -34,12 +34,9 @@ #endif #define DEBUG_SUBSYSTEM S_MDS +#include #include -#include -#include -#include #include -#include #include #include #include @@ -52,14 +49,16 @@ #else # include #endif -#include -#include -#include -#include -#include -#include -#include -#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "mds_internal.h" @@ -228,9 +227,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, if (inode->i_generation == 0 || inode->i_nlink == 0) { LCONSOLE_WARN("Found inode with zero generation or link -- this" - " may indicate disk corruption (inode: %lu, link:" - " %lu, count: %d)\n", inode->i_ino, - (unsigned long)inode->i_nlink, + " may indicate disk corruption (inode: %lu/%u, " + "link %lu, count %d)\n", inode->i_ino, + inode->i_generation,(unsigned long)inode->i_nlink, atomic_read(&inode->i_count)); dput(result); RETURN(ERR_PTR(-ENOENT)); @@ -707,7 +706,7 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, { struct mds_obd *mds = mds_req2mds(req); struct mds_body *body; - int rc, size[2] = {sizeof(*body)}, bufcount = 1; + int rc, size[3] = {sizeof(*body)}, bufcount = 1; ENTRY; body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); @@ -1939,7 +1938,6 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg) } label = fsfilt_get_label(obd, obd->u.obt.obt_sb); - if (obd->obd_recovering) { LCONSOLE_WARN("MDT %s now serving %s (%s%s%s), but will be in " "recovery until %d %s reconnect, or if no clients" @@ -1952,8 +1950,8 @@ static int mds_setup(struct obd_device *obd, struct lustre_cfg* lcfg) obd->obd_recoverable_clients, (obd->obd_recoverable_clients == 1) ? 
"client" : "clients", - (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60, - (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60, + (int)(OBD_RECOVERY_TIMEOUT) / 60, + (int)(OBD_RECOVERY_TIMEOUT) % 60, obd->obd_name); } else { LCONSOLE_INFO("MDT %s now serving %s (%s%s%s) with recovery " @@ -2381,7 +2379,7 @@ static int mds_intent_policy(struct ldlm_namespace *ns, break; default: CERROR("Unhandled intent "LPD64"\n", it->opc); - LBUG(); + RETURN(-EFAULT); } /* By this point, whatever function we called above must have either diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index 92d351a..1140a61 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -28,9 +28,9 @@ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include -#include +#include +#include +#include #include "mds_internal.h" #ifdef LPROCFS @@ -52,6 +52,7 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, struct obd_device *obd = data; struct mds_obd *mds = &obd->u.mds; char tmpbuf[sizeof(struct obd_uuid)]; + struct ptlrpc_request_set *set; int rc; sscanf(buffer, "%40s", tmpbuf); @@ -59,14 +60,25 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, if (strncmp(tmpbuf, "nid:", 4) != 0) return lprocfs_wr_evict_client(file, buffer, count, data); - obd_export_evict_by_nid(obd, tmpbuf+4); + set = ptlrpc_prep_set(); + if (!set) + return -ENOMEM; - rc = obd_set_info(mds->mds_osc_exp, strlen("evict_by_nid"), - "evict_by_nid", strlen(tmpbuf + 4) + 1, tmpbuf + 4); + rc = obd_set_info_async(mds->mds_osc_exp, strlen("evict_by_nid"), + "evict_by_nid", strlen(tmpbuf + 4) + 1, + tmpbuf + 4, set); if (rc) CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4, rc); + ptlrpc_check_set(set); + + obd_export_evict_by_nid(obd, tmpbuf+4); + rc = ptlrpc_set_wait(set); + if (rc) + CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4, + rc); + ptlrpc_set_destroy(set); return count; } diff --git a/lustre/mds/mds_fs.c 
b/lustre/mds/mds_fs.c index 72af624..124d178 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -35,21 +35,20 @@ #include #include #include -#include +#include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include "mds_internal.h" -#define HEALTH_CHECK "health_check" /* Add client data to the MDS. We use a bitmap to locate a free space * in the last_rcvd file if cl_off is -1 (i.e. a new client). @@ -237,7 +236,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) mds->mds_server_data = lsd; if (last_rcvd_size == 0) { - CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD); + LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name); memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(lsd->lsd_uuid)); lsd->lsd_last_transno = 0; @@ -255,8 +254,10 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) GOTO(err_msd, rc); } if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) { - CERROR("OBD UUID %s does not match last_rcvd UUID %s\n", - obd->obd_uuid.uuid, lsd->lsd_uuid); + LCONSOLE_ERROR("Trying to start OBD %s using the wrong" + " disk %s. Were the /dev/ assignments " + "rearranged?\n", + obd->obd_uuid.uuid, lsd->lsd_uuid); GOTO(err_msd, rc = -EINVAL); } mount_count = le64_to_cpu(lsd->lsd_mount_count); @@ -392,7 +393,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) obd->obd_recovery_start = CURRENT_SECONDS; /* Only used for lprocfs_status */ obd->obd_recovery_end = obd->obd_recovery_start + - OBD_RECOVERY_TIMEOUT / HZ; + OBD_RECOVERY_TIMEOUT; } mds->mds_mount_count = mount_count + 1; @@ -426,6 +427,7 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) RETURN(rc); mds->mds_vfsmnt = mnt; + /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? 
*/ obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb; fsfilt_setup(obd, obd->u.obt.obt_sb); diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 3c53564..f67a19f 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -5,8 +5,8 @@ #ifndef _MDS_INTERNAL_H #define _MDS_INTERNAL_H -#include -#include +#include +#include #define MDT_ROCOMPAT_SUPP (OBD_ROCOMPAT_LOVOBJID) #define MDT_INCOMPAT_SUPP (OBD_INCOMPAT_MDT | OBD_INCOMPAT_COMMON_LR) diff --git a/lustre/mds/mds_join.c b/lustre/mds/mds_join.c index 5075bfb..3c17a05 100644 --- a/lustre/mds/mds_join.c +++ b/lustre/mds/mds_join.c @@ -30,18 +30,18 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "mds_internal.h" -#include struct mdsea_cb_data { struct llog_handle *mc_llh; diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c index 4bc0f1b..c136494 100644 --- a/lustre/mds/mds_lib.c +++ b/lustre/mds/mds_lib.c @@ -48,8 +48,8 @@ #include #include -#include -#include +#include +#include #include "mds_internal.h" void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode) diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index 7922bbc..43a63e3 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -34,10 +34,11 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "mds_internal.h" diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 57e7e09..0f95347 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -32,13 +32,13 @@ #define DEBUG_SUBSYSTEM S_MDS #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "mds_internal.h" @@ -163,17 +163,19 @@ int mds_lov_set_nextid(struct obd_device 
*obd) LASSERT(mds->mds_lov_objids != NULL); - rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_NEXT_ID), KEY_NEXT_ID, - mds->mds_lov_desc.ld_tgt_count, mds->mds_lov_objids); + rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_NEXT_ID), + KEY_NEXT_ID, + mds->mds_lov_desc.ld_tgt_count, + mds->mds_lov_objids, NULL); if (rc) CERROR ("%s: mds_lov_set_nextid failed (%d)\n", obd->obd_name, rc); + RETURN(rc); } -/* Update the lov desc for a new size lov. - From HEAD mds_dt_lov_update_desc (but fixed) */ +/* Update the lov desc for a new size lov. */ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) { struct mds_obd *mds = &obd->u.mds; @@ -388,7 +390,7 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name) * set_nextid(). The class driver can help us here, because * it can use the obd_recovering flag to determine when the * the OBD is full available. */ - if (!obd->obd_recovering) + if (!obd->obd_recovering) rc = mds_postrecov(obd); RETURN(rc); @@ -443,7 +445,7 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = llog_create(llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT), - &mds->mds_cfg_llh, NULL, name); + &mds->mds_cfg_llh, NULL, name); if (rc == 0) llog_init_handle(mds->mds_cfg_llh, LLOG_F_IS_PLAIN, &cfg_uuid); @@ -586,8 +588,9 @@ int mds_iocontrol(unsigned int cmd, struct obd_export *exp, int len, rc = llog_ioctl(ctxt, cmd, data); pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); llog_cat_initialize(obd, mds->mds_lov_desc.ld_tgt_count); - rc2 = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN), - KEY_MDS_CONN, 0, NULL); + rc2 = obd_set_info_async(mds->mds_osc_exp, + strlen(KEY_MDS_CONN), KEY_MDS_CONN, + 0, NULL, NULL); if (!rc) rc = rc2; RETURN(rc); @@ -623,37 +626,35 @@ struct mds_lov_sync_info { __u32 mlsi_index; /* index of target */ }; +/* We only sync one osc at a time, so that we don't have to hold + any kind of lock on the whole mds_lov_desc, 
which may change + (grow) as a result of mds_lov_add_ost. This also avoids any + kind of mismatch between the lov_desc and the mds_lov_desc, + which are not in lock-step during lov_add_obd */ static int __mds_lov_synchronize(void *data) { struct mds_lov_sync_info *mlsi = data; struct obd_device *obd = mlsi->mlsi_obd; struct obd_device *watched = mlsi->mlsi_watched; struct mds_obd *mds = &obd->u.mds; - struct obd_uuid *uuid = NULL; + struct obd_uuid *uuid; __u32 idx = mlsi->mlsi_index; int rc = 0; ENTRY; - if (watched) - uuid = &watched->u.cli.cl_target_uuid; - OBD_FREE(mlsi, sizeof(*mlsi)); LASSERT(obd); - - /* We only sync one osc at a time, so that we don't have to hold - any kind of lock on the whole mds_lov_desc, which may change - (grow) as a result of mds_lov_add_ost. This also avoids any - kind of mismatch between the lov_desc and the mds_lov_desc, - which are not in lock-step during lov_add_obd */ + LASSERT(watched); + uuid = &watched->u.cli.cl_target_uuid; LASSERT(uuid); rc = mds_lov_update_mds(obd, watched, idx); if (rc != 0) GOTO(out, rc); - rc = obd_set_info(mds->mds_osc_exp, strlen(KEY_MDS_CONN), - KEY_MDS_CONN, 0, uuid); + rc = obd_set_info_async(mds->mds_osc_exp, strlen(KEY_MDS_CONN), + KEY_MDS_CONN, 0, uuid, NULL); if (rc != 0) GOTO(out, rc); @@ -668,7 +669,7 @@ static int __mds_lov_synchronize(void *data) } LCONSOLE_INFO("MDS %s: %s now active, resetting orphans\n", - obd->obd_name, (char *)uuid->uuid); + obd->obd_name, obd_uuid2str(uuid)); if (obd->obd_stopping) GOTO(out, rc = -ENODEV); @@ -730,15 +731,16 @@ int mds_lov_start_synchronize(struct obd_device *obd, if (nonblock) { /* Synchronize in the background */ - rc = kernel_thread(mds_lov_synchronize, mlsi, - CLONE_VM | CLONE_FILES); + rc = cfs_kernel_thread(mds_lov_synchronize, mlsi, + CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("%s: error starting mds_lov_synchronize: %d\n", obd->obd_name, rc); class_decref(obd); } else { - CDEBUG(D_HA, "%s: mds_lov_synchronize thread: %d\n", - 
obd->obd_name, rc); + CDEBUG(D_HA, "%s: mds_lov_synchronize idx=%d " + "thread=%d\n", obd->obd_name, + mlsi->mlsi_index, rc); rc = 0; } } else { diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index f70a487..993afb8 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -42,10 +42,10 @@ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "mds_internal.h" @@ -1097,6 +1097,14 @@ found_child: GOTO(cleanup, rc = -EAGAIN); } + if (!S_ISREG(dchild->d_inode->i_mode) && + !S_ISDIR(dchild->d_inode->i_mode) && + (req->rq_export->exp_connect_flags & OBD_CONNECT_NODEVOH)) { + /* If client supports this, do not return open handle for + * special device nodes */ + GOTO(cleanup_no_trans, rc = 0); + } + /* Step 5: mds_open it */ rc = mds_finish_open(req, dchild, body, rec->ur_flags, &handle, rec, rep, &parent_lockh); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 3905d1f..43e1ba3 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -34,15 +34,15 @@ #define DEBUG_SUBSYSTEM S_MDS #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "mds_internal.h" @@ -531,6 +531,7 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset, rc = mds_get_md(obd, inode, lmm, &lmm_size, need_lock); if (rc < 0) GOTO(cleanup, rc); + rc = 0; handle = fsfilt_start_log(obd, inode, FSFILT_OP_SETATTR, NULL, le32_to_cpu(lmm->lmm_stripe_count)); @@ -1040,6 +1041,22 @@ int enqueue_ordered_locks(struct obd_device *obd, struct ldlm_res_id *p1_res_id, RETURN(0); } +static inline int res_eq(struct ldlm_res_id *res1, struct ldlm_res_id *res2) +{ + return !memcmp(res1, res2, sizeof(*res1)); +} + +static inline void +try_to_aggregate_locks(struct ldlm_res_id *res1, ldlm_policy_data_t *p1, + struct ldlm_res_id *res2, ldlm_policy_data_t *p2) +{ + if 
(!res_eq(res1, res2)) + return; + /* XXX: any additional inodebits (to current LOOKUP and UPDATE) + * should be taken with great care here */ + p1->l_inodebits.bits |= p2->l_inodebits.bits; +} + int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id, struct lustre_handle *p1_lockh, int p1_lock_mode, ldlm_policy_data_t *p1_policy, @@ -1105,14 +1122,19 @@ int enqueue_4ordered_locks(struct obd_device *obd,struct ldlm_res_id *p1_res_id, flags = 0; if (res_id[i]->name[0] == 0) break; - if (i != 0 && - memcmp(res_id[i], res_id[i-1], sizeof(*res_id[i])) == 0 && - (policies[i]->l_inodebits.bits & - policies[i-1]->l_inodebits.bits)) { + if (i && res_eq(res_id[i], res_id[i-1])) { memcpy(dlm_handles[i], dlm_handles[i-1], sizeof(*(dlm_handles[i]))); ldlm_lock_addref(dlm_handles[i], lock_modes[i]); } else { + /* we need to enqueue locks with different inodebits + * at once, because otherwise concurrent thread can + * hit the windown between these two locks and we'll + * get to deadlock. see bug 10360. note also, that it + * is impossible to have >2 equal res. 
*/ + if (i < 3) + try_to_aggregate_locks(res_id[i], policies[i], + res_id[i+1], policies[i+1]); rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, *res_id[i], LDLM_IBITS, policies[i], @@ -1193,8 +1215,11 @@ static int mds_verify_child(struct obd_device *obd, child_res_id->name[0] = dchild->d_inode->i_ino; child_res_id->name[1] = dchild->d_inode->i_generation; - if (res_gt(parent_res_id, child_res_id, NULL, NULL) || - res_gt(maxres, child_res_id, NULL, NULL)) { + /* Make sure that we don't try to re-enqueue a lock on the + * same resource if it happens that the source is renamed to + * the target by another thread (bug 9974, thanks racer :-) */ + if (!res_gt(child_res_id, parent_res_id, NULL, NULL) || + !res_gt(child_res_id, maxres, NULL, NULL)) { CDEBUG(D_DLMTRACE, "relock "LPU64"<("LPU64"|"LPU64")\n", child_res_id->name[0], parent_res_id->name[0], maxres->name[0]); @@ -1634,8 +1659,8 @@ cleanup: rc = mds_finish_transno(mds, dparent ? dparent->d_inode : NULL, handle, req, rc, 0); if (!rc) - (void)obd_set_info(mds->mds_osc_exp, strlen("unlinked"), - "unlinked", 0, NULL); + (void)obd_set_info_async(mds->mds_osc_exp, strlen("unlinked"), + "unlinked", 0, NULL, NULL); switch(cleanup_phase) { case 5: /* pending_dir semaphore */ UNLOCK_INODE_MUTEX(mds->mds_pending_dir->d_inode); diff --git a/lustre/mds/mds_unlink_open.c b/lustre/mds/mds_unlink_open.c index 8d30235..ed4539b 100644 --- a/lustre/mds/mds_unlink_open.c +++ b/lustre/mds/mds_unlink_open.c @@ -36,10 +36,11 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "mds_internal.h" diff --git a/lustre/mds/mds_xattr.c b/lustre/mds/mds_xattr.c index 836f675..c3968ef 100644 --- a/lustre/mds/mds_xattr.c +++ b/lustre/mds/mds_xattr.c @@ -31,15 +31,15 @@ #define DEBUG_SUBSYSTEM S_MDS #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include #include "mds_internal.h" @@ -199,6 +199,10 @@ out_ucred: return rc; } +/* + * alwasy return 0, and set req->rq_status as error number in case + * of failures. + */ static int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) { @@ -225,20 +229,11 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) lockpart = MDS_INODELOCK_UPDATE; - de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX, - &lockh, lockpart); - if (IS_ERR(de)) - GOTO(out, rc = PTR_ERR(de)); - - inode = de->d_inode; - LASSERT(inode); - - OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb); - + /* various sanity check for xattr name */ xattr_name = lustre_msg_string(req->rq_reqmsg, 1, 0); if (!xattr_name) { CERROR("can't extract xattr name\n"); - GOTO(out_dput, rc = -EPROTO); + GOTO(out, rc = -EPROTO); } DEBUG_REQ(D_INODE, req, "%sxattr %s\n", @@ -247,14 +242,27 @@ int mds_setxattr_internal(struct ptlrpc_request *req, struct mds_body *body) if (strncmp(xattr_name, "trusted.", 8) == 0) { if (strcmp(xattr_name + 8, XATTR_LUSTRE_MDS_LOV_EA) == 0) - GOTO(out_dput, rc = -EACCES); + GOTO(out, rc = -EACCES); } if (!(req->rq_export->exp_connect_flags & OBD_CONNECT_XATTR) && (strncmp(xattr_name, "user.", 5) == 0)) { - GOTO(out_dput, rc = -EOPNOTSUPP); + GOTO(out, rc = -EOPNOTSUPP); } + if (!strcmp(xattr_name, XATTR_NAME_ACL_ACCESS)) + lockpart |= MDS_INODELOCK_LOOKUP; + + de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_EX, + &lockh, lockpart); + if (IS_ERR(de)) + GOTO(out, rc = PTR_ERR(de)); + + inode = de->d_inode; + LASSERT(inode); + + OBD_FAIL_WRITE(OBD_FAIL_MDS_SETXATTR_WRITE, inode->i_sb); + /* filter_op simply use setattr one */ handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR, NULL); if (IS_ERR(handle)) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 21ae60d..11d7322 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -38,24 +38,20 @@ #include /* LUSTRE_VERSION_CODE */ 
-#include +#include /* * struct OBD_{ALLOC,FREE}*() * OBD_FAIL_CHECK */ -#include +#include /* struct ptlrpc_request */ -#include +#include /* struct obd_export */ -#include +#include /* struct obd_device */ -#include +#include /* lu2dt_dev() */ -#include - -/*LUSTRE_POSIX_ACL_MAX_SIZE*/ -#include - +#include /* struct mds_client_data */ #include "../mds/mds_internal.h" #include "mdt_internal.h" diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 8546082..add79d5 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -38,18 +38,16 @@ /* * struct ptlrpc_client */ -#include -#include +#include +#include /* * struct obd_connect_data * struct lustre_handle */ -#include -/* req_layout things */ -#include - -#include -#include +#include +#include +#include +#include struct mdt_device { /* super-class */ diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index e88e641..a56c781 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -39,11 +39,11 @@ # include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id) @@ -256,6 +256,8 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, /* The mgc fs exclusion sem. Only one fs can be setup at a time. */ down(&cli->cl_mgc_sem); + cleanup_group_info(); + obd->obd_fsops = fsfilt_get_ops(MT_STR(lsi->lsi_ldd)); if (IS_ERR(obd->obd_fsops)) { up(&cli->cl_mgc_sem); @@ -265,9 +267,6 @@ static int mgc_fs_setup(struct obd_device *obd, struct super_block *sb, } cli->cl_mgc_vfsmnt = mnt; - // FIXME which is the right SB? 
- filter_common_setup also - CDEBUG(D_MGC, "SB's: fill=%p mnt=%p == root=%p\n", sb, mnt->mnt_sb, - mnt->mnt_root->d_inode->i_sb); fsfilt_setup(obd, mnt->mnt_sb); OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt); @@ -497,8 +496,8 @@ static int mgc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, /* Re-enqueue the lock in a separate thread, because we must return from this fn before that lock can be taken. */ - rc = kernel_thread(mgc_async_requeue, data, - CLONE_VM | CLONE_FS); + rc = cfs_kernel_thread(mgc_async_requeue, data, + CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("Cannot re-enqueue thread: %d\n", rc); } else { @@ -667,8 +666,9 @@ static int mgc_target_register(struct obd_export *exp, RETURN(rc); } -int mgc_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) +int mgc_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) { struct obd_import *imp = class_exp2cliimp(exp); int rc = -EINVAL; @@ -1091,7 +1091,7 @@ struct obd_ops mgc_obd_ops = { //.o_enqueue = mgc_enqueue, .o_cancel = mgc_cancel, //.o_iocontrol = mgc_iocontrol, - .o_set_info = mgc_set_info, + .o_set_info_async = mgc_set_info_async, .o_import_event = mgc_import_event, .o_llog_init = mgc_llog_init, .o_llog_finish = mgc_llog_finish, diff --git a/lustre/mgs/lproc_mgs.c b/lustre/mgs/lproc_mgs.c index e1f91f6..d1ce512 100644 --- a/lustre/mgs/lproc_mgs.c +++ b/lustre/mgs/lproc_mgs.c @@ -28,9 +28,9 @@ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include -#include +#include +#include +#include #include "mgs_internal.h" #ifdef LPROCFS diff --git a/lustre/mgs/mgs_fs.c b/lustre/mgs/mgs_fs.c index 8100bbe..ce489ae 100644 --- a/lustre/mgs/mgs_fs.c +++ b/lustre/mgs/mgs_fs.c @@ -31,16 +31,14 @@ #include #include #include -#include #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include -#include -#include -#include -#include 
+#include +#include +#include +#include +#include #include #include "mgs_internal.h" diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 671529d..17411a6 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -38,13 +38,13 @@ # include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "mgs_internal.h" @@ -275,7 +275,8 @@ static int mgs_cleanup(struct obd_device *obd) /* Free the namespace in it's own thread, so that if the ldlm_cancel_handler put the last mgs obd ref, we won't deadlock here. */ - kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, CLONE_VM | CLONE_FS); + cfs_kernel_thread(mgs_ldlm_nsfree, obd->obd_namespace, + CLONE_VM | CLONE_FILES); lvfs_clear_rdonly(save_dev); @@ -460,7 +461,6 @@ int mgs_handle(struct ptlrpc_request *req) switch (req->rq_reqmsg->opc) { case MGS_CONNECT: DEBUG_REQ(D_MGS, req, "connect"); - OBD_FAIL_RETURN(OBD_FAIL_MGS_CONNECT_NET, 0); rc = target_handle_connect(req, mgs_handle); if (!rc && (req->rq_reqmsg->conn_cnt > 1)) /* Make clients trying to reconnect after a MGS restart @@ -470,7 +470,6 @@ int mgs_handle(struct ptlrpc_request *req) break; case MGS_DISCONNECT: DEBUG_REQ(D_MGS, req, "disconnect"); - OBD_FAIL_RETURN(OBD_FAIL_MGS_DISCONNECT_NET, 0); rc = target_handle_disconnect(req); req->rq_status = rc; /* superfluous? 
*/ break; @@ -485,7 +484,6 @@ int mgs_handle(struct ptlrpc_request *req) case LDLM_ENQUEUE: DEBUG_REQ(D_MGS, req, "enqueue"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, ldlm_server_blocking_ast, NULL); fail = OBD_FAIL_LDLM_REPLY; @@ -495,7 +493,6 @@ int mgs_handle(struct ptlrpc_request *req) DEBUG_REQ(D_MGS, req, "callback"); CERROR("callbacks should not happen on MGS\n"); LBUG(); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0); break; case OBD_PING: @@ -504,33 +501,27 @@ int mgs_handle(struct ptlrpc_request *req) break; case OBD_LOG_CANCEL: DEBUG_REQ(D_MGS, req, "log cancel\n"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOG_CANCEL_NET, 0); rc = -ENOTSUPP; /* la la la */ break; case LLOG_ORIGIN_HANDLE_CREATE: DEBUG_REQ(D_MGS, req, "llog_init"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_create(req); break; case LLOG_ORIGIN_HANDLE_NEXT_BLOCK: DEBUG_REQ(D_MGS, req, "llog next block"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_next_block(req); break; case LLOG_ORIGIN_HANDLE_READ_HEADER: DEBUG_REQ(D_MGS, req, "llog read header"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_read_header(req); break; case LLOG_ORIGIN_HANDLE_CLOSE: DEBUG_REQ(D_MGS, req, "llog close"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_close(req); break; case LLOG_CATINFO: DEBUG_REQ(D_MGS, req, "llog catinfo"); - OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_catinfo(req); break; default: diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 0bd90ee..688055c 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -7,15 +7,13 @@ #ifdef __KERNEL__ # include -# include #endif -#include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include /* MDS has o_t * 1000 */ diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 9cba6f5..d398be6 100644 --- 
a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -35,15 +35,15 @@ #include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include #include -#include -#include -#include +#include +#include +#include #include "mgs_internal.h" /********************** Class fns ********************/ diff --git a/lustre/obdclass/Info.plist b/lustre/obdclass/Info.plist new file mode 100644 index 0000000..0b7e718 --- /dev/null +++ b/lustre/obdclass/Info.plist @@ -0,0 +1,39 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + obdclass + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.obdclass + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.apple.kpi.unsupported + 8.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.lvfs + 1.0.0 + + + diff --git a/lustre/obdclass/Makefile.in b/lustre/obdclass/Makefile.in index ea93343..8c50c93 100644 --- a/lustre/obdclass/Makefile.in +++ b/lustre/obdclass/Makefile.in @@ -1,11 +1,32 @@ MODULES := obdclass llog_test -obdclass-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o -obdclass-objs += class_obd.o lu_object.o dt_object.o -obdclass-objs += debug.o genops.o sysctl.o uuid.o llog_ioctl.o -obdclass-objs += lprocfs_status.o lustre_handles.o lustre_peer.o -obdclass-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o mea.o prng.o +obdclass-linux-objs := linux-module.o linux-obdo.o linux-sysctl.o +ifeq ($(PATCHLEVEL),6) +obdclass-linux-objs := $(addprefix linux/,$(obdclass-linux-objs)) +endif + +default: all + +ifeq (@linux25@,no) +sources: + @for i in $(obdclass-linux-objs:%.o=%.c) ; do \ + echo "ln -s @srcdir@/linux/$$i ." ; \ + ln -sf @srcdir@/linux/$$i . 
|| exit 1 ; \ + done + +else +sources: + +endif + +obdclass-all-objs := llog.o llog_cat.o llog_lvfs.o llog_obd.o llog_swab.o +obdclass-all-objs += class_obd.o +obdclass-all-objs += debug.o genops.o uuid.o llog_ioctl.o +obdclass-all-objs += lprocfs_status.o lustre_handles.o lustre_peer.o +obdclass-all-objs += statfs_pack.o obdo.o obd_config.o obd_mount.o prng.o mea.o +obdclass-all-objs += lu_object.o dt_object.o +obdclass-objs := $(obdclass-linux-objs) $(obdclass-all-objs) ifeq ($(PATCHLEVEL),6) llog_test-objs := llog-test.o diff --git a/lustre/obdclass/autoMakefile.am b/lustre/obdclass/autoMakefile.am index 0951147..cc7b60a 100644 --- a/lustre/obdclass/autoMakefile.am +++ b/lustre/obdclass/autoMakefile.am @@ -1,20 +1,55 @@ +SUBDIRS := linux +if DARWIN +SUBDIRS += darwin +endif +DIST_SUBDIRS := $(SUBDIRS) + if LIBLUSTRE noinst_LIBRARIES = liblustreclass.a liblustreclass_a_SOURCES = class_obd.c debug.c genops.c statfs_pack.c mea.c uuid.c liblustreclass_a_SOURCES += lustre_handles.c lustre_peer.c lprocfs_status.c liblustreclass_a_SOURCES += obdo.c obd_config.c llog.c llog_obd.c llog_cat.c -liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c +liblustreclass_a_SOURCES += llog_lvfs.c llog_swab.c liblustreclass_a_SOURCES += prng.c #llog_ioctl.c rbtree.c liblustreclass_a_CPPFLAGS = $(LLCPPFLAGS) -DLUSTRE_VERSION=\"32\" -DBUILD_VERSION=\"1\" liblustreclass_a_CFLAGS = $(LLCFLAGS) endif + if MODULES + +if LINUX modulefs_DATA = obdclass$(KMODEXT) noinst_DATA = llog_test$(KMODEXT) +endif # LINUX + +if DARWIN +macos_PROGRAMS := obdclass + +obdclass_SOURCES := \ + darwin/darwin-module.c darwin/darwin-sysctl.c \ + class_obd.c genops.c lprocfs_status.c \ + lustre_handles.c lustre_peer.c obd_config.c \ + obdo.c debug.c llog_ioctl.c uuid.c prng.c \ + llog_swab.c llog_obd.c llog.c llog_cat.c llog_lvfs.c \ + mea.c lu_object.c dt_object.c + +obdclass_CFLAGS := $(EXTRA_KCFLAGS) +obdclass_LDFLAGS := $(EXTRA_KLDFLAGS) +obdclass_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + 
+install_data_hook := fix-kext-ownership + +endif # DARWIN + endif # MODULES +install-data-hook: $(install_data_hook) + MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ llog-test.c -DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h +MOSTLYCLEANFILES += linux/*.o darwin/*.o +DIST_SOURCES = $(filter-out llog-test.c,$(obdclass-all-objs:.o=.c)) $(llog-test-objs:.o=.c) llog_test.c llog_internal.h diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 1f9b1e1..49f935e 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -31,46 +31,19 @@ #ifndef EXPORT_SYMTAB # define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include /* for CONFIG_PROC_FS */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#else +#ifndef __KERNEL__ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include #ifdef __KERNEL__ #include -#include #endif #include +#include #include "llog_internal.h" #ifndef __KERNEL__ @@ -86,9 +59,7 @@ atomic_t obd_memory; int obd_memmax; #endif -int proc_version; - -/* The following are visible and mutable through /proc/fs/lustre/. */ +/* The following are visible and mutable through /proc/sys/lustre/. 
*/ unsigned int obd_fail_loc; unsigned int obd_dump_on_timeout; unsigned int obd_timeout = 100; /* seconds */ @@ -96,7 +67,7 @@ unsigned int ldlm_timeout = 20; /* seconds */ unsigned int obd_health_check_timeout = 120; /* seconds */ char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall */ -DECLARE_WAIT_QUEUE_HEAD(obd_race_waitq); +cfs_waitq_t obd_race_waitq; #ifdef __KERNEL__ unsigned int obd_print_fail_loc(void) @@ -111,7 +82,7 @@ void obd_set_fail_loc(unsigned int fl) } /* opening /dev/obd */ -static int obd_class_open(struct inode * inode, struct file * file) +static int obd_class_open(unsigned long flags, void *args) { ENTRY; @@ -120,7 +91,7 @@ static int obd_class_open(struct inode * inode, struct file * file) } /* closing /dev/obd */ -static int obd_class_release(struct inode * inode, struct file * file) +static int obd_class_release(unsigned long flags, void *args) { ENTRY; @@ -147,6 +118,7 @@ int class_resolve_dev_name(uint32_t len, char *name) int rc; int dev; + ENTRY; if (!len || !name) { CERROR("No name passed,!\n"); GOTO(out, rc = -EINVAL); @@ -179,14 +151,6 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) int err = 0, len = 0; ENTRY; -#ifdef __KERNEL__ - if (current->fsuid != 0) - RETURN(err = -EACCES); -#endif - - if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ - RETURN(err = -ENOTTY); - /* only for debugging */ if (cmd == LIBCFS_IOC_DEBUG_MASK) { debug_data = (struct libcfs_debug_ioctl_data*)arg; @@ -210,16 +174,19 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) CERROR("No config buffer passed!\n"); GOTO(out, err = -EINVAL); } - - err = lustre_cfg_sanity_check(data->ioc_pbuf1, - data->ioc_plen1); - if (err) - GOTO(out, err); - OBD_ALLOC(lcfg, data->ioc_plen1); err = copy_from_user(lcfg, data->ioc_pbuf1, data->ioc_plen1); - if (!err) - err = class_process_config(lcfg); + if (err) { + OBD_FREE(lcfg, data->ioc_plen1); + GOTO(out, err); + } + err = lustre_cfg_sanity_check(lcfg, 
data->ioc_plen1); + if (err) { + OBD_FREE(lcfg, data->ioc_plen1); + GOTO(out, err); + } + err = class_process_config(lcfg); + OBD_FREE(lcfg, data->ioc_plen1); GOTO(out, err); } @@ -238,7 +205,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) memcpy(data->ioc_bulk, BUILD_VERSION, strlen(BUILD_VERSION) + 1); - err = copy_to_user((void *)arg, data, len); + err = obd_ioctl_popdata((void *)arg, data, len); if (err) err = -EFAULT; GOTO(out, err); @@ -255,7 +222,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) if (dev < 0) GOTO(out, err = -EINVAL); - err = copy_to_user((void *)arg, data, sizeof(*data)); + err = obd_ioctl_popdata((void *)arg, data, sizeof(*data)); if (err) err = -EFAULT; GOTO(out, err); @@ -289,19 +256,55 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, dev); - err = copy_to_user((void *)arg, data, sizeof(*data)); + err = obd_ioctl_popdata((void *)arg, data, sizeof(*data)); if (err) err = -EFAULT; GOTO(out, err); } - case OBD_IOC_CLOSE_UUID: { CDEBUG(D_IOCTL, "closing all connections to uuid %s (NOOP)\n", data->ioc_inlbuf1); GOTO(out, err = 0); } + case OBD_IOC_GETDEVICE: { + int index = data->ioc_count; + char *status, *str; + + if (!data->ioc_inlbuf1) { + CERROR("No buffer passed in ioctl\n"); + GOTO(out, err = -EINVAL); + } + if (data->ioc_inllen1 < 128) { + CERROR("ioctl buffer too small to hold version\n"); + GOTO(out, err = -EINVAL); + } + + if (index >= MAX_OBD_DEVICES) + GOTO(out, err = -ENOENT); + obd = &obd_dev[index]; + if (!obd->obd_type) + GOTO(out, err = -ENOENT); + + if (obd->obd_stopping) + status = "ST"; + else if (obd->obd_set_up) + status = "UP"; + else if (obd->obd_attached) + status = "AT"; + else + status = "--"; + str = (char *)data->ioc_bulk; + snprintf(str, len - sizeof(*data), "%3d %s %s %s %s %d", + (int)index, status, obd->obd_type->typ_name, + obd->obd_name, obd->obd_uuid.uuid, + atomic_read(&obd->obd_refcount)); + err = 
obd_ioctl_popdata((void *)arg, data, len); + + GOTO(out, err = 0); + } + } if (data->ioc_dev >= MAX_OBD_DEVICES) { @@ -331,7 +334,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) if (err) GOTO(out, err); - err = copy_to_user((void *)arg, data, len); + err = obd_ioctl_popdata((void *)arg, data, len); if (err) err = -EFAULT; GOTO(out, err); @@ -349,26 +352,21 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg) #define OBD_MINOR 241 #ifdef __KERNEL__ /* to control /dev/obd */ -static int obd_class_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static int obd_class_ioctl (struct cfs_psdev_file *pfile, unsigned long cmd, void *arg) { - return class_handle_ioctl(cmd, arg); + return class_handle_ioctl(cmd, (unsigned long)arg); } /* declare character device */ -static struct file_operations obd_psdev_fops = { - .owner = THIS_MODULE, - .ioctl = obd_class_ioctl, /* ioctl */ - .open = obd_class_open, /* open */ - .release = obd_class_release, /* release */ +struct cfs_psdev_ops obd_psdev_ops = { + /* .p_open = */ obd_class_open, /* open */ + /* .p_close = */ obd_class_release, /* release */ + /* .p_read = */ NULL, + /* .p_write = */ NULL, + /* .p_ioctl = */ obd_class_ioctl /* ioctl */ }; -/* modules setup */ -static struct miscdevice obd_psdev = { - .minor = OBD_MINOR, - .name = "obd", - .fops = &obd_psdev_fops, -}; +extern cfs_psdev_t obd_psdev; #else void *obd_psdev = NULL; #endif @@ -384,7 +382,6 @@ EXPORT_SYMBOL(obd_health_check_timeout); EXPORT_SYMBOL(obd_lustre_upcall); EXPORT_SYMBOL(ptlrpc_put_connection_superhack); -struct proc_dir_entry *proc_lustre_root; EXPORT_SYMBOL(proc_lustre_root); EXPORT_SYMBOL(class_register_type); @@ -434,183 +431,6 @@ EXPORT_SYMBOL(class_manual_cleanup); EXPORT_SYMBOL(mea_name2idx); EXPORT_SYMBOL(raw_name2idx); -#ifdef LPROCFS -int obd_proc_read_version(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - *eof = 1; - return snprintf(page, count, 
"%s\n", BUILD_VERSION); -} - -int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - *eof = 1; - return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION); -} - -int obd_proc_read_pinger(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - *eof = 1; - return snprintf(page, count, "%s\n", -#ifdef ENABLE_PINGER - "on" -#else - "off" -#endif - ); -} - -static int obd_proc_read_health(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int rc = 0, i; - *eof = 1; - - if (libcfs_catastrophe) - rc += snprintf(page + rc, count - rc, "LBUG\n"); - - spin_lock(&obd_dev_lock); - for (i = 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd; - - obd = &obd_dev[i]; - if (obd->obd_type == NULL) - continue; - - if (obd->obd_stopping) - continue; - - class_incref(obd); - spin_unlock(&obd_dev_lock); - - if (obd_health_check(obd)) { - rc += snprintf(page + rc, count - rc, - "device %s reported unhealthy\n", - obd->obd_name); - } - class_decref(obd); - spin_lock(&obd_dev_lock); - } - spin_unlock(&obd_dev_lock); - - if (rc == 0) - return snprintf(page, count, "healthy\n"); - - rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n"); - return rc; -} - -static int obd_proc_rd_health_timeout(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - *eof = 1; - return snprintf(page, count, "%d\n", obd_health_check_timeout); -} - -static int obd_proc_wr_health_timeout(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int val, rc; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc) - return rc; - - obd_health_check_timeout = val; - - return count; -} - -/* Root for /proc/fs/lustre */ -struct lprocfs_vars lprocfs_base[] = { - { "version", obd_proc_read_version, NULL, NULL }, - { "kernel_version", obd_proc_read_kernel_version, NULL, NULL }, - { "pinger", obd_proc_read_pinger, NULL, NULL }, - { "health_check", obd_proc_read_health, 
NULL, NULL }, - { "health_check_timeout", obd_proc_rd_health_timeout, - obd_proc_wr_health_timeout, NULL }, - { 0 } -}; -#else -#define lprocfs_base NULL -#endif /* LPROCFS */ - -#ifdef __KERNEL__ -static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos) -{ - if (*pos >= MAX_OBD_DEVICES) - return NULL; - return &obd_dev[*pos]; -} - -static void obd_device_list_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - ++*pos; - if (*pos >= MAX_OBD_DEVICES) - return NULL; - return &obd_dev[*pos]; -} - -static int obd_device_list_seq_show(struct seq_file *p, void *v) -{ - struct obd_device *obd = (struct obd_device *)v; - int index = obd - &obd_dev[0]; - char *status; - - if (!obd->obd_type) - return 0; - if (obd->obd_stopping) - status = "ST"; - else if (obd->obd_set_up) - status = "UP"; - else if (obd->obd_attached) - status = "AT"; - else - status = "--"; - - return seq_printf(p, "%3d %s %s %s %s %d\n", - (int)index, status, obd->obd_type->typ_name, - obd->obd_name, obd->obd_uuid.uuid, - atomic_read(&obd->obd_refcount)); -} - -struct seq_operations obd_device_list_sops = { - .start = obd_device_list_seq_start, - .stop = obd_device_list_seq_stop, - .next = obd_device_list_seq_next, - .show = obd_device_list_seq_show, -}; - -static int obd_device_list_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *seq; - int rc = seq_open(file, &obd_device_list_sops); - - if (rc) - return rc; - - seq = file->private_data; - seq->private = dp->data; - - return 0; -} - -struct file_operations obd_device_list_fops = { - .owner = THIS_MODULE, - .open = obd_device_list_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - #define OBD_INIT_CHECK #ifdef OBD_INIT_CHECK int obd_init_checks(void) @@ -673,8 +493,8 @@ int obd_init_checks(void) CWARN("LPD64 wrong length! 
strlen(%s)=%d != 2\n", buf, len); ret = -EINVAL; } - if ((u64val & ~PAGE_MASK) >= PAGE_SIZE) { - CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val,PAGE_SIZE); + if ((u64val & ~CFS_PAGE_MASK) >= CFS_PAGE_SIZE) { + CWARN("mask failed: u64val "LPU64" >= %lu\n", u64val,CFS_PAGE_SIZE); ret = -EINVAL; } @@ -684,21 +504,22 @@ int obd_init_checks(void) #define obd_init_checks() do {} while(0) #endif +extern spinlock_t obd_types_lock; +extern spinlock_t handle_lock; +extern int class_procfs_init(void); +extern int class_procfs_clean(void); + #ifdef __KERNEL__ static int __init init_obdclass(void) #else int init_obdclass(void) #endif { + int i, err; struct obd_device *obd; #ifdef __KERNEL__ - struct proc_dir_entry *entry; int lustre_register_fs(void); -#endif - int err; - int i; -#ifdef __KERNEL__ printk(KERN_INFO "Lustre: OBD class driver Build Version: " BUILD_VERSION", info@clusterfs.com\n"); #else @@ -706,6 +527,10 @@ int init_obdclass(void) BUILD_VERSION", info@clusterfs.com\n"); #endif + spin_lock_init(&obd_types_lock); + spin_lock_init(&handle_lock); + cfs_waitq_init(&obd_race_waitq); + err = obd_init_checks(); if (err == -EOVERFLOW) return err; @@ -718,7 +543,7 @@ int init_obdclass(void) spin_lock_init(&obd_dev_lock); INIT_LIST_HEAD(&obd_types); - err = misc_register(&obd_psdev); + err = cfs_psdev_register(&obd_psdev); if (err) { CERROR("cannot register %d err %d\n", OBD_MINOR, err); return err; @@ -731,28 +556,12 @@ int init_obdclass(void) err = obd_init_caches(); if (err) return err; - #ifdef __KERNEL__ - obd_sysctl_init(); - - proc_lustre_root = proc_mkdir("lustre", proc_root_fs); - if (!proc_lustre_root) { - printk(KERN_ERR - "LustreError: error registering /proc/fs/lustre\n"); - RETURN(-ENOMEM); - } - proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL); - entry = create_proc_entry("devices", 0444, proc_lustre_root); - if (entry == NULL) { - CERROR("error registering /proc/fs/lustre/devices\n"); - lprocfs_remove(proc_lustre_root); - 
RETURN(-ENOMEM); - } - entry->proc_fops = &obd_device_list_fops; - + err = class_procfs_init(); lustre_register_fs(); #endif - return 0; + + return err; } /* liblustre doesn't call cleanup_obdclass, apparently. we carry on in this @@ -766,7 +575,7 @@ static void cleanup_obdclass(void) lustre_unregister_fs(); - misc_deregister(&obd_psdev); + cfs_psdev_deregister(&obd_psdev); for (i = 0; i < MAX_OBD_DEVICES; i++) { struct obd_device *obd = &obd_dev[i]; if (obd->obd_type && obd->obd_set_up && @@ -779,32 +588,16 @@ static void cleanup_obdclass(void) obd_cleanup_caches(); obd_sysctl_clean(); - if (proc_lustre_root) { - lprocfs_remove(proc_lustre_root); - proc_lustre_root = NULL; - } + class_procfs_clean(); class_handle_cleanup(); class_exit_uuidlist(); EXIT; } - -/* Check that we're building against the appropriate version of the Lustre - * kernel patch */ -#include -#define LUSTRE_MIN_VERSION 37 -#define LUSTRE_MAX_VERSION 47 -#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) -# error Cannot continue: Your Lustre kernel patch is older than the sources -#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) -# error Cannot continue: Your Lustre sources are older than the kernel patch -#endif - MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION); MODULE_LICENSE("GPL"); -module_init(init_obdclass); -module_exit(cleanup_obdclass); +cfs_module(obdclass, "1.0.0", init_obdclass, cleanup_obdclass); #endif diff --git a/lustre/obdclass/darwin/Makefile.am b/lustre/obdclass/darwin/Makefile.am new file mode 100644 index 0000000..75ba623 --- /dev/null +++ b/lustre/obdclass/darwin/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST := \ + darwin-module.c \ + darwin-sysctl.c diff --git a/lustre/obdclass/darwin/darwin-module.c b/lustre/obdclass/darwin/darwin-module.c new file mode 100644 index 0000000..287d942 --- /dev/null +++ b/lustre/obdclass/darwin/darwin-module.c @@ -0,0 +1,181 @@ +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifndef OBD_MAX_IOCTL_BUFFER +#define OBD_MAX_IOCTL_BUFFER 8192 +#endif + +/* buffer MUST be at least the size of obd_ioctl_hdr */ +int obd_ioctl_getdata(char **buf, int *len, void *arg) +{ + struct obd_ioctl_hdr *hdr; + struct obd_ioctl_data *data; + int err = 0; + int offset = 0; + ENTRY; + + hdr = (struct obd_ioctl_hdr *)arg; + if (hdr->ioc_version != OBD_IOCTL_VERSION) { + CERROR("Version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len > OBD_MAX_IOCTL_BUFFER) { + CERROR("User buffer len %d exceeds %d max buffer\n", + hdr->ioc_len, OBD_MAX_IOCTL_BUFFER); + RETURN(-EINVAL); + } + + if (hdr->ioc_len < sizeof(struct obd_ioctl_data)) { + CERROR("OBD: user buffer too small for ioctl (%d)\n", hdr->ioc_len); + RETURN(-EINVAL); + } + + /* XXX allocate this more intelligently, using kmalloc when + * appropriate */ + OBD_VMALLOC(*buf, hdr->ioc_len); + if (*buf == NULL) { + CERROR("Cannot allocate control buffer of len %d\n", + hdr->ioc_len); + RETURN(-EINVAL); + } + *len = hdr->ioc_len; + data = (struct obd_ioctl_data *)*buf; + + bzero(data, hdr->ioc_len); + memcpy(data, 
(void *)arg, sizeof(struct obd_ioctl_data)); + if (data->ioc_inlbuf1) + err = copy_from_user(&data->ioc_bulk[0], (void *)data->ioc_inlbuf1, + hdr->ioc_len - ((void *)&data->ioc_bulk[0] - (void *)data)); + + if (obd_ioctl_is_invalid(data)) { + CERROR("ioctl not correctly formatted\n"); + return -EINVAL; + } + + if (data->ioc_inllen1) { + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + offset += size_round(data->ioc_inllen1); + } + + if (data->ioc_inllen2) { + data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen2); + } + + if (data->ioc_inllen3) { + data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen3); + } + + if (data->ioc_inllen4) { + data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; + } + + EXIT; + return 0; +} + +int obd_ioctl_popdata(void *arg, void *data, int len) +{ + /* + * Xnu ioctl copyout(uaddr, arg, sizeof(struct obd_ioctl_data)), + * we have to copyout data exceed sizeof(struct obd_ioctl_data) + * by ourself. + */ + if (len <= sizeof(struct obd_ioctl_data)) { + memcpy(arg, data, len); + return 0; + } else { + int err; + struct obd_ioctl_data *u = (struct obd_ioctl_data *)arg; + struct obd_ioctl_data *k = (struct obd_ioctl_data *)data; + err = copy_to_user((void *)u->ioc_inlbuf1, &k->ioc_bulk[0], + len -((void *)&k->ioc_bulk[0] -(void *)k)); + memcpy(arg, data, sizeof(struct obd_ioctl_data)); + return err; + } +} +/* + * cfs pseudo device + */ +extern struct cfs_psdev_ops obd_psdev_ops; + +static int +obd_class_open(dev_t dev, int flags, int devtype, struct proc *p) +{ + if (obd_psdev_ops.p_open != NULL) + return -obd_psdev_ops.p_open(0, NULL); + return EPERM; +} + +/* closing /dev/obd */ +static int +obd_class_release(dev_t dev, int flags, int mode, struct proc *p) +{ + if (obd_psdev_ops.p_close != NULL) + return -obd_psdev_ops.p_close(0, NULL); + return EPERM; +} + +static int +obd_class_ioctl(dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p) +{ + int err = 0; + ENTRY; + + if 
(!is_suser()) + RETURN (EPERM); + if (obd_psdev_ops.p_ioctl != NULL) + err = -obd_psdev_ops.p_ioctl(NULL, cmd, (void *)arg); + else + err = EPERM; + + RETURN(err); +} + +static struct cdevsw obd_psdevsw = { + obd_class_open, + obd_class_release, + NULL, + NULL, + obd_class_ioctl, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +}; + +cfs_psdev_t obd_psdev = { + -1, + NULL, + "obd", + &obd_psdevsw +}; + +int class_procfs_init(void) +{ + return 0; +} + +int class_procfs_clean(void) +{ + return 0; +} diff --git a/lustre/obdclass/darwin/darwin-sysctl.c b/lustre/obdclass/darwin/darwin-sysctl.c new file mode 100644 index 0000000..59b7e45 --- /dev/null +++ b/lustre/obdclass/darwin/darwin-sysctl.c @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_CLASS + +#include +#ifndef BUILD_VERSION +#define BUILD_VERSION "Unknown" +#endif +#ifndef LUSTRE_KERNEL_VERSION +#define LUSTRE_KERNEL_VERSION "Unknown Darwin version" +#endif + +cfs_sysctl_table_header_t *obd_table_header = NULL; + +int proc_fail_loc SYSCTL_HANDLER_ARGS; +int proc_obd_timeout SYSCTL_HANDLER_ARGS; +extern unsigned int obd_fail_loc; +extern unsigned int obd_dump_on_timeout; +extern unsigned int obd_timeout; +extern unsigned int ldlm_timeout; +extern char obd_lustre_upcall[128]; +extern unsigned int obd_sync_filter; +extern atomic_t obd_memory; + +int read_build_version SYSCTL_HANDLER_ARGS; +int read_lustre_kernel_version SYSCTL_HANDLER_ARGS; + +SYSCTL_NODE (, OID_AUTO, lustre, CTLFLAG_RW, + 0, "lustre sysctl top"); +SYSCTL_PROC(_lustre, OID_AUTO, fail_loc, + CTLTYPE_INT | CTLFLAG_RW , &obd_fail_loc, + 0, &proc_fail_loc, "I", "obd_fail_loc"); +SYSCTL_PROC(_lustre, OID_AUTO, timeout, + CTLTYPE_INT | CTLFLAG_RW , &obd_timeout, + 0, &proc_obd_timeout, "I", "obd_timeout"); +SYSCTL_PROC(_lustre, OID_AUTO, build_version, + CTLTYPE_STRING | CTLFLAG_RD , NULL, + 0, &read_build_version, "A", "lustre_build_version"); 
+SYSCTL_PROC(_lustre, OID_AUTO, lustre_kernel_version, + CTLTYPE_STRING | CTLFLAG_RD , NULL, + 0, &read_lustre_kernel_version, "A", "lustre_build_version"); +SYSCTL_INT(_lustre, OID_AUTO, dump_on_timeout, + CTLTYPE_INT | CTLFLAG_RW, &obd_dump_on_timeout, + 0, "lustre_dump_on_timeout"); +SYSCTL_STRING(_lustre, OID_AUTO, upcall, + CTLTYPE_STRING | CTLFLAG_RW, obd_lustre_upcall, + 128, "lustre_upcall"); +SYSCTL_INT(_lustre, OID_AUTO, memused, + CTLTYPE_INT | CTLFLAG_RW, (int *)&obd_memory.counter, + 0, "lustre_memory_used"); +SYSCTL_INT(_lustre, OID_AUTO, ldlm_timeout, + CTLTYPE_INT | CTLFLAG_RW, &ldlm_timeout, + 0, "ldlm_timeout"); + +static cfs_sysctl_table_t parent_table[] = { + &sysctl__lustre, + &sysctl__lustre_fail_loc, + &sysctl__lustre_timeout, + &sysctl__lustre_dump_on_timeout, + &sysctl__lustre_upcall, + &sysctl__lustre_memused, + &sysctl__lustre_filter_sync_on_commit, + &sysctl__lustre_ldlm_timeout, +}; + +extern cfs_waitq_t obd_race_waitq; + +int proc_fail_loc SYSCTL_HANDLER_ARGS +{ + int error = 0; + int old_fail_loc = obd_fail_loc; + + error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if (!error && req->newptr != USER_ADDR_NULL) { + if (old_fail_loc != obd_fail_loc) + cfs_waitq_signal(&obd_race_waitq); + } else if (req->newptr != USER_ADDR_NULL) { + /* Something was wrong with the write request */ + printf ("sysctl fail loc fault: %d.\n", error); + } else { + /* Read request */ + error = SYSCTL_OUT(req, &obd_fail_loc, sizeof obd_fail_loc); + } + return error; +} + +int proc_obd_timeout SYSCTL_HANDLER_ARGS +{ + int error = 0; + + error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if (!error && req->newptr != USER_ADDR_NULL) { + if (ldlm_timeout >= obd_timeout) + ldlm_timeout = max(obd_timeout / 3, 1U); + } else if (req->newptr != USER_ADDR_NULL) { + printf ("sysctl fail obd_timeout: %d.\n", error); + } else { + /* Read request */ + error = SYSCTL_OUT(req, &obd_timeout, sizeof obd_timeout); + } + return error; 
+} + +int read_build_version SYSCTL_HANDLER_ARGS +{ + int error = 0; + + error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if ( req->newptr != USER_ADDR_NULL) { + printf("sysctl read_build_version is read-only!\n"); + } else { + error = SYSCTL_OUT(req, BUILD_VERSION, strlen(BUILD_VERSION)); + } + return error; +} + +int read_lustre_kernel_version SYSCTL_HANDLER_ARGS +{ + int error = 0; + + error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if ( req->newptr != NULL) { + printf("sysctl lustre_kernel_version is read-only!\n"); + } else { + error = SYSCTL_OUT(req, LUSTRE_KERNEL_VERSION, strlen(LUSTRE_KERNEL_VERSION)); + } + return error; +} + +void obd_sysctl_init (void) +{ +#if 1 + if ( !obd_table_header ) + obd_table_header = cfs_register_sysctl_table(parent_table, 0); +#endif +} + +void obd_sysctl_clean (void) +{ +#if 1 + if ( obd_table_header ) + cfs_unregister_sysctl_table(obd_table_header); + obd_table_header = NULL; +#endif +} + diff --git a/lustre/obdclass/debug.c b/lustre/obdclass/debug.c index 90c57e1..763606b 100644 --- a/lustre/obdclass/debug.c +++ b/lustre/obdclass/debug.c @@ -33,10 +33,10 @@ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include int dump_ioo(struct obd_ioobj *ioo) { @@ -50,7 +50,7 @@ int dump_lniobuf(struct niobuf_local *nb) { CERROR("niobuf_local: offset="LPD64", len=%d, page=%p, rc=%d\n", nb->offset, nb->len, nb->page, nb->rc); - CERROR("nb->page: index = %ld\n", nb->page ? nb->page->index : -1); + CERROR("nb->page: index = %ld\n", nb->page ? 
cfs_page_index(nb->page) : -1); return -EINVAL; } diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index a5a20e9..f5639d5 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -32,7 +32,7 @@ # define EXPORT_SYMTAB #endif -#include +#include #include void dt_txn_callback_add(struct dt_device *dev, struct dt_txn_callback *cb) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index f77ee69..4930dc1 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -26,24 +26,19 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -#include /* for request_module() */ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include -#include -#include -#include +#include +#include +#include extern struct list_head obd_types; -static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED; +spinlock_t obd_types_lock; -kmem_cache_t *obdo_cachep = NULL; +cfs_mem_cache_t *obdo_cachep = NULL; EXPORT_SYMBOL(obdo_cachep); -kmem_cache_t *import_cachep = NULL; +cfs_mem_cache_t *import_cachep = NULL; int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); @@ -411,15 +406,17 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next) void obd_cleanup_caches(void) { + int rc; + ENTRY; if (obdo_cachep) { - LASSERTF(kmem_cache_destroy(obdo_cachep) == 0, - "Cannot destory ll_obdo_cache\n"); + rc = cfs_mem_cache_destroy(obdo_cachep); + LASSERTF(rc == 0, "Cannot destory ll_obdo_cache\n"); obdo_cachep = NULL; } if (import_cachep) { - LASSERTF(kmem_cache_destroy(import_cachep) == 0, - "Cannot destory ll_import_cache\n"); + rc = cfs_mem_cache_destroy(import_cachep); + LASSERTF(rc == 0, "Cannot destory ll_import_cache\n"); import_cachep = NULL; } EXIT; @@ -430,15 +427,15 @@ int obd_init_caches(void) ENTRY; LASSERT(obdo_cachep == NULL); - obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), - 0, 0, NULL, NULL); + obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", 
sizeof(struct obdo), + 0, 0); if (!obdo_cachep) GOTO(out, -ENOMEM); LASSERT(import_cachep == NULL); - import_cachep = kmem_cache_create("ll_import_cache", + import_cachep = cfs_mem_cache_create("ll_import_cache", sizeof(struct obd_import), - 0, 0, NULL, NULL); + 0, 0); if (!import_cachep) GOTO(out, -ENOMEM); @@ -549,11 +546,11 @@ struct obd_export *class_new_export(struct obd_device *obd, export->exp_conn_cnt = 0; atomic_set(&export->exp_refcount, 2); export->exp_obd = obd; - INIT_LIST_HEAD(&export->exp_outstanding_replies); + CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); /* XXX this should be in LDLM init */ - INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks); + CFS_INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks); - INIT_LIST_HEAD(&export->exp_handle.h_link); + CFS_INIT_LIST_HEAD(&export->exp_handle.h_link); class_handle_hash(&export->exp_handle, export_handle_addref); export->exp_last_request_time = CURRENT_SECONDS; spin_lock_init(&export->exp_lock); @@ -660,19 +657,19 @@ struct obd_import *class_new_import(struct obd_device *obd) if (imp == NULL) return NULL; - INIT_LIST_HEAD(&imp->imp_replay_list); - INIT_LIST_HEAD(&imp->imp_sending_list); - INIT_LIST_HEAD(&imp->imp_delayed_list); + CFS_INIT_LIST_HEAD(&imp->imp_replay_list); + CFS_INIT_LIST_HEAD(&imp->imp_sending_list); + CFS_INIT_LIST_HEAD(&imp->imp_delayed_list); spin_lock_init(&imp->imp_lock); imp->imp_state = LUSTRE_IMP_NEW; imp->imp_obd = class_incref(obd); - init_waitqueue_head(&imp->imp_recovery_waitq); + cfs_waitq_init(&imp->imp_recovery_waitq); atomic_set(&imp->imp_refcount, 2); atomic_set(&imp->imp_inflight, 0); atomic_set(&imp->imp_replay_inflight, 0); - INIT_LIST_HEAD(&imp->imp_conn_list); - INIT_LIST_HEAD(&imp->imp_handle.h_link); + CFS_INIT_LIST_HEAD(&imp->imp_conn_list); + CFS_INIT_LIST_HEAD(&imp->imp_handle.h_link); class_handle_hash(&imp->imp_handle, import_handle_addref); return imp; @@ -830,7 +827,7 @@ void class_disconnect_stale_exports(struct obd_device *obd) int cnt = 0; 
ENTRY; - INIT_LIST_HEAD(&work_list); + CFS_INIT_LIST_HEAD(&work_list); spin_lock(&obd->obd_dev_lock); list_for_each_safe(pos, n, &obd->obd_exports) { exp = list_entry(pos, struct obd_export, exp_obd_chain); @@ -862,8 +859,8 @@ int oig_init(struct obd_io_group **oig_out) oig->oig_rc = 0; oig->oig_pending = 0; atomic_set(&oig->oig_refcount, 1); - init_waitqueue_head(&oig->oig_waitq); - INIT_LIST_HEAD(&oig->oig_occ_list); + cfs_waitq_init(&oig->oig_waitq); + CFS_INIT_LIST_HEAD(&oig->oig_occ_list); *oig_out = oig; RETURN(0); @@ -899,7 +896,7 @@ void oig_complete_one(struct obd_io_group *oig, struct oig_callback_context *occ, int rc) { unsigned long flags; - wait_queue_head_t *wake = NULL; + cfs_waitq_t *wake = NULL; int old_rc; spin_lock_irqsave(&oig->oig_lock, flags); @@ -920,7 +917,7 @@ void oig_complete_one(struct obd_io_group *oig, "pending (racey)\n", oig, old_rc, oig->oig_rc, rc, oig->oig_pending); if (wake) - wake_up(wake); + cfs_waitq_signal(wake); oig_release(oig); } EXPORT_SYMBOL(oig_complete_one); diff --git a/lustre/obdclass/linux/Makefile.am b/lustre/obdclass/linux/Makefile.am new file mode 100644 index 0000000..bf95892 --- /dev/null +++ b/lustre/obdclass/linux/Makefile.am @@ -0,0 +1,4 @@ +EXTRA_DIST := \ + linux-module.c \ + linux-sysctl.c \ + linux-obdo.c diff --git a/lustre/obdclass/linux/linux-module.c b/lustre/obdclass/linux/linux-module.c new file mode 100644 index 0000000..517035c --- /dev/null +++ b/lustre/obdclass/linux/linux-module.c @@ -0,0 +1,441 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Object Devices Class Driver + * + * Copyright (C) 2001-2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * These are the only exported functions, they provide some generic + * infrastructure for managing object devices + */ +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifdef __KERNEL__ +#include /* for CONFIG_PROC_FS */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +# include +#endif + +#include +#include +#include +#include +#ifdef __KERNEL__ +#include +#include + +int proc_version; + +/* buffer MUST be at least the size of obd_ioctl_hdr */ +int obd_ioctl_getdata(char **buf, int *len, void *arg) +{ + struct obd_ioctl_hdr hdr; + struct obd_ioctl_data *data; + int err; + int offset = 0; + ENTRY; + + err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); + if ( err ) + RETURN(err); + + if (hdr.ioc_version != OBD_IOCTL_VERSION) { + CERROR("Version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { + CERROR("User buffer len %d exceeds %d max buffer\n", + hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); + RETURN(-EINVAL); + } + + if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { + CERROR("User buffer too small for ioctl (%d)\n", hdr.ioc_len); + RETURN(-EINVAL); + } + + /* XXX allocate this more intelligently, using kmalloc when + * appropriate */ + OBD_VMALLOC(*buf, hdr.ioc_len); + if (*buf == NULL) { + CERROR("Cannot allocate control buffer 
of len %d\n", + hdr.ioc_len); + RETURN(-EINVAL); + } + *len = hdr.ioc_len; + data = (struct obd_ioctl_data *)*buf; + + err = copy_from_user(*buf, (void *)arg, hdr.ioc_len); + if ( err ) { + OBD_VFREE(*buf, hdr.ioc_len); + RETURN(err); + } + + if (obd_ioctl_is_invalid(data)) { + CERROR("ioctl not correctly formatted\n"); + OBD_VFREE(*buf, hdr.ioc_len); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1) { + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + offset += size_round(data->ioc_inllen1); + } + + if (data->ioc_inllen2) { + data->ioc_inlbuf2 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen2); + } + + if (data->ioc_inllen3) { + data->ioc_inlbuf3 = &data->ioc_bulk[0] + offset; + offset += size_round(data->ioc_inllen3); + } + + if (data->ioc_inllen4) { + data->ioc_inlbuf4 = &data->ioc_bulk[0] + offset; + } + + EXIT; + return 0; +} + +int obd_ioctl_popdata(void *arg, void *data, int len) +{ + int err; + + err = copy_to_user(arg, data, len); + if (err) + err = -EFAULT; + return err; +} + +EXPORT_SYMBOL(obd_ioctl_getdata); +EXPORT_SYMBOL(obd_ioctl_popdata); + +#define OBD_MINOR 241 +extern struct cfs_psdev_ops obd_psdev_ops; + +/* opening /dev/obd */ +static int obd_class_open(struct inode * inode, struct file * file) +{ + if (obd_psdev_ops.p_open != NULL) + return obd_psdev_ops.p_open(0, NULL); + return -EPERM; +} + +/* closing /dev/obd */ +static int obd_class_release(struct inode * inode, struct file * file) +{ + if (obd_psdev_ops.p_close != NULL) + return obd_psdev_ops.p_close(0, NULL); + return -EPERM; +} + +/* to control /dev/obd */ +static int obd_class_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + ENTRY; + + if (current->fsuid != 0) + RETURN(err = -EACCES); + if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ + RETURN(err = -ENOTTY); + + if (obd_psdev_ops.p_ioctl != NULL) + err = obd_psdev_ops.p_ioctl(NULL, cmd, (void *)arg); + else + err = -EPERM; + + RETURN(err); +} 
+ +/* declare character device */ +static struct file_operations obd_psdev_fops = { + .owner = THIS_MODULE, + .ioctl = obd_class_ioctl, /* ioctl */ + .open = obd_class_open, /* open */ + .release = obd_class_release, /* release */ +}; + +/* modules setup */ +cfs_psdev_t obd_psdev = { + .minor = OBD_MINOR, + .name = "obd_psdev", + .fops = &obd_psdev_fops, +}; + +#endif + +#ifdef LPROCFS +int obd_proc_read_version(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + *eof = 1; + return snprintf(page, count, "%s\n", BUILD_VERSION); +} + +int obd_proc_read_kernel_version(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + *eof = 1; + return snprintf(page, count, "%u\n", LUSTRE_KERNEL_VERSION); +} + +int obd_proc_read_pinger(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + *eof = 1; + return snprintf(page, count, "%s\n", +#ifdef ENABLE_PINGER + "on" +#else + "off" +#endif + ); +} + +static int obd_proc_read_health(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int rc = 0, i; + *eof = 1; + + if (libcfs_catastrophe) + rc += snprintf(page + rc, count - rc, "LBUG\n"); + + spin_lock(&obd_dev_lock); + for (i = 0; i < MAX_OBD_DEVICES; i++) { + struct obd_device *obd; + + obd = &obd_dev[i]; + if (obd->obd_type == NULL) + continue; + + class_incref(obd); + spin_unlock(&obd_dev_lock); + + if (obd_health_check(obd)) { + rc += snprintf(page + rc, count - rc, + "device %s reported unhealthy\n", + obd->obd_name); + } + class_decref(obd); + spin_lock(&obd_dev_lock); + } + spin_unlock(&obd_dev_lock); + + if (rc == 0) + return snprintf(page, count, "healthy\n"); + + rc += snprintf(page + rc, count - rc, "NOT HEALTHY\n"); + return rc; +} + +static int obd_proc_rd_health_timeout(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + *eof = 1; + return snprintf(page, count, "%d\n", obd_health_check_timeout); +} + +static int obd_proc_wr_health_timeout(struct file 
*file, const char *buffer, + unsigned long count, void *data) +{ + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + obd_health_check_timeout = val; + + return count; +} + +/* Root for /proc/fs/lustre */ +struct proc_dir_entry *proc_lustre_root = NULL; + +struct lprocfs_vars lprocfs_base[] = { + { "version", obd_proc_read_version, NULL, NULL }, + { "kernel_version", obd_proc_read_kernel_version, NULL, NULL }, + { "pinger", obd_proc_read_pinger, NULL, NULL }, + { "health_check", obd_proc_read_health, NULL, NULL }, + { "health_check_timeout", obd_proc_rd_health_timeout, + obd_proc_wr_health_timeout, NULL }, + { 0 } +}; +#else +#define lprocfs_base NULL +#endif /* LPROCFS */ + +#ifdef __KERNEL__ +static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos) +{ + if (*pos >= MAX_OBD_DEVICES) + return NULL; + return &obd_dev[*pos]; +} + +static void obd_device_list_seq_stop(struct seq_file *p, void *v) +{ +} + +static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos) +{ + ++*pos; + if (*pos >= MAX_OBD_DEVICES) + return NULL; + return &obd_dev[*pos]; +} + +static int obd_device_list_seq_show(struct seq_file *p, void *v) +{ + struct obd_device *obd = (struct obd_device *)v; + int index = obd - &obd_dev[0]; + char *status; + + if (!obd->obd_type) + return 0; + if (obd->obd_stopping) + status = "ST"; + else if (obd->obd_set_up) + status = "UP"; + else if (obd->obd_attached) + status = "AT"; + else + status = "--"; + + return seq_printf(p, "%3d %s %s %s %s %d\n", + (int)index, status, obd->obd_type->typ_name, + obd->obd_name, obd->obd_uuid.uuid, + atomic_read(&obd->obd_refcount)); +} + +struct seq_operations obd_device_list_sops = { + .start = obd_device_list_seq_start, + .stop = obd_device_list_seq_stop, + .next = obd_device_list_seq_next, + .show = obd_device_list_seq_show, +}; + +static int obd_device_list_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *dp = PDE(inode); + 
struct seq_file *seq; + int rc = seq_open(file, &obd_device_list_sops); + + if (rc) + return rc; + + seq = file->private_data; + seq->private = dp->data; + + return 0; +} + +struct file_operations obd_device_list_fops = { + .owner = THIS_MODULE, + .open = obd_device_list_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + +int class_procfs_init(void) +{ +#ifdef __KERNEL__ + struct proc_dir_entry *entry; + ENTRY; + + obd_sysctl_init(); + proc_lustre_root = proc_mkdir("lustre", proc_root_fs); + if (!proc_lustre_root) { + printk(KERN_ERR + "LustreError: error registering /proc/fs/lustre\n"); + RETURN(-ENOMEM); + } + proc_version = lprocfs_add_vars(proc_lustre_root, lprocfs_base, NULL); + entry = create_proc_entry("devices", 0444, proc_lustre_root); + if (entry == NULL) { + CERROR("error registering /proc/fs/lustre/devices\n"); + lprocfs_remove(proc_lustre_root); + RETURN(-ENOMEM); + } + entry->proc_fops = &obd_device_list_fops; +#else + ENTRY; +#endif + RETURN(0); +} + +#ifdef __KERNEL__ +int class_procfs_clean(void) +{ + ENTRY; + if (proc_lustre_root) { + lprocfs_remove(proc_lustre_root); + proc_lustre_root = NULL; + } + RETURN(0); +} + + +/* Check that we're building against the appropriate version of the Lustre + * kernel patch */ +#include +#define LUSTRE_MIN_VERSION 37 +#define LUSTRE_MAX_VERSION 47 +#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) +# error Cannot continue: Your Lustre kernel patch is older than the sources +#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) +# error Cannot continue: Your Lustre sources are older than the kernel patch +#endif +#endif diff --git a/lustre/obdclass/linux/linux-obdo.c b/lustre/obdclass/linux/linux-obdo.c new file mode 100644 index 0000000..b5db22d --- /dev/null +++ b/lustre/obdclass/linux/linux-obdo.c @@ -0,0 +1,268 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Object Devices Class Driver + * + * Copyright (C) 
2001-2003 Cluster File Systems, Inc. + * + * This file is part of the Lustre file system, http://www.lustre.org + * Lustre is a trademark of Cluster File Systems, Inc. + * + * You may have signed or agreed to another license before downloading + * this software. If so, you are bound by the terms and conditions + * of that agreement, and the following does not apply to you. See the + * LICENSE file included with this distribution for more information. + * + * If you did not agree to a different license, then this copy of Lustre + * is open source software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In either case, Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * license text for more details. + * + * These are the only exported functions, they provide some generic + * infrastructure for managing object devices + */ + +#define DEBUG_SUBSYSTEM S_CLASS +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifndef __KERNEL__ +#include +#else +#include +#include +#include +#endif + +#ifdef __KERNEL__ +#include +#include /* for PAGE_CACHE_SIZE */ + +void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid) +{ + if (ia_valid & ATTR_ATIME) { + oa->o_atime = LTIME_S(attr->ia_atime); + oa->o_valid |= OBD_MD_FLATIME; + } + if (ia_valid & ATTR_MTIME) { + oa->o_mtime = LTIME_S(attr->ia_mtime); + oa->o_valid |= OBD_MD_FLMTIME; + } + if (ia_valid & ATTR_CTIME) { + oa->o_ctime = LTIME_S(attr->ia_ctime); + oa->o_valid |= OBD_MD_FLCTIME; + } + if (ia_valid & ATTR_SIZE) { + oa->o_size = attr->ia_size; + oa->o_valid |= OBD_MD_FLSIZE; + } + if (ia_valid & ATTR_MODE) { + oa->o_mode = attr->ia_mode; + oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; + if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) 
+ oa->o_mode &= ~S_ISGID; + } + if (ia_valid & ATTR_UID) { + oa->o_uid = attr->ia_uid; + oa->o_valid |= OBD_MD_FLUID; + } + if (ia_valid & ATTR_GID) { + oa->o_gid = attr->ia_gid; + oa->o_valid |= OBD_MD_FLGID; + } +} +EXPORT_SYMBOL(obdo_from_iattr); + +void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid) +{ + valid &= oa->o_valid; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n", + oa->o_valid, oa->o_mtime, oa->o_ctime); + + attr->ia_valid = 0; + if (valid & OBD_MD_FLATIME) { + LTIME_S(attr->ia_atime) = oa->o_atime; + attr->ia_valid |= ATTR_ATIME; + } + if (valid & OBD_MD_FLMTIME) { + LTIME_S(attr->ia_mtime) = oa->o_mtime; + attr->ia_valid |= ATTR_MTIME; + } + if (valid & OBD_MD_FLCTIME) { + LTIME_S(attr->ia_ctime) = oa->o_ctime; + attr->ia_valid |= ATTR_CTIME; + } + if (valid & OBD_MD_FLSIZE) { + attr->ia_size = oa->o_size; + attr->ia_valid |= ATTR_SIZE; + } +#if 0 /* you shouldn't be able to change a file's type with setattr */ + if (valid & OBD_MD_FLTYPE) { + attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); + attr->ia_valid |= ATTR_MODE; + } +#endif + if (valid & OBD_MD_FLMODE) { + attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); + attr->ia_valid |= ATTR_MODE; + if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) + attr->ia_mode &= ~S_ISGID; + } + if (valid & OBD_MD_FLUID) { + attr->ia_uid = oa->o_uid; + attr->ia_valid |= ATTR_UID; + } + if (valid & OBD_MD_FLGID) { + attr->ia_gid = oa->o_gid; + attr->ia_valid |= ATTR_GID; + } +} +EXPORT_SYMBOL(iattr_from_obdo); + +/* WARNING: the file systems must take care not to tinker with + attributes they don't manage (such as blocks). 
*/ +void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) +{ + obd_flag newvalid = 0; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", + valid, LTIME_S(src->i_mtime), + LTIME_S(src->i_ctime)); + + if (valid & OBD_MD_FLATIME) { + dst->o_atime = LTIME_S(src->i_atime); + newvalid |= OBD_MD_FLATIME; + } + if (valid & OBD_MD_FLMTIME) { + dst->o_mtime = LTIME_S(src->i_mtime); + newvalid |= OBD_MD_FLMTIME; + } + if (valid & OBD_MD_FLCTIME) { + dst->o_ctime = LTIME_S(src->i_ctime); + newvalid |= OBD_MD_FLCTIME; + } + if (valid & OBD_MD_FLSIZE) { + dst->o_size = src->i_size; + newvalid |= OBD_MD_FLSIZE; + } + if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */ + dst->o_blocks = src->i_blocks; + newvalid |= OBD_MD_FLBLOCKS; + } + if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */ + dst->o_blksize = src->i_blksize; + newvalid |= OBD_MD_FLBLKSZ; + } + if (valid & OBD_MD_FLTYPE) { + dst->o_mode = (dst->o_mode & S_IALLUGO)|(src->i_mode & S_IFMT); + newvalid |= OBD_MD_FLTYPE; + } + if (valid & OBD_MD_FLMODE) { + dst->o_mode = (dst->o_mode & S_IFMT)|(src->i_mode & S_IALLUGO); + newvalid |= OBD_MD_FLMODE; + } + if (valid & OBD_MD_FLUID) { + dst->o_uid = src->i_uid; + newvalid |= OBD_MD_FLUID; + } + if (valid & OBD_MD_FLGID) { + dst->o_gid = src->i_gid; + newvalid |= OBD_MD_FLGID; + } + if (valid & OBD_MD_FLFLAGS) { + dst->o_flags = src->i_flags; + newvalid |= OBD_MD_FLFLAGS; + } + if (valid & OBD_MD_FLGENER) { + dst->o_generation = src->i_generation; + newvalid |= OBD_MD_FLGENER; + } + if (valid & OBD_MD_FLFID) { + dst->o_fid = src->i_ino; + newvalid |= OBD_MD_FLFID; + } + + dst->o_valid |= newvalid; +} +EXPORT_SYMBOL(obdo_from_inode); + +void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) +{ + valid &= src->o_valid; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE, + "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n", + src->o_valid, 
LTIME_S(dst->i_mtime), + LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime); + + if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime)) + LTIME_S(dst->i_atime) = src->o_atime; + if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime)) + LTIME_S(dst->i_mtime) = src->o_mtime; + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) + LTIME_S(dst->i_ctime) = src->o_ctime; + if (valid & OBD_MD_FLSIZE) + dst->i_size = src->o_size; + /* optimum IO size */ + if (valid & OBD_MD_FLBLKSZ && src->o_blksize > dst->i_blksize) + dst->i_blksize = src->o_blksize; + if (dst->i_blksize < PAGE_CACHE_SIZE) + dst->i_blksize = PAGE_CACHE_SIZE; + /* allocation of space */ + if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) + dst->i_blocks = src->o_blocks; +} +EXPORT_SYMBOL(obdo_refresh_inode); + +void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) +{ + valid &= src->o_valid; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE, + "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n", + src->o_valid, LTIME_S(dst->i_mtime), + LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime); + + if (valid & OBD_MD_FLATIME) + LTIME_S(dst->i_atime) = src->o_atime; + if (valid & OBD_MD_FLMTIME) + LTIME_S(dst->i_mtime) = src->o_mtime; + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) + LTIME_S(dst->i_ctime) = src->o_ctime; + if (valid & OBD_MD_FLSIZE) + dst->i_size = src->o_size; + if (valid & OBD_MD_FLBLOCKS) { /* allocation of space */ + dst->i_blocks = src->o_blocks; + if (dst->i_blocks < src->o_blocks) /* overflow */ + dst->i_blocks = -1; + + } + if (valid & OBD_MD_FLBLKSZ) + dst->i_blksize = src->o_blksize; + if (valid & OBD_MD_FLTYPE) + dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT); + if (valid & OBD_MD_FLMODE) + dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT); + if (valid & OBD_MD_FLUID) + dst->i_uid = src->o_uid; + if (valid & OBD_MD_FLGID) + dst->i_gid = 
src->o_gid; + if (valid & OBD_MD_FLFLAGS) + dst->i_flags = src->o_flags; + if (valid & OBD_MD_FLGENER) + dst->i_generation = src->o_generation; +} +EXPORT_SYMBOL(obdo_to_inode); +#endif + diff --git a/lustre/obdclass/sysctl.c b/lustre/obdclass/linux/linux-sysctl.c similarity index 99% rename from lustre/obdclass/sysctl.c rename to lustre/obdclass/linux/linux-sysctl.c index 73db087..169aecb 100644 --- a/lustre/obdclass/sysctl.c +++ b/lustre/obdclass/linux/linux-sysctl.c @@ -44,7 +44,7 @@ #define DEBUG_SUBSYSTEM S_CLASS -#include +#include struct ctl_table_header *obd_table_header = NULL; diff --git a/lustre/obdclass/llog.c b/lustre/obdclass/llog.c index 18e8c00..2af0105 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -35,14 +35,12 @@ #define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include +#include +#include #include /* Allocate a new log or catalog handle */ @@ -170,10 +168,10 @@ int llog_init_handle(struct llog_handle *handle, int flags, out: if (flags & LLOG_F_IS_CAT) { - INIT_LIST_HEAD(&handle->u.chd.chd_head); + CFS_INIT_LIST_HEAD(&handle->u.chd.chd_head); llh->llh_size = sizeof(struct llog_logid_rec); } else if (flags & LLOG_F_IS_PLAIN) { - INIT_LIST_HEAD(&handle->u.phd.phd_entry); + CFS_INIT_LIST_HEAD(&handle->u.phd.phd_entry); } else { CERROR("Unknown flags: %#x (Expected %#x or %#x\n", flags, LLOG_F_IS_CAT, LLOG_F_IS_PLAIN); diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index 396c2ca..55039cc 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -35,14 +35,12 @@ #define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include +#include +#include #include /* Create a new log handle and add it to the open list. 
@@ -343,6 +341,7 @@ int llog_cat_process_cb(struct llog_handle *cat_llh, struct llog_rec_hdr *rec, struct llog_handle *llh; int rc; + ENTRY; if (rec->lrh_type != LLOG_LOGID_MAGIC) { CERROR("invalid record in catalog\n"); RETURN(-EINVAL); diff --git a/lustre/obdclass/llog_ioctl.c b/lustre/obdclass/llog_ioctl.c index ce3f0a1..9bdea74 100644 --- a/lustre/obdclass/llog_ioctl.c +++ b/lustre/obdclass/llog_ioctl.c @@ -28,9 +28,8 @@ #define EXPORT_SYMTAB #endif -#include -#include -#include +#include +#include #include #include "llog_internal.h" @@ -38,6 +37,7 @@ static int str2logid(struct llog_logid *logid, char *str, int len) { char *start, *end, *endp; + ENTRY; start = str; if (*start != '#') RETURN(-EINVAL); @@ -85,6 +85,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, char *endp; int cur_index, rc = 0; + ENTRY; cur_index = rec->lrh_index; if (ioc_data && (ioc_data->ioc_inllen1)) { @@ -128,7 +129,6 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, case MDS_UNLINK_REC: case MDS_SETATTR_REC: case OBD_CFG_REC: - case PTL_CFG_REC: /* obsolete */ case LLOG_HDR_MAGIC: { l = snprintf(out, remains, "[index]: %05d [type]: " "%02x [len]: %04d ok\n", @@ -169,6 +169,7 @@ static int llog_print_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, char *endp; int cur_index; + ENTRY; if (ioc_data->ioc_inllen1) { l = 0; remains = ioc_data->ioc_inllen4 + @@ -222,6 +223,7 @@ static int llog_remove_log(struct llog_handle *cat, struct llog_logid *logid) struct llog_handle *log; int rc, index = 0; + ENTRY; down_write(&cat->lgh_lock); rc = llog_cat_id2handle(cat, &log, logid); if (rc) { @@ -252,8 +254,9 @@ static int llog_delete_cb(struct llog_handle *handle, struct llog_rec_hdr *rec, struct llog_logid_rec *lir = (struct llog_logid_rec*)rec; int rc; + ENTRY; if (rec->lrh_type != LLOG_LOGID_MAGIC) - return (-EINVAL); + RETURN (-EINVAL); rc = llog_remove_log(handle, &lir->lid_id); RETURN(rc); @@ -266,6 +269,7 @@ int 
llog_ioctl(struct llog_ctxt *ctxt, int cmd, struct obd_ioctl_data *data) int err = 0; struct llog_handle *handle = NULL; + ENTRY; if (*data->ioc_inlbuf1 == '#') { err = str2logid(&logid, data->ioc_inlbuf1, data->ioc_inllen1); if (err) @@ -406,6 +410,7 @@ int llog_catalog_list(struct obd_device *obd, int count, char *out; int l, remains, rc = 0; + ENTRY; size = sizeof(*idarray) * count; OBD_ALLOC(idarray, size); diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index f9beaa9..2eedc32 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -35,23 +35,21 @@ #define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include "llog_internal.h" -#ifdef __KERNEL__ +#if defined(__KERNEL__) && defined(LLOG_LVFS) static int llog_lvfs_pad(struct obd_device *obd, struct l_file *file, int len, int index) diff --git a/lustre/obdclass/llog_obd.c b/lustre/obdclass/llog_obd.c index 23d84b4..a1829bc 100644 --- a/lustre/obdclass/llog_obd.c +++ b/lustre/obdclass/llog_obd.c @@ -28,14 +28,12 @@ #define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include +#include +#include #include #include "llog_internal.h" diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 87f2883..fd138af 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -32,7 +32,7 @@ #include #endif -#include +#include static void print_llogd_body(struct llogd_body *d) { diff --git a/lustre/obdclass/llog_test.c b/lustre/obdclass/llog_test.c index 7f25981..b214d1d 100644 --- a/lustre/obdclass/llog_test.c +++ b/lustre/obdclass/llog_test.c @@ -33,9 +33,8 @@ #include #include -#include -#include -#include /* for LUSTRE_MDC_NAME */ +#include +#include static int llog_test_rand; static struct 
obd_uuid uuid = { .uuid = "test_uuid" }; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 2951d48..0c3aeb8 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -28,23 +28,13 @@ #endif #define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -# include -# include -# include -# include -# include -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include -# endif -# include -#else /* __KERNEL__ */ +#ifndef __KERNEL__ # include #endif -#include -#include -#include +#include +#include +#include #if defined(LPROCFS) @@ -371,6 +361,8 @@ static const char *obd_connect_names[] = { "initial_transno", "inode_bit_locks", "join_file", + "", + "no_oh_for_devices", NULL }; @@ -662,7 +654,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol); LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info); + LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info_async); LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach); LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach); LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup); @@ -727,7 +719,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) for (i = num_private_stats; i < num_stats; i++) { /* If this LBUGs, it is likely that an obd * operation was added to struct obd_ops in - * , and that the corresponding line item + * , and that the corresponding line item * LPROCFS_OBD_OP_INIT(.., .., opname) * is missing from the list above. 
*/ if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) { diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 23896b1..fae1341 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -34,10 +34,9 @@ #include #include -#include -#include - -#include +#include +#include +#include #include static void lu_object_free(struct lu_context *ctx, struct lu_object *o); diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c index ef7639d..21d6f50 100644 --- a/lustre/obdclass/lustre_handles.c +++ b/lustre/obdclass/lustre_handles.c @@ -24,17 +24,14 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -# include -# include -#else +#ifndef __KERNEL__ # include #endif -#include -#include +#include +#include -static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED; +spinlock_t handle_lock; static __u64 handle_base; #define HANDLE_INCR 7 static struct list_head *handle_hash = NULL; @@ -146,7 +143,7 @@ int class_handle_init(void) for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash; bucket--) - INIT_LIST_HEAD(bucket); + CFS_INIT_LIST_HEAD(bucket); get_random_bytes(&handle_base, sizeof(handle_base)); LASSERT(handle_base != 0ULL); diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c index 8a601c1..be6efef 100644 --- a/lustre/obdclass/lustre_peer.c +++ b/lustre/obdclass/lustre_peer.c @@ -25,20 +25,16 @@ #define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -# include -# include -# include -#else +#ifndef __KERNEL__ # include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include struct uuid_nid_data { struct list_head un_list; @@ -52,7 +48,7 @@ static spinlock_t g_uuid_lock; void class_init_uuidlist(void) { - INIT_LIST_HEAD(&g_uuid_list); + CFS_INIT_LIST_HEAD(&g_uuid_list); spin_lock_init(&g_uuid_lock); } @@ -128,7 +124,7 @@ int class_del_uuid(const char *uuid) struct list_head *n; struct 
uuid_nid_data *data; - INIT_LIST_HEAD (&deathrow); + CFS_INIT_LIST_HEAD (&deathrow); spin_lock (&g_uuid_lock); diff --git a/lustre/obdclass/mea.c b/lustre/obdclass/mea.c index 1042f09..2de1de6 100644 --- a/lustre/obdclass/mea.c +++ b/lustre/obdclass/mea.c @@ -23,16 +23,16 @@ #ifdef __KERNEL__ #include /* for request_module() */ #include -#include +#include #include #include #include #else #include -#include -#include +#include +#include #endif -#include +#include static int mea_last_char_hash(int count, char *name, int namelen) { diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 630079a..c1665cb 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -27,19 +27,14 @@ #define DEBUG_SUBSYSTEM S_CLASS #ifdef __KERNEL__ -#include /* for request_module() */ -#include -#include -#include -#include -#include +#include #else #include -#include -#include +#include +#include #endif -#include -#include +#include +#include #include @@ -54,6 +49,7 @@ int class_attach(struct lustre_cfg *lcfg) struct obd_device *obd = NULL; char *typename, *name, *namecopy, *uuid; int rc, len, cleanup_phase = 0; + ENTRY; if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) { CERROR("No type passed!\n"); @@ -119,22 +115,22 @@ int class_attach(struct lustre_cfg *lcfg) } cleanup_phase = 3; /* class_release_dev */ - INIT_LIST_HEAD(&obd->obd_exports); - INIT_LIST_HEAD(&obd->obd_exports_timed); + CFS_INIT_LIST_HEAD(&obd->obd_exports); + CFS_INIT_LIST_HEAD(&obd->obd_exports_timed); spin_lock_init(&obd->obd_dev_lock); spin_lock_init(&obd->obd_osfs_lock); - obd->obd_osfs_age = jiffies - 1000 * HZ; + obd->obd_osfs_age = cfs_time_shift(-1000); /* XXX belongs in setup not attach */ /* recovery data */ - init_timer(&obd->obd_recovery_timer); + cfs_init_timer(&obd->obd_recovery_timer); spin_lock_init(&obd->obd_processing_task_lock); - init_waitqueue_head(&obd->obd_next_transno_waitq); - INIT_LIST_HEAD(&obd->obd_recovery_queue); - INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); + 
cfs_waitq_init(&obd->obd_next_transno_waitq); + CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue); + CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); spin_lock_init(&obd->obd_uncommitted_replies_lock); - INIT_LIST_HEAD(&obd->obd_uncommitted_replies); + CFS_INIT_LIST_HEAD(&obd->obd_uncommitted_replies); len = strlen(uuid); if (len >= sizeof(obd->obd_uuid)) { @@ -513,12 +509,13 @@ int class_del_conn(struct obd_device *obd, struct lustre_cfg *lcfg) RETURN(rc); } -static LIST_HEAD(lustre_profile_list); +CFS_LIST_HEAD(lustre_profile_list); struct lustre_profile *class_get_profile(const char * prof) { struct lustre_profile *lprof; + ENTRY; list_for_each_entry(lprof, &lustre_profile_list, lp_list) { if (!strcmp(lprof->lp_profile, prof)) { RETURN(lprof); @@ -533,10 +530,11 @@ int class_add_profile(int proflen, char *prof, int osclen, char *osc, struct lustre_profile *lprof; int err = 0; + ENTRY; OBD_ALLOC(lprof, sizeof(*lprof)); if (lprof == NULL) RETURN(-ENOMEM); - INIT_LIST_HEAD(&lprof->lp_list); + CFS_INIT_LIST_HEAD(&lprof->lp_list); LASSERT(proflen == (strlen(prof) + 1)); OBD_ALLOC(lprof->lp_profile, proflen); @@ -568,7 +566,7 @@ out: OBD_FREE(lprof->lp_osc, osclen); if (lprof->lp_profile) OBD_FREE(lprof->lp_profile, proflen); - OBD_FREE(lprof, sizeof(*lprof)); + OBD_FREE(lprof, sizeof(*lprof)); RETURN(err); } @@ -846,10 +844,6 @@ static int class_config_llog_handler(struct llog_handle * handle, OBD_FREE(inst_name, inst_len); break; } - case PTL_CFG_REC: { - CWARN("Ignoring obsolete portals config\n"); - break; - } default: CERROR("Unknown llog record type %#x encountered\n", rec->lrh_type); @@ -887,8 +881,8 @@ int class_config_parse_llog(struct llog_ctxt *ctxt, char *name, rc = llog_process(llh, class_config_llog_handler, cfg, &cd); - // FIXME remove warning - CDEBUG(D_CONFIG|D_WARNING, "Processed log %s gen %d-%d (%d)\n", name, + /* FIXME remove warning */ + CDEBUG(D_CONFIG|D_WARNING, "Processed log %s gen %d-%d (rc=%d)\n", name, cd.first_idx + 1, cd.last_idx, rc); if 
(cfg) cfg->cfg_last_idx = cd.last_idx; @@ -952,8 +946,6 @@ int class_config_dump_handler(struct llog_handle * handle, } } LCONSOLE(D_WARNING, " %s\n", outstr); - } else if (rec->lrh_type == PTL_CFG_REC) { - LCONSOLE(D_WARNING, "Obsolete pcfg command\n"); } else { LCONSOLE(D_WARNING, "unhandled lrh_type: %#x\n", rec->lrh_type); rc = -EINVAL; diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 877f4f3..beb700c3 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -29,17 +29,17 @@ #define PRINT_CMD LCONSOLE #define PRINT_MASK D_SUPER -#include -#include -#include -#include +#include +#include +#include +#include #include -#include -#include -#include -#include -#include - +#include +#include +#include +#include +#include + static int (*client_fill_super)(struct super_block *sb) = NULL; /*********** string parsing utils *********/ @@ -463,7 +463,9 @@ int lustre_process_log(struct super_block *sb, char *logname, if (rc) LCONSOLE_ERROR("%s: The configuration '%s' could not be read " - "(%d), mount will fail.\n", + "from the MGS (%d). This may be the result of " + "communication errors between this node and " + "the MGS, or the MGS may not be running.\n", mgc->obd_name, logname, rc); class_obd_list(); @@ -641,10 +643,10 @@ static int lustre_start_mgc(struct super_block *sb) if at all possible. */ recov_bk++; CDEBUG(D_MOUNT, "Set MGS reconnect %d\n", recov_bk); - rc = obd_set_info(obd->obd_self_export, - strlen(KEY_INIT_RECOV_BACKUP), - KEY_INIT_RECOV_BACKUP, - sizeof(recov_bk), &recov_bk); + rc = obd_set_info_async(obd->obd_self_export, + strlen(KEY_INIT_RECOV_BACKUP), + KEY_INIT_RECOV_BACKUP, + sizeof(recov_bk), &recov_bk, NULL); GOTO(out, rc = 0); } @@ -735,10 +737,11 @@ static int lustre_start_mgc(struct super_block *sb) /* Try all connections, but only once. 
*/ recov_bk = 1; - rc = obd_set_info(obd->obd_self_export, - strlen(KEY_INIT_RECOV_BACKUP), KEY_INIT_RECOV_BACKUP, - sizeof(recov_bk), &recov_bk); - if (rc) + rc = obd_set_info_async(obd->obd_self_export, + strlen(KEY_INIT_RECOV_BACKUP), + KEY_INIT_RECOV_BACKUP, + sizeof(recov_bk), &recov_bk, NULL); + if (rc) /* nonfatal */ CERROR("can't set %s %d\n", KEY_INIT_RECOV_BACKUP, rc); @@ -827,9 +830,9 @@ static int server_mgc_set_fs(struct obd_device *mgc, struct super_block *sb) CDEBUG(D_MOUNT, "Set mgc disk for %s\n", lsi->lsi_lmd->lmd_dev); /* cl_mgc_sem in mgc insures we sleep if the mgc_fs is busy */ - rc = obd_set_info(mgc->obd_self_export, - strlen("set_fs"), "set_fs", - sizeof(*sb), sb); + rc = obd_set_info_async(mgc->obd_self_export, + strlen("set_fs"), "set_fs", + sizeof(*sb), sb, NULL); if (rc) { CERROR("can't set_fs %d\n", rc); } @@ -843,9 +846,10 @@ static int server_mgc_clear_fs(struct obd_device *mgc) ENTRY; CDEBUG(D_MOUNT, "Unassign mgc disk\n"); - - rc = obd_set_info(mgc->obd_self_export, - strlen("clear_fs"), "clear_fs", 0, NULL); + + rc = obd_set_info_async(mgc->obd_self_export, + strlen("clear_fs"), "clear_fs", + 0, NULL, NULL); RETURN(rc); } @@ -964,9 +968,9 @@ int server_register_target(struct super_block *sb) /* Register the target */ /* FIXME use mdc_process_config instead */ - rc = obd_set_info(mgc->u.cli.cl_mgc_mgsexp, - strlen("register_target"), "register_target", - sizeof(*mti), mti); + rc = obd_set_info_async(mgc->u.cli.cl_mgc_mgsexp, + strlen("register_target"), "register_target", + sizeof(*mti), mti, NULL); if (rc) { CERROR("registration with the MGS failed (%d)\n", rc); GOTO(out, rc); @@ -1358,7 +1362,7 @@ static void server_put_super(struct super_block *sb) is right. */ server_stop_servers(lddflags, lsiflags); - CDEBUG(D_MOUNT|D_WARNING, "server umount %s done\n", tmpname); + LCONSOLE_WARN("server umount %s complete\n", tmpname); OBD_FREE(tmpname, tmpname_sz); EXIT; } @@ -1831,8 +1835,7 @@ out: CERROR("Unable to mount %s\n", s2lsi(sb) ? 
lmd->lmd_dev : ""); } else { - CDEBUG(D_MOUNT|D_WARNING, "Successfully mounted %s\n", - lmd->lmd_dev); + LCONSOLE_WARN("mount %s complete\n", lmd->lmd_dev); } RETURN(rc); } diff --git a/lustre/obdclass/obdo.c b/lustre/obdclass/obdo.c index 97812a9..94e70bb 100644 --- a/lustre/obdclass/obdo.c +++ b/lustre/obdclass/obdo.c @@ -35,239 +35,8 @@ #ifndef __KERNEL__ #include #else -#include -#include -#include -#endif - -#ifdef __KERNEL__ -#include -#include /* for PAGE_CACHE_SIZE */ - -void obdo_from_iattr(struct obdo *oa, struct iattr *attr, unsigned int ia_valid) -{ - if (ia_valid & ATTR_ATIME) { - oa->o_atime = LTIME_S(attr->ia_atime); - oa->o_valid |= OBD_MD_FLATIME; - } - if (ia_valid & ATTR_MTIME) { - oa->o_mtime = LTIME_S(attr->ia_mtime); - oa->o_valid |= OBD_MD_FLMTIME; - } - if (ia_valid & ATTR_CTIME) { - oa->o_ctime = LTIME_S(attr->ia_ctime); - oa->o_valid |= OBD_MD_FLCTIME; - } - if (ia_valid & ATTR_SIZE) { - oa->o_size = attr->ia_size; - oa->o_valid |= OBD_MD_FLSIZE; - } - if (ia_valid & ATTR_MODE) { - oa->o_mode = attr->ia_mode; - oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - oa->o_mode &= ~S_ISGID; - } - if (ia_valid & ATTR_UID) { - oa->o_uid = attr->ia_uid; - oa->o_valid |= OBD_MD_FLUID; - } - if (ia_valid & ATTR_GID) { - oa->o_gid = attr->ia_gid; - oa->o_valid |= OBD_MD_FLGID; - } -} -EXPORT_SYMBOL(obdo_from_iattr); - -void iattr_from_obdo(struct iattr *attr, struct obdo *oa, obd_flag valid) -{ - valid &= oa->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid "LPX64", new time "LPU64"/"LPU64"\n", - oa->o_valid, oa->o_mtime, oa->o_ctime); - - attr->ia_valid = 0; - if (valid & OBD_MD_FLATIME) { - LTIME_S(attr->ia_atime) = oa->o_atime; - attr->ia_valid |= ATTR_ATIME; - } - if (valid & OBD_MD_FLMTIME) { - LTIME_S(attr->ia_mtime) = oa->o_mtime; - attr->ia_valid |= ATTR_MTIME; - } - if (valid & OBD_MD_FLCTIME) { - LTIME_S(attr->ia_ctime) = oa->o_ctime; - attr->ia_valid |= 
ATTR_CTIME; - } - if (valid & OBD_MD_FLSIZE) { - attr->ia_size = oa->o_size; - attr->ia_valid |= ATTR_SIZE; - } -#if 0 /* you shouldn't be able to change a file's type with setattr */ - if (valid & OBD_MD_FLTYPE) { - attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); - attr->ia_valid |= ATTR_MODE; - } -#endif - if (valid & OBD_MD_FLMODE) { - attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); - attr->ia_valid |= ATTR_MODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - attr->ia_mode &= ~S_ISGID; - } - if (valid & OBD_MD_FLUID) { - attr->ia_uid = oa->o_uid; - attr->ia_valid |= ATTR_UID; - } - if (valid & OBD_MD_FLGID) { - attr->ia_gid = oa->o_gid; - attr->ia_valid |= ATTR_GID; - } - - if (valid & OBD_MD_FLFLAGS) { - attr->ia_attr_flags = oa->o_flags; - attr->ia_valid |= ATTR_ATTR_FLAG; - } -} -EXPORT_SYMBOL(iattr_from_obdo); - -/* WARNING: the file systems must take care not to tinker with - attributes they don't manage (such as blocks). */ -void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) -{ - obd_flag newvalid = 0; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", - valid, LTIME_S(src->i_mtime), - LTIME_S(src->i_ctime)); - - if (valid & OBD_MD_FLATIME) { - dst->o_atime = LTIME_S(src->i_atime); - newvalid |= OBD_MD_FLATIME; - } - if (valid & OBD_MD_FLMTIME) { - dst->o_mtime = LTIME_S(src->i_mtime); - newvalid |= OBD_MD_FLMTIME; - } - if (valid & OBD_MD_FLCTIME) { - dst->o_ctime = LTIME_S(src->i_ctime); - newvalid |= OBD_MD_FLCTIME; - } - if (valid & OBD_MD_FLSIZE) { - dst->o_size = src->i_size; - newvalid |= OBD_MD_FLSIZE; - } - if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */ - dst->o_blocks = src->i_blocks; - newvalid |= OBD_MD_FLBLOCKS; - } - if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */ - dst->o_blksize = src->i_blksize; - newvalid |= OBD_MD_FLBLKSZ; - } - if (valid & OBD_MD_FLTYPE) { - dst->o_mode = (dst->o_mode & 
S_IALLUGO)|(src->i_mode & S_IFMT); - newvalid |= OBD_MD_FLTYPE; - } - if (valid & OBD_MD_FLMODE) { - dst->o_mode = (dst->o_mode & S_IFMT)|(src->i_mode & S_IALLUGO); - newvalid |= OBD_MD_FLMODE; - } - if (valid & OBD_MD_FLUID) { - dst->o_uid = src->i_uid; - newvalid |= OBD_MD_FLUID; - } - if (valid & OBD_MD_FLGID) { - dst->o_gid = src->i_gid; - newvalid |= OBD_MD_FLGID; - } - if (valid & OBD_MD_FLFLAGS) { - dst->o_flags = src->i_flags; - newvalid |= OBD_MD_FLFLAGS; - } - if (valid & OBD_MD_FLGENER) { - dst->o_generation = src->i_generation; - newvalid |= OBD_MD_FLGENER; - } - if (valid & OBD_MD_FLFID) { - dst->o_fid = src->i_ino; - newvalid |= OBD_MD_FLFID; - } - - dst->o_valid |= newvalid; -} -EXPORT_SYMBOL(obdo_from_inode); - -void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, - "valid "LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n", - src->o_valid, LTIME_S(dst->i_mtime), - LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime); - - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime)) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime)) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - dst->i_size = src->o_size; - /* optimum IO size */ - if (valid & OBD_MD_FLBLKSZ && src->o_blksize > dst->i_blksize) - dst->i_blksize = src->o_blksize; - if (dst->i_blksize < PAGE_CACHE_SIZE) - dst->i_blksize = PAGE_CACHE_SIZE; - /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) - dst->i_blocks = src->o_blocks; -} -EXPORT_SYMBOL(obdo_refresh_inode); - -void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, - "valid 
"LPX64", cur time %lu/%lu, new "LPU64"/"LPU64"\n", - src->o_valid, LTIME_S(dst->i_mtime), - LTIME_S(dst->i_ctime), src->o_mtime, src->o_ctime); - - if (valid & OBD_MD_FLATIME) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - dst->i_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) { /* allocation of space */ - dst->i_blocks = src->o_blocks; - if (dst->i_blocks < src->o_blocks) /* overflow */ - dst->i_blocks = -1; - } - if (valid & OBD_MD_FLBLKSZ) - dst->i_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->i_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - dst->i_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - dst->i_flags = src->o_flags; - if (valid & OBD_MD_FLGENER) - dst->i_generation = src->o_generation; -} -EXPORT_SYMBOL(obdo_to_inode); +#include +#include #endif void obdo_cpy_md(struct obdo *dst, struct obdo *src, obd_flag valid) diff --git a/lustre/obdclass/prng.c b/lustre/obdclass/prng.c index 909e311..b3c2a75 100644 --- a/lustre/obdclass/prng.c +++ b/lustre/obdclass/prng.c @@ -13,11 +13,10 @@ # define EXPORT_SYMTAB #endif -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif +#include /* From: George Marsaglia diff --git a/lustre/obdclass/statfs_pack.c b/lustre/obdclass/statfs_pack.c index 616bdfa..8e20f85 100644 --- a/lustre/obdclass/statfs_pack.c +++ b/lustre/obdclass/statfs_pack.c @@ -33,17 +33,12 @@ #endif #ifndef __KERNEL__ #include -#else -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#endif #endif -#include -#include -#include -#include +#include +#include +#include +#include void statfs_pack(struct obd_statfs 
*osfs, struct kstatfs *sfs) { diff --git a/lustre/obdclass/uuid.c b/lustre/obdclass/uuid.c index f9235dd..09302bd 100644 --- a/lustre/obdclass/uuid.c +++ b/lustre/obdclass/uuid.c @@ -12,19 +12,12 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -# include -# include -# include -# include -#else +#ifndef __KERNEL__ # include #endif -#include -#include -#include /* for LUSTRE_OST_NAME */ -#include /* for LUSTRE_MDC_NAME */ +#include +#include struct uuid { __u32 time_low; diff --git a/lustre/obdecho/Info.plist b/lustre/obdecho/Info.plist new file mode 100644 index 0000000..19a6096 --- /dev/null +++ b/lustre/obdecho/Info.plist @@ -0,0 +1,45 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + obdecho + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.obdecho + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.apple.kpi.unsupported + 8.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.lvfs + 1.0.0 + com.clusterfs.lustre.obdclass + 1.0.0 + com.clusterfs.lustre.ptlrpc + 1.0.0 + com.clusterfs.lustre.osc + 1.0.0 + + + diff --git a/lustre/obdecho/autoMakefile.am b/lustre/obdecho/autoMakefile.am index 834b082..d08aa57 100644 --- a/lustre/obdecho/autoMakefile.am +++ b/lustre/obdecho/autoMakefile.am @@ -11,8 +11,30 @@ libobdecho_a_CFLAGS = $(LLCFLAGS) endif if MODULES +if LINUX modulefs_DATA = obdecho$(KMODEXT) +endif + +if DARWIN +macos_PROGRAMS := obdecho +obdecho_SOURCES := \ + lproc_echo.c \ + echo.c \ + echo_client.c + +obdecho_CFLAGS := $(EXTRA_KCFLAGS) +obdecho_LDFLAGS := $(EXTRA_KLDFLAGS) +obdecho_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif # darwin + endif # MODULES +install-data-hook: $(install_data_hook) + MOSTLYCLEANFILES := 
@MOSTLYCLEANFILES@ DIST_SOURCES = $(obdecho-objs:%.o=%.c) diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index beceb66..a923d63 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -28,32 +28,20 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #define DEBUG_SUBSYSTEM S_ECHO -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #define ECHO_INIT_OBJID 0x1000000000000000ULL #define ECHO_HANDLE_MAGIC 0xabcd0123fedc9876ULL -#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE/PAGE_SIZE) -static struct page *echo_persistent_pages[ECHO_PERSISTENT_PAGES]; +#define ECHO_PERSISTENT_PAGES (ECHO_PERSISTENT_SIZE/CFS_PAGE_SIZE) +static cfs_page_t *echo_persistent_pages[ECHO_PERSISTENT_PAGES]; enum { LPROC_ECHO_READ_BYTES = 1, @@ -147,6 +135,7 @@ int echo_destroy(struct obd_export *exp, struct obdo *oa, { struct obd_device *obd = class_exp2obd(exp); + ENTRY; if (!obd) { CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); @@ -163,7 +152,7 @@ int echo_destroy(struct obd_export *exp, struct obdo *oa, RETURN(-EINVAL); } - return 0; + RETURN(0); } static int echo_getattr(struct obd_export *exp, struct obdo *oa, @@ -172,6 +161,7 @@ static int echo_getattr(struct obd_export *exp, struct obdo *oa, struct obd_device *obd = class_exp2obd(exp); obd_id id = oa->o_id; + ENTRY; if (!obd) { CERROR("invalid client cookie "LPX64"\n", exp->exp_handle.h_cookie); @@ -186,7 +176,7 @@ static int echo_getattr(struct obd_export *exp, struct obdo *oa, obdo_cpy_md(oa, &obd->u.echo.eo_oa, oa->o_valid); oa->o_id = id; - return 0; + RETURN(0); } static int echo_setattr(struct obd_export *exp, struct obdo *oa, @@ -194,6 +184,7 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa, { struct obd_device *obd = class_exp2obd(exp); + ENTRY; if (!obd) { CERROR("invalid client cookie "LPX64"\n", 
exp->exp_handle.h_cookie); @@ -214,15 +205,15 @@ static int echo_setattr(struct obd_export *exp, struct obdo *oa, oti->oti_ack_locks[0].lock = obd->u.echo.eo_nl_lock; } - return 0; + RETURN(0); } static void -echo_page_debug_setup(struct page *page, int rw, obd_id id, +echo_page_debug_setup(cfs_page_t *page, int rw, obd_id id, __u64 offset, int len) { int page_offset = offset & (PAGE_SIZE - 1); - char *addr = ((char *)kmap(page)) + page_offset; + char *addr = ((char *)cfs_kmap(page)) + page_offset; if (len % OBD_ECHO_BLOCK_SIZE != 0) CERROR("Unexpected block size %d\n", len); @@ -241,15 +232,15 @@ echo_page_debug_setup(struct page *page, int rw, obd_id id, len -= OBD_ECHO_BLOCK_SIZE; } - kunmap(page); + cfs_kunmap(page); } static int -echo_page_debug_check(struct page *page, obd_id id, +echo_page_debug_check(cfs_page_t *page, obd_id id, __u64 offset, int len) { int page_offset = offset & (PAGE_SIZE - 1); - char *addr = ((char *)kmap(page)) + page_offset; + char *addr = ((char *)cfs_kmap(page)) + page_offset; int rc = 0; int rc2; @@ -268,7 +259,7 @@ echo_page_debug_check(struct page *page, obd_id id, len -= OBD_ECHO_BLOCK_SIZE; } - kunmap(page); + cfs_kunmap(page); return (rc); } @@ -304,7 +295,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, oti->oti_handle = (void *)DESC_PRIV; for (i = 0; i < objcount; i++, obj++) { - int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL; + int gfp_mask = (obj->ioo_id & 1) ? 
CFS_ALLOC_HIGHUSER : CFS_ALLOC_STD; int ispersistent = obj->ioo_id == ECHO_PERSISTENT_OBJID; int debug_setup = (!ispersistent && (oa->o_valid & OBD_MD_FLFLAGS) != 0 && @@ -318,9 +309,9 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, r->page = echo_persistent_pages[nb->offset >> PAGE_SHIFT]; /* Take extra ref so __free_pages() can be called OK */ - get_page (r->page); + cfs_get_page (r->page); } else { - r->page = alloc_pages(gfp_mask, 0); + r->page = cfs_alloc_page(gfp_mask); if (r->page == NULL) { CERROR("can't get page %u/%u for id " LPU64"\n", @@ -335,7 +326,7 @@ int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, r->offset = nb->offset; r->len = nb->len; - LASSERT((r->offset & ~PAGE_MASK) + r->len <= PAGE_SIZE); + LASSERT((r->offset & ~CFS_PAGE_MASK) + r->len <= CFS_PAGE_SIZE); CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n", r->page, r->offset, r->len); @@ -368,10 +359,10 @@ preprw_cleanup: */ CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount); while (r-- > res) { - kunmap(r->page); + cfs_kunmap(r->page); /* NB if this is a persistent page, __free_pages will just * lose the extra ref gained above */ - __free_pages(r->page, 0); + cfs_free_page(r->page); atomic_dec(&obd->u.echo.eo_prep); } memset(res, 0, sizeof(*res) * niocount); @@ -418,7 +409,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, int j; for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) { - struct page *page = r->page; + cfs_page_t *page = r->page; void *addr; if (page == NULL) { @@ -427,7 +418,7 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, GOTO(commitrw_cleanup, rc = -EFAULT); } - addr = kmap(page); + addr = cfs_kmap(page); CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n", r->page, addr, r->offset); @@ -440,9 +431,9 @@ int echo_commitrw(int cmd, struct obd_export *export, struct obdo *oa, rc = vrc; } - kunmap(page); + cfs_kunmap(page); /* NB see comment above regarding persistent 
pages */ - __free_pages(page, 0); + cfs_free_page(page); atomic_dec(&obd->u.echo.eo_prep); } } @@ -454,10 +445,10 @@ commitrw_cleanup: CERROR("cleaning up %ld pages (%d obdos)\n", niocount - (long)(r - res) - 1, objcount); while (++r < res + niocount) { - struct page *page = r->page; + cfs_page_t *page = r->page; /* NB see comment above regarding persistent pages */ - __free_pages(page, 0); + cfs_free_page(page); atomic_dec(&obd->u.echo.eo_prep); } return rc; @@ -516,7 +507,7 @@ static int echo_cleanup(struct obd_device *obd) /* XXX Bug 3413; wait for a bit to ensure the BL callback has * happened before calling ldlm_namespace_free() */ set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); + cfs_schedule_timeout (CFS_TASK_UNINT, cfs_time_seconds(1)); ldlm_namespace_free(obd->obd_namespace, obd->obd_force); @@ -552,7 +543,7 @@ echo_persistent_pages_fini (void) for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) if (echo_persistent_pages[i] != NULL) { - __free_pages (echo_persistent_pages[i], 0); + cfs_free_page (echo_persistent_pages[i]); echo_persistent_pages[i] = NULL; } } @@ -560,21 +551,21 @@ echo_persistent_pages_fini (void) static int echo_persistent_pages_init (void) { - struct page *pg; + cfs_page_t *pg; int i; for (i = 0; i < ECHO_PERSISTENT_PAGES; i++) { int gfp_mask = (i < ECHO_PERSISTENT_PAGES/2) ? 
- GFP_KERNEL : GFP_HIGHUSER; + CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER; - pg = alloc_pages (gfp_mask, 0); + pg = cfs_alloc_page (gfp_mask); if (pg == NULL) { echo_persistent_pages_fini (); return (-ENOMEM); } - memset (kmap (pg), 0, PAGE_SIZE); - kunmap (pg); + memset (cfs_kmap (pg), 0, CFS_PAGE_SIZE); + cfs_kunmap (pg); echo_persistent_pages[i] = pg; } @@ -587,9 +578,10 @@ static int __init obdecho_init(void) struct lprocfs_static_vars lvars; int rc; + ENTRY; printk(KERN_INFO "Lustre: Echo OBD driver; info@clusterfs.com\n"); - LASSERT(PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); + LASSERT(CFS_PAGE_SIZE % OBD_ECHO_BLOCK_SIZE == 0); lprocfs_init_vars(echo, &lvars); @@ -598,7 +590,7 @@ static int __init obdecho_init(void) goto failed_0; rc = class_register_type(&echo_obd_ops, NULL, lvars.module_vars, - OBD_ECHO_DEVICENAME, NULL); + LUSTRE_ECHO_NAME, NULL); if (rc != 0) goto failed_1; @@ -606,7 +598,7 @@ static int __init obdecho_init(void) if (rc == 0) RETURN (0); - class_unregister_type(OBD_ECHO_DEVICENAME); + class_unregister_type(LUSTRE_ECHO_NAME); failed_1: echo_persistent_pages_fini (); failed_0: @@ -616,7 +608,7 @@ static int __init obdecho_init(void) static void /*__exit*/ obdecho_exit(void) { echo_client_exit(); - class_unregister_type(OBD_ECHO_DEVICENAME); + class_unregister_type(LUSTRE_ECHO_NAME); echo_persistent_pages_fini (); } @@ -624,5 +616,4 @@ MODULE_AUTHOR("Cluster File Systems, Inc. 
"); MODULE_DESCRIPTION("Lustre Testing Echo OBD driver"); MODULE_LICENSE("GPL"); -module_init(obdecho_init); -module_exit(obdecho_exit); +cfs_module(obdecho, "1.0.0", obdecho_init, obdecho_exit); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 3927d9d..c24533e 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -24,26 +24,18 @@ #define DEBUG_SUBSYSTEM S_ECHO #ifdef __KERNEL__ -#include -#include -#include -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif -#include -#include +#include #else #include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include static obd_id last_object_id; @@ -116,7 +108,7 @@ echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm, if (ulsm_nob < nob || lsm->lsm_stripe_count > ec->ec_nstripes || lsm->lsm_magic != LOV_MAGIC || - (lsm->lsm_stripe_size & (PAGE_SIZE - 1)) != 0 || + (lsm->lsm_stripe_size & (CFS_PAGE_SIZE - 1)) != 0 || ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL)) return (-EINVAL); @@ -201,7 +193,7 @@ static int echo_create_object(struct obd_device *obd, int on_target, lsm->lsm_stripe_count = ec->ec_nstripes; if (lsm->lsm_stripe_size == 0) - lsm->lsm_stripe_size = PAGE_SIZE; + lsm->lsm_stripe_size = CFS_PAGE_SIZE; idx = ll_rand(); @@ -289,11 +281,11 @@ echo_get_object (struct ec_object **ecop, struct obd_device *obd, spin_lock (&ec->ec_lock); eco = echo_find_object_locked (obd, oa->o_id); if (eco != NULL) { - if (eco->eco_deleted) { /* being deleted */ - spin_unlock(&ec->ec_lock); /* (see comment in cleanup) */ + if (eco->eco_deleted) { /* being deleted */ + spin_unlock(&ec->ec_lock);/* (see comment in cleanup) */ return (-EAGAIN); } - + eco->eco_refcount++; spin_unlock (&ec->ec_lock); *ecop = eco; @@ -431,9 +423,9 @@ echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp) *offp = offset * stripe_size + 
woffset % stripe_size; } -static void -echo_client_page_debug_setup(struct lov_stripe_md *lsm, - struct page *page, int rw, obd_id id, +static void +echo_client_page_debug_setup(struct lov_stripe_md *lsm, + cfs_page_t *page, int rw, obd_id id, obd_off offset, obd_off count) { char *addr; @@ -442,11 +434,11 @@ echo_client_page_debug_setup(struct lov_stripe_md *lsm, int delta; /* no partial pages on the client */ - LASSERT(count == PAGE_SIZE); + LASSERT(count == CFS_PAGE_SIZE); - addr = kmap(page); + addr = cfs_kmap(page); - for (delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + for (delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { if (rw == OBD_BRW_WRITE) { stripe_off = offset + delta; stripe_id = id; @@ -459,13 +451,12 @@ echo_client_page_debug_setup(struct lov_stripe_md *lsm, stripe_off, stripe_id); } - kunmap(page); + cfs_kunmap(page); } -static int -echo_client_page_debug_check(struct lov_stripe_md *lsm, - struct page *page, obd_id id, - obd_off offset, obd_off count) +static int echo_client_page_debug_check(struct lov_stripe_md *lsm, + cfs_page_t *page, obd_id id, + obd_off offset, obd_off count) { obd_off stripe_off; obd_id stripe_id; @@ -475,11 +466,11 @@ echo_client_page_debug_check(struct lov_stripe_md *lsm, int rc2; /* no partial pages on the client */ - LASSERT(count == PAGE_SIZE); + LASSERT(count == CFS_PAGE_SIZE); - addr = kmap(page); + addr = cfs_kmap(page); - for (rc = delta = 0; delta < PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { + for (rc = delta = 0; delta < CFS_PAGE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) { stripe_off = offset + delta; stripe_id = id; echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id); @@ -493,7 +484,7 @@ echo_client_page_debug_check(struct lov_stripe_md *lsm, } } - kunmap(page); + cfs_kunmap(page); return rc; } @@ -515,18 +506,18 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, (oa->o_valid & OBD_MD_FLFLAGS) != 0 && (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0); - gfp_mask = 
((oa->o_id & 2) == 0) ? GFP_KERNEL : GFP_HIGHUSER; + gfp_mask = ((oa->o_id & 2) == 0) ? CFS_ALLOC_STD : CFS_ALLOC_HIGHUSER; LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); LASSERT(lsm != NULL); LASSERT(lsm->lsm_object_id == oa->o_id); if (count <= 0 || - (count & (PAGE_SIZE - 1)) != 0) + (count & (CFS_PAGE_SIZE - 1)) != 0) return (-EINVAL); /* XXX think again with misaligned I/O */ - npages = count >> PAGE_SHIFT; + npages = count >> CFS_PAGE_SHIFT; OBD_ALLOC(pga, npages * sizeof(*pga)); if (pga == NULL) @@ -534,16 +525,16 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, for (i = 0, pgp = pga, off = offset; i < npages; - i++, pgp++, off += PAGE_SIZE) { + i++, pgp++, off += CFS_PAGE_SIZE) { LASSERT (pgp->pg == NULL); /* for cleanup */ rc = -ENOMEM; - pgp->pg = alloc_pages (gfp_mask, 0); + pgp->pg = cfs_alloc_page (gfp_mask); if (pgp->pg == NULL) goto out; - pgp->count = PAGE_SIZE; + pgp->count = CFS_PAGE_SIZE; pgp->off = off; pgp->flag = 0; @@ -569,7 +560,7 @@ static int echo_client_kbrw(struct obd_device *obd, int rw, struct obdo *oa, if (vrc != 0 && rc == 0) rc = vrc; } - __free_pages(pgp->pg, 0); + cfs_free_page(pgp->pg); } OBD_FREE(pga, npages * sizeof(*pga)); return (rc); @@ -597,13 +588,13 @@ static int echo_client_ubrw(struct obd_device *obd, int rw, /* NB: for now, only whole pages, page aligned */ if (count <= 0 || - ((long)buffer & (PAGE_SIZE - 1)) != 0 || - (count & (PAGE_SIZE - 1)) != 0 || + ((long)buffer & (CFS_PAGE_SIZE - 1)) != 0 || + (count & (CFS_PAGE_SIZE - 1)) != 0 || (lsm != NULL && lsm->lsm_object_id != oa->o_id)) return (-EINVAL); /* XXX think again with misaligned I/O */ - npages = count >> PAGE_SHIFT; + npages = count >> CFS_PAGE_SHIFT; OBD_ALLOC(pga, npages * sizeof(*pga)); if (pga == NULL) @@ -623,10 +614,10 @@ static int echo_client_ubrw(struct obd_device *obd, int rw, for (i = 0, off = offset, pgp = pga; i < npages; - i++, off += PAGE_SIZE, pgp++) { + i++, off += CFS_PAGE_SIZE, pgp++) { pgp->off = off; 
pgp->pg = kiobuf->maplist[i]; - pgp->count = PAGE_SIZE; + pgp->count = CFS_PAGE_SIZE; pgp->flag = 0; } @@ -660,7 +651,7 @@ struct echo_async_state; #define EAP_MAGIC 79277927 struct echo_async_page { int eap_magic; - struct page *eap_page; + cfs_page_t *eap_page; void *eap_cookie; obd_off eap_off; struct echo_async_state *eap_eas; @@ -677,7 +668,7 @@ struct echo_async_state { obd_off eas_end_offset; int eas_in_flight; int eas_rc; - wait_queue_head_t eas_waitq; + cfs_waitq_t eas_waitq; struct list_head eas_avail; struct obdo eas_oa; struct lov_stripe_md *eas_lsm; @@ -704,7 +695,7 @@ static int ec_ap_refresh_count(void *data, int cmd) { /* our pages are issued with a stable count */ LBUG(); - return PAGE_SIZE; + return CFS_PAGE_SIZE; } static void ec_ap_fill_obdo(void *data, int cmd, struct obdo *oa) { @@ -727,14 +718,14 @@ static void ec_ap_completion(void *data, int cmd, struct obdo *oa, int rc) (eas->eas_oa.o_flags & OBD_FL_DEBUG_CHECK) != 0) echo_client_page_debug_check(eas->eas_lsm, eap->eap_page, eas->eas_oa.o_id, eap->eap_off, - PAGE_SIZE); + CFS_PAGE_SIZE); spin_lock_irqsave(&eas->eas_lock, flags); if (rc && !eas->eas_rc) eas->eas_rc = rc; eas->eas_in_flight--; list_add(&eap->eap_item, &eas->eas_avail); - wake_up(&eas->eas_waitq); + cfs_waitq_signal(&eas->eas_waitq); spin_unlock_irqrestore(&eas->eas_lock, flags); } @@ -753,10 +744,11 @@ static int echo_client_async_page(struct obd_export *exp, int rw, obd_count npages, i; struct echo_async_page *eap; struct echo_async_state eas; - struct list_head *pos, *n; int rc = 0; unsigned long flags; - LIST_HEAD(pages); + struct echo_async_page **aps = NULL; + + ENTRY; #if 0 int verify; int gfp_mask; @@ -771,43 +763,46 @@ static int echo_client_async_page(struct obd_export *exp, int rw, LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); if (count <= 0 || - (count & (PAGE_SIZE - 1)) != 0 || + (count & (CFS_PAGE_SIZE - 1)) != 0 || (lsm != NULL && lsm->lsm_object_id != oa->o_id)) return (-EINVAL); /* XXX think again with 
misaligned I/O */ - npages = batching >> PAGE_SHIFT; + npages = batching >> CFS_PAGE_SHIFT; memcpy(&eas.eas_oa, oa, sizeof(*oa)); eas.eas_next_offset = offset; eas.eas_end_offset = offset + count; spin_lock_init(&eas.eas_lock); - init_waitqueue_head(&eas.eas_waitq); + cfs_waitq_init(&eas.eas_waitq); eas.eas_in_flight = 0; eas.eas_rc = 0; eas.eas_lsm = lsm; - INIT_LIST_HEAD(&eas.eas_avail); + CFS_INIT_LIST_HEAD(&eas.eas_avail); + + OBD_ALLOC(aps, npages * sizeof aps[0]); + if (aps == NULL) + return (-ENOMEM); /* prepare the group of pages that we're going to be keeping * in flight */ for (i = 0; i < npages; i++) { - struct page *page = alloc_page(GFP_KERNEL); + cfs_page_t *page = cfs_alloc_page(CFS_ALLOC_STD); if (page == NULL) GOTO(out, rc = -ENOMEM); - set_page_private(page, 0); - list_add_tail(&PAGE_LIST(page), &pages); - OBD_ALLOC(eap, sizeof(*eap)); - if (eap == NULL) + if (eap == NULL) { + cfs_free_page(page); GOTO(out, rc = -ENOMEM); + } eap->eap_magic = EAP_MAGIC; eap->eap_page = page; eap->eap_eas = &eas; - set_page_private(page, (unsigned long)eap); list_add_tail(&eap->eap_item, &eas.eas_avail); + aps[i] = eap; } /* first we spin queueing io and being woken by its completion */ @@ -838,7 +833,7 @@ static int echo_client_async_page(struct obd_export *exp, int rw, eap->eap_cookie = NULL; } - eas.eas_next_offset += PAGE_SIZE; + eas.eas_next_offset += CFS_PAGE_SIZE; eap->eap_off = eas.eas_next_offset; rc = obd_prep_async_page(exp, lsm, NULL, eap->eap_page, @@ -853,13 +848,13 @@ static int echo_client_async_page(struct obd_export *exp, int rw, if (oa->o_id != ECHO_PERSISTENT_OBJID && (oa->o_valid & OBD_MD_FLFLAGS) != 0 && (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0) - echo_client_page_debug_setup(lsm, eap->eap_page, rw, - oa->o_id, - eap->eap_off, PAGE_SIZE); + echo_client_page_debug_setup(lsm, eap->eap_page, rw, + oa->o_id, + eap->eap_off, CFS_PAGE_SIZE); /* always asserts urgent, which isn't quite right */ rc = obd_queue_async_io(exp, lsm, NULL, eap->eap_cookie, 
- rw, 0, PAGE_SIZE, 0, + rw, 0, CFS_PAGE_SIZE, 0, ASYNC_READY | ASYNC_URGENT | ASYNC_COUNT_STABLE); spin_lock_irqsave(&eas.eas_lock, flags); @@ -884,19 +879,19 @@ static int echo_client_async_page(struct obd_export *exp, int rw, spin_unlock_irqrestore(&eas.eas_lock, flags); out: - list_for_each_safe(pos, n, &pages) { - struct page *page = list_entry(pos, struct page, - PAGE_LIST_ENTRY); + if (aps != NULL) { + for (i = 0; i < npages; ++ i) { + cfs_page_t *page; - list_del(&PAGE_LIST(page)); - if (page_private(page) != 0) { - eap = (struct echo_async_page *)page_private(page); + eap = aps[i]; + page = eap->eap_page; if (eap->eap_cookie != NULL) obd_teardown_async_page(exp, lsm, NULL, eap->eap_cookie); OBD_FREE(eap, sizeof(*eap)); + cfs_free_page(page); } - __free_page(page); + OBD_FREE(aps, npages * sizeof aps[0]); } RETURN(rc); @@ -915,12 +910,12 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, int i, ret = 0; ENTRY; - if (count <= 0 || (count & (PAGE_SIZE - 1)) != 0 || + if (count <= 0 || (count & (CFS_PAGE_SIZE - 1)) != 0 || (lsm != NULL && lsm->lsm_object_id != oa->o_id)) RETURN(-EINVAL); - npages = batch >> PAGE_SHIFT; - tot_pages = count >> PAGE_SHIFT; + npages = batch >> CFS_PAGE_SHIFT; + tot_pages = count >> CFS_PAGE_SHIFT; OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local)); OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote)); @@ -936,9 +931,9 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, if (tot_pages < npages) npages = tot_pages; - for (i = 0; i < npages; i++, off += PAGE_SIZE) { + for (i = 0; i < npages; i++, off += CFS_PAGE_SIZE) { rnb[i].offset = off; - rnb[i].len = PAGE_SIZE; + rnb[i].len = CFS_PAGE_SIZE; } ioo.ioo_bufcnt = npages; @@ -949,7 +944,7 @@ static int echo_client_prep_commit(struct obd_export *exp, int rw, GOTO(out, ret); for (i = 0; i < npages; i++) { - struct page *page = lnb[i].page; + cfs_page_t *page = lnb[i].page; /* read past eof? 
*/ if (page == NULL && lnb[i].rc == 0) @@ -1096,8 +1091,8 @@ echo_client_enqueue(struct obd_export *exp, struct obdo *oa, if (!(mode == LCK_PR || mode == LCK_PW)) return -EINVAL; - if ((offset & (PAGE_SIZE - 1)) != 0 || - (nob & (PAGE_SIZE - 1)) != 0) + if ((offset & (CFS_PAGE_SIZE - 1)) != 0 || + (nob & (CFS_PAGE_SIZE - 1)) != 0) return -EINVAL; rc = echo_get_object (&eco, obd, oa); @@ -1346,7 +1341,7 @@ static int echo_client_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) } spin_lock_init (&ec->ec_lock); - INIT_LIST_HEAD (&ec->ec_objects); + CFS_INIT_LIST_HEAD (&ec->ec_objects); ec->ec_unique = 0; OBD_ALLOC(ocd, sizeof(*ocd)); @@ -1411,10 +1406,11 @@ static int echo_client_connect(struct lustre_handle *conn, struct obd_export *exp; int rc; + ENTRY; rc = class_connect(conn, src, cluuid); if (rc == 0) { exp = class_conn2export(conn); - INIT_LIST_HEAD(&exp->exp_ec_data.eced_locks); + CFS_INIT_LIST_HEAD(&exp->exp_ec_data.eced_locks); class_export_put(exp); } @@ -1472,10 +1468,10 @@ int echo_client_init(void) lprocfs_init_vars(echo, &lvars); return class_register_type(&echo_obd_ops, NULL, lvars.module_vars, - OBD_ECHO_CLIENT_DEVICENAME, NULL); + LUSTRE_ECHO_CLIENT_NAME, NULL); } void echo_client_exit(void) { - class_unregister_type(OBD_ECHO_CLIENT_DEVICENAME); + class_unregister_type(LUSTRE_ECHO_CLIENT_NAME); } diff --git a/lustre/obdecho/lproc_echo.c b/lustre/obdecho/lproc_echo.c index 3418691..c816ca0 100644 --- a/lustre/obdecho/lproc_echo.c +++ b/lustre/obdecho/lproc_echo.c @@ -24,8 +24,8 @@ */ #define DEBUG_SUBSYSTEM S_ECHO -#include -#include +#include +#include #ifdef LPROCFS static struct lprocfs_vars lprocfs_obd_vars[] = { diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 3521135..02438ba 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -45,21 +45,19 @@ #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) # include # include -# include #endif -#include -#include -#include -#include -#include -#include 
-#include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include -#include +#include +#include +#include #include "filter_internal.h" @@ -397,7 +395,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) } if (last_rcvd_size == 0) { - CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD); + LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name); memcpy(fsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->lsd_uuid)); fsd->lsd_last_transno = 0; @@ -416,8 +414,10 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) GOTO(err_fsd, rc); } if (strcmp(fsd->lsd_uuid, obd->obd_uuid.uuid) != 0) { - CERROR("OBD UUID %s does not match last_rcvd UUID %s\n", - obd->obd_uuid.uuid, fsd->lsd_uuid); + LCONSOLE_ERROR("Trying to start OBD %s using the wrong" + " disk %s. Were the /dev/ assignments " + "rearranged?\n", + obd->obd_uuid.uuid, fsd->lsd_uuid); GOTO(err_fsd, rc = -EINVAL); } mount_count = le64_to_cpu(fsd->lsd_mount_count); @@ -538,7 +538,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) obd->obd_recovery_start = CURRENT_SECONDS; /* Only used for lprocfs_status */ obd->obd_recovery_end = obd->obd_recovery_start + - OBD_RECOVERY_TIMEOUT / HZ; + OBD_RECOVERY_TIMEOUT; } out: @@ -1528,8 +1528,8 @@ int filter_common_setup(struct obd_device *obd, struct lustre_cfg* lcfg, obd->obd_recoverable_clients, (obd->obd_recoverable_clients == 1) ? 
"client" : "clients", - (int)(OBD_RECOVERY_TIMEOUT / HZ) / 60, - (int)(OBD_RECOVERY_TIMEOUT / HZ) % 60, + (int)(OBD_RECOVERY_TIMEOUT) / 60, + (int)(OBD_RECOVERY_TIMEOUT) % 60, obd->obd_name); } else { LCONSOLE_INFO("OST %s now serving %s (%s%s%s) with recovery " @@ -2705,7 +2705,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa, unsigned int qcids[MAXQUOTAS] = {0, 0}; struct obd_device *obd; struct filter_obd *filter; - struct dentry *dchild = NULL, *dparent; + struct dentry *dchild = NULL, *dparent = NULL; struct lvfs_run_ctxt saved; void *handle = NULL; struct llog_cookie *fcc = NULL; @@ -2947,8 +2947,9 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen, RETURN(-EINVAL); } -static int filter_set_info(struct obd_export *exp, __u32 keylen, - void *key, __u32 vallen, void *val) +static int filter_set_info_async(struct obd_export *exp, __u32 keylen, + void *key, __u32 vallen, void *val, + struct ptlrpc_request_set *set) { struct obd_device *obd; struct llog_ctxt *ctxt; @@ -2965,8 +2966,8 @@ static int filter_set_info(struct obd_export *exp, __u32 keylen, memcmp(key, KEY_MDS_CONN, keylen) != 0) RETURN(-EINVAL); - CWARN("%s: received MDS connection from %s\n", obd->obd_name, - obd_export_nid2str(exp)); + LCONSOLE_WARN("%s: received MDS connection from %s\n", obd->obd_name, + obd_export_nid2str(exp)); obd->u.filter.fo_mdc_conn.cookie = exp->exp_handle.h_cookie; /* setup llog imports */ @@ -3077,7 +3078,7 @@ static struct lvfs_callback_ops filter_lvfs_ops = { static struct obd_ops filter_obd_ops = { .o_owner = THIS_MODULE, .o_get_info = filter_get_info, - .o_set_info = filter_set_info, + .o_set_info_async = filter_set_info_async, .o_setup = filter_setup, .o_precleanup = filter_precleanup, .o_cleanup = filter_cleanup, @@ -3106,7 +3107,7 @@ static struct obd_ops filter_obd_ops = { static struct obd_ops filter_sanobd_ops = { .o_owner = THIS_MODULE, .o_get_info = filter_get_info, - .o_set_info = filter_set_info, + .o_set_info_async = 
filter_set_info_async, .o_setup = filter_san_setup, .o_precleanup = filter_precleanup, .o_cleanup = filter_cleanup, diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 9006728..4c68b34 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -8,12 +8,13 @@ #ifdef __KERNEL__ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include + +#define FILTER_LAYOUT_VERSION "2" -#define HEALTH_CHECK "health_check" #define FILTER_INIT_OBJID 0 #define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index cf7140d..fdc0492 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -34,8 +34,8 @@ #include // XXX kill me soon #include -#include -#include +#include +#include #include "filter_internal.h" int *obdfilter_created_scratchpad; diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index be6e550..b10a83a 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -31,14 +31,15 @@ #include #include // XXX kill me soon #include +#include #define DEBUG_SUBSYSTEM S_FILTER #include #include -#include -#include +#include +#include #include "filter_internal.h" /* Bug 2254 -- this is better done in ext3_map_inode_page, but this diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 1bd4995..1a07a95 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -35,9 +35,9 @@ #define DEBUG_SUBSYSTEM S_FILTER -#include -#include -#include +#include +#include +#include #include "filter_internal.h" /* 512byte block min */ @@ -390,11 +390,15 @@ static int filter_clear_page_cache(struct inode *inode, rc = generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA); */ + down(&inode->i_sem); + current->flags |= PF_SYNCWRITE; rc = 
filemap_fdatawrite(inode->i_mapping); rc2 = sync_mapping_buffers(inode->i_mapping); if (rc == 0) rc = rc2; rc2 = filemap_fdatawait(inode->i_mapping); + current->flags &= ~PF_SYNCWRITE; + up(&inode->i_sem); if (rc == 0) rc = rc2; if (rc != 0) diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index a2ce350..c61be24 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -34,9 +34,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include "filter_internal.h" diff --git a/lustre/obdfilter/filter_lvb.c b/lustre/obdfilter/filter_lvb.c index acdd457..06946fe 100644 --- a/lustre/obdfilter/filter_lvb.c +++ b/lustre/obdfilter/filter_lvb.c @@ -34,8 +34,8 @@ #include #include -#include -#include +#include +#include #include "filter_internal.h" diff --git a/lustre/obdfilter/filter_san.c b/lustre/obdfilter/filter_san.c index 7f83977..c679b3e 100644 --- a/lustre/obdfilter/filter_san.c +++ b/lustre/obdfilter/filter_san.c @@ -33,8 +33,8 @@ #include // XXX kill me soon #include -#include -#include +#include +#include #include "filter_internal.h" /* sanobd setup methods - use a specific mount option */ diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 88da264..935ae6f 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -25,8 +25,8 @@ #define DEBUG_SUBSYSTEM S_CLASS #include -#include -#include +#include +#include #include #include diff --git a/lustre/osc/Info.plist b/lustre/osc/Info.plist new file mode 100644 index 0000000..727980d --- /dev/null +++ b/lustre/osc/Info.plist @@ -0,0 +1,43 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + osc + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.osc + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.apple.kpi.unsupported + 8.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.lvfs + 1.0.0 + com.clusterfs.lustre.obdclass + 1.0.0 + com.clusterfs.lustre.ptlrpc + 1.0.0 + + + diff --git a/lustre/osc/autoMakefile.am b/lustre/osc/autoMakefile.am index af0649d..c9f2fbb 100644 --- a/lustre/osc/autoMakefile.am +++ b/lustre/osc/autoMakefile.am @@ -11,8 +11,31 @@ libosc_a_CFLAGS = $(LLCFLAGS) endif if MODULES + +if LINUX modulefs_DATA = osc$(KMODEXT) endif +if DARWIN +macos_PROGRAMS := osc + +osc_SOURCES := \ + osc_create.c \ + osc_request.c + +osc_CFLAGS := $(EXTRA_KCFLAGS) +osc_LDFLAGS := $(EXTRA_KLDFLAGS) +osc_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif # Darwin + +endif + +install-data-hook: $(install_data_hook) + MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ DIST_SOURCES = $(osc-objs:%.o=%.c) osc_internal.h diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 05e2567..764c55c 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -28,8 +28,8 @@ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #include #endif -#include -#include +#include +#include #include #include "osc_internal.h" @@ -41,9 +41,9 @@ static int osc_rd_max_pages_per_rpc(char *page, char **start, off_t off, struct client_obd *cli = &dev->u.cli; int rc; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); rc = snprintf(page, count, "%d\n", cli->cl_max_pages_per_rpc); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } @@ -61,9 +61,9 @@ static int osc_wr_max_pages_per_rpc(struct file *file, const char *buffer, if (val < 1 || val > PTLRPC_MAX_BRW_PAGES) return -ERANGE; - spin_lock(&cli->cl_loi_list_lock); + 
client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_max_pages_per_rpc = val; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return count; } @@ -75,9 +75,9 @@ static int osc_rd_max_rpcs_in_flight(char *page, char **start, off_t off, struct client_obd *cli = &dev->u.cli; int rc; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); rc = snprintf(page, count, "%u\n", cli->cl_max_rpcs_in_flight); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } @@ -99,9 +99,9 @@ static int osc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, if (pool && val > cli->cl_max_rpcs_in_flight) pool->prp_populate(pool, val-cli->cl_max_rpcs_in_flight); - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_max_rpcs_in_flight = val; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return count; } @@ -113,9 +113,9 @@ static int osc_rd_max_dirty_mb(char *page, char **start, off_t off, int count, struct client_obd *cli = &dev->u.cli; unsigned val; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); val = cli->cl_dirty_max >> 20; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return snprintf(page, count, "%u\n", val); } @@ -135,10 +135,10 @@ static int osc_wr_max_dirty_mb(struct file *file, const char *buffer, val > num_physpages >> (20 - PAGE_SHIFT - 2)) /* 1/4 of RAM */ return -ERANGE; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_dirty_max = (obd_count)val * 1024 * 1024; osc_wake_cache_waiters(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return count; } @@ -150,9 +150,9 @@ static int osc_rd_cur_dirty_bytes(char *page, char **start, off_t off, struct client_obd *cli = &dev->u.cli; int rc; - 
spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); rc = snprintf(page, count, "%lu\n", cli->cl_dirty); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } @@ -163,9 +163,9 @@ static int osc_rd_cur_grant_bytes(char *page, char **start, off_t off, struct client_obd *cli = &dev->u.cli; int rc; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); rc = snprintf(page, count, "%lu\n", cli->cl_avail_grant); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); return rc; } @@ -297,13 +297,12 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) struct timeval now; struct obd_device *dev = seq->private; struct client_obd *cli = &dev->u.cli; - unsigned long flags; unsigned long read_tot = 0, write_tot = 0, read_cum, write_cum; int i; do_gettimeofday(&now); - spin_lock_irqsave(&cli->cl_loi_list_lock, flags); + client_obd_list_lock(&cli->cl_loi_list_lock); seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); @@ -384,7 +383,7 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) break; } - spin_unlock_irqrestore(&cli->cl_loi_list_lock, flags); + client_obd_list_unlock(&cli->cl_loi_list_lock); return 0; } diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index b98cac1..d21c3e8 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -35,18 +35,7 @@ #define DEBUG_SUBSYSTEM S_OSC #ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include -# include -# else -# include -# endif +# include #else /* __KERNEL__ */ # include #endif @@ -55,8 +44,8 @@ # include #endif -# include -#include +# include +#include #include "osc_internal.h" static int osc_interpret_create(struct ptlrpc_request *req, void *data, int rc) @@ -109,7 +98,7 @@ static int osc_interpret_create(struct 
ptlrpc_request *req, void *data, int rc) CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n", oscc->oscc_last_id, oscc->oscc_next_id); - wake_up(&oscc->oscc_waitq); + cfs_waitq_signal(&oscc->oscc_waitq); RETURN(rc); } @@ -288,7 +277,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa, CDEBUG(D_HA, "%s: oscc recovery finished, last_id: " LPU64", rc: %d\n", oscc->oscc_obd->obd_name, oscc->oscc_last_id, rc); - wake_up(&oscc->oscc_waitq); + cfs_waitq_signal(&oscc->oscc_waitq); } else { CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", oscc->oscc_obd->obd_name, rc); @@ -315,7 +304,8 @@ int osc_create(struct obd_export *exp, struct obdo *oa, CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n", oscc); - lwi = LWI_TIMEOUT(MAX(obd_timeout*HZ/4, 1), NULL, NULL); + lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout/4)), + NULL, NULL); rc = l_wait_event(oscc->oscc_waitq, !oscc_recovering(oscc), &lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); @@ -373,8 +363,8 @@ void oscc_init(struct obd_device *obd) oscc = &obd->u.cli.cl_oscc; memset(oscc, 0, sizeof(*oscc)); - INIT_LIST_HEAD(&oscc->oscc_list); - init_waitqueue_head(&oscc->oscc_waitq); + CFS_INIT_LIST_HEAD(&oscc->oscc_list); + cfs_waitq_init(&oscc->oscc_waitq); spin_lock_init(&oscc->oscc_lock); oscc->oscc_obd = obd; oscc->oscc_grow_count = OST_MIN_PRECREATE; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 82db660..667da17 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -22,7 +22,7 @@ struct osc_async_page { unsigned long oap_interrupted:1; struct oig_callback_context oap_occ; - struct page *oap_page; + cfs_page_t *oap_page; struct obd_io_group *oap_oig; struct ptlrpc_request *oap_request; struct client_obd *oap_cli; @@ -38,7 +38,7 @@ struct osc_async_page { struct osc_cache_waiter { struct list_head ocw_entry; - wait_queue_head_t ocw_waitq; + cfs_waitq_t ocw_waitq; struct osc_async_page *ocw_oap; int ocw_rc; }; @@ -57,10 +57,6 @@ int 
osc_real_create(struct obd_export *exp, struct obdo *oa, void oscc_init(struct obd_device *obd); void osc_wake_cache_waiters(struct client_obd *cli); - -/* Quota stuff */ -extern quota_interface_t *quota_interface; - #ifdef LPROCFS int lproc_osc_attach_seqstat(struct obd_device *dev); #else diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c index 569132b..39bd2f8 100644 --- a/lustre/osc/osc_lib.c +++ b/lustre/osc/osc_lib.c @@ -29,11 +29,11 @@ #ifdef __KERNEL__ # include -# include -# include -# include -# include -# include +# include +# include +# include +# include +# include # include /* convert a pathname into a kdev_t */ diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 5b1c9ff..fa9e2929 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -35,39 +35,31 @@ #define DEBUG_SUBSYSTEM S_OSC #ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include -# include -# else -# include -# endif +# include #else /* __KERNEL__ */ # include #endif -# include +# include #include -#include +#include #include -#include -#include +#include +#include #ifdef __CYGWIN__ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "osc_internal.h" +static quota_interface_t *quota_interface = NULL; +extern quota_interface_t osc_quota_interface; + /* Pack OSC object metadata for disk storage (LE byte order). 
*/ static int osc_packmd(struct obd_export *exp, struct lov_mds_md **lmmp, struct lov_stripe_md *lsm) @@ -548,7 +540,7 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, LASSERT(!(oa->o_valid & bits)); oa->o_valid |= bits; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); oa->o_dirty = cli->cl_dirty; if (cli->cl_dirty > cli->cl_dirty_max) { CERROR("dirty %lu > dirty_max %lu\n", @@ -559,14 +551,14 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, cli->cl_dirty, cli->cl_dirty_max); oa->o_undirty = 0; } else { - long max_in_flight = (cli->cl_max_pages_per_rpc << PAGE_SHIFT)* + long max_in_flight = (cli->cl_max_pages_per_rpc << CFS_PAGE_SHIFT)* (cli->cl_max_rpcs_in_flight + 1); oa->o_undirty = max(cli->cl_dirty_max, max_in_flight); } oa->o_grant = cli->cl_avail_grant; oa->o_dropped = cli->cl_lost_grant; cli->cl_lost_grant = 0; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE,"dirty: "LPU64" undirty: %u dropped %u grant: "LPU64"\n", oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); } @@ -575,10 +567,10 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, static void osc_consume_write_grant(struct client_obd *cli, struct osc_async_page *oap) { - cli->cl_dirty += PAGE_SIZE; - cli->cl_avail_grant -= PAGE_SIZE; + cli->cl_dirty += CFS_PAGE_SIZE; + cli->cl_avail_grant -= CFS_PAGE_SIZE; oap->oap_brw_flags |= OBD_BRW_FROM_GRANT; - CDEBUG(D_CACHE, "using %lu grant credits for oap %p\n", PAGE_SIZE, oap); + CDEBUG(D_CACHE, "using %lu grant credits for oap %p\n", CFS_PAGE_SIZE, oap); LASSERT(cli->cl_avail_grant >= 0); } @@ -593,9 +585,10 @@ void osc_wake_cache_waiters(struct client_obd *cli) struct list_head *l, *tmp; struct osc_cache_waiter *ocw; + ENTRY; list_for_each_safe(l, tmp, &cli->cl_cache_waiters) { /* if we can't dirty more, we must wait until some is written */ - if (cli->cl_dirty + PAGE_SIZE > 
cli->cl_dirty_max) { + if (cli->cl_dirty + CFS_PAGE_SIZE > cli->cl_dirty_max) { CDEBUG(D_CACHE, "no dirty room: dirty: %ld max %ld\n", cli->cl_dirty, cli->cl_dirty_max); return; @@ -603,7 +596,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) /* if still dirty cache but no grant wait for pending RPCs that * may yet return us some grant before doing sync writes */ - if (cli->cl_w_in_flight && cli->cl_avail_grant < PAGE_SIZE) { + if (cli->cl_w_in_flight && cli->cl_avail_grant < CFS_PAGE_SIZE) { CDEBUG(D_CACHE, "%u BRW writes in flight, no grant\n", cli->cl_w_in_flight); return; @@ -611,7 +604,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) ocw = list_entry(l, struct osc_cache_waiter, ocw_entry); list_del_init(&ocw->ocw_entry); - if (cli->cl_avail_grant < PAGE_SIZE) { + if (cli->cl_avail_grant < CFS_PAGE_SIZE) { /* no more RPCs in flight to return grant, do sync IO */ ocw->ocw_rc = -EDQUOT; CDEBUG(D_INODE, "wake oap %p for sync\n", ocw->ocw_oap); @@ -619,7 +612,7 @@ void osc_wake_cache_waiters(struct client_obd *cli) osc_consume_write_grant(cli, ocw->ocw_oap); } - wake_up(&ocw->ocw_waitq); + cfs_waitq_signal(&ocw->ocw_waitq); } EXIT; @@ -627,9 +620,9 @@ void osc_wake_cache_waiters(struct client_obd *cli) static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) { - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant = ocd->ocd_grant; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "setting cl_avail_grant: %ld cl_lost_grant: %ld\n", cli->cl_avail_grant, cli->cl_lost_grant); @@ -638,11 +631,11 @@ static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) static void osc_update_grant(struct client_obd *cli, struct ost_body *body) { - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant); cli->cl_avail_grant += 
body->oa.o_grant; /* waiters are woken in brw_interpret_oap */ - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); } /* We assume that the reason this OSC got a short read is because it read @@ -660,9 +653,9 @@ static void handle_short_read(int nob_read, obd_count page_count, if (pga->count > nob_read) { /* EOF inside this page */ - ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK); + ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK); memset(ptr + nob_read, 0, pga->count - nob_read); - kunmap(pga->pg); + cfs_kunmap(pga->pg); page_count--; pga++; break; @@ -675,9 +668,9 @@ static void handle_short_read(int nob_read, obd_count page_count, /* zero remaining pages */ while (page_count-- > 0) { - ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK); + ptr = cfs_kmap(pga->pg) + (pga->off & ~CFS_PAGE_MASK); memset(ptr, 0, pga->count); - kunmap(pga->pg); + cfs_kunmap(pga->pg); pga++; } } @@ -742,12 +735,12 @@ static obd_count osc_checksum_bulk(int nob, obd_count pg_count, LASSERT (pg_count > 0); while (nob > 0 && pg_count > 0) { - char *ptr = kmap(pga->pg); - int off = pga->off & ~PAGE_MASK; + char *ptr = cfs_kmap(pga->pg); + int off = pga->off & ~CFS_PAGE_MASK; int count = pga->count > nob ? nob : pga->count; cksum = crc32_le(cksum, ptr + off, count); - kunmap(pga->pg); + cfs_kunmap(pga->pg); LL_CDEBUG_PAGE(D_PAGE, pga->pg, "off %d checksum %x\n", off, cksum); @@ -778,6 +771,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, int rc; struct ptlrpc_request_pool *pool; + ENTRY; opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ; pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL; @@ -793,7 +787,7 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, req = ptlrpc_prep_req_pool(imp, LUSTRE_OST_VERSION, opc, 3, size, NULL, pool); if (req == NULL) - return (-ENOMEM); + RETURN (-ENOMEM); /* FIXME bug 249. 
Also see bug 7198 */ if (imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_REQPORTAL) @@ -824,9 +818,10 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, struct brw_page *pg_prev = pg - 1; LASSERT(pg->count > 0); - LASSERTF((pg->off & ~PAGE_MASK) + pg->count <= PAGE_SIZE, + LASSERTF((pg->off & ~CFS_PAGE_MASK) + pg->count <= CFS_PAGE_SIZE, "i: %d pg: %p off: "LPU64", count: %u\n", i, pg, pg->off, pg->count); +#ifdef __LINUX__ LASSERTF(i == 0 || pg->off > pg_prev->off, "i %d p_c %u pg %p [pri %lu ind %lu] off "LPU64 " prev_pg %p [pri %lu ind %lu] off "LPU64"\n", @@ -834,10 +829,14 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, pg->pg, page_private(pg->pg), pg->pg->index, pg->off, pg_prev->pg, page_private(pg_prev->pg), pg_prev->pg->index, pg_prev->off); +#else + LASSERTF(i == 0 || pg->off > pg_prev->off, + "i %d p_c %u\n", i, page_count); +#endif LASSERT((pga[0].flag & OBD_BRW_SRVLOCK) == (pg->flag & OBD_BRW_SRVLOCK)); - ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~PAGE_MASK, + ptlrpc_prep_bulk_page(desc, pg->pg, pg->off & ~CFS_PAGE_MASK, pg->count); requested_nob += pg->count; @@ -880,11 +879,11 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa, *niocountp = niocount; *requested_nobp = requested_nob; *reqp = req; - return (0); + RETURN (0); out: ptlrpc_req_finished (req); - return (rc); + RETURN (rc); } static void check_write_csum(__u32 cli, __u32 srv, int requested_nob, @@ -1145,21 +1144,21 @@ static obd_count max_unfragmented_pages(struct brw_page *pg, obd_count pages) int count = 1; int offset; - LASSERT (pages > 0); - offset = pg->off & (PAGE_SIZE - 1); + LASSERT (pages > 0); + offset = pg->off & (CFS_PAGE_SIZE - 1); for (;;) { pages--; if (pages == 0) /* that's all */ return count; - if (offset + pg->count < PAGE_SIZE) - return count; /* doesn't end on page boundary */ + if (offset + pg->count < CFS_PAGE_SIZE) + return count; /* doesn't end on page 
boundary */ - pg++; - offset = pg->off & (PAGE_SIZE - 1); - if (offset != 0) /* doesn't start on page boundary */ - return count; + pg++; + offset = pg->off & (CFS_PAGE_SIZE - 1); + if (offset != 0) /* doesn't start on page boundary */ + return count; count++; } @@ -1201,11 +1200,9 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, *oa = *saved_oa; } else if (page_count > pages_per_brw) { /* save a copy of oa (brw will clobber it) */ - OBD_ALLOC(saved_oa, sizeof(*saved_oa)); - if (saved_oa == NULL) { - CERROR("Can't save oa (ENOMEM)\n"); + saved_oa = obdo_alloc(); + if (saved_oa == NULL) RETURN(-ENOMEM); - } *saved_oa = *oa; } @@ -1219,7 +1216,7 @@ static int osc_brw(int cmd, struct obd_export *exp, struct obdo *oa, } if (saved_oa != NULL) - OBD_FREE(saved_oa, sizeof(*saved_oa)); + obdo_free(saved_oa); RETURN(rc); } @@ -1364,7 +1361,7 @@ static void osc_occ_interrupted(struct oig_callback_context *occ) /* XXX member_of() */ oap = list_entry(occ, struct osc_async_page, oap_occ); - spin_lock(&oap->oap_cli->cl_loi_list_lock); + client_obd_list_lock(&oap->oap_cli->cl_loi_list_lock); oap->oap_interrupted = 1; @@ -1392,7 +1389,7 @@ static void osc_occ_interrupted(struct oig_callback_context *occ) } unlock: - spin_unlock(&oap->oap_cli->cl_loi_list_lock); + client_obd_list_unlock(&oap->oap_cli->cl_loi_list_lock); } /* this is trying to propogate async writeback errors back up to the @@ -1422,6 +1419,7 @@ static void osc_process_ar(struct osc_async_rc *ar, struct ptlrpc_request *req, static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, struct osc_async_page *oap, int sent, int rc) { + ENTRY; osc_exit_cache(cli, oap, sent); oap->oap_async_flags = 0; oap->oap_interrupted = 0; @@ -1456,6 +1454,7 @@ static void osc_ap_completion(struct client_obd *cli, struct obdo *oa, oap->oap_caller_ops->ap_completion(oap->oap_caller_data, oap->oap_cmd, oa, rc); + EXIT; } static int brw_interpret_oap(struct ptlrpc_request *request, @@ -1474,7 +1473,7 @@ 
static int brw_interpret_oap(struct ptlrpc_request *request, cli = aa->aa_cli; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters * is called so we know whether to go to sync BRWs or wait for more @@ -1499,7 +1498,7 @@ static int brw_interpret_oap(struct ptlrpc_request *request, osc_wake_cache_waiters(cli); osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); obdo_free(aa->aa_oa); OBD_FREE(aa->aa_pga, aa->aa_page_count * sizeof(struct brw_page)); @@ -1521,6 +1520,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, struct list_head *pos; int i, rc; + ENTRY; LASSERT(!list_empty(rpc_list)); OBD_ALLOC(pga, sizeof(*pga) * page_count); @@ -1545,7 +1545,7 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli, pga[i].count = oap->oap_count; pga[i].flag = oap->oap_brw_flags; CDEBUG(0, "put page %p index %lu oap %p flg %x to pga\n", - pga[i].pg, oap->oap_page->index, oap, pga[i].flag); + pga[i].pg, cfs_page_index(oap->oap_page), oap, pga[i].flag); i++; } @@ -1591,7 +1591,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, struct osc_async_page *oap = NULL; struct osc_brw_async_args *aa; struct obd_async_page_ops *ops; - LIST_HEAD(rpc_list); + CFS_LIST_HEAD(rpc_list); unsigned int ending_offset; unsigned starting_offset = 0; ENTRY; @@ -1653,7 +1653,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, * XXX nikita: this assertion should be adjusted when lustre * starts using PG_writeback for pages being written out. 
*/ -#if defined(__KERNEL__) +#if defined(__KERNEL__) && defined(__LINUX__) LASSERT(PageLocked(oap->oap_page)); #endif /* If there is a gap at the start of this page, it can't merge @@ -1699,7 +1699,7 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, /* If there is a gap at the end of this page, it can't merge * with any subsequent pages, so we'll hand the network a * "fragmented" page array that it can't transfer in 1 RDMA */ - if (oap->oap_page_off + oap->oap_count < PAGE_SIZE) + if (oap->oap_page_off + oap->oap_count < CFS_PAGE_SIZE) break; } @@ -1710,13 +1710,13 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, loi_list_maint(cli, loi); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); request = osc_build_req(cli, &rpc_list, page_count, cmd); if (IS_ERR(request)) { /* this should happen rarely and is pretty bad, it makes the * pending list not follow the dirty order */ - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); list_for_each_safe(pos, tmp, &rpc_list) { oap = list_entry(pos, struct osc_async_page, oap_rpc_item); @@ -1746,24 +1746,24 @@ static int osc_send_oap_rpc(struct client_obd *cli, struct lov_oinfo *loi, LASSERT(sizeof(*aa) <= sizeof(request->rq_async_args)); aa = (struct osc_brw_async_args *)&request->rq_async_args; - INIT_LIST_HEAD(&aa->aa_oaps); + CFS_INIT_LIST_HEAD(&aa->aa_oaps); list_splice(&rpc_list, &aa->aa_oaps); - INIT_LIST_HEAD(&rpc_list); + CFS_INIT_LIST_HEAD(&rpc_list); if (cmd == OBD_BRW_READ) { lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight); lprocfs_oh_tally_log2(&cli->cl_read_offset_hist, - starting_offset/PAGE_SIZE + 1); + starting_offset/CFS_PAGE_SIZE + 1); } else { lprocfs_oh_tally_log2(&cli->cl_write_page_hist, page_count); lprocfs_oh_tally(&cli->cl_write_rpc_hist, cli->cl_w_in_flight); 
lprocfs_oh_tally_log2(&cli->cl_write_offset_hist, - starting_offset/PAGE_SIZE + 1); + starting_offset/CFS_PAGE_SIZE + 1); } - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); if (cmd == OBD_BRW_READ) cli->cl_r_in_flight++; @@ -1908,9 +1908,9 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) { int rc; ENTRY; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); rc = list_empty(&ocw->ocw_entry) || rpcs_in_flight(cli) == 0; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(rc); }; @@ -1922,22 +1922,23 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi, struct osc_cache_waiter ocw; struct l_wait_info lwi = { 0 }; + ENTRY; CDEBUG(D_CACHE, "dirty: %ld dirty_max: %ld dropped: %lu grant: %lu\n", cli->cl_dirty, cli->cl_dirty_max, cli->cl_lost_grant, cli->cl_avail_grant); /* force the caller to try sync io. this can jump the list * of queued writes and create a discontiguous rpc stream */ - if (cli->cl_dirty_max < PAGE_SIZE || cli->cl_ar.ar_force_sync || + if (cli->cl_dirty_max < CFS_PAGE_SIZE || cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) - return(-EDQUOT); + RETURN(-EDQUOT); /* Hopefully normal case - cache space and write credits available */ - if (cli->cl_dirty + PAGE_SIZE <= cli->cl_dirty_max && - cli->cl_avail_grant >= PAGE_SIZE) { + if (cli->cl_dirty + CFS_PAGE_SIZE <= cli->cl_dirty_max && + cli->cl_avail_grant >= CFS_PAGE_SIZE) { /* account for ourselves */ osc_consume_write_grant(cli, oap); - return(0); + RETURN(0); } /* Make sure that there are write rpcs in flight to wait for. This @@ -1945,18 +1946,18 @@ static int osc_enter_cache(struct client_obd *cli, struct lov_oinfo *loi, * other objects sure might. 
*/ if (cli->cl_w_in_flight) { list_add_tail(&ocw.ocw_entry, &cli->cl_cache_waiters); - init_waitqueue_head(&ocw.ocw_waitq); + cfs_waitq_init(&ocw.ocw_waitq); ocw.ocw_oap = oap; ocw.ocw_rc = 0; loi_list_maint(cli, loi); osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "sleeping for cache space\n"); l_wait_event(ocw.ocw_waitq, ocw_granted(cli, &ocw), &lwi); - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); if (!list_empty(&ocw.ocw_entry)) { list_del(&ocw.ocw_entry); RETURN(-EINTR); @@ -1982,24 +1983,24 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, } oap->oap_brw_flags &= ~OBD_BRW_FROM_GRANT; - cli->cl_dirty -= PAGE_SIZE; + cli->cl_dirty -= CFS_PAGE_SIZE; if (!sent) { - cli->cl_lost_grant += PAGE_SIZE; + cli->cl_lost_grant += CFS_PAGE_SIZE; CDEBUG(D_CACHE, "lost grant: %lu avail grant: %lu dirty: %lu\n", cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty); - } else if (PAGE_SIZE != blocksize && oap->oap_count != PAGE_SIZE) { + } else if (CFS_PAGE_SIZE != blocksize && oap->oap_count != CFS_PAGE_SIZE) { /* For short writes we shouldn't count parts of pages that * span a whole block on the OST side, or our accounting goes * wrong. Should match the code in filter_grant_check. 
*/ - int offset = (oap->oap_obj_off +oap->oap_page_off) & ~PAGE_MASK; + int offset = (oap->oap_obj_off +oap->oap_page_off) & ~CFS_PAGE_MASK; int count = oap->oap_count + (offset & (blocksize - 1)); int end = (offset + oap->oap_count) & (blocksize - 1); if (end) count += blocksize - end; - cli->cl_lost_grant += PAGE_SIZE - count; + cli->cl_lost_grant += CFS_PAGE_SIZE - count; CDEBUG(D_CACHE, "lost %lu grant: %lu avail: %lu dirty: %lu\n", - PAGE_SIZE - count, cli->cl_lost_grant, + CFS_PAGE_SIZE - count, cli->cl_lost_grant, cli->cl_avail_grant, cli->cl_dirty); } @@ -2007,7 +2008,7 @@ static void osc_exit_cache(struct client_obd *cli, struct osc_async_page *oap, } int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, - struct lov_oinfo *loi, struct page *page, + struct lov_oinfo *loi, cfs_page_t *page, obd_off offset, struct obd_async_page_ops *ops, void *data, void **res) { @@ -2028,9 +2029,9 @@ int osc_prep_async_page(struct obd_export *exp, struct lov_stripe_md *lsm, oap->oap_page = page; oap->oap_obj_off = offset; - INIT_LIST_HEAD(&oap->oap_pending_item); - INIT_LIST_HEAD(&oap->oap_urgent_item); - INIT_LIST_HEAD(&oap->oap_rpc_item); + CFS_INIT_LIST_HEAD(&oap->oap_pending_item); + CFS_INIT_LIST_HEAD(&oap->oap_urgent_item); + CFS_INIT_LIST_HEAD(&oap->oap_rpc_item); oap->oap_occ.occ_interrupted = osc_occ_interrupted; @@ -2094,7 +2095,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, if (loi == NULL) loi = &lsm->lsm_oinfo[0]; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); oap->oap_cmd = cmd; oap->oap_page_off = off; @@ -2105,7 +2106,7 @@ static int osc_queue_async_io(struct obd_export *exp, struct lov_stripe_md *lsm, if (cmd & OBD_BRW_WRITE) { rc = osc_enter_cache(cli, loi, oap); if (rc) { - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(rc); } lop = &loi->loi_write_lop; @@ -2124,7 +2125,7 @@ static int osc_queue_async_io(struct 
obd_export *exp, struct lov_stripe_md *lsm, cmd); osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(0); } @@ -2169,7 +2170,7 @@ static int osc_set_async_flags(struct obd_export *exp, lop = &loi->loi_read_lop; } - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); if (list_empty(&oap->oap_pending_item)) GOTO(out, rc = -EINVAL); @@ -2191,7 +2192,7 @@ static int osc_set_async_flags(struct obd_export *exp, oap->oap_async_flags); out: osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(rc); } @@ -2222,7 +2223,7 @@ static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm, if (loi == NULL) loi = &lsm->lsm_oinfo[0]; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); oap->oap_cmd = cmd; oap->oap_page_off = off; @@ -2243,7 +2244,7 @@ static int osc_queue_group_io(struct obd_export *exp, struct lov_stripe_md *lsm, LOI_DEBUG(loi, "oap %p page %p on group pending\n", oap, oap->oap_page); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(0); } @@ -2276,13 +2277,13 @@ static int osc_trigger_group_io(struct obd_export *exp, if (loi == NULL) loi = &lsm->lsm_oinfo[0]; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); osc_group_to_pending(cli, loi, &loi->loi_write_lop, OBD_BRW_WRITE); osc_group_to_pending(cli, loi, &loi->loi_read_lop, OBD_BRW_READ); osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(0); } @@ -2310,7 +2311,7 @@ static int osc_teardown_async_page(struct obd_export *exp, lop = &loi->loi_read_lop; } - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); if (!list_empty(&oap->oap_rpc_item)) GOTO(out, rc = -EBUSY); @@ -2330,7 +2331,7 @@ static int osc_teardown_async_page(struct 
obd_export *exp, LOI_DEBUG(loi, "oap %p page %p torn down\n", oap, oap->oap_page); out: - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); RETURN(rc); } @@ -2425,12 +2426,12 @@ static int sanosc_brw_read(struct obd_export *exp, struct obdo *oa, CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n", page->mapping->host->i_ino, page->index); - memset(page_address(page), 0, PAGE_SIZE); + memset(page_address(page), 0, CFS_PAGE_SIZE); continue; } if (!page->buffers) { - create_empty_buffers(page, dev, PAGE_SIZE); + create_empty_buffers(page, dev, CFS_PAGE_SIZE); bh = page->buffers; clear_bit(BH_New, &bh->b_state); @@ -2548,7 +2549,7 @@ static int sanosc_brw_write(struct obd_export *exp, struct obdo *oa, dev = exp->exp_obd->u.cli.cl_sandev; if (!page->buffers) { - create_empty_buffers(page, dev, PAGE_SIZE); + create_empty_buffers(page, dev, CFS_PAGE_SIZE); } else { /* checking */ LASSERT(!test_bit(BH_New, &page->buffers->b_state)); @@ -2633,6 +2634,8 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data, } l_lock(&lock->l_resource->lr_namespace->ns_lock); #ifdef __KERNEL__ +#ifdef __LINUX__ + /* Liang XXX: Darwin and Winnt checking should be added */ if (lock->l_ast_data && lock->l_ast_data != data) { struct inode *new_inode = data; struct inode *old_inode = lock->l_ast_data; @@ -2646,6 +2649,7 @@ static void osc_set_data_with_check(struct lustre_handle *lockh, void *data, new_inode, new_inode->i_ino, new_inode->i_generation); } #endif +#endif lock->l_ast_data = data; lock->l_flags |= (flags & LDLM_FL_NO_LRU); l_unlock(&lock->l_resource->lr_namespace->ns_lock); @@ -2678,15 +2682,15 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother. 
*/ - policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; - policy->l_extent.end |= ~PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; + policy->l_extent.end |= ~CFS_PAGE_MASK; if (lsm->lsm_oinfo->loi_kms_valid == 0) goto no_match; /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, policy, mode, - lockh); + rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, policy, + mode, lockh); if (rc == 1) { osc_set_data_with_check(lockh, data, *flags); if (*flags & LDLM_FL_HAS_INTENT) { @@ -2711,7 +2715,7 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, * locks out from other users right now, too. */ if (mode == LCK_PR) { - rc = ldlm_lock_match(obd->obd_namespace, 0, &res_id, type, + rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, policy, LCK_PW, lockh); if (rc == 1) { /* FIXME: This is not incredibly elegant, but it might @@ -2738,6 +2742,9 @@ static int osc_enqueue(struct obd_export *exp, struct lov_stripe_md *lsm, req->rq_replen = lustre_msg_size(2, size); } + /* users of osc_enqueue() can pass this flag for ldlm_lock_match() */ + *flags &= ~LDLM_FL_BLOCK_GRANTED; + rc = ldlm_cli_enqueue(exp, req, obd->obd_namespace, res_id, type, policy, mode, flags, bl_cb, cp_cb, gl_cb, data, &lvb, sizeof(lvb), lustre_swab_ost_lvb, lockh); @@ -2776,8 +2783,8 @@ static int osc_match(struct obd_export *exp, struct lov_stripe_md *lsm, /* Filesystem lock extents are extended to page boundaries so that * dealing with the page cache is a little smoother */ - policy->l_extent.start -= policy->l_extent.start & ~PAGE_MASK; - policy->l_extent.end |= ~PAGE_MASK; + policy->l_extent.start -= policy->l_extent.start & ~CFS_PAGE_MASK; + policy->l_extent.end |= ~CFS_PAGE_MASK; /* Next, search for already existing extent locks that will cover us */ rc = ldlm_lock_match(obd->obd_namespace, *flags, &res_id, type, @@ -2839,7 +2846,7 
@@ static int osc_join_lru(struct obd_export *exp, } static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs, - unsigned long max_age) + cfs_time_t max_age) { struct obd_statfs *msfs; struct ptlrpc_request *request; @@ -3005,7 +3012,7 @@ static int osc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, GOTO(out, err); default: CDEBUG(D_INODE, "unrecognised ioctl %#x by %s\n", - cmd, current->comm); + cmd, cfs_curproc_comm()); GOTO(out, err = -ENOTTY); } out: @@ -3059,14 +3066,40 @@ static int osc_get_info(struct obd_export *exp, obd_count keylen, RETURN(-EINVAL); } -static int osc_set_info(struct obd_export *exp, obd_count keylen, - void *key, obd_count vallen, void *val) +static int osc_setinfo_mds_conn_interpret(struct ptlrpc_request *req, + void *aa, int rc) +{ + struct llog_ctxt *ctxt; + struct obd_import *imp = req->rq_import; + ENTRY; + + if (rc != 0) + RETURN(rc); + + ctxt = llog_get_context(imp->imp_obd, LLOG_MDS_OST_ORIG_CTXT); + if (ctxt) { + if (rc == 0) + rc = llog_initiator_connect(ctxt); + else + CERROR("cannot establish connection for " + "ctxt %p: %d\n", ctxt, rc); + } + + imp->imp_server_timeout = 1; + CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(imp->imp_obd)); + imp->imp_pingable = 1; + + RETURN(rc); +} + +static int osc_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set) { struct ptlrpc_request *req; struct obd_device *obd = exp->exp_obd; struct obd_import *imp = class_exp2cliimp(exp); - struct llog_ctxt *ctxt; - int rc, size[2] = {keylen, vallen}; + int size[2] = {keylen, vallen}; char *bufs[2] = {key, val}; ENTRY; @@ -3108,9 +3141,15 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(0); } - if (!KEY_IS(KEY_MDS_CONN) && !KEY_IS("evict_by_nid")) + if (!set) RETURN(-EINVAL); + /* We pass all other commands directly to OST. 
Since nobody calls osc + methods directly and everybody is supposed to go through LOV, we + assume lov checked invalid values for us. + The only recognised values so far are evict_by_nid and mds_conn. + Even if something bad goes through, we'd get a -EINVAL from OST + anyway. */ req = ptlrpc_prep_req(imp, LUSTRE_OST_VERSION, OST_SET_INFO, 2, size, bufs); @@ -3118,23 +3157,13 @@ static int osc_set_info(struct obd_export *exp, obd_count keylen, RETURN(-ENOMEM); req->rq_replen = lustre_msg_size(0, NULL); - rc = ptlrpc_queue_wait(req); - ptlrpc_req_finished(req); - ctxt = llog_get_context(exp->exp_obd, LLOG_MDS_OST_ORIG_CTXT); - if (ctxt) { - if (rc == 0) - rc = llog_initiator_connect(ctxt); - else - CERROR("cannot establish connection for ctxt %p: %d\n", - ctxt, rc); - } - - imp->imp_server_timeout = 1; - CDEBUG(D_HA, "pinging OST %s\n", obd2cli_tgt(exp->exp_obd)); - imp->imp_pingable = 1; + if (KEY_IS("mds_conn")) + req->rq_interpret_reply = osc_setinfo_mds_conn_interpret; + ptlrpc_set_add_req(set, req); + ptlrpc_check_set(set); - RETURN(rc); + RETURN(0); } @@ -3193,12 +3222,12 @@ static int osc_reconnect(struct obd_export *exp, struct obd_device *obd, if (data != NULL && (data->ocd_connect_flags & OBD_CONNECT_GRANT)) { long lost_grant; - spin_lock(&cli->cl_loi_list_lock); + client_obd_list_lock(&cli->cl_loi_list_lock); data->ocd_grant = cli->cl_avail_grant ?: 2 * cli->cl_max_pages_per_rpc << PAGE_SHIFT; lost_grant = cli->cl_lost_grant; cli->cl_lost_grant = 0; - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE, "request ocd_grant: %d cl_avail_grant: %ld " "cl_lost_grant: %ld\n", data->ocd_grant, @@ -3232,6 +3261,7 @@ static int osc_import_event(struct obd_device *obd, struct client_obd *cli; int rc = 0; + ENTRY; LASSERT(imp->imp_obd == obd); switch (event) { @@ -3256,12 +3286,12 @@ static int osc_import_event(struct obd_device *obd, /* Reset grants */ cli = &obd->u.cli; - spin_lock(&cli->cl_loi_list_lock); + 
client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant = 0; cli->cl_lost_grant = 0; /* all pages go to failing rpcs due to the invalid import */ osc_check_rpcs(cli); - spin_unlock(&cli->cl_loi_list_lock); + client_obd_list_unlock(&cli->cl_loi_list_lock); ldlm_namespace_cleanup(ns, LDLM_FL_LOCAL_ONLY); @@ -3304,9 +3334,10 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) int rc; ENTRY; + ENTRY; rc = ptlrpcd_addref(); if (rc) - return rc; + RETURN(rc); rc = client_obd_setup(obd, lcfg); if (rc) { @@ -3368,6 +3399,7 @@ int osc_cleanup(struct obd_device *obd) struct osc_creator *oscc = &obd->u.cli.cl_oscc; int rc; + ENTRY; ptlrpc_lprocfs_unregister_obd(obd); lprocfs_obd_cleanup(obd); @@ -3423,7 +3455,7 @@ struct obd_ops osc_obd_ops = { .o_join_lru = osc_join_lru, .o_iocontrol = osc_iocontrol, .o_get_info = osc_get_info, - .o_set_info = osc_set_info, + .o_set_info_async = osc_set_info_async, .o_import_event = osc_import_event, .o_llog_init = osc_llog_init, .o_llog_finish = osc_llog_finish, @@ -3464,7 +3496,6 @@ struct obd_ops sanosc_obd_ops = { }; #endif -static quota_interface_t *quota_interface; extern quota_interface_t osc_quota_interface; int __init osc_init(void) @@ -3524,6 +3555,5 @@ MODULE_AUTHOR("Cluster File Systems, Inc. "); MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)"); MODULE_LICENSE("GPL"); -module_init(osc_init); -module_exit(osc_exit); +cfs_module(osc, "1.0.0", osc_init, osc_exit); #endif diff --git a/lustre/osd/osd_handler.c b/lustre/osd/osd_handler.c index 40dc85b..4f2832f 100644 --- a/lustre/osd/osd_handler.c +++ b/lustre/osd/osd_handler.c @@ -34,7 +34,7 @@ #include /* LUSTRE_VERSION_CODE */ -#include +#include /* * XXX temporary stuff: direct access to ldiskfs/jdb. Interface between osd * and file system is not yet specified. 
@@ -44,23 +44,23 @@ /* LDISKFS_SB() */ #include /* simple_mkdir() */ -#include +#include /* * struct OBD_{ALLOC,FREE}*() * OBD_FAIL_CHECK */ -#include +#include /* struct ptlrpc_thread */ -#include +#include /* LUSTRE_OSD0_NAME */ -#include +#include /* class_register_type(), class_unregister_type(), class_get_type() */ -#include -#include +#include +#include /* fid_is_local() */ -#include +#include #include #include "osd_internal.h" diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index 56c384f..091628d 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -34,8 +34,7 @@ /* struct rw_semaphore */ #include -#include - +#include #include "osd_oi.h" struct inode; diff --git a/lustre/osd/osd_oi.c b/lustre/osd/osd_oi.c index 8963ace..a3f6c24 100644 --- a/lustre/osd/osd_oi.c +++ b/lustre/osd/osd_oi.c @@ -34,15 +34,15 @@ #include /* LUSTRE_VERSION_CODE */ -#include +#include /* * struct OBD_{ALLOC,FREE}*() * OBD_FAIL_CHECK */ -#include +#include /* fid_is_local() */ -#include +#include #include "osd_oi.h" /* osd_lookup(), struct osd_thread_info */ diff --git a/lustre/osd/osd_oi.h b/lustre/osd/osd_oi.h index 0b92534..ec1b1d1 100644 --- a/lustre/osd/osd_oi.h +++ b/lustre/osd/osd_oi.h @@ -33,8 +33,7 @@ /* struct rw_semaphore */ #include - -#include +#include struct dentry; struct lu_fid; diff --git a/lustre/ost/lproc_ost.c b/lustre/ost/lproc_ost.c index 0210d29..7b8e790 100644 --- a/lustre/ost/lproc_ost.c +++ b/lustre/ost/lproc_ost.c @@ -24,8 +24,8 @@ */ #define DEBUG_SUBSYSTEM S_OST -#include -#include +#include +#include #include #include "ost_internal.h" diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 05de28d..0bb8c64 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -39,16 +39,16 @@ #define DEBUG_SUBSYSTEM S_OST #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include -#include -#include +#include +#include #include -#include 
+#include #include "ost_internal.h" static int ost_num_threads; @@ -1149,7 +1149,7 @@ static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req) GOTO(out, rc = 0); } - rc = obd_set_info(exp, keylen, key, vallen, val); + rc = obd_set_info_async(exp, keylen, key, vallen, val, NULL); out: req->rq_repmsg->status = 0; RETURN(rc); diff --git a/lustre/ptlrpc/Info.plist b/lustre/ptlrpc/Info.plist new file mode 100644 index 0000000..2b7d35f --- /dev/null +++ b/lustre/ptlrpc/Info.plist @@ -0,0 +1,33 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ptlrpc + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.ptlrpc + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.lnet + 1.0.0 + com.clusterfs.lustre.obdclass + 1.0.0 + + + diff --git a/lustre/ptlrpc/autoMakefile.am b/lustre/ptlrpc/autoMakefile.am index 09dc09b..9a2c3db 100644 --- a/lustre/ptlrpc/autoMakefile.am +++ b/lustre/ptlrpc/autoMakefile.am @@ -30,8 +30,46 @@ libptlrpc_a_CFLAGS = $(LLCFLAGS) endif if MODULES + +if LINUX modulefs_DATA = ptlrpc$(KMODEXT) +endif #LINUX + +if DARWIN +macos_PROGRAMS := ptlrpc + +ptlrpc_SOURCES := \ + ptlrpc_module.c \ + client.c \ + connection.c \ + events.c \ + import.c \ + llog_client.c \ + llog_net.c \ + llog_server.c \ + lproc_ptlrpc.c \ + niobuf.c \ + pack_generic.c \ + pers.c \ + pinger.c \ + ptlrpcd.c \ + recover.c \ + recov_thread.c \ + service.c \ + $(LDLM_COMM_SOURCES) + +ptlrpc_CFLAGS := $(EXTRA_KCFLAGS) +ptlrpc_LDFLAGS := $(EXTRA_KLDFLAGS) +ptlrpc_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif # DARWIN + endif # MODULES +install-data-hook: $(install_data_hook) DIST_SOURCES = $(ptlrpc_objs:.o=.c) ptlrpc_internal.h MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ ldlm_*.c l_lock.c diff --git 
a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 24a312b..2732e53 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -30,11 +30,11 @@ #include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "ptlrpc_internal.h" @@ -97,7 +97,7 @@ static inline struct ptlrpc_bulk_desc *new_bulk(int npages, int type, int portal return NULL; spin_lock_init(&desc->bd_lock); - init_waitqueue_head(&desc->bd_waitq); + cfs_waitq_init(&desc->bd_waitq); desc->bd_max_iov = npages; desc->bd_iov_count = 0; desc->bd_md_h = LNET_INVALID_HANDLE; @@ -113,6 +113,7 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, struct obd_import *imp = req->rq_import; struct ptlrpc_bulk_desc *desc; + ENTRY; LASSERT(type == BULK_PUT_SINK || type == BULK_GET_SOURCE); desc = new_bulk(npages, type, portal); if (desc == NULL) @@ -137,6 +138,7 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req, struct obd_export *exp = req->rq_export; struct ptlrpc_bulk_desc *desc; + ENTRY; LASSERT(type == BULK_PUT_SOURCE || type == BULK_GET_SINK); desc = new_bulk(npages, type, portal); @@ -156,13 +158,13 @@ struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req, } void ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len) + cfs_page_t *page, int pageoffset, int len) { LASSERT(desc->bd_iov_count < desc->bd_max_iov); LASSERT(page != NULL); LASSERT(pageoffset >= 0); LASSERT(len > 0); - LASSERT(pageoffset + len <= PAGE_SIZE); + LASSERT(pageoffset + len <= CFS_PAGE_SIZE); desc->bd_nob += len; @@ -222,19 +224,21 @@ void ptlrpc_add_rqs_to_pool(struct ptlrpc_request_pool *pool, int num_rq) for (i = 0; i < num_rq; i++) { struct ptlrpc_request *req; struct lustre_msg *msg; + + spin_unlock(&pool->prp_lock); OBD_ALLOC(req, sizeof(struct ptlrpc_request)); if (!req) - goto out; - OBD_ALLOC_GFP(msg, size, GFP_KERNEL); + return; + 
OBD_ALLOC_GFP(msg, size, CFS_ALLOC_STD); if (!msg) { OBD_FREE(req, sizeof(struct ptlrpc_request)); - goto out; + return; } req->rq_reqmsg = msg; req->rq_pool = pool; + spin_lock(&pool->prp_lock); list_add_tail(&req->rq_list, &pool->prp_req_list); } -out: spin_unlock(&pool->prp_lock); return; } @@ -252,7 +256,7 @@ struct ptlrpc_request_pool *ptlrpc_init_rq_pool(int num_rq, int msgsize, kernel would do exactly this */ spin_lock_init(&pool->prp_lock); - INIT_LIST_HEAD(&pool->prp_req_list); + CFS_INIT_LIST_HEAD(&pool->prp_req_list); pool->prp_rq_size = msgsize; pool->prp_populate = populate_pool; @@ -360,10 +364,10 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode, request->rq_reply_portal = imp->imp_client->cli_reply_portal; spin_lock_init(&request->rq_lock); - INIT_LIST_HEAD(&request->rq_list); - INIT_LIST_HEAD(&request->rq_replay_list); - INIT_LIST_HEAD(&request->rq_set_chain); - init_waitqueue_head(&request->rq_reply_waitq); + CFS_INIT_LIST_HEAD(&request->rq_list); + CFS_INIT_LIST_HEAD(&request->rq_replay_list); + CFS_INIT_LIST_HEAD(&request->rq_set_chain); + cfs_waitq_init(&request->rq_reply_waitq); request->rq_xid = ptlrpc_next_xid(); atomic_set(&request->rq_refcount, 1); @@ -385,14 +389,15 @@ struct ptlrpc_request_set *ptlrpc_prep_set(void) { struct ptlrpc_request_set *set; + ENTRY; OBD_ALLOC(set, sizeof *set); if (!set) RETURN(NULL); - INIT_LIST_HEAD(&set->set_requests); - init_waitqueue_head(&set->set_waitq); + CFS_INIT_LIST_HEAD(&set->set_requests); + cfs_waitq_init(&set->set_waitq); set->set_remaining = 0; spin_lock_init(&set->set_new_req_lock); - INIT_LIST_HEAD(&set->set_new_requests); + CFS_INIT_LIST_HEAD(&set->set_new_requests); RETURN(set); } @@ -648,9 +653,6 @@ static int after_reply(struct ptlrpc_request *req) spin_lock_irqsave(&imp->imp_lock, flags); } - if (req->rq_transno > imp->imp_max_transno) - imp->imp_max_transno = req->rq_transno; - /* Replay-enabled imports return commit-status information. 
*/ if (req->rq_repmsg->last_committed) imp->imp_peer_committed_transno = @@ -706,9 +708,9 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) list_add_tail(&req->rq_list, &imp->imp_sending_list); spin_unlock_irqrestore(&imp->imp_lock, flags); - req->rq_reqmsg->status = current->pid; + req->rq_reqmsg->status = cfs_curproc_pid(); CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc" - " %s:%s:%d:"LPU64":%s:%d\n", current->comm, + " %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, libcfs_nid2str(imp->imp_connection->c_peer.nid), @@ -926,7 +928,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) } CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:" - "opc %s:%s:%d:"LPU64":%s:%d\n", current->comm, + "opc %s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, libcfs_nid2str(imp->imp_connection->c_peer.nid), @@ -935,7 +937,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set) set->set_remaining--; atomic_dec(&imp->imp_inflight); - wake_up(&imp->imp_recovery_waitq); + cfs_waitq_signal(&imp->imp_recovery_waitq); } /* If we hit an error, we want to recover promptly. */ @@ -1089,7 +1091,9 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) int rc, timeout; ENTRY; - LASSERT(!list_empty(&set->set_requests)); + if (list_empty(&set->set_requests)) + RETURN(0); + list_for_each(tmp, &set->set_requests) { req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); if (req->rq_phase == RQ_PHASE_NEW) @@ -1103,7 +1107,7 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set) * req times out */ CDEBUG(D_HA, "set %p going to sleep for %d seconds\n", set, timeout); - lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ, + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(timeout ? 
timeout : 1), ptlrpc_expired_set, ptlrpc_interrupted_set, set); rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi); @@ -1262,7 +1266,7 @@ EXPORT_SYMBOL(ptlrpc_req_xid); void ptlrpc_unregister_reply (struct ptlrpc_request *request) { int rc; - wait_queue_head_t *wq; + cfs_waitq_t *wq; struct l_wait_info lwi; LASSERT(!in_interrupt ()); /* might sleep */ @@ -1283,7 +1287,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request) for (;;) { /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ - lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL); + lwi = LWI_TIMEOUT(cfs_time_seconds(300), NULL, NULL); rc = l_wait_event (*wq, !ptlrpc_client_receiving_reply(request), &lwi); if (rc == 0) return; @@ -1305,8 +1309,19 @@ void ptlrpc_free_committed(struct obd_import *imp) LASSERT_SPIN_LOCKED(&imp->imp_lock); - CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n", - imp->imp_obd->obd_name, imp->imp_peer_committed_transno); + + if (imp->imp_peer_committed_transno == imp->imp_last_transno_checked && + imp->imp_generation == imp->imp_last_generation_checked) { + CDEBUG(D_HA, "%s: skip recheck for last_committed "LPU64"\n", + imp->imp_obd->obd_name, imp->imp_peer_committed_transno); + return; + } + + CDEBUG(D_HA, "%s: committing for last_committed "LPU64" gen %d\n", + imp->imp_obd->obd_name, imp->imp_peer_committed_transno, + imp->imp_generation); + imp->imp_last_transno_checked = imp->imp_peer_committed_transno; + imp->imp_last_generation_checked = imp->imp_generation; list_for_each_safe(tmp, saved, &imp->imp_replay_list) { req = list_entry(tmp, struct ptlrpc_request, rq_replay_list); @@ -1469,7 +1484,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) struct l_wait_info lwi; struct obd_import *imp = req->rq_import; unsigned long flags; - int timeout = 0; + cfs_duration_t timeout = 0; ENTRY; LASSERT(req->rq_set == NULL); @@ -1477,10 +1492,10 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) 
atomic_inc(&imp->imp_inflight); /* for distributed debugging */ - req->rq_reqmsg->status = current->pid; + req->rq_reqmsg->status = cfs_curproc_pid(); LASSERT(imp->imp_obd != NULL); CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:nid:opc " - "%s:%s:%d:"LPU64":%s:%d\n", current->comm, + "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, libcfs_nid2str(imp->imp_connection->c_peer.nid), @@ -1499,7 +1514,7 @@ restart: spin_unlock_irqrestore(&imp->imp_lock, flags); DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%s != %s)", - current->comm, + cfs_curproc_comm(), ptlrpc_import_state_name(req->rq_send_state), ptlrpc_import_state_name(imp->imp_state)); lwi = LWI_INTR(interrupted_request, req); @@ -1508,7 +1523,7 @@ restart: req->rq_err || req->rq_intr), &lwi); DEBUG_REQ(D_HA, req, "\"%s\" awake: (%s == %s or %d/%d == 1)", - current->comm, + cfs_curproc_comm(), ptlrpc_import_state_name(imp->imp_state), ptlrpc_import_state_name(req->rq_send_state), req->rq_err, req->rq_intr); @@ -1565,10 +1580,11 @@ restart: rc = ptl_send_rpc(req, 0); if (rc) { DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc); - timeout = 1; + timeout = CFS_TICK; } else { - timeout = MAX(req->rq_timeout * HZ, 1); - DEBUG_REQ(D_NET, req, "-- sleeping for %d jiffies", timeout); + timeout = cfs_timeout_cap(cfs_time_seconds(req->rq_timeout)); + DEBUG_REQ(D_NET, req, + "-- sleeping for "CFS_DURATION_T" jiffies", timeout); } lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request, req); @@ -1576,7 +1592,7 @@ restart: DEBUG_REQ(D_NET, req, "-- done sleeping"); CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:nid:opc " - "%s:%s:%d:"LPU64":%s:%d\n", current->comm, + "%s:%s:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, req->rq_xid, libcfs_nid2str(imp->imp_connection->c_peer.nid), @@ -1659,7 +1675,7 @@ restart: req->rq_phase = RQ_PHASE_INTERPRET; 
atomic_dec(&imp->imp_inflight); - wake_up(&imp->imp_recovery_waitq); + cfs_waitq_signal(&imp->imp_recovery_waitq); RETURN(rc); } @@ -1675,6 +1691,7 @@ static int ptlrpc_replay_interpret(struct ptlrpc_request *req, struct obd_import *imp = req->rq_import; unsigned long flags; + ENTRY; atomic_dec(&imp->imp_replay_inflight); if (!req->rq_replied) { @@ -1817,7 +1834,7 @@ void ptlrpc_abort_inflight(struct obd_import *imp) } static __u64 ptlrpc_last_xid = 0; -static spinlock_t ptlrpc_last_xid_lock = SPIN_LOCK_UNLOCKED; +spinlock_t ptlrpc_last_xid_lock; __u64 ptlrpc_next_xid(void) { diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index fc55e25..1d2e228 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -25,9 +25,9 @@ #define DEBUG_SUBSYSTEM S_RPC #ifdef __KERNEL__ -#include -#include -#include +#include +#include +#include #else #include #endif @@ -167,9 +167,9 @@ struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c) void ptlrpc_init_connection(void) { - INIT_LIST_HEAD(&conn_list); - INIT_LIST_HEAD(&conn_unused_list); - conn_lock = SPIN_LOCK_UNLOCKED; + CFS_INIT_LIST_HEAD(&conn_list); + CFS_INIT_LIST_HEAD(&conn_unused_list); + spin_lock_init(&conn_lock); } void ptlrpc_cleanup_connection(void) diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index 2222df9..08b1af5 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -25,13 +25,11 @@ #define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -#include -#else +#ifndef __KERNEL__ #include #endif -#include -#include +#include +#include #include "ptlrpc_internal.h" lnet_handle_eq_t ptlrpc_eq_h; @@ -185,7 +183,7 @@ void request_in_callback(lnet_event_t *ev) /* We moaned above already... 
*/ return; } - OBD_ALLOC_GFP(req, sizeof(*req), GFP_ATOMIC); + OBD_ALLOC_GFP(req, sizeof(*req), CFS_ALLOC_ATOMIC_TRY); if (req == NULL) { CERROR("Can't allocate incoming request descriptor: " "Dropping %s RPC from %s\n", @@ -239,7 +237,7 @@ void request_in_callback(lnet_event_t *ev) /* NB everything can disappear under us once the request * has been queued and we unlock, so do the wake now... */ - wake_up(&service->srv_waitq); + cfs_waitq_signal(&service->srv_waitq); spin_unlock_irqrestore(&service->srv_lock, flags); EXIT; @@ -320,7 +318,7 @@ void server_bulk_callback (lnet_event_t *ev) if (ev->unlinked) { /* This is the last callback no matter what... */ desc->bd_network_rw = 0; - wake_up(&desc->bd_waitq); + cfs_waitq_signal(&desc->bd_waitq); } spin_unlock_irqrestore (&desc->bd_lock, flags); @@ -402,7 +400,7 @@ int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, void ptlrpc_ni_fini(void) { - wait_queue_head_t waitq; + cfs_waitq_t waitq; struct l_wait_info lwi; int rc; int retries; @@ -427,8 +425,8 @@ void ptlrpc_ni_fini(void) CWARN("Event queue still busy\n"); /* Wait for a bit */ - init_waitqueue_head(&waitq); - lwi = LWI_TIMEOUT(2*HZ, NULL, NULL); + cfs_waitq_init(&waitq); + lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL); l_wait_event(waitq, 0, &lwi); break; } @@ -486,7 +484,7 @@ int ptlrpc_ni_init(void) } #ifndef __KERNEL__ -LIST_HEAD(liblustre_wait_callbacks); +CFS_LIST_HEAD(liblustre_wait_callbacks); void *liblustre_services_callback; void * diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 7afd850..bb9fd86 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -24,22 +24,18 @@ */ #define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -# include -# include -# include -#else +#ifndef __KERNEL__ # include #endif -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include "ptlrpc_internal.h" @@ -143,11 +139,12 @@ int 
ptlrpc_set_import_discon(struct obd_import *imp, __u32 conn_cnt) imp->imp_replayable ? "wait for recovery to complete" : "fail"); + IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); + spin_unlock_irqrestore(&imp->imp_lock, flags); + if (obd_dump_on_timeout) libcfs_debug_dumplog(); - IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_DISCON); - spin_unlock_irqrestore(&imp->imp_lock, flags); obd_import_event(imp->imp_obd, imp, IMP_EVENT_DISCON); rc = 1; } else { @@ -199,8 +196,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp) LASSERT(imp->imp_invalid); /* wait for all requests to error out and call completion callbacks */ - lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), NULL, - NULL, NULL); + lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), + NULL, NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, (atomic_read(&imp->imp_inflight) == 0), &lwi); @@ -322,6 +319,7 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) struct ptlrpc_connect_async_args *aa; unsigned long flags; + ENTRY; spin_lock_irqsave(&imp->imp_lock, flags); if (imp->imp_state == LUSTRE_IMP_CLOSED) { spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -369,9 +367,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid) imp->imp_conn_cnt, obd2cli_tgt(imp->imp_obd)); /* Don't retry if connect fails */ rc = 0; - obd_set_info(obd->obd_self_export, - strlen(KEY_INIT_RECOV), KEY_INIT_RECOV, - sizeof(rc), &rc); + obd_set_info_async(obd->obd_self_export, + strlen(KEY_INIT_RECOV), KEY_INIT_RECOV, + sizeof(rc), &rc, NULL); } rc = obd_reconnect(imp->imp_obd->obd_self_export, obd, @@ -683,13 +681,14 @@ finish: (char *)imp->imp_connection->c_remote_uuid.uuid, rc); } - wake_up(&imp->imp_recovery_waitq); + cfs_waitq_signal(&imp->imp_recovery_waitq); RETURN(rc); } static int completed_replay_interpret(struct ptlrpc_request *req, void * data, int rc) { + ENTRY; atomic_dec(&req->rq_import->imp_replay_inflight); if (req->rq_status == 0) { 
ptlrpc_import_recovery_state_machine(req->rq_import); @@ -736,7 +735,7 @@ static int ptlrpc_invalidate_import_thread(void *data) ENTRY; ptlrpc_daemonize("ll_imp_inval"); - + CDEBUG(D_HA, "thread invalidate import %s to %s@%s\n", imp->imp_obd->obd_name, obd2cli_tgt(imp->imp_obd), imp->imp_connection->c_remote_uuid.uuid); @@ -757,6 +756,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) char *target_start; int target_len; + ENTRY; if (imp->imp_state == LUSTRE_IMP_EVICTED) { deuuidify(obd2cli_tgt(imp->imp_obd), NULL, &target_start, &target_len); @@ -768,7 +768,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) imp->imp_connection->c_remote_uuid.uuid); #ifdef __KERNEL__ - rc = kernel_thread(ptlrpc_invalidate_import_thread, imp, + rc = cfs_kernel_thread(ptlrpc_invalidate_import_thread, imp, CLONE_VM | CLONE_FILES); if (rc < 0) CERROR("error starting invalidate thread: %d\n", rc); @@ -832,7 +832,7 @@ int ptlrpc_import_recovery_state_machine(struct obd_import *imp) } if (imp->imp_state == LUSTRE_IMP_FULL) { - wake_up(&imp->imp_recovery_waitq); + cfs_waitq_signal(&imp->imp_recovery_waitq); ptlrpc_wake_delayed(imp); } @@ -865,8 +865,8 @@ int ptlrpc_disconnect_import(struct obd_import *imp) if (ptlrpc_import_in_recovery(imp)) { struct l_wait_info lwi; - lwi = LWI_TIMEOUT_INTR(MAX(obd_timeout * HZ, 1), back_to_sleep, - NULL, NULL); + lwi = LWI_TIMEOUT_INTR(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), + back_to_sleep, NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, !ptlrpc_import_in_recovery(imp), &lwi); diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index 5f2186c..1a65430 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -38,16 +38,16 @@ #endif /* LUSTRE_VERSION_CODE */ -#include +#include -#include +#include /* lustre_swab_mdt_body */ -#include +#include /* obd2cli_tgt() (required by DEBUG_REQ()) */ -#include +#include /* struct ptlrpc_request, lustre_msg* */ -#include +#include static const 
struct req_msg_field *empty[] = {}; /* none */ diff --git a/lustre/ptlrpc/llog_client.c b/lustre/ptlrpc/llog_client.c index 1342d1a..bcab551 100644 --- a/lustre/ptlrpc/llog_client.c +++ b/lustre/ptlrpc/llog_client.c @@ -33,14 +33,14 @@ #endif #ifdef __KERNEL__ -#include +#include #else #include #endif -#include -#include -#include +#include +#include +#include #include /* This is a callback from the llog_* functions. @@ -309,11 +309,10 @@ out: static int llog_client_close(struct llog_handle *handle) { - int rc = 0; /* this doesn't call LLOG_ORIGIN_HANDLE_CLOSE because the servers all close the file at the end of every other LLOG_ RPC. */ - RETURN(rc); + return(0); } diff --git a/lustre/ptlrpc/llog_net.c b/lustre/ptlrpc/llog_net.c index 877c0c7..735ed31 100644 --- a/lustre/ptlrpc/llog_net.c +++ b/lustre/ptlrpc/llog_net.c @@ -36,15 +36,15 @@ #endif #ifdef __KERNEL__ -#include +#include #else #include #endif -#include -#include +#include +#include #include -#include +#include #ifdef __KERNEL__ int llog_origin_connect(struct llog_ctxt *ctxt, int count, diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index ee7e161..cf588d3 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -34,17 +34,15 @@ #ifndef __KERNEL__ #include -#else -#include #endif -#include -#include -#include +#include +#include +#include #include -#include +#include -#ifdef __KERNEL__ +#if defined(__KERNEL__) && defined(LUSTRE_LOG_SERVER) int llog_origin_handle_create(struct ptlrpc_request *req) { diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index a2eec28..70266b5 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -24,13 +24,12 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include "ptlrpc_internal.h" diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 6ddf32b..b05c5a3 
100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -27,10 +27,10 @@ #ifndef __KERNEL__ #include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "ptlrpc_internal.h" static int ptl_send_buf (lnet_handle_md_t *mdh, void *base, int len, @@ -177,7 +177,7 @@ void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) for (;;) { /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ - lwi = LWI_TIMEOUT (300 * HZ, NULL, NULL); + lwi = LWI_TIMEOUT (cfs_time_seconds(300), NULL, NULL); rc = l_wait_event(desc->bd_waitq, !ptlrpc_bulk_active(desc), &lwi); if (rc == 0) @@ -266,7 +266,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) /* Disconnect a bulk desc from the network. Idempotent. Not * thread-safe (i.e. only interlocks with completion callback). */ struct ptlrpc_bulk_desc *desc = req->rq_bulk; - wait_queue_head_t *wq; + cfs_waitq_t *wq; struct l_wait_info lwi; int rc; @@ -292,7 +292,7 @@ void ptlrpc_unregister_bulk (struct ptlrpc_request *req) for (;;) { /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ - lwi = LWI_TIMEOUT (300 * HZ, NULL, NULL); + lwi = LWI_TIMEOUT (cfs_time_seconds(300), NULL, NULL); rc = l_wait_event(*wq, !ptlrpc_bulk_active(desc), &lwi); if (rc == 0) return; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 22be02c..bad1387 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -33,9 +33,9 @@ # include #endif -#include -#include -#include +#include +#include +#include #define HDR_SIZE(count) \ @@ -108,8 +108,8 @@ int lustre_pack_request (struct ptlrpc_request *req, } #if RS_DEBUG -LIST_HEAD(ptlrpc_rs_debug_lru); -spinlock_t ptlrpc_rs_debug_lock = SPIN_LOCK_UNLOCKED; +CFS_LIST_HEAD(ptlrpc_rs_debug_lru); +spinlock_t ptlrpc_rs_debug_lock; #define PTLRPC_RS_DEBUG_LRU_ADD(rs) \ do { \ @@ -147,7 +147,7 
@@ static struct ptlrpc_reply_state *lustre_get_emerg_rs(struct ptlrpc_service *svc spin_unlock_irqrestore(&svc->srv_lock, flags); /* If we cannot get anything for some long time, we better bail out instead of waiting infinitely */ - lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL); + lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL); rc = l_wait_event(svc->srv_free_rs_waitq, !list_empty(&svc->srv_free_rs_list), &lwi); if (rc) @@ -192,8 +192,8 @@ int lustre_pack_reply (struct ptlrpc_request *req, rs->rs_cb_id.cbid_arg = rs; rs->rs_service = req->rq_rqbd->rqbd_service; rs->rs_size = size; - INIT_LIST_HEAD(&rs->rs_exp_list); - INIT_LIST_HEAD(&rs->rs_obd_list); + CFS_INIT_LIST_HEAD(&rs->rs_exp_list); + CFS_INIT_LIST_HEAD(&rs->rs_obd_list); req->rq_replen = msg_len; req->rq_reply_state = rs; @@ -279,7 +279,7 @@ void lustre_free_reply_state (struct ptlrpc_reply_state *rs) list_add(&rs->rs_list, &svc->srv_free_rs_list); spin_unlock_irqrestore(&svc->srv_lock, flags); - wake_up(&svc->srv_free_rs_waitq); + cfs_waitq_signal(&svc->srv_free_rs_waitq); } else { OBD_FREE(rs, rs->rs_size); } @@ -915,6 +915,8 @@ void lustre_swab_lov_desc (struct lov_desc *ld) __swab64s (&ld->ld_default_stripe_size); __swab64s (&ld->ld_default_stripe_offset); __swab32s (&ld->ld_pattern); + __swab32s (&ld->ld_qos_threshold); + __swab32s (&ld->ld_qos_maxage); /* uuid endian insensitive */ } @@ -1102,7 +1104,6 @@ void lustre_assert_wire_constants(void) * running on Linux tau 2.6.15-dirty #13 SMP Sat Feb 11 18:30:54 MSK 2006 i686 i686 i386 GNU/ * with gcc version 3.3.3 (SuSE Linux) */ - /* Constants... 
*/ LASSERTF(PTLRPC_MSG_MAGIC == 0x0BD00BD0," found %lld\n", (long long)PTLRPC_MSG_MAGIC); @@ -2243,26 +2244,22 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n", + LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_qos_threshold)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold)); + LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_padding_1)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n", + LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_padding_2)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n", (long 
long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_3)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_4)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4)); LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_uuid)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n", diff --git a/lustre/ptlrpc/pers.c b/lustre/ptlrpc/pers.c index 5dfbe85..865dcf0 100644 --- a/lustre/ptlrpc/pers.c +++ b/lustre/ptlrpc/pers.c @@ -29,11 +29,11 @@ #include #endif -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include "ptlrpc_internal.h" @@ -49,7 +49,7 @@ void ptlrpc_fill_bulk_md (lnet_md_t *md, struct ptlrpc_bulk_desc *desc) md->length = desc->bd_iov_count; } -void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, int pageoffset, int len) { lnet_kiov_t *kiov = &desc->bd_iov[desc->bd_iov_count]; @@ -67,9 +67,9 @@ void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc) for (i = 0; i < desc->bd_iov_count ; i++) { lnet_kiov_t *kiov = &desc->bd_iov[i]; - memset(kmap(kiov->kiov_page)+kiov->kiov_offset, 0xab, - kiov->kiov_len); - kunmap(kiov->kiov_page); + memset(cfs_kmap(kiov->kiov_page)+kiov->kiov_offset, 0xab, + kiov->kiov_len); + cfs_kunmap(kiov->kiov_page); } } @@ -102,7 +102,7 @@ static int can_merge_iovs(lnet_md_iovec_t *existing, lnet_md_iovec_t *candidate) return 0; } -void 
ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, int pageoffset, int len) { lnet_md_iovec_t *iov = &desc->bd_iov[desc->bd_iov_count]; diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index cfdce8d..53335f7 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -29,17 +29,15 @@ #ifndef __KERNEL__ #include #else -#include -#include #define DEBUG_SUBSYSTEM S_RPC #endif -#include -#include +#include +#include #include "ptlrpc_internal.h" -static DECLARE_MUTEX(pinger_sem); -static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); +struct semaphore pinger_sem; +static struct list_head pinger_imports = CFS_LIST_HEAD_INIT(pinger_imports); int ptlrpc_ping(struct obd_import *imp) { @@ -67,14 +65,14 @@ int ptlrpc_ping(struct obd_import *imp) static void ptlrpc_update_next_ping(struct obd_import *imp) { - imp->imp_next_ping = jiffies + HZ * - (imp->imp_state == LUSTRE_IMP_DISCON ? RECONNECT_INTERVAL : - PING_INTERVAL); + imp->imp_next_ping = cfs_time_shift( + (imp->imp_state == LUSTRE_IMP_DISCON ? + RECONNECT_INTERVAL : PING_INTERVAL)); } void ptlrpc_ping_import_soon(struct obd_import *imp) { - imp->imp_next_ping = jiffies; + imp->imp_next_ping = cfs_time_current(); } #ifdef __KERNEL__ @@ -84,20 +82,20 @@ static int ptlrpc_pinger_main(void *arg) struct ptlrpc_thread *thread = data->thread; ENTRY; - ptlrpc_daemonize(data->name); + cfs_daemonize(data->name); /* Record that the thread is running */ thread->t_flags = SVC_RUNNING; - wake_up(&thread->t_ctl_waitq); + cfs_waitq_signal(&thread->t_ctl_waitq); /* And now, loop forever, pinging as needed. 
*/ while (1) { - unsigned long this_ping = jiffies; - long time_to_next_ping = 0; + cfs_time_t this_ping = cfs_time_current(); struct l_wait_info lwi; + cfs_duration_t time_to_next_ping; struct list_head *iter; - down(&pinger_sem); + mutex_down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct obd_import *imp = list_entry(iter, struct obd_import, @@ -119,13 +117,13 @@ static int ptlrpc_pinger_main(void *arg) if (force || /* if the next ping is within, say, 5 jiffies from now, go ahead and ping. See note below. */ - time_after_eq(this_ping, imp->imp_next_ping - 5)) { + cfs_time_aftereq(this_ping, + imp->imp_next_ping - 5 * CFS_TICK)) { if (level == LUSTRE_IMP_DISCON && !imp->imp_deactive) { /* wait at least a timeout before trying recovery again. */ - imp->imp_next_ping = jiffies + - obd_timeout * HZ; + imp->imp_next_ping = cfs_time_shift(obd_timeout); ptlrpc_initiate_recovery(imp); } else if (level != LUSTRE_IMP_FULL || imp->imp_obd->obd_no_recov || @@ -144,20 +142,24 @@ static int ptlrpc_pinger_main(void *arg) if (!imp->imp_pingable) continue; CDEBUG(D_INFO, - "don't need to ping %s (%lu > %lu)\n", + "don't need to ping %s ("CFS_TIME_T + " > "CFS_TIME_T")\n", obd2cli_tgt(imp->imp_obd), imp->imp_next_ping, this_ping); } /* obd_timeout might have changed */ - if (time_after(imp->imp_next_ping, - this_ping + PING_INTERVAL * HZ)) + if (cfs_time_after(imp->imp_next_ping, + cfs_time_add(this_ping, + cfs_time_seconds(PING_INTERVAL)))) ptlrpc_update_next_ping(imp); } - up(&pinger_sem); + mutex_up(&pinger_sem); /* Wait until the next ping time, or until we're stopped. */ - time_to_next_ping = this_ping + (PING_INTERVAL * HZ) - jiffies; + time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, + cfs_time_seconds(PING_INTERVAL)), + cfs_time_current()); /* The ping sent by ptlrpc_send_rpc may get sent out say .01 second after this. 
@@ -165,10 +167,11 @@ static int ptlrpc_pinger_main(void *arg) next ping time to next_ping + .01 sec, which means we will SKIP the next ping at next_ping, and the ping will get sent 2 timeouts from now! Beware. */ - CDEBUG(D_INFO, "next ping in %lu (%lu)\n", time_to_next_ping, - this_ping + PING_INTERVAL * HZ); + CDEBUG(D_INFO, "next ping in "CFS_DURATION_T" ("CFS_TIME_T")\n", + time_to_next_ping, + cfs_time_add(this_ping, cfs_time_seconds(PING_INTERVAL))); if (time_to_next_ping > 0) { - lwi = LWI_TIMEOUT(max_t(long, time_to_next_ping, HZ), + lwi = LWI_TIMEOUT(max_t(cfs_duration_t, time_to_next_ping, cfs_time_seconds(1)), NULL, NULL); l_wait_event(thread->t_ctl_waitq, thread->t_flags & (SVC_STOPPING|SVC_EVENT), @@ -185,9 +188,9 @@ static int ptlrpc_pinger_main(void *arg) } thread->t_flags = SVC_STOPPED; - wake_up(&thread->t_ctl_waitq); + cfs_waitq_signal(&thread->t_ctl_waitq); - CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid); + CDEBUG(D_NET, "pinger thread exiting, process %d\n", cfs_curproc_pid()); return 0; } @@ -209,14 +212,14 @@ int ptlrpc_start_pinger(void) OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); if (pinger_thread == NULL) RETURN(-ENOMEM); - init_waitqueue_head(&pinger_thread->t_ctl_waitq); + cfs_waitq_init(&pinger_thread->t_ctl_waitq); d.name = "ll_ping"; d.thread = pinger_thread; /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we * just drop the VM and FILES in ptlrpc_daemonize() right away. 
*/ - rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES); + rc = cfs_kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("cannot start thread: %d\n", rc); OBD_FREE(pinger_thread, sizeof(*pinger_thread)); @@ -240,10 +243,10 @@ int ptlrpc_stop_pinger(void) if (pinger_thread == NULL) RETURN(-EALREADY); - down(&pinger_sem); + mutex_down(&pinger_sem); pinger_thread->t_flags = SVC_STOPPING; - wake_up(&pinger_thread->t_ctl_waitq); - up(&pinger_sem); + cfs_waitq_signal(&pinger_thread->t_ctl_waitq); + mutex_up(&pinger_sem); l_wait_event(pinger_thread->t_ctl_waitq, (pinger_thread->t_flags & SVC_STOPPED), &lwi); @@ -264,7 +267,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp) if (!list_empty(&imp->imp_pinger_chain)) RETURN(-EALREADY); - down(&pinger_sem); + mutex_down(&pinger_sem); CDEBUG(D_HA, "adding pingable import %s->%s\n", imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); ptlrpc_update_next_ping(imp); @@ -273,7 +276,7 @@ int ptlrpc_pinger_add_import(struct obd_import *imp) class_import_get(imp); ptlrpc_pinger_wake_up(); - up(&pinger_sem); + mutex_up(&pinger_sem); RETURN(0); } @@ -284,12 +287,12 @@ int ptlrpc_pinger_del_import(struct obd_import *imp) if (list_empty(&imp->imp_pinger_chain)) RETURN(-ENOENT); - down(&pinger_sem); + mutex_down(&pinger_sem); list_del_init(&imp->imp_pinger_chain); CDEBUG(D_HA, "removing pingable import %s->%s\n", imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); class_import_put(imp); - up(&pinger_sem); + mutex_up(&pinger_sem); RETURN(0); } @@ -297,7 +300,7 @@ void ptlrpc_pinger_wake_up() { #ifdef ENABLE_PINGER pinger_thread->t_flags |= SVC_EVENT; - wake_up(&pinger_thread->t_ctl_waitq); + cfs_waitq_signal(&pinger_thread->t_ctl_waitq); #endif } @@ -415,7 +418,7 @@ void ping_evictor_start(void) init_waitqueue_head(&pet_waitq); - rc = kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FS); + rc = cfs_kernel_thread(ping_evictor_main, NULL, CLONE_VM | CLONE_FILES); if (rc < 
0) { pet_refcount--; CERROR("Cannot start ping evictor thread: %d\n", rc); @@ -441,14 +444,14 @@ EXPORT_SYMBOL(ping_evictor_stop); #ifdef ENABLE_PINGER static struct pinger_data { int pd_recursion; - unsigned long pd_this_ping; /* jiffies */ - unsigned long pd_next_ping; /* jiffies */ + cfs_time_t pd_this_ping; /* jiffies */ + cfs_time_t pd_next_ping; /* jiffies */ struct ptlrpc_request_set *pd_set; } pinger_args; static int pinger_check_rpcs(void *arg) { - unsigned long curtime = jiffies; + cfs_time_t curtime = cfs_time_current(); struct ptlrpc_request *req; struct ptlrpc_request_set *set; struct list_head *iter; @@ -483,14 +486,15 @@ static int pinger_check_rpcs(void *arg) set = pd->pd_set; /* add rpcs into set */ - down(&pinger_sem); + mutex_down(&pinger_sem); list_for_each(iter, &pinger_imports) { struct obd_import *imp = list_entry(iter, struct obd_import, imp_pinger_chain); int generation, level; unsigned long flags; - if (time_after_eq(pd->pd_this_ping, imp->imp_next_ping - 5)) { + if (cfs_time_aftereq(pd->pd_this_ping, + imp->imp_next_ping - 5 * CFS_TICK)) { /* Add a ping. */ spin_lock_irqsave(&imp->imp_lock, flags); generation = imp->imp_generation; @@ -517,13 +521,13 @@ static int pinger_check_rpcs(void *arg) req->rq_import_generation = generation; ptlrpc_set_add_req(set, req); } else { - CDEBUG(D_HA, "don't need to ping %s (%lu > " - "%lu)\n", obd2cli_tgt(imp->imp_obd), + CDEBUG(D_HA, "don't need to ping %s ("CFS_TIME_T" > " + CFS_TIME_T")\n", obd2cli_tgt(imp->imp_obd), imp->imp_next_ping, pd->pd_this_ping); } } pd->pd_this_ping = curtime; - up(&pinger_sem); + mutex_up(&pinger_sem); /* Might be empty, that's OK. 
*/ if (set->set_remaining == 0) @@ -543,14 +547,16 @@ do_check_set: rc = ptlrpc_check_set(set); /* not finished, and we are not expired, simply return */ - if (!rc && time_before(curtime, pd->pd_this_ping + PING_INTERVAL * HZ)){ + if (!rc && cfs_time_before(curtime, + cfs_time_add(pd->pd_this_ping, + cfs_time_seconds(PING_INTERVAL)))) { CDEBUG(D_HA, "not finished, but also not expired\n"); pd->pd_recursion--; return 0; } /* Expire all the requests that didn't come back. */ - down(&pinger_sem); + mutex_down(&pinger_sem); list_for_each(iter, &set->set_requests) { req = list_entry(iter, struct ptlrpc_request, rq_set_chain); @@ -569,13 +575,14 @@ do_check_set: CDEBUG(D_HA, "pinger initiate expire_one_request\n"); ptlrpc_expire_one_request(req); } - up(&pinger_sem); + mutex_up(&pinger_sem); ptlrpc_set_destroy(set); pd->pd_set = NULL; out: - pd->pd_next_ping = pd->pd_this_ping + PING_INTERVAL * HZ; + pd->pd_next_ping = cfs_time_add(pd->pd_this_ping, + cfs_time_seconds(PING_INTERVAL)); pd->pd_this_ping = 0; /* XXX for debug */ CDEBUG(D_HA, "finished a round ping\n"); @@ -608,15 +615,15 @@ int ptlrpc_stop_pinger(void) void ptlrpc_pinger_sending_on_import(struct obd_import *imp) { #ifdef ENABLE_PINGER - down(&pinger_sem); + mutex_down(&pinger_sem); ptlrpc_update_next_ping(imp); if (pinger_args.pd_set == NULL && time_before(imp->imp_next_ping, pinger_args.pd_next_ping)) { - CDEBUG(D_HA, "set next ping to %ld(cur %ld)\n", - imp->imp_next_ping, jiffies); + CDEBUG(D_HA, "set next ping to "CFS_TIME_T"(cur "CFS_TIME_T")\n", + imp->imp_next_ping, cfs_time_current()); pinger_args.pd_next_ping = imp->imp_next_ping; } - up(&pinger_sem); + mutex_up(&pinger_sem); #endif } @@ -630,10 +637,10 @@ int ptlrpc_pinger_add_import(struct obd_import *imp) imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); ptlrpc_pinger_sending_on_import(imp); - down(&pinger_sem); + mutex_down(&pinger_sem); list_add_tail(&imp->imp_pinger_chain, &pinger_imports); class_import_get(imp); - up(&pinger_sem); + 
mutex_up(&pinger_sem); RETURN(0); } @@ -644,12 +651,12 @@ int ptlrpc_pinger_del_import(struct obd_import *imp) if (list_empty(&imp->imp_pinger_chain)) RETURN(-ENOENT); - down(&pinger_sem); + mutex_down(&pinger_sem); list_del_init(&imp->imp_pinger_chain); CDEBUG(D_HA, "removing pingable import %s->%s\n", imp->imp_obd->obd_uuid.uuid, obd2cli_tgt(imp->imp_obd)); class_import_put(imp); - up(&pinger_sem); + mutex_up(&pinger_sem); RETURN(0); } diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 9ff8e2b..dc780d7 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -112,7 +112,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req); /* pers.c */ void ptlrpc_fill_bulk_md(lnet_md_t *md, struct ptlrpc_bulk_desc *desc); -void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, struct page *page, +void ptlrpc_add_bulk_page(struct ptlrpc_bulk_desc *desc, cfs_page_t *page, int pageoffset, int len); void ptl_rpc_wipe_bulk_pages(struct ptlrpc_bulk_desc *desc); diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index 408ec6c..2b40c0b 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -28,20 +28,22 @@ #endif #define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -# include -# include -#else +#ifndef __KERNEL__ # include #endif -#include -#include -#include -#include +#include +#include +#include +#include #include "ptlrpc_internal.h" +extern spinlock_t ptlrpc_last_xid_lock; +extern spinlock_t ptlrpc_rs_debug_lock; +extern spinlock_t ptlrpc_all_services_lock; +extern struct semaphore pinger_sem; +extern struct semaphore ptlrpcd_sem; extern int ptlrpc_init_portals(void); extern void ptlrpc_exit_portals(void); @@ -51,6 +53,11 @@ __init int ptlrpc_init(void) ENTRY; lustre_assert_wire_constants(); + spin_lock_init(&ptlrpc_last_xid_lock); + spin_lock_init(&ptlrpc_rs_debug_lock); + spin_lock_init(&ptlrpc_all_services_lock); + init_mutex(&pinger_sem); + 
init_mutex(&ptlrpcd_sem); rc = req_layout_init(); if (rc) @@ -266,6 +273,5 @@ MODULE_AUTHOR("Cluster File Systems, Inc. "); MODULE_DESCRIPTION("Lustre Request Processor and Lock Management"); MODULE_LICENSE("GPL"); -module_init(ptlrpc_init); -module_exit(ptlrpc_exit); +cfs_module(ptlrpc, "1.0.0", ptlrpc_init, ptlrpc_exit); #endif diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 2f28528..fa315eb 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -27,30 +27,19 @@ #define DEBUG_SUBSYSTEM S_RPC #ifdef __KERNEL__ -# include -# include -# include -# include -# include -# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include -# include -# else -# include -# endif -# include -# include +# include #else /* __KERNEL__ */ # include # include #endif #include -#include +#include +# include -#include -#include /* for OBD_FAIL_CHECK */ -#include +#include +#include /* for OBD_FAIL_CHECK */ +#include #define LIOD_STOP 0 struct ptlrpcd_ctl { @@ -59,7 +48,7 @@ struct ptlrpcd_ctl { struct completion pc_starting; struct completion pc_finishing; struct list_head pc_req_list; - wait_queue_head_t pc_waitq; + cfs_waitq_t pc_waitq; struct ptlrpc_request_set *pc_set; char pc_name[16]; #ifndef __KERNEL__ @@ -71,7 +60,7 @@ struct ptlrpcd_ctl { static struct ptlrpcd_ctl ptlrpcd_pc; static struct ptlrpcd_ctl ptlrpcd_recovery_pc; -static DECLARE_MUTEX(ptlrpcd_sem); +struct semaphore ptlrpcd_sem; static int ptlrpcd_users = 0; void ptlrpcd_wake(struct ptlrpc_request *req) @@ -80,7 +69,7 @@ void ptlrpcd_wake(struct ptlrpc_request *req) LASSERT(pc != NULL); - wake_up(&pc->pc_waitq); + cfs_waitq_signal(&pc->pc_waitq); } /* requests that are added to the ptlrpcd queue are sent via @@ -153,15 +142,9 @@ static int ptlrpcd_check(struct ptlrpcd_ctl *pc) static int ptlrpcd(void *arg) { struct ptlrpcd_ctl *pc = arg; - unsigned long flags; ENTRY; - libcfs_daemonize(pc->pc_name); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; 
- SIGNAL_MASK_UNLOCK(current, flags); + cfs_daemonize(pc->pc_name); complete(&pc->pc_starting); @@ -171,18 +154,19 @@ static int ptlrpcd(void *arg) * on the set's new_req_list and ptlrpcd_check moves them into * the set. */ while (1) { - wait_queue_t set_wait; + cfs_waitlink_t set_wait; struct l_wait_info lwi; - int timeout; + cfs_duration_t timeout; - timeout = ptlrpc_set_next_timeout(pc->pc_set) * HZ; + timeout = cfs_time_seconds(ptlrpc_set_next_timeout(pc->pc_set)); lwi = LWI_TIMEOUT(timeout, ptlrpc_expired_set, pc->pc_set); /* ala the pinger, wait on pc's waitqueue and the set's */ - init_waitqueue_entry(&set_wait, current); - add_wait_queue(&pc->pc_set->set_waitq, &set_wait); + cfs_waitlink_init(&set_wait); + cfs_waitq_add(&pc->pc_set->set_waitq, &set_wait); + cfs_waitq_forward(&set_wait, &pc->pc_waitq); l_wait_event(pc->pc_waitq, ptlrpcd_check(pc), &lwi); - remove_wait_queue(&pc->pc_set->set_waitq, &set_wait); + cfs_waitq_del(&pc->pc_set->set_waitq, &set_wait); if (test_bit(LIOD_STOP, &pc->pc_flags)) break; @@ -218,13 +202,14 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) { int rc; + ENTRY; memset(pc, 0, sizeof(*pc)); init_completion(&pc->pc_starting); init_completion(&pc->pc_finishing); - init_waitqueue_head(&pc->pc_waitq); + cfs_waitq_init(&pc->pc_waitq); pc->pc_flags = 0; spin_lock_init(&pc->pc_lock); - INIT_LIST_HEAD(&pc->pc_req_list); + CFS_INIT_LIST_HEAD(&pc->pc_req_list); snprintf (pc->pc_name, sizeof (pc->pc_name), name); pc->pc_set = ptlrpc_prep_set(); @@ -232,7 +217,7 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) RETURN(-ENOMEM); #ifdef __KERNEL__ - rc = kernel_thread(ptlrpcd, pc, 0); + rc = cfs_kernel_thread(ptlrpcd, pc, 0); if (rc < 0) { ptlrpc_set_destroy(pc->pc_set); RETURN(rc); @@ -250,7 +235,7 @@ static int ptlrpcd_start(char *name, struct ptlrpcd_ctl *pc) static void ptlrpcd_stop(struct ptlrpcd_ctl *pc) { set_bit(LIOD_STOP, &pc->pc_flags); - wake_up(&pc->pc_waitq); + cfs_waitq_signal(&pc->pc_waitq); #ifdef 
__KERNEL__ wait_for_completion(&pc->pc_finishing); #else @@ -264,7 +249,7 @@ int ptlrpcd_addref(void) int rc = 0; ENTRY; - down(&ptlrpcd_sem); + mutex_down(&ptlrpcd_sem); if (++ptlrpcd_users != 1) GOTO(out, rc); @@ -281,16 +266,16 @@ int ptlrpcd_addref(void) GOTO(out, rc); } out: - up(&ptlrpcd_sem); + mutex_up(&ptlrpcd_sem); RETURN(rc); } void ptlrpcd_decref(void) { - down(&ptlrpcd_sem); + mutex_down(&ptlrpcd_sem); if (--ptlrpcd_users == 0) { ptlrpcd_stop(&ptlrpcd_pc); ptlrpcd_stop(&ptlrpcd_recovery_pc); } - up(&ptlrpcd_sem); + mutex_up(&ptlrpcd_sem); } diff --git a/lustre/ptlrpc/recov_thread.c b/lustre/ptlrpc/recov_thread.c index c9684fe..8ad20c6 100644 --- a/lustre/ptlrpc/recov_thread.c +++ b/lustre/ptlrpc/recov_thread.c @@ -36,21 +36,21 @@ #endif #ifdef __KERNEL__ -#include +# include #else # include # include #endif #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include -#include +#include #include "ptlrpc_internal.h" #ifdef __KERNEL__ @@ -132,7 +132,7 @@ void llcd_send(struct llog_canceld_ctxt *llcd) list_add_tail(&llcd->llcd_list, &llcd->llcd_lcm->lcm_llcd_pending); spin_unlock(&llcd->llcd_lcm->lcm_llcd_lock); - wake_up_nr(&llcd->llcd_lcm->lcm_waitq, 1); + cfs_waitq_signal_nr(&llcd->llcd_lcm->lcm_waitq, 1); } EXPORT_SYMBOL(llcd_send); @@ -150,7 +150,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, LASSERT(ctxt); - down(&ctxt->loc_sem); + mutex_down(&ctxt->loc_sem); if (ctxt->loc_imp == NULL) { CWARN("no import for ctxt %p\n", ctxt); GOTO(out, rc = 0); @@ -188,7 +188,7 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt, llcd_send(llcd); } out: - up(&ctxt->loc_sem); + mutex_up(&ctxt->loc_sem); return rc; } EXPORT_SYMBOL(llog_obd_repl_cancel); @@ -201,13 +201,13 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp) if (exp && (ctxt->loc_imp == exp->exp_imp_reverse)) { CDEBUG(D_HA, "reverse import disconnected, put llcd %p:%p\n", ctxt->loc_llcd, ctxt); - 
down(&ctxt->loc_sem); + mutex_down(&ctxt->loc_sem); if (ctxt->loc_llcd != NULL) { llcd_put(ctxt->loc_llcd); ctxt->loc_llcd = NULL; } ctxt->loc_imp = NULL; - up(&ctxt->loc_sem); + mutex_up(&ctxt->loc_sem); } else { rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW); } @@ -221,7 +221,6 @@ static int log_commit_thread(void *arg) struct llog_commit_master *lcm = arg; struct llog_commit_daemon *lcd; struct llog_canceld_ctxt *llcd, *n; - char name[24]; ENTRY; OBD_ALLOC(lcd, sizeof(*lcd)); @@ -229,18 +228,18 @@ static int log_commit_thread(void *arg) RETURN(-ENOMEM); spin_lock(&lcm->lcm_thread_lock); - THREAD_NAME(name, sizeof(name) - 1, + THREAD_NAME(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX - 1, "ll_log_comt_%02d", atomic_read(&lcm->lcm_thread_total)); atomic_inc(&lcm->lcm_thread_total); spin_unlock(&lcm->lcm_thread_lock); - ptlrpc_daemonize(name); /* thread never needs to do IO */ + ptlrpc_daemonize(cfs_curproc_comm()); /* thread never needs to do IO */ - INIT_LIST_HEAD(&lcd->lcd_lcm_list); - INIT_LIST_HEAD(&lcd->lcd_llcd_list); + CFS_INIT_LIST_HEAD(&lcd->lcd_lcm_list); + CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list); lcd->lcd_lcm = lcm; - CDEBUG(D_HA, "%s started\n", current->comm); + CDEBUG(D_HA, "%s started\n", cfs_curproc_comm()); do { struct ptlrpc_request *request; struct obd_import *import = NULL; @@ -331,15 +330,15 @@ static int log_commit_thread(void *arg) continue; } - down(&llcd->llcd_ctxt->loc_sem); + mutex_down(&llcd->llcd_ctxt->loc_sem); if (llcd->llcd_ctxt->loc_imp == NULL) { - up(&llcd->llcd_ctxt->loc_sem); + mutex_up(&llcd->llcd_ctxt->loc_sem); CWARN("import will be destroyed, put " "llcd %p:%p\n", llcd, llcd->llcd_ctxt); llcd_put(llcd); continue; } - up(&llcd->llcd_ctxt->loc_sem); + mutex_up(&llcd->llcd_ctxt->loc_sem); if (!import || (import == LP_POISON) || (import->imp_client == LP_POISON)) { @@ -360,7 +359,7 @@ static int log_commit_thread(void *arg) spin_lock(&lcm->lcm_llcd_lock); list_splice(&lcd->lcd_llcd_list, &lcm->lcm_llcd_resend); - 
INIT_LIST_HEAD(&lcd->lcd_llcd_list); + CFS_INIT_LIST_HEAD(&lcd->lcd_llcd_list); spin_unlock(&lcm->lcm_llcd_lock); break; } @@ -370,16 +369,16 @@ static int log_commit_thread(void *arg) request->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; request->rq_replen = lustre_msg_size(0, NULL); - down(&llcd->llcd_ctxt->loc_sem); + mutex_down(&llcd->llcd_ctxt->loc_sem); if (llcd->llcd_ctxt->loc_imp == NULL) { - up(&llcd->llcd_ctxt->loc_sem); + mutex_up(&llcd->llcd_ctxt->loc_sem); CWARN("import will be destroyed, put " "llcd %p:%p\n", llcd, llcd->llcd_ctxt); llcd_put(llcd); ptlrpc_req_finished(request); continue; } - up(&llcd->llcd_ctxt->loc_sem); + mutex_up(&llcd->llcd_ctxt->loc_sem); rc = ptlrpc_queue_wait(request); ptlrpc_req_finished(request); @@ -421,12 +420,12 @@ static int log_commit_thread(void *arg) spin_unlock(&lcm->lcm_thread_lock); OBD_FREE(lcd, sizeof(*lcd)); - CDEBUG(D_HA, "%s exiting\n", current->comm); + CDEBUG(D_HA, "%s exiting\n", cfs_curproc_comm()); spin_lock(&lcm->lcm_thread_lock); atomic_dec(&lcm->lcm_thread_total); spin_unlock(&lcm->lcm_thread_lock); - wake_up(&lcm->lcm_waitq); + cfs_waitq_signal(&lcm->lcm_waitq); return 0; } @@ -439,7 +438,7 @@ int llog_start_commit_thread(void) if (atomic_read(&lcm->lcm_thread_total) >= lcm->lcm_thread_max) RETURN(0); - rc = kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES); + rc = cfs_kernel_thread(log_commit_thread, lcm, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("error starting thread #%d: %d\n", atomic_read(&lcm->lcm_thread_total), rc); @@ -459,14 +458,14 @@ static struct llog_process_args { int llog_init_commit_master(void) { - INIT_LIST_HEAD(&lcm->lcm_thread_busy); - INIT_LIST_HEAD(&lcm->lcm_thread_idle); + CFS_INIT_LIST_HEAD(&lcm->lcm_thread_busy); + CFS_INIT_LIST_HEAD(&lcm->lcm_thread_idle); spin_lock_init(&lcm->lcm_thread_lock); atomic_set(&lcm->lcm_thread_numidle, 0); - init_waitqueue_head(&lcm->lcm_waitq); - INIT_LIST_HEAD(&lcm->lcm_llcd_pending); - INIT_LIST_HEAD(&lcm->lcm_llcd_resend); - 
INIT_LIST_HEAD(&lcm->lcm_llcd_free); + cfs_waitq_init(&lcm->lcm_waitq); + CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_pending); + CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_resend); + CFS_INIT_LIST_HEAD(&lcm->lcm_llcd_free); spin_lock_init(&lcm->lcm_llcd_lock); atomic_set(&lcm->lcm_llcd_numfree, 0); lcm->lcm_llcd_minfree = 0; @@ -481,7 +480,7 @@ int llog_cleanup_commit_master(int force) lcm->lcm_flags |= LLOG_LCM_FL_EXIT; if (force) lcm->lcm_flags |= LLOG_LCM_FL_EXIT_FORCE; - wake_up(&lcm->lcm_waitq); + cfs_waitq_signal(&lcm->lcm_waitq); wait_event_interruptible(lcm->lcm_waitq, atomic_read(&lcm->lcm_thread_total) == 0); @@ -498,7 +497,7 @@ static int log_process_thread(void *args) int rc; ENTRY; - up(&data->llpa_sem); + mutex_up(&data->llpa_sem); ptlrpc_daemonize("llog_process"); /* thread does IO to log files */ rc = llog_create(ctxt, &llh, &logid, NULL); @@ -536,12 +535,12 @@ static int llog_recovery_generic(struct llog_ctxt *ctxt, void *handle,void *arg) int rc; ENTRY; - down(&llpa.llpa_sem); + mutex_down(&llpa.llpa_sem); llpa.llpa_ctxt = ctxt; llpa.llpa_cb = handle; llpa.llpa_arg = arg; - rc = kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES); + rc = cfs_kernel_thread(log_process_thread, &llpa, CLONE_VM | CLONE_FILES); if (rc < 0) CERROR("error starting log_process_thread: %d\n", rc); else { @@ -566,17 +565,17 @@ int llog_repl_connect(struct llog_ctxt *ctxt, int count, llog_sync(ctxt, NULL); } - down(&ctxt->loc_sem); + mutex_down(&ctxt->loc_sem); ctxt->loc_gen = *gen; llcd = llcd_grab(); if (llcd == NULL) { CERROR("couldn't get an llcd\n"); - up(&ctxt->loc_sem); + mutex_up(&ctxt->loc_sem); RETURN(-ENOMEM); } llcd->llcd_ctxt = ctxt; ctxt->loc_llcd = llcd; - up(&ctxt->loc_sem); + mutex_up(&ctxt->loc_sem); rc = llog_recovery_generic(ctxt, ctxt->llog_proc_cb, logid); if (rc != 0) diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 30f6aa7..cbbed63 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -27,23 +27,20 @@ #define 
DEBUG_SUBSYSTEM S_RPC #ifdef __KERNEL__ -# include -# include -# include -# include +# include #else # include #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for IOC_LOV_SET_OSC_ACTIVE */ +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for IOC_LOV_SET_OSC_ACTIVE */ #include #include "ptlrpc_internal.h" @@ -164,6 +161,7 @@ int ptlrpc_replay_next(struct obd_import *imp, int *inflight) * get rid of them now. */ spin_lock_irqsave(&imp->imp_lock, flags); + imp->imp_last_transno_checked = 0; ptlrpc_free_committed(imp); last_transno = imp->imp_last_replay_transno; spin_unlock_irqrestore(&imp->imp_lock, flags); @@ -314,6 +312,7 @@ int ptlrpc_set_import_active(struct obd_import *imp, int active) struct obd_device *obd = imp->imp_obd; int rc = 0; + ENTRY; LASSERT(obd); /* When deactivating, mark import invalid, and abort in-flight @@ -390,7 +389,8 @@ static int ptlrpc_recover_import_no_retry(struct obd_import *imp, CDEBUG(D_HA, "%s: recovery started, waiting\n", obd2cli_tgt(imp->imp_obd)); - lwi = LWI_TIMEOUT(MAX(obd_timeout * HZ, 1), NULL, NULL); + lwi = LWI_TIMEOUT(cfs_timeout_cap(cfs_time_seconds(obd_timeout)), + NULL, NULL); rc = l_wait_event(imp->imp_recovery_waitq, !ptlrpc_import_in_recovery(imp), &lwi); CDEBUG(D_HA, "%s: recovery finished\n", diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 9b3fb47..d0fcb3a 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -28,19 +28,18 @@ #include #include #endif -#include -#include -#include +#include +#include +#include +#include #include #include "ptlrpc_internal.h" -#include - /* forward ref */ static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc); -static LIST_HEAD (ptlrpc_all_services); -static spinlock_t ptlrpc_all_services_lock = SPIN_LOCK_UNLOCKED; +static CFS_LIST_HEAD (ptlrpc_all_services); +spinlock_t ptlrpc_all_services_lock; static char * ptlrpc_alloc_request_buffer 
(int size) @@ -78,7 +77,7 @@ ptlrpc_alloc_rqbd (struct ptlrpc_service *svc) rqbd->rqbd_refcount = 0; rqbd->rqbd_cbid.cbid_fn = request_in_callback; rqbd->rqbd_cbid.cbid_arg = rqbd; - INIT_LIST_HEAD(&rqbd->rqbd_reqs); + CFS_INIT_LIST_HEAD(&rqbd->rqbd_reqs); rqbd->rqbd_buffer = ptlrpc_alloc_request_buffer(svc->srv_buf_size); if (rqbd->rqbd_buffer == NULL) { @@ -170,7 +169,7 @@ ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs) rs->rs_scheduled = 1; list_del (&rs->rs_list); list_add (&rs->rs_list, &svc->srv_reply_queue); - wake_up (&svc->srv_waitq); + cfs_waitq_signal (&svc->srv_waitq); } void @@ -205,13 +204,6 @@ ptlrpc_commit_replies (struct obd_device *obd) spin_unlock_irqrestore (&obd->obd_uncommitted_replies_lock, flags); } -static long -timeval_sub(struct timeval *large, struct timeval *small) -{ - return (large->tv_sec - small->tv_sec) * 1000000 + - (large->tv_usec - small->tv_usec); -} - static int ptlrpc_server_post_idle_rqbds (struct ptlrpc_service *svc) { @@ -283,7 +275,7 @@ struct ptlrpc_service * ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, int req_portal, int rep_portal, int watchdog_timeout, svc_handler_t handler, char *name, - struct proc_dir_entry *proc_entry, + cfs_proc_dir_entry_t *proc_entry, svcreq_printfn_t svcreq_printfn, int num_threads) { int rc; @@ -301,8 +293,8 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, service->srv_name = name; spin_lock_init(&service->srv_lock); - INIT_LIST_HEAD(&service->srv_threads); - init_waitqueue_head(&service->srv_waitq); + CFS_INIT_LIST_HEAD(&service->srv_threads); + cfs_waitq_init(&service->srv_waitq); service->srv_nbuf_per_group = nbufs; service->srv_max_req_size = max_req_size; @@ -316,15 +308,15 @@ ptlrpc_init_svc(int nbufs, int bufsize, int max_req_size, int max_reply_size, service->srv_request_max_cull_seq = 0; service->srv_num_threads = num_threads; - INIT_LIST_HEAD(&service->srv_request_queue); - 
INIT_LIST_HEAD(&service->srv_idle_rqbds); - INIT_LIST_HEAD(&service->srv_active_rqbds); - INIT_LIST_HEAD(&service->srv_history_rqbds); - INIT_LIST_HEAD(&service->srv_request_history); - INIT_LIST_HEAD(&service->srv_active_replies); - INIT_LIST_HEAD(&service->srv_reply_queue); - INIT_LIST_HEAD(&service->srv_free_rs_list); - init_waitqueue_head(&service->srv_free_rs_waitq); + CFS_INIT_LIST_HEAD(&service->srv_request_queue); + CFS_INIT_LIST_HEAD(&service->srv_idle_rqbds); + CFS_INIT_LIST_HEAD(&service->srv_active_rqbds); + CFS_INIT_LIST_HEAD(&service->srv_history_rqbds); + CFS_INIT_LIST_HEAD(&service->srv_request_history); + CFS_INIT_LIST_HEAD(&service->srv_active_replies); + CFS_INIT_LIST_HEAD(&service->srv_reply_queue); + CFS_INIT_LIST_HEAD(&service->srv_free_rs_list); + cfs_waitq_init(&service->srv_free_rs_waitq); spin_lock (&ptlrpc_all_services_lock); list_add (&service->srv_list, &ptlrpc_all_services); @@ -559,7 +551,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, spin_unlock_irqrestore (&svc->srv_lock, flags); do_gettimeofday(&work_start); - timediff = timeval_sub(&work_start, &request->rq_arrival_time); + timediff = cfs_timeval_sub(&work_start, &request->rq_arrival_time,NULL); if (svc->srv_stats != NULL) { lprocfs_counter_add(svc->srv_stats, PTLRPC_REQWAIT_CNTR, timediff); @@ -631,7 +623,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, request->rq_phase = RQ_PHASE_INTERPRET; CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm, + "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? 
@@ -645,7 +637,7 @@ ptlrpc_server_handle_request(struct ptlrpc_service *svc, request->rq_phase = RQ_PHASE_COMPLETE; CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc " - "%s:%s+%d:%d:"LPU64":%s:%d\n", current->comm, + "%s:%s+%d:%d:"LPU64":%s:%d\n", cfs_curproc_comm(), (request->rq_export ? (char *)request->rq_export->exp_client_uuid.uuid : "0"), (request->rq_export ? @@ -661,15 +653,15 @@ put_conn: out: do_gettimeofday(&work_end); - timediff = timeval_sub(&work_end, &work_start); + timediff = cfs_timeval_sub(&work_end, &work_start, NULL); if (timediff / 1000000 > (long)obd_timeout) CERROR("request "LPU64" opc %u from %s processed in %lds " "trans "LPU64" rc %d/%d\n", request->rq_xid, request->rq_reqmsg->opc, libcfs_id2str(request->rq_peer), - timeval_sub(&work_end, - &request->rq_arrival_time) / 1000000, + cfs_timeval_sub(&work_end, &request->rq_arrival_time, + NULL) / 1000000, request->rq_repmsg ? request->rq_repmsg->transno : request->rq_transno, request->rq_status, request->rq_repmsg ? request->rq_repmsg->status : -999); @@ -678,7 +670,8 @@ put_conn: "%ldus (%ldus total) trans "LPU64" rc %d/%d\n", request->rq_xid, request->rq_reqmsg->opc, libcfs_id2str(request->rq_peer), timediff, - timeval_sub(&work_end, &request->rq_arrival_time), + cfs_timeval_sub(&work_end, &request->rq_arrival_time, + NULL), request->rq_transno, request->rq_status, request->rq_repmsg ? 
request->rq_repmsg->status : -999); @@ -841,9 +834,10 @@ void ptlrpc_daemonize(char *name) struct fs_struct *fs = current->fs; atomic_inc(&fs->count); - libcfs_daemonize(name); - exit_fs(current); + cfs_daemonize(name); + exit_fs(cfs_current()); current->fs = fs; + set_fs_pwd(current->fs, init_task.fs->pwdmnt, init_task.fs->pwd); } static void @@ -884,7 +878,7 @@ static int ptlrpc_main(void *arg) struct ptlrpc_reply_state *rs; struct lc_watchdog *watchdog; unsigned long flags; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) +#ifdef WITH_GROUP_INFO struct group_info *ginfo = NULL; #endif struct lu_context ctx; @@ -906,11 +900,11 @@ static int ptlrpc_main(void *arg) break; num_cpu++; } - set_cpus_allowed(current, node_to_cpumask(cpu_to_node(cpu))); + set_cpus_allowed(cfs_current(), node_to_cpumask(cpu_to_node(cpu))); } #endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) +#ifdef WITH_GROUP_INFO ginfo = groups_alloc(0); if (!ginfo) { rc = -ENOMEM; @@ -935,7 +929,7 @@ static int ptlrpc_main(void *arg) ctx.lc_thread = thread; /* Alloc reply state structure for this one */ - OBD_ALLOC_GFP(rs, svc->srv_max_reply_size, GFP_KERNEL); + OBD_ALLOC_GFP(rs, svc->srv_max_reply_size, CFS_ALLOC_STD); if (!rs) { rc = -ENOMEM; goto out_srv_init; @@ -947,7 +941,7 @@ static int ptlrpc_main(void *arg) * wake up our creator. Note: @data is invalid after this point, * because it's allocated on ptlrpc_start_thread() stack. */ - wake_up(&thread->t_ctl_waitq); + cfs_waitq_signal(&thread->t_ctl_waitq); watchdog = lc_watchdog_add(svc->srv_watchdog_timeout, LC_WATCHDOG_DEFAULT_CB, NULL); @@ -956,7 +950,7 @@ static int ptlrpc_main(void *arg) svc->srv_nthreads++; list_add(&rs->rs_list, &svc->srv_free_rs_list); spin_unlock_irqrestore(&svc->srv_lock, flags); - wake_up(&svc->srv_free_rs_waitq); + cfs_waitq_signal(&svc->srv_free_rs_waitq); CDEBUG(D_NET, "service thread %d started\n", thread->t_id); @@ -1002,7 +996,7 @@ static int ptlrpc_main(void *arg) /* I just failed to repost request buffers. 
Wait * for a timeout (unless something else happens) * before I try again */ - svc->srv_rqbd_timeout = HZ/10; + svc->srv_rqbd_timeout = cfs_time_seconds(1)/10; } } @@ -1025,7 +1019,7 @@ out: thread->t_id = rc; thread->t_flags = SVC_STOPPED; - wake_up(&thread->t_ctl_waitq); + cfs_waitq_signal(&thread->t_ctl_waitq); spin_unlock_irqrestore(&svc->srv_lock, flags); return rc; @@ -1041,7 +1035,7 @@ static void ptlrpc_stop_thread(struct ptlrpc_service *svc, thread->t_flags = SVC_STOPPING; spin_unlock_irqrestore(&svc->srv_lock, flags); - wake_up_all(&svc->srv_waitq); + cfs_waitq_broadcast(&svc->srv_waitq); l_wait_event(thread->t_ctl_waitq, (thread->t_flags & SVC_STOPPED), &lwi); @@ -1104,7 +1098,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, OBD_ALLOC(thread, sizeof(*thread)); if (thread == NULL) RETURN(-ENOMEM); - init_waitqueue_head(&thread->t_ctl_waitq); + cfs_waitq_init(&thread->t_ctl_waitq); thread->t_id = id; spin_lock_irqsave(&svc->srv_lock, flags); @@ -1119,7 +1113,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we * just drop the VM and FILES in ptlrpc_daemonize() right away. 
*/ - rc = kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES); + rc = cfs_kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES); if (rc < 0) { CERROR("cannot start thread '%s': rc %d\n", name, rc); @@ -1184,7 +1178,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) /* Network access will complete in finite time but the HUGE * timeout lets us CWARN for visibility of sluggish NALs */ - lwi = LWI_TIMEOUT(300 * HZ, NULL, NULL); + lwi = LWI_TIMEOUT(cfs_time_seconds(300), NULL, NULL); rc = l_wait_event(service->srv_waitq, service->srv_nrqbd_receiving == 0, &lwi); @@ -1237,7 +1231,7 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service) /* wait for all outstanding replies to complete (they were * scheduled having been flagged to abort above) */ while (atomic_read(&service->srv_outstanding_replies) != 0) { - struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL); + struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(10), NULL, NULL); rc = l_wait_event(service->srv_waitq, !list_empty(&service->srv_reply_queue), &lwi); @@ -1284,7 +1278,7 @@ int ptlrpc_service_health_check(struct ptlrpc_service *svc) struct ptlrpc_request, rq_list); do_gettimeofday(&right_now); - timediff = timeval_sub(&right_now, &request->rq_arrival_time); + timediff = cfs_timeval_sub(&right_now, &request->rq_arrival_time, NULL); cutoff = obd_health_check_timeout; diff --git a/lustre/quota/quota_check.c b/lustre/quota/quota_check.c index 9fd7910..6086088 100644 --- a/lustre/quota/quota_check.c +++ b/lustre/quota/quota_check.c @@ -34,13 +34,13 @@ # include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "quota_internal.h" #ifdef __KERNEL__ @@ -201,10 +201,11 @@ int client_quota_poll_check(struct obd_export *exp, struct if_quotacheck *qchk) rc = -EINTR; qchk->obd_uuid = cli->cl_target_uuid; + /* FIXME change strncmp to strcmp and save the strlen op */ if 
(strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_OSC_NAME, strlen(LUSTRE_OSC_NAME))) - memcpy(qchk->obd_type, LUSTRE_FILTER_NAME, - strlen(LUSTRE_FILTER_NAME)); + memcpy(qchk->obd_type, LUSTRE_OST_NAME, + strlen(LUSTRE_OST_NAME)); else if (strncmp(exp->exp_obd->obd_type->typ_name, LUSTRE_MDC_NAME, strlen(LUSTRE_MDC_NAME))) memcpy(qchk->obd_type, LUSTRE_MDS_NAME, diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 3aab55c..00b97db 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -26,9 +26,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include "quota_internal.h" unsigned long default_bunit_sz = 100 * 1024 * 1024; /* 100M bytes */ @@ -62,7 +62,7 @@ void qunit_cache_cleanup(void) if (qunit_cachep) { int rc; rc = kmem_cache_destroy(qunit_cachep); - LASSERT(rc == 0); + LASSERTF(rc == 0, "couldn't destory qunit_cache slab\n"); qunit_cachep = NULL; } EXIT; @@ -269,7 +269,7 @@ static void remove_qunit_nolock(struct lustre_qunit *qunit) struct qunit_waiter { struct list_head qw_entry; - wait_queue_head_t qw_waitq; + cfs_waitq_t qw_waitq; int qw_rc; }; diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index bd78c00..a8c4317 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -35,13 +35,13 @@ # include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "quota_internal.h" #ifdef __KERNEL__ @@ -91,6 +91,7 @@ int mds_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) { struct obd_device *obd = exp->exp_obd; + struct obd_device_target *obt = &obd->u.obt; struct lvfs_run_ctxt saved; int rc = 0; ENTRY; @@ -98,6 +99,12 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) switch (oqctl->qc_cmd) { case Q_QUOTAON: case Q_QUOTAOFF: + if 
(!atomic_dec_and_test(&obt->obt_quotachecking)) { + CDEBUG(D_INFO, "other people are doing quotacheck\n"); + atomic_inc(&obt->obt_quotachecking); + rc = -EBUSY; + break; + } case Q_GETOINFO: case Q_GETOQUOTA: case Q_GETQUOTA: @@ -113,6 +120,9 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl) push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_quotactl(obd, obd->u.obt.obt_sb, oqctl); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + + if (oqctl->qc_cmd == Q_QUOTAON || oqctl->qc_cmd == Q_QUOTAOFF) + atomic_inc(&obt->obt_quotachecking); break; case Q_INITQUOTA: { diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index 5edd982..35a7f4a 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -35,13 +35,13 @@ # include #endif -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include "quota_internal.h" @@ -410,7 +410,7 @@ spinlock_t qinfo_list_lock = SPIN_LOCK_UNLOCKED; static struct list_head qinfo_hash[NR_DQHASH]; /* SLAB cache for client quota context */ -kmem_cache_t *qinfo_cachep = NULL; +cfs_mem_cache_t *qinfo_cachep = NULL; static inline int const hashfn(struct client_obd *cli, unsigned long id, @@ -460,7 +460,7 @@ static struct osc_quota_info *alloc_qinfo(struct client_obd *cli, struct osc_quota_info *oqi; ENTRY; - OBD_SLAB_ALLOC(oqi, qinfo_cachep, SLAB_KERNEL, sizeof(*oqi)); + OBD_SLAB_ALLOC(oqi, qinfo_cachep, CFS_ALLOC_STD, sizeof(*oqi)); if(!oqi) RETURN(NULL); @@ -574,9 +574,9 @@ int osc_quota_init(void) ENTRY; LASSERT(qinfo_cachep == NULL); - qinfo_cachep = kmem_cache_create("osc_quota_info", + qinfo_cachep = cfs_mem_cache_create("osc_quota_info", sizeof(struct osc_quota_info), - 0, 0, NULL, NULL); + 0, 0); if (!qinfo_cachep) RETURN(-ENOMEM); @@ -601,8 +601,10 @@ int osc_quota_exit(void) } spin_unlock(&qinfo_list_lock); - rc = kmem_cache_destroy(qinfo_cachep); - LASSERT(rc == 0); + rc = 
cfs_mem_cache_destroy(qinfo_cachep); + LASSERTF(rc == 0, "couldn't destory qinfo_cachep slab\n"); + qinfo_cachep = NULL; + RETURN(0); } diff --git a/lustre/quota/quota_internal.h b/lustre/quota/quota_internal.h index 0389734..6e8438e 100644 --- a/lustre/quota/quota_internal.h +++ b/lustre/quota/quota_internal.h @@ -14,7 +14,7 @@ #ifndef __QUOTA_INTERNAL_H #define __QUOTA_INTERNAL_H -#include +#include /* QUSG covnert bytes to blocks when counting block quota */ #define QUSG(count, isblk) (isblk ? toqb(count) : count) diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 9eb3cf3..7332669 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -27,10 +27,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include "quota_internal.h" @@ -71,7 +71,7 @@ void lustre_dquot_exit(void) if (lustre_dquot_cachep) { int rc; rc = kmem_cache_destroy(lustre_dquot_cachep); - LASSERT(rc == 0); + LASSERTF(rc == 0,"couldn't destroy lustre_dquot_cachep slab\n"); lustre_dquot_cachep = NULL; } EXIT; @@ -528,10 +528,17 @@ static int mds_admin_quota_off(struct obd_device *obd, int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; + struct obd_device_target *obt = &obd->u.obt; struct lvfs_run_ctxt saved; int rc; ENTRY; + if (!atomic_dec_and_test(&obt->obt_quotachecking)) { + CDEBUG(D_INFO, "other people are doing quotacheck\n"); + atomic_inc(&obt->obt_quotachecking); + RETURN(-EBUSY); + } + down(&mds->mds_qonoff_sem); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = mds_admin_quota_on(obd, oqctl); @@ -546,16 +553,24 @@ int mds_quota_on(struct obd_device *obd, struct obd_quotactl *oqctl) out: pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); up(&mds->mds_qonoff_sem); + atomic_inc(&obt->obt_quotachecking); RETURN(rc); } int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl) { struct mds_obd *mds = &obd->u.mds; + struct 
obd_device_target *obt = &obd->u.obt; struct lvfs_run_ctxt saved; int rc, rc2; ENTRY; + if (!atomic_dec_and_test(&obt->obt_quotachecking)) { + CDEBUG(D_INFO, "other people are doing quotacheck\n"); + atomic_inc(&obt->obt_quotachecking); + RETURN(-EBUSY); + } + down(&mds->mds_qonoff_sem); /* close admin quota files */ push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); @@ -566,6 +581,8 @@ int mds_quota_off(struct obd_device *obd, struct obd_quotactl *oqctl) pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); up(&mds->mds_qonoff_sem); + atomic_inc(&obt->obt_quotachecking); + RETURN(rc ?: rc2); } @@ -1068,13 +1085,13 @@ int mds_quota_recovery(struct obd_device *obd) int rc = 0; ENTRY; - down(&lov->lov_lock); + mutex_down(&lov->lov_lock); if (lov->desc.ld_tgt_count != lov->desc.ld_active_tgt_count) { CWARN("Not all osts are active, abort quota recovery\n"); - up(&lov->lov_lock); + mutex_up(&lov->lov_lock); RETURN(rc); } - up(&lov->lov_lock); + mutex_up(&lov->lov_lock); data.obd = obd; init_completion(&data.comp); diff --git a/lustre/quota/quotacheck_test.c b/lustre/quota/quotacheck_test.c index 6d49bcd..c2c2040 100644 --- a/lustre/quota/quotacheck_test.c +++ b/lustre/quota/quotacheck_test.c @@ -28,10 +28,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include char *test_quotafile[] = {"aquotacheck.user", "aquotacheck.group"}; diff --git a/lustre/quota/quotactl_test.c b/lustre/quota/quotactl_test.c index 11c86b4..cffb646 100644 --- a/lustre/quota/quotactl_test.c +++ b/lustre/quota/quotactl_test.c @@ -19,10 +19,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include static struct obd_quotactl oqctl; diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre index 89edc5b..3b6b640a 100755 --- a/lustre/scripts/lustre +++ b/lustre/scripts/lustre @@ -46,25 +46,28 @@ LOCK=/var/lock/subsys/$SERVICE # Source function library. if [ -f /etc/init.d/functions ] ; then - . /etc/init.d/functions + . 
/etc/init.d/functions fi # Source networking configuration. if [ -f /etc/sysconfig/network ] ; then - . /etc/sysconfig/network + . /etc/sysconfig/network fi check_start_stop() { - # Check that networking is up. - [ "${NETWORKING}" = "no" ] && exit 0 + # Exit codes now LSB compliant + # Check that networking is up. - exit 'not running' + [ "${NETWORKING}" = "no" ] && exit 7 - [ -x ${LCONF} -a -x ${LCTL} ] || exit 0 + # exit 'not installed' + [ -x ${LCONF} -a -x ${LCTL} ] || exit 5 if [ ${LUSTRE_CONFIG_XML:0:1} = "/" ] ; then - if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then - echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping." - exit 0 - fi + if [ ! -f ${LUSTRE_CONFIG_XML} ] ; then + echo "${0##*/}: Configuration file ${LUSTRE_CONFIG_XML} not found; skipping." + # exit 'not configured' + exit 6 + fi fi # Create /var/lustre directory @@ -77,7 +80,7 @@ check_start_stop() { start() { if [ -x "/usr/sbin/clustat" -a "${SERVICE}" = "lustre" ] ; then - if [ ! -f "/etc/lustre/start-despite-clumanager" ] ; then + if [ ! 
-f "/etc/lustre/start-despite-clumanager" ] ; then cat >&2 < /dev/null`" ] && STATE="running" && RETVAL=0 # check for any configured devices (may indicate partial startup) - [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=1 + [ "`cat /proc/fs/lustre/devices 2> /dev/null`" ] && STATE="partial" && RETVAL=150 # check for either a server or a client filesystem MDS="`ls /proc/fs/lustre/mds/*/recovery_status 2> /dev/null`" @@ -159,14 +167,17 @@ status() { # check for error in health_check HEALTH="/proc/fs/lustre/health_check" - [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=2 + [ -f "$HEALTH" ] && grep -q "NOT HEALTHY" $HEALTH && STATE="unhealthy" && RETVAL=151 # check for LBUG - [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=3 + [ -f "$HEALTH" ] && grep -q "LBUG" $HEALTH && STATE="LBUG" && RETVAL=152 - # Check if the service really exists - DUMMY=`lctl dl | grep $SERVICE` - [ $? -ne 0 ] && STATE="not_found" && RETVAL=5 + # If Lustre is up, check if the service really exists + # Skip this if we are not checking a specific service + if [ $RETVAL -eq 0 ] && [ $SERVICE != 'lustre' ]; then + DUMMY=`lctl dl | grep $SERVICE` + [ $?
-ne 0 ] && STATE="not_found" && RETVAL=3 + fi echo $STATE } diff --git a/lustre/tests/cfg/local.sh b/lustre/tests/cfg/local.sh index 3a921be..ca7258e 100644 --- a/lustre/tests/cfg/local.sh +++ b/lustre/tests/cfg/local.sh @@ -43,7 +43,7 @@ MOUNTOPT="" MOUNTOPT=$MOUNTOPT" --param default_stripe_size=$STRIPE_BYTES" [ "x$STRIPES_PER_OBJ" != "x" ] && MOUNTOPT=$MOUNTOPT" --param default_stripe_count=$STRIPES_PER_OBJ" -MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE $MKFSOPT $MOUNTOPT $MDSOPT" +MDS_MKFS_OPTS="--mgs --mdt --device-size=$MDSSIZE --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $MDSOPT" MKFSOPT="" MOUNTOPT="" @@ -53,8 +53,8 @@ MOUNTOPT="" MKFSOPT="--mkfsoptions=\"$MKFSOPT\"" [ "x$ostfailover_HOST" != "x" ] && MOUNTOPT=$MOUNTOPT" --failnode=`h2$NETTYPE $ostfailover_HOST`" -OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT" -OST2_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID $MKFSOPT $MOUNTOPT $OSTOPT" +OST_MKFS_OPTS="--ost --device-size=$OSTSIZE --mgsnode=$MGSNID --param obd_timeout=$TIMEOUT $MKFSOPT $MOUNTOPT $OSTOPT" +OST2_MKFS_OPTS=${OST2_MKFS_OPTS:-${OST_MKFS_OPTS}} MDS_MOUNT_OPTS="-o loop" OST_MOUNT_OPTS="-o loop" diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 711aa35..23ea6ac 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -212,11 +212,22 @@ test_5() { # if all the modules have unloaded. umount -d $MOUNT & UMOUNT_PID=$! - sleep 2 + sleep 6 echo "killing umount" kill -TERM $UMOUNT_PID echo "waiting for umount to finish" wait $UMOUNT_PID + if grep " $MOUNT " /etc/mtab; then + echo "test 5: mtab after failed umount" + umount $MOUNT & + UMOUNT_PID=$! 
+ sleep 2 + echo "killing umount" + kill -TERM $UMOUNT_PID + echo "waiting for umount to finish" + wait $UMOUNT_PID + grep " $MOUNT " /etc/mtab && echo "test 5: mtab after second umount" && return 11 + fi manual_umount_client # stop_mds is a no-op here, and should not fail @@ -232,8 +243,9 @@ run_test 5 "force cleanup mds, then cleanup" test_5b() { start_ost [ -d $MOUNT ] || mkdir -p $MOUNT + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab before mount" && return 10 mount_client $MOUNT && return 1 - + grep " $MOUNT " /etc/mtab && echo "test 5b: mtab after failed mount" && return 11 umount_client $MOUNT # stop_mds is a no-op here, and should not fail cleanup_nocli || return $? @@ -245,8 +257,9 @@ test_5c() { start_ost start_mds [ -d $MOUNT ] || mkdir -p $MOUNT - # Bad nid might still work if mgs is on 0@lo - mount -t lustre 1.2.3.4@tcp:/wrong.$FSNAME $MOUNT || : + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab before mount" && return 10 + mount -t lustre `facet_nid mgs`:/wrong.$FSNAME $MOUNT || : + grep " $MOUNT " /etc/mtab && echo "test 5c: mtab after failed mount" && return 11 umount_client $MOUNT cleanup_nocli || return $? } @@ -256,18 +269,27 @@ test_5d() { start_ost start_mds stop_ost -f + grep " $MOUNT " /etc/mtab && echo "test 5d: mtab before mount" && return 10 mount_client $MOUNT || return 1 cleanup || return $? + grep " $MOUNT " /etc/mtab && echo "test 5d: mtab after unmount" && return 11 + return 0 } run_test 5d "mount with ost down" test_5e() { start_ost start_mds + # give MDS a chance to connect to OSTs (bz 10476) + sleep 5 + #define OBD_FAIL_PTLRPC_DELAY_SEND 0x506 do_facet client "sysctl -w lustre.fail_loc=0x80000506" + grep " $MOUNT " /etc/mtab && echo "test 5e: mtab before mount" && return 10 mount_client $MOUNT || echo "mount failed (not fatal)" cleanup || return $? 
+ grep " $MOUNT " /etc/mtab && echo "test 5e: mtab after unmount" && return 11 + return 0 } run_test 5e "delayed connect, don't crash (bug 10268)" @@ -772,6 +794,7 @@ test_21() { echo Client mount with a running ost start_ost mount_client $MOUNT + sleep 5 #bz10476 check_mount || return 41 pass diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c index 933c988..fb9c99b 100644 --- a/lustre/tests/directio.c +++ b/lustre/tests/directio.c @@ -53,8 +53,8 @@ int main(int argc, char **argv) return 1; } - if (argc == 6) - st.st_blksize = strtoul(argv[4], 0, 0); + if (argc >= 6) + st.st_blksize = strtoul(argv[5], 0, 0); else if (fstat64(fd, &st) < 0) { printf("Cannot stat %s: %s\n", argv[1], strerror(errno)); return 1; diff --git a/lustre/tests/ll_dirstripe_verify.c b/lustre/tests/ll_dirstripe_verify.c index 7cb8212..aa1ed8c 100644 --- a/lustre/tests/ll_dirstripe_verify.c +++ b/lustre/tests/ll_dirstripe_verify.c @@ -18,10 +18,10 @@ #include #include -#include -#include +#include +#include #include -#include +#include #include diff --git a/lustre/tests/mountconf.sh b/lustre/tests/mountconf.sh index 425a26d..0d71f75 100755 --- a/lustre/tests/mountconf.sh +++ b/lustre/tests/mountconf.sh @@ -10,6 +10,13 @@ #init_test_env $@ mcstopall() { + # make sure we are using the primary server, so test-framework will + # be able to clean up properly. + activemds=`facet_active mds` + if [ $activemds != "mds" ]; then + fail mds + fi + grep " $MOUNT " /proc/mounts && zconf_umount `hostname` $MOUNT $* stop ost -f stop ost2 -f diff --git a/lustre/tests/opendevunlink.c b/lustre/tests/opendevunlink.c index 8250f96..9335eda 100644 --- a/lustre/tests/opendevunlink.c +++ b/lustre/tests/opendevunlink.c @@ -100,10 +100,14 @@ int main(int argc, char **argv) exit(1); } +#if 0 + /* We cannot do this any longer, we do not store open special nodes + * on MDS after unlink */ if (st1.st_mode != st2.st_mode) { // can we do this? 
fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2); exit(1); } +#endif fprintf(stderr, "Ok, everything goes well.\n"); return 0; diff --git a/lustre/tests/qos.sh b/lustre/tests/qos.sh new file mode 100644 index 0000000..572bef0 --- /dev/null +++ b/lustre/tests/qos.sh @@ -0,0 +1,142 @@ +#!/bin/bash + +set -e + +export PATH=`dirname $0`/../utils:$PATH + +LFS=${LFS:-lfs} +LCTL=${LCTL:-lctl} +MOUNT=${MOUNT:-/mnt/lustre} +MAXAGE=${MAXAGE:-1} + +QOSFILE=$MOUNT/qos_file +TAB='--' + +echo "remove all files on $MOUNT..." +rm -fr $MOUNT/* +sleep 1 # to ensure we get up-to-date statfs info + +set_qos() { + for i in `ls /proc/fs/lustre/lov/*/qos_threshold`; do + echo $(($1/1024)) > $i + done + for i in `ls /proc/fs/lustre/lov/*/qos_maxage`; do + echo $2 > $i + done +} + +# assume all osts has same free space +OSTCOUNT=`cat /proc/fs/lustre/lov/*/activeobd | head -n 1` +TOTALAVAIL=`cat /proc/fs/lustre/llite/*/kbytesavail | head -n 1` +SINGLEAVAIL=$(($TOTALAVAIL/$OSTCOUNT)) +MINFREE=$((1024 * 4)) # 4M +TOTALFFREE=`cat /proc/fs/lustre/llite/*/filesfree | head -n 1` + +if [ $SINGLEAVAIL -lt $MINFREE ]; then + echo "ERROR: single ost free size($SINGLEAVAIL kb) is too low!" + exit 1; +fi +if [ $OSTCOUNT -lt 3 ]; then + echo "WARN: ost count($OSTCOUNT) must be greater than 2!" + exit 0; +fi + +qos_test_1() { + echo "[qos test 1]: creation skip almost full OST (avail space < threshold)" + + # set qos_threshold as half ost size + THRESHOLD=$(($SINGLEAVAIL/2)) + set_qos $THRESHOLD $MAXAGE + + # set stripe number to 1 + $LFS setstripe $QOSFILE 65536 -1 1 + FULLOST=`$LFS find -q $QOSFILE | awk '/\s*\d*/ {print $1}'` + + # floodfill the FULLOST + echo "$TAB fill the OST $FULLOST to almost fullness..." 
+ dd if=/dev/zero of=$QOSFILE count=$(($SINGLEAVAIL - $THRESHOLD + 1500)) bs=1k > /dev/null 2>&1 || return 1 + echo "$TAB done" + + sleep $(($MAXAGE * 2)) + echo "$TAB create 10 files with 1 stripe" + for i in `seq 10`; do + rm -f $MOUNT/file-$i + $LFS setstripe $MOUNT/file-$i 65536 -1 1 + idx=`$LFS find -q $MOUNT/file-$i | awk '/\s*\d*/ {print $1}'` + if [ $idx -eq $FULLOST ]; then + echo "$TAB ERROR: create object on full OST $FULLOST" + return 1 + fi + done + echo "$TAB no object created on OST $FULLOST" + + # cleanup + for i in `seq 10`; do + rm -f $MOUNT/file-$i + done + rm -f $QOSFILE + # set threshold and maxage to normal value + set_qos 10240 1 + + sleep 1 + return 0 +} + +qos_test_2 () { + echo "[qos test 2]: creation balancing over all OSTs by free space" + + if [ $OSTCOUNT -lt 3 ]; then + echo "$TAB WARN: OST count < 3, test skipped" + return 0 + fi + + WADSZ=$(($SINGLEAVAIL * 3 / 4)) + TOTALSZ=$(($WADSZ * $OSTCOUNT - 1)) + + # fill all OST 0 to 3/4 fulness + $LFS setstripe $QOSFILE 65536 0 1 + echo "$TAB fill the OST 0 to 3/4 fulness..." + dd if=/dev/zero of=$QOSFILE count=$WADSZ bs=1k > /dev/null 2>&1 || return 1 + echo "$TAB done" + + # write 2 stripe files to fill up other OSTs + LOOPCNT=500 + echo "$TAB create $LOOPCNT files with 2 stripe..." 
+ for i in `seq $LOOPCNT`; do + rm -f $MOUNT/file-$i + $LFS setstripe $MOUNT/file-$i 65536 -1 2 + done + echo "$TAB done" + + # the objects created on OST 0 should be 1/4 of on other OSTs' + CNT0=`$LFS find -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c 0` + CNT0=$(($CNT0 - 1)) + echo "$TAB object created on OST 0: $CNT0" + + # the object count of other osts must be greater than 2 times + CNT0=$(($CNT0 * 2)) + for i in `seq $(($OSTCOUNT - 1))`; do + CNT=`$LFS find -q /mnt/lustre | awk '/\s*\d*/ {print $1}'| grep -c $i` + echo "$TAB object created on OST $i: $CNT" + if [ $CNT0 -gt $CNT ] ; then + echo "$TAB ERROR: too much objects created on OST 0" + return 1 + fi + done + echo "$TAB objects created on OST 0 is about 1/4 of others'" + + # cleanup + for i in `seq $LOOPCNT`; do + rm -f $MOUNT/file-$i + done + rm -f $QOSFILE + return 0 +} + + +# run tests +for j in `seq 2`; do + qos_test_$j + [ $? -ne 0 ] && exit 1 +done +exit 0 diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 9702681..36e90f3 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -7,8 +7,7 @@ ALWAYS_EXCEPT="20b 24 27 $RECOVERY_SMALL_EXCEPT" # Tests that always fail with mountconf -- FIXME # 16 fails with 1, not evicted -# 18a,b there is still data in page cache -EXCEPT="$EXCEPT 16 18a 18b" +EXCEPT="$EXCEPT 16" LUSTRE=${LUSTRE:-`dirname $0`/..} @@ -244,13 +243,13 @@ test_18a() { do_facet client cp /etc/termcap $f sync - local osc2_dev=`awk '(/OST0001-osc-/){print $4}' $LPROC/devices` - $LCTL --device %$osc2_dev deactivate + local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'` + $LCTL --device $osc2dev deactivate || return 3 # my understanding is that there should be nothing in the page # cache after the client reconnects? 
rc=0 pgcache_empty || rc=2 - $LCTL --device %$osc2_dev activate + $LCTL --device $osc2dev activate rm -f $f return $rc } @@ -368,7 +367,7 @@ test_24() { # bug 2248 - eviction fails writeback but app doesn't see it } run_test 24 "fsync error (should return error)" -test_26() { # bug 5921 - evict dead exports +test_26() { # bug 5921 - evict dead exports by pinger # this test can only run from a client on a separate node. [ "`lsmod | grep obdfilter`" ] && \ echo "skipping test 26 (local OST)" && return diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index cae66d0..513766c 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -13,6 +13,7 @@ LUSTRE=${LUSTRE:-`dirname $0`/..} init_test_env $@ . ${CONFIG:=$LUSTRE/tests/cfg/local.sh} +. mountconf.sh # Skip these tests # bug number: 2766 @@ -20,22 +21,8 @@ ALWAYS_EXCEPT="0b $REPLAY_SINGLE_EXCEPT" build_test_filter -cleanup() { - # make sure we are using the primary server, so test-framework will - # be able to clean up properly. 
- activemds=`facet_active mds` - if [ $activemds != "mds" ]; then - fail mds - fi - - zconf_umount `hostname` $MOUNT - stop ost -f - stop ost2 -f - stop mds -f -} - SETUP=${SETUP:-"setup"} -CLEANUP=${CLEANUP:-"cleanup"} +CLEANUP=${CLEANUP:-"mcstopall"} if [ "$ONLY" == "cleanup" ]; then sysctl -w lnet.debug=0 || true @@ -44,15 +31,8 @@ if [ "$ONLY" == "cleanup" ]; then fi setup() { - cleanup - add mds $MDS_MKFS_OPTS --reformat $MDSDEV - add ost $OST_MKFS_OPTS --reformat $OSTDEV - add ost2 $OST2_MKFS_OPTS --reformat $OSTDEV2 - start mds $MDSDEV $MDS_MOUNT_OPTS - start ost $OSTDEV $OST_MOUNT_OPTS - start ost2 $OSTDEV2 $OST2_MOUNT_OPTS - [ "$DAEMONFILE" ] && $LCTL debug_daemon start $DAEMONFILE $DAEMONSIZE - grep " $MOUNT " /proc/mounts || zconf_mount `hostname` $MOUNT + mcformat + mcsetup } $SETUP @@ -91,20 +71,20 @@ test_1a() { do_facet ost "sysctl -w lustre.fail_loc=0" rm -fr $DIR/$tfile - local old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + local old_last_id=`cat $LPROC/obdfilter/*/last_id` touch -o $DIR/$tfile 1 sync - local new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + local new_last_id=`cat $LPROC/obdfilter/*/last_id` test "$old_last_id" = "$new_last_id" || { echo "OST object create is caused by MDS" return 1 } - old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + old_last_id=`cat $LPROC/obdfilter/*/last_id` echo "data" > $DIR/$tfile sync - new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + new_last_id=`cat $LPROC/obdfilter/*/last_id` test "$old_last_id" = "$new_last_id "&& { echo "CROW does not work on write" return 1 @@ -116,10 +96,10 @@ test_1a() { do_facet ost "sysctl -w lustre.fail_loc=0x80000801" rm -fr $DIR/1a1 - old_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + old_last_id=`cat $LPROC/obdfilter/*/last_id` echo "data" > $DIR/1a1 sync - new_last_id=`cat /proc/fs/lustre/obdfilter/*/last_id` + new_last_id=`cat $LPROC/obdfilter/*/last_id` test "$old_last_id" = "$new_last_id" || { echo "CROW does work with fail_loc=0x80000801" 
return 1 @@ -802,8 +782,7 @@ test_39() { # bug 4176 run_test 39 "test recovery from unlink llog (test llog_gen_rec) " count_ost_writes() { - cat /proc/fs/lustre/osc/*/stats | - awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' + awk -vwrites=0 '/ost_write/ { writes += $2 } END { print writes; }' $LPROC/osc/*/stats } #b=2477,2532 @@ -856,11 +835,11 @@ test_41() { do_facet client dd if=/dev/zero of=$f bs=4k count=1 || return 3 cancel_lru_locks osc # fail ost2 and read from ost1 - local osc2_dev=`$LCTL device_list | \ - awk '(/ost2.*client_facet/){print $4}' ` - $LCTL --device %$osc2_dev deactivate + local osc2dev=`grep ${ost2_svc}-osc- $LPROC/devices | awk '{print $1}'` + [ "$osc2dev" ] || return 4 + $LCTL --device $osc2dev deactivate || return 1 do_facet client dd if=$f of=/dev/null bs=4k count=1 || return 3 - $LCTL --device %$osc2_dev activate + $LCTL --device $osc2dev activate || return 2 return 0 } run_test 41 "read from a valid osc while other oscs are invalid" @@ -901,8 +880,10 @@ test_43() { # bug 2530 run_test 43 "mds osc import failure during recovery; don't LBUG" test_44() { - mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices` + mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do + echo iteration $i #define OBD_FAIL_TGT_CONN_RACE 0x701 do_facet mds "sysctl -w lustre.fail_loc=0x80000701" $LCTL --device $mdcdev recover @@ -914,8 +895,10 @@ test_44() { run_test 44 "race in target handle connect" test_44b() { - mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices` + mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + [ "$mdcdev" ] || exit 2 for i in `seq 1 10`; do + echo iteration $i #define OBD_FAIL_TGT_DELAY_RECONNECT 0x704 do_facet mds "sysctl -w lustre.fail_loc=0x80000704" $LCTL --device $mdcdev recover @@ -928,7 +911,8 @@ run_test 44b "race in target handle connect" # Handle failed close test_45() { - mdcdev=`awk '/mds_svc_MNT/ {print $1}' < /proc/fs/lustre/devices` 
+ mdcdev=`awk '/-mdc-/ {print $1}' $LPROC/devices` + [ "$mdcdev" ] || exit 2 $LCTL --device $mdcdev recover multiop $DIR/$tfile O_c & @@ -937,13 +921,13 @@ test_45() { # This will cause the CLOSE to fail before even # allocating a reply buffer - $LCTL --device $mdcdev deactivate + $LCTL --device $mdcdev deactivate || return 4 # try the close kill -USR1 $pid wait $pid || return 1 - $LCTL --device $mdcdev activate + $LCTL --device $mdcdev activate || return 5 sleep 1 $CHECKSTAT -t file $DIR/$tfile || return 2 @@ -1002,9 +986,9 @@ test_48() { run_test 48 "MDS->OSC failure during precreate cleanup (2824)" test_50() { - local osc_dev=`$LCTL device_list | \ - awk '(/ost_svc_mds_svc/){print $4}' ` - $LCTL --device %$osc_dev recover && $LCTL --device %$osc_dev recover + local oscdev=`grep ${ost_svc}-osc- $LPROC/devices | awk '{print $1}'` + [ "$oscdev" ] || return 1 + $LCTL --device $oscdev recover && $LCTL --device $oscdev recover # give the mds_lov_sync threads a chance to run sleep 5 } diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench index 09a0549..fe80594 100755 --- a/lustre/tests/rundbench +++ b/lustre/tests/rundbench @@ -8,7 +8,7 @@ SRC=${SRC:-/usr/lib/dbench/client.txt} [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT SRC=/usr/lib/dbench/client_plain.txt [ ! -s $TGT -a -s $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT -[ ! -s $TGT ] && echo "$TGT doesn't exist" && exit 1 +[ ! -s $TGT ] && echo "$0: $TGT doesn't exist (SRC=$SRC)" && exit 1 cd $DIR echo "running 'dbench $@' on $PWD at `date`" dbench -c client.txt $@ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 0c4f5fd..b68cb58 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -11,14 +11,13 @@ ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"42a 42b 42c 42d 45 68"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
-[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 101" +[ "$SLOW" = "no" ] && EXCEPT="$EXCEPT 24o 27m 51b 51c 63 64b 71 77 101" # Tests that fail on uml -[ "$UML" = "no" ] && EXCEPT="$EXCEPT 31d" +[ "$UML" = "true" ] && EXCEPT="$EXCEPT 31d" # Tests that always fail with mountconf -- FIXME # 48a moving the working dir succeeds -# 104 something is out of sync with b1_4? 'lfs df' needs an arg -EXCEPT="$EXCEPT 48a 104" +EXCEPT="$EXCEPT 48a" case `uname -r` in 2.4*) FSTYPE=${FSTYPE:-ext3}; ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76" ;; @@ -2724,6 +2723,11 @@ test_76() { # bug 1443 } run_test 76 "destroy duplicate inodes in client inode cache" +test_77() { + sh qos.sh +} +run_test 77 "qos test ============================================" + # on the LLNL clusters, runas will still pick up root's $TMP settings, # which will not be writable for the runas user, and then you get a CVS # error message with a corrupt path string (CVS bug) and panic. @@ -2817,39 +2821,49 @@ function get_named_value() done } +export CACHE_MAX=`cat /proc/fs/lustre/llite/*/max_cached_mb | head -n 1` +cleanup_101() { + for s in $LPROC/llite/*/max_cached_mb; do + echo $CACHE_MAX > $s + done + trap 0 +} + test_101() { local s local discard - local nreads + local nreads=10000 + local cache_limit=32 - for s in $LPROC/osc/*-osc*/rpc_stats ;do + for s in $LPROC/osc/*-osc*/rpc_stats; do echo 0 > $s done - for s in $LPROC/llite/*/read_ahead_stats ;do - echo 0 > $s + trap cleanup_101 EXIT + for s in $LPROC/llite/fs*; do + echo 0 > $s/read_ahead_stats + echo $cache_limit > $s/max_cached_mb done # - # randomly read 10000 of 64K chunks from file 3x RAM size + # randomly read 10000 of 64K chunks from file 3x 32MB in size # - nreads=10000 - s=$(($(awk '/MemTotal/ { print $2 }' /proc/meminfo) * 3)) - echo "nreads: $nreads file size: ${s}kB" - $RANDOM_READS -f $DIR/f101 -s${s}000 -b65536 -C -n$nreads -t 180 + echo "nreads: $nreads file size: $((cache_limit * 3))MB" + $RANDOM_READS -f $DIR/$tfile 
-s$((cache_limit * 3192 * 1024)) -b65536 -C -n$nreads -t 180 discard=0 - for s in $LPROC/llite/*/read_ahead_stats ;do - discard=$(($discard + $(cat $s | get_named_value 'read but discarded'))) + for s in $LPROC/llite/fs*; do + discard=$(($discard + $(cat $s/read_ahead_stats | get_named_value 'read but discarded'))) done + cleanup_101 if [ $(($discard * 10)) -gt $nreads ] ;then cat $LPROC/osc/*-osc*/rpc_stats cat $LPROC/llite/*/read_ahead_stats error "too many ($discard) discarded pages" fi - rm -f $DIR/f101 || true + rm -f $DIR/$tfile || true } -run_test 101 "check read-ahead for random reads ===========" +run_test 101 "check read-ahead for random reads ================" test_102() { local testfile=$DIR/xattr_testfile @@ -2858,7 +2872,7 @@ test_102() { touch $testfile [ "$UID" != 0 ] && echo "skipping $TESTNAME (must run as root)" && return - [ -z "`grep \ $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return + [ -z "`grep xattr $LPROC/mdc/*-mdc-*/connect_flags`" ] && echo "skipping $TESTNAME (must have user_xattr)" && return echo "set/get xattr..." 
setfattr -n trusted.name1 -v value1 $testfile || error [ "`getfattr -n trusted.name1 $testfile 2> /dev/null | \ @@ -2894,7 +2908,7 @@ test_102() { rm -f $testfile } -run_test 102 "user xattr test =====================" +run_test 102 "user xattr test ==================================" run_acl_subtest() { @@ -2934,14 +2948,14 @@ test_103 () { cd $SAVED_PWD umask $SAVE_UMASK } -run_test 103 "==============acl test =============" +run_test 103 "acl test =========================================" test_104() { touch $DIR/$tfile lfs df || error "lfs df failed" lfs df -ih || error "lfs df -ih failed" - lfs df $DIR || error "lfs df $DIR failed" - lfs df -ih $DIR || error "lfs df -ih $DIR failed" + lfs df -h $DIR || error "lfs df -h $DIR failed" + lfs df -i $DIR || error "lfs df -i $DIR failed" lfs df $DIR/$tfile || error "lfs df $DIR/$tfile failed" lfs df -ih $DIR/$tfile || error "lfs df -ih $DIR/$tfile failed" @@ -2951,7 +2965,7 @@ test_104() { lctl --device %$OSC recover lfs df || error "lfs df with reactivated OSC failed" } -run_test 104 "lfs>df [-ih] [path] test ============" +run_test 104 "lfs df [-ih] [path] test =========================" TMPDIR=$OLDTMPDIR TMP=$OLDTMP diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh index b10a0e1..a32f2b6 100644 --- a/lustre/tests/sanityN.sh +++ b/lustre/tests/sanityN.sh @@ -7,6 +7,9 @@ ONLY=${ONLY:-"$*"} ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"14b 14c"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! 
+# Tests that fail on uml +[ "$UML" = "true" ] && EXCEPT="$EXCEPT 7" + SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH @@ -559,6 +562,45 @@ test_23() { # Bug 5972 } run_test 23 " others should see updated atime while another read====" +test_24() { + touch $DIR1/$tfile + lfs df || error "lfs df failed" + lfs df -ih || error "lfs df -ih failed" + lfs df -h $DIR1 || error "lfs df -h $DIR1 failed" + lfs df -i $DIR2 || error "lfs df -i $DIR2 failed" + lfs df $DIR1/$tfile || error "lfs df $DIR1/$tfile failed" + lfs df -ih $DIR2/$tfile || error "lfs df -ih $DIR2/$tfile failed" + + OSC=`lctl dl | awk '/OSC.*MNT/ {print $4}' | head -n 1` + lctl --device %$OSC deactivate + lfs df -i || error "lfs df -i with deactivated OSC failed" + lctl --device %$OSC recover + lfs df || error "lfs df with reactivated OSC failed" +} +run_test 24 "lfs df [-ih] [path] test =========================" + +test_25() { + [ -z "`mount | grep " $DIR1 .*\"`" ] && echo "skipping $TESTNAME ($DIR1 must have acl)" && return + [ -z "`mount | grep " $DIR2 .*\"`" ] && echo "skipping $TESTNAME ($DIR2 must have acl)" && return + + mkdir $DIR1/d25 || error + touch $DIR1/d25/f1 || error + chmod 0755 $DIR1/d25/f1 || error + + $RUNAS checkstat $DIR2/d25/f1 || error + setfacl -m u:$RUNAS_ID:--- $DIR1/d25 || error + $RUNAS checkstat $DIR2/d25/f1 && error + setfacl -m u:$RUNAS_ID:r-x $DIR1/d25 || error + $RUNAS checkstat $DIR2/d25/f1 || error + setfacl -m u:$RUNAS_ID:--- $DIR1/d25 || error + $RUNAS checkstat $DIR2/d25/f1 && error + setfacl -x u:$RUNAS_ID: $DIR1/d25 || error + $RUNAS checkstat $DIR2/d25/f1 || error + + rm -rf $DIR1/d25 +} +run_test 25 "change ACL on one mountpoint be seen on another ===" + log "cleanup: ======================================================" rm -rf $DIR1/[df][0-9]* $DIR1/lnk || true if [ "$I_MOUNTED" = "yes" ]; then diff --git a/lustre/tests/statmany.c b/lustre/tests/statmany.c index ea3c411..63a13ad 100644 --- a/lustre/tests/statmany.c +++ 
b/lustre/tests/statmany.c @@ -15,8 +15,8 @@ #include #endif #include -#include -#include +#include +#include struct option longopts[] = { {"ea", 0, 0, 'e'}, diff --git a/lustre/tests/statone.c b/lustre/tests/statone.c index 5250984..cf2b153 100644 --- a/lustre/tests/statone.c +++ b/lustre/tests/statone.c @@ -5,8 +5,8 @@ #include #include -#include -#include +#include +#include int main(int argc, char **argv) { diff --git a/lustre/tests/testreq.c b/lustre/tests/testreq.c index 774398d..0b19a44 100644 --- a/lustre/tests/testreq.c +++ b/lustre/tests/testreq.c @@ -34,7 +34,7 @@ #undef _GNU_SOURCE #include -#include +#include static void usage(char *argv0, int status) { diff --git a/lustre/tests/wantedi.c b/lustre/tests/wantedi.c index 78fd71b..7c78924 100644 --- a/lustre/tests/wantedi.c +++ b/lustre/tests/wantedi.c @@ -11,8 +11,8 @@ #include #include #include -#include -#include +#include +#include static int usage(char *prog, FILE *out) { diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am index 066abd3..eb43617 100644 --- a/lustre/utils/Makefile.am +++ b/lustre/utils/Makefile.am @@ -8,7 +8,7 @@ AM_LDFLAGS := -L$(top_builddir)/lnet/utils LIBPTLCTL := $(top_builddir)/lnet/utils/libptlctl.a -sbin_scripts = llanalyze llstat.pl llobdstat.pl lactive lrun +sbin_scripts = llanalyze llstat.pl llobdstat.pl lactive lrun bin_scripts = lfind lstripe if UTILS @@ -16,51 +16,52 @@ if UTILS rootsbin_PROGRAMS = mount.lustre sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \ mount_lustre mkfs_lustre mkfs.lustre \ - tunefs_lustre tunefs.lustre l_getgroups llog_reader -bin_PROGRAMS = lfs + tunefs_lustre tunefs.lustre l_getgroups +bin_PROGRAMS = lfs llog_reader lib_LIBRARIES = liblustreapi.a sbin_SCRIPTS = $(sbin_scripts) bin_SCRIPTS = $(bin_scripts) endif # UTILS +lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h platform.h +lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL) +lctl_DEPENDENCIES := $(LIBPTLCTL) + +lfs_SOURCES = lfs.c parser.c obd.c 
+lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) +lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a + +lload_SOURCES = lload.c +lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL) +lload_DEPENDENCIES := $(LIBPTLCTL) + liblustreapi_a_SOURCES = liblustreapi.c wirecheck_SOURCES = wirecheck.c wirecheck_CPPFLAGS = -DCC="\"$(CC)\"" -wiretest_SOURCES = wiretest.c -lctl_LDADD := $(LIBREADLINE) $(LIBPTLCTL) -lctl_DEPENDENCIES := $(LIBPTLCTL) -lctl_SOURCES = parser.c obd.c lustre_cfg.c lctl.c parser.h obdctl.h - -lload_LDADD := $(LIBREADLINE) $(LIBPTLCTL) -lload_DEPENDENCIES := $(LIBPTLCTL) -lload_SOURCES = lload.c +wiretest_SOURCES = wiretest.c obdio_SOURCES = obdio.c obdiolib.c obdiolib.h obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h -lfs_LDADD := $(LIBREADLINE) liblustreapi.a $(LIBPTLCTL) -lfs_DEPENDENCIES := $(LIBPTLCTL) liblustreapi.a -lfs_SOURCES = lfs.c parser.c obd.c - -llog_reader_LDADD := $(LIBREADLINE) $(LIBPTLCTL) -llog_reader_DEPENDENCIES := $(LIBPTLCTL) llog_reader_SOURCES = llog_reader.c +llog_reader_LDADD := $(LIBPTLCTL) +llog_reader_DEPENDENCIES := $(LIBPTLCTL) -mount_lustre_LDADD := $(LIBREADLINE) $(LIBPTLCTL) -mount_lustre_DEPENDENCIES := $(LIBPTLCTL) mount_lustre_SOURCES = mount_lustre.c +mount_lustre_LDADD := $(LIBPTLCTL) +mount_lustre_DEPENDENCIES := $(LIBPTLCTL) -mkfs_lustre_LDADD := $(LIBREADLINE) $(LIBPTLCTL) -mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL) -mkfs_lustre_SOURCES = parser.c obd.c lustre_cfg.c mkfs_lustre.c parser.h obdctl.h +mkfs_lustre_SOURCES = mkfs_lustre.c mkfs_lustre_CPPFLAGS = -UTUNEFS $(AM_CPPFLAGS) +mkfs_lustre_LDADD := $(LIBPTLCTL) +mkfs_lustre_DEPENDENCIES := $(LIBPTLCTL) -tunefs_lustre_LDADD := $(mkfs_lustre_LDADD) -tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES) tunefs_lustre_SOURCES = $(mkfs_lustre_SOURCES) tunefs_lustre_CPPFLAGS = -DTUNEFS $(AM_CPPFLAGS) +tunefs_lustre_LDADD := $(mkfs_lustre_LDADD) +tunefs_lustre_DEPENDENCIES := $(mkfs_lustre_DEPENDENCIES) EXTRA_DIST = $(bin_scripts) $(sbin_scripts) diff --git 
a/lustre/utils/l_getgroups.c b/lustre/utils/l_getgroups.c index 61c87ee..de4bac0 100644 --- a/lustre/utils/l_getgroups.c +++ b/lustre/utils/l_getgroups.c @@ -128,14 +128,15 @@ int main(int argc, char **argv) else progname++; - if (strcmp(argv[1], "-d") == 0) - debug = 1; - if (argc != 3) { fprintf(stderr, "%s: bad parameter count\n", progname); usage(stderr); return EINVAL; } + + if (strcmp(argv[1], "-d") == 0) + debug = 1; + param->mgd_uid = strtoul(argv[2], &end, 0); if (*end) { fprintf(stderr, "%s: invalid uid '%s'\n", progname, argv[2]); diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 82bdbdc..fa92ab1 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -38,6 +38,15 @@ else: from fcntl import F_GETFL, F_SETFL PYMOD_DIR = ["/usr/lib64/lustre/python", "/usr/lib/lustre/python"] +PLATFORM = '' +KEXTPATH = '' +if string.find(sys.platform, 'linux') != -1: + PLATFORM='LINUX' +elif string.find(sys.platform, 'darwin') != -1: + PLATFORM='DARWIN' + KEXTPATH='/System/Library/Extensions/' +else: + PLATFORM='Unsupported' def development_mode(): base = os.path.dirname(sys.argv[0]) @@ -456,15 +465,25 @@ class LCTLInterface: # get list of devices def device_list(self): - devices = '/proc/fs/lustre/devices' ret = [] - if os.access(devices, os.R_OK): - try: - fp = open(devices, 'r') - ret = fp.readlines() - fp.close() - except IOError, e: - log(e) + if PLATFORM == 'LINUX': + devices = '/proc/fs/lustre/devices' + if os.access(devices, os.R_OK): + try: + fp = open(devices, 'r') + ret = fp.readlines() + fp.close() + except IOError, e: + log(e) + elif PLATFORM == 'DARWIN': + rc, out = self.run("device_list") + ret = out.split("\n") + if len(ret) == 0: + return ret + tail = ret[-1] + if not tail: + # remove the last empty line + ret = ret[:-1] return ret # get lustre version @@ -862,15 +881,24 @@ def sys_get_branch(): def mod_loaded(modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" - try: - fp = open('/proc/modules') - lines = fp.readlines() - fp.close() - # please forgive my tired fingers for this one - ret = filter(lambda word, mod=modname: word == mod, - map(lambda line: string.split(line)[0], lines)) - return ret - except Exception, e: + if PLATFORM == 'LINUX': + try: + fp = open('/proc/modules') + lines = fp.readlines() + fp.close() + # please forgive my tired fingers for this one + ret = filter(lambda word, mod=modname: word == mod, + map(lambda line: string.split(line)[0], lines)) + return ret + except Exception, e: + return 0 + elif PLATFORM == 'DARWIN': + ret, out = run('/usr/sbin/kextstat | /usr/bin/grep', modname) + if ret == 0: + return 1 + else: + return 0 + else: return 0 # XXX: instead of device_list, ask for $name and see what we get @@ -934,28 +962,31 @@ class kmod: if mod_loaded(mod) and not config.noexec: continue log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) - options = '' - if mod == 'lnet': - #For LNET we really need modprobe to load defined LNDs - run('/sbin/modprobe lnet') - #But if that fails, try insmod anyhow with dev option - #accept=all for dev liblustre testing - options = 'accept=all' - if src_dir: - module = find_module(src_dir, dev_dir, mod) - if not module: - panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module, options) - if rc and not mod_loaded(mod): - if rc == 1: - print("Bad module options? Check dmesg.") - raise CommandError('insmod', out, rc) - else: - (rc, out) = run('/sbin/modprobe', mod) - if rc and not mod_loaded(mod): - if rc == 1: - print("Bad module options? 
Check dmesg.") - raise CommandError('modprobe', out, rc) + if PLATFORM == 'LINUX': + options = '' + if mod == 'lnet': + #For LNET we really need modprobe to load defined LNDs + run('/sbin/modprobe lnet') + #But if that fails, try insmod anyhow with dev option + #accept=all for dev liblustre testing + options = 'accept=all' + if src_dir: + module = find_module(src_dir, dev_dir, mod) + if not module: + panic('module not found:', mod) + (rc, out) = run('/sbin/insmod', module, options) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") + raise CommandError('insmod', out, rc) + else: + (rc, out) = run('/sbin/modprobe', mod) + if rc and not mod_loaded(mod): + if rc == 1: + print("Bad module options? Check dmesg.") + raise CommandError('modprobe', out, rc) + elif PLATFORM == 'DARWIN': + run('/sbin/kextload', KEXTPATH + mod + '.kext'); def cleanup_module(self): """Unload the modules in the list in reverse order.""" @@ -979,7 +1010,10 @@ class kmod: log('unloading the network') lctl.unconfigure_network() if mod_loaded("ksocklnd"): - run('/sbin/rmmod ksocklnd') + if PLATFORM == 'LINUX': + run('/sbin/rmmod ksocklnd') + elif PLATFORM == 'DARWIN': + run('/sbin/kextunload', KEXTPATH+'ksocklnd.kext') if mod_loaded("kqswlnd"): run('/sbin/rmmod kqswlnd') if mod_loaded("kgmlnd"): @@ -994,7 +1028,10 @@ class kmod: run('/sbin/rmmod kralnd') if mod_loaded("kptllnd"): run('/sbin/rmmod kptllnd') - (rc, out) = run('/sbin/rmmod', mod) + if PLATFORM == 'LINUX': + (rc, out) = run('/sbin/rmmod', mod) + elif PLATFORM == 'DARWIN': + (rc, out) = run('/sbin/kextunload', KEXTPATH+mod+'.kext'); if rc: log('! 
unable to unload module:', mod) logall(out) @@ -1076,7 +1113,12 @@ class Network(Module): sys_optimize_elan() def safe_to_clean(self): - return not is_network_prepared() + if PLATFORM == 'LINUX': + return not is_network_prepared() + elif PLATFORM == 'DARWIN': + # XXX always assume it's safe to clean + return 1 + return 1 def cleanup(self): self.info(self.net_type, self.nid) @@ -1471,7 +1513,9 @@ class OSD(Module): self.journal_size = self.db.get_val_int('journalsize', 0) # now as we store fids in EA on OST we need to make inode bigger - self.inode_size = self.db.get_val_int('inodesize', 256) + self.inode_size = self.db.get_val_int('inodesize', 0) + if self.inode_size == 0: + self.inode_size = 256 self.mkfsoptions = self.db.get_val('mkfsoptions', '') # Allocate fewer inodes on large OST devices. Most filesystems # can be much more aggressive than this, but by default we can't. @@ -1631,7 +1675,7 @@ class Client(Module): self.name = self_name self.uuid = uuid self.lookup_server(self.tgt_dev_uuid) - self.lookup_backup_targets() + self.lookup_backup_targets() self.fs_name = fs_name if not module_dir: module_dir = module @@ -1697,7 +1741,6 @@ class Client(Module): else: for srv in this_nets: lctl.connect(srv) - break if srv: lctl.add_conn(self.name, srv.nid_uuid); @@ -2252,14 +2295,17 @@ def doHost(lustreDB, hosts): for_each_profile(node_db, prof_list, doSetup) return - sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) - sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) + if PLATFORM == 'LINUX': + sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) + sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) for_each_profile(node_db, prof_list, doModules) - sys_set_debug_path() - sys_set_ptldebug(ptldebug) - sys_set_subsystem(subsystem) + if PLATFORM == 'LINUX': + # XXX need to be fixed for Darwin + sys_set_debug_path() + sys_set_ptldebug(ptldebug) + sys_set_subsystem(subsystem) script = config.gdb_script run(lctl.lctl, ' modules >', 
script) if config.gdb: diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 77237d4..349c703 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include @@ -322,7 +322,7 @@ static int lfs_osts(int argc, char **argv) } else { mnt = getmntent(fp); while (feof(fp) == 0 && ferror(fp) ==0) { - if (llapi_is_lustre_mnttype(mnt->mnt_type)) { + if (llapi_is_lustre_mnttype(mnt)) { rc = llapi_find(mnt->mnt_dir, obduuid, 0, 0, 0); if (rc) fprintf(stderr, @@ -370,7 +370,7 @@ static int path2mnt(char *path, FILE *fp, char *mntdir, int dir_len) len = 0; mnt = getmntent(fp); while (feof(fp) == 0 && ferror(fp) == 0) { - if (llapi_is_lustre_mnttype(mnt->mnt_type)) { + if (llapi_is_lustre_mnttype(mnt)) { len = strlen(mnt->mnt_dir); if (len > out_len && !strncmp(rpath, mnt->mnt_dir, len)) { @@ -384,42 +384,36 @@ static int path2mnt(char *path, FILE *fp, char *mntdir, int dir_len) if (out_len > 0) return 0; - + fprintf(stderr, "error: lfs df: %s isn't mounted on lustre\n", path); return -EINVAL; } static int showdf(char *mntdir, struct obd_statfs *stat, - struct obd_uuid *uuid, int ishow, int cooked, + char *uuid, int ishow, int cooked, char *type, int index, int rc) { __u64 avail, used, total; double ratio = 0; - int obd_type; char *suffix = "KMGTPEZY"; char tbuf[10], ubuf[10], abuf[10], rbuf[10]; - if (!uuid || !stat || !type) - return -EINVAL; - if (!strncmp(type, "MDT", 3)) { - obd_type = 0; - } else if(!strncmp(type, "OST", 3)){ - obd_type = 1; - } else { - fprintf(stderr, "error: lfs df: invalid type '%s'\n", type); + if (!uuid || !stat) return -EINVAL; - } - if (rc == 0) { + switch (rc) { + case 0: if (ishow) { avail = stat->os_ffree; used = stat->os_files - stat->os_ffree; total = stat->os_files; } else { - avail = stat->os_bavail * stat->os_bsize / 1024; + int shift = cooked ? 
0 : 10; + + avail = (stat->os_bavail * stat->os_bsize) >> shift; used = stat->os_blocks - stat->os_bavail; - used = used * stat->os_bsize / 1024; - total = stat->os_blocks * stat->os_bsize / 1024; + used = (used * stat->os_bsize) >> shift; + total = (stat->os_blocks * stat->os_bsize) >> shift; } if (total > 0) @@ -427,26 +421,26 @@ static int showdf(char *mntdir, struct obd_statfs *stat, if (cooked) { int i; - double total_d, used_d, avail_d; - - total_d = (double)total; - i = COOK(total_d); + double cook_val; + + cook_val = (double)total; + i = COOK(cook_val); if (i > 0) - sprintf(tbuf, HDF"%c", total_d, suffix[i - 1]); + sprintf(tbuf, HDF"%c", cook_val, suffix[i - 1]); else sprintf(tbuf, CDF, total); - used_d = (double)used; - i = COOK(used_d); + cook_val = (double)used; + i = COOK(cook_val); if (i > 0) - sprintf(ubuf, HDF"%c", used_d, suffix[i - 1]); + sprintf(ubuf, HDF"%c", cook_val, suffix[i - 1]); else sprintf(ubuf, CDF, used); - avail_d = (double)avail; - i = COOK(avail_d); + cook_val = (double)avail; + i = COOK(cook_val); if (i > 0) - sprintf(abuf, HDF"%c", avail_d, suffix[i - 1]); + sprintf(abuf, HDF"%c", cook_val, suffix[i - 1]); else sprintf(abuf, CDF, avail); } else { @@ -456,23 +450,19 @@ static int showdf(char *mntdir, struct obd_statfs *stat, } sprintf(rbuf, RDF, (int)(ratio * 100)); - if (obd_type == 0) - printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s[MDT:%d]\n", - (char *)uuid, tbuf, ubuf, abuf, rbuf, - mntdir, index); + printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s", + uuid, tbuf, ubuf, abuf, rbuf, mntdir); + if (type) + printf("[%s:%d]\n", type, index); else - printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s[OST:%d]\n", - (char *)uuid, tbuf, ubuf, abuf, rbuf, - mntdir, index); + printf("\n"); - return 0; - } - switch (rc) { + break; case -ENODATA: - printf(UUF": inactive OST\n", (char *)uuid); + printf(UUF": inactive device\n", uuid); break; default: - printf(UUF": %s\n", (char *)uuid, strerror(-rc)); + printf(UUF": %s\n", uuid, strerror(-rc)); break; } @@ -481,12 
+471,9 @@ static int showdf(char *mntdir, struct obd_statfs *stat, static int mntdf(char *mntdir, int ishow, int cooked) { - struct obd_statfs stat_buf; + struct obd_statfs stat_buf, sum = { .os_bsize = 1 }; struct obd_uuid uuid_buf; __u32 index; - __u64 avail_sum, used_sum, total_sum; - char tbuf[10], ubuf[10], abuf[10], rbuf[10]; - double ratio_sum = 0; int rc; if (ishow) @@ -495,10 +482,9 @@ static int mntdf(char *mntdir, int ishow, int cooked) "IUse%", "Mounted on"); else printf(UUF" "CSF" "CSF" "CSF" "RSF" %-s\n", - "UUID", "1K-blocks", "Used", "Available", - "Use%", "Mounted on"); + "UUID", cooked ? "bytes" : "1K-blocks", + "Used", "Available", "Use%", "Mounted on"); - avail_sum = total_sum = 0; for (index = 0; ; index++) { memset(&stat_buf, 0, sizeof(struct obd_statfs)); memset(&uuid_buf, 0, sizeof(struct obd_uuid)); @@ -509,7 +495,7 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked, + showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, "MDT", index, rc); } else { fprintf(stderr, @@ -517,13 +503,13 @@ static int mntdf(char *mntdir, int ishow, int cooked) uuid_buf.uuid, strerror(-rc), rc); return rc; } - if (!rc && ishow) { - avail_sum += stat_buf.os_ffree; - total_sum += stat_buf.os_files; + if (rc == 0) { + sum.os_ffree += stat_buf.os_ffree; + sum.os_files += stat_buf.os_files; } } - for (index = 0;;index++) { + for (index = 0; ; index++) { memset(&stat_buf, 0, sizeof(struct obd_statfs)); memset(&uuid_buf, 0, sizeof(struct obd_uuid)); rc = llapi_obd_statfs(mntdir, LL_STATFS_LOV, index, @@ -533,7 +519,7 @@ static int mntdf(char *mntdir, int ishow, int cooked) if (rc == -ENOTCONN || rc == -ETIMEDOUT || rc == -EIO || rc == -ENODATA || rc == 0) { - showdf(mntdir, &stat_buf, &uuid_buf, ishow, cooked, + showdf(mntdir, &stat_buf, uuid_buf.uuid, ishow, cooked, "OST", index, rc); } else { fprintf(stderr, @@ -541,55 
+527,15 @@ static int mntdf(char *mntdir, int ishow, int cooked) strerror(-rc), rc); return rc; } - if (!rc && !ishow) { - __u64 avail, total; - avail = stat_buf.os_bavail * stat_buf.os_bsize; - avail /= 1024; - total = stat_buf.os_blocks * stat_buf.os_bsize; - total /= 1024; - - avail_sum += avail; - total_sum += total; + if (rc == 0) { + sum.os_blocks += stat_buf.os_blocks * stat_buf.os_bsize; + sum.os_bfree += stat_buf.os_bfree * stat_buf.os_bsize; + sum.os_bavail += stat_buf.os_bavail * stat_buf.os_bsize; } } - used_sum = total_sum - avail_sum; - if (total_sum > 0) - ratio_sum = (double)(total_sum - avail_sum) / (double)total_sum; - sprintf(rbuf, RDF, (int)(ratio_sum * 100)); - if (cooked) { - int i; - char *suffix = "KMGTPEZY"; - double total_sum_d, used_sum_d, avail_sum_d; - - total_sum_d = (double)total_sum; - i = COOK(total_sum_d); - if (i > 0) - sprintf(tbuf, HDF"%c", total_sum_d, suffix[i - 1]); - else - sprintf(tbuf, CDF, total_sum); - - used_sum_d = (double)used_sum; - i = COOK(used_sum_d); - if (i > 0) - sprintf(ubuf, HDF"%c", used_sum_d, suffix[i - 1]); - else - sprintf(ubuf, CDF, used_sum); - - avail_sum_d = (double)avail_sum; - i = COOK(avail_sum_d); - if (i > 0) - sprintf(abuf, HDF"%c", avail_sum_d, suffix[i - 1]); - else - sprintf(abuf, CDF, avail_sum); - } else { - sprintf(tbuf, CDF, total_sum); - sprintf(ubuf, CDF, used_sum); - sprintf(abuf, CDF, avail_sum); - } - - printf("\n"UUF" "CSF" "CSF" "CSF" "RSF" %-s\n", - "filesystem summary:", tbuf, ubuf, abuf, rbuf, mntdir); + printf("\n"); + showdf(mntdir, &sum, "filesystem summary:", ishow, cooked, NULL, 0,0); return 0; } @@ -639,7 +585,7 @@ static int lfs_df(int argc, char **argv) } else { mnt = getmntent(fp); while (feof(fp) == 0 && ferror(fp) == 0) { - if (llapi_is_lustre_mnttype(mnt->mnt_type)) { + if (llapi_is_lustre_mnttype(mnt)) { rc = mntdf(mnt->mnt_dir, ishow, cooked); if (rc) break; @@ -690,7 +636,7 @@ static int lfs_check(int argc, char **argv) } else { mnt = getmntent(fp); while 
(feof(fp) == 0 && ferror(fp) ==0) { - if (llapi_is_lustre_mnttype(mnt->mnt_type)) + if (llapi_is_lustre_mnttype(mnt)) break; mnt = getmntent(fp); } @@ -731,7 +677,7 @@ static int lfs_catinfo(int argc, char **argv) } else { mnt = getmntent(fp); while (feof(fp) == 0 && ferror(fp) == 0) { - if (llapi_is_lustre_mnttype(mnt->mnt_type)) + if (llapi_is_lustre_mnttype(mnt)) break; mnt = getmntent(fp); } diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 5c4c68a..c320aed 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -40,8 +40,8 @@ #include #include #include -#ifdef HAVE_LINUX_TYPES_H -#include +#ifdef HAVE_ASM_TYPES_H +#include #endif #ifdef HAVE_LINUX_UNISTD_H #include @@ -52,10 +52,10 @@ #include #include -#include -#include +#include +#include #include -#include +#include #include static void err_msg(char *fmt, ...) @@ -889,9 +889,12 @@ int llapi_catinfo(char *dir, char *keyword, char *node_name) return rc; } -int llapi_is_lustre_mnttype(char *type) +/* Is this a lustre client fs? 
*/ +int llapi_is_lustre_mnttype(struct mntent *mnt) { - return (strcmp(type,"lustre") == 0 || strcmp(type,"lustre_lite") == 0); + char *type = mnt->mnt_type; + return ((strcmp(type, "lustre") == 0 || strcmp(type,"lustre_lite") == 0) + && (strstr(mnt->mnt_fsname, ":/") != NULL)); } int llapi_quotacheck(char *mnt, int check_type) diff --git a/lustre/utils/llmount.c b/lustre/utils/llmount.c index fc75f21..1c10faa 100644 --- a/lustre/utils/llmount.c +++ b/lustre/utils/llmount.c @@ -41,7 +41,9 @@ int verbose; int nomtab; int fake; int force; +int retry; static char *progname = NULL; +#define MAX_RETRIES 99 void usage(FILE *out) { @@ -59,6 +61,7 @@ void usage(FILE *out) "\t-v|--verbose: print verbose config settings\n" "\t-o: filesystem mount options:\n" "\t\tflock/noflock: enable/disable flock support\n" + "\t\troute=[-]:[-]: portal route to MDS\n" "\t\tuser_xattr/nouser_xattr: enable/disable user extended " "attributes\n" ); @@ -115,6 +118,9 @@ update_mtab_entry(char *spec, char *mtpt, char *type, char *opts, fprintf(stderr, "%s: addmntent: %s:", progname, strerror (errno)); rc = 16; + } else if (verbose > 1) { + fprintf(stderr, "%s: added %s on %s to %s\n", + progname, spec, mtpt, MOUNTED); } endmntent(fp); } @@ -141,6 +147,7 @@ print_options(FILE *out, struct lustre_mount_data *lmd, const char *options) fprintf(out, "mds name: %s\n", lmd->lmd_mds); fprintf(out, "profile: %s\n", lmd->lmd_profile); fprintf(out, "options: %s\n", options); + fprintf(out, "retry: %d\n", retry); return 0; } @@ -243,8 +250,11 @@ int parse_options(char *options, struct lustre_mount_data *lmd, int *flagp) if ((opteq = strchr(opt, '='))) { val = atoi(opteq + 1); *opteq = '\0'; - if (0) { - /* All the network options have gone :)) */ + if (!strcmp(opt, "retry")) { + if (val >= 0 || val < MAX_RETRIES) + retry = val; + else + retry = 0; } else { fprintf(stderr, "%s: unknown option '%s'. 
" "Ignoring.\n", progname, opt); @@ -353,12 +363,14 @@ int main(int argc, char *const argv[]) switch (opt) { case 1: ++force; - printf("force: %d\n", force); + if (verbose) + printf("force: %d\n", force); nargs++; break; case 'f': ++fake; - printf("fake: %d\n", fake); + if (verbose) + printf("fake: %d\n", fake); nargs++; break; case 'h': @@ -366,7 +378,8 @@ int main(int argc, char *const argv[]) break; case 'n': ++nomtab; - printf("nomtab: %d\n", nomtab); + if (verbose) + printf("nomtab: %d\n", nomtab); nargs++; break; case 'o': @@ -428,15 +441,29 @@ int main(int argc, char *const argv[]) return 1; } - if (!fake) - rc = mount(source, target, "lustre", flags, (void *)&lmd); + if (!fake) { + FILE *modpipe = popen("/sbin/modprobe -q llite", "r"); + if (modpipe != NULL) + pclose(modpipe); + /* use <= to include the initial mount before we retry */ + for (i = 0, rc = -EAGAIN; i <= retry && rc != 0; i++) + rc = mount(source, target, "lustre", flags, &lmd); + } if (rc) { fprintf(stderr, "%s: mount(%s, %s) failed: %s\n", progname, source, target, strerror(errno)); print_options(stderr, &lmd, options); - if (errno == ENODEV) + if (errno == ENODEV) { + struct utsname unamebuf; + char *modfile = "/etc/modutils.conf"; + + if (uname(&unamebuf) == 0 && + strncmp(unamebuf.release, "2.4", 3) == 0) + modfile = "/etc/modules.conf"; + fprintf(stderr, "Are the lustre modules loaded?\n" - "Check /etc/modules.conf and /proc/filesystems\n"); + "Check %s and /proc/filesystems\n"); + } rc = 32; } else if (!nomtab) { rc = update_mtab_entry(source, target, "lustre", options,0,0,0); diff --git a/lustre/utils/llog_reader.c b/lustre/utils/llog_reader.c index 0a7ea24..9bcf577 100644 --- a/lustre/utils/llog_reader.c +++ b/lustre/utils/llog_reader.c @@ -28,7 +28,7 @@ #include #include -#include +#include int llog_pack_buffer(int fd, struct llog_log_hdr** llog_buf, struct llog_rec_hdr*** recs, int* recs_number); diff --git a/lustre/utils/lustre_cfg.c b/lustre/utils/lustre_cfg.c index 
2a50cf2..342a4da 100644 --- a/lustre/utils/lustre_cfg.c +++ b/lustre/utils/lustre_cfg.c @@ -33,11 +33,12 @@ #ifndef __KERNEL__ #include #endif -#include -#include -#include -#include -#include /* for struct lov_stripe_md */ +#include +#include +#include +#include +#include /* for struct lov_stripe_md */ +#include #include #include @@ -388,6 +389,8 @@ int jt_lcfg_lov_setup(int argc, char **argv) jt_cmdname(argv[0]), argv[5]); return CMD_HELP; } + desc.ld_qos_threshold = QOS_DEFAULT_THRESHOLD; + desc.ld_qos_maxage = QOS_DEFAULT_MAXAGE; if (argc == 7) { desc.ld_tgt_count = strtoul(argv[6], &end, 0); diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index b00e2ed..6f35a32 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -37,19 +38,13 @@ #include #include -#define NO_SYS_VFS 1 +//#define HAVE_SYS_VFS_H 1 #include // for BLKGETSIZE64 -#include -#include +#include +#include #include -#include -#include "obdctl.h" +#include -/* So obd.o will link */ -#include "parser.h" -command_t cmdlist[] = { - { 0, 0, 0, NULL } -}; #define MAX_LOOP_DEVICES 16 #define L_BLOCK_SIZE 4096 @@ -180,6 +175,29 @@ int run_command(char *cmd) return rc; } +static int check_mtab_entry(char *spec, char *type) +{ + FILE *fp; + struct mntent *mnt; + + fp = setmntent(MOUNTED, "r"); + if (fp == NULL) + return(0); + + while ((mnt = getmntent(fp)) != NULL) { + if (strcmp(mnt->mnt_fsname, spec) == 0 && + strcmp(mnt->mnt_type, type) == 0) { + endmntent(fp); + fprintf(stderr, "%s: according to %s %s is " + "already mounted on %s\n", + progname, MOUNTED, spec, mnt->mnt_dir); + return(EEXIST); + } + } + endmntent(fp); + + return(0); +} /*============ disk dev functions ===================*/ @@ -341,17 +359,14 @@ static int file_in_dev(char *file_name, char *dev_name) return 0; } -/* Check whether the device has already been fomatted by mkfs.lustre */ +/* Check whether the device has already 
been used with lustre */ static int is_lustre_target(struct mkfs_opts *mop) { int rc; - /* Check whether there exist MOUNT_DATA_FILE, - LAST_RCVD or CATLIST in the device. */ vprint("checking for existing Lustre data\n"); if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device)) - || (rc = file_in_dev(LAST_RCVD, mop->mo_device)) - || (rc = file_in_dev(CATLIST, mop->mo_device))) { + || (rc = file_in_dev(LAST_RCVD, mop->mo_device))) { vprint("found Lustre data\n"); /* in the -1 case, 'extents' means this really IS a lustre target */ @@ -461,6 +476,10 @@ int make_lustre_backfs(struct mkfs_opts *mop) strcat(mop->mo_mkfsopts, " -O dir_index"); } + /* Allow reformat of full devices (as opposed to + partitions.) We already checked for mounted dev. */ + strcat(mop->mo_mkfsopts, " -F"); + sprintf(mkfs_cmd, "mkfs.ext2 -j -b %d -L %s ", L_BLOCK_SIZE, mop->mo_ldd.ldd_svname); @@ -479,7 +498,7 @@ int make_lustre_backfs(struct mkfs_opts *mop) return EINVAL; } - /* Loop device? */ + /* For loop device format the dev, not the filename */ dev = mop->mo_device; if (mop->mo_flags & MO_IS_LOOP) dev = mop->mo_loopdev; @@ -1079,6 +1098,10 @@ int main(int argc, char *const argv[]) /* device is last arg */ strcpy(mop.mo_device, argv[argc - 1]); + + if (check_mtab_entry(mop.mo_device, "lustre")) + return(EEXIST); + /* Are we using a loop device? 
*/ ret = is_block(mop.mo_device); if (ret < 0) diff --git a/lustre/utils/mount_lustre.c b/lustre/utils/mount_lustre.c index 8631dc1..be8ebdf 100644 --- a/lustre/utils/mount_lustre.c +++ b/lustre/utils/mount_lustre.c @@ -34,8 +34,8 @@ #include #include #include -#include #include "obdctl.h" +#include int verbose = 0; int nomtab = 0; @@ -83,10 +83,11 @@ static int check_mtab_entry(char *spec, char *mtpt, char *type) if (strcmp(mnt->mnt_fsname, spec) == 0 && strcmp(mnt->mnt_dir, mtpt) == 0 && strcmp(mnt->mnt_type, type) == 0) { + endmntent(fp); fprintf(stderr, "%s: according to %s %s is " "already mounted on %s\n", progname, MOUNTED, spec, mtpt); - return(1); /* or should we return an error? */ + return(EEXIST); } } endmntent(fp); diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 1184ffd..85ee351 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -38,7 +38,7 @@ #include "obdctl.h" -#include /* for struct lov_stripe_md */ +#include /* for struct lov_stripe_md */ #include #include @@ -52,9 +52,10 @@ #include /* needed for PAGE_SIZE - rread */ #endif -#include +#include #include #include "parser.h" +#include "platform.h" #include #define MAX_STRING_SIZE 128 @@ -72,8 +73,8 @@ struct shared_data { __u64 offsets[MAX_THREADS]; int running; int barrier; - pthread_mutex_t mutex; - pthread_cond_t cond; + l_mutex_t mutex; + l_cond_t cond; }; static struct shared_data *shared_data; @@ -486,12 +487,12 @@ static void shmem_setup(void) static inline void shmem_lock(void) { - pthread_mutex_lock(&shared_data->mutex); + l_mutex_lock(&shared_data->mutex); } static inline void shmem_unlock(void) { - pthread_mutex_unlock(&shared_data->mutex); + l_mutex_unlock(&shared_data->mutex); } static inline void shmem_reset(int total_threads) @@ -500,8 +501,8 @@ static inline void shmem_reset(int total_threads) return; memset(shared_data, 0, sizeof(*shared_data)); - pthread_mutex_init(&shared_data->mutex, NULL); - pthread_cond_init(&shared_data->cond, NULL); + 
l_mutex_init(&shared_data->mutex); + l_cond_init(&shared_data->cond); memset(counter_snapshot, 0, sizeof(counter_snapshot)); prev_valid = 0; shared_data->barrier = total_threads; @@ -875,6 +876,7 @@ int jt_get_version(int argc, char **argv) memset(buf, 0, sizeof(buf)); data->ioc_version = OBD_IOCTL_VERSION; data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data)); + data->ioc_inlbuf1 = buf + size_round(sizeof(*data)); data->ioc_len = obd_ioctl_packlen(data); rc = l2_ioctl(OBD_DEV_ID, OBD_GET_VERSION, buf); @@ -892,6 +894,7 @@ int jt_get_version(int argc, char **argv) int jt_obd_list(int argc, char **argv) { int rc; +#if HAVE_PROC_FS char buf[MAX_STRING_SIZE]; FILE *fp = fopen(DEVICES_LIST, "r"); @@ -908,8 +911,40 @@ int jt_obd_list(int argc, char **argv) printf("%s", buf); fclose(fp); - return 0; +#else + /* No /proc filesystem, get device list by ioctl */ + int index; + char buf[8192]; + struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf; + + if (argc != 1) + return CMD_HELP; + + for (index = 0;; index++) { + memset(buf, 0, sizeof(buf)); + data->ioc_version = OBD_IOCTL_VERSION; + data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data)); + data->ioc_inlbuf1 = buf + size_round(sizeof(*data)); + data->ioc_len = obd_ioctl_packlen(data); + data->ioc_count = index; + + rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_GETDEVICE, buf); + if (rc != 0) + break; + printf("%s\n", (char *)data->ioc_bulk); + } + if (rc != 0) { + if (errno == ENOENT) + /* no device or the last device */ + rc = 0; + else + fprintf(stderr, "Error getting device list: %s: " + "check dmesg.\n", + strerror(errno)); + } + return rc; +#endif } /* Get echo client's stripe meta-data for the given object @@ -1585,9 +1620,9 @@ int jt_obd_test_brw(int argc, char **argv) shared_data->barrier--; if (shared_data->barrier == 0) - pthread_cond_broadcast(&shared_data->cond); + l_cond_broadcast(&shared_data->cond); else - pthread_cond_wait(&shared_data->cond, + l_cond_wait(&shared_data->cond, &shared_data->mutex); 
shmem_unlock (); diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index 430bcdc..afbfb52 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -10,10 +10,10 @@ #include #endif -#include -#include -#include -#include +#include +#include +#include +#include /* obd.c */ int do_disconnect(char *func, int verbose); diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h index e6fa3b9..8813de4 100644 --- a/lustre/utils/obdiolib.h +++ b/lustre/utils/obdiolib.h @@ -11,9 +11,9 @@ #include #include -#include -#include -#include +#include +#include +#include struct obdio_conn { int oc_fd; diff --git a/lustre/utils/parser.c b/lustre/utils/parser.c index 9c23e77..2cb518d 100644 --- a/lustre/utils/parser.c +++ b/lustre/utils/parser.c @@ -28,19 +28,7 @@ #include #include -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include - -/* completion_matches() is #if 0-ed out in modern glibc */ -#ifndef completion_matches -# define completion_matches rl_completion_matches -#endif -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); -#endif - +#include "platform.h" #include "parser.h" static command_t * top_level; /* Top level of commands, initialized by @@ -345,6 +333,7 @@ char * readline(char * prompt) char *line = malloc(size); char *ptr = line; int c; + int eof = 0; if (line == NULL) return NULL; @@ -370,6 +359,7 @@ char * readline(char * prompt) line = tmp; } } else { + eof = 1; if (ferror(stdin)) goto outfree; goto out; @@ -377,6 +367,10 @@ char * readline(char * prompt) } out: *ptr = 0; + if (eof && (strlen(line) == 0)) { + free(line); + line = NULL; + } return line; outfree: free(line); diff --git a/lustre/utils/platform.h b/lustre/utils/platform.h new file mode 100644 index 0000000..4f5b5c9 --- /dev/null +++ b/lustre/utils/platform.h @@ -0,0 +1,248 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File 
Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ +#ifndef __LUSTRE_UTILS_PLATFORM_H +#define __LUSTRE_UTILS_PLATFORM_H + +#ifdef __linux__ + +#ifdef HAVE_LIBREADLINE +#define READLINE_LIBRARY +#include + +/* completion_matches() is #if 0-ed out in modern glibc */ + +#ifndef completion_matches +# define completion_matches rl_completion_matches +#endif +extern void using_history(void); +extern void stifle_history(int); +extern void add_history(char *); +#endif /* HAVE_LIBREADLINE */ + +#include +#include +#if HAVE_LIBPTHREAD +#include +#include +#include + +typedef pthread_mutex_t l_mutex_t; +typedef pthread_cond_t l_cond_t; +#define l_mutex_init(s) pthread_mutex_init(s, NULL) +#define l_mutex_lock(s) pthread_mutex_lock(s) +#define l_mutex_unlock(s) pthread_mutex_unlock(s) +#define l_cond_init(c) pthread_cond_init(c, NULL) +#define l_cond_broadcast(c) pthread_cond_broadcast(c) +#define l_cond_wait(c, s) pthread_cond_wait(c, s) +#endif + +#elif __APPLE__ + +#ifdef HAVE_LIBREADLINE +#define READLINE_LIBRARY +#include +typedef VFunction rl_vintfunc_t; +typedef VFunction rl_voidfunc_t; +#endif /* HAVE_LIBREADLINE */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * POSIX compliant inter-process synchronization aren't supported well + * in Darwin, pthread_mutex_t and pthread_cond_t can 
only work as + * inter-thread synchronization, they wouldn't work even being put in + * shared memory for multi-process. PTHREAD_PROCESS_SHARED is not + * supported by Darwin also (pthread_mutexattr_setpshared() with the + * PTHREAD_PROCESS_SHARED attribute will return EINVAL). + * + * The only inter-process sychronization mechanism can be used in Darwin + * is POSIX NAMED semaphores and file lock, here we use NAMED semaphore + * to implement mutex and condition. + * + * XXX Liang: + * They are just proto-type now, more tests are needed. + */ +#define L_LOCK_DEBUG (0) + +#define L_SEM_NAMESIZE 32 + +typedef struct { + sem_t *s_sem; +#if L_LOCK_DEBUG + char s_name[L_SEM_NAMESIZE]; +#endif +} l_sem_t; + +typedef l_sem_t l_mutex_t; + +typedef struct { + l_mutex_t c_guard; + int c_count; + l_sem_t c_waiter; +} l_cond_t; + +static inline int l_sem_init(l_sem_t *sem, int val) +{ + char *s_name; +#if L_LOCK_DEBUG + s_name = sem->s_name; +#else + char buf[L_SEM_NAMESIZE]; + s_name = buf; +#endif + /* get an unique name for named semaphore */ + snprintf(s_name, L_SEM_NAMESIZE, "%d-%p", (int)getpid(), sem); + sem->s_sem = sem_open(s_name, O_CREAT, 0600, val); + if ((int)sem->s_sem == SEM_FAILED) { + fprintf(stderr, "lock %s creating fail: %d, %d!\n", + s_name, (int)sem->s_sem, errno); + return -1; + } else { +#if L_LOCK_DEBUG + printf("open lock: %s\n", s_name); +#endif + } + return 0; +} + +static inline void l_sem_done(l_sem_t *sem) +{ +#if L_LOCK_DEBUG + printf("close lock: %s.\n", sem->s_name); +#endif + sem_close(sem->s_sem); +} + +static inline void l_sem_down(l_sem_t *sem) +{ +#if L_LOCK_DEBUG + printf("sem down :%s\n", sem->s_name); +#endif + sem_wait(sem->s_sem); +} + +static inline void l_sem_up(l_sem_t *sem) +{ +#if L_LOCK_DEBUG + printf("sem up :%s\n", sem->s_name); +#endif + sem_post(sem->s_sem); +} + +static inline void l_mutex_init(l_mutex_t *mutex) +{ + l_sem_init((l_sem_t *)mutex, 1); +} + +static inline void l_mutex_init_locked(l_mutex_t *mutex) +{ + 
l_sem_init((l_sem_t *)mutex, 0); +} + +static inline void l_mutex_done(l_mutex_t *mutex) +{ + l_sem_done((l_sem_t *)mutex); +} + +static inline void l_mutex_lock(l_mutex_t *mutex) +{ +#if L_LOCK_DEBUG + printf("lock mutex :%s\n", mutex->s_name); +#endif + sem_wait(mutex->s_sem); +} + +static inline void l_mutex_unlock(l_mutex_t *mutex) +{ +#if L_LOCK_DEBUG + printf("unlock mutex: %s\n", mutex->s_name); +#endif + sem_post(mutex->s_sem); +} + +static inline void l_cond_init(l_cond_t *cond) +{ + l_mutex_init(&cond->c_guard); + l_sem_init(&cond->c_waiter, 0); + cond->c_count = 0; +} + +static inline void l_cond_done(l_cond_t *cond) +{ + if (cond->c_count != 0) + fprintf(stderr, "your waiter list is not empty: %d!\n", cond->c_count); + l_mutex_done(&cond->c_guard); + l_sem_done(&cond->c_waiter); +} + +static inline void l_cond_wait(l_cond_t *cond, l_mutex_t *lock) +{ + l_mutex_lock(&cond->c_guard); + cond->c_count --; + l_mutex_unlock(&cond->c_guard); + l_mutex_unlock(lock); + l_sem_down(&cond->c_waiter); + l_mutex_lock(lock); +} + +static inline void l_cond_broadcast(l_cond_t *cond) +{ + l_mutex_lock(&cond->c_guard); + while (cond->c_count < 0) { + l_sem_up(&cond->c_waiter); + cond->c_count ++; + } + l_mutex_unlock(&cond->c_guard); +} + +#else /* other platform */ + +#ifdef HAVE_LIBREADLINE +#define READLINE_LIBRARY +#include +#endif /* HAVE_LIBREADLINE */ +#include +#include +#if HAVE_LIBPTHREAD +#include +#include +#include + +typedef pthread_mutex_t l_mutex_t; +typedef pthread_cond_t l_cond_t; +#define l_mutex_init(s) pthread_mutex_init(s, NULL) +#define l_mutex_lock(s) pthread_mutex_lock(s) +#define l_mutex_unlock(s) pthread_mutex_unlock(s) +#define l_cond_init(c) pthread_cond_init(c, NULL) +#define l_cond_broadcast(c) pthread_cond_broadcast(c) +#define l_cond_wait(c, s) pthread_cond_wait(c, s) +#endif /* HAVE_LIBPTHREAD */ + +#endif /* __linux__ */ + +#endif diff --git a/lustre/utils/rmmod_all.sh b/lustre/utils/rmmod_all.sh index 9ae82bb..0e1726f 100755 --- 
a/lustre/utils/rmmod_all.sh +++ b/lustre/utils/rmmod_all.sh @@ -3,6 +3,6 @@ SRCDIR=`dirname $0` PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH -lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 +lctl modules | awk '{ print $2 }' | xargs rmmod >/dev/null 2>&1 # do it again, in case we tried to unload ksocklnd too early -lctl modules | awk '{ print $2 }' | xargs rmmod +lsmod | grep lnet > /dev/null && lctl modules | awk '{ print $2 }' | xargs rmmod diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 3b781cc..5a1f55a 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include +#include #define BLANK_LINE() \ do { \ @@ -494,11 +494,10 @@ check_lov_desc(void) CHECK_MEMBER(lov_desc, ld_pattern); CHECK_MEMBER(lov_desc, ld_default_stripe_size); CHECK_MEMBER(lov_desc, ld_default_stripe_offset); - CHECK_MEMBER(lov_desc, ld_default_stripe_offset); + CHECK_MEMBER(lov_desc, ld_qos_threshold); + CHECK_MEMBER(lov_desc, ld_qos_maxage); CHECK_MEMBER(lov_desc, ld_padding_1); CHECK_MEMBER(lov_desc, ld_padding_2); - CHECK_MEMBER(lov_desc, ld_padding_3); - CHECK_MEMBER(lov_desc, ld_padding_4); CHECK_MEMBER(lov_desc, ld_uuid); } diff --git a/lustre/utils/wirehdr.c b/lustre/utils/wirehdr.c index 0b65ac1..95de9db 100644 --- a/lustre/utils/wirehdr.c +++ b/lustre/utils/wirehdr.c @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #undef LASSERT #undef LASSERTF diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 5a594d0..fd6c2f2 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1,7 +1,7 @@ #include #include -#include -#include +#include +#include #undef LASSERT #undef LASSERTF @@ -30,7 +30,6 @@ void lustre_assert_wire_constants(void) * running on Linux tau 2.6.15-dirty #13 SMP Sat Feb 11 18:30:54 MSK 2006 i686 i686 i386 GNU/ * with gcc version 3.3.3 (SuSE Linux) */ - /* Constants... 
*/ LASSERTF(PTLRPC_MSG_MAGIC == 0x0BD00BD0," found %lld\n", (long long)PTLRPC_MSG_MAGIC); @@ -1171,26 +1170,22 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_default_stripe_offset) == 24, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_default_stripe_offset)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset) == 8, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_default_stripe_offset)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 32, " found %lld\n", + LASSERTF((int)offsetof(struct lov_desc, ld_qos_threshold) == 32, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_qos_threshold)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_threshold) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_threshold)); + LASSERTF((int)offsetof(struct lov_desc, ld_qos_maxage) == 36, " found %lld\n", + (long long)(int)offsetof(struct lov_desc, ld_qos_maxage)); + LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_qos_maxage) == 4, " found %lld\n", + (long long)(int)sizeof(((struct lov_desc *)0)->ld_qos_maxage)); + LASSERTF((int)offsetof(struct lov_desc, ld_padding_1) == 40, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_padding_1)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_1) == 4, " found %lld\n", (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_1)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 36, " found %lld\n", + LASSERTF((int)offsetof(struct lov_desc, ld_padding_2) == 44, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_padding_2)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_2) == 4, " found %lld\n", (long 
long)(int)sizeof(((struct lov_desc *)0)->ld_padding_2)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_3) == 40, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_3)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_3) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_3)); - LASSERTF((int)offsetof(struct lov_desc, ld_padding_4) == 44, " found %lld\n", - (long long)(int)offsetof(struct lov_desc, ld_padding_4)); - LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_padding_4) == 4, " found %lld\n", - (long long)(int)sizeof(((struct lov_desc *)0)->ld_padding_4)); LASSERTF((int)offsetof(struct lov_desc, ld_uuid) == 48, " found %lld\n", (long long)(int)offsetof(struct lov_desc, ld_uuid)); LASSERTF((int)sizeof(((struct lov_desc *)0)->ld_uuid) == 40, " found %lld\n",