Whamcloud - gitweb
Merge b1_5 from b1_4 (20060616_1515)
authorscjody <scjody>
Wed, 21 Jun 2006 20:16:58 +0000 (20:16 +0000)
committerscjody <scjody>
Wed, 21 Jun 2006 20:16:58 +0000 (20:16 +0000)
74 files changed:
ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch
ldiskfs/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/ChangeLog
lustre/doc/lconf.8
lustre/doc/lfs.1
lustre/doc/lfs.lyx
lustre/include/liblustre.h
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_fsfilt.h
lustre/include/linux/lustre_user.h
lustre/include/lustre/liblustreapi.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre_export.h
lustre/include/obd.h
lustre/include/obd_class.h
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.20.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-chaos.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.21-suse2.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.22-rh.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.4.29.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-rhel4.patch
lustre/kernel_patches/patches/ext3-ea-in-inode-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lockd.c
lustre/liblustre/super.c
lustre/liblustre/tests/sanity.c
lustre/llite/dcache.c
lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/lproc_llite.c
lustre/llite/rw.c
lustre/lov/lov_ea.c
lustre/lov/lov_obd.c
lustre/lov/lov_request.c
lustre/lvfs/fsfilt_ext3.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_internal.h
lustre/mds/mds_log.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/mds/mds_unlink_open.c
lustre/obdclass/class_obd.c
lustre/obdclass/genops.c
lustre/obdclass/linux/linux-module.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_config.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_internal.h
lustre/obdfilter/filter_io.c
lustre/obdfilter/filter_io_26.c
lustre/obdfilter/lproc_obdfilter.c
lustre/osc/osc_request.c
lustre/ost/ost_handler.c
lustre/ptlrpc/import.c
lustre/ptlrpc/pack_generic.c
lustre/tests/insanity.sh
lustre/tests/ll_dirstripe_verify.c
lustre/tests/llog-test.sh
lustre/tests/sanity.sh
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/obd.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 3f5687b..89cc1b5 100644 (file)
@@ -2,15 +2,13 @@ Index: linux-stage/fs/ext3/ialloc.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/ialloc.c  2005-10-04 16:53:24.000000000 -0600
 +++ linux-stage/fs/ext3/ialloc.c       2005-10-04 17:07:25.000000000 -0600
-@@ -629,6 +629,11 @@
+@@ -629,6 +629,9 @@
        spin_unlock(&sbi->s_next_gen_lock);
  
        ei->i_state = EXT3_STATE_NEW;
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              ei->i_extra_isize = sizeof(__u16)       /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              ei->i_extra_isize = 0;
++      ei->i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
  
        ret = inode;
        if(DQUOT_ALLOC_INODE(inode)) {
index 19f153d..72c25a4 100644 (file)
@@ -3,15 +3,13 @@ Index: linux-2.6.0/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.6.0.orig/fs/ext3/ialloc.c  2004-01-14 18:54:11.000000000 +0300
 +++ linux-2.6.0/fs/ext3/ialloc.c       2004-01-14 18:54:12.000000000 +0300
-@@ -627,6 +627,11 @@
+@@ -627,6 +627,9 @@
        inode->i_generation = EXT3_SB(sb)->s_next_generation++;
  
        ei->i_state = EXT3_STATE_NEW;
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              ei->i_extra_isize = sizeof(__u16)       /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              ei->i_extra_isize = 0;
++      ei->i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
  
        ret = inode;
        if(DQUOT_ALLOC_INODE(inode)) {
index e0ee12f..f3b25a3 100644 (file)
@@ -350,7 +350,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
 +++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
-@@ -0,0 +1,2703 @@
+@@ -0,0 +1,2702 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2101,7 +2101,6 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index eade9a8..698e9e3 100644 (file)
@@ -345,7 +345,7 @@ Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
 +++ linux-2.6.12.6-bull/fs/ext3/mballoc.c      2006-04-30 01:24:11.000000000 +0400
-@@ -0,0 +1,2702 @@
+@@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2096,7 +2096,6 @@ Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index 43fc776..c57ae02 100644 (file)
@@ -364,7 +364,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
 +++ linux-stage/fs/ext3/mballoc.c      2006-05-25 10:59:14.000000000 -0600
-@@ -0,0 +1,2702 @@
+@@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2115,7 +2115,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index e7b2f25..6a49dcb 100644 (file)
@@ -244,7 +244,7 @@ Description: MDS crash when receiving packet with unknown intent.
 Details    : Do not LBUG in unknown intent case, just return -EFAULT
 
 Severity   : enhancement
-Bugzilla   : 9293
+Bugzilla   : 9293, 9385
 Description: MDS RPCs are serialised on client. This is unnecessary for some.
 Details    : Do not serialize getattr (non-intent version) and statfs.
 
@@ -354,6 +354,30 @@ Description: parallel glimpse, setattr, statfs, punch, destroy requests
 Details    : Sends glimpse, setattr, statfs, punch, destroy requests to OSTs in
             parallel, not waiting for response from every OST before sending
             a rpc to the next OST.
+
+Severity   : minor
+Frequency  : rare
+Bugzilla   : 10150
+Description: setattr vs write race when updating file timestamps
+Details    : When a client process updates a file timestamp into the past
+            right after writing to the file (e.g. tar), it is possible that
+            the updated file modification time can be reset to the current
+            time due to a race between processing the setattr and write RPC.
+
+Severity   : enhancement
+Bugzilla   : 10318
+Description: Bring 'lfs find' closer in line with regular Linux find.
+Details    : lfs find util supports -atime, -mtime, -ctime, -maxdepth, -print,
+            -print0 options and obtains all the needed info through the lustre
+            ioctls.
+
+Severity   : enhancement
+Frequency  : nodes with more than 520 OBD devices
+Bugzilla   : 6221
+Description: support up to 1024 configured devices on one node
+Details    : change obd_dev array from statically allocated to dynamically
+            allocated structs as they are first used to reduce memory usage
+
 ------------------------------------------------------------------------------
 
 02-14-2006  Cluster File Systems, Inc. <info@clusterfs.com>
index 6143c6b..a6ca88a 100644 (file)
@@ -4,48 +4,59 @@ lconf \- Lustre filesystem configuration utility
 .SH SYNOPSIS
 .br
 .B lconf
-[--node <node_name>] [-d,--cleanup] [--noexec] [--gdb] [--nosetup] [--nomod] [-n,--noexec] [-v,--verbose] [-h,--help] <XML-config file>
-[options] --add <objecttype> [args]
+[OPTIONS] <XML-config file>
 .br
 .SH DESCRIPTION
 .B lconf
-, when invoked configures a node following directives in the <XML-config-file>. There will be single configuration file for all the nodes in a single cluster. This file should be distributed to all the nodes in the cluster or kept in a location accessible to all the nodes. One option is to store the cluster configuration information in LDAP format on an LDAP server that can be reached from all the cluster nodes.
+, when invoked configures a node following directives in the
+<XML-config-file>. Can be used to control recovery and startup/shutdown.
+There will be a single configuration file for all the nodes in a
+single cluster. This file should be distributed to all the nodes in
+the cluster or kept in a location accessible to all the nodes. The XML file must be specified. When invoked with no options, lconf will attempt to configure the resources owned by the node it is invoked on
 .PP
 The arguments that can be used for lconf are:
 .PP
 .TP
+--abort_recovery - Used to start Lustre when you are certain that
+recovery will not succeed, as when an OST or MDS is disabled. 
+.TP
+--acl Enable Access Control List support on the MDS 
+.TP
+--allow_unprivileged_port Allows connections from unprivileged ports
+.TP
+--clientoptions <options> 
+Additional options for mounting Lustre clients. Obsolete with
+zeroconfig mounting.
+.TP
 --client_uuid <uuid> 
 The failed client (required for recovery).
 .TP
---clientoptions <options> 
-Additional options for Lustre.
+--clumanager Generate a Red Hat Clumanager configuration file for this
+node.
 .TP
 --config <arg> 
-Cluster configuration name used for LDAP query
+Cluster configuration name used for LDAP query (deprecated)
 .TP
 --conn_uuid <uuid> 
 The failed connection (required for recovery).
 .TP
---d|--cleanup 
+-d|--cleanup 
 Unconfigure a node. The same config and --node argument used for configuration needs to be used for cleanup as well. This will attempt to undo all of the configuration steps done by lconf, including unloading the kernel modules.
 .TP
 --debug_path <path> 
-Path to save debug dumps.
+Path to save debug dumps (default is /tmp/lustre-log).
 .TP
 --dump <file> 
 Dump the kernel debug log to the specified file before portals is unloaded during cleanup.
 .TP
---dump_path <arg> 
-Path to save debug dumps. Default is /tmp/lustre_log
-.TP
 --failover 
-Used to shutdown without saving state. Default is 0. This will allow the node to give up service to another node for failover purposes. This will not be a clean shutdown.
+Used to shutdown without saving state. This will allow the node to give up service to another node for failover purposes. This will not be a clean shutdown.
 .TP
---force 
-Forced unmounting and/or obd detach during cleanup. Default is 0. 
+-f|--force 
+Forced unmounting and/or obd detach during cleanup. 
 .TP
 --gdb 
-Causes lconf to print a message and pause for 5 seconds after creating a gdb module script and before doing any Lustre configuration (the gdb module script is always created, however).
+Causes lconf to create a gdb module script and pause 5 seconds before doing any Lustre configuration (the gdb module script is always created, however).
 .TP
 --gdb_script <arg> 
 Full name of gdb debug script. Default is /tmp/ogdb.
@@ -66,19 +77,29 @@ The UUID of the service to be ignored by a client mounting Lustre. Allows the cl
 Dump all ioctls to the specified file
 .TP
 --ldapurl <arg> 
-LDAP server URL 
+LDAP server URL. Deprecated.
+.TP
+--lustre=src_dir 
+Specify the base directory for Lustre sources, this parameter will cause lconf to load the lustre modules from this source tree.
 .TP
 --lustre_upcall <path> 
 Set the location of the Lustre upcall scripts used by the client for recovery
 .TP
---lustre=src_dir 
-Specify the base directory for Lustre sources, this parameter will cause lconf to load the lustre modules from this soure tree.
+--make_service_scripts Create per-service symlinks for use with clumanager HA software
 .TP
 --mds_ost_conn 
 Open connections to OSTs on MDS.
 .TP
 --maxlevel <level> 
-Perform configuration of devices and services up to level given. level can take the values net, dev, svc, fs. When used in conjunction with cleanup, services are torn down up to a certain level. Default is 100.
+Perform configuration of devices and services up to level given. When
+used in conjunction with cleanup, services are torn down up to a
+certain level.
+Levels are approximately:
+10 - network
+20 - device, ldlm
+30 - osd, mdd
+40 - mds, ost
+70 - mountpoint, echo_client, osc, mdc, lov
 .TP
 --minlevel <level> 
 Specify the minimum level of services to configure/cleanup. Default is 0.
@@ -101,24 +122,36 @@ Only setup devices and services, do not load modules.
 --nosetup 
 Only load modules, do not configure devices or services.
 .TP
+--old_conf Start up service even though config logs appear outdated.
+.TP
 --portals <path> 
-Specify portals source directory. If this is a relative path, then it is assumed to be relative to lustre.
+Specify portals source directory. If this is a relative path, then it
+is assumed to be relative to lustre. (Deprecated)
 .TP
 --portals_upcall <path> 
-Specify the location of the Portals upcall scripts used by the client for recovery
+Specify the location of the Portals upcall scripts used by the client
+for recovery (Deprecated)
 .TP
 --ptldebug debug-level 
 This options can be used to set the required debug level.
 .TP
+--quota 
+Enable quota support for client filesystem
+.TP
+--rawprimary <arg>  For clumanager, device of the primary quorum
+(default=/dev/raw/raw1)
+.TP
+--rawsecondary <arg>  For clumanager, device of the secondary quorum (default=/dev/raw/raw2)
+.TP
 --record 
 Write config information on mds.
 .TP
---record_log <arg> 
-Specify the name of config record log.
-.TP
 --record_device <arg> 
 Specify MDS device name that will record the config commands.
 .TP
+--record_log <arg> 
+Specify the name of config record log.
+.TP
 --recover <arg> 
 Recover a device.
 .TP
@@ -131,6 +164,11 @@ Select a particular node for a service
 --service <arg>
 Shorthand for --group <arg> --select <arg>=<hostname>
 .TP
+--service_scripts <arg>  For clumanager, directory containing per-service scripts (default=/etc/lustre/services)
+.TP
+--single_socket The socknal option. Uses only one socket instead of a
+bundle.
+.TP
 --subsystem <arg> 
 Set the portals debug subsystem.
 .TP
@@ -141,7 +179,10 @@ Specify the failed target (required for recovery).
 Set the recovery timeout period.
 .TP
 --upcall <path> 
-Set the location of both Lustre and Portals upcall scripts used by the client for recovery
+Set the location of both Lustre and Portals upcall scripts used by the
+client for recovery
+.TP
+--user_xattr Enable user_xattr support on MDS
 .TP
 --verbose,-v 
 Be verbose and show actions while going along.
index 2601a64..8cae6e6 100644 (file)
@@ -5,9 +5,13 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
 .br
 .B lfs
 .br
-.B lfs find [--obd <uuid>] [--quiet | --verbose] [--recursive] <dir|file>
+.B lfs find [--atime|-A N] [--mtime|-M N] [--ctime|-C N] [--maxdepth|-D N]
+         \fB[--print0|-P] [--print|-p] [--obd|-O <uuid>] <dir/file>\fR
 .br
-.B lfs getstripe <filename> 
+.B lfs find [--quiet|-q] [--verbose|-v] [--recursive|-r] <dir|file>
+.br
+.B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] 
+              \fB[--recursive|-r] <dir/file>\fR
 .br
 .B lfs setstripe <filename> <stripe-size> <start-ost> <stripe-cnt>
 .br
@@ -19,7 +23,8 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
 .br
 .B lfs quotaoff [-ug] <filesystem>
 .br
-.B lfs setquota [-u|-g] <name> <block-softlimit> <block-hardlimit> <inode-softlimit> <inode-hardlimit> <filesystem>
+.B lfs setquota [-u|-g] <name> <block-softlimit> <block-hardlimit> 
+             \fB<inode-softlimit> <inode-hardlimit> <filesystem>\fR
 .br
 .B lfs quota [-o obd_uuid] [-u|-g] <name> <filesystem>
 .br
@@ -40,10 +45,13 @@ The various options supported by lctl are listed and explained below:
 To create a new file with a specific striping pattern
 .TP
 .B find 
-To list the extended attributes for a given filename or files in a directory or recursively for all files in a directory tree. It can also be used to list the files that have objects on a specific OST. 
+To search the directory tree rooted at the given dir/file name for the files that match the given parameters: \fB--atime\fR (file was last accessed N*24 hours ago), \fB--ctime\fR (file's status was last changed N*24 hours ago), \fB--mtime\fR (file's data was last modified N*24 hours ago), \fB--obd\fR (file has an object on a specific OST). The option \fB--maxdepth\fR allows find to descend at most N levels of directory tree. The options \fB--print\fR and \fB--print0\fR print full file name, followed by a newline and null character respectively.
+.TP
+.B find
+To list the striping info for a given filename or files in a directory or recursively for all files in a directory tree use one of the following options: \fB[--quiet|-q] [--verbose|-v] [--recursive|-r]\fR. If one of these options is given find works in old (obsolete, please use getstripe instead) mode.
 .TP
 .B getstripe 
-To list the striping pattern for given filename
+To list the striping info for given filename or files in a directory or recursively for all files in a directory tree. It can also be used to list the files that have objects on a specific OST.
 .TP
 .B quotachown
 To change files' owner and group on OSTs of the specified filesystem
index d9e4889..3b4588f 100644 (file)
@@ -47,13 +47,21 @@ lfs
 
 \series bold 
 lfs\SpecialChar ~
-find [--obd <uuid>] [--quiet | --verbose] [--recursive] <dir|file>
+find [--atime|-A N] [--mtime|-M N] [--ctime|-C N] [--maxdepth|-D N] [--print0
+|-P] [--print|-p] [--obd|-O <uuid>] <dir/file>
 \layout Standard
 
 
 \series bold 
 lfs\SpecialChar ~
-getstripe <filename> 
+find [--quiet|-q] [--verbose|-v] [--recursive|-r] <dir/file>
+\layout Standard
+
+
+\series bold 
+lfs\SpecialChar ~
+getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] [--recursive|-r]
+ <dir/file>
 \layout Standard
 
 
@@ -62,6 +70,7 @@ lfs\SpecialChar ~
 setstripe <filename|dirname> <stripe_size> <start_ost> <stripe_cnt>
 \layout Standard
 
+
 \series bold 
 lfs\SpecialChar ~
 setstripe -d <dirname>
@@ -110,12 +119,14 @@ lfs\SpecialChar ~
 quota [-o obd_uuid] [-u|-g] <name> <filesystem>
 \layout Standard
 
-\series bold
+
+\series bold 
 lfs\SpecialChar ~
 df [-i] [-h] [path]
 \layout Standard
 
-\series bold
+
+\series bold 
 lfs\SpecialChar ~
 help
 \layout Subsection
@@ -144,9 +155,27 @@ setstripe
 \series bold 
 find
 \series default 
- To list the extended attributes for a given filename or files in a directory
- or recursively for all files in a directory tree.
- It can also be used to list the files that have objects on a specific OST.
+ To search the directory tree rooted at the given dir/file name for the
+ files that match the given parameters: --atime (file was last accessed
+ N*24 hours ago), --ctime (file's status was last changed N*24 hours ago),
+ --mtime (file's data was last modified N*24 hours ago), --obd (file has
+ an object on a specific OST).
+ The option --maxdepth allows find to descend at most N levels of directory
+ tree.
+ The options --print and --print0 print full file name, followed by a newline
+ and null character correspondingly.
+\layout List
+\labelwidthstring 00.00.0000
+
+
+\series bold 
+find
+\series default 
+ To list the striping info for a given filename or files in a directory
+ or recursively for all files in a directory tree use one of the following
+ options: [--quiet|-q] [--verbose|-v] [--recursive|-r].
+ If one of these options is given find works in old (obsolete, please use
+ getstripe instead) mode.
  
 \layout List
 \labelwidthstring 00.00.0000
@@ -155,7 +184,9 @@ find
 \series bold 
 getstripe
 \series default 
- To list the striping pattern for given filename
+ To list the striping info for given filename or files in a directory or
+ recursively for all files in a directory tree.
+ It can also be used to list the files that have objects on a specific OST.
 \layout List
 \labelwidthstring 00.00.0000
 
@@ -225,6 +256,7 @@ quota
 \layout List
 \labelwidthstring 00.00.0000
 
+
 \series bold 
 df
 \series default 
@@ -232,6 +264,7 @@ df
 \layout List
 \labelwidthstring 00.00.0000
 
+
 \series bold 
 help
 \series default 
@@ -262,8 +295,8 @@ This creates a default stripe pattern on and existing dir for all new files
    $ lfs setstripe /mnt/lustre/dir 131072 0 1
 \layout Description
 
-This deletes a default stripe pattern on dir.  New files will use the
- default striping pattern.
+This deletes a default stripe pattern on dir.
 New files will use the default striping pattern.
  created therein.
 \layout LyX-Code
 
index 893abc8..11f47a6 100644 (file)
@@ -382,8 +382,6 @@ static inline int kmem_cache_destroy(kmem_cache_t *a)
         free(a);
         return 0;
 }
-#define kmem_cache_alloc(cache, prio) malloc(cache->size)
-#define kmem_cache_free(cache, obj) free(obj)
 
 #define PAGE_CACHE_SIZE  PAGE_SIZE
 #define PAGE_CACHE_SHIFT PAGE_SHIFT
@@ -668,6 +666,7 @@ static inline int schedule_timeout(signed long t)
                 _ret = tv.tv_sec;               \
         _ret;                                   \
 })
+#define get_jiffies_64()  (__u64)jiffies
 #define time_after(a, b) ((long)(b) - (long)(a) < 0)
 #define time_before(a, b) time_after(b,a)
 #define time_after_eq(a,b)      ((long)(a) - (long)(b) >= 0)
index d3f55f3..3e93680 100644 (file)
@@ -202,6 +202,9 @@ static inline int cleanup_group_info(void)
 #define unlock_24kernel()       unlock_kernel()
 #define ll_kernel_locked()      (current->lock_depth >= 0)
 
+/* 2.4 kernels have HZ=100 on i386/x86_64, this should be reasonably safe */
+#define get_jiffies_64()        (__u64)jiffies
+
 #ifdef HAVE_MM_INLINE
 #include <linux/mm_inline.h>
 #endif
index ce99320..637da54 100644 (file)
@@ -332,15 +332,16 @@ static inline int fsfilt_add_journal_cb(struct obd_device *obd, __u64 last_rcvd,
 
 /* very similar to obd_statfs(), but caller already holds obd_osfs_lock */
 static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *sb,
-                                unsigned long max_age)
+                                cfs_time_t max_age)
 {
         int rc = 0;
 
-        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
-        if (time_before(obd->obd_osfs_age, max_age)) {
+        CDEBUG(D_SUPER, "osfs "CFS_TIME_T", max_age "CFS_TIME_T"\n", 
+                obd->obd_osfs_age, max_age);
+        if (time_before_64(obd->obd_osfs_age, max_age)) {
                 rc = obd->obd_fsops->fs_statfs(sb, &obd->obd_osfs);
                 if (rc == 0) /* N.B. statfs can't really fail */
-                        obd->obd_osfs_age = jiffies;
+                        obd->obd_osfs_age = get_jiffies_64();
         } else {
                 CDEBUG(D_SUPER, "using cached obd_statfs data\n");
         }
index 7bbcca7..668d122 100644 (file)
 #if defined(__x86_64__) || defined(__ia64__) || defined(__ppc64__) || \
     defined(__craynv)
 typedef struct stat     lstat_t;
+#define lstat_f         lstat
 #define HAVE_LOV_USER_MDS_DATA
 #elif defined(__USE_LARGEFILE64) || defined(__KERNEL__)
 typedef struct stat64   lstat_t;
+#define lstat_f         lstat64
 #define HAVE_LOV_USER_MDS_DATA
 #endif
 
index 2473a9a..44a25f4 100644 (file)
@@ -17,8 +17,36 @@ extern int llapi_file_create(const char *name, long stripe_size,
 extern int llapi_file_get_stripe(char *path, struct lov_user_md *lum);
 #define HAVE_LLAPI_FILE_LOOKUP
 extern int llapi_file_lookup(int dirfd, const char *name);
-extern int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
-                      int verbose, int quiet);
+struct find_param {
+        unsigned int maxdepth;
+        time_t atime;
+        time_t mtime;
+        time_t ctime;
+        int asign;
+        int csign;
+        int msign;
+        int zeroend;
+
+        int     recursive;
+        int     verbose;
+        int     quiet;
+
+        struct  obd_uuid        *obduuid;
+        int     obdindex;
+
+        int     lumlen;
+        struct  lov_user_mds_data *lmd;
+
+        /* In-process parameters. */
+        unsigned int depth;
+        int     got_uuids;
+        dev_t   st_dev;
+};
+
+extern int llapi_getstripe(char *path, struct find_param *param);
+extern int llapi_find(char *path, struct find_param *param);
+
 extern int llapi_obd_statfs(char *path, __u32 type, __u32 index,
                      struct obd_statfs *stat_buf,
                      struct obd_uuid *uuid_buf);
index baf7f3e..ed9b6f4 100644 (file)
@@ -254,7 +254,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_JOIN      0x2000ULL /* files can be concatenated */
 #define OBD_CONNECT_ATTRFID   0x4000ULL /* Server supports GetAttr By Fid */
 #define OBD_CONNECT_NODEVOH   0x8000ULL /* No open handle for special nodes */
-#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client */
+#define OBD_CONNECT_LCL_CLIENT 0x10000ULL /* local 1.6 client */
+#define OBD_CONNECT_RMT_CLIENT 0x20000ULL /* Remote client */
 
 #define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
 
@@ -288,11 +289,12 @@ struct obd_connect_data {
         __u32 ocd_index;                /* LOV index to connect to */
         __u32 ocd_unused;
         __u64 ocd_ibits_known;          /* inode bits this client understands */
+        __u32 ocd_nllu;                 /* non-local-lustre-user */
+        __u32 ocd_nllg;                 /* non-local-lustre-group */
+        __u64 padding1;                 /* also fix lustre_swab_connect */
         __u64 padding2;                 /* also fix lustre_swab_connect */
         __u64 padding3;                 /* also fix lustre_swab_connect */
         __u64 padding4;                 /* also fix lustre_swab_connect */
-        __u64 padding5;                 /* also fix lustre_swab_connect */
-        __u64 padding6;                 /* also fix lustre_swab_connect */
 };
 
 extern void lustre_swab_connect(struct obd_connect_data *ocd);
index a9b9812..bdf4d5a 100644 (file)
 
 struct obd_statfs;
 
+/* 
+ * The ioctl naming rules:
+ * LL_*     - works on the currently opened filehandle instead of parent dir
+ * *_OBD_*  - gets data for both OSC or MDC (LOV, LMV indirectly)
+ * *_MDC_*  - gets/sets data related to MDC
+ * *_LOV_*  - gets/sets data related to OSC/LOV
+ * *FILE*   - called on parent dir and passes in a filename
+ * *STRIPE* - set/get lov_user_md
+ * *INFO    - set/get lov_user_mds_data
+ */
 #define LL_IOC_GETFLAGS                 _IOR ('f', 151, long)
 #define LL_IOC_SETFLAGS                 _IOW ('f', 152, long)
 #define LL_IOC_CLRFLAGS                 _IOW ('f', 153, long)
@@ -46,15 +56,21 @@ struct obd_statfs;
 #define LL_IOC_POLL_QUOTACHECK          _IOR ('f', 161, struct if_quotacheck *)
 #define LL_IOC_QUOTACTL                 _IOWR('f', 162, struct if_quotactl *)
 #define LL_IOC_JOIN                     _IOW ('f', 163, long)
-#define LL_IOC_OBD_STATFS               _IOWR('f', 164, struct obd_statfs *)
+#define IOC_OBD_STATFS                  _IOWR('f', 164, struct obd_statfs *)
+#define IOC_LOV_GETINFO                 _IOWR('f', 165, struct lov_user_mds_data *)
 
 #define LL_STATFS_MDC           1
 #define LL_STATFS_LOV           2
 
 #define IOC_MDC_TYPE            'i'
 #define IOC_MDC_LOOKUP          _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
-#define IOC_MDC_GETSTRIPE       _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *)
-#define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_mds_data *)
+#define IOC_MDC_GETFILESTRIPE   _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
+#define IOC_MDC_GETFILEINFO     _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
+#define LL_IOC_MDC_GETINFO      _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
+
+/* Keep these for backward compatibility. */
+#define LL_IOC_OBD_STATFS       IOC_OBD_STATFS
+#define IOC_MDC_GETSTRIPE       IOC_MDC_GETFILESTRIPE
 
 #define O_LOV_DELAY_CREATE 0100000000  /* hopefully this does not conflict */
 #define O_JOIN_FILE        0400000000  /* hopefully this does not conflict */
index 2590fec..1268177 100644 (file)
@@ -50,6 +50,8 @@ struct filter_export_data {
         int                        fed_lr_idx;
         long                       fed_dirty;    /* in bytes */
         long                       fed_grant;    /* in bytes */
+        struct list_head           fed_mod_list; /* files being modified */
+        int                        fed_mod_count;/* items in fed_mod_list */
         long                       fed_pending;  /* bytes just being written */
 };
 
index 085fe67..9381b0d 100644 (file)
@@ -187,6 +187,7 @@ struct obd_type {
         cfs_proc_dir_entry_t *typ_procroot;
         char *typ_name;
         int  typ_refcnt;
+        spinlock_t obd_type_lock;
 };
 
 struct brw_page {
@@ -213,6 +214,8 @@ struct obd_async_page_ops {
         int  (*ap_make_ready)(void *data, int cmd);
         int  (*ap_refresh_count)(void *data, int cmd);
         void (*ap_fill_obdo)(void *data, int cmd, struct obdo *oa);
+        void (*ap_update_obdo)(void *data, int cmd, struct obdo *oa,
+                               obd_valid valid);
         int  (*ap_completion)(void *data, int cmd, struct obdo *oa, int rc);
 };
 
@@ -337,6 +340,9 @@ struct filter_obd {
         struct lustre_quota_ctxt fo_quota_ctxt;
         spinlock_t               fo_quotacheck_lock;
         atomic_t                 fo_quotachecking;
+
+        int                      fo_fmd_max_num; /* per exp filter_mod_data */
+        int                      fo_fmd_max_age; /* jiffies to fmd expiry */
 };
 
 #define OSC_MAX_RIF_DEFAULT       8
@@ -604,6 +610,7 @@ struct niobuf_local {
 
 struct obd_trans_info {
         __u64                    oti_transno;
+        __u64                    oti_xid;
         __u64                   *oti_objid;
         /* Only used on the server side for tracking acks. */
         struct oti_req_ack_lock {
@@ -629,6 +636,8 @@ static inline void oti_init(struct obd_trans_info *oti,
         if (req == NULL)
                 return;
 
+        oti->oti_xid = req->rq_xid;
+
         if (req->rq_repmsg && req->rq_reqmsg != 0)
                 oti->oti_transno = lustre_msg_get_transno(req->rq_repmsg);
         oti->oti_thread_id = req->rq_svc_thread ? req->rq_svc_thread->t_id : -1;
@@ -708,10 +717,14 @@ struct obd_notify_upcall {
 };
 
 /* corresponds to one of the obd's */
+#define MAX_OBD_NAME 128
+#define OBD_DEVICE_MAGIC        0XAB5CD6EF
 struct obd_device {
         struct obd_type        *obd_type;
+        __u32                   obd_magic;
+
         /* common and UUID name of this device */
-        char                   *obd_name;
+        char                    obd_name[MAX_OBD_NAME];
         struct obd_uuid         obd_uuid;
 
         int                     obd_minor;
@@ -848,7 +861,7 @@ struct obd_ops {
         int (*o_statfs)(struct obd_device *obd, struct obd_statfs *osfs,
                         cfs_time_t max_age);
         int (*o_statfs_async)(struct obd_device *obd, struct obd_info *oinfo,
-                              unsigned long max_age,
+                              cfs_time_t max_age,
                               struct ptlrpc_request_set *set);
         int (*o_packmd)(struct obd_export *exp, struct lov_mds_md **disk_tgt,
                         struct lov_stripe_md *mem_src);
@@ -976,15 +989,11 @@ struct obd_ops {
         int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
         int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
 
+        int (*o_ping)(struct obd_export *exp);
         /*
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
-         * Also, add a wrapper function in include/linux/obd_class.h.
-         *
-         * Also note that if you add it to the END, you also have to change
-         * the num_stats calculation.
-         *
-         */
+         * Also, add a wrapper function in include/linux/obd_class.h. */
 };
 
 struct lsm_operations {
index 60fe1fc..e915a23 100644 (file)
@@ -42,8 +42,8 @@
 #endif
 
 /* OBD Device Declarations */
-#define MAX_OBD_DEVICES 520
-extern struct obd_device obd_dev[MAX_OBD_DEVICES];
+#define MAX_OBD_DEVICES 1024
+extern struct obd_device *obd_devs[MAX_OBD_DEVICES];
 extern spinlock_t obd_dev_lock;
 
 /* OBD Operations Declarations */
@@ -52,24 +52,26 @@ extern struct obd_device *class_exp2obd(struct obd_export *);
 
 /* genops.c */
 struct obd_export *class_conn2export(struct lustre_handle *);
-int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm);
-int class_unregister_type(char *nm);
+int class_register_type(struct obd_ops *ops, struct lprocfs_vars *,
+                        const char *nm);
+int class_unregister_type(const char *nm);
 
-struct obd_device *class_newdev(struct obd_type *type, char *name);
+struct obd_device *class_newdev(const char *type_name, const char *name);
 void class_release_dev(struct obd_device *obd);
 
-int class_name2dev(char *name);
-struct obd_device *class_name2obd(char *name);
+int class_name2dev(const char *name);
+struct obd_device *class_name2obd(const char *name);
 int class_uuid2dev(struct obd_uuid *uuid);
 struct obd_device *class_uuid2obd(struct obd_uuid *uuid);
 void class_obd_list(void);
 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
-                                          char * typ_name,
+                                          const char * typ_name,
                                           struct obd_uuid *grp_uuid);
 struct obd_device * class_find_client_notype(struct obd_uuid *tgt_uuid,
                                              struct obd_uuid *grp_uuid);
 struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid,
                                            int *next);
+struct obd_device * class_num2obd(int num);
 
 int oig_init(struct obd_io_group **oig);
 void oig_add_one(struct obd_io_group *oig,
@@ -163,8 +165,8 @@ void class_import_put(struct obd_import *);
 struct obd_import *class_new_import(struct obd_device *obd);
 void class_destroy_import(struct obd_import *exp);
 
-struct obd_type *class_search_type(char *name);
-struct obd_type *class_get_type(char *name);
+struct obd_type *class_search_type(const char *name);
+struct obd_type *class_get_type(const char *name);
 void class_put_type(struct obd_type *type);
 int class_connect(struct lustre_handle *conn, struct obd_device *obd,
                   struct obd_uuid *cluuid);
@@ -262,6 +264,11 @@ do {                                                            \
         }                                                       \
 } while (0)
 
+static inline int class_devno_max(void)
+{
+        return MAX_OBD_DEVICES; /* number of slots in obd_devs[] */
+}
+
 static inline int obd_get_info(struct obd_export *exp, __u32 keylen,
                                void *key, __u32 *vallen, void *val)
 {
@@ -626,6 +633,18 @@ static inline int obd_disconnect(struct obd_export *exp)
         RETURN(rc);
 }
 
+static inline int obd_ping(struct obd_export *exp)
+{
+        int rc;
+        ENTRY;
+        /* NOTE(review): presumably returns 0 if o_ping is unset - confirm */
+        OBD_CHECK_OP(exp->exp_obd, ping, 0);
+        OBD_COUNTER_INCREMENT(exp->exp_obd, ping);
+        /* dispatch to the ping operation of the export's device */
+        rc = OBP(exp->exp_obd, ping)(exp);
+        RETURN(rc);
+}
+
 static inline int obd_init_export(struct obd_export *exp)
 {
         int rc = 0;
@@ -670,12 +689,16 @@ obd_lvfs_fid2dentry(struct obd_export *exp, __u64 id_ino, __u32 gen, __u64 gr)
 #define time_before(t1, t2) ((long)t2 - (long)t1 > 0)
 #endif
 
+#ifndef time_before_64
+#define time_before_64(t1, t2) ((__s64)(t2) - (__s64)(t1) > 0)
+#endif /* time_before_64: signed 64-bit difference, args parenthesized */
+
 /* @max_age is the oldest time in jiffies that we accept using a cached data.
  * If the cache is older than @max_age we will get a new value from the
- * target.  Use a value of "jiffies + HZ" to guarantee freshness. */
+ * target.  Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
 static inline int obd_statfs_async(struct obd_device *obd,
                                    struct obd_info *oinfo,
-                                   unsigned long max_age,
+                                   cfs_time_t max_age,
                                    struct ptlrpc_request_set *rqset)
 {
         int rc = 0;
@@ -687,8 +710,9 @@ static inline int obd_statfs_async(struct obd_device *obd,
         OBD_CHECK_OP(obd, statfs, -EOPNOTSUPP);
         OBD_COUNTER_INCREMENT(obd, statfs);
 
-        CDEBUG(D_SUPER, "osfs %lu, max_age %lu\n", obd->obd_osfs_age, max_age);
-        if (time_before(obd->obd_osfs_age, max_age)) {
+        CDEBUG(D_SUPER, "osfs "CFS_TIME_T", max_age "CFS_TIME_T"\n", 
+               obd->obd_osfs_age, max_age);
+        if (time_before_64(obd->obd_osfs_age, max_age)) {
                 rc = OBP(obd, statfs_async)(obd, oinfo, max_age, rqset);
         } else {
                 CDEBUG(D_SUPER, "using cached obd_statfs data\n");
@@ -703,7 +727,7 @@ static inline int obd_statfs_async(struct obd_device *obd,
 
 static inline int obd_statfs_rqset(struct obd_device *obd,
                                    struct obd_statfs *osfs,
-                                   unsigned long max_age)
+                                   cfs_time_t max_age)
 {
         struct ptlrpc_request_set *set = NULL;
         struct obd_info oinfo = { { { 0 } } };
@@ -724,7 +748,7 @@ static inline int obd_statfs_rqset(struct obd_device *obd,
 
 /* @max_age is the oldest time in jiffies that we accept using a cached data.
  * If the cache is older than @max_age we will get a new value from the
- * target.  Use a value of "jiffies + HZ" to guarantee freshness. */
+ * target.  Use a value of "cfs_time_current() + HZ" to guarantee freshness. */
 static inline int obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                              cfs_time_t max_age)
 {
index 731a826..0e93ced 100644 (file)
@@ -8,19 +8,17 @@
 
 --- linux-2.4.20/fs/ext3/ialloc.c~ext3-ea-in-inode-2.4.20      2003-10-08 23:18:08.000000000 +0400
 +++ linux-2.4.20-alexey/fs/ext3/ialloc.c       2003-10-12 16:25:21.000000000 +0400
-@@ -577,6 +577,12 @@ repeat:
-       insert_inode_hash(inode);
+@@ -577,6 +577,10 @@ repeat:
        inode->i_generation = sb->u.ext3_sb.s_next_generation++;
  
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              inode->u.ext3_i.i_extra_isize = sizeof(__u16)   /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              inode->u.ext3_i.i_extra_isize = 0;
-+
        inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++      inode->u.ext3_i.i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
++
        err = ext3_get_inode_loc_new(inode, &iloc, 1);
        if (err) goto fail;
+       BUFFER_TRACE(iloc->bh, "get_write_access");
 --- linux-2.4.20/fs/ext3/inode.c~ext3-ea-in-inode-2.4.20       2003-10-08 23:18:08.000000000 +0400
 +++ linux-2.4.20-alexey/fs/ext3/inode.c        2003-10-12 16:25:21.000000000 +0400
 @@ -2209,6 +2209,12 @@ void ext3_read_inode(struct inode * inod
index aaf543f..491a2df 100644 (file)
@@ -10,19 +10,17 @@ Index: linux-2.4.21-chaos/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c   2003-12-12 17:39:10.000000000 +0300
 +++ linux-2.4.21-chaos/fs/ext3/ialloc.c        2003-12-12 17:39:55.000000000 +0300
-@@ -580,6 +580,12 @@
-       insert_inode_hash(inode);
+@@ -580,6 +580,10 @@
        inode->i_generation = sbi->s_next_generation++;
  
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              EXT3_I(inode)->i_extra_isize = sizeof(__u16)    /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              EXT3_I(inode)->i_extra_isize = 0;
-+
        ei->i_state = EXT3_STATE_NEW;
++      ei->i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
++
        err = ext3_get_inode_loc_new(inode, &iloc, 1);
        if (err) goto fail;
+       BUFFER_TRACE(iloc->bh, "get_write_access");
 Index: linux-2.4.21-chaos/fs/ext3/inode.c
 ===================================================================
 --- linux-2.4.21-chaos.orig/fs/ext3/inode.c    2003-12-12 17:39:11.000000000 +0300
index 5980fd9..90ff24a 100644 (file)
@@ -10,19 +10,17 @@ Index: linux-2.4.21-chaos/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.4.21-chaos.orig/fs/ext3/ialloc.c   2003-12-12 17:39:10.000000000 +0300
 +++ linux-2.4.21-chaos/fs/ext3/ialloc.c        2003-12-12 17:39:55.000000000 +0300
-@@ -580,6 +580,12 @@
-       insert_inode_hash(inode);
+@@ -580,6 +580,10 @@
        inode->i_generation = sbi->s_next_generation++;
  
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              EXT3_I(inode)->i_extra_isize = sizeof(__u16)    /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              EXT3_I(inode)->i_extra_isize = 0;
-+
        inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++      inode->u.ext3_i.i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
++
        err = ext3_get_inode_loc_new(inode, &iloc, 1);
        if (err) goto fail;
+       BUFFER_TRACE(iloc->bh, "get_write_access");
 Index: linux-2.4.21-chaos/fs/ext3/inode.c
 ===================================================================
 --- linux-2.4.21-chaos.orig/fs/ext3/inode.c    2003-12-12 17:39:11.000000000 +0300
index 5b118ae..6b22a71 100644 (file)
@@ -8,19 +8,17 @@
 
 --- linux-2.4.22-ac1/fs/ext3/ialloc.c~ext3-ea-in-inode-2.4.22-rh       2003-10-08 13:57:56.000000000 +0400
 +++ linux-2.4.22-ac1-alexey/fs/ext3/ialloc.c   2003-10-08 15:13:31.000000000 +0400
-@@ -715,6 +715,12 @@ have_bit_and_group:
-       insert_inode_hash(inode);
+@@ -715,6 +715,10 @@ have_bit_and_group:
        inode->i_generation = sb->u.ext3_sb.s_next_generation++;
  
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              inode->u.ext3_i.i_extra_isize = sizeof(__u16)   /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              inode->u.ext3_i.i_extra_isize = 0;
-+
        inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++      inode->u.ext3_i.i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
++
        err = ext3_get_inode_loc_new(inode, &iloc, 1);
        if (err) goto fail;
+       BUFFER_TRACE(iloc->bh, "get_write_access");
 --- linux-2.4.22-ac1/fs/ext3/inode.c~ext3-ea-in-inode-2.4.22-rh        2003-10-08 13:57:57.000000000 +0400
 +++ linux-2.4.22-ac1-alexey/fs/ext3/inode.c    2003-10-08 15:14:57.000000000 +0400
 @@ -2229,6 +2229,12 @@ void ext3_read_inode(struct inode * inod
index f4832af..2376ffa 100644 (file)
@@ -2,19 +2,17 @@ Index: linux-2.4.29/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.4.29.orig/fs/ext3/ialloc.c 2005-05-03 15:56:43.831530296 +0300
 +++ linux-2.4.29/fs/ext3/ialloc.c      2005-05-03 16:07:32.990843080 +0300
-@@ -576,6 +576,12 @@
-       insert_inode_hash(inode);
+@@ -576,6 +576,10 @@
        inode->i_generation = sb->u.ext3_sb.s_next_generation++;
  
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              inode->u.ext3_i.i_extra_isize = sizeof(__u16)   /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              inode->u.ext3_i.i_extra_isize = 0;
-+
        inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++      inode->u.ext3_i.i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
++
        err = ext3_get_inode_loc_new(inode, &iloc, 1);
        if (err) goto fail;
+       BUFFER_TRACE(iloc->bh, "get_write_access");
 Index: linux-2.4.29/fs/ext3/inode.c
 ===================================================================
 --- linux-2.4.29.orig/fs/ext3/inode.c  2005-05-03 15:58:30.758274960 +0300
index 3f5687b..89cc1b5 100644 (file)
@@ -2,15 +2,13 @@ Index: linux-stage/fs/ext3/ialloc.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/ialloc.c  2005-10-04 16:53:24.000000000 -0600
 +++ linux-stage/fs/ext3/ialloc.c       2005-10-04 17:07:25.000000000 -0600
-@@ -629,6 +629,11 @@
+@@ -629,6 +629,9 @@
        spin_unlock(&sbi->s_next_gen_lock);
  
        ei->i_state = EXT3_STATE_NEW;
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              ei->i_extra_isize = sizeof(__u16)       /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              ei->i_extra_isize = 0;
++      ei->i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
  
        ret = inode;
        if(DQUOT_ALLOC_INODE(inode)) {
index 19f153d..72c25a4 100644 (file)
@@ -3,15 +3,13 @@ Index: linux-2.6.0/fs/ext3/ialloc.c
 ===================================================================
 --- linux-2.6.0.orig/fs/ext3/ialloc.c  2004-01-14 18:54:11.000000000 +0300
 +++ linux-2.6.0/fs/ext3/ialloc.c       2004-01-14 18:54:12.000000000 +0300
-@@ -627,6 +627,11 @@
+@@ -627,6 +627,9 @@
        inode->i_generation = EXT3_SB(sb)->s_next_generation++;
  
        ei->i_state = EXT3_STATE_NEW;
-+      if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
-+              ei->i_extra_isize = sizeof(__u16)       /* i_extra_isize */
-+                              + sizeof(__u16);        /* i_pad1 */
-+      } else
-+              ei->i_extra_isize = 0;
++      ei->i_extra_isize =
++              (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
++              sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
  
        ret = inode;
        if(DQUOT_ALLOC_INODE(inode)) {
index e0ee12f..f3b25a3 100644 (file)
@@ -350,7 +350,7 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
 +++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
-@@ -0,0 +1,2703 @@
+@@ -0,0 +1,2702 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2101,7 +2101,6 @@ Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index eade9a8..698e9e3 100644 (file)
@@ -345,7 +345,7 @@ Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 ===================================================================
 --- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
 +++ linux-2.6.12.6-bull/fs/ext3/mballoc.c      2006-04-30 01:24:11.000000000 +0400
-@@ -0,0 +1,2702 @@
+@@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2096,7 +2096,6 @@ Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index 43fc776..c57ae02 100644 (file)
@@ -364,7 +364,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
 --- linux-stage.orig/fs/ext3/mballoc.c 2006-05-23 17:33:37.579436680 -0600
 +++ linux-stage/fs/ext3/mballoc.c      2006-05-25 10:59:14.000000000 -0600
-@@ -0,0 +1,2702 @@
+@@ -0,0 +1,2701 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -2115,7 +2115,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
 +{
 +      struct super_block *sb = seq->private;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      int group = (int) v, i;
 +      struct sg {
 +              struct ext3_group_info info;
index 21f8420..3b4bb26 100644 (file)
@@ -436,6 +436,11 @@ int client_disconnect_export(struct obd_export *exp)
         if (cli->cl_conn_count)
                 GOTO(out_no_disconnect, rc = 0);
 
+        /* Mark import deactivated now, so we don't try to reconnect if any
+         * of the cleanup RPCs fails (e.g. ldlm cancel, etc).  We don't
+         * fully deactivate the import, or that would drop all requests. */
+        imp->imp_deactive = 1;
+
         /* Some non-replayable imports (MDS's OSCs) are pinged, so just
          * delete it regardless.  (It's safe to delete an import that was
          * never added.) */
@@ -455,7 +460,6 @@ int client_disconnect_export(struct obd_export *exp)
                 rc = ptlrpc_disconnect_import(imp);
 
         ptlrpc_invalidate_import(imp);
-        imp->imp_deactive = 1;
         ptlrpc_free_rq_pool(imp->imp_rq_pool);
         class_destroy_import(imp);
         cli->cl_import = NULL;
@@ -1409,6 +1413,7 @@ target_send_reply(struct ptlrpc_request *req, int rc, int fail_id)
 
 int target_handle_ping(struct ptlrpc_request *req)
 {
+        obd_ping(req->rq_export);
         return lustre_pack_reply(req, 1, NULL, NULL);
 }
 
index 034219e..c07d3f4 100644 (file)
@@ -59,9 +59,12 @@ inline cfs_time_t round_timeout(cfs_time_t timeout)
 }
 
 /* timeout for initial callback (AST) reply */
-static inline unsigned int ldlm_get_rq_timeout(unsigned int ldlm_timeout, unsigned int obd_timeout)
+static inline unsigned int ldlm_get_rq_timeout(unsigned int ldlm_timeout,
+                                               unsigned int obd_timeout)
 {
-        return max(min(ldlm_timeout, obd_timeout / 3), 1U);
+        unsigned int timeout = min(ldlm_timeout, obd_timeout / 3);
+
+        return timeout < 1 ? 1 : timeout;
 }
 
 #ifdef __KERNEL__
@@ -535,7 +538,7 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         }
 
         req->rq_send_state = LUSTRE_IMP_FULL;
-        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout); /* timeout for initial AST reply */
+        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout);
         if (unlikely(instant_cancel)) {
                 rc = ptl_send_rpc(req, 1);
         } else {
@@ -608,7 +611,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
         ptlrpc_req_set_repsize(req, 1, NULL);
 
         req->rq_send_state = LUSTRE_IMP_FULL;
-        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout); /* timeout for initial AST reply */
+        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout);
 
         /* We only send real blocking ASTs after the lock is granted */
         lock_res_and_lock(lock);
@@ -675,7 +678,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         ptlrpc_req_set_repsize(req, 2, size);
 
         req->rq_send_state = LUSTRE_IMP_FULL;
-        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout); /* timeout for initial AST reply */
+        req->rq_timeout = ldlm_get_rq_timeout(ldlm_timeout, obd_timeout);
 
         rc = ptlrpc_queue_wait(req);
         if (rc == -ELDLM_NO_LOCK_DATA)
index 7b3d2d7..7b5ee32 100644 (file)
@@ -1104,8 +1104,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
 
 #ifdef _HAVE_STATVFS
 static int llu_statfs_internal(struct llu_sb_info *sbi,
-                               struct obd_statfs *osfs,
-                               unsigned long max_age)
+                               struct obd_statfs *osfs, __u64 max_age)
 {
         struct obd_statfs obd_osfs;
         int rc;
@@ -1158,7 +1157,7 @@ static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
         /* For now we will always get up-to-date statfs values, but in the
          * future we may allow some amount of caching on the client (e.g.
          * from QOS or lprocfs updates). */
-        rc = llu_statfs_internal(sbi, &osfs, jiffies - 1);
+        rc = llu_statfs_internal(sbi, &osfs, get_jiffies_64() - HZ);
         if (rc)
                 return rc;
 
@@ -1777,7 +1776,7 @@ llu_fsswop_mount(const char *source,
         obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
                            sizeof(async), &async, NULL);
 
-        ocd.ocd_connect_flags = OBD_CONNECT_IBITS|OBD_CONNECT_VERSION;
+        ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION;
         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
         ocd.ocd_version = LUSTRE_VERSION_CODE;
 
@@ -1809,8 +1808,8 @@ llu_fsswop_mount(const char *source,
         obd->obd_upcall.onu_owner = &sbi->ll_lco;
         obd->obd_upcall.onu_upcall = ll_ocd_update;
 
-        ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK|OBD_CONNECT_REQPORTAL|
-                                OBD_CONNECT_VERSION|OBD_CONNECT_TRUNCLOCK;
+        ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
+                                OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK;
         ocd.ocd_version = LUSTRE_VERSION_CODE;
         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd);
         if (err) {
index ccab0c3..4c41ede 100644 (file)
@@ -766,8 +766,6 @@ int t23(char *name)
 {
         char path[MAX_PATH_LENGTH];
         int fd;
-        char *str = "1234567890";
-        char buf[100];
         long long ret;
         loff_t off;
 
@@ -974,7 +972,7 @@ int t50b(char *name)
 {
         loff_t off_array[] = {1, 17, 255, 258, 4095, 4097, 8191,
                               1024*1024*1024*1024ULL};
-        int np = 1, i;
+        int i;
         loff_t offset;
 
         ENTRY("4k un-aligned i/o sanity");
@@ -1001,7 +999,6 @@ int t51(char *name)
 {
         char file[MAX_PATH_LENGTH] = "";
         int fd;
-        struct stat statbuf;
         off_t size;
         int result;
 
index 0c681ea..c1a3cc6 100644 (file)
@@ -466,9 +466,9 @@ revalidate_finish:
 
  out:
         /* We do not free request as it may be reused during following lookup
-          (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
-           be freed in ll_lookup_it or in ll_intent_release. But if
-           request was not completed, we need to free it. (bug 5154) */
+         * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
+         * be freed in ll_lookup_it or in ll_intent_release. But if
+         * request was not completed, we need to free it. (bug 5154, 9903) */
         if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
                 ptlrpc_req_finished(req);
         if (rc == 0) {
index 2a09cb0..053c9d0 100644 (file)
@@ -481,90 +481,51 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
 
                 return rc;
         }
-        case LL_IOC_LOV_GETSTRIPE: {
-                struct ptlrpc_request *request = NULL;
-                struct lov_user_md *lump = (struct lov_user_md *)arg;
-                struct lov_mds_md *lmm;
-                struct ll_fid fid;
-                struct mds_body *body;
-                int rc, lmmsize;
-
-                ll_inode2fid(&fid, inode);
-
-                rc = ll_get_max_mdsize(sbi, &lmmsize);
-                if (rc)
-                        RETURN(rc);
-
-                rc = mdc_getattr(sbi->ll_mdc_exp, &fid, OBD_MD_FLDIREA,
-                                 lmmsize, &request);
-                if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_getattr failed: rc = %d\n", rc);
-                        RETURN(rc);
-                }
-
-                body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
-                                      sizeof(*body));
-                LASSERT(body != NULL); /* checked by mdc_getattr_name */
-                /* swabbed by mdc_getattr_name */
-                LASSERT_REPSWABBED(request, REPLY_REC_OFF);
-
-                lmmsize = body->eadatasize;
-                if (lmmsize == 0)
-                        GOTO(out_get, rc = -ENODATA);
-
-                lmm = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF + 1,
-                                     lmmsize);
-                LASSERT(lmm != NULL);
-                LASSERT_REPSWABBED(request, REPLY_REC_OFF + 1);
-
-                /*
-                 * This is coming from the MDS, so is probably in
-                 * little endian.  We convert it to host endian before
-                 * passing it to userspace.
-                 */
-                if (lmm->lmm_magic == __swab32(LOV_MAGIC)) {
-                        lustre_swab_lov_user_md((struct lov_user_md *)lmm);
-                        lustre_swab_lov_user_md_objects((struct lov_user_md *)lmm);
-                }
-
-                rc = copy_to_user(lump, lmm, lmmsize);
-                if (rc)
-                        GOTO(out_get, rc = -EFAULT);
-
-                EXIT;
-        out_get:
-                ptlrpc_req_finished(request);
-                return rc;
-        }
         case LL_IOC_OBD_STATFS:
                 RETURN(ll_obd_statfs(inode, (void *)arg));
+        case LL_IOC_LOV_GETSTRIPE:
+        case LL_IOC_MDC_GETINFO:
         case IOC_MDC_GETFILEINFO:
-        case IOC_MDC_GETSTRIPE: {
+        case IOC_MDC_GETFILESTRIPE: {
                 struct ptlrpc_request *request = NULL;
                 struct ll_fid fid;
                 struct mds_body *body;
                 struct lov_user_md *lump;
-                struct lov_mds_md *lmm;
-                char *filename;
+                struct lov_mds_md *lmm = NULL;
+                char *filename = NULL;
                 int rc, lmmsize;
 
-                filename = getname((const char *)arg);
-                if (IS_ERR(filename))
-                        RETURN(PTR_ERR(filename));
-
                 ll_inode2fid(&fid, inode);
 
                 rc = ll_get_max_mdsize(sbi, &lmmsize);
                 if (rc)
                         RETURN(rc);
 
-                rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid, filename,
-                                      strlen(filename) + 1, OBD_MD_FLEASIZE,
-                                      lmmsize, &request);
-                if (rc < 0) {
-                        CDEBUG(D_INFO, "mdc_getattr_name failed on %s: rc %d\n",
-                               filename, rc);
-                        GOTO(out_name, rc);
+                if (cmd == IOC_MDC_GETFILEINFO ||
+                    cmd == IOC_MDC_GETFILESTRIPE) {
+                        filename = getname((const char *)arg);
+                        if (IS_ERR(filename))
+                                RETURN(PTR_ERR(filename));
+
+                        rc = mdc_getattr_name(sbi->ll_mdc_exp, &fid,
+                                              filename, strlen(filename) + 1,
+                                              OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
+                                              lmmsize, &request);
+                        if (rc < 0) {
+                                CDEBUG(D_INFO, "mdc_getattr_name failed "
+                                       "on %s: rc %d\n", filename, rc);
+                                GOTO(out_name, rc);
+                        }
+                } else {
+                        rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
+                                         OBD_MD_FLEASIZE | OBD_MD_FLDIREA,
+                                         lmmsize, &request);
+                        if (rc < 0) {
+                                CDEBUG(D_INFO, "mdc_getattr failed on inode "
+                                       "%lu/%u: rc %d\n", inode->i_ino,
+                                       inode->i_generation, rc);
+                                GOTO(out_name, rc);
+                         }
                 }
 
                 body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF,
@@ -575,11 +536,14 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
 
                 lmmsize = body->eadatasize;
 
-                if (!(body->valid & OBD_MD_FLEASIZE) || lmmsize == 0)
-                        GOTO(out_req, rc = -ENODATA);
-
-                if (lmmsize > 4096)
-                        GOTO(out_req, rc = -EFBIG);
+                if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
+                    lmmsize == 0) {
+                        if (cmd == LL_IOC_LOV_GETSTRIPE ||
+                            cmd == IOC_MDC_GETFILESTRIPE)
+                                GOTO(out_req, rc = -ENODATA);
+                        else
+                                GOTO(skip_lmm, rc = 0);
+                }
 
                 lmm = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF + 1,
                                      lmmsize);
@@ -618,25 +582,24 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
 
                         memcpy(lmj, lmm, sizeof(struct lov_user_md_join));
                         for (i = 0; i < lsm->lsm_stripe_count; i++) {
-                                struct lov_array_info *lai = lsm->lsm_array;
-                                if ((lai->lai_ext_array[aindex].le_loi_idx +
-                                     lai->lai_ext_array[aindex].le_stripe_count)<=i){
+                                struct lov_extent *lex =
+                                        &lsm->lsm_array->lai_ext_array[aindex];
+
+                                if (lex->le_loi_idx + lex->le_stripe_count <= i)
                                         aindex ++;
-                                }
+                                /* aindex may have advanced: re-point lex */
+                                lex = &lsm->lsm_array->lai_ext_array[aindex];
-                                CDEBUG(D_INFO, "aindex %d i %d l_extent_start"
-                                       LPU64"len %d \n", aindex, i,
-                                       lai->lai_ext_array[aindex].le_start,
-                                       (int)lai->lai_ext_array[aindex].le_len);
+                                CDEBUG(D_INFO, "aindex %d i %d l_extent_start "
+                                       LPU64" len %d\n", aindex, i,
+                                       lex->le_start, (int)lex->le_len);
                                 lmj->lmm_objects[i].l_extent_start =
-                                        lai->lai_ext_array[aindex].le_start;
+                                        lex->le_start;
 
-                                if ((int)lai->lai_ext_array[aindex].le_len == -1) {
+                                if ((int)lex->le_len == -1)
                                         lmj->lmm_objects[i].l_extent_end = -1;
-                                } else {
+                                else
                                         lmj->lmm_objects[i].l_extent_end =
-                                          lai->lai_ext_array[aindex].le_start +
-                                          lai->lai_ext_array[aindex].le_len;
-                                }
+                                                lex->le_start + lex->le_len;
                                 lmj->lmm_objects[i].l_object_id =
                                         lsm->lsm_oinfo[i].loi_id;
                                 lmj->lmm_objects[i].l_object_gr =
@@ -648,16 +609,28 @@ static int ll_dir_ioctl(struct inode *inode, struct file *file,
                         }
                         lmm = (struct lov_mds_md *)lmj;
                         lmmsize = lmj_size;
-out_free_memmd:
+                out_free_memmd:
                         obd_free_memmd(sbi->ll_osc_exp, &lsm);
                         if (rc)
-                                GOTO(out_lmm, rc);
+                                GOTO(out_req, rc);
+                }
+                if (cmd == IOC_MDC_GETFILESTRIPE ||
+                    cmd == LL_IOC_LOV_GETSTRIPE) {
+                        lump = (struct lov_user_md *)arg;
+                } else {
+                        struct lov_user_mds_data *lmdp;
+                        lmdp = (struct lov_user_mds_data *)arg;
+                        lump = &lmdp->lmd_lmm;
                 }
-                if (cmd == IOC_MDC_GETFILEINFO) {
+                rc = copy_to_user(lump, lmm, lmmsize);
+                if (rc)
+                        GOTO(out_lmm, rc = -EFAULT);
+        skip_lmm:
+                if (cmd == IOC_MDC_GETFILEINFO || cmd == LL_IOC_MDC_GETINFO) {
                         struct lov_user_mds_data *lmdp;
                         lstat_t st = { 0 };
 
-                        st.st_dev     = 0;
+                        st.st_dev     = inode->i_sb->s_dev;
                         st.st_mode    = body->mode;
                         st.st_nlink   = body->nlink;
                         st.st_uid     = body->uid;
@@ -675,23 +648,67 @@ out_free_memmd:
                         rc = copy_to_user(&lmdp->lmd_st, &st, sizeof(st));
                         if (rc)
                                 GOTO(out_lmm, rc = -EFAULT);
-                        lump = &lmdp->lmd_lmm;
-                } else {
-                        lump = (struct lov_user_md *)arg;
                 }
 
-                rc = copy_to_user(lump, lmm, lmmsize);
-                if (rc)
-                        GOTO(out_lmm, rc = -EFAULT);
-
                 EXIT;
         out_lmm:
-                if (lmm->lmm_magic == LOV_MAGIC_JOIN)
+                if (lmm && lmm->lmm_magic == LOV_MAGIC_JOIN)
                         OBD_FREE(lmm, lmmsize);
         out_req:
                 ptlrpc_req_finished(request);
         out_name:
-                putname(filename);
+                if (filename)
+                        putname(filename);
+                return rc;
+        }
+        case IOC_LOV_GETINFO: {
+                struct lov_user_mds_data *lumd;
+                struct lov_stripe_md *lsm;
+                struct lov_user_md *lum;
+                struct lov_mds_md *lmm;
+                int lmmsize;
+                lstat_t st;
+                int rc;
+
+                lumd = (struct lov_user_mds_data *)arg;
+                lum = &lumd->lmd_lmm;
+
+                rc = ll_get_max_mdsize(sbi, &lmmsize);
+                if (rc)
+                        RETURN(rc);
+
+                OBD_ALLOC(lmm, lmmsize);
+                /* allocation may fail; bail out before touching lmm */
+                if (lmm == NULL)
+                        RETURN(-ENOMEM);
+                rc = copy_from_user(lmm, lum, lmmsize);
+                if (rc)
+                        GOTO(free_lmm, rc = -EFAULT);
+
+                rc = obd_unpackmd(sbi->ll_osc_exp, &lsm, lmm, lmmsize);
+                if (rc < 0)
+                        GOTO(free_lmm, rc = -ENOMEM);
+
+                rc = obd_checkmd(sbi->ll_osc_exp, sbi->ll_mdc_exp, lsm);
+                if (rc)
+                        GOTO(free_lsm, rc);
+
+                /* Perform glimpse_size operation. */
+                memset(&st, 0, sizeof(st));
+
+                rc = ll_glimpse_ioctl(sbi, lsm, &st);
+                if (rc)
+                        GOTO(free_lsm, rc);
+
+                rc = copy_to_user(&lumd->lmd_st, &st, sizeof(st));
+                if (rc)
+                        GOTO(free_lsm, rc = -EFAULT);
+
+                EXIT;
+        free_lsm:
+                obd_free_memmd(sbi->ll_osc_exp, &lsm);
+        free_lmm:
+                OBD_FREE(lmm, lmmsize);
                 return rc;
         }
         case OBD_IOC_LLOG_CATINFO: {
@@ -793,7 +807,7 @@ out_free_memmd:
                                 rc = -EFAULT;
                         GOTO(out_poll, rc);
                 }
-        out_poll:                 
+        out_poll:
                 OBD_FREE_PTR(check);
                 RETURN(rc);
         }
@@ -801,7 +815,7 @@ out_free_memmd:
         case OBD_IOC_QUOTACTL: {
                 struct if_quotactl *qctl;
                 struct obd_quotactl *oqctl;
-                
+
                 int cmd, type, id, rc = 0;
 
                 OBD_ALLOC_PTR(qctl);
index db01d19..7e69c2b 100644 (file)
@@ -916,6 +916,51 @@ static int ll_glimpse_callback(struct ldlm_lock *lock, void *reqp)
         return rc;
 }
 
+/* Glimpse @lsm via sbi->ll_osc_exp; fill size, blocks and [amc]time in @st. */
+int ll_glimpse_ioctl(struct ll_sb_info *sbi, struct lov_stripe_md *lsm,
+                     lstat_t *st)
+{
+        struct lustre_handle lockh = { 0 };
+        struct obd_enqueue_info einfo = { 0 };
+        struct obd_info oinfo = { { { 0 } } };
+        struct ost_lvb lvb;
+        int rc;
+        ENTRY;
+
+        einfo.ei_type = LDLM_EXTENT;
+        einfo.ei_mode = LCK_PR;
+        einfo.ei_flags = LDLM_FL_HAS_INTENT;
+        einfo.ei_cb_bl = ll_extent_lock_callback;
+        einfo.ei_cb_cp = ldlm_completion_ast;
+        einfo.ei_cb_gl = ll_glimpse_callback;
+        einfo.ei_cbdata = NULL;
+
+        oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
+        oinfo.oi_lockh = &lockh;
+        oinfo.oi_md = lsm;
+        /* -ENOENT is returned without logging; rc > 0 is mapped to -EIO */
+        rc = obd_enqueue_rqset(sbi->ll_osc_exp, &oinfo, &einfo);
+        if (rc == -ENOENT)
+                RETURN(rc);
+        if (rc != 0) {
+                CERROR("obd_enqueue returned rc %d, "
+                       "returning -EIO\n", rc);
+                RETURN(rc > 0 ? -EIO : rc);
+        }
+        /* copy the merged lvb into @st while holding the stripe lock */
+        lov_stripe_lock(lsm);
+        memset(&lvb, 0, sizeof(lvb));
+        obd_merge_lvb(sbi->ll_osc_exp, lsm, &lvb, 0);
+        st->st_size = lvb.lvb_size;
+        st->st_blocks = lvb.lvb_blocks;
+        st->st_mtime = lvb.lvb_mtime;
+        st->st_atime = lvb.lvb_atime;
+        st->st_ctime = lvb.lvb_ctime;
+        lov_stripe_unlock(lsm);
+
+        RETURN(rc);
+}
+
 /* NB: obd_merge_lvb will prefer locally cached writes if they extend the
  * file (because it prefers KMS over RSS when larger) */
 int ll_glimpse_size(struct inode *inode, int ast_flags)
@@ -1920,7 +1965,7 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
                 RETURN(ll_get_grouplock(inode, file, arg));
         case LL_IOC_GROUP_UNLOCK:
                 RETURN(ll_put_grouplock(inode, file, arg));
-        case LL_IOC_OBD_STATFS:
+        case IOC_OBD_STATFS:
                 RETURN(ll_obd_statfs(inode, (void *)arg));
 
         /* We need to special case any other ioctls we want to handle,
@@ -2198,9 +2243,9 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                 ll_prepare_mdc_op_data(&op_data, dentry->d_parent->d_inode,
                                        dentry->d_inode, NULL, 0, 0);
                 rc = mdc_intent_lock(exp, &op_data, NULL, 0,
-                                     /* we are not interested in name 
+                                     /* we are not interested in name
                                         based lookup */
-                                     &oit, 0, &req, 
+                                     &oit, 0, &req,
                                      ll_mdc_blocking_ast, 0);
                 if (rc < 0) {
                         rc = ll_inode_revalidate_fini(inode, rc);
index 9a9ec6c..d6c9f26 100644 (file)
@@ -430,6 +430,8 @@ int ll_extent_unlock(struct ll_file_data *, struct inode *,
 int ll_file_open(struct inode *inode, struct file *file);
 int ll_file_release(struct inode *inode, struct file *file);
 int ll_lsm_getattr(struct obd_export *, struct lov_stripe_md *, struct obdo *);
+int ll_glimpse_ioctl(struct ll_sb_info *sbi, 
+                     struct lov_stripe_md *lsm, lstat_t *st);
 int ll_glimpse_size(struct inode *inode, int ast_flags);
 int ll_local_open(struct file *file,
                   struct lookup_intent *it, struct ll_file_data *fd,
@@ -475,7 +477,7 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr);
 int ll_setattr(struct dentry *de, struct iattr *attr);
 int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
 int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-                       unsigned long maxage);
+                       __u64 max_age);
 void ll_update_inode(struct inode *inode, struct lustre_md *md);
 void ll_read_inode2(struct inode *inode, void *opaque);
 int ll_iocontrol(struct inode *inode, struct file *file,
index 6a7abff..fa33fce 100644 (file)
@@ -69,7 +69,7 @@ struct ll_sb_info *ll_init_sbi(void)
                 sbi->ll_async_page_max = (num_physpages / 4) * 3;
         sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8,
                                            SBI_DEFAULT_READAHEAD_MAX);
-        sbi->ll_ra_info.ra_max_read_ahead_whole_pages = 
+        sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
                                            SBI_DEFAULT_READAHEAD_WHOLE_MAX;
 
         INIT_LIST_HEAD(&sbi->ll_conn_chain);
@@ -140,25 +140,22 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                         CERROR("could not register mount in /proc/lustre");
         }
 
-        /* indicate that inodebits locking is supported by this client */
-        data->ocd_connect_flags |= OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH;
+        /* indicate the features supported by this client */
+        data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
+                                  OBD_CONNECT_ACL | OBD_CONNECT_JOIN |
+                                  OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION;
         data->ocd_ibits_known = MDS_INODELOCK_FULL;
+        data->ocd_version = LUSTRE_VERSION_CODE;
 
         if (sb->s_flags & MS_RDONLY)
                 data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
         if (sbi->ll_flags & LL_SBI_USER_XATTR)
                 data->ocd_connect_flags |= OBD_CONNECT_XATTR;
-        data->ocd_connect_flags |= OBD_CONNECT_ACL | OBD_CONNECT_JOIN | 
-                OBD_CONNECT_ATTRFID;
 
-        if (sbi->ll_flags & LL_SBI_FLOCK) {
+        if (sbi->ll_flags & LL_SBI_FLOCK)
                 sbi->ll_fop = &ll_file_operations_flock;
-        } else {
+        else
                 sbi->ll_fop = &ll_file_operations;
-        }
-
-        data->ocd_connect_flags |= OBD_CONNECT_VERSION;
-        data->ocd_version = LUSTRE_VERSION_CODE;
 
         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, data);
         if (err == -EBUSY) {
@@ -172,11 +169,12 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         }
         sbi->ll_mdc_exp = class_conn2export(&mdc_conn);
 
-        err = obd_statfs(obd, &osfs, jiffies - HZ);
+        err = obd_statfs(obd, &osfs, get_jiffies_64() - HZ);
         if (err)
                 GOTO(out_mdc, err);
 
-        /* MDC connect is surely finished by now */
+        /* MDC connect is surely finished by now because we actually sent
+         * a statfs RPC, otherwise obd_connect() is asynchronous. */
         *data = class_exp2cliimp(sbi->ll_mdc_exp)->imp_connect_data;
 
         LASSERT(osfs.os_bsize);
@@ -190,7 +188,7 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
         if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
             !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
                 LCONSOLE_INFO("Disabling user_xattr feature because "
-                              "it is not supported on the server\n"); 
+                              "it is not supported on the server\n");
                 sbi->ll_flags &= ~LL_SBI_USER_XATTR;
         }
 
@@ -221,8 +219,8 @@ int client_common_fill_super(struct super_block *sb, char *mdc, char *osc)
                 GOTO(out_mdc, err = -ENODEV);
         }
 
-        data->ocd_connect_flags =
-                OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL;
+        data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
+                                  OBD_CONNECT_REQPORTAL;
 
         CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d "
                "ocd_grant: %d\n", data->ocd_connect_flags,
@@ -352,11 +350,11 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 
         *lmmsize = obd_size_diskmd(sbi->ll_osc_exp, NULL);
         size = sizeof(int);
-        rc = obd_get_info(sbi->ll_mdc_exp, strlen("max_easize"), "max_easize", 
+        rc = obd_get_info(sbi->ll_mdc_exp, strlen("max_easize"), "max_easize",
                           &size, lmmsize);
-        if (rc) 
+        if (rc)
                 CERROR("Get max mdsize error rc %d \n", rc);
-        
+
         RETURN(rc);
 }
 
@@ -766,6 +764,7 @@ static int old_lustre_process_log(struct super_block *sb, char *newprofile,
         if (rc)
                 GOTO(out_cleanup, rc);
 
+        /* If we don't have this then an ACL MDS will refuse the connection */
         ocd.ocd_connect_flags = OBD_CONNECT_ACL;
 
         rc = obd_connect(&mdc_conn, obd, &mdc_uuid, &ocd);
@@ -1069,7 +1068,7 @@ void ll_clear_inode(struct inode *inode)
         LASSERT(!lli->lli_open_fd_write_count);
         LASSERT(!lli->lli_open_fd_read_count);
         LASSERT(!lli->lli_open_fd_exec_count);
-        
+
         if (lli->lli_mds_write_och)
                 ll_mdc_real_close(inode, FMODE_WRITE);
         if (lli->lli_mds_exec_och) {
@@ -1341,7 +1340,7 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 }
 
 int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
-                       unsigned long max_age)
+                       __u64 max_age)
 {
         struct ll_sb_info *sbi = ll_s2sbi(sb);
         struct obd_statfs obd_osfs;
@@ -1398,14 +1397,18 @@ int ll_statfs(struct super_block *sb, struct kstatfs *sfs)
         /* For now we will always get up-to-date statfs values, but in the
          * future we may allow some amount of caching on the client (e.g.
          * from QOS or lprocfs updates). */
-        rc = ll_statfs_internal(sb, &osfs, jiffies - 1);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - 1);
         if (rc)
                 return rc;
 
         statfs_unpack(sfs, &osfs);
 
-        if (sizeof(sfs->f_blocks) == 4) {
-                while (osfs.os_blocks > ~0UL) {
+        /* We need to downshift for all 32-bit kernels, because we can't
+         * tell if the kernel is being called via sys_statfs64() or not.
+         * Stop before overflowing f_bsize - in which case it is better
+         * to just risk EOVERFLOW if caller is using old sys_statfs(). */
+        if (sizeof(long) < 8) {
+                while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
                         sfs->f_bsize <<= 1;
 
                         osfs.os_blocks >>= 1;
@@ -1457,10 +1460,10 @@ void ll_inode_size_unlock(struct inode *inode, int unlock_lsm)
 static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm)
 {
         struct ll_inode_info *lli = ll_i2info(inode);
+
         dump_lsm(D_INODE, lsm);
-        dump_lsm(D_INODE, lli->lli_smd); 
-        LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN, 
+        dump_lsm(D_INODE, lli->lli_smd);
+        LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN,
                  "lsm must be joined lsm %p\n", lsm);
         obd_free_memmd(ll_i2obdexp(inode), &lli->lli_smd);
         CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n",
@@ -1480,7 +1483,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
         if (lsm != NULL) {
                 if (lli->lli_smd == NULL) {
-                        if (lsm->lsm_magic != LOV_MAGIC && 
+                        if (lsm->lsm_magic != LOV_MAGIC &&
                             lsm->lsm_magic != LOV_MAGIC_JOIN) {
                                 dump_lsm(D_ERROR, lsm);
                                 LBUG();
@@ -1496,7 +1499,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
                 } else {
                         if (lli->lli_smd->lsm_magic == lsm->lsm_magic &&
-                             lli->lli_smd->lsm_stripe_count == 
+                             lli->lli_smd->lsm_stripe_count ==
                                         lsm->lsm_stripe_count) {
                                 if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
                                         CERROR("lsm mismatch for inode %ld\n",
@@ -1507,7 +1510,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
                                         dump_lsm(D_ERROR, lsm);
                                         LBUG();
                                 }
-                        } else 
+                        } else
                                 ll_replace_lsm(inode, lsm);
                 }
                 if (lli->lli_smd != lsm)
@@ -1699,7 +1702,7 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                 oinfo.oi_oa->o_flags = flags;
                 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS;
 
-                obdo_from_inode(oinfo.oi_oa, inode, 
+                obdo_from_inode(oinfo.oi_oa, inode,
                                 OBD_MD_FLFID | OBD_MD_FLGENER);
                 rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
                 obdo_free(oinfo.oi_oa);
@@ -1898,7 +1901,7 @@ int ll_obd_statfs(struct inode *inode, void *arg)
         if (!client_obd)
                 GOTO(out_statfs, rc = -EINVAL);
 
-        rc = obd_statfs(client_obd, &stat_buf, jiffies - 1);
+        rc = obd_statfs(client_obd, &stat_buf, get_jiffies_64() - 1);
         if (rc)
                 GOTO(out_statfs, rc);
 
index 5c7ee25..ddc1847 100644 (file)
@@ -46,7 +46,7 @@ static int ll_rd_blksize(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
               *eof = 1;
               rc = snprintf(page, count, "%u\n", osfs.os_bsize);
@@ -63,7 +63,7 @@ static int ll_rd_kbytestotal(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
                 __u32 blk_size = osfs.os_bsize >> 10;
                 __u64 result = osfs.os_blocks;
@@ -86,7 +86,7 @@ static int ll_rd_kbytesfree(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
                 __u32 blk_size = osfs.os_bsize >> 10;
                 __u64 result = osfs.os_bfree;
@@ -108,7 +108,7 @@ static int ll_rd_kbytesavail(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
                 __u32 blk_size = osfs.os_bsize >> 10;
                 __u64 result = osfs.os_bavail;
@@ -130,7 +130,7 @@ static int ll_rd_filestotal(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
                  *eof = 1;
                  rc = snprintf(page, count, LPU64"\n", osfs.os_files);
@@ -146,7 +146,7 @@ static int ll_rd_filesfree(char *page, char **start, off_t off, int count,
         int rc;
 
         LASSERT(sb != NULL);
-        rc = ll_statfs_internal(sb, &osfs, jiffies - HZ);
+        rc = ll_statfs_internal(sb, &osfs, get_jiffies_64() - HZ);
         if (!rc) {
                  *eof = 1;
                  rc = snprintf(page, count, LPU64"\n", osfs.os_ffree);
@@ -516,7 +516,7 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         obd = class_name2obd(mdc);
 
         LASSERT(obd != NULL);
-        LASSERT(obd->obd_type != NULL);
+        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
         LASSERT(obd->obd_type->typ_name != NULL);
 
         snprintf(name, MAX_STRING_SIZE, "%s/common_name",
@@ -536,7 +536,7 @@ int lprocfs_register_mountpoint(struct proc_dir_entry *parent,
         obd = class_name2obd(osc);
 
         LASSERT(obd != NULL);
-        LASSERT(obd->obd_type != NULL);
+        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
         LASSERT(obd->obd_type->typ_name != NULL);
 
         snprintf(name, MAX_STRING_SIZE, "%s/common_name",
index 56a4694..3582dbf 100644 (file)
@@ -387,10 +387,23 @@ static void ll_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         EXIT;
 }
 
+static void ll_ap_update_obdo(void *data, int cmd, struct obdo *oa,
+                              obd_valid valid)
+{
+        struct ll_async_page *llap;
+        ENTRY;
+        /* ap_update_obdo hook: @data is the llap cookie; @cmd is unused. */
+        llap = LLAP_FROM_COOKIE(data);
+        obdo_from_inode(oa, llap->llap_page->mapping->host, valid);
+
+        EXIT;
+}
+
 static struct obd_async_page_ops ll_async_page_ops = {
         .ap_make_ready =        ll_ap_make_ready,
         .ap_refresh_count =     ll_ap_refresh_count,
         .ap_fill_obdo =         ll_ap_fill_obdo,
+        .ap_update_obdo =       ll_ap_update_obdo,
         .ap_completion =        ll_ap_completion,
 };
 
index 668a15c..0885cb8 100755 (executable)
@@ -194,7 +194,7 @@ struct lsm_operations lsm_plain_ops = {
         .lsm_unpackmd           = lsm_unpackmd_plain,
 };
 
-struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
+struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, __u64 lov_off)
 {
         struct lov_array_info *lai;
         struct lov_extent *le;
@@ -211,7 +211,7 @@ struct lov_extent *lovea_off2le(struct lov_stripe_md *lsm, obd_off lov_off)
                ; /* empty loop */
         }
 
-        CDEBUG(D_INFO, "off "LPU64" idx%d, ext"LPU64":"LPU64"idx%d sc%d\n",
+        CDEBUG(D_INFO, "off "LPU64" idx %d, ext "LPU64":"LPU64" idx %d sc %d\n",
                lov_off, i, le->le_start, le->le_len, le->le_loi_idx,
                le->le_stripe_count);
 
@@ -236,13 +236,12 @@ struct lov_extent *lovea_idx2le(struct lov_stripe_md *lsm, int stripe_no)
                 ; /* empty loop */
         }
 
-        CDEBUG(D_INFO, "stripe %d idx%d, ext"LPU64":"LPU64"idx %d scount%d\n",
+        CDEBUG(D_INFO, "stripe %d idx %d, ext "LPU64":"LPU64" idx %d sc %d\n",
                stripe_no, i, le->le_start, le->le_len, le->le_loi_idx,
                le->le_stripe_count);
         RETURN(le);
 }
 
-
 static void lovea_free_array_info(struct lov_stripe_md *lsm)
 {
         if (!lsm || !lsm->lsm_array)
index 1fcb361..2076e3f 100644 (file)
@@ -174,7 +174,7 @@ static int lov_connect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt,
                 char name[MAX_STRING_SIZE];
 
                 LASSERT(osc_obd != NULL);
-                LASSERT(osc_obd->obd_type != NULL);
+                LASSERT(osc_obd->obd_magic == OBD_DEVICE_MAGIC);
                 LASSERT(osc_obd->obd_type->typ_name != NULL);
                 snprintf(name, MAX_STRING_SIZE, "../../../%s/%s",
                          osc_obd->obd_type->typ_name,
@@ -1049,7 +1049,7 @@ static int lov_destroy(struct obd_export *exp, struct obdo *oa,
                 if (oa->o_valid & OBD_MD_FLCOOKIE)
                         oti->oti_logcookies = set->set_cookies + req->rq_stripe;
 
-                err = obd_destroy(lov->tgts[req->rq_idx].ltd_exp, 
+                err = obd_destroy(lov->tgts[req->rq_idx].ltd_exp,
                                   req->rq_oi.oi_oa, NULL, oti, NULL);
                 err = lov_update_common_set(set, req, err);
                 if (err) {
@@ -1591,6 +1591,14 @@ static void lov_ap_fill_obdo(void *data, int cmd, struct obdo *oa)
         oa->o_stripe_idx = lap->lap_stripe;
 }
 
+static void lov_ap_update_obdo(void *data, int cmd, struct obdo *oa,
+                               obd_valid valid)
+{
+        struct lov_async_page *lap = LAP_FROM_COOKIE(data);
+        /* Forward unchanged to the caller's ap_update_obdo for this page. */
+        lap->lap_caller_ops->ap_update_obdo(lap->lap_caller_data, cmd,oa,valid);
+}
+
 static int lov_ap_completion(void *data, int cmd, struct obdo *oa, int rc)
 {
         struct lov_async_page *lap = LAP_FROM_COOKIE(data);
@@ -1606,6 +1614,7 @@ static struct obd_async_page_ops lov_async_page_ops = {
         .ap_make_ready =        lov_ap_make_ready,
         .ap_refresh_count =     lov_ap_refresh_count,
         .ap_fill_obdo =         lov_ap_fill_obdo,
+        .ap_update_obdo =       lov_ap_update_obdo,
         .ap_completion =        lov_ap_completion,
 };
 
@@ -2064,8 +2073,7 @@ static int lov_statfs_interpret(struct ptlrpc_request_set *rqset,
 }
 
 static int lov_statfs_async(struct obd_device *obd, struct obd_info *oinfo,
-                            unsigned long max_age,
-                            struct ptlrpc_request_set *rqset)
+                            cfs_time_t max_age, struct ptlrpc_request_set *rqset)
 {
         struct lov_request_set *set;
         struct lov_request *req;
@@ -2123,8 +2131,8 @@ static int lov_statfs(struct obd_device *obd, struct obd_statfs *osfs,
                         continue;
                 }
 
-                err = obd_statfs(class_exp2obd(lov->tgts[i].ltd_exp), &lov_sfs,
-                                 max_age);
+                err = obd_statfs(class_exp2obd(lov->tgts[i].ltd_exp), 
+                                 &lov_sfs, max_age);
                 if (err) {
                         if (lov->tgts[i].ltd_active && !rc)
                                 rc = err;
index 28ec334..e31dceb 100644 (file)
@@ -1403,7 +1403,7 @@ int lov_fini_statfs(struct obd_device *obd, struct obd_statfs *osfs,int success)
 
                 spin_lock(&obd->obd_osfs_lock);
-                memcpy(&obd->obd_osfs, osfs, sizeof(osfs));
-                obd->obd_osfs_age = jiffies;
+                memcpy(&obd->obd_osfs, osfs, sizeof(*osfs));
+                obd->obd_osfs_age = get_jiffies_64();
                 spin_unlock(&obd->obd_osfs_lock);
                 RETURN(0);
         }
@@ -1435,7 +1435,7 @@ void lov_update_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 {
         spin_lock(&obd->obd_osfs_lock);
-        memcpy(&obd->obd_osfs, lov_sfs, sizeof(osfs));
-        obd->obd_osfs_age = jiffies;
+        memcpy(&obd->obd_osfs, lov_sfs, sizeof(obd->obd_osfs));
+        obd->obd_osfs_age = get_jiffies_64();
         spin_unlock(&obd->obd_osfs_lock);
 
         if (success == 0) {
index acf65da..89422a8 100644 (file)
@@ -533,12 +533,6 @@ static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file,
                 RETURN(-EPERM);
         }
 
-        /* FIXME: Can't do this because of nested transaction deadlock */
-        if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
-                CERROR("can't set data journal flag on file\n");
-                RETURN(-EPERM);
-        }
-
         if (inode->i_fop->ioctl)
                 rc = inode->i_fop->ioctl(inode, file, cmd, arg);
         else
@@ -676,7 +670,7 @@ static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count,
 
                                 CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off);
                                 memset(fake, 0, sizeof(*fake));
-                                fake->rec_len = cpu_to_le32(blksize);
+                                fake->rec_len = cpu_to_le16(blksize);
                         }
                         count -= blksize;
                         buf += blksize;
index f567a53..10b469e 100644 (file)
@@ -878,9 +878,11 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen,
                         RETURN(-EINVAL);
 
                 if (*((int *)val)) {
+                        imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY;
                         imp->imp_connect_data.ocd_connect_flags |=
                                 OBD_CONNECT_RDONLY;
                 } else {
+                        imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY;
                         imp->imp_connect_data.ocd_connect_flags &=
                                 ~OBD_CONNECT_RDONLY;
                 }
@@ -899,6 +901,7 @@ int mdc_set_info_async(struct obd_export *exp, obd_count keylen,
                         rc = ptlrpc_queue_wait(req);
                         ptlrpc_req_finished(req);
                 }
+
                 RETURN(rc);
         }
 
@@ -913,7 +916,7 @@ int mdc_get_info(struct obd_export *exp, __u32 keylen, void *key,
         if (keylen == strlen("max_easize") &&
             memcmp(key, "max_easize", strlen("max_easize")) == 0) {
                 int mdsize, *max_easize;
-                
+
                 if (*vallen != sizeof(int))
                         RETURN(-EINVAL);
                 mdsize = *(int*)val;
index 452aba5..b19582c 100644 (file)
@@ -1017,7 +1017,7 @@ out_ucred:
 }
 
 static int mds_obd_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                          unsigned long max_age)
+                          cfs_time_t max_age)
 {
         int rc;
 
@@ -1050,7 +1050,7 @@ static int mds_statfs(struct ptlrpc_request *req)
         /* We call this so that we can cache a bit - 1 jiffie worth */
         rc = mds_obd_statfs(obd, lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                                 size[REPLY_REC_OFF]),
-                            jiffies - HZ);
+                            cfs_time_current() - HZ);
         if (rc) {
                 CERROR("mds_obd_statfs failed: rc %d\n", rc);
                 GOTO(out, rc);
index 9fd562a..fc38654 100644 (file)
@@ -179,7 +179,7 @@ int mds_cleanup_pending(struct obd_device *obd);
 
 
 /* mds/mds_log.c */
-int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
+int mds_log_op_unlink(struct obd_device *obd,
                       struct lov_mds_md *lmm, int lmm_size,
                       struct llog_cookie *logcookies, int cookies_size);
 int mds_log_op_setattr(struct obd_device *obd, struct inode *inode,
index 3b181b4..a9761d1 100644 (file)
@@ -87,7 +87,7 @@ static int mds_llog_repl_cancel(struct llog_ctxt *ctxt, struct lov_stripe_md *ls
         RETURN(rc);
 }
 
-int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
+int mds_log_op_unlink(struct obd_device *obd, 
                       struct lov_mds_md *lmm, int lmm_size,
                       struct llog_cookie *logcookies, int cookies_size)
 {
index 461a22a..c9be921 100644 (file)
@@ -1306,7 +1306,7 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,struct obd_device *obd,
 
                 if (req != NULL && req->rq_repmsg != NULL &&
                     (reply_body->valid & OBD_MD_FLEASIZE) &&
-                    mds_log_op_unlink(obd, pending_child->d_inode, lmm,
+                    mds_log_op_unlink(obd, lmm,
                              lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF+1),
                              lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF+2, 0),
                              lustre_msg_buflen(req->rq_repmsg, REPLY_REC_OFF+2))
index 146d13e..48e9d2b 100644 (file)
@@ -1678,7 +1678,7 @@ static int mds_reint_unlink(struct mds_update_record *rec, int offset,
                 if (!(body->valid & OBD_MD_FLEASIZE)) {
                         body->valid |=(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                        OBD_MD_FLATIME | OBD_MD_FLMTIME);
-                } else if (mds_log_op_unlink(obd, child_inode,
+                } else if (mds_log_op_unlink(obd,
                                 lustre_msg_buf(req->rq_repmsg, offset + 1, 0),
                                 lustre_msg_buflen(req->rq_repmsg, offset + 1),
                                 lustre_msg_buf(req->rq_repmsg, offset + 2, 0),
@@ -2224,7 +2224,7 @@ no_unlink:
                 if (!(body->valid & OBD_MD_FLEASIZE)) {
                         body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                         OBD_MD_FLATIME | OBD_MD_FLMTIME);
-                } else if (mds_log_op_unlink(obd, new_inode,
+                } else if (mds_log_op_unlink(obd,
                                              lustre_msg_buf(req->rq_repmsg,
                                                             offset + 1, 0),
                                              lustre_msg_buflen(req->rq_repmsg,
index ed4539b..56aeb22 100644 (file)
@@ -45,7 +45,7 @@
 #include "mds_internal.h"
 
 static int mds_osc_destroy_orphan(struct obd_device *obd,
-                                  struct inode *inode,
+                                  umode_t mode,
                                   struct lov_mds_md *lmm,
                                   int lmm_size,
                                   struct llog_cookie *logcookies,
@@ -78,7 +78,7 @@ static int mds_osc_destroy_orphan(struct obd_device *obd,
         if (oa == NULL)
                 GOTO(out_free_memmd, rc = -ENOMEM);
         oa->o_id = lsm->lsm_object_id;
-        oa->o_mode = inode->i_mode & S_IFMT;
+        oa->o_mode = mode & S_IFMT;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
 
         if (log_unlink && logcookies) {
@@ -103,14 +103,16 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild,
         struct llog_cookie *logcookies = NULL;
         int lmm_size, log_unlink = 0, cookie_size = 0;
         void *handle = NULL;
+        umode_t mode;
         int rc, err;
         ENTRY;
 
         LASSERT(mds->mds_osc_obd != NULL);
-
+        
         /* We don't need to do any of these other things for orhpan dirs,
          * especially not mds_get_md (may get a default LOV EA, bug 4554) */
-        if (S_ISDIR(inode->i_mode)) {
+        mode = inode->i_mode;
+        if (S_ISDIR(mode)) {
                 rc = vfs_rmdir(pending_dir, dchild);
                 if (rc)
                         CERROR("error %d unlinking dir %*s from PENDING\n",
@@ -145,7 +147,7 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild,
                 OBD_ALLOC(logcookies, cookie_size);
                 if (logcookies == NULL)
                         rc = -ENOMEM;
-                else if (mds_log_op_unlink(obd, inode, lmm,lmm_size,logcookies,
+                else if (mds_log_op_unlink(obd, lmm,lmm_size,logcookies,
                                            cookie_size) > 0)
                         log_unlink = 1;
         }
@@ -156,7 +158,7 @@ static int mds_unlink_orphan(struct obd_device *obd, struct dentry *dchild,
                 if (!rc)
                         rc = err;
         } else if (!rc) {
-                rc = mds_osc_destroy_orphan(obd, inode, lmm, lmm_size,
+                rc = mds_osc_destroy_orphan(obd, mode, lmm, lmm_size,
                                             logcookies, log_unlink);
         }
 
index 297824f..1e80cb6 100644 (file)
@@ -51,9 +51,9 @@
 atomic_t libcfs_kmemory = {0};
 #endif
 
-struct obd_device obd_dev[MAX_OBD_DEVICES];
+struct obd_device *obd_devs[MAX_OBD_DEVICES];
 struct list_head obd_types;
-spinlock_t obd_dev_lock;
+spinlock_t obd_dev_lock = SPIN_LOCK_UNLOCKED;
 #ifndef __KERNEL__
 atomic_t obd_memory;
 int obd_memmax;
@@ -113,7 +113,7 @@ static inline void obd_conn2data(struct obd_ioctl_data *data,
         data->ioc_cookie = conn->cookie;
 }
 
-int class_resolve_dev_name(uint32_t len, char *name)
+int class_resolve_dev_name(uint32_t len, const char *name)
 {
         int rc;
         int dev;
@@ -159,7 +159,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                 return 0;
         }
 
-        CDEBUG(D_IOCTL, "cmd = %x, obd = %p\n", cmd, obd);
+        CDEBUG(D_IOCTL, "cmd = %x\n", cmd);
         if (obd_ioctl_getdata(&buf, &len, (void *)arg)) {
                 CERROR("OBD ioctl: data error\n");
                 GOTO(out, err = -EINVAL);
@@ -283,7 +283,7 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
                                 
                 if (index >= MAX_OBD_DEVICES)
                         GOTO(out, err = -ENOENT);
-                obd = &obd_dev[index];
+                obd = obd_devs[index];
                 if (!obd->obd_type)
                         GOTO(out, err = -ENOENT);
                 
@@ -307,12 +307,19 @@ int class_handle_ioctl(unsigned int cmd, unsigned long arg)
 
         }
 
-        if (data->ioc_dev >= MAX_OBD_DEVICES) {
+        if (data->ioc_dev >= class_devno_max()) {
                 CERROR("OBD ioctl: No device\n");
                 GOTO(out, err = -EINVAL);
         }
-        obd = &obd_dev[data->ioc_dev];
-        if (!(obd && obd->obd_set_up) || obd->obd_stopping) {
+
+        obd = class_num2obd(data->ioc_dev);
+        if (obd == NULL) {
+                CERROR("OBD ioctl : No Device %d\n", data->ioc_dev);
+                GOTO(out, err = -EINVAL);
+        }
+        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
+
+        if (!obd->obd_set_up || obd->obd_stopping) {
                 CERROR("OBD ioctl: device not setup %d \n", data->ioc_dev);
                 GOTO(out, err = -EINVAL);
         }
@@ -371,7 +378,7 @@ extern cfs_psdev_t obd_psdev;
 void *obd_psdev = NULL;
 #endif
 
-EXPORT_SYMBOL(obd_dev);
+EXPORT_SYMBOL(obd_devs);
 EXPORT_SYMBOL(obd_fail_loc);
 EXPORT_SYMBOL(obd_print_fail_loc);
 EXPORT_SYMBOL(obd_race_waitq);
@@ -386,14 +393,12 @@ EXPORT_SYMBOL(proc_lustre_root);
 
 EXPORT_SYMBOL(class_register_type);
 EXPORT_SYMBOL(class_unregister_type);
-EXPORT_SYMBOL(class_search_type);
 EXPORT_SYMBOL(class_get_type);
 EXPORT_SYMBOL(class_put_type);
 EXPORT_SYMBOL(class_name2dev);
 EXPORT_SYMBOL(class_name2obd);
 EXPORT_SYMBOL(class_uuid2dev);
 EXPORT_SYMBOL(class_uuid2obd);
-EXPORT_SYMBOL(class_obd_list);
 EXPORT_SYMBOL(class_find_client_obd);
 EXPORT_SYMBOL(class_find_client_notype);
 EXPORT_SYMBOL(class_devices_in_group);
@@ -403,6 +408,7 @@ EXPORT_SYMBOL(class_conn2obd);
 EXPORT_SYMBOL(class_exp2cliimp);
 EXPORT_SYMBOL(class_conn2cliimp);
 EXPORT_SYMBOL(class_disconnect);
+EXPORT_SYMBOL(class_num2obd);
 
 /* uuid.c */
 EXPORT_SYMBOL(class_generate_random_uuid);
@@ -513,7 +519,6 @@ int init_obdclass(void)
 #endif
 {
         int i, err;
-        struct obd_device *obd;
 #ifdef __KERNEL__
         int lustre_register_fs(void);
 
@@ -547,8 +552,8 @@ int init_obdclass(void)
         }
 
         /* This struct is already zerod for us (static global) */
-        for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++)
-                obd->obd_minor = i;
+        for (i = 0; i < class_devno_max(); i++)
+                obd_devs[i] = NULL;
 
         err = obd_init_caches();
         if (err)
@@ -573,11 +578,12 @@ static void cleanup_obdclass(void)
         lustre_unregister_fs();
 
         cfs_psdev_deregister(&obd_psdev);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_type && obd->obd_set_up &&
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd->obd_set_up &&
                     OBT(obd) && OBP(obd, detach)) {
                         /* XXX should this call generic detach otherwise? */
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                         OBP(obd, detach)(obd);
                 }
         }
index 9808155..e750395 100644 (file)
@@ -36,6 +36,7 @@
 extern struct list_head obd_types;
 spinlock_t obd_types_lock;
 
+cfs_mem_cache_t *obd_device_cachep;
 cfs_mem_cache_t *obdo_cachep;
 EXPORT_SYMBOL(obdo_cachep);
 cfs_mem_cache_t *import_cachep;
@@ -46,7 +47,28 @@ int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
  * support functions: we could use inter-module communication, but this
  * is more portable to other OS's
  */
-struct obd_type *class_search_type(char *name)
+static struct obd_device *obd_device_alloc(void)
+{
+        struct obd_device *obd;
+        /* NOTE(review): called under obd_dev_lock; confirm SLAB_KERNEL is ok */
+        OBD_SLAB_ALLOC(obd, obd_device_cachep, SLAB_KERNEL, sizeof(*obd));
+        if (obd != NULL) {
+                obd->obd_magic = OBD_DEVICE_MAGIC; /* checked by later LASSERTs */
+        }
+        return obd;     /* may be NULL; class_newdev() checks for that */
+}
+EXPORT_SYMBOL(obd_device_alloc); /* NOTE(review): static yet exported; confirm */
+
+static void obd_device_free(struct obd_device *obd)
+{
+        LASSERT(obd != NULL);   /* a NULL device is a caller bug */
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n", 
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        OBD_SLAB_FREE(obd, obd_device_cachep, sizeof(*obd)); /* back to the cache */
+}
+EXPORT_SYMBOL(obd_device_free); /* NOTE(review): static yet exported; confirm */
+
+struct obd_type *class_search_type(const char *name)
 {
         struct list_head *tmp;
         struct obd_type *type;
@@ -63,13 +85,13 @@ struct obd_type *class_search_type(char *name)
         return NULL;
 }
 
-struct obd_type *class_get_type(char *name)
+struct obd_type *class_get_type(const char *name)
 {
         struct obd_type *type = class_search_type(name);
 
 #ifdef CONFIG_KMOD
         if (!type) {
-                char *modname = name;
+                const char *modname = name;
                 if (strcmp(modname, LUSTRE_MDT_NAME) == 0) 
                         modname = LUSTRE_MDS_NAME;
                 if (!request_module(modname)) {
@@ -80,19 +102,26 @@ struct obd_type *class_get_type(char *name)
                 }
         }
 #endif
-        if (type)
+        if (type) {
+                spin_lock(&type->obd_type_lock);
+                type->typ_refcnt++;
                 try_module_get(type->typ_ops->o_owner);
+                spin_unlock(&type->obd_type_lock);
+        }
         return type;
 }
 
 void class_put_type(struct obd_type *type)
 {
         LASSERT(type);
+        spin_lock(&type->obd_type_lock);  /* serialises typ_refcnt updates */
+        type->typ_refcnt--;               /* pairs with class_get_type() */
         module_put(type->typ_ops->o_owner);
+        spin_unlock(&type->obd_type_lock);
 }
 
 int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
-                        char *name)
+                        const char *name)
 {
         struct obd_type *type;
         int rc = 0;
@@ -117,6 +146,7 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
 
         *(type->typ_ops) = *ops;
         strcpy(type->typ_name, name);
+        spin_lock_init(&type->obd_type_lock);
 
 #ifdef LPROCFS
         type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
@@ -143,7 +173,7 @@ int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
         RETURN(rc);
 }
 
-int class_unregister_type(char *name)
+int class_unregister_type(const char *name)
 {
         struct obd_type *type = class_search_type(name);
         ENTRY;
@@ -176,51 +206,93 @@ int class_unregister_type(char *name)
         RETURN(0);
 } /* class_unregister_type */
 
-struct obd_device *class_newdev(struct obd_type *type, char *name)
+/* Register device @name of type @type_name in the first free obd_devs[] slot.
+ * Returns the new device or ERR_PTR(-EINVAL, -ENODEV, -EEXIST, -ENOMEM or
+ * -EOVERFLOW); a type reference taken here is dropped again on failure. */
+struct obd_device *class_newdev(const char *type_name, const char *name)
 {
         struct obd_device *result = NULL;
+        struct obd_type *type = NULL;
         int i;
+        int new_obd_minor = 0;
+
+        if (strlen(name) >= MAX_OBD_NAME) {     /* keep room for the NUL */
+                CERROR("name/uuid must be < %u bytes long\n",MAX_OBD_NAME);
+                RETURN(ERR_PTR(-EINVAL));
+        }
+
+        type = class_get_type(type_name);
+        if (type == NULL){
+                CERROR("OBD: unknown type: %s\n", type_name);
+                RETURN(ERR_PTR(-ENODEV));
+        }
 
         spin_lock(&obd_dev_lock);
-        for (i = 0 ; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_name && (strcmp(name, obd->obd_name) == 0)) {
+        for (i = 0 ; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd->obd_name && (strcmp(name, obd->obd_name) == 0)) {
                         CERROR("Device %s already exists, won't add\n", name);
                         if (result) {
-                                result->obd_type = NULL;
-                                result->obd_name = NULL;
-                                result = NULL;
+                                LASSERTF(result->obd_magic == OBD_DEVICE_MAGIC,
+                                         "%p obd_magic %08x != %08x\n",
+                                         result, result->obd_magic, OBD_DEVICE_MAGIC);
+                                LASSERTF(result->obd_minor == new_obd_minor,
+                                         "%p obd_minor %d != %d\n",
+                                         result, result->obd_minor, new_obd_minor);
+                                obd_devs[result->obd_minor] = NULL; /* undo claimed slot */
+                                result->obd_name[0]='\0';
+                                obd_device_free(result);
                         }
+                        result = ERR_PTR(-EEXIST);
                         break;
                 }
-                if (!result && !obd->obd_type) {
-                        LASSERT(obd->obd_minor == i);
-                        memset(obd, 0, sizeof(*obd));
+                if (!result && !obd) {
+                        obd = obd_device_alloc();
+                        if (obd == NULL) {
+                                result = ERR_PTR(-ENOMEM);
+                                break;  /* not GOTO: the lock must be dropped */
+                        }
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                         obd->obd_minor = i;
+                        new_obd_minor = i;
                         obd->obd_type = type;
-                        obd->obd_name = name;
+                        memcpy(obd->obd_name, name, strlen(name) + 1);
                         CDEBUG(D_IOCTL, "Adding new device %s (%p)\n",
                                obd->obd_name, obd);
                         result = obd;
+                        obd_devs[i] = result;
                 }
         }
         spin_unlock(&obd_dev_lock);
+        if (result == NULL)             /* every obd_devs[] slot is in use */
+                result = ERR_PTR(-EOVERFLOW);
+        if (IS_ERR(result))
+                class_put_type(type);
         return result;
 }
 
 void class_release_dev(struct obd_device *obd)
 {
-        int minor = obd->obd_minor;
+        struct obd_type *obd_type = obd->obd_type; /* saved: obd is freed below */
+        /* sanity-check the device and its obd_devs[] slot before releasing it */
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "%p obd_magic %08x != %08x\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(obd == obd_devs[obd->obd_minor], "obd %p != obd_devs[%d] %p\n",
+                 obd, obd->obd_minor, obd_devs[obd->obd_minor]);
+        LASSERT(obd_type != NULL);
+
+        CDEBUG(D_INFO, "Release obd device %s obd_type name =%s\n",
+               obd->obd_name,obd->obd_type->typ_name);
 
         spin_lock(&obd_dev_lock);
-        memset(obd, 0x5a, sizeof(*obd));
-        obd->obd_type = NULL;
-        obd->obd_minor = minor;
-        obd->obd_name = NULL;
+        obd_devs[obd->obd_minor] = NULL;        /* empty the slot, then free */
+        obd_device_free(obd);
         spin_unlock(&obd_dev_lock);
+        /* drop the type reference the device has held since class_newdev() */
+        class_put_type(obd_type);
 }
 
-int class_name2dev(char *name)
+int class_name2dev(const char *name)
 {
         int i;
 
@@ -228,11 +300,12 @@ int class_name2dev(char *name)
                 return -1;
 
         spin_lock(&obd_dev_lock);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd->obd_name && strcmp(name, obd->obd_name) == 0) {
                         /* Make sure we finished attaching before we give
                            out any references */
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                         if (obd->obd_attached) {
                                 spin_unlock(&obd_dev_lock);
                                 return i;
@@ -245,12 +318,13 @@ int class_name2dev(char *name)
         return -1;
 }
 
-struct obd_device *class_name2obd(char *name)
+struct obd_device *class_name2obd(const char *name)
 {
         int dev = class_name2dev(name);
-        if (dev < 0)
+        /* valid device numbers run from 0 to class_devno_max() - 1 */
+        if (dev < 0 || dev >= class_devno_max())
                 return NULL;
-        return &obd_dev[dev];
+        return class_num2obd(dev);
 }
 
 int class_uuid2dev(struct obd_uuid *uuid)
@@ -258,9 +332,10 @@ int class_uuid2dev(struct obd_uuid *uuid)
         int i;
 
         spin_lock(&obd_dev_lock);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd_uuid_equals(uuid, &obd->obd_uuid)) {
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd && obd_uuid_equals(uuid, &obd->obd_uuid)) {
+                        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
                         spin_unlock(&obd_dev_lock);
                         return i;
                 }
@@ -275,7 +350,28 @@ struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
         int dev = class_uuid2dev(uuid);
         if (dev < 0)
                 return NULL;
-        return &obd_dev[dev];
+        return class_num2obd(dev);
+}
+
+/* Return the device registered as number @num, or NULL when @num is out of
+ * range or its obd_devs[] slot is empty. */
+struct obd_device *class_num2obd(int num)
+{
+        struct obd_device *obd;
+
+        if (num < 0 || num >= class_devno_max())
+                return NULL;
+        obd = obd_devs[num];
+        if (obd == NULL)
+                return NULL;
+
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC,
+                 "%p obd_magic %08x != %08x\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(obd->obd_minor == num,
+                 "%p obd_minor %0d != %0d\n",
+                 obd, obd->obd_minor, num);
+        return obd;
+}
 
 void class_obd_list(void)
@@ -284,9 +380,9 @@ void class_obd_list(void)
         int i;
 
         spin_lock(&obd_dev_lock);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_type == NULL)
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd == NULL)
                         continue;
                 if (obd->obd_stopping)
                         status = "ST";
@@ -309,15 +405,15 @@ void class_obd_list(void)
    specified, then only the client with that uuid is returned,
    otherwise any client connected to the tgt is returned. */
 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
-                                          char * typ_name,
+                                          const char * typ_name,
                                           struct obd_uuid *grp_uuid)
 {
         int i;
 
         spin_lock(&obd_dev_lock);
-        for (i = 0; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_type == NULL)
+        for (i = 0; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd == NULL)
                         continue;
                 if ((strncmp(obd->obd_type->typ_name, typ_name,
                              strlen(typ_name)) == 0)) {
@@ -357,15 +453,15 @@ struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
 
         if (next == NULL)
                 i = 0;
-        else if (*next >= 0 && *next < MAX_OBD_DEVICES)
+        else if (*next >= 0 && *next < class_devno_max())
                 i = *next;
         else
                 return NULL;
 
         spin_lock(&obd_dev_lock);
-        for (; i < MAX_OBD_DEVICES; i++) {
-                struct obd_device *obd = &obd_dev[i];
-                if (obd->obd_type == NULL)
+        for (; i < class_devno_max(); i++) {
+                struct obd_device *obd = class_num2obd(i);
+                if (obd == NULL)
                         continue;
                 if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
                         if (next != NULL)
@@ -385,6 +481,11 @@ void obd_cleanup_caches(void)
         int rc;
 
         ENTRY;
+        if (obd_device_cachep) {
+                rc = cfs_mem_cache_destroy(obd_device_cachep);
+                LASSERTF(rc == 0, "Cannot destropy ll_obd_device_cache: rc %d\n", rc);
+                obd_device_cachep = NULL;
+        }
         if (obdo_cachep) {
                 rc = cfs_mem_cache_destroy(obdo_cachep);
                 LASSERTF(rc == 0, "Cannot destory ll_obdo_cache\n");
@@ -402,6 +503,12 @@ int obd_init_caches(void)
 {
         ENTRY;
 
+        LASSERT(obd_device_cachep == NULL);
+        obd_device_cachep = cfs_mem_cache_create("ll_obd_dev_cache",
+                                              sizeof(struct obd_device), 0, 0);
+        if (!obd_device_cachep)
+                GOTO(out, -ENOMEM);
+
         LASSERT(obdo_cachep == NULL);
         obdo_cachep = cfs_mem_cache_create("ll_obdo_cache", sizeof(struct obdo),
                                         0, 0);
index 517035c..46a2207 100644 (file)
@@ -254,7 +254,7 @@ static int obd_proc_read_health(char *page, char **start, off_t off,
         for (i = 0; i < MAX_OBD_DEVICES; i++) {
                 struct obd_device *obd;
 
-                obd = &obd_dev[i];
+                obd = obd_devs[i];
                 if (obd->obd_type == NULL)
                         continue;
 
@@ -320,7 +320,7 @@ static void *obd_device_list_seq_start(struct seq_file *p, loff_t*pos)
 {
         if (*pos >= MAX_OBD_DEVICES)
                 return NULL;
-        return &obd_dev[*pos];
+        return obd_devs[*pos];
 }
 
 static void obd_device_list_seq_stop(struct seq_file *p, void *v)
@@ -332,13 +332,13 @@ static void *obd_device_list_seq_next(struct seq_file *p, void *v, loff_t *pos)
         ++*pos;
         if (*pos >= MAX_OBD_DEVICES)
                 return NULL;
-        return &obd_dev[*pos];
+        return obd_devs[*pos];
 }
 
 static int obd_device_list_seq_show(struct seq_file *p, void *v)
 {
         struct obd_device *obd = (struct obd_device *)v;
-        int index = obd - &obd_dev[0];
+        int index = obd - obd_devs[0];
         char *status;
 
         if (!obd->obd_type)
index 40767ee..1ed08ae 100644 (file)
@@ -416,7 +416,7 @@ int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list)
         int rc = 0;
 
         LASSERT(obd != NULL);
-        LASSERT(obd->obd_type != NULL);
+        LASSERT(obd->obd_magic == OBD_DEVICE_MAGIC);
         LASSERT(obd->obd_type->typ_procroot != NULL);
 
         obd->obd_proc_entry = lprocfs_register(obd->obd_name,
@@ -646,8 +646,8 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LASSERT(obd->obd_proc_entry != NULL);
         LASSERT(obd->obd_cntr_base == 0);
 
-        num_stats = 1 + OBD_COUNTER_OFFSET(quotactl) +
-                num_private_stats;
+        num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
+                num_private_stats - 1 /* o_owner */;
         stats = lprocfs_alloc_stats(num_stats);
         if (stats == NULL)
                 return -ENOMEM;
@@ -715,6 +715,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, ping);
 
         for (i = num_private_stats; i < num_stats; i++) {
                 /* If this LBUGs, it is likely that an obd
@@ -722,12 +723,9 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
                  * <obd.h>, and that the corresponding line item
                  * LPROCFS_OBD_OP_INIT(.., .., opname)
                  * is missing from the list above. */
-                if (stats->ls_percpu[0]->lp_cntr[i].lc_name == NULL) {
-                        CERROR("Missing obd_stat initializer obd_op "
-                               "operation at offset %d. Aborting.\n",
-                               i - num_private_stats);
-                        LBUG();
-                }
+                LASSERTF(stats->ls_percpu[0]->lp_cntr[i].lc_name != NULL,
+                         "Missing obd_stat initializer obd_op "
+                         "operation at offset %d.\n", i - num_private_stats);
         }
         rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats);
         if (rc < 0) {
index 0f7634f..6ea6808 100644 (file)
@@ -118,10 +118,9 @@ EXPORT_SYMBOL(class_parse_nid);
  */
 int class_attach(struct lustre_cfg *lcfg)
 {
-        struct obd_type *type;
         struct obd_device *obd = NULL;
-        char *typename, *name, *namecopy, *uuid;
-        int rc, len, cleanup_phase = 0;
+        char *typename, *name, *uuid;
+        int rc, len;
         ENTRY;
 
         if (!LUSTRE_CFG_BUFLEN(lcfg, 1)) {
@@ -144,10 +143,10 @@ int class_attach(struct lustre_cfg *lcfg)
 
         CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n",
                MKSTR(typename), MKSTR(name), MKSTR(uuid));
-        
+
         /* Mountconf transitional hack, should go away after 1.6.
-           1.4.7 uses the old names, so translate back if the 
-           mountconf flag is set. 
+           1.4.7 uses the old names, so translate back if the
+           mountconf flag is set.
            1.6 should set this flag, and translate the other way here
            if not set. */
         if (lcfg->lcfg_flags & LCFG_FLG_MOUNTCONF){
@@ -165,33 +164,29 @@ int class_attach(struct lustre_cfg *lcfg)
                 }
         }
 
-        /* find the type */
-        type = class_get_type(typename);
-        if (!type) {
-                CERROR("OBD: unknown type: %s\n", typename);
-                RETURN(-ENODEV);
-        }
-        cleanup_phase = 1;  /* class_put_type */
-
-        len = strlen(name) + 1;
-        OBD_ALLOC(namecopy, len);
-        if (!namecopy)
-                GOTO(out, rc = -ENOMEM);
-        memcpy(namecopy, name, len);
-        cleanup_phase = 2; /* free obd_name */
-
-        obd = class_newdev(type, namecopy);
-        if (obd == NULL) {
+        obd = class_newdev(typename, name);
+        if (IS_ERR(obd)) {
                 /* Already exists or out of obds */
-                CERROR("Can't create device %s\n", name);
-                GOTO(out, rc = -EEXIST);
+                rc = PTR_ERR(obd);
+                obd = NULL;
+                CERROR("Cannot create device %s of type %s : %d\n",
+                       name, typename, rc);
+                GOTO(out, rc);
         }
-        cleanup_phase = 3;  /* class_release_dev */
+        LASSERTF(obd != NULL, "Cannot get obd device %s of type %s\n",
+                 name, typename);
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, 
+                 "obd %p obd_magic %08X != %08X\n",
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
+        LASSERTF(strncmp(obd->obd_name, name, strlen(name)) == 0, "%p obd_name %s != %s\n",
+                 obd, obd->obd_name, name);
 
         CFS_INIT_LIST_HEAD(&obd->obd_exports);
         CFS_INIT_LIST_HEAD(&obd->obd_exports_timed);
         spin_lock_init(&obd->obd_dev_lock);
         spin_lock_init(&obd->obd_osfs_lock);
+        /* obd->obd_osfs_age must be set to a value in the distant
+         * past to guarantee a fresh statfs is fetched on mount. */
         obd->obd_osfs_age = cfs_time_shift(-1000);
 
         /* XXX belongs in setup not attach  */
@@ -221,21 +216,17 @@ int class_attach(struct lustre_cfg *lcfg)
         }
 
         /* Detach drops this */
+        spin_lock(&obd->obd_dev_lock);
         atomic_set(&obd->obd_refcount, 1);
+        spin_unlock(&obd->obd_dev_lock);
 
         obd->obd_attached = 1;
-        type->typ_refcnt++;
         CDEBUG(D_IOCTL, "OBD: dev %d attached type %s with refcount %d\n",
                obd->obd_minor, typename, atomic_read(&obd->obd_refcount));
         RETURN(0);
  out:
-        switch (cleanup_phase) {
-        case 3:
+        if (obd != NULL) {
                 class_release_dev(obd);
-        case 2:
-                OBD_FREE(namecopy, strlen(namecopy) + 1);
-        case 1:
-                class_put_type(type);
         }
         return rc;
 }
@@ -246,7 +237,11 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         struct obd_export *exp;
         ENTRY;
 
-        LASSERT(obd == (obd_dev + obd->obd_minor));
+        LASSERT(obd != NULL);
+        LASSERTF(obd == class_num2obd(obd->obd_minor), "obd %p != obd_devs[%d] %p\n", 
+                 obd, obd->obd_minor, class_num2obd(obd->obd_minor));
+        LASSERTF(obd->obd_magic == OBD_DEVICE_MAGIC, "obd %p obd_magic %08x != %08x\n", 
+                 obd, obd->obd_magic, OBD_DEVICE_MAGIC);
 
         /* have we attached a type to this device? */
         if (!obd->obd_attached) {
@@ -284,7 +279,6 @@ int class_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
         if (err)
                 GOTO(err_exp, err);
 
-        obd->obd_type->typ_refcnt++;
         obd->obd_set_up = 1;
         spin_lock(&obd->obd_dev_lock);
         /* cleanup drops this */
@@ -304,32 +298,6 @@ err_exp:
         RETURN(err);
 }
 
-static int __class_detach(struct obd_device *obd)
-{
-        int err = 0;
-        ENTRY;
-
-        CDEBUG(D_CONFIG, "destroying obd %d (%s)\n",
-               obd->obd_minor, obd->obd_name);
-
-        if (OBP(obd, detach))
-                err = OBP(obd,detach)(obd);
-
-        if (obd->obd_name) {
-                OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1);
-                obd->obd_name = NULL;
-        } else {
-                CERROR("device %d: no name at detach\n", obd->obd_minor);
-        }
-
-        LASSERT(OBT(obd));
-        /* Attach took type refcount */
-        obd->obd_type->typ_refcnt--;
-        class_put_type(obd->obd_type);
-        class_release_dev(obd);
-        RETURN(err);
-}
-
 int class_detach(struct obd_device *obd, struct lustre_cfg *lcfg)
 {
         ENTRY;
@@ -455,7 +423,6 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
         class_decref(obd);
         obd->obd_set_up = 0;
-        obd->obd_type->typ_refcnt--;
 
         RETURN(0);
 out:
@@ -515,9 +482,12 @@ void class_decref(struct obd_device *obd)
                                 CERROR("Cleanup %s returned %d\n",
                                        obd->obd_name, err);
                 }
-                err = __class_detach(obd);
-                if (err)
-                        CERROR("Detach returned %d\n", err);
+                if (OBP(obd, detach)) {
+                        err = OBP(obd,detach)(obd);
+                        if (err)
+                                CERROR("Detach returned %d\n", err);
+                }
+                class_release_dev(obd);
         }
 }
 
index e9f84fb..dfa6300 100644 (file)
 #include <lustre_disk.h>
 #include <lustre_quota.h>
 #include <lustre_ver.h>
+#include <linux/slab.h>
 
 #include "filter_internal.h"
 
 static struct lvfs_callback_ops filter_lvfs_ops;
+kmem_cache_t *ll_fmd_cachep;
 
 static void filter_commit_cb(struct obd_device *obd, __u64 transno,
                              void *cb_data, int error)
@@ -299,9 +301,181 @@ free:
         return 0;
 }
 
+/* drop fmd reference, free it if last ref. must be called with fed_lock held.*/
+static inline void filter_fmd_put_nolock(struct filter_export_data *fed,
+                                         struct filter_mod_data *fmd)
+{
+        LASSERT_SPIN_LOCKED(&fed->fed_lock);
+        if (--fmd->fmd_refcount == 0) {
+                /* XXX when we have persistent reservations and the handle
+                 * is stored herein we need to drop it here. */
+                fed->fed_mod_count--;
+                list_del(&fmd->fmd_list);
+                OBD_SLAB_FREE(fmd, ll_fmd_cachep, sizeof(*fmd));
+        }
+}
+
+/* drop fmd reference, free it if last ref */
+void filter_fmd_put(struct obd_export *exp, struct filter_mod_data *fmd)
+{
+        struct filter_export_data *fed;
+
+        if (fmd == NULL)
+                return;
+
+        fed = &exp->exp_filter_data;
+        spin_lock(&fed->fed_lock);
+        filter_fmd_put_nolock(fed, fmd); /* caller reference */
+        spin_unlock(&fed->fed_lock);
+}
+
+/* expire entries from the list head (LRU end; hits are moved to the tail) if
+ * there are too many or they are too old. caller must hold fed_lock. */
+static void filter_fmd_expire_nolock(struct filter_obd *filter,
+                                     struct filter_export_data *fed,
+                                     struct filter_mod_data *keep)
+{
+        struct filter_mod_data *fmd, *tmp;
+
+        list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
+                if (fmd == keep)        /* never expire the entry to keep */
+                        break;
+
+                if (time_before(jiffies, fmd->fmd_expire) &&
+                    fed->fed_mod_count < filter->fo_fmd_max_num)
+                        break;          /* not too old and under the limit */
+
+                list_del_init(&fmd->fmd_list);
+                filter_fmd_put_nolock(fed, fmd); /* list reference */
+        }
+}
+
+void filter_fmd_expire(struct obd_export *exp)
+{
+        spin_lock(&exp->exp_filter_data.fed_lock);
+        filter_fmd_expire_nolock(&exp->exp_obd->u.filter,
+                                 &exp->exp_filter_data, NULL);
+        spin_unlock(&exp->exp_filter_data.fed_lock);
+}
+
+/* find specified objid, group in export fmd list; a hit is moved to the list
+ * tail and re-aged. caller must hold fed_lock and take its own reference */
+static struct filter_mod_data *filter_fmd_find_nolock(struct filter_obd *filter,
+                                                struct filter_export_data *fed,
+                                                obd_id objid, obd_gr group)
+{
+        struct filter_mod_data *found = NULL, *fmd;
+
+        LASSERT_SPIN_LOCKED(&fed->fed_lock);
+
+        list_for_each_entry_reverse(fmd, &fed->fed_mod_list, fmd_list) {
+                if (fmd->fmd_id == objid && fmd->fmd_gr == group) {
+                        found = fmd;
+                        list_del(&fmd->fmd_list);
+                        list_add_tail(&fmd->fmd_list, &fed->fed_mod_list);
+                        fmd->fmd_expire = jiffies + filter->fo_fmd_max_age;
+                        break;
+                }
+        }
+
+        filter_fmd_expire_nolock(filter, fed, found); /* prune, sparing hit */
+
+        return found;
+}
+
+/* Find fmd by objid/group and take a caller reference; NULL if not found. */
+struct filter_mod_data *filter_fmd_find(struct obd_export *exp,
+                                        obd_id objid, obd_gr group)
+{
+        struct filter_mod_data *fmd;
+
+        spin_lock(&exp->exp_filter_data.fed_lock);
+        fmd = filter_fmd_find_nolock(&exp->exp_obd->u.filter,
+                                     &exp->exp_filter_data, objid, group);
+        if (fmd)
+                fmd->fmd_refcount++;    /* caller reference */
+        spin_unlock(&exp->exp_filter_data.fed_lock);
+
+        return fmd;
+}
+
+/* Find fmd based on objid and group, or create a new one if none is found.
+ * Returns the fmd with a caller reference held (drop via filter_fmd_put()).
+ * NULL is returned only if no entry exists and the slab allocation failed;
+ * this is not fatal because any fmd state is transient and may also be
+ * freed when it gets sufficiently old.  objid 0 is not special-cased here. */
+struct filter_mod_data *filter_fmd_get(struct obd_export *exp,
+                                       obd_id objid, obd_gr group)
+{
+        struct filter_export_data *fed = &exp->exp_filter_data;
+        struct filter_mod_data *found = NULL, *fmd_new = NULL;
+
+        OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, SLAB_NOFS, sizeof(*fmd_new));
+
+        spin_lock(&fed->fed_lock);
+        found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group);
+        if (fmd_new) {
+                if (found == NULL) {
+                        list_add_tail(&fmd_new->fmd_list, &fed->fed_mod_list);
+                        fmd_new->fmd_id = objid;
+                        fmd_new->fmd_gr = group;
+                        fmd_new->fmd_refcount++;   /* list reference */
+                        found = fmd_new;
+                        fed->fed_mod_count++;
+                } else {
+                        OBD_SLAB_FREE(fmd_new, ll_fmd_cachep, sizeof(*fmd_new));
+                }
+        }
+        if (found) {
+                found->fmd_refcount++;          /* caller reference */
+                found->fmd_expire = jiffies +
+                        exp->exp_obd->u.filter.fo_fmd_max_age;
+        }
+
+        spin_unlock(&fed->fed_lock);
+
+        return found;
+}
+
+#if DO_FMD_DROP
+/* drop fmd list reference so it will disappear when last reference is put.
+ * Not critical: only the client doing the unlink is affected, and at worst
+ * we keep a stale entry for an object that should never be used again. */
+static void filter_fmd_drop(struct obd_export *exp, obd_id objid, obd_gr group)
+{
+        struct filter_export_data *fed = &exp->exp_filter_data;
+        struct filter_mod_data *found;
+
+        spin_lock(&fed->fed_lock);
+        found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group);
+        if (found) {
+                list_del_init(&found->fmd_list);
+                filter_fmd_put_nolock(fed, found); /* list reference */
+        }
+        spin_unlock(&fed->fed_lock);
+}
+#else
+#define filter_fmd_drop(exp, objid, group)
+#endif
+
+/* remove all entries from fmd list */
+static void filter_fmd_cleanup(struct obd_export *exp)
+{
+        struct filter_export_data *fed = &exp->exp_filter_data;
+        struct filter_mod_data *fmd = NULL, *tmp;
+
+        spin_lock(&fed->fed_lock);
+        list_for_each_entry_safe(fmd, tmp, &fed->fed_mod_list, fmd_list) {
+                list_del_init(&fmd->fmd_list);
+                filter_fmd_put_nolock(fed, fmd);
+        }
+        spin_unlock(&fed->fed_lock);
+}
+
 static int filter_init_export(struct obd_export *exp)
 {
         spin_lock_init(&exp->exp_filter_data.fed_lock);
+        INIT_LIST_HEAD(&exp->exp_filter_data.fed_mod_list);
         exp->exp_connecting = 1;
 
         return 0;
@@ -885,33 +1059,21 @@ static void filter_post(struct obd_device *obd)
         pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
 }
 
-static void filter_set_last_id(struct filter_obd *filter, struct obdo *oa,
-                               obd_id id)
+static void filter_set_last_id(struct filter_obd *filter,
+                               obd_id id, obd_gr group)
 {
-        obd_gr group = 0;
         LASSERT(filter->fo_fsd != NULL);
 
-        if (oa != NULL) {
-                LASSERT(oa->o_gr <= FILTER_GROUPS);
-                group = oa->o_gr;
-        }
-
         spin_lock(&filter->fo_objidlock);
         filter->fo_last_objids[group] = id;
         spin_unlock(&filter->fo_objidlock);
 }
 
-__u64 filter_last_id(struct filter_obd *filter, struct obdo *oa)
+__u64 filter_last_id(struct filter_obd *filter, obd_gr group)
 {
         obd_id id;
-        obd_gr group = 0;
         LASSERT(filter->fo_fsd != NULL);
 
-        if (oa != NULL) {
-                LASSERT(oa->o_gr <= FILTER_GROUPS);
-                group = oa->o_gr;
-        }
-
         /* FIXME: object groups */
         spin_lock(&filter->fo_objidlock);
         id = filter->fo_last_objids[group];
@@ -1101,7 +1263,7 @@ out:
  * Caller must hold child i_mutex, we drop it always.
  * Caller is also required to ensure that dchild->d_inode exists. */
 static int filter_destroy_internal(struct obd_device *obd, obd_id objid,
-                                   struct dentry *dparent,
+                                   obd_gr group, struct dentry *dparent,
                                    struct dentry *dchild)
 {
         struct inode *inode = dchild->d_inode;
@@ -1492,6 +1654,8 @@ int filter_common_setup(struct obd_device *obd, obd_count len, void *buf,
         spin_lock_init(&filter->fo_r_disk_iosize.oh_lock);
         spin_lock_init(&filter->fo_w_disk_iosize.oh_lock);
         filter->fo_readcache_max_filesize = FILTER_MAX_CACHE_SIZE;
+        filter->fo_fmd_max_num = FILTER_FMD_MAX_NUM_DEFAULT;
+        filter->fo_fmd_max_age = FILTER_FMD_MAX_AGE_DEFAULT;
 
         sprintf(ns_name, "filter-%s", obd->obd_uuid.uuid);
         obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
@@ -1843,8 +2007,6 @@ static int filter_connect(struct lustre_handle *conn, struct obd_device *obd,
 
         fed = &exp->exp_filter_data;
 
-        spin_lock_init(&fed->fed_lock);
-
         rc = filter_connect_internal(exp, data);
         if (rc)
                 GOTO(cleanup, rc);
@@ -2006,6 +2168,7 @@ static int filter_destroy_export(struct obd_export *exp)
                 fsfilt_sync(exp->exp_obd, exp->exp_obd->u.obt.obt_sb);
 
         filter_grant_discard(exp);
+        filter_fmd_cleanup(exp);
 
         if (!(exp->exp_flags & OBD_OPT_FORCE))
                 filter_grant_sanity_check(exp->exp_obd, __FUNCTION__);
@@ -2044,16 +2207,22 @@ static int filter_disconnect(struct obd_export *exp)
         RETURN(rc);
 }
 
+static int filter_ping(struct obd_export *exp)
+{
+        filter_fmd_expire(exp);
+
+        return 0;
+}
+
 struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa,
                                   const char *what, int quiet)
 {
         struct dentry *dchild = NULL;
-        obd_gr group = 0;
 
-        if (oa->o_valid & OBD_MD_FLGROUP)
-                group = oa->o_gr;
+        if (!(oa->o_valid & OBD_MD_FLGROUP))
+                oa->o_gr = 0;
 
-        dchild = filter_fid2dentry(obd, NULL, group, oa->o_id);
+        dchild = filter_fid2dentry(obd, NULL, oa->o_gr, oa->o_id);
 
         if (IS_ERR(dchild)) {
                 CERROR("%s error looking up object: "LPU64"\n",
@@ -2108,10 +2277,9 @@ int filter_update_fidea(struct obd_export *exp, struct inode *inode,
 
         if (oa->o_valid & OBD_MD_FLFID) {
                 struct filter_fid ff;
-                obd_gr group = 0;
 
-                if (oa->o_valid & OBD_MD_FLGROUP)
-                        group = oa->o_gr;
+                if (!(oa->o_valid & OBD_MD_FLGROUP))
+                        oa->o_gr = 0;
 
                 /* packing fid and converting it to LE for storing into EA.
                  * Here ->o_stripe_idx should be filled by LOV and rest of
@@ -2120,11 +2288,11 @@ int filter_update_fidea(struct obd_export *exp, struct inode *inode,
                 ff.ff_fid.f_type = cpu_to_le32(oa->o_stripe_idx);
                 ff.ff_fid.generation = cpu_to_le32(oa->o_generation);
                 ff.ff_objid = cpu_to_le64(oa->o_id);
-                ff.ff_group = cpu_to_le64(group);
+                ff.ff_group = cpu_to_le64(oa->o_gr);
 
                 CDEBUG(D_INODE, "storing filter fid EA ("LPU64"/%u/%u"
                        LPU64"/"LPU64")\n", oa->o_fid, oa->o_stripe_idx,
-                       oa->o_generation, oa->o_id, group);
+                       oa->o_generation, oa->o_id, oa->o_gr);
 
                 rc = fsfilt_set_md(obd, inode, handle, &ff, sizeof(ff), "fid");
                 if (rc)
@@ -2221,8 +2389,7 @@ int filter_setattr_internal(struct obd_export *exp, struct dentry *dentry,
                 rc = fsfilt_setattr(exp->exp_obd, dentry, handle, &iattr, 1);
                 if (fcc != NULL) {
                         /* set cancel cookie callback function */
-                        fsfilt_add_journal_cb(exp->exp_obd, 0, oti ?
-                                              oti->oti_handle : handle,
+                        fsfilt_add_journal_cb(exp->exp_obd, 0, handle,
                                               filter_cancel_cookies_cb,
                                               fcc);
                         fcc = NULL;
@@ -2267,6 +2434,7 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
 {
         struct ldlm_res_id res_id = { .name = { oinfo->oi_oa->o_id } };
         struct ldlm_valblock_ops *ns_lvbo;
+        struct filter_mod_data *fmd;
         struct lvfs_run_ctxt saved;
         struct filter_obd *filter;
         struct ldlm_resource *res;
@@ -2283,6 +2451,14 @@ int filter_setattr(struct obd_export *exp, struct obd_info *oinfo,
         push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
         lock_kernel();
 
+        if (oinfo->oi_oa->o_valid &
+            (OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME)) {
+                fmd = filter_fmd_get(exp,oinfo->oi_oa->o_id,oinfo->oi_oa->o_gr);
+                if (fmd && fmd->fmd_mactime_xid < oti->oti_xid)
+                        fmd->fmd_mactime_xid = oti->oti_xid;
+                filter_fmd_put(exp, fmd);
+        }
+
         /* setting objects attributes (including owner/group) */
         rc = filter_setattr_internal(exp, dentry, oinfo->oi_oa, oti);
         if (rc)
@@ -2389,7 +2565,7 @@ static void filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
                 return;
         }
 
-        last = filter_last_id(filter, &doa);
+        last = filter_last_id(filter, doa.o_gr);
         CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
                exp->exp_obd->obd_name, oa->o_id + 1, last);
         for (id = oa->o_id + 1; id <= last; id++) {
@@ -2419,9 +2595,9 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa,
         int diff, rc;
         ENTRY;
 
-        diff = oa->o_id - filter_last_id(filter, oa);
+        diff = oa->o_id - filter_last_id(filter, group);
         CDEBUG(D_INFO, "filter_last_id() = "LPU64" -> diff = %d\n",
-               filter_last_id(filter, oa), diff);
+               filter_last_id(filter, group), diff);
 
         /* delete orphans request */
         if ((oa->o_valid & OBD_MD_FLFLAGS) &&
@@ -2431,7 +2607,7 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa,
                 if (-diff > OST_MAX_PRECREATE) {
                         CERROR("%s: ignoring bogus orphan destroy request: "
                                "obdid "LPU64" last_id "LPU64"\n", obd->obd_name,
-                               oa->o_id, filter_last_id(filter, oa));
+                               oa->o_id, filter_last_id(filter, group));
                         RETURN(-EINVAL);
                 }
                 filter_destroy_precreated(exp, oa, filter);
@@ -2447,13 +2623,13 @@ static int filter_should_precreate(struct obd_export *exp, struct obdo *oa,
                         RETURN(1);
 
                 LASSERTF(diff >= 0,"%s: "LPU64" - "LPU64" = %d\n",obd->obd_name,
-                         oa->o_id, filter_last_id(filter, oa), diff);
+                         oa->o_id, filter_last_id(filter, group), diff);
                 RETURN(diff);
         }
 }
 
 static int filter_statfs(struct obd_device *obd, struct obd_statfs *osfs,
-                         unsigned long max_age)
+                         cfs_time_t max_age)
 {
         struct filter_obd *filter = &obd->u.filter;
         int blockbits = obd->u.obt.obt_sb->s_blocksize_bits;
@@ -2518,7 +2694,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 OBD_ALLOC(osfs, sizeof(*osfs));
                 if (osfs == NULL)
                         RETURN(-ENOMEM);
-                rc = filter_statfs(obd, osfs, jiffies - HZ);
+                rc = filter_statfs(obd, osfs, get_jiffies_64() - HZ);
                 if (rc == 0 && osfs->os_bavail < (osfs->os_blocks >> 10)) {
                         CDEBUG(D_HA, "OST out of space! avail "LPU64"\n",
                                osfs->os_bavail <<
@@ -2549,7 +2725,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 if (recreate_obj) {
                         __u64 last_id;
                         next_id = oa->o_id;
-                        last_id = filter_last_id(filter, oa);
+                        last_id = filter_last_id(filter, group);
                         if (next_id > last_id) {
                                 CERROR("Error: Trying to recreate obj greater"
                                        "than last id "LPD64" > "LPD64"\n",
@@ -2557,7 +2733,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                                 GOTO(cleanup, rc = -EINVAL);
                         }
                 } else
-                        next_id = filter_last_id(filter, oa) + 1;
+                        next_id = filter_last_id(filter, group) + 1;
 
                 CDEBUG(D_INFO, "precreate objid "LPU64"\n", next_id);
 
@@ -2607,7 +2783,7 @@ static int filter_precreate(struct obd_device *obd, struct obdo *oa,
                 }
 
                 if (!recreate_obj) {
-                        filter_set_last_id(filter, oa, next_id);
+                        filter_set_last_id(filter, next_id, group);
                         err = filter_update_last_objid(obd, group, 0);
                         if (err)
                                 CERROR("unable to write lastobjid "
@@ -2655,15 +2831,14 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
         struct obd_device *obd = NULL;
         struct lvfs_run_ctxt saved;
         struct lov_stripe_md *lsm = NULL;
-        obd_gr group = 0;
         int rc = 0, diff;
         ENTRY;
 
-        if (oa->o_valid & OBD_MD_FLGROUP)
-                group = oa->o_gr;
+        if (!(oa->o_valid & OBD_MD_FLGROUP))
+                oa->o_gr = 0;
 
         CDEBUG(D_INFO, "filter_create(od->o_gr="LPU64",od->o_id="LPU64")\n",
-               group, oa->o_id);
+               oa->o_gr, oa->o_id);
         if (ea != NULL) {
                 lsm = *ea;
                 if (lsm == NULL) {
@@ -2678,20 +2853,21 @@ static int filter_create(struct obd_export *exp, struct obdo *oa,
 
         if ((oa->o_valid & OBD_MD_FLFLAGS) &&
             (oa->o_flags & OBD_FL_RECREATE_OBJS)) {
-                if (oa->o_id > filter_last_id(&obd->u.filter, oa)) {
+                if (oa->o_id > filter_last_id(&obd->u.filter, oa->o_gr)) {
                         CERROR("recreate objid "LPU64" > last id "LPU64"\n",
-                               oa->o_id, filter_last_id(&obd->u.filter, oa));
+                               oa->o_id, filter_last_id(&obd->u.filter,
+                                                        oa->o_gr));
                         rc = -EINVAL;
                 } else {
                         diff = 1;
-                        rc = filter_precreate(obd, oa, group, &diff);
+                        rc = filter_precreate(obd, oa, oa->o_gr, &diff);
                 }
         } else {
-                diff = filter_should_precreate(exp, oa, group);
+                diff = filter_should_precreate(exp, oa, oa->o_gr);
                 if (diff > 0) {
-                        oa->o_id = filter_last_id(&obd->u.filter, oa);
-                        rc = filter_precreate(obd, oa, group, &diff);
-                        oa->o_id = filter_last_id(&obd->u.filter, oa);
+                        oa->o_id = filter_last_id(&obd->u.filter, oa->o_gr);
+                        rc = filter_precreate(obd, oa, oa->o_gr, &diff);
+                        oa->o_id = filter_last_id(&obd->u.filter, oa->o_gr);
                         oa->o_valid = OBD_MD_FLID;
                 }
         }
@@ -2723,12 +2899,11 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         void *handle = NULL;
         struct llog_cookie *fcc = NULL;
         int rc, rc2, cleanup_phase = 0;
-        obd_gr group = 0;
         struct iattr iattr;
         ENTRY;
 
-        if (oa->o_valid & OBD_MD_FLGROUP)
-                group = oa->o_gr;
+        if (!(oa->o_valid & OBD_MD_FLGROUP))
+                oa->o_gr = 0;
 
         obd = exp->exp_obd;
         filter = &obd->u.filter;
@@ -2736,7 +2911,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
         cleanup_phase = 1;
 
-        dchild = filter_fid2dentry(obd, NULL, group, oa->o_id);
+        dchild = filter_fid2dentry(obd, NULL, oa->o_gr, oa->o_id);
         if (IS_ERR(dchild))
                 GOTO(cleanup, rc = PTR_ERR(dchild));
         cleanup_phase = 2;
@@ -2792,7 +2967,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
          * here, and not while truncating above.  That avoids holding the
          * parent lock for a long time during truncate, which can block other
          * threads from doing anything to objects in that directory. bug 7171 */
-        dparent = filter_parent_lock(obd, group, oa->o_id);
+        dparent = filter_parent_lock(obd, oa->o_gr, oa->o_id);
         if (IS_ERR(dparent))
                 GOTO(cleanup, rc = PTR_ERR(dparent));
         cleanup_phase = 3; /* filter_parent_unlock */
@@ -2808,8 +2983,10 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         /* Quota release need uid/gid of inode */
         obdo_from_inode(oa, dchild->d_inode, OBD_MD_FLUID|OBD_MD_FLGID);
 
+        filter_fmd_drop(exp, oa->o_id, oa->o_gr);
+
         /* this drops dchild->d_inode->i_mutex unconditionally */
-        rc = filter_destroy_internal(obd, oa->o_id, dparent, dchild);
+        rc = filter_destroy_internal(obd, oa->o_id, oa->o_gr, dparent, dchild);
 
         EXIT;
 cleanup:
@@ -3103,6 +3280,7 @@ static struct obd_ops filter_obd_ops = {
         .o_connect        = filter_connect,
         .o_reconnect      = filter_reconnect,
         .o_disconnect     = filter_disconnect,
+        .o_ping           = filter_ping,
         .o_init_export    = filter_init_export,
         .o_destroy_export = filter_destroy_export,
         .o_statfs         = filter_statfs,
@@ -3132,6 +3310,7 @@ static struct obd_ops filter_sanobd_ops = {
         .o_connect        = filter_connect,
         .o_reconnect      = filter_reconnect,
         .o_disconnect     = filter_disconnect,
+        .o_ping           = filter_ping,
         .o_init_export    = filter_init_export,
         .o_destroy_export = filter_destroy_export,
         .o_statfs         = filter_statfs,
@@ -3169,6 +3348,12 @@ static int __init obdfilter_init(void)
         if (obdfilter_created_scratchpad == NULL)
                 return -ENOMEM;
 
+        ll_fmd_cachep = kmem_cache_create("ll_fmd_cache",
+                                         sizeof(struct filter_mod_data),
+                                         0, 0, NULL, NULL);
+        if (!ll_fmd_cachep)
+                GOTO(out, rc = -ENOMEM);
+
         quota_interface = PORTAL_SYMBOL_GET(filter_quota_interface);
         init_obd_quota_ops(quota_interface, &filter_obd_ops);
         init_obd_quota_ops(quota_interface, &filter_sanobd_ops);
@@ -3176,12 +3361,18 @@ static int __init obdfilter_init(void)
         rc = class_register_type(&filter_obd_ops, lvars.module_vars,
                                  LUSTRE_OST_NAME);
         if (rc)
-                GOTO(out, rc);
+                GOTO(out_fmd, rc);
 
         rc = class_register_type(&filter_sanobd_ops, lvars.module_vars,
                                  LUSTRE_OSTSAN_NAME);
         if (rc) {
+                int err;
+
                 class_unregister_type(LUSTRE_OST_NAME);
+out_fmd:
+                err = kmem_cache_destroy(ll_fmd_cachep);
+                LASSERTF(err == 0, "Cannot destroy ll_fmd_cachep: rc %d\n",err);
+                ll_fmd_cachep = NULL;
 out:
                 if (quota_interface)
                         PORTAL_SYMBOL_PUT(filter_quota_interface);
@@ -3199,9 +3390,14 @@ static void __exit obdfilter_exit(void)
         if (quota_interface)
                 PORTAL_SYMBOL_PUT(filter_quota_interface);
 
+        if (ll_fmd_cachep) {
+                int rc = kmem_cache_destroy(ll_fmd_cachep);
+                LASSERTF(rc == 0, "Cannot destroy ll_fmd_cachep: rc %d\n", rc);
+                ll_fmd_cachep = NULL;
+        }
+
         class_unregister_type(LUSTRE_OSTSAN_NAME);
         class_unregister_type(LUSTRE_OST_NAME);
-        
         OBD_FREE(obdfilter_created_scratchpad,
                  OBDFILTER_CREATED_SCRATCHPAD_ENTRIES *
                  sizeof(*obdfilter_created_scratchpad));
index 6361fdb..83faf9c 100644 (file)
@@ -49,6 +49,30 @@ struct filter_fid {
         __u64           ff_group;
 };
 
+/* per-client-per-object persistent state (LRU) */
+struct filter_mod_data {
+        struct list_head fmd_list;      /* linked to fed_mod_list */
+        __u64            fmd_id;        /* object being written to */
+        __u64            fmd_gr;        /* group being written to */
+        __u64            fmd_mactime_xid;/* xid highest {m,a,c}time setattr */
+        unsigned long    fmd_expire;    /* jiffies when it should expire */
+        int              fmd_refcount;  /* reference counter - list holds 1 */
+};
+
+#ifdef BGL_SUPPORT
+#define FILTER_FMD_MAX_NUM_DEFAULT 128 /* many active files per client on BGL */
+#else
+#define FILTER_FMD_MAX_NUM_DEFAULT  32
+#endif
+#define FILTER_FMD_MAX_AGE_DEFAULT ((obd_timeout + 10) * HZ)
+
+struct filter_mod_data *filter_fmd_find(struct obd_export *exp,
+                                        obd_id objid, obd_gr group);
+struct filter_mod_data *filter_fmd_get(struct obd_export *exp,
+                                       obd_id objid, obd_gr group);
+void filter_fmd_put(struct obd_export *exp, struct filter_mod_data *fmd);
+void filter_fmd_expire(struct obd_export *exp);
+
 enum {
         LPROC_FILTER_READ_BYTES = 0,
         LPROC_FILTER_WRITE_BYTES = 1,
@@ -73,7 +97,7 @@ struct dentry *__filter_oa2dentry(struct obd_device *obd, struct obdo *oa,
 
 int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc);
 __u64 filter_next_id(struct filter_obd *, struct obdo *);
-__u64 filter_last_id(struct filter_obd *, struct obdo *);
+__u64 filter_last_id(struct filter_obd *, obd_gr group);
 int filter_update_fidea(struct obd_export *exp, struct inode *inode,
                         void *handle, struct obdo *oa);
 int filter_update_server_data(struct obd_device *, struct file *,
index 3bb0bb8..66d4f3f 100644 (file)
@@ -160,9 +160,9 @@ obd_size filter_grant_space_left(struct obd_export *exp)
 
         LASSERT_SPIN_LOCKED(&obd->obd_osfs_lock);
 
-        if (time_before(obd->obd_osfs_age, jiffies - HZ)) {
+        if (time_before_64(obd->obd_osfs_age, get_jiffies_64() - HZ)) {
 restat:
-                rc = fsfilt_statfs(obd, obd->u.obt.obt_sb, jiffies + 1);
+                rc = fsfilt_statfs(obd, obd->u.obt.obt_sb, get_jiffies_64() + HZ);
                 if (rc) /* N.B. statfs can't really fail */
                         RETURN(0);
                 statfs_done = 1;
@@ -509,6 +509,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
         struct niobuf_remote *rnb;
         struct niobuf_local *lnb = res;
         struct fsfilt_objinfo fso;
+        struct filter_mod_data *fmd;
         struct dentry *dentry = NULL;
         void *iobuf;
         obd_size left;
@@ -541,11 +542,22 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
 
         fsfilt_check_slow(exp->exp_obd, now, obd_timeout, "preprw_write setup");
 
+        /* Don't update inode timestamps if this write is older than a
+         * setattr which modifies the timestamps. b=10150 */
+        /* XXX when we start having persistent reservations this needs to
+         * be changed to filter_fmd_get() to create the fmd if it doesn't
+         * already exist so we can store the reservation handle there. */
+        fmd = filter_fmd_find(exp, obj->ioo_id, obj->ioo_gr);
+
         spin_lock(&exp->exp_obd->obd_osfs_lock);
         if (oa) {
                 filter_grant_incoming(exp, oa);
-                obdo_to_inode(dentry->d_inode, oa,
-                              OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
+                if (fmd && fmd->fmd_mactime_xid > oti->oti_xid)
+                        oa->o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+                                         OBD_MD_FLATIME);
+                else
+                        obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME |
+                                      OBD_MD_FLMTIME | OBD_MD_FLCTIME);
         }
         cleanup_phase = 3;
 
@@ -562,6 +574,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa,
         }
 
         spin_unlock(&exp->exp_obd->obd_osfs_lock);
+        filter_fmd_put(exp, fmd);
 
         if (rc)
                 GOTO(cleanup, rc);
index d4528fd..c3c2585 100644 (file)
@@ -381,26 +381,24 @@ int filter_do_bio(struct obd_device *obd, struct inode *inode,
  * not be dirty, because we already called fdatasync/fdatawait on them.
  */
 static int filter_clear_page_cache(struct inode *inode,
-                                    struct filter_iobuf *iobuf)
+                                   struct filter_iobuf *iobuf)
 {
         struct page *page;
-        int i, rc, rc2;
+        int i, rc = 0;
 
-        /* This is nearly generic_osync_inode, without the waiting on the inode
-        rc = generic_osync_inode(inode, inode->i_mapping,
-                                 OSYNC_DATA|OSYNC_METADATA);
-         */
+        /* This is nearly do_fsync(), without the waiting on the inode */
+        /* XXX: in 2.6.16 (at least) we don't need to hold i_mutex over
+         * filemap_fdatawrite() and filemap_fdatawait(), so we may no longer
+         * need this lock here at all. */
         LOCK_INODE_MUTEX(inode);
-        current->flags |= PF_SYNCWRITE;
-        rc = filemap_fdatawrite(inode->i_mapping);
-        rc2 = sync_mapping_buffers(inode->i_mapping);
-        if (rc == 0)
-                rc = rc2;
-        rc2 = filemap_fdatawait(inode->i_mapping);
-        current->flags &= ~PF_SYNCWRITE;
+        if (inode->i_mapping->nrpages) {
+                current->flags |= PF_SYNCWRITE;
+                rc = filemap_fdatawrite(inode->i_mapping);
+                if (rc == 0)
+                        rc = filemap_fdatawait(inode->i_mapping);
+                current->flags &= ~PF_SYNCWRITE;
+        }
         UNLOCK_INODE_MUTEX(inode);
-        if (rc == 0)
-                rc = rc2;
         if (rc != 0)
                 RETURN(rc);
 
index 935ae6f..47c27b8 100644 (file)
@@ -237,6 +237,62 @@ static int lprocfs_filter_wr_itune(struct file *file, const char *buffer,
 }
 #endif
 
+int lprocfs_filter_rd_fmd_max_num(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        int rc;
+
+        rc = snprintf(page, count, "%u\n", obd->u.filter.fo_fmd_max_num);
+        return rc;
+}
+
+int lprocfs_filter_wr_fmd_max_num(struct file *file, const char *buffer,
+                                  unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int val;
+        int rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        if (val > 65536 || val < 1)
+                return -EINVAL;
+
+        obd->u.filter.fo_fmd_max_num = val;
+        return count;
+}
+
+int lprocfs_filter_rd_fmd_max_age(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+        struct obd_device *obd = data;
+        int rc;
+
+        rc = snprintf(page, count, "%u\n", obd->u.filter.fo_fmd_max_age / HZ);
+        return rc;
+}
+
+int lprocfs_filter_wr_fmd_max_age(struct file *file, const char *buffer,
+                                  unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int val;
+        int rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        if (val > 65536 || val < 1)
+                return -EINVAL;
+
+        obd->u.filter.fo_fmd_max_age = val * HZ;
+        return count;
+}
+
 static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "uuid",         lprocfs_rd_uuid,          0, 0 },
         { "blocksize",    lprocfs_rd_blksize,       0, 0 },
@@ -268,6 +324,10 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "quota_itune_sz", lprocfs_filter_rd_itune,
                             lprocfs_filter_wr_itune, 0},
 #endif
+        { "client_cache_count", lprocfs_filter_rd_fmd_max_num,
+                          lprocfs_filter_wr_fmd_max_num, 0 },
+        { "client_cache_seconds", lprocfs_filter_rd_fmd_max_age,
+                          lprocfs_filter_wr_fmd_max_age, 0 },
         { 0 }
 };
 
index 5d86f26..9dae4d6 100644 (file)
@@ -463,6 +463,11 @@ static int osc_punch(struct obd_export *exp, struct obd_info *oinfo,
         if (!req)
                 RETURN(-ENOMEM);
 
+        /* FIXME bug 249. Also see bug 7198 */
+        if (class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags &
+            OBD_CONNECT_REQPORTAL)
+                req->rq_request_portal = OST_IO_PORTAL;
+
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
         memcpy(&body->oa, oinfo->oi_oa, sizeof(*oinfo->oi_oa));
 
@@ -558,6 +563,11 @@ static int osc_destroy(struct obd_export *exp, struct obdo *oa,
         if (!req)
                 RETURN(-ENOMEM);
 
+        /* FIXME bug 249. Also see bug 7198 */
+        if (class_exp2cliimp(exp)->imp_connect_data.ocd_connect_flags &
+            OBD_CONNECT_REQPORTAL)
+                req->rq_request_portal = OST_IO_PORTAL;
+
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
 
         if (oti != NULL && oa->o_valid & OBD_MD_FLCOOKIE) {
@@ -814,9 +824,10 @@ static int osc_brw_prep_request(int cmd, struct obd_import *imp,struct obdo *oa,
         opc = ((cmd & OBD_BRW_WRITE) != 0) ? OST_WRITE : OST_READ;
         pool = ((cmd & OBD_BRW_WRITE) != 0) ? imp->imp_rq_pool : NULL;
 
-        for (niocount = i = 1; i < page_count; i++)
+        for (niocount = i = 1; i < page_count; i++) {
                 if (!can_merge_pages(pga[i - 1], pga[i]))
                         niocount++;
+        }
 
         size[REQ_REC_OFF + 1] = sizeof(*ioobj);
         size[REQ_REC_OFF + 2] = niocount * sizeof(*niobuf);
@@ -1673,6 +1684,14 @@ static struct ptlrpc_request *osc_build_req(struct client_obd *cli,
                 GOTO(out, req = ERR_PTR(rc));
         }
 
+        /* Need to update the timestamps after the request is built in case
+         * we race with setattr (locally or in queue at OST).  If OST gets
+         * later setattr before earlier BRW (as determined by the request xid),
+         * the OST will not use BRW timestamps.  Sadly, there is no obvious
+         * way to do this in a single call.  bug 10150 */
+        ops->ap_update_obdo(caller_data, cmd, oa,
+                            OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME);
+
         LASSERT(sizeof(*aa) <= sizeof(req->rq_async_args));
         aa = (struct osc_brw_async_args *)&req->rq_async_args;
         aa->aa_oa = oa;
@@ -2687,7 +2706,7 @@ out_req:
         RETURN(rc);
 }
 
-static int sanosc_brw(int cmd, struct obd_export *exp, struct obd_infl *oinfo,
+static int sanosc_brw(int cmd, struct obd_export *exp, struct obd_info *oinfo,
                       obd_count page_count, struct brw_page *pga,
                       struct obd_trans_info *oti)
 {
@@ -3056,7 +3075,7 @@ out:
 }
 
 static int osc_statfs_async(struct obd_device *obd, struct obd_info *oinfo,
-                            unsigned long max_age,
+                            cfs_time_t max_age,
                             struct ptlrpc_request_set *rqset)
 {
         struct ptlrpc_request *req;
index 37a0d50..5aff53a 100644 (file)
@@ -138,7 +138,8 @@ static int ost_statfs(struct ptlrpc_request *req)
 
         osfs = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF, sizeof(*osfs));
 
-        req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, jiffies-HZ);
+        req->rq_status = obd_statfs(req->rq_export->exp_obd, osfs, 
+                                    get_jiffies_64() - HZ);
         if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_OST_ENOSPC))
                 osfs->os_bfree = osfs->os_bavail = 64;
         if (req->rq_status != 0)
@@ -500,53 +501,6 @@ static void ost_nio_pages_put(struct ptlrpc_request *req,
         EXIT;
 }
 
-#if 0
-/* see ldlm_blocking_ast */
-/* cut-n-paste of mds_blocking_ast() */
-static int ost_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
-                            void *data, int flag)
-{
-        int do_ast;
-        ENTRY;
-
-        if (flag == LDLM_CB_CANCELING) {
-                /* Don't need to do anything here. */
-                RETURN(0);
-        }
-
-        /* XXX layering violation!  -phil */
-        lock_res_and_lock(lock);
-        /* Get this: if mds_blocking_ast is racing with mds_intent_policy,
-         * such that mds_blocking_ast is called just before l_i_p takes the
-         * ns_lock, then by the time we get the lock, we might not be the
-         * correct blocking function anymore.  So check, and return early, if
-         * so. */
-        if (lock->l_blocking_ast != ost_blocking_ast) {
-                unlock_res_and_lock(lock);
-                RETURN(0);
-        }
-
-        lock->l_flags |= LDLM_FL_CBPENDING;
-        do_ast = (!lock->l_readers && !lock->l_writers);
-        unlock_res_and_lock(lock);
-
-        if (do_ast) {
-                struct lustre_handle lockh;
-                int rc;
-
-                LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel");
-                ldlm_lock2handle(lock, &lockh);
-                rc = ldlm_cli_cancel(&lockh);
-                if (rc < 0)
-                        CERROR("ldlm_cli_cancel: %d\n", rc);
-        } else {
-                LDLM_DEBUG(lock, "Lock still has references, will be "
-                           "cancelled later");
-        }
-        RETURN(0);
-}
-#endif
-                           
 static int ost_brw_lock_get(int mode, struct obd_export *exp,
                             struct obd_ioobj *obj, struct niobuf_remote *nb,
                             struct lustre_handle *lh)
index 88f1f30..9072e83 100644 (file)
@@ -382,6 +382,9 @@ int ptlrpc_connect_import(struct obd_import *imp, char * new_uuid)
                 }
         }
 
+        /* Reset connect flags to the originally requested flags, so that if
+         * the server is updated on-the-fly we will get the new features. */
+        imp->imp_connect_data.ocd_connect_flags = imp->imp_connect_flags_orig;
         rc = obd_reconnect(imp->imp_obd->obd_self_export, obd,
                            &obd->obd_uuid, &imp->imp_connect_data);
         if (rc)
index c518279..479e0cb 100644 (file)
@@ -1538,17 +1538,18 @@ void lustre_swab_ptlrpc_body(struct ptlrpc_body *b)
 
 void lustre_swab_connect(struct obd_connect_data *ocd)
 {
-        __swab64s (&ocd->ocd_connect_flags);
-        __swab32s (&ocd->ocd_version);
-        __swab32s (&ocd->ocd_grant);
-        __swab32s (&ocd->ocd_index);
-        __swab32s (&ocd->ocd_unused);
-        __swab64s (&ocd->ocd_ibits_known);
+        __swab64s(&ocd->ocd_connect_flags);
+        __swab32s(&ocd->ocd_version);
+        __swab32s(&ocd->ocd_grant);
+        __swab32s(&ocd->ocd_index);
+        __swab32s(&ocd->ocd_unused);
+        __swab64s(&ocd->ocd_ibits_known);
+        __swab32s(&ocd->ocd_nllu);
+        __swab32s(&ocd->ocd_nllg);
+        CLASSERT(offsetof(typeof(*ocd), padding1) != 0);
         CLASSERT(offsetof(typeof(*ocd), padding2) != 0);
         CLASSERT(offsetof(typeof(*ocd), padding3) != 0);
         CLASSERT(offsetof(typeof(*ocd), padding4) != 0);
-        CLASSERT(offsetof(typeof(*ocd), padding5) != 0);
-        CLASSERT(offsetof(typeof(*ocd), padding6) != 0);
 }
 
 void lustre_swab_obdo (struct obdo  *o)
@@ -2192,7 +2193,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x20000ULL);
         /* Sizes and Offsets */
 
         /* Checks for struct obd_uuid */
@@ -2336,6 +2338,38 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct ptlrpc_body *)0)->pb_paddings[3]) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct ptlrpc_body *)0)->pb_paddings[3]));
 
+        /* Checks for struct obd_connect_data */
+        LASSERTF((int)sizeof(struct obd_connect_data) == 72, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_connect_data));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_version));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_index));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_nllu) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_nllu));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllu) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllu));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_nllg) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_nllg));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg));
+
         /* Checks for struct obdo */
         LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
                  (long long)(int)sizeof(struct obdo));
index 0e512cc..213e72f 100755 (executable)
@@ -121,6 +121,7 @@ cleanup() {
     zconf_umount $CLIENTS $MOUNT
     stopall
     stop mds -f
+    cleanup_check
 }
 
 trap exit INT
index 52ed448..ad9860c 100644 (file)
@@ -196,7 +196,7 @@ int main(int argc, char **argv)
         fname = strrchr(argv[2], '/');
         fname++;
         strncpy((char *)lum_file1, fname, lum_size);
-        rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, lum_file1);
+        rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file1);
         if (rc) {
                 rc = errno;
                 goto cleanup;
@@ -214,7 +214,7 @@ int main(int argc, char **argv)
                 fname = strrchr(argv[3], '/');
                 fname++;
                 strncpy((char *)lum_file2, fname, lum_size);
-                rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, lum_file2);
+                rc = ioctl(dirfd(dir), IOC_MDC_GETFILESTRIPE, lum_file2);
                 if (rc) {
                         rc = errno;
                         goto cleanup;
index 98b8d12..01b4e17 100644 (file)
@@ -33,6 +33,7 @@ cleanup() {
     zconf_umount `hostname` $MOUNT
     stop mds ${FORCE} $MDSLCONFARGS
     stop ost ${FORCE} --dump $TMP/replay-ost-single-`hostname`.log
+    cleanup_check
 }
 
 if [ "$ONLY" == "cleanup" ]; then
index 23f96f1..d373448 100644 (file)
@@ -1558,6 +1558,54 @@ test_36e() {
 }
 run_test 36e "utime on non-owned file (should return error) ===="
 
+export TIMEOUT_OLD=`sysctl -n lustre.timeout`
+export TIMEOUT_NEW=16
+sysctl -w lustre.timeout=$TIMEOUT_NEW
+
+export FMD_MAX_AGE=`cat $LPROC/obdfilter/*/client_cache_seconds | head -n 1`
+for F in $LPROC/obdfilter/*/client_cache_seconds; do
+       echo 12 > $F
+done
+test_36f() {
+       export LANG=C LC_LANG=C # for date language
+
+       DATESTR="Dec 20  2000"
+       [ ! -d $DIR/d36 ] && mkdir $DIR/d36
+       #define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214
+        sysctl -w lustre.fail_loc=0x80000214
+       date; date +%s
+       cp /etc/hosts $DIR/d36/$tfile
+       sync & # write RPC generated with "current" inode timestamp, but delayed
+       sleep 1
+       touch --date="$DATESTR" $DIR/d36/$tfile # setattr with timestamp in past
+       LS_BEFORE="`ls -l $DIR/d36/$tfile`" # "old" timestamp from client cache
+       cancel_lru_locks OSC
+       LS_AFTER="`ls -l $DIR/d36/$tfile`"  # timestamp from OST object
+       date; date +%s
+       [ "$LS_BEFORE" != "$LS_AFTER" ] && \
+               echo "BEFORE: $LS_BEFORE" && \
+               echo "AFTER : $LS_AFTER" && \
+               echo "WANT  : $DATESTR" && \
+               error "$DIR/d36/$tfile timestamps changed" || true
+}
+run_test 36f "utime on file racing with OST BRW write =========="
+
+test_36g() {
+       FMD_BEFORE="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`"
+       touch $DIR/d36/$tfile
+       sleep $((TIMEOUT_NEW + 10))
+       FMD_AFTER="`awk '/ll_fmd_cache/ { print $2 }' /proc/slabinfo`"
+       [ "$FMD_AFTER" -gt "$FMD_BEFORE" ] && \
+               echo "AFTER : $FMD_AFTER > BEFORE $FMD_BEFORE" && \
+               error "fmd didn't expire after ping" || true
+}
+run_test 36g "filter mod data cache expiry ====================="
+
+sysctl -w lustre.timeout=$TIMEOUT_OLD
+for F in $LPROC/obdfilter/*/client_cache_seconds; do
+       echo $FMD_MAX_AGE > $F
+done
+
 test_37() {
        mkdir -p $DIR/dextra
        echo f > $DIR/dextra/fbugfile
index 94a78d4..836ab23 100644 (file)
@@ -36,6 +36,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <dirent.h>
+#include <time.h>
 
 #include <lnet/api-support.h>
 #include <lnet/lnetctl.h>
@@ -80,13 +81,20 @@ command_t cmdlist[] = {
          "\tstripe size:  Number of bytes on each OST (0 filesystem default)\n"
          "\tstripe start: OST index of first stripe (-1 filesystem default)\n"
          "\tstripe count: Number of OSTs to stripe over (0 default, -1 all)"},
-        {"find", lfs_find, 0,
-         "To list the extended attributes for a given filename or files in a\n"
-         "directory or recursively for all files in a directory tree.\n"
-         "usage: find [--obd <uuid>] [--quiet | --verbose] [--recursive] <dir|file> ..."},
         {"getstripe", lfs_getstripe, 0,
-         "To list the striping pattern for given filename.\n"
-         "usage:getstripe <filename>"},
+         "To list the striping info for a given filename or files in a\n"
+         "directory or recursively for all files in a directory tree.\n"
+         "usage: getstripe [--obd|-O <uuid>] [--quiet | -q] [--verbose | -v]\n"
+         "                 [--recursive | -r] <dir|file> ..."},
+        {"find", lfs_find, 0,
+         "To find files that match given parameters recursively in a directory tree.\n"
+         "usage: find [--atime|-A N] [--mtime|-M N] [--ctime|-C N] [--maxdepth|-D N]\n"
+         "            [--print0|-P] [--print|-p] [--obd|-O <uuid>] <dir/file> ...\n"
+         "If one of the options below is provided, find works the same as 'getstripe':\n"
+         "To list the striping info for a given filename or files in a directory or\n"
+         "recursively.\n"
+         "OBSOLETE usage: find [--quiet | -q] [--verbose | -v]\n"
+         "                     [--recursive | -r] <dir|file> ..."},
         {"check", lfs_check, 0,
          "Display the status of MDS or OSTs (as specified in the command)\n"
          "or all the servers (MDS and OSTs).\n"
@@ -185,25 +193,103 @@ static int lfs_setstripe(int argc, char **argv)
         return result;
 }
 
+static int set_time(time_t *time, time_t *set, char *str)
+{
+        time_t t;
+        int res = 0;
+        
+        if (str[0] == '+')
+                res = 1;
+        else if (str[0] == '-')
+                res = -1;
+
+        if (res)
+                str++;
+
+        t = strtol(str, NULL, 0);
+        if (*time < t * 24 * 60 * 60) {
+                if (res)
+                        str--;
+                fprintf(stderr, "Wrong time '%s' is specified.\n", str);
+                return INT_MAX;
+        }
+
+        *set = *time - t * 24 * 60 * 60;
+        return res;
+}
+
 static int lfs_find(int argc, char **argv)
 {
+        int new_fashion = 0;
+        int c, ret;
+        int zeroend;
+        time_t t;
+        unsigned int depth;
+        time_t atime, ctime, mtime;
+        int asign, msign, csign;
+        int quiet, verbose, recursive;
+        struct find_param param;
+        struct obd_uuid *obduuid = NULL;
         struct option long_opts[] = {
-                {"obd", 1, 0, 'o'},
-                {"quiet", 0, 0, 'q'},
-                {"recursive", 0, 0, 'r'},
-                {"verbose", 0, 0, 'v'},
+                /* New find options. */
+                {"atime",     required_argument, 0, 'A'},
+                {"ctime",     required_argument, 0, 'C'},
+                {"mtime",     required_argument, 0, 'M'},
+                {"maxdepth",  required_argument, 0, 'D'},
+                /* --obd is considered as a new option. */
+                {"obd",       required_argument, 0, 'O'},
+                {"print",     no_argument,       0, 'P'},
+                {"print0",    no_argument,       0, 'p'},
+                /* Old find options. */
+                {"quiet",     no_argument,       0, 'q'},
+                {"recursive", no_argument,       0, 'r'},
+                {"verbose",   no_argument,       0, 'v'},
                 {0, 0, 0, 0}
         };
-        char short_opts[] = "ho:qrv";
-        int quiet, verbose, recursive, c, rc;
-        struct obd_uuid *obduuid = NULL;
 
-        optind = 0;
+        time(&t);
+        zeroend = 0;
+        depth = -1;
+        atime = ctime = mtime = 0;
+        asign = csign = msign = 0;
         quiet = verbose = recursive = 0;
-        while ((c = getopt_long(argc, argv, short_opts,
-                                        long_opts, NULL)) != -1) {
+        
+        while ((c = getopt_long_only(argc, argv, "A:C:D:M:PpOqrv", 
+                                     long_opts, NULL)) >= 0)
+        {
                 switch (c) {
-                case 'o':
+                case 0:
+                        /* Long options. */
+                        break;
+                case 'A':
+                        new_fashion = 1;
+                        ret = set_time(&t, &atime, optarg);
+                        if (ret == INT_MAX)
+                                return -1;
+                        if (ret)
+                                asign = ret;
+                        break;
+                case 'C':
+                        new_fashion = 1;
+                        ret = set_time(&t, &ctime, optarg);
+                        if (ret == INT_MAX)
+                                return -1;
+                        if (ret)
+                                csign = ret;
+                        break;
+                case 'D':
+                        new_fashion = 1;
+                        depth = strtol(optarg, 0, 0);
+                        break;
+                case 'M':
+                        new_fashion = 1;
+                        ret = set_time(&t, &mtime, optarg);
+                        if (ret == INT_MAX)
+                                return -1;
+                        if (ret)
+                                msign = ret;
+                        break;
+                case 'O':
                         if (obduuid) {
                                 fprintf(stderr,
                                         "error: %s: only one obduuid allowed",
@@ -212,6 +298,13 @@ static int lfs_find(int argc, char **argv)
                         }
                         obduuid = (struct obd_uuid *)optarg;
                         break;
+                case 'p':
+                        new_fashion = 1;
+                        zeroend = 1;
+                        break;
+                case 'P':
+                        new_fashion = 1;
+                        break;
                 case 'q':
                         quiet++;
                         verbose = 0;
@@ -225,44 +318,68 @@ static int lfs_find(int argc, char **argv)
                         break;
                 case '?':
                         return CMD_HELP;
-                        break;
                 default:
                         fprintf(stderr, "error: %s: option '%s' unrecognized\n",
                                 argv[0], argv[optind - 1]);
                         return CMD_HELP;
-                        break;
-                }
+                };
         }
-
+        
         if (optind >= argc)
                 return CMD_HELP;
 
+        memset(&param, 0, sizeof(param));
+        param.obduuid = obduuid;
+        if (new_fashion) {
+                param.maxdepth = depth;
+                param.atime = atime;
+                param.ctime = ctime;
+                param.mtime = mtime;
+                param.asign = asign;
+                param.csign = csign;
+                param.msign = msign;
+                param.zeroend = zeroend;
+                param.quiet = 1;
+        } else {
+                param.recursive = recursive;
+                param.verbose = verbose;
+                param.quiet = quiet;
+                param.maxdepth = recursive ? -1 : 1;
+        }
+        
         do {
-                rc = llapi_find(argv[optind], obduuid, recursive,verbose,quiet);
-        } while (++optind < argc && !rc);
+                if (new_fashion)
+                        ret = llapi_find(argv[optind], &param);
+                else
+                        ret = llapi_getstripe(argv[optind], &param);
+        } while (++optind < argc && !ret);
 
-        if (rc)
-                fprintf(stderr, "error: %s: find failed\n", argv[0]);
-        return rc;
+        if (ret)
+                fprintf(stderr, "error: %s failed for %s.\n", 
+                        argv[0], argv[optind - 1]);
+        return ret;
 }
 
 static int lfs_getstripe(int argc, char **argv)
 {
         struct option long_opts[] = {
+                {"obd", 1, 0, 'O'},
                 {"quiet", 0, 0, 'q'},
+                {"recursive", 0, 0, 'r'},
                 {"verbose", 0, 0, 'v'},
                 {0, 0, 0, 0}
         };
-        char short_opts[] = "qv";
+        char short_opts[] = "hO:qrv";
         int quiet, verbose, recursive, c, rc;
         struct obd_uuid *obduuid = NULL;
+        struct find_param param;
 
         optind = 0;
         quiet = verbose = recursive = 0;
         while ((c = getopt_long(argc, argv, short_opts,
-                                        long_opts, NULL)) != -1) {
+                                long_opts, NULL)) != -1) {
                 switch (c) {
-                case 'o':
+                case 'O':
                         if (obduuid) {
                                 fprintf(stderr,
                                         "error: %s: only one obduuid allowed",
@@ -275,32 +392,39 @@ static int lfs_getstripe(int argc, char **argv)
                         quiet++;
                         verbose = 0;
                         break;
+                case 'r':
+                        recursive = 1;
+                        break;
                 case 'v':
                         verbose++;
                         quiet = 0;
                         break;
                 case '?':
                         return CMD_HELP;
-                        break;
                 default:
                         fprintf(stderr, "error: %s: option '%s' unrecognized\n",
                                 argv[0], argv[optind - 1]);
                         return CMD_HELP;
-                        break;
                 }
         }
 
         if (optind >= argc)
                 return CMD_HELP;
 
+        memset(&param, 0, sizeof(param));
+        param.recursive = recursive;
+        param.verbose = verbose;
+        param.quiet = quiet;
+        param.obduuid = obduuid;
+        param.maxdepth = recursive ? -1 : 1;
+
         do {
-                rc = llapi_find(argv[optind], obduuid, recursive,verbose,quiet);
+                rc = llapi_getstripe(argv[optind], &param);
         } while (++optind < argc && !rc);
 
         if (rc)
-                fprintf(stderr, "error: %s: getstripe failed for %s\n",
-                        argv[0], argv[1]);
-
+                fprintf(stderr, "error: %s failed for %s.\n", 
+                        argv[0], argv[optind - 1]);
         return rc;
 }
 
@@ -309,6 +433,7 @@ static int lfs_osts(int argc, char **argv)
         FILE *fp;
         struct mntent *mnt = NULL;
         struct obd_uuid *obduuid = NULL;
+        struct find_param param;
         int rc=0;
 
         if (argc != 1)
@@ -321,9 +446,11 @@ static int lfs_osts(int argc, char **argv)
                         strerror (errno));
         } else {
                 mnt = getmntent(fp);
+                memset(&param, 0, sizeof(param));
+                param.obduuid = obduuid;
                 while (feof(fp) == 0 && ferror(fp) ==0) {
                         if (llapi_is_lustre_mnttype(mnt)) {
-                                rc = llapi_find(mnt->mnt_dir, obduuid, 0, 0, 0);
+                                rc = llapi_getstripe(mnt->mnt_dir, &param);
                                 if (rc)
                                         fprintf(stderr,
                                                "error: %s: failed on %s\n",
index 24fd739..d64a920 100644 (file)
@@ -145,42 +145,27 @@ out:
         return rc;
 }
 
-struct find_param {
-        int     recursive;
-        int     verbose;
-        int     quiet;
-        struct  obd_uuid        *obduuid;
-        int     lumlen;
-        struct  lov_user_mds_data *lmd;
-/*        struct  lov_user_md     *lum;*/
-        int     got_uuids;
-        int     obdindex;
-        int     (* process_file)(DIR *dir, char *dname, char *fname,
-                        struct find_param *param);
-};
+typedef int (semantic_func_t)(char *path, DIR *parent, DIR *d, void *data);
 
 #define MAX_LOV_UUID_COUNT      max(LOV_MAX_STRIPE_COUNT, 1000)
 #define OBD_NOT_FOUND           (-1)
 
-static int prepare_find(struct find_param *param)
+static int common_param_init(struct find_param *param)
 {
         param->lumlen = lov_mds_md_size(MAX_LOV_UUID_COUNT);
         if ((param->lmd = malloc(sizeof(lstat_t) + param->lumlen)) == NULL) {
                 err_msg("error: allocation of %d bytes for ioctl",
                         sizeof(lstat_t) + param->lumlen);
-                return ENOMEM;
+                return -ENOMEM;
         }
 
         param->got_uuids = 0;
         param->obdindex = OBD_NOT_FOUND;
-
         return 0;
 }
 
-static void cleanup_find(struct find_param *param)
+static void find_param_fini(struct find_param *param)
 {
-        if (param->obduuid)
-                free(param->obduuid);
         if (param->lmd)
                 free(param->lmd);
 }
@@ -249,20 +234,20 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
         FILE *fp;
         int rc = 0, index;
 
-        param->got_uuids = 1;
-
         /* Get the lov name */
         rc = ioctl(dirfd(dir), OBD_IOC_GETNAME, (void *)uuid);
         if (rc) {
-                rc = errno;
-                if (rc == -ENOTTY)
-                        fprintf(stderr, "error: %s does not appear to be in "
-                                "a Lustre filesystem\n", dname);
-                else
-                        err_msg("error: can't get lov name: %s");
+                if (errno != ENOTTY) {
+                        err_msg("error: can't get lov name: %s", uuid);
+                        rc = errno;
+                } else {
+                        rc = 0;
+                }
                 return rc;
         }
 
+        param->got_uuids = 1;
+
         /* Now get the ost uuids from /proc */
         snprintf(buf, sizeof(buf), "/proc/fs/lustre/lov/%s/target_obd",
                  uuid);
@@ -293,35 +278,36 @@ static int setup_obd_uuids(DIR *dir, char *dname, struct find_param *param)
 
         fclose(fp);
 
-        if (param->obduuid && (param->obdindex == OBD_NOT_FOUND)) {
+        if (!param->quiet && param->obduuid && 
+            (param->obdindex == OBD_NOT_FOUND)) {
                 fprintf(stderr, "error: %s: unknown obduuid: %s\n",
                         __FUNCTION__, param->obduuid->uuid);
-                rc = EINVAL;
+                //rc = EINVAL;
         }
 
         return (rc);
 }
 
-void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *dname, char *fname,
+void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *path, int is_dir,
                           int obdindex, int quiet, int header, int body)
 {
         int i, obdstripe = 0;
 
         if (obdindex != OBD_NOT_FOUND) {
-                for (i = 0; fname[0] && i < lum->lmm_stripe_count; i++) {
+                for (i = 0; !is_dir && i < lum->lmm_stripe_count; i++) {
                         if (obdindex == lum->lmm_objects[i].l_ost_idx) {
-                                printf("%s/%s\n", dname, fname);
+                                printf("%s\n", path);
                                 obdstripe = 1;
                                 break;
                         }
                 }
         } else if (!quiet) {
-                printf("%s/%s\n", dname, fname);
+                printf("%s\n", path);
                 obdstripe = 1;
         }
 
         /* if it's a directory */
-        if (*fname == '\0') {
+        if (is_dir) {
                 if (obdstripe == 1) {
                         printf("default stripe_count: %d stripe_size: %u "
                                "stripe_offset: %d\n",
@@ -360,8 +346,8 @@ void lov_dump_user_lmm_v1(struct lov_user_md_v1 *lum, char *dname, char *fname,
         }
 }
 
-void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *dname,
-                            char *fname, int obdindex, int quiet,
+void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *path,
+                            int is_dir, int obdindex, int quiet,
                             int header, int body)
 {
         struct lov_user_md_join *lumj = (struct lov_user_md_join *)lum;
@@ -370,13 +356,13 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *dname,
         if (obdindex != OBD_NOT_FOUND) {
                 for (i = 0; i < lumj->lmm_stripe_count; i++) {
                         if (obdindex == lumj->lmm_objects[i].l_ost_idx) {
-                                printf("%s/%s\n", dname, fname);
+                                printf("%s\n", path);
                                 obdstripe = 1;
                                 break;
                         }
                 }
         } else if (!quiet) {
-                printf("%s/%s\n", dname, fname);
+                printf("%s\n", path);
                 obdstripe = 1;
         }
 
@@ -420,17 +406,18 @@ void lov_dump_user_lmm_join(struct lov_user_md_v1 *lum, char *dname,
         }
 }
 
-void llapi_lov_dump_user_lmm(struct find_param *param, char *dname, char *fname)
+void llapi_lov_dump_user_lmm(struct find_param *param, 
+                             char *path, int is_dir)
 {
         switch(*(__u32 *)&param->lmd->lmd_lmm) { /* lum->lmm_magic */
         case LOV_USER_MAGIC_V1:
-                lov_dump_user_lmm_v1(&param->lmd->lmd_lmm, dname, fname,
+                lov_dump_user_lmm_v1(&param->lmd->lmd_lmm, path, is_dir,
                                       param->obdindex, param->quiet,
                                       param->verbose,
                                       (param->verbose || !param->obduuid));
                 break;
         case LOV_USER_MAGIC_JOIN:
-                lov_dump_user_lmm_join(&param->lmd->lmd_lmm, dname, fname,
+                lov_dump_user_lmm_join(&param->lmd->lmd_lmm, path, is_dir,
                                        param->obdindex, param->quiet,
                                        param->verbose,
                                        (param->verbose || !param->obduuid));
@@ -471,7 +458,7 @@ int llapi_file_get_stripe(char *path, struct lov_user_md *lum)
         }
 
         strcpy((char *)lum, fname);
-        if (ioctl(fd, IOC_MDC_GETSTRIPE, (void *)lum) == -1) {
+        if (ioctl(fd, IOC_MDC_GETFILESTRIPE, (void *)lum) == -1) {
                 close(fd);
                 free(dname);
                 return errno;
@@ -511,51 +498,11 @@ int llapi_file_lookup(int dirfd, const char *name)
         return ioctl(dirfd, IOC_MDC_LOOKUP, buf);
 }
 
-static int find_process_file(DIR *dir, char *dname, char *fname,
-                        struct find_param *param)
-{
-        int rc;
-
-        strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen);
-
-        rc = ioctl(dirfd(dir), IOC_MDC_GETSTRIPE, (void *)&param->lmd->lmd_lmm);
-        if (rc) {
-                if (errno == ENODATA) {
-                        if (!param->obduuid && !param->quiet)
-                                fprintf(stderr, "%s/%s has no stripe info\n",
-                                        dname, fname);
-                        rc = 0;
-                } else if (errno == ENOTTY) {
-                        fprintf(stderr, "error: %s/%s is not a Lustre fs?\n",
-                                dname, fname);
-                } else if (errno == EISDIR) {
-                        err_msg("error: %s: directory %s/%s",
-                                __FUNCTION__, dname, fname);
-                        /* add fname to directory list; */
-                        rc = errno;
-                } else {
-                        err_msg("error: IOC_MDC_GETSTRIPE failed for '%s/%s'",
-                                dname, fname);
-                        rc = errno;
-                }
-                return rc;
-        }
-
-        llapi_lov_dump_user_lmm(param, dname, fname);
-
-        return 0;
-}
-
 /* some 64bit libcs implement readdir64() by calling sys_getdents().  the
  * kernel's sys_getdents() doesn't return d_type.  */
-unsigned char handle_dt_unknown(char *parent, char *entry)
+unsigned char handle_dt_unknown(char *path)
 {
-        char path[PATH_MAX + 1];
-        int fd, ret;
-
-        ret = snprintf(path, PATH_MAX, "%s/%s", parent, entry);
-        if (ret >= PATH_MAX)
-                return DT_UNKNOWN;
+        int fd;
 
         fd = open(path, O_DIRECTORY|O_RDONLY);
         if (fd < 0) {
@@ -567,162 +514,429 @@ unsigned char handle_dt_unknown(char *parent, char *entry)
         return DT_DIR;
 }
 
-static int process_dir(DIR *dir, char *dname, struct find_param *param)
+static DIR *opendir_parent(char *path)
 {
-        struct dirent64 *dirp;
-        DIR *subdir;
-        char path[1024];
-        int rc;
+        DIR *parent;
+        char *fname;
+        char c;
+        
+        fname = strrchr(path, '/');
+        if (fname == NULL)
+                return opendir(".");
+        
+        c = fname[1];
+        fname[1] = '\0';
+        parent = opendir(path);
+        fname[1] = c;
+        return parent;
+}
 
-        if (!param->got_uuids) {
-                rc = setup_obd_uuids(dir, dname, param);
-                if (rc)
-                        return rc;
+static int llapi_semantic_traverse(char *path, DIR *parent,
+                                   semantic_func_t sem_init,
+                                   semantic_func_t sem_fini, void *data)
+{
+        struct dirent64 *dent;
+        int len, ret;
+        DIR *d, *p;
+        
+        ret = 0;
+        p = NULL;
+        len = strlen(path);
+
+        d = opendir(path);
+        if (!d && errno != ENOTDIR) {
+                fprintf(stderr, "%s: Failed to open '%s': %s.", 
+                        __FUNCTION__, path, strerror(errno));
+                return -EINVAL;
+        } else if (!d && !parent) {
+                /* ENOTDIR. Open the parent dir. */
+                p = opendir_parent(path);
+                if (!p)
+                        return -EINVAL;
         }
 
-        /* retrieve dir's stripe info */
-        strncpy((char *)&param->lmd->lmd_lmm, dname, param->lumlen);
-        rc = ioctl(dirfd(dir), LL_IOC_LOV_GETSTRIPE, (void *)&param->lmd->lmd_lmm);
-        if (rc) {
-                if (errno == ENODATA) {
-                        if (!param->obduuid && param->verbose)
-                                printf("%s has no stripe info\n", dname);
-                        rc = 0;
-                } else if (errno == ENOTTY) {
-                        fprintf(stderr, "error: %s: %s not on a Lustre fs?\n",
-                                __FUNCTION__, dname);
-                } else {
-                        err_msg("error: %s: LL_IOC_LOV_GETSTRIPE failed for %s",
-                                __FUNCTION__, dname);
-                }
-        } else {
-               llapi_lov_dump_user_lmm(param, dname, "");
-        }
+        if (sem_init && (ret = sem_init(path, parent ? parent : p, d, data)))
+                goto err;
+
+        if (!d)
+                GOTO(out, ret = 0);
 
-        /* Handle the contents of the directory */
-        while ((dirp = readdir64(dir)) != NULL) {
-                if (!strcmp(dirp->d_name, ".") || !strcmp(dirp->d_name, ".."))
+        while((dent = readdir64(d)) != NULL) {
+                if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
                         continue;
+                
+                path[len] = 0;
+                strcat(path, "/");
+                strcat(path, dent->d_name);
 
-                if (dirp->d_type == DT_UNKNOWN)
-                        dirp->d_type = handle_dt_unknown(dname, dirp->d_name);
+                if (dent->d_type == DT_UNKNOWN)
+                        dent->d_type = handle_dt_unknown(path);
 
-                switch (dirp->d_type) {
+                switch (dent->d_type) {
                 case DT_UNKNOWN:
                         fprintf(stderr, "error: %s: '%s' is UNKNOWN type %d",
-                                __FUNCTION__, dirp->d_name, dirp->d_type);
+                                __FUNCTION__, dent->d_name, dent->d_type);
                         /* If we cared we could stat the file to determine
                          * type and continue on here, but we don't since we
                          * know d_type should be valid for lustre and this
                          * tool only makes sense for lustre filesystems. */
                         break;
                 case DT_DIR:
-                        if (!param->recursive)
-                                break;
-                        strcpy(path, dname);
-                        strcat(path, "/");
-                        strcat(path, dirp->d_name);
-                        subdir = opendir(path);
-                        if (subdir == NULL) {
-                                err_msg("error: %s: opendir '%.40s'",
-                                        __FUNCTION__, path);
-                                return errno;
-                        }
-                        rc = process_dir(subdir, path, param);
-                        closedir(subdir);
-                        break;
-                case DT_REG:
-                        rc = param->process_file(dir,dname,dirp->d_name,param);
+                        ret = llapi_semantic_traverse(path, d, sem_init,
+                                                      sem_fini, data);
+                        if (ret < 0)
+                                goto out;
                         break;
                 default:
-                        break;
+                        ret = 0;
+                        if (sem_init) {
+                                ret = sem_init(path, d, NULL, data);
+                                if (ret < 0)
+                                        goto out;
+                        }
+                        if (sem_fini && ret == 0)
+                                sem_fini(path, d, NULL, data);
                 }
         }
 
-        return 0;
+out:
+        path[len] = 0;
+                
+        if (sem_fini)
+                sem_fini(path, parent, d, data);
+err:
+        if (d) 
+                closedir(d);
+        if (p)
+                closedir(p);
+        return ret;
+}
+
+/* Check if the file time matches 1 of the given criteria (e.g. --atime +/-N).
+ * @mds indicates if this is MDS timestamps and there are attributes on OSTs.
+ * 
+ * The result is -1 if it does not match, 0 if not yet clear, 1 if matches.
+ * The table below gives the answers for the specified parameters (time and
+ * sign), 1st column is the answer for the MDS time, the 2nd is for the OST:
+ * --------------------------------------
+ * 1 | file > limit; sign > 0 | -1 / -1 |
+ * 2 | file = limit; sign > 0 |  ? /  1 |
+ * 3 | file < limit; sign > 0 |  ? /  1 |
+ * 4 | file > limit; sign = 0 | -1 / -1 |
+ * 5 | file = limit; sign = 0 |  ? /  1 |  <- (see the Note below)
+ * 6 | file < limit; sign = 0 |  ? / -1 |
+ * 7 | file > limit; sign < 0 |  1 /  1 |
+ * 8 | file = limit; sign < 0 |  ? / -1 |
+ * 9 | file < limit; sign < 0 |  ? / -1 |
+ * --------------------------------------
+ * Note: 5th actually means that the file time stamp is within the interval 
+ * (limit - 24hours, limit]. */
+static int find_time_cmp(time_t file, time_t limit, int sign, int mds) {
+        if (sign > 0) {
+                if (file <= limit)
+                        return mds ? 0 : 1;
+        }
+
+        if (sign == 0) {
+                if (file <= limit && file + 24 * 60 * 60 > limit)
+                        return mds ? 0 : 1;
+                if (file + 24 * 60 * 60 <= limit)
+                        return mds ? 0 : -1;
+        }
+
+        if (sign < 0) {
+                if (file > limit)
+                        return 1;
+                if (mds)
+                        return 0;
+        }
+
+        return -1;
 }
 
-static int process_path(char *path, struct find_param *param)
+/* Check if the file time matches all the given criteria (e.g. --atime +/-N).
+ * Return -1 or 1 if the file timestamp does not or does match the given
+ * criteria, respectively. Return 0 if the MDS time is being checked, there are
+ * attributes on OSTs, and it is not yet clear if the timestamp matches.
+ * 
+ * If 0 is returned, we need to do another RPC to the OSTs to obtain the 
+ * updated timestamps. */
+static int find_time_check(lstat_t *st, struct find_param *param, int mds)
 {
-        char *fname, *dname;
-        DIR *dir;
+        int ret;
         int rc = 0;
 
-        fname = strrchr(path, '/');
-        if (fname != NULL && fname[1] == '\0') {
-                /* Trailing '/', it must be a dir */
-                if (strlen(path) > 1)
-                        *fname = '\0';
-
-                dir = opendir(path);
-                if (dir == NULL) {
-                        err_msg("error: %s: '%.40s' opendir",__FUNCTION__,path);
-                        rc = errno;
+        /* Check if file is accepted. */
+        if (param->atime) {
+                ret = find_time_cmp(st->st_atime, param->atime, 
+                                    param->asign, mds);
+                if (ret < 0)
+                        return ret;
+                rc = ret;
+        }
+        
+        if (param->mtime) {
+                ret = find_time_cmp(st->st_mtime, param->mtime, 
+                                    param->msign, mds);
+                if (ret < 0)
+                        return ret;
+
+                /* If the previous check matches, but this one is not yet clear,
+                 * we should return 0 to do an RPC on OSTs. */
+                if (rc == 1)
+                        rc = ret;
+        }
+        
+        if (param->ctime) {
+                ret = find_time_cmp(st->st_ctime, param->ctime,
+                                    param->csign, mds);
+                if (ret < 0)
+                        return ret;
+                
+                /* If the previous check matches, but this one is not yet clear,
+                 * we should return 0 to do an RPC on OSTs. */
+                if (rc == 1)
+                        rc = ret;
+        }
+
+        return rc;
+}
+
+static int cb_find_init(char *path, DIR *parent, DIR *dir, void *data)
+{
+        struct find_param *param = (struct find_param *)data;
+        int decision = 1; /* 1 is accepted; -1 is rejected. */
+        lstat_t *st = &param->lmd->lmd_st;
+        int lustre_fs = 1;
+        int ret = 0;
+
+        LASSERT(parent != NULL || dir != NULL);
+        
+        param->lmd->lmd_lmm.lmm_stripe_count = 0;
+
+        /* If a time or OST should be checked, the decision is not taken yet. */
+        if (param->atime || param->ctime || param->mtime || param->obduuid)
+                decision = 0;
+
+        /* Request MDS for the stat info. */
+        if (!decision && dir) {
+                /* retrieve needed file info */
+                ret = ioctl(dirfd(dir), LL_IOC_MDC_GETINFO, 
+                            (void *)param->lmd);
+        } else if (!decision && parent) {
+                char *fname = strrchr(path, '/') + 1;
+                
+                /* retrieve needed file info */
+                strncpy((char *)param->lmd, fname, param->lumlen);
+                ret = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, 
+                           (void *)param->lmd);
+        }
+        if (ret) {
+                if (errno == ENOTTY) {
+                        /* ioctl is not supported, it is not a lustre fs.
+                         * Do the regular lstat(2) instead. */
+                        lustre_fs = 0;
+                        ret = lstat_f(path, st);
+                        if (ret) {
+                                err_msg("error: %s: lstat failed for %s",
+                                        __FUNCTION__, path);
+                                return ret;
+                        }
                 } else {
-                        rc = process_dir(dir, path, param);
-                        closedir(dir);
+                        err_msg("error: %s: %s failed for %s", __FUNCTION__,
+                                dir ? "LL_IOC_MDC_GETINFO" : 
+                                "IOC_MDC_GETFILEINFO", path);
+                        return ret;
                 }
-        } else if ((dir = opendir(path)) != NULL) {
-                /* No trailing '/', but it is still a dir */
-                rc = process_dir(dir, path, param);
-                closedir(dir);
-        } else {
-                /* It must be a file (or other non-directory) */
-                if (fname == NULL) {
-                        dname = ".";
-                        fname = path;
-                } else {
-                        *fname = '\0';
-                        fname++;
-                        dname = path;
-                        if (dname[0] == '\0')
-                                dname = "/";
+        }
+
+        /* Prepare obd. */
+        if (param->obduuid) {
+                if (lustre_fs && param->got_uuids && 
+                    param->st_dev != st->st_dev) {
+                        /* A lustre/lustre mount point is crossed. */
+                        param->got_uuids = 0;
+                        param->obdindex = OBD_NOT_FOUND;
                 }
-                dir = opendir(dname);
-                if (dir == NULL) {
-                        err_msg("error: %s: '%.40s' open failed",
-                                __FUNCTION__, dname);
-                        rc = errno;
+                
+                if (lustre_fs && !param->got_uuids) {
+                        ret = setup_obd_uuids(dir ? dir : parent, path, param);
+                        if (ret)
+                                return ret;
+                        param->st_dev = st->st_dev;
+                } else if (!lustre_fs && param->got_uuids) {
+                        /* A lustre/non-lustre mount point is crossed. */
+                        param->got_uuids = 0;
+                        param->obdindex = OBD_NOT_FOUND;
+                }
+        }
+
+        /* If an OBD UUID is specified but no one matches, skip this file. */
+        if (param->obduuid && param->obdindex == OBD_NOT_FOUND)
+                decision = -1;
+
+        /* If an OST UUID is given, and some OST matches, check it here. */
+        if (decision != -1 && param->obdindex != OBD_NOT_FOUND) {
+                /* Only those files should be accepted, which have a stripe on
+                 * the specified OST. */
+                if (!param->lmd->lmd_lmm.lmm_stripe_count) {
+                        decision = -1;
                 } else {
-                        if (!param->got_uuids)
-                                rc = setup_obd_uuids(dir, dname, param);
-                        if (rc == 0)
-                                rc = param->process_file(dir, dname, fname, param);
-                        closedir(dir);
+                        int i;
+                        for (i = 0; 
+                             i < param->lmd->lmd_lmm.lmm_stripe_count;
+                             i++) {
+                                if (param->obdindex == 
+                                    param->lmd->lmd_lmm.lmm_objects[i].l_ost_idx)
+                                        break;
+                        }
+                        
+                        if (i == param->lmd->lmd_lmm.lmm_stripe_count)
+                                decision = -1;
                 }
         }
+        
+        /* Check the time on mds. */
+        if (!decision) {
+                int for_mds;
+                
+                for_mds = lustre_fs ? param->lmd->lmd_lmm.lmm_stripe_count : 0;
+                decision = find_time_check(st, param, for_mds);
+        }
+        
+        /* If file still fits the request, ask osd for updated info.
+           The regular stat is almost of the same speed as some new
+           'glimpse-size-ioctl'. */
+        if (!decision && param->lmd->lmd_lmm.lmm_stripe_count) {
+                if (dir) {
+                        ret = ioctl(dirfd(dir), IOC_LOV_GETINFO,
+                                    (void *)param->lmd);
+                } else if (parent) {
+                        ret = ioctl(dirfd(parent), IOC_LOV_GETINFO, 
+                                    (void *)param->lmd);
+                } 
+
+                if (ret) {
+                        fprintf(stderr, "%s: IOC_LOV_GETINFO on %s failed: "
+                                "%s.\n", __FUNCTION__, path, strerror(errno));
+                        return -EINVAL;
+                }
 
-        return rc;
+                /* Check the time on osc. */
+                if (!decision)
+                        decision = find_time_check(st, param, 0);
+        }
+
+        if (decision != -1) {
+                printf("%s", path);
+                if (param->zeroend)
+                        printf("%c", '\0');
+                else 
+                        printf("\n");
+        }
+
+        /* Do not get down anymore? */
+        if (param->depth == param->maxdepth)
+                return 1;
+
+        param->depth++;
+        return 0;
 }
 
-int llapi_find(char *path, struct obd_uuid *obduuid, int recursive,
-               int verbose, int quiet)
+static int cb_common_fini(char *path, DIR *parent, DIR *d, void *data)
 {
-        struct find_param param;
+        struct find_param *param = (struct find_param *)data;
+        param->depth--;
+        return 0;
+}
+
+int llapi_find(char *path, struct find_param *param)
+{
+        char buf[PATH_MAX + 1];
+        int ret;
+        
+        ret = common_param_init(param);
+        if (ret)
+                return ret;
+
+        param->depth = 0;
+        strncpy(buf, path, strlen(path));
+        buf[strlen(path)] = '\0';
+        
+        ret = llapi_semantic_traverse(buf, NULL, cb_find_init, 
+                                      cb_common_fini, param);
+
+        find_param_fini(param);
+        return ret < 0 ? ret : 0;
+}
+
+static int cb_getstripe(char *path, DIR *parent, DIR *d, void *data)
+{
+        struct find_param *param = (struct find_param *)data;
         int ret = 0;
 
-        memset(&param, 0, sizeof(param));
-        param.recursive = recursive;
-        param.verbose = verbose;
-        param.quiet = quiet;
-        param.process_file = find_process_file;
-        if (obduuid) {
-                param.obduuid = malloc(sizeof(*obduuid));
-                if (param.obduuid == NULL) {
-                        ret = ENOMEM;
+        LASSERT(parent != NULL || d != NULL);
+        
+        /* Prepare obd. */
+        if (!param->got_uuids) {
+                ret = setup_obd_uuids(d ? d : parent, path, param);
+                if (ret)
+                        return ret;
+        }
+        
+        if (d) {
+                ret = ioctl(dirfd(d), LL_IOC_LOV_GETSTRIPE, 
+                            (void *)&param->lmd->lmd_lmm);
+        } else if (parent) {
+                char *fname = strrchr(path, '/') + 1;
+                
+                strncpy((char *)&param->lmd->lmd_lmm, fname, param->lumlen);
+                ret = ioctl(dirfd(parent), IOC_MDC_GETFILESTRIPE,
+                            (void *)&param->lmd->lmd_lmm);
+        } 
+
+        if (ret) {
+                if (errno == ENODATA) {
+                        if (!param->obduuid && !param->quiet)
+                                printf("%s has no stripe info\n", 
+                                        path);
                         goto out;
+                } else if (errno == ENOTTY) {
+                        fprintf(stderr, "%s: '%s' not on a Lustre fs?\n",
+                                __FUNCTION__, path);
+                } else {
+                        err_msg("error: %s: %s failed for %s", __FUNCTION__,
+                                d ? "LL_IOC_LOV_GETSTRIPE" : 
+                                "IOC_MDC_GETFILESTRIPE", path);
                 }
-                memcpy(param.obduuid, obduuid, sizeof(*obduuid));
+
+                return ret;
         }
 
-        ret = prepare_find(&param);
+        llapi_lov_dump_user_lmm(param, path, d ? 1 : 0);
+out:
+        /* Do not get down anymore? */
+        if (param->depth == param->maxdepth)
+                return 1;
+
+        param->depth++;
+        return 0;
+}
+
+int llapi_getstripe(char *path, struct find_param *param)
+{
+        int ret = 0;
+
+        ret = common_param_init(param);
         if (ret)
-                goto out;
+                return ret;
 
-        process_path(path, &param);
-out:
-        cleanup_find(&param);
-        return ret;
+        param->depth = 0;
+        ret = llapi_semantic_traverse(path, NULL, cb_getstripe, 
+                                      cb_common_fini, param);
+        find_param_fini(param);
+        return ret < 0 ? ret : 0;
 }
 
 int llapi_obd_statfs(char *path, __u32 type, __u32 index,
@@ -758,7 +972,7 @@ int llapi_obd_statfs(char *path, __u32 type, __u32 index,
                 err_msg("error: %s: opening '%s'", __FUNCTION__, path);
                 return rc;
         }
-        rc = ioctl(fd, LL_IOC_OBD_STATFS, (void *)rawbuf);
+        rc = ioctl(fd, IOC_OBD_STATFS, (void *)rawbuf);
         if (rc)
                 rc = -errno;
 
@@ -968,40 +1182,51 @@ int llapi_quotactl(char *mnt, struct if_quotactl *qctl)
         return rc;
 }
 
-static int quotachown_process_file(DIR *dir, char *dname, char *fname,
-                        struct find_param *param)
+static int cb_quotachown(char *path, DIR *parent, DIR *d, void *data)
 {
+        struct find_param *param = (struct find_param *)data;
         lstat_t *st;
-        char pathname[PATH_MAX + 1] = "";
         int rc;
+        
+        LASSERT(parent != NULL || d != NULL);
+
+        if (d) {
+                rc = ioctl(dirfd(d), LL_IOC_MDC_GETINFO, 
+                           (void *)param->lmd);
+        } else if (parent) {
+                char *fname = strrchr(path, '/') + 1;
+
+                strncpy((char *)param->lmd, fname, param->lumlen);
+                rc = ioctl(dirfd(parent), IOC_MDC_GETFILEINFO, 
+                           (void *)param->lmd);
+        } else {
+                return 0;
+        }
 
-        strncpy((char *)param->lmd, fname, param->lumlen);
-
-        rc = ioctl(dirfd(dir), IOC_MDC_GETFILEINFO, (void *)param->lmd);
         if (rc) {
                 if (errno == ENODATA) {
                         if (!param->obduuid && !param->quiet)
-                                fprintf(stderr,
-                                        "%s/%s has no stripe info\n",
-                                        dname, fname);
+                                fprintf(stderr, "%s has no stripe info\n",
+                                        path);
                         rc = 0;
                 } else if (errno != EISDIR) {
-                        err_msg("IOC_MDC_GETFILEINFO ioctl failed");
+                        err_msg("%s ioctl failed for %s.",
+                                d ? "LL_IOC_MDC_GETINFO" : 
+                                "IOC_MDC_GETFILEINFO", path);
                         rc = errno;
                 }
                 return rc;
         }
 
         st = &param->lmd->lmd_st;
-        snprintf(pathname, sizeof(pathname), "%s/%s", dname, fname);
 
         /* libc chown() will do extra check, and if the real owner is
          * the same as the ones to set, it won't fall into kernel, so
          * invoke syscall directly. */
-        rc = syscall(SYS_chown, pathname, st->st_uid, st->st_gid);
+        rc = syscall(SYS_chown, path, st->st_uid, st->st_gid);
         if (rc)
                 err_msg("error: chown %s (%u,%u)",
-                        pathname, st->st_uid, st->st_gid);
+                        path, st->st_uid, st->st_gid);
         return rc;
 }
 
@@ -1014,14 +1239,14 @@ int llapi_quotachown(char *path, int flag)
         param.recursive = 1;
         param.verbose = 0;
         param.quiet = 1;
-        param.process_file = quotachown_process_file;
 
-        ret = prepare_find(&param);
+        ret = common_param_init(&param);
         if (ret)
                 goto out;
 
-        process_path(path, &param);
+        ret = llapi_semantic_traverse(path, NULL, cb_quotachown,
+                                      NULL, &param);
 out:
-        cleanup_find(&param);
+        find_param_fini(&param);
         return ret;
 }
index bdb9798..6d8be40 100644 (file)
@@ -93,7 +93,7 @@ static char rawbuf[8192];
 static char *buf = rawbuf;
 static int max = sizeof(rawbuf);
 
-static int cur_device = MAX_OBD_DEVICES;
+static int cur_device = -1;
 
 union lsm_buffer {
         char                 space [4096];
@@ -449,7 +449,7 @@ static int get_verbose(char *func, const char *arg)
 
 int do_disconnect(char *func, int verbose)
 {
-        cur_device = MAX_OBD_DEVICES;
+        cur_device = -1; /* same value cur_device is statically initialised to; presumably means "no device selected" -- confirm. func/verbose are unused; always returns 0. */
         return 0;
 }
 
index 8ee65e4..d4219f3 100644 (file)
@@ -131,6 +131,20 @@ check_ptlrpc_body(void)
 }
 
 static void
+check_obd_connect(void) /* wirecheck entries for struct obd_connect_data; invoked from main() before check_obdo() */
+{
+        BLANK_LINE();
+        CHECK_STRUCT(obd_connect_data); /* presumably yields the whole-struct sizeof assertion -- macro not visible here, confirm */
+        CHECK_MEMBER(obd_connect_data, ocd_connect_flags); /* presumably yields an offsetof + sizeof assertion pair per member -- confirm */
+        CHECK_MEMBER(obd_connect_data, ocd_version);
+        CHECK_MEMBER(obd_connect_data, ocd_grant);
+        CHECK_MEMBER(obd_connect_data, ocd_index);
+        CHECK_MEMBER(obd_connect_data, ocd_ibits_known);
+        CHECK_MEMBER(obd_connect_data, ocd_nllu);
+        CHECK_MEMBER(obd_connect_data, ocd_nllg); /* NOTE(review): last member listed; generated asserts give offset 36 + size 4 = 40 vs. struct size 72, so any trailing members are not checked here */
+}
+
+static void
 check_obdo(void)
 {
         BLANK_LINE();
@@ -1117,6 +1131,7 @@ main(int argc, char **argv)
         CHECK_CDEFINE(OBD_CONNECT_JOIN);
         CHECK_CDEFINE(OBD_CONNECT_ATTRFID);
         CHECK_CDEFINE(OBD_CONNECT_NODEVOH);
+        CHECK_CDEFINE(OBD_CONNECT_LCL_CLIENT);
         CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
 
         COMMENT("Sizes and Offsets");
@@ -1128,6 +1143,7 @@ main(int argc, char **argv)
         printf("        LASSERT(offsetof(struct lustre_msg_v1, lm_magic) == "
                "offsetof(struct lustre_msg_v2, lm_magic));\n");
         check_ptlrpc_body();
+        check_obd_connect();
         check_obdo();
         check_lov_mds_md_v1();
         check_lov_mds_md_join();
index 3b2701d..4cc1531 100644 (file)
@@ -243,7 +243,8 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
-        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_LCL_CLIENT == 0x10000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x20000ULL);
         /* Sizes and Offsets */
 
         /* Checks for struct obd_uuid */
@@ -387,6 +388,38 @@ void lustre_assert_wire_constants(void)
         LASSERTF((int)sizeof(((struct ptlrpc_body *)0)->pb_paddings[3]) == 4, " found %lld\n",
                  (long long)(int)sizeof(((struct ptlrpc_body *)0)->pb_paddings[3]));
 
+        /* Checks for struct obd_connect_data */
+        LASSERTF((int)sizeof(struct obd_connect_data) == 72, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_connect_data));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_connect_flags) == 0, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_connect_flags));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_connect_flags));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_version) == 8, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_version));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_version) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_version));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_grant) == 12, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_grant));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_grant) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_grant));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_index) == 16, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_index));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_index) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_index));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_ibits_known) == 24, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_ibits_known));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known) == 8, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_ibits_known));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_nllu) == 32, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_nllu));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllu) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllu));
+        LASSERTF((int)offsetof(struct obd_connect_data, ocd_nllg) == 36, " found %lld\n",
+                 (long long)(int)offsetof(struct obd_connect_data, ocd_nllg));
+        LASSERTF((int)sizeof(((struct obd_connect_data *)0)->ocd_nllg) == 4, " found %lld\n",
+                 (long long)(int)sizeof(((struct obd_connect_data *)0)->ocd_nllg));
+
         /* Checks for struct obdo */
         LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n",
                  (long long)(int)sizeof(struct obdo));