From debecedbaa80d8153dbcf6fa205e06f3aea8f372 Mon Sep 17 00:00:00 2001 From: alex Date: Fri, 27 Feb 2009 08:39:48 +0000 Subject: [PATCH] - update from HEAD --- lustre/ChangeLog | 39 +- lustre/autoconf/lustre-core.m4 | 10 +- lustre/autoconf/lustre-version.ac | 2 +- lustre/cmm/cmm_device.c | 32 +- lustre/cmm/cmm_object.c | 4 +- lustre/cmm/mdc_internal.h | 12 +- lustre/doc/lctl.8 | 11 + lustre/doc/lfs.1 | 48 ++- lustre/include/lprocfs_status.h | 25 ++ lustre/include/lustre/liblustreapi.h | 7 +- lustre/include/lustre/lustre_idl.h | 78 ++-- lustre/include/lustre_disk.h | 2 +- lustre/include/lustre_lib.h | 18 +- lustre/include/lustre_log.h | 1 + lustre/include/lustre_net.h | 9 + lustre/include/lustre_req_layout.h | 2 + lustre/include/md_object.h | 13 +- lustre/include/obd.h | 55 ++- lustre/include/obd_ost.h | 1 + lustre/include/obd_support.h | 19 + .../kernel-2.6.18-2.6-rhel5-i686-smp.config | 160 ++++++- .../kernel-2.6.18-2.6-rhel5-i686.config | 160 ++++++- .../kernel-2.6.18-2.6-rhel5-ia64-smp.config | 148 ++++++- .../kernel-2.6.18-2.6-rhel5-ia64.config | 148 ++++++- .../kernel-2.6.18-2.6-rhel5-ppc64-smp.config | 32 +- .../kernel-2.6.18-2.6-rhel5-ppc64.config | 32 +- .../kernel-2.6.18-2.6-rhel5-x86_64-smp.config | 225 ++++++++-- .../kernel-2.6.18-2.6-rhel5-x86_64.config | 162 +++++++- .../jbd-journal-chksum-2.6.18-vanilla.patch | 7 +- .../patches/jbd-stats-2.6-rhel5.patch | 6 +- lustre/kernel_patches/targets/2.6-rhel5.target.in | 4 +- lustre/kernel_patches/which_patch | 2 +- lustre/ldlm/ldlm_lockd.c | 8 +- lustre/llite/llite_lib.c | 2 +- lustre/llite/xattr.c | 6 + lustre/lov/lov_cl_internal.h | 3 +- lustre/lov/lov_obd.c | 7 +- lustre/lov/lov_pack.c | 29 +- lustre/mdc/lproc_mdc.c | 57 +++ lustre/mdc/mdc_request.c | 58 ++- lustre/mdd/mdd_device.c | 319 ++++++++++++-- lustre/mdd/mdd_internal.h | 2 + lustre/mdd/mdd_lproc.c | 459 +++++++-------------- lustre/mdd/mdd_object.c | 30 +- lustre/mds/mds_log.c | 18 +- lustre/mdt/mdt_handler.c | 204 ++++++++- lustre/mdt/mdt_lproc.c 
| 94 ++--- lustre/obdclass/llog_swab.c | 9 + lustre/obdclass/lprocfs_status.c | 259 ++++++++++++ lustre/obdclass/obd_config.c | 7 +- lustre/obdclass/obd_mount.c | 8 +- lustre/obdfilter/filter.c | 26 +- lustre/obdfilter/filter_internal.h | 2 + lustre/obdfilter/filter_io.c | 25 +- lustre/obdfilter/filter_log.c | 4 +- lustre/osc/osc_page.c | 15 +- lustre/osc/osc_request.c | 193 ++++++++- lustre/ost/ost_handler.c | 36 +- lustre/ptlrpc/import.c | 9 +- lustre/ptlrpc/layout.c | 8 +- lustre/ptlrpc/llog_server.c | 29 +- lustre/ptlrpc/lproc_ptlrpc.c | 2 +- lustre/ptlrpc/pack_generic.c | 2 + lustre/ptlrpc/pinger.c | 162 +++++++- lustre/ptlrpc/ptlrpc_module.c | 2 + lustre/ptlrpc/wiretest.c | 13 +- lustre/quota/quota_context.c | 10 +- lustre/quota/quota_interface.c | 5 + lustre/quota/quota_master.c | 13 +- lustre/tests/Makefile.am | 2 +- lustre/tests/acceptance-small.sh | 14 +- lustre/tests/conf-sanity.sh | 105 ++++- lustre/tests/createmany.c | 26 +- lustre/tests/createtest.c | 4 +- lustre/tests/insanity.sh | 13 - lustre/tests/racer/dir_create.sh | 2 +- lustre/tests/racer/file_concat.sh | 4 +- lustre/tests/racer/file_create.sh | 4 +- lustre/tests/racer/file_link.sh | 4 +- lustre/tests/racer/file_rename.sh | 4 +- lustre/tests/racer/file_rm.sh | 2 +- lustre/tests/racer/file_symlink.sh | 4 +- lustre/tests/racer/racer.sh | 36 +- lustre/tests/recovery-double-scale.sh | 314 ++++++++++++++ lustre/tests/recovery-mds-scale.sh | 15 +- lustre/tests/replay-dual.sh | 15 - lustre/tests/replay-single.sh | 9 +- lustre/tests/run_dbench.sh | 2 +- lustre/tests/run_dd.sh | 2 +- lustre/tests/run_iozone.sh | 2 +- lustre/tests/run_tar.sh | 2 +- lustre/tests/runracer | 41 +- lustre/tests/sanity-quota.sh | 61 ++- lustre/tests/sanity.sh | 131 +++--- lustre/tests/test-framework.sh | 139 +++++-- lustre/utils/lctl.c | 45 +- lustre/utils/lfs.c | 42 +- lustre/utils/liblustreapi.c | 158 ++++--- lustre/utils/obd.c | 106 +++++ lustre/utils/obdctl.h | 2 + lustre/utils/wirecheck.c | 4 + lustre/utils/wiretest.c | 
13 +- 102 files changed, 3904 insertions(+), 1032 deletions(-) create mode 100644 lustre/tests/recovery-double-scale.sh diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 7d14da4..0205427 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -2,7 +2,7 @@ tbd Sun Microsystems, Inc. * version 2.0.0 * Support for kernels: 2.6.16.60-0.33 (SLES 10), - 2.6.18-92.1.22.el5 (RHEL 5), + 2.6.18-128.1.1.el5 (RHEL 5), 2.6.22.14 vanilla (kernel.org). * Client support for unpatched kernels: (see http://wiki.lustre.org/index.php?title=Patchless_Client) @@ -13,6 +13,43 @@ tbd Sun Microsystems, Inc. removed cwd "./" (refer to Bugzilla 14399). * File join has been disabled in this release, refer to Bugzilla 16929. +Severity : enhancement +Bugzilla : 18289 +Description: Update to RHEL5U3 kernel-2.6.18-128.1.1.el5. + +Severity : normal +Frequency : normal +Bugzilla : 12069 +Description: OST grant too much space to client even there are not enough space. +Details : Client will shrink its grant cache to OST if there are no write + activity over 6 mins (GRANT_SHRINK_INTERVAL), and OST will retrieve + this grant cache if there are already not enough available space + (left_space < total_clients * 32M). + +Severity : normal +Frequency : start MDS on uncleanly shutdowned MDS device +Bugzilla : 16839 +Description: ll_sync thread stay in waiting mds<>ost recovery finished +Details : stay in waiting mds<>ost recovery finished produce random bugs + due race between two ll_sync thread for one lov target. send + ACTIVATE event only if connect really finished and import have + FULL state. + +Severity : normal +Frequency : rare, connect and disconnect target at same time +Bugzilla : 17310 +Description: ASSERTION(atomic_read(&imp->imp_inflight) == 0 +Details : don't call obd_disconnect under lov_lock. this long time + operation and can block ptlrpcd which answer to connect request. 
+ +Severity : normal +Frequency : rare +Bugzilla : 18154 +Description: don't lose wakeup for imp_recovery_waitq +Details : recover_import_no_retry or invalidate_import and import_close can + both sleep on imp_recovery_waitq, but we were sending only one wakeup + to sleep queue. + Severity : normal Frequency : start MDS on uncleanly shutdowned MDS device Bugzilla : 16839 diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 579ea79..e11e891 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -198,7 +198,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFINE(HAVE_REGISTER_CACHE, 1, [register_cache found]) AC_MSG_CHECKING([if kernel expects return from cache shrink ]) tmp_flags="$EXTRA_KCFLAGS" - EXTRA_KCFLAGS="-Werror" +#EXTRA_KCFLAGS="-Werror" LB_LINUX_TRY_COMPILE([ #include #include @@ -935,7 +935,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFUN([LC_UMOUNTBEGIN_HAS_VFSMOUNT], [AC_MSG_CHECKING([if umount_begin needs vfsmount parameter instead of super_block]) tmp_flags="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="-Werror" +#EXTRA_KCFLAGS="-Werror" LB_LINUX_TRY_COMPILE([ #include @@ -1000,7 +1000,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFUN([LC_VFS_READDIR_U64_INO], [AC_MSG_CHECKING([check vfs_readdir need 64bit inode number]) tmp_flags="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="-Werror" +#EXTRA_KCFLAGS="-Werror" LB_LINUX_TRY_COMPILE([ #include int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, @@ -1379,7 +1379,7 @@ LB_LINUX_TRY_COMPILE([ AC_DEFUN([LC_RW_TREE_LOCK], [AC_MSG_CHECKING([if kernel has tree_lock as rwlock]) tmp_flags="$EXTRA_KCFLAGS" -EXTRA_KCFLAGS="-Werror" +#EXTRA_KCFLAGS="-Werror" LB_LINUX_TRY_COMPILE([ #include ],[ @@ -1914,7 +1914,7 @@ AC_DEFUN([LC_CONFIGURE], [LC_CONFIG_OBD_BUFFER_SIZE if test $target_cpu == "i686" -o $target_cpu == "x86_64"; then - CFLAGS="$CFLAGS -Werror" + CFLAGS="$CFLAGS" fi # include/liblustre.h diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac index a81fe4c..4da8136 100644 
--- a/lustre/autoconf/lustre-version.ac +++ b/lustre/autoconf/lustre-version.ac @@ -1,6 +1,6 @@ m4_define([LUSTRE_MAJOR],[1]) m4_define([LUSTRE_MINOR],[9]) -m4_define([LUSTRE_PATCH],[160]) +m4_define([LUSTRE_PATCH],[162]) m4_define([LUSTRE_FIX],[0]) dnl # don't forget to update the service tags info diff --git a/lustre/cmm/cmm_device.c b/lustre/cmm/cmm_device.c index bae6967..8245d2f 100644 --- a/lustre/cmm/cmm_device.c +++ b/lustre/cmm/cmm_device.c @@ -65,7 +65,7 @@ static const struct lu_device_operations cmm_lu_ops; static inline int lu_device_is_cmm(struct lu_device *d) { - return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops); + return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &cmm_lu_ops); } int cmm_root_get(const struct lu_env *env, struct md_device *md, @@ -84,7 +84,7 @@ static int cmm_statfs(const struct lu_env *env, struct md_device *md, struct kstatfs *sfs) { struct cmm_device *cmm_dev = md2cmm_dev(md); - int rc; + int rc; ENTRY; rc = cmm_child_ops(cmm_dev)->mdo_statfs(env, @@ -130,6 +130,18 @@ static int cmm_update_capa_key(const struct lu_env *env, RETURN(rc); } +static int cmm_llog_ctxt_get(const struct lu_env *env, struct md_device *m, + int idx, void **h) +{ + struct cmm_device *cmm_dev = md2cmm_dev(m); + int rc; + ENTRY; + + rc = cmm_child_ops(cmm_dev)->mdo_llog_ctxt_get(env, cmm_dev->cmm_child, + idx, h); + RETURN(rc); +} + #ifdef HAVE_QUOTA_SUPPORT static int cmm_quota_notify(const struct lu_env *env, struct md_device *m) { @@ -369,12 +381,26 @@ static int cmm_quota_finvalidate(const struct lu_env *env, struct md_device *m, } #endif +int cmm_iocontrol(const struct lu_env *env, struct md_device *m, + unsigned int cmd, int len, void *data) +{ + struct md_device *next = md2cmm_dev(m)->cmm_child; + int rc; + + ENTRY; + rc = next->md_ops->mdo_iocontrol(env, next, cmd, len, data); + RETURN(rc); +} + + static const struct md_device_operations cmm_md_ops = { .mdo_statfs = cmm_statfs, .mdo_root_get = cmm_root_get, .mdo_maxsize_get 
= cmm_maxsize_get, .mdo_init_capa_ctxt = cmm_init_capa_ctxt, .mdo_update_capa_key = cmm_update_capa_key, + .mdo_llog_ctxt_get = cmm_llog_ctxt_get, + .mdo_iocontrol = cmm_iocontrol, #ifdef HAVE_QUOTA_SUPPORT .mdo_quota = { .mqo_notify = cmm_quota_notify, @@ -632,7 +658,7 @@ static int cmm_prepare(const struct lu_env *env, } static const struct lu_device_operations cmm_lu_ops = { - .ldo_object_alloc = cmm_object_alloc, + .ldo_object_alloc = cmm_object_alloc, .ldo_process_config = cmm_process_config, .ldo_recovery_complete = cmm_recovery_complete, .ldo_prepare = cmm_prepare, diff --git a/lustre/cmm/cmm_object.c b/lustre/cmm/cmm_object.c index 6f5a78a..fa52599 100644 --- a/lustre/cmm/cmm_object.c +++ b/lustre/cmm/cmm_object.c @@ -346,7 +346,7 @@ static int cml_capa_get(const struct lu_env *env, struct md_object *mo, } static int cml_path(const struct lu_env *env, struct md_object *mo, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { int rc; ENTRY; @@ -943,7 +943,7 @@ static int cmr_capa_get(const struct lu_env *env, struct md_object *mo, } static int cmr_path(const struct lu_env *env, struct md_object *obj, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { return -EREMOTE; } diff --git a/lustre/cmm/mdc_internal.h b/lustre/cmm/mdc_internal.h index e7a1d13..bcd5f3f 100644 --- a/lustre/cmm/mdc_internal.h +++ b/lustre/cmm/mdc_internal.h @@ -73,12 +73,12 @@ struct mdc_thread_info { }; struct mdc_object { - struct md_object mco_obj; + struct md_object mco_obj; }; static inline struct lu_device *mdc2lu_dev(struct mdc_device *mc) { - return (&mc->mc_md_dev.md_lu_dev); + return (&mc->mc_md_dev.md_lu_dev); } static inline struct mdc_device *md2mdc_dev(struct md_device *md) @@ -88,22 +88,22 @@ static inline struct mdc_device *md2mdc_dev(struct md_device *md) static inline struct mdc_device *mdc_obj2dev(struct mdc_object *mco) { - return 
(md2mdc_dev(md_obj2dev(&mco->mco_obj))); + return (md2mdc_dev(md_obj2dev(&mco->mco_obj))); } static inline struct mdc_object *lu2mdc_obj(struct lu_object *lo) { - return container_of0(lo, struct mdc_object, mco_obj.mo_lu); + return container_of0(lo, struct mdc_object, mco_obj.mo_lu); } static inline struct mdc_object *md2mdc_obj(struct md_object *mo) { - return container_of0(mo, struct mdc_object, mco_obj); + return container_of0(mo, struct mdc_object, mco_obj); } static inline struct mdc_device *lu2mdc_dev(struct lu_device *ld) { - return container_of0(ld, struct mdc_device, mc_md_dev.md_lu_dev); + return container_of0(ld, struct mdc_device, mc_md_dev.md_lu_dev); } struct lu_object *mdc_object_alloc(const struct lu_env *, diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8 index 79c5812..fca7631 100644 --- a/lustre/doc/lctl.8 +++ b/lustre/doc/lctl.8 @@ -120,6 +120,17 @@ Detach the virtual block device. .BI blockdev_info " " Acquire which lustre file was attached to the device node. .PP +.SS Changelogs +.TP +.BI changelog_register +Register a new changelog user for a particular device. Changelog entries +will not be purged beyond any registered users' set point. (See lfs changelog_clear.) +.TP +.BI changelog_deregister " " +Unregister an existing changelog user. If the user's "clear" record number +is the minimum for the device, changelog records will be purged until the +next minimum. 
+.PP .SS Debug .TP .BI debug_daemon diff --git a/lustre/doc/lfs.1 b/lustre/doc/lfs.1 index 532a60b..4d6df5f 100644 --- a/lustre/doc/lfs.1 +++ b/lustre/doc/lfs.1 @@ -1,10 +1,14 @@ -.TH lfs 1 "2008 Mar 15" Lustre "user utilities" +.TH lfs 1 "2009 Jan 29" Lustre "user utilities" .SH NAME lfs \- Lustre utility to create a file with specific striping pattern, find the striping pattern of exiting files .SH SYNOPSIS .br .B lfs .br +.B lfs changelog [--follow] [startrec [endrec]] +.br +.B lfs changelog_clear +.br .B lfs check .br .B lfs df [-i] [-h] [path] @@ -30,7 +34,7 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the .br .B lfs poollist [.] | .br -.B lfs quota [-v] [-o obd_uuid] [-u|-g] +.B lfs quota [-v] [-o obd_uuid|-I ost_idx|-i mdt_idx] [-u|-g] .br .B lfs quota .br @@ -67,13 +71,6 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the \fB[-b ] [-i ] \fB\fR .br - -.B lfs quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] -.br -.B lfs quota -.br -.B lfs quota -t [-u|-g] -.br .B lfs help .SH DESCRIPTION .B lfs @@ -81,6 +78,15 @@ can be used to create a new file with a specific striping pattern, determine the .SH OPTIONS The various options supported by lctl are listed and explained below: .TP +.B changelog +Show the metadata changes on an MDT. Start and end points are optional. The --follow option will block on new changes; this option is only valid when run directly on the MDT node. +.TP +.B changelog_clear +Indicate that changelog records previous to <endrec> are no longer of +interest to a particular consumer <id>, potentially allowing the MDT to +free up disk space. An <endrec> of 0 indicates the current last record. +Changelog consumers must be registered on the MDT node using \fBlctl\fR. +.TP .B check Display the status of MDS or OSTs (as specified in the command) or all the servers (MDS and OSTs) .TP @@ -129,6 +135,12 @@ Delete the default striping on the specified directory. .B poollist [.] 
| List the pools in \fBfilesystem\fR or \fBpathname\fR, or the OSTs in \fBfilesystem.pool\fR .TP +.B quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] +To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name can be specified. If both user and group are omitted quotas for current uid/gid are shown. -v provides more verbose (with per-obd statistics) output. +.TP +.B quota -t [-u|-g] +To display block and inode grace times for user (-u) or group (-g) quotas +.TP .B quotachown To change files' owner and group on OSTs of the specified filesystem .TP @@ -150,12 +162,6 @@ To set filesystem quotas for users or groups. Limits can be specified with -b, - .B setquota -t [-u|-g] [--block-grace ] [--inode-grace ] To set filesystem quota grace times for users or groups. Grace time is specified in "XXwXXdXXhXXmXXs" format or as an integer seconds value, see EXAMPLES .TP -.B quota [-v] [-o obd_uuid|-i mdt_idx|-I ost_idx] [-u|-g] -To display disk usage and limits, either for the full filesystem, or for objects on a specific obd. A user or group name can be specified. If both user and group are omitted quotas for current uid/gid are shown. -v provides more verbose (with per-obd statistics) output. -.TP -.B quota -t [-u|-g] -To display block and inode grace times for user (-u) or group (-g) quotas -.TP .B help Provides brief help on the various arguments .TP @@ -193,6 +199,12 @@ Lists space usage per OST and MDT in human readable format. 
.B $ lfs df -i Lists inode usage per OST and MDT .TP +.B $ lfs quota -u bob /mnt/lustre +List quotas of user `bob' +.TP +.B $ lfs quota -t -u /mnt/lustre +Show grace times for user quotas on /mnt/lustre +.TP .B $ lfs quotachown -i /mnt/lustre Change file owner and group .TP @@ -210,12 +222,6 @@ Set quotas of user `bob': 1GB block quota hardlimit and 2 GB block quota softlim .TP .B $ lfs setquota -t -u --block-grace 1000 --inode-grace 1w4d /mnt/lustre Set grace times for user quotas: 1000 seconds for block quotas, 1 week and 4 days for inode quotas -.TP -.B $ lfs quota -u bob /mnt/lustre -List quotas of user `bob' -.TP -.B $ lfs quota -t -u /mnt/lustre -Show grace times for user quotas on /mnt/lustre .SH BUGS The \fBlfs find\fR command isn't as comprehensive as \fBfind\fR(1). Report bugs using http://bugzilla.lustre.org. diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index f20dae2..3056bd0 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -682,8 +682,33 @@ extern int lprocfs_quota_rd_qs_factor(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_quota_wr_qs_factor(struct file *file, const char *buffer, unsigned long count, void *data); + +/** struct for holding changelog data for seq_file processing */ +struct changelog_seq_iter { + void *csi_dev; + struct llog_ctxt *csi_ctxt; + struct llog_handle *csi_llh; + __u64 csi_startrec; + __u64 csi_endrec; + loff_t csi_pos; + int csi_wrote; + int csi_startcat; + int csi_startidx; + int csi_fill:1; + int csi_done:1; +}; +int changelog_seq_open(struct inode *inode, struct file *file, + struct changelog_seq_iter **csih); +int changelog_seq_release(struct inode *inode, struct file *file); +loff_t changelog_seq_lseek(struct file *file, loff_t offset, int origin); + + + #else /* LPROCFS is not defined */ + + + static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int index, long amount) { return; } static inline 
void lprocfs_counter_incr(struct lprocfs_stats *stats, diff --git a/lustre/include/lustre/liblustreapi.h b/lustre/include/lustre/liblustreapi.h index 7dbe8c7..f667c00 100644 --- a/lustre/include/lustre/liblustreapi.h +++ b/lustre/include/lustre/liblustreapi.h @@ -174,9 +174,12 @@ extern int llapi_rgetfacl(int argc, char *argv[]); extern int llapi_cp(int argc, char *argv[]); extern int llapi_ls(int argc, char *argv[]); extern int llapi_changelog_open(const char *mdtname, long long startrec); -extern int llapi_changelog_clear(const char *mdtname, long long endrec); +extern int llapi_changelog_clear(const char *mdtname, const char *idstr, + long long endrec); +extern int llapi_changelog_register(const char *mdtname); +extern int llapi_changelog_unregister(const char *mdtname, int id); struct lu_fid; extern int llapi_fid2path(char *device, char *fid, char *path, int pathlen, - __u64 recno, int *linkno); + long long *recno, int *linkno); #endif diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 24091b5..df4d5b4 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -765,6 +765,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */ #define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */ #define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */ +#define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */ #define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ @@ -795,7 +796,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_CHANGE_QS | \ OBD_CONNECT_OSS_CAPA | OBD_CONNECT_RMT_CLIENT | \ OBD_CONNECT_RMT_CLIENT_FORCE | \ - OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN) + OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \ + 
OBD_CONNECT_GRANT_SHRINK) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT) @@ -893,18 +895,11 @@ typedef __u32 obd_count; #define OBD_FL_NO_GRPQUOTA (0x00000200) /* the object's group is over quota */ #define OBD_FL_CREATE_CROW (0x00000400) /* object should be create on write */ -/** - * Set this to delegate DLM locking during obd_punch() to the OSTs. Only OSTs - * that declared OBD_CONNECT_TRUNCLOCK in their connect flags support this - * functionality. - */ -#define OBD_FL_TRUNCLOCK (0x00000800) +#define OBD_FL_TRUNCLOCK (0x00000800) /* delegate DLM locking during punch */ +#define OBD_FL_CKSUM_CRC32 (0x00001000) /* CRC32 checksum type */ +#define OBD_FL_CKSUM_ADLER (0x00002000) /* ADLER checksum type */ +#define OBD_FL_SHRINK_GRANT (0x00004000) /* object shrink the grant */ -/* - * Checksum types - */ -#define OBD_FL_CKSUM_CRC32 (0x00001000) -#define OBD_FL_CKSUM_ADLER (0x00002000) #define OBD_FL_CKSUM_ALL (OBD_FL_CKSUM_CRC32 | OBD_FL_CKSUM_ADLER) #define LOV_MAGIC_V1 0x0BD10BD0 @@ -2188,20 +2183,20 @@ struct lov_mds_md_join { #define LLOG_OP_MASK 0xfff00000 typedef enum { - LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, - OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, - OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, - MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK, - MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR, - MDS_SETATTR64_REC= LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | REINT_SETATTR, - OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, - PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, /* obsolete */ - LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, - LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, - /** changelog record type */ - CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000, - LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, - LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, + LLOG_PAD_MAGIC = LLOG_OP_MAGIC | 0x00000, + OST_SZ_REC = LLOG_OP_MAGIC | 0x00f00, + OST_RAID1_REC = LLOG_OP_MAGIC | 0x01000, + 
MDS_UNLINK_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_UNLINK, + MDS_SETATTR_REC = LLOG_OP_MAGIC | 0x10000 | (MDS_REINT << 8) | REINT_SETATTR, + MDS_SETATTR64_REC = LLOG_OP_MAGIC | 0x90000 | (MDS_REINT << 8) | REINT_SETATTR, + OBD_CFG_REC = LLOG_OP_MAGIC | 0x20000, + PTL_CFG_REC = LLOG_OP_MAGIC | 0x30000, /* obsolete */ + LLOG_GEN_REC = LLOG_OP_MAGIC | 0x40000, + LLOG_JOIN_REC = LLOG_OP_MAGIC | 0x50000, + CHANGELOG_REC = LLOG_OP_MAGIC | 0x60000, + CHANGELOG_USER_REC = LLOG_OP_MAGIC | 0x70000, + LLOG_HDR_MAGIC = LLOG_OP_MAGIC | 0x45539, + LLOG_LOGID_MAGIC = LLOG_OP_MAGIC | 0x4553b, } llog_op_type; /* @@ -2336,18 +2331,33 @@ enum changelog_rec_type { CL_LAST }; +/** Changelog entry type names. Must be defined in the same order as the + * \a changelog_rec_type enum. + */ +#define DECLARE_CHANGELOG_NAMES static const char *changelog_str[] = \ + {"MARK","CREAT","MKDIR","HLINK","SLINK","MKNOD","UNLNK","RMDIR", \ + "RNMFM","RNMTO","OPEN","CLOSE","IOCTL","TRUNC","SATTR","XATTR"} + /** \a changelog_rec_type's that can't be masked */ -#define CL_MINMASK (1 << CL_MARK) +#define CHANGELOG_MINMASK (1 << CL_MARK) /** bits covering all \a changelog_rec_type's */ -#define CL_ALLMASK 0XFFFF +#define CHANGELOG_ALLMASK 0XFFFF /** default \a changelog_rec_type mask */ -#define CL_DEFMASK CL_ALLMASK +#define CHANGELOG_DEFMASK CHANGELOG_ALLMASK /* per-record flags */ #define CLF_VERSION 0x1000 #define CLF_FLAGMASK 0x0FFF #define CLF_HSM 0x0001 +/* changelog llog name, needed by client replicators */ +#define CHANGELOG_CATALOG "changelog_catalog" + +struct changelog_setinfo { + __u64 cs_recno; + __u32 cs_id; +}; + /** changelog record */ struct llog_changelog_rec { struct llog_rec_hdr cr_hdr; @@ -2368,6 +2378,16 @@ struct llog_changelog_rec { }; } __attribute__((packed)); +#define CHANGELOG_USER_PREFIX "cl" + +struct llog_changelog_user_rec { + struct llog_rec_hdr cur_hdr; + __u32 cur_id; + __u32 cur_padding; + __u64 cur_endrec; + struct llog_rec_tail cur_tail; +} 
__attribute__((packed)); + struct llog_gen { __u64 mnt_cnt; __u64 conn_cnt; diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index a2e80b9..b8fc609 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -57,7 +57,7 @@ #define LOV_OBJID "lov_objid" #define HEALTH_CHECK "health_check" #define CAPA_KEYS "capa_keys" -#define CHANGELOG_CATALOG "changelog_catalog" +#define CHANGELOG_USERS "changelog_users" /****************** persistent mount data *********************/ diff --git a/lustre/include/lustre_lib.h b/lustre/include/lustre_lib.h index a058fda..c6c9bc6 100644 --- a/lustre/include/lustre_lib.h +++ b/lustre/include/lustre_lib.h @@ -139,9 +139,18 @@ struct obd_ioctl_data { __u32 ioc_len; __u32 ioc_version; - __u64 ioc_cookie; - __u32 ioc_conn1; - __u32 ioc_conn2; + union { + __u64 ioc_cookie; + __u64 ioc_u64_1; + }; + union { + __u32 ioc_conn1; + __u32 ioc_u32_1; + }; + union { + __u32 ioc_conn2; + __u32 ioc_u32_2; + }; struct obdo ioc_obdo1; struct obdo ioc_obdo2; @@ -478,6 +487,9 @@ static inline void obd_ioctl_freedata(char *buf, int len) #define OBD_IOC_GETDEVICE _IOWR ('f', 149, OBD_IOC_DATA_TYPE) #define OBD_IOC_FID2PATH _IOWR ('f', 150, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_REG _IOW ('f', 151, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_DEREG _IOW ('f', 152, OBD_IOC_DATA_TYPE) +#define OBD_IOC_CHANGELOG_CLEAR _IOW ('f', 153, OBD_IOC_DATA_TYPE) #define OBD_IOC_LOV_SETSTRIPE _IOW ('f', 154, OBD_IOC_DATA_TYPE) #define OBD_IOC_LOV_GETSTRIPE _IOW ('f', 155, OBD_IOC_DATA_TYPE) diff --git a/lustre/include/lustre_log.h b/lustre/include/lustre_log.h index 167b366..34ca742 100644 --- a/lustre/include/lustre_log.h +++ b/lustre/include/lustre_log.h @@ -509,6 +509,7 @@ static inline int llog_write_rec(struct llog_handle *handle, if (lop->lop_write_rec == NULL) RETURN(-EOPNOTSUPP); + /* FIXME: Why doesn't caller just set the right lrh_len itself? 
*/ if (buf) buflen = rec->lrh_len + sizeof(struct llog_rec_hdr) + sizeof(struct llog_rec_tail); diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 67efebc..f0babd3 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -1281,8 +1281,17 @@ int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid); int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid); /* ptlrpc/pinger.c */ +enum timeout_event { + TIMEOUT_GRANT = 1 +}; +struct timeout_item; +typedef int (*timeout_cb_t)(struct timeout_item *, void *); int ptlrpc_pinger_add_import(struct obd_import *imp); int ptlrpc_pinger_del_import(struct obd_import *imp); +int ptlrpc_add_timeout_client(int time, enum timeout_event event, + timeout_cb_t cb, void *data, + struct list_head *obd_list); +int ptlrpc_del_timeout_client(struct list_head *obd_list); struct ptlrpc_request * ptlrpc_prep_ping(struct obd_import *imp); int ptlrpc_obd_ping(struct obd_device *obd); cfs_time_t ptlrpc_suspend_wakeup_time(void); diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h index 4f0c777..e08d367c 100644 --- a/lustre/include/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -187,6 +187,7 @@ extern const struct req_format RQF_OST_DESTROY; extern const struct req_format RQF_OST_BRW; extern const struct req_format RQF_OST_STATFS; extern const struct req_format RQF_OST_SET_INFO; +extern const struct req_format RQF_OST_SET_GRANT_INFO; extern const struct req_format RQF_OST_GET_INFO_GENERIC; extern const struct req_format RQF_OST_GET_INFO_LAST_ID; extern const struct req_format RQF_OST_GET_INFO_FIEMAP; @@ -226,6 +227,7 @@ extern const struct req_msg_field RMF_TGTUUID; extern const struct req_msg_field RMF_CLUUID; extern const struct req_msg_field RMF_SETINFO_VAL; extern const struct req_msg_field RMF_SETINFO_KEY; + /* * connection handle received in MDS_CONNECT request. 
*/ diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index 985550b..8832552 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -248,7 +248,7 @@ struct md_object_operations { int (*moo_object_sync)(const struct lu_env *, struct md_object *); int (*moo_path)(const struct lu_env *env, struct md_object *obj, - char *path, int pathlen, __u64 recno, int *linkno); + char *path, int pathlen, __u64 *recno, int *linkno); }; /** @@ -326,6 +326,12 @@ struct md_device_operations { struct md_device *m, struct lustre_capa_key *key); + int (*mdo_llog_ctxt_get)(const struct lu_env *env, + struct md_device *m, int idx, void **h); + + int (*mdo_iocontrol)(const struct lu_env *env, struct md_device *m, + unsigned int cmd, int len, void *data); + #ifdef HAVE_QUOTA_SUPPORT struct md_quota_operations { int (*mqo_notify)(const struct lu_env *env, @@ -685,9 +691,10 @@ static inline int mo_capa_get(const struct lu_env *env, } static inline int mo_path(const struct lu_env *env, struct md_object *m, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { - LASSERT(m->mo_ops->moo_path); + if (m->mo_ops->moo_path == NULL) + return -ENOSYS; return m->mo_ops->moo_path(env, m, path, pathlen, recno, linkno); } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 9cfaae8..7c3e985 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -266,7 +266,9 @@ enum llog_ctxt_id { LLOG_TEST_REPL_CTXT, LLOG_LOVEA_ORIG_CTXT, LLOG_LOVEA_REPL_CTXT, - LLOG_CHANGELOG_ORIG_CTXT, /**< changelog context */ + LLOG_CHANGELOG_ORIG_CTXT, /**< changelog generation on mdd */ + LLOG_CHANGELOG_REPL_CTXT, /**< changelog access on clients */ + LLOG_CHANGELOG_USER_ORIG_CTXT, /**< for multiple changelog consumers */ LLOG_MAX_CTXTS }; @@ -314,6 +316,7 @@ struct filter_obd { obd_size fo_tot_dirty; /* protected by obd_osfs_lock */ obd_size fo_tot_granted; /* all values in bytes */ obd_size fo_tot_pending; + int 
fo_tot_granted_clients; obd_size fo_readcache_max_filesize; int fo_read_cache; @@ -369,6 +372,14 @@ struct filter_obd { int fo_sec_level; }; +struct timeout_item { + enum timeout_event ti_event; + cfs_time_t ti_timeout; + timeout_cb_t ti_cb; + void *ti_cb_data; + struct list_head ti_obd_list; + struct list_head ti_chain; +}; #define OSC_MAX_RIF_DEFAULT 8 #define OSC_MAX_RIF_MAX 256 #define OSC_MAX_DIRTY_DEFAULT (OSC_MAX_RIF_DEFAULT * 4) @@ -405,6 +416,9 @@ struct client_obd { long cl_avail_grant; /* bytes of credit for ost */ long cl_lost_grant; /* lost credits (trunc) */ struct list_head cl_cache_waiters; /* waiting for cache/grant */ + cfs_time_t cl_next_shrink_grant; /* jiffies */ + struct list_head cl_grant_shrink_list; /* Timeout event list */ + struct semaphore cl_grant_sem; /*grant shrink list semaphore*/ /* keep track of objects that have lois that contain pages which * have been queued for async brw. this lock also protects the @@ -1095,35 +1109,36 @@ enum obd_cleanup_stage { }; /* get/set_info keys */ -#define KEY_READ_ONLY "read-only" -#define KEY_MDS_CONN "mds_conn" -#define KEY_NEXT_ID "next_id" -#define KEY_LOVDESC "lovdesc" -#define KEY_INIT_RECOV "initial_recov" -#define KEY_INIT_RECOV_BACKUP "init_recov_bk" -#define KEY_FLUSH_CTX "flush_ctx" +#define KEY_BLOCKSIZE_BITS "blocksize_bits" +#define KEY_BLOCKSIZE "blocksize" #define KEY_CAPA_KEY "capa_key" +#define KEY_CHANGELOG_CLEAR "changelog_clear" +#define KEY_CHECKSUM "checksum" +#define KEY_CLEAR_FS "clear_fs" #define KEY_CONN_DATA "conn_data" -#define KEY_MAX_EASIZE "max_easize" -#define KEY_REVIMP_UPD "revimp_update" -#define KEY_LOV_IDX "lov_idx" +#define KEY_EVICT_BY_NID "evict_by_nid" +#define KEY_FIEMAP "fiemap" +#define KEY_FLUSH_CTX "flush_ctx" +#define KEY_INIT_RECOV_BACKUP "init_recov_bk" +#define KEY_INIT_RECOV "initial_recov" #define KEY_LAST_ID "last_id" -#define KEY_READONLY "read-only" #define KEY_LOCK_TO_STRIPE "lock_to_stripe" -#define KEY_CHECKSUM "checksum" -#define 
KEY_UNLINKED "unlinked" -#define KEY_EVICT_BY_NID "evict_by_nid" +#define KEY_LOVDESC "lovdesc" +#define KEY_LOV_IDX "lov_idx" +#define KEY_MAX_EASIZE "max_easize" +#define KEY_MDS_CONN "mds_conn" +#define KEY_MGSSEC "mgssec" +#define KEY_NEXT_ID "next_id" +#define KEY_READ_ONLY "read-only" #define KEY_REGISTER_TARGET "register_target" +#define KEY_REVIMP_UPD "revimp_update" #define KEY_SET_FS "set_fs" -#define KEY_CLEAR_FS "clear_fs" -#define KEY_BLOCKSIZE "blocksize" -#define KEY_BLOCKSIZE_BITS "blocksize_bits" -#define KEY_FIEMAP "fiemap" #define KEY_SPTLRPC_CONF "sptlrpc_conf" -#define KEY_MGSSEC "mgssec" +#define KEY_UNLINKED "unlinked" /* XXX unused ?*/ #define KEY_INTERMDS "inter_mds" #define KEY_ASYNC "async" +#define KEY_GRANT_SHRINK "grant_shrink" struct lu_context; diff --git a/lustre/include/obd_ost.h b/lustre/include/obd_ost.h index 8ddb969..81a3209 100644 --- a/lustre/include/obd_ost.h +++ b/lustre/include/obd_ost.h @@ -58,6 +58,7 @@ struct osc_brw_async_args { struct cl_req *aa_clerq; }; +#define osc_grant_args osc_brw_async_args struct osc_async_args { struct obd_info *aa_oi; }; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ff1b2d5..7096eaa 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -125,6 +125,12 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #endif #define LONG_UNLINK 300 /* Unlink should happen before now */ +/** + * Time interval of shrink, if the client is "idle" more than this interval, + * then the ll_grant thread will return the requested grant space to filter + */ +#define GRANT_SHRINK_INTERVAL 360/*6 minutes*/ + #define OBD_FAIL_MDS 0x100 #define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 @@ -335,6 +341,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MGS_PAUSE_TARGET_REG 0x905 #define OBD_FAIL_QUOTA_RET_QDATA 0xA02 +#define OBD_FAIL_QUOTA_DELAY_REL 0xA03 #define OBD_FAIL_LPROC_REMOVE 0xB00 @@ -352,6 +359,18 
@@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_SEC_CTX_FINI_NET 0x1203 #define OBD_FAIL_SEC_CTX_HDL_PAUSE 0x1204 +#define OBD_FAIL_LLOG 0x1300 +#define OBD_FAIL_LLOG_ORIGIN_CONNECT_NET 0x1301 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CREATE_NET 0x1302 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_DESTROY_NET 0x1303 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_READ_HEADER_NET 0x1304 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_NEXT_BLOCK_NET 0x1305 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_PREV_BLOCK_NET 0x1306 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_WRITE_REC_NET 0x1307 +#define OBD_FAIL_LLOG_ORIGIN_HANDLE_CLOSE_NET 0x1308 +#define OBD_FAIL_LLOG_CATINFO_NET 0x1309 + + /* Failure injection control */ #define OBD_FAIL_MASK_SYS 0x0000FF00 #define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686-smp.config index ceb6da3..11f535b 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686-smp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Fri Jun 27 01:40:54 2008 +# Thu Jan 22 12:00:56 2009 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -55,10 +55,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -166,6 +170,7 @@ CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set CONFIG_PREEMPT_BKL=y +CONFIG_PREEMPT_NOTIFIERS=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_MCE=y @@ -263,6 +268,11 @@ CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y 
CONFIG_ACPI_SBS=m +CONFIG_THINKPAD_ACPI=m +# CONFIG_THINKPAD_ACPI_DEBUG is not set +CONFIG_THINKPAD_ACPI_BAY=y +CONFIG_THINKPAD_ACPI_VIDEO=y +CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y # # APM (Advanced Power Management) BIOS Support @@ -331,9 +341,13 @@ CONFIG_PCI_GOANY=y CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_VIRTIO=m +CONFIG_VIRTIO_RING=m +CONFIG_VIRTIO_PCI=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=m # CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set +CONFIG_PCI_DOMAINS=y CONFIG_PCI_MSI=y # CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y @@ -445,6 +459,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration @@ -805,9 +820,26 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. 
+# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set CONFIG_MAC80211_DEBUG=y # CONFIG_MAC80211_HT_DEBUG is not set # CONFIG_MAC80211_VERBOSE_DEBUG is not set @@ -1029,6 +1061,7 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=m # # ATA/ATAPI/MFM/RLL support @@ -1105,6 +1138,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -1141,6 +1175,8 @@ CONFIG_SCSI_SAS_LIBSAS=m # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -1207,10 +1243,15 @@ CONFIG_PCMCIA_FDOMAIN=m # CONFIG_PCMCIA_NINJA_SCSI is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -1220,7 +1261,6 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m @@ -1252,6 +1292,7 @@ CONFIG_PATA_MARVELL=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set CONFIG_PATA_NS87415=m # CONFIG_PATA_OPTI is not set @@ -1267,6 +1308,7 @@ CONFIG_PATA_PDC2027X=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y # @@ -1291,6 +1333,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m +CONFIG_DM_RAID45=m # 
CONFIG_DM_UEVENT is not set # @@ -1304,8 +1347,13 @@ CONFIG_FUSION_MAX_SGE=40 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m # CONFIG_FUSION_LOGGING is not set + +# +# Enable only one of the two stacks, unless you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1435,7 +1483,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1450,16 +1497,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1522,6 +1572,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1538,12 +1590,38 @@ CONFIG_BCM43XX_DMA_AND_PIO_MODE=y CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not set -# CONFIG_IWL4965_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y # CONFIG_IWL3945 is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# 
CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set +CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1617,6 +1695,7 @@ CONFIG_NETPOLL=y # CONFIG_NETPOLL_RX is not set CONFIG_NETPOLL_TRAP=y CONFIG_NET_POLL_CONTROLLER=y +CONFIG_VIRTIO_NET=m # # ISDN subsystem @@ -2121,6 +2200,8 @@ CONFIG_SENSORS_HDAPS=m # Misc devices # CONFIG_IBM_ASM=m +CONFIG_EEPROM_93CX6=m +CONFIG_HP_ILO=m # # Multimedia devices @@ -2177,6 +2258,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -2237,6 +2319,7 @@ CONFIG_FB_CIRRUS=m # CONFIG_FB_IMSTT is not set CONFIG_FB_VGA16=m CONFIG_FB_VESA=y +# CONFIG_FB_EFI is not set # CONFIG_FB_IMAC is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -2321,6 +2404,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2379,6 +2463,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2661,7 +2747,32 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set 
+CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2742,8 +2853,14 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2869,6 +2986,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2985,6 +3103,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m +CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y @@ -3024,11 +3145,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -3067,16 +3190,29 @@ CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # 
CONFIG_CRYPTO_DEV_PADLOCK=m CONFIG_CRYPTO_DEV_PADLOCK_AES=y +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y # # Library routines # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_AUDIT_GENERIC=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686.config index 903bc6c..94d8693 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-i686.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Fri Jun 27 01:41:47 2008 +# Thu Jan 22 12:06:24 2009 # CONFIG_X86_32=y CONFIG_GENERIC_TIME=y @@ -54,10 +54,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -159,6 +163,7 @@ CONFIG_TICK_DIVIDER=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set +CONFIG_PREEMPT_NOTIFIERS=y # CONFIG_X86_UP_APIC is not set CONFIG_X86_MCE=y # CONFIG_X86_MCE_NONFATAL is not set @@ -253,6 +258,11 @@ CONFIG_ACPI_SYSTEM=y CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y CONFIG_ACPI_SBS=m +CONFIG_THINKPAD_ACPI=m +# CONFIG_THINKPAD_ACPI_DEBUG is not set +CONFIG_THINKPAD_ACPI_BAY=y +CONFIG_THINKPAD_ACPI_VIDEO=y +CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y # # APM (Advanced Power Management) BIOS Support @@ -321,9 +331,13 @@ CONFIG_PCI_GOANY=y CONFIG_PCI_BIOS=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_VIRTIO=m +CONFIG_VIRTIO_RING=m +CONFIG_VIRTIO_PCI=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=m # CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set 
+CONFIG_PCI_DOMAINS=y # CONFIG_PCI_DEBUG is not set CONFIG_ISA_DMA_API=y # CONFIG_ISA is not set @@ -433,6 +447,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration @@ -793,9 +808,26 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set CONFIG_MAC80211_DEBUG=y # CONFIG_MAC80211_HT_DEBUG is not set # CONFIG_MAC80211_VERBOSE_DEBUG is not set @@ -1017,6 +1049,7 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=m # # ATA/ATAPI/MFM/RLL support @@ -1093,6 +1126,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -1129,6 +1163,8 @@ CONFIG_SCSI_SAS_LIBSAS=m # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -1195,10 +1231,15 @@ CONFIG_PCMCIA_FDOMAIN=m # CONFIG_PCMCIA_NINJA_SCSI is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -1208,7 +1249,6 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m 
CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m @@ -1240,6 +1280,7 @@ CONFIG_PATA_MARVELL=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set CONFIG_PATA_NS87415=m # CONFIG_PATA_OPTI is not set @@ -1255,6 +1296,7 @@ CONFIG_PATA_PDC2027X=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y # @@ -1279,6 +1321,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m +CONFIG_DM_RAID45=m # CONFIG_DM_UEVENT is not set # @@ -1292,8 +1335,13 @@ CONFIG_FUSION_MAX_SGE=40 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m # CONFIG_FUSION_LOGGING is not set + +# +# Enable only one of the two stacks, unless you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1424,7 +1472,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1439,16 +1486,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1511,6 +1561,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1527,12 +1579,38 @@ CONFIG_BCM43XX_DMA_AND_PIO_MODE=y CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not 
set -# CONFIG_IWL4965_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y # CONFIG_IWL3945 is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set +CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1606,6 +1684,7 @@ CONFIG_NETPOLL=y # CONFIG_NETPOLL_RX is not set CONFIG_NETPOLL_TRAP=y CONFIG_NET_POLL_CONTROLLER=y +CONFIG_VIRTIO_NET=m # # ISDN subsystem @@ -2114,6 +2193,8 @@ CONFIG_SENSORS_HDAPS=m # Misc devices # CONFIG_IBM_ASM=m +CONFIG_EEPROM_93CX6=m +CONFIG_HP_ILO=m # # Multimedia devices @@ -2170,6 +2251,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -2230,6 +2312,7 @@ CONFIG_FB_CIRRUS=m # CONFIG_FB_IMSTT is not set CONFIG_FB_VGA16=m CONFIG_FB_VESA=y +# CONFIG_FB_EFI is not set # CONFIG_FB_IMAC is not set # CONFIG_FB_HGA is not set # CONFIG_FB_S1D13XXX is not set @@ -2314,6 +2397,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2372,6 +2456,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y 
+CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2654,7 +2740,32 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2735,8 +2846,14 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2862,6 +2979,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2978,6 +3096,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m 
+CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_EARLY_PRINTK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y @@ -3015,11 +3136,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -3058,16 +3181,29 @@ CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # CONFIG_CRYPTO_DEV_PADLOCK=m CONFIG_CRYPTO_DEV_PADLOCK_AES=y +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y # # Library routines # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_AUDIT_GENERIC=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64-smp.config index 125e389..8a6643e 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64-smp.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Fri Jun 27 01:42:44 2008 +# Thu Jan 22 12:02:35 2009 # CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -42,10 +42,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -337,6 +341,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration 
@@ -697,9 +702,26 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set CONFIG_MAC80211_DEBUG=y # CONFIG_MAC80211_HT_DEBUG is not set # CONFIG_MAC80211_VERBOSE_DEBUG is not set @@ -890,6 +912,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -926,6 +949,8 @@ CONFIG_SCSI_SAS_LIBSAS=m # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -984,10 +1009,15 @@ CONFIG_SCSI_DC395x=m # CONFIG_PCMCIA_FDOMAIN is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -997,7 +1027,6 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m @@ -1027,6 +1056,7 @@ CONFIG_PATA_MARVELL=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set CONFIG_PATA_NS87415=m # CONFIG_PATA_OPTI is not set @@ -1042,6 +1072,7 @@ CONFIG_PATA_PDC2027X=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y 
# @@ -1066,6 +1097,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m +CONFIG_DM_RAID45=m # CONFIG_DM_UEVENT is not set # @@ -1079,8 +1111,13 @@ CONFIG_FUSION_MAX_SGE=40 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m # CONFIG_FUSION_LOGGING is not set + +# +# Enable only one of the two stacks, unless you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1200,7 +1237,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1215,16 +1251,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1278,6 +1317,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1288,12 +1329,38 @@ CONFIG_HOSTAP_CS=m CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not set -# CONFIG_IWL4965_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y # CONFIG_IWL3945 is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m 
+CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set +CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1817,6 +1884,8 @@ CONFIG_SENSORS_W83627EHF=m # # Misc devices # +CONFIG_EEPROM_93CX6=m +CONFIG_HP_ILO=m # # Multimedia devices @@ -1872,6 +1941,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -2001,6 +2071,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2057,6 +2128,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2337,7 +2410,33 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is 
not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2401,8 +2500,14 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2518,6 +2623,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2599,6 +2705,7 @@ CONFIG_DLM_DEBUG=y # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_ZLIB_INFLATE=y @@ -2657,6 +2764,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m +CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_IA64_GRANULE_16MB=y # CONFIG_IA64_GRANULE_64MB is not set # CONFIG_IA64_PRINT_HAZARDS is not set @@ -2690,11 +2800,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -2732,5 +2844,17 @@ CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# 
Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64.config index 852a070..6f7ef09 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ia64.config @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit # Linux kernel version: 2.6.18-prep -# Fri Jun 27 01:43:25 2008 +# Thu Jan 22 12:07:20 2009 # CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -41,10 +41,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -330,6 +334,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration @@ -690,9 +695,26 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. 
+# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set CONFIG_MAC80211_DEBUG=y # CONFIG_MAC80211_HT_DEBUG is not set # CONFIG_MAC80211_VERBOSE_DEBUG is not set @@ -883,6 +905,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -919,6 +942,8 @@ CONFIG_SCSI_SAS_LIBSAS=m # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -977,10 +1002,15 @@ CONFIG_SCSI_DC395x=m # CONFIG_PCMCIA_FDOMAIN is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -990,7 +1020,6 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m @@ -1020,6 +1049,7 @@ CONFIG_PATA_MARVELL=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set CONFIG_PATA_NS87415=m # CONFIG_PATA_OPTI is not set @@ -1035,6 +1065,7 @@ CONFIG_PATA_PDC2027X=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y # @@ -1059,6 +1090,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m +CONFIG_DM_RAID45=m # CONFIG_DM_UEVENT is not set # @@ -1072,8 +1104,13 @@ CONFIG_FUSION_MAX_SGE=40 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m # CONFIG_FUSION_LOGGING is not set + +# +# Enable only one of the two stacks, unless 
you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1194,7 +1231,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1209,16 +1245,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1272,6 +1311,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1282,12 +1323,38 @@ CONFIG_HOSTAP_CS=m CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not set -# CONFIG_IWL4965_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y # CONFIG_IWL3945 is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set 
+CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1814,6 +1881,8 @@ CONFIG_SENSORS_W83627EHF=m # # Misc devices # +CONFIG_EEPROM_93CX6=m +CONFIG_HP_ILO=m # # Multimedia devices @@ -1869,6 +1938,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -1998,6 +2068,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2054,6 +2125,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2334,7 +2407,33 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2398,8 +2497,14 @@ 
CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2515,6 +2620,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2596,6 +2702,7 @@ CONFIG_DLM_DEBUG=y # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_ZLIB_INFLATE=y @@ -2653,6 +2760,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m +CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_IA64_GRANULE_16MB=y # CONFIG_IA64_GRANULE_64MB is not set # CONFIG_IA64_PRINT_HAZARDS is not set @@ -2686,11 +2796,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -2728,5 +2840,17 @@ CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64-smp.config index d8a493c..34e56ed 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64-smp.config +++ 
b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64-smp.config @@ -1405,7 +1405,8 @@ CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m CONFIG_NETXEN_NIC=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -2686,7 +2687,34 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_EHCA=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64.config index d8a493c..34e56ed 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-ppc64.config @@ -1405,7 +1405,8 @@ CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m CONFIG_NETXEN_NIC=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -2686,7 +2687,34 @@ CONFIG_LEDS_TRIGGERS=y 
CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_EHCA=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64-smp.config index 30fb307..df7872b 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64-smp.config @@ -60,10 +60,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -126,6 +130,7 @@ CONFIG_MICROCODE=m CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_X86_HT=y +# CONFIG_EFI is not set CONFIG_X86_IO_APIC=y CONFIG_X86_LOCAL_APIC=y CONFIG_MTRR=y @@ -136,6 +141,7 @@ CONFIG_PREEMPT_NONE=y # 
CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set CONFIG_PREEMPT_BKL=y +CONFIG_PREEMPT_NOTIFIERS=y CONFIG_NUMA=y CONFIG_K8_NUMA=y CONFIG_NODES_SHIFT=6 @@ -174,6 +180,8 @@ CONFIG_TICK_DIVIDER=y CONFIG_IOMMU=y CONFIG_CALGARY_IOMMU=y # CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT is not set +CONFIG_AMD_IOMMU=y +CONFIG_IOMMU_HELPER=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y @@ -222,8 +230,7 @@ CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=y CONFIG_ACPI_NUMA=y CONFIG_ACPI_ASUS=m -CONFIG_ACPI_IBM=m -CONFIG_ACPI_IBM_BAY=y +# CONFIG_ACPI_IBM is not set CONFIG_ACPI_TOSHIBA=m CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set @@ -234,12 +241,17 @@ CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y CONFIG_ACPI_HOTPLUG_MEMORY=m CONFIG_ACPI_SBS=m +CONFIG_THINKPAD_ACPI=m +# CONFIG_THINKPAD_ACPI_DEBUG is not set +CONFIG_THINKPAD_ACPI_BAY=y +CONFIG_THINKPAD_ACPI_VIDEO=y +CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y # # CPU Frequency scaling # CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y +CONFIG_CPU_FREQ_TABLE=m CONFIG_CPU_FREQ_DEBUG=y CONFIG_CPU_FREQ_STAT=m CONFIG_CPU_FREQ_STAT_DETAILS=y @@ -254,9 +266,9 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m # # CPUFreq processor drivers # -CONFIG_X86_POWERNOW_K8=y +CONFIG_X86_POWERNOW_K8=m CONFIG_X86_POWERNOW_K8_ACPI=y -CONFIG_X86_SPEEDSTEP_CENTRINO=y +CONFIG_X86_SPEEDSTEP_CENTRINO=m CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI=y CONFIG_X86_ACPI_CPUFREQ=m @@ -272,9 +284,13 @@ CONFIG_X86_ACPI_CPUFREQ=m CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_VIRTIO=m +CONFIG_VIRTIO_RING=m +CONFIG_VIRTIO_PCI=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=m # CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set +CONFIG_PCI_DOMAINS=y CONFIG_PCI_MSI=y # CONFIG_PCI_DEBUG is not set @@ -382,6 +398,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration @@ -420,7 +437,7 @@ CONFIG_IPV6=m CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y 
CONFIG_IPV6_ROUTE_INFO=y -# CONFIG_IPV6_OPTIMISTIC_DAD is not set +CONFIG_IPV6_OPTIMISTIC_DAD=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m @@ -742,17 +759,27 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set -CONFIG_MAC80211_DEBUG=y -# CONFIG_MAC80211_HT_DEBUG is not set -# CONFIG_MAC80211_VERBOSE_DEBUG is not set -# CONFIG_MAC80211_LOWTX_FRAME_DUMP is not set -# CONFIG_TKIP_DEBUG is not set -# CONFIG_MAC80211_DEBUG_COUNTERS is not set -# CONFIG_MAC80211_IBSS_DEBUG is not set -# CONFIG_MAC80211_VERBOSE_PS_DEBUG is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set +# CONFIG_MAC80211_DEBUG is not set CONFIG_IEEE80211=m # CONFIG_IEEE80211_DEBUG is not set CONFIG_IEEE80211_CRYPT_WEP=m @@ -964,6 +991,7 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=m # # ATA/ATAPI/MFM/RLL support @@ -1038,6 +1066,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -1068,12 +1097,14 @@ CONFIG_SCSI_FC_ATTRS=m # CONFIG_SCSI_ISCSI_ATTRS is not set CONFIG_SCSI_SAS_ATTRS=m CONFIG_SCSI_SAS_LIBSAS=m -# CONFIG_SCSI_SAS_ATA is not set +CONFIG_SCSI_SAS_ATA=y # CONFIG_SCSI_SAS_LIBSAS_DEBUG is not set # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -1103,6 +1134,7 @@ CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m CONFIG_SCSI_HPTIOP=m # CONFIG_SCSI_BUSLOGIC is not set 
+CONFIG_FCOE_FNIC=m # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -1135,10 +1167,15 @@ CONFIG_SCSI_DC395x=m # CONFIG_PCMCIA_FDOMAIN is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -1148,13 +1185,12 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m CONFIG_SATA_VITESSE=m CONFIG_SATA_INIC162X=m -CONFIG_PATA_ACPI=m +# CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set # CONFIG_PATA_AMD is not set # CONFIG_PATA_ARTOP is not set @@ -1178,8 +1214,9 @@ CONFIG_PATA_MARVELL=m # CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set -CONFIG_PATA_NS87415=m +# CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set # CONFIG_PATA_PCMCIA is not set @@ -1189,10 +1226,11 @@ CONFIG_PATA_NS87415=m # CONFIG_PATA_SC1200 is not set # CONFIG_PATA_SERVERWORKS is not set CONFIG_PATA_PDC2027X=m -# CONFIG_PATA_SIL680 is not set +CONFIG_PATA_SIL680=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y # @@ -1217,7 +1255,8 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m -# CONFIG_DM_UEVENT is not set +CONFIG_DM_RAID45=m +CONFIG_DM_UEVENT=y # # Fusion MPT device support @@ -1226,12 +1265,17 @@ CONFIG_FUSION=y CONFIG_FUSION_SPI=m CONFIG_FUSION_FC=m CONFIG_FUSION_SAS=m -CONFIG_FUSION_MAX_SGE=40 +CONFIG_FUSION_MAX_SGE=128 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m -# 
CONFIG_FUSION_LOGGING is not set +CONFIG_FUSION_LOGGING=y + +# +# Enable only one of the two stacks, unless you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1337,7 +1381,8 @@ CONFIG_8139TOO_8129=y # CONFIG_8139_OLD_RX_RESET is not set CONFIG_SIS900=m CONFIG_EPIC100=m -# CONFIG_SUNDANCE is not set +CONFIG_SUNDANCE=m +# CONFIG_SUNDANCE_MMIO is not set CONFIG_VIA_RHINE=m CONFIG_VIA_RHINE_MMIO=y CONFIG_VIA_RHINE_NAPI=y @@ -1361,7 +1406,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1376,16 +1420,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1447,6 +1494,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1463,12 +1512,42 @@ CONFIG_BCM43XX_DMA_AND_PIO_MODE=y CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not set -# CONFIG_IWL4965_DEBUG is not set -# CONFIG_IWL3945 is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y +CONFIG_IWL3945=m +# CONFIG_IWL3945_RFKILL is not set 
+CONFIG_IWL3945_SPECTRUM_MEASUREMENT=y +# CONFIG_IWL3945_LEDS is not set +# CONFIG_IWL3945_DEBUG is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set +CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1537,6 +1616,7 @@ CONFIG_NETPOLL=y # CONFIG_NETPOLL_RX is not set CONFIG_NETPOLL_TRAP=y CONFIG_NET_POLL_CONTROLLER=y +CONFIG_VIRTIO_NET=m # # ISDN subsystem @@ -1808,7 +1888,8 @@ CONFIG_PPDEV=m # IPMI # CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set +CONFIG_IPMI_PANIC_EVENT=y +CONFIG_IPMI_PANIC_STRING=y CONFIG_IPMI_DEVICE_INTERFACE=m CONFIG_IPMI_SI=m CONFIG_IPMI_WATCHDOG=m @@ -1835,7 +1916,7 @@ CONFIG_IBMASR=m # CONFIG_WAFER_WDT is not set CONFIG_I6300ESB_WDT=m CONFIG_I8XX_TCO=m -CONFIG_HP_WATCHDOG=m +# CONFIG_HP_WATCHDOG is not set # CONFIG_SC1200_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_SBC8360_WDT is not set @@ -1902,8 +1983,12 @@ CONFIG_HPET=y # CONFIG_HPET_RTC_IRQ is not set # CONFIG_HPET_MMAP is not set CONFIG_HANGCHECK_TIMER=m -# CONFIG_TCG_TPM is not set -# CONFIG_TELCLOCK is not set +CONFIG_TCG_TPM=m +CONFIG_TCG_TIS=m +CONFIG_TCG_NSC=m +CONFIG_TCG_ATMEL=m +CONFIG_TCG_INFINEON=m +CONFIG_TELCLOCK=m # # I2C support @@ -1929,7 +2014,7 @@ CONFIG_I2C_AMD756_S4882=m CONFIG_I2C_AMD8111=m CONFIG_I2C_I801=m # CONFIG_I2C_I810 is not set -# CONFIG_I2C_PIIX4 is not set +CONFIG_I2C_PIIX4=m CONFIG_I2C_ISA=m CONFIG_I2C_NFORCE2=m # CONFIG_I2C_OCORES is not set @@ -2024,6 +2109,8 @@ CONFIG_SENSORS_HDAPS=m # Misc devices # # CONFIG_IBM_ASM is not set +CONFIG_EEPROM_93CX6=m 
+CONFIG_HP_ILO=m # # Multimedia devices @@ -2079,6 +2166,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -2218,6 +2306,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2275,6 +2364,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2557,7 +2648,33 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2641,8 +2758,14 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # 
CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2767,7 +2890,9 @@ CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set -# CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_EXPERIMENTAL=y +CONFIG_CIFS_UPCALL=y +CONFIG_CIFS_DFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2882,6 +3007,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m +CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_DEBUG_RODATA=y # CONFIG_IOMMU_DEBUG is not set CONFIG_DEBUG_STACKOVERFLOW=y @@ -2913,11 +3041,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -2949,21 +3079,34 @@ CONFIG_CRYPTO_ANUBIS=m CONFIG_CRYPTO_DEFLATE=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_CRC32C=y -# CONFIG_CRYPTO_TEST is not set +CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_SIGNATURE=y CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y # # Library routines # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_ZLIB_INFLATE=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64.config b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64.config index 8fe5826..645f2ce 100644 --- 
a/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.6.18-2.6-rhel5-x86_64.config @@ -59,10 +59,14 @@ CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y +CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SHMEM=y CONFIG_SLAB=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_TRACEPOINTS=y +CONFIG_MARKERS=y +CONFIG_TRACEPROBES=m CONFIG_RT_MUTEXES=y # CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 @@ -123,6 +127,7 @@ CONFIG_X86_GOOD_APIC=y CONFIG_MICROCODE=m CONFIG_X86_MSR=y CONFIG_X86_CPUID=y +# CONFIG_EFI is not set CONFIG_X86_IO_APIC=y CONFIG_X86_LOCAL_APIC=y CONFIG_MTRR=y @@ -130,6 +135,7 @@ CONFIG_MTRR=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set +CONFIG_PREEMPT_NOTIFIERS=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_ARCH_FLATMEM_ENABLE=y @@ -157,6 +163,8 @@ CONFIG_TICK_DIVIDER=y CONFIG_IOMMU=y CONFIG_CALGARY_IOMMU=y # CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT is not set +CONFIG_AMD_IOMMU=y +CONFIG_IOMMU_HELPER=y CONFIG_SWIOTLB=y CONFIG_X86_MCE=y CONFIG_X86_MCE_INTEL=y @@ -213,6 +221,11 @@ CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y CONFIG_ACPI_HOTPLUG_MEMORY=m CONFIG_ACPI_SBS=m +CONFIG_THINKPAD_ACPI=m +# CONFIG_THINKPAD_ACPI_DEBUG is not set +CONFIG_THINKPAD_ACPI_BAY=y +CONFIG_THINKPAD_ACPI_VIDEO=y +CONFIG_THINKPAD_ACPI_HOTKEY_POLL=y # # CPU Frequency scaling @@ -251,9 +264,13 @@ CONFIG_X86_ACPI_CPUFREQ=m CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y +CONFIG_VIRTIO=m +CONFIG_VIRTIO_RING=m +CONFIG_VIRTIO_PCI=m CONFIG_PCIEPORTBUS=y CONFIG_HOTPLUG_PCI_PCIE=m # CONFIG_HOTPLUG_PCI_PCIE_POLL_EVENT_MODE is not set +CONFIG_PCI_DOMAINS=y CONFIG_PCI_MSI=y # CONFIG_PCI_DEBUG is not set @@ -361,6 +378,7 @@ CONFIG_TCP_CONG_VEGAS=m CONFIG_TCP_CONG_SCALABLE=m CONFIG_TCP_CONG_LP=m CONFIG_TCP_CONG_VENO=m +CONFIG_INET_LRO=y # # IP: Virtual Server Configuration @@ -721,9 +739,26 @@ CONFIG_NL80211=y CONFIG_WIRELESS_EXT=y 
CONFIG_NET_WIRELESS_RTNETLINK=y CONFIG_MAC80211=m -CONFIG_MAC80211_RCSIMPLE=y + +# +# Rate control algorithm selection +# +CONFIG_MAC80211_RC_DEFAULT_PID=y +# CONFIG_MAC80211_RC_DEFAULT_NONE is not set + +# +# Selecting 'y' for an algorithm will +# + +# +# build the algorithm into mac80211. +# +CONFIG_MAC80211_RC_DEFAULT="pid" +CONFIG_MAC80211_RC_PID=y +# CONFIG_MAC80211_MESH is not set CONFIG_MAC80211_LEDS=y # CONFIG_MAC80211_DEBUGFS is not set +# CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set CONFIG_MAC80211_DEBUG=y # CONFIG_MAC80211_HT_DEBUG is not set # CONFIG_MAC80211_VERBOSE_DEBUG is not set @@ -943,6 +978,7 @@ CONFIG_CDROM_PKTCDVD=m CONFIG_CDROM_PKTCDVD_BUFFERS=8 # CONFIG_CDROM_PKTCDVD_WCACHE is not set CONFIG_ATA_OVER_ETH=m +CONFIG_VIRTIO_BLK=m # # ATA/ATAPI/MFM/RLL support @@ -1017,6 +1053,7 @@ CONFIG_IDEDMA_AUTO=y # CONFIG_RAID_ATTRS=m CONFIG_SCSI=m +CONFIG_SCSI_DMA=y CONFIG_SCSI_NETLINK=y CONFIG_SCSI_PROC_FS=y @@ -1053,6 +1090,8 @@ CONFIG_SCSI_SAS_LIBSAS=m # # SCSI low-level drivers # +CONFIG_LIBFC=m +CONFIG_FCOE=m # CONFIG_ISCSI_TCP is not set CONFIG_BLK_DEV_3W_XXXX_RAID=m CONFIG_SCSI_3W_9XXX=m @@ -1082,6 +1121,7 @@ CONFIG_MEGARAID_LEGACY=m CONFIG_MEGARAID_SAS=m CONFIG_SCSI_HPTIOP=m # CONFIG_SCSI_BUSLOGIC is not set +CONFIG_FCOE_FNIC=m # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_EATA is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set @@ -1114,10 +1154,15 @@ CONFIG_SCSI_DC395x=m # CONFIG_PCMCIA_FDOMAIN is not set # CONFIG_PCMCIA_QLOGIC is not set # CONFIG_PCMCIA_SYM53C500 is not set +CONFIG_SCSI_DH=m +CONFIG_SCSI_DH_RDAC=m CONFIG_ATA=m # CONFIG_ATA_NONSTANDARD is not set CONFIG_ATA_ACPI=y +CONFIG_SATA_PMP=y CONFIG_SATA_AHCI=m +CONFIG_SATA_SIL24=m +CONFIG_ATA_SFF=y CONFIG_SATA_SVW=m CONFIG_ATA_PIIX=m CONFIG_SATA_MV=m @@ -1127,7 +1172,6 @@ CONFIG_SATA_QSTOR=m CONFIG_SATA_PROMISE=m CONFIG_SATA_SX4=m CONFIG_SATA_SIL=m -CONFIG_SATA_SIL24=m CONFIG_SATA_SIS=m CONFIG_SATA_ULI=m CONFIG_SATA_VIA=m @@ -1157,6 +1201,7 @@ CONFIG_PATA_MARVELL=m # 
CONFIG_PATA_MPIIX is not set # CONFIG_PATA_OLDPIIX is not set # CONFIG_PATA_NETCELL is not set +# CONFIG_PATA_NINJA32 is not set # CONFIG_PATA_NS87410 is not set CONFIG_PATA_NS87415=m # CONFIG_PATA_OPTI is not set @@ -1172,6 +1217,7 @@ CONFIG_PATA_PDC2027X=m CONFIG_PATA_SIS=m # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set +# CONFIG_PATA_SCH is not set CONFIG_ATA_INTEL_COMBINED=y # @@ -1196,6 +1242,7 @@ CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_EMC=m CONFIG_DM_MULTIPATH_RDAC=m CONFIG_DM_MULTIPATH_HP=m +CONFIG_DM_RAID45=m # CONFIG_DM_UEVENT is not set # @@ -1209,8 +1256,13 @@ CONFIG_FUSION_MAX_SGE=40 CONFIG_FUSION_CTL=m CONFIG_FUSION_LAN=m # CONFIG_FUSION_LOGGING is not set + +# +# Enable only one of the two stacks, unless you know what you are doing +# CONFIG_FIREWIRE=m CONFIG_FIREWIRE_OHCI=m +CONFIG_FIREWIRE_OHCI_DEBUG=y CONFIG_FIREWIRE_SBP2=m # @@ -1341,7 +1393,6 @@ CONFIG_NS83820=m # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set CONFIG_R8169=m -CONFIG_R8169_NAPI=y CONFIG_R8169_VLAN=y CONFIG_SIS190=m CONFIG_SKGE=m @@ -1356,16 +1407,19 @@ CONFIG_QLA3XXX=m # Ethernet (10000 Mbit) # CONFIG_CHELSIO_T1=m -# CONFIG_CHELSIO_T3 is not set +CONFIG_CHELSIO_T3=m CONFIG_IXGBE=m CONFIG_IXGB=m CONFIG_IXGB_NAPI=y CONFIG_S2IO=m CONFIG_S2IO_NAPI=y CONFIG_MYRI10GE=m +CONFIG_ENIC=m CONFIG_NETXEN_NIC=m +CONFIG_NIU=m CONFIG_BNX2X=m -# CONFIG_MLX4_CORE is not set +CONFIG_MLX4_CORE=m +CONFIG_MLX4_DEBUG=y # # Token Ring devices @@ -1427,6 +1481,8 @@ CONFIG_PCMCIA_WL3501=m # CONFIG_PRISM54=m CONFIG_USB_ZD1201=m +CONFIG_RTL8180=m +CONFIG_RTL8187=m CONFIG_HOSTAP=m CONFIG_HOSTAP_FIRMWARE=y CONFIG_HOSTAP_FIRMWARE_NVRAM=y @@ -1443,12 +1499,38 @@ CONFIG_BCM43XX_DMA_AND_PIO_MODE=y CONFIG_ZD1211RW=m # CONFIG_ZD1211RW_DEBUG is not set CONFIG_NET_WIRELESS=y -CONFIG_IWL4965=m -# CONFIG_IWL4965_QOS is not set -# CONFIG_IWL4965_SPECTRUM_MEASUREMENT is not set -# CONFIG_IWL4965_SENSITIVITY is not set -# CONFIG_IWL4965_DEBUG is not set +CONFIG_ATH5K=m +# CONFIG_ATH5K_DEBUG is 
not set +CONFIG_IWLWIFI=m +CONFIG_IWLCORE=m +# CONFIG_IWLWIFI_LEDS is not set +# CONFIG_IWLWIFI_RFKILL is not set +# CONFIG_IWLWIFI_DEBUG is not set +CONFIG_IWLAGN=m +CONFIG_IWLAGN_SPECTRUM_MEASUREMENT=y +# CONFIG_IWLAGN_LEDS is not set +CONFIG_IWL4965=y +CONFIG_IWL5000=y # CONFIG_IWL3945 is not set +CONFIG_RT2X00=m +CONFIG_RT2X00_LIB=m +CONFIG_RT2X00_LIB_PCI=m +CONFIG_RT2X00_LIB_USB=m +CONFIG_RT2X00_LIB_FIRMWARE=y +CONFIG_RT2400PCI=m +# CONFIG_RT2400PCI_RFKILL is not set +# CONFIG_RT2400PCI_LEDS is not set +CONFIG_RT2500PCI=m +# CONFIG_RT2500PCI_RFKILL is not set +# CONFIG_RT2500PCI_LEDS is not set +CONFIG_RT61PCI=m +# CONFIG_RT61PCI_RFKILL is not set +# CONFIG_RT61PCI_LEDS is not set +CONFIG_RT2500USB=m +# CONFIG_RT2500USB_LEDS is not set +CONFIG_RT73USB=m +# CONFIG_RT73USB_LEDS is not set +# CONFIG_RT2X00_DEBUG is not set # # PCMCIA network device support @@ -1517,6 +1599,7 @@ CONFIG_NETPOLL=y # CONFIG_NETPOLL_RX is not set CONFIG_NETPOLL_TRAP=y CONFIG_NET_POLL_CONTROLLER=y +CONFIG_VIRTIO_NET=m # # ISDN subsystem @@ -2008,6 +2091,8 @@ CONFIG_SENSORS_HDAPS=m # Misc devices # # CONFIG_IBM_ASM is not set +CONFIG_EEPROM_93CX6=m +CONFIG_HP_ILO=m # # Multimedia devices @@ -2063,6 +2148,7 @@ CONFIG_VIDEO_UPD64083=m # # V4L USB devices # +CONFIG_USB_VIDEO_CLASS=m CONFIG_VIDEO_PVRUSB2=m CONFIG_VIDEO_PVRUSB2_24XXX=y CONFIG_VIDEO_PVRUSB2_SYSFS=y @@ -2202,6 +2288,7 @@ CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_VERBOSE_PROCFS=y # CONFIG_SND_VERBOSE_PRINTK is not set # CONFIG_SND_DEBUG is not set +CONFIG_SND_VMASTER=y # # Generic devices @@ -2259,6 +2346,8 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_FM801_TEA575X_BOOL=y CONFIG_SND_FM801_TEA575X=m +CONFIG_SND_HDA_POWER_SAVE=y +CONFIG_SND_HDA_POWER_SAVE_DEFAULT=0 CONFIG_SND_HDA_INTEL=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m @@ -2541,7 +2630,33 @@ CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=m CONFIG_LEDS_TRIGGER_IDE_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=m -# CONFIG_INFINIBAND is not set +CONFIG_INFINIBAND=m 
+CONFIG_INFINIBAND_USER_MAD=m +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_MTHCA_DEBUG=y +CONFIG_INFINIBAND_IPATH=m +CONFIG_INFINIBAND_AMSO1100=m +# CONFIG_INFINIBAND_AMSO1100_DEBUG is not set +CONFIG_INFINIBAND_CXGB3=m +# CONFIG_INFINIBAND_CXGB3_DEBUG is not set +CONFIG_INFINIBAND_NES=m +# CONFIG_INFINIBAND_NES_DEBUG is not set +CONFIG_MLX4_INFINIBAND=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INFINIBAND_IPOIB_CM=y +CONFIG_INFINIBAND_IPOIB_DEBUG=y +# CONFIG_INFINIBAND_IPOIB_DEBUG_DATA is not set +CONFIG_INFINIBAND_SRP=m +CONFIG_INFINIBAND_ISER=m +CONFIG_INFINIBAND_SDP=m +# CONFIG_INFINIBAND_SDP_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC=m +# CONFIG_INFINIBAND_QLGC_VNIC_DEBUG is not set +CONFIG_INFINIBAND_QLGC_VNIC_STATS=y +CONFIG_INFINIBAND_MADEYE=m # # EDAC - error detection and reporting (RAS) (EXPERIMENTAL) @@ -2625,8 +2740,14 @@ CONFIG_EXT3_FS=m CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4DEV_FS=m +CONFIG_EXT4DEV_FS_XATTR=y +CONFIG_EXT4DEV_FS_POSIX_ACL=y +CONFIG_EXT4DEV_FS_SECURITY=y CONFIG_JBD=m # CONFIG_JBD_DEBUG is not set +CONFIG_JBD2=m +# CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set @@ -2752,6 +2873,7 @@ CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y # CONFIG_CIFS_DEBUG2 is not set # CONFIG_CIFS_EXPERIMENTAL is not set +CONFIG_CIFS_UPCALL=y # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set @@ -2866,6 +2988,9 @@ CONFIG_DEBUG_LIST=y # CONFIG_FORCED_INLINING is not set CONFIG_BOOT_DELAY=y # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_SAMPLES=y +CONFIG_SAMPLE_MARKERS=m +CONFIG_SAMPLE_TRACEPOINTS=m CONFIG_DEBUG_RODATA=y # CONFIG_IOMMU_DEBUG is not set CONFIG_DEBUG_STACKOVERFLOW=y @@ -2897,11 +3022,13 @@ CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT=y # CONFIG_CRYPTO=y CONFIG_CRYPTO_API=m +CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_ALGAPI=m 
CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m CONFIG_CRYPTO_SEQIV=m CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_NHMAC=m @@ -2940,14 +3067,27 @@ CONFIG_CRYPTO_SIGNATURE_DSA=y CONFIG_CRYPTO_MPILIB=y # +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m + +# # Hardware crypto devices # +CONFIG_XEN_BLKDEV_FRONTEND=m +CONFIG_XEN_NETDEV_FRONTEND=m + +# +# Xen PV-ON-HVM Configuration +# +CONFIG_XEN_PV_ON_HVM=y # # Library routines # CONFIG_CRC_CCITT=m CONFIG_CRC16=m +CONFIG_CRC_ITU_T=m CONFIG_CRC32=y CONFIG_LIBCRC32C=y CONFIG_ZLIB_INFLATE=y diff --git a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch index c224061..2dca1c1 100644 --- a/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch +++ b/lustre/kernel_patches/patches/jbd-journal-chksum-2.6.18-vanilla.patch @@ -188,7 +188,7 @@ Index: linux-2.6.18.8/fs/jbd/commit.c /* * First job: lock down the current transaction and wait for -@@ -439,38 +532,14 @@ void journal_commit_transaction(journal_ +@@ -439,39 +532,14 @@ void journal_commit_transaction(journal_ journal_submit_data_buffers(journal, commit_transaction); /* @@ -206,10 +206,11 @@ Index: linux-2.6.18.8/fs/jbd/commit.c - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); -- if (unlikely(!buffer_uptodate(bh))) -- err = -EIO; - spin_lock(&journal->j_list_lock); - } +- if (unlikely(!buffer_uptodate(bh))) +- err = -EIO; +- - if (!inverted_lock(journal, bh)) { - put_bh(bh); - spin_lock(&journal->j_list_lock); diff --git a/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch index e7c178d..c770722 100644 --- a/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch +++ b/lustre/kernel_patches/patches/jbd-stats-2.6-rhel5.patch @@ -588,9 +588,9 @@ Index: linux-2.6.18-8.1.8/fs/jbd/checkpoint.c - retry = 
__process_buffer(journal, jh, bhs,&batch_count); + retry = __process_buffer(journal, jh, bhs,&batch_count, + transaction); - if (!retry && lock_need_resched(&journal->j_list_lock)){ - spin_unlock(&journal->j_list_lock); - retry = 1; + if (retry < 0 && !result) + result = retry; + if (!retry && lock_need_resched(&journal->j_list_lock)){ @@ -667,6 +672,8 @@ void __journal_insert_checkpoint(struct void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) diff --git a/lustre/kernel_patches/targets/2.6-rhel5.target.in b/lustre/kernel_patches/targets/2.6-rhel5.target.in index 18dc724..9f88573 100644 --- a/lustre/kernel_patches/targets/2.6-rhel5.target.in +++ b/lustre/kernel_patches/targets/2.6-rhel5.target.in @@ -1,5 +1,5 @@ lnxmaj="2.6.18" -lnxrel="92.1.22.el5" +lnxrel="128.1.1.el5" KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 SERIES=2.6-rhel5.series @@ -9,7 +9,7 @@ RHBUILD=1 LINUX26=1 LUSTRE_VERSION=@VERSION@ -OFED_VERSION=1.3.1 +OFED_VERSION=inkernel BASE_ARCHS="i686 x86_64 ia64 ppc64" BIGMEM_ARCHS="" diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index 87465ea..c059fa5 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -3,7 +3,7 @@ SERIES VERSION COMMENT SUPPORTED KERNELS: 2.6-rhel4 RHEL4: 2.6.9-67.0.20.EL 2.6-sles10 SLES10: 2.6.16.60-0.33 -2.6-rhel5 RHEL5: 2.6.18-92.1.22.el5 +2.6-rhel5 RHEL5: 2.6.18-128.1.1.el5 2.6.18-vanilla kernel.org: 2.6.18.8 2.6.22-vanilla kernel.org: 2.6.22.14 diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 58875a9..ae31ec6 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -413,6 +413,7 @@ static int __ldlm_add_waiting_lock(struct ldlm_lock *lock, int seconds) static int ldlm_add_waiting_lock(struct ldlm_lock *lock) { int ret; + int timeout = ldlm_get_enq_timeout(lock); LASSERT(!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK)); @@ -428,15 +429,16 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) return 
0; } - ret = __ldlm_add_waiting_lock(lock, ldlm_get_enq_timeout(lock)); + ret = __ldlm_add_waiting_lock(lock, timeout); if (ret) /* grab ref on the lock if it has been added to the * waiting list */ LDLM_LOCK_GET(lock); spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "%sadding to wait list", - ret == 0 ? "not re-" : ""); + LDLM_DEBUG(lock, "%sadding to wait list(timeout: %d, AT: %s)", + ret == 0 ? "not re-" : "", timeout, + AT_OFF ? "off" : "on"); return ret; } diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index b407584..b4252fe 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -339,7 +339,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | - OBD_CONNECT_OSS_CAPA; + OBD_CONNECT_OSS_CAPA | OBD_CONNECT_GRANT_SHRINK; if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { /* OBD_CONNECT_CKSUM should always be set, even if checksums are diff --git a/lustre/llite/xattr.c b/lustre/llite/xattr.c index 0957d09..b8bb7f1 100644 --- a/lustre/llite/xattr.c +++ b/lustre/llite/xattr.c @@ -219,6 +219,12 @@ int ll_setxattr(struct dentry *dentry, const char *name, struct lov_user_md *lump = (struct lov_user_md *)value; int rc = 0; + /* Attributes that are saved via getxattr will always have + * the stripe_offset as 0. Instead, the MDS should be + * allowed to pick the starting OST index. 
b=17846 */ + if (lump->lmm_stripe_offset == 0) + lump->lmm_stripe_offset = -1; + if (S_ISREG(inode->i_mode)) { struct file f; int flags = FMODE_WRITE; diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index 98c1270..8da51aa 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -800,7 +800,8 @@ static inline struct lov_layout_raid0 *lov_r0(struct lov_object *lov) LASSERT(lov->lo_type == LLT_RAID0); raid0 = &lov->u.raid0; - LASSERT(raid0->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC); + LASSERT(raid0->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC || + raid0->lo_lsm->lsm_wire.lw_magic == LOV_MAGIC_V3); return raid0; } diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 88369ec2..bdbe9e6 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -492,8 +492,11 @@ static int lov_notify(struct obd_device *obd, struct obd_device *watched, int i; lov_getref(obd); for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (!lov->lov_tgts[i]) - continue; + /* don't send sync event if target not + * connected/activated */ + if (!lov->lov_tgts[i] || + !lov->lov_tgts[i]->ltd_active) + continue; if ((ev == OBD_NOTIFY_SYNC) || (ev == OBD_NOTIFY_SYNC_NONBLOCK)) diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c index 7fd5470..383c701 100644 --- a/lustre/lov/lov_pack.c +++ b/lustre/lov/lov_pack.c @@ -58,7 +58,7 @@ void lov_dump_lmm_v1(int level, struct lov_mds_md_v1 *lmm) { struct lov_ost_data_v1 *lod; int i; - + CDEBUG(level, "objid "LPX64", magic 0x%08x, pattern %#x\n", le64_to_cpu(lmm->lmm_object_id), le32_to_cpu(lmm->lmm_magic), le32_to_cpu(lmm->lmm_pattern)); @@ -476,23 +476,22 @@ static int __lov_setstripe(struct obd_export *exp, struct lov_stripe_md **lsmp, struct pool_desc *pool; pool = lov_find_pool(lov, lumv3.lmm_pool_name); - if (pool == NULL) - RETURN(-EINVAL); - - if (lumv3.lmm_stripe_offset != - (typeof(lumv3.lmm_stripe_offset))(-1)) { - rc = lov_check_index_in_pool(lumv3.lmm_stripe_offset, - pool); - if (rc < 0) { 
- lov_pool_putref(pool); - RETURN(-EINVAL); + if (pool != NULL) { + if (lumv3.lmm_stripe_offset != + (typeof(lumv3.lmm_stripe_offset))(-1)) { + rc = lov_check_index_in_pool( + lumv3.lmm_stripe_offset, pool); + if (rc < 0) { + lov_pool_putref(pool); + RETURN(-EINVAL); + } } - } - if (stripe_count > pool_tgt_count(pool)) - stripe_count = pool_tgt_count(pool); + if (stripe_count > pool_tgt_count(pool)) + stripe_count = pool_tgt_count(pool); - lov_pool_putref(pool); + lov_pool_putref(pool); + } } if ((__u64)lumv1->lmm_stripe_size * stripe_count > ~0UL) { diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index accb538..b8b1437 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef LPROCFS @@ -75,6 +76,61 @@ static int mdc_wr_max_rpcs_in_flight(struct file *file, const char *buffer, return count; } + +static int mdc_changelog_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct changelog_seq_iter *csi = seq->private; + + if (csi && csi->csi_llh) + llog_cat_put(csi->csi_llh); + if (csi && csi->csi_ctxt) + llog_ctxt_put(csi->csi_ctxt); + + return (changelog_seq_release(inode, file)); +} + +static int mdc_changelog_seq_open(struct inode *inode, struct file *file) +{ + struct changelog_seq_iter *csi; + int rc; + ENTRY; + + rc = changelog_seq_open(inode, file, &csi); + if (rc) + RETURN(rc); + + /* Set up the remote catalog handle */ + /* Note the proc file is set up with obd in data, not mdc_device */ + csi->csi_ctxt = llog_get_context((struct obd_device *)csi->csi_dev, + LLOG_CHANGELOG_REPL_CTXT); + if (csi->csi_ctxt == NULL) + GOTO(out, rc = -ENOENT); + rc = llog_create(csi->csi_ctxt, &csi->csi_llh, NULL, CHANGELOG_CATALOG); + if (rc) { + CERROR("llog_create() failed %d\n", rc); + GOTO(out, rc); + } + rc = llog_init_handle(csi->csi_llh, LLOG_F_IS_CAT, NULL); + if (rc) { + CERROR("llog_init_handle failed %d\n", rc); + GOTO(out, rc); + } + 
+out: + if (rc) + mdc_changelog_seq_release(inode, file); + RETURN(rc); +} + +static struct file_operations mdc_changelog_fops = { + .owner = THIS_MODULE, + .open = mdc_changelog_seq_open, + .read = seq_read, + .llseek = changelog_seq_lseek, + .release = mdc_changelog_seq_release, +}; + static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "uuid", lprocfs_rd_uuid, 0, 0 }, { "ping", 0, lprocfs_wr_ping, 0, 0, 0222 }, @@ -92,6 +148,7 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { mdc_wr_max_rpcs_in_flight, 0 }, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, { "import", lprocfs_rd_import, 0, 0 }, + { "changelog", 0, 0, 0, &mdc_changelog_fops, 0400 }, { 0 } }; diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index a5698b5..2823f10 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1047,6 +1047,14 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len, return -EINVAL; } switch (cmd) { + case OBD_IOC_CHANGELOG_CLEAR: { + struct changelog_setinfo cs = + {data->ioc_u64_1, data->ioc_u32_1}; + rc = obd_set_info_async(exp, strlen(KEY_CHANGELOG_CLEAR), + KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, + NULL); + GOTO(out, rc); + } case OBD_IOC_CLIENT_RECOVER: rc = ptlrpc_recover_import(imp, data->ioc_inlbuf1); if (rc < 0) @@ -1098,19 +1106,6 @@ static int do_set_info_async(struct obd_export *exp, int rc; ENTRY; - if (vallen != sizeof(int)) - RETURN(-EINVAL); - - spin_lock(&imp->imp_lock); - if (*((int *)val)) { - imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags |= OBD_CONNECT_RDONLY; - } else { - imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY; - imp->imp_connect_data.ocd_connect_flags &= ~OBD_CONNECT_RDONLY; - } - spin_unlock(&imp->imp_lock); - req = ptlrpc_request_alloc(imp, &RQF_MDS_SET_INFO); if (req == NULL) RETURN(-ENOMEM); @@ -1176,6 +1171,19 @@ int mdc_set_info_async(struct obd_export *exp, RETURN(0); } if (KEY_IS(KEY_READ_ONLY)) { + if (vallen != sizeof(int)) + 
RETURN(-EINVAL); + + spin_lock(&imp->imp_lock); + if (*((int *)val)) { + imp->imp_connect_flags_orig |= OBD_CONNECT_RDONLY; + imp->imp_connect_data.ocd_connect_flags |= OBD_CONNECT_RDONLY; + } else { + imp->imp_connect_flags_orig &= ~OBD_CONNECT_RDONLY; + imp->imp_connect_data.ocd_connect_flags &= ~OBD_CONNECT_RDONLY; + } + spin_unlock(&imp->imp_lock); + rc = do_set_info_async(exp, keylen, key, vallen, val, set); RETURN(rc); } @@ -1195,9 +1203,13 @@ int mdc_set_info_async(struct obd_export *exp, imp->imp_server_timeout = 1; spin_unlock(&imp->imp_lock); imp->imp_client->cli_request_portal = MDS_MDS_PORTAL; - CDEBUG(D_OTHER|D_WARNING, "%s: timeout / 2\n", exp->exp_obd->obd_name); + CDEBUG(D_OTHER, "%s: timeout / 2\n", exp->exp_obd->obd_name); RETURN(0); } + if (KEY_IS(KEY_CHANGELOG_CLEAR)) { + rc = do_set_info_async(exp, keylen, key, vallen, val, set); + RETURN(rc); + } RETURN(rc); } @@ -1665,8 +1677,16 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_llog_group *olg, rc = llog_setup(obd, olg, LLOG_LOVEA_REPL_CTXT, tgt, 0, NULL, &llog_client_ops); + if (rc) + RETURN(rc); + ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); + llog_initiator_connect(ctxt); + llog_ctxt_put(ctxt); + + rc = llog_setup(obd, olg, LLOG_CHANGELOG_REPL_CTXT, tgt, 0, NULL, + &llog_client_ops); if (rc == 0) { - ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT); + ctxt = llog_group_get_ctxt(olg, LLOG_CHANGELOG_REPL_CTXT); llog_initiator_connect(ctxt); llog_ctxt_put(ctxt); } @@ -1684,6 +1704,10 @@ static int mdc_llog_finish(struct obd_device *obd, int count) if (ctxt) rc = llog_cleanup(ctxt); + ctxt = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT); + if (ctxt) + rc = llog_cleanup(ctxt); + RETURN(rc); } @@ -1698,8 +1722,8 @@ static int mdc_process_config(struct obd_device *obd, obd_count len, void *buf) default: rc = class_process_proc_param(PARAM_MDC, lvars.obd_vars, lcfg, obd); - if (rc > 0) - rc = 0; + if (rc > 0) + rc = 0; break; } return(rc); diff --git 
a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index d25815b..9de0d80 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -132,16 +132,10 @@ static int changelog_init_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; ENTRY; - if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN)) { - CERROR("log is not plain\n"); - RETURN(-EINVAL); - } - if (rec->cr_hdr.lrh_type != CHANGELOG_REC) { - CERROR("Not a changelog rec? %d\n", rec->cr_hdr.lrh_type); - RETURN(-EINVAL); - } + LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN); + LASSERT(rec->cr_hdr.lrh_type == CHANGELOG_REC); - CDEBUG(D_INODE, + CDEBUG(D_INFO, "seeing record at index %d/%d/"LPU64" t=%x %.*s in log "LPX64"\n", hdr->lrh_index, rec->cr_hdr.lrh_index, rec->cr_index, rec->cr_type, rec->cr_namelen, rec->cr_name, @@ -151,32 +145,76 @@ static int changelog_init_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, RETURN(LLOG_PROC_BREAK); } +static int changelog_user_init_cb(struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) +{ + struct mdd_device *mdd = (struct mdd_device *)data; + struct llog_changelog_user_rec *rec = + (struct llog_changelog_user_rec *)hdr; + ENTRY; + + LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN); + LASSERT(rec->cur_hdr.lrh_type == CHANGELOG_USER_REC); + + CDEBUG(D_INFO, "seeing user at index %d/%d id=%d endrec="LPU64 + " in log "LPX64"\n", hdr->lrh_index, rec->cur_hdr.lrh_index, + rec->cur_id, rec->cur_endrec, llh->lgh_id.lgl_oid); + + spin_lock(&mdd->mdd_cl.mc_user_lock); + mdd->mdd_cl.mc_lastuser = rec->cur_id; + spin_unlock(&mdd->mdd_cl.mc_user_lock); + + RETURN(LLOG_PROC_BREAK); +} + + static int mdd_changelog_llog_init(struct mdd_device *mdd) { struct obd_device *obd = mdd2obd_dev(mdd); struct llog_ctxt *ctxt; int rc; + /* Find last changelog entry number */ ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); if (ctxt == NULL) { - CERROR("no context\n"); + CERROR("no changelog 
context\n"); return -EINVAL; } if (!ctxt->loc_handle) { - CERROR("no handle\n"); + llog_ctxt_put(ctxt); return -EINVAL; } + rc = llog_cat_reverse_process(ctxt->loc_handle, changelog_init_cb, mdd); llog_ctxt_put(ctxt); - if (rc < 0) + if (rc < 0) { CERROR("changelog init failed: %d\n", rc); - else - rc = 0; /* llog_proc_break is ok */ + return rc; + } + CDEBUG(D_INODE, "changelog starting index="LPU64"\n", + mdd->mdd_cl.mc_index); + + /* Find last changelog user id */ + ctxt = llog_get_context(obd, LLOG_CHANGELOG_USER_ORIG_CTXT); + if (ctxt == NULL) { + CERROR("no changelog user context\n"); + return -EINVAL; + } + if (!ctxt->loc_handle) { + llog_ctxt_put(ctxt); + return -EINVAL; + } - CDEBUG(D_INODE, "changelog_init index="LPU64"\n", mdd->mdd_cl.mc_index); + rc = llog_cat_reverse_process(ctxt->loc_handle, changelog_user_init_cb, + mdd); + llog_ctxt_put(ctxt); - return rc; + if (rc < 0) { + CERROR("changelog user init failed: %d\n", rc); + return rc; + } + return 0; } static int mdd_changelog_init(const struct lu_env *env, struct mdd_device *mdd) @@ -186,15 +224,18 @@ static int mdd_changelog_init(const struct lu_env *env, struct mdd_device *mdd) mdd->mdd_cl.mc_index = 0; spin_lock_init(&mdd->mdd_cl.mc_lock); cfs_waitq_init(&mdd->mdd_cl.mc_waitq); - mdd->mdd_cl.mc_starttime = cfs_time_current_64(); mdd->mdd_cl.mc_flags = 0; /* off by default */ - mdd->mdd_cl.mc_mask = CL_DEFMASK; + mdd->mdd_cl.mc_mask = CHANGELOG_DEFMASK; + spin_lock_init(&mdd->mdd_cl.mc_user_lock); + mdd->mdd_cl.mc_lastuser = 0; + rc = mdd_changelog_llog_init(mdd); if (rc) { CERROR("Changelog setup during init failed %d\n", rc); mdd->mdd_cl.mc_flags |= CLM_ERR; } + return rc; } @@ -254,19 +295,41 @@ int mdd_changelog_llog_cancel(struct mdd_device *mdd, long long endrec) { struct obd_device *obd = mdd2obd_dev(mdd); struct llog_ctxt *ctxt; + long long unsigned cur; int rc; ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); if (ctxt == NULL) return -ENXIO; - /* Some records purged; reset 
repeat-access time */ + spin_lock(&mdd->mdd_cl.mc_lock); + cur = (long long)mdd->mdd_cl.mc_index; + spin_unlock(&mdd->mdd_cl.mc_lock); + if (endrec > cur) + endrec = cur; + + /* purge to "0" is shorthand for everything */ + if (endrec == 0) + endrec = cur; + + /* If purging all records, write a header entry so we don't have an + empty catalog and we're sure to have a valid starting index next + time. In case of crash, we just restart with old log so we're + allright. */ + if (endrec == cur) { + rc = mdd_changelog_write_header(mdd, CLM_PURGE); + if (rc) + goto out; + } + + /* Some records were purged, so reset repeat-access time (so we + record new mtime update records, so users can see a file has been + changed since the last purge) */ mdd->mdd_cl.mc_starttime = cfs_time_current_64(); rc = llog_cancel(ctxt, NULL, 1, (struct llog_cookie *)&endrec, 0); - +out: llog_ctxt_put(ctxt); - return rc; } @@ -366,7 +429,7 @@ static int dot_lustre_mdd_open(const struct lu_env *env, struct md_object *obj, } static int dot_lustre_path(const struct lu_env *env, struct md_object *obj, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { return -ENOSYS; } @@ -532,7 +595,7 @@ static int obf_mdd_readpage(const struct lu_env *env, struct md_object *obj, } static int obf_path(const struct lu_env *env, struct md_object *obj, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { return -ENOSYS; } @@ -717,6 +780,9 @@ static int mdd_process_config(const struct lu_env *env, CERROR("lov init error %d \n", rc); GOTO(out, rc); } + rc = mdd_txn_init_credits(env, m); + if (rc) + break; mdd_changelog_init(env, m); break; @@ -914,6 +980,15 @@ static int mdd_update_capa_key(const struct lu_env *env, RETURN(rc); } +static int mdd_llog_ctxt_get(const struct lu_env *env, struct md_device *m, + int idx, void **h) +{ + struct mdd_device *mdd = lu2mdd_dev(&m->md_lu_dev); + + *h = 
llog_group_get_ctxt(&mdd2obd_dev(mdd)->obd_olg, idx); + return (*h == NULL ? -ENOENT : 0); +} + static struct lu_device *mdd_device_alloc(const struct lu_env *env, struct lu_device_type *t, struct lustre_cfg *lcfg) @@ -989,6 +1064,202 @@ struct md_capainfo *md_capainfo(const struct lu_env *env) } EXPORT_SYMBOL(md_capainfo); +static int mdd_changelog_user_register(struct mdd_device *mdd, int *id) +{ + struct llog_ctxt *ctxt; + struct llog_changelog_user_rec *rec; + int rc; + ENTRY; + + ctxt = llog_get_context(mdd2obd_dev(mdd),LLOG_CHANGELOG_USER_ORIG_CTXT); + if (ctxt == NULL) + RETURN(-ENXIO); + + OBD_ALLOC_PTR(rec); + if (rec == NULL) { + llog_ctxt_put(ctxt); + RETURN(-ENOMEM); + } + + rec->cur_hdr.lrh_len = sizeof(*rec); + rec->cur_hdr.lrh_type = CHANGELOG_USER_REC; + rec->cur_endrec = 0ULL; + spin_lock(&mdd->mdd_cl.mc_user_lock); + if (mdd->mdd_cl.mc_lastuser == (unsigned int)(-1)) { + spin_unlock(&mdd->mdd_cl.mc_user_lock); + CERROR("Maximum number of changelog users exceeded!\n"); + GOTO(out, rc = -EOVERFLOW); + } + *id = rec->cur_id = ++mdd->mdd_cl.mc_lastuser; + spin_unlock(&mdd->mdd_cl.mc_user_lock); + rc = llog_add(ctxt, &rec->cur_hdr, NULL, NULL, 0); + + CDEBUG(D_INODE, "Registered changelog user %d\n", *id); +out: + OBD_FREE_PTR(rec); + llog_ctxt_put(ctxt); + RETURN(rc); +} + +struct mdd_changelog_user_data { + __u64 mcud_endrec; /**< purge record for this user */ + __u64 mcud_minrec; /**< lowest changelog recno still referenced */ + __u32 mcud_id; + __u32 mcud_minid; /**< user id with lowest rec reference */ + int mcud_found:1; +}; + +/** Two things: + * 1. Find the smallest record everyone is willing to purge + * 2. 
Update the last purgeable record for this user + */ +static int mdd_changelog_user_purge_cb(struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) +{ + struct llog_changelog_user_rec *rec; + struct mdd_changelog_user_data *mcud = + (struct mdd_changelog_user_data *)data; + int rc; + ENTRY; + + LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN); + + rec = (struct llog_changelog_user_rec *)hdr; + + /* If we have a new endrec for this id, use it for the min check */ + if (rec->cur_id == mcud->mcud_id) + rec->cur_endrec = max(rec->cur_endrec, mcud->mcud_endrec); + + /* Track the minimum referenced record */ + if (mcud->mcud_minid == 0 || mcud->mcud_minrec > rec->cur_endrec) { + mcud->mcud_minid = rec->cur_id; + mcud->mcud_minrec = rec->cur_endrec; + } + + if (rec->cur_id != mcud->mcud_id) + RETURN(0); + + /* Update this user's record */ + mcud->mcud_found = 1; + + /* Special case: unregister this user if endrec == -1 */ + if (mcud->mcud_endrec == -1) { + struct llog_cookie cookie; + cookie.lgc_lgl = llh->lgh_id; + cookie.lgc_index = hdr->lrh_index; + rc = llog_cat_cancel_records(llh->u.phd.phd_cat_handle, + 1, &cookie); + RETURN(rc); + } + + /* Update the endrec */ + CDEBUG(D_IOCTL, "Rewriting changelog user %d endrec to "LPU64"\n", + mcud->mcud_id, rec->cur_endrec); + + /* hdr+1 is loc of data */ + hdr->lrh_len -= sizeof(*hdr) + sizeof(struct llog_rec_tail); + rc = llog_write_rec(llh, hdr, NULL, 0, (void *)(hdr + 1), + hdr->lrh_index); + + RETURN(rc); +} + +static int mdd_changelog_user_purge(struct mdd_device *mdd, int id, + long long endrec) +{ + struct mdd_changelog_user_data data; + struct llog_ctxt *ctxt; + int rc; + ENTRY; + + CDEBUG(D_IOCTL, "Purge request: id=%d, endrec="LPD64"\n", id, endrec); + + ctxt = llog_get_context(mdd2obd_dev(mdd),LLOG_CHANGELOG_USER_ORIG_CTXT); + if (ctxt == NULL) + return -ENXIO; + LASSERT(ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT); + + data.mcud_id = id; + data.mcud_endrec = endrec; + data.mcud_minid = 0; + 
data.mcud_minrec = 0; + rc = llog_cat_process(ctxt->loc_handle, mdd_changelog_user_purge_cb, + (void *)&data, 0, 0); + if ((rc >= 0) && (data.mcud_minrec > 0)) { + CDEBUG(D_INODE, "Purging CL entries up to "LPD64 + ", referenced by "CHANGELOG_USER_PREFIX"%d\n", + data.mcud_minrec, data.mcud_minid); + rc = mdd_changelog_llog_cancel(mdd, data.mcud_minrec); + } else { + CWARN("Could not determine changelog records to purge; rc=%d\n", + rc); + } + + if (!data.mcud_found) { + CWARN("No entry for user %d. Last changelog reference is " + LPD64" by changelog user %d\n", data.mcud_id, + data.mcud_minrec, data.mcud_minid); + rc = -ENOENT; + } + + llog_ctxt_put(ctxt); + RETURN (rc); +} + +/** mdd_iocontrol + * May be called remotely from mdt_iocontrol_handle or locally from + * mdt_iocontrol. Data may be freeform - remote handling doesn't enforce or + * swab an obd_ioctl_data format (but local ioctl handler does). + * \param cmd - ioc + * \param len - data len + * \param karg - ioctl data, in kernel space + */ +static int mdd_iocontrol(const struct lu_env *env, struct md_device *m, + unsigned int cmd, int len, void *karg) +{ + struct mdd_device *mdd; + struct obd_ioctl_data *data = karg; + int rc; + ENTRY; + + mdd = lu2mdd_dev(&m->md_lu_dev); + + /* Doesn't use obd_ioctl_data */ + if (cmd == OBD_IOC_CHANGELOG_CLEAR) { + struct changelog_setinfo *cs = karg; + if (len != sizeof(*cs)) { + CERROR("Bad changelog_clear ioctl size %d\n", len); + RETURN(-EINVAL); + } + rc = mdd_changelog_user_purge(mdd, cs->cs_id, cs->cs_recno); + RETURN(rc); + } + + /* Below ioctls use obd_ioctl_data */ + if (len != sizeof(*data)) { + CERROR("Bad ioctl size %d\n", len); + RETURN(-EINVAL); + } + if (data->ioc_version != OBD_IOCTL_VERSION) { + CERROR("Bad magic %x != %x\n", data->ioc_version, + OBD_IOCTL_VERSION); + RETURN(-EINVAL); + } + + switch (cmd) { + case OBD_IOC_CHANGELOG_REG: + rc = mdd_changelog_user_register(mdd, &data->ioc_u32_1); + break; + case OBD_IOC_CHANGELOG_DEREG: + rc = 
mdd_changelog_user_purge(mdd, data->ioc_u32_1, -1); + break; + default: + rc = -EOPNOTSUPP; + } + + RETURN (rc); +} + /* type constructor/destructor: mdd_type_init, mdd_type_fini */ LU_TYPE_INIT_FINI(mdd, &mdd_thread_key, &mdd_ucred_key, &mdd_capainfo_key); @@ -998,6 +1269,8 @@ const struct md_device_operations mdd_ops = { .mdo_maxsize_get = mdd_maxsize_get, .mdo_init_capa_ctxt = mdd_init_capa_ctxt, .mdo_update_capa_key= mdd_update_capa_key, + .mdo_llog_ctxt_get = mdd_llog_ctxt_get, + .mdo_iocontrol = mdd_iocontrol, #ifdef HAVE_QUOTA_SUPPORT .mdo_quota = { .mqo_notify = mdd_quota_notify, diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 4083dc4..3a5da50 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -108,6 +108,8 @@ struct mdd_changelog { int mc_mask; __u64 mc_index; __u64 mc_starttime; + spinlock_t mc_user_lock; + int mc_lastuser; }; /** Objects in .lustre dir */ diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index 4cdc048..6e043bf 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -59,17 +59,9 @@ #include "mdd_internal.h" -#ifndef SEEK_CUR /* SLES10 needs this */ -#define SEEK_CUR 1 -#define SEEK_END 2 -#endif - static const char *mdd_counter_names[LPROC_MDD_NR] = { }; -/* from LPROC_SEQ_FOPS(mdd_changelog) below */ -extern struct file_operations mdd_changelog_fops; - int mdd_procfs_init(struct mdd_device *mdd, const char *name) { struct lprocfs_static_vars lvars; @@ -161,10 +153,9 @@ static int lprocfs_rd_atime_diff(char *page, char **start, off_t off, return snprintf(page, count, "%lu\n", mdd->mdd_atime_diff); } -/* match enum changelog_rec_type */ -static const char *changelog_str[] = {"MARK","CREAT","MKDIR","HLINK","SLINK", - "MKNOD","UNLNK","RMDIR","RNMFM","RNMTO","OPEN","CLOSE","IOCTL", - "TRUNC","SATTR","XATTR"}; + +/**** changelogs ****/ +DECLARE_CHANGELOG_NAMES; const char *changelog_bit2str(int bit) { @@ -173,8 +164,8 @@ const char *changelog_bit2str(int bit) return NULL; } 
-static int lprocfs_rd_cl_mask(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int lprocfs_rd_changelog_mask(char *page, char **start, off_t off, + int count, int *eof, void *data) { struct mdd_device *mdd = data; int i = 0, rc = 0; @@ -189,8 +180,8 @@ static int lprocfs_rd_cl_mask(char *page, char **start, off_t off, return rc; } -static int lprocfs_wr_cl_mask(struct file *file, const char *buffer, - unsigned long count, void *data) +static int lprocfs_wr_changelog_mask(struct file *file, const char *buffer, + unsigned long count, void *data) { struct mdd_device *mdd = data; char *kernbuf; @@ -206,8 +197,8 @@ static int lprocfs_wr_cl_mask(struct file *file, const char *buffer, GOTO(out, rc = -EFAULT); kernbuf[count] = 0; - rc = libcfs_str2mask(kernbuf, changelog_bit2str, - &mdd->mdd_cl.mc_mask, CL_MINMASK, CL_ALLMASK); + rc = libcfs_str2mask(kernbuf, changelog_bit2str, &mdd->mdd_cl.mc_mask, + CHANGELOG_MINMASK, CHANGELOG_ALLMASK); if (rc == 0) rc = count; out: @@ -215,21 +206,70 @@ out: return rc; } -/** struct for holding changelog data for seq_file processing */ -struct cl_seq_iter { - struct mdd_device *csi_mdd; - __u64 csi_startrec; - __u64 csi_endrec; - loff_t csi_pos; - int csi_wrote; - int csi_startcat; - int csi_startidx; - int csi_fill:1; +struct cucb_data { + char *page; + int count; + int idx; }; +static int lprocfs_changelog_users_cb(struct llog_handle *llh, + struct llog_rec_hdr *hdr, void *data) +{ + struct llog_changelog_user_rec *rec; + struct cucb_data *cucb = (struct cucb_data *)data; + + LASSERT(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN); + + rec = (struct llog_changelog_user_rec *)hdr; + + cucb->idx += snprintf(cucb->page + cucb->idx, cucb->count - cucb->idx, + CHANGELOG_USER_PREFIX"%-3d "LPU64"\n", + rec->cur_id, rec->cur_endrec); + if (cucb->idx >= cucb->count) + return -ENOSPC; + + return 0; +} + +static int lprocfs_rd_changelog_users(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + 
struct mdd_device *mdd = data; + struct llog_ctxt *ctxt; + struct cucb_data cucb; + __u64 cur; + + *eof = 1; + + ctxt = llog_get_context(mdd2obd_dev(mdd),LLOG_CHANGELOG_USER_ORIG_CTXT); + if (ctxt == NULL) + return -ENXIO; + LASSERT(ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT); + + spin_lock(&mdd->mdd_cl.mc_lock); + cur = mdd->mdd_cl.mc_index; + spin_unlock(&mdd->mdd_cl.mc_lock); + + cucb.count = count; + cucb.page = page; + cucb.idx = 0; + + cucb.idx += snprintf(cucb.page + cucb.idx, cucb.count - cucb.idx, + "current index: "LPU64"\n", cur); + + cucb.idx += snprintf(cucb.page + cucb.idx, cucb.count - cucb.idx, + "%-5s %s\n", "ID", "index"); + + llog_cat_process(ctxt->loc_handle, lprocfs_changelog_users_cb, + &cucb, 0, 0); + + llog_ctxt_put(ctxt); + return cucb.idx; +} + /* non-seq version for direct calling by class_process_proc_param */ -static int lprocfs_wr_cl(struct file *file, const char *buffer, - unsigned long count, void *data) +static int mdd_changelog_write(struct file *file, const char *buffer, + unsigned long count, void *data) { struct mdd_device *mdd = (struct mdd_device *)data; char kernbuf[32]; @@ -271,32 +311,11 @@ static int lprocfs_wr_cl(struct file *file, const char *buffer, spin_unlock(&mdd->mdd_cl.mc_lock); } else { /* purge to an index */ - long long unsigned endrec, cur; - - spin_lock(&mdd->mdd_cl.mc_lock); - cur = (long long)mdd->mdd_cl.mc_index; - spin_unlock(&mdd->mdd_cl.mc_lock); + long long unsigned endrec; - if (strcmp(kernbuf, "0") == 0) - /* purge to "0" is shorthand for everything */ - endrec = cur; - else - endrec = (long long)simple_strtoull(kernbuf, &end, 0); - if ((kernbuf == end) || (endrec == 0)) + endrec = (long long)simple_strtoull(kernbuf, &end, 0); + if (end == kernbuf) goto out_usage; - if (endrec > cur) - endrec = cur; - - /* If purging all records, write a header entry so we - don't have an empty catalog and - we're sure to have a valid starting index next time. 
In - case of crash, we just restart with old log so we're - allright. */ - if (endrec == cur) { - rc = mdd_changelog_write_header(mdd, CLM_PURGE); - if (rc) - return rc; - } LCONSOLE_INFO("changelog purge to %llu\n", endrec); @@ -312,303 +331,103 @@ out_usage: return -EINVAL; } -static ssize_t mdd_cl_seq_write(struct file *file, const char *buffer, - size_t count, loff_t *off) +static ssize_t mdd_changelog_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) { struct seq_file *seq = file->private_data; - struct cl_seq_iter *csi = seq->private; - struct mdd_device *mdd = csi->csi_mdd; - - return lprocfs_wr_cl(file, buffer, count, mdd); -} - -#define D_CL 0 - -/* How many records per seq_show. Too small, we spawn llog_process threads - too often; too large, we run out of buffer space */ -#define CL_CHUNK_SIZE 100 - -static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, - void *data) -{ - struct seq_file *seq = (struct seq_file *)data; - struct cl_seq_iter *csi = seq->private; - struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; - int rc; - ENTRY; - - if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) || - (rec->cr_type >= CL_LAST)) { - CERROR("Not a changelog rec? %d/%d\n", rec->cr_hdr.lrh_type, - rec->cr_type); - RETURN(-EINVAL); - } - - CDEBUG(D_CL, "rec="LPU64" start="LPU64" cat=%d:%d start=%d:%d\n", - rec->cr_index, csi->csi_startrec, - llh->lgh_hdr->llh_cat_idx, llh->lgh_cur_idx, - csi->csi_startcat, csi->csi_startidx); - - if (rec->cr_index < csi->csi_startrec) - RETURN(0); - if (rec->cr_index == csi->csi_startrec) { - /* Remember where we started, since seq_read will re-read - * the data when it reallocs space. Sigh, if only there was - * a way to tell seq_file how big the buf should be in the - * first place... 
*/ - csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; - csi->csi_startidx = rec->cr_hdr.lrh_index - 1; - } - if (csi->csi_wrote > CL_CHUNK_SIZE) { - /* Stop at some point with a reasonable seq_file buffer size. - * Start from here the next time. - */ - csi->csi_endrec = rec->cr_index - 1; - csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; - csi->csi_startidx = rec->cr_hdr.lrh_index - 1; - csi->csi_wrote = 0; - RETURN(LLOG_PROC_BREAK); - } - - rc = seq_printf(seq, LPU64" %02d%-5s "LPU64" 0x%x t="DFID, - rec->cr_index, rec->cr_type, - changelog_str[rec->cr_type], rec->cr_time, - rec->cr_flags & CLF_FLAGMASK, PFID(&rec->cr_tfid)); - - if (rec->cr_namelen) - /* namespace rec includes parent and filename */ - rc += seq_printf(seq, " p="DFID" %.*s\n", PFID(&rec->cr_pfid), - rec->cr_namelen, rec->cr_name); - else - rc += seq_puts(seq, "\n"); - - if (rc < 0) { - /* seq_read will dump the whole buffer and re-seq_start with a - larger one; no point in continuing the llog_process */ - CDEBUG(D_CL, "rec="LPU64" overflow "LPU64"<-"LPU64"\n", - rec->cr_index, csi->csi_startrec, csi->csi_endrec); - csi->csi_endrec = csi->csi_startrec - 1; - csi->csi_wrote = 0; - RETURN(LLOG_PROC_BREAK); - } - - csi->csi_wrote++; - csi->csi_endrec = rec->cr_index; - - RETURN(0); -} - -static int mdd_cl_seq_show(struct seq_file *seq, void *v) -{ - struct cl_seq_iter *csi = seq->private; - struct obd_device *obd = mdd2obd_dev(csi->csi_mdd); - struct llog_ctxt *ctxt; - int rc; - - if (csi->csi_fill) { - /* seq_read wants more data to fill his buffer. But we already - filled the buf as much as we cared to; force seq_read to - accept that. */ - while ((rc = seq_putc(seq, 0)) == 0); - return 0; - } - - ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); - if (ctxt == NULL) - return -ENOENT; - - /* Since we have to restart the llog_cat_process for each chunk of the - seq_ functions, start from where we left off. 
*/ - rc = llog_cat_process(ctxt->loc_handle, changelog_show_cb, seq, - csi->csi_startcat, csi->csi_startidx); + struct changelog_seq_iter *csi = seq->private; + struct mdd_device *mdd = (struct mdd_device *)csi->csi_dev; - CDEBUG(D_CL, "seq_show "LPU64"-"LPU64" cat=%d:%d wrote=%d rc=%d\n", - csi->csi_startrec, csi->csi_endrec, csi->csi_startcat, - csi->csi_startidx, csi->csi_wrote, rc); - - llog_ctxt_put(ctxt); - - if (rc == LLOG_PROC_BREAK) - rc = 0; - - return rc; + return mdd_changelog_write(file, buffer, count, mdd); } -static int mdd_cl_done(struct cl_seq_iter *csi) +static int mdd_changelog_done(struct changelog_seq_iter *csi) { + struct mdd_device *mdd = (struct mdd_device *)csi->csi_dev; int done = 0; - spin_lock(&csi->csi_mdd->mdd_cl.mc_lock); - done = (csi->csi_endrec >= csi->csi_mdd->mdd_cl.mc_index); - spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock); + + spin_lock(&mdd->mdd_cl.mc_lock); + done = (csi->csi_endrec >= mdd->mdd_cl.mc_index); + spin_unlock(&mdd->mdd_cl.mc_lock); return done; } - -static void *mdd_cl_seq_start(struct seq_file *seq, loff_t *pos) +/* handle nonblocking */ +static ssize_t mdd_changelog_seq_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { - struct cl_seq_iter *csi = seq->private; - LASSERT(csi); - - CDEBUG(D_CL, "start "LPU64"-"LPU64" pos="LPU64"\n", - csi->csi_startrec, csi->csi_endrec, *pos); - - csi->csi_fill = 0; - - if (mdd_cl_done(csi)) - /* no more records, seq_read should return 0 if buffer - is empty */ - return NULL; - - if (*pos > csi->csi_pos) { - /* The seq_read implementation sucks. It may call start - multiple times, using pos to indicate advances, if any, - by arbitrarily increasing it by 1. So ignore the actual - value of pos, and just register any increase as - "seq_read wants the next values". 
*/ - csi->csi_startrec = csi->csi_endrec + 1; - csi->csi_pos = *pos; - } - /* else use old startrec/startidx */ - - return csi; -} + struct seq_file *seq = (struct seq_file *)file->private_data; + struct changelog_seq_iter *csi = seq->private; + int rc; + ENTRY; -static void mdd_cl_seq_stop(struct seq_file *seq, void *v) -{ - struct cl_seq_iter *csi = seq->private; + if ((file->f_flags & O_NONBLOCK) && mdd_changelog_done(csi)) + RETURN(-EAGAIN); - CDEBUG(D_CL, "stop "LPU64"-"LPU64"\n", - csi->csi_startrec, csi->csi_endrec); + csi->csi_done = 0; + rc = seq_read(file, buf, count, ppos); + RETURN(rc); } -static void *mdd_cl_seq_next(struct seq_file *seq, void *v, loff_t *pos) +/* handle nonblocking */ +static unsigned int mdd_changelog_seq_poll(struct file *file, poll_table *wait) { - struct cl_seq_iter *csi = seq->private; - - CDEBUG(D_CL, "next "LPU64"-"LPU64" pos="LPU64"\n", - csi->csi_startrec, csi->csi_endrec, *pos); + struct seq_file *seq = (struct seq_file *)file->private_data; + struct changelog_seq_iter *csi = seq->private; + struct mdd_device *mdd = (struct mdd_device *)csi->csi_dev; + ENTRY; - csi->csi_fill = 1; + csi->csi_done = 0; + poll_wait(file, &mdd->mdd_cl.mc_waitq, wait); + if (!mdd_changelog_done(csi)) + RETURN(POLLIN | POLLRDNORM); - return csi; + RETURN(0); } -struct seq_operations mdd_cl_sops = { - .start = mdd_cl_seq_start, - .stop = mdd_cl_seq_stop, - .next = mdd_cl_seq_next, - .show = mdd_cl_seq_show, -}; - -static int mdd_cl_seq_open(struct inode *inode, struct file *file) +static int mdd_changelog_seq_open(struct inode *inode, struct file *file) { - struct cl_seq_iter *csi; - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *seq; + struct changelog_seq_iter *csi; + struct obd_device *obd; int rc; + ENTRY; - LPROCFS_ENTRY_AND_CHECK(dp); - - rc = seq_open(file, &mdd_cl_sops); + rc = changelog_seq_open(inode, file, &csi); if (rc) - goto out; - - OBD_ALLOC_PTR(csi); - if (csi == NULL) { - rc = -ENOMEM; - goto out; + RETURN(rc); + + /* 
The proc file is set up with mdd in data, not obd */ + obd = mdd2obd_dev((struct mdd_device *)csi->csi_dev); + csi->csi_ctxt = llog_get_context(obd, LLOG_CHANGELOG_ORIG_CTXT); + if (csi->csi_ctxt == NULL) { + changelog_seq_release(inode, file); + RETURN(-ENOENT); } - csi->csi_mdd = dp->data; - seq = file->private_data; - seq->private = csi; - -out: - if (rc) - LPROCFS_EXIT(); - return rc; + /* The handle is set up in llog_obd_origin_setup */ + csi->csi_llh = csi->csi_ctxt->loc_handle; + RETURN(rc); } -static int mdd_cl_seq_release(struct inode *inode, struct file *file) +static int mdd_changelog_seq_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - struct cl_seq_iter *csi = seq->private; - - OBD_FREE_PTR(csi); - - return lprocfs_seq_release(inode, file); -} - -static loff_t mdd_cl_seq_lseek(struct file *file, loff_t offset, int origin) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct cl_seq_iter *csi = seq->private; - - CDEBUG(D_CL, "seek "LPU64"-"LPU64" off="LPU64":%d fpos="LPU64"\n", - csi->csi_startrec, csi->csi_endrec, offset, origin, file->f_pos); - - LL_SEQ_LOCK(seq); - - switch (origin) { - case SEEK_CUR: - offset += csi->csi_endrec; - break; - case SEEK_END: - spin_lock(&csi->csi_mdd->mdd_cl.mc_lock); - offset += csi->csi_mdd->mdd_cl.mc_index; - spin_unlock(&csi->csi_mdd->mdd_cl.mc_lock); - break; - } - - /* SEEK_SET */ + struct changelog_seq_iter *csi = seq->private; - if (offset < 0) { - LL_SEQ_UNLOCK(seq); - return -EINVAL; - } - - csi->csi_startrec = offset; - csi->csi_endrec = offset ? 
offset - 1 : 0; - - /* drop whatever is left in sucky seq_read's buffer */ - seq->count = 0; - seq->from = 0; - seq->index++; - LL_SEQ_UNLOCK(seq); - file->f_pos = csi->csi_startrec; - return csi->csi_startrec; -} - -static ssize_t mdd_cl_seq_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct cl_seq_iter *csi = seq->private; - - if ((file->f_flags & O_NONBLOCK) && mdd_cl_done(csi)) - return -EAGAIN; - return seq_read(file, buf, count, ppos); -} + if (csi && csi->csi_ctxt) + llog_ctxt_put(csi->csi_ctxt); -static unsigned int mdd_cl_seq_poll(struct file *file, poll_table *wait) -{ /* based on kmsg_poll */ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct cl_seq_iter *csi = seq->private; - - poll_wait(file, &csi->csi_mdd->mdd_cl.mc_waitq, wait); - if (!mdd_cl_done(csi)) - return POLLIN | POLLRDNORM; - - return 0; + return (changelog_seq_release(inode, file)); } +/* mdd changelog proc can handle nonblocking ops and writing to purge recs */ struct file_operations mdd_changelog_fops = { .owner = THIS_MODULE, - .open = mdd_cl_seq_open, - .read = mdd_cl_seq_read, - .write = mdd_cl_seq_write, - .llseek = mdd_cl_seq_lseek, - .poll = mdd_cl_seq_poll, - .release = mdd_cl_seq_release, + .open = mdd_changelog_seq_open, + .read = mdd_changelog_seq_read, + .write = mdd_changelog_seq_write, + .llseek = changelog_seq_lseek, + .poll = mdd_changelog_seq_poll, + .release = mdd_changelog_seq_release, }; #ifdef HAVE_QUOTA_SUPPORT @@ -629,9 +448,11 @@ static int mdd_lprocfs_quota_wr_type(struct file *file, const char *buffer, #endif static struct lprocfs_vars lprocfs_mdd_obd_vars[] = { - { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, - { "changelog_mask", lprocfs_rd_cl_mask, lprocfs_wr_cl_mask, 0 }, - { "changelog", 0, lprocfs_wr_cl, 0, &mdd_changelog_fops, 0600 }, + { "atime_diff", lprocfs_rd_atime_diff, lprocfs_wr_atime_diff, 0 }, + { 
"changelog_mask", lprocfs_rd_changelog_mask, + lprocfs_wr_changelog_mask, 0 }, + { "changelog_users", lprocfs_rd_changelog_users, 0, 0}, + { "changelog", 0, mdd_changelog_write, 0, &mdd_changelog_fops, 0600 }, #ifdef HAVE_QUOTA_SUPPORT { "quota_type", mdd_lprocfs_quota_rd_type, mdd_lprocfs_quota_wr_type, 0 }, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 76cd0cb..990eb99 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -389,6 +389,7 @@ out: /** mdd_path() lookup structure. */ struct path_lookup_info { __u64 pli_recno; /**< history point */ + __u64 pli_currec; /**< current record */ struct lu_fid pli_fid; struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */ struct mdd_object *pli_mdd_obj; @@ -477,7 +478,12 @@ static int mdd_path_current(const struct lu_env *env, pli->pli_fids[pli->pli_fidcount] = *tmpfid; } - /* Verify that our path hasn't changed since we started the lookup */ + /* Verify that our path hasn't changed since we started the lookup. + Record the current index, and verify the path resolves to the + same fid. If it does, then the path is correct as of this index. */ + spin_lock(&mdd->mdd_cl.mc_lock); + pli->pli_currec = mdd->mdd_cl.mc_index; + spin_unlock(&mdd->mdd_cl.mc_lock); rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid); if (rc) { CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc); @@ -501,9 +507,15 @@ out: return rc; } +static int mdd_path_historic(const struct lu_env *env, + struct path_lookup_info *pli) +{ + return 0; +} + /* Returns the full path to this fid, as of changelog record recno. 
*/ static int mdd_path(const struct lu_env *env, struct md_object *obj, - char *path, int pathlen, __u64 recno, int *linkno) + char *path, int pathlen, __u64 *recno, int *linkno) { struct path_lookup_info *pli; int tries = 3; @@ -524,7 +536,7 @@ static int mdd_path(const struct lu_env *env, struct md_object *obj, RETURN(-ENOMEM); pli->pli_mdd_obj = md2mdd_obj(obj); - pli->pli_recno = recno; + pli->pli_recno = *recno; pli->pli_path = path; pli->pli_pathlen = pathlen; pli->pli_linkno = *linkno; @@ -533,7 +545,6 @@ static int mdd_path(const struct lu_env *env, struct md_object *obj, while (tries-- && rc == -EAGAIN) rc = mdd_path_current(env, pli); -#if 0 /* We need old path names only for replication */ /* For historical path lookup, the current links may not have existed * at "recno" time. We must switch over to earlier links/parents * by using the changelog records. If the earlier parent doesn't @@ -542,12 +553,13 @@ static int mdd_path(const struct lu_env *env, struct md_object *obj, * We may ignore this problem for the initial implementation and * state that an "original" hardlink must still exist for us to find * historic path name. 
*/ - if (pli->pli_recno != -1) + if (pli->pli_recno != -1) { rc = mdd_path_historic(env, pli); -#endif - - /* return next link index to caller */ - *linkno = pli->pli_linkno; + } else { + *recno = pli->pli_currec; + /* Return next link index to caller */ + *linkno = pli->pli_linkno; + } OBD_FREE_PTR(pli); diff --git a/lustre/mds/mds_log.c b/lustre/mds/mds_log.c index ab925e0..dd7a7ca 100644 --- a/lustre/mds/mds_log.c +++ b/lustre/mds/mds_log.c @@ -178,8 +178,18 @@ int mds_changelog_llog_init(struct obd_device *obd, struct obd_device *tgt) rc = llog_setup_named(obd, &obd->obd_olg, LLOG_CHANGELOG_ORIG_CTXT, tgt, 1, NULL, CHANGELOG_CATALOG, &changelog_orig_logops); - if (rc) + if (rc) { CERROR("changelog llog setup failed %d\n", rc); + RETURN(rc); + } + + rc = llog_setup_named(obd, &obd->obd_olg, LLOG_CHANGELOG_USER_ORIG_CTXT, + tgt, 1, NULL, CHANGELOG_USERS, + &changelog_orig_logops); + if (rc) { + CERROR("changelog users llog setup failed %d\n", rc); + RETURN(rc); + } RETURN(rc); } @@ -245,5 +255,11 @@ int mds_llog_finish(struct obd_device *obd, int count) if (!rc) rc = rc2; + ctxt = llog_get_context(obd, LLOG_CHANGELOG_USER_ORIG_CTXT); + if (ctxt) + rc2 = llog_cleanup(ctxt); + if (!rc) + rc = rc2; + RETURN(rc); } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index d8fc970..7296d72 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1068,12 +1068,15 @@ static int lu_device_is_mdt(struct lu_device *d) return ergo(d != NULL && d->ld_ops != NULL, d->ld_ops == &mdt_lu_ops); } +static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, + void *karg, void *uarg); + static int mdt_set_info(struct mdt_thread_info *info) { struct ptlrpc_request *req = mdt_info_req(info); char *key; - __u32 *val; - int keylen, rc = 0; + void *val; + int keylen, vallen, rc = 0; ENTRY; rc = req_capsule_server_pack(info->mti_pill); @@ -1095,19 +1098,35 @@ static int mdt_set_info(struct mdt_thread_info *info) RETURN(-EFAULT); } - if 
(!KEY_IS(KEY_READ_ONLY)) - RETURN(-EINVAL); + vallen = req_capsule_get_size(info->mti_pill, &RMF_SETINFO_VAL, + RCL_CLIENT); - req->rq_status = 0; - lustre_msg_set_status(req->rq_repmsg, 0); + if (KEY_IS(KEY_READ_ONLY)) { + req->rq_status = 0; + lustre_msg_set_status(req->rq_repmsg, 0); - spin_lock(&req->rq_export->exp_lock); - if (*val) - req->rq_export->exp_connect_flags |= OBD_CONNECT_RDONLY; - else - req->rq_export->exp_connect_flags &= ~OBD_CONNECT_RDONLY; - spin_unlock(&req->rq_export->exp_lock); + spin_lock(&req->rq_export->exp_lock); + if (*(__u32 *)val) + req->rq_export->exp_connect_flags |= OBD_CONNECT_RDONLY; + else + req->rq_export->exp_connect_flags &=~OBD_CONNECT_RDONLY; + spin_unlock(&req->rq_export->exp_lock); + + } else if (KEY_IS(KEY_CHANGELOG_CLEAR)) { + if (lustre_msg_swabbed(req->rq_reqmsg)) { + struct changelog_setinfo *cs = + (struct changelog_setinfo *)val; + __swab64s(&cs->cs_recno); + __swab32s(&cs->cs_id); + } + + rc = mdt_iocontrol(OBD_IOC_CHANGELOG_CLEAR, info->mti_exp, + vallen, val, NULL); + lustre_msg_set_status(req->rq_repmsg, rc); + } else { + RETURN(-EINVAL); + } RETURN(0); } @@ -1784,6 +1803,7 @@ static int mdt_quotactl_handle(struct mdt_thread_info *info) } #endif + /* * OBD PING and other handlers. */ @@ -1812,6 +1832,101 @@ static int mdt_obd_qc_callback(struct mdt_thread_info *info) /* + * LLOG handlers. + */ + +/** clone llog ctxt from child (mdd) + * This allows remote llog (replicator) access. + * We can either pass all llog RPCs (eg mdt_llog_create) on to child where the + * context was originally set up, or we can handle them directly. + * I choose the latter, but that means I need any llog + * contexts set up by child to be accessable by the mdt. So we clone the + * context into our context list here. 
+ */ +static int mdt_llog_ctxt_clone(const struct lu_env *env, struct mdt_device *mdt, + int idx) +{ + struct md_device *next = mdt->mdt_child; + struct llog_ctxt *ctxt; + int rc; + + if (!llog_ctxt_null(mdt2obd_dev(mdt), idx)) + return 0; + + rc = next->md_ops->mdo_llog_ctxt_get(env, next, idx, (void **)&ctxt); + if (rc || ctxt == NULL) { + CERROR("Can't get mdd ctxt %d\n", rc); + return rc; + } + + rc = llog_group_set_ctxt(&mdt2obd_dev(mdt)->obd_olg, ctxt, idx); + if (rc) + CERROR("Can't set mdt ctxt %d\n", rc); + + return rc; +} + +static int mdt_llog_ctxt_unclone(const struct lu_env *env, + struct mdt_device *mdt, int idx) +{ + struct llog_ctxt *ctxt; + + ctxt = llog_get_context(mdt2obd_dev(mdt), idx); + if (ctxt == NULL) + return 0; + /* Put once for the get we just did, and once for the clone */ + llog_ctxt_put(ctxt); + llog_ctxt_put(ctxt); + return 0; +} + +static int mdt_llog_create(struct mdt_thread_info *info) +{ + int rc; + + req_capsule_set(info->mti_pill, &RQF_LLOG_ORIGIN_HANDLE_CREATE); + rc = llog_origin_handle_create(mdt_info_req(info)); + return (rc < 0 ? err_serious(rc) : rc); +} + +static int mdt_llog_destroy(struct mdt_thread_info *info) +{ + int rc; + + req_capsule_set(info->mti_pill, &RQF_LLOG_ORIGIN_HANDLE_DESTROY); + rc = llog_origin_handle_destroy(mdt_info_req(info)); + return (rc < 0 ? err_serious(rc) : rc); +} + +static int mdt_llog_read_header(struct mdt_thread_info *info) +{ + int rc; + + req_capsule_set(info->mti_pill, &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER); + rc = llog_origin_handle_read_header(mdt_info_req(info)); + return (rc < 0 ? err_serious(rc) : rc); +} + +static int mdt_llog_next_block(struct mdt_thread_info *info) +{ + int rc; + + req_capsule_set(info->mti_pill, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK); + rc = llog_origin_handle_next_block(mdt_info_req(info)); + return (rc < 0 ? 
err_serious(rc) : rc); +} + +static int mdt_llog_prev_block(struct mdt_thread_info *info) +{ + int rc; + + req_capsule_set(info->mti_pill, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK); + rc = llog_origin_handle_prev_block(mdt_info_req(info)); + return (rc < 0 ? err_serious(rc) : rc); +} + + +/* * DLM handlers. */ static struct ldlm_callback_suite cbs = { @@ -2231,7 +2346,9 @@ static struct mdt_handler *mdt_handler_find(__u32 opc, if (s->mos_opc_start <= opc && opc < s->mos_opc_end) { h = s->mos_hs + (opc - s->mos_opc_start); if (likely(h->mh_opc != 0)) - LASSERT(h->mh_opc == opc); + LASSERTF(h->mh_opc == opc, + "opcode mismatch %d != %d\n", + h->mh_opc, opc); else h = NULL; /* unsupported opc */ break; @@ -2335,6 +2452,7 @@ static int mdt_unpack_req_pack_rep(struct mdt_thread_info *info, __u32 flags) struct mdt_device *mdt = info->mti_mdt; /* Pack reply. */ + if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) req_capsule_set_size(pill, &RMF_MDT_MD, RCL_SERVER, mdt->mdt_max_mdsize); @@ -2794,7 +2912,8 @@ static int mdt_handle0(struct ptlrpc_request *req, if (likely(h != NULL)) { rc = mdt_req_handle(info, h, req); } else { - CERROR("The unsupported opc: 0x%x\n", lustre_msg_get_opc(msg) ); + CERROR("The unsupported opc: 0x%x\n", + lustre_msg_get_opc(msg) ); req->rq_status = -ENOTSUPP; rc = ptlrpc_error(req); RETURN(rc); @@ -4209,6 +4328,7 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) target_recovery_fini(obd); mdt_stop_ptlrpc_service(m); + mdt_llog_ctxt_unclone(env, m, LLOG_CHANGELOG_ORIG_CTXT); mdt_obd_llog_cleanup(obd); obd_zombie_barrier(); #ifdef HAVE_QUOTA_SUPPORT @@ -4249,8 +4369,8 @@ static void mdt_fini(const struct lu_env *env, struct mdt_device *m) cfs_timer_disarm(&m->mdt_ck_timer); mdt_ck_thread_stop(m); - /* - * Finish the stack + /* + * Finish the stack */ mdt_stack_fini(env, m, md2lu_dev(m->mdt_child)); @@ -4530,6 +4650,10 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, if (rc) GOTO(err_fs_cleanup, rc); + 
rc = mdt_llog_ctxt_clone(env, m, LLOG_CHANGELOG_ORIG_CTXT); + if (rc) + GOTO(err_llog_cleanup, rc); + mdt_adapt_sptlrpc_conf(obd, 1); #ifdef HAVE_QUOTA_SUPPORT @@ -4571,8 +4695,9 @@ err_recovery: target_recovery_fini(obd); #ifdef HAVE_QUOTA_SUPPORT next->md_ops->mdo_quota.mqo_cleanup(env, next); -err_llog_cleanup: #endif +err_llog_cleanup: + mdt_llog_ctxt_unclone(env, m, LLOG_CHANGELOG_ORIG_CTXT); mdt_obd_llog_cleanup(obd); err_fs_cleanup: mdt_fs_cleanup(env, m); @@ -5237,7 +5362,7 @@ static int mdt_ioc_fid2path(struct lu_env *env, struct mdt_device *mdt, GOTO(out_free, rc); } - rc = mo_path(env, md_object_next(&obj->mot_obj), path, pathlen, recno, + rc = mo_path(env, md_object_next(&obj->mot_obj), path, pathlen, &recno, &linkno); mdt_object_put(env, obj); if (rc) @@ -5246,6 +5371,7 @@ static int mdt_ioc_fid2path(struct lu_env *env, struct mdt_device *mdt, if (copy_to_user(data->ioc_pbuf1, path, pathlen)) rc = -EFAULT; + memcpy(data->ioc_inlbuf2, &recno, sizeof(recno)); memcpy(data->ioc_inlbuf3, &linkno, sizeof(linkno)); EXIT; @@ -5257,6 +5383,31 @@ out_context: return rc; } +/* Pass the ioc down */ +static int mdt_ioc_child(struct lu_env *env, struct mdt_device *mdt, + unsigned int cmd, int len, void *data) +{ + struct lu_context ioctl_session; + struct md_device *next = mdt->mdt_child; + int rc; + ENTRY; + + rc = lu_context_init(&ioctl_session, LCT_SESSION); + if (rc) + RETURN(rc); + ioctl_session.lc_thread = (struct ptlrpc_thread *)cfs_current(); + lu_context_enter(&ioctl_session); + env->le_ses = &ioctl_session; + + LASSERT(next->md_ops->mdo_iocontrol); + rc = next->md_ops->mdo_iocontrol(env, next, cmd, len, data); + + lu_context_exit(&ioctl_session); + lu_context_fini(&ioctl_session); + RETURN(rc); +} + +/* ioctls on obd dev */ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, void *karg, void *uarg) { @@ -5287,6 +5438,11 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, case OBD_IOC_FID2PATH: rc = 
mdt_ioc_fid2path(&env, mdt, karg); break; + case OBD_IOC_CHANGELOG_REG: + case OBD_IOC_CHANGELOG_DEREG: + case OBD_IOC_CHANGELOG_CLEAR: + rc = mdt_ioc_child(&env, mdt, cmd, len, karg); + break; default: CERROR("Not supported cmd = %d for device %s\n", cmd, obd->obd_name); @@ -5574,7 +5730,19 @@ static struct mdt_handler mdt_dlm_ops[] = { DEF_DLM_HNDL_0(0, CP_CALLBACK, mdt_cp_callback) }; +#define DEF_LLOG_HNDL(flags, name, fn) \ + DEF_HNDL(LLOG, ORIGIN_HANDLE_CREATE, _NET, flags, name, fn, NULL) + static struct mdt_handler mdt_llog_ops[] = { + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_CREATE, mdt_llog_create), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_NEXT_BLOCK, mdt_llog_next_block), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_READ_HEADER, mdt_llog_read_header), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_WRITE_REC, NULL), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_CLOSE, NULL), + DEF_LLOG_HNDL(0, ORIGIN_CONNECT, NULL), + DEF_LLOG_HNDL(0, CATINFO, NULL), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_PREV_BLOCK, mdt_llog_prev_block), + DEF_LLOG_HNDL(0, ORIGIN_HANDLE_DESTROY, mdt_llog_destroy), }; #define DEF_SEC_CTX_HNDL(name, fn) \ diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index de0a7bf..5d77e35 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -515,25 +515,20 @@ static int lprocfs_wr_root_squash(struct file *file, const char *buffer, { struct obd_device *obd = data; struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); - char kernbuf[50], *tmp, *end; + int rc; + char kernbuf[50], *tmp, *end, *errmsg; unsigned long uid, gid; int nouid, nogid; ENTRY; - if (count > (sizeof(kernbuf) - 1) || - copy_from_user(kernbuf, buffer, count)) { - CWARN("%s: can't copy string to kernel space, " - "uid:gid is expected, " - "continue with %u:%u, " - "there will be 0:0 on MDS restart\n", - obd->obd_name, mdt->mdt_squash_uid, - mdt->mdt_squash_gid); - RETURN(count); + if (count >= sizeof(kernbuf)) { + errmsg = "string too long"; + GOTO(failed, rc = -EINVAL); + } + if (copy_from_user(kernbuf, buffer, count)) { + 
errmsg = "bad address"; + GOTO(failed, rc = -EFAULT); } - - if (copy_from_user(kernbuf, buffer, count)) - RETURN(-EFAULT); - kernbuf[count] = '\0'; nouid = nogid = 0; @@ -557,19 +552,20 @@ static int lprocfs_wr_root_squash(struct file *file, const char *buffer, mdt->mdt_squash_uid = uid; mdt->mdt_squash_gid = gid; - if (nouid || nogid) - CWARN("%s: can't parse \"\%s\", uid:gid is expected, " - "continue with %u:%u, " - "there will be %u:%u on MDS restart\n", + if (nouid && nogid) { + errmsg = "needs uid:gid format"; + GOTO(failed, rc = -EINVAL); + } + + LCONSOLE_INFO("%s: root_squash is set to %u:%u\n", obd->obd_name, - buffer, mdt->mdt_squash_uid, mdt->mdt_squash_gid, - nouid ? 0 : mdt->mdt_squash_uid, - nogid ? 0 : mdt->mdt_squash_gid); - else - LCONSOLE_INFO("%s: root_squash is set to %u:%u\n", - obd->obd_name, - mdt->mdt_squash_uid, mdt->mdt_squash_gid); + mdt->mdt_squash_uid, mdt->mdt_squash_gid); RETURN(count); + + failed: + CWARN("%s: failed to set root_squash to \"%s\", %s: rc %d\n", + obd->obd_name, buffer, errmsg, rc); + RETURN(rc); } static int lprocfs_rd_nosquash_nids(char *page, char **start, off_t off, @@ -589,23 +585,22 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer, struct obd_device *obd = data; struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); int rc; - char *new; + char *kernbuf, *errmsg; struct list_head tmp; ENTRY; - /* copy to kernel space */ - OBD_ALLOC(new, count + 1); - if (new == 0) + OBD_ALLOC(kernbuf, count + 1); + if (kernbuf == NULL) { + errmsg = "no memory"; GOTO(failed, rc = -ENOMEM); - - if (copy_from_user(new, buffer, count)) + } + if (copy_from_user(kernbuf, buffer, count)) { + errmsg = "bad address"; GOTO(failed, rc = -EFAULT); - - new[count] = 0; - if (strlen(new) != count) - GOTO(failed, rc = -EINVAL); - - if (!strcmp(new, "NONE") || !strcmp(new, "clear")) { + } + kernbuf[count] = '\0'; + + if (!strcmp(kernbuf, "NONE") || !strcmp(kernbuf, "clear")) { /* empty string is special case */ 
down_write(&mdt->mdt_squash_sem); if (!list_empty(&mdt->mdt_nosquash_nids)) { @@ -618,35 +613,36 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer, up_write(&mdt->mdt_squash_sem); LCONSOLE_INFO("%s: nosquash_nids is cleared\n", obd->obd_name); - OBD_FREE(new, count + 1); - RETURN(0); + OBD_FREE(kernbuf, count + 1); + RETURN(count); } CFS_INIT_LIST_HEAD(&tmp); - if (cfs_parse_nidlist(new, count, &tmp) <= 0) + if (cfs_parse_nidlist(kernbuf, count, &tmp) <= 0) { + errmsg = "can't parse"; GOTO(failed, rc = -EINVAL); + } down_write(&mdt->mdt_squash_sem); if (!list_empty(&mdt->mdt_nosquash_nids)) { cfs_free_nidlist(&mdt->mdt_nosquash_nids); OBD_FREE(mdt->mdt_nosquash_str, mdt->mdt_nosquash_strlen); } - mdt->mdt_nosquash_str = new; + mdt->mdt_nosquash_str = kernbuf; mdt->mdt_nosquash_strlen = count + 1; list_splice(&tmp, &mdt->mdt_nosquash_nids); - LCONSOLE_INFO("%s: nosquash_nids is set to %s\n", obd->obd_name, new); + LCONSOLE_INFO("%s: nosquash_nids is set to %s\n", + obd->obd_name, kernbuf); up_write(&mdt->mdt_squash_sem); RETURN(count); failed: - CWARN("%s: failed to set nosquash_nids (rc %d), " - "on MDS restart we will try to set it again, " - "continue with current nosquash_nids\n", - obd->obd_name, rc); - if (new) - OBD_FREE(new, count + 1); - RETURN(count); + CWARN("%s: failed to set nosquash_nids to \"%s\", %s: rc %d\n", + obd->obd_name, kernbuf, errmsg, rc); + if (kernbuf) + OBD_FREE(kernbuf, count + 1); + RETURN(rc); } static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { diff --git a/lustre/obdclass/llog_swab.c b/lustre/obdclass/llog_swab.c index 4ccb6ba..a6a430d 100644 --- a/lustre/obdclass/llog_swab.c +++ b/lustre/obdclass/llog_swab.c @@ -166,6 +166,15 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec, struct llog_rec_tail *tail) break; } + case CHANGELOG_USER_REC: { + struct llog_changelog_user_rec *cur = + (struct llog_changelog_user_rec*)rec; + + __swab32s(&cur->cur_id); + __swab64s(&cur->cur_endrec); + break; + } + case 
MDS_SETATTR64_REC: { struct llog_setattr64_rec *lsr = (struct llog_setattr64_rec *)rec; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 043ba88..5c71a1f 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -50,6 +50,8 @@ #include #include #include +#include +#include #if defined(LPROCFS) @@ -2127,6 +2129,263 @@ int lprocfs_obd_wr_recovery_maxtime(struct file *file, const char *buffer, } EXPORT_SYMBOL(lprocfs_obd_wr_recovery_maxtime); + +/**** Changelogs *****/ +#define D_CHANGELOG 0 + +DECLARE_CHANGELOG_NAMES; + +/* How many records per seq_show. Too small, we spawn llog_process threads + too often; too large, we run out of buffer space */ +#define CHANGELOG_CHUNK_SIZE 100 + +static int changelog_show_cb(struct llog_handle *llh, struct llog_rec_hdr *hdr, + void *data) +{ + struct seq_file *seq = (struct seq_file *)data; + struct changelog_seq_iter *csi = seq->private; + struct llog_changelog_rec *rec = (struct llog_changelog_rec *)hdr; + int rc; + ENTRY; + + if ((rec->cr_hdr.lrh_type != CHANGELOG_REC) || + (rec->cr_type >= CL_LAST)) { + CERROR("Not a changelog rec %d/%d\n", rec->cr_hdr.lrh_type, + rec->cr_type); + RETURN(-EINVAL); + } + + CDEBUG(D_CHANGELOG, "rec="LPU64" start="LPU64" cat=%d:%d start=%d:%d\n", + rec->cr_index, csi->csi_startrec, + llh->lgh_hdr->llh_cat_idx, llh->lgh_cur_idx, + csi->csi_startcat, csi->csi_startidx); + + if (rec->cr_index < csi->csi_startrec) + /* Skip entries earlier than what we are interested in */ + RETURN(0); + if (rec->cr_index == csi->csi_startrec) { + /* Remember where we started, since seq_read will re-read + * the data when it reallocs space. Sigh, if only there was + * a way to tell seq_file how big the buf should be in the + * first place... 
+ */ + csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; + csi->csi_startidx = rec->cr_hdr.lrh_index - 1; + } + if (csi->csi_wrote > CHANGELOG_CHUNK_SIZE) { + /* Stop at some point with a reasonable seq_file buffer size. + * Start from here the next time. + */ + csi->csi_endrec = rec->cr_index - 1; + csi->csi_startcat = llh->lgh_hdr->llh_cat_idx; + csi->csi_startidx = rec->cr_hdr.lrh_index - 1; + csi->csi_wrote = 0; + RETURN(LLOG_PROC_BREAK); + } + + rc = seq_printf(seq, LPU64" %02d%-5s "LPU64" 0x%x t="DFID, + rec->cr_index, rec->cr_type, + changelog_str[rec->cr_type], rec->cr_time, + rec->cr_flags & CLF_FLAGMASK, PFID(&rec->cr_tfid)); + + if (rec->cr_namelen) + /* namespace rec includes parent and filename */ + rc += seq_printf(seq, " p="DFID" %.*s\n", PFID(&rec->cr_pfid), + rec->cr_namelen, rec->cr_name); + else + rc += seq_puts(seq, "\n"); + + if (rc < 0) { + /* Ran out of room in the seq buffer. seq_read will dump + * the whole buffer and re-seq_start with a larger one; + * no point in continuing the llog_process */ + CDEBUG(D_CHANGELOG, "rec="LPU64" overflow "LPU64"<-"LPU64"\n", + rec->cr_index, csi->csi_startrec, csi->csi_endrec); + csi->csi_endrec = csi->csi_startrec - 1; + csi->csi_wrote = 0; + RETURN(LLOG_PROC_BREAK); + } + + csi->csi_wrote++; + csi->csi_endrec = rec->cr_index; + + RETURN(0); +} + +static int changelog_seq_show(struct seq_file *seq, void *v) +{ + struct changelog_seq_iter *csi = seq->private; + int rc; + ENTRY; + + if (csi->csi_fill) { + /* seq_read wants more data to fill his buffer. But we already + filled the buf as much as we cared to; force seq_read to + accept that by padding with 0's */ + while (seq_putc(seq, 0) == 0); + RETURN(0); + } + + /* Since we have to restart the llog_cat_process for each chunk of the + seq_ functions, start from where we left off. 
*/ + rc = llog_cat_process(csi->csi_llh, changelog_show_cb, seq, + csi->csi_startcat, csi->csi_startidx); + + CDEBUG(D_CHANGELOG,"seq_show "LPU64"-"LPU64" cat=%d:%d wrote=%d rc=%d\n", + csi->csi_startrec, csi->csi_endrec, csi->csi_startcat, + csi->csi_startidx, csi->csi_wrote, rc); + + if (rc == 0) + csi->csi_done = 1; + if (rc == LLOG_PROC_BREAK) + /* more records left, but seq_show must return 0 */ + rc = 0; + RETURN(rc); +} + +static void *changelog_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct changelog_seq_iter *csi = seq->private; + LASSERT(csi); + + CDEBUG(D_CHANGELOG, "start "LPU64"-"LPU64" pos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, *pos); + + csi->csi_fill = 0; + + if (csi->csi_done) + /* no more records, seq_read should return 0 if buffer + is empty */ + return NULL; + + if (*pos > csi->csi_pos) { + /* The seq_read implementation sucks. It may call start + multiple times, using pos to indicate advances, if any, + by arbitrarily increasing it by 1. So ignore the actual + value of pos, and just register any increase as + "seq_read wants the next values". 
*/ + csi->csi_startrec = csi->csi_endrec + 1; + csi->csi_pos = *pos; + } + /* else use old startrec/startidx */ + + return csi; +} + +static void changelog_seq_stop(struct seq_file *seq, void *v) +{ + struct changelog_seq_iter *csi = seq->private; + + CDEBUG(D_CHANGELOG, "stop "LPU64"-"LPU64"\n", + csi->csi_startrec, csi->csi_endrec); +} + +static void *changelog_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct changelog_seq_iter *csi = seq->private; + + CDEBUG(D_CHANGELOG, "next "LPU64"-"LPU64" pos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, *pos); + + csi->csi_fill = 1; + + return csi; +} + +static struct seq_operations changelog_sops = { + .start = changelog_seq_start, + .stop = changelog_seq_stop, + .next = changelog_seq_next, + .show = changelog_seq_show, +}; + +int changelog_seq_open(struct inode *inode, struct file *file, + struct changelog_seq_iter **csih) +{ + struct changelog_seq_iter *csi; + struct proc_dir_entry *dp = PDE(inode); + struct seq_file *seq; + int rc; + + LPROCFS_ENTRY_AND_CHECK(dp); + + rc = seq_open(file, &changelog_sops); + if (rc) { + LPROCFS_EXIT(); + return rc; + } + + OBD_ALLOC_PTR(csi); + if (csi == NULL) { + lprocfs_seq_release(inode, file); + return -ENOMEM; + } + + csi->csi_dev = dp->data; + seq = file->private_data; + seq->private = csi; + *csih = csi; + + return rc; +} +EXPORT_SYMBOL(changelog_seq_open); + +int changelog_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct changelog_seq_iter *csi = seq->private; + + if (csi) + OBD_FREE_PTR(csi); + + return lprocfs_seq_release(inode, file); +} +EXPORT_SYMBOL(changelog_seq_release); + +#ifndef SEEK_CUR /* SLES10 needs this */ +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +loff_t changelog_seq_lseek(struct file *file, loff_t offset, int origin) +{ + struct seq_file *seq = (struct seq_file *)file->private_data; + struct changelog_seq_iter *csi = seq->private; + + CDEBUG(D_CHANGELOG,"seek 
"LPU64"-"LPU64" off="LPU64":%d fpos="LPU64"\n", + csi->csi_startrec, csi->csi_endrec, offset, origin, file->f_pos); + + LL_SEQ_LOCK(seq); + + switch (origin) { + case SEEK_CUR: + offset += csi->csi_endrec; + break; + case SEEK_END: + /* we don't know the last rec */ + offset = -1; + } + + /* SEEK_SET */ + + if (offset < 0) { + LL_SEQ_UNLOCK(seq); + return -EINVAL; + } + + csi->csi_startrec = offset; + csi->csi_endrec = offset ? offset - 1 : 0; + + /* drop whatever is left in sucky seq_read's buffer */ + seq->count = 0; + seq->from = 0; + seq->index++; + LL_SEQ_UNLOCK(seq); + file->f_pos = csi->csi_startrec; + return csi->csi_startrec; +} +EXPORT_SYMBOL(changelog_seq_lseek); + EXPORT_SYMBOL(lprocfs_register); EXPORT_SYMBOL(lprocfs_srch); EXPORT_SYMBOL(lprocfs_remove); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 7783a3a..5b0e0e0 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -998,9 +998,6 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, vallen, data); set_fs(oldfs); } - if (rc < 0) - CERROR("writing proc entry %s err %d\n", - var->name, rc); break; } j++; @@ -1014,6 +1011,10 @@ int class_process_proc_param(char *prefix, struct lprocfs_vars *lvars, (char *)lustre_cfg_string(lcfg, 0), key); /* rc = -EINVAL; continue parsing other params */ skip++; + } else if (rc < 0) { + CERROR("writing proc entry %s err %d\n", + var->name, rc); + rc = 0; } else { LCONSOLE_INFO("%s.%.*s: set parameter %.*s=%s\n", lustre_cfg_string(lcfg, 0), diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 0359876..91f546a 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -1723,8 +1723,8 @@ static void server_wait_finished(struct vfsmount *mnt) init_waitqueue_head(&waitq); while ((atomic_read(&mnt->mnt_count) > 1) && (retries > 0)) { - LCONSOLE_WARN("Mount still busy with %d refs, waiting for " - "%d secs...\n", + LCONSOLE_WARN("%s: Mount still busy with 
%d refs, waiting for " + "%d secs...\n", mnt->mnt_devname, atomic_read(&mnt->mnt_count), retries); /* Wait for a bit */ @@ -1733,8 +1733,8 @@ static void server_wait_finished(struct vfsmount *mnt) l_wait_event(waitq, 0, &lwi); } if (atomic_read(&mnt->mnt_count) > 1) { - CERROR("Mount %p is still busy (%d refs), giving up.\n", - mnt, atomic_read(&mnt->mnt_count)); + CERROR("%s: Mount still busy (%d refs), giving up.\n", + mnt->mnt_devname, atomic_read(&mnt->mnt_count)); } } diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index d6782e2..fdccecc 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -542,7 +542,7 @@ struct filter_mod_data *filter_fmd_get(struct obd_export *exp, struct filter_export_data *fed = &exp->exp_filter_data; struct filter_mod_data *found = NULL, *fmd_new = NULL; - OBD_SLAB_ALLOC(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO, sizeof(*fmd_new)); + OBD_SLAB_ALLOC_PTR_GFP(fmd_new, ll_fmd_cachep, CFS_ALLOC_IO); spin_lock(&fed->fed_lock); found = filter_fmd_find_nolock(&exp->exp_obd->u.filter,fed,objid,group); @@ -2676,6 +2676,7 @@ static int filter_connect_internal(struct obd_export *exp, RETURN(-EPROTO); if (exp->exp_connect_flags & OBD_CONNECT_GRANT) { + struct filter_obd *filter = &exp->exp_obd->u.filter; obd_size left, want; spin_lock(&exp->exp_obd->obd_osfs_lock); @@ -2689,6 +2690,8 @@ static int filter_connect_internal(struct obd_export *exp, LPU64" left: "LPU64"\n", exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, data->ocd_grant, want, left); + + filter->fo_tot_granted_clients ++; } if (data->ocd_connect_flags & OBD_CONNECT_INDEX) { @@ -2986,6 +2989,12 @@ static int filter_destroy_export(struct obd_export *exp) filter_grant_discard(exp); filter_fmd_cleanup(exp); + if (exp->exp_connect_flags & OBD_CONNECT_GRANT_SHRINK) { + struct filter_obd *filter = &exp->exp_obd->u.filter; + if (filter->fo_tot_granted_clients > 0) + filter->fo_tot_granted_clients --; + } + if (!(exp->exp_flags & OBD_OPT_FORCE)) 
filter_grant_sanity_check(exp->exp_obd, __FUNCTION__); @@ -4281,14 +4290,10 @@ static int filter_get_info(struct obd_export *exp, __u32 keylen, push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); rc = fsfilt_iocontrol(obd, dentry->d_inode, NULL, EXT3_IOC_FIEMAP, (long)fiemap); - if (rc) { - f_dput(dentry); - RETURN(rc); - } pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); f_dput(dentry); - RETURN(0); + RETURN(rc); } CDEBUG(D_IOCTL, "invalid key\n"); @@ -4348,6 +4353,15 @@ static int filter_set_info_async(struct obd_export *exp, __u32 keylen, RETURN(0); } + if (KEY_IS(KEY_GRANT_SHRINK)) { + struct ost_body *body = (struct ost_body *)val; + /* handle shrink grant */ + spin_lock(&exp->exp_obd->obd_osfs_lock); + filter_grant_incoming(exp, &body->oa); + spin_unlock(&exp->exp_obd->obd_osfs_lock); + RETURN(rc); + } + if (!KEY_IS(KEY_MDS_CONN)) RETURN(-EINVAL); diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index 28578d6..9cb6de03 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -59,6 +59,7 @@ OBD_INCOMPAT_COMMON_LR) #define FILTER_GRANT_CHUNK (2ULL * PTLRPC_MAX_BRW_SIZE) +#define FILTER_GRANT_SHRINK_LIMIT (16ULL * FILTER_GRANT_CHUNK) #define GRANT_FOR_LLOG(obd) 16 extern struct file_operations filter_per_export_stats_fops; @@ -188,6 +189,7 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, obd_size want, obd_size fs_space_left); void filter_grant_commit(struct obd_export *exp, int niocount, struct niobuf_local *res); +void filter_grant_incoming(struct obd_export *exp, struct obdo *oa); struct filter_iobuf *filter_alloc_iobuf(struct filter_obd *, int rw, int num_pages); void filter_free_iobuf(struct filter_iobuf *iobuf); diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 74d64ef..fa9a96f1 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -59,7 +59,7 @@ int *obdfilter_created_scratchpad; /* Grab the dirty and seen grant 
announcements from the incoming obdo. * We will later calculate the clients new grant and return it. * Caller must hold osfs lock */ -static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa) +void filter_grant_incoming(struct obd_export *exp, struct obdo *oa) { struct filter_export_data *fed; struct obd_device *obd = exp->exp_obd; @@ -108,6 +108,26 @@ static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa) obd->u.filter.fo_tot_granted -= oa->o_dropped; fed->fed_grant -= oa->o_dropped; fed->fed_dirty = oa->o_dirty; + + if (oa->o_flags & OBD_FL_SHRINK_GRANT) { + obd_size left_space = filter_grant_space_left(exp); + struct filter_obd *filter = &exp->exp_obd->u.filter; + + /*Only if left_space < fo_tot_clients * 32M, + *then the grant space could be shrinked */ + if (left_space < filter->fo_tot_granted_clients * + FILTER_GRANT_SHRINK_LIMIT) { + fed->fed_grant -= oa->o_grant; + filter->fo_tot_granted -= oa->o_grant; + CDEBUG(D_CACHE, "%s: cli %s/%p shrink "LPU64 + "fed_grant %ld total "LPU64"\n", + obd->obd_name, exp->exp_client_uuid.uuid, + exp, oa->o_grant, fed->fed_grant, + filter->fo_tot_granted); + oa->o_grant = 0; + } + } + if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, @@ -373,7 +393,8 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, spin_lock(&obd->obd_osfs_lock); filter_grant_incoming(exp, oa); - oa->o_grant = 0; + if (!(oa->o_flags & OBD_FL_SHRINK_GRANT)) + oa->o_grant = 0; spin_unlock(&obd->obd_osfs_lock); } diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c index 2462a07..d1f4c18 100644 --- a/lustre/obdfilter/filter_log.c +++ b/lustre/obdfilter/filter_log.c @@ -66,7 +66,7 @@ int filter_log_sz_change(struct llog_handle *cathandle, ENTRY; LOCK_INODE_MUTEX(inode); - ofd = inode->i_filterdata; + ofd = INODE_PRIVATE_DATA(inode); if (ofd && 
ofd->ofd_epoch >= ioepoch) { if (ofd->ofd_epoch > ioepoch) @@ -83,7 +83,7 @@ int filter_log_sz_change(struct llog_handle *cathandle, if (!ofd) GOTO(out, rc = -ENOMEM); igrab(inode); - inode->i_filterdata = ofd; + INODE_PRIVATE_DATA(inode) = ofd; ofd->ofd_epoch = ioepoch; } /* the decision to write a record is now made, unlock */ diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 5745278..ca9f7d7 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -75,6 +75,10 @@ static int osc_page_is_dlocked(const struct lu_env *env, dlmmode, &flags, NULL, lockh, unref); } +/** + * Checks an invariant that a page in the cache is covered by a lock, as + * needed. + */ static int osc_page_protected(const struct lu_env *env, const struct osc_page *opg, enum cl_lock_mode mode, int unref) @@ -87,11 +91,20 @@ static int osc_page_protected(const struct lu_env *env, LINVRNT(!opg->ops_temp); + page = opg->ops_cl.cpl_page; + if (page->cp_owner != NULL && + cl_io_top(page->cp_owner)->ci_lockreq == CILR_NEVER) + /* + * If IO is done without locks (liblustre, or lloop), lock is + * not required. + */ + result = 1; + else + /* otherwise check for a DLM lock */ result = osc_page_is_dlocked(env, opg, mode, 1, unref); if (result == 0) { /* maybe this page is a part of a lockless io? 
*/ hdr = cl_object_header(opg->ops_cl.cpl_obj); - page = opg->ops_cl.cpl_page; descr = &osc_env_info(env)->oti_descr; descr->cld_mode = mode; descr->cld_start = page->cp_index; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 3cbe9c0..b8ee00a 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -795,6 +795,15 @@ static void osc_announce_cached(struct client_obd *cli, struct obdo *oa, client_obd_list_unlock(&cli->cl_loi_list_lock); CDEBUG(D_CACHE,"dirty: "LPU64" undirty: %u dropped %u grant: "LPU64"\n", oa->o_dirty, oa->o_undirty, oa->o_dropped, oa->o_grant); + +} + +static void osc_update_next_shrink(struct client_obd *cli) +{ + int time = GRANT_SHRINK_INTERVAL; + cli->cl_next_shrink_grant = cfs_time_shift(time); + CDEBUG(D_CACHE, "next time %ld to shrink grant \n", + cli->cl_next_shrink_grant); } /* caller must hold loi_list_lock */ @@ -809,6 +818,7 @@ static void osc_consume_write_grant(struct client_obd *cli, CDEBUG(D_CACHE, "using %lu grant credits for brw %p page %p\n", CFS_PAGE_SIZE, pga, pga->pg); LASSERT(cli->cl_avail_grant >= 0); + osc_update_next_shrink(cli); } /* the companion to osc_consume_write_grant, called when a brw has completed. 
@@ -902,25 +912,143 @@ void osc_wake_cache_waiters(struct client_obd *cli) EXIT; } -static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) +static void osc_update_grant(struct client_obd *cli, struct ost_body *body) { client_obd_list_lock(&cli->cl_loi_list_lock); - cli->cl_avail_grant = ocd->ocd_grant; + CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant); + if (body->oa.o_valid & OBD_MD_FLGRANT) + cli->cl_avail_grant += body->oa.o_grant; + /* waiters are woken in brw_interpret */ client_obd_list_unlock(&cli->cl_loi_list_lock); +} - CDEBUG(D_CACHE, "setting cl_avail_grant: %ld cl_lost_grant: %ld\n", - cli->cl_avail_grant, cli->cl_lost_grant); - LASSERT(cli->cl_avail_grant >= 0); +static int osc_set_info_async(struct obd_export *exp, obd_count keylen, + void *key, obd_count vallen, void *val, + struct ptlrpc_request_set *set); + +static int osc_shrink_grant_interpret(const struct lu_env *env, + struct ptlrpc_request *req, + void *aa, int rc) +{ + struct client_obd *cli = &req->rq_import->imp_obd->u.cli; + struct obdo *oa = ((struct osc_grant_args *)aa)->aa_oa; + struct ost_body *body; + + if (rc != 0) { + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_avail_grant += oa->o_grant; + client_obd_list_unlock(&cli->cl_loi_list_lock); + GOTO(out, rc); + } + + body = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); + osc_update_grant(cli, body); +out: + OBD_FREE_PTR(oa); + return rc; } -static void osc_update_grant(struct client_obd *cli, struct ost_body *body) +static void osc_shrink_grant_local(struct client_obd *cli, struct obdo *oa) { client_obd_list_lock(&cli->cl_loi_list_lock); - CDEBUG(D_CACHE, "got "LPU64" extra grant\n", body->oa.o_grant); - if (body->oa.o_valid & OBD_MD_FLGRANT) + oa->o_grant = cli->cl_avail_grant / 4; + cli->cl_avail_grant -= oa->o_grant; + client_obd_list_unlock(&cli->cl_loi_list_lock); + oa->o_flags |= OBD_FL_SHRINK_GRANT; + osc_update_next_shrink(cli); +} + +static int 
osc_shrink_grant(struct client_obd *cli) +{ + int rc = 0; + struct ost_body *body; + ENTRY; + + OBD_ALLOC_PTR(body); + if (!body) + RETURN(-ENOMEM); + + osc_announce_cached(cli, &body->oa, 0); + osc_shrink_grant_local(cli, &body->oa); + rc = osc_set_info_async(cli->cl_import->imp_obd->obd_self_export, + sizeof(KEY_GRANT_SHRINK), KEY_GRANT_SHRINK, + sizeof(*body), body, NULL); + if (rc) { + client_obd_list_lock(&cli->cl_loi_list_lock); cli->cl_avail_grant += body->oa.o_grant; - /* waiters are woken in brw_interpret */ + client_obd_list_unlock(&cli->cl_loi_list_lock); + } + if (body) + OBD_FREE_PTR(body); + RETURN(rc); +} + +#define GRANT_SHRINK_LIMIT PTLRPC_MAX_BRW_SIZE +static int osc_should_shrink_grant(struct client_obd *client) +{ + cfs_time_t time = cfs_time_current(); + cfs_time_t next_shrink = client->cl_next_shrink_grant; + if (cfs_time_aftereq(time, next_shrink - 5 * CFS_TICK)) { + if (client->cl_import->imp_state == LUSTRE_IMP_FULL && + client->cl_avail_grant > GRANT_SHRINK_LIMIT) + return 1; + else + osc_update_next_shrink(client); + } + return 0; +} + +static int osc_grant_shrink_grant_cb(struct timeout_item *item, void *data) +{ + struct client_obd *client; + + list_for_each_entry(client, &item->ti_obd_list, cl_grant_shrink_list) { + if (osc_should_shrink_grant(client)) + osc_shrink_grant(client); + } + return 0; +} + +static int osc_add_shrink_grant(struct client_obd *client) +{ + int rc; + + rc = ptlrpc_add_timeout_client(GRANT_SHRINK_INTERVAL, + TIMEOUT_GRANT, + osc_grant_shrink_grant_cb, NULL, + &client->cl_grant_shrink_list); + if (rc) { + CERROR("add grant client %s error %d\n", + client->cl_import->imp_obd->obd_name, rc); + return rc; + } + CDEBUG(D_CACHE, "add grant client %s \n", + client->cl_import->imp_obd->obd_name); + osc_update_next_shrink(client); + return 0; +} + +static int osc_del_shrink_grant(struct client_obd *client) +{ + CDEBUG(D_CACHE, "del grant client %s \n", + client->cl_import->imp_obd->obd_name); + return 
ptlrpc_del_timeout_client(&client->cl_grant_shrink_list); +} + +static void osc_init_grant(struct client_obd *cli, struct obd_connect_data *ocd) +{ + client_obd_list_lock(&cli->cl_loi_list_lock); + cli->cl_avail_grant = ocd->ocd_grant; client_obd_list_unlock(&cli->cl_loi_list_lock); + + if (ocd->ocd_connect_flags & OBD_CONNECT_GRANT_SHRINK && + list_empty(&cli->cl_grant_shrink_list)) + osc_add_shrink_grant(cli); + + CDEBUG(D_CACHE, "setting cl_avail_grant: %ld cl_lost_grant: %ld \n", + cli->cl_avail_grant, cli->cl_lost_grant); + LASSERT(cli->cl_avail_grant >= 0); } /* We assume that the reason this OSC got a short read is because it read @@ -1172,6 +1300,8 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, (void *)(niobuf - niocount)); osc_announce_cached(cli, &body->oa, opc == OST_WRITE ? requested_nob:0); + if (osc_should_shrink_grant(cli)) + osc_shrink_grant_local(cli, &body->oa); /* size[REQ_REC_OFF] still sizeof (*body) */ if (opc == OST_WRITE) { @@ -3615,7 +3745,7 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, RETURN(0); } - if (!set) + if (!set && !KEY_IS(KEY_GRANT_SHRINK)) RETURN(-EINVAL); /* We pass all other commands directly to OST. Since nobody calls osc @@ -3625,9 +3755,12 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, Even if something bad goes through, we'd get a -EINVAL from OST anyway. 
*/ - - req = ptlrpc_request_alloc(imp, &RQF_OST_SET_INFO); - if (req == NULL) + if (KEY_IS(KEY_GRANT_SHRINK)) + req = ptlrpc_request_alloc(imp, &RQF_OST_SET_GRANT_INFO); + else + req = ptlrpc_request_alloc(imp, &RQF_OST_SET_INFO); + + if (req == NULL) RETURN(-ENOMEM); req_capsule_set_size(&req->rq_pill, &RMF_SETINFO_KEY, @@ -3652,13 +3785,31 @@ static int osc_set_info_async(struct obd_export *exp, obd_count keylen, oscc->oscc_oa.o_valid |= OBD_MD_FLGROUP; LASSERT_MDS_GROUP(oscc->oscc_oa.o_gr); req->rq_interpret_reply = osc_setinfo_mds_conn_interpret; - } - - ptlrpc_request_set_replen(req); - ptlrpc_set_add_req(set, req); - ptlrpc_check_set(NULL, set); + } else if (KEY_IS(KEY_GRANT_SHRINK)) { + struct osc_grant_args *aa; + struct obdo *oa; - RETURN(0); + CLASSERT(sizeof(*aa) <= sizeof(req->rq_async_args)); + aa = ptlrpc_req_async_args(req); + OBD_ALLOC_PTR(oa); + if (!oa) { + ptlrpc_req_finished(req); + RETURN(-ENOMEM); + } + *oa = ((struct ost_body *)val)->oa; + aa->aa_oa = oa; + req->rq_interpret_reply = osc_shrink_grant_interpret; + } + + ptlrpc_request_set_replen(req); + if (!KEY_IS(KEY_GRANT_SHRINK)) { + LASSERT(set != NULL); + ptlrpc_set_add_req(set, req); + ptlrpc_check_set(NULL, set); + } else + ptlrpcd_add_req(req, PSCOPE_OTHER); + + RETURN(0); } @@ -3779,6 +3930,7 @@ static int osc_disconnect(struct obd_export *exp) obd); } + osc_del_shrink_grant(&obd->u.cli); rc = client_disconnect_export(exp); return rc; } @@ -3901,6 +4053,9 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) ptlrpc_init_rq_pool(cli->cl_max_rpcs_in_flight + 2, OST_MAXREQSIZE, ptlrpc_add_rqs_to_pool); + + CFS_INIT_LIST_HEAD(&cli->cl_grant_shrink_list); + sema_init(&cli->cl_grant_sem, 1); } RETURN(rc); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 78ff119..3b6572d 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -984,6 +984,10 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc != 0) 
GOTO(out_lock, rc); + rc = sptlrpc_svc_prep_bulk(req, desc); + if (rc != 0) + GOTO(out_lock, rc); + /* Check if client was evicted while we were doing i/o before touching network */ if (desc->bd_export->exp_failed) @@ -1154,6 +1158,8 @@ out: static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req) { + struct ost_body *body = NULL, *repbody; + __u32 size[2] = { sizeof(struct ptlrpc_body), sizeof(*body) }; char *key, *val = NULL; int keylen, vallen, rc = 0; ENTRY; @@ -1165,13 +1171,33 @@ static int ost_set_info(struct obd_export *exp, struct ptlrpc_request *req) } keylen = lustre_msg_buflen(req->rq_reqmsg, REQ_REC_OFF); - rc = lustre_pack_reply(req, 1, NULL, NULL); - if (rc) - RETURN(rc); + if (KEY_IS(KEY_GRANT_SHRINK)) { + rc = lustre_pack_reply(req, 2, size, NULL); + if (rc) + RETURN(rc); + } else { + rc = lustre_pack_reply(req, 1, NULL, NULL); + if (rc) + RETURN(rc); + } vallen = lustre_msg_buflen(req->rq_reqmsg, REQ_REC_OFF + 1); - if (vallen) - val = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, 0); + if (vallen) { + if (KEY_IS(KEY_GRANT_SHRINK)) { + body = lustre_swab_reqbuf(req, REQ_REC_OFF + 1, + sizeof(*body), + lustre_swab_ost_body); + if (!body) + RETURN(-EFAULT); + + repbody = lustre_msg_buf(req->rq_repmsg, + REPLY_REC_OFF, + sizeof(*repbody)); + memcpy(repbody, body, sizeof(*body)); + val = (char*)repbody; + } else + val = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1,0); + } if (KEY_IS(KEY_EVICT_BY_NID)) { if (val && vallen) diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index 65eedd1..cb0e2e6 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -1440,11 +1440,10 @@ int at_add(struct adaptive_timeout *at, unsigned int val) time_t binlimit = max_t(time_t, at_history / AT_BINS, 1); LASSERT(at); -#if 0 - CDEBUG(D_INFO, "add %u to %p time=%lu v=%u (%u %u %u %u)\n", + CDEBUG(D_OTHER, "add %u to %p time=%lu v=%u (%u %u %u %u)\n", val, at, now - at->at_binstart, at->at_current, at->at_hist[0], at->at_hist[1], 
at->at_hist[2], at->at_hist[3]); -#endif + if (val == 0) /* 0's don't count, because we never want our timeout to drop to 0, and because 0 could mean an error */ @@ -1496,14 +1495,12 @@ int at_add(struct adaptive_timeout *at, unsigned int val) at->at_current = min(at->at_current, at_max); at->at_current = max(at->at_current, at_min); -#if 0 if (at->at_current != old) - CDEBUG(D_ADAPTTO, "AT %p change: old=%u new=%u delta=%d " + CDEBUG(D_OTHER, "AT %p change: old=%u new=%u delta=%d " "(val=%u) hist %u %u %u %u\n", at, old, at->at_current, at->at_current - old, val, at->at_hist[0], at->at_hist[1], at->at_hist[2], at->at_hist[3]); -#endif /* if we changed, report the old value */ old = (at->at_current != old) ? old : 0; diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index 764957e..66b0dc1 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -574,6 +574,7 @@ static const struct req_format *req_formats[] = { &RQF_OST_BRW, &RQF_OST_STATFS, &RQF_OST_SET_INFO, + &RQF_OST_SET_GRANT_INFO, &RQF_OST_GET_INFO_GENERIC, &RQF_OST_GET_INFO_LAST_ID, &RQF_OST_GET_INFO_FIEMAP, @@ -889,7 +890,7 @@ const struct req_format RQF_MGS_TARGET_REG = EXPORT_SYMBOL(RQF_MGS_TARGET_REG); const struct req_format RQF_MGS_SET_INFO = - DEFINE_REQ_FMT0("MGS_SET_INTO", mgs_set_info, + DEFINE_REQ_FMT0("MGS_SET_INFO", mgs_set_info, mgs_set_info); EXPORT_SYMBOL(RQF_MGS_SET_INFO); @@ -1202,6 +1203,11 @@ const struct req_format RQF_OST_SET_INFO = DEFINE_REQ_FMT0("OST_SET_INFO", ost_set_info_client, empty); EXPORT_SYMBOL(RQF_OST_SET_INFO); +const struct req_format RQF_OST_SET_GRANT_INFO = + DEFINE_REQ_FMT0("OST_SET_GRANT_INFO", ost_set_info_client, + ost_body_only); +EXPORT_SYMBOL(RQF_OST_SET_GRANT_INFO); + const struct req_format RQF_OST_GET_INFO_GENERIC = DEFINE_REQ_FMT0("OST_GET_INFO", ost_get_info_generic_client, ost_get_info_generic_server); diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index 8259bd1..fd88361 100644 --- a/lustre/ptlrpc/llog_server.c 
+++ b/lustre/ptlrpc/llog_server.c @@ -83,12 +83,15 @@ int llog_origin_handle_create(struct ptlrpc_request *req) name = req_capsule_client_get(&req->rq_pill, &RMF_NAME); if (name == NULL) RETURN(-EFAULT); - CDEBUG(D_INFO, "opening log %s\n", name); + CDEBUG(D_INFO, "%s: opening log %s\n", obd->obd_name, name); } ctxt = llog_get_context(obd, body->lgd_ctxt_idx); - if (ctxt == NULL) + if (ctxt == NULL) { + CDEBUG(D_WARNING, "%s: no ctxt. group=%p idx=%d name=%s\n", + obd->obd_name, &obd->obd_olg, body->lgd_ctxt_idx, name); RETURN(-ENODEV); + } disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -340,8 +343,8 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) if (rc) GOTO(out_pop, rc); - /* - * llog_init_handle() reads the llog header + /* + * llog_init_handle() reads the llog header */ flags = body->lgd_llh_flags; rc = llog_init_handle(loghandle, flags, NULL); @@ -407,28 +410,28 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) handle = fsfilt_start_log(disk_obd, inode, FSFILT_OP_CANCEL_UNLINK, NULL, 1); if (IS_ERR(handle)) { - CERROR("fsfilt_start_log() failed: %ld\n", + CERROR("fsfilt_start_log() failed: %ld\n", PTR_ERR(handle)); GOTO(pop_ctxt, rc = PTR_ERR(handle)); } rc = llog_cat_cancel_records(cathandle, 1, logcookies); - /* + /* * Do not raise -ENOENT errors for resent rpcs. This rec already - * might be killed. + * might be killed. */ - if (rc == -ENOENT && + if (rc == -ENOENT && (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)) { - /* + /* * Do not change this message, reply-single.sh test_59b - * expects to find this in log. + * expects to find this in log. 
*/ CDEBUG(D_RPCTRACE, "RESENT cancel req %p - ignored\n", req); rc = 0; } else if (rc == 0) { - CDEBUG(D_RPCTRACE, "Canceled %d llog-records\n", + CDEBUG(D_RPCTRACE, "Canceled %d llog-records\n", num_cookies); } @@ -446,7 +449,7 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) pop_ctxt: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); if (rc) - CERROR("Cancel %d of %d llog-records failed: %d\n", + CERROR("Cancel %d of %d llog-records failed: %d\n", failed, num_cookies, rc); llog_ctxt_put(ctxt); @@ -543,7 +546,7 @@ static int llog_catinfo_cb(struct llog_handle *cat, if (!cbd->ctxt) RETURN(-ENODEV); - + lir = (struct llog_logid_rec *)rec; logid = &lir->lid_id; rc = llog_create(ctxt, &handle, logid, NULL); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 7303382..021df48 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -527,7 +527,7 @@ static int ptlrpc_lprocfs_wr_hp_ratio(struct file *file, const char *buffer, { struct ptlrpc_service *svc = data; int rc, val; - + rc = lprocfs_write_helper(buffer, count, &val); if (rc < 0) return rc; diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index e85951a..ce2e237 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2312,3 +2312,5 @@ void lustre_swab_lustre_capa_key (struct lustre_capa_key *k) __swab32s (&k->lk_keyid); __swab32s (&k->lk_padding); } + + diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c index 7a15386..daec0f5 100644 --- a/lustre/ptlrpc/pinger.c +++ b/lustre/ptlrpc/pinger.c @@ -50,7 +50,7 @@ struct semaphore pinger_sem; static CFS_LIST_HEAD(pinger_imports); - +static struct list_head timeout_list = CFS_LIST_HEAD_INIT(timeout_list); struct ptlrpc_request * ptlrpc_prep_ping(struct obd_import *imp) { @@ -135,6 +135,25 @@ static inline int ptlrpc_next_reconnect(struct obd_import *imp) static atomic_t suspend_timeouts = ATOMIC_INIT(0); static cfs_time_t suspend_wakeup_time = 0; 
+cfs_duration_t pinger_check_timeout(cfs_time_t time) +{ + struct timeout_item *item; + cfs_time_t timeout = PING_INTERVAL; + + /* The timeout list is a increase order sorted list */ + mutex_down(&pinger_sem); + list_for_each_entry(item, &timeout_list, ti_chain) { + int ti_timeout = item->ti_timeout; + if (timeout > ti_timeout) + timeout = ti_timeout; + break; + } + mutex_up(&pinger_sem); + + return cfs_time_sub(cfs_time_add(time, cfs_time_seconds(timeout)), + cfs_time_current()); +} + #ifdef __KERNEL__ static wait_queue_head_t suspend_timeouts_waitq; #endif @@ -250,10 +269,14 @@ static int ptlrpc_pinger_main(void *arg) while (1) { cfs_time_t this_ping = cfs_time_current(); struct l_wait_info lwi; - cfs_duration_t time_to_next_ping; + cfs_duration_t time_to_next_wake; + struct timeout_item *item; struct list_head *iter; mutex_down(&pinger_sem); + list_for_each_entry(item, &timeout_list, ti_chain) { + item->ti_cb(item, item->ti_cb_data); + } list_for_each(iter, &pinger_imports) { struct obd_import *imp = list_entry(iter, struct obd_import, @@ -272,25 +295,19 @@ static int ptlrpc_pinger_main(void *arg) obd_update_maxusage(); /* Wait until the next ping time, or until we're stopped. */ - time_to_next_ping = cfs_time_sub(cfs_time_add(this_ping, - cfs_time_seconds(PING_INTERVAL)), - cfs_time_current()); - + time_to_next_wake = pinger_check_timeout(this_ping); /* The ping sent by ptlrpc_send_rpc may get sent out say .01 second after this. ptlrpc_pinger_sending_on_import will then set the next ping time to next_ping + .01 sec, which means we will SKIP the next ping at next_ping, and the ping will get sent 2 timeouts from now! Beware. 
*/
-                CDEBUG(D_INFO, "next ping in "CFS_DURATION_T" ("CFS_TIME_T")\n",
-                       time_to_next_ping,
-                       cfs_time_add(this_ping,
-                                    cfs_time_seconds(PING_INTERVAL)));
-                if (time_to_next_ping > 0) {
-                        lwi = LWI_TIMEOUT(max_t(cfs_duration_t,
-                                                time_to_next_ping,
-                                                cfs_time_seconds(1)),
-                                          NULL, NULL);
+                CDEBUG(D_INFO, "next wakeup in "CFS_DURATION_T" ("CFS_TIME_T")\n",
+                       time_to_next_wake,
+                       cfs_time_add(this_ping, cfs_time_seconds(PING_INTERVAL)));
+                if (time_to_next_wake > 0) {
+                        lwi = LWI_TIMEOUT(max_t(cfs_duration_t, time_to_next_wake, cfs_time_seconds(1)),
+                                          NULL, NULL);
                         l_wait_event(thread->t_ctl_waitq,
                                      thread->t_flags & (SVC_STOPPING|SVC_EVENT),
                                      &lwi);
@@ -351,6 +368,8 @@ int ptlrpc_start_pinger(void)
         RETURN(0);
 }
 
+int ptlrpc_pinger_remove_timeouts(void);
+
 int ptlrpc_stop_pinger(void)
 {
         struct l_wait_info lwi = { 0 };
@@ -362,6 +381,8 @@ int ptlrpc_stop_pinger(void)
 
         if (pinger_thread == NULL)
                 RETURN(-EALREADY);
+
+        ptlrpc_pinger_remove_timeouts();
         mutex_down(&pinger_sem);
         pinger_thread->t_flags = SVC_STOPPING;
         cfs_waitq_signal(&pinger_thread->t_ctl_waitq);
@@ -419,6 +440,105 @@ int ptlrpc_pinger_del_import(struct obd_import *imp)
         RETURN(0);
 }
 
+/**
+ * Allocate a timeout item holding @time, @event, @cb and @data; it is not
+ * linked into any list here. Returns NULL when the allocation fails.
+ */
+struct timeout_item* ptlrpc_new_timeout(int time, enum timeout_event event,
+                                        timeout_cb_t cb, void *data)
+{
+        struct timeout_item *ti;
+
+        OBD_ALLOC_PTR(ti);
+        if (!ti)
+                return(NULL);
+
+        CFS_INIT_LIST_HEAD(&ti->ti_obd_list);
+        CFS_INIT_LIST_HEAD(&ti->ti_chain);
+        ti->ti_timeout = time;
+        ti->ti_event = event;
+        ti->ti_cb = cb;
+        ti->ti_cb_data = data;
+
+        return ti;
+}
+
+/**
+ * Register timeout event @event on the pinger thread, reusing the item
+ * already registered for it. The list is sorted by increasing timeout.
+ */
+static struct timeout_item*
+ptlrpc_pinger_register_timeout(int time, enum timeout_event event,
+                               timeout_cb_t cb, void *data)
+{
+        struct timeout_item *item;
+        struct timeout_item *ti;
+
+        LASSERT_SEM_LOCKED(&pinger_sem);
+        /* Each event is registered once: reuse an existing item if any. */
+        list_for_each_entry(item, &timeout_list, ti_chain) {
+                if (item->ti_event == event)
+                        return item;
+        }
+
+        ti = ptlrpc_new_timeout(time, event, cb, data);
+        if (!ti)
+                return NULL; /* allocation failed, the list is untouched */
+
+        /* Keep the list sorted: link after the last smaller timeout. */
+        list_for_each_entry_reverse(item, &timeout_list, ti_chain) {
+                if (item->ti_timeout < time) {
+                        list_add(&ti->ti_chain, &item->ti_chain);
+                        return ti;
+                }
+        }
+
+        /* Empty list, or no smaller timeout: the new item goes first. */
+        list_add(&ti->ti_chain, &timeout_list);
+        return ti;
+}
+/* Add a client_obd to the timeout event list, when timeout(@time)
+ * happens, the callback(@cb) will be called.
+ */
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+                              timeout_cb_t cb, void *data,
+                              struct list_head *obd_list)
+{
+        struct timeout_item *ti;
+
+        mutex_down(&pinger_sem);
+        ti = ptlrpc_pinger_register_timeout(time, event, cb, data);
+        if (!ti) {
+                mutex_up(&pinger_sem);
+                return (-EINVAL);
+        }
+        list_add(obd_list, &ti->ti_obd_list);
+        mutex_up(&pinger_sem);
+        return 0;
+}
+
+int ptlrpc_del_timeout_client(struct list_head *obd_list)
+{
+        mutex_down(&pinger_sem);
+        list_del_init(obd_list);
+        mutex_up(&pinger_sem);
+        return 0;
+}
+
+int ptlrpc_pinger_remove_timeouts(void)
+{
+        struct timeout_item *item, *tmp;
+
+        mutex_down(&pinger_sem);
+        list_for_each_entry_safe(item, tmp, &timeout_list, ti_chain) {
+                LASSERT(list_empty(&item->ti_obd_list));
+                list_del(&item->ti_chain);
+                OBD_FREE_PTR(item);
+        }
+        mutex_up(&pinger_sem);
+        return 0;
+}
+
 void ptlrpc_pinger_wake_up()
 {
 #ifdef ENABLE_PINGER
@@ -764,6 +884,18 @@ void ptlrpc_pinger_sending_on_import(struct obd_import *imp)
 #endif
 }
 
+int ptlrpc_add_timeout_client(int time, enum timeout_event event,
+                              timeout_cb_t cb, void *data,
+                              struct list_head *obd_list)
+{
+        return 0;
+}
+
+int
ptlrpc_del_timeout_client(struct list_head *obd_list) +{ + return 0; +} + int ptlrpc_pinger_add_import(struct obd_import *imp) { ENTRY; diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c index c097f65..681873d 100644 --- a/lustre/ptlrpc/ptlrpc_module.c +++ b/lustre/ptlrpc/ptlrpc_module.c @@ -325,6 +325,8 @@ EXPORT_SYMBOL(ptlrpc_recover_import); /* pinger.c */ EXPORT_SYMBOL(ptlrpc_pinger_add_import); EXPORT_SYMBOL(ptlrpc_pinger_del_import); +EXPORT_SYMBOL(ptlrpc_add_timeout_client); +EXPORT_SYMBOL(ptlrpc_del_timeout_client); EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import); /* ptlrpcd.c */ diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index c93be5d..b5199d4 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -65,8 +65,8 @@ void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwiretest) - * running on Linux localhost.localdomain 2.6.18-prep #3 SMP Sun Nov 23 08:04:44 EST 2008 i68 - * with gcc version 4.1.1 20061011 (Red Hat 4.1.1-30) */ + * running on Linux cfs21 2.6.18-92.el5xen #1 SMP Tue Jun 10 19:55:54 EDT 2008 i686 i686 i386 + * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */ /* Constants... 
*/ @@ -166,6 +166,14 @@ void lustre_assert_wire_constants(void) (long long)MDS_QUOTACHECK); LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", (long long)MDS_QUOTACTL); + LASSERTF(MDS_GETXATTR == 49, " found %lld\n", + (long long)MDS_GETXATTR); + LASSERTF(MDS_SETXATTR == 50, " found %lld\n", + (long long)MDS_SETXATTR); + LASSERTF(MDS_WRITEPAGE == 51, " found %lld\n", + (long long)MDS_WRITEPAGE); + LASSERTF(MDS_IS_SUBDIR == 52, " found %lld\n", + (long long)MDS_IS_SUBDIR); LASSERTF(MDS_LAST_OPC == 53, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", @@ -473,6 +481,7 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_AT == 0x01000000ULL); CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL); CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x02000000ULL); + CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n", diff --git a/lustre/quota/quota_context.c b/lustre/quota/quota_context.c index 56ec970..6bcbcc5 100644 --- a/lustre/quota/quota_context.c +++ b/lustre/quota/quota_context.c @@ -541,6 +541,10 @@ dqacq_completion(struct obd_device *obd, struct lustre_quota_ctxt *qctxt, QDATA_DEBUG(qdata, "obd(%s): complete %s quota req\n", obd->obd_name, (opc == QUOTA_DQACQ) ? 
"acq" : "rel"); + /* do it only when a releasing quota req more than 5MB b=18491 */ + if (opc == QUOTA_DQREL && qdata->qd_count >= 5242880) + OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5); + /* update local operational quota file */ if (rc == 0) { __u64 count = QUSG(qdata->qd_count, QDATA_IS_BLK(qdata)); @@ -667,7 +671,11 @@ out: RETURN(rc1); } if (err || (rc < 0 && rc != -EBUSY && rc1 == 0) || is_master(qctxt)) - RETURN(err); + RETURN(err); + + if (opc == QUOTA_DQREL && qdata->qd_count >= 5242880 && + OBD_FAIL_CHECK(OBD_FAIL_QUOTA_DELAY_REL)) + RETURN(err); /* reschedule another dqacq/dqrel if needed */ qdata->qd_count = 0; diff --git a/lustre/quota/quota_interface.c b/lustre/quota/quota_interface.c index e7e72d1..59c6e70 100644 --- a/lustre/quota/quota_interface.c +++ b/lustre/quota/quota_interface.c @@ -334,6 +334,11 @@ static int quota_check_common(struct obd_device *obd, unsigned int uid, spin_unlock(&lqs->lqs_lock); + if (lqs->lqs_blk_rec < 0 && + qdata[i].qd_count < + lqs->lqs_bwrite_pending - lqs->lqs_blk_rec - mb) + OBD_FAIL_TIMEOUT(OBD_FAIL_QUOTA_DELAY_REL, 5); + /* When cycle is zero, lqs_*_pending will be changed. 
We will * get reference of the lqs here and put reference of lqs in * quota_pending_commit b=14784 */ diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index 77c5186..ab5214f 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -1599,7 +1599,6 @@ free: int mds_quota_recovery(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; - struct lov_obd *lov = &mds->mds_osc_obd->u.lov; struct qmaster_recov_thread_data data; int rc = 0; ENTRY; @@ -1607,13 +1606,15 @@ int mds_quota_recovery(struct obd_device *obd) if (unlikely(!mds->mds_quota)) RETURN(rc); - mutex_down(&lov->lov_lock); - if (lov->desc.ld_tgt_count != lov->desc.ld_active_tgt_count) { - CWARN("Not all osts are active, abort quota recovery\n"); - mutex_up(&lov->lov_lock); + mutex_down(&obd->obd_dev_sem); + if (mds->mds_lov_desc.ld_active_tgt_count != mds->mds_lov_objid_count) { + CWARN("Only %u/%u OSTs are active, abort quota recovery\n", + mds->mds_lov_desc.ld_active_tgt_count, + mds->mds_lov_objid_count); + mutex_up(&obd->obd_dev_sem); RETURN(rc); } - mutex_up(&lov->lov_lock); + mutex_up(&obd->obd_dev_sem); data.obd = obd; init_completion(&data.comp); diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index fae5f37..884d9fe 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -18,7 +18,7 @@ noinst_SCRIPTS += mdsrate-stat-small.sh mdsrate-stat-large.sh noinst_SCRIPTS += lockorder.sh socketclient socketserver runmultiop_bg_pause noinst_SCRIPTS += sanity-sec.sh sanity-gss.sh krb5_login.sh setup_kerberos.sh noinst_SCRIPTS += recovery-mds-scale.sh run_dd.sh run_tar.sh run_iozone.sh -noinst_SCRIPTS += run_dbench.sh +noinst_SCRIPTS += run_dbench.sh recovery-double-scale.sh nobase_noinst_SCRIPTS = cfg/local.sh nobase_noinst_SCRIPTS += acl/make-tree acl/run cfg/ncli.sh nobase_noinst_SCRIPTS += racer/dir_create.sh racer/file_create.sh racer/file_list.sh diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh 
index b21af68..166ba49 100755 --- a/lustre/tests/acceptance-small.sh +++ b/lustre/tests/acceptance-small.sh @@ -23,7 +23,7 @@ fi [ "$DEBUG_OFF" ] || DEBUG_OFF="eval lctl set_param debug=\"$DEBUG_LVL\"" [ "$DEBUG_ON" ] || DEBUG_ON="eval lctl set_param debug=0x33f0484" -export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY RECOVERY_MDS_SCALE" +export TESTSUITE_LIST="RUNTESTS SANITY DBENCH BONNIE IOZONE FSX SANITYN LFSCK LIBLUSTRE RACER REPLAY_SINGLE CONF_SANITY RECOVERY_SMALL REPLAY_OST_SINGLE REPLAY_DUAL INSANITY SANITY_QUOTA SANITY_SEC SANITY_GSS PERFORMANCE_SANITY RECOVERY_MDS_SCALE RECOVERY_DOUBLE_SCALE" if [ "$ACC_SM_ONLY" ]; then for O in $TESTSUITE_LIST; do @@ -348,8 +348,8 @@ for NAME in $CONFIGS; do [ "$SLOW" = "no" ] && DURATION=300 RACERCLIENTS=$HOSTNAME [ ! -z ${CLIENTS} ] && RACERCLIENTS=$CLIENTS - log "racer on clients: $RACERCLIENTS DURATION=$DURATION" - CLIENTS=${RACERCLIENTS} DURATION=$DURATION bash runracer + log "racer on clients: $RACERCLIENTS DURATION=$DURATION RACERDIRS=$RACERDIRS" + CLIENTS=${RACERCLIENTS} DURATION=$DURATION bash runracer $RACERDIRS $CLEANUP $SETUP RACER="done" @@ -440,6 +440,14 @@ if [ "$RECOVERY_MDS_SCALE" != "no" ]; then RECOVERY_MDS_SCALE="done" fi +[ "$RECOVERY_DOUBLE_SCALE" != "no" ] && skip_remmds recovery-double-scale && RECOVERY_DOUBLE_SCALE=no && MSKIPPED=1 +[ "$RECOVERY_DOUBLE_SCALE" != "no" ] && skip_remost recovery-double-scale && RECOVERY_DOUBLE_SCALE=no && OSKIPPED=1 +if [ "$RECOVERY_DOUBLE_SCALE" != "no" ]; then + title recovery-double-scale + bash recovery-double-scale.sh + RECOVERY_DOUBLE_SCALE="done" +fi + RC=$? 
title FINISHED echo "Finished at `date` in $((`date +%s` - $STARTTIME))s" diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 016582e..7c5dd43 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -783,9 +783,9 @@ set_and_check() { FINAL=$(($ORIG + 5)) fi echo "Setting $PARAM from $ORIG to $FINAL" - do_facet $SINGLEMDS "$LCTL conf_param $PARAM=$FINAL" || error conf_param failed + do_facet $SINGLEMDS "$LCTL conf_param $PARAM='$FINAL'" || error conf_param failed - wait_update $(facet_host $myfacet) "$TEST" $FINAL || error check failed! + wait_update $(facet_host $myfacet) "$TEST" "$FINAL" || error check failed! } test_27a() { @@ -1453,6 +1453,107 @@ test_42() { #bug 14693 } run_test 42 "invalid config param should not prevent client from mounting" +test_43() { + [ $UID -ne 0 -o $RUNAS_ID -eq 0 ] && skip "run as root" + setup + chmod ugo+x $DIR || error "chmod 0 failed" + set_and_check mds \ + "lctl get_param -n mdt.$FSNAME-MDT0000.root_squash" \ + "$FSNAME.mdt.root_squash" \ + "0:0" + set_and_check mds \ + "lctl get_param -n mdt.$FSNAME-MDT0000.nosquash_nids" \ + "$FSNAME.mdt.nosquash_nids" \ + "NONE" + + # + # create set of test files + # + echo "111" > $DIR/$tfile-userfile || error "write 1 failed" + chmod go-rw $DIR/$tfile-userfile || error "chmod 1 failed" + chown $RUNAS_ID.$RUNAS_ID $DIR/$tfile-userfile || error "chown failed" + + echo "222" > $DIR/$tfile-rootfile || error "write 2 failed" + chmod go-rw $DIR/$tfile-rootfile || error "chmod 2 faield" + + mkdir $DIR/$tdir-rootdir -p || error "mkdir failed" + chmod go-rwx $DIR/$tdir-rootdir || error "chmod 3 failed" + touch $DIR/$tdir-rootdir/tfile-1 || error "touch failed" + + # + # check root_squash: + # set root squash UID:GID to RUNAS_ID + # root should be able to access only files owned by RUNAS_ID + # + set_and_check mds \ + "lctl get_param -n mdt.$FSNAME-MDT0000.root_squash" \ + "$FSNAME.mdt.root_squash" \ + "$RUNAS_ID:$RUNAS_ID" + + ST=$(stat -c "%n: owner 
uid %u (%A)" $DIR/$tfile-userfile)
+        dd if=$DIR/$tfile-userfile 1>/dev/null 2>/dev/null || \
+                error "$ST: root read permission is denied"
+        echo "$ST: root read permission is granted - ok"
+
+        echo "444" | \
+        dd conv=notrunc of=$DIR/$tfile-userfile 1>/dev/null 2>/dev/null || \
+                error "$ST: root write permission is denied"
+        echo "$ST: root write permission is granted - ok"
+
+        ST=$(stat -c "%n: owner uid %u (%A)" $DIR/$tfile-rootfile)
+        dd if=$DIR/$tfile-rootfile 1>/dev/null 2>/dev/null && \
+                error "$ST: root read permission is granted"
+        echo "$ST: root read permission is denied - ok"
+
+        echo "555" | \
+        dd conv=notrunc of=$DIR/$tfile-rootfile 1>/dev/null 2>/dev/null && \
+                error "$ST: root write permission is granted"
+        echo "$ST: root write permission is denied - ok"
+
+        ST=$(stat -c "%n: owner uid %u (%A)" $DIR/$tdir-rootdir)
+        rm $DIR/$tdir-rootdir/tfile-1 1>/dev/null 2>/dev/null && \
+                error "$ST: root unlink permission is granted"
+        echo "$ST: root unlink permission is denied - ok"
+
+        touch $DIR/$tdir-rootdir/tfile-2 1>/dev/null 2>/dev/null && \
+                error "$ST: root create permission is granted"
+        echo "$ST: root create permission is denied - ok"
+
+        #
+        # check nosquash_nids:
+        #   put client's NID into nosquash_nids list,
+        #   root should be able to access root file after that
+        #
+        local NIDLIST=$(lctl list_nids all | tr '\n' ' ')
+        NIDLIST="2@elan $NIDLIST 192.168.0.[2,10]@tcp"
+        NIDLIST=$(echo $NIDLIST | tr -s ' ' ' ')
+        set_and_check mds \
+                "lctl get_param -n mdt.$FSNAME-MDT0000.nosquash_nids" \
+                "$FSNAME-MDTall.mdt.nosquash_nids" \
+                "$NIDLIST"
+
+        ST=$(stat -c "%n: owner uid %u (%A)" $DIR/$tfile-rootfile)
+        dd if=$DIR/$tfile-rootfile 1>/dev/null 2>/dev/null || \
+                error "$ST: root read permission is denied"
+        echo "$ST: root read permission is granted - ok"
+
+        echo "666" | \
+        dd conv=notrunc of=$DIR/$tfile-rootfile 1>/dev/null 2>/dev/null || \
+                error "$ST: root write permission is denied"
+        echo "$ST: root write permission is granted - ok"
+
+
ST=$(stat -c "%n: owner uid %u (%A)" $DIR/$tdir-rootdir) + rm $DIR/$tdir-rootdir/tfile-1 || \ + error "$ST: root unlink permission is denied" + echo "$ST: root unlink permission is granted - ok" + touch $DIR/$tdir-rootdir/tfile-2 || \ + error "$ST: root create permission is denied" + echo "$ST: root create permission is granted - ok" + + return 0 +} +run_test 43 "check root_squash and nosquash_nids" + umount_client $MOUNT cleanup_nocli diff --git a/lustre/tests/createmany.c b/lustre/tests/createmany.c index a937fc5..8cdacf1 100644 --- a/lustre/tests/createmany.c +++ b/lustre/tests/createmany.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,13 @@ static char *get_file_name(const char *fmt, long n, int has_fmt_spec) return filename; } +double now(void) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec / 1000000; +} + int main(int argc, char ** argv) { long i; @@ -74,8 +82,8 @@ int main(int argc, char ** argv) int do_unlink = 0, do_mknod = 0; char *filename; char *fmt = NULL, *fmt_unlink = NULL, *tgt = NULL; - long start, last, end = ~0UL >> 1; - long begin = 0, count = ~0UL >> 1; + double start, last; + long begin = 0, end = ~0UL >> 1, count = ~0UL >> 1; int c, has_fmt_spec = 0, unlink_has_fmt_spec = 0; /* Handle the last argument in form of "-seconds" */ @@ -132,7 +140,7 @@ int main(int argc, char ** argv) usage(argv[0]); } - start = last = time(NULL); + start = last = now(); has_fmt_spec = strchr(fmt, '%') != NULL; if (do_unlink) @@ -186,15 +194,15 @@ int main(int argc, char ** argv) } } - if ((i % 10000) == 0) { - printf(" - created %ld (time %ld total %ld last %ld)\n", - i, time(0), time(0) - start, time(0) - last); - last = time(NULL); + if (i && (i % 10000) == 0) { + printf(" - created %ld (time %.2f total %.2f last %.2f)" + "\n", i, now(), now() - start, now() - last); + last = now(); } } - printf("total: %ld creates%s in %ld seconds: %f creates/second\n", i, + 
printf("total: %ld creates%s in %.2f seconds: %.2f creates/second\n", i, do_unlink ? "/deletions" : "", - time(NULL) - start, ((float)i / (time(0) - start))); + now() - start, ((double)i / (now() - start))); return rc; } diff --git a/lustre/tests/createtest.c b/lustre/tests/createtest.c index 8d95339..2c62d7d 100644 --- a/lustre/tests/createtest.c +++ b/lustre/tests/createtest.c @@ -96,9 +96,9 @@ int main(int argc, char *argv[]) if (i == S_IFCHR || i == S_IFBLK) { if (st.st_rdev != 0x1234) { fprintf(stderr, "%s: ERROR rdev %s: " - "%lu != 0x1234", + "%llu != 0x1234", argv[0], name, - (unsigned long) st.st_rdev); + (unsigned long long)st.st_rdev); exit(13); } } diff --git a/lustre/tests/insanity.sh b/lustre/tests/insanity.sh index ebf5cb8..06b5ca3 100755 --- a/lustre/tests/insanity.sh +++ b/lustre/tests/insanity.sh @@ -60,19 +60,6 @@ set_fail_client() { echo "fail $FAIL_CLIENT, next is $FAIL_NEXT" } -shutdown_client() { - client=$1 - if [ "$FAILURE_MODE" = HARD ]; then - $POWER_DOWN $client - while ping -w 3 -c 1 $client > /dev/null 2>&1; do - echo "waiting for node $client to fail" - sleep 1 - done - elif [ "$FAILURE_MODE" = SOFT ]; then - zconf_umount $client $MOUNT -f - fi -} - fail_clients() { num=$1 diff --git a/lustre/tests/racer/dir_create.sh b/lustre/tests/racer/dir_create.sh index 80fbbe1..a280e0a 100755 --- a/lustre/tests/racer/dir_create.sh +++ b/lustre/tests/racer/dir_create.sh @@ -8,7 +8,7 @@ create(){ } while /bin/true ; do - file=$(($RANDOM%$MAX)) + file=$((RANDOM % MAX)) mkdir -p $DIR/$file/$file/ 2> /dev/null create 2> /dev/null done diff --git a/lustre/tests/racer/file_concat.sh b/lustre/tests/racer/file_concat.sh index 38181ad..8dde304 100755 --- a/lustre/tests/racer/file_concat.sh +++ b/lustre/tests/racer/file_concat.sh @@ -10,7 +10,7 @@ concat(){ } while /bin/true ; do - file=$(($RANDOM%$MAX)) - new_file=$(($RANDOM%$MAX)) + file=$((RANDOM % MAX)) + new_file=$((RANDOM % MAX)) concat 2> /dev/null done diff --git a/lustre/tests/racer/file_create.sh 
b/lustre/tests/racer/file_create.sh index d94502c..70ba0fd 100755 --- a/lustre/tests/racer/file_create.sh +++ b/lustre/tests/racer/file_create.sh @@ -5,13 +5,13 @@ MAX=$2 MAX_MB=256 create() { - SIZE=$(($RANDOM*MAX_MB/32)) + SIZE=$((RANDOM * MAX_MB / 32)) echo "file_create: SIZE=$SIZE" dd if=/dev/zero of=$DIR/$file bs=1k count=$SIZE } while /bin/true ; do - file=$(($RANDOM%$MAX)) + file=$((RANDOM % MAX)) create 2> /dev/null done diff --git a/lustre/tests/racer/file_link.sh b/lustre/tests/racer/file_link.sh index 5c1cac7..aaf498d 100755 --- a/lustre/tests/racer/file_link.sh +++ b/lustre/tests/racer/file_link.sh @@ -4,7 +4,7 @@ DIR=$1 MAX=$2 while /bin/true ; do - file=$(($RANDOM%$MAX)) - new_file=$((($file + 1)%$MAX)) + file=$((RANDOM % $MAX)) + new_file=$((RANDOM % MAX)) ln $file $DIR/$new_file 2> /dev/null done diff --git a/lustre/tests/racer/file_rename.sh b/lustre/tests/racer/file_rename.sh index 9552102..a4ef5b8 100755 --- a/lustre/tests/racer/file_rename.sh +++ b/lustre/tests/racer/file_rename.sh @@ -4,7 +4,7 @@ DIR=$1 MAX=$2 while /bin/true ; do - file=$(($RANDOM%$MAX)) - new_file=$((($file + 1)%$MAX)) + file=$((RANDOM % MAX)) + new_file=$((RANDOM % MAX)) mv $DIR/$file $DIR/$new_file 2> /dev/null done diff --git a/lustre/tests/racer/file_rm.sh b/lustre/tests/racer/file_rm.sh index 41d3d62..20d5226 100755 --- a/lustre/tests/racer/file_rm.sh +++ b/lustre/tests/racer/file_rm.sh @@ -4,7 +4,7 @@ DIR=$1 MAX=$2 while /bin/true ; do - file=$(($RANDOM%$MAX)) + file=$((RANDOM % MAX)) rm -rf $DIR/$file 2> /dev/null sleep 1 done diff --git a/lustre/tests/racer/file_symlink.sh b/lustre/tests/racer/file_symlink.sh index 44771a5..187b026 100755 --- a/lustre/tests/racer/file_symlink.sh +++ b/lustre/tests/racer/file_symlink.sh @@ -4,8 +4,8 @@ DIR=$1 MAX=$2 while /bin/true ; do - file=$(($RANDOM%$MAX)) - new_file=$((($file + 1)%$MAX)) + file=$((RANDOM % MAX)) + new_file=$((RANDOM % MAX)) ln -s $file $DIR/$new_file 2> /dev/null ln -s $file/$file/$file $DIR/$new_file 2> 
/dev/null done diff --git a/lustre/tests/racer/racer.sh b/lustre/tests/racer/racer.sh index 645e349..1274d02 100755 --- a/lustre/tests/racer/racer.sh +++ b/lustre/tests/racer/racer.sh @@ -10,40 +10,34 @@ NUM_THREADS=${NUM_THREADS:-3} mkdir -p $DIR +RACER_PROGS="file_create dir_create file_rm file_rename file_link file_symlink +file_list file_concat" + racer_cleanup() { - killall file_create.sh - killall dir_create.sh - killall file_rm.sh - killall file_rename.sh - killall file_link.sh - killall file_symlink.sh - killall file_list.sh - killall file_concat.sh - trap 0 + for P in $RACER_PROGS; do + killall $P.sh + done + trap 0 } echo "Running $0 for $DURATION seconds. CTRL-C to exit" trap " - echo \"Cleaning up\" - racer_cleanup - exit 0 + echo \"Cleaning up\" + racer_cleanup + exit 0 " 2 15 cd `dirname $0` for N in `seq 1 $NUM_THREADS`; do - ./file_create.sh $DIR $MAX_FILES & - ./dir_create.sh $DIR $MAX_FILES & - ./file_rename.sh $DIR $MAX_FILES & - ./file_link.sh $DIR $MAX_FILES & - ./file_symlink.sh $DIR $MAX_FILES & - ./file_concat.sh $DIR $MAX_FILES & - ./file_list.sh $DIR & - ./file_rm.sh $DIR $MAX_FILES & + for P in $RACER_PROGS; do + ./$P.sh $DIR $MAX_FILES & + done done -sleep $DURATION; +sleep $DURATION racer_cleanup + # Check our to see whether our test DIR is still available. df $DIR RC=$? diff --git a/lustre/tests/recovery-double-scale.sh b/lustre/tests/recovery-double-scale.sh new file mode 100644 index 0000000..d98dc65 --- /dev/null +++ b/lustre/tests/recovery-double-scale.sh @@ -0,0 +1,314 @@ +#!/bin/bash + +# All pairwise combinations of node failures. +# Was cmd3-17 +# +# Author: Chris Cooper +# +# Script fails pair of nodes: +# -- in parallel by default +# -- in series if SERIAL is set + +LUSTRE=${LUSTRE:-`dirname $0`/..} +SETUP=${SETUP:-""} +CLEANUP=${CLEANUP:-""} +. $LUSTRE/tests/test-framework.sh + +init_test_env $@ + +. 
${CONFIG:=$LUSTRE/tests/cfg/$NAME.sh}
+TESTSUITELOG=${TESTSUITELOG:-$TMP/recovery-double-scale}
+DEBUGLOG=$TESTSUITELOG.debug
+exec 2>$DEBUGLOG
+echo "--- env ---" >&2
+env >&2
+echo "--- env ---" >&2
+set -x
+
+[ -n "$CLIENTS" ] || { skip "$0 Need two or more remote clients" && exit 0; }
+[ $CLIENTCOUNT -ge 3 ] || \
+    { skip "$0 Need two or more remote clients, have $CLIENTCOUNT" && exit 0; }
+
+END_RUN_FILE=${END_RUN_FILE:-$SHARED_DIRECTORY/end_run_file}
+LOAD_PID_FILE=${LOAD_PID_FILE:-$TMP/client-load.pid}
+
+remote_mds_nodsh && skip "remote MDS with nodsh" && exit 0
+remote_ost_nodsh && skip "remote OST with nodsh" && exit 0
+
+check_timeout || exit 1
+
+build_test_filter
+
+check_and_setup_lustre
+rm -rf $DIR/[df][0-9]*
+
+# the test node needs to be insulated from a lustre failure as much as possible,
+# so not even loading the lustre modules is ideal.
+# -- umount lustre
+# -- remove hostname from clients list
+zconf_umount $(hostname) $MOUNT
+NODES_TO_USE=${NODES_TO_USE:-$CLIENTS}
+NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname))
+
+check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]}
+
+MDTS=$(get_facets MDS)
+OSTS=$(get_facets OST)
+
+rm -f $END_RUN_FILE
+
+reboot_recover_node () {
+    # item var contains a pair of clients if nodetype=clients
+    # I would prefer to have a list here
+    local item=$1
+    local nodetype=$2
+    local timeout=$($LCTL get_param -n timeout)
+
+    # MDS, OST item contains the facet
+    case $nodetype in
+       MDS|OST )    facet_failover $item
+                [ "$SERIAL" ] && wait_recovery_complete $item $((timeout * 4)) || true
+                ;;
+       clients) for c in ${item//,/ }; do
+                      shutdown_client $c
+                      boot_node $c
+                done
+                start_client_loads $item || return $?
+                ;;
+       * )      error "reboot_recover_node: nodetype=$nodetype. Must be one of 'MDS', 'OST', or 'clients'."
+ exit 1;; + esac +} + +get_item_type () { + local type=$1 + local excluded=${2:-""} + + local list + case $type in + MDS ) list=$MDTS;; + OST ) list=$OSTS;; + clients) list=$NODES_TO_USE + ;; + * ) error "Invalid type=$type. Must be one of 'MDS', 'OST', or 'clients'." + exit 1;; + esac + + [ "$excluded" ] && list=$(exclude_items_from_list $list $excluded) + # empty list + if [ ! "$(echo $list)" ]; then + echo + return + fi + + item=$(get_random_entry $list) + if [ "$type" = clients ] ; then + item="$item $(get_random_entry $(exclude_items_from_list $list $item))" + item=$(comma_list $item) + fi + echo $item +} + +# failover_pair +# +# for the two nodetypes specified, chooses a random node(s) from each +# class, reboots the nodes sequentially, and then restarts lustre on +# the nodes. +failover_pair() { + local type1=$1 + local type2=$2 + local title=$3 + + local client_nodes="" + local item1= + local item2= + local client1= + local client2= + + log " +==== START === $title " + + item1=$(get_item_type $type1) + [ "$item1" ] || \ + { echo "type1=$type1 item1 is empty" && return 0; } + item2=$(get_item_type $type2 $item1) + [ "$item2" ] || \ + { echo "type1=$type1 item1=$item1 type2=$type2 item2=$item2 is empty" && return 0; } + + # Check that our client loads are still running. If any have died, + # that means they have died outside of recovery, which is unacceptable. + log "==== Checking the clients loads BEFORE failover -- failure NOT OK" + + # FIXME. need print summary on exit + if ! check_client_loads $NODES_TO_USE; then + exit 4 + fi + + log "Done checking client loads. Failing type1=$type1 item1=$item1 ... " + + reboot_recover_node $item1 $type1 || return $? + + # Hendrix test17 description: + # Introduce a failure, wait at + # least 5 minutes (for recovery), + # introduce a 2nd + # failure, and wait another 5 + # minutes + + # reboot_recover_node waits recovery in according to + # SERIAL value. 
+ # We have a "double failures" if SERIAL is not set, + # do not need a sleep between failures for "double failures" + + log " Failing type2=$type2 item2=$item2 ... " + reboot_recover_node $item2 $type2 || return $? + + # Client loads are allowed to die while in recovery, so we just + # restart them. + log "==== Checking the clients loads AFTER failovers -- ERRORS_OK=$ERRORS_OK" + restart_client_loads $NODES_TO_USE $ERRORS_OK || return $? + log "Done checking / re-Starting client loads. PASS" + return 0 +} + +summary_and_cleanup () { + local rc=$? + trap 0 + + # Having not empty END_RUN_FILE means the failed loads only + if [ -s $END_RUN_FILE ]; then + echo "Found the END_RUN_FILE file: $END_RUN_FILE" + cat $END_RUN_FILE + local END_RUN_NODE= + read END_RUN_NODE < $END_RUN_FILE + + # a client load will end (i.e. fail) if it finds + # the end run file. that does not mean that that client load + # actually failed though. the first node in the END_RUN_NODE is + # the one we are really interested in. + if [ -n "$END_RUN_NODE" ]; then + var=${END_RUN_NODE}_load + echo "Client load failed on node $END_RUN_NODE" + echo + echo "client $END_RUN_NODE load debug output :" + local logfile=${TESTSUITELOG}_run_${!var}.sh-${END_RUN_NODE}.debug + do_node ${END_RUN_NODE} "set -x; [ -e $logfile ] && cat $logfile " || true + fi + rc=1 + fi + + echo $(date +'%F %H:%M:%S') Terminating clients loads ... 
+ echo "$0" >> $END_RUN_FILE + local result=PASS + [ $rc -eq 0 ] || result=FAIL + + log " +Server failover period: $FAILOVER_PERIOD seconds +Exited after: $ELAPSED seconds +Status: $result: rc=$rc" + + # make sure the client loads die + do_nodes $NODES_TO_USE "set -x; test -f $TMP/client-load.pid && \ + { kill -s TERM \$(cat $TMP/client-load.pid) || true; }" + + # and free up the pdshes that started them, if any are still around + if [ -n "$CLIENT_LOAD_PIDS" ]; then + kill $CLIENT_LOAD_PIDS || true + sleep 5 + kill -9 $CLIENT_LOAD_PIDS || true + fi + [ $rc -eq 0 ] && zconf_mount $(hostname) $MOUNT + exit $rc +} + +trap summary_and_cleanup EXIT TERM INT + +# +# MAIN +# +log "-----============= $0 starting =============-----" + +START_TS=$(date +%s) +CURRENT_TS=$START_TS +ELAPSED=0 + +# Set SERIAL to serialize the failure through a recovery of the first failure. +SERIAL=${SERIAL:-""} +ERRORS_OK="yes" + +[ "$SERIAL" ] && ERRORS_OK="" + +FAILOVER_PERIOD=${FAILOVER_PERIOD:-$((60*5))} # 5 minutes + +# Start client loads. +start_client_loads $NODES_TO_USE +echo clients load pids: +if ! do_nodes $NODES_TO_USE "set -x; echo \$(hostname): && cat $TMP/client-load.pid"; then + if [ -e $DEBUGLOG ]; then + exec 2<&- + cat $DEBUGLOG + exit 3 + fi +fi + +# FIXME: Do we want to have an initial sleep period where the clients +# just run before introducing a failure? 
+sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.1 +failover_pair MDS OST "test 1: failover MDS, then OST ==========" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.2 +failover_pair MDS clients "test 2: failover MDS, then 2 clients ====" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.3 +if [ $MDSCOUNT -gt 1 ]; then + failover_pair MDS MDS "test 3: failover MDS, then another MDS ==" + sleep $FAILOVER_PERIOD +else + skip "$0 : $MDSCOUNT < 2 MDTs, test 3 skipped" +fi + +#CMD_TEST_NUM=17.4 +if [ $OSTCOUNT -gt 1 ]; then + failover_pair OST OST "test 4: failover OST, then another OST ==" + sleep $FAILOVER_PERIOD +else + skip "$0 : $OSTCOUNT < 2 OSTs, test 4 skipped" +fi + +#CMD_TEST_NUM=17.5 +failover_pair OST clients "test 5: failover OST, then 2 clients ====" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.6 +failover_pair OST MDS "test 6: failover OST, then MDS ==========" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.7 +failover_pair clients MDS "test 7: failover 2 clients, then MDS ====" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.8 +#failover_pair clients OST "test 8: failover 2 clients, then OST ====" +sleep $FAILOVER_PERIOD + +#CMD_TEST_NUM=17.9 +if [ $CLIENTCOUNT -ge 5 ]; then + failover_pair clients clients "test 9: failover 2 clients, then 2 different clients ==" + sleep $FAILOVER_PERIOD +fi +log "==== Checking the clients loads AFTER all failovers -- failure NOT OK" +if ! check_client_loads $NODES_TO_USE; then + log "Client load failed after failover. 
Exiting" + exit 5 +fi + +CURRENT_TS=$(date +%s) +ELAPSED=$((CURRENT_TS - START_TS)) + +log "Completed successfully in $ELAPSED seconds" + +exit 0 diff --git a/lustre/tests/recovery-mds-scale.sh b/lustre/tests/recovery-mds-scale.sh index 4d6bb7c..598620b 100644 --- a/lustre/tests/recovery-mds-scale.sh +++ b/lustre/tests/recovery-mds-scale.sh @@ -47,21 +47,12 @@ rm -rf $DIR/[df][0-9]* # -- remove hostname from clients list zconf_umount $(hostname) $MOUNT NODES_TO_USE=${NODES_TO_USE:-$CLIENTS} -NODES_TO_USE=$(exclude_item_from_list $NODES_TO_USE $(hostname)) +NODES_TO_USE=$(exclude_items_from_list $NODES_TO_USE $(hostname)) check_progs_installed $NODES_TO_USE ${CLIENT_LOADS[@]} -MDTS="" -for ((i=1; i<=$MDSCOUNT; i++)) do - MDTS="$MDTS mds$i" -done -MDTS=$(comma_list $MDTS) - -OSTS="" -for ((i=1; i<=$OSTCOUNT; i++)) do - OSTS="$OSTS ost$i" -done -OSTS=$(comma_list $OSTS) +MDTS=$(get_facets MDS) +OSTS=$(get_facets OST) ERRORS_OK="" # No application failures should occur during this test. FLAVOR=${FLAVOR:-"MDS"} diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 54f5305..c6428c7 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -436,21 +436,6 @@ test_21a() { } run_test 21a "commit on sharing" -shutdown_client() { - local client=$1 - local mnt=$2 - - if [ "$FAILURE_MODE" = HARD ]; then - $POWER_DOWN $client - while ping -w 3 -c 1 $client > /dev/null 2>&1; do - echo "waiting for node $client to fail" - sleep 1 - done - else - zconf_umount_clients $client $mnt -f - fi -} - test_21b_sub () { local mds=$1 do_node $CLIENT1 rm -f $MOUNT1/$tfile-* diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index f7906bc..5b618e0 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -475,7 +475,7 @@ test_20b() { # bug 10480 fail $SINGLEMDS # start orphan recovery df -P $DIR || df -P $DIR || true # reconnect - wait_mds_recovery_done || error "MDS recovery not done" + 
wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" # FIXME just because recovery is done doesn't mean we've finished # orphan cleanup. Fake it with a sleep for now... @@ -1762,8 +1762,13 @@ test_68 () #bug 13813 at_start || return 0 local ldlm_enqueue_min=$(find /sys -name ldlm_enqueue_min) [ -z "$ldlm_enqueue_min" ] && skip "missing /sys/.../ldlm_enqueue_min" && return 0 + local ldlm_enqueue_min_r=$(do_facet ost1 "find /sys -name ldlm_enqueue_min") + [ -z "$ldlm_enqueue_min_r" ] && skip "missing /sys/.../ldlm_enqueue_min in the ost1" && return 0 local ENQ_MIN=$(cat $ldlm_enqueue_min) + local ENQ_MIN_R=$(do_facet ost1 "cat $ldlm_enqueue_min_r") echo $TIMEOUT >> $ldlm_enqueue_min + do_facet ost1 "echo $TIMEOUT >> $ldlm_enqueue_min_r" + rm -f $DIR/${tfile}_[1-2] lfs setstripe $DIR/$tfile --index=0 --count=1 #define OBD_FAIL_LDLM_PAUSE_CANCEL 0x312 @@ -1774,7 +1779,9 @@ test_68 () #bug 13813 sysctl -w lustre.fail_loc=0x80000312 cp /etc/profile $DIR/${tfile}_2 || error "2nd cp failed $?" sysctl -w lustre.fail_loc=0 + echo $ENQ_MIN >> $ldlm_enqueue_min + do_facet ost1 "echo $ENQ_MIN_R >> $ldlm_enqueue_min_r" return 0 } run_test 68 "AT: verify slowing locks" diff --git a/lustre/tests/run_dbench.sh b/lustre/tests/run_dbench.sh index f82d9dd..45cfceb 100755 --- a/lustre/tests/run_dbench.sh +++ b/lustre/tests/run_dbench.sh @@ -33,7 +33,7 @@ trap signaled TERM # recovery-mds-scale uses this to signal the client loads to die echo $$ >$LOAD_PID_FILE -TESTDIR=$MOUNT/dbench-$(hostname) +TESTDIR=$MOUNT/d0.dbench-$(hostname) CONTINUE=true diff --git a/lustre/tests/run_dd.sh b/lustre/tests/run_dd.sh index 96a4950..f4f1a54 100755 --- a/lustre/tests/run_dd.sh +++ b/lustre/tests/run_dd.sh @@ -31,7 +31,7 @@ trap signaled TERM # recovery-mds-scale uses this to signal the client loads to die echo $$ >$LOAD_PID_FILE -TESTDIR=$MOUNT/dd-$(hostname) +TESTDIR=$MOUNT/d0.dd-$(hostname) CONTINUE=true while [ ! 
-e "$END_RUN_FILE" ] && $CONTINUE; do diff --git a/lustre/tests/run_iozone.sh b/lustre/tests/run_iozone.sh index 2b71118..2d075d7 100755 --- a/lustre/tests/run_iozone.sh +++ b/lustre/tests/run_iozone.sh @@ -31,7 +31,7 @@ trap signaled TERM # recovery-mds-scale uses this to signal the client loads to die echo $$ >$LOAD_PID_FILE -TESTDIR=$MOUNT/iozone-$(hostname) +TESTDIR=$MOUNT/d0.iozone-$(hostname) # needed to debug oom problem #echo 1 > /proc/sys/vm/vm_gfp_debug diff --git a/lustre/tests/run_tar.sh b/lustre/tests/run_tar.sh index 7502c241..5f40e68 100755 --- a/lustre/tests/run_tar.sh +++ b/lustre/tests/run_tar.sh @@ -31,7 +31,7 @@ trap signaled TERM # recovery-mds-scale uses this to signal the client loads to die echo $$ >$LOAD_PID_FILE -TESTDIR=$MOUNT/tar-$(hostname) +TESTDIR=$MOUNT/d0.tar-$(hostname) CONTINUE=true while [ ! -e "$END_RUN_FILE" ] && $CONTINUE; do diff --git a/lustre/tests/runracer b/lustre/tests/runracer index fcc26ed..feca24b 100644 --- a/lustre/tests/runracer +++ b/lustre/tests/runracer @@ -11,8 +11,15 @@ racer=`which racer.sh` [ -z "$racer" ] && echo racer is not installed && exit 1 CLIENTS=${CLIENTS:-$HOSTNAME} -RDIR=$DIR/racer -mkdir -p $RDIR +RACERDIRS=$@ +RACERDIRS=${RACERDIRS:-$DIR} +echo RACERDIRS=$RACERDIRS +for d in ${RACERDIRS}; do + RDIRS="$RDIRS $d/racer" + mkdir -p $d/racer +# lfs setstripe $d/racer -c -1 +done + DURATION=${DURATION:-120} assert_env CLIENTS @@ -31,6 +38,8 @@ do_racer_cleanup () { local pids local rc=0 + local RDIR=$1 + echo "DOING RACER CLEANUP ... " # Check if all processes are killed @@ -81,11 +90,16 @@ racer_cleanup () { echo $timeout killing RACERPID=$RACERPID kill $RACERPID || true sleep 2 # give chance racer to kill it's processes - do_racer_cleanup + local dir + for dir in $RDIRS; do + do_racer_cleanup $dir + done else echo "Racer completed before DURATION=$DURATION expired. Cleaning up..." 
kill $TIMERPID - do_racer_cleanup + for dir in $RDIRS; do + do_racer_cleanup $dir + done fi } @@ -104,10 +118,23 @@ trap racer_timeout ALRM timer_on $((DURATION + 5)) -do_nodes $CLIENTS "DURATION=$DURATION $racer $RDIR" & -RACERPID=$! +RACERPID="" +for rdir in $RDIRS; do + do_nodes $CLIENTS "DURATION=$DURATION $racer $rdir $NUM_RACER_THREADS" & + pid=$! + RACERPID="$RACERPID $pid" +done + echo RACERPID=$RACERPID -wait $RACERPID || RC=2 +for rpid in $RACERPID; do + wait $rpid + rc=$? + echo "rpid=$rpid rc=$rc" + if [ $rc != 0 ]; then + RC=$((RC + 1)) + fi +done + racer_cleanup echo "$0: completed $RC" exit $RC diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index e928e14..4391e0e 100644 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -176,7 +176,7 @@ resetquota() { $LFS setquota "$1" "$2" -b 0 -B 0 -i 0 -I 0 $MOUNT || error "resetquota failed" } -quota_error() { +quota_scan() { LOCAL_UG=$1 LOCAL_ID=$2 @@ -189,11 +189,20 @@ quota_error() { log "Files for group ($LOCAL_ID):" ($LFS find -group $LOCAL_ID $DIR | xargs stat 2>/dev/null) fi +} +quota_error() { + quota_scan $1 $2 shift 2 error "$*" } +quota_log() { + quota_scan $1 $2 + shift 2 + log "$*" +} + quota_show_check() { LOCAL_BF=$1 LOCAL_UG=$2 @@ -204,12 +213,12 @@ quota_show_check() { if [ "$LOCAL_BF" == "a" -o "$LOCAL_BF" == "b" ]; then USAGE="`$LFS quota -$LOCAL_UG $LOCAL_ID $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $2 }'`" - [ $USAGE -ne 0 ] && quota_error $LOCAL_UG $LOCAL_ID "System is not clean for block ($LOCAL_UG:$LOCAL_ID:$USAGE)." + [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for block ($LOCAL_UG:$LOCAL_ID:$USAGE)." fi if [ "$LOCAL_BF" == "a" -o "$LOCAL_BF" == "f" ]; then USAGE="`$LFS quota -$LOCAL_UG $LOCAL_ID $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $5 }'`" - [ $USAGE -ne 0 ] && quota_error $LOCAL_UG $LOCAL_ID "System is not clean for file ($LOCAL_UG:$LOCAL_ID:$USAGE)." 
+ [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for file ($LOCAL_UG:$LOCAL_ID:$USAGE)." fi } @@ -791,6 +800,7 @@ test_8() { [ "$SLOW" = "no" ] && duration=" -t 120" $RUNAS bash rundbench -D $DIR/$tdir 3 $duration || quota_error a $TSTUSR "dbench failed!" + rm -rf $DIR/$tdir sync; sleep 3; sync; return 0 @@ -1925,6 +1935,51 @@ test_25() { } run_test_with_stat 25 "test whether quota usage is transfered when chown/chgrp (18081) ===========" +test_26() { + mkdir -p $DIR/$tdir + chmod 0777 $DIR/$tdir + TESTFILE="$DIR/$tdir/$tfile-0" + TESTFILE2="$DIR/$tdir/$tfile-1" + set_blk_tunesz 512 + set_blk_unitsz 1024 + + wait_delete_completed + + # every quota slave gets 20MB + b_limit=$((OSTCOUNT * 20 * 1024)) + log "limit: ${b_limit}KB" + $LFS setquota -u $TSTUSR -b 0 -B $b_limit -i 0 -I 0 $DIR + sleep 3 + quota_show_check b u $TSTUSR + + $LFS setstripe $TESTFILE -c 1 -i 0 + $LFS setstripe $TESTFILE2 -c 1 -i 0 + chown $TSTUSR.$TSTUSR $TESTFILE + chown $TSTUSR.$TSTUSR $TESTFILE2 + + #define OBD_FAIL_QUOTA_DELAY_REL 0xA03 + lustre_fail ost 0xA03 + + log " Write the first file..." + $RUNAS $DIRECTIO write $TESTFILE 0 10 $((BLK_SZ * 1024)) || quota_error u $TSTUSR "write failure, but expect success" + log " Delete the first file..." + rm -f $TESTFILE + + + wait_delete_completed + + log " Write the second file..." + $RUNAS $DIRECTIO write $TESTFILE2 0 10 $((BLK_SZ * 1024)) || quota_error u $TSTUSR "write failure, but expect success" + log " Delete the second file..." 
+ rm -f $TESTFILE2 + + lustre_fail ost 0 + set_blk_unitsz $((128 * 1024)) + set_blk_tunesz $((128 * 1024 / 2)) + resetquota -u $TSTUSR +} +run_test_with_stat 26 "test for false quota error(bz18491) ======================================" + # turn off quota test_99() { diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index ec00947..8f51dc5 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -111,11 +111,13 @@ check_and_setup_lustre DIR=${DIR:-$MOUNT} assert_DIR -LOVNAME=`lctl get_param -n llite.*.lov.common_name | tail -n 1` -OSTCOUNT=`lctl get_param -n lov.$LOVNAME.numobd` -STRIPECOUNT=`lctl get_param -n lov.$LOVNAME.stripecount` -STRIPESIZE=`lctl get_param -n lov.$LOVNAME.stripesize` -ORIGFREE=`lctl get_param -n lov.$LOVNAME.kbytesavail` +MDT0=$($LCTL get_param -n mdc.*.mds_server_uuid | \ + awk '{gsub(/_UUID/,""); print $1}' | head -1) +LOVNAME=$($LCTL get_param -n llite.*.lov.common_name | tail -n 1) +OSTCOUNT=$($LCTL get_param -n lov.$LOVNAME.numobd) +STRIPECOUNT=$($LCTL get_param -n lov.$LOVNAME.stripecount) +STRIPESIZE=$($LCTL get_param -n lov.$LOVNAME.stripesize) +ORIGFREE=$($LCTL get_param -n lov.$LOVNAME.kbytesavail) MAXFREE=${MAXFREE:-$((200000 * $OSTCOUNT))} [ -f $DIR/d52a/foo ] && chattr -a $DIR/d52a/foo @@ -5960,32 +5962,34 @@ test_153() { } run_test 153 "test if fdatasync does not crash =======================" -err17935 () { - if [ $MDSCOUNT -gt 1 ]; then - error_ignore 17935 $* - else - error $* - fi -} - test_154() { cp /etc/hosts $DIR/$tfile - fid=`$LFS path2fid $DIR/$tfile` + fid=$($LFS path2fid $DIR/$tfile) rc=$? [ $rc -ne 0 ] && error "error: could not get fid for $DIR/$tfile." - diff $DIR/$tfile $DIR/.lustre/fid/$fid || error "open by fid failed: did not find expected data in file." + echo "open fid $fid" + diff /etc/hosts $DIR/.lustre/fid/$fid || error "open by fid failed: did not find expected data in file." 
echo "Opening a file by FID succeeded" } run_test 154 "Opening a file by FID" #Changelogs +err17935 () { + if [ $MDSCOUNT -gt 1 ]; then + error_ignore 17935 $* + else + error $* + fi +} test_160() { - remote_mds && skip "remote MDS" && return - lctl set_param -n mdd.*.changelog on - $LFS changelog_clear $FSNAME 0 + do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog on + USER=$(do_facet $SINGLEMDS lctl --device $MDT0 changelog_register -n) + echo "Registered as changelog user $USER" + do_facet $SINGLEMDS lctl get_param -n mdd.$MDT0.changelog_users | \ + grep -q $USER || error "User $USER not found in changelog_users" # change something mkdir -p $DIR/$tdir/pics/2008/zachy @@ -5997,29 +6001,40 @@ test_160() { rm $DIR/$tdir/pics/desktop.jpg # verify contents - $LFS changelog $FSNAME - # check target fid - fidc=$($LFS changelog $FSNAME | grep timestamp | grep "CREAT" | tail -1 | \ - awk '{print $5}') + $LFS changelog $MDT0 | tail -5 + echo "verifying target fid" + fidc=$($LFS changelog $MDT0 | grep timestamp | grep "CREAT" | \ + tail -1 | awk '{print $5}') fidf=$($LFS path2fid $DIR/$tdir/pics/zach/timestamp) [ "$fidc" == "t=$fidf" ] || \ err17935 "fid in changelog $fidc != file fid $fidf" - # check parent fid - fidc=$($LFS changelog $FSNAME | grep timestamp | grep "CREAT" | tail -1 | \ - awk '{print $6}') + echo "verifying parent fid" + fidc=$($LFS changelog $MDT0 | grep timestamp | grep "CREAT" | \ + tail -1 | awk '{print $6}') fidf=$($LFS path2fid $DIR/$tdir/pics/zach) [ "$fidc" == "p=$fidf" ] || \ err17935 "pfid in changelog $fidc != dir fid $fidf" - # verify purge - FIRST_REC=$($LFS changelog $FSNAME | head -1 | awk '{print $1}') - $LFS changelog_clear $FSNAME $(($FIRST_REC + 5)) - PURGE_REC=$($LFS changelog $FSNAME | head -1 | awk '{print $1}') - [ $PURGE_REC == $(($FIRST_REC + 6)) ] || \ - err17935 "first rec after purge should be $(($FIRST_REC + 6)); is $PURGE_REC" - # purge all - $LFS changelog_clear $FSNAME 0 - lctl set_param -n mdd.*.changelog off + echo 
"verifying user clear" + USERS=$(( $(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | wc -l) - 2 )) + FIRST_REC=$($LFS changelog $MDT0 | head -1 | awk '{print $1}') + $LFS changelog_clear $MDT0 $USER $(($FIRST_REC + 5)) + USER_REC=$(do_facet $SINGLEMDS lctl get_param -n \ + mdd.$MDT0.changelog_users | grep $USER | awk '{print $2}') + [ $USER_REC == $(($FIRST_REC + 5)) ] || \ + err17935 "user index should be $(($FIRST_REC + 5)); is $USER_REC" + CLEAR_REC=$($LFS changelog $MDT0 | head -1 | awk '{print $1}') + [ $CLEAR_REC == $(($FIRST_REC + 6)) -o $USERS -gt 1 ] || \ + err17935 "first index should be $(($FIRST_REC + 6)); is $CLEAR_REC" + + echo "verifying user deregister" + do_facet $SINGLEMDS lctl --device $MDT0 changelog_deregister $USER + do_facet $SINGLEMDS lctl get_param -n mdd.$MDT0.changelog_users | \ + grep -q $USER && error "User $USER still found in changelog_users" + + [ $USERS -eq 1 ] && \ + do_facet $SINGLEMDS lctl set_param mdd.$MDT0.changelog off || true } run_test 160 "changelog sanity" @@ -6035,7 +6050,7 @@ test_161() { ln $DIR/$tdir/$tfile $DIR/$tdir/foo2/zachary ln $DIR/$tdir/$tfile $DIR/$tdir/foo1/luna ln $DIR/$tdir/$tfile $DIR/$tdir/foo2/thor - local FID=$($LFS path2fid $DIR/$tdir/$tfile) + local FID=$($LFS path2fid $DIR/$tdir/$tfile | tr -d '[') if [ "$($LFS fid2path ${mds1_svc} $FID | wc -l)" != "5" ]; then $LFS fid2path ${mds1_svc} $FID err17935 "bad link ea" @@ -6096,19 +6111,19 @@ test_162() { touch $DIR/$tdir/d2/x2 mkdir -p $DIR/$tdir/d2/a/b/c mkdir -p $DIR/$tdir/d2/p/q/r - fid=$($LFS path2fid $DIR/$tdir/d2/$tfile) - check_path "/$tdir/d2/$tfile" ${mds1_svc} $fid --link 0 + FID=$($LFS path2fid $DIR/$tdir/d2/$tfile | tr -d '[') + check_path "/$tdir/d2/$tfile" ${mds1_svc} $FID --link 0 ln $DIR/$tdir/d2/$tfile $DIR/$tdir/d2/p/q/r/hlink mv $DIR/$tdir/d2/$tfile $DIR/$tdir/d2/a/b/c/new_file - fid=$($LFS path2fid $DIR/$tdir/d2/a/b/c/new_file) - check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $fid --link 1 - check_path 
"/$tdir/d2/p/q/r/hlink" ${mds1_svc} $fid --link 0 - # check that there are 2 links, and that --rec doesnt break anything - ${LFS} fid2path ${mds1_svc} $fid --rec 20 | wc -l | grep -q 2 || \ + FID=$($LFS path2fid $DIR/$tdir/d2/a/b/c/new_file | tr -d '[') + check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $FID --link 1 + check_path "/$tdir/d2/p/q/r/hlink" ${mds1_svc} $FID --link 0 + # check that there are 2 links + ${LFS} fid2path ${mds1_svc} $FID | wc -l | grep -q 2 || \ err17935 "expected 2 links" rm $DIR/$tdir/d2/p/q/r/hlink - check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $fid --link 0 + check_path "/$tdir/d2/a/b/c/new_file" ${mds1_svc} $FID --link 0 # Doesnt work with CMD yet: 17935 return 0 } @@ -6118,27 +6133,37 @@ test_170() { $LCTL debug_daemon start $TMP/${tfile}_log_good touch $DIR/$tfile $LCTL debug_daemon stop - cat $TMP/${tfile}_log_good | sed -e "s/^...../a/g" > $TMP/${tfile}_log_bad + sed -e "s/^...../a/g" $TMP/${tfile}_log_good > $TMP/${tfile}_log_bad || + error "sed failed to read log_good" $LCTL debug_daemon start $TMP/${tfile}_log_good rm -rf $DIR/$tfile $LCTL debug_daemon stop - $LCTL df $TMP/${tfile}_log_bad 2&> $TMP/${tfile}_log_bad.out - bad_line=`tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}'` - good_line1=`tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}'` + $LCTL df $TMP/${tfile}_log_bad 2&> $TMP/${tfile}_log_bad.out || + error "lctl df log_bad failed" + + local bad_line=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}') + local good_line1=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}') $LCTL df $TMP/${tfile}_log_good 2&>$TMP/${tfile}_log_good.out - good_line2=`tail -n 1 $TMP/${tfile}_log_good.out | awk '{print $5}'` + local good_line2=$(tail -n 1 $TMP/${tfile}_log_good.out | awk '{print $5}') + [ "$bad_line" ] && [ "$good_line1" ] && [ "$good_line2" ] || + error "bad_line good_line1 good_line2 are empty" + cat $TMP/${tfile}_log_good >> $TMP/${tfile}_logs_corrupt cat $TMP/${tfile}_log_bad >> 
$TMP/${tfile}_logs_corrupt cat $TMP/${tfile}_log_good >> $TMP/${tfile}_logs_corrupt $LCTL df $TMP/${tfile}_logs_corrupt 2&> $TMP/${tfile}_log_bad.out - bad_line_new=`tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}'` - good_line_new=`tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}'` - expected_good=$((good_line1 + good_line2*2)) + local bad_line_new=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $9}') + local good_line_new=$(tail -n 1 $TMP/${tfile}_log_bad.out | awk '{print $5}') + + [ "$bad_line_new" ] && [ "$good_line_new" ] || + error "bad_line_new good_line_new are empty" + + local expected_good=$((good_line1 + good_line2*2)) rm -rf $TMP/${tfile}* if [ $bad_line -ne $bad_line_new ]; then @@ -6152,7 +6177,7 @@ test_170() { fi true } -run_test 170 "test lctl df to handle corruputed log =====================" +run_test 170 "test lctl df to handle corrupted log =====================" # OST pools tests POOL=${POOL:-cea1} @@ -6266,6 +6291,8 @@ test_200h() { done wait_update $HOSTNAME "lctl get_param -n lov.$FSNAME-*.pools.$POOL" ""\ || error "Pool $FSNAME.$POOL cannot be drained" + # striping on an empty pool should fall back to "pool of everything" + $SETSTRIPE -p $POOL ${POOL_FILE}/$tfile || error "failed to create file with empty pool" } run_test 200h "Remove all targets from a pool ==========================" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 6bce6fa..27aaa0e 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -628,11 +628,36 @@ fi fi" } +shudown_node_hard () { + local host=$1 + local attempts=3 + + for i in $(seq $attempts) ; do + $POWER_DOWN $host + sleep 1 + ping -w 3 -c 1 $host > /dev/null 2>&1 || return 0 + echo "waiting for $host to fail attempts=$attempts" + [ $i -lt $attempts ] || \ + { echo "$host still pingable after power down! 
attempts=$attempts" && return 1; } + done +} + +shutdown_client() { + local client=$1 + local mnt=${2:-$MOUNT} + local attempts=3 + + if [ "$FAILURE_MODE" = HARD ]; then + shudown_node_hard $client + else + zconf_umount_clients $client $mnt -f + fi +} + shutdown_facet() { local facet=$1 if [ "$FAILURE_MODE" = HARD ]; then - $POWER_DOWN `facet_active_host $facet` - sleep 2 + shudown_node_hard $(facet_active_host $facet) elif [ "$FAILURE_MODE" = SOFT ]; then stop $facet fi @@ -667,30 +692,30 @@ check_progs_installed () { } start_client_load() { - local list=(${1//,/ }) - local nodenum=$2 - - local numloads=${#CLIENT_LOADS[@]} - local testnum=$((nodenum % numloads)) + local client=$1 + local var=${client}_load - do_node ${list[nodenum]} "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \ + do_node $client "PATH=$PATH MOUNT=$MOUNT ERRORS_OK=$ERRORS_OK \ BREAK_ON_ERROR=$BREAK_ON_ERROR \ END_RUN_FILE=$END_RUN_FILE \ LOAD_PID_FILE=$LOAD_PID_FILE \ TESTSUITELOG=$TESTSUITELOG \ - run_${CLIENT_LOADS[testnum]}.sh" & + run_${!var}.sh" & CLIENT_LOAD_PIDS="$CLIENT_LOAD_PIDS $!" - log "Started client load: ${CLIENT_LOADS[testnum]} on ${list[nodenum]}" + log "Started client load: ${!var} on $client" - eval export ${list[nodenum]}_load=${CLIENT_LOADS[testnum]} return 0 } start_client_loads () { local clients=(${1//,/ }) + local numloads=${#CLIENT_LOADS[@]} + local testnum - for ((num=0; num < ${#clients[@]}; num++ )); do - start_client_load $1 $num + for ((nodenum=0; nodenum < ${#clients[@]}; nodenum++ )); do + testnum=$((nodenum % numloads)) + eval export ${clients[nodenum]}_load=${CLIENT_LOADS[testnum]} + start_client_load ${clients[nodenum]} done } @@ -731,13 +756,39 @@ check_client_loads () { for client in $clients; do check_client_load $client - rc=$? 
+ rc=${PIPESTATUS[0]} if [ "$rc" != 0 ]; then log "Client load failed on node $client, rc=$rc" return $rc fi done } + +restart_client_loads () { + local clients=${1//,/ } + local expectedfail=${2:-""} + local client= + local rc=0 + + for client in $clients; do + check_client_load $client + rc=${PIPESTATUS[0]} + if [ "$rc" != 0 -a "$expectedfail" ]; then + start_client_load $client + echo "Restarted client load: on $client. Checking ..." + check_client_load $client + rc=${PIPESTATUS[0]} + if [ "$rc" != 0 ]; then + log "Client load failed to restart on node $client, rc=$rc" + # failure one client load means test fail + # we do not need to check other + return $rc + fi + else + return $rc + fi + done +} # End recovery-scale functions # verify that lustre actually cleaned up properly @@ -811,32 +862,39 @@ wait_delete_completed () { } wait_for_host() { - local HOST=$1 - check_network "$HOST" 900 - while ! do_node $HOST "ls -d $LUSTRE " > /dev/null; do sleep 5; done + local host=$1 + check_network "$host" 900 + while ! do_node $host "ls -d $LUSTRE " > /dev/null; do sleep 5; done } wait_for() { local facet=$1 - local HOST=`facet_active_host $facet` - wait_for_host $HOST + local host=`facet_active_host $facet` + wait_for_host $host } -wait_mds_recovery_done () { - local timeout=`do_facet $SINGLEMDS lctl get_param -n timeout` -#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) -# as we are in process of changing obd_timeout in different ways -# let's set MAX longer than that - local MAX=$(( timeout * 4 )) +wait_recovery_complete () { + local facet=$1 + + # Use default policy if $2 is not passed by caller. 
+ #define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 / 2) + # as we are in process of changing obd_timeout in different ways + # let's set MAX longer than that + local MAX=${2:-$(( TIMEOUT * 4 ))} + + local var_svc=${facet}_svc + local procfile="*.${!var_svc}.recovery_status" local WAIT=0 + local STATUS= + while [ $WAIT -lt $MAX ]; do - STATUS=`do_facet $SINGLEMDS "lctl get_param -n mdt.*-MDT0000.recovery_status | grep status"` - echo $STATUS | grep COMPLETE && return 0 + STATUS=$(do_facet $facet lctl get_param -n $procfile | grep status) + [[ $STATUS = "status: COMPLETE" ]] && return 0 sleep 5 WAIT=$((WAIT + 5)) - echo "Waiting $(($MAX - $WAIT)) secs for MDS recovery done" + echo "Waiting $((MAX - WAIT)) secs for $facet recovery done. $STATUS" done - echo "MDS recovery not done in $MAX sec" + echo "$facet recovery not done in $MAX sec. $STATUS" return 1 } @@ -925,7 +983,7 @@ facet_failover() { DFPID=$! echo "df pid is $DFPID" change_active $facet - TO=`facet_active_host $facet` + local TO=`facet_active_host $facet` echo "Failover $facet to $TO" wait_for $facet mount_facet $facet || error "Restart of $facet failed" @@ -1568,13 +1626,16 @@ comma_list() { echo "$*" | tr -s " " "\n" | sort -b -u | tr "\n" " " | sed 's/ \([^$]\)/,\1/g' } -# list is comma separated list -exclude_item_from_list () { +# list, excluded are the comma separated lists +exclude_items_from_list () { local list=$1 local excluded=$2 + local item list=${list//,/ } - list=$(echo " $list " | sed -re "s/\s+$excluded\s+/ /g") + for item in ${excluded//,/ }; do + list=$(echo " $list " | sed -re "s/\s+$item\s+/ /g") + done echo $(comma_list $list) } @@ -1582,6 +1643,18 @@ absolute_path() { (cd `dirname $1`; echo $PWD/`basename $1`) } +get_facets () { + local name=$(echo $1 | tr "[:upper:]" "[:lower:]") + local type=$(echo $1 | tr "[:lower:]" "[:upper:]") + + local list="" + local count=${type}COUNT + for ((i=1; i<=${!count}; i++)) do + list="$list ${name}$i" + done + echo $(comma_list $list) +} + 
################################## # Adaptive Timeouts funcs diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 264ffcc..aae7494 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -178,24 +178,6 @@ command_t cmdlist[] = { "provide gdb-friendly module information\n" "usage: modules "}, - /* Device configuration commands */ - {"== device setup (these are not normally used post 1.4) ==", - jt_noop, 0, "device config"}, - {"attach", jt_lcfg_attach, 0, - "set the type, name, and uuid of the current device\n" - "usage: attach type name uuid"}, - {"detach", jt_obd_detach, 0, - "remove driver (and name and uuid) from current device\n" - "usage: detach"}, - {"setup", jt_lcfg_setup, 0, - "type specific device configuration information\n" - "usage: setup "}, - {"cleanup", jt_obd_cleanup, 0, "cleanup previously setup device\n" - "usage: cleanup [force | failover]"}, - {"dump_cfg", jt_cfg_dump_log, 0, - "print log of recorded commands for this config to kernel debug log\n" - "usage: dump_cfg config-uuid-name"}, - /* virtual block operations */ {"==== virtual block device ====", jt_noop, 0, "virtual block device"}, {"blockdev_attach", jt_blockdev_attach, 0, @@ -226,6 +208,33 @@ command_t cmdlist[] = { "list pools and pools members\n" "usage pool_list [.] 
| "}, + /* Changelog commands */ + {"=== Changelogs ==", jt_noop, 0, "changelog user management"}, + {"changelog_register", jt_changelog_register, 0, + "register a new persistent changelog user, returns id\n" + "usage:\tdevice \n\tchangelog_register [-n]"}, + {"changelog_deregister", jt_changelog_deregister, 0, + "deregister an existing changelog user\n" + "usage:\tdevice \n\tchangelog_deregister "}, + + /* Device configuration commands */ + {"== device setup (these are not normally used post 1.4) ==", + jt_noop, 0, "device config"}, + {"attach", jt_lcfg_attach, 0, + "set the type, name, and uuid of the current device\n" + "usage: attach type name uuid"}, + {"detach", jt_obd_detach, 0, + "remove driver (and name and uuid) from current device\n" + "usage: detach"}, + {"setup", jt_lcfg_setup, 0, + "type specific device configuration information\n" + "usage: setup "}, + {"cleanup", jt_obd_cleanup, 0, "cleanup previously setup device\n" + "usage: cleanup [force | failover]"}, + {"dump_cfg", jt_cfg_dump_log, 0, + "print log of recorded commands for this config to kernel debug log\n" + "usage: dump_cfg config-uuid-name"}, + /* Test only commands */ {"==== testing (DANGEROUS) ====", jt_noop, 0, "testing (DANGEROUS)"}, {"--threads", jt_opt_threads, 0, diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 36c1fc3..03c8628 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -219,12 +219,14 @@ command_t cmdlist[] = { "Remote user list directory contents.\n" "usage: ls [OPTION]... [FILE]..."}, {"changelog", lfs_changelog, 0, - "Show the metadata changes in a filesystem between two snapshot times." - "\nusage: changelog [--follow] [startrec [endrec]]"}, + "Show the metadata changes on an MDT." 
+ "\nusage: changelog [--follow] [startrec [endrec]]" + "\n(note: --follow is only valid when run on MDT node)"}, {"changelog_clear", lfs_changelog_clear, 0, - "Purge old changelog records up to to free up space.\n" + "Indicate that old changelog records up to are no longer of " + "interest to consumer , allowing the system to free up space.\n" "An of 0 means all records.\n" - "usage: changelog_clear "}, + "usage: changelog_clear "}, {"fid2path", lfs_fid2path, 0, "Resolve the full path to a given FID. For a specific hardlink " "specify link number .\n" @@ -2366,7 +2368,7 @@ static int lfs_changelog(int argc, char **argv) int fd, len; char c, *mdd, *ptr = NULL; struct option long_opts[] = { - {"follow", 0, 0, 'f'}, + {"follow", no_argument, 0, 'f'}, {0, 0, 0, 0} }; char short_opts[] = "f"; @@ -2426,7 +2428,7 @@ static int lfs_changelog(int argc, char **argv) close(fd); if (len < 0) { - printf("read err %d\n", errno); + fprintf(stderr, "read err %d\n", errno); return -errno; } @@ -2437,32 +2439,37 @@ static int lfs_changelog_clear(int argc, char **argv) { long long endrec; - if (argc != 3) + if (argc != 4) return CMD_HELP; - endrec = strtoll(argv[2], NULL, 10); + endrec = strtoll(argv[3], NULL, 10); - return(llapi_changelog_clear(argv[1], endrec)); + return(llapi_changelog_clear(argv[1], argv[2], endrec)); } static int lfs_fid2path(int argc, char **argv) { struct option long_opts[] = { - {"link", 1, 0, 'l'}, - {"rec", 1, 0, 'r'}, + {"cur", no_argument, 0, 'c'}, + {"link", required_argument, 0, 'l'}, + {"rec", required_argument, 0, 'r'}, {0, 0, 0, 0} }; - char c, short_opts[] = "l:r:"; + char c, short_opts[] = "cl:r:"; char *device, *fid, *path; long long recno = -1; int linkno = -1; int lnktmp; + int printcur = 0; int rc; optind = 0; while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) != -1) { switch (c) { + case 'c': + printcur++; + break; case 'l': linkno = strtol(optarg, NULL, 10); break; @@ -2488,15 +2495,20 @@ static int lfs_fid2path(int argc, char 
**argv) lnktmp = (linkno >= 0) ? linkno : 0; while (1) { int oldtmp = lnktmp; - rc = llapi_fid2path(device, fid, path, PATH_MAX, recno, + long long rectmp = recno; + rc = llapi_fid2path(device, fid, path, PATH_MAX, &rectmp, &lnktmp); if (rc < 0) { fprintf(stderr, "%s error: %s\n", argv[0], strerror(errno = -rc)); break; - } else { - fprintf(stdout, "%s\n", path); } + + if (printcur) + fprintf(stdout, "%lld %s\n", rectmp, path); + else + fprintf(stdout, "%s\n", path); + if (linkno >= 0) /* specified linkno */ break; diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index 9ef5f2c..26f3a5e 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -303,7 +303,7 @@ int llapi_file_open_pool(const char *name, int flags, int mode, ptr = strchr(pool_name, '.'); if (ptr != NULL) { strncpy(fsname, pool_name, ptr - pool_name); - fsname[ptr - pool_name] = '\0'; + *ptr = '\0'; /* if fsname matches a filesystem skip it * if not keep the poolname as is */ if (poolpath(fsname, NULL, NULL) == 0) @@ -395,8 +395,7 @@ static int print_pool_members(char *fs, char *pool_dir, char *pool_file) } /* - * search lustre fsname from pathname - * + * Resolve lustre fsname from pathname */ static int search_fsname(char *pathname, char *fsname) { @@ -433,6 +432,25 @@ static int search_fsname(char *pathname, char *fsname) return -ENOENT; } +/* return the first file matching this pattern */ +static int first_match(char *pattern, char *buffer) +{ + glob_t glob_info; + + if (glob(pattern, GLOB_BRACE, NULL, &glob_info)) + return -ENOENT; + + if (glob_info.gl_pathc < 1) { + globfree(&glob_info); + return -ENOENT; + } + + strcpy(buffer, glob_info.gl_pathv[0]); + + globfree(&glob_info); + return 0; +} + /* * find the pool directory path under /proc * (can be also used to test if a fsname is known) @@ -440,7 +458,6 @@ static int search_fsname(char *pathname, char *fsname) static int poolpath(char *fsname, char *pathname, char *pool_pathname) { int rc = 0; - glob_t 
glob_info; char pattern[PATH_MAX + 1]; char buffer[PATH_MAX]; @@ -455,18 +472,13 @@ static int poolpath(char *fsname, char *pathname, char *pool_pathname) snprintf(pattern, PATH_MAX, "/proc/fs/lustre/lov/%s-*/pools", fsname); - rc = glob(pattern, GLOB_BRACE, NULL, &glob_info); + rc = first_match(pattern, buffer); if (rc) - return -ENOENT; - - if (glob_info.gl_pathc == 0) { - globfree(&glob_info); - return -ENOENT; - } + return rc; /* in fsname test mode, pool_pathname is NULL */ if (pool_pathname != NULL) - strcpy(pool_pathname, glob_info.gl_pathv[0]); + strcpy(pool_pathname, buffer); return 0; } @@ -2396,16 +2408,21 @@ static int get_mdtname(const char *name, char *format, char *buf) return sprintf(buf, format, name, suffix); } -#define CHANGELOG_FILE "/proc/fs/lustre/mdd/%s%s/changelog" -/* return a file desc to readable changelog */ +/* Return a file descriptor to a readable changelog */ int llapi_changelog_open(const char *mdtname, long long startrec) { char path[256]; int rc, fd; - if (get_mdtname(mdtname, CHANGELOG_FILE, path) <0) + /* Use either the mdd changelog (preferred) or a client mdc changelog */ + if (get_mdtname(mdtname, + "/proc/fs/lustre/md[cd]/%s%s{,-mdc-*}/changelog", + path) < 0) return -EINVAL; + rc = first_match(path, path); + if (rc) + return rc; if ((fd = open(path, O_RDONLY)) < 0) { llapi_err(LLAPI_MSG_ERROR, "error: can't open |%s|\n", path); @@ -2421,42 +2438,11 @@ int llapi_changelog_open(const char *mdtname, long long startrec) return fd; } -int llapi_changelog_clear(const char *mdtname, long long endrec) -{ - char path[256]; - char val[20]; - int fd, len; - - if (endrec < 0) { - llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, - "can't purge negative records\n"); - return -EINVAL; - } - - if (get_mdtname(mdtname, CHANGELOG_FILE, path) <0) - return -EINVAL; - - if ((fd = open(path, O_WRONLY)) < 0) { - llapi_err(LLAPI_MSG_ERROR, "error: can't open |%s|\n", path); - return errno; - } - - snprintf(val, sizeof(val), "%llu", endrec); - len = 
write(fd, val, strlen(val)); - close(fd); - if (len != strlen(val)) { - llapi_err(LLAPI_MSG_ERROR, "purge err\n"); - return errno; - } - - return 0; -} - static int dev_ioctl(struct obd_ioctl_data *data, int dev, int cmd) { - int rc; static char rawbuf[8192]; static char *buf = rawbuf; + int rc; data->ioc_dev = dev; memset(buf, 0, sizeof(rawbuf)); @@ -2482,7 +2468,6 @@ static int dev_ioctl(struct obd_ioctl_data *data, int dev, int cmd) return rc; } -/* should we just grep it from proc? */ static int dev_name2dev(char *name) { struct obd_ioctl_data data; @@ -2491,8 +2476,8 @@ static int dev_name2dev(char *name) memset(&data, 0, sizeof(data)); data.ioc_inllen1 = strlen(name) + 1; data.ioc_inlbuf1 = name; - rc = dev_ioctl(&data, -1, OBD_IOC_NAME2DEV); + rc = dev_ioctl(&data, -1, OBD_IOC_NAME2DEV); if (rc < 0) { llapi_err(LLAPI_MSG_ERROR, "Device %s not found %d\n", name,rc); return rc; @@ -2500,11 +2485,75 @@ static int dev_name2dev(char *name) return data.ioc_dev; } +/* We need the full mdc name, and we shouldn't just grep from proc... */ +static void do_get_mdcname(char *obd_type_name, char *obd_name, + char *obd_uuid, void *name) +{ + if (strncmp(obd_name, (char *)name, strlen((char *)name)) == 0) + strcpy((char *)name, obd_name); +} + +static int get_mdcdev(const char *mdtname) +{ + char name[MAX_OBD_NAME]; + char *type[] = { "mdc" }; + int rc; + + strcpy(name, mdtname); + rc = llapi_target_iterate(1, type, (void *)name, do_get_mdcname); + rc = rc < 0 ? 
: -rc; + if (rc < 0) { + llapi_err(LLAPI_MSG_ERROR, "Device %s not found %d\n", name,rc); + return rc; + } + return dev_name2dev(name); +} + +int llapi_changelog_clear(const char *mdtname, const char *idstr, + long long endrec) +{ + struct obd_ioctl_data data; + int dev, id, rc; + + if (endrec < 0) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "can't purge negative records\n"); + return -EINVAL; + } + + id = strtol(idstr + strlen(CHANGELOG_USER_PREFIX), NULL, 10); + if ((id == 0) || (strncmp(idstr, CHANGELOG_USER_PREFIX, + strlen(CHANGELOG_USER_PREFIX)) != 0)) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "expecting id of the form '"CHANGELOG_USER_PREFIX + "'; got '%s'\n", idstr); + return -EINVAL; + } + + dev = get_mdcdev(mdtname); + if (dev < 0) { + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "can't find mdc for '%s'\n", mdtname); + return dev; + } + + memset(&data, 0, sizeof(data)); + data.ioc_u32_1 = id; + data.ioc_u64_1 = endrec; + rc = dev_ioctl(&data, dev, OBD_IOC_CHANGELOG_CLEAR); + if (rc) + llapi_err(LLAPI_MSG_ERROR | LLAPI_MSG_NO_ERRNO, + "ioctl err %d", rc); + return rc; +} + + int llapi_fid2path(char *device, char *fidstr, char *buf, int buflen, - __u64 recno, int *linkno) + long long *recno, int *linkno) { struct lu_fid fid; struct obd_ioctl_data data; + char buffer[256]; int dev, rc; while (*fidstr == '[') @@ -2519,14 +2568,18 @@ int llapi_fid2path(char *device, char *fidstr, char *buf, int buflen, return -EINVAL; } - dev = dev_name2dev(device); + rc = get_mdtname(device, "%s%s", buffer); + if (rc < 0) + return rc; + + dev = dev_name2dev(buffer); if (dev < 0) return dev; memset(&data, 0, sizeof(data)); data.ioc_inlbuf1 = (char *)&fid; data.ioc_inllen1 = sizeof(fid); - data.ioc_inlbuf2 = (char *)&recno; + data.ioc_inlbuf2 = (char *)recno; data.ioc_inllen2 = sizeof(__u64); data.ioc_inlbuf3 = (char *)linkno; data.ioc_inllen3 = sizeof(int); @@ -2556,3 +2609,4 @@ int llapi_path2fid(const char *path, unsigned long long *seq, return 
rc; } + diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index a60d2a1..8d1b671 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -3156,5 +3156,111 @@ void llapi_ping_target(char *obd_type, char *obd_name, } else { printf("%s active.\n", obd_name); } +} + +int jt_changelog_register(int argc, char **argv) +{ + char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf; + struct obd_ioctl_data data; + char devname[30]; + int rc; + + if (argc > 2) + return CMD_HELP; + else if (argc == 2 && strcmp(argv[1], "-n") != 0) + return CMD_HELP; + if (cur_device < 0) + return CMD_HELP; + memset(&data, 0x00, sizeof(data)); + data.ioc_dev = cur_device; + memset(buf, 0, sizeof(rawbuf)); + rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf)); + if (rc) { + fprintf(stderr, "error: %s: invalid ioctl\n", + jt_cmdname(argv[0])); + return rc; + } + + rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_CHANGELOG_REG, buf); + if (rc < 0) { + fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), + strerror(rc = errno)); + return rc; + } + obd_ioctl_unpack(&data, buf, sizeof(rawbuf)); + + if (data.ioc_u32_1 == 0) { + fprintf(stderr, "received invalid userid!\n"); + return EPROTO; + } + + if (lcfg_get_devname() != NULL) + strcpy(devname, lcfg_get_devname()); + else + sprintf(devname, "dev %d", cur_device); + + if (argc == 2) + /* -n means bare name */ + printf(CHANGELOG_USER_PREFIX"%u\n", data.ioc_u32_1); + else + printf("%s: Registered changelog userid '"CHANGELOG_USER_PREFIX + "%u'\n", devname, data.ioc_u32_1); + return 0; } + +int jt_changelog_deregister(int argc, char **argv) +{ + char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf; + struct obd_ioctl_data data; + char devname[30]; + int id, rc; + + if (argc != 2 || cur_device < 0) + return CMD_HELP; + + id = strtol(argv[1] + strlen(CHANGELOG_USER_PREFIX), NULL, 10); + if ((id == 0) || (strncmp(argv[1], CHANGELOG_USER_PREFIX, + strlen(CHANGELOG_USER_PREFIX)) != 0)) { + fprintf(stderr, "expecting id of the form '" + CHANGELOG_USER_PREFIX"'; got '%s'\n", argv[1]); + 
return CMD_HELP; + } + + memset(&data, 0x00, sizeof(data)); + data.ioc_dev = cur_device; + data.ioc_u32_1 = id; + memset(buf, 0, sizeof(rawbuf)); + rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf)); + if (rc) { + fprintf(stderr, "error: %s: invalid ioctl\n", + jt_cmdname(argv[0])); + return rc; + } + + rc = l2_ioctl(OBD_DEV_ID, OBD_IOC_CHANGELOG_DEREG, buf); + if (rc < 0) { + fprintf(stderr, "error: %s: %s\n", jt_cmdname(argv[0]), + strerror(rc = errno)); + return rc; + } + obd_ioctl_unpack(&data, buf, sizeof(rawbuf)); + + if (data.ioc_u32_1 != id) { + fprintf(stderr, "No changelog user '%s'. Blocking user" + " is '"CHANGELOG_USER_PREFIX"%d'.\n", argv[1], + data.ioc_u32_1); + return ENOENT; + } + + if (lcfg_get_devname() != NULL) + strcpy(devname, lcfg_get_devname()); + else + sprintf(devname, "dev %d", cur_device); + + printf("%s: Deregistered changelog user '"CHANGELOG_USER_PREFIX"%d'\n", + devname, data.ioc_u32_1); + return 0; +} + + diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h index eeb1bb8..dccd213 100644 --- a/lustre/utils/obdctl.h +++ b/lustre/utils/obdctl.h @@ -120,5 +120,7 @@ int jt_blockdev_detach(int argc, char **argv); int jt_blockdev_info(int argc, char **argv); int jt_pool_cmd(int argc, char **argv); +int jt_changelog_register(int argc, char **argv); +int jt_changelog_deregister(int argc, char **argv); #endif diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 490c6a9..ed189d9 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -1342,6 +1342,10 @@ main(int argc, char **argv) CHECK_VALUE(MDS_SET_INFO); CHECK_VALUE(MDS_QUOTACHECK); CHECK_VALUE(MDS_QUOTACTL); + CHECK_VALUE(MDS_GETXATTR); + CHECK_VALUE(MDS_SETXATTR); + CHECK_VALUE(MDS_WRITEPAGE); + CHECK_VALUE(MDS_IS_SUBDIR); CHECK_VALUE(MDS_LAST_OPC); CHECK_VALUE(REINT_SETATTR); diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 1a1fa03..b2c591b 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -62,8 +62,8 @@ void 
lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' * (make -C lustre/utils newwiretest) - * running on Linux localhost.localdomain 2.6.18-prep #3 SMP Sun Nov 23 08:04:44 EST 2008 i68 - * with gcc version 4.1.1 20061011 (Red Hat 4.1.1-30) */ + * running on Linux cfs21 2.6.18-92.el5xen #1 SMP Tue Jun 10 19:55:54 EDT 2008 i686 i686 i386 + * with gcc version 4.1.2 20071124 (Red Hat 4.1.2-42) */ /* Constants... */ @@ -163,6 +163,14 @@ void lustre_assert_wire_constants(void) (long long)MDS_QUOTACHECK); LASSERTF(MDS_QUOTACTL == 48, " found %lld\n", (long long)MDS_QUOTACTL); + LASSERTF(MDS_GETXATTR == 49, " found %lld\n", + (long long)MDS_GETXATTR); + LASSERTF(MDS_SETXATTR == 50, " found %lld\n", + (long long)MDS_SETXATTR); + LASSERTF(MDS_WRITEPAGE == 51, " found %lld\n", + (long long)MDS_WRITEPAGE); + LASSERTF(MDS_IS_SUBDIR == 52, " found %lld\n", + (long long)MDS_IS_SUBDIR); LASSERTF(MDS_LAST_OPC == 53, " found %lld\n", (long long)MDS_LAST_OPC); LASSERTF(REINT_SETATTR == 1, " found %lld\n", @@ -470,6 +478,7 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_CONNECT_AT == 0x01000000ULL); CLASSERT(OBD_CONNECT_CANCELSET == 0x400000ULL); CLASSERT(OBD_CONNECT_LRU_RESIZE == 0x02000000ULL); + CLASSERT(OBD_CONNECT_SKIP_ORPHAN == 0x400000000ULL); /* Checks for struct obdo */ LASSERTF((int)sizeof(struct obdo) == 208, " found %lld\n", -- 1.8.3.1