From 0dfd5314e044da1dc261acac66900e5c6f752771 Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 13 Feb 2007 19:31:12 +0000 Subject: [PATCH] Branch HEAD Description: RPCs being resent when they shouldn't be Details : Some RPCs that should not be resent are being resent. This can cause inconsistencies in the RPC state machine. Do not resend such requests. b=11203 r=nathan r=green Fix 2.4 kernel compile error introduced by 10866 landing (missing typecheck). --- lustre/ChangeLog | 45 +++++++++++++++++++++++++--------- lustre/include/linux/lustre_compat25.h | 13 ++++++++++ lustre/ldlm/ldlm_resource.c | 7 +++--- lustre/llite/super.c | 4 +-- lustre/ptlrpc/recover.c | 3 ++- 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 19f16fa..62beebf 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -5,7 +5,7 @@ tbd Cluster File Systems, Inc. special upgrade step is needed. Please read the user documentation before upgrading any part of a live system. * WIRE PROTOCOL CHANGE from previous 1.6 beta versions. This - version will not interoperate with 1.6 betas before beta7 (1.5.97). + version will not interoperate with 1.6 betas before beta7 (1.5.97). * WARNING: Lustre configuration and startup changes are required with this release. See https://mail.clusterfs.com/wikis/lustre/MountConf for details. @@ -176,14 +176,33 @@ Details : Check the req->rq_arrival time after the call to Severity : major Frequency : when an incorrect nid is specified during startup -Bugzilla : 10743 +Bugzilla : 10734 Description: ptlrpc connect to non-existant node causes kernel crash Details : LNET can't be re-entered from an event callback, which happened when we expire a message after the export has been cleaned up. Instead, hand the zombie cleanup off to another thread. +Severity : enhancement +Bugzilla : 10902 +Description: plain/inodebits lock performance improvement +Details : Grouping plain/inodebits in granted list by their request modes + and bits policy, thus improving the performance of search through + the granted list. + +------------------------------------------------------------------------------ + +TBD Cluster File Systems, Inc. + * version 1.4.10 + * Support for kernels: + 2.6.9-42.0.3EL (RHEL 4) + 2.6.5-7.276 (SLES 9) + 2.4.21-47.0.1.EL (RHEL 3) + 2.6.12.6 vanilla (kernel.org) + * Recommended e2fsprogs version: 1.39.cfs2-0 + Severity : normal +Frequency : always Bugzilla : 10214 Description: make O_SYNC working on 2.6 kernels Details : 2.6 kernels use different method for mark pages for write, @@ -197,25 +216,27 @@ Details : Put inode details into lock acquired in ll_intent_file_open. Use mdc_intent_lock in ll_intent_open to properly detect all kind of errors unhandled by mdc_enqueue -Severity : enhancement -Bugzilla : 10902 -Description: plain/inodebits lock performance improvement -Details : Grouping plain/inodebits in granted list by their request modes - and bits policy, thus improving the performance of search through - the granted list. - Severity : major Frequency : rare Bugzilla : 10866 Description: proc file read during shutdown sometimes raced obd removal, causing node crash -Details : Add lock to prevent obd access after proc file removal +Details : Add lock to prevent obd access after proc file removal. Severity : normal +Frequency : Only for files larger than 4GB on 32-bit clients. Bugzilla : 11237 Description: improperly doing page alignment of locks Details : Modify lustre core code to use CFS_PAGE_* defines instead of - PAGE_*. Make CFS_PAGE_MASK 64bit long. + PAGE_*. Make CFS_PAGE_MASK 64bit long. + +Severity : normal +Frequency : rarely +Bugzilla : 11203 +Description: RPCs being resent when they shouldn't be +Details : Some RPCs that should not be resent are being resent. This + can cause inconsistencies in the RPC state machine. Do not + resend such requests. ------------------------------------------------------------------------------ @@ -985,7 +1006,7 @@ Severity : minor Frequency : always Bugzilla : 10611 Description: Inability to activate failout mode -Details : lconf script incorrectly assumed that in pythong string's numeric +Details : lconf script incorrectly assumed that in python string's numeric value is used in comparisons. Severity : minor diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index 8139856..b0f2cb94 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -397,6 +397,19 @@ static inline void file_accessed(struct file *file) touch_atime(file->f_vfsmnt, file->f_dentry); } +#ifndef typecheck +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) +#endif + #endif /* end of 2.4 compat macros */ #ifdef HAVE_PAGE_LIST diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c index 32c2334..5d7a8fa7 100644 --- a/lustre/ldlm/ldlm_resource.c +++ b/lustre/ldlm/ldlm_resource.c @@ -664,10 +664,11 @@ int ldlm_resource_putref(struct ldlm_resource *res) CDEBUG(D_INFO, "putref res: %p count: %d\n", res, atomic_read(&res->lr_refcount) - 1); - LASSERT(atomic_read(&res->lr_refcount) > 0); - LASSERT(atomic_read(&res->lr_refcount) < LI_POISON); + LASSERTF(atomic_read(&res->lr_refcount) > 0, "%d", + atomic_read(&res->lr_refcount)); + LASSERTF(atomic_read(&res->lr_refcount) < LI_POISON, "%d", + atomic_read(&res->lr_refcount)); - LASSERT(atomic_read(&res->lr_refcount) >= 0); if (atomic_dec_and_lock(&res->lr_refcount, &ns->ns_hash_lock)) { __ldlm_resource_putref_final(res); spin_unlock(&ns->ns_hash_lock); diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 66d186d..4543b52 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -60,7 +60,7 @@ void lustre_register_client_process_config(int (*cpc)(struct lustre_cfg *lcfg)); static int __init init_lustre_lite(void) { - int i, rc, seed[2]; + int i, seed[2]; struct timeval tv; lnet_process_id_t lnet_id; @@ -84,7 +84,7 @@ static int __init init_lustre_lite(void) /* Nodes with small feet have little entropy * the NID for this node gives the most entropy in the low bits */ - for (i=0; ; i++) { + for (i = 0; ; i++) { if (LNetGetId(i, &lnet_id) == -ENOENT) { break; } diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 114e17a..2d57808 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -154,7 +154,8 @@ int ptlrpc_resend(struct obd_import *imp) LASSERTF((long)req > CFS_PAGE_SIZE && req != LP_POISON, "req %p bad\n", req); LASSERTF(req->rq_type != LI_POISON, "req %p freed\n", req); - ptlrpc_resend_req(req); + if (!req->rq_no_resend) + ptlrpc_resend_req(req); } RETURN(0); -- 1.8.3.1