From 0e80dc9a2e053f9ee0c9ffe488463fee1c2b1ba4 Mon Sep 17 00:00:00 2001 From: johann Date: Thu, 17 Sep 2009 21:01:06 +0000 Subject: [PATCH] Branch b_release_1_8_1 b=20518 i=rread i=tappro Make sure all exports have been processed and properly destroyed by the zombie thread before stopping the target. --- lustre/ChangeLog | 20 ++++++++++++++------ lustre/include/obd_class.h | 1 + lustre/mds/mds_fs.c | 6 ++++++ lustre/obdclass/genops.c | 31 +++++++++++++++++++++++++++++++ lustre/obdfilter/filter.c | 5 +++++ 5 files changed, 57 insertions(+), 6 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index adb076d..c39153e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -15,11 +15,17 @@ TBD Sun Microsystems, Inc. * ext4 support for RHEL5 is experimental and thus should not be used in production. +Severity : major +Bugzilla : 20560 +Description: File checksum failures with OST read cache on +Details : Disable page poisoning when the bulk transfer has to be aborted + because the client got evicted. + Severity : normal Bugzilla : 19557 Description: Don't allow make backward step on assiging osc next id. Details : race between allocation next id and ll_sync thread can be cause - of set wrong osc next id and can be kill valid ost objects. + of set wrong osc next id and can be kill valid ost objects. Severity : enhancement Bugzilla : 20400 @@ -29,11 +35,6 @@ Severity : enhancement Bugzilla : 20758 Description: Update kernel to SLES10 SP2 2.6.16.60-0.42.4. -Severity : major -Bugzilla : 20560 -Description: File checksum failures with OST read cache on -Details : Clear page cache's uptodate bit when its contents are cleared. - Severity : normal Bugzilla : 20533 Description: Changes in raid5-large-io-rhel5.patch to calculate sectors properly @@ -111,6 +112,13 @@ Details : if a client doesn't respond to a blocking callback within the adaptive ldlm enqueue timeout, don't adjust the adaptive estimate when the lock is next granted. 
+Severity : normal +Bugzilla : 20518 +Description: OST is being unmounted w/o all writes to last_rcvd landing on disk. + Affects recovery negatively. +Details : Make sure all exports have been processed and destroyed by the + zombie thread before stopping the target. + ------------------------------------------------------------------------------- diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 8088f0d..35fdeb8 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -102,6 +102,7 @@ int obd_export_evict_by_uuid(struct obd_device *obd, char *uuid); int obd_zombie_impexp_init(void); void obd_zombie_impexp_stop(void); void obd_zombie_impexp_cull(void); +void obd_zombie_barrier(void); /* obd_config.c */ int class_process_config(struct lustre_cfg *lcfg); diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index a48a69c..941f7b5 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -884,6 +884,12 @@ int mds_fs_cleanup(struct obd_device *obd) "will be preserved.\n", obd->obd_name); class_disconnect_exports(obd); /* cleans up client info too */ + + /* some exports may still be in the zombie queue, so we make sure that + * all the exports have been processed, otherwise the last_rcvd slot + * may not be updated in time */ + obd_zombie_barrier(); + mds_server_free_data(mds); push_ctxt(saved, &obd->obd_lvfs_ctxt, NULL); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 9f3ae9f..b328b95 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -1490,6 +1490,31 @@ static void obd_zombie_impexp_notify(void) cfs_waitq_signal(&obd_zombie_waitq); } +/** + * check whether obd_zombie is idle + */ +static int obd_zombie_is_idle(void) +{ + int rc; + + LASSERT(!test_bit(OBD_ZOMBIE_STOP, &obd_zombie_flags)); + spin_lock(&obd_zombie_impexp_lock); + rc = list_empty(&obd_zombie_imports) && + list_empty(&obd_zombie_exports); + spin_unlock(&obd_zombie_impexp_lock); + return rc; +} + +/** + * wait 
until the obd_zombie import/export queues become empty + */ +void obd_zombie_barrier(void) +{ + struct l_wait_info lwi = { 0 }; + l_wait_event(obd_zombie_waitq, obd_zombie_is_idle(), &lwi); +} +EXPORT_SYMBOL(obd_zombie_barrier); + #ifdef __KERNEL__ static int obd_zombie_impexp_thread(void *unused) @@ -1509,6 +1534,12 @@ static int obd_zombie_impexp_thread(void *unused) l_wait_event(obd_zombie_waitq, !obd_zombi_impexp_check(NULL), &lwi); obd_zombie_impexp_cull(); + + /* + * Notify obd_zombie_barrier callers that the queues + * may be empty. + */ + cfs_waitq_signal(&obd_zombie_waitq); } complete(&obd_zombie_stop); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index ee6007f..3cb1d67 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -2239,6 +2239,11 @@ static int filter_cleanup(struct obd_device *obd) } } + /* some exports may still be in the zombie queue, so we make sure that + * all the exports have been processed, otherwise the last_rcvd slot + * may not be updated in time */ + obd_zombie_barrier(); + remove_proc_entry("clear", obd->obd_proc_exports_entry); lprocfs_free_per_client_stats(obd); lprocfs_free_obd_stats(obd); -- 1.8.3.1