From bc34babc1765f6f99220256e96ce5dc5bb390676 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Mon, 14 Jul 2014 23:06:09 -0400 Subject: [PATCH] LU-5331 obdclass: serialize lu_site purge The umount process relies on lu_site_purge(-1) to purge all objects before umount. However, if a cache shrinker happens to call lu_site_purge(nr) in parallel, some objects may still be in the process of being freed by the cache shrinker even after the lu_site_purge(-1) called by umount has returned. This can be fixed simply by serializing the purge threads, since it doesn't make any sense to run them in parallel. Signed-off-by: Niu Yawei Change-Id: Iec5d7fd1d5714ad0e4bd12a50a837858a60b2b1c Reviewed-on: http://review.whamcloud.com/11099 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/lu_object.h | 5 ++++- lustre/obdclass/lu_object.c | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index d3b234d..96b65869 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -626,7 +626,10 @@ struct lu_site { **/ cfs_list_t ls_ld_linkage; spinlock_t ls_ld_lock; - + /** + * Lock to serialize site purge. + */ + struct mutex ls_purge_mutex; /** * lu_site stats */ diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 618c5bc..86ccdc4 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -367,6 +367,11 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) start = s->ls_purge_start; bnr = (nr == ~0) ? -1 : nr / CFS_HASH_NBKT(s->ls_obj_hash) + 1; again: + /* + * It doesn't make any sense to make purge threads parallel, that can + * only bring troubles to us. See LU-5331. 
+ */ + mutex_lock(&s->ls_purge_mutex); did_sth = 0; cfs_hash_for_each_bucket(s->ls_obj_hash, &bd, i) { if (i < start) @@ -411,6 +416,7 @@ int lu_site_purge(const struct lu_env *env, struct lu_site *s, int nr) if (nr == 0) break; } + mutex_unlock(&s->ls_purge_mutex); if (nr != 0 && did_sth && start != 0) { start = 0; /* restart from the first bucket */ @@ -1095,6 +1101,7 @@ int lu_site_init(struct lu_site *s, struct lu_device *top) ENTRY; memset(s, 0, sizeof *s); + mutex_init(&s->ls_purge_mutex); bits = lu_htable_order(top); snprintf(name, 16, "lu_site_%s", top->ld_type->ldt_name); for (bits = min(max(LU_SITE_BITS_MIN, bits), LU_SITE_BITS_MAX); -- 1.8.3.1