From 64f99d8993d8f4087de2adf59e9a834823f92898 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Mon, 8 Oct 2012 01:05:44 +0800 Subject: [PATCH] LU-2101 clio: cl_sync_io_wait() need a barrier cl_sync_io_note() has a small window between cfs_atomic_dec_and_test and cfs_waitq_broadcast, another thread might have destroyed anchor between this window, then cl_sync_io_note() is calling wakeup on a poisoned waitq and crash. Signed-off-by: Liang Zhen Change-Id: I04bfc738c8c5bec3a1e8b51131fae4199ef5ec9a Reviewed-on: http://review.whamcloud.com/5199 Reviewed-by: Johann Lombardi Tested-by: Hudson Reviewed-by: Jinshan Xiong Tested-by: Maloo --- lustre/include/cl_object.h | 14 ++++++++------ lustre/obdclass/cl_io.c | 32 ++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index bd254e8..087e251 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -3193,12 +3193,14 @@ void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret); * anchor and wakes up waiting thread when transfer is complete. */ struct cl_sync_io { - /** number of pages yet to be transferred. */ - cfs_atomic_t csi_sync_nr; - /** completion to be signaled when transfer is complete. */ - cfs_waitq_t csi_waitq; - /** error code. */ - int csi_sync_rc; + /** number of pages yet to be transferred. */ + cfs_atomic_t csi_sync_nr; + /** error code. */ + int csi_sync_rc; + /** barrier of destroy this structure */ + cfs_atomic_t csi_barrier; + /** completion to be signaled when transfer is complete. */ + cfs_waitq_t csi_waitq; }; void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages); diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index aed5f5e..ecfe76e 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -1683,11 +1683,12 @@ EXPORT_SYMBOL(cl_req_attr_set); */ void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages) { - ENTRY; - cfs_waitq_init(&anchor->csi_waitq); - cfs_atomic_set(&anchor->csi_sync_nr, nrpages); - anchor->csi_sync_rc = 0; - EXIT; + ENTRY; + cfs_waitq_init(&anchor->csi_waitq); + cfs_atomic_set(&anchor->csi_sync_nr, nrpages); + cfs_atomic_set(&anchor->csi_barrier, nrpages > 0); + anchor->csi_sync_rc = 0; + EXIT; } EXPORT_SYMBOL(cl_sync_io_init); @@ -1725,8 +1726,16 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, } LASSERT(cfs_atomic_read(&anchor->csi_sync_nr) == 0); cl_page_list_assume(env, io, queue); - POISON(anchor, 0x5a, sizeof *anchor); - RETURN(rc); + + /* wait until cl_sync_io_note() has done wakeup */ + while (unlikely(cfs_atomic_read(&anchor->csi_barrier) != 0)) { +#ifdef __KERNEL__ + cpu_relax(); +#endif + } + + POISON(anchor, 0x5a, sizeof *anchor); + RETURN(rc); } EXPORT_SYMBOL(cl_sync_io_wait); @@ -1744,8 +1753,11 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) * IO. */ LASSERT(cfs_atomic_read(&anchor->csi_sync_nr) > 0); - if (cfs_atomic_dec_and_test(&anchor->csi_sync_nr)) - cfs_waitq_broadcast(&anchor->csi_waitq); - EXIT; + if (cfs_atomic_dec_and_test(&anchor->csi_sync_nr)) { + cfs_waitq_broadcast(&anchor->csi_waitq); + /* it's safe to nuke or reuse anchor now */ + cfs_atomic_set(&anchor->csi_barrier, 0); + } + EXIT; } EXPORT_SYMBOL(cl_sync_io_note); -- 1.8.3.1