From: Liang Zhen Date: Sun, 7 Oct 2012 17:05:44 +0000 (+0800) Subject: LU-2101 clio: cl_sync_io_wait() need a barrier X-Git-Tag: 2.3.61~76 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=64f99d8993d8f4087de2adf59e9a834823f92898;hp=fac772adaa35947aeb3feb8c5c55f6c563b28de5 LU-2101 clio: cl_sync_io_wait() need a barrier cl_sync_io_note() has a small window between cfs_atomic_dec_and_test and cfs_waitq_broadcast, another thread might have destroyed anchor between this window, then cl_sync_io_note() is calling wakeup on a poisoned waitq and crash. Signed-off-by: Liang Zhen Change-Id: I04bfc738c8c5bec3a1e8b51131fae4199ef5ec9a Reviewed-on: http://review.whamcloud.com/5199 Reviewed-by: Johann Lombardi Tested-by: Hudson Reviewed-by: Jinshan Xiong Tested-by: Maloo --- diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index bd254e8..087e251 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -3193,12 +3193,14 @@ void cl_req_completion(const struct lu_env *env, struct cl_req *req, int ioret); * anchor and wakes up waiting thread when transfer is complete. */ struct cl_sync_io { - /** number of pages yet to be transferred. */ - cfs_atomic_t csi_sync_nr; - /** completion to be signaled when transfer is complete. */ - cfs_waitq_t csi_waitq; - /** error code. */ - int csi_sync_rc; + /** number of pages yet to be transferred. */ + cfs_atomic_t csi_sync_nr; + /** error code. */ + int csi_sync_rc; + /** barrier of destroy this structure */ + cfs_atomic_t csi_barrier; + /** completion to be signaled when transfer is complete. */ + cfs_waitq_t csi_waitq; }; void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages); diff --git a/lustre/obdclass/cl_io.c b/lustre/obdclass/cl_io.c index aed5f5e..ecfe76e 100644 --- a/lustre/obdclass/cl_io.c +++ b/lustre/obdclass/cl_io.c @@ -1683,11 +1683,12 @@ EXPORT_SYMBOL(cl_req_attr_set); */ void cl_sync_io_init(struct cl_sync_io *anchor, int nrpages) { - ENTRY; - cfs_waitq_init(&anchor->csi_waitq); - cfs_atomic_set(&anchor->csi_sync_nr, nrpages); - anchor->csi_sync_rc = 0; - EXIT; + ENTRY; + cfs_waitq_init(&anchor->csi_waitq); + cfs_atomic_set(&anchor->csi_sync_nr, nrpages); + cfs_atomic_set(&anchor->csi_barrier, nrpages > 0); + anchor->csi_sync_rc = 0; + EXIT; } EXPORT_SYMBOL(cl_sync_io_init); @@ -1725,8 +1726,16 @@ int cl_sync_io_wait(const struct lu_env *env, struct cl_io *io, } LASSERT(cfs_atomic_read(&anchor->csi_sync_nr) == 0); cl_page_list_assume(env, io, queue); - POISON(anchor, 0x5a, sizeof *anchor); - RETURN(rc); + + /* wait until cl_sync_io_note() has done wakeup */ + while (unlikely(cfs_atomic_read(&anchor->csi_barrier) != 0)) { +#ifdef __KERNEL__ + cpu_relax(); +#endif + } + + POISON(anchor, 0x5a, sizeof *anchor); + RETURN(rc); } EXPORT_SYMBOL(cl_sync_io_wait); @@ -1744,8 +1753,11 @@ void cl_sync_io_note(struct cl_sync_io *anchor, int ioret) * IO. */ LASSERT(cfs_atomic_read(&anchor->csi_sync_nr) > 0); - if (cfs_atomic_dec_and_test(&anchor->csi_sync_nr)) - cfs_waitq_broadcast(&anchor->csi_waitq); - EXIT; + if (cfs_atomic_dec_and_test(&anchor->csi_sync_nr)) { + cfs_waitq_broadcast(&anchor->csi_waitq); + /* it's safe to nuke or reuse anchor now */ + cfs_atomic_set(&anchor->csi_barrier, 0); + } + EXIT; } EXPORT_SYMBOL(cl_sync_io_note);