From 0f8db7e06abbc341e1ecc6ae164fca7b4a040c4a Mon Sep 17 00:00:00 2001
From: Patrick Farrell
Date: Fri, 2 Jul 2021 13:24:48 -0400
Subject: [PATCH] LU-14805 llite: No locked parallel DIO

If we are doing locked DIO, the OSC & LDLM locks are
released at the end of cl_io_loop, i.e., before we wait for
parallel DIO at the llite layer.

This is problematic because the locks are released before
the i/o done using them is complete; this can lead to data
inconsistencies.  (And at least one LBUG, see LU-14805.)

The easiest solution for now is to only do parallel DIO when
working lockless (which is the default; DIO only switches
to locked to manage conflicts with buffered i/o).

This problem & fix apply to AIO as well as parallel DIO.

Signed-off-by: Patrick Farrell
Change-Id: If98a0551d6dde54220b406b26e978e284a6b1ebf
Reviewed-on: https://review.whamcloud.com/44131
Tested-by: jenkins
Reviewed-by: Wang Shilong
Tested-by: Maloo
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
 lustre/llite/rw26.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 36d9c56..d4122613 100644
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -518,10 +518,19 @@ out:
 	else
 		vio->u.readwrite.vui_read += tot_bytes;
 
-	/* If async dio submission is not allowed, we must wait here. */
-	if (is_sync_kiocb(iocb) && !io->ci_parallel_dio) {
+	/* We cannot do async submission - for AIO or regular DIO - unless
+	 * lockless because it causes us to release the lock early.
+	 *
+	 * There are also several circumstances in which we must disable
+	 * parallel DIO, so we check if it is enabled.
+	 *
+	 * The check for "is_sync_kiocb" excludes AIO, which does not need to
+	 * be disabled in these situations.
+	 */
+	if (io->ci_dio_lock || (is_sync_kiocb(iocb) && !io->ci_parallel_dio)) {
 		ssize_t rc2;
 
+		/* Wait here rather than doing async submission */
 		rc2 = cl_sync_io_wait_recycle(env, &aio->cda_sync, 0, 0);
 		if (result == 0 && rc2)
 			result = rc2;
-- 
1.8.3.1