From cda70f7911cadb315c0ebe29938ac794881d57e2 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Sun, 21 May 2023 17:37:57 -0400 Subject: [PATCH] LU-13805 llite: Implement unaligned DIO connect flag Unupgraded ZFS servers may crash if they receive unaligned DIO, so we need a compat flag and a test to recognize those servers. This patch implements that logic. Signed-off-by: Patrick Farrell Change-Id: I5d6ee3fa5dca989c671417f35a981767ee55d6e2 --- lustre/include/cl_object.h | 11 +++++++++-- lustre/include/lustre_import.h | 7 +++++++ lustre/include/uapi/linux/lustre/lustre_idl.h | 6 ++++-- lustre/ldlm/ldlm_lib.c | 12 ++++++++++++ lustre/llite/file.c | 5 +++++ lustre/llite/llite_lib.c | 6 ++++-- lustre/llite/rw26.c | 6 ++++++ lustre/osc/osc_io.c | 8 ++++++++ lustre/ptlrpc/wiretest.c | 1 - lustre/utils/wiretest.c | 1 - 10 files changed, 55 insertions(+), 8 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 65cf21f..f28b0e9 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1937,11 +1937,18 @@ struct cl_io { * this DIO is at least partly unaligned, and so the unaligned DIO * path is being used for this entire IO */ - ci_unaligned_dio:1; + ci_unaligned_dio:1, + /** + * there is a compat issue with unupgraded ZFS targets which means we + * must refuse to do unaligned DIO to these targets, so this is used + * to annotate that in the IO (since we learn if there is a problematic + * OST/MDT target as we build the IO) + */ + ci_allow_unaligned_dio:1, /** * Bypass quota check */ - unsigned ci_noquota:1, + ci_noquota:1, /** * io_uring direct IO with flags IOCB_NOWAIT. 
*/ diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 71b9d14..e1ed170 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -163,6 +163,12 @@ struct import_state_hist { time64_t ish_time; }; +enum lustre_backing_fstype { + FSTYPE_LDISKFS = 0, + FSTYPE_ZFS = 1, + FSTYPE_LAST = 2, +}; + /** * Defintion of PortalRPC import structure. * Imports are representing client-side view to remote target. @@ -333,6 +339,7 @@ struct obd_import { u32 imp_idle_timeout; u32 imp_idle_debug; struct obd_connect_data imp_connect_data; + enum lustre_backing_fstype imp_backing_fstype; __u64 imp_connect_flags_orig; __u64 imp_connect_flags2_orig; int imp_connect_error; diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 1728218..39f4f62 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -923,7 +923,8 @@ struct ptlrpc_body_v2 { OBD_CONNECT2_BATCH_RPC | \ OBD_CONNECT2_ENCRYPT_NAME | \ OBD_CONNECT2_ENCRYPT_FID2PATH | \ - OBD_CONNECT2_DMV_IMP_INHERIT) + OBD_CONNECT2_DMV_IMP_INHERIT |\ + OBD_CONNECT2_UNALIGNED_DIO) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ @@ -947,7 +948,8 @@ struct ptlrpc_body_v2 { #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\ OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\ OBD_CONNECT2_REP_MBITS |\ - OBD_CONNECT2_REPLAY_CREATE) + OBD_CONNECT2_REPLAY_CREATE |\ + OBD_CONNECT2_UNALIGNED_DIO) #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2) #define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 4c0d616..a587d59 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -672,6 +672,18 @@ int client_connect_import(const struct lu_env *env, data->ocd_connect_flags, ocd->ocd_connect_flags); 
data->ocd_connect_flags = ocd->ocd_connect_flags; data->ocd_connect_flags2 = ocd->ocd_connect_flags2; + if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) { + /* ZFS maxbytes is ~2^63, ldiskfs maxbytes is ~2^44, so + * this should be a reliable test + * NB: Not using exact values as it seems likely either + * one could change in the future, but should stay in + * the same general range + */ + if (data->ocd_maxbytes > (2ULL << 59)) + imp->imp_backing_fstype = FSTYPE_ZFS; + else + imp->imp_backing_fstype = FSTYPE_LDISKFS; + } } ptlrpc_pinger_add_import(imp); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 6b0ebeb..c4d763a 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -1669,6 +1669,11 @@ void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot, /* FLR: only use non-delay I/O for read as there is only one * avaliable mirror for write. */ io->ci_ndelay = !(iot == CIT_WRITE); + /* unaligned DIO has compat issues with some older servers, but we find + * out if there are such servers while setting up the IO, so it starts + * out allowed + */ + io->ci_allow_unaligned_dio = true; ll_io_set_mirror(io, file); } diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index b391e19..3d8ace3 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -356,7 +356,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT2_REP_MBITS | OBD_CONNECT2_ATOMIC_OPEN_LOCK | OBD_CONNECT2_BATCH_RPC | - OBD_CONNECT2_DMV_IMP_INHERIT; + OBD_CONNECT2_DMV_IMP_INHERIT | + OBD_CONNECT2_UNALIGNED_DIO; #ifdef HAVE_LRU_RESIZE_SUPPORT if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags)) @@ -581,7 +582,8 @@ retry_connect: OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK; data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK | - OBD_CONNECT2_REP_MBITS; + OBD_CONNECT2_REP_MBITS | + OBD_CONNECT2_UNALIGNED_DIO; if (!CFS_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM)) 
data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM; diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 791feb0..57045dd 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -540,6 +540,12 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw) io = lcc->lcc_io; LASSERT(io != NULL); + /* this means we encountered an old server which can't safely support + * unaligned DIO, so we have to disable it + */ + if (unaligned && !cl_io_top(io)->ci_allow_unaligned_dio) + RETURN(-EINVAL); + /* if one part of an I/O is unaligned, just handle all of it that way - * otherwise we create significant complexities with managing the iovec * in different ways, etc, all for very marginal benefits diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 215cb38..99b5e72 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -1322,10 +1322,18 @@ static const struct cl_io_operations osc_io_ops = { int osc_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io) { + struct osc_object *osc = cl2osc(obj); + struct obd_import *imp = osc_cli(osc)->cl_import; struct osc_io *oio = osc_env_io(env); + struct obd_export *exp = osc_export(osc); CL_IO_SLICE_CLEAN(oio, oi_cl); cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops); + + if (!exp_connect_unaligned_dio(exp) && + imp->imp_backing_fstype == FSTYPE_ZFS) + cl_io_top(io)->ci_allow_unaligned_dio = false; + return 0; } diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 172903f..81bcd4c 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1458,7 +1458,6 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_COMPRESS); LASSERTF(OBD_CONNECT2_UNALIGNED_DIO == 0x400000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_UNALIGNED_DIO); - LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 
b475413..295bfca 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -1482,7 +1482,6 @@ void lustre_assert_wire_constants(void) OBD_CONNECT2_COMPRESS); LASSERTF(OBD_CONNECT2_UNALIGNED_DIO == 0x400000000ULL, "found 0x%.16llxULL\n", OBD_CONNECT2_UNALIGNED_DIO); - LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n", (unsigned)OBD_CKSUM_CRC32); LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n", -- 1.8.3.1