Unupgraded ZFS servers may crash if they receive unaligned
DIO, so we need a compat flag and a check to recognize those
servers.
This patch implements that logic.
Fixes: 7194eb6431 ("LU-13805 clio: bounce buffer for unaligned DIO")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I5d6ee3fa5dca989c671417f35a981767ee55d6e2
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51126
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
* this DIO is at least partly unaligned, and so the unaligned DIO
* path is being used for this entire IO
*/
- ci_unaligned_dio:1;
+ ci_unaligned_dio:1,
+ /**
+ * there is a compat issue with unupgraded ZFS targets which means we
+ * must refuse to do unaligned DIO to these targets, so this is used
+ * to annotate that in the IO (since we learn if there is a problematic
+ * OST/MDT target as we build the IO)
+ */
+ ci_allow_unaligned_dio:1,
/**
* Bypass quota check
*/
- unsigned ci_noquota:1,
+ ci_noquota:1,
/**
* io_uring direct IO with flags IOCB_NOWAIT.
*/
OBD_CONNECT2_BATCH_RPC | \
OBD_CONNECT2_ENCRYPT_NAME | \
OBD_CONNECT2_ENCRYPT_FID2PATH | \
- OBD_CONNECT2_DMV_IMP_INHERIT)
+ OBD_CONNECT2_DMV_IMP_INHERIT |\
+ OBD_CONNECT2_UNALIGNED_DIO)
#define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
#define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\
OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\
OBD_CONNECT2_REP_MBITS |\
- OBD_CONNECT2_REPLAY_CREATE)
+ OBD_CONNECT2_REPLAY_CREATE |\
+ OBD_CONNECT2_UNALIGNED_DIO)
#define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2)
#define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS
data->ocd_connect_flags = ocd->ocd_connect_flags;
data->ocd_connect_flags2 = ocd->ocd_connect_flags2;
}
+ /* ldiskfs servers do not actually need patching to support unaligned
+ * DIO, so we always set the flag in that case
+ */
+ if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) {
+ /* > 2ULL << 59 implies ZFS, so this is ldiskfs */
+ if (data->ocd_maxbytes < (2ULL << 59))
+ data->ocd_connect_flags2 |= OBD_CONNECT2_UNALIGNED_DIO;
+ }
ptlrpc_pinger_add_import(imp);
/* FLR: only use non-delay I/O for read as there is only one
* avaliable mirror for write. */
io->ci_ndelay = !(iot == CIT_WRITE);
+ /* unaligned DIO has compat issues with some older servers, but we find
+ * out if there are such servers while setting up the IO, so it starts
+ * out allowed
+ */
+ io->ci_allow_unaligned_dio = true;
ll_io_set_mirror(io, file);
}
OBD_CONNECT2_REP_MBITS |
OBD_CONNECT2_ATOMIC_OPEN_LOCK |
OBD_CONNECT2_BATCH_RPC |
- OBD_CONNECT2_DMV_IMP_INHERIT;
+ OBD_CONNECT2_DMV_IMP_INHERIT |
+ OBD_CONNECT2_UNALIGNED_DIO;
#ifdef HAVE_LRU_RESIZE_SUPPORT
if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags))
OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK |
- OBD_CONNECT2_REP_MBITS;
+ OBD_CONNECT2_REP_MBITS |
+ OBD_CONNECT2_UNALIGNED_DIO;
if (!CFS_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
io = lcc->lcc_io;
LASSERT(io != NULL);
+ /* this means we encountered an old server which can't safely support
+ * unaligned DIO, so we have to disable it
+ */
+ if (unaligned && !cl_io_top(io)->ci_allow_unaligned_dio)
+ RETURN(-EINVAL);
+
/* if one part of an I/O is unaligned, just handle all of it that way -
* otherwise we create significant complexities with managing the iovec
* in different ways, etc, all for very marginal benefits
int osc_io_init(const struct lu_env *env,
struct cl_object *obj, struct cl_io *io)
{
+ struct obd_export *exp = osc_export(cl2osc(obj));
struct osc_io *oio = osc_env_io(env);
CL_IO_SLICE_CLEAN(oio, oi_cl);
cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops);
+ /* If exp_connect_unaligned_dio() reports false for this object's
+ * export, clear ci_allow_unaligned_dio on cl_io_top(io). This
+ * function only ever clears the flag, it never sets it.
+ * NOTE(review): presumably the helper tests
+ * OBD_CONNECT2_UNALIGNED_DIO in the export's connect data - confirm.
+ */
+ if (!exp_connect_unaligned_dio(exp))
+ cl_io_top(io)->ci_allow_unaligned_dio = false;
+
return 0;
}