Whamcloud - gitweb
LU-13805 llite: Implement unaligned DIO connect flag 26/51126/38
authorPatrick Farrell <pfarrell@whamcloud.com>
Sun, 21 May 2023 21:37:57 +0000 (17:37 -0400)
committerPatrick Farrell <pfarrell@whamcloud.com>
Sat, 9 Sep 2023 21:38:58 +0000 (17:38 -0400)
Unupgraded ZFS servers may crash if they receive unaligned
DIO, so we need a compat flag and a test to recognize those
servers.

This patch implements that logic.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I5d6ee3fa5dca989c671417f35a981767ee55d6e2

lustre/include/cl_object.h
lustre/include/lustre_import.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/ldlm/ldlm_lib.c
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/rw26.c
lustre/osc/osc_io.c
lustre/ptlrpc/wiretest.c
lustre/utils/wiretest.c

index 65cf21f..f28b0e9 100644 (file)
@@ -1937,11 +1937,18 @@ struct cl_io {
         * this DIO is at least partly unaligned, and so the unaligned DIO
         * path is being used for this entire IO
         */
-                            ci_unaligned_dio:1;
+                            ci_unaligned_dio:1,
+       /**
+        * there is a compat issue with unupgraded ZFS targets which means we
+        * must refuse to do unaligned DIO to these targets, so this is used
+        * to annotate that in the IO (since we learn if there is a problematic
+        * OST/MDT target as we build the IO)
+        */
+                            ci_allow_unaligned_dio:1,
        /**
         * Bypass quota check
         */
-       unsigned             ci_noquota:1,
+                            ci_noquota:1,
        /**
         * io_uring direct IO with flags IOCB_NOWAIT.
         */
index 71b9d14..e1ed170 100644 (file)
@@ -163,6 +163,12 @@ struct import_state_hist {
        time64_t                ish_time;
 };
 
+enum lustre_backing_fstype {   /* backing filesystem type stored in obd_import::imp_backing_fstype */
+       FSTYPE_LDISKFS  = 0,    /* zero value; set at connect when ocd_maxbytes <= (2ULL << 59) */
+       FSTYPE_ZFS      = 1,    /* set at connect when ocd_maxbytes > (2ULL << 59) */
+       FSTYPE_LAST     = 2,    /* NOTE(review): presumably an end marker, unused in the visible hunks - confirm */
+};
+
 /**
  * Definition of PortalRPC import structure.
  * Imports are representing client-side view to remote target.
@@ -333,6 +339,7 @@ struct obd_import {
        u32                       imp_idle_timeout;
        u32                       imp_idle_debug;
        struct obd_connect_data   imp_connect_data;
+       enum lustre_backing_fstype      imp_backing_fstype;
        __u64                     imp_connect_flags_orig;
        __u64                     imp_connect_flags2_orig;
        int                       imp_connect_error;
index 1728218..39f4f62 100644 (file)
@@ -923,7 +923,8 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT2_BATCH_RPC | \
                                OBD_CONNECT2_ENCRYPT_NAME | \
                                OBD_CONNECT2_ENCRYPT_FID2PATH | \
-                               OBD_CONNECT2_DMV_IMP_INHERIT)
+                               OBD_CONNECT2_DMV_IMP_INHERIT |\
+                               OBD_CONNECT2_UNALIGNED_DIO)
 
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
@@ -947,7 +948,8 @@ struct ptlrpc_body_v2 {
 #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\
                                OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\
                                OBD_CONNECT2_REP_MBITS |\
-                               OBD_CONNECT2_REPLAY_CREATE)
+                               OBD_CONNECT2_REPLAY_CREATE |\
+                               OBD_CONNECT2_UNALIGNED_DIO)
 
 #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2)
 #define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS
index 4c0d616..a587d59 100644 (file)
@@ -672,6 +672,18 @@ int client_connect_import(const struct lu_env *env,
                         data->ocd_connect_flags, ocd->ocd_connect_flags);
                data->ocd_connect_flags = ocd->ocd_connect_flags;
                data->ocd_connect_flags2 = ocd->ocd_connect_flags2;
+               if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) {
+                       /* ZFS maxbytes is ~2^63, ldiskfs maxbytes is ~2^44, so
+                        * this should be a reliable test
+                        * NB: Not using exact values as it seems likely either
+                        * one could change in the future, but should stay in
+                        * the same general range
+                        */
+                       if (data->ocd_maxbytes > (2ULL << 59))
+                               imp->imp_backing_fstype = FSTYPE_ZFS;
+                       else
+                               imp->imp_backing_fstype = FSTYPE_LDISKFS;
+               }
        }
 
        ptlrpc_pinger_add_import(imp);
index 6b0ebeb..c4d763a 100644 (file)
@@ -1669,6 +1669,11 @@ void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot,
        /* FLR: only use non-delay I/O for read as there is only one
         * available mirror for write. */
        io->ci_ndelay = !(iot == CIT_WRITE);
+       /* unaligned DIO has compat issues with some older servers, but we find
+        * out if there are such servers while setting up the IO, so it starts
+        * out allowed
+        */
+       io->ci_allow_unaligned_dio = true;
 
        ll_io_set_mirror(io, file);
 }
index b391e19..3d8ace3 100644 (file)
@@ -356,7 +356,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT2_REP_MBITS |
                                   OBD_CONNECT2_ATOMIC_OPEN_LOCK |
                                   OBD_CONNECT2_BATCH_RPC |
-                                  OBD_CONNECT2_DMV_IMP_INHERIT;
+                                  OBD_CONNECT2_DMV_IMP_INHERIT |
+                                  OBD_CONNECT2_UNALIGNED_DIO;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
        if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags))
@@ -581,7 +582,8 @@ retry_connect:
                                  OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
        data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
                                   OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK |
-                                  OBD_CONNECT2_REP_MBITS;
+                                  OBD_CONNECT2_REP_MBITS |
+                                  OBD_CONNECT2_UNALIGNED_DIO;
 
        if (!CFS_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
                data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
index 791feb0..57045dd 100644 (file)
@@ -540,6 +540,12 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
        io = lcc->lcc_io;
        LASSERT(io != NULL);
 
+       /* this means we encountered an old server which can't safely support
+        * unaligned DIO, so we have to disable it
+        */
+       if (unaligned && !cl_io_top(io)->ci_allow_unaligned_dio)
+               RETURN(-EINVAL);
+
        /* if one part of an I/O is unaligned, just handle all of it that way -
         * otherwise we create significant complexities with managing the iovec
         * in different ways, etc, all for very marginal benefits
index 215cb38..99b5e72 100644 (file)
@@ -1322,10 +1322,18 @@ static const struct cl_io_operations osc_io_ops = {
 int osc_io_init(const struct lu_env *env,
                 struct cl_object *obj, struct cl_io *io)
 {      /* adds the OSC slice to this IO and applies the unaligned DIO compat check; always returns 0 */
+       struct osc_object *osc = cl2osc(obj);
+       struct obd_import *imp = osc_cli(osc)->cl_import;
         struct osc_io *oio = osc_env_io(env);
+       struct obd_export *exp = osc_export(osc);
 
         CL_IO_SLICE_CLEAN(oio, oi_cl);
         cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops);
+       /* clear the allow flag on cl_io_top(io) when exp_connect_unaligned_dio() is false and the import is ZFS-backed */
+       if (!exp_connect_unaligned_dio(exp) &&
+           imp->imp_backing_fstype == FSTYPE_ZFS)
+               cl_io_top(io)->ci_allow_unaligned_dio = false; /* this function never sets it back to true */
+
         return 0;
 }
 
index 172903f..81bcd4c 100644 (file)
@@ -1458,7 +1458,6 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT2_COMPRESS);
        LASSERTF(OBD_CONNECT2_UNALIGNED_DIO == 0x400000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_UNALIGNED_DIO);
-
        LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)OBD_CKSUM_CRC32);
        LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
index b475413..295bfca 100644 (file)
@@ -1482,7 +1482,6 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT2_COMPRESS);
        LASSERTF(OBD_CONNECT2_UNALIGNED_DIO == 0x400000000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT2_UNALIGNED_DIO);
-
        LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
                (unsigned)OBD_CKSUM_CRC32);
        LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",