Whamcloud - gitweb
LU-13805 llite: Implement unaligned DIO connect flag 26/51126/44
author Patrick Farrell <pfarrell@whamcloud.com>
Tue, 24 Oct 2023 18:29:27 +0000 (14:29 -0400)
committer Oleg Drokin <green@whamcloud.com>
Sat, 18 Nov 2023 21:41:43 +0000 (21:41 +0000)
Unupgraded ZFS servers may crash if they receive unaligned
DIO, so we need a compat flag and a test to recognize those
servers.

This patch implements that logic.

Fixes: 7194eb6431 ("LU-13805 clio: bounce buffer for unaligned DIO")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I5d6ee3fa5dca989c671417f35a981767ee55d6e2
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51126
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Sebastien Buisson <sbuisson@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/include/cl_object.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/ldlm/ldlm_lib.c
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/rw26.c
lustre/osc/osc_io.c

index e24e0cb..4b34ba1 100644 (file)
@@ -1945,11 +1945,18 @@ struct cl_io {
         * this DIO is at least partly unaligned, and so the unaligned DIO
         * path is being used for this entire IO
         */
-                            ci_unaligned_dio:1;
+                            ci_unaligned_dio:1,
+       /**
+        * there is a compat issue with unupgraded ZFS targets which means we
+        * must refuse to do unaligned DIO to these targets, so this is used
+        * to annotate that in the IO (since we learn if there is a problematic
+        * OST/MDT target as we build the IO)
+        */
+                            ci_allow_unaligned_dio:1,
        /**
         * Bypass quota check
         */
-       unsigned             ci_noquota:1,
+                            ci_noquota:1,
        /**
         * io_uring direct IO with flags IOCB_NOWAIT.
         */
index 07fc673..a1cf4a8 100644 (file)
@@ -943,7 +943,8 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT2_BATCH_RPC | \
                                OBD_CONNECT2_ENCRYPT_NAME | \
                                OBD_CONNECT2_ENCRYPT_FID2PATH | \
-                               OBD_CONNECT2_DMV_IMP_INHERIT)
+                               OBD_CONNECT2_DMV_IMP_INHERIT |\
+                               OBD_CONNECT2_UNALIGNED_DIO)
 
 #define OST_CONNECT_SUPPORTED  (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \
                                OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \
@@ -967,7 +968,8 @@ struct ptlrpc_body_v2 {
 #define OST_CONNECT_SUPPORTED2 (OBD_CONNECT2_LOCKAHEAD | OBD_CONNECT2_INC_XID |\
                                OBD_CONNECT2_ENCRYPT | OBD_CONNECT2_LSEEK |\
                                OBD_CONNECT2_REP_MBITS |\
-                               OBD_CONNECT2_REPLAY_CREATE)
+                               OBD_CONNECT2_REPLAY_CREATE |\
+                               OBD_CONNECT2_UNALIGNED_DIO)
 
 #define ECHO_CONNECT_SUPPORTED (OBD_CONNECT_FID | OBD_CONNECT_FLAGS2)
 #define ECHO_CONNECT_SUPPORTED2 OBD_CONNECT2_REP_MBITS
index ca44c1d..e64e79b 100644 (file)
@@ -670,6 +670,14 @@ int client_connect_import(const struct lu_env *env,
                data->ocd_connect_flags = ocd->ocd_connect_flags;
                data->ocd_connect_flags2 = ocd->ocd_connect_flags2;
        }
+       /* ldiskfs servers do not actually need patching to support unaligned
+        * DIO, so we always set the flag in that case
+        */
+       if (data->ocd_connect_flags & OBD_CONNECT_MAXBYTES) {
+               /* > 2ULL << 59 implies ZFS, so this is ldiskfs */
+               if (data->ocd_maxbytes < (2ULL << 59))
+                       data->ocd_connect_flags2 |= OBD_CONNECT2_UNALIGNED_DIO;
+       }
 
        ptlrpc_pinger_add_import(imp);
 
index f7b852a..1914e02 100644 (file)
@@ -1669,6 +1669,11 @@ void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot,
        /* FLR: only use non-delay I/O for read as there is only one
         * avaliable mirror for write. */
        io->ci_ndelay = !(iot == CIT_WRITE);
+       /* unaligned DIO has compat issues with some older servers, but we find
+        * out if there are such servers while setting up the IO, so it starts
+        * out allowed
+        */
+       io->ci_allow_unaligned_dio = true;
 
        ll_io_set_mirror(io, file);
 }
index 1baa507..7a060e6 100644 (file)
@@ -356,7 +356,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                   OBD_CONNECT2_REP_MBITS |
                                   OBD_CONNECT2_ATOMIC_OPEN_LOCK |
                                   OBD_CONNECT2_BATCH_RPC |
-                                  OBD_CONNECT2_DMV_IMP_INHERIT;
+                                  OBD_CONNECT2_DMV_IMP_INHERIT |
+                                  OBD_CONNECT2_UNALIGNED_DIO;
 
 #ifdef HAVE_LRU_RESIZE_SUPPORT
        if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags))
@@ -581,7 +582,8 @@ retry_connect:
                                  OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
        data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
                                   OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK |
-                                  OBD_CONNECT2_REP_MBITS;
+                                  OBD_CONNECT2_REP_MBITS |
+                                  OBD_CONNECT2_UNALIGNED_DIO;
 
        if (!CFS_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
                data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
index 142a693..4b45abc 100644 (file)
@@ -540,6 +540,12 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
        io = lcc->lcc_io;
        LASSERT(io != NULL);
 
+       /* this means we encountered an old server which can't safely support
+        * unaligned DIO, so we have to disable it
+        */
+       if (unaligned && !cl_io_top(io)->ci_allow_unaligned_dio)
+               RETURN(-EINVAL);
+
        /* if one part of an I/O is unaligned, just handle all of it that way -
         * otherwise we create significant complexities with managing the iovec
         * in different ways, etc, all for very marginal benefits
index 5030427..23ef563 100644 (file)
@@ -1344,10 +1344,15 @@ static const struct cl_io_operations osc_io_ops = {
 int osc_io_init(const struct lu_env *env,
                 struct cl_object *obj, struct cl_io *io)
 {
+       struct obd_export *exp = osc_export(cl2osc(obj));
         struct osc_io *oio = osc_env_io(env);
 
         CL_IO_SLICE_CLEAN(oio, oi_cl);
         cl_io_slice_add(io, &oio->oi_cl, obj, &osc_io_ops);
+
+       if (!exp_connect_unaligned_dio(exp))
+               cl_io_top(io)->ci_allow_unaligned_dio = false;
+
         return 0;
 }