Whamcloud - gitweb
LU-12275 sec: O_DIRECT for encrypted file 67/38967/15
author Sebastien Buisson <sbuisson@ddn.com>
Wed, 17 Jun 2020 16:03:04 +0000 (16:03 +0000)
committer Oleg Drokin <green@whamcloud.com>
Tue, 8 Sep 2020 18:07:38 +0000 (18:07 +0000)
Add O_DIRECT support for encrypted files.
By default, fscrypt does not support O_DIRECT because it needs
page cache pages to perform encryption/decryption.
With Lustre, we can instead make use of the pages used for sending RPCs.
They can be twisted so that they carry a proper mapping and index,
which makes them suitable for encryption/decryption.

One of the benefits of O_DIRECT support for encrypted files is that
we get support for mirroring at the same time.

Test-Parameters: testlist=sanity-sec envdefinitions=ONLY="36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 52" clientdistro=el8.1 fstype=ldiskfs mdscount=2 mdtcount=4
Test-Parameters: testlist=sanity-sec envdefinitions=ONLY="36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 52" clientdistro=el8.1 fstype=zfs mdscount=2 mdtcount=4
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I12f61c44b55f3a454f38016200f81eb735ab8f18
Reviewed-on: https://review.whamcloud.com/38967
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Documentation/client_side_encryption/access_semantics.txt
lustre/llite/llite_lib.c
lustre/llite/rw26.c
lustre/obdclass/cl_io.c
lustre/osc/osc_request.c
lustre/tests/sanity-sec.sh

index fe2c28d..7ed0bc7 100644 (file)
@@ -42,9 +42,6 @@ astute users may notice some differences in behavior:
   may be used to overwrite the source files but isn't guaranteed to be
   effective on all filesystems and storage devices.
 
   may be used to overwrite the source files but isn't guaranteed to be
   effective on all filesystems and storage devices.
 
-- Direct I/O is not supported on encrypted files.  Attempts to use
-  direct I/O on such files will fall back to buffered I/O.
-
 - The fallocate operations FALLOC_FL_COLLAPSE_RANGE,
   FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported
   on encrypted files and will fail with EOPNOTSUPP.
 - The fallocate operations FALLOC_FL_COLLAPSE_RANGE,
   FALLOC_FL_INSERT_RANGE, and FALLOC_FL_ZERO_RANGE are not supported
   on encrypted files and will fail with EOPNOTSUPP.
index 4c647df..a3b4a04 100644 (file)
@@ -2035,7 +2035,15 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                            attr->ia_valid & ATTR_SIZE) {
                                xvalid |= OP_XVALID_FLAGS;
                                flags = LUSTRE_ENCRYPT_FL;
                            attr->ia_valid & ATTR_SIZE) {
                                xvalid |= OP_XVALID_FLAGS;
                                flags = LUSTRE_ENCRYPT_FL;
-                               if (attr->ia_size & ~PAGE_MASK) {
+                               /* Call to ll_io_zero_page is not necessary if
+                                * truncating on PAGE_SIZE boundary, because
+                                * whole pages will be wiped.
+                                * In case of Direct IO, all we need is to set
+                                * new size.
+                                */
+                               if (attr->ia_size & ~PAGE_MASK &&
+                                   !(attr->ia_valid & ATTR_FILE &&
+                                     attr->ia_file->f_flags & O_DIRECT)) {
                                        pgoff_t offset =
                                                attr->ia_size & (PAGE_SIZE - 1);
 
                                        pgoff_t offset =
                                                attr->ia_size & (PAGE_SIZE - 1);
 
index 94b2436..65024f2 100644 (file)
@@ -308,6 +308,7 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
        int io_pages    = 0;
        size_t page_size = cl_page_size(obj);
        int i;
        int io_pages    = 0;
        size_t page_size = cl_page_size(obj);
        int i;
+       pgoff_t index = offset >> PAGE_SHIFT;
        ssize_t rc = 0;
 
        ENTRY;
        ssize_t rc = 0;
 
        ENTRY;
@@ -329,6 +330,28 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, size_t size,
                }
 
                page->cp_sync_io = anchor;
                }
 
                page->cp_sync_io = anchor;
+               if (inode && IS_ENCRYPTED(inode)) {
+                       struct page *vmpage = cl_page_vmpage(page);
+
+                       /* In case of Direct IO on encrypted file, we need to
+                        * set the correct page index, and add a reference to
+                        * the mapping. This is required by llcrypt to proceed
+                        * to encryption/decryption, because each block is
+                        * encrypted independently, and each block's IV is set
+                        * to the logical block number within the file.
+                        * This is safe because we know these pages are private
+                        * to the thread doing the Direct IO, and despite
+                        * setting a mapping on the pages, cached lookups will
+                        * not find them.
+                        * Set PageChecked to detect special case of Direct IO
+                        * in osc_brw_fini_request().
+                        * Reference to the mapping and PageChecked flag are
+                        * removed in cl_aio_end().
+                        */
+                       vmpage->index = index++;
+                       vmpage->mapping = inode->i_mapping;
+                       SetPageChecked(vmpage);
+               }
                cl_2queue_add(queue, page);
                /*
                 * Set page clip to tell transfer formation engine
                cl_2queue_add(queue, page);
                /*
                 * Set page clip to tell transfer formation engine
@@ -405,10 +428,6 @@ ll_direct_IO_impl(struct kiocb *iocb, struct iov_iter *iter, int rw)
        loff_t file_offset = iocb->ki_pos;
        struct vvp_io *vio;
 
        loff_t file_offset = iocb->ki_pos;
        struct vvp_io *vio;
 
-       /* if file is encrypted, return 0 so that we fall back to buffered IO */
-       if (IS_ENCRYPTED(inode))
-               return 0;
-
        /* Check EOF by ourselves */
        if (rw == READ && file_offset >= i_size_read(inode))
                return 0;
        /* Check EOF by ourselves */
        if (rw == READ && file_offset >= i_size_read(inode))
                return 0;
index 9c1a553..705aa89 100644 (file)
@@ -45,6 +45,7 @@
 #include <lustre_fid.h>
 #include <cl_object.h>
 #include "cl_internal.h"
 #include <lustre_fid.h>
 #include <cl_object.h>
 #include "cl_internal.h"
+#include <libcfs/crypto/llcrypt.h>
 
 /*****************************************************************************
  *
 
 /*****************************************************************************
  *
@@ -1166,8 +1167,19 @@ static void cl_aio_end(const struct lu_env *env, struct cl_sync_io *anchor)
        /* release pages */
        while (aio->cda_pages.pl_nr > 0) {
                struct cl_page *page = cl_page_list_first(&aio->cda_pages);
        /* release pages */
        while (aio->cda_pages.pl_nr > 0) {
                struct cl_page *page = cl_page_list_first(&aio->cda_pages);
+               struct page *vmpage = cl_page_vmpage(page);
+               struct inode *inode = vmpage ? page2inode(vmpage) : NULL;
 
                cl_page_get(page);
 
                cl_page_get(page);
+               /* We end up here in case of Direct IO only. For encrypted file,
+                * mapping was set on pages in ll_direct_rw_pages(), so it has
+                * to be cleared now before page cleanup.
+                * PageChecked flag was also set there, so we clean up here.
+                */
+               if (inode && IS_ENCRYPTED(inode)) {
+                       vmpage->mapping = NULL;
+                       ClearPageChecked(vmpage);
+               }
                cl_page_list_del(env, &aio->cda_pages, page);
                cl_page_delete(env, page);
                cl_page_put(env, page);
                cl_page_list_del(env, &aio->cda_pages, page);
                cl_page_delete(env, page);
                cl_page_put(env, page);
index bef2b26..c9db263 100644 (file)
@@ -1354,13 +1354,9 @@ static inline void osc_release_bounce_pages(struct brw_page **pga,
        int i;
 
        for (i = 0; i < page_count; i++) {
        int i;
 
        for (i = 0; i < page_count; i++) {
-               if (pga[i]->pg->mapping)
+               if (!pga[i]->pg->mapping)
                        /* bounce pages are unmapped */
                        /* bounce pages are unmapped */
-                       continue;
-               if (pga[i]->flag & OBD_BRW_SYNC)
-                       /* sync transfer cannot have encrypted pages */
-                       continue;
-               llcrypt_finalize_bounce_page(&pga[i]->pg);
+                       llcrypt_finalize_bounce_page(&pga[i]->pg);
                pga[i]->count -= pga[i]->bp_count_diff;
                pga[i]->off += pga[i]->bp_off_diff;
        }
                pga[i]->count -= pga[i]->bp_count_diff;
                pga[i]->off += pga[i]->bp_off_diff;
        }
@@ -1454,6 +1450,19 @@ retry_encrypt:
                        pg->bp_off_diff = pg->off & ~PAGE_MASK;
                        pg->off = pg->off & PAGE_MASK;
                }
                        pg->bp_off_diff = pg->off & ~PAGE_MASK;
                        pg->off = pg->off & PAGE_MASK;
                }
+       } else if (opc == OST_READ && inode && IS_ENCRYPTED(inode)) {
+               for (i = 0; i < page_count; i++) {
+                       struct brw_page *pg = pga[i];
+
+                       /* count/off are forced to cover the whole page so that
+                        * all encrypted data is stored on the OST, so adjust
+                        * bp_{count,off}_diff for the size of the clear text.
+                        */
+                       pg->bp_count_diff = PAGE_SIZE - pg->count;
+                       pg->count = PAGE_SIZE;
+                       pg->bp_off_diff = pg->off & ~PAGE_MASK;
+                       pg->off = pg->off & PAGE_MASK;
+               }
        }
 
         for (niocount = i = 1; i < page_count; i++) {
        }
 
         for (niocount = i = 1; i < page_count; i++) {
@@ -1467,8 +1476,13 @@ retry_encrypt:
         req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
                              niocount * sizeof(*niobuf));
 
         req_capsule_set_size(pill, &RMF_NIOBUF_REMOTE, RCL_CLIENT,
                              niocount * sizeof(*niobuf));
 
-       for (i = 0; i < page_count; i++)
+       for (i = 0; i < page_count; i++) {
                short_io_size += pga[i]->count;
                short_io_size += pga[i]->count;
+               if (!inode || !IS_ENCRYPTED(inode)) {
+                       pga[i]->bp_count_diff = 0;
+                       pga[i]->bp_off_diff = 0;
+               }
+       }
 
        /* Check if read/write is small enough to be a short io. */
        if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
 
        /* Check if read/write is small enough to be a short io. */
        if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
@@ -2067,8 +2081,17 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                                continue;
                        }
 
                                continue;
                        }
 
+                       /* The page is already locked when we arrive here,
+                        * except when we deal with a twisted page for
+                        * specific Direct IO support, in which case
+                        * PageChecked flag is set on page.
+                        */
+                       if (PageChecked(pg->pg))
+                               lock_page(pg->pg);
                        rc = llcrypt_decrypt_pagecache_blocks(pg->pg,
                                                              PAGE_SIZE, 0);
                        rc = llcrypt_decrypt_pagecache_blocks(pg->pg,
                                                              PAGE_SIZE, 0);
+                       if (PageChecked(pg->pg))
+                               unlock_page(pg->pg);
                        if (rc)
                                GOTO(out, rc);
                }
                        if (rc)
                                GOTO(out, rc);
                }
index 772a0a0..f8ca92e 100755 (executable)
@@ -3195,6 +3195,9 @@ run_test 43 "test race on encrypted file size (3)"
 
 test_44() {
        local testfile=$DIR/$tdir/$tfile
 
 test_44() {
        local testfile=$DIR/$tdir/$tfile
+       local tmpfile=$TMP/abc
+       local resfile=$TMP/resfile
+       local respage
 
        $LCTL get_param mdc.*.import | grep -q client_encryption ||
                skip "client encryption not supported"
 
        $LCTL get_param mdc.*.import | grep -q client_encryption ||
                skip "client encryption not supported"
@@ -3204,28 +3207,59 @@ test_44() {
 
        which vmtouch || skip "This test needs vmtouch utility"
 
 
        which vmtouch || skip "This test needs vmtouch utility"
 
-       # Direct I/O is not supported on encrypted files.
-       # Attempts to use direct I/O on such files should fall back to
-       # buffered I/O.
+       # Direct I/O is now supported on encrypted files.
 
        stack_trap cleanup_for_enc_tests EXIT
        setup_for_enc_tests
 
 
        stack_trap cleanup_for_enc_tests EXIT
        setup_for_enc_tests
 
-       # write a page in file with O_DIRECT
        $LFS setstripe -c1 -i0 $testfile
        $LFS setstripe -c1 -i0 $testfile
-       dd if=/dev/urandom of=$testfile bs=4096 count=1 conv=fsync oflag=direct
+       dd if=/dev/urandom of=$tmpfile bs=8192 count=1 conv=fsync
+       dd if=$tmpfile of=$testfile bs=8192 count=1 oflag=direct ||
+               error "could not write to file with O_DIRECT (1)"
 
        respage=$(vmtouch $testfile | awk '/Resident\ Pages:/ {print $3}')
 
        respage=$(vmtouch $testfile | awk '/Resident\ Pages:/ {print $3}')
-       [ "$respage" == "1/1" ] ||
-               error "write to enc file did not fall back to buffered IO"
+       [ "$respage" == "0/2" ] ||
+               error "write to enc file fell back to buffered IO"
 
 
-       cancel_lru_locks osc ; cancel_lru_locks mdc
+       cancel_lru_locks
 
 
-       dd if=$testfile of=/dev/null bs=4096 count=1 iflag=direct
+       dd if=$testfile of=$resfile bs=8192 count=1 iflag=direct ||
+               error "could not read from file with O_DIRECT (1)"
 
        respage=$(vmtouch $testfile | awk '/Resident\ Pages:/ {print $3}')
 
        respage=$(vmtouch $testfile | awk '/Resident\ Pages:/ {print $3}')
-       [ "$respage" == "1/1" ] ||
-               error "write to enc file did not fall back to buffered IO"
+       [ "$respage" == "0/2" ] ||
+               error "read from enc file fell back to buffered IO"
+
+       cmp -bl $tmpfile $resfile ||
+               error "file $testfile is corrupted (1)"
+
+       rm -f $resfile
+
+       $TRUNCATE $tmpfile 4096
+       dd if=$tmpfile of=$testfile bs=4096 count=1 seek=13 oflag=direct ||
+               error "could not write to file with O_DIRECT (2)"
+
+       cancel_lru_locks
+
+       dd if=$testfile of=$resfile bs=4096 count=1 skip=13 iflag=direct ||
+               error "could not read from file with O_DIRECT (2)"
+       cmp -bl $tmpfile $resfile ||
+               error "file $testfile is corrupted (2)"
+
+       rm -f $testfile $resfile
+       $LFS setstripe -c1 -i0 $testfile
+
+       $TRUNCATE $tmpfile 2043
+       cp $tmpfile $testfile
+
+       cancel_lru_locks
+
+       dd if=$testfile of=$resfile bs=4096 count=1 iflag=direct ||
+               error "could not read from file with O_DIRECT (3)"
+       cmp -bl $tmpfile $resfile ||
+               error "file $testfile is corrupted (3)"
+
+       rm -f $tmpfile $resfile
 }
 run_test 44 "encrypted file access semantics: direct IO"
 
 }
 run_test 44 "encrypted file access semantics: direct IO"
 
@@ -3764,6 +3798,62 @@ test_51() {
 }
 run_test 51 "FS capabilities ==============="
 
 }
 run_test 51 "FS capabilities ==============="
 
+test_52() {
+       local testfile=$DIR/$tdir/$tfile
+       local tmpfile=$TMP/$tfile
+       local mirror1=$TMP/$tfile.mirror1
+       local mirror2=$TMP/$tfile.mirror2
+
+       $LCTL get_param mdc.*.import | grep -q client_encryption ||
+               skip "client encryption not supported"
+
+       mount.lustre --help |& grep -q "test_dummy_encryption:" ||
+               skip "need dummy encryption support"
+
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs"
+
+       stack_trap cleanup_for_enc_tests EXIT
+       setup_for_enc_tests
+
+       dd if=/dev/urandom of=$tmpfile bs=5000 count=1 conv=fsync
+
+       $LFS mirror create -N -i0 -N -i1 $testfile ||
+               error "could not create mirror"
+
+       dd if=$tmpfile of=$testfile bs=5000 count=1 conv=fsync ||
+               error "could not write to $testfile"
+
+       $LFS mirror resync $testfile ||
+               error "could not resync mirror"
+
+       $LFS mirror verify -v $testfile ||
+               error "verify mirror failed"
+
+       $LFS mirror read -N 1 -o $mirror1 $testfile ||
+               error "could not read from mirror 1"
+
+       cmp -bl $tmpfile $mirror1 ||
+               error "mirror 1 is corrupted"
+
+       $LFS mirror read -N 2 -o $mirror2 $testfile ||
+               error "could not read from mirror 2"
+
+       cmp -bl $tmpfile $mirror2 ||
+               error "mirror 2 is corrupted"
+
+       tr '\0' '2' < /dev/zero |
+           dd of=$tmpfile bs=9000 count=1 conv=fsync
+
+       $LFS mirror write -N 1 -i $tmpfile $testfile ||
+               error "could not write to mirror 1"
+
+       $LFS mirror verify -v $testfile &&
+               error "mirrors should be different"
+
+       rm -f $tmpfile $mirror1 $mirror2
+}
+run_test 52 "Mirrored encrypted file"
+
 log "cleanup: ======================================================"
 
 sec_unsetup() {
 log "cleanup: ======================================================"
 
 sec_unsetup() {