Whamcloud - gitweb
LU-15608 sec: fix DIO for encrypted files 64/46664/6
authorSebastien Buisson <sbuisson@ddn.com>
Tue, 1 Mar 2022 16:26:09 +0000 (17:26 +0100)
committerOleg Drokin <green@whamcloud.com>
Thu, 17 Mar 2022 07:08:06 +0000 (07:08 +0000)
With Direct IO, we do not have proper page cache pages. So we need to
retrieve by ourselves the page mapping and the page index of the page
to be encrypted/decrypted.

For the index, we need to use the offset of the page within the file,
and not the object.
So we rename cl_page's cp_osc_index to cp_page_index for that purpose.
cp_osc_index is redundant with osc_async_page's oap_obj_off and only
used by osc_index(), so we also adapt this function.
cp_page_index is initialized in cl_page_alloc(), and accessed in
the OSC layer where the llcrypt primitives are called.

For the mapping, problem is page->mapping is not set to NULL on page
allocation, so it cannot safely be used to see if a page is a direct
I/O page.
Use cl_page for direct I/O and page->mapping for buffered
I/O.  (clpage->cp_inode is only set for direct I/O and
cannot easily be always set.)
Without this, we sometimes get panics when page2inode is
used in the OSC layer.  (Note the remaining use in dom is
safe because ll_dom_readpage is a page cache helper and
will never see DIO pages.)

Fixes: a71e0dd7f7 ("LU-14306 sec: get rid of bad rss-counter state messages")
Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: Icb53a4e45463b8d3febc2e6212b39dc25719d866
Reviewed-on: https://review.whamcloud.com/46664
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/cl_object.h
lustre/include/lustre_osc.h
lustre/include/obd.h
lustre/llite/file.c
lustre/obdclass/cl_page.c
lustre/osc/osc_page.c
lustre/osc/osc_request.c
lustre/tests/sanity-sec.sh

index a9f254b..2a0d921 100644 (file)
@@ -735,7 +735,8 @@ struct cl_page {
        atomic_t                cp_ref;
        /** layout_entry + stripe index, composed using lov_comp_index() */
        unsigned int            cp_lov_index;
-       pgoff_t                 cp_osc_index;
+       /** page->index of the page within the whole file */
+       pgoff_t                 cp_page_index;
        /** An object this page is a part of. Immutable after creation. */
        struct cl_object        *cp_obj;
        /** vmpage */
index e1c201b..5c3a157 100644 (file)
@@ -843,7 +843,7 @@ static inline struct osc_page *oap2osc(struct osc_async_page *oap)
 
 static inline pgoff_t osc_index(struct osc_page *opg)
 {
-       return opg->ops_cl.cpl_page->cp_osc_index;
+       return opg->ops_oap.oap_obj_off >> PAGE_SHIFT;
 }
 
 static inline struct cl_page *oap2cl_page(struct osc_async_page *oap)
index ce22ac7..f3bf355 100644 (file)
@@ -1355,6 +1355,9 @@ static inline void client_adjust_max_dirty(struct client_obd *cli)
                                           1 << (20 - PAGE_SHIFT));
 }
 
+/* Must be used for page cache pages only,
+ * not safe otherwise (e.g. direct IO pages)
+ */
 static inline struct inode *page2inode(struct page *page)
 {
        if (page->mapping) {
index 3a8fa24..1912240 100644 (file)
@@ -444,11 +444,15 @@ out:
 
 static inline int ll_dom_readpage(void *data, struct page *page)
 {
+       /* since ll_dom_readpage is a page cache helper, it is safe to assume
+        * mapping and host pointers are set here
+        */
+       struct inode *inode;
        struct niobuf_local *lnb = data;
        void *kaddr;
        int rc = 0;
 
-       struct inode *inode = page2inode(page);
+       inode = page2inode(page);
 
        kaddr = kmap_atomic(page);
        memcpy(kaddr, lnb->lnb_data, lnb->lnb_len);
index 0dc44cf..b573e8d 100644 (file)
@@ -294,10 +294,17 @@ struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o,
                cl_page->cp_vmpage = vmpage;
                cl_page->cp_state = CPS_CACHED;
                cl_page->cp_type = type;
-               cl_page->cp_inode = NULL;
+               if (type == CPT_TRANSIENT)
+                       /* ref to correct inode will be added
+                        * in ll_direct_rw_pages
+                        */
+                       cl_page->cp_inode = NULL;
+               else
+                       cl_page->cp_inode = page2inode(vmpage);
                INIT_LIST_HEAD(&cl_page->cp_batch);
                lu_ref_init(&cl_page->cp_reference);
                head = o;
+               cl_page->cp_page_index = ind;
                cl_object_for_each(o, head) {
                        if (o->co_ops->coo_page_init != NULL) {
                                result = o->co_ops->coo_page_init(env, o,
index eb60998..fa5d86e 100644 (file)
@@ -270,7 +270,6 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 
        opg->ops_srvlock = osc_io_srvlock(oio);
        cl_page_slice_add(cl_page, &opg->ops_cl, obj, &osc_page_ops);
-       cl_page->cp_osc_index = index;
 
        /* reserve an LRU space for this page */
        if (cl_page->cp_type == CPT_CACHEABLE) {
index 889f72a..6bc9389 100644 (file)
@@ -1407,22 +1407,14 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
        struct inode *inode = NULL;
        bool directio = false;
        bool enable_checksum = true;
+       struct cl_page *clpage;
 
        ENTRY;
        if (pga[0]->pg) {
-               inode = page2inode(pga[0]->pg);
-               if (inode == NULL) {
-                       /* Try to get reference to inode from cl_page if we are
-                        * dealing with direct IO, as handled pages are not
-                        * actual page cache pages.
-                        */
-                       struct osc_async_page *oap = brw_page2oap(pga[0]);
-                       struct cl_page *clpage = oap2cl_page(oap);
-
-                       inode = clpage->cp_inode;
-                       if (inode)
-                               directio = true;
-               }
+               clpage = oap2cl_page(brw_page2oap(pga[0]));
+               inode = clpage->cp_inode;
+               if (clpage->cp_type == CPT_TRANSIENT)
+                       directio = true;
        }
        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
                RETURN(-ENOMEM); /* Recoverable */
@@ -1444,11 +1436,11 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
        if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode) &&
            llcrypt_has_encryption_key(inode)) {
                for (i = 0; i < page_count; i++) {
-                       struct brw_page *pg = pga[i];
+                       struct brw_page *brwpg = pga[i];
                        struct page *data_page = NULL;
                        bool retried = false;
                        bool lockedbymyself;
-                       u32 nunits = (pg->off & ~PAGE_MASK) + pg->count;
+                       u32 nunits = (brwpg->off & ~PAGE_MASK) + brwpg->count;
                        struct address_space *map_orig = NULL;
                        pgoff_t index_orig;
 
@@ -1463,23 +1455,24 @@ retry_encrypt:
                         * end in vvp_page_completion_write/cl_page_completion,
                         * which means only once the page is fully processed.
                         */
-                       lockedbymyself = trylock_page(pg->pg);
+                       lockedbymyself = trylock_page(brwpg->pg);
                        if (directio) {
-                               map_orig = pg->pg->mapping;
-                               pg->pg->mapping = inode->i_mapping;
-                               index_orig = pg->pg->index;
-                               pg->pg->index = pg->off >> PAGE_SHIFT;
+                               map_orig = brwpg->pg->mapping;
+                               brwpg->pg->mapping = inode->i_mapping;
+                               index_orig = brwpg->pg->index;
+                               clpage = oap2cl_page(brw_page2oap(brwpg));
+                               brwpg->pg->index = clpage->cp_page_index;
                        }
                        data_page =
-                               llcrypt_encrypt_pagecache_blocks(pg->pg,
+                               llcrypt_encrypt_pagecache_blocks(brwpg->pg,
                                                                 nunits, 0,
                                                                 GFP_NOFS);
                        if (directio) {
-                               pg->pg->mapping = map_orig;
-                               pg->pg->index = index_orig;
+                               brwpg->pg->mapping = map_orig;
+                               brwpg->pg->index = index_orig;
                        }
                        if (lockedbymyself)
-                               unlock_page(pg->pg);
+                               unlock_page(brwpg->pg);
                        if (IS_ERR(data_page)) {
                                rc = PTR_ERR(data_page);
                                if (rc == -ENOMEM && !retried) {
@@ -1494,10 +1487,11 @@ retry_encrypt:
                         * disambiguation in osc_release_bounce_pages().
                         */
                        SetPageChecked(data_page);
-                       pg->pg = data_page;
+                       brwpg->pg = data_page;
                        /* there should be no gap in the middle of page array */
                        if (i == page_count - 1) {
-                               struct osc_async_page *oap = brw_page2oap(pg);
+                               struct osc_async_page *oap =
+                                       brw_page2oap(brwpg);
 
                                oa->o_size = oap->oap_count +
                                        oap->oap_obj_off + oap->oap_page_off;
@@ -1505,10 +1499,10 @@ retry_encrypt:
                        /* len is forced to nunits, and relative offset to 0
                         * so store the old, clear text info
                         */
-                       pg->bp_count_diff = nunits - pg->count;
-                       pg->count = nunits;
-                       pg->bp_off_diff = pg->off & ~PAGE_MASK;
-                       pg->off = pg->off & PAGE_MASK;
+                       brwpg->bp_count_diff = nunits - brwpg->count;
+                       brwpg->count = nunits;
+                       brwpg->bp_off_diff = brwpg->off & ~PAGE_MASK;
+                       brwpg->off = brwpg->off & PAGE_MASK;
                }
        } else if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode)) {
                struct osc_async_page *oap = brw_page2oap(pga[0]);
@@ -1974,8 +1968,9 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                &req->rq_import->imp_connection->c_peer;
        struct ost_body *body;
        u32 client_cksum = 0;
-       struct inode *inode;
+       struct inode *inode = NULL;
        unsigned int blockbits = 0, blocksize = 0;
+       struct cl_page *clpage;
 
        ENTRY;
 
@@ -2169,19 +2164,12 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                rc = 0;
        }
 
-       inode = page2inode(aa->aa_ppga[0]->pg);
-       if (inode == NULL) {
-               /* Try to get reference to inode from cl_page if we are
-                * dealing with direct IO, as handled pages are not
-                * actual page cache pages.
-                */
-               struct osc_async_page *oap = brw_page2oap(aa->aa_ppga[0]);
-
-               inode = oap2cl_page(oap)->cp_inode;
-               if (inode) {
-                       blockbits = inode->i_blkbits;
-                       blocksize = 1 << blockbits;
-               }
+       /* get the inode from the first cl_page */
+       clpage = oap2cl_page(brw_page2oap(aa->aa_ppga[0]));
+       inode = clpage->cp_inode;
+       if (clpage->cp_type == CPT_TRANSIENT && inode) {
+               blockbits = inode->i_blkbits;
+               blocksize = 1 << blockbits;
        }
        if (inode && IS_ENCRYPTED(inode)) {
                int idx;
@@ -2191,19 +2179,19 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                        GOTO(out, rc);
                }
                for (idx = 0; idx < aa->aa_page_count; idx++) {
-                       struct brw_page *pg = aa->aa_ppga[idx];
+                       struct brw_page *brwpg = aa->aa_ppga[idx];
                        unsigned int offs = 0;
 
                        while (offs < PAGE_SIZE) {
                                /* do not decrypt if page is all 0s */
-                               if (memchr_inv(page_address(pg->pg) + offs, 0,
-                                        LUSTRE_ENCRYPTION_UNIT_SIZE) == NULL) {
+                               if (memchr_inv(page_address(brwpg->pg) + offs,
+                                     0, LUSTRE_ENCRYPTION_UNIT_SIZE) == NULL) {
                                        /* if page is empty forward info to
                                         * upper layers (ll_io_zero_page) by
                                         * clearing PagePrivate2
                                         */
                                        if (!offs)
-                                               ClearPagePrivate2(pg->pg);
+                                               ClearPagePrivate2(brwpg->pg);
                                        break;
                                }
 
@@ -2213,19 +2201,22 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                                         * input parameter. Page does not need
                                         * to be locked.
                                         */
-                                       u64 lblk_num =
-                                               ((u64)(pg->off >> PAGE_SHIFT) <<
-                                                    (PAGE_SHIFT - blockbits)) +
-                                                      (offs >> blockbits);
+                                       u64 lblk_num;
                                        unsigned int i;
 
+                                       clpage =
+                                              oap2cl_page(brw_page2oap(brwpg));
+                                       lblk_num =
+                                               ((u64)(clpage->cp_page_index) <<
+                                               (PAGE_SHIFT - blockbits)) +
+                                               (offs >> blockbits);
                                        for (i = offs;
                                             i < offs +
                                                    LUSTRE_ENCRYPTION_UNIT_SIZE;
                                             i += blocksize, lblk_num++) {
                                                rc =
                                                  llcrypt_decrypt_block_inplace(
-                                                         inode, pg->pg,
+                                                         inode, brwpg->pg,
                                                          blocksize, i,
                                                          lblk_num);
                                                if (rc)
@@ -2233,7 +2224,7 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                                        }
                                } else {
                                        rc = llcrypt_decrypt_pagecache_blocks(
-                                               pg->pg,
+                                               brwpg->pg,
                                                LUSTRE_ENCRYPTION_UNIT_SIZE,
                                                offs);
                                }
index 2e4c5b1..be5142c 100755 (executable)
@@ -3354,7 +3354,38 @@ test_44() {
        cmp -bl $tmpfile $resfile ||
                error "file $testfile is corrupted (3)"
 
-       rm -f $tmpfile $resfile
+       rm -f $tmpfile $resfile $testfile
+
+       if [ $OSTCOUNT -ge 2 ]; then
+               dd if=/dev/urandom of=$tmpfile bs=$pagesz count=1 conv=fsync
+               $LFS setstripe -S 256k -c2 $testfile
+
+               # write in file, at beginning of first stripe, buffered IO
+               dd if=$tmpfile of=$testfile bs=$pagesz count=1 \
+                       conv=fsync,notrunc
+
+               # write at beginning of second stripe, direct IO
+               dd if=$tmpfile of=$testfile bs=$pagesz count=1 seek=256k \
+                       oflag=seek_bytes,direct conv=fsync,notrunc
+
+               cancel_lru_locks
+
+               # read at beginning of first stripe, direct IO
+               dd if=$testfile of=$resfile bs=$pagesz count=1 \
+                       iflag=direct conv=fsync
+
+               cmp -bl $tmpfile $resfile ||
+                       error "file $testfile is corrupted (4)"
+
+               # read at beginning of second stripe, buffered IO
+               dd if=$testfile of=$resfile bs=$pagesz count=1 skip=256k \
+                       iflag=skip_bytes conv=fsync
+
+               cmp -bl $tmpfile $resfile ||
+                       error "file $testfile is corrupted (5)"
+
+               rm -f $tmpfile $resfile
+       fi
 }
 run_test 44 "encrypted file access semantics: direct IO"