LU-12275 sec: support truncate for encrypted files 94/37794/15
author Sebastien Buisson <sbuisson@ddn.com>
Thu, 20 Feb 2020 14:45:07 +0000 (14:45 +0000)
committer Oleg Drokin <green@whamcloud.com>
Tue, 16 Jun 2020 15:28:29 +0000 (15:28 +0000)
Truncation of encrypted files is not a trivial operation. The page
containing the truncation point must be read, decrypted, zeroed after
the truncation point, re-encrypted, and then written back.
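
As an editorial illustration (not part of the patch), the partial-page
arithmetic boils down to the userspace sketch below, assuming a 4096-byte
page; EXAMPLE_PAGE_SIZE and the sample values are made up for the example.
ll_setattr_raw() in the llite_lib.c hunk derives the same page index,
in-page offset and zero length from attr->ia_size.

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096UL        /* stand-in for PAGE_SIZE */

int main(void)
{
        unsigned long new_size = 10000; /* arbitrary truncate target */
        unsigned long index  = new_size / EXAMPLE_PAGE_SIZE; /* page holding the cut */
        unsigned long offset = new_size % EXAMPLE_PAGE_SIZE; /* cut point inside it */
        unsigned long len    = EXAMPLE_PAGE_SIZE - offset;   /* bytes to zero */

        if (offset != 0)
                printf("zero page %lu from offset %lu for %lu bytes\n",
                       index, offset, len);
        else
                printf("truncation is page-aligned, nothing to zero\n");
        return 0;
}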

Signed-off-by: Sebastien Buisson <sbuisson@ddn.com>
Change-Id: I834f9372913d7051b1e0821515d3fea0873ffd78
Reviewed-on: https://review.whamcloud.com/37794
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/rw.c
lustre/llite/vvp_io.c
lustre/ofd/ofd_objects.c
lustre/osc/osc_request.c
lustre/osd-ldiskfs/osd_io.c
lustre/osd-zfs/osd_io.c

diff --git a/lustre/llite/file.c b/lustre/llite/file.c
index 5a543da..166b527 100644
@@ -2255,6 +2255,13 @@ static int ll_lov_setstripe(struct inode *inode, struct file *file,
                        GOTO(out, rc);
 
                rc = ll_file_getstripe(inode, arg, lum_size);
+               if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
+                   ll_i2info(inode)->lli_clob) {
+                       struct iattr attr = { 0 };
+
+                       rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, &attr,
+                                           OP_XVALID_FLAGS, LUSTRE_ENCRYPT_FL);
+               }
        }
        cl_lov_delay_create_clear(&file->f_flags);
 
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index 49fa5c0..0aaae5f 100644
@@ -1744,6 +1744,162 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
        RETURN(rc);
 }
 
+/**
+ * Zero portion of page that is part of @inode.
+ * This implies, if necessary:
+ * - taking cl_lock on range corresponding to concerned page
+ * - grabbing vm page
+ * - associating cl_page
+ * - proceeding to clio read
+ * - zeroing range in page
+ * - proceeding to cl_page flush
+ * - releasing cl_lock
+ *
+ * \param[in] inode    inode
+ * \param[in] index    page index
+ * \param[in] offset   offset in page to start zero from
+ * \param[in] len      len to zero
+ *
+ * \retval 0           on success
+ * \retval negative    errno on failure
+ */
+int ll_io_zero_page(struct inode *inode, pgoff_t index, pgoff_t offset,
+                   unsigned len)
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct cl_object *clob = lli->lli_clob;
+       __u16 refcheck;
+       struct lu_env *env = NULL;
+       struct cl_io *io = NULL;
+       struct cl_page *clpage = NULL;
+       struct page *vmpage = NULL;
+       unsigned from = index << PAGE_SHIFT;
+       struct cl_lock *lock = NULL;
+       struct cl_lock_descr *descr = NULL;
+       struct cl_2queue *queue = NULL;
+       struct cl_sync_io *anchor = NULL;
+       bool holdinglock = false;
+       bool lockedbymyself = true;
+       int rc;
+
+       ENTRY;
+
+       env = cl_env_get(&refcheck);
+       if (IS_ERR(env))
+               RETURN(PTR_ERR(env));
+
+       io = vvp_env_thread_io(env);
+       io->ci_obj = clob;
+       rc = cl_io_rw_init(env, io, CIT_WRITE, from, PAGE_SIZE);
+       if (rc)
+               GOTO(putenv, rc);
+
+       lock = vvp_env_lock(env);
+       descr = &lock->cll_descr;
+       descr->cld_obj   = io->ci_obj;
+       descr->cld_start = cl_index(io->ci_obj, from);
+       descr->cld_end   = cl_index(io->ci_obj, from + PAGE_SIZE - 1);
+       descr->cld_mode  = CLM_WRITE;
+       descr->cld_enq_flags = CEF_MUST | CEF_NONBLOCK;
+
+       /* request lock for page */
+       rc = cl_lock_request(env, io, lock);
+       /* -ECANCELED indicates a matching lock with a different extent
+        * was already present, and -EEXIST indicates a matching lock
+        * on exactly the same extent was already present.
+        * In both cases it means we are covered.
+        */
+       if (rc == -ECANCELED || rc == -EEXIST)
+               rc = 0;
+       else if (rc < 0)
+               GOTO(iofini, rc);
+       else
+               holdinglock = true;
+
+       /* grab page */
+       vmpage = grab_cache_page_nowait(inode->i_mapping, index);
+       if (vmpage == NULL)
+               GOTO(rellock, rc = -EOPNOTSUPP);
+
+       if (!PageDirty(vmpage)) {
+               /* associate cl_page */
+               clpage = cl_page_find(env, clob, vmpage->index,
+                                     vmpage, CPT_CACHEABLE);
+               if (IS_ERR(clpage))
+                       GOTO(pagefini, rc = PTR_ERR(clpage));
+
+               cl_page_assume(env, io, clpage);
+       }
+
+       if (!PageUptodate(vmpage) && !PageDirty(vmpage) &&
+           !PageWriteback(vmpage)) {
+               /* read page */
+               /* set PagePrivate2 to detect special case of empty page
+                * in osc_brw_fini_request()
+                */
+               SetPagePrivate2(vmpage);
+               rc = ll_io_read_page(env, io, clpage, NULL);
+               if (!PagePrivate2(vmpage))
+                       /* PagePrivate2 was cleared in osc_brw_fini_request()
+                        * meaning we read an empty page. In this case, in order
+                        * to avoid allocating unnecessary block in truncated
+                        * file, we must not zero and write as below. Subsequent
+                        * server-side truncate will handle things correctly.
+                        */
+                       GOTO(clpfini, rc = 0);
+               ClearPagePrivate2(vmpage);
+               if (rc)
+                       GOTO(clpfini, rc);
+               lockedbymyself = trylock_page(vmpage);
+               cl_page_assume(env, io, clpage);
+       }
+
+       /* zero range in page */
+       zero_user(vmpage, offset, len);
+
+       if (holdinglock && clpage) {
+               /* explicitly write newly modified page */
+               queue = &io->ci_queue;
+               cl_2queue_init(queue);
+               anchor = &vvp_env_info(env)->vti_anchor;
+               cl_sync_io_init(anchor, 1);
+               clpage->cp_sync_io = anchor;
+               cl_2queue_add(queue, clpage);
+               rc = cl_io_submit_rw(env, io, CRT_WRITE, queue);
+               if (rc)
+                       GOTO(queuefini1, rc);
+               rc = cl_sync_io_wait(env, anchor, 0);
+               if (rc)
+                       GOTO(queuefini2, rc);
+               cl_page_assume(env, io, clpage);
+
+queuefini2:
+               cl_2queue_discard(env, io, queue);
+queuefini1:
+               cl_2queue_disown(env, io, queue);
+               cl_2queue_fini(env, queue);
+       }
+
+clpfini:
+       if (clpage)
+               cl_page_put(env, clpage);
+pagefini:
+       if (lockedbymyself) {
+               unlock_page(vmpage);
+               put_page(vmpage);
+       }
+rellock:
+       if (holdinglock)
+               cl_lock_release(env, lock);
+iofini:
+       cl_io_fini(env, io);
+putenv:
+       if (env)
+               cl_env_put(env, &refcheck);
+
+       RETURN(rc);
+}
+
 /* If this inode has objects allocated to it (lsm != NULL), then the OST
  * object(s) determine the file size and mtime.  Otherwise, the MDS will
  * keep these values until such a time that objects are allocated for it.
@@ -1875,6 +2031,8 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                                GOTO(out, rc);
                        }
                } else {
+                       unsigned int flags = 0;
+
                        /* For truncate and utimes sending attributes to OSTs,
                         * setting mtime/atime to the past will be performed
                         * under PW [0:EOF] extent lock (new_size:EOF for
@@ -1883,7 +2041,22 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                         * it is necessary due to possible time
                         * de-synchronization between MDT inode and OST objects
                         */
-                       rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, 0);
+                       if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
+                           attr->ia_valid & ATTR_SIZE) {
+                               xvalid |= OP_XVALID_FLAGS;
+                               flags = LUSTRE_ENCRYPT_FL;
+                               if (attr->ia_size & ~PAGE_MASK) {
+                                       pgoff_t offset =
+                                               attr->ia_size & (PAGE_SIZE - 1);
+
+                                       rc = ll_io_zero_page(inode,
+                                                   attr->ia_size >> PAGE_SHIFT,
+                                                   offset, PAGE_SIZE - offset);
+                                       if (rc)
+                                               GOTO(out, rc);
+                               }
+                       }
+                       rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
                }
        }
 
@@ -1947,6 +2120,11 @@ int ll_setattr(struct dentry *de, struct iattr *attr)
 {
        int mode = de->d_inode->i_mode;
        enum op_xvalid xvalid = 0;
+       int rc;
+
+       rc = llcrypt_prepare_setattr(de, attr);
+       if (rc)
+               return rc;
 
        if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
                              (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
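
The new ll_io_zero_page() takes a cl_lock, attaches a cl_page and goes
through clio read and flush so that the zeroing is applied to decrypted
data and re-encrypted on write-back. The userspace analog below models
only the data movement; zero_page_tail() and EXAMPLE_PAGE_SIZE are
illustrative names, and there is no locking, clio or encryption here.

#include <string.h>
#include <unistd.h>

#define EXAMPLE_PAGE_SIZE 4096UL

/* Read the page containing the truncation point, zero everything from
 * the in-page offset (which must be < EXAMPLE_PAGE_SIZE) to the end of
 * the page, and write the page back.  In the real client path the read
 * decrypts and the write-back re-encrypts; none of that is modeled here.
 */
static int zero_page_tail(int fd, unsigned long index, unsigned long offset)
{
        char page[EXAMPLE_PAGE_SIZE];
        off_t pos = (off_t)(index * EXAMPLE_PAGE_SIZE);
        ssize_t got;

        memset(page, 0, sizeof(page));
        got = pread(fd, page, sizeof(page), pos);
        if (got < 0)
                return -1;
        if (got == 0)
                return 0;       /* page is past EOF, nothing to zero */
        memset(page + offset, 0, EXAMPLE_PAGE_SIZE - offset);
        if (pwrite(fd, page, sizeof(page), pos) != (ssize_t)sizeof(page))
                return -1;
        return 0;
}

The real code adds one more shortcut: if the page read back turns out to
be entirely zero (PagePrivate2 cleared by osc_brw_fini_request()), the
zero-and-write step is skipped so the truncated file does not gain a
needlessly allocated block, and the size change itself is left to the
cl_setattr_ost() call that follows.
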
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c
index 29ec2b7..a78f87c 100644
@@ -1449,8 +1449,8 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
 {
        struct inode              *inode  = vvp_object_inode(page->cp_obj);
        struct ll_sb_info         *sbi    = ll_i2sbi(inode);
-       struct ll_file_data       *fd     = file->private_data;
-       struct ll_readahead_state *ras    = &fd->fd_ras;
+       struct ll_file_data       *fd     = NULL;
+       struct ll_readahead_state *ras    = NULL;
        struct cl_2queue          *queue  = &io->ci_queue;
        struct cl_sync_io         *anchor = NULL;
        struct vvp_page           *vpg;
@@ -1460,10 +1460,15 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
        pgoff_t io_end_index;
        ENTRY;
 
+       if (file) {
+               fd = file->private_data;
+               ras = &fd->fd_ras;
+       }
+
        vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
        uptodate = vpg->vpg_defer_uptodate;
 
-       if (ll_readahead_enabled(sbi) && !vpg->vpg_ra_updated) {
+       if (ll_readahead_enabled(sbi) && !vpg->vpg_ra_updated && ras) {
                struct vvp_io *vio = vvp_env_io(env);
                enum ras_update_flags flags = 0;
 
@@ -1490,7 +1495,7 @@ int ll_io_read_page(const struct lu_env *env, struct cl_io *io,
        io_start_index = cl_index(io->ci_obj, io->u.ci_rw.crw_pos);
        io_end_index = cl_index(io->ci_obj, io->u.ci_rw.crw_pos +
                                io->u.ci_rw.crw_count - 1);
-       if (ll_readahead_enabled(sbi)) {
+       if (ll_readahead_enabled(sbi) && ras) {
                rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
                                   uptodate, file);
                CDEBUG(D_READA, DFID " %d pages read ahead at %lu\n",
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c
index 55e21fa..85d630f 100644
@@ -653,7 +653,14 @@ static int vvp_io_setattr_lock(const struct lu_env *env,
        __u32 enqflags = 0;
 
        if (cl_io_is_trunc(io)) {
-               if (io->u.ci_setattr.sa_attr.lvb_size == 0)
+               struct inode *inode = vvp_object_inode(io->ci_obj);
+
+               /* set enqueue flags to CEF_MUST in case of encrypted file,
+                * to prevent lockless truncate
+                */
+               if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
+                       enqflags = CEF_MUST;
+               else if (io->u.ci_setattr.sa_attr.lvb_size == 0)
                        enqflags = CEF_DISCARD_DATA;
        } else if (cl_io_is_fallocate(io)) {
                lock_start = io->u.ci_setattr.sa_falloc_offset;
diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c
index bb92058..4982a49 100644
@@ -919,6 +919,17 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo,
                oa->o_valid &= ~OBD_MD_LAYOUT_VERSION;
        }
 
+       if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & LUSTRE_ENCRYPT_FL) {
+               /* punch must be aware we are dealing with an encrypted file */
+               struct lu_attr la = {
+                       .la_valid = LA_FLAGS,
+                       .la_flags = LUSTRE_ENCRYPT_FL,
+               };
+
+               rc = dt_attr_set(env, dob, &la, th);
+               if (rc)
+                       GOTO(unlock, rc);
+       }
        rc = dt_punch(env, dob, start, OBD_OBJECT_EOF, th);
        if (rc)
                GOTO(unlock, rc);
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index e87b4f0..a9d95bc 100644
@@ -2065,8 +2065,13 @@ static int osc_brw_fini_request(struct ptlrpc_request *req, int rc)
                                        break;
                                p++;
                        }
-                       if (p - q == PAGE_SIZE / sizeof(*p))
+                       if (p - q == PAGE_SIZE / sizeof(*p)) {
+                               /* if page is empty forward info to upper layers
+                                * (ll_io_zero_page) by clearing PagePrivate2
+                                */
+                               ClearPagePrivate2(pg->pg);
                                continue;
+                       }
 
                        rc = llcrypt_decrypt_pagecache_blocks(pg->pg,
                                                              PAGE_SIZE, 0);
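
The emptiness test guarding the ClearPagePrivate2() call scans the page
word by word. A standalone rendering of the same idiom, with an assumed
4096-byte page and an illustrative function name:

#include <stdbool.h>
#include <stddef.h>

#define EXAMPLE_PAGE_SIZE 4096UL

/* Walk the page as an array of longs and stop at the first non-zero
 * word; the page is empty exactly when the walk reaches the end.
 */
static bool page_is_all_zero(const unsigned long *q)
{
        const unsigned long *p = q;

        while ((size_t)(p - q) < EXAMPLE_PAGE_SIZE / sizeof(*p)) {
                if (*p)
                        break;
                p++;
        }
        return (size_t)(p - q) == EXAMPLE_PAGE_SIZE / sizeof(*p);
}

A page that comes back all zeroes corresponds to an unwritten extent, so
there is nothing to decrypt, and the cleared PagePrivate2 bit tells
ll_io_zero_page() not to rewrite the page.
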
diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c
index 961e10d..7880dec 100644
@@ -2010,6 +2010,14 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt,
                grow = true;
        i_size_write(inode, start);
        spin_unlock(&inode->i_lock);
+       /* if object holds encrypted content, we need to make sure we truncate
+        * on an encryption unit boundary, or subsequent reads will get
+        * corrupted content
+        */
+       if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+           start & ~LUSTRE_ENCRYPTION_MASK)
+               start = (start & LUSTRE_ENCRYPTION_MASK) +
+                       LUSTRE_ENCRYPTION_UNIT_SIZE;
        ll_truncate_pagecache(inode, start);
 
        /* optimize grow case */
@@ -2242,15 +2250,29 @@ void osd_execute_truncate(struct osd_object *obj)
                return;
        }
 
+       size = i_size_read(inode);
        inode_lock(inode);
+       /* if object holds encrypted content, we need to make sure we truncate
+        * on an encryption unit boundary, or block content will get corrupted
+        */
+       if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+           size & ~LUSTRE_ENCRYPTION_MASK)
+               inode->i_size = (size & LUSTRE_ENCRYPTION_MASK) +
+                       LUSTRE_ENCRYPTION_UNIT_SIZE;
        ldiskfs_truncate(inode);
        inode_unlock(inode);
+       if (inode->i_size != size) {
+               spin_lock(&inode->i_lock);
+               i_size_write(inode, size);
+               LDISKFS_I(inode)->i_disksize = size;
+               spin_unlock(&inode->i_lock);
+               osd_dirty_inode(inode, I_DIRTY_DATASYNC);
+       }
 
        /*
         * For a partial-page truncate, flush the page to disk immediately to
         * avoid data corruption during direct disk write.  b=17397
         */
-       size = i_size_read(inode);
        if ((size & ~PAGE_MASK) == 0)
                return;
        if (osd_use_page_cache(d)) {
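
Both osd_punch() and osd_execute_truncate() round an unaligned truncate
offset up to the next encryption unit, so the partial unit that the
client has already zeroed and rewritten is not cut in the middle of its
ciphertext. A self-contained illustration of that rounding, assuming a
4096-byte unit (the real constant is LUSTRE_ENCRYPTION_UNIT_SIZE from
the Lustre headers) and EX_-prefixed stand-in macros:

#include <stdio.h>

#define EX_ENCRYPTION_UNIT_SIZE 4096UL
#define EX_ENCRYPTION_MASK      (~(EX_ENCRYPTION_UNIT_SIZE - 1))

int main(void)
{
        unsigned long start = 10000;    /* unaligned truncate offset */

        /* round up to the next encryption unit boundary */
        if (start & ~EX_ENCRYPTION_MASK)
                start = (start & EX_ENCRYPTION_MASK) +
                        EX_ENCRYPTION_UNIT_SIZE;

        printf("server truncates at %lu\n", start); /* prints 12288 */
        return 0;
}

osd_execute_truncate() then writes the client-requested size back into
i_size, so the visible file size stays exact even though blocks are
released on a unit boundary.
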
diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c
index f2a72b6..b270228 100644
@@ -1024,9 +1024,11 @@ static int osd_read_prep(const struct lu_env *env, struct dt_object *dt,
  * dmu_tx_hold_sa() and if off < size, dmu_tx_hold_free()
  * called and then assigned to a transaction group.
  */
-static int __osd_object_punch(objset_t *os, dnode_t *dn, dmu_tx_t *tx,
-                               uint64_t size, uint64_t off, uint64_t len)
+static int __osd_object_punch(struct osd_object *obj, objset_t *os,
+                             dmu_tx_t *tx, uint64_t off, uint64_t len)
 {
+       dnode_t *dn = obj->oo_dn;
+       uint64_t size = obj->oo_attr.la_size;
        int rc = 0;
 
        /* Assert that the transaction has been assigned to a
@@ -1038,6 +1040,19 @@ static int __osd_object_punch(objset_t *os, dnode_t *dn, dmu_tx_t *tx,
        if (len == DMU_OBJECT_END && size == off)
                return 0;
 
+       /* if object holds encrypted content, we need to make sure we truncate
+        * on an encryption unit boundary, or subsequent reads will get
+        * corrupted content
+        */
+       if (len != DMU_OBJECT_END)
+               len -= LUSTRE_ENCRYPTION_UNIT_SIZE -
+                       (off & ~LUSTRE_ENCRYPTION_MASK);
+       if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+           off & ~LUSTRE_ENCRYPTION_MASK)
+               off = (off & LUSTRE_ENCRYPTION_MASK) +
+                       LUSTRE_ENCRYPTION_UNIT_SIZE;
+
+
        /* XXX: dnode_free_range() can be used to save on dnode lookup */
        if (off < size)
                dmu_free_range(os, dn->dn_object, off, len, tx);
@@ -1069,8 +1084,8 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt,
                len = end - start;
        write_unlock(&obj->oo_attr_lock);
 
-       rc = __osd_object_punch(osd->od_os, obj->oo_dn, oh->ot_tx,
-                               obj->oo_attr.la_size, start, len);
+       rc = __osd_object_punch(obj, osd->od_os, oh->ot_tx, start, len);
+
        /* set new size */
        if (len == DMU_OBJECT_END) {
                write_lock(&obj->oo_attr_lock);
@@ -1100,6 +1115,14 @@ static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt,
                len = end - start;
 
        /* declare we'll free some blocks ... */
+       /* if object holds encrypted content, we need to make sure we truncate
+        * on an encryption unit boundary, or subsequent reads will get
+        * corrupted content
+        */
+       if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+           start & ~LUSTRE_ENCRYPTION_MASK)
+               start = (start & LUSTRE_ENCRYPTION_MASK) +
+                       LUSTRE_ENCRYPTION_UNIT_SIZE;
        if (start < obj->oo_attr.la_size) {
                read_unlock(&obj->oo_attr_lock);
                dmu_tx_mark_netfree(oh->ot_tx);
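
The ZFS OSD applies the same boundary rule to dmu_free_range(): when the
start of the punch moves forward to the next encryption unit, the length
shrinks by the bytes skipped so the end of the freed range stays put.
The sketch below shows that intent with the same assumed 4096-byte unit
and illustrative macro names; the kernel code expresses the rounding
with LUSTRE_ENCRYPTION_MASK and handles the open-ended DMU_OBJECT_END
case separately.

#include <stdio.h>

#define EX_ENCRYPTION_UNIT_SIZE 4096UL
#define EX_ENCRYPTION_MASK      (~(EX_ENCRYPTION_UNIT_SIZE - 1))

int main(void)
{
        unsigned long off = 10000, len = 20000;
        unsigned long end = off + len;

        if (off & ~EX_ENCRYPTION_MASK) {
                /* bytes between off and the next unit boundary */
                unsigned long delta = EX_ENCRYPTION_UNIT_SIZE -
                                      (off & ~EX_ENCRYPTION_MASK);

                off += delta;   /* same as (off & MASK) + UNIT */
                len -= delta;
        }
        printf("free range [%lu, %lu), end unchanged at %lu\n",
               off, off + len, end);   /* [12288, 30000), 30000 */
        return 0;
}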