GOTO(out, rc);
rc = ll_file_getstripe(inode, arg, lum_size);
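+ /* for an encrypted regular file, also convey the LUSTRE_ENCRYPT_FL
+ * flag to the OST objects with a flags-only setattr
+ */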
+ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
+ ll_i2info(inode)->lli_clob) {
+ struct iattr attr = { 0 };
+
+ rc = cl_setattr_ost(ll_i2info(inode)->lli_clob, &attr,
+ OP_XVALID_FLAGS, LUSTRE_ENCRYPT_FL);
+ }
}
cl_lov_delay_create_clear(&file->f_flags);
RETURN(rc);
}
+/**
+ * Zero portion of page that is part of @inode.
+ * This implies, if necessary:
+ * - taking cl_lock on range corresponding to concerned page
+ * - grabbing vm page
+ * - associating cl_page
+ * - proceeding to clio read
+ * - zeroing range in page
+ * - proceeding to cl_page flush
+ * - releasing cl_lock
+ *
+ * \param[in] inode inode
+ * \param[in] index page index
+ * \param[in] offset offset in page to start zeroing from
+ * \param[in] len number of bytes to zero
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure
+ */
+int ll_io_zero_page(struct inode *inode, pgoff_t index, pgoff_t offset,
+ unsigned len)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct cl_object *clob = lli->lli_clob;
+ __u16 refcheck;
+ struct lu_env *env = NULL;
+ struct cl_io *io = NULL;
+ struct cl_page *clpage = NULL;
+ struct page *vmpage = NULL;
+ loff_t from = (loff_t)index << PAGE_SHIFT;
+ struct cl_lock *lock = NULL;
+ struct cl_lock_descr *descr = NULL;
+ struct cl_2queue *queue = NULL;
+ struct cl_sync_io *anchor = NULL;
+ bool holdinglock = false;
+ bool lockedbymyself = true;
+ int rc;
+
+ ENTRY;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ RETURN(PTR_ERR(env));
+
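+ /* prepare a write io covering the single page to be zeroed */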
+ io = vvp_env_thread_io(env);
+ io->ci_obj = clob;
+ rc = cl_io_rw_init(env, io, CIT_WRITE, from, PAGE_SIZE);
+ if (rc)
+ GOTO(putenv, rc);
+
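+ /* describe a CLM_WRITE extent lock covering just this page */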
+ lock = vvp_env_lock(env);
+ descr = &lock->cll_descr;
+ descr->cld_obj = io->ci_obj;
+ descr->cld_start = cl_index(io->ci_obj, from);
+ descr->cld_end = cl_index(io->ci_obj, from + PAGE_SIZE - 1);
+ descr->cld_mode = CLM_WRITE;
+ descr->cld_enq_flags = CEF_MUST | CEF_NONBLOCK;
+
+ /* request lock for page */
+ rc = cl_lock_request(env, io, lock);
+ /* -ECANCELED indicates a matching lock with a different extent
+ * was already present, and -EEXIST indicates a matching lock
+ * on exactly the same extent was already present.
+ * In both cases it means we are covered.
+ */
+ if (rc == -ECANCELED || rc == -EEXIST)
+ rc = 0;
+ else if (rc < 0)
+ GOTO(iofini, rc);
+ else
+ holdinglock = true;
+
+ /* grab page */
+ vmpage = grab_cache_page_nowait(inode->i_mapping, index);
+ if (vmpage == NULL)
+ GOTO(rellock, rc = -EOPNOTSUPP);
+
+ if (!PageDirty(vmpage)) {
+ /* associate cl_page */
+ clpage = cl_page_find(env, clob, vmpage->index,
+ vmpage, CPT_CACHEABLE);
+ if (IS_ERR(clpage))
+ GOTO(pagefini, rc = PTR_ERR(clpage));
+
+ cl_page_assume(env, io, clpage);
+ }
+
+ if (!PageUptodate(vmpage) && !PageDirty(vmpage) &&
+ !PageWriteback(vmpage)) {
+ /* read page */
+ /* set PagePrivate2 to detect special case of empty page
+ * in osc_brw_fini_request()
+ */
+ SetPagePrivate2(vmpage);
+ rc = ll_io_read_page(env, io, clpage, NULL);
+ if (!PagePrivate2(vmpage))
+ /* PagePrivate2 was cleared in osc_brw_fini_request()
+ * meaning we read an empty page. In this case, in order
+ * to avoid allocating unnecessary block in truncated
+ * file, we must not zero and write as below. Subsequent
+ * server-side truncate will handle things correctly.
+ */
+ GOTO(clpfini, rc = 0);
+ ClearPagePrivate2(vmpage);
+ if (rc)
+ GOTO(clpfini, rc);
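+ /* the read may have dropped the vm page lock, so try to re-take
+ * it and remember whether we hold it for the cleanup path below
+ */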
+ lockedbymyself = trylock_page(vmpage);
+ cl_page_assume(env, io, clpage);
+ }
+
+ /* zero range in page */
+ zero_user(vmpage, offset, len);
+
+ if (holdinglock && clpage) {
+ /* explicitly write newly modified page */
+ queue = &io->ci_queue;
+ cl_2queue_init(queue);
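+ /* use a sync io anchor so we can wait below for this
+ * single-page write to complete
+ */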
+ anchor = &vvp_env_info(env)->vti_anchor;
+ cl_sync_io_init(anchor, 1);
+ clpage->cp_sync_io = anchor;
+ cl_2queue_add(queue, clpage);
+ rc = cl_io_submit_rw(env, io, CRT_WRITE, queue);
+ if (rc)
+ GOTO(queuefini1, rc);
+ rc = cl_sync_io_wait(env, anchor, 0);
+ if (rc)
+ GOTO(queuefini2, rc);
+ cl_page_assume(env, io, clpage);
+
+queuefini2:
+ cl_2queue_discard(env, io, queue);
+queuefini1:
+ cl_2queue_disown(env, io, queue);
+ cl_2queue_fini(env, queue);
+ }
+
+clpfini:
+ if (clpage)
+ cl_page_put(env, clpage);
+pagefini:
+ if (lockedbymyself) {
+ unlock_page(vmpage);
+ put_page(vmpage);
+ }
+rellock:
+ if (holdinglock)
+ cl_lock_release(env, lock);
+iofini:
+ cl_io_fini(env, io);
+putenv:
+ if (env)
+ cl_env_put(env, &refcheck);
+
+ RETURN(rc);
+}
+
/* If this inode has objects allocated to it (lsm != NULL), then the OST
* object(s) determine the file size and mtime. Otherwise, the MDS will
* keep these values until such a time that objects are allocated for it.
GOTO(out, rc);
}
} else {
+ unsigned int flags = 0;
+
/* For truncate and utimes sending attributes to OSTs,
* setting mtime/atime to the past will be performed
* under PW [0:EOF] extent lock (new_size:EOF for
* truncate). This is necessary due to possible time
* de-synchronization between MDT inode and OST objects
*/
- rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, 0);
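+ /* the OST side rounds truncate of an encrypted file up to an
+ * encryption unit boundary, so zero the remainder of the last
+ * page here on the client before sending the size change
+ */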
+ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
+ attr->ia_valid & ATTR_SIZE) {
+ xvalid |= OP_XVALID_FLAGS;
+ flags = LUSTRE_ENCRYPT_FL;
+ if (attr->ia_size & ~PAGE_MASK) {
+ pgoff_t offset =
+ attr->ia_size & (PAGE_SIZE - 1);
+
+ rc = ll_io_zero_page(inode,
+ attr->ia_size >> PAGE_SHIFT,
+ offset, PAGE_SIZE - offset);
+ if (rc)
+ GOTO(out, rc);
+ }
+ }
+ rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
}
}
{
int mode = de->d_inode->i_mode;
enum op_xvalid xvalid = 0;
+ int rc;
+
+ rc = llcrypt_prepare_setattr(de, attr);
+ if (rc)
+ return rc;
if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
(ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
{
struct inode *inode = vvp_object_inode(page->cp_obj);
struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ll_file_data *fd = file->private_data;
- struct ll_readahead_state *ras = &fd->fd_ras;
+ struct ll_file_data *fd = NULL;
+ struct ll_readahead_state *ras = NULL;
struct cl_2queue *queue = &io->ci_queue;
struct cl_sync_io *anchor = NULL;
struct vvp_page *vpg;
pgoff_t io_end_index;
ENTRY;
+ if (file) {
+ fd = file->private_data;
+ ras = &fd->fd_ras;
+ }
+
vpg = cl2vvp_page(cl_object_page_slice(page->cp_obj, page));
uptodate = vpg->vpg_defer_uptodate;
- if (ll_readahead_enabled(sbi) && !vpg->vpg_ra_updated) {
+ if (ll_readahead_enabled(sbi) && !vpg->vpg_ra_updated && ras) {
struct vvp_io *vio = vvp_env_io(env);
enum ras_update_flags flags = 0;
io_start_index = cl_index(io->ci_obj, io->u.ci_rw.crw_pos);
io_end_index = cl_index(io->ci_obj, io->u.ci_rw.crw_pos +
io->u.ci_rw.crw_count - 1);
- if (ll_readahead_enabled(sbi)) {
+ if (ll_readahead_enabled(sbi) && ras) {
rc2 = ll_readahead(env, io, &queue->c2_qin, ras,
uptodate, file);
CDEBUG(D_READA, DFID " %d pages read ahead at %lu\n",
__u32 enqflags = 0;
if (cl_io_is_trunc(io)) {
- if (io->u.ci_setattr.sa_attr.lvb_size == 0)
+ struct inode *inode = vvp_object_inode(io->ci_obj);
+
+ /* set enqueue flags to CEF_MUST in case of encrypted file,
+ * to prevent lockless truncate
+ */
+ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode))
+ enqflags = CEF_MUST;
+ else if (io->u.ci_setattr.sa_attr.lvb_size == 0)
enqflags = CEF_DISCARD_DATA;
} else if (cl_io_is_fallocate(io)) {
lock_start = io->u.ci_setattr.sa_falloc_offset;
oa->o_valid &= ~OBD_MD_LAYOUT_VERSION;
}
+ if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & LUSTRE_ENCRYPT_FL) {
+ /* punch must be aware we are dealing with an encrypted file */
+ struct lu_attr la = {
+ .la_valid = LA_FLAGS,
+ .la_flags = LUSTRE_ENCRYPT_FL,
+ };
+
+ rc = dt_attr_set(env, dob, &la, th);
+ if (rc)
+ GOTO(unlock, rc);
+ }
rc = dt_punch(env, dob, start, OBD_OBJECT_EOF, th);
if (rc)
GOTO(unlock, rc);
break;
p++;
}
- if (p - q == PAGE_SIZE / sizeof(*p))
+ if (p - q == PAGE_SIZE / sizeof(*p)) {
+ /* if page is empty forward info to upper layers
+ * (ll_io_zero_page) by clearing PagePrivate2
+ */
+ ClearPagePrivate2(pg->pg);
continue;
+ }
rc = llcrypt_decrypt_pagecache_blocks(pg->pg,
PAGE_SIZE, 0);
grow = true;
i_size_write(inode, start);
spin_unlock(&inode->i_lock);
+ /* if object holds encrypted content, we need to make sure we truncate
+ * on an encryption unit boundary, or subsequent reads will get
+ * corrupted content
+ */
+ if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+ start & ~LUSTRE_ENCRYPTION_MASK)
+ start = (start & LUSTRE_ENCRYPTION_MASK) +
+ LUSTRE_ENCRYPTION_UNIT_SIZE;
ll_truncate_pagecache(inode, start);
/* optimize grow case */
return;
}
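+ /* remember the real size before it may be bumped to an
+ * encryption unit boundary below
+ */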
+ size = i_size_read(inode);
inode_lock(inode);
+ /* if object holds encrypted content, we need to make sure we truncate
+ * on an encryption unit boundary, or block content will get corrupted
+ */
+ if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+ size & ~LUSTRE_ENCRYPTION_MASK)
+ inode->i_size = (size & LUSTRE_ENCRYPTION_MASK) +
+ LUSTRE_ENCRYPTION_UNIT_SIZE;
ldiskfs_truncate(inode);
inode_unlock(inode);
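+ /* restore the real size if it was bumped to an encryption unit
+ * boundary for the truncate above
+ */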
+ if (inode->i_size != size) {
+ spin_lock(&inode->i_lock);
+ i_size_write(inode, size);
+ LDISKFS_I(inode)->i_disksize = size;
+ spin_unlock(&inode->i_lock);
+ osd_dirty_inode(inode, I_DIRTY_DATASYNC);
+ }
/*
* For a partial-page truncate, flush the page to disk immediately to
* avoid data corruption during direct disk write. b=17397
*/
- size = i_size_read(inode);
if ((size & ~PAGE_MASK) == 0)
return;
if (osd_use_page_cache(d)) {
* dmu_tx_hold_sa() and if off < size, dmu_tx_hold_free()
* called and then assigned to a transaction group.
*/
-static int __osd_object_punch(objset_t *os, dnode_t *dn, dmu_tx_t *tx,
- uint64_t size, uint64_t off, uint64_t len)
+static int __osd_object_punch(struct osd_object *obj, objset_t *os,
+ dmu_tx_t *tx, uint64_t off, uint64_t len)
{
+ dnode_t *dn = obj->oo_dn;
+ uint64_t size = obj->oo_attr.la_size;
int rc = 0;
/* Assert that the transaction has been assigned to a
if (len == DMU_OBJECT_END && size == off)
return 0;
+ /* if object holds encrypted content, we need to make sure we truncate
+ * on an encryption unit boundary, or subsequent reads will get
+ * corrupted content
+ */
+ if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+     off & ~LUSTRE_ENCRYPTION_MASK) {
+ if (len != DMU_OBJECT_END)
+ len -= LUSTRE_ENCRYPTION_UNIT_SIZE -
+ (off & ~LUSTRE_ENCRYPTION_MASK);
+ off = (off & LUSTRE_ENCRYPTION_MASK) +
+ LUSTRE_ENCRYPTION_UNIT_SIZE;
+ }
+
/* XXX: dnode_free_range() can be used to save on dnode lookup */
if (off < size)
dmu_free_range(os, dn->dn_object, off, len, tx);
len = end - start;
write_unlock(&obj->oo_attr_lock);
- rc = __osd_object_punch(osd->od_os, obj->oo_dn, oh->ot_tx,
- obj->oo_attr.la_size, start, len);
+ rc = __osd_object_punch(obj, osd->od_os, oh->ot_tx, start, len);
+
/* set new size */
if (len == DMU_OBJECT_END) {
write_lock(&obj->oo_attr_lock);
len = end - start;
/* declare we'll free some blocks ... */
+ /* if object holds encrypted content, we need to make sure we truncate
+ * on an encryption unit boundary, or subsequent reads will get
+ * corrupted content
+ */
+ if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL &&
+ start & ~LUSTRE_ENCRYPTION_MASK)
+ start = (start & LUSTRE_ENCRYPTION_MASK) +
+ LUSTRE_ENCRYPTION_UNIT_SIZE;
if (start < obj->oo_attr.la_size) {
read_unlock(&obj->oo_attr_lock);
dmu_tx_mark_netfree(oh->ot_tx);