* and will write it out. This saves a lot of processing time.
*
* All writes here are within one page, so exclusion is handled by the page
- * lock on the vm page. Exception is appending, which requires locking the
- * full file to handle size issues. We do not do tiny writes for writes which
- * touch multiple pages because it's very unlikely multiple sequential pages
+ * lock on the vm page. We do not do tiny writes for writes which touch
+ * multiple pages because it's very unlikely multiple sequential pages
* are already dirty.
*
* We limit these to < PAGE_SIZE because PAGE_SIZE writes are relatively common
* and are unlikely to be to already dirty pages.
*
- * Attribute updates are important here, we do it in ll_tiny_write_end.
+ * Attribute updates are important here, we do them in ll_tiny_write_end.
*/
static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
{
ssize_t count = iov_iter_count(iter);
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct ll_inode_info *lli = ll_i2info(inode);
- struct range_lock range;
ssize_t result = 0;
- bool append = false;
ENTRY;
- /* NB: we can't do direct IO for tiny writes because they use the page
- * cache, and we can't do sync writes because tiny writes can't flush
- * pages.
- */
- if (file->f_flags & (O_DIRECT | O_SYNC))
- RETURN(0);
-
- /* It is relatively unlikely we will overwrite a full dirty page, so
- * limit tiny writes to < PAGE_SIZE
+ /* Restrict writes to single page and < PAGE_SIZE. See comment at top
+ * of function for why.
*/
- if (count >= PAGE_SIZE)
+ if (count >= PAGE_SIZE ||
+ (iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
RETURN(0);
- /* For append writes, we must take the range lock to protect size
- * and also move pos to current size before writing.
- */
- if (file->f_flags & O_APPEND) {
- struct lu_env *env;
- __u16 refcheck;
-
- append = true;
- range_lock_init(&range, 0, LUSTRE_EOF);
- result = range_lock(&lli->lli_write_tree, &range);
- if (result)
- RETURN(result);
- env = cl_env_get(&refcheck);
- if (IS_ERR(env))
- GOTO(out, result = PTR_ERR(env));
- ll_merge_attr(env, inode);
- cl_env_put(env, &refcheck);
- iocb->ki_pos = i_size_read(inode);
- }
-
- /* Does this write touch multiple pages?
- *
- * This partly duplicates the PAGE_SIZE check above, but must come
- * after range locking for append writes because it depends on the
- * write position (ki_pos).
- */
- if ((iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
- goto out;
-
result = __generic_file_write_iter(iocb, iter);
/* If the page is not already dirty, ll_tiny_write_begin returns
ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED);
}
-out:
- if (append)
- range_unlock(&lli->lli_write_tree, &range);
-
CDEBUG(D_VFSTRACE, "result: %zu, original count %zu\n", result, count);
RETURN(result);
{
struct vvp_io_args *args;
struct lu_env *env;
- ssize_t rc_tiny, rc_normal;
+ ssize_t rc_tiny = 0, rc_normal;
__u16 refcheck;
ENTRY;
- rc_tiny = ll_do_tiny_write(iocb, from);
+ /* NB: we can't do direct IO for tiny writes because they use the page
+ * cache, we can't do sync writes because tiny writes can't flush
+ * pages, and we can't do append writes because we can't guarantee the
+ * required DLM locks are held to protect file size.
+ */
+ if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(iocb->ki_filp))) &&
+ !(iocb->ki_filp->f_flags & (O_DIRECT | O_SYNC | O_APPEND)))
+ rc_tiny = ll_do_tiny_write(iocb, from);
/* In case of error, go on and try normal write - Only stop if tiny
* write completed I/O.
}
LPROC_SEQ_FOPS_RO(ll_sbi_flags);
+static int ll_tiny_write_seq_show(struct seq_file *m, void *v)
+{
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+ seq_printf(m, "%u\n", !!(sbi->ll_flags & LL_SBI_TINY_WRITE));
+ return 0;
+}
+
+static ssize_t ll_tiny_write_seq_write(
+ struct file *file, const char __user *buffer, size_t count, loff_t *off)
+{
+ struct seq_file *m = file->private_data;
+ struct super_block *sb = m->private;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ bool val;
+ int rc;
+
+ rc = kstrtobool_from_user(buffer, count, &val);
+ if (rc)
+ return rc;
+
+ spin_lock(&sbi->ll_lock);
+ if (val)
+ sbi->ll_flags |= LL_SBI_TINY_WRITE;
+ else
+ sbi->ll_flags &= ~LL_SBI_TINY_WRITE;
+ spin_unlock(&sbi->ll_lock);
+
+ return count;
+}
+LPROC_SEQ_FOPS(ll_tiny_write);
+
static int ll_fast_read_seq_show(struct seq_file *m, void *v)
{
struct super_block *sb = m->private;
.fops = &ll_fast_read_fops, },
{ .name = "pio",
.fops = &ll_pio_fops, },
+ { .name = "tiny_write",
+ .fops = &ll_tiny_write_fops, },
{ NULL }
};
}
run_test 8 "remove of open special file on other node =========="
-test_9() {
+test_9a() {
MTPT=1
local dir
> $DIR2/f9
[ "`cat $DIR1/f9`" = "abcdefghijkl" ] || \
error "`od -a $DIR1/f9` != abcdefghijkl"
}
-run_test 9 "append of file with sub-page size on multiple mounts"
+run_test 9a "append of file with sub-page size on multiple mounts"
+
+#LU-10681 - tiny writes & appending to sparse striped file
+test_9b() {
+ [[ $OSTCOUNT -ge 2 ]] || { skip "needs >= 2 OSTs"; return; }
+
+ $LFS setstripe -c 2 -S 1M $DIR/$tfile
+ echo "foo" >> $DIR/$tfile
+ dd if=/dev/zero of=$DIR2/$tfile bs=1M count=1 seek=1 conv=notrunc ||
+ error "sparse dd $DIR2/$tfile failed"
+ echo "foo" >> $DIR/$tfile
+
+ data=$(dd if=$DIR2/$tfile bs=1 count=3 skip=$((2 * 1048576)) conv=notrunc)
+ echo "Data read (expecting 'foo')": $data
+ [ "$data" = "foo" ] || error "append to sparse striped file failed"
+}
+run_test 9b "append to striped sparse file"
test_10a() {
MTPT=1