LU-10681: Disable tiny writes for append

[fs/lustre-release.git] / lustre / llite / file.c
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index e33d030..79f5b2a 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -1572,70 +1572,31 @@ out:
   * and will write it out.  This saves a lot of processing time.
   *
   * All writes here are within one page, so exclusion is handled by the page
- * lock on the vm page.  Exception is appending, which requires locking the
- * full file to handle size issues.  We do not do tiny writes for writes which
- * touch multiple pages because it's very unlikely multiple sequential pages
+ * lock on the vm page.  We do not do tiny writes for writes which touch
+ * multiple pages because it's very unlikely multiple sequential pages are
   * are already dirty.
   *
   * We limit these to < PAGE_SIZE because PAGE_SIZE writes are relatively common
   * and are unlikely to be to already dirty pages.
   *
- * Attribute updates are important here, we do it in ll_tiny_write_end.
+ * Attribute updates are important here, we do them in ll_tiny_write_end.
   */
  static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
  {
         ssize_t count = iov_iter_count(iter);
         struct file *file = iocb->ki_filp;
         struct inode *inode = file_inode(file);
-       struct ll_inode_info *lli = ll_i2info(inode);
-       struct range_lock range;
         ssize_t result = 0;
-       bool append = false;
  
         ENTRY;
  
-       /* NB: we can't do direct IO for tiny writes because they use the page
-        * cache, and we can't do sync writes because tiny writes can't flush
-        * pages.
-        */
-       if (file->f_flags & (O_DIRECT | O_SYNC))
-               RETURN(0);
-
-       /* It is relatively unlikely we will overwrite a full dirty page, so
-        * limit tiny writes to < PAGE_SIZE
+       /* Restrict writes to single page and < PAGE_SIZE.  See comment at top
+        * of function for why.
          */
-       if (count >= PAGE_SIZE)
+       if (count >= PAGE_SIZE ||
+           (iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
                 RETURN(0);
  
-       /* For append writes, we must take the range lock to protect size
-        * and also move pos to current size before writing.
-        */
-       if (file->f_flags & O_APPEND) {
-               struct lu_env *env;
-               __u16 refcheck;
-
-               append = true;
-               range_lock_init(&range, 0, LUSTRE_EOF);
-               result = range_lock(&lli->lli_write_tree, &range);
-               if (result)
-                       RETURN(result);
-               env = cl_env_get(&refcheck);
-               if (IS_ERR(env))
-                       GOTO(out, result = PTR_ERR(env));
-               ll_merge_attr(env, inode);
-               cl_env_put(env, &refcheck);
-               iocb->ki_pos = i_size_read(inode);
-       }
-
-       /* Does this write touch multiple pages?
-        *
-        * This partly duplicates the PAGE_SIZE check above, but must come
-        * after range locking for append writes because it depends on the
-        * write position (ki_pos).
-        */
-       if ((iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
-               goto out;
-
         result = __generic_file_write_iter(iocb, iter);
  
         /* If the page is not already dirty, ll_tiny_write_begin returns
@@ -1650,10 +1611,6 @@ static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
                 ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED);
         }
  
-out:
-       if (append)
-               range_unlock(&lli->lli_write_tree, &range);
-
         CDEBUG(D_VFSTRACE, "result: %zu, original count %zu\n", result, count);
  
         RETURN(result);
@@ -1666,12 +1623,19 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
         struct vvp_io_args *args;
         struct lu_env *env;
-       ssize_t rc_tiny, rc_normal;
+       ssize_t rc_tiny = 0, rc_normal;
         __u16 refcheck;
  
         ENTRY;
  
-       rc_tiny = ll_do_tiny_write(iocb, from);
+       /* NB: we can't do direct IO for tiny writes because they use the page
+        * cache, we can't do sync writes because tiny writes can't flush
+        * pages, and we can't do append writes because we can't guarantee the
+        * required DLM locks are held to protect file size.
+        */
+       if (ll_sbi_has_tiny_write(ll_i2sbi(file_inode(iocb->ki_filp))) &&
+           !(iocb->ki_filp->f_flags & (O_DIRECT | O_SYNC | O_APPEND)))
+               rc_tiny = ll_do_tiny_write(iocb, from);
  
         /* In case of error, go on and try normal write - Only stop if tiny
          * write completed I/O.