LU-9409 llite: Add tiny write support

author Patrick Farrell <paf@cray.com>

Mon, 5 Feb 2018 15:55:35 +0000 (09:55 -0600)

committer Oleg Drokin <oleg.drokin@intel.com>

Wed, 14 Feb 2018 00:51:28 +0000 (00:51 +0000)
author Patrick Farrell <paf@cray.com>
Mon, 5 Feb 2018 15:55:35 +0000 (09:55 -0600)
committer Oleg Drokin <oleg.drokin@intel.com>
Wed, 14 Feb 2018 00:51:28 +0000 (00:51 +0000)
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h

index 7a9b999..789e0d7 100644 (file)
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -869,6 +869,13 @@ struct cl_page_operations {
           */
          int (*cpo_is_vmlocked)(const struct lu_env *env,
                                 const struct cl_page_slice *slice);
           */
          int (*cpo_is_vmlocked)(const struct lu_env *env,
                                 const struct cl_page_slice *slice);
+
+       /**
+        * Update file attributes when all we have is this page.  Used for tiny
+        * writes to update attributes when we don't have a full cl_io.
+        */
+       void (*cpo_page_touch)(const struct lu_env *env,
+                              const struct cl_page_slice *slice, size_t to);
          /**
           * Page destruction.
           */
          /**
           * Page destruction.
           */
@@ -2227,6 +2234,8 @@ void    cl_page_discard(const struct lu_env *env, struct cl_io *io,
  void    cl_page_delete(const struct lu_env *env, struct cl_page *pg);
  int     cl_page_is_vmlocked(const struct lu_env *env,
                             const struct cl_page *pg);
  void    cl_page_delete(const struct lu_env *env, struct cl_page *pg);
  int     cl_page_is_vmlocked(const struct lu_env *env,
                             const struct cl_page *pg);
+void   cl_page_touch(const struct lu_env *env, const struct cl_page *pg,
+                     size_t to);
  void    cl_page_export(const struct lu_env *env,
                        struct cl_page *pg, int uptodate);
  loff_t  cl_offset(const struct cl_object *obj, pgoff_t idx);
  void    cl_page_export(const struct lu_env *env,
                        struct cl_page *pg, int uptodate);
  loff_t  cl_offset(const struct cl_object *obj, pgoff_t idx);
diff --git a/lustre/llite/file.c b/lustre/llite/file.c

index eb18c91..68adb40 100644 (file)
--- a/lustre/llite/file.c
+++ b/lustre/llite/file.c
@@ -1563,6 +1563,101 @@ out:
         return result;
  }
  
         return result;
  }
  
+/**
+ * Similar trick to ll_do_fast_read, this improves write speed for tiny writes.
+ * If a page is already in the page cache and dirty (and some other things -
+ * See ll_tiny_write_begin for the instantiation of these rules), then we can
+ * write to it without doing a full I/O, because Lustre already knows about it
+ * and will write it out.  This saves a lot of processing time.
+ *
+ * All writes here are within one page, so exclusion is handled by the page
+ * lock on the vm page.  Exception is appending, which requires locking the
+ * full file to handle size issues.  We do not do tiny writes for writes which
+ * touch multiple pages because it's very unlikely multiple sequential pages
+ * are already dirty.
+ *
+ * We limit these to < PAGE_SIZE because PAGE_SIZE writes are relatively common
+ * and are unlikely to be to already dirty pages.
+ *
+ * Attribute updates are important here, we do it in ll_tiny_write_end.
+ */
+static ssize_t ll_do_tiny_write(struct kiocb *iocb, struct iov_iter *iter)
+{
+       ssize_t count = iov_iter_count(iter);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct range_lock range;
+       ssize_t result = 0;
+       bool append = false;
+
+       ENTRY;
+
+       /* NB: we can't do direct IO for tiny writes because they use the page
+        * cache, and we can't do sync writes because tiny writes can't flush
+        * pages.
+        */
+       if (file->f_flags & (O_DIRECT | O_SYNC))
+               RETURN(0);
+
+       /* It is relatively unlikely we will overwrite a full dirty page, so
+        * limit tiny writes to < PAGE_SIZE
+        */
+       if (count >= PAGE_SIZE)
+               RETURN(0);
+
+       /* For append writes, we must take the range lock to protect size
+        * and also move pos to current size before writing.
+        */
+       if (file->f_flags & O_APPEND) {
+               struct lu_env *env;
+               __u16 refcheck;
+
+               append = true;
+               range_lock_init(&range, 0, LUSTRE_EOF);
+               result = range_lock(&lli->lli_write_tree, &range);
+               if (result)
+                       RETURN(result);
+               env = cl_env_get(&refcheck);
+               if (IS_ERR(env))
+                       GOTO(out, result = PTR_ERR(env));
+               ll_merge_attr(env, inode);
+               cl_env_put(env, &refcheck);
+               iocb->ki_pos = i_size_read(inode);
+       }
+
+       /* Does this write touch multiple pages?
+        *
+        * This partly duplicates the PAGE_SIZE check above, but must come
+        * after range locking for append writes because it depends on the
+        * write position (ki_pos).
+        */
+       if ((iocb->ki_pos & (PAGE_SIZE-1)) + count > PAGE_SIZE)
+               goto out;
+
+       result = __generic_file_write_iter(iocb, iter);
+
+       /* If the page is not already dirty, ll_tiny_write_begin returns
+        * -ENODATA.  We continue on to normal write.
+        */
+       if (result == -ENODATA)
+               result = 0;
+
+       if (result > 0) {
+               ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_WRITE_BYTES,
+                                  result);
+               ll_file_set_flag(ll_i2info(inode), LLIF_DATA_MODIFIED);
+       }
+
+out:
+       if (append)
+               range_unlock(&lli->lli_write_tree, &range);
+
+       CDEBUG(D_VFSTRACE, "result: %zu, original count %zu\n", result, count);
+
+       RETURN(result);
+}
+
  /*
   * Write to a file (through the page cache).
   */
  /*
   * Write to a file (through the page cache).
   */
@@ -1570,9 +1665,19 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
  {
         struct vvp_io_args *args;
         struct lu_env *env;
  {
         struct vvp_io_args *args;
         struct lu_env *env;
-       ssize_t result;
+       ssize_t rc_tiny, rc_normal;
         __u16 refcheck;
  
         __u16 refcheck;
  
+       ENTRY;
+
+       rc_tiny = ll_do_tiny_write(iocb, from);
+
+       /* In case of error, go on and try normal write - Only stop if tiny
+        * write completed I/O.
+        */
+       if (iov_iter_count(from) == 0)
+               GOTO(out, rc_normal = rc_tiny);
+
         env = cl_env_get(&refcheck);
         if (IS_ERR(env))
                 return PTR_ERR(env);
         env = cl_env_get(&refcheck);
         if (IS_ERR(env))
                 return PTR_ERR(env);
@@ -1581,10 +1686,21 @@ static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
         args->u.normal.via_iter = from;
         args->u.normal.via_iocb = iocb;
  
         args->u.normal.via_iter = from;
         args->u.normal.via_iocb = iocb;
  
-       result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
+       rc_normal = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
                                     &iocb->ki_pos, iov_iter_count(from));
                                     &iocb->ki_pos, iov_iter_count(from));
+
+       /* On success, combine bytes written. */
+       if (rc_tiny >= 0 && rc_normal > 0)
+               rc_normal += rc_tiny;
+       /* On error, only return error from normal write if tiny write did not
+        * write any bytes.  Otherwise return bytes written by tiny write.
+        */
+       else if (rc_tiny > 0)
+               rc_normal = rc_tiny;
+
         cl_env_put(env, &refcheck);
         cl_env_put(env, &refcheck);
-       return result;
+out:
+       RETURN(rc_normal);
  }
  
  #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
  }
  
  #ifndef HAVE_FILE_OPERATIONS_READ_WRITE_ITER
@@ -1694,31 +1810,24 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
  static ssize_t ll_file_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
  {
  static ssize_t ll_file_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
  {
-       struct lu_env *env;
         struct iovec   iov = { .iov_base = (void __user *)buf,
                                .iov_len = count };
         struct iovec   iov = { .iov_base = (void __user *)buf,
                                .iov_len = count };
-        struct kiocb  *kiocb;
-        ssize_t        result;
-       __u16          refcheck;
-        ENTRY;
+       struct kiocb   kiocb;
+       ssize_t        result;
  
  
-        env = cl_env_get(&refcheck);
-        if (IS_ERR(env))
-                RETURN(PTR_ERR(env));
+       ENTRY;
  
  
-       kiocb = &ll_env_info(env)->lti_kiocb;
-        init_sync_kiocb(kiocb, file);
-        kiocb->ki_pos = *ppos;
+       init_sync_kiocb(&kiocb, file);
+       kiocb.ki_pos = *ppos;
  #ifdef HAVE_KIOCB_KI_LEFT
  #ifdef HAVE_KIOCB_KI_LEFT
-       kiocb->ki_left = count;
+       kiocb.ki_left = count;
  #elif defined(HAVE_KI_NBYTES)
  #elif defined(HAVE_KI_NBYTES)
-       kiocb->ki_nbytes = count;
+       kiocb.ki_nbytes = count;
  #endif
  
  #endif
  
-       result = ll_file_aio_write(kiocb, &iov, 1, kiocb->ki_pos);
-       *ppos = kiocb->ki_pos;
+       result = ll_file_aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
+       *ppos = kiocb.ki_pos;
  
  
-       cl_env_put(env, &refcheck);
         RETURN(result);
  }
  #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
         RETURN(result);
  }
  #endif /* !HAVE_FILE_OPERATIONS_READ_WRITE_ITER */
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h

index 0f345dc..a249fd9 100644 (file)
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -1024,7 +1024,6 @@ struct ll_thread_info {
         struct iov_iter         lti_iter;
         struct vvp_io_args      lti_args;
         struct ra_io_arg        lti_ria;
         struct iov_iter         lti_iter;
         struct vvp_io_args      lti_args;
         struct ra_io_arg        lti_ria;
-       struct kiocb            lti_kiocb;
         struct ll_cl_context    lti_io_ctx;
  };
  
         struct ll_cl_context    lti_io_ctx;
  };
  
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c

index 8f94c83..2f1008b 100644 (file)
--- a/lustre/llite/rw26.c
+++ b/lustre/llite/rw26.c
@@ -637,13 +637,23 @@ out:
         return result;
  }
  
         return result;
  }
  
+static int ll_tiny_write_begin(struct page *vmpage)
+{
+       /* Page must be present, up to date, dirty, and not in writeback. */
+       if (!vmpage || !PageUptodate(vmpage) || !PageDirty(vmpage) ||
+           PageWriteback(vmpage))
+               return -ENODATA;
+
+       return 0;
+}
+
  static int ll_write_begin(struct file *file, struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned flags,
                           struct page **pagep, void **fsdata)
  {
  static int ll_write_begin(struct file *file, struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned flags,
                           struct page **pagep, void **fsdata)
  {
-       struct ll_cl_context *lcc;
+       struct ll_cl_context *lcc = NULL;
         const struct lu_env  *env = NULL;
         const struct lu_env  *env = NULL;
-       struct cl_io   *io;
+       struct cl_io   *io = NULL;
         struct cl_page *page = NULL;
  
         struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
         struct cl_page *page = NULL;
  
         struct cl_object *clob = ll_i2info(mapping->host)->lli_clob;
@@ -658,8 +668,9 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
  
         lcc = ll_cl_find(file);
         if (lcc == NULL) {
  
         lcc = ll_cl_find(file);
         if (lcc == NULL) {
-               io = NULL;
-               GOTO(out, result = -EIO);
+               vmpage = grab_cache_page_nowait(mapping, index);
+               result = ll_tiny_write_begin(vmpage);
+               GOTO(out, result);
         }
  
         env = lcc->lcc_env;
         }
  
         env = lcc->lcc_env;
@@ -672,6 +683,7 @@ static int ll_write_begin(struct file *file, struct address_space *mapping,
                  * problem submitting the I/O. */
                 GOTO(out, result = -EBUSY);
         }
                  * problem submitting the I/O. */
                 GOTO(out, result = -EBUSY);
         }
+
  again:
         /* To avoid deadlock, try to lock page first. */
         vmpage = grab_cache_page_nowait(mapping, index);
  again:
         /* To avoid deadlock, try to lock page first. */
         vmpage = grab_cache_page_nowait(mapping, index);
@@ -733,7 +745,6 @@ again:
  
                                 if (result == -EAGAIN)
                                         goto again;
  
                                 if (result == -EAGAIN)
                                         goto again;
-
                                 GOTO(out, result);
                         }
                 }
                                 GOTO(out, result);
                         }
                 }
@@ -745,6 +756,7 @@ out:
                         unlock_page(vmpage);
                         put_page(vmpage);
                 }
                         unlock_page(vmpage);
                         put_page(vmpage);
                 }
+               /* On tiny_write failure, page and io are always null. */
                 if (!IS_ERR_OR_NULL(page)) {
                         lu_ref_del(&page->cp_reference, "cl_io", io);
                         cl_page_put(env, page);
                 if (!IS_ERR_OR_NULL(page)) {
                         lu_ref_del(&page->cp_reference, "cl_io", io);
                         cl_page_put(env, page);
@@ -758,6 +770,47 @@ out:
         RETURN(result);
  }
  
         RETURN(result);
  }
  
+static int ll_tiny_write_end(struct file *file, struct address_space *mapping,
+                            loff_t pos, unsigned int len, unsigned int copied,
+                            struct page *vmpage)
+{
+       struct cl_page *clpage = (struct cl_page *) vmpage->private;
+       loff_t kms = pos+copied;
+       loff_t to = kms & (PAGE_SIZE-1) ? kms & (PAGE_SIZE-1) : PAGE_SIZE;
+       __u16 refcheck;
+       struct lu_env *env = cl_env_get(&refcheck);
+       int rc = 0;
+
+       ENTRY;
+
+       if (IS_ERR(env)) {
+               rc = PTR_ERR(env);
+               goto out;
+       }
+
+       /* This page is dirty in cache, so it should have a cl_page pointer
+        * set in vmpage->private.
+        */
+       LASSERT(clpage != NULL);
+
+       if (copied == 0)
+               goto out_env;
+
+       /* Update the underlying size information in the OSC/LOV objects this
+        * page is part of.
+        */
+       cl_page_touch(env, clpage, to);
+
+out_env:
+       cl_env_put(env, &refcheck);
+
+out:
+       /* Must return page unlocked. */
+       unlock_page(vmpage);
+
+       RETURN(rc);
+}
+
  static int ll_write_end(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned copied,
                         struct page *vmpage, void *fsdata)
  static int ll_write_end(struct file *file, struct address_space *mapping,
                         loff_t pos, unsigned len, unsigned copied,
                         struct page *vmpage, void *fsdata)
@@ -774,6 +827,14 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
  
         put_page(vmpage);
  
  
         put_page(vmpage);
  
+       CDEBUG(D_VFSTRACE, "pos %llu, len %u, copied %u\n", pos, len, copied);
+
+       if (lcc == NULL) {
+               result = ll_tiny_write_end(file, mapping, pos, len, copied,
+                                          vmpage);
+               GOTO(out, result);
+       }
+
         LASSERT(lcc != NULL);
         env  = lcc->lcc_env;
         page = lcc->lcc_page;
         LASSERT(lcc != NULL);
         env  = lcc->lcc_env;
         page = lcc->lcc_page;
@@ -821,6 +882,9 @@ static int ll_write_end(struct file *file, struct address_space *mapping,
  
         if (result < 0)
                 io->ci_result = result;
  
         if (result < 0)
                 io->ci_result = result;
+
+
+out:
         RETURN(result >= 0 ? copied : result);
  }
  
         RETURN(result >= 0 ? copied : result);
  }
  
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c

index 1dc6e9f..39d3800 100644 (file)
--- a/lustre/obdclass/cl_object.c
+++ b/lustre/obdclass/cl_object.c
@@ -844,13 +844,11 @@ EXPORT_SYMBOL(cl_env_put);
   */
  void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr)
  {
   */
  void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr)
  {
-        ENTRY;
          lvb->lvb_size   = attr->cat_size;
          lvb->lvb_mtime  = attr->cat_mtime;
          lvb->lvb_atime  = attr->cat_atime;
          lvb->lvb_ctime  = attr->cat_ctime;
          lvb->lvb_blocks = attr->cat_blocks;
          lvb->lvb_size   = attr->cat_size;
          lvb->lvb_mtime  = attr->cat_mtime;
          lvb->lvb_atime  = attr->cat_atime;
          lvb->lvb_ctime  = attr->cat_ctime;
          lvb->lvb_blocks = attr->cat_blocks;
-        EXIT;
  }
  
  /**
  }
  
  /**
@@ -860,13 +858,11 @@ void cl_attr2lvb(struct ost_lvb *lvb, const struct cl_attr *attr)
   */
  void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
  {
   */
  void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
  {
-        ENTRY;
          attr->cat_size   = lvb->lvb_size;
          attr->cat_mtime  = lvb->lvb_mtime;
          attr->cat_atime  = lvb->lvb_atime;
          attr->cat_ctime  = lvb->lvb_ctime;
          attr->cat_blocks = lvb->lvb_blocks;
          attr->cat_size   = lvb->lvb_size;
          attr->cat_mtime  = lvb->lvb_mtime;
          attr->cat_atime  = lvb->lvb_atime;
          attr->cat_ctime  = lvb->lvb_ctime;
          attr->cat_blocks = lvb->lvb_blocks;
-        EXIT;
  }
  EXPORT_SYMBOL(cl_lvb2attr);
  
  }
  EXPORT_SYMBOL(cl_lvb2attr);
  
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c

index 10c3fed..301af05 100644 (file)
--- a/lustre/obdclass/cl_page.c
+++ b/lustre/obdclass/cl_page.c
@@ -804,6 +804,22 @@ int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
  }
  EXPORT_SYMBOL(cl_page_is_vmlocked);
  
  }
  EXPORT_SYMBOL(cl_page_is_vmlocked);
  
+void cl_page_touch(const struct lu_env *env, const struct cl_page *pg,
+                 size_t to)
+{
+       const struct cl_page_slice *slice;
+
+       ENTRY;
+
+       list_for_each_entry(slice, &pg->cp_layers, cpl_linkage) {
+               if (slice->cpl_ops->cpo_page_touch != NULL)
+                       (*slice->cpl_ops->cpo_page_touch)(env, slice, to);
+       }
+
+       EXIT;
+}
+EXPORT_SYMBOL(cl_page_touch);
+
  static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
  {
          ENTRY;
  static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
  {
          ENTRY;
diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h

index e9df607..3e6cefd 100644 (file)
--- a/lustre/osc/osc_internal.h
+++ b/lustre/osc/osc_internal.h
@@ -160,6 +160,8 @@ int osc_quotactl(struct obd_device *unused, struct obd_export *exp,
  void osc_inc_unstable_pages(struct ptlrpc_request *req);
  void osc_dec_unstable_pages(struct ptlrpc_request *req);
  bool osc_over_unstable_soft_limit(struct client_obd *cli);
  void osc_inc_unstable_pages(struct ptlrpc_request *req);
  void osc_dec_unstable_pages(struct ptlrpc_request *req);
  bool osc_over_unstable_soft_limit(struct client_obd *cli);
+void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj,
+                      pgoff_t idx, size_t to);
  
  struct ldlm_lock *osc_obj_dlmlock_at_pgoff(const struct lu_env *env,
                                            struct osc_object *obj,
  
  struct ldlm_lock *osc_obj_dlmlock_at_pgoff(const struct lu_env *env,
                                            struct osc_object *obj,
diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c

index 1de6c8b..8755b4f 100644 (file)
--- a/lustre/osc/osc_io.c
+++ b/lustre/osc/osc_io.c
@@ -214,34 +214,28 @@ int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
  EXPORT_SYMBOL(osc_io_submit);
  
  /**
  EXPORT_SYMBOL(osc_io_submit);
  
  /**
- * This is called when a page is accessed within file in a way that creates
- * new page, if one were missing (i.e., if there were a hole at that place in
- * the file, or accessed page is beyond the current file size).
+ * This is called to update the attributes when modifying a specific page,
+ * both when making new pages and when doing updates to existing cached pages.
   *
   * Expand stripe KMS if necessary.
   */
   *
   * Expand stripe KMS if necessary.
   */
-static void osc_page_touch_at(const struct lu_env *env,
-                             struct cl_object *obj, pgoff_t idx, size_t to)
+void osc_page_touch_at(const struct lu_env *env, struct cl_object *obj,
+                      pgoff_t idx, size_t to)
  {
  {
-        struct lov_oinfo  *loi  = cl2osc(obj)->oo_oinfo;
-        struct cl_attr    *attr = &osc_env_info(env)->oti_attr;
-        int valid;
-        __u64 kms;
+       struct lov_oinfo  *loi  = cl2osc(obj)->oo_oinfo;
+       struct cl_attr    *attr = &osc_env_info(env)->oti_attr;
+       int valid;
+       __u64 kms;
  
  
-        /* offset within stripe */
-        kms = cl_offset(obj, idx) + to;
+       ENTRY;
  
  
-        cl_object_attr_lock(obj);
-        /*
-         * XXX old code used
-         *
-         *         ll_inode_size_lock(inode, 0); lov_stripe_lock(lsm);
-         *
-         * here
-         */
+       /* offset within stripe */
+       kms = cl_offset(obj, idx) + to;
+
+       cl_object_attr_lock(obj);
         CDEBUG(D_INODE, "stripe KMS %sincreasing %llu->%llu %llu\n",
         CDEBUG(D_INODE, "stripe KMS %sincreasing %llu->%llu %llu\n",
-               kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
-               loi->loi_lvb.lvb_size);
+              kms > loi->loi_kms ? "" : "not ", loi->loi_kms, kms,
+              loi->loi_lvb.lvb_size);
  
         attr->cat_mtime = attr->cat_ctime = ktime_get_real_seconds();
         valid = CAT_MTIME | CAT_CTIME;
  
         attr->cat_mtime = attr->cat_ctime = ktime_get_real_seconds();
         valid = CAT_MTIME | CAT_CTIME;
@@ -255,6 +249,8 @@ static void osc_page_touch_at(const struct lu_env *env,
         }
         cl_object_attr_update(env, obj, attr, valid);
         cl_object_attr_unlock(obj);
         }
         cl_object_attr_update(env, obj, attr, valid);
         cl_object_attr_unlock(obj);
+
+       EXIT;
  }
  
  int osc_io_commit_async(const struct lu_env *env,
  }
  
  int osc_io_commit_async(const struct lu_env *env,
diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c

index eae7a92..8bc7fca 100644 (file)
--- a/lustre/osc/osc_page.c
+++ b/lustre/osc/osc_page.c
@@ -250,12 +250,22 @@ static int osc_page_flush(const struct lu_env *env,
         RETURN(rc);
  }
  
         RETURN(rc);
  }
  
+static void osc_page_touch(const struct lu_env *env,
+                         const struct cl_page_slice *slice, size_t to)
+{
+       struct osc_page *opg = cl2osc_page(slice);
+       struct cl_object *obj = opg->ops_cl.cpl_obj;
+
+       osc_page_touch_at(env, obj, osc_index(opg), to);
+}
+
  static const struct cl_page_operations osc_page_ops = {
         .cpo_print         = osc_page_print,
         .cpo_delete        = osc_page_delete,
         .cpo_clip           = osc_page_clip,
         .cpo_cancel         = osc_page_cancel,
  static const struct cl_page_operations osc_page_ops = {
         .cpo_print         = osc_page_print,
         .cpo_delete        = osc_page_delete,
         .cpo_clip           = osc_page_clip,
         .cpo_cancel         = osc_page_cancel,
-       .cpo_flush          = osc_page_flush
+       .cpo_flush          = osc_page_flush,
+       .cpo_page_touch    = osc_page_touch,
  };
  
  int osc_page_init(const struct lu_env *env, struct cl_object *obj,
  };
  
  int osc_page_init(const struct lu_env *env, struct cl_object *obj,
diff --git a/lustre/tests/functions.sh b/lustre/tests/functions.sh

index 00889e4..f703f40 100644 (file)
--- a/lustre/tests/functions.sh
+++ b/lustre/tests/functions.sh
@@ -804,6 +804,7 @@ run_write_disjoint() {
      # threads per client
      wdisjoint_THREADS=${wdisjoint_THREADS:-4}
      wdisjoint_REP=${wdisjoint_REP:-10000}
      # threads per client
      wdisjoint_THREADS=${wdisjoint_THREADS:-4}
      wdisjoint_REP=${wdisjoint_REP:-10000}
+       chunk_size_limit=$1
  
      if [ "$NFSCLIENT" ]; then
          skip "skipped for NFSCLIENT mode"
  
      if [ "$NFSCLIENT" ]; then
          skip "skipped for NFSCLIENT mode"
@@ -823,7 +824,8 @@ run_write_disjoint() {
      # mpi_run uses mpiuser
      chmod 0777 $testdir
  
      # mpi_run uses mpiuser
      chmod 0777 $testdir
  
-    local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP"
+       local cmd="$WRITE_DISJOINT -f $testdir/file -n $wdisjoint_REP -m "\
+                         "$chunk_size_limit"
  
         echo "+ $cmd"
         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
  
         echo "+ $cmd"
         mpi_run ${MACHINEFILE_OPTION} ${MACHINEFILE} \
diff --git a/lustre/tests/mpi/write_disjoint.c b/lustre/tests/mpi/write_disjoint.c

index 602c033..9ab6f44 100644 (file)
--- a/lustre/tests/mpi/write_disjoint.c
+++ b/lustre/tests/mpi/write_disjoint.c
@@ -53,9 +53,11 @@
  #include <errno.h>
  #include <unistd.h>
  #include <stdarg.h>
  #include <errno.h>
  #include <unistd.h>
  #include <stdarg.h>
+#include <time.h>
  #include "mpi.h"
  
  #include "mpi.h"
  
-#define CHUNK_MAX_SIZE 123456
+/* Chosen arbitrarily.  Actually running this large will take a long time.*/
+#define CHUNK_MAX_SIZE (1024*1024*16)
  
  void rprintf(int rank, int loop, const char *fmt, ...)
  {
  
  void rprintf(int rank, int loop, const char *fmt, ...)
  {
@@ -84,20 +86,34 @@ int main (int argc, char *argv[]) {
          ssize_t ret;
          char *filename = "/mnt/lustre/write_disjoint";
          int numloops = 1000;
          ssize_t ret;
          char *filename = "/mnt/lustre/write_disjoint";
          int numloops = 1000;
+       int max_size = CHUNK_MAX_SIZE;
          int random = 0;
          int random = 0;
+       unsigned int seed = 0;
+       int seed_provided = 0;
  
          error = MPI_Init(&argc, &argv);
          if (error != MPI_SUCCESS)
                  rprintf(-1, -1, "MPI_Init failed: %d\n", error);
          /* Parse command line options */
  
          error = MPI_Init(&argc, &argv);
          if (error != MPI_SUCCESS)
                  rprintf(-1, -1, "MPI_Init failed: %d\n", error);
          /* Parse command line options */
-        while ((c = getopt(argc, argv, "f:n:")) != EOF) {
+       while ((c = getopt(argc, argv, "f:n:m:s:")) != EOF) {
+               errno = 0;
                  switch (c) {
                  case 'f':
                          filename = optarg;
                          break;
                  case 'n':
                          numloops = strtoul(optarg, NULL, 0);
                  switch (c) {
                  case 'f':
                          filename = optarg;
                          break;
                  case 'n':
                          numloops = strtoul(optarg, NULL, 0);
+                       break;
+               case 'm':
+                       max_size = strtoul(optarg, NULL, 0);
+                       if (max_size > CHUNK_MAX_SIZE)
+                               rprintf(-1, -1, "Chunk size larger than %d.\n",
+                                       CHUNK_MAX_SIZE);
                          break;
                          break;
+               case 's':
+                       seed = strtoul(optarg, NULL, 0);
+                       seed_provided = 1;
+                       break;
                  }
          }
  
                  }
          }
  
@@ -106,10 +122,10 @@ int main (int argc, char *argv[]) {
  
          chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
          for (i=0; i < noProcessors; i++) {
  
          chunk_buf = malloc(noProcessors * sizeof(chunk_buf[0]));
          for (i=0; i < noProcessors; i++) {
-                chunk_buf[i] = malloc(CHUNK_MAX_SIZE);
-                memset(chunk_buf[i], 'A'+ i, CHUNK_MAX_SIZE);
+               chunk_buf[i] = malloc(max_size);
+               memset(chunk_buf[i], 'A' + i, max_size);
          }
          }
-        read_buf = malloc(noProcessors * CHUNK_MAX_SIZE);
+       read_buf = malloc(noProcessors * max_size);
  
          if (rank == 0) {
                  fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
  
          if (rank == 0) {
                  fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0666);
@@ -123,6 +139,14 @@ int main (int argc, char *argv[]) {
          if (fd < 0)
                  rprintf(rank, -1, "open() returned %s\n", strerror(errno));
  
          if (fd < 0)
                  rprintf(rank, -1, "open() returned %s\n", strerror(errno));
  
+       if (rank == 0) {
+               if (!seed_provided)
+                       seed = (unsigned int) time(NULL);
+               printf("random seed: %d\n", seed);
+               srand(seed);
+       }
+
+
          for (n = 0; n < numloops; n++) {
                  /* reset the environment */
                  if (rank == 0) {
          for (n = 0; n < numloops; n++) {
                  /* reset the environment */
                  if (rank == 0) {
@@ -130,10 +154,11 @@ int main (int argc, char *argv[]) {
                          if (ret != 0)
                                  rprintf(rank, n, "truncate() returned %s\n",
                                          strerror(errno) );
                          if (ret != 0)
                                  rprintf(rank, n, "truncate() returned %s\n",
                                          strerror(errno) );
+
                          random = rand();
                  }
                  MPI_Bcast(&random, 1, MPI_INT, 0, MPI_COMM_WORLD);
                          random = rand();
                  }
                  MPI_Bcast(&random, 1, MPI_INT, 0, MPI_COMM_WORLD);
-                CHUNK_SIZE(n) = random % CHUNK_MAX_SIZE;
+               CHUNK_SIZE(n) = random % max_size;
  
                  if (n % 1000 == 0 && rank == 0)
                          printf("loop %d: chunk_size %lu\n", n, CHUNK_SIZE(n));
  
                  if (n % 1000 == 0 && rank == 0)
                          printf("loop %d: chunk_size %lu\n", n, CHUNK_SIZE(n));
diff --git a/lustre/tests/parallel-scale.sh b/lustre/tests/parallel-scale.sh

index ec6d2fc..c725969 100644 (file)
--- a/lustre/tests/parallel-scale.sh
+++ b/lustre/tests/parallel-scale.sh
@@ -136,11 +136,18 @@ test_write_append_truncate() {
  }
  run_test write_append_truncate "write_append_truncate"
  
  }
  run_test write_append_truncate "write_append_truncate"
  
+# Argument is chunk size limit, the upper bound on write size
  test_write_disjoint() {
  test_write_disjoint() {
-    run_write_disjoint
+    run_write_disjoint 123456
  }
  run_test write_disjoint "write_disjoint"
  
  }
  run_test write_disjoint "write_disjoint"
  
+# Make sure to exercise the tiny write code
+test_write_disjoint() {
+    run_write_disjoint 16384
+}
+run_test write_disjoint "write_disjoint_tiny"
+
  test_parallel_grouplock() {
      run_parallel_grouplock
  }
  test_parallel_grouplock() {
      run_parallel_grouplock
  }
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh

index f0964f7..7d1561f 100755 (executable)
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -858,6 +858,38 @@ test_23b() { # bug 18988
  }
  run_test 23b "O_APPEND check"
  
  }
  run_test 23b "O_APPEND check"
  
+# LU-9409, size with O_APPEND and tiny writes
+test_23c() {
+       local file=$DIR/$tfile
+
+       # single dd
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=8 count=800
+       $CHECKSTAT -s 6400 $file || error "wrong size, expected 6400"
+       rm -f $file
+
+       # racing tiny writes
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=8 count=800 &
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=8 count=800 &
+       wait
+       $CHECKSTAT -s 12800 $file || error "wrong size, expected 12800"
+       rm -f $file
+
+       #racing tiny & normal writes
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=4096 count=4 &
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=8 count=100 &
+       wait
+       $CHECKSTAT -s 17184 $file || error "wrong size, expected 17184"
+       rm -f $file
+
+       #racing tiny & normal writes 2, ugly numbers
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=4099 count=11 &
+       dd conv=notrunc oflag=append if=/dev/zero of=$file bs=17 count=173 &
+       wait
+       $CHECKSTAT -s 48030 $file || error "wrong size, expected 48030"
+       rm -f $file
+}
+run_test 23c "O_APPEND size checks for tiny writes"
+
  # rename sanity
  test_24a() {
         echo '-- same directory rename'
  # rename sanity
  test_24a() {
         echo '-- same directory rename'
diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh

index 094dcef..f614e16 100755 (executable)
--- a/lustre/tests/sanityn.sh
+++ b/lustre/tests/sanityn.sh
@@ -385,7 +385,7 @@ else
         FSXP=100
  fi
  
         FSXP=100
  fi
  
-test_16() {
+test_16a() {
         local file1=$DIR1/$tfile
         local file2=$DIR2/$tfile
  
         local file1=$DIR1/$tfile
         local file2=$DIR2/$tfile
  
@@ -404,7 +404,26 @@ test_16() {
         fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \
                 -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed."
  }
         fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \
                 -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed."
  }
-run_test 16 "$FSXNUM iterations of dual-mount fsx"
+run_test 16a "$FSXNUM iterations of dual-mount fsx"
+
+# Consistency check for tiny writes, LU-9409
+test_16b() {
+       local file1=$DIR1/$tfile
+       local file2=$DIR2/$tfile
+
+       # to allocate grant because it may run out due to test_15.
+       lfs setstripe -c -1 $file1
+       dd if=/dev/zero of=$file1 bs=$STRIPE_BYTES count=$OSTCOUNT oflag=sync
+       dd if=/dev/zero of=$file2 bs=$STRIPE_BYTES count=$OSTCOUNT oflag=sync
+       rm -f $file1
+
+       lfs setstripe -c -1 $file1 # b=10919
+       # -o is set to 8192 because writes < 1 page and between 1 and 2 pages
+       # create a mix of tiny writes & normal writes
+       fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 $file1 \
+       $file2
+}
+run_test 16b "$FSXNUM iterations of dual-mount fsx at small size"
  
  test_17() { # bug 3513, 3667
         remote_ost_nodsh && skip "remote OST with nodsh" && return
  
  test_17() { # bug 3513, 3667
         remote_ost_nodsh && skip "remote OST with nodsh" && return
author	Patrick Farrell <paf@cray.com>
	Mon, 5 Feb 2018 15:55:35 +0000 (09:55 -0600)
committer	Oleg Drokin <oleg.drokin@intel.com>
	Wed, 14 Feb 2018 00:51:28 +0000 (00:51 +0000)
lustre/include/cl_object.h		patch \| blob \| history
lustre/llite/file.c		patch \| blob \| history
lustre/llite/llite_internal.h		patch \| blob \| history
lustre/llite/rw26.c		patch \| blob \| history
lustre/obdclass/cl_object.c		patch \| blob \| history
lustre/obdclass/cl_page.c		patch \| blob \| history
lustre/osc/osc_internal.h		patch \| blob \| history
lustre/osc/osc_io.c		patch \| blob \| history
lustre/osc/osc_page.c		patch \| blob \| history
lustre/tests/functions.sh		patch \| blob \| history
lustre/tests/mpi/write_disjoint.c		patch \| blob \| history
lustre/tests/parallel-scale.sh		patch \| blob \| history
lustre/tests/sanity.sh		patch \| blob \| history
lustre/tests/sanityn.sh		patch \| blob \| history