- show correct id in debug line
diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c
index 2c59f68..0010961 100644
--- a/lustre/obdfilter/filter_io_24.c
+++ b/lustre/obdfilter/filter_io_24.c
 #include <linux/lustre_fsfilt.h>
 #include "filter_internal.h"
 
-
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data. */
-void inode_update_time(struct inode *inode, int ctime_too)
-{
-        time_t now = CURRENT_TIME;
-        if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
-                return;
-        inode->i_mtime = now;
-        if (ctime_too)
-                inode->i_ctime = now;
-        mark_inode_dirty_sync(inode);
-}
-
 /* Bug 2254 -- this is better done in ext3_map_inode_page, but this
  * workaround will suffice until everyone has upgraded their kernels */
 static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev,
@@ -108,23 +94,55 @@ static void dump_page(int rw, unsigned long block, struct page *page)
 }
 #endif
 
-static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
+/* These are our hacks to keep our directio/bh IO coherent with ext3's
+ * page cache use.  Most notably ext3 reads file data into the page
+ * cache when it is zeroing the tail of partial-block truncates and
+ * leaves it there, sometimes generating io from it at later truncates.
+ * This removes the partial page and its buffers from the page cache,
+ * so it should only ever cause a wait in rare cases, as otherwise we
+ * always do full-page IO to the OST.
+ *
+ * The call to truncate_complete_page() will call journal_flushpage() to
+ * free the buffers and drop the page from cache.  The buffers should not
+ * be dirty, because we already called fdatasync/fdatawait on them.
+ */
+static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
 {
         struct page *page;
-        int i;
+        int i, rc, rc2;
 
+        check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
+                          inode->i_dev, 1 << inode->i_blkbits);
+
+        /* This is nearly generic_osync_inode, without the waiting on the inode
+        rc = generic_osync_inode(inode, inode->i_mapping,
+                                 OSYNC_DATA|OSYNC_METADATA);
+         */
+        rc = filemap_fdatasync(inode->i_mapping);
+        rc2 = fsync_inode_data_buffers(inode);
+        if (rc == 0)
+                rc = rc2;
+        rc2 = filemap_fdatawait(inode->i_mapping);
+        if (rc == 0)
+                rc = rc2;
+        if (rc != 0)
+                RETURN(rc);
+
+        /* be careful to call this after fsync_inode_data_buffers has waited
+         * for IO to complete before we evict it from the cache */
         for (i = 0; i < iobuf->nr_pages ; i++) {
                 page = find_lock_page(inode->i_mapping,
                                       iobuf->maplist[i]->index);
                 if (page == NULL)
                         continue;
-                if (page->mapping != NULL) {
-                        block_flushpage(page, 0);
-                        truncate_complete_page(page);
-                }
+                if (page->mapping != NULL)
+                        ll_truncate_complete_page(page);
+
                 unlock_page(page);
                 page_cache_release(page);
         }
+
+        return 0;
 }
 
 /* Must be called with i_sem taken for writes; this will drop it */
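For readers less familiar with the 2.4 page-cache calls used above, here is a minimal user-space C sketch (not the kernel code) of the ordering filter_clear_page_cache() enforces: write dirty data back and wait for it, remembering the first error, and only then drop the cached pages. fsync() and posix_fadvise(POSIX_FADV_DONTNEED) stand in for filemap_fdatasync()/filemap_fdatawait() and the ll_truncate_complete_page() loop; flush_and_drop() and the temp-file path are made up for illustration.

#define _POSIX_C_SOURCE 200112L
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int flush_and_drop(int fd, off_t offset, off_t len)
{
        int rc = 0, rc2;

        /* push dirty data out and wait for it; keep the first error */
        if (fsync(fd) != 0)
                rc = -errno;

        /* only after the data is stable do we evict the cached pages,
         * mirroring the "wait before evict" comment in the patch above */
        rc2 = posix_fadvise(fd, offset, len, POSIX_FADV_DONTNEED);
        if (rc == 0 && rc2 != 0)
                rc = -rc2;

        return rc;
}

int main(void)
{
        int fd = open("/tmp/flush-drop-demo", O_RDWR | O_CREAT, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        printf("flush_and_drop: %d\n", flush_and_drop(fd, 0, 0));
        close(fd);
        return 0;
}
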
@@ -134,7 +152,7 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf,
 {
         struct obd_device *obd = exp->exp_obd;
         struct inode *inode = dchild->d_inode;
-         struct kiobuf *iobuf = buf;
+        struct kiobuf *iobuf = buf;
         int rc, create = (rw == OBD_BRW_WRITE), *created = NULL, committed = 0;
         int blocks_per_page = PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0;
         struct semaphore *sem = NULL;
@@ -148,9 +166,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf,
         if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
                 GOTO(cleanup, rc = -EINVAL);
 
-        OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
-        if (created == NULL)
-                GOTO(cleanup, rc = -ENOMEM);
+        if (iobuf->nr_pages * blocks_per_page > 
+            OBDFILTER_CREATED_SCRATCHPAD_ENTRIES)
+                GOTO(cleanup, rc = -EINVAL);
+
         cleanup_phase = 1;
 
         rc = lock_kiovec(1, &iobuf, 1);
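The hunk above, together with the fsfilt_map_inode_pages() change that follows, drops the per-request OBD_ALLOC/OBD_FREE of the "created" array in favour of a preallocated, size-checked scratchpad. A rough user-space C sketch of that pattern, assuming a single caller (the real scratchpad is shared obdfilter state) and using made-up names (SCRATCH_ENTRIES, created_scratchpad, map_blocks):

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define SCRATCH_ENTRIES 1024            /* stand-in for the real limit */

static int created_scratchpad[SCRATCH_ENTRIES];

/* reject requests that do not fit instead of allocating per call */
static int map_blocks(int nblocks)
{
        if (nblocks > SCRATCH_ENTRIES)
                return -EINVAL;

        memset(created_scratchpad, 0, nblocks * sizeof(*created_scratchpad));
        /* ... the scratchpad would be handed to the mapping routine ... */
        return 0;
}

int main(void)
{
        printf("map_blocks(8)    = %d\n", map_blocks(8));
        printf("map_blocks(4096) = %d\n", map_blocks(4096));
        return 0;
}
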
@@ -164,8 +183,8 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf,
         }
         
         rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist,
-                                    iobuf->nr_pages, iobuf->blocks, created,
-                                    create, sem);
+                                    iobuf->nr_pages, iobuf->blocks, 
+                                    obdfilter_created_scratchpad, create, sem);
         if (rc)
                 GOTO(cleanup, rc);
 
@@ -195,28 +214,10 @@ int filter_direct_io(int rw, struct dentry *dchild, void *buf,
                         GOTO(cleanup, rc);
         }
 
-        /* these are our hacks to keep our directio/bh IO coherent with ext3's
-         * page cache use.  Most notably ext3 reads file data into the page
-         * cache when it is zeroing the tail of partial-block truncates and
-         * leaves it there, sometimes generating io from it at later truncates.
-         * Someday very soon we'll be performing our brw_kiovec() IO to and
-         * from the page cache. */
-
-        check_pending_bhs(iobuf->blocks, iobuf->nr_pages, inode->i_dev,
-                          1 << inode->i_blkbits);
-
-        rc = filemap_fdatasync(inode->i_mapping);
-        if (rc == 0)
-                rc = fsync_inode_data_buffers(inode);
-        if (rc == 0)
-                rc = filemap_fdatawait(inode->i_mapping);
+        rc = filter_clear_page_cache(inode, iobuf);
         if (rc < 0)
                 GOTO(cleanup, rc);
 
-        /* be careful to call this after fsync_inode_data_buffers has waited
-         * for IO to complete before we evict it from the cache */
-        filter_clear_page_cache(inode, iobuf);
-
         rc = fsfilt_send_bio(rw, obd, inode, iobuf);
 
         CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
@@ -244,8 +245,6 @@ cleanup:
         case 2:
                 unlock_kiovec(1, &iobuf);
         case 1:
-                OBD_FREE(created, sizeof(*created) *
-                         iobuf->nr_pages*blocks_per_page);
         case 0:
                 if (cleanup_phase != 3 && rw == OBD_BRW_WRITE)            
                         up(&inode->i_sem);
@@ -279,7 +278,6 @@ int filter_range_is_mapped(struct inode *inode, obd_size offset, int len)
         return 1;
 }
 
-
 /* some kernels require alloc_kiovec callers to zero members through the use of
  * map_user_kiobuf and unmap_.. we don't use those, so we have a little helper
  * that makes sure we don't break the rules. */
@@ -377,13 +375,14 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
 
                 /* If overwriting an existing block, we don't need a grant */
                 if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
-                     filter_range_is_mapped(inode, lnb->offset, lnb->len))    
+                    filter_range_is_mapped(inode, lnb->offset, lnb->len))
                         lnb->rc = 0;
 
                 if (lnb->rc) /* ENOSPC, network RPC error */
                         continue;
 
                 filter_iobuf_add_page(obd, iobuf, inode, lnb->page);
+                
                 /* We expect these pages to be in offset order, but we'll
                  * be forgiving */
                 this_size = lnb->offset + lnb->len;
@@ -406,8 +405,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
                 GOTO(cleanup, rc);
         }
 
-        if (time_after(jiffies, now + 15 * HZ))
-                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+        fsfilt_check_slow(now, obd_timeout, "brw_start");
 
         iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
         /* filter_direct_io drops i_sem */
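The open-coded "time_after(jiffies, now + 15 * HZ)" warnings are replaced with fsfilt_check_slow() here and twice more in the next hunk. As a rough user-space C sketch of that kind of timing helper (check_slow(), elapsed_sec() and the 15-second limit are illustrative, not the fsfilt implementation):

#define _POSIX_C_SOURCE 200112L
#include <stdio.h>
#include <time.h>

static double elapsed_sec(struct timespec start)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        return (now.tv_sec - start.tv_sec) +
               (now.tv_nsec - start.tv_nsec) / 1e9;
}

/* warn if the operation named by msg took longer than limit seconds */
static void check_slow(struct timespec start, double limit, const char *msg)
{
        double t = elapsed_sec(start);

        if (t > limit)
                fprintf(stderr, "slow %s %.1fs\n", msg, t);
}

int main(void)
{
        struct timespec start;

        clock_gettime(CLOCK_MONOTONIC, &start);
        /* ... the timed operation would run here ... */
        check_slow(start, 15.0, "brw_start");
        return 0;
}
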
@@ -416,16 +414,16 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
         if (rc == 0)
                 obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
 
-        if (time_after(jiffies, now + 15 * HZ))
-                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+        fsfilt_check_slow(now, obd_timeout, "direct_io");
 
         err = fsfilt_commit_wait(obd, inode, wait_handle);
         if (err)
                 rc = err;
-        if (obd_sync_filter)
-                LASSERT(oti->oti_transno <= obd->obd_last_committed);
-        if (time_after(jiffies, now + 15 * HZ))
-                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+        if (obd_sync_filter && !err)
+                LASSERTF(oti->oti_transno <= obd->obd_last_committed,
+                         "oti_transno "LPU64" last_committed "LPU64"\n",
+                         oti->oti_transno, obd->obd_last_committed);
+        fsfilt_check_slow(now, obd_timeout, "commitrw commit");
 cleanup:
         filter_grant_commit(exp, niocount, res);
 
@@ -442,4 +440,3 @@ cleanup:
 
         RETURN(rc);
 }
-