#include <linux/lustre_fsfilt.h>
#include "filter_internal.h"
-
-/* We should only change the file mtime (and not the ctime, like
- * update_inode_times() in generic_file_write()) when we only change data. */
-void inode_update_time(struct inode *inode, int ctime_too)
-{
- time_t now = CURRENT_TIME;
- if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now))
- return;
- inode->i_mtime = now;
- if (ctime_too)
- inode->i_ctime = now;
- mark_inode_dirty_sync(inode);
-}
-
/* Bug 2254 -- this is better done in ext3_map_inode_page, but this
* workaround will suffice until everyone has upgraded their kernels */
static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev,
}
#endif
-static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
+/* These are our hacks to keep our directio/bh IO coherent with ext3's
+ * page cache use. Most notably, ext3 reads file data into the page
+ * cache when it is zeroing the tail of partial-block truncates and
+ * leaves it there, sometimes generating IO from it at later truncates.
+ * This function removes the partial page and its buffers from the page
+ * cache, so it should only ever cause a wait in rare cases, as otherwise
+ * we always do full-page IO to the OST.
+ *
+ * The call to truncate_complete_page() will call journal_flushpage() to
+ * free the buffers and drop the page from cache. The buffers should not
+ * be dirty, because we already called fdatasync/fdatawait on them.
+ */
+static int filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf)
{
struct page *page;
- int i;
+ int i, rc, rc2;
+
+ check_pending_bhs(KIOBUF_GET_BLOCKS(iobuf), iobuf->nr_pages,
+ inode->i_dev, 1 << inode->i_blkbits);
+ /* The sync calls below are nearly generic_osync_inode(), without the
+ * waiting on the inode, i.e. close to what this call would do:
+ * rc = generic_osync_inode(inode, inode->i_mapping,
+ * OSYNC_DATA|OSYNC_METADATA);
+ */
+ rc = filemap_fdatasync(inode->i_mapping);
+ rc2 = fsync_inode_data_buffers(inode);
+ if (rc == 0)
+ rc = rc2;
+ rc2 = filemap_fdatawait(inode->i_mapping);
+ if (rc == 0)
+ rc = rc2;
+ if (rc != 0)
+ RETURN(rc);
+
+ /* Be careful to evict the pages from the cache only after
+ * fsync_inode_data_buffers() above has waited for IO to complete. */
for (i = 0; i < iobuf->nr_pages ; i++) {
page = find_lock_page(inode->i_mapping,
iobuf->maplist[i]->index);
if (page == NULL)
continue;
- if (page->mapping != NULL) {
- block_flushpage(page, 0);
- truncate_complete_page(page);
- }
+ if (page->mapping != NULL)
+ ll_truncate_complete_page(page);
+
unlock_page(page);
page_cache_release(page);
}
+
+ return 0;
}
/* Must be called with i_sem taken for writes; this will drop it */
{
struct obd_device *obd = exp->exp_obd;
struct inode *inode = dchild->d_inode;
- struct kiobuf *iobuf = buf;
+ struct kiobuf *iobuf = buf;
int rc, create = (rw == OBD_BRW_WRITE), *created = NULL, committed = 0;
int blocks_per_page = PAGE_SIZE >> inode->i_blkbits, cleanup_phase = 0;
struct semaphore *sem = NULL;
if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
GOTO(cleanup, rc = -EINVAL);
- OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
- if (created == NULL)
- GOTO(cleanup, rc = -ENOMEM);
+ if (iobuf->nr_pages * blocks_per_page >
+ OBDFILTER_CREATED_SCRATCHPAD_ENTRIES)
+ GOTO(cleanup, rc = -EINVAL);
+
cleanup_phase = 1;
rc = lock_kiovec(1, &iobuf, 1);
}
rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist,
- iobuf->nr_pages, iobuf->blocks, created,
- create, sem);
+ iobuf->nr_pages, iobuf->blocks,
+ obdfilter_created_scratchpad, create, sem);
if (rc)
GOTO(cleanup, rc);
GOTO(cleanup, rc);
}
- /* these are our hacks to keep our directio/bh IO coherent with ext3's
- * page cache use. Most notably ext3 reads file data into the page
- * cache when it is zeroing the tail of partial-block truncates and
- * leaves it there, sometimes generating io from it at later truncates.
- * Someday very soon we'll be performing our brw_kiovec() IO to and
- * from the page cache. */
-
- check_pending_bhs(iobuf->blocks, iobuf->nr_pages, inode->i_dev,
- 1 << inode->i_blkbits);
-
- rc = filemap_fdatasync(inode->i_mapping);
- if (rc == 0)
- rc = fsync_inode_data_buffers(inode);
- if (rc == 0)
- rc = filemap_fdatawait(inode->i_mapping);
+ rc = filter_clear_page_cache(inode, iobuf);
if (rc < 0)
GOTO(cleanup, rc);
- /* be careful to call this after fsync_inode_data_buffers has waited
- * for IO to complete before we evict it from the cache */
- filter_clear_page_cache(inode, iobuf);
-
rc = fsfilt_send_bio(rw, obd, inode, iobuf);
CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
case 2:
unlock_kiovec(1, &iobuf);
case 1:
- OBD_FREE(created, sizeof(*created) *
- iobuf->nr_pages*blocks_per_page);
case 0:
if (cleanup_phase != 3 && rw == OBD_BRW_WRITE)
up(&inode->i_sem);
return 1;
}
-
/* some kernels require alloc_kiovec callers to zero members through the use of
* map_user_kiobuf and unmap_.. we don't use those, so we have a little helper
* that makes sure we don't break the rules. */
/* If overwriting an existing block, we don't need a grant */
if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC &&
- filter_range_is_mapped(inode, lnb->offset, lnb->len))
+ filter_range_is_mapped(inode, lnb->offset, lnb->len))
lnb->rc = 0;
if (lnb->rc) /* ENOSPC, network RPC error */
continue;
filter_iobuf_add_page(obd, iobuf, inode, lnb->page);
+
/* We expect these pages to be in offset order, but we'll
* be forgiving */
this_size = lnb->offset + lnb->len;
GOTO(cleanup, rc);
}
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "brw_start");
iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME);
/* filter_direct_io drops i_sem */
if (rc == 0)
obdo_from_inode(oa, inode, FILTER_VALID_FLAGS);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+ fsfilt_check_slow(now, obd_timeout, "direct_io");
err = fsfilt_commit_wait(obd, inode, wait_handle);
if (err)
rc = err;
- if (obd_sync_filter)
- LASSERT(oti->oti_transno <= obd->obd_last_committed);
- if (time_after(jiffies, now + 15 * HZ))
- CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+ if (obd_sync_filter && !err)
+ LASSERTF(oti->oti_transno <= obd->obd_last_committed,
+ "oti_transno "LPU64" last_committed "LPU64"\n",
+ oti->oti_transno, obd->obd_last_committed);
+ fsfilt_check_slow(now, obd_timeout, "commitrw commit");
cleanup:
filter_grant_commit(exp, niocount, res);