-/*
- * OBDFS Super operations
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
*
- * This code is issued under the GNU General Public License.
- * See the file COPYING in this distribution
+ * Lustre Lite I/O Page Cache
*
- * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
- * Copryright (C) 1999 Stelias Computing Inc,
- * (author Peter J. Braam <braam@stelias.com>)
- * Copryright (C) 1999 Seagate Technology Inc.
-*/
-
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ */
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/stat.h>
+#include <linux/iobuf.h>
#include <linux/errno.h>
#include <linux/locks.h>
#include <linux/unistd.h>
-
+#include <linux/version.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <asm/uaccess.h>
-#include <linux/vmalloc.h>
#include <asm/segment.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_idl.h>
-#include <linux/lustre_mds.h>
-#include <linux/lustre_light.h>
-
-void ll_change_inode(struct inode *inode);
+#define DEBUG_SUBSYSTEM S_LLITE
-static int cache_writes = 0;
-
-
-/* page cache support stuff */
-
-
-/*
- * Add a page to the dirty page list.
- */
-void set_page_dirty(struct page *page)
-{
- if (!test_and_set_bit(PG_dirty, &page->flags)) {
- struct address_space *mapping = page->mapping;
-
- if (mapping) {
- spin_lock(&pagecache_lock);
- list_del(&page->list);
- list_add(&page->list, &mapping->dirty_pages);
- spin_unlock(&pagecache_lock);
-
- if (mapping->host)
- mark_inode_dirty_pages(mapping->host);
- }
- }
-}
-
-/*
- * Remove page from dirty list
- */
-void __set_page_clean(struct page *page)
-{
- struct address_space *mapping = page->mapping;
- struct inode *inode;
-
- if (!mapping)
- return;
-
- spin_lock(&pagecache_lock);
- list_del(&page->list);
- list_add(&page->list, &mapping->clean_pages);
-
- inode = mapping->host;
- if (list_empty(&mapping->dirty_pages)) {
- CDEBUG(D_INODE, "inode clean\n");
- inode->i_state &= ~I_DIRTY_PAGES;
- }
- spin_unlock(&pagecache_lock);
- EXIT;
-}
+#include <linux/lustre_mds.h>
+#include <linux/lustre_lite.h>
+#include <linux/lustre_lib.h>
-inline void set_page_clean(struct page *page)
-{
- if (PageDirty(page)) {
- ClearPageDirty(page);
- __set_page_clean(page);
- }
-}
-/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
+/* SYNCHRONOUS I/O to object storage for an inode
+ *
+ * Transfers exactly one page: a single brw_page describing PAGE_SIZE bytes
+ * at the page's file offset (page->index << PAGE_SHIFT) is handed to
+ * obd_brw() together with the inode's stripe metadata (lli_smd) and
+ * ll_sync_io_cb as the callback.
+ *
+ * rw     - passed to obd_brw() unchanged; callers in this file use
+ *          OBD_BRW_READ or OBD_BRW_WRITE
+ * inode  - inode owning the page; supplies the stripe md and obd connection
+ * page   - the page to read into or write from
+ * create - non-zero sets OBD_BRW_CREATE in the page flags
+ *
+ * Returns -ENOMEM when ll_init_cb() fails, otherwise whatever obd_brw()
+ * returns.
+ */
static int ll_brw(int rw, struct inode *inode, struct page *page, int create)
{
- obd_count num_obdo = 1;
- obd_count bufs_per_obdo = 1;
- struct obdo *oa;
- obd_size count = PAGE_SIZE;
- obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
- obd_flag flags = create ? OBD_BRW_CREATE : 0;
- int err;
-
- ENTRY;
-
- oa = obdo_alloc();
- if ( !oa ) {
- EXIT;
- return -ENOMEM;
- }
- oa->o_valid = OBD_MD_FLNOTOBD;
- ll_from_inode(oa, inode);
-
- err = obd_brw(rw, IID(inode), num_obdo, &oa, &bufs_per_obdo,
- &page, &count, &offset, &flags);
- //if ( !err )
- // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
-
- obdo_free(oa);
- EXIT;
- return err;
-} /* ll_brw */
-
-extern void set_page_clean(struct page *);
-
-/* SYNCHRONOUS I/O to object storage for an inode -- object attr will be updated too */
-static int ll_commit_page(struct page *page, int create, int from, int to)
-{
- struct inode *inode = page->mapping->host;
- obd_count num_obdo = 1;
- obd_count bufs_per_obdo = 1;
- struct obdo *oa;
- obd_size count = to;
- obd_off offset = (((obd_off)page->index) << PAGE_SHIFT);
- obd_flag flags = create ? OBD_BRW_CREATE : 0;
- int err;
-
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lov_stripe_md *md = lli->lli_smd;
+ struct brw_page pg;
+ int err;
+ struct io_cb_data *cbd = ll_init_cb();
ENTRY;
- oa = obdo_alloc();
- if ( !oa ) {
- EXIT;
- return -ENOMEM;
- }
- oa->o_valid = OBD_MD_FLNOTOBD;
- ll_from_inode(oa, inode);
- CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
- from, to, count);
+ if (!cbd)
+ RETURN(-ENOMEM);
- err = obd_brw(WRITE, IID(inode), num_obdo, &oa, &bufs_per_obdo,
- &page, &count, &offset, &flags);
- if ( !err ) {
- SetPageUptodate(page);
- set_page_clean(page);
- }
+ pg.pg = page;
+ pg.count = PAGE_SIZE;
+ pg.off = ((obd_off)page->index) << PAGE_SHIFT;
+ pg.flag = create ? OBD_BRW_CREATE : 0;
- //if ( !err )
- // ll_to_inode(inode, oa); /* copy o_blocks to i_blocks */
+ err = obd_brw(rw, ll_i2obdconn(inode), md, 1, &pg, ll_sync_io_cb, cbd);
- obdo_free(oa);
- EXIT;
- return err;
+ RETURN(err);
} /* ll_brw */
-
-/* returns the page unlocked, but with a reference */
-int ll_readpage(struct file *file, struct page *page)
-{
- struct inode *inode = page->mapping->host;
- int rc;
-
- ENTRY;
-
- if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
- <= page->index) {
- memset(kmap(page), 0, PAGE_CACHE_SIZE);
- kunmap(page);
- goto readpage_out;
- }
-
- if (Page_Uptodate(page)) {
- EXIT;
- goto readpage_out;
- }
-
- rc = ll_brw(READ, inode, page, 0);
- if ( rc ) {
- EXIT;
- return rc;
- }
- /* PDEBUG(page, "READ"); */
-
- readpage_out:
- SetPageUptodate(page);
- obd_unlock_page(page);
- EXIT;
- return 0;
-} /* ll_readpage */
-
-
-
/* returns the page unlocked, but with a reference */
+/*
+ * ->readpage hook: bring one locked page-cache page up to date.
+ *
+ * file - unused
+ * page - locked page to fill; LBUG() fires if it is not locked
+ *
+ * A page that starts at or beyond i_size is zero-filled locally.  A page
+ * that is already up to date is left untouched (and a CERROR is logged,
+ * because that is not expected here).  Anything else is read through
+ * ll_brw(OBD_BRW_READ).
+ *
+ * The page is marked up to date only when no error occurred and is
+ * unlocked on every path.
+ *
+ * Returns 0 on success, or the error reported by ll_brw() so the caller
+ * sees the real failure code instead of an unexplained stale page.
+ */
-int ll_dir_readpage(struct file *file, struct page *page)
+static int ll_readpage(struct file *file, struct page *page)
{
- struct inode *inode = page->mapping->host;
- char *buf;
- __u64 offset;
+ struct inode *inode = page->mapping->host;
+ obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
int rc = 0;
- struct mds_rep_hdr *hdr;
-
ENTRY;
- if ( ((inode->i_size + PAGE_CACHE_SIZE -1)>>PAGE_SHIFT)
- <= page->index) {
- memset(kmap(page), 0, PAGE_CACHE_SIZE);
- kunmap(page);
- goto readpage_out;
- }
-
- if (Page_Uptodate(page)) {
- EXIT;
- goto readpage_out;
- }
-
- offset = page->index << PAGE_SHIFT;
- buf = kmap(page);
- rc = mdc_readpage(inode->i_ino, S_IFDIR, offset, buf, NULL, &hdr);
- kunmap(buff);
- if ( rc ) {
- EXIT;
- goto readpage_out;
- }
-
- if ((rc = hdr->status)) {
- EXIT;
- goto readpage_out;
- }
-
- /* PDEBUG(page, "READ"); */
-
- SetPageUptodate(page);
- readpage_out:
- unlock_page(page);
- EXIT;
- return rc;
-} /* ll_dir_readpage */
+ if (!PageLocked(page))
+ LBUG();
-int ll_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
-{
- struct inode *inode = page->mapping->host;
- obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
- int rc = 0;
- ENTRY;
-
- kmap(page);
- if (Page_Uptodate(page)) {
- EXIT;
- goto prepare_done;
+ /* page lies entirely past end of file: nothing to fetch, hand back zeroes */
+ if (inode->i_size <= offset) {
+ memset(kmap(page), 0, PAGE_SIZE);
+ kunmap(page);
+ GOTO(readpage_out, rc);
}
- if ( (from <= offset) && (to >= offset + PAGE_SIZE) ) {
- EXIT;
- return 0;
+ if (Page_Uptodate(page)) {
+ CERROR("Explain this please?\n");
+ GOTO(readpage_out, rc);
}
-
- rc = ll_brw(READ, inode, page, 0);
- if ( !rc ) {
- SetPageUptodate(page);
- }
- prepare_done:
- set_page_dirty(page);
- //SetPageDirty(page);
+ rc = ll_brw(OBD_BRW_READ, inode, page, 0);
EXIT;
- return rc;
-}
-
-
-
-
-
-static kmem_cache_t *ll_pgrq_cachep = NULL;
-
-int ll_init_pgrqcache(void)
-{
- ENTRY;
- if (ll_pgrq_cachep == NULL) {
- CDEBUG(D_CACHE, "allocating ll_pgrq_cache\n");
- ll_pgrq_cachep = kmem_cache_create("ll_pgrq",
- sizeof(struct ll_pgrq),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (ll_pgrq_cachep == NULL) {
- EXIT;
- return -ENOMEM;
- } else {
- CDEBUG(D_CACHE, "allocated cache at %p\n",
- ll_pgrq_cachep);
- }
- } else {
- CDEBUG(D_CACHE, "using existing cache at %p\n",
- ll_pgrq_cachep);
- }
- EXIT;
+ readpage_out:
+ if (!rc)
+ SetPageUptodate(page);
+ UnlockPage(page);
- return 0;
+ /* propagate a failed read; rc is still 0 on the two shortcut paths */
+ return rc;
-} /* ll_init_wreqcache */
+} /* ll_readpage */
-inline void ll_pgrq_del(struct ll_pgrq *pgrq)
-{
- --ll_cache_count;
- CDEBUG(D_INFO, "deleting page %p from list [count %ld]\n",
- pgrq->rq_page, ll_cache_count);
- list_del(&pgrq->rq_plist);
- OBDClearCachePage(pgrq->rq_page);
- kmem_cache_free(ll_pgrq_cachep, pgrq);
-}
-void ll_cleanup_pgrqcache(void)
+/*
+ * ->prepare_write hook: get a locked page ready to receive bytes
+ * [from, to) from the writer.
+ *
+ * file - unused
+ * page - locked page about to be written; LBUG() fires if it is not locked
+ * from - first byte of the page that will be written
+ * to   - one past the last byte of the page that will be written
+ *
+ * The page is kmap()ed on entry.  On success that mapping stays in place
+ * and is released by the kunmap() in ll_commit_write(); on failure it is
+ * released here.
+ *
+ * Returns 0 on success or the error from ll_brw().
+ */
+static int ll_prepare_write(struct file *file, struct page *page, unsigned from,
+ unsigned to)
{
+ struct inode *inode = page->mapping->host;
+ obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
+ int rc = 0;
+ char *addr;
ENTRY;
- if (ll_pgrq_cachep != NULL) {
- CDEBUG(D_CACHE, "destroying ll_pgrqcache at %p, count %ld\n",
- ll_pgrq_cachep, ll_cache_count);
- if (kmem_cache_destroy(ll_pgrq_cachep))
- printk(KERN_INFO __FUNCTION__
- ": unable to free all of cache\n");
- ll_pgrq_cachep = NULL;
- } else
- printk(KERN_INFO __FUNCTION__ ": called with NULL pointer\n");
- EXIT;
-} /* ll_cleanup_wreqcache */
+ addr = kmap(page);
+ if (!PageLocked(page))
+ LBUG();
+ if (Page_Uptodate(page))
+ GOTO(prepare_done, rc);
-/* called with the list lock held */
-static struct page *ll_find_page_index(struct inode *inode,
- unsigned long index)
-{
- struct list_head *page_list = ll_iplist(inode);
- struct list_head *tmp;
- struct page *page;
+ /* We're completely overwriting an existing page, so _don't_ set it up
+ * to date until commit_write */
+ if (from == 0 && to == PAGE_SIZE)
+ RETURN(0);
- ENTRY;
-
- CDEBUG(D_INFO, "looking for inode %ld pageindex %ld\n",
- inode->i_ino, index);
- OIDEBUG(inode);
-
- if (list_empty(page_list)) {
- EXIT;
- return NULL;
+ /* We are writing to a new page, no need to read old data */
+ if (inode->i_size <= offset) {
+ memset(addr, 0, PAGE_SIZE);
+ goto prepare_done;
}
- tmp = page_list;
- while ( (tmp = tmp->next) != page_list ) {
- struct ll_pgrq *pgrq;
-
- pgrq = list_entry(tmp, struct ll_pgrq, rq_plist);
- page = pgrq->rq_page;
- if (index == page->index) {
- CDEBUG(D_INFO,
- "INDEX SEARCH found page %p, index %ld\n",
- page, index);
- EXIT;
- return page;
- }
- }
+
+ /* partial write into existing data: fetch the old contents first */
+ rc = ll_brw(OBD_BRW_READ, inode, page, 0);
EXIT;
- return NULL;
-} /* ll_find_page_index */
+ prepare_done:
+ if (!rc) {
+ SetPageUptodate(page);
+ } else {
+ /* ll_commit_write() holds the kunmap() that pairs with the kmap()
+ * above, but the write path does not call ->commit_write after a
+ * failed ->prepare_write, so the mapping must be dropped here or
+ * it is leaked */
+ kunmap(page);
+ }
+ return rc;
+}
-/* call and free pages from Linux page cache: called with io lock on inodes */
-int ll_do_vec_wr(struct inode **inodes, obd_count num_io,
- obd_count num_obdos, struct obdo **obdos,
- obd_count *oa_bufs, struct page **pages, char **bufs,
- obd_size *counts, obd_off *offsets, obd_flag *flags)
+/* returns the page unlocked, but with a reference
+ *
+ * ->writepage hook: writes one page out through ll_brw(OBD_BRW_WRITE)
+ * with the create flag set.  The page must already be locked (LBUG()
+ * otherwise).  When ll_brw() succeeds the page is handed to
+ * set_page_clean(); when it fails the error is reported with CERROR().
+ * The page is unlocked on both paths and ll_brw()'s result is returned.
+ */
+static int ll_writepage(struct page *page)
{
+ struct inode *inode = page->mapping->host;
int err;
-
ENTRY;
- CDEBUG(D_INFO, "writing %d page(s), %d obdo(s) in vector\n",
- num_io, num_obdos);
- if (obd_debug_level & D_INFO) { /* DEBUGGING */
- int i;
- printk("OBDOS: ");
- for (i = 0; i < num_obdos; i++)
- printk("%ld:0x%p ", (long)obdos[i]->o_id, obdos[i]);
-
- printk("\nPAGES: ");
- for (i = 0; i < num_io; i++)
- printk("0x%p ", pages[i]);
- printk("\n");
- }
+ if (!PageLocked(page))
+ LBUG();
- err = obd_brw(WRITE, IID(inodes[0]), num_obdos, obdos,
- oa_bufs, pages, counts, offsets, flags);
-
- CDEBUG(D_INFO, "BRW done\n");
- /* release the pages from the page cache */
- while ( num_io > 0 ) {
- --num_io;
- CDEBUG(D_INFO, "calling put_page for %p, index %ld\n",
- pages[num_io], pages[num_io]->index);
- /* PDEBUG(pages[num_io], "do_vec_wr"); */
- put_page(pages[num_io]);
- /* PDEBUG(pages[num_io], "do_vec_wr"); */
- }
- CDEBUG(D_INFO, "put_page done\n");
-
- while ( num_obdos > 0) {
- --num_obdos;
- CDEBUG(D_INFO, "free obdo %ld\n",(long)obdos[num_obdos]->o_id);
- /* copy o_blocks to i_blocks */
- ll_set_size (inodes[num_obdos], obdos[num_obdos]->o_size);
- //ll_to_inode(inodes[num_obdos], obdos[num_obdos]);
- obdo_free(obdos[num_obdos]);
+ err = ll_brw(OBD_BRW_WRITE, inode, page, 1);
+ if ( !err ) {
+ //SetPageUptodate(page);
+ set_page_clean(page);
+ } else {
+ CERROR("ll_brw failure %d\n", err);
}
- CDEBUG(D_INFO, "obdo_free done\n");
- EXIT;
- return err;
+ unlock_page(page);
+ RETURN(err);
}
-/*
- * Add a page to the write request cache list for later writing.
- * ASYNCHRONOUS write method.
- */
-static int ll_add_page_to_cache(struct inode *inode, struct page *page)
-{
- int err = 0;
- ENTRY;
-
- /* The PG_obdcache bit is cleared by ll_pgrq_del() BEFORE the page
- * is written, so at worst we will write the page out twice.
- *
- * If the page has the PG_obdcache bit set, then the inode MUST be
- * on the superblock dirty list so we don't need to check this.
- * Dirty inodes are removed from the superblock list ONLY when they
- * don't have any more cached pages. It is possible to have an inode
- * with no dirty pages on the superblock list, but not possible to
- * have an inode with dirty pages NOT on the superblock dirty list.
- */
- if (!OBDAddCachePage(page)) {
- struct ll_pgrq *pgrq;
- pgrq = kmem_cache_alloc(ll_pgrq_cachep, SLAB_KERNEL);
- if (!pgrq) {
- OBDClearCachePage(page);
- EXIT;
- return -ENOMEM;
- }
- /* not really necessary since we set all pgrq fields here
- memset(pgrq, 0, sizeof(*pgrq));
- */
-
- pgrq->rq_page = page;
- pgrq->rq_jiffies = jiffies;
- get_page(pgrq->rq_page);
-
- obd_down(&ll_i2sbi(inode)->ll_list_mutex);
- list_add(&pgrq->rq_plist, ll_iplist(inode));
- ll_cache_count++;
- //printk("-- count %d\n", ll_cache_count);
-
- /* If inode isn't already on superblock inodes list, add it.
- *
- * We increment the reference count on the inode to keep it
- * from being freed from memory. This _should_ be an iget()
- * with an iput() in both flush_reqs() and put_inode(), but
- * since put_inode() is called from iput() we can't call iput()
- * again there. Instead we just increment/decrement i_count,
- * which is mostly what iget/iput do for an inode in memory.
- */
- if ( list_empty(ll_islist(inode)) ) {
- atomic_inc(&inode->i_count);
- CDEBUG(D_INFO,
- "adding inode %ld to superblock list %p\n",
- inode->i_ino, ll_slist(inode));
- list_add(ll_islist(inode), ll_slist(inode));
- }
- obd_up(&ll_i2sbi(inode)->ll_list_mutex);
-
- }
-
- /* XXX For testing purposes, we can write out the page here.
- err = ll_flush_reqs(ll_slist(inode), ~0UL);
- */
-
- EXIT;
- return err;
-} /* ll_add_page_to_cache */
-
-void rebalance(void)
-{
- if (ll_cache_count > 60000) {
- printk("-- count %ld\n", ll_cache_count);
- //ll_flush_dirty_pages(~0UL);
- printk("-- count %ld\n", ll_cache_count);
- }
-}
-
-/* select between SYNC and ASYNC I/O methods */
-int ll_do_writepage(struct page *page, int sync)
+/* SYNCHRONOUS I/O to object storage for an inode -- i_size is grown afterwards
+ * when the write ends past the current end of file.
+ *
+ * ->commit_write hook: pushes the page to object storage with obd_brw()
+ * and releases the kmap() that ll_prepare_write() took on the page.
+ *
+ * file - unused
+ * page - locked page holding the new data; LBUG() fires if it is not locked
+ * from - start of the written range inside the page; only logged
+ * to   - end of the written range inside the page; the transfer always
+ *        starts at the first byte of the page and covers `to` bytes
+ *
+ * Returns -ENOMEM when ll_init_cb() fails, otherwise obd_brw()'s result.
+ * The kmap() from ll_prepare_write() is dropped on both paths.
+ */
+static int ll_commit_write(struct file *file, struct page *page,
+ unsigned from, unsigned to)
{
+ int create = 1;
struct inode *inode = page->mapping->host;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lov_stripe_md *md = lli->lli_smd;
+ struct brw_page pg;
int err;
+ loff_t size;
+ struct io_cb_data *cbd = ll_init_cb();
+
+ pg.pg = page;
+ pg.count = to;
+ pg.off = (((obd_off)page->index) << PAGE_SHIFT);
+ pg.flag = create ? OBD_BRW_CREATE : 0;
ENTRY;
- /* PDEBUG(page, "WRITEPAGE"); */
- if ( sync )
- err = ll_brw(WRITE, inode, page, 1);
- else {
- err = ll_add_page_to_cache(inode, page);
- CDEBUG(D_INFO, "DO_WR ino: %ld, page %p, err %d, uptodate %d\n",
- inode->i_ino, page, err, Page_Uptodate(page));
- }
-
- if ( !err ) {
- SetPageUptodate(page);
- set_page_clean(page);
- }
- /* PDEBUG(page,"WRITEPAGE"); */
- EXIT;
- return err;
-} /* ll_do_writepage */
+ if (!cbd) {
+ /* the kunmap() further down pairs with the kmap() taken in
+ * ll_prepare_write(); this early exit must drop that mapping
+ * too or it is leaked */
+ kunmap(page);
+ RETURN(-ENOMEM);
+ }
+ SetPageUptodate(page);
+ if (!PageLocked(page))
+ LBUG();
-/* returns the page unlocked, but with a reference */
-int ll_writepage(struct page *page)
-{
- int rc;
- struct inode *inode = page->mapping->host;
- ENTRY;
- printk("---> writepage called ino %ld!\n", inode->i_ino);
- BUG();
- rc = ll_do_writepage(page, 1);
- if ( !rc ) {
- set_page_clean(page);
- } else {
- CDEBUG(D_INODE, "--> GRR %d\n", rc);
- }
- EXIT;
- return rc;
-}
+ CDEBUG(D_INODE, "commit_page writing (at %d) to %d, count %Ld\n",
+ from, to, (unsigned long long)pg.count);
-void write_inode_pages(struct inode *inode)
-{
- struct list_head *tmp = &inode->i_mapping->dirty_pages;
-
- while ( (tmp = tmp->next) != &inode->i_mapping->dirty_pages) {
- struct page *page;
- page = list_entry(tmp, struct page, list);
- ll_writepage(page);
- }
-}
+ err = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode), md,
+ 1, &pg, ll_sync_io_cb, cbd);
+ kunmap(page);
+ size = pg.off + pg.count;
+ /* do NOT truncate when writing in the middle of a file */
+ if (size > inode->i_size)
+ inode->i_size = size;
-int ll_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
-{
- struct inode *inode = page->mapping->host;
- int rc = 0;
- loff_t len = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
- ENTRY;
- CDEBUG(D_INODE, "commit write ino %ld (end at %Ld) from %d to %d ,ind %ld\n",
- inode->i_ino, len, from, to, page->index);
-
+ RETURN(err);
+} /* ll_commit_write */
- if (cache_writes == 0) {
- rc = ll_commit_page(page, 1, from, to);
- }
+/*
+ * Truncate the inode's storage object to inode->i_size.
+ *
+ * When the inode has no stripe metadata yet (no object allocated) only
+ * i_mtime and i_ctime are refreshed.  Otherwise a PW size lock is taken
+ * with ll_size_lock(), everything from i_size to the end of the object is
+ * punched with obd_punch(), the attributes returned in the obdo are copied
+ * back into the inode on success, and the lock is released again.
+ *
+ * Nothing is returned: failures of obd_punch() and ll_size_unlock() are
+ * only logged, and a failed ll_size_lock() ends in LBUG().
+ */
+void ll_truncate(struct inode *inode)
+{
+ struct obdo oa = {0};
+ struct lov_stripe_md *md = ll_i2info(inode)->lli_smd;
+ struct lustre_handle *lockhs = NULL;
+ int err;
+ ENTRY;
- if (len > inode->i_size) {
- ll_set_size(inode, len);
+ if (!md) {
+ /* object not yet allocated */
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ /* keep the ENTRY/EXIT trace balanced on this early return too */
+ EXIT;
+ return;
}
- kunmap(page);
- EXIT;
- return rc;
-}
-
+ oa.o_id = md->lmd_object_id;
+ oa.o_size = inode->i_size;
-/*
- * This does the "real" work of the write. The generic routine has
- * allocated the page, locked it, done all the page alignment stuff
- * calculations etc. Now we should just copy the data from user
- * space and write it back to the real medium..
- *
- * If the writer ends up delaying the write, the writer needs to
- * increment the page use counts until he is done with the page.
- *
- * Return value is the number of bytes written.
- */
-int ll_write_one_page(struct file *file, struct page *page,
- unsigned long offset, unsigned long bytes,
- const char * buf)
-{
- struct inode *inode = file->f_dentry->d_inode;
- int err;
+ CDEBUG(D_INFO, "calling punch for %ld (all bytes after %Ld)\n",
+ (long)oa.o_id, (unsigned long long)oa.o_size);
- ENTRY;
- /* We check for complete page writes here, as we then don't have to
- * get the page before writing over everything anyways.
- */
- if ( !Page_Uptodate(page) && (offset != 0 || bytes != PAGE_SIZE) ) {
- err = ll_brw(READ, inode, page, 0);
- if ( err )
- return err;
- SetPageUptodate(page);
+ err = ll_size_lock(inode, md, oa.o_size, LCK_PW, &lockhs);
+ if (err) {
+ CERROR("ll_size_lock failed: %d\n", err);
+ /* FIXME: What to do here? It's too late to back out... */
+ LBUG();
}
- if (copy_from_user((u8*)page_address(page) + offset, buf, bytes))
- return -EFAULT;
+ oa.o_valid = OBD_MD_FLID;
+ /* truncate == punch from the new size to the end of the object;
+ * the end is given as -1 (all ones).  The ULL suffix keeps the
+ * constant a well-formed 64-bit literal where long is 32 bits. */
+ err = obd_punch(ll_i2obdconn(inode), &oa, md, inode->i_size,
+ 0xffffffffffffffffULL);
+ if (err)
+ CERROR("obd_truncate fails (%d)\n", err);
+ else
+ obdo_to_inode(inode, &oa, oa.o_valid);
- lock_kernel();
- err = ll_writepage(page);
- unlock_kernel();
+ err = ll_size_unlock(inode, md, LCK_PW, lockhs);
+ if (err)
+ CERROR("ll_size_unlock failed: %d\n", err);
- return (err < 0 ? err : bytes);
-} /* ll_write_one_page */
+ EXIT;
+ return;
+} /* ll_truncate */
-/*
- * return an up to date page:
- * - if locked is true then is returned locked
- * - if create is true the corresponding disk blocks are created
- * - page is held, i.e. caller must release the page
- *
- * modeled on NFS code.
- */
-struct page *ll_getpage(struct inode *inode, unsigned long offset,
- int create, int locked)
+/*
+ * ->direct_IO hook: transfer the pages of a kiobuf straight to or from
+ * object storage in one obd_brw() call.
+ *
+ * rw        - WRITE selects OBD_BRW_WRITE, anything else OBD_BRW_READ
+ * inode     - file being accessed; supplies stripe md and obd connection
+ * iobuf     - kiobuf whose maplist[0 .. nr_pages-1] are the pages to move
+ * blocknr   - file block number of the first page; consecutive pages use
+ *             consecutive block numbers
+ * blocksize - must equal PAGE_SIZE
+ *
+ * Returns nr_pages * PAGE_SIZE when obd_brw() succeeds, -EINVAL for an
+ * unsupported block size, -ENOMEM when an allocation fails or the inode
+ * has no storage object, otherwise the error from obd_brw().
+ *
+ * All argument checks run before anything is allocated, and the callback
+ * data is allocated last, so no early exit can leave an allocation behind.
+ */
+int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
+ unsigned long blocknr, int blocksize)
{
- struct page * page;
- int index;
- int err;
+ obd_count bufs_per_obdo = iobuf->nr_pages;
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct lov_stripe_md *md = lli->lli_smd;
+ struct brw_page *pga;
+ struct io_cb_data *cbd;
+ int rc = 0;
+ int i;
ENTRY;
- offset = offset & PAGE_CACHE_MASK;
- CDEBUG(D_INFO, "ino: %ld, offset %ld, create %d, locked %d\n",
- inode->i_ino, offset, create, locked);
- index = offset >> PAGE_CACHE_SHIFT;
-
- page = grab_cache_page(&inode->i_data, index);
-
- /* Yuck, no page */
- if (! page) {
- printk(KERN_WARNING " grab_cache_page says no dice ...\n");
- EXIT;
- return NULL;
+ if (blocksize != PAGE_SIZE) {
+ CERROR("direct_IO blocksize != PAGE_SIZE\n");
+ RETURN(-EINVAL);
}
- /* PDEBUG(page, "GETPAGE: got page - before reading\n"); */
- /* now check if the data in the page is up to date */
- if ( Page_Uptodate(page)) {
- if (!locked) {
- if (PageLocked(page))
- obd_unlock_page(page);
- } else {
- printk("file %s, line %d: expecting locked page\n",
- __FILE__, __LINE__);
- }
- EXIT;
- return page;
- }
-
-
-#ifdef EXT2_OBD_DEBUG
- if ((obd_debug_level & D_INFO) && ll_find_page_index(inode, index)) {
- CDEBUG(D_INFO, "OVERWRITE: found dirty page %p, index %ld\n",
- page, page->index);
+ /* no storage object to talk to.
+ * NOTE(review): -ENOMEM is the code this check has always returned;
+ * it is kept so callers see no change, but confirm it is intended. */
+ if (!md || !md->lmd_object_id)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC(pga, sizeof(*pga) * bufs_per_obdo);
+ if (!pga)
+ RETURN(-ENOMEM);
+
+ /* NB: we can't use iobuf->maplist[i]->index for the offset
+ * instead of "blocknr" because ->index contains garbage.
+ */
+ for (i = 0; i < bufs_per_obdo; i++, blocknr++) {
+ pga[i].pg = iobuf->maplist[i];
+ pga[i].count = PAGE_SIZE;
+ pga[i].off = (obd_off)blocknr << PAGE_SHIFT;
+ pga[i].flag = OBD_BRW_CREATE;
}
-#endif
- err = ll_brw(READ, inode, page, create);
+ /* allocated only once the request is known to be valid, directly
+ * before it is handed to obd_brw() */
+ cbd = ll_init_cb();
+ if (!cbd)
+ GOTO(out, rc = -ENOMEM);
- if ( err ) {
- SetPageError(page);
- obd_unlock_page(page);
- EXIT;
- return page;
- }
+ rc = obd_brw(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ,
+ ll_i2obdconn(inode), md, bufs_per_obdo, pga,
+ ll_sync_io_cb, cbd);
+ if (rc == 0)
+ rc = bufs_per_obdo * PAGE_SIZE;
- if ( !locked )
- obd_unlock_page(page);
- SetPageUptodate(page);
- /* PDEBUG(page,"GETPAGE - after reading"); */
- EXIT;
- return page;
-} /* ll_getpage */
+out:
+ /* pga is always a live allocation when this label is reached */
+ OBD_FREE(pga, sizeof(*pga) * bufs_per_obdo);
+ RETURN(rc);
+}
-void ll_truncate(struct inode *inode)
+int ll_flush_inode_pages(struct inode * inode)
{
- struct obdo *oa;
- int err;
+ obd_count bufs_per_obdo = 0; /* nothing below ever changes this, so each array allocated here has zero elements */
+ obd_size *count = NULL;
+ obd_off *offset = NULL;
+ obd_flag *flags = NULL;
+ int err = 0;
+
ENTRY;
- //ll_dequeue_pages(inode);
+ spin_lock(&pagecache_lock); /* NOTE(review): no work is done before the matching unlock; presumably a placeholder for walking the inode's dirty pages -- confirm */
- oa = obdo_alloc();
- if ( !oa ) {
- /* XXX This would give an inconsistent FS, so deal with it as
- * best we can for now - an obdo on the stack is not pretty.
- */
- struct obdo obdo;
+ spin_unlock(&pagecache_lock);
- printk(__FUNCTION__ ": obdo_alloc failed - using stack!\n");
- obdo.o_valid = OBD_MD_FLNOTOBD;
- ll_from_inode(&obdo, inode);
+ OBD_ALLOC(count, sizeof(*count) * bufs_per_obdo);
+ OBD_ALLOC(offset, sizeof(*offset) * bufs_per_obdo);
+ OBD_ALLOC(flags, sizeof(*flags) * bufs_per_obdo);
+ if (!count || !offset || !flags)
+ GOTO(out, err=-ENOMEM);
- err = obd_punch(IID(inode), &obdo, 0, obdo.o_size);
- } else {
- oa->o_valid = OBD_MD_FLNOTOBD;
- ll_from_inode(oa, inode);
+#if 0 /* compiled out: draft of the actual write-out; it refers to 'i' and 'iobuf', neither of which is declared in this function */
+ for (i = 0 ; i < bufs_per_obdo ; i++) {
+ count[i] = PAGE_SIZE;
+ offset[i] = ((obd_off)(iobuf->maplist[i])->index) << PAGE_SHIFT;
+ flags[i] = OBD_BRW_CREATE;
+ }
- CDEBUG(D_INFO, "calling punch for %ld (%Lu bytes at 0)\n",
- (long)oa->o_id, oa->o_size);
- err = obd_punch(IID(inode), oa, oa->o_size, 0);
+ err = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode),
+ ll_i2info(inode)->lli_smd, bufs_per_obdo,
+ iobuf->maplist, count, offset, flags, NULL, NULL);
+ if (err == 0)
+ err = bufs_per_obdo * 4096;
+#endif
+ out: /* reached on success and on allocation failure alike; all three arrays are released, including any that are still NULL */
+ OBD_FREE(flags, sizeof(*flags) * bufs_per_obdo);
+ OBD_FREE(count, sizeof(*count) * bufs_per_obdo);
+ OBD_FREE(offset, sizeof(*offset) * bufs_per_obdo);
+ RETURN(err); /* 0 unless one of the allocations failed, since the write-out above is compiled out */
+}
- obdo_free(oa);
- }
- if (err) {
- printk(__FUNCTION__ ": obd_truncate fails (%d)\n", err);
- EXIT;
- return;
- }
- EXIT;
-} /* ll_truncate */
struct address_space_operations ll_aops = {
readpage: ll_readpage,
writepage: ll_writepage,
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,4,17)) /* the direct_IO hook is only wired up when building for kernels newer than 2.4.17 */
+ direct_IO: ll_direct_IO,
+#endif
sync_page: block_sync_page,
- prepare_write: ll_prepare_write,
+ prepare_write: ll_prepare_write,
commit_write: ll_commit_write,
bmap: NULL
};
-
-
-struct address_space_operations ll_dir_aops = {
- readpage: ll_dir_readpage
-};