Whamcloud - gitweb
Changes for the flushdaemon.
[fs/lustre-release.git] / lustre / obdfs / rw.c
index 389058c..f58704b 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * OBDFS Super operations
  *
+ * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
  * Copyright (C) 1999 Stelias Computing Inc,
  *                (author Peter J. Braam <braam@stelias.com>)
  * Copyright (C) 1999 Seagate Technology Inc.
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
 
-#include <../obd/linux/obd_support.h>
-#include <../obd/linux/obd_sim.h>
-#include <obdfs.h>
+#include <linux/obd_support.h>
+#include <linux/obd_ext2.h>
+#include <linux/obdfs.h>
 
-/* VFS super_block ops */
+/* File-scope definition of console_loglevel; nothing in the visible part of
+ * this file reads or writes it.
+ * NOTE(review): presumably only here to satisfy a reference from the
+ * debugging macros -- confirm it does not clash with a symbol of the same
+ * name provided elsewhere. */
+int console_loglevel;
+
+/*
+ * Synchronous single-page I/O for an inode.
+ *
+ * A temporary obdo is allocated and given the inode number as its id; the
+ * address from page_address(page) is then passed to the brw method selected
+ * by IOPS(inode, brw).  Afterwards obdo_to_inode() is called and the obdo
+ * is released.
+ *
+ * rw and create are handed to the brw method unchanged.
+ * Returns -ENOMEM when no obdo can be allocated, otherwise the value the
+ * brw method returned.
+ */
+static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create)
+{
+       obd_size nbytes = PAGE_SIZE;    /* passed to brw by address */
+       struct obdo *oa;
+       char *kaddr;
+       int rc;
+
+       ENTRY;
+       oa = obdo_alloc();
+       if (oa == NULL) {
+               EXIT;
+               return -ENOMEM;
+       }
+       oa->o_id = inode->i_ino;
+
+       kaddr = (char *)page_address(page);
+       rc = IOPS(inode, brw)(rw, IID(inode), oa, kaddr, &nbytes,
+                             (page->index) >> PAGE_SHIFT, create);
+
+       /* copy o_blocks to i_blocks */
+       obdo_to_inode(inode, oa);
+       obdo_free(oa);
+
+       EXIT;
+       return rc;
+} /* obdfs_brw */
 
 /* returns the page unlocked, but with a reference */
-int obdfs_readpage(struct file *file, struct page *page)
+int obdfs_readpage(struct dentry *dentry, struct page *page)
 {
-        struct obdfs_sb_info *sbi;
-       struct super_block *sb = file->f_dentry->d_inode->i_sb;
+       struct inode *inode = dentry->d_inode;
        int rc;
 
-        ENTRY;
-
-       /* XXX flush stuff */
-       sbi = sb->u.generic_sbp;
-       PDEBUG(page, READ);
-       rc =  sbi->osi_ops->o_brw(READ, sbi->osi_conn_info.conn_id, 
-                     file->f_dentry->d_inode->i_ino, page);
-       if (rc == PAGE_SIZE ) {
+       ENTRY;
+       PDEBUG(page, "READ");
+       rc = obdfs_brw(READ, inode, page, 0);
+       if ( !rc ) {
                SetPageUptodate(page);
                UnlockPage(page);
        } 
-       PDEBUG(page, READ);
-       if ( rc == PAGE_SIZE ) 
-               rc = 0;
+       PDEBUG(page, "READ");
+       EXIT;
+       return rc;
+} /* obdfs_readpage */
+
+/* Slab cache used for struct obdfs_pgrq allocations in this file. */
+static kmem_cache_t *obdfs_pgrq_cachep;
+
+/*
+ * Create the "obdfs_pgrq" slab cache.
+ *
+ * Returns 0 on success, -ENOMEM when kmem_cache_create() yields NULL.
+ */
+int obdfs_init_pgrqcache(void)
+{
+       int rc = 0;
+
+       ENTRY;
+       obdfs_pgrq_cachep = kmem_cache_create("obdfs_pgrq",
+                                             sizeof(struct obdfs_pgrq), 0,
+                                             SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!obdfs_pgrq_cachep)
+               rc = -ENOMEM;
+
+       EXIT;
+       return rc;
+} /* obdfs_init_pgrqcache */
+
+/*
+ * Destroy the obdfs_pgrq slab cache if one was created and clear the
+ * pointer, so that a repeated call finds nothing left to destroy.
+ */
+void obdfs_cleanup_pgrqcache(void)
+{
+       ENTRY;  /* pairs with the EXIT below, like the other functions here */
+       if (obdfs_pgrq_cachep != NULL) {
+               kmem_cache_destroy(obdfs_pgrq_cachep);
+       }
+       obdfs_pgrq_cachep = NULL;
+       EXIT;
+} /* obdfs_cleanup_pgrqcache */
+
+
+/*
+ * Look up the queued write request for a page.
+ *
+ * Walks the request list anchored at OBD_LIST(inode) and returns the entry
+ * whose rq_page is the given page, or NULL when the list is empty or holds
+ * no such entry.
+ */
+static struct obdfs_pgrq *
+obdfs_find_in_page_cache(struct inode *inode, struct page *page)
+{
+       struct list_head *head = &OBD_LIST(inode);
+       struct list_head *pos;
+
+       ENTRY;
+       CDEBUG(D_INODE, "looking for inode %ld page %p\n", inode->i_ino, page);
+       if (list_empty(head)) {
+               CDEBUG(D_INODE, "empty list\n");
+               EXIT;
+               return NULL;
+       }
+
+       for (pos = head->next; pos != head; pos = pos->next) {
+               struct obdfs_pgrq *req;
+
+               req = list_entry(pos, struct obdfs_pgrq, rq_list);
+               CDEBUG(D_INODE, "checking page %p\n", req->rq_page);
+               if (req->rq_page != page) {
+                       continue;
+               }
+
+               CDEBUG(D_INODE, "found page %p in list\n", page);
+               EXIT;
+               return req;
+       }
+
+       EXIT;
+       return NULL;
+} /* obdfs_find_in_page_cache */
+
+
+/*
+ * Write out a queued page request and dispose of it.
+ *
+ * The page is written with obdfs_brw(WRITE, ..., create = 1).  Whatever the
+ * result, put_page() is then called on the page, the request is unlinked
+ * from its list and handed back to the obdfs_pgrq slab cache.
+ * Returns the obdfs_brw() status.
+ */
+static inline int
+obdfs_remove_from_page_cache(struct obdfs_pgrq *pgrq)
+{
+       struct inode *owner = pgrq->rq_inode;
+       struct page *pg = pgrq->rq_page;
+       int rc;
+
+       ENTRY;
+       CDEBUG(D_INODE, "removing inode %ld page %p, pgrq: %p\n",
+              owner->i_ino, pg, pgrq);
+
+       /* XXX a write error is only reported, not handled: the request is
+        *     discarded either way (the author's note suggests ext2 may do
+        *     the same).
+        */
+       rc = obdfs_brw(WRITE, owner, pg, 1);
+
+       put_page(pg);
+       list_del(&pgrq->rq_list);
+       kmem_cache_free(obdfs_pgrq_cachep, pgrq);
+
+       EXIT;
        return rc;
+} /* obdfs_remove_from_page_cache */
+
+/*
+ * Add a page to the write request cache list for later writing.
+ *
+ * A request is allocated from obdfs_pgrq_cachep, zeroed and pointed at the
+ * page and inode; get_page() is called on the page and the request is put
+ * on the OBD_LIST(inode) list.
+ *
+ * For testing purposes the page is then written out immediately: the
+ * request is looked up again and passed to obdfs_remove_from_page_cache().
+ * In the future a flush daemon is meant to write out the page, at which
+ * point this function would simply return 0 after queueing.
+ *
+ * Returns -ENOMEM if no request could be allocated, -EINVAL if the request
+ * cannot be found after adding it, otherwise the result of
+ * obdfs_remove_from_page_cache().
+ */
+static int obdfs_add_to_page_cache(struct inode *inode, struct page *page)
+{
+       struct obdfs_pgrq *pgrq;
+       int rc;
+
+       ENTRY;
+       pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL);
+       CDEBUG(D_INODE, "adding inode %ld page %p, pgrq: %p\n",
+              inode->i_ino, page, pgrq);
+       if (!pgrq) {
+               EXIT;
+               return -ENOMEM;
+       }
+       memset(pgrq, 0, sizeof(*pgrq));
+
+       pgrq->rq_page = page;
+       pgrq->rq_inode = inode;
+
+       get_page(pgrq->rq_page);
+       list_add(&pgrq->rq_list, &OBD_LIST(inode));
+
+       /* Immediate write-out (testing only, see above). */
+       pgrq = obdfs_find_in_page_cache(inode, page);
+       if (!pgrq) {
+               CDEBUG(D_INODE, "XXXX Can't find page after adding it!!!\n");
+               EXIT;
+               return -EINVAL;
+       }
+
+       rc = obdfs_remove_from_page_cache(pgrq);
+       EXIT;   /* this path used to return without the matching EXIT */
+       return rc;
+} /* obdfs_add_to_page_cache */
+
+
+/*
+ * Write one page of an inode.
+ *
+ * sync != 0: the page is written at once through obdfs_brw(WRITE).
+ * sync == 0: the page is handed to obdfs_add_to_page_cache().
+ * When the chosen path reports 0 the page is marked up to date.
+ * Returns the status of the chosen path.
+ */
+int obdfs_do_writepage(struct inode *inode, struct page *page, int sync)
+{
+       int err;
+
+       ENTRY;
+       PDEBUG(page, "WRITEPAGE");
+       if ( sync ) {
+               err = obdfs_brw(WRITE, inode, page, 1);
+       } else {
+               err = obdfs_add_to_page_cache(inode, page);
+       }
+
+       if ( !err ) {
+               SetPageUptodate(page);
+       }
+       PDEBUG(page,"WRITEPAGE");
+       EXIT;   /* balances the ENTRY above */
+       return err;
+} /* obdfs_do_writepage */
 
+/*
+ * Write a page of dentry's inode by calling obdfs_do_writepage() with
+ * sync == 0 and returning its result.
+ * NOTE(review): the earlier comment said the page is returned unlocked but
+ * with a reference; nothing visible on this path unlocks it -- confirm.
+ */
+int obdfs_writepage(struct dentry *dentry, struct page *page)
+{
+       struct inode *inode = dentry->d_inode;
+
+       return obdfs_do_writepage(inode, page, 0);
 }
 
 /*
@@ -68,137 +235,95 @@ int obdfs_readpage(struct file *file, struct page *page)
  * If the writer ends up delaying the write, the writer needs to
  * increment the page use counts until he is done with the page.
  */
-int obdfs_write_one_page(struct file *file, struct page *page, unsigned long offset, unsigned long bytes, const char * buf)
+/*
+ * Copy user data from buf into the page at the given offset and pass the
+ * page to obdfs_writepage() while holding the kernel lock.
+ *
+ * If the page is not up to date it is first read with obdfs_brw(READ);
+ * a failed read is returned immediately.
+ * bytes is reduced by the return value of copy_from_user(); if nothing is
+ * left, obdfs_writepage() is skipped and -EFAULT is returned, otherwise
+ * the obdfs_writepage() result is returned.
+ *
+ * NOTE(review): the removed version returned the byte count on success;
+ * this one returns the obdfs_writepage() status instead -- confirm that
+ * callers expect this.
+ * NOTE(review): ENTRY is called but no return path calls EXIT -- confirm
+ * whether that is intentional.
+ */
+int obdfs_write_one_page(struct file *file, struct page *page,
+                        unsigned long offset, unsigned long bytes,
+                        const char * buf)
 {
-       long status;
+       struct inode *inode = file->f_dentry->d_inode;
+       int err;
 
+       ENTRY;
+       if ( !Page_Uptodate(page) ) {
+               err = obdfs_brw(READ, inode, page, 1);
+               if ( !err )
+                       SetPageUptodate(page);
+               else
+                       return err;
+       }
        bytes -= copy_from_user((u8*)page_address(page) + offset, buf, bytes);
-       status = -EFAULT;
-       CDEBUG(D_INODE, "page offset %ld, bytes %ld, offset %ld, page addr %lx, writing: %s, beg of page %s\n", page->offset, bytes, offset, page_address(page), ((char *) page_address(page)) + offset, (char *)page_address(page));
+       err = -EFAULT;
+
        if (bytes) {
                lock_kernel();
-               status = obdfs_writepage(file, page);
+               err = obdfs_writepage(file->f_dentry, page);
                unlock_kernel();
        }
-       if ( status != PAGE_SIZE ) 
-               return status;
-       else
-               return bytes;
-}
-
-
-
-
-/* returns the page unlocked, but with a reference */
-int obdfs_writepage(struct file *file, struct page *page)
-{
-        struct obdfs_sb_info *sbi;
-       struct super_block *sb = file->f_dentry->d_inode->i_sb;
-       int rc;
-
-        ENTRY;
-       PDEBUG(page,WRITE);
-       /* XXX flush stuff */
-       sbi = sb->u.generic_sbp;
-
-       rc = sbi->osi_ops->o_brw(WRITE, sbi->osi_conn_info.conn_id, 
-                     file->f_dentry->d_inode->i_ino, page);
-       SetPageUptodate(page);
-       PDEBUG(page,WRITE);
-       return rc;
-}
 
+       return err;
+} /* obdfs_write_one_page */
 
 /* 
-   page is returned unlocked, with the up to date flag set, 
-   and held, i.e. caller must do a page_put
+   return an up to date page:
+    - if locked is true then is returned locked
+    - if create is true the corresponding disk blocks are created 
+    - page is held, i.e. caller must release the page
+
+   modeled on NFS code.
 */
-struct page *obdfs_getpage(struct inode *inode, unsigned long offset)
+struct page *obdfs_getpage(struct inode *inode, unsigned long offset, int create, int locked)
 {
-       unsigned long new_page;
+       struct page *page_cache;
        struct page ** hash;
-       struct page * page; 
-       struct obdfs_sb_info *sbi;
-       struct super_block *sb = inode->i_sb;
+       struct page * page;
+       int err;
 
-        ENTRY;
+       ENTRY;
 
-       sbi = sb->u.generic_sbp;
+       offset = offset & PAGE_CACHE_MASK;
+       CDEBUG(D_INODE, "\n");
        
-       page = find_get_page(inode, offset); 
-       if (page && Page_Uptodate(page)) { 
-               PDEBUG(page,READ);
+       page = NULL;
+       page_cache = page_cache_alloc();
+       if ( ! page_cache ) {
+               EXIT;
+               return NULL;
+       }
+       CDEBUG(D_INODE, "page_cache %p\n", page_cache);
+
+       hash = page_hash(&inode->i_data, offset);
+       page = grab_cache_page(&inode->i_data, offset);
+
+       /* Yuck, no page */
+       if (! page) {
+           printk("grab_cache_page says no dice ...\n");
+           EXIT;
+           return 0;
+       }
+
+       PDEBUG(page, "GETPAGE: got page - before reading\n");
+       /* now check if the data in the page is up to date */
+       if ( Page_Uptodate(page)) { 
+               if (!locked)
+                       UnlockPage(page);
+               EXIT;
                return page;
        } 
-               
-       if (page && !Page_Uptodate(page) ) {
-               CDEBUG(D_INODE, "Page found but not up to date\n");
-       }
 
-       /* page_cache_alloc returns address of page */
-       new_page = page_cache_alloc();
-       if (!new_page)
-               return NULL;
-       
-       /* corresponding struct page in the mmap */
-       hash = page_hash(inode, offset);
-       page = page_cache_entry(new_page);
-       if (!add_to_page_cache_unique(page, inode, offset, hash)) {
-               CDEBUG(D_INODE, "Page not found. Reading it.\n");
-               PDEBUG(page,READ);
-               sbi->osi_ops->o_brw(READ, sbi->osi_conn_info.conn_id, 
-                                   inode->i_ino, page);
+       err = obdfs_brw(READ, inode, page, create);
+
+       if ( err ) {
+               SetPageError(page);
                UnlockPage(page);
-               SetPageUptodate(page);
+               EXIT;
                return page;
        }
-       /*
-        * We arrive here in the unlikely event that someone 
-        * raced with us and added our page to the cache first.
-        */
-       CDEBUG(D_INODE, "Page not found. Someone raced us.\n");
-       PDEBUG(page,READ);
-       return page;
-}
 
+       if ( !locked )
+               UnlockPage(page);
+       SetPageUptodate(page);
+       PDEBUG(page,"GETPAGE - after reading");
+       EXIT;
+       return page;
+} /* obdfs_getpage */
 
 
-struct file_operations obdfs_file_operations = {
-       NULL,                   /* lseek - default */
-       generic_file_read,      /* read */
-       obdfs_file_write,     /* write - bad */
-        obdfs_readdir,         /* readdir */
-       NULL,                   /* poll - default */
-       NULL,                   /* ioctl */
-       NULL,                   /* mmap */
-       NULL,                   /* no special open code */
-       NULL,                   /* flush */
-       NULL,                   /* no special release code */
-       NULL,                   /* fsync */
-       NULL,                   /* fasync */
-       NULL,                   /* check_media_change */
-       NULL                    /* revalidate */
-};
-
-struct inode_operations obdfs_inode_ops = {
-       &obdfs_file_operations, /* default directory file-ops */
-       NULL,           /* create */
-       obdfs_lookup,   /* lookup */
-       NULL,           /* link */
-       NULL,           /* unlink */
-       NULL,           /* symlink */
-       NULL,           /* mkdir */
-       NULL,           /* rmdir */
-       NULL,           /* mknod */
-       NULL,           /* rename */
-       NULL,           /* readlink */
-       NULL,           /* follow_link */
-       NULL,           /* get_block */
-       obdfs_readpage, /* readpage */
-       obdfs_writepage, /* writepage */
-       NULL,           /* flushpage */
-       NULL,           /* truncate */
-       NULL,           /* permission */
-       NULL,           /* smap */
-       NULL            /* revalidate */
-};