int (*o_write)(struct obd_conn *conn, struct obdo *oa, char *buf,
obd_size *count, obd_off offset);
int (*o_brw)(int rw, struct obd_conn *conn, obd_count *num_io,
- struct obdo **oa, char **buf, obd_size **count,
+ struct obdo **oa, char **buf, obd_size *count,
obd_off *offset, obd_flag *flags);
int (*o_punch)(struct obd_conn *conn, struct obdo *tgt, obd_size count,
obd_off offset);
#define OBT(dev) dev->obd_type->typ_ops
#define OBP(dev,op) dev->obd_type->typ_ops->o_ ## op
+#define MAX_IOVEC 16
+
/*
* ======== OBD Metadata Support ===========
struct dentry *obdfs_follow_link(struct dentry *, struct dentry *, unsigned int);
-struct obdfs_super_info {
- struct list_head s_wr_head;
-};
-
-
/* list of all OBDFS super blocks */
struct list_head obdfs_super_list;
struct obdfs_super_entry {
struct list_head sl_chain;
- struct obdfs_super_info *sl_sbi;
+ struct obdfs_sb_info *sl_sbi;
};
struct obdfs_pgrq {
- struct list_head rq_list; /* linked list of req's */
+ struct list_head rq_ilist; /* link in the per-inode list of req's (see obdfs_ilist) */
+ struct list_head rq_slist; /* link in the per-superblock list of req's (see obdfs_slist) */
unsigned long rq_jiffies;
struct inode *rq_inode; /* dentry referenced */
struct page *rq_page; /* page to be written */
};
+inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq);
+int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io,
+ struct obdo **obdos,
+ struct page **pages, char **bufs, obd_size *counts,
+ obd_off *offsets, obd_flag *flags);
+
struct obdfs_sb_info {
struct obd_conn osi_conn;
struct obd_ops *osi_ops;
ino_t osi_rootino; /* which root inode */
int osi_minor; /* minor of /dev/obdX */
- struct list_head osi_list; /* linked list of inodes to write */
+ struct list_head osi_pages; /* list of page write requests (struct obdfs_pgrq, via rq_slist) queued on this superblock */
};
struct obdfs_inode_info {
};
-#define OBDFS_LIST(inode) (((struct obdfs_inode_info *)(&(inode)->u.generic_ip))->oi_pages)
-#define WREQ(entry) (list_entry(entry, struct obdfs_pgrq, rq_list))
+/* Return the head of the per-inode list of queued page write requests. */
+static inline struct list_head *obdfs_ilist(struct inode *inode)
+{
+	/* the obdfs private inode data is accessed in place at u.generic_ip */
+	return &((struct obdfs_inode_info *)&inode->u.generic_ip)->oi_pages;
+}
+
+/* Return the head of the per-superblock list of queued page write requests. */
+static inline struct list_head *obdfs_slist(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct obdfs_sb_info *info;
+
+	/* the obdfs private superblock data is accessed in place at u.generic_sbp */
+	info = (struct obdfs_sb_info *)(&sb->u.generic_sbp);
+	return &info->osi_pages;
+}
+
#define OBDFS_INFO(inode) ((struct obdfs_inode_info *)(&(inode)->u.generic_ip))
void obdfs_sysctl_init(void);
while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
obd_count num = 1;
char *buf;
- obd_size brw_size = PAGE_SIZE;
- obd_size *brw_count = &brw_size;
+ obd_size brw_count = PAGE_SIZE;
obd_off brw_offset = (page->index) << PAGE_SHIFT;
obd_flag flagr = 0;
obd_flag flagw = OBD_BRW_CREATE;
int age_super; /* Time for superblock to age before we flush it */
} pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ };
-/* static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int wait,
-
-*/
-static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int check_time)
+
+/*
+ * Write out the page write requests queued on page_list.
+ *
+ * page_list    head of the request list to walk
+ * flush_inode  non-zero if page_list links requests through rq_ilist
+ *              (a per-inode list), zero if it links them through
+ *              rq_slist (the per-superblock list)
+ * check_time   non-zero to skip requests that were queued less than
+ *              pupd_prm.age_buffer jiffies ago
+ *
+ * Requests are gathered into vectors of at most MAX_IOVEC pages and
+ * handed to obdfs_do_vec_wr().  Every request that is picked up is
+ * unlinked and freed (obdfs_pgrq_del) before its write is issued.
+ *
+ * Returns 0 on success, otherwise the first error encountered.
+ */
+int obdfs_flush_reqs(struct list_head *page_list,
+ int flush_inode, int check_time)
{
- struct list_head *wr;
- struct obdfs_pgrq *req;
-
- wr = &sbi->s_wr_head;
- while ( (wr = wr->next) != &sbi->s_wr_head ) {
- req = list_entry(wr, struct obdfs_pgrq, rq_list);
-
- if (!check_time ||
- req->rq_jiffies <= (jiffies - pupd_prm.age_buffer)) {
- /* write request out to disk */
- obdfs_do_writepage(req->rq_inode, req->rq_page, 1);
+	struct list_head *tmp;
+	struct list_head *next;
+	obd_count num_io = 0;
+	struct obdo *oa = NULL;
+	struct obdo *obdos[MAX_IOVEC];
+	struct page *pages[MAX_IOVEC];
+	char *bufs[MAX_IOVEC];
+	obd_size counts[MAX_IOVEC];
+	obd_off offsets[MAX_IOVEC];
+	obd_flag flags[MAX_IOVEC];
+	int err = 0;
+	int i;
+	struct inode *inode = NULL;
+
+	ENTRY;
+
+	if ( list_empty(page_list)) {
+		CDEBUG(D_INODE, "list empty\n");
+		EXIT;
+		return 0;
+	}
+
+	/* add all of the outstanding pages to a write vector, and write it */
+	for ( tmp = page_list->next ; tmp != page_list ; tmp = next ) {
+		struct obdfs_pgrq *pgrq;
+		struct page *page;
+
+		/* obdfs_pgrq_del() below frees the request that embeds *tmp,
+		 * so the successor must be fetched before that happens */
+		next = tmp->next;
+
+		if ( flush_inode )
+			pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist);
+		else
+			pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist);
+		page = pgrq->rq_page;
+		inode = pgrq->rq_inode;
+
+		/* too young to be written yet: leave it on the list */
+		if (check_time &&
+		    pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer))
+			continue;
+
+		oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
+		if ( IS_ERR(oa) ) {
+			/* Requests already placed in the vector have been
+			 * taken off the list; stop collecting, but fall
+			 * through to the final flush so they are still
+			 * written and their obdos/page references released.
+			 */
+			err = PTR_ERR(oa);
+			break;
}
+		obdfs_from_inode(oa, inode);
+
+		CDEBUG(D_INODE, "adding page %p to vector\n", page);
+		obdos[num_io] = oa;
+		bufs[num_io] = (char *)page_address(page);
+		pages[num_io] = page;
+		counts[num_io] = PAGE_SIZE;
+		offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT;
+		flags[num_io] = OBD_BRW_CREATE;
+		num_io++;
+		/* remove request from list before write to avoid conflict */
+		obdfs_pgrq_del(pgrq);
+
+		if ( num_io == MAX_IOVEC ) {
+			err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos,
+					      pages,
+					      bufs, counts, offsets, flags);
+			/* release every obdo of the batch, also when the
+			 * write failed */
+			for (i=0 ; i<MAX_IOVEC ; i++)
+				obdo_free(obdos[i]);
+			num_io = 0;
+			if ( err ) {
+				/* XXX Probably should handle error here -
+				 * the failed batch is discarded, not put
+				 * back on the list.
+				 */
+				break;
+			}
+		}
+	}
+
+	/* flush any remaining I/Os */
+	if ( num_io ) {
+		int wr_err;
+
+		/* the callee gets a pointer to num_io, so remember the
+		 * last index instead of relying on num_io afterwards */
+		i = num_io - 1;
+		wr_err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, pages, bufs,
+					 counts, offsets, flags);
+		for ( ; i>=0 ; i-- ) {
+			obdo_free(obdos[i]);
+		}
+		/* keep an earlier error if there was one */
+		if ( !err )
+			err = wr_err;
}
+	EXIT;
-}
+	return err;
+} /* obdfs_flush_reqs */
static void obdfs_flush_dirty_pages(int check_time)
{
struct list_head *sl;
- struct obdfs_super_info *sbi;
+ struct obdfs_sb_info *sbi;
sl = &obdfs_super_list;
while ( (sl = sl->next) != &obdfs_super_list ) {
list_entry(sl, struct obdfs_super_entry, sl_chain);
sbi = entry->sl_sbi;
- /* walk write requests here */
- obdfs_flush_reqs(sbi, jiffies);
+ /* walk write requests here, use the sb, check the time */
+ obdfs_flush_reqs(&sbi->osi_pages, 0, 1);
}
/* again, but now we wait for completion */
sbi = entry->sl_sbi;
/* walk write requests here */
- /* XXX should jiffies be 0 here? */
- obdfs_flush_reqs(sbi, jiffies);
+ obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
}
}
+
static struct task_struct *pupdated;
static int pupdate(void *unused)
if (stopped)
goto stop_pupdate;
}
+ /* asynchronous setattr etc for the future ... */
/* flush_inodes(); */
+ CDEBUG(D_INODE, "about to flush pages...\n");
+ /*
obdfs_flush_dirty_pages(1);
+ */
+ CDEBUG(D_INODE, "done flushing pages...\n");
}
}
* returns a locked and held page upon success
*/
-/* XXX I believe these pages should in fact NOT be locked */
+/* We do this with a locked page: that's not necessary, since the semaphore on the inode protects this page as well. */
static struct page *obdfs_add_entry (struct inode * dir,
const char * name, int namelen,
struct ext2_dir_entry_2 ** res_dir,
*res_dir = de;
*err = 0;
PDEBUG(page, "add_entry");
+ /* XXX unlock page here */
EXIT;
return page;
}
return ERR_PTR(-EIO);
}
obdo_free(oa);
- INIT_LIST_HEAD(&OBDFS_LIST(inode));
EXIT;
return inode;
dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(dir);
err = obdfs_do_writepage(dir, page, IS_SYNC(dir));
- /* XXX handle err? */
UnlockPage(page);
#include <linux/obd_ext2.h>
#include <linux/obdfs.h>
-int console_loglevel;
+
+int obdfs_flush_reqs(struct list_head *page_list,
+ int flush_inode, int check_time);
+
/* SYNCHRONOUS I/O for an inode */
static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create)
obd_count num_io = 1;
struct obdo *oa;
char *buf = (char *)page_address(page);
- obd_size size = PAGE_SIZE;
- obd_size *count = &size;
+ obd_size count = PAGE_SIZE;
obd_off offset = ((obd_off)page->index) << PAGE_SHIFT;
obd_flag flags = create ? OBD_BRW_CREATE : 0;
int err;
return 0;
} /* obdfs_init_wreqcache */
+/*
+ * Unlink a page write request from both the per-inode and the
+ * per-superblock request list and give it back to the request slab
+ * cache.  The request must not be touched after this call.
+ */
+inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq)
+{
+	struct list_head *inode_link = &pgrq->rq_ilist;
+	struct list_head *super_link = &pgrq->rq_slist;
+
+	list_del(inode_link);
+	list_del(super_link);
+	kmem_cache_free(obdfs_pgrq_cachep, pgrq);
+}
+
void obdfs_cleanup_pgrqcache(void)
{
ENTRY;
* Find a specific page in the page cache. If it is found, we return
* the write request struct associated with it, if not found return NULL.
*/
+#if 0
static struct obdfs_pgrq *
obdfs_find_in_page_cache(struct inode *inode, struct page *page)
{
EXIT;
return NULL;
} /* obdfs_find_in_page_cache */
+#endif
-/*
- * Remove a writeback request from a list
- */
-static inline int
-obdfs_remove_from_page_cache(struct obdfs_pgrq *pgrq)
+/*
+ * Issue one vectored WRITE through the superblock's brw method and then
+ * drop one reference on each page of the vector (the reference taken
+ * with get_page() when the request was queued).
+ *
+ * sb       superblock whose OBD connection is used
+ * num_io   in: number of entries in the arrays; passed on to brw by
+ *          pointer
+ * obdos, pages, bufs, counts, offsets, flags
+ *          parallel arrays with one entry per page
+ *
+ * Returns whatever the brw method returned.  The page references are
+ * dropped whether or not the write succeeded.
+ */
+int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io,
+ struct obdo **obdos,
+ struct page **pages, char **bufs, obd_size *counts,
+ obd_off *offsets, obd_flag *flags)
{
- struct inode *inode = pgrq->rq_inode;
- struct page *page = pgrq->rq_page;
+	int last_io = *num_io;
int err;
-
+	struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)&sb->u.generic_sbp;
ENTRY;
- CDEBUG(D_INODE, "writing inode %ld page %p, pgrq: %p\n",
- inode->i_ino, page, pgrq);
- OIDEBUG(inode);
- PDEBUG(page, "REM_CACHE");
- err = obdfs_brw(WRITE, inode, page, 1);
- /* XXX probably should handle error here somehow. I think that
- * ext2 also does the same thing - discard write even if error?
- */
- put_page(page);
- list_del(&pgrq->rq_list);
- kmem_cache_free(obdfs_pgrq_cachep, pgrq);
- OIDEBUG(inode);
+	CDEBUG(D_INODE, "writing %d pages in vector\n", last_io);
+	err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_io, obdos,
+			   bufs, counts, offsets, flags);
+
+	/* last_io was sampled before the call, so all pages handed in are
+	 * released; testing the count first keeps an empty vector from
+	 * touching pages[-1] */
+	while ( last_io > 0 )
+		put_page(pages[--last_io]);
EXIT;
return err;
-} /* obdfs_remove_from_page_cache */
+}
+
/*
* Add a page to the write request cache list for later writing
* ASYNCHRONOUS write method.
*/
-static int obdfs_add_to_page_cache(struct inode *inode, struct page *page)
+static int obdfs_add_page_to_cache(struct inode *inode, struct page *page)
{
struct obdfs_pgrq *pgrq;
+ int rc = 0;
ENTRY;
pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL);
pgrq->rq_inode = inode;
get_page(pgrq->rq_page);
- list_add(&pgrq->rq_list, &OBDFS_LIST(inode));
+ list_add(&pgrq->rq_ilist, obdfs_ilist(inode));
+ list_add(&pgrq->rq_slist, obdfs_slist(inode));
- /* For testing purposes, we write out the page here.
- * In the future, a flush daemon will write out the page.
+ /* XXX For testing purposes, we write out the page here.
+ * In the future, a flush daemon will write out the page.
return 0;
*/
- pgrq = obdfs_find_in_page_cache(inode, page);
- if (!pgrq) {
- CDEBUG(D_INODE, "XXXX Can't find page after adding it!!!\n");
- EXIT;
- return -EINVAL;
- }
-
- return obdfs_remove_from_page_cache(pgrq);
-} /* obdfs_add_to_page_cache */
+ rc = obdfs_flush_reqs(obdfs_slist(inode), 0, 0);
+ EXIT;
+ return rc;
+} /* obdfs_add_page_to_cache */
/* select between SYNC and ASYNC I/O methods */
if ( sync )
err = obdfs_brw(WRITE, inode, page, 1);
else
- err = obdfs_add_to_page_cache(inode, page);
+ err = obdfs_add_page_to_cache(inode, page);
if ( !err )
SetPageUptodate(page);
PDEBUG(page,"WRITEPAGE");
+ EXIT;
return err;
} /* obdfs_do_writepage */
goto ERR;
}
- INIT_LIST_HEAD(&sbi->osi_list);
+ INIT_LIST_HEAD(&sbi->osi_pages);
sbi->osi_super = sb;
ODEBUG(oa);
obdfs_to_inode(inode, oa);
- INIT_LIST_HEAD(&OBDFS_LIST(inode));
+ INIT_LIST_HEAD(obdfs_ilist(inode));
obdo_free(oa);
OIDEBUG(inode);