#define OBD_MD_FLGENER (0x00002000UL)
#define OBD_MD_FLINLINE (0x00004000UL)
#define OBD_MD_FLOBDMD (0x00008000UL)
-#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDMD | OBD_MD_FLOBDFLG))
+#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDMD | OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS))
/*
* ======== OBD Device Declarations ===========
obd_size *count, obd_off offset);
int (*o_write)(struct obd_conn *conn, struct obdo *oa, char *buf,
obd_size *count, obd_off offset);
- int (*o_brw)(int rw, struct obd_conn *conn, obd_count *num_io,
- struct obdo **oa, char **buf, obd_size *count,
- obd_off *offset, obd_flag *flags);
+ int (*o_brw)(int rw, struct obd_conn *conn, obd_count num_oa,
+ struct obdo **oa, obd_count *oa_bufs, char **buf,
+ obd_size *count, obd_off *offset, obd_flag *flags);
int (*o_punch)(struct obd_conn *conn, struct obdo *tgt, obd_size count,
obd_off offset);
int (*o_sync)(struct obd_conn *conn, struct obdo *tgt, obd_size count,
#define OBT(dev) dev->obd_type->typ_ops
#define OBP(dev,op) dev->obd_type->typ_ops->o_ ## op
+/* This value is not arbitrarily chosen: it is derived from KIO_STATIC_PAGES in linux/iobuf.h */
+#define MAX_IOVEC (KIO_STATIC_PAGES - 1)
+
/*
* ======== OBD Metadata Support ===========
#define OBD_EXT2_RUNIT _IOWR('f', 61, long)
+#include <linux/iobuf.h>
struct ext2_obd {
struct super_block * ext2_sb;
struct list_head obdfs_super_list;
struct obdfs_pgrq {
- struct list_head rq_ilist; /* linked list of req's */
- struct list_head rq_slist; /* linked list of req's */
- unsigned long rq_jiffies;
- struct inode *rq_inode; /* dentry referenced */
+ struct list_head rq_plist; /* link in the owning inode's oi_pages list */
+ unsigned long rq_jiffies; /* compared against jiffies to age the request; presumably the queue time - TODO confirm where it is set */
struct page *rq_page; /* page to be written */
};
+
inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq);
-int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io,
- struct obdo **obdos,
+int obdfs_do_vec_wr(struct super_block *sb, obd_count num_io, obd_count num_oa,
+ struct obdo **obdos, obd_count *oa_bufs,
struct page **pages, char **bufs, obd_size *counts,
obd_off *offsets, obd_flag *flags);
struct obd_ops *osi_ops;
ino_t osi_rootino; /* which root inode */
int osi_minor; /* minor of /dev/obdX */
- struct list_head osi_pages; /* linked list of inodes to write */
+ struct list_head osi_inodes; /* linked list of dirty inodes */
};
struct obdfs_inode_info {
int oi_flags;
+ struct list_head oi_inodes; /* link in the superblock's osi_inodes list */
struct list_head oi_pages;
char oi_inline[OBD_INLINESZ];
};
-#define MAX_IOVEC 16
-
-static inline struct list_head *obdfs_ilist(struct inode *inode)
+static inline struct list_head *obdfs_iplist(struct inode *inode) /* head of the inode's page request list (oi_pages) */
{
struct obdfs_inode_info *info = (struct obdfs_inode_info *)&inode->u.generic_ip;
return &info->oi_pages;
}
+/* Return the list link (oi_inodes) stored in the inode's obdfs private area. */
+static inline struct list_head *obdfs_islist(struct inode *inode)
+{
+	return &((struct obdfs_inode_info *)&inode->u.generic_ip)->oi_inodes;
+}
+
static inline struct list_head *obdfs_slist(struct inode *inode) {
struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)(&inode->i_sb->u.generic_sbp);
- return &sbi->osi_pages;
+ return &sbi->osi_inodes; /* head of the superblock's list of dirty inodes */
}
#define OBDFS_INFO(inode) ((struct obdfs_inode_info *)(&(inode)->u.generic_ip))
* and arrays to handle the request parameters.
*/
while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) {
- obd_count num = 1;
+ obd_count num_oa = 1;
+ obd_count num_buf = 1;
char *buf;
obd_size brw_count = PAGE_SIZE;
obd_off brw_offset = (page->index) << PAGE_SHIFT;
page->index = index;
buf = (char *)page_address(page);
- err = OBP(src_conn->oc_dev, brw)(READ, src_conn, &num, &src,
- &buf, &brw_count, &brw_offset,
- &flagr);
+ err = OBP(src_conn->oc_dev, brw)(READ, src_conn, num_oa, &src,
+ &num_buf, &buf, &brw_count,
+ &brw_offset, &flagr);
if ( err ) {
EXIT;
}
CDEBUG(D_INODE, "Read page %ld ...\n", page->index);
- err = OBP(dst_conn->oc_dev, brw)(WRITE, dst_conn, &num, &dst,
- &buf, &brw_count, &brw_offset,
- &flagw);
+ err = OBP(dst_conn->oc_dev, brw)(WRITE, dst_conn, num_oa, &dst,
+ &num_buf, &buf, &brw_count,
+ &brw_offset, &flagw);
/* XXX should handle dst->o_size, dst->o_blocks here */
if ( err ) {
NULL, /* readdir - bad */
NULL, /* poll - default */
NULL, /* ioctl */
- NULL, /* mmap */
+ generic_file_mmap, /* mmap */
NULL, /* no special open code */
NULL, /* flush */
NULL, /* no special release code */
} pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ };
+/*
+ * Gather queued page write requests for one inode into the caller's I/O
+ * vector.
+ *
+ *   inode      - inode whose page request list (obdfs_iplist) is scanned
+ *   obdo       - out: obdo from obdo_fromid(), filled in from the inode
+ *   nr_slots   - number of free entries in pages/bufs/counts/offsets
+ *   pages, bufs, counts, offsets
+ *              - out: one entry per gathered page
+ *   flag       - out: set to OBD_BRW_CREATE
+ *   check_time - if non-zero, requests younger than pupd_prm.age_buffer
+ *                are left on the list for a later pass
+ *
+ * Returns the number of pages gathered, or a negative errno taken from
+ * obdo_fromid().  Every gathered request is unlinked and freed here; the
+ * page reference it held travels with pages[].
+ *
+ * NOTE(review): when the page list is already empty on entry this returns 0
+ * without writing *obdo or *flag - callers must not use them in that case.
+ *
+ * NOTE(review): list_del() on the inode's oi_inodes link does not
+ * re-initialise it, so a later list_empty(obdfs_islist(inode)) test may no
+ * longer report the inode as unlisted - confirm against the callers before
+ * changing, since the caller is still walking that list.
+ */
+static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
+			       int nr_slots, struct page **pages, char **bufs,
+			       obd_size *counts, obd_off *offsets,
+			       obd_flag *flag, int check_time)
+{
+	struct list_head *page_list = obdfs_iplist(inode);
+	struct list_head *tmp;
+	int i = 0;
+
+	ENTRY;
+	if (list_empty(page_list)) {
+		list_del(obdfs_islist(inode));
+		CDEBUG(D_INODE, "empty list\n");
+		EXIT;
+		return 0;
+	}
+
+	*obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
+	if ( IS_ERR(*obdo) ) {
+		EXIT;
+		return PTR_ERR(*obdo);
+	}
+
+	obdfs_from_inode(*obdo, inode);
+	*flag = OBD_BRW_CREATE;
+
+	tmp = page_list->next;
+	while ( tmp != page_list && i < nr_slots ) {
+		struct obdfs_pgrq *req;
+		struct page *page;
+
+		req = list_entry(tmp, struct obdfs_pgrq, rq_plist);
+		/* step to the next entry first: req may be unlinked and
+		 * freed below, after which its list node must not be read */
+		tmp = tmp->next;
+		page = req->rq_page;
+
+		if ( !page ) {
+			CDEBUG(D_INODE, "no page \n");
+			/* nothing to write for this request; drop it, but
+			 * still hand back the pages already gathered */
+			obdfs_pgrq_del(req);
+			break;
+		}
+
+		/* too young to write: leave the request queued */
+		if (check_time &&
+		    req->rq_jiffies > (jiffies - pupd_prm.age_buffer))
+			continue;
+
+		/* remove request from list before write to avoid conflict;
+		 * obdfs_pgrq_del() also frees req, so req is dead from here */
+		obdfs_pgrq_del(req);
+
+		CDEBUG(D_INODE, "adding page %p to vector\n", page);
+		bufs[i] = (char *)page_address(page);
+		pages[i] = page;
+		counts[i] = PAGE_SIZE;
+		offsets[i] = ((obd_off)page->index) << PAGE_SHIFT;
+		i++;
+	}
+
+	/* If no more pages for this inode, remove from superblock list */
+	if ( list_empty(page_list) )
+		list_del(obdfs_islist(inode));
+
+	EXIT;
+	return i;
+}
+
+
/* Remove writeback requests from an inode */
-int obdfs_flush_reqs(struct list_head *page_list,
- int flush_inode, int check_time)
+int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
+ int check_time)
{
- struct list_head *tmp = page_list;
+ struct list_head *tmp = inode_list;
obd_count num_io = 0;
- struct inode *inode = NULL;
- struct obdo *oa = NULL;
+ obd_count num_obdos = 0;
+ struct inode *inodes[MAX_IOVEC];
struct obdo *obdos[MAX_IOVEC];
struct page *pages[MAX_IOVEC];
char *bufs[MAX_IOVEC];
obd_size counts[MAX_IOVEC];
obd_off offsets[MAX_IOVEC];
obd_flag flags[MAX_IOVEC];
+ obd_count bufs_per_obdo[MAX_IOVEC];
int err = 0;
int i;
ENTRY;
- if (!page_list) {
+ if (!inode_list) {
CDEBUG(D_INODE, "no list\n");
EXIT;
return 0;
}
- if ( list_empty(page_list)) {
+ if ( list_empty(inode_list)) {
CDEBUG(D_INODE, "list empty\n");
EXIT;
return 0;
/* add all of the outstanding pages to a write vector, and write it */
- while ( (tmp = tmp->next) != page_list ) {
- struct obdfs_pgrq *pgrq;
- struct page *page;
-
- if ( flush_inode )
- pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist);
- else
- pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist);
- page = pgrq->rq_page;
- inode = pgrq->rq_inode;
-
- if ( !inode ) {
- CDEBUG(D_INODE, "no inode\n");
- EXIT;
- return 0;
+ while ( (tmp = tmp->next) != inode_list ) {
+ struct obdfs_inode_info *ii;
+ int res;
+
+ ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
+ inodes[num_obdos] = list_entry(ii, struct inode, u);
+
+ res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos],
+ MAX_IOVEC - num_io, &pages[num_io],
+ &bufs[num_io], &counts[num_io],
+ &offsets[num_io], &flags[num_obdos],1);
+ if ( res < 0 ) {
+ return -EIO;
}
-
- if ( !page ) {
- CDEBUG(D_INODE, "no page \n");
- EXIT;
- return 0;
- }
-
-
- if (check_time &&
- pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer))
- continue;
- oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
- if ( IS_ERR(oa) ) {
- EXIT;
- return PTR_ERR(oa);
- }
- obdfs_from_inode(oa, inode);
-
- CDEBUG(D_INODE, "adding page %p to vector\n", page);
- obdos[num_io] = oa;
- bufs[num_io] = (char *)page_address(page);
- pages[num_io] = page;
- counts[num_io] = PAGE_SIZE;
- offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT;
- flags[num_io] = OBD_BRW_CREATE;
- num_io++;
-
- /* remove request from list before write to avoid conflict */
- obdfs_pgrq_del(pgrq);
+ bufs_per_obdo[num_obdos] = res;
+ num_io += res;
+ num_obdos++;
if ( num_io == MAX_IOVEC ) {
- err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos,
- pages,
- bufs, counts, offsets, flags);
- for (i = 0 ; i < MAX_IOVEC ; i++) {
+ err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io,
+ num_obdos, obdos, bufs_per_obdo,
+ pages, bufs, counts, offsets,
+ flags);
+ for (i = 0 ; i < num_obdos ; i++) {
+ obdfs_to_inode(inodes[i], obdos[i]);
obdo_free(obdos[i]);
+ }
if ( err ) {
- /* XXX Probably should handle error here -
- * discard other writes, or put
- * (MAX_IOVEC - num_io) I/Os back to list?
- */
EXIT;
goto ERR;
}
- }
num_io = 0;
+ num_obdos = 0;
}
}
/* flush any remaining I/Os */
if ( num_io ) {
- i = num_io - 1;
- err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, pages, bufs,
+ err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos,
+ obdos, bufs_per_obdo, pages, bufs,
counts, offsets, flags);
- for ( ; i>=0 ; i-- ) {
+ for (i = 0 ; i < num_obdos ; i++) {
+ obdfs_to_inode(inodes[i], obdos[i]);
obdo_free(obdos[i]);
}
}
list_entry(sl, struct obdfs_sb_info, osi_list);
/* walk write requests here, use the sb, check the time */
- obdfs_flush_reqs(&sbi->osi_pages, 0, 1);
+ obdfs_flush_reqs(&sbi->osi_inodes, 0, 1);
}
#if 0
/* SYNCHRONOUS I/O for an inode */
static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create)
{
- obd_count num_io = 1;
+ obd_count num_oa = 1;
+ obd_count oa_bufs = 1;
struct obdo *oa;
char *buf = (char *)page_address(page);
obd_size count = PAGE_SIZE;
}
obdfs_from_inode(oa, inode);
- err = IOPS(inode, brw)(rw, IID(inode), &num_io, &oa, &buf, &count,
- &offset, &flags);
+ err = IOPS(inode, brw)(rw, IID(inode), num_oa, &oa, &oa_bufs, &buf,
+ &count, &offset, &flags);
if ( !err )
obdfs_to_inode(inode, oa); /* copy o_blocks to i_blocks */
inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq)
{
- list_del(&pgrq->rq_ilist);
- list_del(&pgrq->rq_slist);
+ list_del(&pgrq->rq_plist); /* unlink from the inode's page list; pgrq is freed just below, so callers must not touch it after this call */
kmem_cache_free(obdfs_pgrq_cachep, pgrq);
}
* Find a specific page in the page cache. If it is found, we return
* the write request struct associated with it, if not found return NULL.
*/
-#if 0
static struct obdfs_pgrq *
-obdfs_find_in_page_cache(struct inode *inode, struct page *page)
+obdfs_find_in_page_list(struct inode *inode, struct page *page)
{
- struct list_head *page_list = &OBDFS_LIST(inode);
+ struct list_head *page_list = obdfs_iplist(inode);
struct list_head *tmp;
struct obdfs_pgrq *pgrq;
}
tmp = page_list;
while ( (tmp = tmp->next) != page_list ) {
- pgrq = list_entry(tmp, struct obdfs_pgrq, rq_list);
+ pgrq = list_entry(tmp, struct obdfs_pgrq, rq_plist);
CDEBUG(D_INODE, "checking page %p\n", pgrq->rq_page);
if (pgrq->rq_page == page) {
CDEBUG(D_INODE, "found page %p in list\n", page);
EXIT;
return NULL;
-} /* obdfs_find_in_page_cache */
-#endif
+} /* obdfs_find_in_page_list */
-int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io,
- struct obdo **obdos,
- struct page **pages, char **bufs, obd_size *counts,
- obd_off *offsets, obd_flag *flags)
+/* call and free pages from Linux page cache */
+int obdfs_do_vec_wr(struct super_block *sb, obd_count num_io,
+ obd_count num_obdos, struct obdo **obdos,
+ obd_count *oa_bufs, struct page **pages, char **bufs,
+ obd_size *counts, obd_off *offsets, obd_flag *flags)
{
- int last_io = *num_io;
- int err;
struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)&sb->u.generic_sbp;
+ int err;
+
ENTRY;
- CDEBUG(D_INODE, "writing %d pages in vector\n", last_io);
- err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_io, obdos,
+ CDEBUG(D_INODE, "writing %d pages, %d obdos in vector\n",
+ num_io, num_obdos);
+ err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_obdos, obdos, oa_bufs,
bufs, counts, offsets, flags);
do {
- put_page(pages[--last_io]);
- } while ( last_io > 0 );
+ put_page(pages[--num_io]);
+ } while ( num_io > 0 );
EXIT;
return err;
static int obdfs_add_page_to_cache(struct inode *inode, struct page *page)
{
struct obdfs_pgrq *pgrq;
- int rc = 0;
ENTRY;
pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL);
memset(pgrq, 0, sizeof(*pgrq));
pgrq->rq_page = page;
- pgrq->rq_inode = inode;
get_page(pgrq->rq_page);
- list_add(&pgrq->rq_ilist, obdfs_ilist(inode));
- list_add(&pgrq->rq_slist, obdfs_slist(inode));
+ /* If this page isn't already in the inode page list, add it */
+ if ( !obdfs_find_in_page_list(inode, page) ) {
+ CDEBUG(D_INODE, "adding page %p to inode list %p\n", page,
+ obdfs_iplist(inode));
+ list_add(&pgrq->rq_plist, obdfs_iplist(inode));
+ }
+
+ /* If inode isn't already on the superblock inodes list, add it */
+ if ( list_empty(obdfs_islist(inode)) ) {
+ CDEBUG(D_INODE, "adding inode %p to superblock list %p\n",
+ obdfs_islist(inode), obdfs_islist(inode));
+ list_add(obdfs_islist(inode), obdfs_slist(inode));
+ }
+
+ EXIT;
/* XXX For testing purposes, we write out the page here.
* In the future, a flush daemon will write out the page.
return 0;
*/
- /*
- rc = obdfs_flush_reqs(obdfs_slist(inode), 0, 0);
- */
- EXIT;
- return rc;
+ return obdfs_flush_reqs(obdfs_slist(inode), 0, 0);
} /* obdfs_add_page_to_cache */
goto ERR;
}
- INIT_LIST_HEAD(&sbi->osi_pages);
+ INIT_LIST_HEAD(&sbi->osi_inodes);
sbi->osi_super = sb;
EXIT;
} /* obdfs_put_super */
+
/* all filling in of inodes postponed until lookup */
void obdfs_read_inode(struct inode *inode)
{
struct obdo *oa;
ENTRY;
- oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
+ oa = obdo_fromid(IID(inode), inode->i_ino,
+ OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS);
if ( IS_ERR(oa) ) {
printk("obdfs_read_inode: obdo_fromid failed\n");
EXIT;
ODEBUG(oa);
obdfs_to_inode(inode, oa);
- INIT_LIST_HEAD(obdfs_ilist(inode));
+ INIT_LIST_HEAD(obdfs_iplist(inode)); /* list of dirty pages on inode */
+ INIT_LIST_HEAD(obdfs_islist(inode)); /* list of inodes in superblock */
obdo_free(oa);
OIDEBUG(inode);