From 43f2bbd9ce5dc4991d28ab3b998c2070bb640a4d Mon Sep 17 00:00:00 2001 From: adilger Date: Thu, 20 Jan 2000 01:09:05 +0000 Subject: [PATCH] ext2obd.c: changed ext2obd_brw() to use vector I/O with brw_kiovec rw.c, flushd.c: updated to allow async I/O from page cache (in test) --- lustre/include/linux/obd_class.h | 4 +- lustre/include/linux/obdfs.h | 32 +++++++---- lustre/obdclass/genops.c | 3 +- lustre/obdfs/flushd.c | 121 ++++++++++++++++++++++++++++++++------- lustre/obdfs/namei.c | 5 +- lustre/obdfs/rw.c | 79 +++++++++++++------------ lustre/obdfs/super.c | 4 +- 7 files changed, 171 insertions(+), 77 deletions(-) diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 8ef35a7..a189e57 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -152,7 +152,7 @@ struct obd_ops { int (*o_write)(struct obd_conn *conn, struct obdo *oa, char *buf, obd_size *count, obd_off offset); int (*o_brw)(int rw, struct obd_conn *conn, obd_count *num_io, - struct obdo **oa, char **buf, obd_size **count, + struct obdo **oa, char **buf, obd_size *count, obd_off *offset, obd_flag *flags); int (*o_punch)(struct obd_conn *conn, struct obdo *tgt, obd_size count, obd_off offset); @@ -170,6 +170,8 @@ struct obd_ops { #define OBT(dev) dev->obd_type->typ_ops #define OBP(dev,op) dev->obd_type->typ_ops->o_ ## op +#define MAX_IOVEC 16 + /* * ======== OBD Metadata Support =========== diff --git a/lustre/include/linux/obdfs.h b/lustre/include/linux/obdfs.h index 34f3adb..1696cdd 100644 --- a/lustre/include/linux/obdfs.h +++ b/lustre/include/linux/obdfs.h @@ -50,24 +50,26 @@ int obdfs_readlink (struct dentry *, char *, int); struct dentry *obdfs_follow_link(struct dentry *, struct dentry *, unsigned int); -struct obdfs_super_info { - struct list_head s_wr_head; -}; - - /* list of all OBDFS super blocks */ struct list_head obdfs_super_list; struct obdfs_super_entry { struct list_head sl_chain; - struct obdfs_super_info *sl_sbi; + struct obdfs_sb_info *sl_sbi; }; struct obdfs_pgrq { - struct list_head rq_list; /* linked list of req's */ + struct list_head rq_ilist; /* linked list of req's */ + struct list_head rq_slist; /* linked list of req's */ unsigned long rq_jiffies; struct inode *rq_inode; /* dentry referenced */ struct page *rq_page; /* page to be written */ }; +inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq); +int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io, + struct obdo **obdos, + struct page **pages, char **bufs, obd_size *counts, + obd_off *offsets, obd_flag *flags); + struct obdfs_sb_info { struct obd_conn osi_conn; @@ -76,7 +78,7 @@ struct obdfs_sb_info { struct obd_ops *osi_ops; ino_t osi_rootino; /* which root inode */ int osi_minor; /* minor of /dev/obdX */ - struct list_head osi_list; /* linked list of inodes to write */ + struct list_head osi_pages; /* linked list of inodes to write */ }; struct obdfs_inode_info { @@ -86,8 +88,18 @@ struct obdfs_inode_info { }; -#define OBDFS_LIST(inode) (((struct obdfs_inode_info *)(&(inode)->u.generic_ip))->oi_pages) -#define WREQ(entry) (list_entry(entry, struct obdfs_pgrq, rq_list)) +static inline struct list_head *obdfs_ilist(struct inode *inode) +{ + struct obdfs_inode_info *info = (struct obdfs_inode_info *)&inode->u.generic_ip; + + return &info->oi_pages; +} + +static inline struct list_head *obdfs_slist(struct inode *inode) { + struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)(&inode->i_sb->u.generic_sbp); + return &sbi->osi_pages; +} + #define OBDFS_INFO(inode) ((struct obdfs_inode_info *)(&(inode)->u.generic_ip)) void obdfs_sysctl_init(void); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 6940a95..807e452 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -271,8 +271,7 @@ int gen_copy_data(struct obd_conn *dst_conn, struct obdo *dst, while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) { obd_count num = 1; char *buf; - obd_size brw_size = PAGE_SIZE; - obd_size *brw_count = &brw_size; + obd_size brw_count = PAGE_SIZE; obd_off brw_offset = (page->index) << PAGE_SHIFT; obd_flag flagr = 0; obd_flag flagw = OBD_BRW_CREATE; diff --git a/lustre/obdfs/flushd.c b/lustre/obdfs/flushd.c index 27d93a4..12986e2 100644 --- a/lustre/obdfs/flushd.c +++ b/lustre/obdfs/flushd.c @@ -50,33 +50,107 @@ struct { int age_super; /* Time for superblock to age before we flush it */ } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; -/* static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int wait, - -*/ -static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int check_time) + +/* Remove writeback requests from an inode */ +int obdfs_flush_reqs(struct list_head *page_list, + int flush_inode, int check_time) { - struct list_head *wr; - struct obdfs_pgrq *req; - - wr = &sbi->s_wr_head; - while ( (wr = wr->next) != &sbi->s_wr_head ) { - req = list_entry(wr, struct obdfs_pgrq, rq_list); - - if (!check_time || - req->rq_jiffies <= (jiffies - pupd_prm.age_buffer)) { - /* write request out to disk */ - obdfs_do_writepage(req->rq_inode, req->rq_page, 1); + struct list_head *tmp = page_list; + obd_count num_io = 0; + struct obdo *oa = NULL; + struct obdo *obdos[MAX_IOVEC]; + struct page *pages[MAX_IOVEC]; + char *bufs[MAX_IOVEC]; + obd_size counts[MAX_IOVEC]; + obd_off offsets[MAX_IOVEC]; + obd_flag flags[MAX_IOVEC]; + int err = 0; + int i; + struct inode *inode = NULL; + + ENTRY; + + if ( list_empty(page_list)) { + CDEBUG(D_INODE, "list empty\n"); + EXIT; + return 0; + } + + + /* add all of the outstanding pages to a write vector, and write it */ + while ( (tmp = tmp->next) != page_list ) { + struct obdfs_pgrq *pgrq; + struct page *page; + + if ( flush_inode ) + pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist); + else + pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist); + page = pgrq->rq_page; + inode = pgrq->rq_inode; + + if (check_time && + pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer)) + continue; + + oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD); + if ( IS_ERR(oa) ) { + EXIT; + return PTR_ERR(oa); } + obdfs_from_inode(oa, inode); + + CDEBUG(D_INODE, "adding page %p to vector\n", page); + obdos[num_io] = oa; + bufs[num_io] = (char *)page_address(page); + pages[num_io] = page; + counts[num_io] = PAGE_SIZE; + offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT; + flags[num_io] = OBD_BRW_CREATE; + num_io++; + /* remove request from list before write to avoid conflict */ + obdfs_pgrq_del(pgrq); + + if ( num_io == MAX_IOVEC ) { + err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, + pages, + bufs, counts, offsets, flags); + for (i=0 ; ii_sb, &num_io, obdos, pages, bufs, + counts, offsets, flags); + for ( ; i>=0 ; i-- ) { + obdo_free(obdos[i]); + } } + EXIT; +ERR: -} + return err; +} /* obdfs_remove_pages_from_cache */ static void obdfs_flush_dirty_pages(int check_time) { struct list_head *sl; - struct obdfs_super_info *sbi; + struct obdfs_sb_info *sbi; sl = &obdfs_super_list; while ( (sl = sl->next) != &obdfs_super_list ) { @@ -84,8 +158,8 @@ static void obdfs_flush_dirty_pages(int check_time) list_entry(sl, struct obdfs_super_entry, sl_chain); sbi = entry->sl_sbi; - /* walk write requests here */ - obdfs_flush_reqs(sbi, jiffies); + /* walk write requests here, use the sb, check the time */ + obdfs_flush_reqs(&sbi->osi_pages, 0, 1); } /* again, but now we wait for completion */ @@ -96,11 +170,11 @@ static void obdfs_flush_dirty_pages(int check_time) sbi = entry->sl_sbi; /* walk write requests here */ - /* XXX should jiffies be 0 here? */ - obdfs_flush_reqs(sbi, jiffies); + obdfs_flush_reqs(&sbi->osi_pages, 0, check_time); } } + static struct task_struct *pupdated; static int pupdate(void *unused) @@ -158,8 +232,13 @@ static int pupdate(void *unused) if (stopped) goto stop_pupdate; } + /* asynchronous setattr etc for the future ... */ /* flush_inodes(); */ + CDEBUG(D_INODE, "about to flush pages...\n"); + /* obdfs_flush_dirty_pages(1); + */ + CDEBUG(D_INODE, "done flushing pages...\n"); } } diff --git a/lustre/obdfs/namei.c b/lustre/obdfs/namei.c index 7b44ec0..ea94b011 100644 --- a/lustre/obdfs/namei.c +++ b/lustre/obdfs/namei.c @@ -202,8 +202,8 @@ struct dentry *obdfs_lookup(struct inode *dir, struct dentry *dentry) * returns a locked and held page upon success */ -/* XXX I believe these pages should in fact NOT be locked */ +/* We do this with a locked page: that's not necessary, since the semaphore on the inode protects this page as well. */ static struct page *obdfs_add_entry (struct inode * dir, const char * name, int namelen, struct ext2_dir_entry_2 ** res_dir, @@ -345,6 +345,7 @@ static struct page *obdfs_add_entry (struct inode * dir, *res_dir = de; *err = 0; PDEBUG(page, "add_entry"); + /* XXX unlock page here */ EXIT; return page; } @@ -494,7 +495,6 @@ static struct inode *obdfs_new_inode(struct inode *dir, int mode) return ERR_PTR(-EIO); } obdo_free(oa); - INIT_LIST_HEAD(&OBDFS_LIST(inode)); EXIT; return inode; @@ -658,7 +658,6 @@ int obdfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) dir->u.ext2_i.i_flags &= ~EXT2_BTREE_FL; mark_inode_dirty(dir); err = obdfs_do_writepage(dir, page, IS_SYNC(dir)); - /* XXX handle err? */ UnlockPage(page); diff --git a/lustre/obdfs/rw.c b/lustre/obdfs/rw.c index cdb7bd6..6240239 100644 --- a/lustre/obdfs/rw.c +++ b/lustre/obdfs/rw.c @@ -33,7 +33,10 @@ #include #include -int console_loglevel; + +int obdfs_flush_reqs(struct list_head *page_list, + int flush_inode, int check_time); + /* SYNCHRONOUS I/O for an inode */ static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create) @@ -41,8 +44,7 @@ static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create) obd_count num_io = 1; struct obdo *oa; char *buf = (char *)page_address(page); - obd_size size = PAGE_SIZE; - obd_size *count = &size; + obd_size count = PAGE_SIZE; obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; obd_flag flags = create ? OBD_BRW_CREATE : 0; int err; @@ -111,6 +113,13 @@ int obdfs_init_pgrqcache(void) return 0; } /* obdfs_init_wreqcache */ +inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq) +{ + list_del(&pgrq->rq_ilist); + list_del(&pgrq->rq_slist); + kmem_cache_free(obdfs_pgrq_cachep, pgrq); +} + void obdfs_cleanup_pgrqcache(void) { ENTRY; @@ -130,6 +139,7 @@ void obdfs_cleanup_pgrqcache(void) * Find a specific page in the page cache. If it is found, we return * the write request struct associated with it, if not found return NULL. */ +#if 0 static struct obdfs_pgrq * obdfs_find_in_page_cache(struct inode *inode, struct page *page) { @@ -158,43 +168,39 @@ obdfs_find_in_page_cache(struct inode *inode, struct page *page) EXIT; return NULL; } /* obdfs_find_in_page_cache */ +#endif -/* - * Remove a writeback request from a list - */ -static inline int -obdfs_remove_from_page_cache(struct obdfs_pgrq *pgrq) +int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io, + struct obdo **obdos, + struct page **pages, char **bufs, obd_size *counts, + obd_off *offsets, obd_flag *flags) { - struct inode *inode = pgrq->rq_inode; - struct page *page = pgrq->rq_page; + int last_io = *num_io; int err; - + struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)&sb->u.generic_sbp; ENTRY; - CDEBUG(D_INODE, "writing inode %ld page %p, pgrq: %p\n", - inode->i_ino, page, pgrq); - OIDEBUG(inode); - PDEBUG(page, "REM_CACHE"); - err = obdfs_brw(WRITE, inode, page, 1); - /* XXX probably should handle error here somehow. I think that - * ext2 also does the same thing - discard write even if error? - */ - put_page(page); - list_del(&pgrq->rq_list); - kmem_cache_free(obdfs_pgrq_cachep, pgrq); - OIDEBUG(inode); + CDEBUG(D_INODE, "writing %d pages in vector\n", last_io); + err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_io, obdos, + bufs, counts, offsets, flags); + + do { + put_page(pages[--last_io]); + } while ( last_io > 0 ); EXIT; return err; -} /* obdfs_remove_from_page_cache */ +} + /* * Add a page to the write request cache list for later writing * ASYNCHRONOUS write method. */ -static int obdfs_add_to_page_cache(struct inode *inode, struct page *page) +static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) { struct obdfs_pgrq *pgrq; + int rc = 0; ENTRY; pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL); @@ -210,21 +216,17 @@ static int obdfs_add_to_page_cache(struct inode *inode, struct page *page) pgrq->rq_inode = inode; get_page(pgrq->rq_page); - list_add(&pgrq->rq_list, &OBDFS_LIST(inode)); + list_add(&pgrq->rq_ilist, obdfs_ilist(inode)); + list_add(&pgrq->rq_slist, obdfs_slist(inode)); - /* For testing purposes, we write out the page here. - * In the future, a flush daemon will write out the page. + /* XXX For testing purposes, we write out the page here. + * In the future, a flush daemon will write out the page. return 0; */ - pgrq = obdfs_find_in_page_cache(inode, page); - if (!pgrq) { - CDEBUG(D_INODE, "XXXX Can't find page after adding it!!!\n"); - EXIT; - return -EINVAL; - } - - return obdfs_remove_from_page_cache(pgrq); -} /* obdfs_add_to_page_cache */ + rc = obdfs_flush_reqs(obdfs_slist(inode), 0, 0); + EXIT; + return rc; +} /* obdfs_add_page_to_cache */ /* select between SYNC and ASYNC I/O methods */ @@ -237,11 +239,12 @@ int obdfs_do_writepage(struct inode *inode, struct page *page, int sync) if ( sync ) err = obdfs_brw(WRITE, inode, page, 1); else - err = obdfs_add_to_page_cache(inode, page); + err = obdfs_add_page_to_cache(inode, page); if ( !err ) SetPageUptodate(page); PDEBUG(page,"WRITEPAGE"); + EXIT; return err; } /* obdfs_do_writepage */ diff --git a/lustre/obdfs/super.c b/lustre/obdfs/super.c index 6a30cea..e73dbbc 100644 --- a/lustre/obdfs/super.c +++ b/lustre/obdfs/super.c @@ -195,7 +195,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, goto ERR; } - INIT_LIST_HEAD(&sbi->osi_list); + INIT_LIST_HEAD(&sbi->osi_pages); sbi->osi_super = sb; @@ -301,7 +301,7 @@ void obdfs_read_inode(struct inode *inode) ODEBUG(oa); obdfs_to_inode(inode, oa); - INIT_LIST_HEAD(&OBDFS_LIST(inode)); + INIT_LIST_HEAD(obdfs_ilist(inode)); obdo_free(oa); OIDEBUG(inode); -- 1.8.3.1