From: adilger Date: Fri, 21 Jan 2000 01:06:10 +0000 (+0000) Subject: Reworked to have a dirty inode list on superblock, dirty pages per inode. X-Git-Tag: 0.4.2~740 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=b76e3cc83eac3928bfc983165b247c594a9a0ff0;p=fs%2Flustre-release.git Reworked to have a dirty inode list on superblock, dirty pages per inode. --- diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h index 4faa960..07b9951 100644 --- a/lustre/include/linux/obd_class.h +++ b/lustre/include/linux/obd_class.h @@ -76,7 +76,7 @@ struct obdo { #define OBD_MD_FLGENER (0x00002000UL) #define OBD_MD_FLINLINE (0x00004000UL) #define OBD_MD_FLOBDMD (0x00008000UL) -#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDMD | OBD_MD_FLOBDFLG)) +#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDMD | OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS)) /* * ======== OBD Device Declarations =========== @@ -151,9 +151,9 @@ struct obd_ops { obd_size *count, obd_off offset); int (*o_write)(struct obd_conn *conn, struct obdo *oa, char *buf, obd_size *count, obd_off offset); - int (*o_brw)(int rw, struct obd_conn *conn, obd_count *num_io, - struct obdo **oa, char **buf, obd_size *count, - obd_off *offset, obd_flag *flags); + int (*o_brw)(int rw, struct obd_conn *conn, obd_count num_oa, + struct obdo **oa, obd_count *oa_bufs, char **buf, + obd_size *count, obd_off *offset, obd_flag *flags); int (*o_punch)(struct obd_conn *conn, struct obdo *tgt, obd_size count, obd_off offset); int (*o_sync)(struct obd_conn *conn, struct obdo *tgt, obd_size count, @@ -170,6 +170,9 @@ struct obd_ops { #define OBT(dev) dev->obd_type->typ_ops #define OBP(dev,op) dev->obd_type->typ_ops->o_ ## op +/* This value is not arbitrarily chosen. KIO_STATIC_PAGES from linux/iobuf.h */ +#define MAX_IOVEC (KIO_STATIC_PAGES - 1) + /* * ======== OBD Metadata Support =========== diff --git a/lustre/include/linux/obd_ext2.h b/lustre/include/linux/obd_ext2.h index d76b90f..34d92f6 100644 --- a/lustre/include/linux/obd_ext2.h +++ b/lustre/include/linux/obd_ext2.h @@ -3,6 +3,7 @@ #define OBD_EXT2_RUNIT _IOWR('f', 61, long) +#include struct ext2_obd { struct super_block * ext2_sb; diff --git a/lustre/include/linux/obdfs.h b/lustre/include/linux/obdfs.h index b142edb..7fb17f1 100644 --- a/lustre/include/linux/obdfs.h +++ b/lustre/include/linux/obdfs.h @@ -54,15 +54,14 @@ struct dentry *obdfs_follow_link(struct dentry *, struct dentry *, unsigned int) struct list_head obdfs_super_list; struct obdfs_pgrq { - struct list_head rq_ilist; /* linked list of req's */ - struct list_head rq_slist; /* linked list of req's */ - unsigned long rq_jiffies; - struct inode *rq_inode; /* dentry referenced */ + struct list_head rq_plist; /* linked list of req's */ + unsigned long rq_jiffies; struct page *rq_page; /* page to be written */ }; + inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq); -int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io, - struct obdo **obdos, +int obdfs_do_vec_wr(struct super_block *sb, obd_count num_io, obd_count num_oa, + struct obdo **obdos, obd_count *oa_bufs, struct page **pages, char **bufs, obd_size *counts, obd_off *offsets, obd_flag *flags); @@ -75,27 +74,33 @@ struct obdfs_sb_info { struct obd_ops *osi_ops; ino_t osi_rootino; /* which root inode */ int osi_minor; /* minor of /dev/obdX */ - struct list_head osi_pages; /* linked list of inodes to write */ + struct list_head osi_inodes; /* linked list of dirty inodes */ }; struct obdfs_inode_info { int oi_flags; + struct list_head oi_inodes; struct list_head oi_pages; char oi_inline[OBD_INLINESZ]; }; -#define MAX_IOVEC 16 - -static inline struct list_head *obdfs_ilist(struct inode *inode) +static inline struct list_head *obdfs_iplist(struct inode *inode) { struct obdfs_inode_info *info = (struct obdfs_inode_info *)&inode->u.generic_ip; return &info->oi_pages; } +static inline struct list_head *obdfs_islist(struct inode *inode) +{ + struct obdfs_inode_info *info = (struct obdfs_inode_info *)&inode->u.generic_ip; + + return &info->oi_inodes; +} + static inline struct list_head *obdfs_slist(struct inode *inode) { struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)(&inode->i_sb->u.generic_sbp); - return &sbi->osi_pages; + return &sbi->osi_inodes; } #define OBDFS_INFO(inode) ((struct obdfs_inode_info *)(&(inode)->u.generic_ip)) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 807e452..c6e7dfd 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -269,7 +269,8 @@ int gen_copy_data(struct obd_conn *dst_conn, struct obdo *dst, * and arrays to handle the request parameters. */ while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) { - obd_count num = 1; + obd_count num_oa = 1; + obd_count num_buf = 1; char *buf; obd_size brw_count = PAGE_SIZE; obd_off brw_offset = (page->index) << PAGE_SHIFT; @@ -278,9 +279,9 @@ int gen_copy_data(struct obd_conn *dst_conn, struct obdo *dst, page->index = index; buf = (char *)page_address(page); - err = OBP(src_conn->oc_dev, brw)(READ, src_conn, &num, &src, - &buf, &brw_count, &brw_offset, - &flagr); + err = OBP(src_conn->oc_dev, brw)(READ, src_conn, num_oa, &src, + &num_buf, &buf, &brw_count, + &brw_offset, &flagr); if ( err ) { EXIT; @@ -288,9 +289,9 @@ int gen_copy_data(struct obd_conn *dst_conn, struct obdo *dst, } CDEBUG(D_INODE, "Read page %ld ...\n", page->index); - err = OBP(dst_conn->oc_dev, brw)(WRITE, dst_conn, &num, &dst, - &buf, &brw_count, &brw_offset, - &flagw); + err = OBP(dst_conn->oc_dev, brw)(WRITE, dst_conn, num_oa, &dst, + &num_buf, &buf, &brw_count, + &brw_offset, &flagw); /* XXX should handle dst->o_size, dst->o_blocks here */ if ( err ) { diff --git a/lustre/obdfs/file.c b/lustre/obdfs/file.c index d96b4d2..75c8617 100644 --- a/lustre/obdfs/file.c +++ b/lustre/obdfs/file.c @@ -79,7 +79,7 @@ struct file_operations obdfs_file_operations = { NULL, /* readdir - bad */ NULL, /* poll - default */ NULL, /* ioctl */ - NULL, /* mmap */ + generic_file_mmap, /* mmap */ NULL, /* no special open code */ NULL, /* flush */ NULL, /* no special release code */ diff --git a/lustre/obdfs/flushd.c b/lustre/obdfs/flushd.c index 8623434..39e0679 100644 --- a/lustre/obdfs/flushd.c +++ b/lustre/obdfs/flushd.c @@ -51,32 +51,96 @@ struct { } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; +static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo, + int nr_slots, struct page **pages, char **bufs, + obd_size *counts, obd_off *offsets, + obd_flag *flag, int check_time) +{ + struct list_head *page_list = obdfs_iplist(inode); + struct list_head *tmp; + int i = 0; + + ENTRY; + if (list_empty(obdfs_iplist(inode))) { + list_del(obdfs_islist(inode)); + CDEBUG(D_INODE, "empty list\n"); + EXIT; + return 0; + } + + *obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD); + if ( IS_ERR(*obdo) ) { + EXIT; + return PTR_ERR(*obdo); + } + + obdfs_from_inode(*obdo, inode); + *flag = OBD_BRW_CREATE; + + tmp = page_list; + while ( (tmp = tmp->next) != page_list && (i < nr_slots) ) { + struct obdfs_pgrq *req; + struct page *page; + + req = list_entry(tmp, struct obdfs_pgrq, rq_plist); + /* remove request from list before write to avoid conflict */ + obdfs_pgrq_del(req); + page = req->rq_page; + + if ( !page ) { + CDEBUG(D_INODE, "no page \n"); + EXIT; + return 0; + } + + if (check_time && + req->rq_jiffies > (jiffies - pupd_prm.age_buffer)) + continue; + + CDEBUG(D_INODE, "adding page %p to vector\n", page); + bufs[i] = (char *)page_address(page); + pages[i] = page; + counts[i] = PAGE_SIZE; + offsets[i] = ((obd_off)page->index) << PAGE_SHIFT; + i++; + } + + /* If no more pages for this inode, remove from superblock list */ + if ( list_empty(obdfs_iplist(inode)) ) + list_del(obdfs_islist(inode)); + + EXIT; + return i; +} + + /* Remove writeback requests from an inode */ -int obdfs_flush_reqs(struct list_head *page_list, - int flush_inode, int check_time) +int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode, + int check_time) { - struct list_head *tmp = page_list; + struct list_head *tmp = inode_list; obd_count num_io = 0; - struct inode *inode = NULL; - struct obdo *oa = NULL; + obd_count num_obdos = 0; + struct inode *inodes[MAX_IOVEC]; struct obdo *obdos[MAX_IOVEC]; struct page *pages[MAX_IOVEC]; char *bufs[MAX_IOVEC]; obd_size counts[MAX_IOVEC]; obd_off offsets[MAX_IOVEC]; obd_flag flags[MAX_IOVEC]; + obd_count bufs_per_obdo[MAX_IOVEC]; int err = 0; int i; ENTRY; - if (!page_list) { + if (!inode_list) { CDEBUG(D_INODE, "no list\n"); EXIT; return 0; } - if ( list_empty(page_list)) { + if ( list_empty(inode_list)) { CDEBUG(D_INODE, "list empty\n"); EXIT; return 0; @@ -84,78 +148,50 @@ int obdfs_flush_reqs(struct list_head *page_list, /* add all of the outstanding pages to a write vector, and write it */ - while ( (tmp = tmp->next) != page_list ) { - struct obdfs_pgrq *pgrq; - struct page *page; - - if ( flush_inode ) - pgrq = list_entry(tmp, struct obdfs_pgrq, rq_ilist); - else - pgrq = list_entry(tmp, struct obdfs_pgrq, rq_slist); - page = pgrq->rq_page; - inode = pgrq->rq_inode; - - if ( !inode ) { - CDEBUG(D_INODE, "no inode\n"); - EXIT; - return 0; + while ( (tmp = tmp->next) != inode_list ) { + struct obdfs_inode_info *ii; + int res; + + ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes); + inodes[num_obdos] = list_entry(ii, struct inode, u); + + res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos], + MAX_IOVEC - num_io, &pages[num_io], + &bufs[num_io], &counts[num_io], + &offsets[num_io], &flags[num_obdos],1); + if ( res < 0 ) { + return -EIO; } - - if ( !page ) { - CDEBUG(D_INODE, "no page \n"); - EXIT; - return 0; - } - - - if (check_time && - pgrq->rq_jiffies > (jiffies - pupd_prm.age_buffer)) - continue; - oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD); - if ( IS_ERR(oa) ) { - EXIT; - return PTR_ERR(oa); - } - obdfs_from_inode(oa, inode); - - CDEBUG(D_INODE, "adding page %p to vector\n", page); - obdos[num_io] = oa; - bufs[num_io] = (char *)page_address(page); - pages[num_io] = page; - counts[num_io] = PAGE_SIZE; - offsets[num_io] = ((obd_off)page->index) << PAGE_SHIFT; - flags[num_io] = OBD_BRW_CREATE; - num_io++; - - /* remove request from list before write to avoid conflict */ - obdfs_pgrq_del(pgrq); + bufs_per_obdo[num_obdos] = res; + num_io += res; + num_obdos++; if ( num_io == MAX_IOVEC ) { - err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, - pages, - bufs, counts, offsets, flags); - for (i = 0 ; i < MAX_IOVEC ; i++) { + err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, + num_obdos, obdos, bufs_per_obdo, + pages, bufs, counts, offsets, + flags); + for (i = 0 ; i < num_obdos ; i++) { + obdfs_to_inode(inodes[i], obdos[i]); obdo_free(obdos[i]); + } if ( err ) { - /* XXX Probably should handle error here - - * discard other writes, or put - * (MAX_IOVEC - num_io) I/Os back to list? - */ EXIT; goto ERR; } - } num_io = 0; + num_obdos = 0; } } /* flush any remaining I/Os */ if ( num_io ) { - i = num_io - 1; - err = obdfs_do_vec_wr(inode->i_sb, &num_io, obdos, pages, bufs, + err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos, + obdos, bufs_per_obdo, pages, bufs, counts, offsets, flags); - for ( ; i>=0 ; i-- ) { + for (i = 0 ; i < num_obdos ; i++) { + obdfs_to_inode(inodes[i], obdos[i]); obdo_free(obdos[i]); } } @@ -176,7 +212,7 @@ static void obdfs_flush_dirty_pages(int check_time) list_entry(sl, struct obdfs_sb_info, osi_list); /* walk write requests here, use the sb, check the time */ - obdfs_flush_reqs(&sbi->osi_pages, 0, 1); + obdfs_flush_reqs(&sbi->osi_inodes, 0, 1); } #if 0 diff --git a/lustre/obdfs/rw.c b/lustre/obdfs/rw.c index 545ede5..ee4ee93 100644 --- a/lustre/obdfs/rw.c +++ b/lustre/obdfs/rw.c @@ -41,7 +41,8 @@ int obdfs_flush_reqs(struct list_head *page_list, /* SYNCHRONOUS I/O for an inode */ static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create) { - obd_count num_io = 1; + obd_count num_oa = 1; + obd_count oa_bufs = 1; struct obdo *oa; char *buf = (char *)page_address(page); obd_size count = PAGE_SIZE; @@ -57,8 +58,8 @@ static int obdfs_brw(int rw, struct inode *inode, struct page *page, int create) } obdfs_from_inode(oa, inode); - err = IOPS(inode, brw)(rw, IID(inode), &num_io, &oa, &buf, &count, - &offset, &flags); + err = IOPS(inode, brw)(rw, IID(inode), num_oa, &oa, &oa_bufs, &buf, + &count, &offset, &flags); if ( !err ) obdfs_to_inode(inode, oa); /* copy o_blocks to i_blocks */ @@ -115,8 +116,7 @@ int obdfs_init_pgrqcache(void) inline void obdfs_pgrq_del(struct obdfs_pgrq *pgrq) { - list_del(&pgrq->rq_ilist); - list_del(&pgrq->rq_slist); + list_del(&pgrq->rq_plist); kmem_cache_free(obdfs_pgrq_cachep, pgrq); } @@ -139,11 +139,10 @@ void obdfs_cleanup_pgrqcache(void) * Find a specific page in the page cache. If it is found, we return * the write request struct associated with it, if not found return NULL. */ -#if 0 static struct obdfs_pgrq * -obdfs_find_in_page_cache(struct inode *inode, struct page *page) +obdfs_find_in_page_list(struct inode *inode, struct page *page) { - struct list_head *page_list = &OBDFS_LIST(inode); + struct list_head *page_list = obdfs_iplist(inode); struct list_head *tmp; struct obdfs_pgrq *pgrq; @@ -156,7 +155,7 @@ obdfs_find_in_page_cache(struct inode *inode, struct page *page) } tmp = page_list; while ( (tmp = tmp->next) != page_list ) { - pgrq = list_entry(tmp, struct obdfs_pgrq, rq_list); + pgrq = list_entry(tmp, struct obdfs_pgrq, rq_plist); CDEBUG(D_INODE, "checking page %p\n", pgrq->rq_page); if (pgrq->rq_page == page) { CDEBUG(D_INODE, "found page %p in list\n", page); @@ -167,26 +166,27 @@ obdfs_find_in_page_cache(struct inode *inode, struct page *page) EXIT; return NULL; -} /* obdfs_find_in_page_cache */ -#endif +} /* obdfs_find_in_page_list */ -int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io, - struct obdo **obdos, - struct page **pages, char **bufs, obd_size *counts, - obd_off *offsets, obd_flag *flags) +/* call and free pages from Linux page cache */ +int obdfs_do_vec_wr(struct super_block *sb, obd_count num_io, + obd_count num_obdos, struct obdo **obdos, + obd_count *oa_bufs, struct page **pages, char **bufs, + obd_size *counts, obd_off *offsets, obd_flag *flags) { - int last_io = *num_io; - int err; struct obdfs_sb_info *sbi = (struct obdfs_sb_info *)&sb->u.generic_sbp; + int err; + ENTRY; - CDEBUG(D_INODE, "writing %d pages in vector\n", last_io); - err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_io, obdos, + CDEBUG(D_INODE, "writing %d pages, %d obdos in vector\n", + num_io, num_obdos); + err = OPS(sb, brw)(WRITE, &sbi->osi_conn, num_obdos, obdos, oa_bufs, bufs, counts, offsets, flags); do { - put_page(pages[--last_io]); - } while ( last_io > 0 ); + put_page(pages[--num_io]); + } while ( num_io > 0 ); EXIT; return err; @@ -200,7 +200,6 @@ int obdfs_do_vec_wr(struct super_block *sb, obd_count *num_io, static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) { struct obdfs_pgrq *pgrq; - int rc = 0; ENTRY; pgrq = kmem_cache_alloc(obdfs_pgrq_cachep, SLAB_KERNEL); @@ -213,21 +212,29 @@ static int obdfs_add_page_to_cache(struct inode *inode, struct page *page) memset(pgrq, 0, sizeof(*pgrq)); pgrq->rq_page = page; - pgrq->rq_inode = inode; get_page(pgrq->rq_page); - list_add(&pgrq->rq_ilist, obdfs_ilist(inode)); - list_add(&pgrq->rq_slist, obdfs_slist(inode)); + /* If this page isn't already in the inode page list, add it */ + if ( !obdfs_find_in_page_list(inode, page) ) { + CDEBUG(D_INODE, "adding page %p to inode list %p\n", page, + obdfs_iplist(inode)); + list_add(&pgrq->rq_plist, obdfs_iplist(inode)); + } + + /* If inode isn't already on the superblock inodes list, add it */ + if ( list_empty(obdfs_islist(inode)) ) { + CDEBUG(D_INODE, "adding inode %p to superblock list %p\n", + obdfs_islist(inode), obdfs_islist(inode)); + list_add(obdfs_islist(inode), obdfs_slist(inode)); + } + + EXIT; /* XXX For testing purposes, we write out the page here. * In the future, a flush daemon will write out the page. return 0; */ - /* - rc = obdfs_flush_reqs(obdfs_slist(inode), 0, 0); - */ - EXIT; - return rc; + return obdfs_flush_reqs(obdfs_slist(inode), 0, 0); } /* obdfs_add_page_to_cache */ diff --git a/lustre/obdfs/super.c b/lustre/obdfs/super.c index 077ad51..fafdf96 100644 --- a/lustre/obdfs/super.c +++ b/lustre/obdfs/super.c @@ -195,7 +195,7 @@ static struct super_block * obdfs_read_super(struct super_block *sb, goto ERR; } - INIT_LIST_HEAD(&sbi->osi_pages); + INIT_LIST_HEAD(&sbi->osi_inodes); sbi->osi_super = sb; @@ -287,13 +287,15 @@ static void obdfs_put_super(struct super_block *sb) EXIT; } /* obdfs_put_super */ + /* all filling in of inodes postponed until lookup */ void obdfs_read_inode(struct inode *inode) { struct obdo *oa; ENTRY; - oa = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD); + oa = obdo_fromid(IID(inode), inode->i_ino, + OBD_MD_FLNOTOBD | OBD_MD_FLBLOCKS); if ( IS_ERR(oa) ) { printk("obdfs_read_inode: obdo_fromid failed\n"); EXIT; @@ -302,7 +304,8 @@ void obdfs_read_inode(struct inode *inode) ODEBUG(oa); obdfs_to_inode(inode, oa); - INIT_LIST_HEAD(obdfs_ilist(inode)); + INIT_LIST_HEAD(obdfs_iplist(inode)); /* list of dirty pages on inode */ + INIT_LIST_HEAD(obdfs_islist(inode)); /* list of inodes in superblock */ obdo_free(oa); OIDEBUG(inode);