Whamcloud - gitweb
Found one bug; testing for list_empty(slist) is ok, but you
[fs/lustre-release.git] / lustre / obdfs / flushd.c
index 2732e44..92f366f 100644 (file)
@@ -50,57 +50,188 @@ struct {
        int age_super;  /* Time for superblock to age before we flush it */
 } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ }; 
 
-/* static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int wait, 
-                            
-*/
-static void obdfs_flush_reqs(struct obdfs_super_info *sbi, int check_time) 
+
+/*
+ * Gather queued writeback requests of one inode into an I/O vector.
+ *
+ * inode:      inode whose page request list (obdfs_iplist) is scanned
+ * obdo:       out: obdo obtained from obdo_fromid() and filled in by
+ *             obdfs_from_inode(); NOT written when the page list is empty
+ * nr_slots:   number of free entries in pages/bufs/counts/offsets
+ * pages, bufs, counts, offsets:
+ *             out: parallel arrays, one entry per page gathered
+ * flag:       out: set to OBD_BRW_CREATE
+ * check_time: if non-zero, only requests at least pupd_prm.age_buffer
+ *             jiffies old are taken; younger ones stay on the list
+ *
+ * Returns the number of pages added to the vector (possibly 0), or a
+ * negative errno if no obdo could be obtained.
+ */
+static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
+                              int nr_slots, struct page **pages, char **bufs,
+                              obd_size *counts, obd_off *offsets,
+                              obd_flag *flag, int check_time)
 {
-       struct list_head *wr;
-       struct obdfs_pgrq *req;
-       
-       wr = &sbi->s_wr_head;
-       while ( (wr = wr->next) != &sbi->s_wr_head ) {
-               req = list_entry(wr, struct obdfs_pgrq, rq_list);
-
-               if (!check_time || 
-                   req->rq_jiffies <= (jiffies - pupd_prm.age_buffer)) {
-                       /* write request out to disk */
-                       obdfs_do_writepage(req->rq_inode, req->rq_page, 1);
+       struct list_head *page_list = obdfs_iplist(inode);
+       struct list_head *tmp;
+       int i = 0;
+
+       ENTRY;
+       /* if there are no pages, remove from super block list */
+       if (list_empty(obdfs_iplist(inode))) {
+               list_del(obdfs_islist(inode));
+               /* we check for "empty" on this animal: must init it! */
+               INIT_LIST_HEAD(obdfs_islist(inode));
+               CDEBUG(D_INODE, "empty list\n");
+               EXIT;
+               return 0;
+       }
+
+       *obdo = obdo_fromid(IID(inode), inode->i_ino, OBD_MD_FLNOTOBD);
+       if ( IS_ERR(*obdo) ) {
+               EXIT;
+               return PTR_ERR(*obdo);
+       }
+
+       obdfs_from_inode(*obdo, inode);
+       *flag = OBD_BRW_CREATE;
+
+       tmp = page_list->next;
+       while ( tmp != page_list && (i < nr_slots) ) {
+               struct obdfs_pgrq *req;
+               struct page *page;
+
+               req = list_entry(tmp, struct obdfs_pgrq, rq_plist);
+               /* step to the successor now: req may be unlinked below */
+               tmp = tmp->next;
+
+               /* too young to flush: leave it queued for a later pass */
+               if (check_time &&
+                   req->rq_jiffies > (jiffies - pupd_prm.age_buffer))
+                       continue;
+
+               /* read the page before the request is handed to
+                * obdfs_pgrq_del(); req is not touched afterwards */
+               page = req->rq_page;
+               /* remove request from list before write to avoid conflict */
+               obdfs_pgrq_del(req);
+
+               if ( !page  ) {
+                       /* skip the bogus request but keep what we gathered */
+                       CDEBUG(D_INODE, "no page \n");
+                       continue;
                 }
 
+               CDEBUG(D_INODE, "adding page %p to vector\n", page);
+               bufs[i] = (char *)page_address(page);
+               pages[i] = page;
+               counts[i] = PAGE_SIZE;
+               offsets[i] = ((obd_off)page->index) << PAGE_SHIFT;
+               i++;
         }
 
+       /* If no more pages for this inode, remove from superblock list */
+       /* NOTE(review): unlike the empty-list path above, the entry is not
+        * re-initialised here - confirm nobody tests it with list_empty() */
+       if ( list_empty(obdfs_iplist(inode)) )
+               list_del(obdfs_islist(inode));
+
+       EXIT;
+       return i;
 }
 
 
+/*
+ * Flush queued writeback requests for every inode on a list.
+ *
+ * inode_list:  head of the list of inodes (linked through oi_inodes)
+ * flush_inode: currently unused
+ * check_time:  handed to obdfs_enqueue_pages(); non-zero restricts the
+ *              flush to requests older than pupd_prm.age_buffer
+ *
+ * Pages are collected into vectors of at most MAX_IOVEC entries and
+ * written with obdfs_do_vec_wr().  Returns 0 on success, -EIO if pages
+ * could not be gathered for an inode, otherwise the error returned by
+ * obdfs_do_vec_wr().
+ */
+int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
+                    int check_time)
+{
+       struct list_head *tmp;
+       struct list_head *next;
+       obd_count         num_io = 0;
+       obd_count         num_obdos = 0;
+       struct inode     *inodes[MAX_IOVEC];
+       struct obdo      *obdos[MAX_IOVEC];
+       struct page      *pages[MAX_IOVEC];
+       char             *bufs[MAX_IOVEC];
+       obd_size          counts[MAX_IOVEC];
+       obd_off           offsets[MAX_IOVEC];
+       obd_flag          flags[MAX_IOVEC];
+       obd_count         bufs_per_obdo[MAX_IOVEC];
+       int               err = 0;
+       int i;
+
+       ENTRY;
+
+       if (!inode_list) {
+               CDEBUG(D_INODE, "no list\n");
+               EXIT;
+               return 0;
+       }
+
+       if ( list_empty(inode_list)) {
+               CDEBUG(D_INODE, "list empty\n");
+               EXIT;
+               return 0;
+       }
+
+
+       /* add all of the outstanding pages to a write vector, and write it */
+       for (tmp = inode_list->next; tmp != inode_list; tmp = next) {
+               struct obdfs_inode_info *ii;
+               int res;
+
+               /* obdfs_enqueue_pages() may unlink (and re-initialise) this
+                * inode's list entry, so pick up the successor first */
+               next = tmp->next;
+
+               ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
+               inodes[num_obdos] = list_entry(ii, struct inode, u);
+
+               /* lets us tell whether an obdo was allocated when res == 0 */
+               obdos[num_obdos] = NULL;
+
+               res = obdfs_enqueue_pages(inodes[num_obdos], &obdos[num_obdos],
+                                         MAX_IOVEC - num_io, &pages[num_io],
+                                         &bufs[num_io], &counts[num_io],
+                                         &offsets[num_io], &flags[num_obdos],
+                                         check_time);
+               if ( res < 0 ) {
+                       /* stop gathering, but still write out and release
+                        * what was already taken off the request lists */
+                       err = -EIO;
+                       break;
+               }
+
+               if ( res == 0 ) {
+                       /* nothing to write for this inode: release its
+                        * obdo (if one was allocated) and do not count it */
+                       if ( obdos[num_obdos] )
+                               obdo_free(obdos[num_obdos]);
+                       continue;
+               }
+
+               bufs_per_obdo[num_obdos] = res;
+               num_io += res;
+               num_obdos++;
+
+               if ( num_io == MAX_IOVEC ) {
+                       err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io,
+                                             num_obdos, obdos, bufs_per_obdo,
+                                             pages, bufs, counts, offsets,
+                                             flags);
+                       for (i = 0 ; i < num_obdos ; i++) {
+                               obdfs_to_inode(inodes[i], obdos[i]);
+                               obdo_free(obdos[i]);
+                       }
+                       if ( err ) {
+                               EXIT;
+                               goto ERR;
+                       }
+                       num_io = 0;
+                       num_obdos = 0;
+               }
+       }
+
+       /* flush any remaining I/Os */
+       if ( num_io ) {
+               int wr_err;
+
+               wr_err = obdfs_do_vec_wr(inodes[0]->i_sb, num_io, num_obdos,
+                                        obdos, bufs_per_obdo, pages, bufs,
+                                        counts, offsets, flags);
+               for (i = 0 ; i < num_obdos ; i++) {
+                       obdfs_to_inode(inodes[i], obdos[i]);
+                       obdo_free(obdos[i]);
+               }
+               /* an earlier gather failure takes precedence */
+               if ( !err )
+                       err = wr_err;
+       }
+       EXIT;
+ERR:
+
+       return err;
+} /* obdfs_flush_reqs */
+/* Walk obdfs_super_list and flush each superblock's queued writeback requests via obdfs_flush_reqs(). */
 
 static void obdfs_flush_dirty_pages(int check_time)
 {
        struct list_head *sl;
-       struct obdfs_super_info *sbi;
 
        sl = &obdfs_super_list;
        while ( (sl = sl->next) != &obdfs_super_list ) {
-               struct obdfs_super_entry *entry = 
-                       list_entry(sl, struct obdfs_super_entry, sl_chain);
-               sbi = entry->sl_sbi;
+               struct obdfs_sb_info *sbi = 
+                       list_entry(sl, struct obdfs_sb_info, osi_list);
 
-               /* walk write requests here */
-               obdfs_flush_reqs(sbi, jiffies);
+               /* walk this superblock's inode list (osi_inodes) and flush its write requests, with the time check on */
+               obdfs_flush_reqs(&sbi->osi_inodes, 0, 1); /* NOTE(review): check_time is hard-coded to 1; the check_time argument is not used here - confirm intended */
        }
 
+#if 0 /* second pass is compiled out */
        /* again, but now we wait for completion */
        sl = &obdfs_super_list;
        while ( (sl = sl->next) != &obdfs_super_list ) {
-               struct obdfs_super_entry *entry = 
-                       list_entry(sl, struct obdfs_super_entry, sl_chain);
-               sbi = entry->sl_sbi;
+               struct obdfs_sb_info *sbi = 
+                       list_entry(sl, struct obdfs_sb_info, sl_chain); /* NOTE(review): uses sl_chain, while the live loop above uses osi_list - reconcile before enabling */
 
                /* walk write requests here */
-               /* XXX should jiffies be 0 here? */
-               obdfs_flush_reqs(sbi, jiffies);
+               obdfs_flush_reqs(&sbi->osi_pages, 0, check_time); /* NOTE(review): passes osi_pages, not osi_inodes as above - confirm which list obdfs_flush_reqs() expects */
        }
+#endif
 }
 
+
 static struct task_struct *pupdated;
 
 static int pupdate(void *unused) 
@@ -115,15 +246,15 @@ static int pupdate(void *unused)
 
        tsk->session = 1;
        tsk->pgrp = 1;
-       sprintf(tsk->comm, "pupd");
+       sprintf(tsk->comm, "pupdated");
        pupdated = current;
 
-       printk("pupdate() activated...\n");
+       printk("pupdated activated...\n");
 
-       /* sigstop and sigcont will stop and wakeup kupdate */
+       /* block every signal except SIGTERM, which is used to stop pupdate */
        spin_lock_irq(&tsk->sigmask_lock);
        sigfillset(&tsk->blocked);
-       siginitsetinv(&tsk->blocked, sigmask(SIGCONT) | sigmask(SIGSTOP));
+       siginitsetinv(&tsk->blocked, sigmask(SIGTERM));
        recalc_sigpending(tsk);
        spin_unlock_irq(&tsk->sigmask_lock);
 
@@ -138,10 +269,9 @@ static int pupdate(void *unused)
                else
                {
                stop_pupdate:
-                       printk("pupdate() stopped...\n");
                        tsk->state = TASK_STOPPED;
-                       MOD_DEC_USE_COUNT;
-                       printk("RETURN from PUPD\n");
+                       /* MOD_DEC_USE_COUNT; */
+                       printk("pupdated stopped...\n");
                        return 0;
                }
                /* check for sigstop */
@@ -159,16 +289,21 @@ static int pupdate(void *unused)
                        if (stopped)
                                goto stop_pupdate;
                }
+               /* asynchronous setattr etc for the future ... */
                /* flush_inodes(); */
+               CDEBUG(D_INODE, "about to flush pages...\n");
                obdfs_flush_dirty_pages(1);
+               CDEBUG(D_INODE, "done flushing pages...\n");
        }
 }
 
 
 int flushd_init(void)
 {
-       /*      kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND); */
-       MOD_INC_USE_COUNT;
+       /*
+       kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
+        */
+       /* MOD_INC_USE_COUNT; */
        kernel_thread(pupdate, NULL, 0);
        printk("flushd inited\n");
        return 0;
@@ -180,12 +315,14 @@ int flushd_cleanup(void)
        
 
        /* XXX Andreas, we will do this later, for now, you must kill
-          pupdated with a SIGSTOP from userland, before unloading obdfs.o
+          pupdated with a SIGTERM from userland, before unloading obdfs.o
        */
        if (pupdated) {
-               /* send updated a STOP signal */
                /* then let it run at least once, before continuing */
 
+               /* XXX need to do something like this here:
+               send_sig(SIGTERM, pupdated, 0);
+                */
                1;
        }