Whamcloud - gitweb
obdfs/flushd.c: turned on page aging, and set pupdated to run more often
[fs/lustre-release.git] / lustre / obdfs / flushd.c
index d12c63f..6d16cfc 100644 (file)
@@ -35,7 +35,6 @@
 #include <linux/obdfs.h>
 
 
-
 struct {
        int nfract;  /* Percentage of buffer cache dirty to 
                        activate bdflush */
@@ -45,13 +44,12 @@ struct {
                                each time we call refill */
        int nref_dirt; /* Dirty buffer threshold for activating bdflush
                          when trying to refill buffers. */
-       int interval; /* jiffies delay between kupdate flushes */
+       int interval; /* jiffies delay between pupdate flushes */
        int age_buffer;  /* Time for normal buffer to age before we flush it */
        int age_super;  /* Time for superblock to age before we flush it */
-/* } pupd_prm = {40, 500, 64, 256, 5*HZ, 30*HZ, 5*HZ };  */
-} pupd_prm = {40, 500, 64, 256, 10*HZ, 30*HZ, 5*HZ }; 
-
+} pupd_prm = {40, 500, 64, 256, 2*HZ, 30*HZ, 5*HZ };
 
+/* Called with the superblock list lock held */
 static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
                               int nr_slots, struct page **pages, char **bufs,
                               obd_size *counts, obd_off *offsets,
@@ -74,13 +72,14 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
        *flag = OBD_BRW_CREATE;
 
        tmp = page_list;
-       while ( (tmp = tmp->next) != page_list && (num < nr_slots) ) {
+       while ( ((tmp = tmp->next) != page_list) && (num < nr_slots) ) {
                struct obdfs_pgrq *req;
                struct page *page;
                
                req = list_entry(tmp, struct obdfs_pgrq, rq_plist);
                page = req->rq_page;
 
+               
                if (check_time && 
                    (jiffies - req->rq_jiffies) < pupd_prm.age_buffer)
                        continue;
@@ -89,17 +88,17 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
                 * Note that obdfs_pgrq_del() also deletes the request.
                 */
                obdfs_pgrq_del(req);
-               
                if ( !page ) {
                        CDEBUG(D_INODE, "no page \n");
                        continue;
                }
 
-               CDEBUG(D_INODE, "adding page %p to vector\n", page);
                bufs[num] = (char *)page_address(page);
                pages[num] = page;
                counts[num] = PAGE_SIZE;
                offsets[num] = ((obd_off)page->index) << PAGE_SHIFT;
+               CDEBUG(D_INODE, "ENQ inode %ld, page %p addr %p to vector\n", 
+                      inode->i_ino, page, (char *)page_address(page));
                num++;
        }
 
@@ -109,14 +108,12 @@ static int obdfs_enqueue_pages(struct inode *inode, struct obdo **obdo,
 
        EXIT;
        return num;  
-}
-
+} /* obdfs_enqueue_pages */
 
 /* Remove writeback requests for the superblock */
-int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
-                    int check_time)
+int obdfs_flush_reqs(struct list_head *inode_list, int check_time)
 {
-       struct list_head *tmp = inode_list;
+       struct list_head *tmp;
        int               total_io = 0;
        obd_count         num_io = 0;
        obd_count         num_obdos = 0;
@@ -129,23 +126,30 @@ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
        obd_flag          flags[MAX_IOVEC];
        obd_count         bufs_per_obdo[MAX_IOVEC];
        int               err = 0;
+       struct obdfs_sb_info *sbi;
 
        ENTRY;
-
        if (!inode_list) {
                CDEBUG(D_INODE, "no list\n");
                EXIT;
                return 0;
        }
 
+       sbi = list_entry(inode_list, struct obdfs_sb_info, osi_inodes);
+
+       obd_down(&sbi->osi_list_mutex);
        if ( list_empty(inode_list)) {
                CDEBUG(D_INODE, "list empty\n");
+               obd_up(&sbi->osi_list_mutex);
                EXIT;
                return 0;
        }
 
-       /* add each inode's outstanding pages to a write vector, and write it */
-       while ( (tmp = tmp->next) != inode_list && total_io < pupd_prm.ndirty) {
+       /* add each inode's dirty pages to a write vector, and write it */
+ again:
+       tmp = inode_list;
+       while ( (tmp = tmp->next) != inode_list && 
+               total_io < pupd_prm.ndirty) {
                struct obdfs_inode_info *ii;
                struct inode *inode;
                int res;
@@ -153,6 +157,7 @@ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
                ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
                inode = list_entry(ii, struct inode, u);
                inodes[num_obdos] = inode;
+               CDEBUG(D_INODE, "checking inode %ld pages\n", inode->i_ino);
 
                res = 1;
 
@@ -168,8 +173,12 @@ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
                                                  &pages[num_io], &bufs[num_io],
                                                  &counts[num_io],
                                                  &offsets[num_io],
-                                                 &flags[num_obdos],1);
+                                                 &flags[num_obdos],
+                                                 check_time);
+                       CDEBUG(D_INODE, "FLUSHED inode %ld, pages flushed: %d\n", 
+                              inode->i_ino, res);
                        if ( res < 0 ) {
+                               obd_up(&sbi->osi_list_mutex);
                                err = res;
                                goto ERR;
                        }
@@ -180,6 +189,7 @@ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
                        num_obdos++;
 
                        if ( num_io == MAX_IOVEC ) {
+                               obd_up(&sbi->osi_list_mutex);
                                err = obdfs_do_vec_wr(inodes, num_io, num_obdos,
                                                      obdos, bufs_per_obdo,
                                                      pages, bufs, counts,
@@ -191,65 +201,72 @@ int obdfs_flush_reqs(struct list_head *inode_list, int flush_inode,
                                inodes[0] = inode;
                                num_io = 0;
                                num_obdos = 0;
+                               obd_down(&sbi->osi_list_mutex);
+                               goto again;
                        }
                }
+       }
 
-               /* Remove inode from superblock dirty list when no more pages.
-                * Make sure we don't point at the current inode with tmp
-                * when we re-init the list on the inode, or we will loop.
-                */
+       obd_up(&sbi->osi_list_mutex);
+
+       /* flush any remaining I/Os */
+       if ( num_io ) {
+               err = obdfs_do_vec_wr(inodes, num_io, num_obdos, obdos,
+                                     bufs_per_obdo, pages, bufs, counts,
+                                     offsets, flags);
+       }
+
+       /* Remove inode from superblock dirty list when no more pages.
+        * Make sure we don't point at the current inode with tmp
+        * when we re-init the list on the inode, or we will loop.
+        */
+       obd_down(&sbi->osi_list_mutex);
+       tmp = inode_list;
+       while ( (tmp = tmp->next) != inode_list ) {
+               struct obdfs_inode_info *ii;
+               struct inode *inode;
+
+               ii = list_entry(tmp, struct obdfs_inode_info, oi_inodes);
+               inode = list_entry(ii, struct inode, u);
+               CDEBUG(D_INODE, "checking inode %ld empty\n", inode->i_ino);
                if (list_empty(obdfs_iplist(inode))) {
                        CDEBUG(D_INODE, "remove inode %ld from dirty list\n",
                               inode->i_ino);
                        tmp = tmp->prev;
                        list_del(obdfs_islist(inode));
+                       /* decrement inode reference for page cache */
+                       inode->i_count--;
                        INIT_LIST_HEAD(obdfs_islist(inode));
                }
        }
+       obd_up(&sbi->osi_list_mutex);
 
-       /* flush any remaining I/Os */
-       if ( num_io ) {
-               err = obdfs_do_vec_wr(inodes, num_io, num_obdos, obdos,
-                                     bufs_per_obdo, pages, bufs, counts,
-                                     offsets, flags);
-       }
        CDEBUG(D_INODE, "flushed %d pages in total\n", total_io);
        EXIT;
 ERR:
        return err;
-} /* obdfs_remove_pages_from_cache */
+} /* obdfs_flush_reqs */
 
 
-static void obdfs_flush_dirty_pages(int check_time)
+void obdfs_flush_dirty_pages(int check_time)
 {
        struct list_head *sl;
 
+       ENTRY;
        sl = &obdfs_super_list;
        while ( (sl = sl->next) != &obdfs_super_list ) {
                struct obdfs_sb_info *sbi = 
                        list_entry(sl, struct obdfs_sb_info, osi_list);
 
                /* walk write requests here, use the sb, check the time */
-               obdfs_flush_reqs(&sbi->osi_inodes, 0, 1);
-       }
-
-#if 0
-       /* again, but now we wait for completion */
-       sl = &obdfs_super_list;
-       while ( (sl = sl->next) != &obdfs_super_list ) {
-               struct obdfs_sb_info *sbi = 
-                       list_entry(sl, struct obdfs_sb_info, sl_chain);
-
-               /* walk write requests here */
-               obdfs_flush_reqs(&sbi->osi_pages, 0, check_time);
+               obdfs_flush_reqs(&sbi->osi_inodes, check_time);
        }
-#endif
-}
+       EXIT;
+} /* obdfs_flush_dirty_pages */
 
 
 static struct task_struct *pupdated;
 
-
 static int pupdate(void *unused) 
 {
        struct task_struct * tsk = current;
@@ -265,6 +282,7 @@ static int pupdate(void *unused)
        sprintf(tsk->comm, "pupdated");
        pupdated = current;
 
+       MOD_INC_USE_COUNT;      /* XXX until send_sig works */
        printk("pupdated activated...\n");
 
        /* sigstop and sigcont will stop and wakeup pupdate */
@@ -286,7 +304,7 @@ static int pupdate(void *unused)
                {
                stop_pupdate:
                        tsk->state = TASK_STOPPED;
-                       /* MOD_DEC_USE_COUNT; */
+                       MOD_DEC_USE_COUNT; /* XXX until send_sig works */
                        printk("pupdated stopped...\n");
                        return 0;
                }
@@ -305,41 +323,35 @@ static int pupdate(void *unused)
                        if (stopped)
                                goto stop_pupdate;
                }
-               /* asynchronous setattr etc for the future ... */
-               /* flush_inodes(); */
-               obdfs_flush_dirty_pages(1);
+               /* asynchronous setattr etc for the future ...
+               flush_inodes();
+                */
+               obdfs_flush_dirty_pages(1); 
        }
 }
 
 
-int flushd_init(void)
+int obdfs_flushd_init(void)
 {
        /*
        kernel_thread(bdflush, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
         */
-       /* MOD_INC_USE_COUNT; */
        kernel_thread(pupdate, NULL, 0);
-       printk("flushd inited\n");
+       CDEBUG(D_PSDEV, __FUNCTION__ ": flushd inited\n");
        return 0;
 }
 
-int flushd_cleanup(void)
+int obdfs_flushd_cleanup(void)
 {
-       /* this should deliver a signal to */
-       
-
-       /* XXX Andreas, we will do this later, for now, you must kill
-          pupdated with a SIGTERM from userland, before unloading obdfs.o
-       */
+       ENTRY;
+       /* deliver a signal to pupdated to shut it down
+          XXX need to kill it from user space for now XXX
        if (pupdated) {
-               /* then let it run at least once, before continuing */
-
-               /* XXX need to do something like this here:
-               send_sig(SIGTERM, current, 0);
-                */
-               1;
+               send_sig_info(SIGTERM, 1, pupdated);
        }
+        */
 
+       EXIT;
        /* not reached */
        return 0;