Whamcloud - gitweb
- some changes on liod api params
author: nfshp <nfshp>
Mon, 16 Dec 2002 12:56:59 +0000 (12:56 +0000)
committer: nfshp <nfshp>
Mon, 16 Dec 2002 12:56:59 +0000 (12:56 +0000)
- add actual page flushing code
- some cleanup

todo:
- try to figure out a better way of selecting dirty pages on
  an inode to flush
- export functions needed from kernel
- test with io/vm benchmark tools such as dbench/bonnie/iozone

lustre/llite/iod.c

index a2f58c6..e8f699b 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
 #include <linux/kmod.h>
+#include <linux/pagemap.h>
+#include <linux/low-latency.h>
 #include <asm/unistd.h>
 
 #define DEBUG_SUBSYSTEM S_LLITE
 /* wakeup every 30s */
 #define LIOD_WAKEUP_CYCLE      (30)
 
/* FIXME  temporary!!!, export this from kernel later */
/* placeholder policy: cycle through the three possible answers.
 * return value:
 * -1: no need to flush
 * 0:  need async flush
 * 1:  need sync flush
 */
static int balance_dirty_state(void)
{
        static int phase = 0;

        /* advance through 0 -> 1 -> 2 -> 0 ...; map to 0, 1, -1 */
        phase = (phase + 1) % 3;
        return phase - 1;
}
+
+/* FIXME  temporary!!!, export this from kernel later */
+static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
+
+static void flush_some_pages(struct super_block *sb);
+
+/* the main liod loop */
 static int liod_main(void *arg)
 {
-        struct ll_io_daemon *iod = (struct ll_io_daemon *)arg;
+       struct super_block *sb = (struct super_block *)arg;
+        struct ll_io_daemon *iod = &ll_s2sbi(sb)->ll_iod;
 
         ENTRY;
 
@@ -38,7 +63,7 @@ static int liod_main(void *arg)
        set_bit(LIOD_FLAG_ALIVE, &iod->io_flag);
         wake_up(&iod->io_waitq);
 
-        CDEBUG(D_NET, "liod(%d) started\n", current->pid);
+        CDEBUG(D_CACHE, "liod(%d) started\n", current->pid);
         while (1) {
                int t;
 
@@ -50,6 +75,11 @@ static int liod_main(void *arg)
                                               LIOD_WAKEUP_CYCLE*HZ);
                CDEBUG(D_CACHE, "liod(%d) active due to %s\n", current->pid,
                                (t ? "wakeup" : "timeout"));
+
+               do {
+                       flush_some_pages(sb);
+                       conditional_schedule();
+               } while (balance_dirty_state() >= 0);
         }
 
        clear_bit(LIOD_FLAG_ALIVE, &iod->io_flag);
@@ -59,8 +89,9 @@ static int liod_main(void *arg)
         RETURN(0);
 }
 
-int liod_start(struct ll_io_daemon *iod)
+int liod_start(struct super_block *sb)
 {
+       struct ll_io_daemon *iod = &ll_s2sbi(sb)->ll_iod;
         int rc;
 
         /* initialize */
@@ -68,7 +99,7 @@ int liod_start(struct ll_io_daemon *iod)
         init_waitqueue_head(&iod->io_sleepq);
         init_waitqueue_head(&iod->io_waitq);
 
-        rc = kernel_thread(liod_main, (void *) iod,
+        rc = kernel_thread(liod_main, (void *) sb,
                            CLONE_VM | CLONE_FS | CLONE_FILES);
 
         if (rc < 0) {
@@ -82,13 +113,210 @@ int liod_start(struct ll_io_daemon *iod)
         return 0;
 }
 
-void liod_wakeup(struct ll_io_daemon *iod)
/* kick the io daemon out of its sleep on io_sleepq so it makes a
 * flush pass (it otherwise only wakes every LIOD_WAKEUP_CYCLE) */
static inline void liod_wakeup(struct ll_io_daemon *iod)
{
        wake_up(&iod->io_sleepq);
}
 
-void liod_stop(struct ll_io_daemon *iod)
+static inline void select_one_page(struct brw_page *pg,
+                                   struct inode *inode,
+                                   struct page *page)
+{
+       obd_off off;
+
+       pg->pg = page;
+       pg->off = ((obd_off)page->index) << PAGE_SHIFT;
+       pg->flag = OBD_BRW_CREATE;
+
+       off = ((obd_off)(page->index + 1)) << PAGE_SHIFT;
+       if (off > inode->i_size)
+               pg->count = inode->i_size & ~PAGE_MASK;
+       else
+               pg->count = PAGE_SIZE;
+}
+
+/* syncronously flush certain amount of dirty pages right away
+ * don't simply call fdatasync(), we need a more efficient way
+ * to do flush in bunch mode.
+ * FIXME now we simply flush pages on at most one inode, probably
+ * need add multiple inode flush later.
+ */
+#define FLUSH_NR (32)
+static void flush_some_pages(struct super_block *sb)
+{
+       struct brw_page *pgs;
+       struct obd_brw_set *set;
+       struct list_head *tmp;
+       struct inode *inode;
+       struct address_space *mapping;
+       struct page *page;
+       int cnt, rc;
+
+       set = obd_brw_set_new();
+       if (!set) {
+               CERROR("can't alloc obd_brw_set!\n");
+               return;
+       }
+
+       OBD_ALLOC(pgs, FLUSH_NR * sizeof(struct brw_page));
+       if (!pgs)
+               goto out_free_set;
+
+       /* FIXME simutanously gain inode_lock and pagecache_lock could
+        * cause busy spin forever? Check this */
+       spin_lock(&inode_lock);
+
+       /* sync dirty inodes from tail, since we try to sync
+        * from the oldest one */
+       tmp = sb->s_dirty.prev;
+       for (cnt = 0; cnt < FLUSH_NR; tmp = tmp->prev) {
+               struct list_head *list, *next;
+
+               /* no dirty inodes left */
+               if (tmp == &sb->s_dirty)
+                       break;
+
+               inode = list_entry(tmp, struct inode, i_list);
+               mapping = inode->i_mapping;
+
+               /* if inode is locked, it should be have been moved away
+                * from dirty list */
+               if (inode->i_state & I_LOCK)
+                       LBUG();
+
+               /* select candidate dirty pages within the inode */
+               spin_lock(&pagecache_lock);
+               /* if no dirty pages, search next inode */
+               if (list_empty(&mapping->dirty_pages)) {
+                       spin_unlock(&pagecache_lock);
+                       continue;
+               }
+
+               list = mapping->dirty_pages.prev;
+next_page:
+               if (list == &mapping->dirty_pages) {
+                       /* no more dirty pages on this inode, and
+                        * if we already got some, just quit */
+                       if (cnt)
+                               break;
+                       else {
+                               /* this inode have dirty pages, but all of
+                                * them are locked by others or in fact clean
+                                * ones, so continue search next inode */
+                               spin_unlock(&pagecache_lock);
+                               continue;
+                       }
+               }
+
+               next = list->prev;
+               page = list_entry(list, struct page, list);
+
+               /* flush pages only if we could gain the lock */
+               if (!TryLockPage(page)) {
+                       /* remove from dirty list */
+                       list_del(&page->list);
+
+                       if (PageDirty(page)) {
+                               page_cache_get(page);
+                               /* add to locked list */
+                               list_add(&page->list, &mapping->locked_pages);
+                               //ClearPageDirty(page);
+
+                               select_one_page(&pgs[cnt++], inode, page);
+                               
+                               if (cnt >= FLUSH_NR) {
+                                       spin_unlock(&pagecache_lock);
+                                       continue;
+                               }
+                       } else {
+                               /* it's quite possible. add to clean list */
+                               list_add(&page->list, &mapping->clean_pages);
+                               UnlockPage(page);
+                       }
+               }
+
+               list = next;
+               goto next_page;
+       }
+
+       spin_unlock(&inode_lock);
+
+       if (!cnt)
+               goto out_free_pgs;
+
+       if (!inode)
+               LBUG();
+
+       CDEBUG(D_CACHE, "got %d pages of inode %lu to flush\n",
+                       inode->i_ino, cnt);
+
+       set->brw_callback = ll_brw_sync_wait;
+       rc = obd_brw(OBD_BRW_WRITE, ll_i2obdconn(inode),
+                     ll_i2info(inode)->lli_smd, cnt, pgs, set);
+       if (rc) {
+               CERROR("error from obd_brw: rc = %d\n", rc);
+       } else {
+               rc = ll_brw_sync_wait(set, CB_PHASE_START);
+               if (rc)
+                       CERROR("error from callback: rc = %d\n", rc);
+       }
+
+       /* finish the page status here */
+       spin_lock(&pagecache_lock);
+
+       while (--cnt >= 0) {
+               page = pgs[cnt].pg;
+
+               if (!PageLocked(page))
+                       LBUG();
+
+               if (!rc) {
+                       /* move pages to clean list */
+                       ClearPageDirty(page);
+                       list_del(&page->list);
+                       list_add(&page->list, &inode->i_mapping->clean_pages);
+               } else {
+                       /* add back to dirty list */
+                       SetPageDirty(page);
+                       list_del(&page->list);
+                       list_add(&page->list, &inode->i_mapping->dirty_pages);
+               }
+               UnlockPage(page);
+               page_cache_release(page);
+       }
+
+       if (list_empty(&inode->i_mapping->dirty_pages))
+               inode->i_state &= ~I_DIRTY_PAGES;
+
+       spin_unlock(&pagecache_lock);
+
+out_free_pgs:
+       OBD_FREE(pgs, FLUSH_NR * sizeof(struct brw_page));
+out_free_set:
+       obd_brw_set_free(set);
+       return;
+}
+
+void ll_balance_dirty_pages(struct super_block *sb)
+{
+       int flush;
+       struct ll_sb_info *sbi = ll_s2sbi(sb);
+
+       flush = balance_dirty_state();
+       if (flush < 0)
+               return;
+
+       if (flush > 0)
+               flush_some_pages(sb);
+
+       /* FIXME we need a way to wake up liods on *all* llite fs */
+       liod_wakeup(&sbi->ll_iod);
+}
+
+void liod_stop(struct super_block *sb)
 {
+       struct ll_io_daemon *iod = &ll_s2sbi(sb)->ll_iod;
+
        if (!test_bit(LIOD_FLAG_ALIVE, &iod->io_flag)) {
                CERROR("liod died unexpectedly!\n");
                return;