LU-744 clio: save memory allocations for cl_page

[fs/lustre-release.git] / lustre / llite / lloop.c
diff --git a/lustre/llite/lloop.c b/lustre/llite/lloop.c

index 8c867c5..5f00ab2 100644 (file)
--- a/lustre/llite/lloop.c
+++ b/lustre/llite/lloop.c
@@ -27,7 +27,7 @@
   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
   *
- * Copyright (c) 2011, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
@@ -79,9 +79,6 @@
   *
   */
  
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
  #include <linux/module.h>
  
  #include <linux/sched.h>
@@ -135,12 +132,12 @@ struct lloop_device {
  
          int                  old_gfp_mask;
  
-        cfs_spinlock_t       lo_lock;
-        struct bio          *lo_bio;
-        struct bio          *lo_biotail;
-        int                  lo_state;
-        cfs_semaphore_t      lo_sem;
-        cfs_mutex_t          lo_ctl_mutex;
+       spinlock_t              lo_lock;
+       struct bio              *lo_bio;
+       struct bio              *lo_biotail;
+       int                     lo_state;
+       struct semaphore        lo_sem;
+       struct mutex            lo_ctl_mutex;
          cfs_atomic_t         lo_pending;
          cfs_waitq_t          lo_bh_wait;
  
@@ -169,7 +166,7 @@ static int lloop_major;
  static int max_loop = MAX_LOOP_DEFAULT;
  static struct lloop_device *loop_dev;
  static struct gendisk **disks;
-static cfs_mutex_t lloop_mutex;
+static struct mutex lloop_mutex;
  static void *ll_iocontrol_magic = NULL;
  
  static loff_t get_loop_size(struct lloop_device *lo, struct file *file)
@@ -244,32 +241,32 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
          pvec->ldp_size = page_count << PAGE_CACHE_SHIFT;
          pvec->ldp_nr = page_count;
  
-        /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
-         * write those pages into OST. Even worse case is that more pages
-         * would be asked to write out to swap space, and then finally get here
-         * again.
-         * Unfortunately this is NOT easy to fix.
-         * Thoughts on solution:
-         * 0. Define a reserved pool for cl_pages, which could be a list of
-         *    pre-allocated cl_pages from cl_page_kmem;
-         * 1. Define a new operation in cl_object_operations{}, says clo_depth,
-         *    which measures how many layers for this lustre object. Generally
-         *    speaking, the depth would be 2, one for llite, and one for lovsub.
-         *    However, for SNS, there will be more since we need additional page
-         *    to store parity;
-         * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
-         *    pool. Afterwards, the clio would allocate the pages from reserved 
-         *    pool, this guarantees we neeedn't allocate the cl_pages from
-         *    generic cl_page slab cache.
-         *    Of course, if there is NOT enough pages in the pool, we might
-         *    be asked to write less pages once, this purely depends on
-         *    implementation. Anyway, we should be careful to avoid deadlocking.
-         */
-        LOCK_INODE_MUTEX(inode);
-        bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
-        UNLOCK_INODE_MUTEX(inode);
-        cl_io_fini(env, io);
-        return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
+       /* FIXME: in ll_direct_rw_pages, it has to allocate many cl_page{}s to
+        * write those pages into OST. An even worse case is that more pages
+        * would be asked to write out to swap space, and then finally get here
+        * again.
+        * Unfortunately this is NOT easy to fix.
+        * Thoughts on a solution:
+        * 0. Define a reserved pool for cl_pages, which could be a list of
+        *    pre-allocated cl_pages;
+        * 1. Define a new operation in cl_object_operations{}, say clo_depth,
+        *    which measures how many layers this lustre object has. Generally
+        *    speaking, the depth would be 2, one for llite, and one for lovsub.
+        *    However, for SNS, there will be more since we need an extra page
+        *    to store parity;
+        * 2. Reserve the # of (page_count * depth) cl_pages from the reserved
+        *    pool. Afterwards, clio would allocate the pages from the reserved
+        *    pool; this guarantees we needn't allocate the cl_pages from
+        *    the generic cl_page slab cache.
+        *    Of course, if there are NOT enough pages in the pool, we might
+        *    be asked to write fewer pages at once; this purely depends on the
+        *    implementation. Anyway, we should be careful to avoid deadlocking.
+        */
+       mutex_lock(&inode->i_mutex);
+       bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
+       mutex_unlock(&inode->i_mutex);
+       cl_io_fini(env, io);
+       return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
  }
  
  /*
@@ -277,19 +274,19 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
   */
  static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
  {
-        unsigned long flags;
-
-        cfs_spin_lock_irqsave(&lo->lo_lock, flags);
-        if (lo->lo_biotail) {
-                lo->lo_biotail->bi_next = bio;
-                lo->lo_biotail = bio;
-        } else
-                lo->lo_bio = lo->lo_biotail = bio;
-        cfs_spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-        cfs_atomic_inc(&lo->lo_pending);
-        if (cfs_waitq_active(&lo->lo_bh_wait))
-                cfs_waitq_signal(&lo->lo_bh_wait);
+       unsigned long flags;
+
+       spin_lock_irqsave(&lo->lo_lock, flags);
+       if (lo->lo_biotail) {
+               lo->lo_biotail->bi_next = bio;
+               lo->lo_biotail = bio;
+       } else
+               lo->lo_bio = lo->lo_biotail = bio;
+       spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+       cfs_atomic_inc(&lo->lo_pending);
+       if (cfs_waitq_active(&lo->lo_bh_wait))
+               cfs_waitq_signal(&lo->lo_bh_wait);
  }
  
  /*
@@ -297,18 +294,18 @@ static void loop_add_bio(struct lloop_device *lo, struct bio *bio)
   */
  static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
  {
-        struct bio *first;
-        struct bio **bio;
-        unsigned int count = 0;
-        unsigned int page_count = 0;
-        int rw;
-
-        cfs_spin_lock_irq(&lo->lo_lock);
-        first = lo->lo_bio;
-        if (unlikely(first == NULL)) {
-                cfs_spin_unlock_irq(&lo->lo_lock);
-                return 0;
-        }
+       struct bio *first;
+       struct bio **bio;
+       unsigned int count = 0;
+       unsigned int page_count = 0;
+       int rw;
+
+       spin_lock_irq(&lo->lo_lock);
+       first = lo->lo_bio;
+       if (unlikely(first == NULL)) {
+               spin_unlock_irq(&lo->lo_lock);
+               return 0;
+       }
  
          /* TODO: need to split the bio, too bad. */
          LASSERT(first->bi_vcnt <= LLOOP_MAX_SEGMENTS);
@@ -337,11 +334,12 @@ static unsigned int loop_get_bio(struct lloop_device *lo, struct bio **req)
                  lo->lo_bio = NULL;
          }
          *req = first;
-        cfs_spin_unlock_irq(&lo->lo_lock);
-        return count;
+       spin_unlock_irq(&lo->lo_lock);
+       return count;
  }
  
-static int loop_make_request(struct request_queue *q, struct bio *old_bio)
+static ll_mrf_ret
+loop_make_request(struct request_queue *q, struct bio *old_bio)
  {
          struct lloop_device *lo = q->queuedata;
          int rw = bio_rw(old_bio);
@@ -353,9 +351,9 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
          CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
                 (unsigned long long)old_bio->bi_sector, old_bio->bi_size);
  
-        cfs_spin_lock_irq(&lo->lo_lock);
-        inactive = (lo->lo_state != LLOOP_BOUND);
-        cfs_spin_unlock_irq(&lo->lo_lock);
+       spin_lock_irq(&lo->lo_lock);
+       inactive = (lo->lo_state != LLOOP_BOUND);
+       spin_unlock_irq(&lo->lo_lock);
          if (inactive)
                  goto err;
  
@@ -369,10 +367,10 @@ static int loop_make_request(struct request_queue *q, struct bio *old_bio)
                  goto err;
          }
          loop_add_bio(lo, old_bio);
-        return 0;
+       LL_MRF_RETURN(0);
  err:
-        cfs_bio_io_error(old_bio, old_bio->bi_size);
-        return 0;
+       cfs_bio_io_error(old_bio, old_bio->bi_size);
+       LL_MRF_RETURN(0);
  }
  
  #ifdef HAVE_REQUEST_QUEUE_UNPLUG_FN
@@ -440,15 +438,15 @@ static int loop_thread(void *data)
          /*
           * up sem, we are running
           */
-        cfs_up(&lo->lo_sem);
-
-        for (;;) {
-                cfs_wait_event(lo->lo_bh_wait, loop_active(lo));
-                if (!cfs_atomic_read(&lo->lo_pending)) {
-                        int exiting = 0;
-                        cfs_spin_lock_irq(&lo->lo_lock);
-                        exiting = (lo->lo_state == LLOOP_RUNDOWN);
-                        cfs_spin_unlock_irq(&lo->lo_lock);
+       up(&lo->lo_sem);
+
+       for (;;) {
+               cfs_wait_event(lo->lo_bh_wait, loop_active(lo));
+               if (!cfs_atomic_read(&lo->lo_pending)) {
+                       int exiting = 0;
+                       spin_lock_irq(&lo->lo_lock);
+                       exiting = (lo->lo_state == LLOOP_RUNDOWN);
+                       spin_unlock_irq(&lo->lo_lock);
                          if (exiting)
                                  break;
                  }
@@ -480,7 +478,7 @@ static int loop_thread(void *data)
          cl_env_put(env, &refcheck);
  
  out:
-        cfs_up(&lo->lo_sem);
+       up(&lo->lo_sem);
          return ret;
  }
  
@@ -557,7 +555,7 @@ static int loop_set_fd(struct lloop_device *lo, struct file *unused,
          set_blocksize(bdev, lo->lo_blocksize);
  
          cfs_create_thread(loop_thread, lo, CLONE_KERNEL);
-        cfs_down(&lo->lo_sem);
+       down(&lo->lo_sem);
          return 0;
  
   out:
@@ -581,12 +579,12 @@ static int loop_clr_fd(struct lloop_device *lo, struct block_device *bdev,
          if (filp == NULL)
                  return -EINVAL;
  
-        cfs_spin_lock_irq(&lo->lo_lock);
-        lo->lo_state = LLOOP_RUNDOWN;
-        cfs_spin_unlock_irq(&lo->lo_lock);
-        cfs_waitq_signal(&lo->lo_bh_wait);
+       spin_lock_irq(&lo->lo_lock);
+       lo->lo_state = LLOOP_RUNDOWN;
+       spin_unlock_irq(&lo->lo_lock);
+       cfs_waitq_signal(&lo->lo_bh_wait);
  
-        cfs_down(&lo->lo_sem);
+       down(&lo->lo_sem);
          lo->lo_backing_file = NULL;
          lo->ioctl = NULL;
          lo->lo_device = NULL;
@@ -614,9 +612,9 @@ static int lo_open(struct inode *inode, struct file *file)
          struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
  #endif
  
-        cfs_mutex_lock(&lo->lo_ctl_mutex);
+       mutex_lock(&lo->lo_ctl_mutex);
          lo->lo_refcnt++;
-        cfs_mutex_unlock(&lo->lo_ctl_mutex);
+       mutex_unlock(&lo->lo_ctl_mutex);
  
          return 0;
  }
@@ -631,9 +629,9 @@ static int lo_release(struct inode *inode, struct file *file)
          struct lloop_device *lo = inode->i_bdev->bd_disk->private_data;
  #endif
  
-        cfs_mutex_lock(&lo->lo_ctl_mutex);
+       mutex_lock(&lo->lo_ctl_mutex);
          --lo->lo_refcnt;
-        cfs_mutex_unlock(&lo->lo_ctl_mutex);
+       mutex_unlock(&lo->lo_ctl_mutex);
  
          return 0;
  }
@@ -655,7 +653,7 @@ static int lo_ioctl(struct inode *inode, struct file *unused,
          int err = 0;
  #endif
  
-        cfs_mutex_lock(&lloop_mutex);
+       mutex_lock(&lloop_mutex);
          switch (cmd) {
          case LL_IOC_LLOOP_DETACH: {
                  err = loop_clr_fd(lo, bdev, 2);
@@ -684,7 +682,7 @@ static int lo_ioctl(struct inode *inode, struct file *unused,
                  err = -EINVAL;
                  break;
          }
-        cfs_mutex_unlock(&lloop_mutex);
+       mutex_unlock(&lloop_mutex);
  
          return err;
  }
@@ -720,7 +718,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
  
          CWARN("Enter llop_ioctl\n");
  
-        cfs_mutex_lock(&lloop_mutex);
+       mutex_lock(&lloop_mutex);
          switch (cmd) {
          case LL_IOC_LLOOP_ATTACH: {
                  struct lloop_device *lo_free = NULL;
@@ -790,7 +788,7 @@ static enum llioc_iter lloop_ioctl(struct inode *unused, struct file *file,
          }
  
  out:
-        cfs_mutex_unlock(&lloop_mutex);
+       mutex_unlock(&lloop_mutex);
  out1:
          if (rcp)
                  *rcp = err;
@@ -836,7 +834,7 @@ static int __init lloop_init(void)
                          goto out_mem3;
          }
  
-        cfs_mutex_init(&lloop_mutex);
+       mutex_init(&lloop_mutex);
  
          for (i = 0; i < max_loop; i++) {
                  struct lloop_device *lo = &loop_dev[i];
@@ -846,11 +844,11 @@ static int __init lloop_init(void)
                  if (!lo->lo_queue)
                          goto out_mem4;
  
-                cfs_mutex_init(&lo->lo_ctl_mutex);
-                cfs_sema_init(&lo->lo_sem, 0);
-                cfs_waitq_init(&lo->lo_bh_wait);
-                lo->lo_number = i;
-                cfs_spin_lock_init(&lo->lo_lock);
+               mutex_init(&lo->lo_ctl_mutex);
+               sema_init(&lo->lo_sem, 0);
+               cfs_waitq_init(&lo->lo_bh_wait);
+               lo->lo_number = i;
+               spin_lock_init(&lo->lo_lock);
                  disk->major = lloop_major;
                  disk->first_minor = i;
                  disk->fops = &lo_fops;