Whamcloud - gitweb
LU-3321 obdclass: Add a preallocated percpu cl_env 74/8174/5
author Jinshan Xiong <jinshan.xiong@intel.com>
Tue, 5 Nov 2013 03:39:58 +0000 (19:39 -0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 22 Nov 2013 07:46:50 +0000 (07:46 +0000)
This change adds support for a single preallocated cl_env per CPU
which can be used in circumstances where reschedule is not possible.
Currently this interface is only used by the ll_releasepage function.
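
A minimal sketch of the intended calling pattern follows; it mirrors the ll_releasepage() rewrite in this patch. The wrapper function and its argument are hypothetical and not part of this change.

/* Hypothetical caller; only the cl_env_* calls are introduced by this
 * change. No sleeping or rescheduling is allowed between
 * cl_env_percpu_get() and cl_env_percpu_put(), because the env is bound
 * to the current CPU via get_cpu()/put_cpu(). */
static int example_no_resched_user(struct cl_object *obj)
{
        struct lu_env *env;
        void          *cookie;
        int            result = 0;

        cookie = cl_env_reenter();
        env = cl_env_percpu_get();      /* pins the current CPU */
        LASSERT(!IS_ERR(env));

        /* ... short, non-sleeping work on @obj using @env ... */

        cl_env_percpu_put(env);         /* releases the CPU */
        cl_env_reexit(cookie);
        return result;
}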

Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: Prakash Surya <surya1@llnl.gov>
Change-Id: I14a06294f0c2caae8806d7da134a8076f75ddc81
Reviewed-on: http://review.whamcloud.com/8174
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
libcfs/include/libcfs/user-prim.h
lustre/include/cl_object.h
lustre/llite/rw26.c
lustre/obdclass/cl_lock.c
lustre/obdclass/cl_object.c
lustre/obdclass/cl_page.c

diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h
index f073ba5..33e0f62 100644 (file)
@@ -71,6 +71,19 @@ typedef struct proc_dir_entry           cfs_proc_dir_entry_t;
 #ifndef num_possible_cpus
 # define num_possible_cpus() 1
 #endif
+#ifndef get_cpu
+# define get_cpu() 0
+#endif
+#ifndef put_cpu
+# define put_cpu() do {} while (0)
+#endif
+#ifndef NR_CPUS
+# define NR_CPUS 1
+#endif
+#ifndef for_each_possible_cpu
+# define for_each_possible_cpu(cpu) for ((cpu) = 0; (cpu) < 1; (cpu)++)
+#endif
+
 /*
  * Wait Queue.
  */
diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index f57e2ac..6c9bdf0 100644 (file)
@@ -2769,6 +2769,15 @@ static inline void *cl_object_page_slice(struct cl_object *clob,
        return (void *)((char *)page + clob->co_slice_off);
 }
 
+/**
+ * Return refcount of cl_object.
+ */
+static inline int cl_object_refc(struct cl_object *clob)
+{
+       struct lu_object_header *header = clob->co_lu.lo_header;
+       return cfs_atomic_read(&header->loh_ref);
+}
+
 /** @} cl_object */
 
 /** \defgroup cl_page cl_page
@@ -3250,6 +3259,8 @@ void           cl_env_reexit     (void *cookie);
 void           cl_env_implant    (struct lu_env *env, int *refcheck);
 void           cl_env_unplant    (struct lu_env *env, int *refcheck);
 unsigned       cl_env_cache_purge(unsigned nr);
+struct lu_env *cl_env_percpu_get (void);
+void           cl_env_percpu_put (struct lu_env *env);
 
 /** @} cl_env */
 
diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c
index 0a4b9a9..18098dc 100644 (file)
@@ -118,49 +118,62 @@ static void ll_invalidatepage(struct page *vmpage, unsigned long offset)
 #endif
 static int ll_releasepage(struct page *vmpage, RELEASEPAGE_ARG_TYPE gfp_mask)
 {
-        struct cl_env_nest nest;
-        struct lu_env     *env;
-        struct cl_object  *obj;
-        struct cl_page    *page;
-        struct address_space *mapping;
-        int result;
+       struct lu_env           *env;
+       void                    *cookie;
+       struct cl_object        *obj;
+       struct cl_page          *page;
+       struct address_space    *mapping;
+       int result = 0;
+
+       LASSERT(PageLocked(vmpage));
+       if (PageWriteback(vmpage) || PageDirty(vmpage))
+               return 0;
+
+       mapping = vmpage->mapping;
+       if (mapping == NULL)
+               return 1;
+
+       obj = ll_i2info(mapping->host)->lli_clob;
+       if (obj == NULL)
+               return 1;
+
+       /* 1 for caller, 1 for cl_page and 1 for page cache */
+       if (page_count(vmpage) > 3)
+               return 0;
+
+       page = cl_vmpage_page(vmpage, obj);
+       if (page == NULL)
+               return 1;
+
+       cookie = cl_env_reenter();
+       env = cl_env_percpu_get();
+       LASSERT(!IS_ERR(env));
+
+       if (!cl_page_in_use(page)) {
+               result = 1;
+               cl_page_delete(env, page);
+       }
 
-        LASSERT(PageLocked(vmpage));
-        if (PageWriteback(vmpage) || PageDirty(vmpage))
-                return 0;
-
-        mapping = vmpage->mapping;
-        if (mapping == NULL)
-                return 1;
-
-        obj = ll_i2info(mapping->host)->lli_clob;
-        if (obj == NULL)
-                return 1;
-
-        /* 1 for page allocator, 1 for cl_page and 1 for page cache */
-        if (page_count(vmpage) > 3)
-                return 0;
-
-        /* TODO: determine what gfp should be used by @gfp_mask. */
-        env = cl_env_nested_get(&nest);
-        if (IS_ERR(env))
-                /* If we can't allocate an env we won't call cl_page_put()
-                 * later on which further means it's impossible to drop
-                 * page refcount by cl_page, so ask kernel to not free
-                 * this page. */
-                return 0;
-
-        page = cl_vmpage_page(vmpage, obj);
-        result = page == NULL;
-        if (page != NULL) {
-                if (!cl_page_in_use(page)) {
-                        result = 1;
-                        cl_page_delete(env, page);
-                }
-                cl_page_put(env, page);
-        }
-        cl_env_nested_put(&nest, env);
-        return result;
+       /* To use the percpu env array, the call path must not be rescheduled;
+        * otherwise the percpu array will be corrupted if ll_releasepage() is
+        * called again on the same CPU.
+        *
+        * If this page holds the last refc of cl_object, the following
+        * call path may cause reschedule:
+        *   cl_page_put -> cl_page_free -> cl_object_put ->
+        *     lu_object_put -> lu_object_free -> lov_delete_raid0 ->
+        *     cl_locks_prune.
+        *
+        * However, the kernel cannot free this inode until all of its pages
+        * have been cleaned up. Since we hold the page lock here, it is safe
+        * to assume we will not enter the object delete path.
+        */
+       LASSERT(cl_object_refc(obj) > 1);
+       cl_page_put(env, page);
+
+       cl_env_percpu_put(env);
+       cl_env_reexit(cookie);
+       return result;
 }
 
 static int ll_set_page_dirty(struct page *vmpage)
diff --git a/lustre/obdclass/cl_lock.c b/lustre/obdclass/cl_lock.c
index a2a1624..2a71cc3 100644 (file)
@@ -268,7 +268,6 @@ static void cl_lock_free(const struct lu_env *env, struct cl_lock *lock)
 
        ENTRY;
        cl_lock_trace(D_DLMTRACE, env, "free lock", lock);
-       might_sleep();
        while (!cfs_list_empty(&lock->cll_layers)) {
                struct cl_lock_slice *slice;
 
diff --git a/lustre/obdclass/cl_object.c b/lustre/obdclass/cl_object.c
index 6ad9b0a..78bc76c 100644 (file)
@@ -426,6 +426,8 @@ int cache_stats_print(const struct cache_stats *cs,
        return nob;
 }
 
+static void cl_env_percpu_refill(void);
+
 /**
  * Initialize client site.
  *
@@ -445,8 +447,9 @@ int cl_site_init(struct cl_site *s, struct cl_device *d)
                         cfs_atomic_set(&s->cs_pages_state[0], 0);
                 for (i = 0; i < ARRAY_SIZE(s->cs_locks_state); ++i)
                         cfs_atomic_set(&s->cs_locks_state[i], 0);
-        }
-        return result;
+               cl_env_percpu_refill();
+       }
+       return result;
 }
 EXPORT_SYMBOL(cl_site_init);
 
@@ -1112,6 +1115,103 @@ void cl_lvb2attr(struct cl_attr *attr, const struct ost_lvb *lvb)
 }
 EXPORT_SYMBOL(cl_lvb2attr);
 
+static struct cl_env cl_env_percpu[NR_CPUS];
+
+static int cl_env_percpu_init(void)
+{
+       struct cl_env *cle;
+       int tags = LCT_REMEMBER | LCT_NOREF;
+       int i, j;
+       int rc = 0;
+
+       for_each_possible_cpu(i) {
+               struct lu_env *env;
+
+               cle = &cl_env_percpu[i];
+               env = &cle->ce_lu;
+
+               CFS_INIT_LIST_HEAD(&cle->ce_linkage);
+               cle->ce_magic = &cl_env_init0;
+               rc = lu_env_init(env, LCT_CL_THREAD | tags);
+               if (rc == 0) {
+                       rc = lu_context_init(&cle->ce_ses, LCT_SESSION | tags);
+                        if (rc == 0) {
+                                lu_context_enter(&cle->ce_ses);
+                                env->le_ses = &cle->ce_ses;
+                       } else {
+                               lu_env_fini(env);
+                       }
+               }
+               if (rc != 0)
+                       break;
+       }
+       if (rc != 0) {
+               /* Indices 0 to i (excluding i) were correctly initialized,
+                * thus we must uninitialize up to i, the rest are undefined. */
+               for (j = 0; j < i; j++) {
+                       cle = &cl_env_percpu[j];
+                       lu_context_exit(&cle->ce_ses);
+                       lu_context_fini(&cle->ce_ses);
+                       lu_env_fini(&cle->ce_lu);
+               }
+       }
+
+       return rc;
+}
+
+static void cl_env_percpu_fini(void)
+{
+       int i;
+
+       for_each_possible_cpu(i) {
+               struct cl_env *cle = &cl_env_percpu[i];
+
+               lu_context_exit(&cle->ce_ses);
+               lu_context_fini(&cle->ce_ses);
+               lu_env_fini(&cle->ce_lu);
+       }
+}
+
+static void cl_env_percpu_refill(void)
+{
+       int i;
+
+       for_each_possible_cpu(i)
+               lu_env_refill(&cl_env_percpu[i].ce_lu);
+}
+
+void cl_env_percpu_put(struct lu_env *env)
+{
+       struct cl_env *cle;
+       int cpu;
+
+       cpu = smp_processor_id();
+       cle = cl_env_container(env);
+       LASSERT(cle == &cl_env_percpu[cpu]);
+
+       cle->ce_ref--;
+       LASSERT(cle->ce_ref == 0);
+
+       CL_ENV_DEC(busy);
+       cl_env_detach(cle);
+       cle->ce_debug = NULL;
+
+       put_cpu();
+}
+EXPORT_SYMBOL(cl_env_percpu_put);
+
+struct lu_env *cl_env_percpu_get(void)
+{
+       struct cl_env *cle;
+
+       cle = &cl_env_percpu[get_cpu()];
+       cl_env_init0(cle, __builtin_return_address(0));
+
+       cl_env_attach(cle);
+       return &cle->ce_lu;
+}
+EXPORT_SYMBOL(cl_env_percpu_get);
+
 /*****************************************************************************
  *
  * Temporary prototype thing: mirror obd-devices into cl devices.
 /*****************************************************************************
  *
  * Temporary prototype thing: mirror obd-devices into cl devices.
@@ -1267,6 +1367,11 @@ int cl_global_init(void)
         if (result)
                 goto out_lock;
 
+       result = cl_env_percpu_init();
+       if (result)
+               /* no cl_env_percpu_fini on error */
+               goto out_lock;
+
         return 0;
 out_lock:
         cl_lock_fini();
@@ -1284,6 +1389,7 @@ out_store:
  */
 void cl_global_fini(void)
 {
+       cl_env_percpu_fini();
         cl_lock_fini();
         cl_page_fini();
         lu_context_key_degister(&cl_key);
diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c
index a8f46ea..0f02723 100644 (file)
@@ -158,7 +158,6 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
         PASSERT(env, page, page->cp_state == CPS_FREEING);
 
        ENTRY;
-       might_sleep();
        while (!cfs_list_empty(&page->cp_layers)) {
                struct cl_page_slice *slice;
 