LU-1346 libcfs: cleanup libcfs primitive (linux-prim.h)
[fs/lustre-release.git] lustre/obdclass/cl_page.c
index 96d3551..5db41e8 100644
@@ -27,7 +27,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg,
                             int radix);
 
-static cfs_mem_cache_t      *cl_page_kmem = NULL;
-
-static struct lu_kmem_descr cl_page_caches[] = {
-        {
-                .ckd_cache = &cl_page_kmem,
-                .ckd_name  = "cl_page_kmem",
-                .ckd_size  = sizeof (struct cl_page)
-        },
-        {
-                .ckd_cache = NULL
-        }
-};
-
 #ifdef LIBCFS_DEBUG
 # define PASSERT(env, page, expr)                                       \
   do {                                                                    \
@@ -77,7 +64,7 @@ static struct lu_kmem_descr cl_page_caches[] = {
         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
 #endif /* !LIBCFS_DEBUG */
 
-#ifdef INVARIANT_CHECK
+#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
 # define PINVRNT(env, page, expr)                                       \
   do {                                                                    \
           if (unlikely(!(expr))) {                                      \
@@ -85,10 +72,10 @@ static struct lu_kmem_descr cl_page_caches[] = {
                   LINVRNT(0);                                           \
           }                                                             \
   } while (0)
-#else /* !INVARIANT_CHECK */
+#else /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
 # define PINVRNT(env, page, exp) \
-        ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
-#endif /* !INVARIANT_CHECK */
+        ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
+#endif /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
 
 /* Disable page statistic by default due to huge performance penalty. */
 #ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
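
The disabled-check expansions of PASSERT() and PINVRNT() above rely on a
standard C idiom: sizeof never evaluates its operand, so the macro arguments
are still parsed and count as "used" (no unused-variable warnings when the
checks are compiled out) while generating no code. A minimal sketch of the
idiom, with a hypothetical macro name:

    /* env, page and exp are type-checked at compile time but never
     * evaluated at run time; the whole expression generates no code. */
    #define MY_CHECK_OFF(env, page, exp) \
            ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))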
@@ -168,14 +155,14 @@ cl_page_at_trusted(const struct cl_page *page,
  */
 struct cl_page *cl_page_lookup(struct cl_object_header *hdr, pgoff_t index)
 {
-        struct cl_page *page;
+       struct cl_page *page;
 
-        LASSERT_SPIN_LOCKED(&hdr->coh_page_guard);
+       LASSERT(spin_is_locked(&hdr->coh_page_guard));
 
-        page = radix_tree_lookup(&hdr->coh_tree, index);
-        if (page != NULL)
-                cl_page_get_trust(page);
-        return page;
+       page = radix_tree_lookup(&hdr->coh_tree, index);
+       if (page != NULL)
+               cl_page_get_trust(page);
+       return page;
 }
 EXPORT_SYMBOL(cl_page_lookup);
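
This hunk is representative of the whole commit: eight-space indents are
converted to tabs, and a libcfs wrapper (LASSERT_SPIN_LOCKED) is replaced by
the native kernel primitive it aliased. The wrapper-to-native mappings
visible in this file:

    LASSERT_SPIN_LOCKED(l)  ->  LASSERT(spin_is_locked(l))
    cfs_current()           ->  current
    cfs_need_resched()      ->  need_resched()
    cfs_cond_resched()      ->  cond_resched()
    cfs_page_t              ->  struct page
    CFS_PAGE_SHIFT          ->  PAGE_CACHE_SHIFT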
 
@@ -192,8 +179,8 @@ EXPORT_SYMBOL(cl_page_lookup);
  * Return at least one page in @queue unless there is no covered page.
  */
 int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
-                        struct cl_io *io, pgoff_t start, pgoff_t end,
-                        cl_page_gang_cb_t cb, void *cbdata)
+                       struct cl_io *io, pgoff_t start, pgoff_t end,
+                       cl_page_gang_cb_t cb, void *cbdata)
 {
         struct cl_object_header *hdr;
         struct cl_page          *page;
@@ -236,46 +223,46 @@ int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
                          */
                         PASSERT(env, page, slice != NULL);
 
-                        page = slice->cpl_page;
-                        /*
-                         * Can safely call cl_page_get_trust() under
-                         * radix-tree spin-lock.
-                         *
-                         * XXX not true, because @page is from object another
-                         * than @hdr and protected by different tree lock.
-                         */
-                        cl_page_get_trust(page);
-                        lu_ref_add_atomic(&page->cp_reference,
-                                          "gang_lookup", cfs_current());
-                        pvec[j++] = page;
-                }
+                       page = slice->cpl_page;
+                       /*
+                        * Can safely call cl_page_get_trust() under
+                        * radix-tree spin-lock.
+                        *
+                        * XXX not true, because @page is from an object other
+                        * than @hdr and is protected by a different tree lock.
+                        */
+                       cl_page_get_trust(page);
+                       lu_ref_add_atomic(&page->cp_reference,
+                                         "gang_lookup", current);
+                       pvec[j++] = page;
+               }
 
-                /*
-                 * Here a delicate locking dance is performed. Current thread
-                 * holds a reference to a page, but has to own it before it
-                 * can be placed into queue. Owning implies waiting, so
-                 * radix-tree lock is to be released. After a wait one has to
-                 * check that pages weren't truncated (cl_page_own() returns
-                 * error in the latter case).
-                 */
+               /*
+                * A delicate locking dance is performed here. The current
+                * thread holds a reference to a page, but has to own it
+                * before the page can be placed into the queue. Owning
+                * implies waiting, so the radix-tree lock must be released.
+                * After the wait, one has to check that the pages weren't
+                * truncated (cl_page_own() returns an error in that case).
+                */
                spin_unlock(&hdr->coh_page_guard);
-                tree_lock = 0;
-
-                for (i = 0; i < j; ++i) {
-                        page = pvec[i];
-                        if (res == CLP_GANG_OKAY)
-                                res = (*cb)(env, io, page, cbdata);
-                        lu_ref_del(&page->cp_reference,
-                                   "gang_lookup", cfs_current());
-                        cl_page_put(env, page);
-                }
-                if (nr < CLT_PVEC_SIZE || end_of_region)
-                        break;
+               tree_lock = 0;
+
+               for (i = 0; i < j; ++i) {
+                       page = pvec[i];
+                       if (res == CLP_GANG_OKAY)
+                               res = (*cb)(env, io, page, cbdata);
+                       lu_ref_del(&page->cp_reference,
+                                  "gang_lookup", current);
+                       cl_page_put(env, page);
+               }
+               if (nr < CLT_PVEC_SIZE || end_of_region)
+                       break;
 
-                if (res == CLP_GANG_OKAY && cfs_need_resched())
-                        res = CLP_GANG_RESCHED;
-                if (res != CLP_GANG_OKAY)
-                        break;
+               if (res == CLP_GANG_OKAY && need_resched())
+                       res = CLP_GANG_RESCHED;
+               if (res != CLP_GANG_OKAY)
+                       break;
 
                spin_lock(&hdr->coh_page_guard);
                tree_lock = 1;
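
The comment above calls this a delicate locking dance, and the shape of the
loop is worth pulling out. A condensed, hypothetical sketch (the slice
lookup, end-of-region handling and resched logic are dropped): take page
references under the spinlock, then drop the lock before the callback, which
may sleep while waiting to own a page.

    static int gang_lookup_sketch(const struct lu_env *env, struct cl_io *io,
                                  struct cl_object_header *hdr, pgoff_t idx,
                                  cl_page_gang_cb_t cb, void *cbdata)
    {
            struct cl_page *pvec[CLT_PVEC_SIZE];
            unsigned int i, nr;
            int res = CLP_GANG_OKAY;

            spin_lock(&hdr->coh_page_guard);
            nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
                                        idx, CLT_PVEC_SIZE);
            for (i = 0; i < nr; i++)
                    cl_page_get_trust(pvec[i]); /* safe under the tree lock */
            spin_unlock(&hdr->coh_page_guard);  /* may sleep from here on */

            for (i = 0; i < nr; i++) {
                    if (res == CLP_GANG_OKAY)
                            res = (*cb)(env, io, pvec[i], cbdata);
                    cl_page_put(env, pvec[i]);
            }
            return res;
    }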
@@ -289,6 +276,7 @@ EXPORT_SYMBOL(cl_page_gang_lookup);
 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
 {
         struct cl_object *obj  = page->cp_obj;
+       int pagesize = cl_object_header(obj)->coh_page_bufsize;
 
         PASSERT(env, page, cfs_list_empty(&page->cp_batch));
         PASSERT(env, page, page->cp_owner == NULL);
@@ -308,10 +296,10 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page)
         }
        CS_PAGE_DEC(obj, total);
        CS_PAGESTATE_DEC(obj, page->cp_state);
-        lu_object_ref_del_at(&obj->co_lu, page->cp_obj_ref, "cl_page", page);
+       lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
         cl_object_put(env, obj);
         lu_ref_fini(&page->cp_reference);
-        OBD_SLAB_FREE_PTR(page, cl_page_kmem);
+        OBD_FREE(page, pagesize);
         EXIT;
 }
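
The removal of the cl_page_kmem slab at the top of the file explains this
hunk: a cl_page is no longer a fixed-size object. As I read the new code,
the allocation size comes from coh_page_bufsize in the object header, sized
per object to hold the cl_page together with its per-layer slices in a
single buffer, so one kmem cache cannot serve all objects and the free path
must pass the matching size back. The pagesize local is read before
cl_object_put() drops the reference that keeps the object header alive.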
 
@@ -326,58 +314,56 @@ static inline void cl_page_state_set_trust(struct cl_page *page,
         *(enum cl_page_state *)&page->cp_state = state;
 }
 
-static int cl_page_alloc(const struct lu_env *env, struct cl_object *o,
-                         pgoff_t ind, struct page *vmpage,
-                         enum cl_page_type type, struct cl_page **out)
+static struct cl_page *cl_page_alloc(const struct lu_env *env,
+               struct cl_object *o, pgoff_t ind, struct page *vmpage,
+               enum cl_page_type type)
 {
-        struct cl_page          *page;
-        struct cl_page          *err  = NULL;
-        struct lu_object_header *head;
-        int                      result;
+       struct cl_page          *page;
+       struct lu_object_header *head;
 
-        ENTRY;
-        result = +1;
-        OBD_SLAB_ALLOC_PTR_GFP(page, cl_page_kmem, CFS_ALLOC_IO);
-        if (page != NULL) {
-                cfs_atomic_set(&page->cp_ref, 1);
+       ENTRY;
+       OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
+                       __GFP_IO);
+       if (page != NULL) {
+               int result = 0;
+               cfs_atomic_set(&page->cp_ref, 1);
                if (type == CPT_CACHEABLE) /* for radix tree */
                        cfs_atomic_inc(&page->cp_ref);
-                page->cp_obj = o;
-                cl_object_get(o);
-                page->cp_obj_ref = lu_object_ref_add(&o->co_lu,
-                                                     "cl_page", page);
-                page->cp_index = ind;
-                cl_page_state_set_trust(page, CPS_CACHED);
+               page->cp_obj = o;
+               cl_object_get(o);
+               lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
+                                    page);
+               page->cp_index = ind;
+               cl_page_state_set_trust(page, CPS_CACHED);
                page->cp_type = type;
                CFS_INIT_LIST_HEAD(&page->cp_layers);
                CFS_INIT_LIST_HEAD(&page->cp_batch);
                CFS_INIT_LIST_HEAD(&page->cp_flight);
                mutex_init(&page->cp_mutex);
-                lu_ref_init(&page->cp_reference);
-                head = o->co_lu.lo_header;
-                cfs_list_for_each_entry(o, &head->loh_layers,
-                                        co_lu.lo_linkage) {
-                        if (o->co_ops->coo_page_init != NULL) {
-                                err = o->co_ops->coo_page_init(env, o,
-                                                               page, vmpage);
-                                if (err != NULL) {
-                                        cl_page_delete0(env, page, 0);
-                                        cl_page_free(env, page);
-                                        page = err;
-                                        break;
-                                }
-                        }
-                }
-                if (err == NULL) {
+               lu_ref_init(&page->cp_reference);
+               head = o->co_lu.lo_header;
+               cfs_list_for_each_entry(o, &head->loh_layers,
+                                       co_lu.lo_linkage) {
+                       if (o->co_ops->coo_page_init != NULL) {
+                               result = o->co_ops->coo_page_init(env, o,
+                                                                 page, vmpage);
+                               if (result != 0) {
+                                       cl_page_delete0(env, page, 0);
+                                       cl_page_free(env, page);
+                                       page = ERR_PTR(result);
+                                       break;
+                               }
+                       }
+               }
+               if (result == 0) {
                        CS_PAGE_INC(o, total);
                        CS_PAGE_INC(o, create);
                        CS_PAGESTATE_DEC(o, CPS_CACHED);
-                        result = 0;
-                }
-        } else
-                page = ERR_PTR(-ENOMEM);
-        *out = page;
-        RETURN(result);
+               }
+       } else {
+               page = ERR_PTR(-ENOMEM);
+       }
+       RETURN(page);
 }
 
 /**
@@ -440,8 +426,8 @@ static struct cl_page *cl_page_find0(const struct lu_env *env,
         }
 
         /* allocate and initialize cl_page */
-        err = cl_page_alloc(env, o, idx, vmpage, type, &page);
-        if (err != 0)
+        page = cl_page_alloc(env, o, idx, vmpage, type);
+        if (IS_ERR(page))
                 RETURN(page);
 
         if (type == CPT_TRANSIENT) {
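
cl_page_alloc() now hands back the page pointer itself, with failure encoded
through the kernel's ERR_PTR convention, instead of the old out-parameter
plus status code. A minimal sketch of the convention (the helper name is
hypothetical):

    #include <linux/err.h>
    #include <linux/slab.h>

    /* Errors travel inside the pointer: ERR_PTR() encodes a negative
     * errno, IS_ERR() tests for one, PTR_ERR() decodes it. */
    static struct cl_page *alloc_sketch(size_t size)
    {
            struct cl_page *page = kzalloc(size, GFP_NOFS);

            if (page == NULL)
                    return ERR_PTR(-ENOMEM);
            return page;
    }

    /* Caller side, mirroring cl_page_find0() above: */
    page = alloc_sketch(sizeof(*page));
    if (IS_ERR(page))
            return PTR_ERR(page);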
@@ -528,7 +514,7 @@ static inline int cl_page_invariant(const struct cl_page *pg)
         child  = pg->cp_child;
         owner  = pg->cp_owner;
 
-        return cl_page_in_use(pg) &&
+        return cl_page_in_use_noref(pg) &&
                 ergo(parent != NULL, parent->cp_child == pg) &&
                 ergo(child != NULL, child->cp_parent == pg) &&
                 ergo(child != NULL, pg->cp_obj != child->cp_obj) &&
@@ -669,7 +655,7 @@ EXPORT_SYMBOL(cl_page_put);
 /**
  * Returns a VM page associated with a given cl_page.
  */
-cfs_page_t *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
+struct page *cl_page_vmpage(const struct lu_env *env, struct cl_page *page)
 {
         const struct cl_page_slice *slice;
 
@@ -692,7 +678,7 @@ EXPORT_SYMBOL(cl_page_vmpage);
 /**
  * Returns a cl_page associated with a VM page, and given cl_object.
  */
-struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
+struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
 {
        struct cl_page *top;
        struct cl_page *page;
@@ -877,7 +863,7 @@ void cl_page_disown0(const struct lu_env *env,
         ENTRY;
         state = pg->cp_state;
         PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
-        PINVRNT(env, pg, cl_page_invariant(pg));
+        PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
         cl_page_owner_clear(pg);
 
         if (state == CPS_OWNED)
@@ -1059,7 +1045,8 @@ EXPORT_SYMBOL(cl_page_unassume);
 void cl_page_disown(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
 {
-        PINVRNT(env, pg, cl_page_is_owned(pg, io));
+        PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
+                        pg->cp_state == CPS_FREEING);
 
         ENTRY;
         pg = cl_page_top(pg);
@@ -1282,6 +1269,8 @@ int cl_page_prep(const struct lu_env *env, struct cl_io *io,
          * PG_writeback without risking other layers deciding to skip this
          * page.
          */
+       if (crt >= CRT_NR)
+               return -EINVAL;
         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
         if (result == 0)
                 cl_page_io_start(env, pg, crt);
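
The new guard makes the array bound explicit: crt indexes the per-layer io[]
operation vectors, and anything >= CRT_NR would index past them. The same
check is added to cl_page_completion(), cl_page_make_ready() and
cl_page_cache_add() below. For reference, the request-type enum as I recall
it from cl_object.h:

    enum cl_req_type {
            CRT_READ,
            CRT_WRITE,
            CRT_NR
    };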
@@ -1327,6 +1316,8 @@ void cl_page_completion(const struct lu_env *env,
         }
 
         cl_page_state_set(env, pg, CPS_CACHED);
+       if (crt >= CRT_NR)
+               return;
         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
                                (const struct lu_env *,
                                 const struct cl_page_slice *, int), ioret);
@@ -1334,8 +1325,17 @@ void cl_page_completion(const struct lu_env *env,
                 LASSERT(cl_page_is_vmlocked(env, pg));
                 LASSERT(pg->cp_sync_io == anchor);
                 pg->cp_sync_io = NULL;
+       }
+       /*
+        * As page->cp_obj is pinned by a reference from page->cp_req, it is
+        * safe to call cl_page_put() without risking object destruction in a
+        * non-blocking context.
+        */
+       cl_page_put(env, pg);
+
+       if (anchor)
                 cl_sync_io_note(anchor, ioret);
-        }
+
         EXIT;
 }
 EXPORT_SYMBOL(cl_page_completion);
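
Two changes meet in this hunk. The anchor notification moves out of the
cp_sync_io block so that cl_page_put() runs first: as I read it, completion
now also drops the I/O's reference to the page, and since a waiter woken by
cl_sync_io_note() may tear the I/O down immediately, the put has to happen
before the wakeup. The new comment records why that put is safe in a
non-blocking context: page->cp_req pins page->cp_obj.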
@@ -1357,6 +1357,8 @@ int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
         PINVRNT(env, pg, crt < CRT_NR);
 
         ENTRY;
+       if (crt >= CRT_NR)
+               RETURN(-EINVAL);
         result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
                                 (const struct lu_env *,
                                  const struct cl_page_slice *));
@@ -1393,6 +1395,9 @@ int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
 
        ENTRY;
 
+       if (crt >= CRT_NR)
+               RETURN(-EINVAL);
+
        cfs_list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
                if (scan->cpl_ops->io[crt].cpo_cache_add == NULL)
                        continue;
@@ -1470,36 +1475,36 @@ static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
  */
 int cl_pages_prune(const struct lu_env *env, struct cl_object *clobj)
 {
-        struct cl_thread_info   *info;
-        struct cl_object        *obj = cl_object_top(clobj);
-        struct cl_io            *io;
-        int                      result;
-
-        ENTRY;
-        info  = cl_env_info(env);
-        io    = &info->clt_io;
+       struct cl_thread_info   *info;
+       struct cl_object        *obj = cl_object_top(clobj);
+       struct cl_io            *io;
+       int                      result;
 
-        /*
-         * initialize the io. This is ugly since we never do IO in this
-         * function, we just make cl_page_list functions happy. -jay
-         */
-        io->ci_obj = obj;
+       ENTRY;
+       info  = cl_env_info(env);
+       io    = &info->clt_io;
+
+       /*
+        * Initialize the io. This is ugly since we never do I/O in this
+        * function; we only make the cl_page_list functions happy. -jay
+        */
+       io->ci_obj = obj;
        io->ci_ignore_layout = 1;
-        result = cl_io_init(env, io, CIT_MISC, obj);
-        if (result != 0) {
-                cl_io_fini(env, io);
-                RETURN(io->ci_result);
-        }
+       result = cl_io_init(env, io, CIT_MISC, obj);
+       if (result != 0) {
+               cl_io_fini(env, io);
+               RETURN(io->ci_result);
+       }
 
-        do {
-                result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
-                                             page_prune_cb, NULL);
-                if (result == CLP_GANG_RESCHED)
-                        cfs_cond_resched();
-        } while (result != CLP_GANG_OKAY);
+       do {
+               result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
+                                            page_prune_cb, NULL);
+               if (result == CLP_GANG_RESCHED)
+                       cond_resched();
+       } while (result != CLP_GANG_OKAY);
 
-        cl_io_fini(env, io);
-        RETURN(result);
+       cl_io_fini(env, io);
+       RETURN(result);
 }
 EXPORT_SYMBOL(cl_pages_prune);
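
This retry loop is the consumer of CLP_GANG_RESCHED from
cl_page_gang_lookup() above: when the scan notices need_resched() it bails
out rather than hogging the CPU, the caller yields with the now-native
cond_resched(), and the scan is simply restarted (pages already pruned will
not be found again).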
 
@@ -1571,10 +1576,7 @@ EXPORT_SYMBOL(cl_page_cancel);
  */
 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
 {
-        /*
-         * XXX for now.
-         */
-        return (loff_t)idx << CFS_PAGE_SHIFT;
+       return (loff_t)idx << PAGE_CACHE_SHIFT;
 }
 EXPORT_SYMBOL(cl_offset);
 
@@ -1583,16 +1585,13 @@ EXPORT_SYMBOL(cl_offset);
  */
 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
 {
-        /*
-         * XXX for now.
-         */
-        return offset >> CFS_PAGE_SHIFT;
+       return offset >> PAGE_CACHE_SHIFT;
 }
 EXPORT_SYMBOL(cl_index);
 
 int cl_page_size(const struct cl_object *obj)
 {
-        return 1 << CFS_PAGE_SHIFT;
+       return 1 << PAGE_CACHE_SHIFT;
 }
 EXPORT_SYMBOL(cl_page_size);
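
These three helpers are plain shift arithmetic once the libcfs constant is
swapped for PAGE_CACHE_SHIFT; the deleted "XXX for now" comments hint at a
per-object page size that is not implemented here. A worked example with the
common 4 KiB pages (PAGE_CACHE_SHIFT == 12):

    cl_offset(obj, 3)    == 3 << 12     == 12288
    cl_index(obj, 12288) == 12288 >> 12 == 3
    cl_index(obj, 12289) == 3           /* rounds down inside the page */
    cl_page_size(obj)    == 4096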
 
@@ -1620,10 +1619,9 @@ EXPORT_SYMBOL(cl_page_slice_add);
 
 int  cl_page_init(void)
 {
-        return lu_kmem_init(cl_page_caches);
+        return 0;
 }
 
 void cl_page_fini(void)
 {
-        lu_kmem_fini(cl_page_caches);
 }
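
With the dedicated slab gone, cl_page_init() and cl_page_fini() have nothing
left to set up or tear down; as I read it, they survive as empty stubs only
so the obdclass initialization path keeps its symmetric init/fini call
pairs.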