/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2017, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 *
 * Client Lustre Page.
 *
 * Author: Nikita Danilov <nikita.danilov@sun.com>
 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
 */
#define DEBUG_SUBSYSTEM S_CLASS

#include <linux/list.h>
#include <libcfs/libcfs.h>
#include <obd_class.h>
#include <obd_support.h>

#include <cl_object.h>
#include "cl_internal.h"
static void __cl_page_delete(const struct lu_env *env, struct cl_page *pg);
static DEFINE_MUTEX(cl_page_kmem_mutex);

#ifdef LIBCFS_DEBUG
# define PASSERT(env, page, expr)					\
do {									\
	if (unlikely(!(expr))) {					\
		CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");	\
		LASSERT(0);						\
	}								\
} while (0)
#else /* !LIBCFS_DEBUG */
#define PASSERT(env, page, exp) \
	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
#endif /* !LIBCFS_DEBUG */

#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
# define PINVRNT(env, page, expr)					\
do {									\
	if (unlikely(!(expr))) {					\
		CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");	\
		LINVRNT(0);						\
	}								\
} while (0)
#else /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
# define PINVRNT(env, page, exp) \
	((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
#endif /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
/* Page statistics are disabled by default due to the huge performance
 * penalty.
 */
static void cs_page_inc(const struct cl_object *obj,
			enum cache_stats_item item)
{
#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
	atomic_inc(&cl_object_site(obj)->cs_pages.cs_stats[item]);
#endif
}

static void cs_page_dec(const struct cl_object *obj,
			enum cache_stats_item item)
{
#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
	atomic_dec(&cl_object_site(obj)->cs_pages.cs_stats[item]);
#endif
}

static void cs_pagestate_inc(const struct cl_object *obj,
			     enum cl_page_state state)
{
#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
	atomic_inc(&cl_object_site(obj)->cs_pages_state[state]);
#endif
}

static void cs_pagestate_dec(const struct cl_object *obj,
			     enum cl_page_state state)
{
#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
	atomic_dec(&cl_object_site(obj)->cs_pages_state[state]);
#endif
}
/**
 * Internal version of cl_page_get().
 *
 * This function can be used to obtain an initial reference to a previously
 * unreferenced cached object. It can be called only if concurrent page
 * reclamation is somehow prevented, e.g. by keeping a lock on the VM page
 * associated with \a page.
 *
 * Use with care! Not exported.
 */
static void cl_page_get_trust(struct cl_page *page)
{
	LASSERT(refcount_read(&page->cp_ref) > 0);
	refcount_inc(&page->cp_ref);
}
static struct cl_page_slice *
cl_page_slice_get(const struct cl_page *cl_page, int index)
{
	if (index < 0 || index >= cl_page->cp_layer_count)
		return NULL;

	/* To keep each cp_layer_offset value below 256, offsets are stored
	 * relative to the end of struct cl_page rather than to its start.
	 */
	return (struct cl_page_slice *)((char *)cl_page + sizeof(*cl_page) +
					cl_page->cp_layer_offset[index]);
}

#define cl_page_slice_for_each(cl_page, slice, i)		\
	for (i = 0, slice = cl_page_slice_get(cl_page, 0);	\
	     i < (cl_page)->cp_layer_count;			\
	     slice = cl_page_slice_get(cl_page, ++i))

#define cl_page_slice_for_each_reverse(cl_page, slice, i)	\
	for (i = (cl_page)->cp_layer_count - 1,			\
	     slice = cl_page_slice_get(cl_page, i); i >= 0;	\
	     slice = cl_page_slice_get(cl_page, --i))
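
/*
 * Illustrative sketch (not in the original file): walking the slices of a
 * compound page with the iterators above.  Forward iteration visits the
 * layers in the order they were added; the reverse variant is used for
 * completion-style callbacks.  The cpo_discard hook is just one example of
 * a per-layer method; "cp" and "io" are hypothetical local variables.
 *
 *	const struct cl_page_slice *slice;
 *	int i;
 *
 *	cl_page_slice_for_each(cp, slice, i) {
 *		if (slice->cpl_ops->cpo_discard != NULL)
 *			(*slice->cpl_ops->cpo_discard)(env, slice, io);
 *	}
 */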
static void __cl_page_free(struct cl_page *cl_page, unsigned short bufsize)
{
	int index = cl_page->cp_kmem_index;

	if (index >= 0) {
		LASSERT(index < ARRAY_SIZE(cl_page_kmem_array));
		LASSERT(cl_page_kmem_size_array[index] == bufsize);
		OBD_SLAB_FREE(cl_page, cl_page_kmem_array[index], bufsize);
	} else {
		OBD_FREE(cl_page, bufsize);
	}
}
static void cl_page_free(const struct lu_env *env, struct cl_page *cp,
			 struct folio_batch *fbatch)
{
	struct cl_object *obj = cp->cp_obj;
	unsigned short bufsize = cl_object_header(obj)->coh_page_bufsize;
	struct page *vmpage;

	ENTRY;
	PASSERT(env, cp, list_empty(&cp->cp_batch));
	PASSERT(env, cp, cp->cp_owner == NULL);
	if (cp->cp_type != CPT_TRANSIENT)
		PASSERT(env, cp, cp->cp_state == CPS_FREEING);

	if (cp->cp_type == CPT_CACHEABLE) {
		/* vmpage->private was already cleared when the page was
		 * moved into the CPS_FREEING state.
		 */
		vmpage = cp->cp_vmpage;
		LASSERT(vmpage != NULL);
		LASSERT((struct cl_page *)vmpage->private != cp);

		if (fbatch != NULL) {
			if (!folio_batch_add_page(fbatch, vmpage))
				folio_batch_release(fbatch);
		} else {
			put_page(vmpage);
		}
	}

	cp->cp_layer_count = 0;
	cs_page_dec(obj, CS_total);
	if (cp->cp_type != CPT_TRANSIENT)
		cs_pagestate_dec(obj, cp->cp_state);
	lu_object_ref_del_at(&obj->co_lu, &cp->cp_obj_ref, "cl_page", cp);
	if (cp->cp_type != CPT_TRANSIENT)
		cl_object_put(env, obj);
	lu_ref_fini(&cp->cp_reference);
	__cl_page_free(cp, bufsize);
	EXIT;
}
static struct cl_page *__cl_page_alloc(struct cl_object *o)
{
	int i = 0;
	struct cl_page *cl_page = NULL;
	unsigned short bufsize = cl_object_header(o)->coh_page_bufsize;

	if (CFS_FAIL_CHECK(OBD_FAIL_LLITE_PAGE_ALLOC))
		return NULL;

check:
	/* cl_page_kmem_array is expected to hold only 2-3 entries, so the
	 * lookup overhead should be low.
	 */
	for ( ; i < ARRAY_SIZE(cl_page_kmem_array); i++) {
		if (smp_load_acquire(&cl_page_kmem_size_array[i]) == bufsize) {
			OBD_SLAB_ALLOC_GFP(cl_page, cl_page_kmem_array[i],
					   bufsize, GFP_NOFS);
			if (cl_page)
				cl_page->cp_kmem_index = i;
			return cl_page;
		}
		if (cl_page_kmem_size_array[i] == 0)
			break;
	}

	if (i < ARRAY_SIZE(cl_page_kmem_array)) {
		char cache_name[32];

		mutex_lock(&cl_page_kmem_mutex);
		if (cl_page_kmem_size_array[i]) {
			mutex_unlock(&cl_page_kmem_mutex);
			goto check;
		}
		snprintf(cache_name, sizeof(cache_name),
			 "cl_page_kmem-%u", bufsize);
		cl_page_kmem_array[i] =
			kmem_cache_create(cache_name, bufsize, 0, 0, NULL);
		if (cl_page_kmem_array[i] == NULL) {
			mutex_unlock(&cl_page_kmem_mutex);
			return NULL;
		}
		smp_store_release(&cl_page_kmem_size_array[i], bufsize);
		mutex_unlock(&cl_page_kmem_mutex);
		goto check;
	} else {
		OBD_ALLOC_GFP(cl_page, bufsize, GFP_NOFS);
		if (cl_page)
			cl_page->cp_kmem_index = -1;
	}

	return cl_page;
}
struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o,
			      pgoff_t ind, struct page *vmpage,
			      enum cl_page_type type)
{
	struct cl_page *cl_page;
	struct cl_object *head;

	ENTRY;
	cl_page = __cl_page_alloc(o);
	if (cl_page != NULL) {
		int result = 0;

		/* Please fix cl_page::cp_state/cp_type declaration if
		 * these assertions fail in the future.
		 */
		BUILD_BUG_ON((1 << CP_STATE_BITS) < CPS_NR); /* cp_state */
		BUILD_BUG_ON((1 << CP_TYPE_BITS) < CPT_NR); /* cp_type */

		refcount_set(&cl_page->cp_ref, 1);
		cl_page->cp_obj = o;
		if (type != CPT_TRANSIENT)
			cl_object_get(o);
		lu_object_ref_add_at(&o->co_lu, &cl_page->cp_obj_ref,
				     "cl_page", cl_page);
		cl_page->cp_vmpage = vmpage;
		if (cl_page->cp_type != CPT_TRANSIENT)
			cl_page->cp_state = CPS_CACHED;
		cl_page->cp_type = type;
		if (type == CPT_TRANSIENT)
			/* correct inode to be added in ll_direct_rw_pages */
			cl_page->cp_inode = NULL;
		else
			cl_page->cp_inode = page2inode(vmpage);
		INIT_LIST_HEAD(&cl_page->cp_batch);
		lu_ref_init(&cl_page->cp_reference);
		cl_page->cp_page_index = ind;
		head = o;
		cl_object_for_each(o, head) {
			if (o->co_ops->coo_page_init != NULL) {
				result = o->co_ops->coo_page_init(env, o,
							cl_page, ind);
				if (result != 0) {
					__cl_page_delete(env, cl_page);
					cl_page_free(env, cl_page, NULL);
					cl_page = ERR_PTR(result);
					break;
				}
			}
		}
		if (result == 0) {
			cs_page_inc(o, CS_total);
			cs_page_inc(o, CS_create);
			cs_pagestate_dec(o, CPS_CACHED);
		}
	} else {
		cl_page = ERR_PTR(-ENOMEM);
	}
	RETURN(cl_page);
}
/**
 * Returns a cl_page with index \a idx at the object \a o, and associated with
 * the VM page \a vmpage.
 *
 * This is the main entry point into the cl_page caching interface. First, a
 * cache (implemented as a per-object radix tree) is consulted. If the page is
 * found there, it is returned immediately. Otherwise a new page is allocated
 * and returned. In any case, an additional reference to the page is acquired.
 *
 * \see cl_object_find(), cl_lock_find()
 */
struct cl_page *cl_page_find(const struct lu_env *env,
			     struct cl_object *o,
			     pgoff_t idx, struct page *vmpage,
			     enum cl_page_type type)
{
	struct cl_page *page = NULL;
	struct cl_object_header *hdr;

	LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
	might_sleep();

	ENTRY;

	hdr = cl_object_header(o);
	cs_page_inc(o, CS_lookup);

	CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
	       idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
	/* fast path. */
	if (type == CPT_CACHEABLE) {
		/* vmpage lock used to protect the child/parent relationship */
		LASSERT(PageLocked(vmpage));
		/*
		 * cl_vmpage_page() can be called here without any locks as
		 *
		 *     - "vmpage" is locked (which prevents ->private from
		 *       concurrent updates), and
		 *
		 *     - "o" cannot be destroyed while current thread holds a
		 *       reference on it.
		 */
		page = cl_vmpage_page(vmpage, o);
		if (page != NULL) {
			cs_page_inc(o, CS_hit);
			RETURN(page);
		}
	}

	/* allocate and initialize cl_page */
	page = cl_page_alloc(env, o, idx, vmpage, type);
	RETURN(page);
}
EXPORT_SYMBOL(cl_page_find);
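
/*
 * Illustrative sketch (not in the original file): a typical cl_page_find()
 * call for a locked VM page.  The surrounding code ("obj", error handling,
 * what is done with the page) is hypothetical; the lookup-or-allocate and
 * reference-counting behaviour is the one documented above.
 *
 *	struct cl_page *page;
 *
 *	lock_page(vmpage);
 *	page = cl_page_find(env, obj, vmpage->index, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page)) {
 *		unlock_page(vmpage);
 *		return PTR_ERR(page);
 *	}
 *	... use the page ...
 *	cl_page_put(env, page);		... drop the reference acquired above ...
 */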
static inline int cl_page_invariant(const struct cl_page *pg)
{
	return cl_page_in_use_noref(pg);
}
static void __cl_page_state_set(const struct lu_env *env,
				struct cl_page *cl_page,
				enum cl_page_state state)
{
	enum cl_page_state old;

	/* Matrix of allowed state transitions [old][new] for sanity checking.
	 * Entries that are not listed default to 0 (transition not allowed).
	 */
	static const int allowed_transitions[CPS_NR][CPS_NR] = {
		[CPS_CACHED] = {
			[CPS_OWNED]   = 1, /* io finds existing cached page */
			[CPS_PAGEOUT] = 1, /* write-out from the cache */
			[CPS_FREEING] = 1, /* eviction on the memory pressure */
		},
		[CPS_OWNED] = {
			[CPS_CACHED]  = 1, /* release to the cache */
			[CPS_PAGEIN]  = 1, /* start read immediately */
			[CPS_PAGEOUT] = 1, /* start write immediately */
			[CPS_FREEING] = 1, /* lock invalidation or truncate */
		},
		[CPS_PAGEIN] = {
			[CPS_CACHED]  = 1, /* io completion */
		},
		[CPS_PAGEOUT] = {
			[CPS_CACHED]  = 1, /* io completion */
		},
		[CPS_FREEING] = {
			/* no transitions out of CPS_FREEING */
		},
	};

	ENTRY;
	old = cl_page->cp_state;
	PASSERT(env, cl_page, allowed_transitions[old][state]);
	CL_PAGE_HEADER(D_TRACE, env, cl_page, "%d -> %d\n", old, state);
	PASSERT(env, cl_page, cl_page->cp_state == old);
	PASSERT(env, cl_page, equi(state == CPS_OWNED,
				   cl_page->cp_owner != NULL));

	cs_pagestate_dec(cl_page->cp_obj, cl_page->cp_state);
	cs_pagestate_inc(cl_page->cp_obj, state);
	cl_page->cp_state = state;
	EXIT;
}

static void cl_page_state_set(const struct lu_env *env,
			      struct cl_page *page, enum cl_page_state state)
{
	LASSERT(page->cp_type != CPT_TRANSIENT);
	__cl_page_state_set(env, page, state);
}
/**
 * Acquires an additional reference to a page.
 *
 * This can be called only by a caller already possessing a reference to
 * \a page.
 *
 * \see cl_object_get(), cl_lock_get().
 */
void cl_page_get(struct cl_page *page)
{
	ENTRY;
	cl_page_get_trust(page);
	EXIT;
}
EXPORT_SYMBOL(cl_page_get);
/**
 * Releases a reference to a page; use the folio_batch to release the pages
 * in batch if provided.
 *
 * Users need to do a final folio_batch_release() to release any trailing
 * pages.
 */
void cl_batch_put(const struct lu_env *env, struct cl_page *page,
		  struct folio_batch *fbatch)
{
	ENTRY;
	CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
		       refcount_read(&page->cp_ref));

	if (refcount_dec_and_test(&page->cp_ref)) {
		if (page->cp_type != CPT_TRANSIENT)
			LASSERT(page->cp_state == CPS_FREEING);

		LASSERT(refcount_read(&page->cp_ref) == 0);
		PASSERT(env, page, page->cp_owner == NULL);
		PASSERT(env, page, list_empty(&page->cp_batch));
		/* Page is no longer reachable by other threads. Tear it down */
		cl_page_free(env, page, fbatch);
	}

	EXIT;
}
EXPORT_SYMBOL(cl_batch_put);
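
/*
 * Illustrative sketch (not in the original file): releasing a set of pages
 * through a folio_batch so that the underlying VM pages are freed in
 * batches.  "pages" and "count" are hypothetical; the trailing
 * folio_batch_release() is the final step required by the comment above.
 *
 *	struct folio_batch fbatch;
 *	int i;
 *
 *	folio_batch_init(&fbatch);
 *	for (i = 0; i < count; i++)
 *		cl_batch_put(env, pages[i], &fbatch);
 *	folio_batch_release(&fbatch);
 */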
/**
 * Releases a reference to a page; wrapper around cl_batch_put().
 *
 * When the last reference is released, the page is returned to the cache,
 * unless it is in cl_page_state::CPS_FREEING state, in which case it is
 * immediately destroyed.
 *
 * \see cl_object_put(), cl_lock_put().
 */
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
	cl_batch_put(env, page, NULL);
}
EXPORT_SYMBOL(cl_page_put);
/* Returns the cl_page associated with a VM page for the given cl_object. */
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
	struct cl_page *page;

	ENTRY;
	LASSERT(PageLocked(vmpage));

	/*
	 * NOTE: absence of races and liveness of data are guaranteed by page
	 *	 lock on a "vmpage". This works because object destruction
	 *	 proceeds bottom-to-top.
	 */
	page = (struct cl_page *)vmpage->private;
	if (page != NULL) {
		cl_page_get_trust(page);
		LASSERT(page->cp_type == CPT_CACHEABLE);
	}
	RETURN(page);
}
EXPORT_SYMBOL(cl_vmpage_page);
static void cl_page_owner_clear(struct cl_page *page)
{
	ENTRY;
	if (page->cp_owner != NULL) {
		LASSERT(page->cp_owner->ci_owned_nr > 0);
		page->cp_owner->ci_owned_nr--;
		page->cp_owner = NULL;
	}
	EXIT;
}

static void cl_page_owner_set(struct cl_page *page)
{
	ENTRY;
	LASSERT(page->cp_owner != NULL);
	page->cp_owner->ci_owned_nr++;
	EXIT;
}
void __cl_page_disown(const struct lu_env *env, struct cl_page *cp)
{
	struct page *vmpage;
	enum cl_page_state state;

	ENTRY;
	cl_page_owner_clear(cp);

	if (cp->cp_type == CPT_CACHEABLE) {
		state = cp->cp_state;
		PINVRNT(env, cp, state == CPS_OWNED || state == CPS_FREEING);
		PINVRNT(env, cp, cl_page_invariant(cp) || state == CPS_FREEING);
		if (state == CPS_OWNED)
			cl_page_state_set(env, cp, CPS_CACHED);
		vmpage = cp->cp_vmpage;
		LASSERT(vmpage != NULL);
		LASSERT(PageLocked(vmpage));
		unlock_page(vmpage);
	}

	EXIT;
}
/* Returns true iff the page is owned by the given io. */
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
	struct cl_io *top = cl_io_top((struct cl_io *)io);

	LINVRNT(cl_object_same(pg->cp_obj, top->ci_obj));
	ENTRY;
	if (pg->cp_type != CPT_TRANSIENT)
		RETURN(pg->cp_state == CPS_OWNED && pg->cp_owner == top);
	else
		RETURN(pg->cp_owner == top);
}
EXPORT_SYMBOL(cl_page_is_owned);
/**
 * Try to own a page by IO.
 *
 * Waits until the page is in cl_page_state::CPS_CACHED state, and then
 * switches it into cl_page_state::CPS_OWNED state.
 *
 * \pre  !cl_page_is_owned(cl_page, io)
 * \post result == 0 iff cl_page_is_owned(cl_page, io)
 *
 * \retval 0   success
 *
 * \retval -ve failure, e.g., cl_page was destroyed (and landed in
 *             cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
 *             or the page was owned by another thread, or is under IO.
 *
 * \see cl_page_disown()
 * \see cl_page_own_try()
 */
static int __cl_page_own(const struct lu_env *env, struct cl_io *io,
			 struct cl_page *cl_page, int nonblock)
{
	struct page *vmpage = cl_page->cp_vmpage;
	int result;

	ENTRY;
	PINVRNT(env, cl_page, !cl_page_is_owned(cl_page, io));

	if (cl_page->cp_type != CPT_TRANSIENT &&
	    cl_page->cp_state == CPS_FREEING) {
		GOTO(out, result = -ENOENT);
	}

	LASSERT(vmpage != NULL);

	if (cl_page->cp_type == CPT_TRANSIENT) {
		/* transient pages are owned without locking the vmpage */
	} else if (nonblock) {
		if (!trylock_page(vmpage)) {
			GOTO(out, result = -EAGAIN);
		}

		if (unlikely(PageWriteback(vmpage))) {
			unlock_page(vmpage);
			GOTO(out, result = -EAGAIN);
		}
	} else {
		lock_page(vmpage);
		wait_on_page_writeback(vmpage);
	}

	PASSERT(env, cl_page, cl_page->cp_owner == NULL);
	cl_page->cp_owner = cl_io_top(io);
	cl_page_owner_set(cl_page);

	if (cl_page->cp_type != CPT_TRANSIENT) {
		if (cl_page->cp_state == CPS_FREEING) {
			__cl_page_disown(env, cl_page);
			GOTO(out, result = -ENOENT);
		}
		cl_page_state_set(env, cl_page, CPS_OWNED);
	}

	result = 0;
out:
	CDEBUG(D_INFO, "res %d\n", result);
	PINVRNT(env, cl_page, ergo(result == 0,
				   cl_page_invariant(cl_page)));
	RETURN(result);
}
/* Own a page; may block. (see __cl_page_own()) */
int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
{
	return __cl_page_own(env, io, pg, 0);
}
EXPORT_SYMBOL(cl_page_own);

/* Nonblock version of cl_page_own(). (see __cl_page_own()) */
int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
		    struct cl_page *pg)
{
	return __cl_page_own(env, io, pg, 1);
}
EXPORT_SYMBOL(cl_page_own_try);
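
/*
 * Illustrative sketch (not in the original file): the usual ownership round
 * trip for a cached page.  Setup of "io" and "page" and the work done while
 * owning the page are hypothetical; see __cl_page_own() above for the
 * possible failure modes (-ENOENT, -EAGAIN).
 *
 *	if (cl_page_own(env, io, page) == 0) {
 *		... page is CPS_OWNED and its vmpage is locked here ...
 *		cl_page_disown(env, io, page);
 *	}
 */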
/**
 * Assume page ownership.
 *
 * Called when the page is already locked by the hosting VM.
 *
 * \pre  !cl_page_is_owned(cp, io)
 * \post cl_page_is_owned(cp, io)
 */
void cl_page_assume(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *cp)
{
	struct page *vmpage;

	ENTRY;
	PINVRNT(env, cp, cl_object_same(cp->cp_obj, cl_io_top(io)->ci_obj));

	if (cp->cp_type == CPT_CACHEABLE) {
		vmpage = cp->cp_vmpage;
		LASSERT(vmpage != NULL);
		LASSERT(PageLocked(vmpage));
		wait_on_page_writeback(vmpage);
	}

	PASSERT(env, cp, cp->cp_owner == NULL);
	cp->cp_owner = cl_io_top(io);
	cl_page_owner_set(cp);
	if (cp->cp_type != CPT_TRANSIENT)
		cl_page_state_set(env, cp, CPS_OWNED);
	EXIT;
}
EXPORT_SYMBOL(cl_page_assume);
/**
 * Releases page ownership without unlocking the page.
 *
 * Moves cl_page into cl_page_state::CPS_CACHED without releasing the lock
 * on the underlying VM page (as the VM is supposed to do this itself).
 *
 * \pre  cl_page_is_owned(cp, io)
 * \post !cl_page_is_owned(cp, io)
 */
void cl_page_unassume(const struct lu_env *env,
		      struct cl_io *io, struct cl_page *cp)
{
	struct page *vmpage;

	ENTRY;
	PINVRNT(env, cp, cl_page_is_owned(cp, io));
	PINVRNT(env, cp, cl_page_invariant(cp));

	cl_page_owner_clear(cp);
	if (cp->cp_type != CPT_TRANSIENT)
		cl_page_state_set(env, cp, CPS_CACHED);

	if (cp->cp_type == CPT_CACHEABLE) {
		vmpage = cp->cp_vmpage;
		LASSERT(vmpage != NULL);
		LASSERT(PageLocked(vmpage));
	}

	EXIT;
}
EXPORT_SYMBOL(cl_page_unassume);
/**
 * Releases page ownership.
 *
 * Moves the page into cl_page_state::CPS_CACHED.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 */
void cl_page_disown(const struct lu_env *env,
		    struct cl_io *io, struct cl_page *pg)
{
	if (pg->cp_type != CPT_TRANSIENT) {
		PINVRNT(env, pg, cl_page_is_owned(pg, cl_io_top(io)) ||
			pg->cp_state == CPS_FREEING);
	}

	__cl_page_disown(env, pg);
}
EXPORT_SYMBOL(cl_page_disown);
/**
 * Called when a cl_page is to be removed from the object, e.g.
 * as a result of truncate.
 *
 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 *
 * \pre cl_page_is_owned(cp, io)
 *
 * \see cl_page_operations::cpo_discard()
 */
void cl_page_discard(const struct lu_env *env,
		     struct cl_io *io, struct cl_page *cp)
{
	struct page *vmpage;
	const struct cl_page_slice *slice;
	int i;

	PINVRNT(env, cp, cl_page_is_owned(cp, io));
	PINVRNT(env, cp, cl_page_invariant(cp));

	cl_page_slice_for_each(cp, slice, i) {
		if (slice->cpl_ops->cpo_discard != NULL)
			(*slice->cpl_ops->cpo_discard)(env, slice, io);
	}

	if (cp->cp_type == CPT_CACHEABLE) {
		vmpage = cp->cp_vmpage;
		LASSERT(vmpage != NULL);
		LASSERT(PageLocked(vmpage));
		generic_error_remove_folio(vmpage->mapping, page_folio(vmpage));
	} else {
		cl_page_delete(env, cp);
	}
}
EXPORT_SYMBOL(cl_page_discard);
/**
 * Version of cl_page_delete() that can be called for not-fully-constructed
 * cl_pages, e.g. on the error path of cl_page_find()->__cl_page_delete().
 * Doesn't check the cl_page invariant.
 */
static void __cl_page_delete(const struct lu_env *env, struct cl_page *cp)
{
	const struct cl_page_slice *slice;
	int i;

	ENTRY;
	if (cp->cp_type != CPT_TRANSIENT)
		PASSERT(env, cp, cp->cp_state != CPS_FREEING);

	/* Sever all ways to obtain new pointers to @cp. */
	cl_page_owner_clear(cp);
	if (cp->cp_type != CPT_TRANSIENT)
		__cl_page_state_set(env, cp, CPS_FREEING);

	cl_page_slice_for_each_reverse(cp, slice, i) {
		if (slice->cpl_ops->cpo_delete != NULL)
			(*slice->cpl_ops->cpo_delete)(env, slice);
	}

	EXIT;
}
/**
 * Called when a decision is made to throw a page out of memory.
 *
 * Notifies all layers about page destruction by calling the
 * cl_page_operations::cpo_delete() method top-to-bottom.
 *
 * Moves the page into cl_page_state::CPS_FREEING state (this is the only
 * place where transition to this state happens).
 *
 * Eliminates all venues through which new references to the page can be
 * obtained:
 *
 *     - removes the page from the radix trees,
 *
 *     - breaks linkage from the VM page to the cl_page.
 *
 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 * drain after some time, at which point page will be recycled.
 *
 * \pre  VM page is locked
 * \post pg->cp_state == CPS_FREEING
 *
 * \see cl_page_operations::cpo_delete()
 */
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
	PINVRNT(env, pg, cl_page_invariant(pg));
	ENTRY;
	__cl_page_delete(env, pg);
	EXIT;
}
EXPORT_SYMBOL(cl_page_delete);
void cl_page_touch(const struct lu_env *env,
		   const struct cl_page *cl_page, size_t to)
{
	const struct cl_page_slice *slice;
	int i;

	ENTRY;

	cl_page_slice_for_each(cl_page, slice, i) {
		if (slice->cpl_ops->cpo_page_touch != NULL)
			(*slice->cpl_ops->cpo_page_touch)(env, slice, to);
	}

	EXIT;
}
EXPORT_SYMBOL(cl_page_touch);

static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
{
	ENTRY;
	RETURN(crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN);
}
static void cl_page_io_start(const struct lu_env *env,
			     struct cl_page *pg, enum cl_req_type crt)
{
	/* Page is queued for IO, change its state. */
	ENTRY;
	cl_page_owner_clear(pg);
	if (pg->cp_type != CPT_TRANSIENT)
		cl_page_state_set(env, pg, cl_req_type_state(crt));
	EXIT;
}
/**
 * Prepares a page for immediate transfer. Returns -EALREADY if this page
 * should be omitted from the transfer.
 */
int cl_page_prep(const struct lu_env *env, struct cl_io *io,
		 struct cl_page *cp, enum cl_req_type crt)
{
	struct page *vmpage = cp->cp_vmpage;
	int rc = 0;

	PASSERT(env, cp, crt < CRT_NR);
	PINVRNT(env, cp, cl_page_is_owned(cp, io));
	PINVRNT(env, cp, cl_page_invariant(cp));

	if (cp->cp_type == CPT_TRANSIENT) {
		/* Nothing to do. */
	} else if (crt == CRT_READ) {
		if (PageUptodate(vmpage))
			GOTO(out, rc = -EALREADY);
	} else {
		LASSERT(PageLocked(vmpage));
		LASSERT(!PageDirty(vmpage));

		/* The ll_writepage path is not a sync write, so we need to
		 * set the page writeback flag.
		 */
		if (cp->cp_sync_io == NULL)
			set_page_writeback(vmpage);
	}

	cl_page_io_start(env, cp, crt);
out:
	CL_PAGE_HEADER(D_TRACE, env, cp, "%d %d\n", crt, rc);

	RETURN(rc);
}
EXPORT_SYMBOL(cl_page_prep);
/**
 * Notify layers about transfer completion.
 *
 * Invoked by the transfer sub-system (which is a part of osc) to notify
 * layers that a transfer, of which this page is a part, has completed.
 *
 * Completion call-backs are executed in bottom-up order, so that the
 * uppermost layer (llite), responsible for VFS/VM interaction, runs last
 * and can release locks safely.
 *
 * \pre  cl_page->cp_state == CPS_PAGEIN || cl_page->cp_state == CPS_PAGEOUT
 * \post cl_page->cp_state == CPS_CACHED
 *
 * \see cl_page_operations::cpo_completion()
 */
void cl_page_completion(const struct lu_env *env,
			struct cl_page *cl_page, enum cl_req_type crt,
			int ioret)
{
	const struct cl_page_slice *slice;
	struct cl_sync_io *anchor = cl_page->cp_sync_io;
	int i;

	ENTRY;
	PASSERT(env, cl_page, crt < CRT_NR);
	if (cl_page->cp_type != CPT_TRANSIENT)
		PASSERT(env, cl_page,
			cl_page->cp_state == cl_req_type_state(crt));

	CL_PAGE_HEADER(D_TRACE, env, cl_page, "%d %d\n", crt, ioret);
	if (cl_page->cp_type != CPT_TRANSIENT)
		cl_page_state_set(env, cl_page, CPS_CACHED);

	cl_page_slice_for_each_reverse(cl_page, slice, i) {
		if (slice->cpl_ops->io[crt].cpo_completion != NULL)
			(*slice->cpl_ops->io[crt].cpo_completion)(env, slice,
								  ioret);
	}

	if (anchor != NULL) {
		LASSERT(cl_page->cp_sync_io == anchor);
		cl_page->cp_sync_io = NULL;
		cl_sync_io_note(env, anchor, ioret);
	}

	EXIT;
}
EXPORT_SYMBOL(cl_page_completion);
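
/*
 * Illustrative sketch (not in the original file): how a write transfer is
 * expected to drive an owned page through cl_page_prep() and
 * cl_page_completion().  The hand-off to the RPC engine in the middle is
 * elided and hypothetical.
 *
 *	rc = cl_page_prep(env, io, page, CRT_WRITE);
 *	if (rc == 0) {
 *		... queue the page for transfer ...
 *		... later, when the RPC finishes with status "ioret" ...
 *		cl_page_completion(env, page, CRT_WRITE, ioret);
 *	} else if (rc == -EALREADY) {
 *		... the page does not need to be transferred ...
 *	}
 */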
/**
 * Notify layers that the transfer formation engine has decided to yank this
 * page from the cache and to make it a part of a transfer.
 *
 * \pre  cl_page->cp_state == CPS_CACHED
 * \post cl_page->cp_state == CPS_PAGEIN || cl_page->cp_state == CPS_PAGEOUT
 */
int cl_page_make_ready(const struct lu_env *env, struct cl_page *cp,
		       enum cl_req_type crt)
{
	struct page *vmpage = cp->cp_vmpage;
	int rc;

	ENTRY;
	PASSERT(env, cp, crt == CRT_WRITE);

	if (cp->cp_type == CPT_TRANSIENT)
		GOTO(out, rc = 0);

	lock_page(vmpage);
	PASSERT(env, cp, PageUptodate(vmpage));

	if (clear_page_dirty_for_io(vmpage)) {
		LASSERT(cp->cp_state == CPS_CACHED);
		/* This actually clears the dirty bit in the radix tree */
		set_page_writeback(vmpage);
		CL_PAGE_HEADER(D_PAGE, env, cp, "readied\n");
		rc = 0;
	} else if (cp->cp_state == CPS_PAGEOUT) {
		/* is it possible for osc_flush_async_page()
		 * to already make it ready?
		 */
		rc = -EALREADY;
	} else {
		CL_PAGE_DEBUG(D_ERROR, env, cp,
			      "unexpected page state %d\n",
			      cp->cp_state);
		LBUG();
	}

	if (rc == 0) {
		PASSERT(env, cp, cp->cp_state == CPS_CACHED);
		cl_page_io_start(env, cp, crt);
	}

	unlock_page(vmpage);
out:
	CL_PAGE_HEADER(D_TRACE, env, cp, "%d %d\n", crt, rc);

	RETURN(rc);
}
EXPORT_SYMBOL(cl_page_make_ready);
/**
 * Called when a page is being written back at the kernel's initiative.
 *
 * \pre  cl_page_is_owned(cl_page, io)
 * \post ergo(result == 0, cl_page->cp_state == CPS_PAGEOUT)
 *
 * \see cl_page_operations::cpo_flush()
 */
int cl_page_flush(const struct lu_env *env, struct cl_io *io,
		  struct cl_page *cl_page)
{
	const struct cl_page_slice *slice;
	int result = 0;
	int i;

	ENTRY;
	PINVRNT(env, cl_page, cl_page_is_owned(cl_page, io));
	PINVRNT(env, cl_page, cl_page_invariant(cl_page));

	cl_page_slice_for_each(cl_page, slice, i) {
		if (slice->cpl_ops->cpo_flush != NULL)
			result = (*slice->cpl_ops->cpo_flush)(env, slice, io);
		if (result != 0)
			break;
	}
	if (result > 0)
		result = 0;

	CL_PAGE_HEADER(D_TRACE, env, cl_page, "%d\n", result);
	RETURN(result);
}
EXPORT_SYMBOL(cl_page_flush);
/**
 * Tells the transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
 */
void cl_page_clip(const struct lu_env *env, struct cl_page *cl_page,
		  int from, int to)
{
	const struct cl_page_slice *slice;
	int i;

	PINVRNT(env, cl_page, cl_page_invariant(cl_page));

	CL_PAGE_HEADER(D_TRACE, env, cl_page, "%d %d\n", from, to);
	cl_page_slice_for_each(cl_page, slice, i) {
		if (slice->cpl_ops->cpo_clip != NULL)
			(*slice->cpl_ops->cpo_clip)(env, slice, from, to);
	}
}
EXPORT_SYMBOL(cl_page_clip);
/* Prints a human-readable representation of \a pg through \a printer. */
void cl_page_header_print(const struct lu_env *env, void *cookie,
			  lu_printer_t printer, const struct cl_page *pg)
{
	(*printer)(env, cookie,
		   "page@%p[%d %p %d %d %p]\n",
		   pg, refcount_read(&pg->cp_ref), pg->cp_obj,
		   pg->cp_state, pg->cp_type,
		   pg->cp_owner);
}
EXPORT_SYMBOL(cl_page_header_print);
/* Prints a human-readable representation of \a cp through \a printer. */
void cl_page_print(const struct lu_env *env, void *cookie,
		   lu_printer_t printer, const struct cl_page *cp)
{
	struct page *vmpage = cp->cp_vmpage;
	const struct cl_page_slice *slice;
	int result = 0;
	int i;

	cl_page_header_print(env, cookie, printer, cp);

	(*printer)(env, cookie, "vmpage @%p", vmpage);

	if (vmpage != NULL) {
		(*printer)(env, cookie, " %lx %d:%d %lx %lu %slru",
			   (long)vmpage->flags, page_count(vmpage),
			   page_mapcount(vmpage), vmpage->private,
			   vmpage->index,
			   list_empty(&vmpage->lru) ? "not-" : "");
	}

	(*printer)(env, cookie, "\n");

	cl_page_slice_for_each(cp, slice, i) {
		if (slice->cpl_ops->cpo_print != NULL)
			result = (*slice->cpl_ops->cpo_print)(env, slice,
							      cookie, printer);
		if (result != 0)
			break;
	}

	(*printer)(env, cookie, "end page@%p\n", cp);
}
EXPORT_SYMBOL(cl_page_print);
/**
 * Adds a page slice to the compound page.
 *
 * This is called by cl_object_operations::coo_page_init() methods to add
 * per-layer state to the page. New state is appended to the layer array
 * (cl_page::cp_layer_offset), that is, it sits at the bottom of the stack.
 *
 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
 */
void cl_page_slice_add(struct cl_page *cl_page, struct cl_page_slice *slice,
		       struct cl_object *obj,
		       const struct cl_page_operations *ops)
{
	unsigned int offset = (char *)slice -
			      ((char *)cl_page + sizeof(*cl_page));

	ENTRY;
	LASSERT(cl_page->cp_layer_count < CP_MAX_LAYER);
	LASSERT(offset < (1 << sizeof(cl_page->cp_layer_offset[0]) * 8));
	cl_page->cp_layer_offset[cl_page->cp_layer_count++] = offset;
	slice->cpl_ops = ops;
	slice->cpl_page = cl_page;

	EXIT;
}
EXPORT_SYMBOL(cl_page_slice_add);
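
/*
 * Illustrative sketch (not in the original file): a layer's coo_page_init()
 * method placing its slice in the buffer that follows struct cl_page and
 * registering it with cl_page_slice_add().  "my_page", "mp_cl" and
 * "my_page_ops" are hypothetical layer-private names; cl_object_page_slice()
 * is assumed here to be the helper that locates the layer's per-page area.
 *
 *	static int my_page_init(const struct lu_env *env, struct cl_object *obj,
 *				struct cl_page *cl_page, pgoff_t index)
 *	{
 *		struct my_page *mp = cl_object_page_slice(obj, cl_page);
 *
 *		cl_page_slice_add(cl_page, &mp->mp_cl, obj, &my_page_ops);
 *		return 0;
 *	}
 */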
/* Allocate and initialize cl_cache, called by ll_init_sbi(). */
struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
{
	struct cl_client_cache *cache = NULL;

	ENTRY;
	OBD_ALLOC(cache, sizeof(*cache));
	if (cache == NULL)
		RETURN(NULL);

	/* Initialize cache data */
	refcount_set(&cache->ccc_users, 1);
	cache->ccc_lru_max = lru_page_max;
	atomic_long_set(&cache->ccc_lru_left, lru_page_max);
	spin_lock_init(&cache->ccc_lru_lock);
	INIT_LIST_HEAD(&cache->ccc_lru);

	cache->ccc_unstable_check = 1;
	atomic_long_set(&cache->ccc_unstable_nr, 0);
	mutex_init(&cache->ccc_max_cache_mb_lock);

	RETURN(cache);
}
EXPORT_SYMBOL(cl_cache_init);
/* Increase cl_cache refcount */
void cl_cache_incref(struct cl_client_cache *cache)
{
	refcount_inc(&cache->ccc_users);
}
EXPORT_SYMBOL(cl_cache_incref);
/**
 * Decrease the cl_cache refcount and free the cache when it drops to zero.
 * Since llite, lov and osc each hold a cl_cache reference,
 * the free cannot race with users. (LU-6173)
 */
void cl_cache_decref(struct cl_client_cache *cache)
{
	if (refcount_dec_and_test(&cache->ccc_users))
		OBD_FREE(cache, sizeof(*cache));
}
EXPORT_SYMBOL(cl_cache_decref);
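
/*
 * Illustrative sketch (not in the original file): the intended
 * cl_client_cache lifecycle.  Which components take the extra references is
 * hypothetical here; in practice each layer that stores the pointer calls
 * cl_cache_incref() and drops its reference with cl_cache_decref().
 *
 *	struct cl_client_cache *cache = cl_cache_init(lru_page_max);
 *
 *	if (cache == NULL)
 *		return -ENOMEM;
 *	cl_cache_incref(cache);		... another user keeps a pointer ...
 *	...
 *	cl_cache_decref(cache);		... that user is done ...
 *	cl_cache_decref(cache);		... last reference, cache is freed ...
 */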