1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * Client Lustre Page.
33  *
34  *   Author: Nikita Danilov <nikita.danilov@sun.com>
35  *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_CLASS
39
40 #include <linux/list.h>
41 #include <libcfs/libcfs.h>
42 #include <obd_class.h>
43 #include <obd_support.h>
44
45 #include <cl_object.h>
46 #include "cl_internal.h"
47
48 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
49
50 #ifdef LIBCFS_DEBUG
51 # define PASSERT(env, page, expr)                                       \
52   do {                                                                    \
53           if (unlikely(!(expr))) {                                      \
54                   CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
55                   LASSERT(0);                                           \
56           }                                                             \
57   } while (0)
58 #else /* !LIBCFS_DEBUG */
59 # define PASSERT(env, page, exp) \
60         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
61 #endif /* !LIBCFS_DEBUG */
62
63 #ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
64 # define PINVRNT(env, page, expr)                                       \
65   do {                                                                    \
66           if (unlikely(!(expr))) {                                      \
67                   CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
68                   LINVRNT(0);                                           \
69           }                                                             \
70   } while (0)
71 #else /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
72 # define PINVRNT(env, page, exp) \
73          ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
74 #endif /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
75
76 /* Page statistics are disabled by default due to the huge performance penalty. */
77 #ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
78 #define CS_PAGE_INC(o, item) \
79         atomic_inc(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
80 #define CS_PAGE_DEC(o, item) \
81         atomic_dec(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
82 #define CS_PAGESTATE_INC(o, state) \
83         atomic_inc(&cl_object_site(o)->cs_pages_state[state])
84 #define CS_PAGESTATE_DEC(o, state) \
85         atomic_dec(&cl_object_site(o)->cs_pages_state[state])
86 #else
87 #define CS_PAGE_INC(o, item)
88 #define CS_PAGE_DEC(o, item)
89 #define CS_PAGESTATE_INC(o, state)
90 #define CS_PAGESTATE_DEC(o, state)
91 #endif
92
93 /**
94  * Internal version of cl_page_get().
95  *
96  * This function can be used to obtain an initial reference to a previously
97  * unreferenced cached object. It can be called only if concurrent page
98  * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
99  * associated with \a page.
100  *
101  * Use with care! Not exported.
102  */
103 static void cl_page_get_trust(struct cl_page *page)
104 {
105         LASSERT(atomic_read(&page->cp_ref) > 0);
106         atomic_inc(&page->cp_ref);
107 }
108
109 /**
110  * Returns a slice within a page, corresponding to the given layer in the
111  * device stack.
112  *
113  * \see cl_lock_at()
114  */
115 static const struct cl_page_slice *
116 cl_page_at_trusted(const struct cl_page *page,
117                    const struct lu_device_type *dtype)
118 {
119         const struct cl_page_slice *slice;
120         ENTRY;
121
122         list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
123                 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
124                         RETURN(slice);
125         }
126         RETURN(NULL);
127 }
128
129 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
130 {
131         struct cl_object *obj  = page->cp_obj;
132         int pagesize = cl_object_header(obj)->coh_page_bufsize;
133
134         PASSERT(env, page, list_empty(&page->cp_batch));
135         PASSERT(env, page, page->cp_owner == NULL);
136         PASSERT(env, page, page->cp_state == CPS_FREEING);
137
138         ENTRY;
139         while (!list_empty(&page->cp_layers)) {
140                 struct cl_page_slice *slice;
141
142                 slice = list_entry(page->cp_layers.next,
143                                    struct cl_page_slice, cpl_linkage);
144                 list_del_init(page->cp_layers.next);
145                 if (unlikely(slice->cpl_ops->cpo_fini != NULL))
146                         slice->cpl_ops->cpo_fini(env, slice);
147         }
148         CS_PAGE_DEC(obj, total);
149         CS_PAGESTATE_DEC(obj, page->cp_state);
150         lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
151         cl_object_put(env, obj);
152         lu_ref_fini(&page->cp_reference);
153         OBD_FREE(page, pagesize);
154         EXIT;
155 }
156
157 /**
158  * Helper function updating page state. This is the only place in the code
159  * where cl_page::cp_state field is mutated.
160  */
161 static inline void cl_page_state_set_trust(struct cl_page *page,
162                                            enum cl_page_state state)
163 {
164         /* bypass const. */
165         *(enum cl_page_state *)&page->cp_state = state;
166 }
167
168 struct cl_page *cl_page_alloc(const struct lu_env *env,
169                 struct cl_object *o, pgoff_t ind, struct page *vmpage,
170                 enum cl_page_type type)
171 {
172         struct cl_page          *page;
173         struct lu_object_header *head;
174
175         ENTRY;
176         OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
177                         GFP_NOFS);
178         if (page != NULL) {
179                 int result = 0;
180                 atomic_set(&page->cp_ref, 1);
181                 page->cp_obj = o;
182                 cl_object_get(o);
183                 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
184                                      page);
185                 page->cp_vmpage = vmpage;
186                 cl_page_state_set_trust(page, CPS_CACHED);
187                 page->cp_type = type;
188                 INIT_LIST_HEAD(&page->cp_layers);
189                 INIT_LIST_HEAD(&page->cp_batch);
190                 lu_ref_init(&page->cp_reference);
191                 head = o->co_lu.lo_header;
192                 list_for_each_entry(o, &head->loh_layers,
193                                     co_lu.lo_linkage) {
194                         if (o->co_ops->coo_page_init != NULL) {
195                                 result = o->co_ops->coo_page_init(env, o, page,
196                                                                   ind);
197                                 if (result != 0) {
198                                         cl_page_delete0(env, page);
199                                         cl_page_free(env, page);
200                                         page = ERR_PTR(result);
201                                         break;
202                                 }
203                         }
204                 }
205                 if (result == 0) {
206                         CS_PAGE_INC(o, total);
207                         CS_PAGE_INC(o, create);
208                         CS_PAGESTATE_DEC(o, CPS_CACHED);
209                 }
210         } else {
211                 page = ERR_PTR(-ENOMEM);
212         }
213         RETURN(page);
214 }
215
216 /**
217  * Returns a cl_page with index \a idx at the object \a o, and associated with
218  * the VM page \a vmpage.
219  *
220  * This is the main entry point into the cl_page caching interface. First, a
221  * cache (implemented as a per-object radix tree) is consulted. If the page is
222  * found there, it is returned immediately. Otherwise a new page is allocated
223  * and returned. In either case, an additional reference to the page is acquired.
224  *
225  * \see cl_object_find(), cl_lock_find()
226  */
227 struct cl_page *cl_page_find(const struct lu_env *env,
228                              struct cl_object *o,
229                              pgoff_t idx, struct page *vmpage,
230                              enum cl_page_type type)
231 {
232         struct cl_page          *page = NULL;
233         struct cl_object_header *hdr;
234
235         LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
236         might_sleep();
237
238         ENTRY;
239
240         hdr = cl_object_header(o);
241         CS_PAGE_INC(o, lookup);
242
243         CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
244                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
245         /* fast path. */
246         if (type == CPT_CACHEABLE) {
247                 /* vmpage lock is used to protect the child/parent
248                  * relationship */
249                 KLASSERT(PageLocked(vmpage));
250                 /*
251                  * cl_vmpage_page() can be called here without any locks as
252                  *
253                  *     - "vmpage" is locked (which prevents ->private from
254                  *       concurrent updates), and
255                  *
256                  *     - "o" cannot be destroyed while current thread holds a
257                  *       reference on it.
258                  */
259                 page = cl_vmpage_page(vmpage, o);
260                 if (page != NULL) {
261                         CS_PAGE_INC(o, hit);
262                         RETURN(page);
263                 }
264         }
265
266         /* allocate and initialize cl_page */
267         page = cl_page_alloc(env, o, idx, vmpage, type);
268         RETURN(page);
269 }
270 EXPORT_SYMBOL(cl_page_find);
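/*
 * A minimal usage sketch, assuming "env", "obj", "idx" and "vmpage" are
 * already set up by the caller; for CPT_CACHEABLE pages "vmpage" must be
 * locked:
 *
 *	struct cl_page *page;
 *
 *	page = cl_page_find(env, obj, idx, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	...use the page under the reference returned by cl_page_find()...
 *	cl_page_put(env, page);
 */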
271
272 static inline int cl_page_invariant(const struct cl_page *pg)
273 {
274         return cl_page_in_use_noref(pg);
275 }
276
277 static void cl_page_state_set0(const struct lu_env *env,
278                                struct cl_page *page, enum cl_page_state state)
279 {
280         enum cl_page_state old;
281
282         /*
283          * Matrix of allowed state transitions [old][new], for sanity
284          * checking.
285          */
286         static const int allowed_transitions[CPS_NR][CPS_NR] = {
287                 [CPS_CACHED] = {
288                         [CPS_CACHED]  = 0,
289                         [CPS_OWNED]   = 1, /* io finds existing cached page */
290                         [CPS_PAGEIN]  = 0,
291                         [CPS_PAGEOUT] = 1, /* write-out from the cache */
292                         [CPS_FREEING] = 1, /* eviction under memory pressure */
293                 },
294                 [CPS_OWNED] = {
295                         [CPS_CACHED]  = 1, /* release to the cache */
296                         [CPS_OWNED]   = 0,
297                         [CPS_PAGEIN]  = 1, /* start read immediately */
298                         [CPS_PAGEOUT] = 1, /* start write immediately */
299                         [CPS_FREEING] = 1, /* lock invalidation or truncate */
300                 },
301                 [CPS_PAGEIN] = {
302                         [CPS_CACHED]  = 1, /* io completion */
303                         [CPS_OWNED]   = 0,
304                         [CPS_PAGEIN]  = 0,
305                         [CPS_PAGEOUT] = 0,
306                         [CPS_FREEING] = 0,
307                 },
308                 [CPS_PAGEOUT] = {
309                         [CPS_CACHED]  = 1, /* io completion */
310                         [CPS_OWNED]   = 0,
311                         [CPS_PAGEIN]  = 0,
312                         [CPS_PAGEOUT] = 0,
313                         [CPS_FREEING] = 0,
314                 },
315                 [CPS_FREEING] = {
316                         [CPS_CACHED]  = 0,
317                         [CPS_OWNED]   = 0,
318                         [CPS_PAGEIN]  = 0,
319                         [CPS_PAGEOUT] = 0,
320                         [CPS_FREEING] = 0,
321                 }
322         };
323
324         ENTRY;
325         old = page->cp_state;
326         PASSERT(env, page, allowed_transitions[old][state]);
327         CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
328         PASSERT(env, page, page->cp_state == old);
329         PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner != NULL));
330
331         CS_PAGESTATE_DEC(page->cp_obj, page->cp_state);
332         CS_PAGESTATE_INC(page->cp_obj, state);
333         cl_page_state_set_trust(page, state);
334         EXIT;
335 }
336
337 static void cl_page_state_set(const struct lu_env *env,
338                               struct cl_page *page, enum cl_page_state state)
339 {
340         cl_page_state_set0(env, page, state);
341 }
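/*
 * For reference, one lifecycle permitted by the transition matrix above
 * (illustrative, not exhaustive):
 *
 *	CPS_CACHED  -> CPS_OWNED   : io takes ownership of a cached page
 *	CPS_OWNED   -> CPS_PAGEOUT : write-out starts
 *	CPS_PAGEOUT -> CPS_CACHED  : io completion
 *	CPS_CACHED  -> CPS_FREEING : eviction under memory pressure
 */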
342
343 /**
344  * Acquires an additional reference to a page.
345  *
346  * This can be called only by caller already possessing a reference to \a
347  * page.
348  *
349  * \see cl_object_get(), cl_lock_get().
350  */
351 void cl_page_get(struct cl_page *page)
352 {
353         ENTRY;
354         cl_page_get_trust(page);
355         EXIT;
356 }
357 EXPORT_SYMBOL(cl_page_get);
358
359 /**
360  * Releases a reference to a page.
361  *
362  * When the last reference is released, the page is returned to the cache,
363  * unless it is in cl_page_state::CPS_FREEING state, in which case it is
364  * immediately destroyed.
365  *
366  * \see cl_object_put(), cl_lock_put().
367  */
368 void cl_page_put(const struct lu_env *env, struct cl_page *page)
369 {
370         ENTRY;
371         CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
372                        atomic_read(&page->cp_ref));
373
374         if (atomic_dec_and_test(&page->cp_ref)) {
375                 LASSERT(page->cp_state == CPS_FREEING);
376
377                 LASSERT(atomic_read(&page->cp_ref) == 0);
378                 PASSERT(env, page, page->cp_owner == NULL);
379                 PASSERT(env, page, list_empty(&page->cp_batch));
380                 /*
381                  * Page is no longer reachable by other threads. Tear
382                  * it down.
383                  */
384                 cl_page_free(env, page);
385         }
386
387         EXIT;
388 }
389 EXPORT_SYMBOL(cl_page_put);
390
391 /**
392  * Returns a cl_page associated with a VM page, and given cl_object.
393  */
394 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
395 {
396         struct cl_page *page;
397
398         ENTRY;
399         KLASSERT(PageLocked(vmpage));
400
401         /*
402          * NOTE: absence of races and liveness of data are guaranteed by page
403          *       lock on a "vmpage". That works because object destruction has
404          *       bottom-to-top pass.
405          */
406
407         page = (struct cl_page *)vmpage->private;
408         if (page != NULL) {
409                 cl_page_get_trust(page);
410                 LASSERT(page->cp_type == CPT_CACHEABLE);
411         }
412         RETURN(page);
413 }
414 EXPORT_SYMBOL(cl_vmpage_page);
415
416 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
417                                        const struct lu_device_type *dtype)
418 {
419         return cl_page_at_trusted(page, dtype);
420 }
421 EXPORT_SYMBOL(cl_page_at);
422
423 #define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
424
425 #define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)                   \
426 ({                                                                      \
427         const struct lu_env        *__env  = (_env);                    \
428         struct cl_page             *__page = (_page);                   \
429         const struct cl_page_slice *__scan;                             \
430         int                         __result;                           \
431         ptrdiff_t                   __op   = (_op);                     \
432         int                        (*__method)_proto;                   \
433                                                                         \
434         __result = 0;                                                   \
435         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
436                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
437                 if (__method != NULL) {                                 \
438                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
439                         if (__result != 0)                              \
440                                 break;                                  \
441                 }                                                       \
442         }                                                               \
443         if (__result > 0)                                               \
444                 __result = 0;                                           \
445         __result;                                                       \
446 })
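/*
 * For example, cl_page_own0() below invokes
 *
 *	CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
 *		       (const struct lu_env *,
 *			const struct cl_page_slice *, struct cl_io *, int),
 *		       io, nonblock);
 *
 * which walks pg->cp_layers top-to-bottom, loads the method stored at offset
 * CL_PAGE_OP(cpo_own) in each slice's cpl_ops, stops at the first layer that
 * returns non-zero, and folds a positive result to 0.
 */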
447
448 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)                   \
449 do {                                                                    \
450         const struct lu_env        *__env  = (_env);                    \
451         struct cl_page             *__page = (_page);                   \
452         const struct cl_page_slice *__scan;                             \
453         ptrdiff_t                   __op   = (_op);                     \
454         void                      (*__method)_proto;                    \
455                                                                         \
456         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
457                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
458                 if (__method != NULL)                                   \
459                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
460         }                                                               \
461 } while (0)
462
463 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)           \
464 do {                                                                    \
465         const struct lu_env        *__env  = (_env);                    \
466         struct cl_page             *__page = (_page);                   \
467         const struct cl_page_slice *__scan;                             \
468         ptrdiff_t                   __op   = (_op);                     \
469         void                      (*__method)_proto;                    \
470                                                                         \
471         /* get to the bottom page. */                                   \
472         list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
473                                     cpl_linkage) {                      \
474                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
475                 if (__method != NULL)                                   \
476                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
477         }                                                               \
478 } while (0)
479
480 static int cl_page_invoke(const struct lu_env *env,
481                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)
482
483 {
484         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
485         ENTRY;
486         RETURN(CL_PAGE_INVOKE(env, page, op,
487                               (const struct lu_env *,
488                                const struct cl_page_slice *, struct cl_io *),
489                               io));
490 }
491
492 static void cl_page_invoid(const struct lu_env *env,
493                            struct cl_io *io, struct cl_page *page, ptrdiff_t op)
494
495 {
496         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
497         ENTRY;
498         CL_PAGE_INVOID(env, page, op,
499                        (const struct lu_env *,
500                         const struct cl_page_slice *, struct cl_io *), io);
501         EXIT;
502 }
503
504 static void cl_page_owner_clear(struct cl_page *page)
505 {
506         ENTRY;
507         if (page->cp_owner != NULL) {
508                 LASSERT(page->cp_owner->ci_owned_nr > 0);
509                 page->cp_owner->ci_owned_nr--;
510                 page->cp_owner = NULL;
511         }
512         EXIT;
513 }
514
515 static void cl_page_owner_set(struct cl_page *page)
516 {
517         ENTRY;
518         LASSERT(page->cp_owner != NULL);
519         page->cp_owner->ci_owned_nr++;
520         EXIT;
521 }
522
523 void cl_page_disown0(const struct lu_env *env,
524                      struct cl_io *io, struct cl_page *pg)
525 {
526         enum cl_page_state state;
527
528         ENTRY;
529         state = pg->cp_state;
530         PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
531         PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
532         cl_page_owner_clear(pg);
533
534         if (state == CPS_OWNED)
535                 cl_page_state_set(env, pg, CPS_CACHED);
536         /*
537          * Completion call-backs are executed in the bottom-up order, so that
538          * the uppermost layer (llite), responsible for VFS/VM interaction,
539          * runs last and can release locks safely.
540          */
541         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
542                                (const struct lu_env *,
543                                 const struct cl_page_slice *, struct cl_io *),
544                                io);
545         EXIT;
546 }
547
548 /**
549  * Returns true iff the page is owned by the given io.
550  */
551 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
552 {
553         struct cl_io *top = cl_io_top((struct cl_io *)io);
554         LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
555         ENTRY;
556         RETURN(pg->cp_state == CPS_OWNED && pg->cp_owner == top);
557 }
558 EXPORT_SYMBOL(cl_page_is_owned);
559
560 /**
561  * Try to own a page by IO.
562  *
563  * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it
564  * into cl_page_state::CPS_OWNED state.
565  *
566  * \pre  !cl_page_is_owned(pg, io)
567  * \post result == 0 iff cl_page_is_owned(pg, io)
568  *
569  * \retval 0   success
570  *
571  * \retval -ve failure, e.g., the page was destroyed (and landed in
572  *             cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
573  *             or the page was owned by another thread, or is in IO.
574  *
575  * \see cl_page_disown()
576  * \see cl_page_operations::cpo_own()
577  * \see cl_page_own_try()
578  * \see cl_page_own
579  */
580 static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
581                         struct cl_page *pg, int nonblock)
582 {
583         int result;
584
585         PINVRNT(env, pg, !cl_page_is_owned(pg, io));
586
587         ENTRY;
588         io = cl_io_top(io);
589
590         if (pg->cp_state == CPS_FREEING) {
591                 result = -ENOENT;
592         } else {
593                 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
594                                         (const struct lu_env *,
595                                          const struct cl_page_slice *,
596                                          struct cl_io *, int),
597                                         io, nonblock);
598                 if (result == 0) {
599                         PASSERT(env, pg, pg->cp_owner == NULL);
600         pg->cp_owner = cl_io_top(io);
601                         cl_page_owner_set(pg);
602                         if (pg->cp_state != CPS_FREEING) {
603                                 cl_page_state_set(env, pg, CPS_OWNED);
604                         } else {
605                                 cl_page_disown0(env, io, pg);
606                                 result = -ENOENT;
607                         }
608                 }
609         }
610         PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
611         RETURN(result);
612 }
613
614 /**
615  * Own a page, might be blocked.
616  *
617  * \see cl_page_own0()
618  */
619 int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
620 {
621         return cl_page_own0(env, io, pg, 0);
622 }
623 EXPORT_SYMBOL(cl_page_own);
624
625 /**
626  * Nonblock version of cl_page_own().
627  *
628  * \see cl_page_own0()
629  */
630 int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
631                     struct cl_page *pg)
632 {
633         return cl_page_own0(env, io, pg, 1);
634 }
635 EXPORT_SYMBOL(cl_page_own_try);
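/*
 * A minimal usage sketch of the ownership protocol, assuming "env", "io" and
 * "pg" are set up by the caller and "io" is the io against which the page is
 * to be owned:
 *
 *	if (cl_page_own(env, io, pg) == 0) {
 *		...operate on the owned page, e.g. cl_page_discard()...
 *		cl_page_disown(env, io, pg);
 *	}
 *
 * cl_page_own() may block waiting for the page to become CPS_CACHED;
 * cl_page_own_try() returns an error instead of blocking.
 */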
636
637
638 /**
639  * Assume page ownership.
640  *
641  * Called when page is already locked by the hosting VM.
642  *
643  * \pre !cl_page_is_owned(pg, io)
644  * \post cl_page_is_owned(pg, io)
645  *
646  * \see cl_page_operations::cpo_assume()
647  */
648 void cl_page_assume(const struct lu_env *env,
649                     struct cl_io *io, struct cl_page *pg)
650 {
651         PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
652
653         ENTRY;
654         io = cl_io_top(io);
655
656         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
657         PASSERT(env, pg, pg->cp_owner == NULL);
658         pg->cp_owner = cl_io_top(io);
659         cl_page_owner_set(pg);
660         cl_page_state_set(env, pg, CPS_OWNED);
661         EXIT;
662 }
663 EXPORT_SYMBOL(cl_page_assume);
664
665 /**
666  * Releases page ownership without unlocking the page.
667  *
668  * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
669  * underlying VM page (as VM is supposed to do this itself).
670  *
671  * \pre   cl_page_is_owned(pg, io)
672  * \post !cl_page_is_owned(pg, io)
673  *
674  * \see cl_page_assume()
675  */
676 void cl_page_unassume(const struct lu_env *env,
677                       struct cl_io *io, struct cl_page *pg)
678 {
679         PINVRNT(env, pg, cl_page_is_owned(pg, io));
680         PINVRNT(env, pg, cl_page_invariant(pg));
681
682         ENTRY;
683         io = cl_io_top(io);
684         cl_page_owner_clear(pg);
685         cl_page_state_set(env, pg, CPS_CACHED);
686         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
687                                (const struct lu_env *,
688                                 const struct cl_page_slice *, struct cl_io *),
689                                io);
690         EXIT;
691 }
692 EXPORT_SYMBOL(cl_page_unassume);
693
694 /**
695  * Releases page ownership.
696  *
697  * Moves page into cl_page_state::CPS_CACHED.
698  *
699  * \pre   cl_page_is_owned(pg, io)
700  * \post !cl_page_is_owned(pg, io)
701  *
702  * \see cl_page_own()
703  * \see cl_page_operations::cpo_disown()
704  */
705 void cl_page_disown(const struct lu_env *env,
706                     struct cl_io *io, struct cl_page *pg)
707 {
708         PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
709                 pg->cp_state == CPS_FREEING);
710
711         ENTRY;
712         io = cl_io_top(io);
713         cl_page_disown0(env, io, pg);
714         EXIT;
715 }
716 EXPORT_SYMBOL(cl_page_disown);
717
718 /**
719  * Called when page is to be removed from the object, e.g., as a result of
720  * truncate.
721  *
722  * Calls cl_page_operations::cpo_discard() top-to-bottom.
723  *
724  * \pre cl_page_is_owned(pg, io)
725  *
726  * \see cl_page_operations::cpo_discard()
727  */
728 void cl_page_discard(const struct lu_env *env,
729                      struct cl_io *io, struct cl_page *pg)
730 {
731         PINVRNT(env, pg, cl_page_is_owned(pg, io));
732         PINVRNT(env, pg, cl_page_invariant(pg));
733
734         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
735 }
736 EXPORT_SYMBOL(cl_page_discard);
737
738 /**
739  * Version of cl_page_delete() that can be called for pages that are not fully
740  * constructed, e.g., in the error-handling path
741  * cl_page_find()->cl_page_delete0(). Doesn't check the page invariant.
742  */
743 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
744 {
745         ENTRY;
746
747         PASSERT(env, pg, pg->cp_state != CPS_FREEING);
748
749         /*
750          * Severe all ways to obtain new pointers to @pg.
751          */
752         cl_page_owner_clear(pg);
753
754         cl_page_state_set0(env, pg, CPS_FREEING);
755
756         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
757                        (const struct lu_env *, const struct cl_page_slice *));
758
759         EXIT;
760 }
761
762 /**
763  * Called when a decision is made to throw page out of memory.
764  *
765  * Notifies all layers about page destruction by calling
766  * cl_page_operations::cpo_delete() method top-to-bottom.
767  *
768  * Moves page into cl_page_state::CPS_FREEING state (this is the only place
769  * where transition to this state happens).
770  *
771  * Eliminates all venues through which new references to the page can be
772  * obtained:
773  *
774  *     - removes page from the radix trees,
775  *
776  *     - breaks linkage from VM page to cl_page.
777  *
778  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
779  * drain after some time, at which point page will be recycled.
780  *
781  * \pre  VM page is locked
782  * \post pg->cp_state == CPS_FREEING
783  *
784  * \see cl_page_operations::cpo_delete()
785  */
786 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
787 {
788         PINVRNT(env, pg, cl_page_invariant(pg));
789         ENTRY;
790         cl_page_delete0(env, pg);
791         EXIT;
792 }
793 EXPORT_SYMBOL(cl_page_delete);
794
795 /**
796  * Marks page up-to-date.
797  *
798  * Calls cl_page_operations::cpo_export() through all layers top-to-bottom. The
799  * layer responsible for VM interaction has to mark/clear the page as up-to-date
800  * according to the \a uptodate argument.
801  *
802  * \see cl_page_operations::cpo_export()
803  */
804 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
805 {
806         PINVRNT(env, pg, cl_page_invariant(pg));
807         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
808                        (const struct lu_env *,
809                         const struct cl_page_slice *, int), uptodate);
810 }
811 EXPORT_SYMBOL(cl_page_export);
812
813 /**
814  * Returns true iff \a pg is VM-locked in a suitable sense by the calling
815  * thread.
816  */
817 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
818 {
819         int result;
820         const struct cl_page_slice *slice;
821
822         ENTRY;
823         slice = container_of(pg->cp_layers.next,
824                              const struct cl_page_slice, cpl_linkage);
825         PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked != NULL);
826         /*
827          * Call ->cpo_is_vmlocked() directly instead of going through
828          * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
829          * cl_page_invariant().
830          */
831         result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
832         PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
833         RETURN(result == -EBUSY);
834 }
835 EXPORT_SYMBOL(cl_page_is_vmlocked);
836
837 static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
838 {
839         ENTRY;
840         RETURN(crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN);
841 }
842
843 static void cl_page_io_start(const struct lu_env *env,
844                              struct cl_page *pg, enum cl_req_type crt)
845 {
846         /*
847          * Page is queued for IO, change its state.
848          */
849         ENTRY;
850         cl_page_owner_clear(pg);
851         cl_page_state_set(env, pg, cl_req_type_state(crt));
852         EXIT;
853 }
854
855 /**
856  * Prepares a page for immediate transfer. cl_page_operations::cpo_prep() is
857  * called top-to-bottom. Every layer either agrees to submit this page (by
858  * returning 0), or requests to omit it (by returning -EALREADY). The layer
859  * handling interactions with the VM also has to inform the VM that the page
860  * is now under transfer.
861  */
862 int cl_page_prep(const struct lu_env *env, struct cl_io *io,
863                  struct cl_page *pg, enum cl_req_type crt)
864 {
865         int result;
866
867         PINVRNT(env, pg, cl_page_is_owned(pg, io));
868         PINVRNT(env, pg, cl_page_invariant(pg));
869         PINVRNT(env, pg, crt < CRT_NR);
870
871         /*
872          * XXX this has to be called bottom-to-top, so that llite can set up
873          * PG_writeback without risking other layers deciding to skip this
874          * page.
875          */
876         if (crt >= CRT_NR)
877                 return -EINVAL;
878         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
879         if (result == 0)
880                 cl_page_io_start(env, pg, crt);
881
882         KLASSERT(ergo(crt == CRT_WRITE && pg->cp_type == CPT_CACHEABLE,
883                       equi(result == 0,
884                            PageWriteback(cl_page_vmpage(pg)))));
885         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
886         return result;
887 }
888 EXPORT_SYMBOL(cl_page_prep);
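/*
 * A minimal sketch of the submission path for an owned page, assuming "env",
 * "io" and "pg" are prepared by the caller (the actual submission machinery
 * lives in osc):
 *
 *	rc = cl_page_prep(env, io, pg, CRT_WRITE);
 *	if (rc == 0) {
 *		...the page is now CPS_PAGEOUT; the transfer engine will call
 *		   cl_page_completion(env, pg, CRT_WRITE, ioret) when the
 *		   transfer finishes, returning the page to CPS_CACHED...
 *	} else if (rc == -EALREADY) {
 *		...some layer asked to skip this page...
 *	}
 */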
889
890 /**
891  * Notify layers about transfer completion.
892  *
893  * Invoked by the transfer sub-system (which is a part of osc) to notify
894  * layers that a transfer, of which this page is a part, has completed.
895  *
896  * Completion call-backs are executed in the bottom-up order, so that the
897  * uppermost layer (llite), responsible for the VFS/VM interaction, runs last
898  * and can release locks safely.
899  *
900  * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
901  * \post pg->cp_state == CPS_CACHED
902  *
903  * \see cl_page_operations::cpo_completion()
904  */
905 void cl_page_completion(const struct lu_env *env,
906                         struct cl_page *pg, enum cl_req_type crt, int ioret)
907 {
908         struct cl_sync_io *anchor = pg->cp_sync_io;
909
910         PASSERT(env, pg, crt < CRT_NR);
911         PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
912
913         ENTRY;
914         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
915         cl_page_state_set(env, pg, CPS_CACHED);
916         if (crt >= CRT_NR)
917                 return;
918         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
919                                (const struct lu_env *,
920                                 const struct cl_page_slice *, int), ioret);
921         if (anchor != NULL) {
922                 LASSERT(pg->cp_sync_io == anchor);
923                 pg->cp_sync_io = NULL;
924                 cl_sync_io_note(env, anchor, ioret);
925         }
926         EXIT;
927 }
928 EXPORT_SYMBOL(cl_page_completion);
929
930 /**
931  * Notify layers that the transfer formation engine decided to yank this page
932  * from the cache and to make it a part of a transfer.
933  *
934  * \pre  pg->cp_state == CPS_CACHED
935  * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
936  *
937  * \see cl_page_operations::cpo_make_ready()
938  */
939 int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
940                        enum cl_req_type crt)
941 {
942         int result;
943
944         PINVRNT(env, pg, crt < CRT_NR);
945
946         ENTRY;
947         if (crt >= CRT_NR)
948                 RETURN(-EINVAL);
949         result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
950                                 (const struct lu_env *,
951                                  const struct cl_page_slice *));
952         if (result == 0) {
953                 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
954                 cl_page_io_start(env, pg, crt);
955         }
956         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
957         RETURN(result);
958 }
959 EXPORT_SYMBOL(cl_page_make_ready);
960
961 /**
962  * Called when a page is being written back at the kernel's initiative.
963  *
964  * \pre  cl_page_is_owned(pg, io)
965  * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
966  *
967  * \see cl_page_operations::cpo_flush()
968  */
969 int cl_page_flush(const struct lu_env *env, struct cl_io *io,
970                   struct cl_page *pg)
971 {
972         int result;
973
974         PINVRNT(env, pg, cl_page_is_owned(pg, io));
975         PINVRNT(env, pg, cl_page_invariant(pg));
976
977         ENTRY;
978
979         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
980
981         CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
982         RETURN(result);
983 }
984 EXPORT_SYMBOL(cl_page_flush);
985
986 /**
987  * Tells transfer engine that only part of a page is to be transmitted.
988  *
989  * \see cl_page_operations::cpo_clip()
990  */
991 void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
992                   int from, int to)
993 {
994         PINVRNT(env, pg, cl_page_invariant(pg));
995
996         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
997         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
998                        (const struct lu_env *,
999                         const struct cl_page_slice *,int, int),
1000                        from, to);
1001 }
1002 EXPORT_SYMBOL(cl_page_clip);
1003
1004 /**
1005  * Prints a human-readable representation of \a pg through \a printer.
1006  */
1007 void cl_page_header_print(const struct lu_env *env, void *cookie,
1008                           lu_printer_t printer, const struct cl_page *pg)
1009 {
1010         (*printer)(env, cookie,
1011                    "page@%p[%d %p %d %d %p]\n",
1012                    pg, atomic_read(&pg->cp_ref), pg->cp_obj,
1013                    pg->cp_state, pg->cp_type,
1014                    pg->cp_owner);
1015 }
1016 EXPORT_SYMBOL(cl_page_header_print);
1017
1018 /**
1019  * Prints a human-readable representation of \a pg through \a printer.
1020  */
1021 void cl_page_print(const struct lu_env *env, void *cookie,
1022                    lu_printer_t printer, const struct cl_page *pg)
1023 {
1024         cl_page_header_print(env, cookie, printer, pg);
1025         CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
1026                        (const struct lu_env *env,
1027                         const struct cl_page_slice *slice,
1028                         void *cookie, lu_printer_t p), cookie, printer);
1029         (*printer)(env, cookie, "end page@%p\n", pg);
1030 }
1031 EXPORT_SYMBOL(cl_page_print);
1032
1033 /**
1034  * Cancel a page which is still in a transfer.
1035  */
1036 int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1037 {
1038         return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1039                               (const struct lu_env *,
1040                                const struct cl_page_slice *));
1041 }
1042
1043 /**
1044  * Converts a page index within object \a obj into a byte offset.
1045  */
1046 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1047 {
1048         return (loff_t)idx << PAGE_SHIFT;
1049 }
1050 EXPORT_SYMBOL(cl_offset);
1051
1052 /**
1053  * Converts a byte offset within object \a obj into a page index.
1054  */
1055 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1056 {
1057         return offset >> PAGE_SHIFT;
1058 }
1059 EXPORT_SYMBOL(cl_index);
1060
1061 size_t cl_page_size(const struct cl_object *obj)
1062 {
1063         return 1UL << PAGE_SHIFT;
1064 }
1065 EXPORT_SYMBOL(cl_page_size);
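/*
 * Worked example for the helpers above, assuming 4KiB pages (PAGE_SHIFT == 12):
 *
 *	cl_offset(obj, 3)     == 3 << 12     == 12288
 *	cl_index(obj, 12288)  == 12288 >> 12 == 3
 *	cl_index(obj, 12300)  == 3              (offsets round down)
 *	cl_page_size(obj)     == 4096
 */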
1066
1067 /**
1068  * Adds page slice to the compound page.
1069  *
1070  * This is called by cl_object_operations::coo_page_init() methods to add
1071  * per-layer state to the page. The new state is added at the end of the
1072  * cl_page::cp_layers list, that is, at the bottom of the stack.
1073  *
1074  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1075  */
1076 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1077                        struct cl_object *obj, pgoff_t index,
1078                        const struct cl_page_operations *ops)
1079 {
1080         ENTRY;
1081         list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1082         slice->cpl_obj  = obj;
1083         slice->cpl_index = index;
1084         slice->cpl_ops  = ops;
1085         slice->cpl_page = page;
1086         EXIT;
1087 }
1088 EXPORT_SYMBOL(cl_page_slice_add);
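/*
 * A minimal sketch of a layer's coo_page_init() method registering its slice.
 * The "foo" layer is hypothetical; foo_page, fp_cl and foo_page_ops are
 * illustrative names, not part of this file:
 *
 *	static int foo_page_init(const struct lu_env *env, struct cl_object *obj,
 *				 struct cl_page *page, pgoff_t index)
 *	{
 *		struct foo_page *fp = ...the layer's per-page state for "page"...;
 *
 *		cl_page_slice_add(page, &fp->fp_cl, obj, index, &foo_page_ops);
 *		return 0;
 *	}
 */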
1089
1090 /**
1091  * Allocate and initialize cl_cache, called by ll_init_sbi().
1092  */
1093 struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
1094 {
1095         struct cl_client_cache  *cache = NULL;
1096
1097         ENTRY;
1098         OBD_ALLOC(cache, sizeof(*cache));
1099         if (cache == NULL)
1100                 RETURN(NULL);
1101
1102         /* Initialize cache data */
1103         atomic_set(&cache->ccc_users, 1);
1104         cache->ccc_lru_max = lru_page_max;
1105         atomic_long_set(&cache->ccc_lru_left, lru_page_max);
1106         spin_lock_init(&cache->ccc_lru_lock);
1107         INIT_LIST_HEAD(&cache->ccc_lru);
1108
1109         /* turn unstable check off by default as it impacts performance */
1110         cache->ccc_unstable_check = 0;
1111         atomic_long_set(&cache->ccc_unstable_nr, 0);
1112         init_waitqueue_head(&cache->ccc_unstable_waitq);
1113
1114         RETURN(cache);
1115 }
1116 EXPORT_SYMBOL(cl_cache_init);
1117
1118 /**
1119  * Increase cl_cache refcount
1120  */
1121 void cl_cache_incref(struct cl_client_cache *cache)
1122 {
1123         atomic_inc(&cache->ccc_users);
1124 }
1125 EXPORT_SYMBOL(cl_cache_incref);
1126
1127 /**
1128  * Decrease the cl_cache refcount and free the cache when the refcount drops
1129  * to 0. Since llite, lov and osc each hold a cl_cache refcount, the free
1130  * does not race. (LU-6173)
1131  */
1132 void cl_cache_decref(struct cl_client_cache *cache)
1133 {
1134         if (atomic_dec_and_test(&cache->ccc_users))
1135                 OBD_FREE(cache, sizeof(*cache));
1136 }
1137 EXPORT_SYMBOL(cl_cache_decref);
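/*
 * A minimal lifetime sketch for the client cache, assuming "lru_page_max" is
 * supplied by the caller (as ll_init_sbi() does):
 *
 *	struct cl_client_cache *cache;
 *
 *	cache = cl_cache_init(lru_page_max);
 *	if (cache == NULL)
 *		return -ENOMEM;
 *	...hand the cache to lower layers, each taking cl_cache_incref()...
 *	cl_cache_decref(cache);	   drops the initial reference from cl_cache_init()
 */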