/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Client Lustre Page.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
 */

#define DEBUG_SUBSYSTEM S_CLASS

#include <linux/list.h>
#include <libcfs/libcfs.h>
#include <obd_class.h>
#include <obd_support.h>

#include <cl_object.h>
#include "cl_internal.h"

static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);

#ifdef LIBCFS_DEBUG
# define PASSERT(env, page, expr)                                       \
  do {                                                                  \
          if (unlikely(!(expr))) {                                      \
                  CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
                  LASSERT(0);                                           \
          }                                                             \
  } while (0)
#else /* !LIBCFS_DEBUG */
# define PASSERT(env, page, exp) \
        ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
#endif /* !LIBCFS_DEBUG */

#ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
# define PINVRNT(env, page, expr)                                       \
  do {                                                                  \
          if (unlikely(!(expr))) {                                      \
                  CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
                  LINVRNT(0);                                           \
          }                                                             \
  } while (0)
#else /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
# define PINVRNT(env, page, exp) \
         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
#endif /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */

/* Disable page statistics by default due to huge performance penalty. */
#ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
#define CS_PAGE_INC(o, item) \
        atomic_inc(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
#define CS_PAGE_DEC(o, item) \
        atomic_dec(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
#define CS_PAGESTATE_INC(o, state) \
        atomic_inc(&cl_object_site(o)->cs_pages_state[state])
#define CS_PAGESTATE_DEC(o, state) \
        atomic_dec(&cl_object_site(o)->cs_pages_state[state])
#else
#define CS_PAGE_INC(o, item)
#define CS_PAGE_DEC(o, item)
#define CS_PAGESTATE_INC(o, state)
#define CS_PAGESTATE_DEC(o, state)
#endif

/**
 * Internal version of cl_page_get().
 *
 * This function can be used to obtain an initial reference to a previously
 * unreferenced cached object. It can be called only if concurrent page
 * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
 * associated with \a page.
 *
 * Use with care! Not exported.
 */
static void cl_page_get_trust(struct cl_page *page)
{
        LASSERT(atomic_read(&page->cp_ref) > 0);
        atomic_inc(&page->cp_ref);
}

/**
 * Returns a slice within a page, corresponding to the given layer in the
 * device stack.
 *
 * \see cl_lock_at()
 */
static const struct cl_page_slice *
cl_page_at_trusted(const struct cl_page *page,
                   const struct lu_device_type *dtype)
{
        const struct cl_page_slice *slice;
        ENTRY;

        list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
                if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
                        RETURN(slice);
        }
        RETURN(NULL);
}

static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
        struct cl_object *obj  = page->cp_obj;
        int pagesize = cl_object_header(obj)->coh_page_bufsize;

        PASSERT(env, page, list_empty(&page->cp_batch));
        PASSERT(env, page, page->cp_owner == NULL);
        PASSERT(env, page, page->cp_state == CPS_FREEING);

        ENTRY;
        while (!list_empty(&page->cp_layers)) {
                struct cl_page_slice *slice;

                slice = list_entry(page->cp_layers.next,
                                   struct cl_page_slice, cpl_linkage);
                list_del_init(page->cp_layers.next);
                if (unlikely(slice->cpl_ops->cpo_fini != NULL))
                        slice->cpl_ops->cpo_fini(env, slice);
        }
        CS_PAGE_DEC(obj, total);
        CS_PAGESTATE_DEC(obj, page->cp_state);
        lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
        cl_object_put(env, obj);
        lu_ref_fini(&page->cp_reference);
        OBD_FREE(page, pagesize);
        EXIT;
}

/**
 * Helper function updating page state. This is the only place in the code
 * where cl_page::cp_state field is mutated.
 */
static inline void cl_page_state_set_trust(struct cl_page *page,
                                           enum cl_page_state state)
{
        /* bypass const. */
        *(enum cl_page_state *)&page->cp_state = state;
}

struct cl_page *cl_page_alloc(const struct lu_env *env,
                struct cl_object *o, pgoff_t ind, struct page *vmpage,
                enum cl_page_type type)
{
        struct cl_page          *page;
        struct lu_object_header *head;

        ENTRY;
        OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
                        GFP_NOFS);
        if (page != NULL) {
                int result = 0;
                atomic_set(&page->cp_ref, 1);
                page->cp_obj = o;
                cl_object_get(o);
                lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
                                     page);
                page->cp_vmpage = vmpage;
                cl_page_state_set_trust(page, CPS_CACHED);
                page->cp_type = type;
                INIT_LIST_HEAD(&page->cp_layers);
                INIT_LIST_HEAD(&page->cp_batch);
                lu_ref_init(&page->cp_reference);
                head = o->co_lu.lo_header;
                list_for_each_entry(o, &head->loh_layers,
                                    co_lu.lo_linkage) {
                        if (o->co_ops->coo_page_init != NULL) {
                                result = o->co_ops->coo_page_init(env, o, page,
                                                                  ind);
                                if (result != 0) {
                                        cl_page_delete0(env, page);
                                        cl_page_free(env, page);
                                        page = ERR_PTR(result);
                                        break;
                                }
                        }
                }
                if (result == 0) {
                        CS_PAGE_INC(o, total);
                        CS_PAGE_INC(o, create);
                        CS_PAGESTATE_DEC(o, CPS_CACHED);
                }
        } else {
                page = ERR_PTR(-ENOMEM);
        }
        RETURN(page);
}

/**
 * Returns a cl_page with index \a idx at the object \a o, and associated with
 * the VM page \a vmpage.
 *
 * This is the main entry point into the cl_page caching interface. First, a
 * cache (implemented as a per-object radix tree) is consulted. If the page is
 * found there, it is returned immediately. Otherwise a new page is allocated
 * and returned. In either case, an additional reference to the page is
 * acquired.
 *
 * \see cl_object_find(), cl_lock_find()
 */
struct cl_page *cl_page_find(const struct lu_env *env,
                             struct cl_object *o,
                             pgoff_t idx, struct page *vmpage,
                             enum cl_page_type type)
{
        struct cl_page          *page = NULL;
        struct cl_object_header *hdr;

        LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
        might_sleep();

        ENTRY;

        hdr = cl_object_header(o);
        CS_PAGE_INC(o, lookup);

        CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
               idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
        /* fast path. */
        if (type == CPT_CACHEABLE) {
                /* vmpage lock is used to protect the child/parent
                 * relationship */
                KLASSERT(PageLocked(vmpage));
                /*
                 * cl_vmpage_page() can be called here without any locks as
                 *
                 *     - "vmpage" is locked (which prevents ->private from
                 *       concurrent updates), and
                 *
                 *     - "o" cannot be destroyed while current thread holds a
                 *       reference on it.
                 */
                page = cl_vmpage_page(vmpage, o);
                if (page != NULL) {
                        CS_PAGE_INC(o, hit);
                        RETURN(page);
                }
        }

        /* allocate and initialize cl_page */
        page = cl_page_alloc(env, o, idx, vmpage, type);
        RETURN(page);
}
EXPORT_SYMBOL(cl_page_find);
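
/*
 * Usage sketch (illustrative, not part of the original code): a caller that
 * has env, obj, idx and a locked vmpage at hand typically does
 *
 *      lock_page(vmpage);
 *      page = cl_page_find(env, obj, idx, vmpage, CPT_CACHEABLE);
 *      if (IS_ERR(page))
 *              rc = PTR_ERR(page);
 *      else
 *              cl_page_put(env, page);
 *      unlock_page(vmpage);
 *
 * cl_page_find() never returns NULL: either a referenced cl_page or an
 * ERR_PTR() value comes back, so IS_ERR() is the only check needed.
 */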

static inline int cl_page_invariant(const struct cl_page *pg)
{
        return cl_page_in_use_noref(pg);
}

static void cl_page_state_set0(const struct lu_env *env,
                               struct cl_page *page, enum cl_page_state state)
{
        enum cl_page_state old;

        /*
         * Matrix of allowed state transitions [old][new], for sanity
         * checking.
         */
        static const int allowed_transitions[CPS_NR][CPS_NR] = {
                [CPS_CACHED] = {
                        [CPS_CACHED]  = 0,
                        [CPS_OWNED]   = 1, /* io finds existing cached page */
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 1, /* write-out from the cache */
                        [CPS_FREEING] = 1, /* eviction under memory pressure */
                },
                [CPS_OWNED] = {
                        [CPS_CACHED]  = 1, /* release to the cache */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 1, /* start read immediately */
                        [CPS_PAGEOUT] = 1, /* start write immediately */
                        [CPS_FREEING] = 1, /* lock invalidation or truncate */
                },
                [CPS_PAGEIN] = {
                        [CPS_CACHED]  = 1, /* io completion */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                },
                [CPS_PAGEOUT] = {
                        [CPS_CACHED]  = 1, /* io completion */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                },
                [CPS_FREEING] = {
                        [CPS_CACHED]  = 0,
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                }
        };

        ENTRY;
        old = page->cp_state;
        PASSERT(env, page, allowed_transitions[old][state]);
        CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
        PASSERT(env, page, page->cp_state == old);
        PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner != NULL));

        CS_PAGESTATE_DEC(page->cp_obj, page->cp_state);
        CS_PAGESTATE_INC(page->cp_obj, state);
        cl_page_state_set_trust(page, state);
        EXIT;
}

static void cl_page_state_set(const struct lu_env *env,
                              struct cl_page *page, enum cl_page_state state)
{
        cl_page_state_set0(env, page, state);
}
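
/*
 * For orientation, the allowed_transitions[][] matrix above corresponds to
 * the usual life cycle of a page (a sketch, not an exhaustive enumeration):
 *
 *      CPS_CACHED  --cl_page_own()-------------> CPS_OWNED
 *      CPS_OWNED   --cl_page_disown()----------> CPS_CACHED
 *      CPS_OWNED   --cl_page_prep(CRT_WRITE)---> CPS_PAGEOUT
 *      CPS_PAGEOUT --cl_page_completion()------> CPS_CACHED
 *      CPS_CACHED/CPS_OWNED --cl_page_delete()-> CPS_FREEING
 */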

/**
 * Acquires an additional reference to a page.
 *
 * This can be called only by caller already possessing a reference to \a
 * page.
 *
 * \see cl_object_get(), cl_lock_get().
 */
void cl_page_get(struct cl_page *page)
{
        ENTRY;
        cl_page_get_trust(page);
        EXIT;
}
EXPORT_SYMBOL(cl_page_get);

/**
 * Releases a reference to a page.
 *
 * When last reference is released, page is returned to the cache, unless it
 * is in cl_page_state::CPS_FREEING state, in which case it is immediately
 * destroyed.
 *
 * \see cl_object_put(), cl_lock_put().
 */
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
        ENTRY;
        CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
                       atomic_read(&page->cp_ref));

        if (atomic_dec_and_test(&page->cp_ref)) {
                LASSERT(page->cp_state == CPS_FREEING);

                LASSERT(atomic_read(&page->cp_ref) == 0);
                PASSERT(env, page, page->cp_owner == NULL);
                PASSERT(env, page, list_empty(&page->cp_batch));
                /*
                 * Page is no longer reachable by other threads. Tear
                 * it down.
                 */
                cl_page_free(env, page);
        }

        EXIT;
}
EXPORT_SYMBOL(cl_page_put);
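
/*
 * Reference counting sketch (illustrative): cl_page_get() and cl_page_put()
 * always pair up, and the final cl_page_put() on a CPS_FREEING page is what
 * actually frees it:
 *
 *      cl_page_get(page);
 *      ... hand the page to another context or block ...
 *      cl_page_put(env, page);
 */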

/**
 * Returns a cl_page associated with a VM page, and given cl_object.
 */
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
        struct cl_page *page;

        ENTRY;
        KLASSERT(PageLocked(vmpage));

        /*
         * NOTE: absence of races and liveness of data are guaranteed by the
         *       page lock on "vmpage". That works because object destruction
         *       proceeds bottom-to-top.
         */

        page = (struct cl_page *)vmpage->private;
        if (page != NULL) {
                cl_page_get_trust(page);
                LASSERT(page->cp_type == CPT_CACHEABLE);
        }
        RETURN(page);
}
EXPORT_SYMBOL(cl_vmpage_page);

const struct cl_page_slice *cl_page_at(const struct cl_page *page,
                                       const struct lu_device_type *dtype)
{
        return cl_page_at_trusted(page, dtype);
}
EXPORT_SYMBOL(cl_page_at);

#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)

#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)                   \
({                                                                      \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        int                         __result;                           \
        ptrdiff_t                   __op   = (_op);                     \
        int                        (*__method)_proto;                   \
                                                                        \
        __result = 0;                                                   \
        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
                __method = *(void **)((char *)__scan->cpl_ops +  __op); \
                if (__method != NULL) {                                 \
                        __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
                        if (__result != 0)                              \
                                break;                                  \
                }                                                       \
        }                                                               \
        if (__result > 0)                                               \
                __result = 0;                                           \
        __result;                                                       \
})

#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)                   \
do {                                                                    \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        ptrdiff_t                   __op   = (_op);                     \
        void                      (*__method)_proto;                    \
                                                                        \
        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
                __method = *(void **)((char *)__scan->cpl_ops +  __op); \
                if (__method != NULL)                                   \
                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
        }                                                               \
} while (0)

#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)           \
do {                                                                    \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        ptrdiff_t                   __op   = (_op);                     \
        void                      (*__method)_proto;                    \
                                                                        \
        /* get to the bottom page. */                                   \
        list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
                                    cpl_linkage) {                      \
                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
                if (__method != NULL)                                   \
                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
        }                                                               \
} while (0)
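
/*
 * The three macros above implement per-layer method dispatch: CL_PAGE_OP()
 * turns a method name into a byte offset inside struct cl_page_operations,
 * and the invokers load the function pointer found at that offset in each
 * slice's cpl_ops.  For example, the call used by cl_page_own0() below,
 *
 *      CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
 *                     (const struct lu_env *,
 *                      const struct cl_page_slice *, struct cl_io *, int),
 *                     io, nonblock);
 *
 * walks cp_layers top to bottom and calls every non-NULL ->cpo_own() method
 * until one of them returns non-zero.
 */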

static int cl_page_invoke(const struct lu_env *env,
                          struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
        ENTRY;
        RETURN(CL_PAGE_INVOKE(env, page, op,
                              (const struct lu_env *,
                               const struct cl_page_slice *, struct cl_io *),
                              io));
}

static void cl_page_invoid(const struct lu_env *env,
                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
        ENTRY;
        CL_PAGE_INVOID(env, page, op,
                       (const struct lu_env *,
                        const struct cl_page_slice *, struct cl_io *), io);
        EXIT;
}

static void cl_page_owner_clear(struct cl_page *page)
{
        ENTRY;
        if (page->cp_owner != NULL) {
                LASSERT(page->cp_owner->ci_owned_nr > 0);
                page->cp_owner->ci_owned_nr--;
                page->cp_owner = NULL;
        }
        EXIT;
}

static void cl_page_owner_set(struct cl_page *page)
{
        ENTRY;
        LASSERT(page->cp_owner != NULL);
        page->cp_owner->ci_owned_nr++;
        EXIT;
}

void cl_page_disown0(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
{
        enum cl_page_state state;

        ENTRY;
        state = pg->cp_state;
        PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
        PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
        cl_page_owner_clear(pg);

        if (state == CPS_OWNED)
                cl_page_state_set(env, pg, CPS_CACHED);
        /*
         * Completion call-backs are executed in the bottom-up order, so that
         * uppermost layer (llite), responsible for VFS/VM interaction runs
         * last and can release locks safely.
         */
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
                               (const struct lu_env *,
                                const struct cl_page_slice *, struct cl_io *),
                               io);
        EXIT;
}

/**
 * Returns true iff the page is owned by the given io.
 */
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
        struct cl_io *top = cl_io_top((struct cl_io *)io);
        LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
        ENTRY;
        RETURN(pg->cp_state == CPS_OWNED && pg->cp_owner == top);
}
EXPORT_SYMBOL(cl_page_is_owned);

/**
 * Try to own a page by IO.
 *
 * Waits until the page is in cl_page_state::CPS_CACHED state, and then
 * switches it into cl_page_state::CPS_OWNED state.
 *
 * \pre  !cl_page_is_owned(pg, io)
 * \post result == 0 iff cl_page_is_owned(pg, io)
 *
 * \retval 0   success
 *
 * \retval -ve failure, e.g., the page was destroyed (and landed in
 *             cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
 *             or the page was owned by another thread, or is in IO.
 *
 * \see cl_page_disown()
 * \see cl_page_operations::cpo_own()
 * \see cl_page_own_try()
 * \see cl_page_own
 */
static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
                        struct cl_page *pg, int nonblock)
{
        int result;

        PINVRNT(env, pg, !cl_page_is_owned(pg, io));

        ENTRY;
        io = cl_io_top(io);

        if (pg->cp_state == CPS_FREEING) {
                result = -ENOENT;
        } else {
                result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
                                        (const struct lu_env *,
                                         const struct cl_page_slice *,
                                         struct cl_io *, int),
                                        io, nonblock);
                if (result == 0) {
                        PASSERT(env, pg, pg->cp_owner == NULL);
                        pg->cp_owner = cl_io_top(io);
                        cl_page_owner_set(pg);
                        if (pg->cp_state != CPS_FREEING) {
                                cl_page_state_set(env, pg, CPS_OWNED);
                        } else {
                                cl_page_disown0(env, io, pg);
                                result = -ENOENT;
                        }
                }
        }
        PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
        RETURN(result);
}

/**
 * Own a page, might be blocked.
 *
 * \see cl_page_own0()
 */
int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
{
        return cl_page_own0(env, io, pg, 0);
}
EXPORT_SYMBOL(cl_page_own);

/**
 * Nonblock version of cl_page_own().
 *
 * \see cl_page_own0()
 */
int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
                    struct cl_page *pg)
{
        return cl_page_own0(env, io, pg, 1);
}
EXPORT_SYMBOL(cl_page_own_try);
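
/*
 * Ownership usage sketch (illustrative, assuming env, io and pg were set up
 * by the caller): a page has to be owned before it can be discarded, flushed
 * or queued for immediate transfer, and is disowned afterwards:
 *
 *      if (cl_page_own(env, io, pg) == 0) {
 *              cl_page_discard(env, io, pg);
 *              cl_page_disown(env, io, pg);
 *      }
 *
 * A non-zero result (e.g. -ENOENT for a page that entered CPS_FREEING in the
 * meantime) simply means the page should be skipped.
 */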


/**
 * Assume page ownership.
 *
 * Called when page is already locked by the hosting VM.
 *
 * \pre !cl_page_is_owned(pg, io)
 * \post cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_assume()
 */
void cl_page_assume(const struct lu_env *env,
                    struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));

        ENTRY;
        io = cl_io_top(io);

        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
        PASSERT(env, pg, pg->cp_owner == NULL);
        pg->cp_owner = cl_io_top(io);
        cl_page_owner_set(pg);
        cl_page_state_set(env, pg, CPS_OWNED);
        EXIT;
}
EXPORT_SYMBOL(cl_page_assume);

/**
 * Releases page ownership without unlocking the page.
 *
 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
 * underlying VM page (as VM is supposed to do this itself).
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_assume()
 */
void cl_page_unassume(const struct lu_env *env,
                      struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        ENTRY;
        io = cl_io_top(io);
        cl_page_owner_clear(pg);
        cl_page_state_set(env, pg, CPS_CACHED);
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
                               (const struct lu_env *,
                                const struct cl_page_slice *, struct cl_io *),
                               io);
        EXIT;
}
EXPORT_SYMBOL(cl_page_unassume);

/**
 * Releases page ownership.
 *
 * Moves page into cl_page_state::CPS_CACHED.
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_own()
 * \see cl_page_operations::cpo_disown()
 */
void cl_page_disown(const struct lu_env *env,
                    struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
                pg->cp_state == CPS_FREEING);

        ENTRY;
        io = cl_io_top(io);
        cl_page_disown0(env, io, pg);
        EXIT;
}
EXPORT_SYMBOL(cl_page_disown);

/**
 * Called when page is to be removed from the object, e.g., as a result of
 * truncate.
 *
 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 *
 * \pre cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_discard()
 */
void cl_page_discard(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
}
EXPORT_SYMBOL(cl_page_discard);

/**
 * Version of cl_page_delete() that can be called for not fully constructed
 * pages, e.g. in an error handling cl_page_find()->cl_page_delete0()
 * path. Doesn't check page invariant.
 */
static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
{
        ENTRY;

        PASSERT(env, pg, pg->cp_state != CPS_FREEING);

        /*
         * Sever all ways to obtain new pointers to @pg.
         */
        cl_page_owner_clear(pg);

        cl_page_state_set0(env, pg, CPS_FREEING);

        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
                       (const struct lu_env *, const struct cl_page_slice *));

        EXIT;
}

/**
 * Called when a decision is made to throw page out of memory.
 *
 * Notifies all layers about page destruction by calling
 * cl_page_operations::cpo_delete() method top-to-bottom.
 *
 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
 * where transition to this state happens).
 *
 * Eliminates all venues through which new references to the page can be
 * obtained:
 *
 *     - removes page from the radix trees,
 *
 *     - breaks linkage from VM page to cl_page.
 *
 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 * drain after some time, at which point page will be recycled.
 *
 * \pre  VM page is locked
 * \post pg->cp_state == CPS_FREEING
 *
 * \see cl_page_operations::cpo_delete()
 */
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_invariant(pg));
        ENTRY;
        cl_page_delete0(env, pg);
        EXIT;
}
EXPORT_SYMBOL(cl_page_delete);

/**
 * Marks page up-to-date.
 *
 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
 * layer responsible for VM interaction has to mark/clear the page as
 * up-to-date according to the \a uptodate argument.
 *
 * \see cl_page_operations::cpo_export()
 */
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
{
        PINVRNT(env, pg, cl_page_invariant(pg));
        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
                       (const struct lu_env *,
                        const struct cl_page_slice *, int), uptodate);
}
EXPORT_SYMBOL(cl_page_export);

/**
 * Returns true iff \a pg is VM-locked in a suitable sense by the calling
 * thread.
 */
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
{
        int result;
        const struct cl_page_slice *slice;

        ENTRY;
        slice = container_of(pg->cp_layers.next,
                             const struct cl_page_slice, cpl_linkage);
        PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked != NULL);
        /*
         * Call ->cpo_is_vmlocked() directly instead of going through
         * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
         * cl_page_invariant().
         */
        result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
        PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
        RETURN(result == -EBUSY);
}
EXPORT_SYMBOL(cl_page_is_vmlocked);

static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
{
        ENTRY;
        RETURN(crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN);
}

static void cl_page_io_start(const struct lu_env *env,
                             struct cl_page *pg, enum cl_req_type crt)
{
        /*
         * Page is queued for IO, change its state.
         */
        ENTRY;
        cl_page_owner_clear(pg);
        cl_page_state_set(env, pg, cl_req_type_state(crt));
        EXIT;
}

/**
 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
 * called top-to-bottom. Every layer either agrees to submit this page (by
 * returning 0), or requests to omit this page (by returning -EALREADY). Layer
 * handling interactions with the VM also has to inform VM that page is under
 * transfer now.
 */
int cl_page_prep(const struct lu_env *env, struct cl_io *io,
                 struct cl_page *pg, enum cl_req_type crt)
{
        int result;

        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));
        PINVRNT(env, pg, crt < CRT_NR);

        /*
         * XXX this has to be called bottom-to-top, so that llite can set up
         * PG_writeback without risking other layers deciding to skip this
         * page.
         */
        if (crt >= CRT_NR)
                return -EINVAL;
        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
        if (result == 0)
                cl_page_io_start(env, pg, crt);

        KLASSERT(ergo(crt == CRT_WRITE && pg->cp_type == CPT_CACHEABLE,
                      equi(result == 0,
                           PageWriteback(cl_page_vmpage(pg)))));
        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
        return result;
}
EXPORT_SYMBOL(cl_page_prep);
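
/*
 * Transfer sketch (illustrative): for a page picked up by write-back the
 * calls around cl_page_prep() are roughly
 *
 *      cl_page_own(env, io, pg);
 *      rc = cl_page_prep(env, io, pg, CRT_WRITE);      moves pg to CPS_PAGEOUT
 *      ... the RPC is sent; on completion the transfer engine calls ...
 *      cl_page_completion(env, pg, CRT_WRITE, ioret);  back to CPS_CACHED
 *
 * A layer returning -EALREADY from ->cpo_prep() means the page does not need
 * to be submitted as part of this transfer.
 */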

/**
 * Notify layers about transfer completion.
 *
 * Invoked by the transfer sub-system (which is a part of osc) to notify layers
 * that a transfer, of which this page is a part, has completed.
 *
 * Completion call-backs are executed in the bottom-up order, so that
 * uppermost layer (llite), responsible for the VFS/VM interaction runs last
 * and can release locks safely.
 *
 * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 * \post pg->cp_state == CPS_CACHED
 *
 * \see cl_page_operations::cpo_completion()
 */
void cl_page_completion(const struct lu_env *env,
                        struct cl_page *pg, enum cl_req_type crt, int ioret)
{
        struct cl_sync_io *anchor = pg->cp_sync_io;

        PASSERT(env, pg, crt < CRT_NR);
        PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));

        ENTRY;
        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
        cl_page_state_set(env, pg, CPS_CACHED);
        if (crt >= CRT_NR)
                return;
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
                               (const struct lu_env *,
                                const struct cl_page_slice *, int), ioret);
        if (anchor != NULL) {
                LASSERT(pg->cp_sync_io == anchor);
                pg->cp_sync_io = NULL;
                cl_sync_io_note(env, anchor, ioret);
        }
        EXIT;
}
EXPORT_SYMBOL(cl_page_completion);

/**
 * Notify layers that transfer formation engine decided to yank this page from
 * the cache and to make it a part of a transfer.
 *
 * \pre  pg->cp_state == CPS_CACHED
 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 *
 * \see cl_page_operations::cpo_make_ready()
 */
int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
                       enum cl_req_type crt)
{
        int result;

        PINVRNT(env, pg, crt < CRT_NR);

        ENTRY;
        if (crt >= CRT_NR)
                RETURN(-EINVAL);
        result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
                                (const struct lu_env *,
                                 const struct cl_page_slice *));
        if (result == 0) {
                PASSERT(env, pg, pg->cp_state == CPS_CACHED);
                cl_page_io_start(env, pg, crt);
        }
        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
        RETURN(result);
}
EXPORT_SYMBOL(cl_page_make_ready);

/**
 * Called when a page is being written back at the kernel's request.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
 *
 * \see cl_page_operations::cpo_flush()
 */
int cl_page_flush(const struct lu_env *env, struct cl_io *io,
                  struct cl_page *pg)
{
        int result;

        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        ENTRY;

        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
        RETURN(result);
}
EXPORT_SYMBOL(cl_page_flush);

/**
 * Tells transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
 */
void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
                  int from, int to)
{
        PINVRNT(env, pg, cl_page_invariant(pg));

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
                       (const struct lu_env *,
                        const struct cl_page_slice *, int, int),
                       from, to);
}
EXPORT_SYMBOL(cl_page_clip);

/**
 * Prints a human readable representation of \a pg through \a printer.
 */
void cl_page_header_print(const struct lu_env *env, void *cookie,
                          lu_printer_t printer, const struct cl_page *pg)
{
        (*printer)(env, cookie,
                   "page@%p[%d %p %d %d %p]\n",
                   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
                   pg->cp_state, pg->cp_type,
                   pg->cp_owner);
}
EXPORT_SYMBOL(cl_page_header_print);

/**
 * Prints a human readable representation of \a pg through \a printer.
 */
void cl_page_print(const struct lu_env *env, void *cookie,
                   lu_printer_t printer, const struct cl_page *pg)
{
        cl_page_header_print(env, cookie, printer, pg);
        CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
                       (const struct lu_env *env,
                        const struct cl_page_slice *slice,
                        void *cookie, lu_printer_t p), cookie, printer);
        (*printer)(env, cookie, "end page@%p\n", pg);
}
EXPORT_SYMBOL(cl_page_print);

/**
 * Cancel a page which is still in a transfer.
 */
int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
{
        return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
                              (const struct lu_env *,
                               const struct cl_page_slice *));
}

/**
 * Converts a page index into a byte offset within object \a obj.
 */
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
{
        return (loff_t)idx << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_offset);

/**
 * Converts a byte offset within object \a obj into a page index.
 */
pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
{
        return offset >> PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_index);

size_t cl_page_size(const struct cl_object *obj)
{
        return 1UL << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_page_size);
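
/*
 * Worked example (assuming the common PAGE_SHIFT == 12, i.e. 4KiB pages):
 * cl_offset(obj, 3) == 3 << 12 == 12288, while cl_index(obj, 12289) ==
 * 12289 >> 12 == 3, i.e. any offset inside a page maps back to that page's
 * index; cl_page_size() is then 4096.
 */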

/**
 * Adds page slice to the compound page.
 *
 * This is called by cl_object_operations::coo_page_init() methods to add a
 * per-layer state to the page. New state is added at the end of
 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
 *
 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
 */
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
                       struct cl_object *obj, pgoff_t index,
                       const struct cl_page_operations *ops)
{
        ENTRY;
        list_add_tail(&slice->cpl_linkage, &page->cp_layers);
        slice->cpl_obj  = obj;
        slice->cpl_index = index;
        slice->cpl_ops  = ops;
        slice->cpl_page = page;
        EXIT;
}
EXPORT_SYMBOL(cl_page_slice_add);
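
/*
 * Sketch of a layer's ->coo_page_init() method showing the intended use of
 * cl_page_slice_add() (hypothetical "foo" layer; struct foo_page,
 * foo_page_ops and the way the slice is located are illustrative only):
 *
 *      static int foo_page_init(const struct lu_env *env,
 *                               struct cl_object *obj,
 *                               struct cl_page *page, pgoff_t index)
 *      {
 *              struct foo_page *fp = cl_object_page_slice(obj, page);
 *
 *              cl_page_slice_add(page, &fp->fp_cl, obj, index, &foo_page_ops);
 *              return 0;
 *      }
 */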

/**
 * Allocate and initialize cl_cache, called by ll_init_sbi().
 */
struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
{
        struct cl_client_cache  *cache = NULL;

        ENTRY;
        OBD_ALLOC(cache, sizeof(*cache));
        if (cache == NULL)
                RETURN(NULL);

        /* Initialize cache data */
        atomic_set(&cache->ccc_users, 1);
        cache->ccc_lru_max = lru_page_max;
        atomic_long_set(&cache->ccc_lru_left, lru_page_max);
        spin_lock_init(&cache->ccc_lru_lock);
        INIT_LIST_HEAD(&cache->ccc_lru);

        /* turn unstable check off by default as it impacts performance */
        cache->ccc_unstable_check = 0;
        atomic_long_set(&cache->ccc_unstable_nr, 0);
        init_waitqueue_head(&cache->ccc_unstable_waitq);

        RETURN(cache);
}
EXPORT_SYMBOL(cl_cache_init);

/**
 * Increase cl_cache refcount
 */
void cl_cache_incref(struct cl_client_cache *cache)
{
        atomic_inc(&cache->ccc_users);
}
EXPORT_SYMBOL(cl_cache_incref);

/**
 * Decrease cl_cache refcount and free the cache if the refcount reaches 0.
 * Since llite, lov and osc all hold a cl_cache refcount,
 * the free will not cause a race (LU-6173).
 */
void cl_cache_decref(struct cl_client_cache *cache)
{
        if (atomic_dec_and_test(&cache->ccc_users))
                OBD_FREE(cache, sizeof(*cache));
}
EXPORT_SYMBOL(cl_cache_decref);
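
/*
 * Client cache life-cycle sketch (illustrative): the mount code allocates the
 * cache, each additional user takes a reference, and the last
 * cl_cache_decref() frees it:
 *
 *      cache = cl_cache_init(lru_page_max);
 *      if (cache == NULL)
 *              return -ENOMEM;
 *      cl_cache_incref(cache);         another layer starts sharing the cache
 *      ...
 *      cl_cache_decref(cache);         that layer drops its reference
 *      cl_cache_decref(cache);         last reference, the cache is freed
 */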