LU-6854 llite: Do not set writeback for sync write pages
lustre/obdclass/cl_page.c (fs/lustre-release.git)
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * Client Lustre Page.
33  *
34  *   Author: Nikita Danilov <nikita.danilov@sun.com>
35  *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_CLASS
39
40 #include <linux/list.h>
41 #include <libcfs/libcfs.h>
42 #include <obd_class.h>
43 #include <obd_support.h>
44
45 #include <cl_object.h>
46 #include "cl_internal.h"
47
48 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
49
50 #ifdef LIBCFS_DEBUG
51 # define PASSERT(env, page, expr)                                       \
52   do {                                                                    \
53           if (unlikely(!(expr))) {                                      \
54                   CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
55                   LASSERT(0);                                           \
56           }                                                             \
57   } while (0)
58 #else /* !LIBCFS_DEBUG */
59 # define PASSERT(env, page, exp) \
60         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
61 #endif /* !LIBCFS_DEBUG */
62
63 #ifdef CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK
64 # define PINVRNT(env, page, expr)                                       \
65   do {                                                                    \
66           if (unlikely(!(expr))) {                                      \
67                   CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n");    \
68                   LINVRNT(0);                                           \
69           }                                                             \
70   } while (0)
71 #else /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
72 # define PINVRNT(env, page, exp) \
73          ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
74 #endif /* !CONFIG_LUSTRE_DEBUG_EXPENSIVE_CHECK */
75
76 /* Disable page statistics by default due to the huge performance penalty. */
77 #ifdef CONFIG_DEBUG_PAGESTATE_TRACKING
78 #define CS_PAGE_INC(o, item) \
79         atomic_inc(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
80 #define CS_PAGE_DEC(o, item) \
81         atomic_dec(&cl_object_site(o)->cs_pages.cs_stats[CS_##item])
82 #define CS_PAGESTATE_INC(o, state) \
83         atomic_inc(&cl_object_site(o)->cs_pages_state[state])
84 #define CS_PAGESTATE_DEC(o, state) \
85         atomic_dec(&cl_object_site(o)->cs_pages_state[state])
86 #else
87 #define CS_PAGE_INC(o, item)
88 #define CS_PAGE_DEC(o, item)
89 #define CS_PAGESTATE_INC(o, state)
90 #define CS_PAGESTATE_DEC(o, state)
91 #endif
92
93 /**
94  * Internal version of cl_page_get().
95  *
96  * This function can be used to obtain an initial reference to a previously
97  * unreferenced cached object. It can be called only if concurrent page
98  * reclamation is somehow prevented, e.g., by keeping a lock on a VM page,
99  * associated with \a page.
100  *
101  * Use with care! Not exported.
102  */
103 static void cl_page_get_trust(struct cl_page *page)
104 {
105         LASSERT(atomic_read(&page->cp_ref) > 0);
106         atomic_inc(&page->cp_ref);
107 }
108
109 /**
110  * Returns a slice within a page, corresponding to the given layer in the
111  * device stack.
112  *
113  * \see cl_lock_at()
114  */
115 static const struct cl_page_slice *
116 cl_page_at_trusted(const struct cl_page *page,
117                    const struct lu_device_type *dtype)
118 {
119         const struct cl_page_slice *slice;
120         ENTRY;
121
122         list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
123                 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
124                         RETURN(slice);
125         }
126         RETURN(NULL);
127 }
128
129 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
130 {
131         struct cl_object *obj  = page->cp_obj;
132         int pagesize = cl_object_header(obj)->coh_page_bufsize;
133
134         PASSERT(env, page, list_empty(&page->cp_batch));
135         PASSERT(env, page, page->cp_owner == NULL);
136         PASSERT(env, page, page->cp_state == CPS_FREEING);
137
138         ENTRY;
139         while (!list_empty(&page->cp_layers)) {
140                 struct cl_page_slice *slice;
141
142                 slice = list_entry(page->cp_layers.next,
143                                    struct cl_page_slice, cpl_linkage);
144                 list_del_init(page->cp_layers.next);
145                 if (unlikely(slice->cpl_ops->cpo_fini != NULL))
146                         slice->cpl_ops->cpo_fini(env, slice);
147         }
148         CS_PAGE_DEC(obj, total);
149         CS_PAGESTATE_DEC(obj, page->cp_state);
150         lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
151         cl_object_put(env, obj);
152         lu_ref_fini(&page->cp_reference);
153         OBD_FREE(page, pagesize);
154         EXIT;
155 }
156
157 /**
158  * Helper function updating page state. This is the only place in the code
159  * where cl_page::cp_state field is mutated.
160  */
161 static inline void cl_page_state_set_trust(struct cl_page *page,
162                                            enum cl_page_state state)
163 {
164         /* bypass const. */
165         *(enum cl_page_state *)&page->cp_state = state;
166 }
167
168 struct cl_page *cl_page_alloc(const struct lu_env *env,
169                 struct cl_object *o, pgoff_t ind, struct page *vmpage,
170                 enum cl_page_type type)
171 {
172         struct cl_page          *page;
173         struct lu_object_header *head;
174
175         ENTRY;
176         OBD_ALLOC_GFP(page, cl_object_header(o)->coh_page_bufsize,
177                         GFP_NOFS);
178         if (page != NULL) {
179                 int result = 0;
180                 atomic_set(&page->cp_ref, 1);
181                 page->cp_obj = o;
182                 cl_object_get(o);
183                 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
184                                      page);
185                 page->cp_vmpage = vmpage;
186                 cl_page_state_set_trust(page, CPS_CACHED);
187                 page->cp_type = type;
188                 INIT_LIST_HEAD(&page->cp_layers);
189                 INIT_LIST_HEAD(&page->cp_batch);
190                 lu_ref_init(&page->cp_reference);
191                 head = o->co_lu.lo_header;
192                 list_for_each_entry(o, &head->loh_layers,
193                                     co_lu.lo_linkage) {
194                         if (o->co_ops->coo_page_init != NULL) {
195                                 result = o->co_ops->coo_page_init(env, o, page,
196                                                                   ind);
197                                 if (result != 0) {
198                                         cl_page_delete0(env, page);
199                                         cl_page_free(env, page);
200                                         page = ERR_PTR(result);
201                                         break;
202                                 }
203                         }
204                 }
205                 if (result == 0) {
206                         CS_PAGE_INC(o, total);
207                         CS_PAGE_INC(o, create);
208                         CS_PAGESTATE_DEC(o, CPS_CACHED);
209                 }
210         } else {
211                 page = ERR_PTR(-ENOMEM);
212         }
213         RETURN(page);
214 }
215
216 /**
217  * Returns a cl_page with index \a idx at the object \a o, and associated with
218  * the VM page \a vmpage.
219  *
220  * This is the main entry point into the cl_page caching interface. First, the
221  * cache is consulted (via the ->private pointer of the VM page). If the page is
222  * found there, it is returned immediately. Otherwise a new page is allocated
223  * and returned. In either case, an additional reference to the page is acquired.
224  *
225  * \see cl_object_find(), cl_lock_find()
226  */
227 struct cl_page *cl_page_find(const struct lu_env *env,
228                              struct cl_object *o,
229                              pgoff_t idx, struct page *vmpage,
230                              enum cl_page_type type)
231 {
232         struct cl_page          *page = NULL;
233         struct cl_object_header *hdr;
234
235         LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
236         might_sleep();
237
238         ENTRY;
239
240         hdr = cl_object_header(o);
241         CS_PAGE_INC(o, lookup);
242
243         CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
244                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
245         /* fast path. */
246         if (type == CPT_CACHEABLE) {
247                 /* vmpage lock is used to protect the child/parent
248                  * relationship */
249                 KLASSERT(PageLocked(vmpage));
250                 /*
251                  * cl_vmpage_page() can be called here without any locks as
252                  *
253                  *     - "vmpage" is locked (which prevents ->private from
254                  *       concurrent updates), and
255                  *
256                  *     - "o" cannot be destroyed while current thread holds a
257                  *       reference on it.
258                  */
259                 page = cl_vmpage_page(vmpage, o);
260                 if (page != NULL) {
261                         CS_PAGE_INC(o, hit);
262                         RETURN(page);
263                 }
264         }
265
266         /* allocate and initialize cl_page */
267         page = cl_page_alloc(env, o, idx, vmpage, type);
268         RETURN(page);
269 }
270 EXPORT_SYMBOL(cl_page_find);
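
/*
 * Example (illustrative sketch, not used by this file): a typical lookup of
 * the cl_page backing an already locked VM page, as an upper layer might do
 * it.  "env", "obj", "index" and "vmpage" are assumed to be provided by the
 * caller.
 *
 *	struct cl_page *page;
 *
 *	page = cl_page_find(env, obj, index, vmpage, CPT_CACHEABLE);
 *	if (IS_ERR(page))
 *		return PTR_ERR(page);
 *	... use the page under the reference returned above ...
 *	cl_page_put(env, page);
 */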
271
272 static inline int cl_page_invariant(const struct cl_page *pg)
273 {
274         return cl_page_in_use_noref(pg);
275 }
276
277 static void cl_page_state_set0(const struct lu_env *env,
278                                struct cl_page *page, enum cl_page_state state)
279 {
280         enum cl_page_state old;
281
282         /*
283          * Matrix of allowed state transitions [old][new], for sanity
284          * checking.
285          */
286         static const int allowed_transitions[CPS_NR][CPS_NR] = {
287                 [CPS_CACHED] = {
288                         [CPS_CACHED]  = 0,
289                         [CPS_OWNED]   = 1, /* io finds existing cached page */
290                         [CPS_PAGEIN]  = 0,
291                         [CPS_PAGEOUT] = 1, /* write-out from the cache */
292                         [CPS_FREEING] = 1, /* eviction on the memory pressure */
293                 },
294                 [CPS_OWNED] = {
295                         [CPS_CACHED]  = 1, /* release to the cache */
296                         [CPS_OWNED]   = 0,
297                         [CPS_PAGEIN]  = 1, /* start read immediately */
298                         [CPS_PAGEOUT] = 1, /* start write immediately */
299                         [CPS_FREEING] = 1, /* lock invalidation or truncate */
300                 },
301                 [CPS_PAGEIN] = {
302                         [CPS_CACHED]  = 1, /* io completion */
303                         [CPS_OWNED]   = 0,
304                         [CPS_PAGEIN]  = 0,
305                         [CPS_PAGEOUT] = 0,
306                         [CPS_FREEING] = 0,
307                 },
308                 [CPS_PAGEOUT] = {
309                         [CPS_CACHED]  = 1, /* io completion */
310                         [CPS_OWNED]   = 0,
311                         [CPS_PAGEIN]  = 0,
312                         [CPS_PAGEOUT] = 0,
313                         [CPS_FREEING] = 0,
314                 },
315                 [CPS_FREEING] = {
316                         [CPS_CACHED]  = 0,
317                         [CPS_OWNED]   = 0,
318                         [CPS_PAGEIN]  = 0,
319                         [CPS_PAGEOUT] = 0,
320                         [CPS_FREEING] = 0,
321                 }
322         };
323
324         ENTRY;
325         old = page->cp_state;
326         PASSERT(env, page, allowed_transitions[old][state]);
327         CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
328         PASSERT(env, page, page->cp_state == old);
329         PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner != NULL));
330
331         CS_PAGESTATE_DEC(page->cp_obj, page->cp_state);
332         CS_PAGESTATE_INC(page->cp_obj, state);
333         cl_page_state_set_trust(page, state);
334         EXIT;
335 }
336
337 static void cl_page_state_set(const struct lu_env *env,
338                               struct cl_page *page, enum cl_page_state state)
339 {
340         cl_page_state_set0(env, page, state);
341 }
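
/*
 * Illustration (derived from the transition matrix above): typical page life
 * cycles are
 *
 *	read into the cache:   CPS_CACHED -> CPS_OWNED -> CPS_PAGEIN -> CPS_CACHED
 *	write-out from cache:  CPS_CACHED -> CPS_PAGEOUT -> CPS_CACHED
 *	truncate/invalidation: CPS_CACHED -> CPS_OWNED -> CPS_FREEING
 */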
342
343 /**
344  * Acquires an additional reference to a page.
345  *
346  * This can be called only by caller already possessing a reference to \a
347  * page.
348  *
349  * \see cl_object_get(), cl_lock_get().
350  */
351 void cl_page_get(struct cl_page *page)
352 {
353         ENTRY;
354         cl_page_get_trust(page);
355         EXIT;
356 }
357 EXPORT_SYMBOL(cl_page_get);
358
359 /**
360  * Releases a reference to a page.
361  *
362  * When last reference is released, page is returned to the cache, unless it
363  * is in cl_page_state::CPS_FREEING state, in which case it is immediately
364  * destroyed.
365  *
366  * \see cl_object_put(), cl_lock_put().
367  */
368 void cl_page_put(const struct lu_env *env, struct cl_page *page)
369 {
370         ENTRY;
371         CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
372                        atomic_read(&page->cp_ref));
373
374         if (atomic_dec_and_test(&page->cp_ref)) {
375                 LASSERT(page->cp_state == CPS_FREEING);
376
377                 LASSERT(atomic_read(&page->cp_ref) == 0);
378                 PASSERT(env, page, page->cp_owner == NULL);
379                 PASSERT(env, page, list_empty(&page->cp_batch));
380                 /*
381                  * Page is no longer reachable by other threads. Tear
382                  * it down.
383                  */
384                 cl_page_free(env, page);
385         }
386
387         EXIT;
388 }
389 EXPORT_SYMBOL(cl_page_put);
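
/*
 * Example (sketch, assuming the caller already holds a reference on "page"):
 * every cl_page_get() must be balanced by a cl_page_put(); the final put of a
 * page in CPS_FREEING state frees it through cl_page_free().
 *
 *	cl_page_get(page);
 *	... hand the extra reference over to another context ...
 *	cl_page_put(env, page);
 */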
390
391 /**
392  * Returns a cl_page associated with a VM page, and given cl_object.
393  */
394 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
395 {
396         struct cl_page *page;
397
398         ENTRY;
399         KLASSERT(PageLocked(vmpage));
400
401         /*
402          * NOTE: absence of races and liveness of data are guaranteed by page
403  *       lock on a "vmpage". That works because object destruction proceeds
404  *       bottom-to-top.
405          */
406
407         page = (struct cl_page *)vmpage->private;
408         if (page != NULL) {
409                 cl_page_get_trust(page);
410                 LASSERT(page->cp_type == CPT_CACHEABLE);
411         }
412         RETURN(page);
413 }
414 EXPORT_SYMBOL(cl_vmpage_page);
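
/*
 * Example (sketch): translating a locked VM page back into its cl_page, e.g.
 * from an address_space method.  "vmpage" and "clob" are caller-side names;
 * the VM page lock must be held, and a successful lookup returns an extra
 * reference.
 *
 *	struct cl_page *page = cl_vmpage_page(vmpage, clob);
 *
 *	if (page != NULL) {
 *		... use the page ...
 *		cl_page_put(env, page);
 *	}
 */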
415
416 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
417                                        const struct lu_device_type *dtype)
418 {
419         return cl_page_at_trusted(page, dtype);
420 }
421 EXPORT_SYMBOL(cl_page_at);
422
423 #define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
424
425 #define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)                   \
426 ({                                                                      \
427         const struct lu_env        *__env  = (_env);                    \
428         struct cl_page             *__page = (_page);                   \
429         const struct cl_page_slice *__scan;                             \
430         int                         __result;                           \
431         ptrdiff_t                   __op   = (_op);                     \
432         int                        (*__method)_proto;                   \
433                                                                         \
434         __result = 0;                                                   \
435         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
436                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
437                 if (__method != NULL) {                                 \
438                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
439                         if (__result != 0)                              \
440                                 break;                                  \
441                 }                                                       \
442         }                                                               \
443         if (__result > 0)                                               \
444                 __result = 0;                                           \
445         __result;                                                       \
446 })
447
448 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)                   \
449 do {                                                                    \
450         const struct lu_env        *__env  = (_env);                    \
451         struct cl_page             *__page = (_page);                   \
452         const struct cl_page_slice *__scan;                             \
453         ptrdiff_t                   __op   = (_op);                     \
454         void                      (*__method)_proto;                    \
455                                                                         \
456         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
457                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
458                 if (__method != NULL)                                   \
459                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
460         }                                                               \
461 } while (0)
462
463 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)           \
464 do {                                                                    \
465         const struct lu_env        *__env  = (_env);                    \
466         struct cl_page             *__page = (_page);                   \
467         const struct cl_page_slice *__scan;                             \
468         ptrdiff_t                   __op   = (_op);                     \
469         void                      (*__method)_proto;                    \
470                                                                         \
471         /* get to the bottom page. */                                   \
472         list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
473                                     cpl_linkage) {                      \
474                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
475                 if (__method != NULL)                                   \
476                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
477         }                                                               \
478 } while (0)
479
480 static int cl_page_invoke(const struct lu_env *env,
481                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)
482
483 {
484         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
485         ENTRY;
486         RETURN(CL_PAGE_INVOKE(env, page, op,
487                               (const struct lu_env *,
488                                const struct cl_page_slice *, struct cl_io *),
489                               io));
490 }
491
492 static void cl_page_invoid(const struct lu_env *env,
493                            struct cl_io *io, struct cl_page *page, ptrdiff_t op)
494
495 {
496         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
497         ENTRY;
498         CL_PAGE_INVOID(env, page, op,
499                        (const struct lu_env *,
500                         const struct cl_page_slice *, struct cl_io *), io);
501         EXIT;
502 }
503
504 static void cl_page_owner_clear(struct cl_page *page)
505 {
506         ENTRY;
507         if (page->cp_owner != NULL) {
508                 LASSERT(page->cp_owner->ci_owned_nr > 0);
509                 page->cp_owner->ci_owned_nr--;
510                 page->cp_owner = NULL;
511         }
512         EXIT;
513 }
514
515 static void cl_page_owner_set(struct cl_page *page)
516 {
517         ENTRY;
518         LASSERT(page->cp_owner != NULL);
519         page->cp_owner->ci_owned_nr++;
520         EXIT;
521 }
522
523 void cl_page_disown0(const struct lu_env *env,
524                      struct cl_io *io, struct cl_page *pg)
525 {
526         enum cl_page_state state;
527
528         ENTRY;
529         state = pg->cp_state;
530         PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
531         PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
532         cl_page_owner_clear(pg);
533
534         if (state == CPS_OWNED)
535                 cl_page_state_set(env, pg, CPS_CACHED);
536         /*
537          * Disown call-backs are executed in bottom-to-top order, so that the
538          * uppermost layer (llite), responsible for VFS/VM interaction, runs
539          * last and can release locks safely.
540          */
541         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
542                                (const struct lu_env *,
543                                 const struct cl_page_slice *, struct cl_io *),
544                                io);
545         EXIT;
546 }
547
548 /**
549  * Returns true iff the page is owned by the given io.
550  */
551 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
552 {
553         struct cl_io *top = cl_io_top((struct cl_io *)io);
554         LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
555         ENTRY;
556         RETURN(pg->cp_state == CPS_OWNED && pg->cp_owner == top);
557 }
558 EXPORT_SYMBOL(cl_page_is_owned);
559
560 /**
561  * Try to own a page by IO.
562  *
563  * Waits until page is in cl_page_state::CPS_CACHED state, and then switch it
564  * into cl_page_state::CPS_OWNED state.
565  *
566  * \pre  !cl_page_is_owned(pg, io)
567  * \post result == 0 iff cl_page_is_owned(pg, io)
568  *
569  * \retval 0   success
570  *
571  * \retval -ve failure, e.g., page was destroyed (and landed in
572  *             cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
573  *             or the page was owned by another thread, or is in IO.
574  *
575  * \see cl_page_disown()
576  * \see cl_page_operations::cpo_own()
577  * \see cl_page_own_try()
578  * \see cl_page_own
579  */
580 static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
581                         struct cl_page *pg, int nonblock)
582 {
583         int result;
584
585         PINVRNT(env, pg, !cl_page_is_owned(pg, io));
586
587         ENTRY;
588         io = cl_io_top(io);
589
590         if (pg->cp_state == CPS_FREEING) {
591                 result = -ENOENT;
592         } else {
593                 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
594                                         (const struct lu_env *,
595                                          const struct cl_page_slice *,
596                                          struct cl_io *, int),
597                                         io, nonblock);
598                 if (result == 0) {
599                         PASSERT(env, pg, pg->cp_owner == NULL);
600                         pg->cp_owner = cl_io_top(io);
601                         cl_page_owner_set(pg);
602                         if (pg->cp_state != CPS_FREEING) {
603                                 cl_page_state_set(env, pg, CPS_OWNED);
604                         } else {
605                                 cl_page_disown0(env, io, pg);
606                                 result = -ENOENT;
607                         }
608                 }
609         }
610         PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
611         RETURN(result);
612 }
613
614 /**
615  * Own a page; the call may block.
616  *
617  * \see cl_page_own0()
618  */
619 int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
620 {
621         return cl_page_own0(env, io, pg, 0);
622 }
623 EXPORT_SYMBOL(cl_page_own);
624
625 /**
626  * Nonblock version of cl_page_own().
627  *
628  * \see cl_page_own0()
629  */
630 int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
631                     struct cl_page *pg)
632 {
633         return cl_page_own0(env, io, pg, 1);
634 }
635 EXPORT_SYMBOL(cl_page_own_try);
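
/*
 * Example (sketch): owning a cached page before acting on it.  "env", "io"
 * and "page" are assumed to come from the caller; cl_page_own() may block,
 * while cl_page_own_try() fails instead of blocking.
 *
 *	if (cl_page_own(env, io, page) == 0) {
 *		LASSERT(cl_page_is_owned(page, io));
 *		... page is in CPS_OWNED, safe to discard/prep/flush ...
 *		cl_page_disown(env, io, page);
 *	}
 */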
636
637
638 /**
639  * Assume page ownership.
640  *
641  * Called when page is already locked by the hosting VM.
642  *
643  * \pre !cl_page_is_owned(pg, io)
644  * \post cl_page_is_owned(pg, io)
645  *
646  * \see cl_page_operations::cpo_assume()
647  */
648 void cl_page_assume(const struct lu_env *env,
649                     struct cl_io *io, struct cl_page *pg)
650 {
651         PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
652
653         ENTRY;
654         io = cl_io_top(io);
655
656         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
657         PASSERT(env, pg, pg->cp_owner == NULL);
658         pg->cp_owner = cl_io_top(io);
659         cl_page_owner_set(pg);
660         cl_page_state_set(env, pg, CPS_OWNED);
661         EXIT;
662 }
663 EXPORT_SYMBOL(cl_page_assume);
664
665 /**
666  * Releases page ownership without unlocking the page.
667  *
668  * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
669  * underlying VM page (as VM is supposed to do this itself).
670  *
671  * \pre   cl_page_is_owned(pg, io)
672  * \post !cl_page_is_owned(pg, io)
673  *
674  * \see cl_page_assume()
675  */
676 void cl_page_unassume(const struct lu_env *env,
677                       struct cl_io *io, struct cl_page *pg)
678 {
679         PINVRNT(env, pg, cl_page_is_owned(pg, io));
680         PINVRNT(env, pg, cl_page_invariant(pg));
681
682         ENTRY;
683         io = cl_io_top(io);
684         cl_page_owner_clear(pg);
685         cl_page_state_set(env, pg, CPS_CACHED);
686         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
687                                (const struct lu_env *,
688                                 const struct cl_page_slice *, struct cl_io *),
689                                io);
690         EXIT;
691 }
692 EXPORT_SYMBOL(cl_page_unassume);
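
/*
 * Example (sketch): taking ownership of a page whose VM page the caller has
 * already locked (e.g. in ->write_begin()), and releasing that ownership
 * again without unlocking the VM page.
 *
 *	lock_page(vmpage);
 *	cl_page_assume(env, io, page);
 *	... page is CPS_OWNED while the VM page stays locked ...
 *	cl_page_unassume(env, io, page);
 *	unlock_page(vmpage);
 */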
693
694 /**
695  * Releases page ownership.
696  *
697  * Moves page into cl_page_state::CPS_CACHED.
698  *
699  * \pre   cl_page_is_owned(pg, io)
700  * \post !cl_page_is_owned(pg, io)
701  *
702  * \see cl_page_own()
703  * \see cl_page_operations::cpo_disown()
704  */
705 void cl_page_disown(const struct lu_env *env,
706                     struct cl_io *io, struct cl_page *pg)
707 {
708         PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
709                 pg->cp_state == CPS_FREEING);
710
711         ENTRY;
712         io = cl_io_top(io);
713         cl_page_disown0(env, io, pg);
714         EXIT;
715 }
716 EXPORT_SYMBOL(cl_page_disown);
717
718 /**
719  * Called when page is to be removed from the object, e.g., as a result of
720  * truncate.
721  *
722  * Calls cl_page_operations::cpo_discard() top-to-bottom.
723  *
724  * \pre cl_page_is_owned(pg, io)
725  *
726  * \see cl_page_operations::cpo_discard()
727  */
728 void cl_page_discard(const struct lu_env *env,
729                      struct cl_io *io, struct cl_page *pg)
730 {
731         PINVRNT(env, pg, cl_page_is_owned(pg, io));
732         PINVRNT(env, pg, cl_page_invariant(pg));
733
734         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
735 }
736 EXPORT_SYMBOL(cl_page_discard);
737
738 /**
739  * Version of cl_page_delete() that can be called for not fully constructed
740  * pages, e.g. in an error handling cl_page_find()->cl_page_delete0()
741  * path. Doesn't check page invariant.
742  */
743 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
744 {
745         ENTRY;
746
747         PASSERT(env, pg, pg->cp_state != CPS_FREEING);
748
749         /*
750          * Sever all ways to obtain new pointers to @pg.
751          */
752         cl_page_owner_clear(pg);
753
754         cl_page_state_set0(env, pg, CPS_FREEING);
755
756         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
757                        (const struct lu_env *, const struct cl_page_slice *));
758
759         EXIT;
760 }
761
762 /**
763  * Called when a decision is made to throw page out of memory.
764  *
765  * Notifies all layers about page destruction by calling
766  * cl_page_operations::cpo_delete() method top-to-bottom.
767  *
768  * Moves page into cl_page_state::CPS_FREEING state (this is the only place
769  * where transition to this state happens).
770  *
771  * Eliminates all avenues through which new references to the page can be
772  * obtained:
773  *
774  *     - removes page from the radix trees,
775  *
776  *     - breaks linkage from VM page to cl_page.
777  *
778  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
779  * drain after some time, at which point page will be recycled.
780  *
781  * \pre  VM page is locked
782  * \post pg->cp_state == CPS_FREEING
783  *
784  * \see cl_page_operations::cpo_delete()
785  */
786 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
787 {
788         PINVRNT(env, pg, cl_page_invariant(pg));
789         ENTRY;
790         cl_page_delete0(env, pg);
791         EXIT;
792 }
793 EXPORT_SYMBOL(cl_page_delete);
794
795 /**
796  * Marks page up-to-date.
797  *
798  * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
799  * layer responsible for VM interaction has to mark/clear the page as
800  * up-to-date according to the \a uptodate argument.
801  *
802  * \see cl_page_operations::cpo_export()
803  */
804 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
805 {
806         PINVRNT(env, pg, cl_page_invariant(pg));
807         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
808                        (const struct lu_env *,
809                         const struct cl_page_slice *, int), uptodate);
810 }
811 EXPORT_SYMBOL(cl_page_export);
812
813 /**
814  * Returns true iff \a pg is VM locked in a suitable sense by the calling
815  * thread.
816  */
817 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
818 {
819         int result;
820         const struct cl_page_slice *slice;
821
822         ENTRY;
823         slice = container_of(pg->cp_layers.next,
824                              const struct cl_page_slice, cpl_linkage);
825         PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked != NULL);
826         /*
827          * Call ->cpo_is_vmlocked() directly instead of going through
828          * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
829          * cl_page_invariant().
830          */
831         result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
832         PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
833         RETURN(result == -EBUSY);
834 }
835 EXPORT_SYMBOL(cl_page_is_vmlocked);
836
837 static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
838 {
839         ENTRY;
840         RETURN(crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN);
841 }
842
843 static void cl_page_io_start(const struct lu_env *env,
844                              struct cl_page *pg, enum cl_req_type crt)
845 {
846         /*
847          * Page is queued for IO, change its state.
848          */
849         ENTRY;
850         cl_page_owner_clear(pg);
851         cl_page_state_set(env, pg, cl_req_type_state(crt));
852         EXIT;
853 }
854
855 /**
856  * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
857  * called top-to-bottom. Every layer either agrees to submit this page (by
858  * returning 0), or requests to omit this page (by returning -EALREADY). Layer
859  * handling interactions with the VM also has to inform VM that page is under
860  * transfer now.
861  */
862 int cl_page_prep(const struct lu_env *env, struct cl_io *io,
863                  struct cl_page *pg, enum cl_req_type crt)
864 {
865         int result;
866
867         PINVRNT(env, pg, cl_page_is_owned(pg, io));
868         PINVRNT(env, pg, cl_page_invariant(pg));
869         PINVRNT(env, pg, crt < CRT_NR);
870
871         /*
872          * XXX this has to be called bottom-to-top, so that llite can set up
873          * PG_writeback without risking other layers deciding to skip this
874          * page.
875          */
876         if (crt >= CRT_NR)
877                 return -EINVAL;
878         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
879         if (result == 0)
880                 cl_page_io_start(env, pg, crt);
881
882         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
883         return result;
884 }
885 EXPORT_SYMBOL(cl_page_prep);
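
/*
 * Example (sketch): submitting an owned page for immediate write-out; "rc" is
 * a caller-side variable.  On success the page moves to CPS_PAGEOUT and
 * ownership is dropped by cl_page_io_start(); on -EALREADY a layer asked to
 * omit the page and the caller still owns it.
 *
 *	rc = cl_page_prep(env, io, page, CRT_WRITE);
 *	if (rc == 0) {
 *		... the page now belongs to the transfer engine ...
 *	} else if (rc == -EALREADY) {
 *		... nothing to transfer ...
 *		cl_page_disown(env, io, page);
 *	}
 */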
886
887 /**
888  * Notify layers about transfer completion.
889  *
890  * Invoked by transfer sub-system (which is a part of osc) to notify layers
891  * that a transfer, of which this page is a part, has completed.
892  *
893  * Completion call-backs are executed in the bottom-up order, so that
894  * uppermost layer (llite), responsible for the VFS/VM interaction runs last
895  * and can release locks safely.
896  *
897  * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
898  * \post pg->cp_state == CPS_CACHED
899  *
900  * \see cl_page_operations::cpo_completion()
901  */
902 void cl_page_completion(const struct lu_env *env,
903                         struct cl_page *pg, enum cl_req_type crt, int ioret)
904 {
905         struct cl_sync_io *anchor = pg->cp_sync_io;
906
907         PASSERT(env, pg, crt < CRT_NR);
908         PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
909
910         ENTRY;
911         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
912         cl_page_state_set(env, pg, CPS_CACHED);
913         if (crt >= CRT_NR)
914                 return;
915         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
916                                (const struct lu_env *,
917                                 const struct cl_page_slice *, int), ioret);
918         if (anchor != NULL) {
919                 LASSERT(pg->cp_sync_io == anchor);
920                 pg->cp_sync_io = NULL;
921                 cl_sync_io_note(env, anchor, ioret);
922         }
923         EXIT;
924 }
925 EXPORT_SYMBOL(cl_page_completion);
926
927 /**
928  * Notify layers that transfer formation engine decided to yank this page from
929  * the cache and to make it a part of a transfer.
930  *
931  * \pre  pg->cp_state == CPS_CACHED
932  * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
933  *
934  * \see cl_page_operations::cpo_make_ready()
935  */
936 int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
937                        enum cl_req_type crt)
938 {
939         int result;
940
941         PINVRNT(env, pg, crt < CRT_NR);
942
943         ENTRY;
944         if (crt >= CRT_NR)
945                 RETURN(-EINVAL);
946         result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
947                                 (const struct lu_env *,
948                                  const struct cl_page_slice *));
949         if (result == 0) {
950                 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
951                 cl_page_io_start(env, pg, crt);
952         }
953         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
954         RETURN(result);
955 }
956 EXPORT_SYMBOL(cl_page_make_ready);
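
/*
 * Example (sketch): the transfer engine pulling a dirty cached page into a
 * write RPC and later signalling its completion; "ioret" is the RPC result
 * and error handling is omitted.
 *
 *	if (cl_page_make_ready(env, page, CRT_WRITE) == 0) {
 *		... page is now CPS_PAGEOUT and part of the RPC ...
 *	}
 *	... once the RPC finishes ...
 *	cl_page_completion(env, page, CRT_WRITE, ioret);
 */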
957
958 /**
959  * Called when a page is being written back at the kernel's request.
960  *
961  * \pre  cl_page_is_owned(pg, io)
962  * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
963  *
964  * \see cl_page_operations::cpo_flush()
965  */
966 int cl_page_flush(const struct lu_env *env, struct cl_io *io,
967                   struct cl_page *pg)
968 {
969         int result;
970
971         PINVRNT(env, pg, cl_page_is_owned(pg, io));
972         PINVRNT(env, pg, cl_page_invariant(pg));
973
974         ENTRY;
975
976         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
977
978         CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
979         RETURN(result);
980 }
981 EXPORT_SYMBOL(cl_page_flush);
982
983 /**
984  * Tells transfer engine that only part of a page is to be transmitted.
985  *
986  * \see cl_page_operations::cpo_clip()
987  */
988 void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
989                   int from, int to)
990 {
991         PINVRNT(env, pg, cl_page_invariant(pg));
992
993         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
994         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
995                        (const struct lu_env *,
996                         const struct cl_page_slice *, int, int),
997                        from, to);
998 }
999 EXPORT_SYMBOL(cl_page_clip);
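
/*
 * Example (sketch): a partial-page write at the end of a file where only the
 * first "bytes" bytes of the page are valid; "bytes" is a caller-side value.
 *
 *	cl_page_clip(env, page, 0, bytes);
 */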
1000
1001 /**
1002  * Prints a human-readable representation of \a pg through \a printer.
1003  */
1004 void cl_page_header_print(const struct lu_env *env, void *cookie,
1005                           lu_printer_t printer, const struct cl_page *pg)
1006 {
1007         (*printer)(env, cookie,
1008                    "page@%p[%d %p %d %d %p]\n",
1009                    pg, atomic_read(&pg->cp_ref), pg->cp_obj,
1010                    pg->cp_state, pg->cp_type,
1011                    pg->cp_owner);
1012 }
1013 EXPORT_SYMBOL(cl_page_header_print);
1014
1015 /**
1016  * Prints a human-readable representation of \a pg through \a printer.
1017  */
1018 void cl_page_print(const struct lu_env *env, void *cookie,
1019                    lu_printer_t printer, const struct cl_page *pg)
1020 {
1021         cl_page_header_print(env, cookie, printer, pg);
1022         CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
1023                        (const struct lu_env *env,
1024                         const struct cl_page_slice *slice,
1025                         void *cookie, lu_printer_t p), cookie, printer);
1026         (*printer)(env, cookie, "end page@%p\n", pg);
1027 }
1028 EXPORT_SYMBOL(cl_page_print);
1029
1030 /**
1031  * Cancel a page which is still in a transfer.
1032  */
1033 int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1034 {
1035         return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1036                               (const struct lu_env *,
1037                                const struct cl_page_slice *));
1038 }
1039
1040 /**
1041  * Converts a page index within object \a obj into a byte offset.
1042  */
1043 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1044 {
1045         return (loff_t)idx << PAGE_SHIFT;
1046 }
1047 EXPORT_SYMBOL(cl_offset);
1048
1049 /**
1050  * Converts a byte offset within object \a obj into a page index.
1051  */
1052 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1053 {
1054         return offset >> PAGE_SHIFT;
1055 }
1056 EXPORT_SYMBOL(cl_index);
1057
1058 size_t cl_page_size(const struct cl_object *obj)
1059 {
1060         return 1UL << PAGE_SHIFT;
1061 }
1062 EXPORT_SYMBOL(cl_page_size);
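
/*
 * Example (sketch): converting between file offsets and page indices for an
 * object "obj".  With the PAGE_SHIFT based implementation above,
 * cl_index(obj, cl_offset(obj, idx)) == idx for any index.
 *
 *	pgoff_t idx    = cl_index(obj, offset);
 *	loff_t  start  = cl_offset(obj, idx);
 *	size_t  length = cl_page_size(obj);
 *	... the page at "idx" covers bytes [start, start + length) ...
 */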
1063
1064 /**
1065  * Adds page slice to the compound page.
1066  *
1067  * This is called by cl_object_operations::coo_page_init() methods to add a
1068  * per-layer state to the page. New state is added at the end of
1069  * cl_page::cp_layers list, that is, it is at the bottom of the stack.
1070  *
1071  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1072  */
1073 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1074                        struct cl_object *obj, pgoff_t index,
1075                        const struct cl_page_operations *ops)
1076 {
1077         ENTRY;
1078         list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1079         slice->cpl_obj  = obj;
1080         slice->cpl_index = index;
1081         slice->cpl_ops  = ops;
1082         slice->cpl_page = page;
1083         EXIT;
1084 }
1085 EXPORT_SYMBOL(cl_page_slice_add);
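
/*
 * Example (sketch): how a layer's cl_object_operations::coo_page_init()
 * method typically attaches its slice.  "struct foo_page", "foo_page_of()"
 * (the layer's way of locating its part of the coh_page_bufsize buffer) and
 * "foo_page_ops" are hypothetical; real layers (vvp, lov, osc) follow this
 * pattern.  A non-zero return from coo_page_init() makes cl_page_alloc()
 * tear the page down again.
 *
 *	static int foo_page_init(const struct lu_env *env, struct cl_object *obj,
 *				 struct cl_page *page, pgoff_t index)
 *	{
 *		struct foo_page *fp = foo_page_of(obj, page);
 *
 *		cl_page_slice_add(page, &fp->fp_cl, obj, index, &foo_page_ops);
 *		return 0;
 *	}
 */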
1086
1087 /**
1088  * Allocate and initialize cl_cache, called by ll_init_sbi().
1089  */
1090 struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
1091 {
1092         struct cl_client_cache  *cache = NULL;
1093
1094         ENTRY;
1095         OBD_ALLOC(cache, sizeof(*cache));
1096         if (cache == NULL)
1097                 RETURN(NULL);
1098
1099         /* Initialize cache data */
1100         atomic_set(&cache->ccc_users, 1);
1101         cache->ccc_lru_max = lru_page_max;
1102         atomic_long_set(&cache->ccc_lru_left, lru_page_max);
1103         spin_lock_init(&cache->ccc_lru_lock);
1104         INIT_LIST_HEAD(&cache->ccc_lru);
1105
1106         /* turn unstable check off by default as it impacts performance */
1107         cache->ccc_unstable_check = 0;
1108         atomic_long_set(&cache->ccc_unstable_nr, 0);
1109         init_waitqueue_head(&cache->ccc_unstable_waitq);
1110
1111         RETURN(cache);
1112 }
1113 EXPORT_SYMBOL(cl_cache_init);
1114
1115 /**
1116  * Increase cl_cache refcount
1117  */
1118 void cl_cache_incref(struct cl_client_cache *cache)
1119 {
1120         atomic_inc(&cache->ccc_users);
1121 }
1122 EXPORT_SYMBOL(cl_cache_incref);
1123
1124 /**
1125  * Decrease cl_cache refcount and free the cache if refcount=0.
1126  * Since llite, lov and osc each hold a cl_cache refcount,
1127  * the free cannot race with a concurrent user. (LU-6173)
1128  */
1129 void cl_cache_decref(struct cl_client_cache *cache)
1130 {
1131         if (atomic_dec_and_test(&cache->ccc_users))
1132                 OBD_FREE(cache, sizeof(*cache));
1133 }
1134 EXPORT_SYMBOL(cl_cache_decref);
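
/*
 * Example (sketch): the client cache life cycle as seen from llite.  The
 * cache is created with one reference at mount time, additional users (lov,
 * osc) take their own references, and every holder drops its reference on
 * teardown; the last cl_cache_decref() frees the structure.
 *
 *	struct cl_client_cache *cache = cl_cache_init(lru_page_max);
 *
 *	if (cache == NULL)
 *		return -ENOMEM;
 *	cl_cache_incref(cache);		(each additional user)
 *	...
 *	cl_cache_decref(cache);		(every user, including the creator)
 */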