-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
*/
#define DEBUG_SUBSYSTEM S_CLASS
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
#include <libcfs/libcfs.h>
#include <obd_class.h>
/**
 * Internal version of cl_page_top(); it should be called with the page referenced,
- * or coh_page_guard held.
+ * or cp_lock held.
*/
static struct cl_page *cl_page_top_trusted(struct cl_page *page)
{
const struct cl_page_slice *slice;
#ifdef INVARIANT_CHECK
- struct cl_object_header *ch = cl_object_header(page->cp_obj);
-
if (!cfs_atomic_read(&page->cp_ref))
- LASSERT_SPIN_LOCKED(&ch->coh_page_guard);
+ LASSERT_SPIN_LOCKED(&page->cp_lock);
#endif
ENTRY;
*
* Gang tree lookup (radix_tree_gang_lookup()) optimization is absolutely
* crucial in the face of [offset, EOF] locks.
+ *
+ * At least one page is passed to \a cb unless there is no covered page.
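+ *
+ * Returns CLP_GANG_OKAY when the whole range has been scanned,
+ * CLP_GANG_RESCHED when the caller should reschedule and call again, or
+ * the first non-CLP_GANG_OKAY value returned by \a cb.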
*/
-void cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
- struct cl_io *io, pgoff_t start, pgoff_t end,
- struct cl_page_list *queue, int nonblock,
- int *resched)
+int cl_page_gang_lookup(const struct lu_env *env, struct cl_object *obj,
+ struct cl_io *io, pgoff_t start, pgoff_t end,
+ cl_page_gang_cb_t cb, void *cbdata)
{
struct cl_object_header *hdr;
struct cl_page *page;
unsigned int nr;
unsigned int i;
unsigned int j;
- int (*page_own)(const struct lu_env *env,
- struct cl_io *io,
- struct cl_page *pg);
+ int res = CLP_GANG_OKAY;
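+ /* tree_lock records whether coh_page_guard is currently held, so the
+ * lock is released exactly once on exit. */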
+ int tree_lock = 1;
ENTRY;
- if (resched != NULL)
- *resched = 0;
- page_own = nonblock ? cl_page_own_try : cl_page_own;
-
idx = start;
hdr = cl_object_header(obj);
pvec = cl_env_info(env)->clt_pvec;
cfs_spin_lock(&hdr->coh_page_guard);
while ((nr = radix_tree_gang_lookup(&hdr->coh_tree, (void **)pvec,
idx, CLT_PVEC_SIZE)) > 0) {
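+ /* Set once a page beyond \a end is seen; the scan then stops
+ * after this batch. */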
+ int end_of_region = 0;
idx = pvec[nr - 1]->cp_index + 1;
for (i = 0, j = 0; i < nr; ++i) {
page = pvec[i];
pvec[i] = NULL;
LASSERT(page->cp_type == CPT_CACHEABLE);
- if (page->cp_index > end)
+ if (page->cp_index > end) {
+ end_of_region = 1;
break;
+ }
if (page->cp_state == CPS_FREEING)
continue;
*/
cl_page_get_trust(page);
lu_ref_add_atomic(&page->cp_reference,
- "page_list", cfs_current());
+ "gang_lookup", cfs_current());
pvec[j++] = page;
}
* error in the latter case).
*/
cfs_spin_unlock(&hdr->coh_page_guard);
+ tree_lock = 0;
+
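+ /* Hand the batch to the callback without coh_page_guard held; once
+ * \a cb returns a non-CLP_GANG_OKAY value, the remaining pages only
+ * have their references dropped. */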
for (i = 0; i < j; ++i) {
page = pvec[i];
- if (page_own(env, io, page) == 0)
- cl_page_list_add(queue, page);
+ if (res == CLP_GANG_OKAY)
+ res = (*cb)(env, io, page, cbdata);
lu_ref_del(&page->cp_reference,
- "page_list", cfs_current());
+ "gang_lookup", cfs_current());
cl_page_put(env, page);
}
- cfs_spin_lock(&hdr->coh_page_guard);
- if (nr < CLT_PVEC_SIZE)
+ if (nr < CLT_PVEC_SIZE || end_of_region)
break;
- if (resched != NULL && cfs_need_resched()) {
- *resched = 1;
+
+ if (res == CLP_GANG_OKAY && cfs_need_resched())
+ res = CLP_GANG_RESCHED;
+ if (res != CLP_GANG_OKAY)
break;
- }
+
+ cfs_spin_lock(&hdr->coh_page_guard);
+ tree_lock = 1;
}
- cfs_spin_unlock(&hdr->coh_page_guard);
- EXIT;
+ if (tree_lock)
+ cfs_spin_unlock(&hdr->coh_page_guard);
+ RETURN(res);
}
EXPORT_SYMBOL(cl_page_gang_lookup);
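+
+/*
+ * Usage sketch (illustrative only; my_own_cb and the queue variable are
+ * hypothetical): a caller that collects owned pages and retries when the
+ * lookup asks for a reschedule.
+ *
+ *	static int my_own_cb(const struct lu_env *env, struct cl_io *io,
+ *			     struct cl_page *page, void *cbdata)
+ *	{
+ *		struct cl_page_list *queue = cbdata;
+ *
+ *		if (cl_page_own(env, io, page) == 0)
+ *			cl_page_list_add(queue, page);
+ *		return CLP_GANG_OKAY;
+ *	}
+ *
+ *	do {
+ *		rc = cl_page_gang_lookup(env, obj, io, start, end,
+ *					 my_own_cb, queue);
+ *		if (rc == CLP_GANG_RESCHED)
+ *			cfs_cond_resched();
+ *	} while (rc == CLP_GANG_RESCHED);
+ */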
"cl_page", page);
page->cp_index = ind;
cl_page_state_set_trust(page, CPS_CACHED);
+ cfs_spin_lock_init(&page->cp_lock);
page->cp_type = type;
CFS_INIT_LIST_HEAD(&page->cp_layers);
CFS_INIT_LIST_HEAD(&page->cp_batch);
struct cl_site *site = cl_object_site(o);
int err;
- LINVRNT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
+ LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
cfs_might_sleep();
ENTRY;
idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
/* fast path. */
if (type == CPT_CACHEABLE) {
+ /* cl_page::cp_lock is used to protect the page state and
+ * refcount, but an external lock is needed to protect the
+ * child/parent relationship, so the vmpage lock must be held
+ * for this purpose. */
+ KLASSERT(PageLocked(vmpage));
/*
* cl_vmpage_page() can be called here without any locks as
*
* Either page is early in initialization (has neither child
* nor parent yet), or it is in the object radix tree.
*/
- ergo(pg->cp_state < CPS_FREEING,
+ ergo(pg->cp_state < CPS_FREEING && pg->cp_type == CPT_CACHEABLE,
(void *)radix_tree_lookup(&header->coh_tree,
pg->cp_index) == pg ||
(child == NULL && parent == NULL));
static void cl_page_state_set(const struct lu_env *env,
struct cl_page *page, enum cl_page_state state)
{
- PINVRNT(env, page, cl_page_invariant(page));
cl_page_state_set0(env, page, state);
}
*/
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
- struct cl_object_header *hdr;
struct cl_site *site = cl_object_site(page->cp_obj);
PASSERT(env, page, cfs_atomic_read(&page->cp_ref) > !!page->cp_parent);
CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
cfs_atomic_read(&page->cp_ref));
- hdr = cl_object_header(cl_object_top(page->cp_obj));
- if (cfs_atomic_dec_and_lock(&page->cp_ref, &hdr->coh_page_guard)) {
+ if (cfs_atomic_dec_and_lock(&page->cp_ref, &page->cp_lock)) {
cfs_atomic_dec(&site->cs_pages.cs_busy);
/* We're going to access the page w/o a reference, but it's
- * ok because we have grabbed the lock coh_page_guard, which
+ * ok because we hold cp_lock, which
* means nobody is able to free this page behind us.
*/
if (page->cp_state == CPS_FREEING) {
/* We drop the page reference and check the page state
- * inside the coh_page_guard. So that if it gets here,
+ * while holding cp_lock, so if execution gets here,
* it is the REALLY last reference to this page.
*/
- cfs_spin_unlock(&hdr->coh_page_guard);
+ cfs_spin_unlock(&page->cp_lock);
LASSERT(cfs_atomic_read(&page->cp_ref) == 0);
PASSERT(env, page, page->cp_owner == NULL);
EXIT;
return;
}
- cfs_spin_unlock(&hdr->coh_page_guard);
+ cfs_spin_unlock(&page->cp_lock);
}
EXIT;
*/
struct cl_page *cl_vmpage_page(cfs_page_t *vmpage, struct cl_object *obj)
{
- struct cl_page *page;
- struct cl_object_header *hdr;
+ struct cl_page *top;
+ struct cl_page *page;
ENTRY;
KLASSERT(PageLocked(vmpage));
* This loop assumes that ->private points to the top-most page. This
* can be rectified easily.
*/
- hdr = cl_object_header(cl_object_top(obj));
- cfs_spin_lock(&hdr->coh_page_guard);
- for (page = (void *)vmpage->private;
- page != NULL; page = page->cp_child) {
+ top = (struct cl_page *)vmpage->private;
+ if (top == NULL)
+ RETURN(NULL);
+
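+ /* Walk the top page's cp_child chain under its cp_lock to find the
+ * page that belongs to \a obj. */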
+ cfs_spin_lock(&top->cp_lock);
+ for (page = top; page != NULL; page = page->cp_child) {
if (cl_object_same(page->cp_obj, obj)) {
cl_page_get_trust(page);
break;
}
}
- cfs_spin_unlock(&hdr->coh_page_guard);
+ cfs_spin_unlock(&top->cp_lock);
LASSERT(ergo(page, page->cp_type == CPT_CACHEABLE));
RETURN(page);
}
io = cl_io_top(io);
if (pg->cp_state == CPS_FREEING) {
- result = -EAGAIN;
+ result = -ENOENT;
} else {
result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
(const struct lu_env *,
cl_page_state_set(env, pg, CPS_OWNED);
} else {
cl_page_disown0(env, io, pg);
- result = -EAGAIN;
+ result = -ENOENT;
}
}
}
void cl_page_assume(const struct lu_env *env,
struct cl_io *io, struct cl_page *pg)
{
- PASSERT(env, pg, pg->cp_state < CPS_OWNED);
- PASSERT(env, pg, pg->cp_owner == NULL);
PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
- PINVRNT(env, pg, cl_page_invariant(pg));
ENTRY;
pg = cl_page_top(pg);
io = cl_io_top(io);
cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
+ PASSERT(env, pg, pg->cp_owner == NULL);
pg->cp_owner = io;
pg->cp_task = current;
cl_page_owner_set(pg);
/* cl_page::cp_req already cleared by the caller (osc_completion()) */
PASSERT(env, pg, pg->cp_req == NULL);
PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
- PINVRNT(env, pg, cl_page_invariant(pg));
ENTRY;
CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
(const struct lu_env *,
const struct cl_page_slice *, int), ioret);
if (anchor) {
+ LASSERT(cl_page_is_vmlocked(env, pg));
LASSERT(pg->cp_sync_io == anchor);
pg->cp_sync_io = NULL;
cl_sync_io_note(anchor, ioret);
}
-
- /* Don't assert the page writeback bit here because the lustre file
- * may be as a backend of swap space. in this case, the page writeback
- * is set by VM, and obvious we shouldn't clear it at all. Fortunately
- * this type of pages are all TRANSIENT pages. */
- KLASSERT(ergo(pg->cp_type == CPT_CACHEABLE,
- !PageWriteback(cl_page_vmpage(env, pg))));
EXIT;
}
EXPORT_SYMBOL(cl_page_completion);
* its queues.
*
* \pre cl_page_is_owned(pg, io)
- * \post ergo(result == 0,
- * pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT)
+ * \post cl_page_is_owned(pg, io)
*
* \see cl_page_operations::cpo_cache_add()
*/
int cl_page_cache_add(const struct lu_env *env, struct cl_io *io,
struct cl_page *pg, enum cl_req_type crt)
{
- int result;
+ const struct cl_page_slice *scan;
+ int result = 0;
- PINVRNT(env, pg, crt < CRT_NR);
- PINVRNT(env, pg, cl_page_is_owned(pg, io));
- PINVRNT(env, pg, cl_page_invariant(pg));
+ PINVRNT(env, pg, crt < CRT_NR);
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
- ENTRY;
- result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_cache_add));
- if (result == 0) {
- cl_page_owner_clear(pg);
- cl_page_state_set(env, pg, CPS_CACHED);
- }
- CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
- RETURN(result);
+ ENTRY;
+
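+ /* Call the cpo_cache_add() method of every layer that provides one,
+ * stopping at the first layer that returns an error. */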
+ cfs_list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
+ if (scan->cpl_ops->io[crt].cpo_cache_add == NULL)
+ continue;
+
+ result = scan->cpl_ops->io[crt].cpo_cache_add(env, scan, io);
+ if (result != 0)
+ break;
+ }
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
+ RETURN(result);
}
EXPORT_SYMBOL(cl_page_cache_add);
/**
+ * Called when a page is being written back at the kernel's request.
+ *
+ * \pre cl_page_is_owned(pg, io)
+ * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
+ *
+ * \see cl_page_operations::cpo_flush()
+ */
+int cl_page_flush(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *pg)
+{
+ int result;
+
+ PINVRNT(env, pg, cl_page_is_owned(pg, io));
+ PINVRNT(env, pg, cl_page_invariant(pg));
+
+ ENTRY;
+
+ result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
+
+ CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
+ RETURN(result);
+}
+EXPORT_SYMBOL(cl_page_flush);
+
+/**
 * Checks whether the page is protected by any extent lock in at least the
 * required mode.
*
}
EXPORT_SYMBOL(cl_page_is_under_lock);
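+/**
+ * Per-page callback passed to cl_page_gang_lookup() when purging an
+ * object's pages: takes ownership of the page, unmaps and discards it,
+ * then disowns it.
+ */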
+static int page_prune_cb(const struct lu_env *env, struct cl_io *io,
+ struct cl_page *page, void *cbdata)
+{
+ cl_page_own(env, io, page);
+ cl_page_unmap(env, io, page);
+ cl_page_discard(env, io, page);
+ cl_page_disown(env, io, page);
+ return CLP_GANG_OKAY;
+}
+
/**
* Purges all cached pages belonging to the object \a obj.
*/
struct cl_thread_info *info;
struct cl_object *obj = cl_object_top(clobj);
struct cl_io *io;
- struct cl_page_list *plist;
- int resched;
int result;
ENTRY;
info = cl_env_info(env);
- plist = &info->clt_list;
io = &info->clt_io;
/*
* function, we just make cl_page_list functions happy. -jay
*/
io->ci_obj = obj;
+ io->ci_ignore_layout = 1;
result = cl_io_init(env, io, CIT_MISC, obj);
if (result != 0) {
cl_io_fini(env, io);
}
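+ /* Repeat the lookup until it completes without requesting a
+ * reschedule, yielding the CPU in between. */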
do {
- cl_page_list_init(plist);
- cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF, plist, 0,
- &resched);
- /*
- * Since we're purging the pages of an object, we don't care
- * the possible outcomes of the following functions.
- */
- cl_page_list_unmap(env, io, plist);
- cl_page_list_discard(env, io, plist);
- cl_page_list_disown(env, io, plist);
- cl_page_list_fini(env, plist);
-
- if (resched)
+ result = cl_page_gang_lookup(env, obj, io, 0, CL_PAGE_EOF,
+ page_prune_cb, NULL);
+ if (result == CLP_GANG_RESCHED)
cfs_cond_resched();
- } while (resched);
+ } while (result != CLP_GANG_OKAY);
cl_io_fini(env, io);
RETURN(result);