]) # LC_BIO_ENDIO_USES_ONE_ARG
#
+# LC_ACCOUNT_PAGE_DIRTIED_3ARGS
+#
+# 4.2 kernel: account_page_dirtied() takes 3 arguments
+#
+# Probe whether account_page_dirtied() accepts a third argument (a
+# 'struct mem_cgroup *', per its use below with mem_cgroup_begin_page_stat).
+# Defines HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS, which vvp_set_pagevec_dirty()
+# uses to pick the matching accounting call.
+#
+AC_DEFUN([LC_ACCOUNT_PAGE_DIRTIED_3ARGS], [
+LB_CHECK_COMPILE([if 'account_page_dirtied' with 3 args exists],
+account_page_dirtied, [
+	#include <linux/mm.h>
+],[
+	account_page_dirtied(NULL, NULL, NULL);
+],[
+	AC_DEFINE(HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS, 1,
+		[account_page_dirtied takes three arguments])
+])
+]) # LC_ACCOUNT_PAGE_DIRTIED_3ARGS
+
+#
# LC_HAVE_INTERVAL_EXP_BLK_INTEGRITY
#
# 4.3 replace interval with interval_exp in 'struct blk_integrity'
]) # LC_D_IN_LOOKUP
#
+# LC_LOCK_PAGE_MEMCG
+#
+# Kernel version 4.6 adds lock_page_memcg
+#
+# Defines HAVE_LOCK_PAGE_MEMCG when lock_page_memcg() is available; on
+# older kernels no-op compat stubs for lock/unlock_page_memcg are provided
+# (see the !HAVE_LOCK_PAGE_MEMCG #defines elsewhere in this patch) so
+# callers such as vvp_set_pagevec_dirty() can use it unconditionally.
+#
+AC_DEFUN([LC_LOCK_PAGE_MEMCG], [
+LB_CHECK_COMPILE([if 'lock_page_memcg' is defined],
+lock_page_memcg, [
+	#include <linux/memcontrol.h>
+],[
+	lock_page_memcg(NULL);
+],[
+	AC_DEFINE(HAVE_LOCK_PAGE_MEMCG, 1,
+		[lock_page_memcg is defined])
+])
+]) # LC_LOCK_PAGE_MEMCG
+
+#
# LC_DIRECTIO_2ARGS
#
# Kernel version 4.7 commit c8b8e32d700fe943a935e435ae251364d016c497
# 4.2
LC_BIO_ENDIO_USES_ONE_ARG
LC_SYMLINK_OPS_USE_NAMEIDATA
+ LC_ACCOUNT_PAGE_DIRTIED_3ARGS
# 4.3
LC_HAVE_INTERVAL_EXP_BLK_INTEGRITY
# 4.6
LC_HAVE_IN_COMPAT_SYSCALL
LC_HAVE_XATTR_HANDLER_INODE_PARAM
+ LC_LOCK_PAGE_MEMCG
# 4.7
LC_D_IN_LOOKUP
};
typedef void (*cl_commit_cbt)(const struct lu_env *, struct cl_io *,
- struct cl_page *);
+ struct pagevec *);
struct cl_read_ahead {
/* Maximum page index the readahead window will end.
#define page_tree i_pages
#else
#define i_pages tree_lock
-#define xa_lock_irq(lockp) spin_lock_irq(lockp)
-#define xa_unlock_irq(lockp) spin_unlock_irq(lockp)
+#endif
+
+#ifndef xa_lock_irqsave
+#define xa_lock_irqsave(lockp, flags) spin_lock_irqsave(lockp, flags)
+#define xa_unlock_irqrestore(lockp, flags) spin_unlock_irqrestore(lockp, flags)
+#endif
+
+#ifndef HAVE_LOCK_PAGE_MEMCG
+#define lock_page_memcg(page) do {} while (0)
+#define unlock_page_memcg(page) do {} while (0)
#endif
#ifndef KMEM_CACHE_USERCOPY
int osc_prep_async_page(struct osc_object *osc, struct osc_page *ops,
struct page *page, loff_t offset);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops);
-int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io);
+ struct osc_page *ops, cl_commit_cbt cb);
+int osc_page_cache_add(const struct lu_env *env, struct osc_page *opg,
+ struct cl_io *io, cl_commit_cbt cb);
int osc_teardown_async_page(const struct lu_env *env, struct osc_object *obj,
struct osc_page *ops);
int osc_flush_async_page(const struct lu_env *env, struct cl_io *io,
struct ll_inode_info *lli = ll_i2info(inode);
struct address_space *mapping = &inode->i_data;
unsigned long nrpages;
+ unsigned long flags;
+
ENTRY;
if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) {
*/
nrpages = mapping->nrpages;
if (nrpages) {
- xa_lock_irq(&mapping->i_pages);
+ xa_lock_irqsave(&mapping->i_pages, flags);
nrpages = mapping->nrpages;
- xa_unlock_irq(&mapping->i_pages);
+ xa_unlock_irqrestore(&mapping->i_pages, flags);
} /* Workaround end */
LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
#include <obd.h>
+#include <linux/pagevec.h>
+#include <linux/memcontrol.h>
#include "llite_internal.h"
#include "vvp_internal.h"
#include <libcfs/linux/linux-misc.h>
RETURN(bytes > 0 ? bytes : rc);
}
+/* Taken from kernel set_page_dirty, __set_page_dirty_nobuffers
+ * Last change to this area: b93b016313b3ba8003c3b8bb71f569af91f19fc7
+ *
+ * Current with Linus tip of tree (7/13/2019):
+ * v5.2-rc4-224-ge01e060fe0
+ *
+ * Backwards compat for 3.x, 4.x kernels relating to memcg handling
+ * & rename of radix tree to xarray.
+ *
+ * Dirty every page in \a pvec with a single irq-save/restore round trip
+ * on the mapping's i_pages lock, rather than one set_page_dirty() call
+ * per page.  Relies on: \a pvec non-empty (pages[0] is dereferenced
+ * unconditionally), all pages locked, and all pages from the same file
+ * (same mapping -- asserted per page below). */
+void vvp_set_pagevec_dirty(struct pagevec *pvec)
+{
+	struct page *page = pvec->pages[0];
+	struct address_space *mapping = page->mapping;
+#if defined HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS
+	struct mem_cgroup *memcg;
+#endif
+	unsigned long flags;
+	int count = pagevec_count(pvec);
+	int dirtied = 0;
+	int i = 0;
+
+	ENTRY;
+
+	/* From set_page_dirty */
+	for (i = 0; i < count; i++)
+		ClearPageReclaim(pvec->pages[i]);
+
+	LASSERTF(page->mapping,
+		 "mapping must be set. page %p, page->private (cl_page) %p",
+		 page, (void *) page->private);
+
+	/* Rest of code derived from __set_page_dirty_nobuffers */
+	xa_lock_irqsave(&mapping->i_pages, flags);
+
+	/* Notes on differences with __set_page_dirty_nobuffers:
+	 * 1. We don't need to call page_mapping because we know this is a page
+	 * cache page.
+	 * 2. We have the pages locked, so there is no need for the careful
+	 * mapping/mapping2 dance.
+	 * 3. No mapping is impossible. (Race w/truncate mentioned in
+	 * dirty_nobuffers should be impossible because we hold the page lock.)
+	 * 4. All mappings are the same because i/o is only to one file.
+	 * 5. We invert the lock order on lock_page_memcg(page) and the mapping
+	 * xa_lock, but this is the only function that should use that pair of
+	 * locks and it can't race because Lustre locks pages throughout i/o.
+	 */
+	for (i = 0; i < count; i++) {
+		page = pvec->pages[i];
+		lock_page_memcg(page);
+		/* page was already dirty: skip accounting and tagging */
+		if (TestSetPageDirty(page)) {
+			unlock_page_memcg(page);
+			continue;
+		}
+		LASSERTF(page->mapping == mapping,
+			 "all pages must have the same mapping. page %p, mapping %p, first mapping %p\n",
+			 page, page->mapping, mapping);
+		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
+#ifdef HAVE_ACCOUNT_PAGE_DIRTIED_3ARGS
+		/* 4.2-era kernels (see LC_ACCOUNT_PAGE_DIRTIED_3ARGS): the
+		 * page's memcg is pinned across the accounting call by the
+		 * begin/end_page_stat pair */
+		memcg = mem_cgroup_begin_page_stat(page);
+		account_page_dirtied(page, mapping, memcg);
+		mem_cgroup_end_page_stat(memcg);
+#else
+		account_page_dirtied(page, mapping);
+#endif
+		/* page_tree is a compat alias for i_pages on xarray kernels */
+		radix_tree_tag_set(&mapping->page_tree, page_index(page),
+				   PAGECACHE_TAG_DIRTY);
+		dirtied++;
+		unlock_page_memcg(page);
+	}
+	xa_unlock_irqrestore(&mapping->i_pages, flags);
+
+	CDEBUG(D_VFSTRACE, "mapping %p, count %d, dirtied %d\n", mapping,
+	       count, dirtied);
+
+	if (mapping->host && dirtied) {
+		/* !PageAnon && !swapper_space */
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	}
+
+	EXIT;
+}
+
+/* Write commit callback: mark every vmpage in the pagevec uptodate, dirty
+ * them all in one batch via vvp_set_pagevec_dirty(), then disown and drop
+ * the cl_page references taken in ll_cl_init(). */
static void write_commit_callback(const struct lu_env *env, struct cl_io *io,
-				  struct cl_page *page)
+				  struct pagevec *pvec)
{
-	struct page *vmpage = page->cp_vmpage;
+	int count = 0;
+	int i = 0;
+
+	ENTRY;
-	SetPageUptodate(vmpage);
-	set_page_dirty(vmpage);
+	count = pagevec_count(pvec);
+	LASSERT(count > 0);
-	cl_page_disown(env, io, page);
+	for (i = 0; i < count; i++) {
+		struct page *vmpage = pvec->pages[i];
+		SetPageUptodate(vmpage);
+	}
+
+	vvp_set_pagevec_dirty(pvec);
-	/* held in ll_cl_init() */
-	lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
-	cl_page_put(env, page);
+	for (i = 0; i < count; i++) {
+		/* vmpage->private holds the cl_page */
+		struct page *vmpage = pvec->pages[i];
+		struct cl_page *page = (struct cl_page *) vmpage->private;
+		cl_page_disown(env, io, page);
+		/* reference held in ll_cl_init() */
+		lu_ref_del(&page->cp_reference, "cl_io", cl_io_top(io));
+		cl_page_put(env, page);
+	}
+
+	EXIT;
}
/* make sure the page list is contiguous */
}
+/* mkwrite commit callback: the pages only need to be dirtied; unlike
+ * write_commit_callback() there is no uptodate/disown/put handling here. */
static void mkwrite_commit_callback(const struct lu_env *env, struct cl_io *io,
-				    struct cl_page *page)
+				    struct pagevec *pvec)
{
-	set_page_dirty(page->cp_vmpage);
+	vvp_set_pagevec_dirty(pvec);
}
static int vvp_io_fault_start(const struct lu_env *env,
*/
unsigned long offset = hash_x_index(*hash, hash64);
struct page *page;
+ unsigned long flags;
int found;
- xa_lock_irq(&mapping->i_pages);
+ xa_lock_irqsave(&mapping->i_pages, flags);
found = radix_tree_gang_lookup(&mapping->page_tree,
(void **)&page, offset, 1);
if (found > 0 && !radix_tree_exceptional_entry(page)) {
struct lu_dirpage *dp;
get_page(page);
- xa_unlock_irq(&mapping->i_pages);
+ xa_unlock_irqrestore(&mapping->i_pages, flags);
/*
* In contrast to find_lock_page() we are sure that directory
* page cannot be truncated (while DLM lock is held) and,
page = ERR_PTR(-EIO);
}
} else {
- xa_unlock_irq(&mapping->i_pages);
+ xa_unlock_irqrestore(&mapping->i_pages, flags);
page = NULL;
}
return page;
* \see cl_io_operations::cio_commit_async()
*/
int cl_io_commit_async(const struct lu_env *env, struct cl_io *io,
- struct cl_page_list *queue, int from, int to,
- cl_commit_cbt cb)
+ struct cl_page_list *queue, int from, int to,
+ cl_commit_cbt cb)
{
const struct cl_io_slice *scan;
int result = 0;
}
+/* echo-client commit callback: move each committed page from the pagevec
+ * onto the thread-info out queue (c2_qout). */
static void echo_commit_callback(const struct lu_env *env, struct cl_io *io,
-				 struct cl_page *page)
+				 struct pagevec *pvec)
{
	struct echo_thread_info *info;
	struct cl_2queue *queue;
+	int i = 0;
	info = echo_env_info(env);
	LASSERT(io == &info->eti_io);
	queue = &info->eti_queue;
-	cl_page_list_add(&queue->c2_qout, page);
+
+	/* the cl_page for each vmpage is recovered from vmpage->private */
+	for (i = 0; i < pagevec_count(pvec); i++) {
+		struct page *vmpage = pvec->pages[i];
+		struct cl_page *page = (struct cl_page *)vmpage->private;
+
+		cl_page_list_add(&queue->c2_qout, page);
+	}
}
static int cl_echo_object_brw(struct echo_object *eco, int rw, u64 offset,
EXPORT_SYMBOL(osc_prep_async_page);
int osc_queue_async_io(const struct lu_env *env, struct cl_io *io,
- struct osc_page *ops)
+ struct osc_page *ops, cl_commit_cbt cb)
{
struct osc_io *oio = osc_env_io(env);
struct osc_extent *ext = NULL;
struct osc_async_page *oap = &ops->ops_oap;
struct client_obd *cli = oap->oap_cli;
struct osc_object *osc = oap->oap_obj;
+ struct pagevec *pvec = &osc_env_info(env)->oti_pagevec;
pgoff_t index;
unsigned int tmp;
unsigned int grants = 0;
rc = 0;
if (grants == 0) {
- /* we haven't allocated grant for this page. */
+ /* We haven't allocated grant for this page, and we
+ * must not hold a page lock while we do enter_cache,
+ * so we must mark dirty & unlock any pages in the
+ * write commit pagevec. */
+ if (pagevec_count(pvec)) {
+ cb(env, io, pvec);
+ pagevec_reinit(pvec);
+ }
rc = osc_enter_cache(env, cli, oap, tmp);
if (rc == 0)
grants = tmp;
#include <lustre_obdo.h>
#include <lustre_osc.h>
+#include <linux/pagevec.h>
#include "osc_internal.h"
struct cl_page *page;
struct cl_page *last_page;
struct osc_page *opg;
+ struct pagevec *pvec = &osc_env_info(env)->oti_pagevec;
int result = 0;
ENTRY;
}
}
+ ll_pagevec_init(pvec, 0);
+
while (qin->pl_nr > 0) {
struct osc_async_page *oap;
/* The page may be already in dirty cache. */
if (list_empty(&oap->oap_pending_item)) {
- result = osc_page_cache_add(env, &opg->ops_cl, io);
+ result = osc_page_cache_add(env, opg, io, cb);
if (result != 0)
break;
}
cl_page_list_del(env, qin, page);
- (*cb)(env, io, page);
- /* Can't access page any more. Page can be in transfer and
- * complete at any time. */
+ /* if there are no more slots, do the callback & reinit */
+ if (pagevec_add(pvec, page->cp_vmpage) == 0) {
+ (*cb)(env, io, pvec);
+ pagevec_reinit(pvec);
+ }
}
+ /* Clean up any partially full pagevecs */
+ if (pagevec_count(pvec) != 0)
+ (*cb)(env, io, pvec);
+
+ /* Can't access these pages any more. Page can be in transfer and
+ * complete at any time. */
+
/* for sync write, kernel will wait for this page to be flushed before
* osc_io_end() is called, so release it earlier.
* for mkwrite(), it's known there is no further pages. */
osc_lru_use(osc_cli(obj), opg);
}
-int osc_page_cache_add(const struct lu_env *env,
- const struct cl_page_slice *slice, struct cl_io *io)
+int osc_page_cache_add(const struct lu_env *env, struct osc_page *opg,
+ struct cl_io *io, cl_commit_cbt cb)
{
- struct osc_page *opg = cl2osc_page(slice);
int result;
ENTRY;
osc_page_transfer_get(opg, "transfer\0cache");
- result = osc_queue_async_io(env, io, opg);
+ result = osc_queue_async_io(env, io, opg, cb);
if (result != 0)
osc_page_transfer_put(env, opg);
else