From: Li Dongyang Date: Fri, 26 Oct 2018 09:09:26 +0000 (+1100) Subject: LU-9906 clio: use pagevec_release for many pages X-Git-Tag: 2.12.0-RC1~31 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=b4a959eb61bc7e6a64261c704f3f3f5e220c2f02 LU-9906 clio: use pagevec_release for many pages When Lustre releases cached pages, it always uses page_release, even when releasing many pages. When clearing OST ldlm lock LRUs in parallel with lots of cached data, the ldlm_bl threads spend most of their time contending for the zone lock taken by page_release. Also, when osc_lru_reclaim kicks in because there are not enough LRU slots during I/O, contention on the zone lock kills I/O performance. Switching to pagevec when we expect to actually release the pages (discard_pages, truncate, LRU reclaim) brings significant performance benefits as shown below. This patch introduces cl_pagevec_put() to release the pages in batches using pagevec, which is essentially calling release_pages(). mpirun -np 48 ior -w -r -t 16m -b 16g -F -e -vv -o ... -i 1 [-B] | mode: write (GB/s), read (GB/s) | master O_DIRECT: 20.8, 21.8 | master+patch O_DIRECT: 20.7, 22.2 | master Buffered: 11.6, 12.3 | master+patch Buffered: 15.3, 19.6 | Also clean up the dead lovsub_page related code. Signed-off-by: Patrick Farrell Signed-off-by: Li Dongyang Change-Id: I71447528db12858defb627c9c03b7193d116c935 Reviewed-on: https://review.whamcloud.com/28667 Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alexey Lyashkov Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index bf031e4..ae6ada5 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -99,6 +99,7 @@ #include #include #include +#include #include struct obd_info; @@ -900,7 +901,8 @@ struct cl_page_operations { const struct cl_page_slice *slice); /** Destructor. Frees resources and slice itself. 
*/ void (*cpo_fini)(const struct lu_env *env, - struct cl_page_slice *slice); + struct cl_page_slice *slice, + struct pagevec *pvec); /** * Optional debugging helper. Prints given page slice. * @@ -2168,6 +2170,9 @@ struct cl_page *cl_page_alloc (const struct lu_env *env, void cl_page_get (struct cl_page *page); void cl_page_put (const struct lu_env *env, struct cl_page *page); +void cl_pagevec_put (const struct lu_env *env, + struct cl_page *page, + struct pagevec *pvec); void cl_page_print (const struct lu_env *env, void *cookie, lu_printer_t printer, const struct cl_page *pg); diff --git a/lustre/include/lustre_compat.h b/lustre/include/lustre_compat.h index 66a5bb7..13ed36b 100644 --- a/lustre/include/lustre_compat.h +++ b/lustre/include/lustre_compat.h @@ -775,4 +775,10 @@ static inline bool bdev_integrity_enabled(struct block_device *bdev, int rw) return false; } +#ifdef HAVE_PAGEVEC_INIT_ONE_PARAM +#define ll_pagevec_init(pvec, n) pagevec_init(pvec) +#else +#define ll_pagevec_init(pvec, n) pagevec_init(pvec, n) +#endif + #endif /* _LUSTRE_COMPAT_H */ diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index ae7a602..3c9ebef 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -171,6 +171,7 @@ struct osc_thread_info { struct lustre_handle oti_handle; struct cl_page_list oti_plist; struct cl_io oti_io; + struct pagevec oti_pagevec; void *oti_pvec[OTI_PVEC_SIZE]; /** * Fields used by cl_lock_discard_pages(). 
diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index 4f7e6d5..064e86e 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -54,16 +54,22 @@ * */ -static void vvp_page_fini_common(struct vvp_page *vpg) +static void vvp_page_fini_common(struct vvp_page *vpg, struct pagevec *pvec) { struct page *vmpage = vpg->vpg_page; LASSERT(vmpage != NULL); - put_page(vmpage); + if (pvec) { + if (!pagevec_add(pvec, vmpage)) + pagevec_release(pvec); + } else { + put_page(vmpage); + } } static void vvp_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) + struct cl_page_slice *slice, + struct pagevec *pvec) { struct vvp_page *vpg = cl2vvp_page(slice); struct page *vmpage = vpg->vpg_page; @@ -73,7 +79,7 @@ static void vvp_page_fini(const struct lu_env *env, * VPG_FREEING state. */ LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); - vvp_page_fini_common(vpg); + vvp_page_fini_common(vpg, pvec); } static int vvp_page_own(const struct lu_env *env, @@ -490,13 +496,14 @@ vvp_transient_page_completion(const struct lu_env *env, } static void vvp_transient_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) + struct cl_page_slice *slice, + struct pagevec *pvec) { struct vvp_page *vpg = cl2vvp_page(slice); struct cl_page *clp = slice->cpl_page; struct vvp_object *clobj = cl2vvp(clp->cp_obj); - vvp_page_fini_common(vpg); + vvp_page_fini_common(vpg, pvec); atomic_dec(&clobj->vob_transient_pages); } diff --git a/lustre/lov/Makefile.in b/lustre/lov/Makefile.in index 0763571..ee9909d 100644 --- a/lustre/lov/Makefile.in +++ b/lustre/lov/Makefile.in @@ -13,7 +13,6 @@ lov-objs := lov_dev.o \ lov_request.o \ lovsub_dev.o \ lovsub_object.o \ - lovsub_page.o \ lproc_lov.o EXTRA_DIST = $(lov-objs:.o=.c) lov_internal.h lov_cl_internal.h diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index 1609be5..b28ff35 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -463,11 +463,6 @@ 
struct lov_sublock_env { struct cl_io *lse_io; }; -struct lovsub_page { - struct cl_page_slice lsb_cl; -}; - - struct lov_thread_info { struct cl_object_conf lti_stripe_conf; struct lu_fid lti_fid; @@ -624,8 +619,6 @@ struct lov_io_sub *lov_sub_get(const struct lu_env *env, struct lov_io *lio, int lov_page_init (const struct lu_env *env, struct cl_object *ob, struct cl_page *page, pgoff_t index); -int lovsub_page_init (const struct lu_env *env, struct cl_object *ob, - struct cl_page *page, pgoff_t index); int lov_page_init_empty (const struct lu_env *env, struct cl_object *obj, struct cl_page *page, pgoff_t index); int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, @@ -781,13 +774,6 @@ static inline struct lov_page *cl2lov_page(const struct cl_page_slice *slice) return container_of0(slice, struct lov_page, lps_cl); } -static inline struct lovsub_page * -cl2lovsub_page(const struct cl_page_slice *slice) -{ - LINVRNT(lovsub_is_object(&slice->cpl_obj->co_lu)); - return container_of0(slice, struct lovsub_page, lsb_cl); -} - static inline struct lov_io *cl2lov_io(const struct lu_env *env, const struct cl_io_slice *ios) { diff --git a/lustre/lov/lovsub_page.c b/lustre/lov/lovsub_page.c deleted file mode 100644 index c10a3df..0000000 --- a/lustre/lov/lovsub_page.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). 
- * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.gnu.org/licenses/gpl-2.0.html - * - * GPL HEADER END - */ -/* - * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2013, 2014, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * Implementation of cl_page for LOVSUB layer. - * - * Author: Nikita Danilov - */ - -#define DEBUG_SUBSYSTEM S_LOV - -#include "lov_cl_internal.h" - -/** \addtogroup lov - * @{ - */ - -/***************************************************************************** - * - * Lovsub page operations. - * - */ - -static void lovsub_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) -{ -} - -static const struct cl_page_operations lovsub_page_ops = { - .cpo_fini = lovsub_page_fini -}; - -int lovsub_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) -{ - struct lovsub_page *lsb = cl_object_page_slice(obj, page); - ENTRY; - - cl_page_slice_add(page, &lsb->lsb_cl, obj, index, &lovsub_page_ops); - RETURN(0); -} - -/** @} lov */ diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 301af05c..a1b1e13 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -142,7 +142,8 @@ cl_page_at_trusted(const struct cl_page *page, RETURN(NULL); } -static void cl_page_free(const struct lu_env *env, struct cl_page *page) +static void cl_page_free(const struct lu_env *env, struct cl_page *page, + struct pagevec *pvec) { struct cl_object *obj = page->cp_obj; int pagesize = cl_object_header(obj)->coh_page_bufsize; @@ -159,7 +160,7 @@ static void cl_page_free(const struct lu_env *env, struct cl_page *page) struct cl_page_slice, cpl_linkage); list_del_init(page->cp_layers.next); if 
(unlikely(slice->cpl_ops->cpo_fini != NULL)) - slice->cpl_ops->cpo_fini(env, slice); + slice->cpl_ops->cpo_fini(env, slice, pvec); } cs_page_dec(obj, CS_total); cs_pagestate_dec(obj, page->cp_state); @@ -212,7 +213,7 @@ struct cl_page *cl_page_alloc(const struct lu_env *env, ind); if (result != 0) { cl_page_delete0(env, page); - cl_page_free(env, page); + cl_page_free(env, page, NULL); page = ERR_PTR(result); break; } @@ -373,15 +374,13 @@ void cl_page_get(struct cl_page *page) EXPORT_SYMBOL(cl_page_get); /** - * Releases a reference to a page. + * Releases a reference to a page, use the pagevec to release the pages + * in batch if provided. * - * When last reference is released, page is returned to the cache, unless it - * is in cl_page_state::CPS_FREEING state, in which case it is immediately - * destroyed. - * - * \see cl_object_put(), cl_lock_put(). + * Users need to do a final pagevec_release() to release any trailing pages. */ -void cl_page_put(const struct lu_env *env, struct cl_page *page) +void cl_pagevec_put(const struct lu_env *env, struct cl_page *page, + struct pagevec *pvec) { ENTRY; CL_PAGE_HEADER(D_TRACE, env, page, "%d\n", @@ -397,11 +396,26 @@ void cl_page_put(const struct lu_env *env, struct cl_page *page) * Page is no longer reachable by other threads. Tear * it down. */ - cl_page_free(env, page); + cl_page_free(env, page, pvec); } EXIT; } +EXPORT_SYMBOL(cl_pagevec_put); + +/** + * Releases a reference to a page, wrapper to cl_pagevec_put + * + * When last reference is released, page is returned to the cache, unless it + * is in cl_page_state::CPS_FREEING state, in which case it is immediately + * destroyed. + * + * \see cl_object_put(), cl_lock_put(). 
+ */ +void cl_page_put(const struct lu_env *env, struct cl_page *page) +{ + cl_pagevec_put(env, page, NULL); +} EXPORT_SYMBOL(cl_page_put); /** diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 316836e..265bb54 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -328,7 +328,8 @@ static void echo_page_completion(const struct lu_env *env, } static void echo_page_fini(const struct lu_env *env, - struct cl_page_slice *slice) + struct cl_page_slice *slice, + struct pagevec *pvec) { struct echo_object *eco = cl2echo_obj(slice->cpl_obj); ENTRY; diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 7524899..5394a75 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -976,6 +976,7 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, struct client_obd *cli = osc_cli(obj); struct osc_async_page *oap; struct osc_async_page *tmp; + struct pagevec *pvec; int pages_in_chunk = 0; int ppc_bits = cli->cl_chunkbits - PAGE_SHIFT; @@ -1000,6 +1001,8 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, io = osc_env_thread_io(env); io->ci_obj = cl_object_top(osc2cl(obj)); io->ci_ignore_layout = 1; + pvec = &osc_env_info(env)->oti_pagevec; + ll_pagevec_init(pvec, 0); rc = cl_io_init(env, io, CIT_MISC, io->ci_obj); if (rc < 0) GOTO(out, rc); @@ -1037,11 +1040,13 @@ static int osc_extent_truncate(struct osc_extent *ext, pgoff_t trunc_index, } lu_ref_del(&page->cp_reference, "truncate", current); - cl_page_put(env, page); + cl_pagevec_put(env, page, pvec); --ext->oe_nr_pages; ++nr_pages; } + pagevec_release(pvec); + EASSERTF(ergo(ext->oe_start >= trunc_index + !!partial, ext->oe_nr_pages == 0), ext, "trunc_index %lu, partial %d\n", trunc_index, partial); @@ -3142,6 +3147,7 @@ int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, osc_page_gang_cbt cb, void *cbdata) { struct osc_page *ops; + struct pagevec *pagevec; void **pvec; pgoff_t idx; 
unsigned int nr; @@ -3153,6 +3159,8 @@ int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, idx = start; pvec = osc_env_info(env)->oti_pvec; + pagevec = &osc_env_info(env)->oti_pagevec; + ll_pagevec_init(pagevec, 0); spin_lock(&osc->oo_tree_lock); while ((nr = radix_tree_gang_lookup(&osc->oo_tree, pvec, idx, OTI_PVEC_SIZE)) > 0) { @@ -3199,8 +3207,10 @@ int osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, page = ops->ops_cl.cpl_page; lu_ref_del(&page->cp_reference, "gang_lookup", current); - cl_page_put(env, page); + cl_pagevec_put(env, page, pagevec); } + pagevec_release(pagevec); + if (nr < OTI_PVEC_SIZE || end_of_region) break; diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 709e9dd..ff64374 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -528,19 +528,22 @@ static void osc_lru_use(struct client_obd *cli, struct osc_page *opg) static void discard_pagevec(const struct lu_env *env, struct cl_io *io, struct cl_page **pvec, int max_index) { - int i; + struct pagevec *pagevec = &osc_env_info(env)->oti_pagevec; + int i; - for (i = 0; i < max_index; i++) { - struct cl_page *page = pvec[i]; + ll_pagevec_init(pagevec, 0); + for (i = 0; i < max_index; i++) { + struct cl_page *page = pvec[i]; LASSERT(cl_page_is_owned(page, io)); cl_page_delete(env, page); cl_page_discard(env, io, page); cl_page_disown(env, io, page); - cl_page_put(env, page); + cl_pagevec_put(env, page, pagevec); - pvec[i] = NULL; - } + pvec[i] = NULL; + } + pagevec_release(pagevec); } /** diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 55e238b..fa04a31 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -766,11 +766,7 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt, struct pagevec pvec; int i; -#ifdef HAVE_PAGEVEC_INIT_ONE_PARAM - pagevec_init(&pvec); -#else - pagevec_init(&pvec, 0); -#endif + ll_pagevec_init(&pvec, 0); for (i = 0; i < npages; i++) { struct 
page *page = lnb[i].lnb_page;