From 0d6d0b7bc95a82dee02d35d0a8a41d24692cad45 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 3 Mar 2021 09:50:04 +0300 Subject: [PATCH] LU-11290 osc: Batch gang_lookup cbs The osc_page_gang_lookup callbacks can be trivially converted to operate in batches rather than one page at a time. This improves cancellation time for locks protecting large numbers of pages by about 10% (after landing another optimization, "LU-11290 ldlm: page discard speedup", the improvement is 6% when canceling a lock for a 30GB cached file). Truncate to zero time (with one lock protecting many pages) was improved by about 5-10% as well. Lock weighing performance should be improved slightly as well, but is tricky to benchmark. HPE-bug-id: LUS-6432 Change-Id: Ib30594ae97182cbeb18051d6cee860c97ae7e119 Signed-off-by: Patrick Farrell Signed-off-by: Alexander Zarochentsev Reviewed-on: https://review.whamcloud.com/33089 Tested-by: jenkins Reviewed-by: Bobi Jam Reviewed-by: Wang Shilong Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_osc.h | 4 +- lustre/mdc/mdc_dev.c | 47 +++++++------- lustre/osc/osc_cache.c | 149 ++++++++++++++++++++++++-------------------- lustre/osc/osc_io.c | 33 +++++----- lustre/osc/osc_lock.c | 18 ++++-- 5 files changed, 141 insertions(+), 110 deletions(-) diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 9bcb313..c4de03a 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -621,12 +621,12 @@ static inline void osc_io_unplug(const struct lu_env *env, } typedef bool (*osc_page_gang_cbt)(const struct lu_env *, struct cl_io *, - struct osc_page *, void *); + void**, int, void *); bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io, struct osc_object *osc, pgoff_t start, pgoff_t end, osc_page_gang_cbt cb, void *cbdata); bool osc_discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata); + void**, int, void *cbdata); /* osc_dev.c */ int osc_device_init(const 
struct lu_env *env, struct lu_device *d, diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index 02e562a..b23470b 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -185,33 +185,38 @@ again: * Check if page @page is covered by an extra lock or discard it. */ static bool mdc_check_and_discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) + void **pvec, int count, void *cbdata) { struct osc_thread_info *info = osc_env_info(env); struct osc_object *osc = cbdata; pgoff_t index; - - index = osc_index(ops); - if (index >= info->oti_fn_index) { - struct ldlm_lock *tmp; - struct cl_page *page = ops->ops_cl.cpl_page; - - /* refresh non-overlapped index */ - tmp = mdc_dlmlock_at_pgoff(env, osc, index, - OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_AST); - if (tmp != NULL) { - info->oti_fn_index = CL_PAGE_EOF; - LDLM_LOCK_PUT(tmp); - } else if (cl_page_own(env, io, page) == 0) { - /* discard the page */ - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); + int i; + + for (i = 0; i < count; i++) { + struct osc_page *ops = pvec[i]; + + index = osc_index(ops); + if (index >= info->oti_fn_index) { + struct ldlm_lock *tmp; + struct cl_page *page = ops->ops_cl.cpl_page; + + /* refresh non-overlapped index */ + tmp = mdc_dlmlock_at_pgoff(env, osc, index, + OSC_DAP_FL_TEST_LOCK | OSC_DAP_FL_AST); + if (tmp != NULL) { + info->oti_fn_index = CL_PAGE_EOF; + LDLM_LOCK_PUT(tmp); + } else if (cl_page_own(env, io, page) == 0) { + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } } - } - info->oti_next_index = index + 1; + info->oti_next_index = index + 1; + } return true; } diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 51e9417..0168dd7 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -3082,11 +3082,10 @@ bool osc_page_gang_lookup(const struct 
lu_env *env, struct cl_io *io, spin_unlock(&osc->oo_tree_lock); tree_lock = false; + res = (*cb)(env, io, pvec, j, cbdata); + for (i = 0; i < j; ++i) { ops = pvec[i]; - if (res) - res = (*cb)(env, io, ops, cbdata); - page = ops->ops_cl.cpl_page; lu_ref_del(&page->cp_reference, "gang_lookup", current); cl_pagevec_put(env, page, pagevec); @@ -3114,85 +3113,101 @@ EXPORT_SYMBOL(osc_page_gang_lookup); * Check if page @page is covered by an extra lock or discard it. */ static bool check_and_discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) + void **pvec, int count, void *cbdata) { struct osc_thread_info *info = osc_env_info(env); struct osc_object *osc = cbdata; - struct cl_page *page = ops->ops_cl.cpl_page; - pgoff_t index; - bool discard = false; - - index = osc_index(ops); - - /* negative lock caching */ - if (index < info->oti_ng_index) { - discard = true; - } else if (index >= info->oti_fn_index) { - struct ldlm_lock *tmp; - /* refresh non-overlapped index */ - tmp = osc_dlmlock_at_pgoff(env, osc, index, - OSC_DAP_FL_TEST_LOCK | - OSC_DAP_FL_AST | OSC_DAP_FL_RIGHT); - if (tmp != NULL) { - __u64 end = tmp->l_policy_data.l_extent.end; - __u64 start = tmp->l_policy_data.l_extent.start; - - /* no lock covering this page */ - if (index < cl_index(osc2cl(osc), start)) { - /* no lock at @index, first lock at @start */ - info->oti_ng_index = cl_index(osc2cl(osc), - start); - discard = true; + int i; + + for (i = 0; i < count; i++) { + struct osc_page *ops = pvec[i]; + struct cl_page *page = ops->ops_cl.cpl_page; + pgoff_t index = osc_index(ops); + bool discard = false; + + /* negative lock caching */ + if (index < info->oti_ng_index) { + discard = true; + } else if (index >= info->oti_fn_index) { + struct ldlm_lock *tmp; + /* refresh non-overlapped index */ + tmp = osc_dlmlock_at_pgoff(env, osc, index, + OSC_DAP_FL_TEST_LOCK | + OSC_DAP_FL_AST | + OSC_DAP_FL_RIGHT); + if (tmp != NULL) { + __u64 end = + 
tmp->l_policy_data.l_extent.end; + __u64 start = + tmp->l_policy_data.l_extent.start; + + /* no lock covering this page */ + if (index < cl_index(osc2cl(osc), start)) { + /* no lock at @index, + * first lock at @start + */ + info->oti_ng_index = + cl_index(osc2cl(osc), start); + discard = true; + } else { + /* Cache the first-non-overlapped + * index so as to skip all pages + * within [index, oti_fn_index). + * This is safe because if tmp lock + * is canceled, it will discard these + * pages. + */ + info->oti_fn_index = + cl_index(osc2cl(osc), end + 1); + if (end == OBD_OBJECT_EOF) + info->oti_fn_index = + CL_PAGE_EOF; + } + LDLM_LOCK_PUT(tmp); } else { - /* Cache the first-non-overlapped index so as to - * skip all pages within [index, oti_fn_index). - * This is safe because if tmp lock is canceled, - * it will discard these pages. - */ - info->oti_fn_index = cl_index(osc2cl(osc), - end + 1); - if (end == OBD_OBJECT_EOF) - info->oti_fn_index = CL_PAGE_EOF; + info->oti_ng_index = CL_PAGE_EOF; + discard = true; } - LDLM_LOCK_PUT(tmp); - } else { - info->oti_ng_index = CL_PAGE_EOF; - discard = true; } - } - if (discard) { - if (cl_page_own(env, io, page) == 0) { - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); + if (discard) { + if (cl_page_own(env, io, page) == 0) { + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } } - } - info->oti_next_index = index + 1; + info->oti_next_index = index + 1; + } return true; } bool osc_discard_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) + void **pvec, int count, void *cbdata) { struct osc_thread_info *info = osc_env_info(env); - struct cl_page *page = ops->ops_cl.cpl_page; - - /* page is top page. 
*/ - info->oti_next_index = osc_index(ops) + 1; - if (cl_page_own(env, io, page) == 0) { - if (!ergo(page->cp_type == CPT_CACHEABLE, - !PageDirty(cl_page_vmpage(page)))) - CL_PAGE_DEBUG(D_ERROR, env, page, - "discard dirty page?\n"); - - /* discard the page */ - cl_page_discard(env, io, page); - cl_page_disown(env, io, page); - } else { - LASSERT(page->cp_state == CPS_FREEING); + int i; + + for (i = 0; i < count; i++) { + struct osc_page *ops = pvec[i]; + struct cl_page *page = ops->ops_cl.cpl_page; + + /* page is top page. */ + info->oti_next_index = osc_index(ops) + 1; + if (cl_page_own(env, io, page) == 0) { + if (!ergo(page->cp_type == CPT_CACHEABLE, + !PageDirty(cl_page_vmpage(page)))) + CL_PAGE_DEBUG(D_ERROR, env, page, + "discard dirty page?\n"); + + /* discard the page */ + cl_page_discard(env, io, page); + cl_page_disown(env, io, page); + } else { + LASSERT(page->cp_state == CPS_FREEING); + } } return true; diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index a7c2237..0038286 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -496,22 +496,27 @@ static int osc_async_upcall(void *a, int rc) * Checks that there are no pages being written in the extent being truncated. 
*/ static bool trunc_check_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops , void *cbdata) + void **pvec, int count, void *cbdata) { - struct cl_page *page = ops->ops_cl.cpl_page; - struct osc_async_page *oap; - __u64 start = *(__u64 *)cbdata; - - oap = &ops->ops_oap; - if (oap->oap_cmd & OBD_BRW_WRITE && - !list_empty(&oap->oap_pending_item)) - CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", - start, current->comm); - - if (PageLocked(page->cp_vmpage)) - CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", - ops, osc_index(ops), oap->oap_cmd & OBD_BRW_RWMASK); + int i; + for (i = 0; i < count; i++) { + struct osc_page *ops = pvec[i]; + struct cl_page *page = ops->ops_cl.cpl_page; + struct osc_async_page *oap; + __u64 start = *(__u64 *)cbdata; + + oap = &ops->ops_oap; + if (oap->oap_cmd & OBD_BRW_WRITE && + !list_empty(&oap->oap_pending_item)) + CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", + start, current->comm); + + if (PageLocked(page->cp_vmpage)) + CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", + ops, osc_index(ops), + oap->oap_cmd & OBD_BRW_RWMASK); + } return true; } diff --git a/lustre/osc/osc_lock.c b/lustre/osc/osc_lock.c index 3d0cf0f..a440fda 100644 --- a/lustre/osc/osc_lock.c +++ b/lustre/osc/osc_lock.c @@ -648,15 +648,21 @@ out: EXPORT_SYMBOL(osc_ldlm_glimpse_ast); static bool weigh_cb(const struct lu_env *env, struct cl_io *io, - struct osc_page *ops, void *cbdata) + void **pvec, int count, void *cbdata) { - struct cl_page *page = ops->ops_cl.cpl_page; + int i; - if (cl_page_is_vmlocked(env, page) || PageDirty(page->cp_vmpage) || - PageWriteback(page->cp_vmpage)) - return false; + for (i = 0; i < count; i++) { + struct osc_page *ops = pvec[i]; + struct cl_page *page = ops->ops_cl.cpl_page; - *(pgoff_t *)cbdata = osc_index(ops) + 1; + if (cl_page_is_vmlocked(env, page) || + PageDirty(page->cp_vmpage) || + PageWriteback(page->cp_vmpage)) + return false; + + *(pgoff_t *)cbdata = osc_index(ops) + 1; + } 
return true; } -- 1.8.3.1