From 58ce808d4c127d9b896037121e477ece584e81f0 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Fri, 15 Sep 2023 11:31:24 -0400 Subject: [PATCH] LU-13805 llite: fail unaligned DIO for RDMA pages Unaligned DIO needs to directly access the page contents in order to copy to the buffer. This means it can't work with RDMA-only (non-CPU accessible) pages. Implement that limitation: osc_page_init() now fails an unaligned DIO with -EINVAL when it is given an RDMA-only page. To make the I/O visible there, coo_page_init(), cl_page_find() and cl_page_alloc() take the cl_io as a new argument. Add sanity test_119j, which uses the new OBD_FAIL_OSC_UNALIGNED_RDMA_ONLY fail_loc to fake RDMA-only pages. Signed-off-by: Patrick Farrell Change-Id: I52bd1d4cc143e1018ddf6942403142f26be4430f --- lustre/include/cl_object.h | 7 +++++-- lustre/include/lustre_osc.h | 5 +++-- lustre/include/obd_support.h | 1 + lustre/llite/file.c | 2 +- lustre/llite/llite_lib.c | 2 +- lustre/llite/rw.c | 7 ++++--- lustre/llite/rw26.c | 5 +++-- lustre/llite/vvp_internal.h | 4 ++-- lustre/llite/vvp_io.c | 3 ++- lustre/llite/vvp_page.c | 4 ++-- lustre/lov/lov_cl_internal.h | 15 +++++++++------ lustre/lov/lov_object.c | 14 ++++++++------ lustre/lov/lov_page.c | 20 ++++++++++++-------- lustre/obdclass/cl_page.c | 12 ++++++------ lustre/osc/osc_page.c | 17 +++++++++++++++-- lustre/tests/sanity.sh | 23 +++++++++++++++++++++++ 16 files changed, 97 insertions(+), 44 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index f28b0e9..50f5bde 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -331,8 +331,9 @@ struct cl_object_operations { * \retval valid-pointer pointer to already existing referenced page * to be used instead of newly created. */ - int (*coo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); + int (*coo_page_init)(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index); /** * Initialize lock slice for this layer. Called top-to-bottom through * every object layer when a new cl_lock is instantiated. 
Layer @@ -2205,10 +2206,12 @@ static inline int cl_object_refc(struct cl_object *clob) /** \defgroup cl_page cl_page * @{ */ struct cl_page *cl_page_find (const struct lu_env *env, + struct cl_io *io, struct cl_object *obj, pgoff_t idx, struct page *vmpage, enum cl_page_type type); struct cl_page *cl_page_alloc (const struct lu_env *env, + struct cl_io *io, struct cl_object *o, pgoff_t ind, struct page *vmpage, enum cl_page_type type); diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 37cd7a4..58df0b1 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -558,8 +558,9 @@ extern struct lu_context_key osc_session_key; #define OSC_FLAGS (ASYNC_URGENT|ASYNC_READY) /* osc_page.c */ -int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t ind); +int osc_page_init(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *cl_page, + pgoff_t index); void osc_index2policy(union ldlm_policy_data *policy, const struct cl_object *obj, pgoff_t start, pgoff_t end); void osc_lru_add_batch(struct client_obd *cli, struct list_head *list); diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ebefb06..bb075f3 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -441,6 +441,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSC_NO_SIZE_DATA 0x415 #define OBD_FAIL_OSC_DELAY_CANCEL 0x416 #define OBD_FAIL_OSC_SLOW_PAGE_EVICT 0x417 +#define OBD_FAIL_OSC_UNALIGNED_RDMA_ONLY 0x418 #define OBD_FAIL_PTLRPC 0x500 #define OBD_FAIL_PTLRPC_ACK 0x501 diff --git a/lustre/llite/file.c b/lustre/llite/file.c index c4d763a..bcaa524 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -603,7 +603,7 @@ void ll_dom_finish_open(struct inode *inode, struct ptlrpc_request *req) break; } /* attach VM page to CL page cache */ - page = cl_page_find(env, obj, vmpage->index, vmpage, + page = cl_page_find(env, io, obj, vmpage->index, 
vmpage, CPT_CACHEABLE); if (IS_ERR(page)) { ClearPageUptodate(vmpage); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 3d8ace3..0079307 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -2079,7 +2079,7 @@ static int ll_io_zero_page(struct inode *inode, pgoff_t index, pgoff_t offset, if (!PageDirty(vmpage)) { /* associate cl_page */ - clpage = cl_page_find(env, clob, vmpage->index, + clpage = cl_page_find(env, io, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (IS_ERR(clpage)) GOTO(pagefini, rc = PTR_ERR(clpage)); diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index c4468ee..e2725f0 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -249,7 +249,7 @@ static int ll_read_ahead_page(const struct lu_env *env, struct cl_io *io, GOTO(out, rc = -EBUSY); } - cp = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + cp = cl_page_find(env, io, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (IS_ERR(cp)) { which = RA_STAT_FAILED_GRAB_PAGE; msg = "cl_page_find failed"; @@ -1484,7 +1484,7 @@ int ll_writepage(struct page *vmpage, struct writeback_control *wbc) io->ci_ignore_layout = 1; result = cl_io_init(env, io, CIT_MISC, clob); if (result == 0) { - page = cl_page_find(env, clob, vmpage->index, + page = cl_page_find(env, io, clob, vmpage->index, vmpage, CPT_CACHEABLE); if (!IS_ERR(page)) { lu_ref_add(&page->cp_reference, "writepage", @@ -2034,7 +2034,8 @@ int ll_readpage(struct file *file, struct page *vmpage) } LASSERT(io->ci_state == CIS_IO_GOING); - page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + page = cl_page_find(env, io, clob, vmpage->index, vmpage, + CPT_CACHEABLE); if (!IS_ERR(page)) { LASSERT(page->cp_type == CPT_CACHEABLE); if (likely(!PageUptodate(vmpage))) { diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 57045dd..dfca570 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -382,7 +382,7 @@ ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io, 
size_t size, size_t from = offset & ~PAGE_MASK; size_t to = min(from + size, PAGE_SIZE); - page = cl_page_find(env, obj, offset >> PAGE_SHIFT, + page = cl_page_find(env, io, obj, offset >> PAGE_SHIFT, pv->ldp_pages[i], CPT_TRANSIENT); if (IS_ERR(page)) { rc = PTR_ERR(page); @@ -890,7 +890,8 @@ again: goto again; } - page = cl_page_find(env, clob, vmpage->index, vmpage, CPT_CACHEABLE); + page = cl_page_find(env, io, clob, vmpage->index, vmpage, + CPT_CACHEABLE); if (IS_ERR(page)) GOTO(out, result = PTR_ERR(page)); diff --git a/lustre/llite/vvp_internal.h b/lustre/llite/vvp_internal.h index 40648c4..cba815f 100644 --- a/lustre/llite/vvp_internal.h +++ b/lustre/llite/vvp_internal.h @@ -268,8 +268,8 @@ int lov_read_and_clear_async_rc(struct cl_object *clob); int vvp_io_init(const struct lu_env *env, struct cl_object *obj, struct cl_io *io); int vvp_io_write_commit(const struct lu_env *env, struct cl_io *io); -int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); +int vvp_page_init(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, pgoff_t index); struct lu_object *vvp_object_alloc(const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index acabea0..7ea6a40 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1566,7 +1566,8 @@ static int vvp_io_fault_start(const struct lu_env *env, } } - page = cl_page_find(env, obj, fio->ft_index, vmpage, CPT_CACHEABLE); + page = cl_page_find(env, io, obj, fio->ft_index, vmpage, + CPT_CACHEABLE); if (IS_ERR(page)) GOTO(out, result = PTR_ERR(page)); diff --git a/lustre/llite/vvp_page.c b/lustre/llite/vvp_page.c index 07d3906..88eb189 100644 --- a/lustre/llite/vvp_page.c +++ b/lustre/llite/vvp_page.c @@ -210,8 +210,8 @@ static const struct cl_page_operations vvp_page_ops = { static const struct cl_page_operations vvp_transient_page_ops = { }; 
-int vvp_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) +int vvp_page_init(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, pgoff_t index) { struct cl_page_slice *cpl = cl_object_page_slice(obj, page); struct page *vmpage = page->cp_vmpage; diff --git a/lustre/lov/lov_cl_internal.h b/lustre/lov/lov_cl_internal.h index a2f5b45..4973081 100644 --- a/lustre/lov/lov_cl_internal.h +++ b/lustre/lov/lov_cl_internal.h @@ -625,12 +625,15 @@ static inline bool lov_page_is_empty(const struct cl_page *cp) return cp->cp_lov_index == CP_LOV_INDEX_EMPTY; } -int lov_page_init_empty (const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); -int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); -int lov_page_init_foreign(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); +int lov_page_init_empty (const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index); +int lov_page_init_composite(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index); +int lov_page_init_foreign(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index); struct lu_object *lov_object_alloc (const struct lu_env *env, const struct lu_object_header *hdr, struct lu_device *dev); diff --git a/lustre/lov/lov_object.c b/lustre/lov/lov_object.c index 1aeb6a8..f4dddf4 100644 --- a/lustre/lov/lov_object.c +++ b/lustre/lov/lov_object.c @@ -66,8 +66,9 @@ struct lov_layout_operations { union lov_layout_state *state); int (*llo_print)(const struct lu_env *env, void *cookie, lu_printer_t p, const struct lu_object *o); - int (*llo_page_init)(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index); + int (*llo_page_init)(const 
struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index); int (*llo_lock_init)(const struct lu_env *env, struct cl_object *obj, struct cl_lock *lock, const struct cl_io *io); @@ -1531,11 +1532,12 @@ static int lov_object_print(const struct lu_env *env, void *cookie, return LOV_2DISPATCH_NOLOCK(lu2lov(o), llo_print, env, cookie, p, o); } -static int lov_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) +static int lov_page_init(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index) { - return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, obj, page, - index); + return LOV_2DISPATCH_NOLOCK(cl2lov(obj), llo_page_init, env, io, obj, + page, index); } /** diff --git a/lustre/lov/lov_page.c b/lustre/lov/lov_page.c index 5812d43..f5e26ae 100644 --- a/lustre/lov/lov_page.c +++ b/lustre/lov/lov_page.c @@ -49,8 +49,9 @@ * Lov page operations. * */ -int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) +int lov_page_init_composite(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index) { struct lov_object *loo = cl2lov(obj); struct lov_io *lio = lov_env_io(env); @@ -99,7 +100,7 @@ int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, if (entry < 0 || !lsm_entry_inited(loo->lo_lsm, entry)) { /* non-existing layout component */ - lov_page_init_empty(env, obj, page, index); + lov_page_init_empty(env, io, obj, page, index); RETURN(0); } @@ -125,7 +126,8 @@ int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, subobj = lovsub2cl(r0->lo_sub[stripe]); cl_object_for_each(o, subobj) { if (o->co_ops->coo_page_init) { - rc = o->co_ops->coo_page_init(sub->sub_env, o, page, + rc = o->co_ops->coo_page_init(sub->sub_env, io, o, + page, suboff >> PAGE_SHIFT); if (rc != 0) break; @@ 
-135,8 +137,9 @@ int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj, RETURN(rc); } -int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, - struct cl_page *cl_page, pgoff_t index) +int lov_page_init_empty(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *cl_page, + pgoff_t index) { void *addr; @@ -151,8 +154,9 @@ int lov_page_init_empty(const struct lu_env *env, struct cl_object *obj, RETURN(0); } -int lov_page_init_foreign(const struct lu_env *env, struct cl_object *obj, - struct cl_page *page, pgoff_t index) +int lov_page_init_foreign(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *page, + pgoff_t index) { CDEBUG(D_PAGE, DFID" has no data\n", PFID(lu_object_fid(&obj->co_lu))); RETURN(-ENODATA); diff --git a/lustre/obdclass/cl_page.c b/lustre/obdclass/cl_page.c index 39a7f72..55cf688 100644 --- a/lustre/obdclass/cl_page.c +++ b/lustre/obdclass/cl_page.c @@ -252,9 +252,9 @@ check: return cl_page; } -struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o, - pgoff_t ind, struct page *vmpage, - enum cl_page_type type) +struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_io *io, + struct cl_object *o, pgoff_t ind, + struct page *vmpage, enum cl_page_type type) { struct cl_page *cl_page; struct cl_object *head; @@ -293,7 +293,7 @@ struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o, cl_page->cp_page_index = ind; cl_object_for_each(o, head) { if (o->co_ops->coo_page_init != NULL) { - result = o->co_ops->coo_page_init(env, o, + result = o->co_ops->coo_page_init(env, io, o, cl_page, ind); if (result != 0) { __cl_page_delete(env, cl_page); @@ -325,7 +325,7 @@ struct cl_page *cl_page_alloc(const struct lu_env *env, struct cl_object *o, * * \see cl_object_find(), cl_lock_find() */ -struct cl_page *cl_page_find(const struct lu_env *env, +struct cl_page *cl_page_find(const struct lu_env *env, struct cl_io 
*io, struct cl_object *o, pgoff_t idx, struct page *vmpage, enum cl_page_type type) @@ -365,7 +365,7 @@ struct cl_page *cl_page_find(const struct lu_env *env, } /* allocate and initialize cl_page */ - page = cl_page_alloc(env, o, idx, vmpage, type); + page = cl_page_alloc(env, io, o, idx, vmpage, type); RETURN(page); } EXPORT_SYMBOL(cl_page_find); diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index a53e1bb..54e3780 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -36,6 +36,7 @@ #define DEBUG_SUBSYSTEM S_OSC #include +#include #include "osc_internal.h" @@ -240,8 +241,8 @@ static const struct cl_page_operations osc_page_ops = { .cpo_page_touch = osc_page_touch, }; -int osc_page_init(const struct lu_env *env, struct cl_object *obj, - struct cl_page *cl_page, pgoff_t index) +int osc_page_init(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj, struct cl_page *cl_page, pgoff_t index) { struct osc_object *osc = cl2osc(obj); struct osc_page *opg = cl_object_page_slice(obj, cl_page); @@ -260,6 +261,18 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj, opg->ops_srvlock = osc_io_srvlock(oio); cl_page_slice_add(cl_page, &opg->ops_cl, obj, &osc_page_ops); + if ((lnet_is_rdma_only_page(cl_page->cp_vmpage) || + CFS_FAIL_CHECK(OBD_FAIL_OSC_UNALIGNED_RDMA_ONLY)) && + io->ci_unaligned_dio) { + LASSERT(cl_page->cp_type == CPT_TRANSIENT); + /* setting this flag saves a little time if there are further + * stripes, but this is mostly for clarity in case we need this + * info later + */ + io->ci_allow_unaligned_dio = false; + return -EINVAL; + } + /* reserve an LRU space for this page */ if (cl_page->cp_type == CPT_CACHEABLE) { result = osc_lru_alloc(env, osc_cli(osc), opg); diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index d63d657..ed6704c 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13809,6 +13809,29 @@ test_119i() } run_test 119i "test unaligned aio at varying sizes" 
+test_119j() +{ + (( $MDS1_VERSION >= $(version_code 2.15.58) )) || + skip "Need server version at least 2.15.58" + (( $OSTCOUNT >= 2 )) || skip "needs >= 2 OSTs" + + $LFS setstripe -c 2 -S 1M $DIR/$tfile + + # baseline: unaligned DIO (bs=4095) without the fail_loc, must succeed + dd if=/dev/urandom bs=4095 of=$DIR/$tfile count=4 \ + oflag=direct || + error "trivial unaligned dio failed" + +#define OBD_FAIL_OSC_UNALIGNED_RDMA_ONLY 0x418 + $LCTL set_param fail_loc=0x418 + # same unaligned DIO with the fail_loc set, dd is expected to fail + dd if=/dev/urandom bs=4095 of=$DIR/$tfile count=4 \ + oflag=direct && + error "unaligned DIO succeeded with (fake) rdma only page" + echo "unaligned DIO with rdma only pages failed as expected" +} +run_test 119j "verify unaligned DIO + rdma_only fails as expected" + test_120a() { [ $PARALLEL == "yes" ] && skip "skip parallel run" remote_mds_nodsh && skip "remote MDS with nodsh" -- 1.8.3.1