From 0bfc8eca5c3d26235846bab347d7e53b8ab0576a Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Fri, 4 Nov 2022 03:13:34 -0600 Subject: [PATCH] LU-15619 osc: pack osc_async_page better The oap_cmd field was used to store a number of other flags, but those were redundant with oap_brw_page.flag, and never used. That allows shrinking oap_cmd down to 2 bits. Modern GCC allows specifying a bitfield for an enum, so the size can be explicitly set. The oap_page_off always holds < PAGE_SIZE, so it can safely fit into PAGE_SHIFT bits, similar to ops_from. However, since this field is used in math operations and we don't need the space, always allocate it as an aligned 16-bit field. This allows packing oap_async_flags, oap_cmd, and oap_page_off into a 32-bit space. This avoids having holes in the struct. The explicit oap_padding fields are needed so that "packed" does not cause the fields to be misaligned, but still allows packing with the following 4-byte field in osc_page. Also move oap_brw_page to the end of the struct, since the bp_padding field therein is useless and can be removed. This allows better packing with the bitfields in struct osc_page. brw_page old size: 32, holes: 0, padding: 4 brw_page new size: 28, holes: 0, padding: 0 osc_async_page old size: 104, holes: 8, padding: 4 osc_async_page new size: 92, holes: 0, bit holes: 10 osc_page old size: 144, holes: 8, bit holes: 4 osc_page new size: 128, holes: 0, bit holes: 4 Together this saves 16 bytes *per page* in cache, and fits osc_page into a noce-sized allocation. That is 512MiB on a system with 128GiB of cache. Signed-off-by: Patrick Farrell Change-Id: Ief6aa7664d7299dba02332bc9029e4e9219d0876 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/46721 Reviewed-by: Oleg Drokin Reviewed-by: Arshad Hussain Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo --- lustre/include/lustre_osc.h | 30 +++++++++++++++++++----------- lustre/include/obd.h | 3 +-- lustre/obdecho/echo.c | 2 +- lustre/osc/osc_io.c | 9 ++++----- lustre/osc/osc_page.c | 4 +--- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h index 38abb70..14ed9e2 100644 --- a/lustre/include/lustre_osc.h +++ b/lustre/include/lustre_osc.h @@ -60,31 +60,38 @@ struct osc_quota_info { __u32 oqi_id; }; -enum async_flags { - ASYNC_READY = 0x1, /* ap_make_ready will not be called before this - page is added to an rpc */ - ASYNC_URGENT = 0x2, /* page must be put into an RPC before return */ +enum oap_async_flags { + ASYNC_READY = 0x1, /* ap_make_ready will not be called before + * this page is added to an rpc */ + ASYNC_URGENT = 0x2, /* page must be put into RPC before return */ ASYNC_COUNT_STABLE = 0x4, /* ap_refresh_count will not be called to give the caller a chance to update or cancel the size of the io */ - ASYNC_HP = 0x10, + ASYNC_HP = 0x8, + OAP_ASYNC_MAX, + OAP_ASYNC_BITS = 4 }; +/* add explicit padding to keep fields aligned despite "packed", + * which is needed to pack with following field in osc_page */ +#define OAP_PAD_BITS (16 - OBD_BRW_WRITE - OAP_ASYNC_BITS) struct osc_async_page { - unsigned short oap_cmd; + unsigned short oap_page_off /* :PAGE_SHIFT */; + unsigned int oap_cmd:OBD_BRW_WRITE; + enum oap_async_flags oap_async_flags:OAP_ASYNC_BITS; + unsigned int oap_padding1:OAP_PAD_BITS; /* unused */ + unsigned int oap_padding2; /* unused */ struct list_head oap_pending_item; struct list_head oap_rpc_item; loff_t oap_obj_off; - unsigned oap_page_off; - enum async_flags oap_async_flags; - - struct brw_page oap_brw_page; struct ptlrpc_request *oap_request; struct osc_object *oap_obj; -}; + + struct brw_page oap_brw_page; +} __attribute__((packed)); #define oap_page oap_brw_page.pg #define oap_count oap_brw_page.count @@ -92,6 +99,7 @@ struct osc_async_page { static inline struct osc_async_page *brw_page2oap(struct brw_page *pga) { + BUILD_BUG_ON(OAP_ASYNC_MAX - 1 >= (1 << OAP_ASYNC_BITS)); return container_of(pga, struct osc_async_page, oap_brw_page); } diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 7d27b5d..f556eb9 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -125,8 +125,7 @@ struct brw_page { u16 bp_off_diff; /* used for encryption: difference with count in clear text page */ u16 bp_count_diff; - u32 bp_padding; -}; +} __attribute__((packed)); struct timeout_item { enum timeout_event ti_event; diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 5de02d8..720b0e3 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -400,7 +400,7 @@ static int echo_commitrw(const struct lu_env *env, int cmd, if (rc) GOTO(commitrw_cleanup, rc); - if ((cmd & OBD_BRW_RWMASK) == OBD_BRW_READ) { + if (cmd == OBD_BRW_READ) { CDEBUG(D_PAGE, "reading %d obdos with %d IOs\n", objcount, niocount); } else { diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index f06dec3..524ab9b 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -516,14 +516,13 @@ static bool trunc_check_cb(const struct lu_env *env, struct cl_io *io, oap = &ops->ops_oap; if (oap->oap_cmd & OBD_BRW_WRITE && - !list_empty(&oap->oap_pending_item)) + !list_empty(&oap->oap_pending_item)) CL_PAGE_DEBUG(D_ERROR, env, page, "exists %llu/%s.\n", - start, current->comm); + start, current->comm); if (PageLocked(page->cp_vmpage)) - CDEBUG(D_CACHE, "page %p index %lu locked for %d.\n", - ops, osc_index(ops), - oap->oap_cmd & OBD_BRW_RWMASK); + CDEBUG(D_CACHE, "page %p index %lu locked for cmd=%d\n", + ops, osc_index(ops), oap->oap_cmd); } return true; } diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 06aff96..4467cd3 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -300,10 +300,8 @@ void osc_page_submit(const struct lu_env *env, struct osc_page *opg, oap->oap_count = opg->ops_to - opg->ops_from + 1; oap->oap_brw_flags = OBD_BRW_SYNC | brw_flags; - if (oio->oi_cap_sys_resource) { + if (oio->oi_cap_sys_resource) oap->oap_brw_flags |= OBD_BRW_SYS_RESOURCE; - oap->oap_cmd |= OBD_BRW_SYS_RESOURCE; - } osc_page_transfer_get(opg, "transfer\0imm"); osc_page_transfer_add(env, opg, crt); -- 1.8.3.1