From: Patrick Farrell Date: Fri, 7 Oct 2016 20:51:19 +0000 (-0500) Subject: LU-8515 osc: Send RPCs when extents are full X-Git-Tag: 2.9.51~60 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=ecb6712a19fa836ecdba41ccda80de0a10b1336a LU-8515 osc: Send RPCs when extents are full Currently, Lustre decides to send an RPC under a number of conditions (such as memory pressure or lock cancellation); one of the conditions it looks for is "enough dirty pages to fill an RPC". This worked fine when only one process could be dirtying pages at a time, but in newer Lustre versions, more than one process can write to the same file (and the same osc object) at once. In this case, the "count dirty pages method" will see there are enough dirty pages to fill an RPC, but since the dirty pages are being created by multiple writers, they are not contiguous and will not fit into one RPC. This resulted in many RPCs of less than full size being sent, despite a good I/O pattern. (Earlier versions of Lustre usually sent only full RPCs when presented with this pattern.) Instead, we remove this check and add extents to a special full extent list when they reach max pages per RPC, then send from that list. (This is similar to high priority and urgent extents.) With a good I/O pattern, like that usually used in benchmarking, it should be possible to send only full size RPCs. This patch achieves that without degrading performance in other cases. In IOR tests with multiple writers to a single file, this patch improves performance by several times, and returns performance to equal levels (single striped files) or much greater levels (very high speed OSTs, files with many stripes) vs earlier versions. Supporting data is provided in LU-8515. 
Signed-off-by: Patrick Farrell Change-Id: Icc3c99b953d11f774600996648c9b20ed1f81f89 Reviewed-on: https://review.whamcloud.com/22012 Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index 236263c..2fd076b 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -594,6 +594,10 @@ int osc_extent_release(const struct lu_env *env, struct osc_extent *ext) if (ext->oe_urgent) list_move_tail(&ext->oe_link, &obj->oo_urgent_exts); + else if (ext->oe_nr_pages == ext->oe_mppr) { + list_move_tail(&ext->oe_link, + &obj->oo_full_exts); + } } osc_object_unlock(obj); @@ -1738,9 +1742,10 @@ static int osc_makes_rpc(struct client_obd *cli, struct osc_object *osc, CDEBUG(D_CACHE, "cache waiters forcing RPC\n"); RETURN(1); } - if (atomic_read(&osc->oo_nr_writes) >= - cli->cl_max_pages_per_rpc) + if (!list_empty(&osc->oo_full_exts)) { + CDEBUG(D_CACHE, "full extent ready, make an RPC\n"); RETURN(1); + } } else { if (atomic_read(&osc->oo_nr_reads) == 0) RETURN(0); @@ -1913,6 +1918,7 @@ static int try_to_add_extent_for_io(struct client_obd *cli, EASSERT((ext->oe_state == OES_CACHE || ext->oe_state == OES_LOCK_DONE), ext); + OSC_EXTENT_DUMP(D_CACHE, ext, "trying to add this extent\n"); if (data->erd_max_extents == 0) RETURN(0); @@ -2042,19 +2048,20 @@ static unsigned int get_write_extents(struct osc_object *obj, struct osc_extent, oe_link); if (!try_to_add_extent_for_io(cli, ext, &data)) return data.erd_page_count; + } + if (data.erd_page_count == data.erd_max_pages) + return data.erd_page_count; - if (!ext->oe_intree) - continue; - - while ((ext = next_extent(ext)) != NULL) { - if ((ext->oe_state != OES_CACHE) || - (!list_empty(&ext->oe_link) && - ext->oe_owner != NULL)) - continue; - - if (!try_to_add_extent_for_io(cli, ext, &data)) - return data.erd_page_count; - } + /* One key difference between full extents and 
other extents: full + * extents can usually only be added if the rpclist was empty, so if we + * can't add one, we continue on to trying to add normal extents. This + * is so we don't miss adding extra extents to an RPC containing high + * priority or urgent extents. */ + while (!list_empty(&obj->oo_full_exts)) { + ext = list_entry(obj->oo_full_exts.next, + struct osc_extent, oe_link); + if (!try_to_add_extent_for_io(cli, ext, &data)) + break; } if (data.erd_page_count == data.erd_max_pages) return data.erd_page_count; @@ -2833,8 +2840,12 @@ again: osc_update_pending(obj, OBD_BRW_WRITE, -ext->oe_nr_pages); } - EASSERT(list_empty(&ext->oe_link), ext); - list_add_tail(&ext->oe_link, &list); + /* This extent could be on the full extents list, that's OK */ + EASSERT(!ext->oe_hp && !ext->oe_urgent, ext); + if (!list_empty(&ext->oe_link)) + list_move_tail(&ext->oe_link, &list); + else + list_add_tail(&ext->oe_link, &list); ext = next_extent(ext); } diff --git a/lustre/osc/osc_cl_internal.h b/lustre/osc/osc_cl_internal.h index 5648a13..ebafbdf 100644 --- a/lustre/osc/osc_cl_internal.h +++ b/lustre/osc/osc_cl_internal.h @@ -144,7 +144,7 @@ struct osc_object { */ struct list_head oo_hp_exts; /* list of hp extents */ struct list_head oo_urgent_exts; /* list of writeback extents */ - struct list_head oo_rpc_exts; + struct list_head oo_full_exts; struct list_head oo_reading_exts; diff --git a/lustre/osc/osc_object.c b/lustre/osc/osc_object.c index aa878d2..502cfef 100644 --- a/lustre/osc/osc_object.c +++ b/lustre/osc/osc_object.c @@ -84,7 +84,7 @@ static int osc_object_init(const struct lu_env *env, struct lu_object *obj, osc->oo_root.rb_node = NULL; INIT_LIST_HEAD(&osc->oo_hp_exts); INIT_LIST_HEAD(&osc->oo_urgent_exts); - INIT_LIST_HEAD(&osc->oo_rpc_exts); + INIT_LIST_HEAD(&osc->oo_full_exts); INIT_LIST_HEAD(&osc->oo_reading_exts); atomic_set(&osc->oo_nr_reads, 0); atomic_set(&osc->oo_nr_writes, 0); @@ -113,7 +113,7 @@ static void osc_object_free(const struct lu_env *env, 
struct lu_object *obj) LASSERT(osc->oo_root.rb_node == NULL); LASSERT(list_empty(&osc->oo_hp_exts)); LASSERT(list_empty(&osc->oo_urgent_exts)); - LASSERT(list_empty(&osc->oo_rpc_exts)); + LASSERT(list_empty(&osc->oo_full_exts)); LASSERT(list_empty(&osc->oo_reading_exts)); LASSERT(atomic_read(&osc->oo_nr_reads) == 0); LASSERT(atomic_read(&osc->oo_nr_writes) == 0);