From 150246e73c925d628ce9cbbd8184c0b0eefc9a16 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Mon, 1 Sep 2014 12:38:31 +0200 Subject: [PATCH] LU-5521 grant: quiet message on grant waiting timeout Use at_max in osc_enter_cache() to bound how long we wait for grant space before switching to synchronous I/Os. Do not print a message on the console when the timeout is hit since such long wait can be legitimate with flaky network (i.e. BRW is resent multiple times). Signed-off-by: Johann Lombardi Change-Id: I9c507a11ea4a3612e932e22bebb5087dbdcf248a Reviewed-on: http://review.whamcloud.com/11716 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- lustre/osc/osc_cache.c | 94 +++++++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 40 deletions(-) diff --git a/lustre/osc/osc_cache.c b/lustre/osc/osc_cache.c index de90f1a..eb3084f 100644 --- a/lustre/osc/osc_cache.c +++ b/lustre/osc/osc_cache.c @@ -1321,7 +1321,7 @@ static int osc_completion(const struct lu_env *env, struct osc_async_page *oap, struct client_obd *__tmp = (cli); \ CDEBUG(lvl, "%s: grant { dirty: %ld/%ld dirty_pages: %d/%d " \ "dropped: %ld avail: %ld, reserved: %ld, flight: %d }" \ - "lru {in list: %d, left: %d, waiters: %d }" fmt, \ + "lru {in list: %d, left: %d, waiters: %d }" fmt "\n", \ __tmp->cl_import->imp_obd->obd_name, \ __tmp->cl_dirty_pages, __tmp->cl_dirty_max_pages, \ atomic_read(&obd_dirty_pages), obd_max_dirty_pages, \ @@ -1470,7 +1470,7 @@ static int osc_enter_cache_try(struct client_obd *cli, { int rc; - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d", bytes); rc = osc_reserve_grant(cli, bytes); if (rc < 0) @@ -1511,15 +1511,17 @@ static int ocw_granted(struct client_obd *cli, struct osc_cache_waiter *ocw) static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, struct osc_async_page *oap, int bytes) { - struct osc_object *osc = oap->oap_obj; - struct lov_oinfo *loi = osc->oo_oinfo; - struct osc_cache_waiter ocw; - struct l_wait_info lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(600), NULL, - LWI_ON_SIGNAL_NOOP, NULL); - int rc = -EDQUOT; + struct osc_object *osc = oap->oap_obj; + struct lov_oinfo *loi = osc->oo_oinfo; + struct osc_cache_waiter ocw; + struct l_wait_info lwi; + int rc = -EDQUOT; ENTRY; - OSC_DUMP_GRANT(D_CACHE, cli, "need:%d.\n", bytes); + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(AT_OFF ? obd_timeout : at_max), + NULL, LWI_ON_SIGNAL_NOOP, NULL); + + OSC_DUMP_GRANT(D_CACHE, cli, "need:%d", bytes); client_obd_list_lock(&cli->cl_loi_list_lock); @@ -1527,12 +1529,16 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, * of queued writes and create a discontiguous rpc stream */ if (OBD_FAIL_CHECK(OBD_FAIL_OSC_NO_GRANT) || cli->cl_dirty_max_pages == 0 || - cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) + cli->cl_ar.ar_force_sync || loi->loi_ar.ar_force_sync) { + OSC_DUMP_GRANT(D_CACHE, cli, "forced sync i/o"); GOTO(out, rc = -EDQUOT); + } /* Hopefully normal case - cache space and write credits available */ - if (osc_enter_cache_try(cli, oap, bytes, 0)) + if (osc_enter_cache_try(cli, oap, bytes, 0)) { + OSC_DUMP_GRANT(D_CACHE, cli, "granted from cache"); GOTO(out, rc = 0); + } /* We can get here for two reasons: too many dirty pages in cache, or * run out of grants. In both cases we should write dirty pages out. @@ -1557,42 +1563,52 @@ static int osc_enter_cache(const struct lu_env *env, struct client_obd *cli, client_obd_list_lock(&cli->cl_loi_list_lock); - /* l_wait_event is interrupted by signal, or timed out */ if (rc < 0) { - switch (rc) { - case -ETIMEDOUT: - OSC_DUMP_GRANT(D_ERROR, cli, - "try to reserve %d.\n", bytes); - osc_extent_tree_dump(D_ERROR, osc); - rc = -EDQUOT; - break; - case -EINTR: - /* Ensures restartability - LU-3581 */ - rc = -ERESTARTSYS; - break; - default: - CDEBUG(D_CACHE, "%s: event for cache space @" - " %p never arrived due to %d\n", - cli->cl_import->imp_obd->obd_name, - &ocw, rc); - break; - } + /* l_wait_event is interrupted by signal or timed out */ list_del_init(&ocw.ocw_entry); - GOTO(out, rc); + break; } - LASSERT(list_empty(&ocw.ocw_entry)); rc = ocw.ocw_rc; if (rc != -EDQUOT) - GOTO(out, rc); - if (osc_enter_cache_try(cli, oap, bytes, 0)) - GOTO(out, rc = 0); + break; + if (osc_enter_cache_try(cli, oap, bytes, 0)) { + rc = 0; + break; + } + } + + switch (rc) { + case 0: + OSC_DUMP_GRANT(D_CACHE, cli, "finally got grant space"); + break; + case -ETIMEDOUT: + OSC_DUMP_GRANT(D_CACHE, cli, "timeout, fall back to sync i/o"); + osc_extent_tree_dump(D_CACHE, osc); + /* fall back to synchronous I/O */ + rc = -EDQUOT; + break; + case -EINTR: + /* Ensures restartability - LU-3581 */ + OSC_DUMP_GRANT(D_CACHE, cli, "interrupted"); + rc = -ERESTARTSYS; + break; + case -EDQUOT: + OSC_DUMP_GRANT(D_CACHE, cli, "no grant space, fall back to sync" + " i/o"); + break; + default: + CDEBUG(D_CACHE, "%s: event for cache space @ %p never arrived " + "due to %d, fall back to sync i/o\n", + cli->cl_import->imp_obd->obd_name, &ocw, rc); + /* fall back to synchronous I/O */ + rc = -EDQUOT; + break; } EXIT; out: client_obd_list_unlock(&cli->cl_loi_list_lock); - OSC_DUMP_GRANT(D_CACHE, cli, "returned %d.\n", rc); RETURN(rc); } @@ -1617,10 +1633,8 @@ void osc_wake_cache_waiters(struct client_obd *cli) goto wakeup; } - ocw->ocw_rc = 0; - if (!osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0)) - ocw->ocw_rc = -EDQUOT; - + if (osc_enter_cache_try(cli, ocw->ocw_oap, ocw->ocw_grant, 0)) + ocw->ocw_rc = 0; wakeup: CDEBUG(D_CACHE, "wake up %p for oap %p, avail grant %ld, %d\n", ocw, ocw->ocw_oap, cli->cl_avail_grant, ocw->ocw_rc); -- 1.8.3.1