From 4f97197dad7c6ad3655abd46c8e44a58ea0201fb Mon Sep 17 00:00:00 2001 From: Andrew Perepechko Date: Thu, 7 Oct 2010 18:27:39 +0400 Subject: [PATCH] b=23596 account direct i/o inflight separately from non-direct i/o Account direct i/o inflight rpcs separately from non-direct i/o so that direct i/o, which is limited by max_rpcs_in_flight, should not block non-direct i/o, which is not limited by max_rpcs_in_flight. i=Oleg Drokin i=Alexander Zarochentsev i=Johann Lombardi (author of the original patch) --- lustre/include/obd.h | 2 ++ lustre/ldlm/ldlm_lib.c | 2 ++ lustre/osc/lproc_osc.c | 14 +++++++++----- lustre/osc/osc_request.c | 29 +++++++++++++++++++---------- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 023d9a6..e89805d 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -509,6 +509,8 @@ struct client_obd { struct list_head cl_loi_read_list; int cl_r_in_flight; int cl_w_in_flight; + int cl_dio_r_in_flight; + int cl_dio_w_in_flight; /* just a sum of the loi/lop pending numbers to be exported by /proc */ int cl_pending_w_pages; int cl_pending_r_pages; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 919f1ea5..3666723 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -267,6 +267,8 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) client_obd_list_lock_init(&cli->cl_loi_list_lock); cli->cl_r_in_flight = 0; cli->cl_w_in_flight = 0; + cli->cl_dio_r_in_flight = 0; + cli->cl_dio_w_in_flight = 0; spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); spin_lock_init(&cli->cl_read_page_hist.oh_lock); diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index 5b8cb3b..9693772 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -589,15 +589,19 @@ static int osc_rpc_stats_seq_show(struct seq_file *seq, void *v) client_obd_list_lock(&cli->cl_loi_list_lock); - seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", + seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n", now.tv_sec, now.tv_usec); - seq_printf(seq, "read RPCs in flight: %d\n", + seq_printf(seq, "read RPCs in flight: %d\n", cli->cl_r_in_flight); - seq_printf(seq, "write RPCs in flight: %d\n", + seq_printf(seq, "write RPCs in flight: %d\n", cli->cl_w_in_flight); - seq_printf(seq, "pending write pages: %d\n", + seq_printf(seq, "dio read RPCs in flight: %d\n", + cli->cl_dio_r_in_flight); + seq_printf(seq, "dio write RPCs in flight: %d\n", + cli->cl_dio_w_in_flight); + seq_printf(seq, "pending write pages: %d\n", cli->cl_pending_w_pages); - seq_printf(seq, "pending read pages: %d\n", + seq_printf(seq, "pending read pages: %d\n", cli->cl_pending_r_pages); seq_printf(seq, "\n\t\t\tread\t\t\twrite\n"); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 8ed40e5..434090e 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1713,6 +1713,7 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, if (rc == 0) { aa = ptlrpc_req_async_args(request); + /* Do we need to separate dio stats? */ if (cmd == OBD_BRW_READ) { lprocfs_oh_tally_log2(&cli->cl_read_page_hist, page_count); lprocfs_oh_tally(&cli->cl_read_rpc_hist, cli->cl_r_in_flight); @@ -1727,12 +1728,14 @@ static int async_internal(int cmd, struct obd_export *exp, struct obdo *oa, request->rq_interpret_reply = brw_interpret; ptlrpc_set_add_req(set, request); + client_obd_list_lock(&cli->cl_loi_list_lock); if (cmd == OBD_BRW_READ) - cli->cl_r_in_flight++; + cli->cl_dio_r_in_flight++; else - cli->cl_w_in_flight++; + cli->cl_dio_w_in_flight++; client_obd_list_unlock(&cli->cl_loi_list_lock); + OBD_FAIL_TIMEOUT(OBD_FAIL_OSC_DIO_PAUSE, 3); } else if (cmd == OBD_BRW_WRITE) { client_obd_list_lock(&cli->cl_loi_list_lock); @@ -2282,16 +2285,17 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) cli = aa->aa_cli; client_obd_list_lock(&cli->cl_loi_list_lock); - /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters - * is called so we know whether to go to sync BRWs or wait for more - * RPCs to complete */ - if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE) - cli->cl_w_in_flight--; - else - cli->cl_r_in_flight--; - if (!list_empty(&aa->aa_oaps)) { /* from osc_send_oap_rpc() */ struct osc_async_page *oap, *tmp; + + /* We need to decrement before osc_ap_completion->osc_wake_cache_waiters + * is called so we know whether to go to sync BRWs or wait for more + * RPCs to complete */ + if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE) + cli->cl_w_in_flight--; + else + cli->cl_r_in_flight--; + /* the caller may re-use the oap after the completion call so * we need to clean it up a little */ list_for_each_entry_safe(oap, tmp, &aa->aa_oaps, oap_rpc_item) { @@ -2307,6 +2311,11 @@ static int brw_interpret(struct ptlrpc_request *request, void *data, int rc) if (aa->aa_oa->o_valid & OBD_MD_FLFLAGS && aa->aa_oa->o_flags & OBD_FL_TEMPORARY) OBDO_FREE(aa->aa_oa); + + if (lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE) + cli->cl_dio_w_in_flight--; + else + cli->cl_dio_r_in_flight--; } osc_wake_cache_waiters(cli); osc_check_rpcs(cli); -- 1.8.3.1