From 34cbc31a9e73498d7f9324e942c85fbc44791c83 Mon Sep 17 00:00:00 2001 From: Patrick Farrell Date: Wed, 12 Jan 2022 15:40:28 -0500 Subject: [PATCH] LU-12585 obdfilter: Use actual I/O bytes in stats Currently the obdfilter stats note the number of bytes requested by the client rather than the number of bytes actually read or written. This is particularly confusing for reads because clients can request more data than exists and some applications do this normally. This results in statistics that can be off by almost any amount from the actual number of bytes read. This patch moves the stats to be collected after 'commit', which allows the true number of bytes to be recorded. It also adds the 'commit' operation to the time recorded in the stats for each operation, which is reasonable because the commit must occur before the operation is complete. Signed-off-by: Patrick Farrell Change-Id: I81fe9a6afdad5b48e8421f4aa72b8ef10a0eee93 --- lustre/include/obd.h | 3 ++- lustre/include/obd_class.h | 6 +++-- lustre/mdt/mdt_internal.h | 3 ++- lustre/mdt/mdt_io.c | 26 ++++++++------------ lustre/obdecho/echo.c | 3 ++- lustre/obdecho/echo_client.c | 2 +- lustre/ofd/ofd_internal.h | 3 ++- lustre/ofd/ofd_io.c | 38 ++++++++++++++++------------- lustre/target/tgt_handler.c | 57 +++++++++++++++++++++++++------------------- 9 files changed, 77 insertions(+), 64 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index d9c014e..a17867c 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1049,7 +1049,8 @@ struct obd_ops { struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *remote, int pages, - struct niobuf_local *local, int rc); + struct niobuf_local *local, int rc, int nob, + ktime_t kstart); int (*o_init_export)(struct obd_export *exp); int (*o_destroy_export)(struct obd_export *exp); diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index bbb571b..361e586 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1138,7 +1138,8 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int pages, - struct niobuf_local *local, const int orig_rc) + struct niobuf_local *local, const int orig_rc, + int nob, ktime_t kstart) { int rc; ENTRY; @@ -1154,7 +1155,8 @@ static inline int obd_commitrw(const struct lu_env *env, int cmd, } rc = OBP(exp->exp_obd, commitrw)(env, cmd, exp, oa, objcount, obj, - rnb, pages, local, orig_rc); + rnb, pages, local, orig_rc, nob, + kstart); RETURN(rc); } diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 2726f86..47faea4 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -1360,7 +1360,8 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int npages, - struct niobuf_local *lnb, int old_rc); + struct niobuf_local *lnb, int old_rc, int nob, + ktime_t kstart); int mdt_punch_hdl(struct tgt_session_info *tsi); int mdt_fallocate_hdl(struct tgt_session_info *tsi); int mdt_glimpse_enqueue(struct mdt_thread_info *mti, struct ldlm_namespace *ns, diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index bf80be4..a25f66e 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -362,10 +362,8 @@ static int mdt_preprw_read(const struct lu_env *env, struct obd_export *exp, struct mdt_device *mdt, struct mdt_object *mo, struct lu_attr *la, int niocount, struct niobuf_remote *rnb, int *nr_local, - struct niobuf_local *lnb, char *jobid) + struct niobuf_local *lnb) { - struct tgt_session_info *tsi = tgt_ses_info(env); - struct ptlrpc_request *req = tgt_ses_req(tsi); struct dt_object *dob; int i, j, rc, tot_bytes = 0; int maxlnb = *nr_local; @@ -424,7 +422,6 @@ static int mdt_preprw_read(const struct lu_env *env, struct obd_export *exp, if (unlikely(rc)) GOTO(buf_put, rc); - mdt_counter_incr(req, LPROC_MDT_IO_READ, tot_bytes); RETURN(0); buf_put: dt_bufs_put(env, dob, lnb, *nr_local); @@ -437,10 +434,8 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp, struct lu_attr *la, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, - struct niobuf_local *lnb, char *jobid) + struct niobuf_local *lnb) { - struct tgt_session_info *tsi = tgt_ses_info(env); - struct ptlrpc_request *req = tgt_ses_req(tsi); struct dt_object *dob; int i, j, k, rc = 0, tot_bytes = 0; int maxlnb = *nr_local; @@ -500,7 +495,6 @@ static int mdt_preprw_write(const struct lu_env *env, struct obd_export *exp, if (likely(rc)) GOTO(err, rc); - mdt_counter_incr(req, LPROC_MDT_IO_WRITE, tot_bytes); RETURN(0); err: dt_bufs_put(env, dob, lnb, *nr_local); @@ -525,7 +519,6 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct lu_attr *la = &info->mti_attr.ma_attr; struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); struct mdt_object *mo; - char *jobid; int rc = 0; /* The default value PTLRPC_MAX_BRW_PAGES is set in tgt_brw_write() @@ -533,8 +526,6 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, if (*nr_local > MD_MAX_BRW_PAGES) *nr_local = MD_MAX_BRW_PAGES; - jobid = tsi->tsi_jobid; - if (!oa || objcount != 1 || obj->ioo_bufcnt == 0) { CERROR("%s: bad parameters %p/%i/%i\n", exp->exp_obd->obd_name, oa, objcount, obj->ioo_bufcnt); @@ -551,13 +542,11 @@ int mdt_obd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, if (cmd == OBD_BRW_WRITE) { la_from_obdo(la, oa, OBD_MD_FLGETATTR); rc = mdt_preprw_write(env, exp, mdt, mo, la, oa, - objcount, obj, rnb, nr_local, lnb, - jobid); + objcount, obj, rnb, nr_local, lnb); } else if (cmd == OBD_BRW_READ) { tgt_grant_prepare_read(env, exp, oa); rc = mdt_preprw_read(env, exp, mdt, mo, la, - obj->ioo_bufcnt, rnb, nr_local, lnb, - jobid); + obj->ioo_bufcnt, rnb, nr_local, lnb); obdo_from_la(oa, la, LA_ATIME); } else { CERROR("%s: wrong cmd %d received!\n", @@ -753,8 +742,11 @@ void mdt_dom_obj_lvb_update(const struct lu_env *env, struct mdt_object *mo, int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int npages, - struct niobuf_local *lnb, int old_rc) + struct niobuf_local *lnb, int old_rc, int nob, + ktime_t kstart) { + struct tgt_session_info *tsi = tgt_ses_info(env); + struct ptlrpc_request *req = tgt_ses_req(tsi); struct mdt_thread_info *info = mdt_th_info(env); struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev); struct mdt_object *mo = info->mti_object; @@ -854,6 +846,7 @@ int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, oa->o_uid = mapped_uid; oa->o_gid = mapped_gid; oa->o_projid = mapped_projid; + mdt_counter_incr(req, LPROC_MDT_IO_WRITE, nob); } else if (cmd == OBD_BRW_READ) { /* If oa != NULL then mdt_preprw_read updated the inode * atime and we should update the lvb so that other glimpses @@ -863,6 +856,7 @@ int mdt_obd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, rc = mdt_commitrw_read(env, mdt, mo, objcount, npages, lnb); if (old_rc) rc = old_rc; + mdt_counter_incr(req, LPROC_MDT_IO_READ, nob); } else { rc = -EPROTO; } diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 5183047..b65d01a 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -384,7 +384,8 @@ static int echo_commitrw(const struct lu_env *env, int cmd, struct obd_export *export, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rb, int niocount, - struct niobuf_local *res, int rc) + struct niobuf_local *res, int rc, int nob, + ktime_t kstart) { struct obd_device *obd; int pgs = 0; diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c index 500358c..79f7596 100644 --- a/lustre/obdecho/echo_client.c +++ b/lustre/obdecho/echo_client.c @@ -2714,7 +2714,7 @@ static int echo_client_prep_commit(const struct lu_env *env, } ret = obd_commitrw(env, rw, exp, oa, 1, &ioo, &rnb, npages, lnb, - ret); + ret, rnb.rnb_len, 0); if (ret != 0) break; diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 12da352..cb2280e 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -352,7 +352,8 @@ int ofd_preprw(const struct lu_env *env,int cmd, struct obd_export *exp, int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int npages, - struct niobuf_local *lnb, int old_rc); + struct niobuf_local *lnb, int old_rc, int nob, + ktime_t kstart); /* ofd_trans.c */ struct thandle *ofd_trans_create(const struct lu_env *env, diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 3b96d63..63fdb70 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -570,14 +570,13 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, struct ofd_device *ofd, const struct lu_fid *fid, struct lu_attr *la, struct obdo *oa, int niocount, struct niobuf_remote *rnb, int *nr_local, - struct niobuf_local *lnb, char *jobid) + struct niobuf_local *lnb) { struct ofd_object *fo; int i, j, rc, tot_bytes = 0; enum dt_bufs_type dbt = DT_BUFS_TYPE_READ; int maxlnb = *nr_local; __u64 begin, end; - ktime_t kstart = ktime_get(); ENTRY; LASSERT(env != NULL); @@ -644,9 +643,6 @@ static int ofd_preprw_read(const struct lu_env *env, struct obd_export *exp, niocount, READ); - ofd_counter_incr(exp, LPROC_OFD_STATS_READ_BYTES, jobid, tot_bytes); - ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, - ktime_us_delta(ktime_get(), kstart)); RETURN(0); buf_put: @@ -685,14 +681,13 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, struct lu_attr *la, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int *nr_local, - struct niobuf_local *lnb, char *jobid) + struct niobuf_local *lnb) { struct ofd_object *fo; int i, j, k, rc = 0, tot_bytes = 0; enum dt_bufs_type dbt = DT_BUFS_TYPE_WRITE; int maxlnb = *nr_local; __u64 begin, end; - ktime_t kstart = ktime_get(); struct range_lock *range = &ofd_info(env)->fti_write_range; ENTRY; @@ -868,9 +863,6 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, range_lock(&fo->ofo_write_tree, range); ofd_info(env)->fti_range_locked = 1; - ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE_BYTES, jobid, tot_bytes); - ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, - ktime_us_delta(ktime_get(), kstart)); RETURN(0); err: dt_bufs_put(env, ofd_object_child(fo), lnb, *nr_local); @@ -913,7 +905,6 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, struct tgt_session_info *tsi = tgt_ses_info(env); struct ofd_device *ofd = ofd_exp(exp); struct ofd_thread_info *info; - char *jobid; const struct lu_fid *fid = &oa->o_oi.oi_fid; int rc = 0; @@ -925,10 +916,8 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, if (tgt_ses_req(tsi) == NULL) { /* echo client case */ info = ofd_info_init(env, exp); - jobid = NULL; } else { info = tsi2ofd_info(tsi); - jobid = tsi->tsi_jobid; } LASSERT(oa != NULL); @@ -960,12 +949,11 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp, if (cmd == OBD_BRW_WRITE) { la_from_obdo(&info->fti_attr, oa, OBD_MD_FLGETATTR); rc = ofd_preprw_write(env, exp, ofd, fid, &info->fti_attr, oa, - objcount, obj, rnb, nr_local, lnb, jobid); + objcount, obj, rnb, nr_local, lnb); } else if (cmd == OBD_BRW_READ) { tgt_grant_prepare_read(env, exp, oa); rc = ofd_preprw_read(env, exp, ofd, fid, &info->fti_attr, oa, - obj->ioo_bufcnt, rnb, nr_local, lnb, - jobid); + obj->ioo_bufcnt, rnb, nr_local, lnb); } else { CERROR("%s: wrong cmd %d received!\n", exp->exp_obd->obd_name, cmd); @@ -1447,19 +1435,27 @@ out: int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, struct niobuf_remote *rnb, int npages, - struct niobuf_local *lnb, int old_rc) + struct niobuf_local *lnb, int old_rc, int nob, ktime_t kstart) { + struct tgt_session_info *tsi = tgt_ses_info(env); struct ofd_thread_info *info = ofd_info(env); struct ofd_device *ofd = ofd_exp(exp); const struct lu_fid *fid = &oa->o_oi.oi_fid; struct ldlm_namespace *ns = ofd->ofd_namespace; struct ldlm_resource *rs = NULL; + char *jobid; __u64 valid; int rc = 0; int root_squash = 0; LASSERT(npages > 0); + if (tgt_ses_req(tsi) == NULL) { /* echo client case */ + jobid = NULL; + } else { + jobid = tsi->tsi_jobid; + } + if (cmd == OBD_BRW_WRITE) { struct lu_nodemap *nodemap; __u32 mapped_uid, mapped_gid, mapped_projid; @@ -1559,11 +1555,19 @@ int ofd_commitrw(const struct lu_env *env, int cmd, struct obd_export *exp, oa->o_uid = mapped_uid; oa->o_gid = mapped_gid; oa->o_projid = mapped_projid; + CWARN("write NOB: %d\n", nob); + ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE_BYTES, jobid, nob); + ofd_counter_incr(exp, LPROC_OFD_STATS_WRITE, jobid, + ktime_us_delta(ktime_get(), kstart)); } else if (cmd == OBD_BRW_READ) { rc = ofd_commitrw_read(env, ofd, fid, objcount, npages, lnb); if (old_rc) rc = old_rc; + CWARN("read NOB: %d\n", nob); + ofd_counter_incr(exp, LPROC_OFD_STATS_READ_BYTES, jobid, nob); + ofd_counter_incr(exp, LPROC_OFD_STATS_READ, jobid, + ktime_us_delta(ktime_get(), kstart)); } else { LBUG(); rc = -EPROTO; diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 5e97949..53c0533 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -2260,6 +2260,7 @@ int tgt_brw_read(struct tgt_session_info *tsi) npages_read; struct tgt_thread_big_cache *tbc = req->rq_svc_thread->t_data; const char *obd_name = exp->exp_obd->obd_name; + ktime_t kstart; ENTRY; @@ -2358,6 +2359,7 @@ int tgt_brw_read(struct tgt_session_info *tsi) repbody->oa = body->oa; npages = PTLRPC_MAX_BRW_PAGES; + kstart = ktime_get(); rc = obd_preprw(tsi->tsi_env, OBD_BRW_READ, exp, &repbody->oa, 1, ioo, remote_nb, &npages, local_nb); if (rc != 0) @@ -2375,7 +2377,6 @@ int tgt_brw_read(struct tgt_session_info *tsi) GOTO(out_commitrw, rc = -ENOMEM); } - nob = 0; npages_read = npages; for (i = 0; i < npages; i++) { int page_rc = local_nb[i].lnb_rc; @@ -2474,7 +2475,7 @@ int tgt_brw_read(struct tgt_session_info *tsi) out_commitrw: /* Must commit after prep above in all cases */ rc = obd_commitrw(tsi->tsi_env, OBD_BRW_READ, exp, &repbody->oa, 1, ioo, - remote_nb, npages, local_nb, rc); + remote_nb, npages, local_nb, rc, nob, kstart); out_lock: tgt_brw_unlock(exp, ioo, remote_nb, &lockh, LCK_PR); @@ -2618,6 +2619,8 @@ int tgt_brw_write(struct tgt_session_info *tsi) const char *obd_name = exp->exp_obd->obd_name; /* '1' for consistency with code that checks !mpflag to restore */ unsigned int mpflags = 1; + ktime_t kstart; + int nob = 0; ENTRY; @@ -2744,6 +2747,7 @@ int tgt_brw_write(struct tgt_session_info *tsi) repbody->oa = body->oa; npages = PTLRPC_MAX_BRW_PAGES; + kstart = ktime_get(); rc = obd_preprw(tsi->tsi_env, OBD_BRW_WRITE, exp, &repbody->oa, objcount, ioo, remote_nb, &npages, local_nb); if (rc < 0) @@ -2828,15 +2832,41 @@ skip_transfer: OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK2, cfs_fail_val); out_commitrw: + /* doing this before commit lets us calculate actual bytes (nob) for + * OFD stats, and if the commit fails, the per page niobuf rcs are + * ignored anyway + */ + if (rc == 0) { + /* set per-requested niobuf return codes */ + for (i = j = 0; i < niocount; i++) { + int len = remote_nb[i].rnb_len; + + nob += len; + rcs[i] = 0; + do { + LASSERT(j < npages); + if (local_nb[j].lnb_rc < 0) + rcs[i] = local_nb[j].lnb_rc; + len -= local_nb[j].lnb_len; + j++; + } while (len > 0); + LASSERT(len == 0); + } + LASSERT(j == npages); + } /* Must commit after prep above in all cases */ rc = obd_commitrw(tsi->tsi_env, OBD_BRW_WRITE, exp, &repbody->oa, - objcount, ioo, remote_nb, npages, local_nb, rc); + objcount, ioo, remote_nb, npages, local_nb, rc, nob, + kstart); if (rc == -ENOTCONN) /* quota acquire process has been given up because * either the client has been evicted or the client * has timed out the request already */ no_reply = true; + if (rc == 0) + ptlrpc_lprocfs_brw(req, nob); + for (i = 0; i < niocount; i++) { if (!(local_nb[i].lnb_flags & OBD_BRW_ASYNC)) { wait_sync = true; @@ -2850,27 +2880,6 @@ out_commitrw: */ repbody->oa.o_valid &= ~(OBD_MD_FLMTIME | OBD_MD_FLATIME); - if (rc == 0) { - int nob = 0; - - /* set per-requested niobuf return codes */ - for (i = j = 0; i < niocount; i++) { - int len = remote_nb[i].rnb_len; - - nob += len; - rcs[i] = 0; - do { - LASSERT(j < npages); - if (local_nb[j].lnb_rc < 0) - rcs[i] = local_nb[j].lnb_rc; - len -= local_nb[j].lnb_len; - j++; - } while (len > 0); - LASSERT(len == 0); - } - LASSERT(j == npages); - ptlrpc_lprocfs_brw(req, nob); - } out_lock: tgt_brw_unlock(exp, ioo, remote_nb, &lockh, LCK_PW); if (desc) -- 1.8.3.1