From bc5ee103b432d1d390e3b84ac7804e1f513ad6e4 Mon Sep 17 00:00:00 2001 From: "Alexander.Zarochentsev" Date: Wed, 22 Sep 2010 16:19:49 +0400 Subject: [PATCH] b=22598 osd thandle usage counters i=mikhail.pershin i=oleg.drokin --- lustre/osd-ldiskfs/osd_handler.c | 91 ++++++++++++++++++++++++++++++++++++++- lustre/osd-ldiskfs/osd_internal.h | 22 ++++++++++ lustre/osd-ldiskfs/osd_lproc.c | 8 ++-- 3 files changed, 116 insertions(+), 5 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 2999143..0bbdaa9 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -133,6 +133,13 @@ struct osd_thandle { /* Link to the device, for debugging. */ struct lu_ref_link *ot_dev_link; +#if OSD_THANDLE_STATS + /** time when this handle was allocated */ + cfs_time_t oth_alloced; + + /** time when this thanle was started */ + cfs_time_t oth_started; +#endif }; /* @@ -500,6 +507,83 @@ enum { * Journal */ +#if OSD_THANDLE_STATS +/** + * Set time when the handle is allocated + */ +static void osd_th_alloced(struct osd_thandle *oth) +{ + oth->oth_alloced = cfs_time_current(); +} + +/** + * Set time when the handle started + */ +static void osd_th_started(struct osd_thandle *oth) +{ + oth->oth_started = cfs_time_current(); +} + +/** + * Helper function to convert time interval to microseconds packed in + * long int (default time units for the counter in "stats" initialized + * by lu_time_init() ) + */ +static long interval_to_usec(cfs_time_t start, cfs_time_t end) +{ + struct timeval val; + + cfs_duration_usec(cfs_time_sub(end, start), &val); + return val.tv_sec * 1000000 + val.tv_usec; +} + +/** + * Check whether the we deal with this handle for too long. + */ +static void __osd_th_check_slow(const struct osd_thandle *oth, + struct osd_device *dev, + cfs_time_t closed) +{ + cfs_time_t now = cfs_time_current(); + + LASSERT(dev != NULL); + + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING, + interval_to_usec(oth->oth_alloced, oth->oth_started)); + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN, + interval_to_usec(oth->oth_started, closed)); + lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING, + interval_to_usec(closed, now)); + + if (cfs_time_before(cfs_time_add(oth->oth_alloced, cfs_time_seconds(30)), + now)) { + CWARN("transaction handle %p was open for too long: " + "now "CFS_TIME_T" ," + "alloced "CFS_TIME_T" ," + "started "CFS_TIME_T" ," + "closed "CFS_TIME_T"\n", + oth, now, oth->oth_alloced, + oth->oth_started, closed); + libcfs_debug_dumpstack(NULL); + } +} + +#define OSD_CHECK_SLOW_TH(oth, dev, expr) \ +{ \ + cfs_time_t __closed = cfs_time_current(); \ + \ + expr; \ + __osd_th_check_slow(oth, dev, __closed); \ +} + +#else /* OSD_THANDLE_STATS */ + +#define osd_th_alloced(h) do {} while(0) +#define osd_th_started(h) do {} while(0) +#define OSD_CHECK_SLOW_TH(oth, dev, expr) expr + +#endif /* OSD_THANDLE_STATS */ + /* * Concurrency: doesn't access mutable data. */ @@ -572,8 +656,10 @@ static struct thandle *osd_trans_start(const struct lu_env *env, * XXX temporary stuff. Some abstraction layer should * be used. */ - + oti->oti_dev = dev; + osd_th_alloced(oh); jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits); + osd_th_started(oh); if (!IS_ERR(jh)) { oh->ot_handle = jh; th = &oh->ot_super; @@ -630,7 +716,8 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th) if (result != 0) CERROR("Failure in transaction hook: %d\n", result); oh->ot_handle = NULL; - result = ldiskfs_journal_stop(hdl); + OSD_CHECK_SLOW_TH(oh, oti->oti_dev, + result = ldiskfs_journal_stop(hdl)); if (result != 0) CERROR("Failure to stop transaction: %d\n", result); } diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index e56974e..b76f043 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -79,6 +79,9 @@ struct inode; #define OSD_OII_NOGEN (0) #define OSD_COUNTERS (0) +/** Enable thandle usage statistics */ +#define OSD_THANDLE_STATS (1) + #ifdef HAVE_QUOTA_SUPPORT struct osd_ctxt { __u32 oc_uid; @@ -136,6 +139,21 @@ struct osd_device { __u32 od_iop_mode; }; +/* + * osd dev stats + */ + +#ifdef LPROCFS +enum { +#if OSD_THANDLE_STATS + LPROC_OSD_THANDLE_STARTING, + LPROC_OSD_THANDLE_OPEN, + LPROC_OSD_THANDLE_CLOSING, +#endif + LPROC_OSD_NR +}; +#endif + /** * Storage representation for fids. * @@ -220,6 +238,10 @@ struct osd_thread_info { struct lustre_capa_key oti_capa_key; struct lustre_capa oti_capa; + /** osd_device reference, initialized in osd_trans_start() and + used in osd_trans_stop() */ + struct osd_device *oti_dev; + /** * following ipd and it structures are used for osd_index_iam_lookup() * these are defined separately as we might do index operation diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index d50b528..df25006 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -48,11 +48,13 @@ #include "osd_internal.h" #ifdef LPROCFS -enum { - LPROC_OSD_NR -}; static const char *osd_counter_names[LPROC_OSD_NR] = { +#if OSD_THANDLE_STATS + [LPROC_OSD_THANDLE_STARTING] = "thandle starting", + [LPROC_OSD_THANDLE_OPEN] = "thandle open", + [LPROC_OSD_THANDLE_CLOSING] = "thandle closing" +#endif }; int osd_procfs_init(struct osd_device *osd, const char *name) -- 1.8.3.1