Whamcloud - gitweb
b=22598 osd_trans_stop() page fault fix
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 2a6b90a..78a6778 100644 (file)
@@ -133,6 +133,13 @@ struct osd_thandle {
         /* Link to the device, for debugging. */
         struct lu_ref_link     *ot_dev_link;
 
+#if OSD_THANDLE_STATS
+        /** time when this handle was allocated */
+        cfs_time_t oth_alloced;
+
+        /** time when this handle was started */
+        cfs_time_t oth_started;
+#endif
 };
 
 /*
@@ -500,6 +507,83 @@ enum {
  * Journal
  */
 
+#if OSD_THANDLE_STATS
+/**
+ * Record the current time (cfs_time_current()) in oth->oth_alloced.
+ */
+static void osd_th_alloced(struct osd_thandle *oth)
+{
+        oth->oth_alloced = cfs_time_current();
+}
+
+/**
+ * Record the current time (cfs_time_current()) in oth->oth_started.
+ */
+static void osd_th_started(struct osd_thandle *oth)
+{
+        oth->oth_started = cfs_time_current();
+}
+
+/**
+ * Convert the interval from \a start to \a end into microseconds, returned
+ * as a long int (default time units for the counter in "stats" initialized
+ * by lu_time_init() ).
+ */
+static long interval_to_usec(cfs_time_t start, cfs_time_t end)
+{
+        struct timeval val;
+
+        cfs_duration_usec(cfs_time_sub(end, start), &val);
+        return val.tv_sec * 1000000 + val.tv_usec;
+}
+
+/**
+ * Add handle phase times to dev->od_stats; warn if alloced over 30s ago.
+ */
+static void __osd_th_check_slow(void *oth, struct osd_device *dev,
+                                cfs_time_t alloced, cfs_time_t started,
+                                cfs_time_t closed)
+{
+        cfs_time_t now = cfs_time_current();
+
+        LASSERT(dev != NULL);
+        /* account the starting, open and closing intervals, in usec */
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING,
+                            interval_to_usec(alloced, started));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN,
+                            interval_to_usec(started, closed));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING,
+                            interval_to_usec(closed, now));
+        /* complain, with a stack dump, if alloced + 30 seconds is before now */
+        if (cfs_time_before(cfs_time_add(alloced, cfs_time_seconds(30)), now)) {
+                CWARN("transaction handle %p was open for too long: "
+                      "now "CFS_TIME_T" ,"
+                      "alloced "CFS_TIME_T" ,"
+                      "started "CFS_TIME_T" ,"
+                      "closed "CFS_TIME_T"\n",
+                      oth, now, alloced, started, closed);
+                libcfs_debug_dumpstack(NULL);
+        }
+}
+
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)                               \
+do {                                                                    \
+        cfs_time_t __closed = cfs_time_current();                       \
+        cfs_time_t __alloced = (oth)->oth_alloced;                      \
+        cfs_time_t __started = (oth)->oth_started;                      \
+        /* snapshot the times before expr; oth is only printed later */ \
+        expr;                                                           \
+        __osd_th_check_slow(oth, dev, __alloced, __started, __closed);  \
+} while (0)
+
+#else /* OSD_THANDLE_STATS */
+/* Stats disabled: the timing hooks are no-ops and expr is evaluated as is. */
+#define osd_th_alloced(h)                  do {} while(0)
+#define osd_th_started(h)                  do {} while(0)
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)  expr
+
+#endif /* OSD_THANDLE_STATS */
+
 /*
  * Concurrency: doesn't access mutable data.
  */
@@ -572,8 +656,10 @@ static struct thandle *osd_trans_start(const struct lu_env *env,
                          * XXX temporary stuff. Some abstraction layer should
                          * be used.
                          */
-
+                        oti->oti_dev = dev;
+                        osd_th_alloced(oh);
                         jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits);
+                        osd_th_started(oh);
                         if (!IS_ERR(jh)) {
                                 oh->ot_handle = jh;
                                 th = &oh->ot_super;
@@ -630,7 +716,8 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th)
                 if (result != 0)
                         CERROR("Failure in transaction hook: %d\n", result);
                 oh->ot_handle = NULL;
-                result = ldiskfs_journal_stop(hdl);
+                OSD_CHECK_SLOW_TH(oh, oti->oti_dev,
+                                  result = ldiskfs_journal_stop(hdl));
                 if (result != 0)
                         CERROR("Failure to stop transaction: %d\n", result);
         }
@@ -2055,9 +2142,9 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
                 __u32 d[4], s[4];
 
                 s[0] = obj->oo_inode->i_uid;
-                ll_get_random_bytes(&(s[1]), sizeof(__u32));
+                cfs_get_random_bytes(&(s[1]), sizeof(__u32));
                 s[2] = obj->oo_inode->i_gid;
-                ll_get_random_bytes(&(s[3]), sizeof(__u32));
+                cfs_get_random_bytes(&(s[3]), sizeof(__u32));
                 rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN);
                 if (unlikely(rc))
                         RETURN(ERR_PTR(rc));