Whamcloud - gitweb
LU-1311 Disable local irqs when locking tcds while walking them
author Oleg Drokin <green@whamcloud.com>
Thu, 26 Apr 2012 21:43:11 +0000 (17:43 -0400)
committer Oleg Drokin <green@whamcloud.com>
Mon, 7 May 2012 19:37:13 +0000 (15:37 -0400)
This helps us to avoid a deadlock with other threads that goes like this:
thread doing the walking = (1)
other thread = (2)

(1) get some non-irq type tcd locked while walking the tcds
(2) get some irq/bh lock
(2) try to print a debugging message and block on the tcd lock held by (1)
(1) irq/bh arrives, tries to get irq/bh lock held by (2) == BAM!

Change-Id: If1e1cc57c0f2e287b453e34db0c894c1ca830b9a
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/2605
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Liang Zhen <liang@whamcloud.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
libcfs/libcfs/linux/linux-tracefile.c
libcfs/libcfs/tracefile.h
libcfs/libcfs/winnt/winnt-tracefile.c

index 7b499f8..69315ab 100644 (file)
@@ -147,25 +147,35 @@ cfs_trace_buf_type_t cfs_trace_buf_idx_get()
                return CFS_TCD_TYPE_PROC;
 }
 
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd)
+/*
+ * The walking argument indicates the locking comes from all tcd types
+ * iterator and we must lock it and disable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details.
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
 {
        __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
                 cfs_spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
                 cfs_spin_lock_bh(&tcd->tcd_lock);
+        else if (unlikely(walking))
+                cfs_spin_lock_irq(&tcd->tcd_lock);
         else
                 cfs_spin_lock(&tcd->tcd_lock);
        return 1;
 }
 
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd)
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
 {
        __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
         if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
                 cfs_spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
         else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
                 cfs_spin_unlock_bh(&tcd->tcd_lock);
+        else if (unlikely(walking))
+                cfs_spin_unlock_irq(&tcd->tcd_lock);
         else
                 cfs_spin_unlock(&tcd->tcd_lock);
 }
index a39ed41..9250017 100644 (file)
@@ -198,7 +198,7 @@ extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[CFS_NR_CPUS];
 #define cfs_tcd_for_each_type_lock(tcd, i, cpu)                           \
     for (i = 0; cfs_trace_data[i] &&                                      \
          (tcd = &(*cfs_trace_data[i])[cpu].tcd) &&                        \
-         cfs_trace_lock_tcd(tcd); cfs_trace_unlock_tcd(tcd), i++)
+         cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
 
 /* XXX nikita: this declaration is internal to tracefile.c and should probably
  * be moved there */
@@ -266,8 +266,8 @@ extern void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
                                  const char *buf, int len, const char *file,
                                  const char *fn);
 
-extern int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd);
-extern void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd);
+extern int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
+extern void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking);
 
 /**
  * trace_buf_type_t, trace_buf_idx_get() and trace_console_buffers[][]
@@ -300,7 +300,7 @@ cfs_trace_get_tcd(void)
        struct cfs_trace_cpu_data *tcd =
                 &(*cfs_trace_data[cfs_trace_buf_idx_get()])[cfs_get_cpu()].tcd;
 
-       cfs_trace_lock_tcd(tcd);
+       cfs_trace_lock_tcd(tcd, 0);
 
        return tcd;
 }
@@ -308,7 +308,7 @@ cfs_trace_get_tcd(void)
 static inline void
 cfs_trace_put_tcd (struct cfs_trace_cpu_data *tcd)
 {
-       cfs_trace_unlock_tcd(tcd);
+       cfs_trace_unlock_tcd(tcd, 0);
 
        cfs_put_cpu();
 }
index 5c50f98..9d4b72f 100644 (file)
@@ -144,13 +144,13 @@ cfs_trace_buf_type_t cfs_trace_buf_idx_get()
                 return CFS_TCD_TYPE_PASSIVE;
 }
 
-int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd)
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
 {
        __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
        return 1;
 }
 
-void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd)
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
 {
        __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX);
 }