From eba158939e20b37e178ad3e0e78f4647e9faee0d Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Thu, 26 Apr 2012 17:43:11 -0400 Subject: [PATCH] LU-1311 Disable local irqs when locking tcds while walking them This helps us to avoid a deadlock with other threads that goes like this: thread doing the walking = (1) other thread = (2) (1) get some non-irq type tcd locked while walking the tcds (2) get some irq/bh lock (2) try to print a debugging message and block on the tcd lock held by (1) (1) irq/bh arrives, tries to get irq/bh lock held by (2) == BAM! Change-Id: If1e1cc57c0f2e287b453e34db0c894c1ca830b9a Signed-off-by: Oleg Drokin Reviewed-on: http://review.whamcloud.com/2605 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Liang Zhen Reviewed-by: James Simmons --- libcfs/libcfs/linux/linux-tracefile.c | 14 ++++++++++++-- libcfs/libcfs/tracefile.h | 10 +++++----- libcfs/libcfs/winnt/winnt-tracefile.c | 4 ++-- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/libcfs/libcfs/linux/linux-tracefile.c b/libcfs/libcfs/linux/linux-tracefile.c index 7b499f8..69315ab 100644 --- a/libcfs/libcfs/linux/linux-tracefile.c +++ b/libcfs/libcfs/linux/linux-tracefile.c @@ -147,25 +147,35 @@ cfs_trace_buf_type_t cfs_trace_buf_idx_get() return CFS_TCD_TYPE_PROC; } -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd) +/* + * The walking argument indicates the locking comes from all tcd types + * iterator and we must lock it and disable local irqs to avoid deadlocks + * with other interrupt locks that might be happening. See LU-1311 + * for details. 
+ */ +int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) { __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) cfs_spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags); else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) cfs_spin_lock_bh(&tcd->tcd_lock); + else if (unlikely(walking)) + cfs_spin_lock_irq(&tcd->tcd_lock); else cfs_spin_lock(&tcd->tcd_lock); return 1; } -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd) +void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) { __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); if (tcd->tcd_type == CFS_TCD_TYPE_IRQ) cfs_spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags); else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ) cfs_spin_unlock_bh(&tcd->tcd_lock); + else if (unlikely(walking)) + cfs_spin_unlock_irq(&tcd->tcd_lock); else cfs_spin_unlock(&tcd->tcd_lock); } diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h index a39ed41..9250017 100644 --- a/libcfs/libcfs/tracefile.h +++ b/libcfs/libcfs/tracefile.h @@ -198,7 +198,7 @@ extern union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[CFS_NR_CPUS]; #define cfs_tcd_for_each_type_lock(tcd, i, cpu) \ for (i = 0; cfs_trace_data[i] && \ (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \ - cfs_trace_lock_tcd(tcd); cfs_trace_unlock_tcd(tcd), i++) + cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++) /* XXX nikita: this declaration is internal to tracefile.c and should probably * be moved there */ @@ -266,8 +266,8 @@ extern void cfs_print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, int len, const char *file, const char *fn); -extern int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd); -extern void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd); +extern int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking); +extern void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking); /** * trace_buf_type_t, trace_buf_idx_get() and 
trace_console_buffers[][] @@ -300,7 +300,7 @@ cfs_trace_get_tcd(void) struct cfs_trace_cpu_data *tcd = &(*cfs_trace_data[cfs_trace_buf_idx_get()])[cfs_get_cpu()].tcd; - cfs_trace_lock_tcd(tcd); + cfs_trace_lock_tcd(tcd, 0); return tcd; } @@ -308,7 +308,7 @@ cfs_trace_get_tcd(void) static inline void cfs_trace_put_tcd (struct cfs_trace_cpu_data *tcd) { - cfs_trace_unlock_tcd(tcd); + cfs_trace_unlock_tcd(tcd, 0); cfs_put_cpu(); } diff --git a/libcfs/libcfs/winnt/winnt-tracefile.c b/libcfs/libcfs/winnt/winnt-tracefile.c index 5c50f98..9d4b72f 100644 --- a/libcfs/libcfs/winnt/winnt-tracefile.c +++ b/libcfs/libcfs/winnt/winnt-tracefile.c @@ -144,13 +144,13 @@ cfs_trace_buf_type_t cfs_trace_buf_idx_get() return CFS_TCD_TYPE_PASSIVE; } -int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd) +int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking) { __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); return 1; } -void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd) +void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking) { __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_MAX); } -- 1.8.3.1