* Author: Phil Schwan <phil@clusterfs.com>
*/
-
#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/tty.h>
#include <linux/uaccess.h>
#include <libcfs/linux/linux-fs.h>
#include <libcfs/libcfs.h>
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
+
+enum cfs_trace_buf_type {
+ CFS_TCD_TYPE_PROC = 0,
+ CFS_TCD_TYPE_SOFTIRQ,
+ CFS_TCD_TYPE_IRQ,
+ CFS_TCD_TYPE_CNT
+};
+
+union cfs_trace_data_union (*cfs_trace_data[CFS_TCD_TYPE_CNT])[NR_CPUS] __cacheline_aligned;
char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
struct cfs_trace_cpu_data *tcd);
+/* trace file lock routines */
+/* The walking argument indicates the locking comes from all tcd types
+ * iterator and we must lock it and dissable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details.
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+ __acquires(&tcd->tcd_lock)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_lock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ spin_lock_irq(&tcd->tcd_lock);
+ else
+ spin_lock(&tcd->tcd_lock);
+ return 1;
+}
+
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+ __releases(&tcd->tcd_lock)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_unlock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ spin_unlock_irq(&tcd->tcd_lock);
+ else
+ spin_unlock(&tcd->tcd_lock);
+}
+
+#define cfs_tcd_for_each(tcd, i, j) \
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++) \
+ for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \
+ j < num_possible_cpus(); \
+ j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
+
+#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i] && \
+ (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \
+ cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
+
+enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
+{
+ if (in_irq())
+ return CFS_TCD_TYPE_IRQ;
+ if (in_softirq())
+ return CFS_TCD_TYPE_SOFTIRQ;
+ return CFS_TCD_TYPE_PROC;
+}
+
+static inline struct cfs_trace_cpu_data *
+cfs_trace_get_tcd(void)
+{
+ struct cfs_trace_cpu_data *tcd =
+ &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
+
+ cfs_trace_lock_tcd(tcd, 0);
+
+ return tcd;
+}
+
+static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
+{
+ cfs_trace_unlock_tcd(tcd, 0);
+
+ put_cpu();
+}
+
static inline struct cfs_trace_page *
cfs_tage_from_list(struct list_head *list)
{
struct cfs_trace_page *tage;
/* My caller is trying to free memory */
- if (!in_interrupt() && memory_pressure_get())
+ if (!in_interrupt() && (current->flags & PF_MEMALLOC))
return NULL;
/*
list_move_tail(&tage->linkage, queue);
}
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock)
-{
- int i;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
- struct cfs_trace_page *tage;
-
- tage = cfs_tage_alloc(gfp);
- if (tage == NULL)
- break;
- list_add_tail(&tage->linkage, stock);
- }
- return i;
-}
-
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
} else {
tage = cfs_tage_alloc(GFP_ATOMIC);
if (unlikely(tage == NULL)) {
- if ((!memory_pressure_get() ||
+ if ((!(current->flags & PF_MEMALLOC) ||
in_interrupt()) && printk_ratelimit())
pr_warn("Lustre: cannot allocate a tage (%ld)\n",
tcd->tcd_cur_pages);
return tage;
}
+static void cfs_set_ptldebug_header(struct ptldebug_header *header,
+ struct libcfs_debug_msg_data *msgdata,
+ unsigned long stack)
+{
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+
+ header->ph_subsys = msgdata->msg_subsys;
+ header->ph_mask = msgdata->msg_mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_type = cfs_trace_buf_idx_get();
+ /* y2038 safe since all user space treats this as unsigned, but
+ * will overflow in 2106
+ */
+ header->ph_sec = (u32)ts.tv_sec;
+ header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = msgdata->msg_line;
+ header->ph_extern_pid = 0;
+}
+
+/**
+ * tty_write_msg - write a message to a certain tty, not just the console.
+ * @tty: the destination tty_struct
+ * @msg: the message to write
+ *
+ * tty_write_message is not exported, so write a same function for it
+ *
+ */
+static void tty_write_msg(struct tty_struct *tty, const char *msg)
+{
+ mutex_lock(&tty->atomic_write_lock);
+ tty_lock(tty);
+ if (tty->ops->write && tty->count > 0)
+ tty->ops->write(tty, msg, strlen(msg));
+ tty_unlock(tty);
+ mutex_unlock(&tty->atomic_write_lock);
+ wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+}
+
+static void cfs_tty_write_message(const char *prefix, int mask, const char *msg)
+{
+ struct tty_struct *tty;
+
+ tty = get_current_tty();
+ if (!tty)
+ return;
+
+ tty_write_msg(tty, prefix);
+ if ((mask & D_EMERG) || (mask & D_ERROR))
+ tty_write_msg(tty, "Error");
+ tty_write_msg(tty, ": ");
+ tty_write_msg(tty, msg);
+ tty_kref_put(tty);
+}
+
+static void cfs_vprint_to_console(struct ptldebug_header *hdr, int mask,
+ struct va_format *vaf, const char *file,
+ const char *fn)
+{
+ char *prefix = "Lustre";
+
+ if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
+ prefix = "LNet";
+
+ if (mask & D_CONSOLE) {
+ if (mask & D_EMERG)
+ pr_emerg("%sError: %pV", prefix, vaf);
+ else if (mask & D_ERROR)
+ pr_err("%sError: %pV", prefix, vaf);
+ else if (mask & D_WARNING)
+ pr_warn("%s: %pV", prefix, vaf);
+ else if (mask & libcfs_printk)
+ pr_info("%s: %pV", prefix, vaf);
+ } else {
+ if (mask & D_EMERG)
+ pr_emerg("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & D_ERROR)
+ pr_err("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & D_WARNING)
+ pr_warn("%s: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & (D_CONSOLE | libcfs_printk))
+ pr_info("%s: %pV", prefix, vaf);
+ }
+
+ if (mask & D_TTY)
+ /* tty_write_msg doesn't handle formatting */
+ cfs_tty_write_message(prefix, mask, vaf->fmt);
+}
+
+static void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+ const char *file, const char *fn,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ cfs_vprint_to_console(hdr, mask, &vaf, file, fn);
+}
+
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
const char *format, ...)
{
if (needed < 2 || *(string_buf + needed - 2) != '\r')
pr_info("Lustre: format at %s:%d:%s doesn't end in '\\r\\n'\n",
file, msgdata->msg_line, msgdata->msg_fn);
+ if (strnchr(string_buf, needed, '%'))
+ pr_info("Lustre: format at %s:%d:%s mustn't contain %%\n",
+ file, msgdata->msg_line, msgdata->msg_fn);
}
header.ph_len = known_size + needed;
}
if (tcd) {
- cfs_print_to_console(&header, mask, string_buf, needed, file,
- msgdata->msg_fn);
+ cfs_print_to_console(&header, mask, file, msgdata->msg_fn,
+ "%s", string_buf);
cfs_trace_put_tcd(tcd);
} else {
- string_buf = cfs_trace_get_console_buffer();
+ struct va_format vaf;
va_start(ap, format);
- needed = vscnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- format, ap);
+ vaf.fmt = format;
+ vaf.va = ≈
+ cfs_vprint_to_console(&header, mask,
+ &vaf, file, msgdata->msg_fn);
va_end(ap);
-
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
}
if (cdls != NULL && cdls->cdls_count != 0) {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = scnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count,
- (cdls->cdls_count > 1) ? "s" : "");
-
/* Do not allow print this to TTY */
- cfs_print_to_console(&header, mask & ~D_TTY, string_buf,
- needed, file, msgdata->msg_fn);
+ cfs_print_to_console(&header, mask & ~D_TTY, file,
+ msgdata->msg_fn,
+ "Skipped %d previous similar message%s\n",
+ cdls->cdls_count,
+ (cdls->cdls_count > 1) ? "s" : "");
- put_cpu();
cdls->cdls_count = 0;
}
cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
- cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
- msgdata->msg_file, msgdata->msg_fn);
+ cfs_print_to_console(&hdr, D_EMERG, msgdata->msg_file, msgdata->msg_fn,
+ "%s", str);
panic("Lustre debug assertion failure\n");
}
}
+#ifdef LNET_DUMP_ON_PANIC
void cfs_trace_debug_print(void)
{
struct page_collection pc;
page = tage->page;
p = page_address(page);
while (p < ((char *)page_address(page) + tage->used)) {
- struct ptldebug_header *hdr;
- int len;
- hdr = (void *)p;
- p += sizeof(*hdr);
- file = p;
- p += strlen(file) + 1;
- fn = p;
- p += strlen(fn) + 1;
- len = hdr->ph_len - (int)(p - (char *)hdr);
-
- cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
- p += len;
- }
+ struct ptldebug_header *hdr;
+ int len;
+ hdr = (void *)p;
+ p += sizeof(*hdr);
+ file = p;
+ p += strlen(file) + 1;
+ fn = p;
+ p += strlen(fn) + 1;
+ len = hdr->ph_len - (int)(p - (char *)hdr);
+
+ cfs_print_to_console(hdr, D_EMERG, file, fn,
+ "%.*s", len, p);
+
+ p += len;
+ }
list_del(&tage->linkage);
cfs_tage_free(tage);
}
}
+#endif /* LNET_DUMP_ON_PANIC */
int cfs_tracefile_dump_all_pages(char *filename)
{
{
struct page_collection pc;
struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
pc.pc_want_daemon_pages = 1;
collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-
+ while (!list_empty(&pc.pc_pages)) {
+ tage = list_first_entry(&pc.pc_pages,
+ struct cfs_trace_page, linkage);
__LASSERT_TAGE_INVARIANT(tage);
list_del(&tage->linkage);
}
}
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob)
-{
- int nob;
-
- if (usr_buffer_nob > knl_buffer_nob)
- return -EOVERFLOW;
-
- if (copy_from_user(knl_buffer, usr_buffer, usr_buffer_nob))
- return -EFAULT;
-
- nob = strnlen(knl_buffer, usr_buffer_nob);
- while (--nob >= 0) /* strip trailing whitespace */
- if (!isspace(knl_buffer[nob]))
- break;
-
- if (nob < 0) /* empty string */
- return -EINVAL;
-
- if (nob == knl_buffer_nob) /* no space to terminate */
- return -EOVERFLOW;
-
- knl_buffer[nob + 1] = 0; /* terminate */
- return 0;
-}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
-
int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
const char *knl_buffer, char *append)
{
int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
+ char *str;
+ char *path;
+ int rc;
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc != 0)
- goto out;
+ str = memdup_user_nul(usr_str, usr_str_nob);
+ if (!str)
+ return -ENOMEM;
- if (str[0] != '/') {
- rc = -EINVAL;
- goto out;
- }
- rc = cfs_tracefile_dump_all_pages(str);
-out:
+ path = strim(str);
+ if (path[0] != '/')
+ rc = -EINVAL;
+ else
+ rc = cfs_tracefile_dump_all_pages(path);
kfree(str);
- return rc;
+
+ return rc;
}
int cfs_trace_daemon_command(char *str)
int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
{
- char *str;
- int rc;
+ char *str;
+ int rc;
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc == 0)
- rc = cfs_trace_daemon_command(str);
+ str = memdup_user_nul(usr_str, usr_str_nob);
+ if (!str)
+ return -ENOMEM;
+ rc = cfs_trace_daemon_command(strim(str));
kfree(str);
- return rc;
+
+ return rc;
}
int cfs_trace_set_debug_mb(int mb)
{
int i;
int j;
- int pages;
- int limit = cfs_trace_max_debug_mb();
+ unsigned long pages;
+ unsigned long total_mb = (cfs_totalram_pages() >> (20 - PAGE_SHIFT));
+ unsigned long limit = max_t(unsigned long, 512, (total_mb * 4) / 5);
struct cfs_trace_cpu_data *tcd;
if (mb < num_possible_cpus()) {
}
if (mb > limit) {
- pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
+ pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %lu MB.\n",
mb, limit);
mb = limit;
}
complete(&tctl->tctl_start);
- while (1) {
- wait_queue_entry_t __wait;
+ pc.pc_want_daemon_pages = 0;
- pc.pc_want_daemon_pages = 0;
- collect_pages(&pc);
+ while (!last_loop) {
+ wait_event_timeout(tctl->tctl_waitq,
+ ({ collect_pages(&pc);
+ !list_empty(&pc.pc_pages); }) ||
+ atomic_read(&tctl->tctl_shutdown),
+ cfs_time_seconds(1));
+ if (atomic_read(&tctl->tctl_shutdown))
+ last_loop = 1;
if (list_empty(&pc.pc_pages))
- goto end_loop;
+ continue;
- filp = NULL;
+ filp = NULL;
down_read(&cfs_tracefile_sem);
- if (cfs_tracefile[0] != 0) {
+ if (cfs_tracefile[0] != 0) {
filp = filp_open(cfs_tracefile,
O_CREAT | O_RDWR | O_LARGEFILE,
0600);
}
}
up_read(&cfs_tracefile_sem);
- if (filp == NULL) {
- put_pages_on_daemon_list(&pc);
+ if (filp == NULL) {
+ put_pages_on_daemon_list(&pc);
__LASSERT(list_empty(&pc.pc_pages));
- goto end_loop;
- }
+ continue;
+ }
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
struct dentry *de = file_dentry(filp);
__LASSERT(list_empty(&pc.pc_pages));
break;
}
- }
+ }
filp_close(filp, NULL);
- put_pages_on_daemon_list(&pc);
+ put_pages_on_daemon_list(&pc);
if (!list_empty(&pc.pc_pages)) {
- int i;
+ int i;
pr_alert("Lustre: trace pages aren't empty\n");
pr_err("Lustre: total cpus(%d): ", num_possible_cpus());
pr_err("Lustre: There are %d pages unwritten\n", i);
}
__LASSERT(list_empty(&pc.pc_pages));
-end_loop:
- if (atomic_read(&tctl->tctl_shutdown)) {
- if (last_loop == 0) {
- last_loop = 1;
- continue;
- } else {
- break;
- }
- }
- init_waitqueue_entry(&__wait, current);
- add_wait_queue(&tctl->tctl_waitq, &__wait);
- schedule_timeout_interruptible(cfs_time_seconds(1));
- remove_wait_queue(&tctl->tctl_waitq, &__wait);
- }
+ }
complete(&tctl->tctl_stop);
- return 0;
+ return 0;
}
int cfs_trace_start_thread(void)
mutex_unlock(&cfs_trace_thread_mutex);
}
+/* percents to share the total debug memory for each type */
+static unsigned int pages_factor[CFS_TCD_TYPE_CNT] = {
+ 80, /* 80% pages for CFS_TCD_TYPE_PROC */
+ 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
+ 10 /* 10% pages for CFS_TCD_TYPE_IRQ */
+};
+
int cfs_tracefile_init(int max_pages)
{
struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
+ int i;
+ int j;
- rc = cfs_tracefile_init_arch();
- if (rc != 0)
- return rc;
+ /* initialize trace_data */
+ memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
+ for (i = 0; i < CFS_TCD_TYPE_CNT; i++) {
+ cfs_trace_data[i] =
+ kmalloc_array(num_possible_cpus(),
+ sizeof(union cfs_trace_data_union),
+ GFP_KERNEL);
+ if (!cfs_trace_data[i])
+ goto out_trace_data;
+ }
+ /* arch related info initialized */
cfs_tcd_for_each(tcd, i, j) {
- /* tcd_pages_factor is initialized int tracefile_init_arch. */
- factor = tcd->tcd_pages_factor;
+ int factor = pages_factor[i];
+
+ spin_lock_init(&tcd->tcd_lock);
+ tcd->tcd_pages_factor = factor;
+ tcd->tcd_type = i;
+ tcd->tcd_cpu = j;
+
INIT_LIST_HEAD(&tcd->tcd_pages);
INIT_LIST_HEAD(&tcd->tcd_stock_pages);
INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
LASSERT(tcd->tcd_max_pages > 0);
tcd->tcd_shutting_down = 0;
}
+
return 0;
+
+out_trace_data:
+ for (i = 0; cfs_trace_data[i]; i++) {
+ kfree(cfs_trace_data[i]);
+ cfs_trace_data[i] = NULL;
+ }
+ pr_err("lnet: Not enough memory\n");
+ return -ENOMEM;
}
static void trace_cleanup_on_all_cpus(void)
{
struct cfs_trace_cpu_data *tcd;
struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
int i, cpu;
for_each_possible_cpu(cpu) {
cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+ if (!tcd->tcd_pages_factor)
+ /* Not initialised */
+ continue;
tcd->tcd_shutting_down = 1;
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+ while (!list_empty(&tcd->tcd_pages)) {
+ tage = list_first_entry(&tcd->tcd_pages,
+ struct cfs_trace_page,
+ linkage);
__LASSERT_TAGE_INVARIANT(tage);
list_del(&tage->linkage);
static void cfs_trace_cleanup(void)
{
struct page_collection pc;
+ int i;
INIT_LIST_HEAD(&pc.pc_pages);
trace_cleanup_on_all_cpus();
- cfs_tracefile_fini_arch();
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++) {
+ kfree(cfs_trace_data[i]);
+ cfs_trace_data[i] = NULL;
+ }
}
void cfs_tracefile_exit(void)