*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* libcfs/libcfs/tracefile.c
*
* Author: Phil Schwan <phil@clusterfs.com>
*/
-
#define DEBUG_SUBSYSTEM S_LNET
-#define LUSTRE_TRACEFILE_PRIVATE
#include "tracefile.h"
#include <linux/ctype.h>
#include <linux/fs.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
+#include <linux/poll.h>
#include <linux/uaccess.h>
#include <libcfs/linux/linux-fs.h>
#include <libcfs/libcfs.h>
-/* XXX move things up to the top, comment */
-union cfs_trace_data_union (*cfs_trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
+
+/* Which set of per-cpu trace buffers to use, keyed on execution context */
+enum cfs_trace_buf_type {
+ CFS_TCD_TYPE_PROC = 0,
+ CFS_TCD_TYPE_SOFTIRQ,
+ CFS_TCD_TYPE_IRQ,
+ CFS_TCD_TYPE_CNT
+};
+
+union cfs_trace_data_union (*cfs_trace_data[CFS_TCD_TYPE_CNT])[NR_CPUS] __cacheline_aligned;
+
+/* Pages containing records already processed by daemon.
+ * Link via ->lru, use size in ->private
+ */
+static LIST_HEAD(daemon_pages);
+static long daemon_pages_count;
+static long daemon_pages_max;
char cfs_tracefile[TRACEFILE_NAME_SIZE];
long long cfs_tracefile_size = CFS_TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-static DEFINE_MUTEX(cfs_trace_thread_mutex);
-static int thread_running = 0;
+
+struct task_struct *tctl_task;
static atomic_t cfs_tage_allocated = ATOMIC_INIT(0);
static DECLARE_RWSEM(cfs_tracefile_sem);
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd);
+/* trace file lock routines */
+/* The walking argument indicates the locking comes from the all-tcd-types
+ * iterator and we must lock it and disable local irqs to avoid deadlocks
+ * with other interrupt locks that might be happening. See LU-1311
+ * for details.
+ */
+int cfs_trace_lock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+ __acquires(&tcd->tcd_lock)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_lock_irqsave(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_lock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ /* all-types walk: block local irqs (see comment above) */
+ spin_lock_irq(&tcd->tcd_lock);
+ else
+ spin_lock(&tcd->tcd_lock);
+ /* always succeeds; non-zero return lets this be used in conditions */
+ return 1;
+}
+
+/* Release a tcd lock taken by cfs_trace_lock_tcd(); 'walking' must match
+ * the value passed at lock time so the same primitive pair is used.
+ */
+void cfs_trace_unlock_tcd(struct cfs_trace_cpu_data *tcd, int walking)
+ __releases(&tcd->tcd_lock)
+{
+ __LASSERT(tcd->tcd_type < CFS_TCD_TYPE_CNT);
+ if (tcd->tcd_type == CFS_TCD_TYPE_IRQ)
+ spin_unlock_irqrestore(&tcd->tcd_lock, tcd->tcd_lock_flags);
+ else if (tcd->tcd_type == CFS_TCD_TYPE_SOFTIRQ)
+ spin_unlock_bh(&tcd->tcd_lock);
+ else if (unlikely(walking))
+ spin_unlock_irq(&tcd->tcd_lock);
+ else
+ spin_unlock(&tcd->tcd_lock);
+}
+
+/* Walk every initialised tcd: outer loop over buffer types, inner loop
+ * over possible cpus.  Does no locking itself.
+ */
+#define cfs_tcd_for_each(tcd, i, j) \
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++) \
+ for (j = 0, ((tcd) = &(*cfs_trace_data[i])[j].tcd); \
+ j < num_possible_cpus(); \
+ j++, (tcd) = &(*cfs_trace_data[i])[j].tcd)
+
+/* For one cpu, visit the tcd of each buffer type with its lock held in
+ * walking mode; the lock is dropped at the end of each iteration.
+ */
+#define cfs_tcd_for_each_type_lock(tcd, i, cpu) \
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i] && \
+ (tcd = &(*cfs_trace_data[i])[cpu].tcd) && \
+ cfs_trace_lock_tcd(tcd, 1); cfs_trace_unlock_tcd(tcd, 1), i++)
+
+/* Map the current execution context (hard-irq, soft-irq, process) to
+ * the matching trace buffer type.
+ */
+enum cfs_trace_buf_type cfs_trace_buf_idx_get(void)
+{
+ if (in_irq())
+ return CFS_TCD_TYPE_IRQ;
+ if (in_softirq())
+ return CFS_TCD_TYPE_SOFTIRQ;
+ return CFS_TCD_TYPE_PROC;
+}
+
+/* Return this cpu's tcd for the current context, locked.  get_cpu()
+ * disables preemption; the caller must release with cfs_trace_put_tcd().
+ */
+static inline struct cfs_trace_cpu_data *
+cfs_trace_get_tcd(void)
+{
+ struct cfs_trace_cpu_data *tcd =
+ &(*cfs_trace_data[cfs_trace_buf_idx_get()])[get_cpu()].tcd;
+
+ cfs_trace_lock_tcd(tcd, 0);
+
+ return tcd;
+}
+
+/* Unlock a tcd obtained via cfs_trace_get_tcd() and re-enable
+ * preemption (pairs with the get_cpu() done there).
+ */
+static inline void cfs_trace_put_tcd(struct cfs_trace_cpu_data *tcd)
+{
+ cfs_trace_unlock_tcd(tcd, 0);
+
+ put_cpu();
+}
static inline struct cfs_trace_page *
cfs_tage_from_list(struct list_head *list)
struct cfs_trace_page *tage;
/* My caller is trying to free memory */
- if (!in_interrupt() && memory_pressure_get())
+ if (!in_interrupt() && (current->flags & PF_MEMALLOC))
return NULL;
/*
list_move_tail(&tage->linkage, queue);
}
-int cfs_trace_refill_stock(struct cfs_trace_cpu_data *tcd, gfp_t gfp,
- struct list_head *stock)
-{
- int i;
-
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
-
- for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
- struct cfs_trace_page *tage;
-
- tage = cfs_tage_alloc(gfp);
- if (tage == NULL)
- break;
- list_add_tail(&tage->linkage, stock);
- }
- return i;
-}
-
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *
cfs_trace_get_tage_try(struct cfs_trace_cpu_data *tcd, unsigned long len)
{
- struct cfs_trace_page *tage;
+ struct cfs_trace_page *tage;
+ struct task_struct *tsk;
- if (tcd->tcd_cur_pages > 0) {
+ if (tcd->tcd_cur_pages > 0) {
__LASSERT(!list_empty(&tcd->tcd_pages));
- tage = cfs_tage_from_list(tcd->tcd_pages.prev);
+ tage = cfs_tage_from_list(tcd->tcd_pages.prev);
if (tage->used + len <= PAGE_SIZE)
- return tage;
- }
+ return tage;
+ }
if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
if (tcd->tcd_cur_stock_pages > 0) {
} else {
tage = cfs_tage_alloc(GFP_ATOMIC);
if (unlikely(tage == NULL)) {
- if ((!memory_pressure_get() ||
+ if ((!(current->flags & PF_MEMALLOC) ||
in_interrupt()) && printk_ratelimit())
pr_warn("Lustre: cannot allocate a tage (%ld)\n",
tcd->tcd_cur_pages);
list_add_tail(&tage->linkage, &tcd->tcd_pages);
tcd->tcd_cur_pages++;
- if (tcd->tcd_cur_pages > 8 && thread_running) {
- struct tracefiled_ctl *tctl = &trace_tctl;
+ tsk = tctl_task;
+ if (tcd->tcd_cur_pages > 8 && tsk)
/*
* wake up tracefiled to process some pages.
*/
- wake_up(&tctl->tctl_waitq);
- }
+ wake_up_process(tsk);
+
return tage;
- }
- return NULL;
+ }
+ return NULL;
}
static void cfs_tcd_shrink(struct cfs_trace_cpu_data *tcd)
if (pgcount-- == 0)
break;
- list_move_tail(&tage->linkage, &pc.pc_pages);
+ list_del(&tage->linkage);
+ cfs_tage_free(tage);
tcd->tcd_cur_pages--;
}
- put_pages_on_tcd_daemon_list(&pc, tcd);
}
/* return a page that has 'len' bytes left at the end */
static struct cfs_trace_page *cfs_trace_get_tage(struct cfs_trace_cpu_data *tcd,
- unsigned long len)
+ unsigned long len)
{
- struct cfs_trace_page *tage;
+ struct cfs_trace_page *tage;
- /*
- * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
- * from here: this will lead to infinite recursion.
- */
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
if (len > PAGE_SIZE) {
pr_err("LustreError: cowardly refusing to write %lu bytes in a page\n",
return NULL;
}
- tage = cfs_trace_get_tage_try(tcd, len);
- if (tage != NULL)
- return tage;
- if (thread_running)
- cfs_tcd_shrink(tcd);
- if (tcd->tcd_cur_pages > 0) {
- tage = cfs_tage_from_list(tcd->tcd_pages.next);
- tage->used = 0;
- cfs_tage_to_tail(tage, &tcd->tcd_pages);
- }
- return tage;
+ tage = cfs_trace_get_tage_try(tcd, len);
+ if (tage != NULL)
+ return tage;
+ if (tctl_task)
+ cfs_tcd_shrink(tcd);
+ if (tcd->tcd_cur_pages > 0) {
+ tage = cfs_tage_from_list(tcd->tcd_pages.next);
+ tage->used = 0;
+ cfs_tage_to_tail(tage, &tcd->tcd_pages);
+ }
+ return tage;
+}
+
+/* Fill a ptldebug_header describing one debug record: subsystem/mask and
+ * line number from msgdata, plus current cpu, context type, wall-clock
+ * time, pid and the caller-supplied stack value.
+ */
+static void cfs_set_ptldebug_header(struct ptldebug_header *header,
+ struct libcfs_debug_msg_data *msgdata,
+ unsigned long stack)
+{
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+
+ header->ph_subsys = msgdata->msg_subsys;
+ header->ph_mask = msgdata->msg_mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_type = cfs_trace_buf_idx_get();
+ /* y2038 safe since all user space treats this as unsigned, but
+ * will overflow in 2106
+ */
+ header->ph_sec = (u32)ts.tv_sec;
+ header->ph_usec = ts.tv_nsec / NSEC_PER_USEC;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = msgdata->msg_line;
+ header->ph_extern_pid = 0;
+}
+
+/* Emit one debug message to the kernel log via %pV, choosing the printk
+ * level from 'mask'.  D_CONSOLE messages omit the pid/file/line prefix;
+ * LNet subsystems are prefixed "LNet", everything else "Lustre".
+ */
+static void cfs_vprint_to_console(struct ptldebug_header *hdr, int mask,
+ struct va_format *vaf, const char *file,
+ const char *fn)
+{
+ char *prefix = "Lustre";
+
+ if (hdr->ph_subsys == S_LND || hdr->ph_subsys == S_LNET)
+ prefix = "LNet";
+
+ if (mask & D_CONSOLE) {
+ if (mask & D_EMERG)
+ pr_emerg("%sError: %pV", prefix, vaf);
+ else if (mask & D_ERROR)
+ pr_err("%sError: %pV", prefix, vaf);
+ else if (mask & D_WARNING)
+ pr_warn("%s: %pV", prefix, vaf);
+ else if (mask & libcfs_printk)
+ pr_info("%s: %pV", prefix, vaf);
+ } else {
+ if (mask & D_EMERG)
+ pr_emerg("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & D_ERROR)
+ pr_err("%sError: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & D_WARNING)
+ pr_warn("%s: %d:%d:(%s:%d:%s()) %pV", prefix,
+ hdr->ph_pid, hdr->ph_extern_pid, file,
+ hdr->ph_line_num, fn, vaf);
+ else if (mask & (D_CONSOLE | libcfs_printk))
+ pr_info("%s: %pV", prefix, vaf);
+ }
+}
+
+/* printf-style front end for cfs_vprint_to_console(): packages the
+ * variadic arguments into a struct va_format and forwards them.
+ */
+static void cfs_print_to_console(struct ptldebug_header *hdr, int mask,
+ const char *file, const char *fn,
+ const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ cfs_vprint_to_console(hdr, mask, &vaf, file, fn);
+ /* each va_start() must be paired with va_end() (C11 7.16.1.3) */
+ va_end(args);
}
int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
if (*(string_buf + needed - 1) != '\n') {
pr_info("Lustre: format at %s:%d:%s doesn't end in newline\n",
file, msgdata->msg_line, msgdata->msg_fn);
- } else if (mask & D_TTY) {
- /* TTY needs '\r\n' to move carriage to leftmost position */
- if (needed < 2 || *(string_buf + needed - 2) != '\r')
- pr_info("Lustre: format at %s:%d:%s doesn't end in '\\r\\n'\n",
- file, msgdata->msg_line, msgdata->msg_fn);
}
header.ph_len = known_size + needed;
}
if (tcd) {
- cfs_print_to_console(&header, mask, string_buf, needed, file,
- msgdata->msg_fn);
+ cfs_print_to_console(&header, mask, file, msgdata->msg_fn,
+ "%s", string_buf);
cfs_trace_put_tcd(tcd);
} else {
- string_buf = cfs_trace_get_console_buffer();
+ struct va_format vaf;
va_start(ap, format);
- needed = vscnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- format, ap);
+ vaf.fmt = format;
+ vaf.va = &ap;
+ cfs_vprint_to_console(&header, mask,
+ &vaf, file, msgdata->msg_fn);
va_end(ap);
-
- cfs_print_to_console(&header, mask,
- string_buf, needed, file, msgdata->msg_fn);
-
- put_cpu();
}
if (cdls != NULL && cdls->cdls_count != 0) {
- string_buf = cfs_trace_get_console_buffer();
-
- needed = scnprintf(string_buf, CFS_TRACE_CONSOLE_BUFFER_SIZE,
- "Skipped %d previous similar message%s\n",
- cdls->cdls_count,
- (cdls->cdls_count > 1) ? "s" : "");
+ cfs_print_to_console(&header, mask, file,
+ msgdata->msg_fn,
+ "Skipped %d previous similar message%s\n",
+ cdls->cdls_count,
+ (cdls->cdls_count > 1) ? "s" : "");
- /* Do not allow print this to TTY */
- cfs_print_to_console(&header, mask & ~D_TTY, string_buf,
- needed, file, msgdata->msg_fn);
-
- put_cpu();
cdls->cdls_count = 0;
}
cfs_set_ptldebug_header(&hdr, msgdata, CDEBUG_STACK());
- cfs_print_to_console(&hdr, D_EMERG, str, strlen(str),
- msgdata->msg_file, msgdata->msg_fn);
+ cfs_print_to_console(&hdr, D_EMERG, msgdata->msg_file, msgdata->msg_fn,
+ "%s", str);
panic("Lustre debug assertion failure\n");
cfs_tcd_for_each(tcd, i, j) {
list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
tcd->tcd_cur_pages = 0;
-
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages,
- &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
}
}
cfs_tcd_for_each_type_lock(tcd, i, cpu) {
list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
tcd->tcd_cur_pages = 0;
- if (pc->pc_want_daemon_pages) {
- list_splice_init(&tcd->tcd_daemon_pages,
- &pc->pc_pages);
- tcd->tcd_cur_daemon_pages = 0;
- }
}
}
}
put_pages_back_on_all_cpus(pc);
}
-/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon. */
-static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
- struct cfs_trace_cpu_data *tcd)
-{
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
-
- list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
- __LASSERT_TAGE_INVARIANT(tage);
-
- if (tage->cpu != tcd->tcd_cpu || tage->type != tcd->tcd_type)
- continue;
-
- cfs_tage_to_tail(tage, &tcd->tcd_daemon_pages);
- tcd->tcd_cur_daemon_pages++;
-
- if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
- struct cfs_trace_page *victim;
-
- __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
- victim = cfs_tage_from_list(tcd->tcd_daemon_pages.next);
-
- __LASSERT_TAGE_INVARIANT(victim);
-
- list_del(&victim->linkage);
- cfs_tage_free(victim);
- tcd->tcd_cur_daemon_pages--;
- }
- }
-}
-
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
- struct cfs_trace_cpu_data *tcd;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- cfs_tcd_for_each_type_lock(tcd, i, cpu)
- put_pages_on_tcd_daemon_list(pc, tcd);
- }
-}
-
+#ifdef LNET_DUMP_ON_PANIC
void cfs_trace_debug_print(void)
{
struct page_collection pc;
struct cfs_trace_page *tage;
struct cfs_trace_page *tmp;
+ struct page *page;
- pc.pc_want_daemon_pages = 1;
collect_pages(&pc);
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
char *p, *file, *fn;
- struct page *page;
__LASSERT_TAGE_INVARIANT(tage);
page = tage->page;
p = page_address(page);
while (p < ((char *)page_address(page) + tage->used)) {
- struct ptldebug_header *hdr;
- int len;
- hdr = (void *)p;
- p += sizeof(*hdr);
- file = p;
- p += strlen(file) + 1;
- fn = p;
- p += strlen(fn) + 1;
- len = hdr->ph_len - (int)(p - (char *)hdr);
-
- cfs_print_to_console(hdr, D_EMERG, p, len, file, fn);
-
- p += len;
- }
+ struct ptldebug_header *hdr;
+ int len;
+ hdr = (void *)p;
+ p += sizeof(*hdr);
+ file = p;
+ p += strlen(file) + 1;
+ fn = p;
+ p += strlen(fn) + 1;
+ len = hdr->ph_len - (int)(p - (char *)hdr);
+
+ cfs_print_to_console(hdr, D_EMERG, file, fn,
+ "%.*s", len, p);
+
+ p += len;
+ }
list_del(&tage->linkage);
cfs_tage_free(tage);
}
+ down_write(&cfs_tracefile_sem);
+ while ((page = list_first_entry_or_null(&daemon_pages,
+ struct page, lru)) != NULL) {
+ char *p, *file, *fn;
+
+ p = page_address(page);
+ while (p < ((char *)page_address(page) + page->private)) {
+ struct ptldebug_header *hdr;
+ int len;
+
+ hdr = (void *)p;
+ p += sizeof(*hdr);
+ file = p;
+ p += strlen(file) + 1;
+ fn = p;
+ p += strlen(fn) + 1;
+ len = hdr->ph_len - (int)(p - (char *)hdr);
+
+ cfs_print_to_console(hdr, D_EMERG, file, fn,
+ "%.*s", len, p);
+
+ p += len;
+ }
+ list_del_init(&page->lru);
+ daemon_pages_count -= 1;
+ put_page(page);
+ }
+ up_write(&cfs_tracefile_sem);
}
+#endif /* LNET_DUMP_ON_PANIC */
int cfs_tracefile_dump_all_pages(char *filename)
{
- struct page_collection pc;
- struct file *filp;
- struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
- char *buf;
+ struct page_collection pc;
+ struct file *filp;
+ struct cfs_trace_page *tage;
+ struct cfs_trace_page *tmp;
+ char *buf;
+ struct page *page;
int rc;
down_write(&cfs_tracefile_sem);
rc = PTR_ERR(filp);
filp = NULL;
pr_err("LustreError: can't open %s for dump: rc = %d\n",
- filename, rc);
+ filename, rc);
goto out;
}
- pc.pc_want_daemon_pages = 1;
- collect_pages(&pc);
+ collect_pages(&pc);
if (list_empty(&pc.pc_pages)) {
- rc = 0;
- goto close;
- }
+ rc = 0;
+ goto close;
+ }
/* ok, for now, just write the pages. in the future we'll be building
* iobufs with the pages and calling generic_direct_IO */
break;
}
list_del(&tage->linkage);
- cfs_tage_free(tage);
- }
-
+ cfs_tage_free(tage);
+ }
+ while ((page = list_first_entry_or_null(&daemon_pages,
+ struct page, lru)) != NULL) {
+ buf = page_address(page);
+ rc = cfs_kernel_write(filp, buf, page->private, &filp->f_pos);
+ if (rc != (int)page->private) {
+ pr_warn("Lustre: wanted to write %u but wrote %d\n",
+ (int)page->private, rc);
+ break;
+ }
+ list_del(&page->lru);
+ daemon_pages_count -= 1;
+ put_page(page);
+ }
rc = vfs_fsync_range(filp, 0, LLONG_MAX, 1);
if (rc)
pr_err("LustreError: sync returns: rc = %d\n", rc);
{
struct page_collection pc;
struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
+ struct page *page;
- pc.pc_want_daemon_pages = 1;
collect_pages(&pc);
- list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
-
+ while (!list_empty(&pc.pc_pages)) {
+ tage = list_first_entry(&pc.pc_pages,
+ struct cfs_trace_page, linkage);
__LASSERT_TAGE_INVARIANT(tage);
list_del(&tage->linkage);
cfs_tage_free(tage);
}
-}
-
-int cfs_trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
- const char __user *usr_buffer, int usr_buffer_nob)
-{
- int nob;
-
- if (usr_buffer_nob > knl_buffer_nob)
- return -EOVERFLOW;
-
- if (copy_from_user(knl_buffer, usr_buffer, usr_buffer_nob))
- return -EFAULT;
-
- nob = strnlen(knl_buffer, usr_buffer_nob);
- while (--nob >= 0) /* strip trailing whitespace */
- if (!isspace(knl_buffer[nob]))
- break;
-
- if (nob < 0) /* empty string */
- return -EINVAL;
-
- if (nob == knl_buffer_nob) /* no space to terminate */
- return -EOVERFLOW;
- knl_buffer[nob + 1] = 0; /* terminate */
- return 0;
+ down_write(&cfs_tracefile_sem);
+ while ((page = list_first_entry_or_null(&daemon_pages,
+ struct page, lru)) != NULL) {
+ list_del(&page->lru);
+ daemon_pages_count -= 1;
+ put_page(page);
+ }
+ up_write(&cfs_tracefile_sem);
}
-EXPORT_SYMBOL(cfs_trace_copyin_string);
int cfs_trace_copyout_string(char __user *usr_buffer, int usr_buffer_nob,
const char *knl_buffer, char *append)
}
EXPORT_SYMBOL(cfs_trace_copyout_string);
-int cfs_trace_allocate_string_buffer(char **str, int nob)
-{
- if (nob > 2 * PAGE_SIZE) /* string must be "sensible" */
- return -EINVAL;
-
- *str = kmalloc(nob, GFP_KERNEL | __GFP_ZERO);
- if (*str == NULL)
- return -ENOMEM;
-
- return 0;
-}
-
int cfs_trace_dump_debug_buffer_usrstr(void __user *usr_str, int usr_str_nob)
{
- char *str;
- int rc;
-
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
+ char *str;
+ char *path;
+ int rc;
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc != 0)
- goto out;
+ str = memdup_user_nul(usr_str, usr_str_nob);
+ if (IS_ERR(str))
+ return PTR_ERR(str);
- if (str[0] != '/') {
- rc = -EINVAL;
- goto out;
- }
- rc = cfs_tracefile_dump_all_pages(str);
-out:
+ path = strim(str);
+ if (path[0] != '/')
+ rc = -EINVAL;
+ else
+ rc = cfs_tracefile_dump_all_pages(path);
kfree(str);
- return rc;
+
+ return rc;
}
int cfs_trace_daemon_command(char *str)
int cfs_trace_daemon_command_usrstr(void __user *usr_str, int usr_str_nob)
{
- char *str;
- int rc;
+ char *str;
+ int rc;
- rc = cfs_trace_allocate_string_buffer(&str, usr_str_nob + 1);
- if (rc != 0)
- return rc;
-
- rc = cfs_trace_copyin_string(str, usr_str_nob + 1,
- usr_str, usr_str_nob);
- if (rc == 0)
- rc = cfs_trace_daemon_command(str);
+ str = memdup_user_nul(usr_str, usr_str_nob);
+ if (IS_ERR(str))
+ return PTR_ERR(str);
+ rc = cfs_trace_daemon_command(strim(str));
kfree(str);
- return rc;
+
+ return rc;
}
int cfs_trace_set_debug_mb(int mb)
{
int i;
int j;
- int pages;
- int limit = cfs_trace_max_debug_mb();
+ unsigned long pages;
+ unsigned long total_mb = (cfs_totalram_pages() >> (20 - PAGE_SHIFT));
+ unsigned long limit = max_t(unsigned long, 512, (total_mb * 4) / 5);
struct cfs_trace_cpu_data *tcd;
if (mb < num_possible_cpus()) {
}
if (mb > limit) {
- pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %d MB.\n",
+ pr_warn("Lustre: %d MB is too large for debug buffer size, setting it to %lu MB.\n",
mb, limit);
mb = limit;
}
cfs_tcd_for_each(tcd, i, j)
tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
+ daemon_pages_max = pages;
up_write(&cfs_tracefile_sem);
- return 0;
+ return mb;
}
int cfs_trace_get_debug_mb(void)
static int tracefiled(void *arg)
{
struct page_collection pc;
- struct tracefiled_ctl *tctl = arg;
struct cfs_trace_page *tage;
struct cfs_trace_page *tmp;
struct file *filp;
int last_loop = 0;
int rc;
- /* we're started late enough that we pick up init's fs context */
- /* this is so broken in uml? what on earth is going on? */
-
- complete(&tctl->tctl_start);
-
- while (1) {
- wait_queue_entry_t __wait;
-
- pc.pc_want_daemon_pages = 0;
- collect_pages(&pc);
+ while (!last_loop) {
+ LIST_HEAD(for_daemon_pages);
+ int for_daemon_pages_count = 0;
+ schedule_timeout_interruptible(cfs_time_seconds(1));
+ if (kthread_should_stop())
+ last_loop = 1;
+ collect_pages(&pc);
if (list_empty(&pc.pc_pages))
- goto end_loop;
+ continue;
- filp = NULL;
+ filp = NULL;
down_read(&cfs_tracefile_sem);
- if (cfs_tracefile[0] != 0) {
+ if (cfs_tracefile[0] != 0) {
filp = filp_open(cfs_tracefile,
O_CREAT | O_RDWR | O_LARGEFILE,
0600);
}
}
up_read(&cfs_tracefile_sem);
- if (filp == NULL) {
- put_pages_on_daemon_list(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- goto end_loop;
- }
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
- struct dentry *de = file_dentry(filp);
- static loff_t f_pos;
-
__LASSERT_TAGE_INVARIANT(tage);
- if (f_pos >= (off_t)cfs_tracefile_size)
- f_pos = 0;
- else if (f_pos > i_size_read(de->d_inode))
- f_pos = i_size_read(de->d_inode);
-
- buf = kmap(tage->page);
- rc = cfs_kernel_write(filp, buf, tage->used, &f_pos);
- kunmap(tage->page);
- if (rc != (int)tage->used) {
- pr_warn("Lustre: wanted to write %u but wrote %d\n",
- tage->used, rc);
- put_pages_back(&pc);
- __LASSERT(list_empty(&pc.pc_pages));
- break;
+ if (filp) {
+ struct dentry *de = file_dentry(filp);
+ static loff_t f_pos;
+
+ if (f_pos >= (off_t)cfs_tracefile_size)
+ f_pos = 0;
+ else if (f_pos > i_size_read(de->d_inode))
+ f_pos = i_size_read(de->d_inode);
+
+ buf = kmap(tage->page);
+ rc = cfs_kernel_write(filp, buf, tage->used,
+ &f_pos);
+ kunmap(tage->page);
+ if (rc != (int)tage->used) {
+ pr_warn("Lustre: wanted to write %u but wrote %d\n",
+ tage->used, rc);
+ put_pages_back(&pc);
+ __LASSERT(list_empty(&pc.pc_pages));
+ break;
+ }
}
- }
+ list_del_init(&tage->linkage);
+ list_add_tail(&tage->page->lru, &for_daemon_pages);
+ for_daemon_pages_count += 1;
+
+ tage->page->private = (int)tage->used;
+ kfree(tage);
+ atomic_dec(&cfs_tage_allocated);
+ }
+
+ if (filp)
+ filp_close(filp, NULL);
+
+ down_write(&cfs_tracefile_sem);
+ list_splice_tail(&for_daemon_pages, &daemon_pages);
+ daemon_pages_count += for_daemon_pages_count;
+ while (daemon_pages_count > daemon_pages_max) {
+ struct page *p = list_first_entry(&daemon_pages,
+ struct page, lru);
+ list_del(&p->lru);
+ put_page(p);
+ daemon_pages_count -= 1;
+ }
+ up_write(&cfs_tracefile_sem);
- filp_close(filp, NULL);
- put_pages_on_daemon_list(&pc);
if (!list_empty(&pc.pc_pages)) {
- int i;
+ int i;
pr_alert("Lustre: trace pages aren't empty\n");
pr_err("Lustre: total cpus(%d): ", num_possible_cpus());
pr_err("Lustre: There are %d pages unwritten\n", i);
}
__LASSERT(list_empty(&pc.pc_pages));
-end_loop:
- if (atomic_read(&tctl->tctl_shutdown)) {
- if (last_loop == 0) {
- last_loop = 1;
- continue;
- } else {
- break;
- }
- }
- init_waitqueue_entry(&__wait, current);
- add_wait_queue(&tctl->tctl_waitq, &__wait);
- schedule_timeout_interruptible(cfs_time_seconds(1));
- remove_wait_queue(&tctl->tctl_waitq, &__wait);
- }
- complete(&tctl->tctl_stop);
- return 0;
+ }
+
+ return 0;
}
int cfs_trace_start_thread(void)
{
- struct tracefiled_ctl *tctl = &trace_tctl;
- int rc = 0;
-
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running)
- goto out;
+ struct task_struct *tsk;
+ int rc = 0;
- init_completion(&tctl->tctl_start);
- init_completion(&tctl->tctl_stop);
- init_waitqueue_head(&tctl->tctl_waitq);
- atomic_set(&tctl->tctl_shutdown, 0);
+ if (tctl_task)
+ return 0;
- if (IS_ERR(kthread_run(tracefiled, tctl, "ktracefiled"))) {
+ tsk = kthread_create(tracefiled, NULL, "ktracefiled");
+ if (IS_ERR(tsk))
rc = -ECHILD;
- goto out;
- }
+ else if (cmpxchg(&tctl_task, NULL, tsk) != NULL)
+ /* already running */
+ kthread_stop(tsk);
+ else
+ wake_up_process(tsk);
- wait_for_completion(&tctl->tctl_start);
- thread_running = 1;
-out:
- mutex_unlock(&cfs_trace_thread_mutex);
- return rc;
+ return rc;
}
void cfs_trace_stop_thread(void)
{
- struct tracefiled_ctl *tctl = &trace_tctl;
+ struct task_struct *tsk;
- mutex_lock(&cfs_trace_thread_mutex);
- if (thread_running) {
+ tsk = xchg(&tctl_task, NULL);
+ if (tsk) {
pr_info("Lustre: shutting down debug daemon thread...\n");
- atomic_set(&tctl->tctl_shutdown, 1);
- wait_for_completion(&tctl->tctl_stop);
- thread_running = 0;
+ kthread_stop(tsk);
}
- mutex_unlock(&cfs_trace_thread_mutex);
}
+/* percents to share the total debug memory for each type; values sum to
+ * 100 so the per-type tcd_max_pages limits add up to max_pages
+ */
+static unsigned int pages_factor[CFS_TCD_TYPE_CNT] = {
+ 80, /* 80% pages for CFS_TCD_TYPE_PROC */
+ 10, /* 10% pages for CFS_TCD_TYPE_SOFTIRQ */
+ 10 /* 10% pages for CFS_TCD_TYPE_IRQ */
+};
+
int cfs_tracefile_init(int max_pages)
{
struct cfs_trace_cpu_data *tcd;
- int i;
- int j;
- int rc;
- int factor;
+ int i;
+ int j;
- rc = cfs_tracefile_init_arch();
- if (rc != 0)
- return rc;
+ /* initialize trace_data */
+ memset(cfs_trace_data, 0, sizeof(cfs_trace_data));
+ for (i = 0; i < CFS_TCD_TYPE_CNT; i++) {
+ cfs_trace_data[i] =
+ kmalloc_array(num_possible_cpus(),
+ sizeof(union cfs_trace_data_union),
+ GFP_KERNEL);
+ if (!cfs_trace_data[i])
+ goto out_trace_data;
+ }
+ /* arch related info initialized */
cfs_tcd_for_each(tcd, i, j) {
- /* tcd_pages_factor is initialized int tracefile_init_arch. */
- factor = tcd->tcd_pages_factor;
+ int factor = pages_factor[i];
+
+ spin_lock_init(&tcd->tcd_lock);
+ tcd->tcd_pages_factor = factor;
+ tcd->tcd_type = i;
+ tcd->tcd_cpu = j;
+
INIT_LIST_HEAD(&tcd->tcd_pages);
INIT_LIST_HEAD(&tcd->tcd_stock_pages);
- INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
tcd->tcd_cur_pages = 0;
tcd->tcd_cur_stock_pages = 0;
- tcd->tcd_cur_daemon_pages = 0;
tcd->tcd_max_pages = (max_pages * factor) / 100;
LASSERT(tcd->tcd_max_pages > 0);
tcd->tcd_shutting_down = 0;
}
+ daemon_pages_max = max_pages;
+
return 0;
+
+out_trace_data:
+ for (i = 0; cfs_trace_data[i]; i++) {
+ kfree(cfs_trace_data[i]);
+ cfs_trace_data[i] = NULL;
+ }
+ pr_err("lnet: Not enough memory\n");
+ return -ENOMEM;
}
static void trace_cleanup_on_all_cpus(void)
{
struct cfs_trace_cpu_data *tcd;
struct cfs_trace_page *tage;
- struct cfs_trace_page *tmp;
int i, cpu;
for_each_possible_cpu(cpu) {
cfs_tcd_for_each_type_lock(tcd, i, cpu) {
+ if (!tcd->tcd_pages_factor)
+ /* Not initialised */
+ continue;
tcd->tcd_shutting_down = 1;
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+ while (!list_empty(&tcd->tcd_pages)) {
+ tage = list_first_entry(&tcd->tcd_pages,
+ struct cfs_trace_page,
+ linkage);
__LASSERT_TAGE_INVARIANT(tage);
list_del(&tage->linkage);
static void cfs_trace_cleanup(void)
{
struct page_collection pc;
+ int i;
INIT_LIST_HEAD(&pc.pc_pages);
trace_cleanup_on_all_cpus();
- cfs_tracefile_fini_arch();
+ for (i = 0; i < CFS_TCD_TYPE_CNT && cfs_trace_data[i]; i++) {
+ kfree(cfs_trace_data[i]);
+ cfs_trace_data[i] = NULL;
+ }
}
void cfs_tracefile_exit(void)
{
- cfs_trace_stop_thread();
- cfs_trace_cleanup();
+ cfs_trace_stop_thread();
+ cfs_trace_flush_pages();
+ cfs_trace_cleanup();
}