Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/CREDITS =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/CREDITS 2004-11-11 10:28:48.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/CREDITS 2004-11-18 20:59:11.000000000 -0500 @@ -2522,6 +2522,7 @@ E: mikpe@csd.uu.se W: http://www.csd.uu.se/~mikpe/ D: Miscellaneous fixes +D: Performance-monitoring counters driver N: Reed H. Petty E: rhp@draper.net Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/Documentation/ioctl-number.txt =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/Documentation/ioctl-number.txt 2004-04-03 22:38:18.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/Documentation/ioctl-number.txt 2004-11-18 20:59:11.000000000 -0500 @@ -187,5 +187,7 @@ 0xB1 00-1F PPPoX 0xCB 00-1F CBM serial IEC bus in development: +0xD0 all performance counters see drivers/perfctr/ + 0xDD 00-3F ZFCP device driver see drivers/s390/scsi/ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/MAINTAINERS 2004-11-11 10:28:39.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/MAINTAINERS 2004-11-18 20:59:11.000000000 -0500 @@ -1608,6 +1608,12 @@ L: linux-net@vger.kernel.org S: Supported +PERFORMANCE-MONITORING COUNTERS DRIVER +P: Mikael Pettersson +M: mikpe@csd.uu.se +W: http://www.csd.uu.se/~mikpe/linux/perfctr/ +S: Maintained + PNP SUPPORT P: Adam Belay M: ambx1@neo.rr.com Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/Kconfig 2004-11-11 10:28:16.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/Kconfig 2004-11-18 
20:59:11.000000000 -0500 @@ -857,6 +857,8 @@ generate incorrect output with certain kernel constructs when -mregparm=3 is used. +source "drivers/perfctr/Kconfig" + endmenu menu "Special options" Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/entry.S =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/entry.S 2004-11-11 10:28:47.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/entry.S 2004-11-18 20:59:11.000000000 -0500 @@ -444,6 +444,16 @@ /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + pushl $LOCAL_PERFCTR_VECTOR-256 + SAVE_ALL + pushl %esp + call smp_perfctr_interrupt + addl $4, %esp + jmp ret_from_intr +#endif + ENTRY(divide_error) pushl $0 # no error code pushl $do_divide_error Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/i8259.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/i8259.c 2004-11-11 10:27:12.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -436,6 +437,8 @@ */ intr_init_hook(); + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/process.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/i386/kernel/process.c 2004-11-11 10:28:16.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/i386/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include 
#include @@ -305,6 +306,7 @@ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } + perfctr_exit_thread(&tsk->thread); if (tsk->thread.debugreg[7]) dr_dec_use_count(tsk->thread.debugreg[7]); } @@ -371,6 +373,8 @@ savesegment(fs,p->thread.fs); savesegment(gs,p->thread.gs); + perfctr_copy_thread(&p->thread); + tsk = current; if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -519,6 +523,8 @@ /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ + perfctr_suspend_thread(prev); + __unlazy_fpu(prev_p); /* @@ -599,6 +605,9 @@ */ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; } + + perfctr_resume_thread(next); + return prev_p; } Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/ppc/Kconfig 2004-11-11 10:28:15.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/Kconfig 2004-11-18 20:59:11.000000000 -0500 @@ -214,6 +214,8 @@ depends on 4xx || 8xx default y +source "drivers/perfctr/Kconfig" + endmenu menu "Platform options" Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/kernel/process.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/ppc/kernel/process.c 2004-11-11 10:28:48.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/ppc/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -255,7 +256,9 @@ new->thread.regs->msr |= MSR_VEC; new_thread = &new->thread; old_thread = ¤t->thread; + perfctr_suspend_thread(&prev->thread); last = _switch(old_thread, new_thread); + perfctr_resume_thread(¤t->thread); local_irq_restore(s); return last; } @@ -314,6 +317,7 @@ last_task_used_math = NULL; if (last_task_used_altivec == current) 
last_task_used_altivec = NULL; + perfctr_exit_thread(¤t->thread); } void flush_thread(void) @@ -400,6 +404,8 @@ p->thread.last_syscall = -1; + perfctr_copy_thread(&p->thread); + return 0; } Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/Kconfig 2004-11-11 10:28:39.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/Kconfig 2004-11-18 20:59:11.000000000 -0500 @@ -319,6 +319,8 @@ bool default y +source "drivers/perfctr/Kconfig" + endmenu Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/entry.S =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/entry.S 2004-04-03 22:36:56.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/entry.S 2004-11-18 20:59:11.000000000 -0500 @@ -556,6 +556,11 @@ apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt #endif +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_KPERFCTR) +ENTRY(perfctr_interrupt) + apicinterrupt LOCAL_PERFCTR_VECTOR,smp_perfctr_interrupt +#endif + /* * Exception entry points. 
*/ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/i8259.c 2004-04-03 22:36:25.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/i8259.c 2004-11-18 20:59:11.000000000 -0500 @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -484,6 +485,8 @@ set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif + perfctr_vector_init(); + /* * Set the clock to HZ Hz, we already have a valid * vector now: Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/process.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/arch/x86_64/kernel/process.c 2004-11-11 10:27:51.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/arch/x86_64/kernel/process.c 2004-11-18 20:59:11.000000000 -0500 @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -258,6 +259,7 @@ (init_tss + smp_processor_id())->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; } + perfctr_exit_thread(&me->thread); } void flush_thread(void) @@ -361,6 +363,8 @@ asm("movl %%es,%0" : "=m" (p->thread.es)); asm("movl %%ds,%0" : "=m" (p->thread.ds)); + perfctr_copy_thread(&p->thread); + if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) @@ -407,6 +411,8 @@ int cpu = smp_processor_id(); struct tss_struct *tss = init_tss + cpu; + perfctr_suspend_thread(prev); + unlazy_fpu(prev_p); /* @@ -510,6 +516,8 @@ } } + perfctr_resume_thread(next); + return prev_p; } Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/Makefile 2004-11-18 20:59:08.000000000 -0500 +++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/Makefile 2004-11-18 20:59:11.000000000 -0500 @@ -51,6 +51,7 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ +obj-$(CONFIG_KPERFCTR) += perfctr/ obj-$(CONFIG_INFINIBAND) += infiniband/ obj-y += firmware/ obj-$(CONFIG_CRASH_DUMP) += dump/ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_compat.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_compat.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_compat.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,25 @@ +/* $Id: x86_64_compat.h,v 1.1 2003/05/14 21:51:57 mikpe Exp $ + * Performance-monitoring counters driver. + * x86_64-specific compatibility definitions for 2.4/2.5 kernels. + * + * Copyright (C) 2003 Mikael Pettersson + */ +#include +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) + +/* irq_enter() and irq_exit() take two parameters in 2.4. However, + we only use them to disable preemption in the interrupt handler, + which isn't needed in non-preemptive 2.4 kernels. */ +#ifdef CONFIG_PREEMPT +#error "not yet ported to 2.4+PREEMPT" +#endif +#undef irq_enter +#undef irq_exit +#define irq_enter() do{}while(0) +#define irq_exit() do{}while(0) + +#endif + +extern unsigned int perfctr_cpu_khz(void); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Makefile 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,46 @@ +# $Id: Makefile,v 1.20.2.2 2004/08/02 22:24:58 mikpe Exp $ +# Makefile for the Performance-monitoring counters driver. 
+ +ifeq ($(VERSION)$(PATCHLEVEL),24) +include Makefile24 +else + +# We need -fno-unit-at-a-time with gcc-3.4 on x86 to avoid stack overflow. +# Kernels >= 2.6.6 do that automatically but older ones do not, so we +# unconditionally add that option here just in case. +my_check_gcc = $(shell if $(CC) $(CFLAGS) $(1) -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; else echo "$(2)"; fi ;) +EXTRA_CFLAGS_$(CONFIG_X86) := $(call my_check_gcc,-fno-unit-at-a-time,) +EXTRA_CFLAGS_$(CONFIG_X86_64) := +EXTRA_CFLAGS_$(CONFIG_PPC32) := +EXTRA_CFLAGS := $(EXTRA_CFLAGS_y) + +# construct various object file lists: +# kernel-objs-y kernel objects +# m-objs-m perfctr.o if driver is module, empty otherwise +# driver-objs-y objects for perfctr.o module, or empty + +# This also covers x86_64. +driver-objs-$(CONFIG_X86) := x86.o +tests-objs-$(CONFIG_X86) := x86_tests.o +kernel-objs-$(CONFIG_X86) := x86_setup.o + +driver-objs-$(CONFIG_PPC32) := ppc.o +tests-objs-$(CONFIG_PPC32) := ppc_tests.o +kernel-objs-$(CONFIG_PPC32) := ppc_setup.o + +driver-objs-y += init.o marshal.o +driver-objs-$(CONFIG_PERFCTR_INIT_TESTS) += $(tests-objs-y) +driver-objs-$(CONFIG_PERFCTR_VIRTUAL) += virtual.o +stub-objs-$(CONFIG_PERFCTR)-$(CONFIG_PERFCTR_VIRTUAL) := virtual_stub.o +driver-objs-$(CONFIG_PERFCTR_GLOBAL) += global.o +m-objs-$(CONFIG_PERFCTR) := perfctr.o +kernel-objs-$(CONFIG_PERFCTR) += $(driver-objs-y) +kernel-objs-y += $(stub-objs-m-y) + +perfctr-objs := $(driver-objs-y) +obj-m += $(m-objs-m) + +obj-$(CONFIG_KPERFCTR) += kperfctr.o +kperfctr-objs := $(kernel-objs-y) + +endif # ifeq 24 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/RELEASE-NOTES =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/RELEASE-NOTES 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/RELEASE-NOTES 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,1357 
@@ +$Id: RELEASE-NOTES,v 1.234.2.28 2004/10/19 16:22:47 mikpe Exp $ + +RELEASE NOTES +============= + +Version 2.6.10.2, 2004-10-19 +- virtual.c: replace nrctrs_lock with a mutex. Avoids illegal + may-sleep-while-holding-lock, caused by mutex operations in + perfctr_cpu_{reserve,release}(). + Backport from perfctr-2.7.6. +- PPC32: Correct MMCR0 handling for FCECE/TRIGGER. Read + MMCR0 at suspend and then freeze the counters. Move + this code from read_counters() to suspend(). At resume, + reload MMCR0 to unfreeze the counters. Clean up the + cstatus checks controlling this behaviour. + Backport from perfctr-2.7.6. + +Version 2.6.10, 2004-09-14 +- Fixed p4_clear_counters() to not access IQ_ESCR{0,1} + on P4 models >= 3. + +Version 2.6.10-pre1, 2004-08-03 +- Changed x86-64 to use the x86 include file and driver. + Intel's 64-bit P4 should now work in the x86-64 kernel. +- Replaced PERFCTR_INTERRUPT_SUPPORT and NMI_LOCAL_APIC + #if:s in x86 code by #ifdef:s on CONFIG_X86_LOCAL_APIC. +- Use macros to clean up x86 per-cpu cache accesses. +- Recognize model 13 Pentium-Ms. +- Changed isuspend_cpu on x86 to be like x86-64's: it + now stores a CPU number instead of a cache pointer. +- x86: make perfctr_cpu_name more approximate. +- The x86 driver records a simplified CPU type for x86_tests, + but this only occurs if PERFCTR_INIT_TESTS is configured. + perfctr_info.cpu_type is now unused. +- Changed P4 driver to set up and check an explicit flag + for EXTENDED_CASCADE availability. perfctr_info.cpu_type + is now unused except for perfctr_x86_init_tests(). +- x86: Reformatted "if( x )" to "if (x)" and similarly for while + and switch statements. Deleted #if 0 blocks. + +Version 2.6.9, 2004-07-27 +- Fix ppc_check_control() to allow 7400/7410 processors to + specify MMCR2[THRESHMULT]. +- PPC32 cleanups: make get_cpu_cache() return pointer not lvalue, + eliminate duplicated initialisation/cleanup code. 
+- Makefile: enforce -fno-unit-at-a-time with gcc-3.4 on x86, + to prevent stack overflow in 2.6 kernels < 2.6.6. +- Do sync_core() before rdtsc() in x86_tests, to avoid bogus + benchmarking data on K8. Add sync_core() implementation for + the 32-bit kernel. Add sync_core() benchmark. +- Added __perfctr_mk_cstatus() to allow x86.c:finalise_backpatching() + to create a cstatus with i-mode counters marked as present, but + with zero actual counters. This prevents perfctr_cpu_isuspend() + from clearing the control register for counter #0 at init-time, + when the hardware doesn't belong to this driver. On AMD and P6 + this would accidentally disable the NMI watchdog. +- x86: Marked initial targets of backpatchable calls + 'noinline' to prevent gcc from inlining them, which + completely breaks the backpatching mechanism. +- x86_tests: fix CONFIG_X86_LOCAL_APIC=n linkage error. +- 2.6.8-rc1 no longer makes cpu_online_map a #define on UP, + breaking modules. Reintroduce the macro. +- 2.6.8-rc1 changed cpus_complement() calling convention. + Replace cpus_complement();cpus_and() with cpus_andnot(), + and provide cpus_andnot() compat macro. +- PPC32: support generic CPUs using only the TB. +- PPC32: query OF for CPU/TB frequencies, drop /proc/cpuinfo + parsing code. +- PPC32: avoid CPU re-detection in tests code. +- PPC32: clean up and sync with current perfctr-2.7 code. + +Version 2.6.8, 2004-05-29 +- Added recognition of PowerPC 750GX. +- Changes for the {reserve,release}_lapic_nmi() API added in + kernel 2.6.6 backported from perfctr-2.7.1: + * Starting with kernel 2.6.6 we no longer need access to + nmi_perfctr_msr, so removed EXPORT_SYMBOL() and + patches related to this variable (except for older kernels). + * Updated x86.c to use the new API. Added simulation (without + the non-conflict guarantees) for older kernels. + * Moved hardware reservation to x86.c's "reserve" procedure. + The init code now only does read-only hardware detection. 
+ * Added a mutex to the reserve/release procedures, eliminating + a long-standing race possibility. + * Changed x86.c to reserve and release the hardware around its + call to perfctr_x86_init_tests(). + * Similarly updated x86_64.c for the new API. + +Version 2.6.7, 2004-05-04 +- Replaced x86_64_tests.{c,h} with x86_tests.{c,h}. +- sys_device_{,un}register() was renamed as sysdev_{,un}register() + in 2.6.4-rc2. Updated x86.c and x86_64.c accordingly, and + added a compatibility definition in compat.h. +- Removed unnecessary '#include "compat.h"' from x86_tests.c. +- Replaced x86_64_setup.c with x86_setup.c. +- Replaced x86_64_compat.h with x86_compat.h. +- Moved perfctr_interrupt entry point from x86_setup.c to patch kit, + for kernels older than 2.4.21. Cleanup to facilitate future merge + of x86_setup.c and x86_64_setup.c. + +Version 2.6.6, 2004-02-21 +- Fixed a bug in x86-64's perfctr interrupt entry code in 2.4 kernels, + causing it to pass the wrong value for "struct pt_regs*". This + was harmless since the retrieved "rip" was unused, but still wrong. + Renamed do_perfctr_interrupt to smp_perfctr_interrupt to allow + using the 2.4 kernel's standard BUILD_SMP_INTERRUPT macro. +- Unmask LVTPC after interrupt on Pentium-M. An oprofile user + reports that P-M auto-masks LVTPC just like P4. Preliminary + measurements indicate a 40 to 60 cycle cost for the apic write + on P4s and P6s, so the unmask is not done unconditionally. +- Measure LVTPC write overhead in x86{,_64}_tests.c. +- Add Pentium 4 Model 3 detection. +- The 2.4.21-193 SuSE kernel does EXPORT_SYMBOL(mmu_cr4_features). + Add compat24.h workaround for this. + +Version 2.6.5, 2004-01-26 +- Added perfctr_info.cpu_type constants to . +- Init filp->f_mapping in virtual.c for 2.6.2-rc1+ kernels. +- Updated p4_check_control(): + * Allow ESCR.CPL_T1 to be non-zero when using global-mode + counters on HT processors. + * Don't require ESCR.CPL_T0 to be non-zero. 
CPL_T0==0b00 + is safe and potentially useful (global counters on HT). + * Require CCCR.ACTIVE_THREAD==0b11 on non-HT processors, as + documented in the IA32 Volume 3 manual. Old non-HT P4s + seem to work Ok for all four values (see perfctr-2.6.0-pre3 + notes), but this is neither guaranteed nor useful. +- x86.c now detects & records P4 HT-ness also in UP kernels. +- Added 'is_global' parameter to perfctr_cpu_update_control(). + This flag is ignored on everything except P4 (sigh). + +Version 2.6.4, 2004-01-12 +- Added 'tsc_to_cpu_mult' field to struct perfctr_info, replacing + '_reserved1'. This is needed on PowerPC to map time-base ticks + to actual time. On x86/AMD64, tsc_to_cpu_mult == 1. +- Added support for PowerPC 604/7xx/74xx processors. Overflow + interrupts are currently not allowed due to the PMI/DECR erratum. +- Replaced perfctr_cpus_mask() with cpus_addr(). Updated cpumask.h + to define cpus_addr() for kernels older than 2.6.1. + +Version 2.6.3-pl1, 2004-01-01 +- Moved the x86 interrupt handler definition from x86_setup.c to + the patch kit for 2.4.21 and later 2.4 kernels, like it already + is done for 2.6 kernels. This change is needed due to extensive + interrupt handler changes in RedHat's 2.4.21-6.EL kernel. +- Simplified : now that early 2.4 kernels no + longer are supported, LOCAL_PERFCTR_VECTOR is known to be defined, + so CONFIG_X86_LOCAL_APIC implies PERFCTR_INTERRUPT_SUPPORT. + +Version 2.6.3, 2003-12-21 +- Removed gperfctr_cpu_state_only_cpu_sdesc's total_sizeof + optimisation. The ABI change in 2.6.2 broke it, leading to + the new fields not being cleared and later causing EOVERFLOW. +- The perfctr_ioctl32_handler() workaround is now only applied + to kernels older than 2.4.23, since 2.4.23 added the "NULL + handler == sys_ioctl" logic. + +Version 2.6.2, 2003-11-23 +- Added 16 bytes (four fields) of reserved data to perfctr_info, + perfctr_cpu_control, vperfctr_control, gperfctr_cpu_control, + and gperfctr_cpu_state. 
Renumbered marshalling tags for + generic structures. Bumped ABI versions. +- Only allow use of IQ_ESCR{0,1} on P4 models <= 2. These ESCRs + were removed from later models, according to a recent Intel + documentation update (252046-006). +- Fixes for Fedora Core 1's 2.4.22-1.2115.nptl kernel: + * Work around their incomplete and broken cpumask_t backport. + * Avoid name conflict due to their on_each_cpu() backport. + * Handle their preempt_disable()/enable() macros. +- Added new perfctr_cpu_is_forbidden() macro to fix a + compilation error affecting AMD64 in SMP 2.6 kernels. + SMP cpu_isset() requires that mask is an lvalue, but + for AMD64 the mask is a constant. + +Version 2.6.1, 2003-10-05 +- Kernel 2.6.0-test6 changed /proc/self and the /proc// + namespace to refer to "processes" (groups of CLONE tasks) + instead of actual kernel tasks. This forced the planned + transition of the vperfctr API from /proc//perfctr + to /dev/perfctr to occur immediately. Changes: + * Moved /dev/perfctr implementation from global.c to init.c. + * Implemented VPERFCTR_{CREAT,OPEN}, vperfctr_attach(), and + the vperfctrfs pseudo-fs needed to support the magic files. + The fs code was ported from perfctr-1.6/3.1, but updated + for 2.6 and fixed to permit module unloading in 2.4. + * Fixed VPERFCTR_OPEN to accept tsk->thread.perfctr == NULL. + (Needed to info querying commands.) + * Removed /proc//perfctr code. Simplified vperfctr_stub code. + * Updated vperfctr_attach() to mimic the old /proc vperfctr_open(). + This fixes some synchronisation issues. +- Cleanups: + * Removed #if checks and code for kernels older than 2.4.16. + * Eliminated compat macros that are identical in 2.6 and 2.4. + * Moved ptrace_check_attach EXPORT_SYMBOL from x86{,_64}_setup.c + to virtual_stub.c. + * get_task_by_proc_pid_inode() is now trivial. Eliminated it. + * p4_ht_finalise() is now trivial. Eliminated it. 
+- Added MODULE_ALIAS() declaration, eliminating the need for + an alias in /etc/modprobe.conf with 2.6 kernels. Added + MODULE_ALIAS() compatibility #define in compat24.h. +- Added detection of AMD K8 Revision C processors. +- Updated K8C detection for Revision C Athlon64s. + +Version 2.6.0, 2003-09-08 +- Handle set_cpus_allowed() when PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED: + * Add bad_cpus_allowed flag to struct vperfctr. + * Check bad_cpus_allowed in __vperfctr_resume: if resuming + with PMCs on forbidden CPU, kill counters and SIGILL current. + * __vperfctr_set_cpus_allowed() callback: set bad_cpus_allowed + and print warning if mask allows forbidden CPUs. + * Use task_lock/unlock instead of preempt_disable/enable to + synchronise task_struct accesses. + * Ensure sampling_timer and bad_cpus_allowed share cache line. + * #include explicitly for 2.4.18 and older + kernels; newer kernels include it from . + * Hook in virtual_stub.c. + * Hook and cpumask_t typedef in . +- Simplify #if test for set_cpus_allowed() emulation code. + Also don't define it if CONFIG_PERFCTR_VIRTUAL isn't set. +- cpumask.h only typedefs cpumask_t if hasn't. +- Don't hide #include in compat24.h. +- Fixed compat24.h to test for MODULE not CONFIG_MODULES at the + __module_get/module_put macros. + +Version 2.6.0-pre5, 2003-08-31 +- printk() is not allowed in switch_to(). Disabled debug code + which could violate that rule. Changed virtual_stub.c to BUG() + instead of printk() if the driver is invoked when not loaded. +- Renamed vperfctr_exit2() to vperfctr_unlink() for clarity. +- gcc-3.3.1 issued several "dereferencing type-punned pointer will + break strict-aliasing rules" warnings for marshal.c. Used explicit + unions to fix the warnings and clean up the code. +- Removed compat22.h. +- cpumask_t was included in standard 2.6.0-test4; replace #ifndef + test in cpumask.h with normal kernel version test. 
+- x86-64 fix: sys_ioctl() isn't exported to modules, so call + filp->f_op->ioctl() instead in perfctr_ioctl32_handler(). +- x86-64 fix: init.c must include not + for compatibility with 2.4 kernels. + +Version 2.6.0-pre4, 2003-08-19 +- Fix x86-64 register_ioctl32_conversion() usage for 2.4 kernels: + * Supply dummy handler since a NULL handler oopses the kernel. + * Test CONFIG_IA32_EMULATION since CONFIG_COMPAT is post-2.4. +- Fixed and merged the new API struct marshalling code: + * New files marshal.c and marshal.h contain the marshalling code + and high-level helper functions (source shared with the library). + * User-space structs are struct perfctr_struct_buf and accessed using + perfctr_copy_{from,to}_user() with ptr to appropriate descriptor. + The cpumask stuff isn't changed. + * All ioctls registered as trivially 32-bit compatible on x86-64. + * Changed perfctr_info cpu_type/cpu_features from short to int: + this avoids the need for UINT16 marshalling support, and cpumask_t + caused perfctr_info to change binary representation anyway. +- Declared VPERFCTR_{CREAT,OPEN} ioctls, but left them unimplemented. +- Fixed vperfctr_open() preemption bug. The O_CREAT check+install + code could be preempted, leading to remote-control races. +- Fixed perfctr_exit_thread() preemption bug. It detached the vperfctr + before calling __vperfctr_exit(). If current was preempted before + __vperfctr_exit() called vperfctr_suspend(), perfctr_suspend_thread() + would fail to suspend the counters. The suspend+detach is now done + atomically within __vperfctr_exit(). +- Changes to handle 2.6 kernels with the cpumask_t patch (-mm, -osdl): + * Convert perfctr_cpus_forbidden_mask accesses to cpumask_t API. + Based in part on a patch for the -osdl kernel by Stephen Hemminger. + * Remove cpus and cpus_forbidden from struct perfctr_info, + since their sizes depend on the kernel configuration. 
+ * Add struct perfctr_cpu_mask to export cpumask_t objects + sanely (i.e., using ints not longs) to user-space. + * Add CPUS and CPUS_FORBIDDEN commands to retrieve these sets. + * Add cpumask.h to emulate cpumask_t API in cpumask_t-free kernels. + * Move perfctr_cpus_forbidden_mask declaration/#define from + to cpumask.h -- necessary since + doesn't have access to the driver's compatibility definitions. +- Cleaned up perfctr_cpu_ireload(). +- Removed struct field offset check from init.c. +- 2.4.22-rc1 does EXPORT_SYMBOL(mmu_cr4_features). Added + new compat #define to handle this. +- Rename x86.c's rdmsrl() to rdmsr_low() to work around msr.h + changes in 2.6.0-test3. Also rename rdpmcl() to rdpmc_low(). +- Replaced __attribute__((__aligned__(SMP_CACHE_BYTES))) usage + with the official ____cacheline_aligned macro. +- Detect cpuid 0x69x VIA C3s (Antaur/Nehemiah). + +Version 2.6.0-pre3, 2003-08-03 +- Changed perfctr_info.cpus and cpus_forbidden to be int instead of + long, to make x86-32 and x86-64 compatible. This is a temporary + solution, as there are patches for >32 CPUs on x86-32. The real + solution is to make these sets variable-sized, and have user-space + retrieve them with a new command. +- Simplified GPERFCTR_CONTROL to update a single CPU instead of + a set of CPUs. Moved cstatus clearing to release_hardware(). +- Moved gperfctr start to new GPERFCTR_START command. +- Simplified GPERFCTR_READ to access a single CPU instead of a + set of CPUs. +- Removed the requirement that CCCR.ACTIVE_THREAD == 3 on P4. + HT processors define behaviour for all four possible values, + and non-HT processors behave sanely for all four values. +- Moved struct perfctr_low_ctrs definition from to + the corresponding low-level driver, since it's only used there. +- Changed perfctr_info.cpu_khz and vperfctr_control.preserve to be + int instead of long. This corrects x86-64 and makes it compatible + with x86-32. +- Updated x86.c to permit extended cascading on P4M2. 
+- Fixed a bug where the perfctr module's refcount could be zero with + code still running in the module (pending returns to exit_thread()). + This could race with rmmod in preemptive kernels, and in theory + also in SMP kernels. + * module owner field added to vperfctr_stub + * _vperfctr_exit() in the modular case is now a function in + vperfctr_stub.c, which brackets the vperfctr_stub.exit() call + with __module_get() and module_put() on vperfctr_stub.owner + * updated 2.4 and 2.2 compat definitions of __module_get() and + module_put() to work for modules != THIS_MODULE +- Replaced uses of (void)try_module_get() with __module_get() as the + latter is more appropriate for 2.6 kernels. Updated compat stuff. + +Version 2.6.0-pre2, 2003-07-13 +- vperfctr API fixes: + * The new VPERFCTR_READ_CONTROL command retrieves a vperfctr's + control data. + * Renamed VPERFCTR_SAMPLE to VPERFCTR_READ_SUM, and made it + write the sums to a perfctr_sum_ctrs user-space buffer. + * Non-write commands are now always permitted on unlinked perfctrs. + The first change was needed since the control data no longer is + accessible via the mmap()ed state. The other changes clean up and + simplify perfex and the library's slow-path read_ctrs() operation. +- sys_vperfctr_ functions now mark the tsk parameter as "const" if + they don't need write access to it. Typically they only need to + compare it with current to detect self-access cases. +- perfctr_cpu_state no longer makes the perfctr_cpu_control part + accessible to user-space (via mmap() of vperfctrs). +- Simplified {set,is}_isuspend_cpu() in x86_64.c by having callers + pass the CPU number instead of the cache pointer (which was only + used to derive the CPU number). +- Eliminated NMI_LOCAL_APIC #ifs from x86-64 code since x86-64 + always defines it. +- x86.c cleanups: the non-PERFCTR_INTERRUPT_SUPPORT case now uses + dummy stub functions, eliminated six #ifdefs. +- x86_64_setup.c needs . 
+- Protected cpu_has_mmx and cpu_has_ht #defines in x86_compat.h + with #ifndef since 2.4.22-pre3 added those #defines. +- Eliminated PERFCTR_INTERRUPT_SUPPORT #ifs from x86-64 code + since x86-64 always defines CONFIG_X86_LOCAL_APIC. +- Removed the P4-specific versions of isuspend() and iresume(). + P4 now uses p6_like_{isuspend,iresume}(), just like P6/K7/K8. +- Long overdue cleanup in x86.c/x86_64.c: renamed per_cpu_cache + pointer variables from 'cpu' to 'cache'. +- Added inline functions in virtual.c for registering the overflow + handler and for clearing iresume_cstatus. Cleaned out several + #if PERFCTR_INTERRUPT_SUPPORT occurrences from the main code. + (Partial backport from the abandoned perfctr-3.1 branch.) +- Inlined now useless 'struct vperfctr_state' in 'struct vperfctr'. + +Version 2.6.0-pre1, 2003-07-02 +- Rearranged 'struct perfctr_cpu_state' to reduce the number of + cache lines needed to be touched by key operations (suspend, + resume, sample). Switched from struct-of-arrays to array-of-struct + for perfctr counts, and copied pmc_map into the PMC data array. + The old representation touched at least 3 cache lines at key + operations, the new one only needs one cache line in most cases. + The user-space mmap() view of the new representation is binary + compatible between x86 and x86-64. +- Changed 'isuspend_cpu' in perfctr_cpu_state on x86-64 to be a + 32-bit CPU number, to maintain binary compatibility with x86. +- Removed the union of p5_cesr and id; use id throughout. +- Removed _filler and si_signo from 'struct vperfctr_state', making + the user-space view of it identical to 'struct perfctr_cpu_state'. + +Version 2.5.5, 2003-06-15 +- Updated x86 driver for 2.5.71 local APIC driver model changes. +- Updated x86-64 driver for 2.5.71 NMI watchdog enable/disable API. 
+- x86-64 is broken in 2.5.71 since x86-64 updated to driver model + for local APIC and NMI watchdog, at the same time as x86 moved + to a newer version of the "system device" driver model. Updated + the x86-64 driver for the new model, which is expected to be in + x86-64 by 2.5.72 (patch exists for 2.5.71). + +Version 2.5.4, 2003-06-01 +- The generic-x86-with-TSC driver now uses rdpmc_read_counters + and p6_write_control instead of its own procedures. +- K8 docs are now available. Updated comment in x86.c accordingly. +- P4 OVF_PMI+FORCE_OVF counters didn't work at all, resulting in + BUG messages from the driver since identify_overflow failed to + detect which counters had overflowed, and vperfctr_ihandler + left the vperfctr in an inconsistent state. This works now. + However, hardware quirks makes this configuration only useful + for one-shot counters, since resuming generates a new interrupt + and the faulting instruction again doesn't complete. The same + problem can occur with regular OVF_PMI counters if ireset is + a small-magnitude value, like -5. + This is a user-space problem; the driver survives. +- On P4, OVF_PMI+FORCE_OVF counters must have an ireset value of -1. + This allows the regular overflow check to also handle FORCE_OVF + counters. Not having this restriction would lead to MAJOR + complications in the driver's "detect overflow counters" code. + There is no loss of functionality since the ireset value doesn't + affect the counter's PMI rate for FORCE_OVF counters. +- Moved P4 APIC_LVTPC reinit from p4_isuspend() to identify_overflow(). + Reduces context-switch overheads when i-mode counters are active. +- Corrected vperfctr_suspend()'s precondition. +- Corrected comment in to state that ireset[] + values must be negative rather than non-positive. +- Made 'perfctr_cpu_name' __initdata, like its predecessor. + +Version 2.5.3.1, 2003-05-21 +- Replaced 'char *perfctr_cpu_name[]' by 'char *perfctr_cpu_name'. 
+ This is needed for x86-64 and other non-x86 architectures. +- Changed to use 'long long' for 64-bit sums. + This doesn't change the ABI, but improves user-space source code + compatibility with 32-bit x86. +- Removed the !defined(set_cpus_allowed) check added to compat24.h + in 2.5.3. It's wrong for SMP builds with modules and MODVERSIONS, + since the set_cpus_allowed() emulation function becomes a #define + from include/linux/modules/x86_setup.ver. Instead add the already + used HAVE_SET_CPUS_ALLOWED #define to include/linux/config.h in + the kernel patch, but make it conditional on CONFIG_X86_64. + +Version 2.5.3, 2003-05-16 +- Added detection code for Pentium M. MISC_ENABLE_PERF_AVAIL is + now checked on both P4 and Pentium M. +- Added x86_64 driver code. Both x86_64.c and asm-x86_64/perfctr.h + are basically simplified versions of corresponding x86 files, + with P5 and P4 support removed, 2.2 kernel support removed, and + 'long long' for sums replaced by 'long'. The last change is + painful for user-space and may be reverted. +- compat24.h: don't define set_cpus_allowed() if already #defined, + workaround for RawHide's 2.4.20-9.2 x86_64 kernel. +- Removed list of supported CPUs from Kconfig. That information + belongs elsewhere (and it's a pain to maintain for 2.2/2.4). + +Version 2.5.2, 2003-04-13 +- Minor cleanup: use PROC_I() unconditionally in virtual.c, + implement trivial compat macro in compat24.h. +- Updated power management code for the local APIC and NMI + watchdog driver model changes in kernel 2.5.67. + The suspend/resume procedures are still no-ops, however. + This revealed a bug in the lapic_nmi_watchdog resume code: + it resumes the lapic_nmi_watchdog even when it was disabled + before suspend. Perfctr's 2.5.67 kernel patch includes a fix. +- perfctr_sample_thread() is now used also on UP. Anton Ertl's + 2.26GHz UP P4 managed to execute a process for more than 2^32 + cycles before suspending it, causing TSC inaccuracies. 
+- RH9's 2.4.20-8 kernel changed cpu_online(), put_task_struct() and + remap_page_range() to be more like in 2.5 kernels, and moved the + declaration of ptrace_check_attach() from mm.h to ptrace.h, also + like in 2.5 kernels, requiring fixes to compat24.h and x86_setup.c. +- Added note in x86.c about the new Pentium M processor. + +Version 2.5.1, 2003-03-23 +- Fix P4 HT initialisation. I've seen several boot logs from + people running MP P4 Xeons with HT disabled: this produces + an ugly "restricting access for CPUs 0x0" message, and would + cause P4 HT init to unnecessarily return error in older kernels + lacking set_cpus_allowed(). Now only print the message or + signal error if non-zero siblings actually are found. +- The set_cpus_allowed() emulation doesn't compile in 2.4 + kernels older than 2.4.15 due to the p->cpus_running field. + Updated version checks to skip it in 2.4.x when x<15. +- Fix set_cpus_allowed() emulation compile error on BUG_ON() + in 2.4 kernels older than 2.4.19. +- Added Nehemiah note/reminder in x86.c:centaur_init(). + +Version 2.5.0, 2003-03-10 +- Reverted the 2.5.0-pre2 change that replaced the PERFCTR_INFO + ioctl by read(): it made the API look too weird. + Added a PERFCTR_ABI ioctl which only retrieves 'abi_version'. +- Cleaned up struct perfctr_info: renamed abi_magic to abi_version, + and version to driver_version. Renamed PERFCTR_*_MAGIC too. +- Cleaned up struct perfctr_cpu_control: moved evntsel_aux[] + into the p4 sub-struct and renamed it as escr[]. Only P4 needs + it anyway, and the new name clarifies its purpose. +- Renumbered the vperfctr ioctls to the 8-15 range (8-11 are used) + and reserved 0-7 (0-1 are used) for generic ioctls. +- Added 'use_nmi' field to struct gperfctr_control, reserved for + future use if/when support for i-mode gperfctrs is implemented. +- Replaced some preempt/smp_call_function combinations with 2.5.64's + new on_each_cpu() construct. Added compatibility definitions to + compat24.h and compat22.h. 
+ +Version 2.5.0-pre2, 2003-03-03 +- Added ABI version to perfctr_info. Replaced PERFCTR_INFO ioctl + by read() on the fd, since that allows reading the ABI version + even in the case of a version mismatch. Removed binary layout + magic number from vperfctr_state. Rearranged perfctr_info to + make the 'long' fields 8-byte aligned. +- Added #ifdef CONFIG_KPERFCTR to to ensure + that isn't included unless CONFIG_KPERFCTR=y. + This allows the patched kernel source to compile cleanly also + in archs not yet supported by perfctr. +- Removed PERFCTR_PROC_PID_MODE #define and replaced it with + /*notype*/S_IRUSR in the patch files. +- Added perfctr_vector_init() to . Cleaned + up arch/i386/kernel/i8259.c patch. +- Removed apic_lvtpc_irqs[] array. Removed irq.c patch. +- Updated CONFIG_PERFCTR_INIT_TESTS help text to match reality. +- Kernel 2.4.21-pre5 added set_cpus_allowed(), which required + fixing compat24.h and x86_setup.c. +- Fixed init.c for kernel 2.5.63 removing EXPORT_NO_SYMBOLS. +- Cleaned up compat.h by moving 2.2/2.4 stuff to separate files. + +Version 2.5.0-pre1, 2003-02-19 +- Repair global perfctr API: the target CPUs are now explicit + in the calls to write control and read state. Global perfctrs + now work on 2.5 SMP kernels (which no longer have smp_num_cpus + or cpu_logical_map()), and HT P4s (asymmetric MPs). +- struct perfctr_info has new bitmask fields for the set of CPUs + (cpu_online_map) and forbidden CPUs; dropped the nrcpus field. +- add cpu_online() compat macro to compat.h +- VPERFCTR_STOP is subsumed by VPERFCTR_CONTROL. Removed it. +- Detect K8 as K8 not K7. They are not identical. +- Makefile cleanup: moved 2.4/2.2 kernel stuff to Makefile24. +- Makefile fix: removed export-objs for 2.5 kernels. +- Kconfig fix: don't mention obsolete .o module suffix. + +Version 2.4.5, 2003-02-09 +- Fixed two minor compile warnings in x86_tests.c for 2.5 kernels. 
+ +Version 2.4.4, 2003-01-18 +- Fixed a bug in iresume() where an interrupt-mode counter could + increment unexpectedly, and also miss the overflow interrupt. + The following setup would cause the problem: + P1 has EVNTSELn in non-interrupt mode, counting some high- + frequency event (e.g. INST_RETIRED) in kernel-mode. P2 has + EVNTSELn in interrupt-mode, counting some low-frequency event + (e.g. MMX_ASSIST) in user-mode. P1 suspends. Since EVNTSELn is + in non-interrupt mode, it is not disabled. P2 resumes. First + iresume() finds that the CPU cache ID is not P2's, so it reloads + PERFCTRn with P2's restart value. Then write_control() reloads + EVNTSELn with P2's EVNTSEL. At this point, P2's PERFCTRn has been + counting with P1's EVNTSELn since iresume(), so it will no longer + equal P2's restart value. And if PERFCTRn overflowed, the overflow + will go undetected since P1's EVNTSELn was in non-interrupt mode. + To avoid this problem, iresume() now ensures that a counter's + control register is disabled before reloading the counter. +- Fixed some ugly log messages from the new HT P4 init code: + * forbidden_mask would be printed as "0X" (capital X) + * finalise_backpatching() could trigger a BUG! printk from + p4_write_control() if the CPU the init code runs on was + in the forbidden set. At init-time this is not an error. + Avoided this by temporarily resetting the forbidden_mask. +- Added preliminary support for AMD K8 processors with the + regular 32-bit x86 kernel. The K8 performance counters appear + to be identical or very similar to the K7 performance counters. + +Version 2.4.3, 2002-12-11 +- Added x86.c:perfctr_cpus_forbidden_mask. This bitmask describes + the set of CPUs that must not access the perfctrs. On HT P4 MPs, + only logical CPU #0 in each package is allowed access -- this + avoids the resource conflict that would occur if both logical + processors were to access the perfctrs. In other cases (UP or + non-HT-P4 MPs) the mask is zero. 
+- vperfctr_control() now calls set_cpus_allowed() to ensure that + the task stays away from CPUs in perfctr_cpus_forbidden_mask. + This is racy with sys_sched_setaffinity(), and possibly some + of the kernel's internal set_cpus_allowed() calls, but the race + is unlikely to occur in current 2.4 kernels. +- Cleaned up the parameter passing protocol between vperfctr_ioctl() + and the individual vperfctr "system call" procedures. +- Added safety check in global.c to disallow global-mode perfctrs + on asymmetric MPs until the API has been fixed. +- Added set_cpus_allowed() implementation for 2.4 kernels, except + those that already have it as indicated by HAVE_SET_CPUS_ALLOWED: + this symbol is added to by the kernel patch. +- 2.2 kernels can't enforce CPU affinity masks, so x86.c warns if + a HT P4 MP runs a 2.2 kernel, and falls back to generic x86 mode. + Added dummy set_cpus_allowed() macro for 2.2 kernels. +- x86_compat.h now implements cpuid_ebx() and cpu_has_ht for old kernels. +- Makefile cleanup: Rules.make is obsolete in 2.5. +- Compile fixes in x86.c and virtual_stub.c: needs to + be included explicitly for the 2.5.50 kernel. + +Version 2.4.2, 2002-11-25 +- Fixed virtual.c:inc_nrctrs() to handle the -EBUSY case correctly. + If the HW was busy (e.g. global running), then the first attempt + to open a vperfctr would fail but further attempts would succeed. + Updated error propagation to distinguish -EBUSY from -ENOMEM. +- Updated global.c for preempt-safety. +- Made the driver safe for preemptible kernels. This required a lot + of analysis, but resulted in relatively few actual code changes. + (Backport from the perfctr-3.1 branch.) +- Ported to 2.5.48: Replaced MOD_INC_USE_COUNT by try_module_get() + and MOD_DEC_USE_COUNT by module_put(). Updated compat.h. +- Ported to 2.5.45: added Kconfig, removed Config.help. + +Version 2.4.1, 2002-10-12 +- RedHat 8.0's 2.4.18-14 kernel does EXPORT_SYMBOL(cpu_khz) while + the vanilla 2.4.18 does not. 
This clashes with x86_setup.c's
+ EXPORT_SYMBOL(cpu_khz). I've found no easy way to distinguish
+ between these kernels at C preprocessing time, so I changed
+ x86_setup.c to define a trivial perfctr_cpu_khz() function and
+ EXPORT_SYMBOL that one instead.
+
+Version 2.4.0, 2002-09-26
+- Config.help updated to state that Pentium 4 is supported.
+- 2.5.32 moved ptrace_check_attach() declaration to <linux/ptrace.h>.
+- Removed redundant /proc/<pid>/perfctr access control check
+ from vperfctr_stub_open(). Since 2.4.0-pre1 this check didn't
+ match the real one, which prevented remote opens when the
+ driver was built as a module.
+
+Version 2.4.0-pre2, 2002-08-27
+- vperfctr_control() now allows the user to specify that some PMC
+ sums are not to be cleared when updating the control.
+ There is a new bitmap field `preserve' in struct vperfctr_control:
+ if bit i is set then PMC(i)'s sum is not cleared.
+ `preserve' is a simple `unsigned long' for now, since this type
+ fits all currently known CPU types.
+ This change breaks binary compatibility, but user-space code which
+ clears the entire control record before filling in relevant fields
+ will continue to work as before after a recompile.
+ This feature removes a limitation which some people felt was a
+ problem for some usage scenarios.
+
+Version 2.4.0-pre1, 2002-08-12
+- Initial implementation of a new remote-control API for virtual
+ per-process perfctrs. A monitor process may access a target
+ process' perfctrs via /proc/pid/perfctr and operations on that
+ file, if the monitor holds the target under ptrace ATTACH control.
+ Updated virtual.c to allow remote access.
+ Updated x86.c:perfctr_cpu_ireload() to work also in the remote
+ control case on SMP machines.
+
+Version 2.3.12, 2002-08-12
+- Trivial comment fixes in compat.h and x86_compat.h.
+- Removed __vperfctr_sample(), vperfctr_stub.sample, and bug_sample()
+ from UP builds, since they are needed only on SMP.
+ +Version 2.3.11, 2002-07-21 +- Accumulated sums are now maintained for interrupt-mode perfctrs. + User-space can use the standard syscall-less algorithm for computing + these counters' current sums, should that be needed. + +Version 2.3.10, 2002-07-19 +- Added PERFCTR_X86_INTEL_P4M2 CPU type for Model 2 P4s, since + they have ESCR Event Mask changes in a few events. +- The driver now supports replay tagging events on P4, using the + pebs_enable and pebs_matrix_vert control fields added in 2.3.8. +- Some Pentium MMX and Pentium Pro processors have an erratum + (Pentium erratum #74, Pentium Pro erratum 26) which causes SMM + to shut down if CR4.PCE is set. intel_init() now clears the + RDPMC feature on the affected steppings, to avoid the problem. +- perfctr_cpu_release() now clears the hardware registers and + invalidates the per-cpu cache. This should allow the counter + hardware to power down when not used, especially on P4. +- Callers of update_control() have no active i-mode counters. + Documented this as a precondition, and changed update_control() + to not call isuspend(). update_control() no longer needs hardware + access, which should ease a port to CONFIG_PREEMPT=y. + +Version 2.3.9, 2002-06-27 +- Updated p4_escr_addr() in x86.c to match the latest revision of + Intel's IA32 Volume 3 manual, #245472-007. An error in previous + revisions of this document caused the driver to program the wrong + ESCR in some cases. (CCCRs 12/13/16 with ESCR_SELECT(2) were mapped + to SSU_ESCR0 instead of RAT_ESCR0, affecting the uop_type event.) + +Version 2.3.8, 2002-06-26 +- Added counter overflow interrupt support for Intel P4. +- 2.5.23 dropped smp_num_cpus and cpu_logical_map(). Added + temporary workarounds to x86.c and global.c to allow compilation + and testing under 2.5. May have to change the API (esp. global's) + to be based on the sparse cpu_online_map instead. +- RedHat's 2.4.9-34 defines cpu_relax(). Updated compat.h. 
+- Added pebs_enable and pebs_matrix_vert fields (currently unused) + to perfctr_cpu_control to support replay tagging events on P4. + Updated the perfctr_cpu_state binary layout magic number. +- Silenced redefinition warnings for MSR_P6_PERFCTR0 and cpu_has_mmx. +- Updated Makefile for the 2.5.19 kernel's Makefile changes. +- Merged the P6 and K7 isuspend/iresume/write_control driver code. +- Added a VC3 specific clear_counters() procedure. +- Removed pointless code from perfctr_cpu_identify_overflow(). +- Removed _vperfctr_get/set_thread() wrappers and thread->perfctr + clobber checks from the DEBUG code. Removed unused "ibuf" and + obsolete si_code fields from vperfctr state and control objects. + Updated the vperfctr state magic number. +- Fixed the CONFIG_PREEMPT anti-dependency check in Config.in. +- vperfctr_control() now preserves the TSC sum on STOP;CONTROL + transitions. The failure to do this caused problems for the + PAPI P4 support being developed. + +Version 2.3.7, 2002-04-14 +- Kernel 2.5.8-pre3 changed the way APIC/SMP interrupt entries + are defined. Defining these with asm() in C is no longer + practical, so the kernel patch for 2.5.8-pre3 now defines + the perfctr interrupt entry in arch/i386/kernel/entry.S. +- Permit use of cascading counters on P4: in the slave counter + one sets the CASCADE flag instead of the ENABLE flag. +- Added P4 hyperthreading bit field definitions. +- Preliminary infrastructure to support a new remote-control + interface via ptrace(). Updates to compat.h, virtual.c, + virtual_stub.c, and x86_setup.c. ptrace_check_attach() + emulation for older kernels is in x86_setup.c since + virtual_stub.c isn't compiled if the driver isn't a module. + +Version 2.3.6, 2002-03-21 +- Rewrote sys_vperfctr_control() to do a proper suspend before + updating the control, and to skip trying to preserve the TSC + start value around the resume. This cleaned up the code and + eliminated the bogus "BUG! 
resuming non-suspended perfctr"
+ warnings that control calls to active perfctrs caused.
+- Rewrote sys_vperfctr_iresume() to not preserve the TSC start
+ value around the resume. Since we had just done a suspend(),
+ this would cause double-accounting of the TSC.
+
+Version 2.3.5, 2002-03-17
+- Added detection of the VIA C3 Ezra-T processor.
+- CPU detection now uses current_cpu_data instead of boot_cpu_data,
+ to avoid the boot_cpu_data.x86_vendor bug which is present in
+ all current 2.2/2.4/2.5 kernels. The bug caused the x86_vendor
+ field to be cleared on SMP machines, which in turn tricked the
+ driver to identify MP AMD K7 machines as MP Intel P6, with
+ disastrous results when the wrong MSRs were programmed.
+- Updated compat.h for /proc/<pid>/ inode change in 2.5.4.
+- Added a check to prevent building on preemptible 2.4/2.5 kernels,
+ since the driver isn't yet safe for those.
+- Put perfctr's configuration help text in Config.help in this
+ directory: kernel 2.5.3-pre5 changed from having a common
+ Configure.help file to having local Config.help files.
+
+Version 2.3.4, 2002-01-23
+- Updated virtual.c for remap_page_range() change in 2.5.3-pre1.
+ Added emulation for older kernels to compat.h.
+- Permit use of tagging on P4 for at-retirement counting. This may
+ not yet work as expected, since up-stream (tag producing) counters
+ aren't disabled at context switches: a process may therefore see
+ more tagged uops than expected.
+- Fixed uses of __FUNCTION__ to comply with changes in GCC 3.0.3.
+
+Version 2.3.3, 2001-12-31
+- Minor x86.c cleanup: reordered function definitions so that
+ write_control comes after isuspend/iresume: this makes it easier
+ to follow the runtime control flow.
+- Fixed isuspend()/iresume()'s broken cache checking protocol. The
+ old protocol didn't handle process migration across CPUs in SMP
+ machines correctly, as illustrated by the following scenario:
+ P1 runs on CPU1 and suspends.
P1 and CPU1 now have the same + cache id (->k1.id). P1 is resumed and suspended on CPU2: the state + in CPU1 is now stale. Then P1 is resumed on CPU1, and no other + process has been using CPU1's performance counters since P1's last + suspend on CPU1. The old protocol would see matching cache ids and + that P1's i-mode EVNTSELs are stopped, so it would accept the cache + and resume P1 with CPU1's stale PERFCTRS values. + In the new protocol isuspend() records the active CPU in the + state object, and iresume() checks if both the CPU and the control + id match. The new protocol is also simpler since iresume() no longer + checks if the i-mode EVNTSELs are cleared or not. +- P6 nasty i-mode to a-mode context switch bug fixed: p6_isuspend() + used to simply clear EVNTSEL0's Enable flag in order to stop all + i-mode counters. Unfortunately, that was insufficient as shown by + the following case (which actually happened). + P1 has EVNTSEL0 in a-mode and EVNTSEL1 in i-mode. P1 suspends: + PERFCTR1 is stopped but EVNTSEL1 is still in i-mode. P2 has EVNTSEL0 + in a-mode and no EVNTSEL1. P2 resumes and updates EVNTSEL0. This + activates not only P2's PERFCTR0 but also the dormant PERFCTR1. If + PERFCTR1 overflows, then P2 will receive an unexpected interrupt. If + PERFCTR1 doesn't overflow, but P2 suspends and P1 resumes, then P1 + will find that PERFCTR1 has a larger than expected value. + p6_isuspend() and p6_iresume() were changed to ignore the global + Enable flag and to disable/enable each i-mode EVNTSEL individually, + just like how it's done on the K7. +- x86.c cleanups: P5MMX, MII, C6, VC3, P6, K7, and P4 now all + use the same rdpmc_read_counters() method. VIA C3 now uses + p6_write_control() instead of its own method. +- Removed "pmc_map[] must be identity" restriction from P6 and K7. 
+ The API uses the virtual counter index to distinguish a-mode
+ and i-mode counters, but P6 events aren't entirely symmetric:
+ this led to some strange cases with the old pmc_map[] rule.
+ P6 and K7 isuspend() now need access to the control, so
+ update_control() and its callers had to be changed to allow it
+ to isuspend() _before_ the new control is installed.
+- P4 write_control fixes: changed the ESCR cache to be indexed by
+ MSR offset from 0x3A0, and changed P4 write_control to index the
+ CCCR/ESCR cache with physical instead of virtual indices. Added
+ call to debug_evntsel_cache(), after updating it for pmc_map[].
+- Added P4 and Generic support to x86_tests.c, and some cleanups.
+
+Version 2.3.2, 2001-11-19
+- P4 fix: the mapping from CCCR 17 to its associated ESCRs was
+ wrong due to an off-by-one error in x86.c:p4_escr_addr().
+- P4 fix: also clear the PEBS MSRs when initialising the driver.
+- Minor cleanup in x86.c: replaced the "clear MSRs" loops with
+ calls to a helper procedure.
+
+Version 2.3.1, 2001-11-06
+- Microscopic P4 cleanups. Testing on my new P4 box has confirmed
+ that the PMAVAIL flag in MSR_IA32_MISC_ENABLE is read-only.
+
+Version 2.3, 2001-10-24
+- Added support for multiple interrupt-mode virtual perfctrs
+ with automatic restart. Added an identify_overflow() method
+ to x86.c to identify and reset the overflowed counters.
+ Added checks to ensure that the user-specified restart values
+ for interrupt-mode counters are negative.
+ Updated virtual.c's signal delivery interface to pass a
+ bitmask describing which counters overflowed; the siginfo
+ si_code is now fixed as SI_PMC_OVF (fault-class).
+- Fixed some typos in x86.c. Added a note about the C3 Ezra.
+- Added EXPORT_NO_SYMBOLS to init.c, for compatibility with
+ announced changes in modutils 2.5.
+
+Version 2.2, 2001-10-09
+- Added preliminary support for the Pentium 4.
Only basic stuff
+ for now: no cascading counters, overflow interrupts, tagged
+ micro-ops, or use of DS/PEBS. The code compiles but hasn't been
+ tested on an actual Pentium 4.
+
+Version 2.1.4, 2001-09-30
+- No driver-level changes.
+
+Version 2.1.3, 2001-09-13
+- Fixed a compilation problem where virtual_stub couldn't be compiled
+ in modular kernels older than 2.2.20pre10 if KMOD was disabled, due
+ to an incompatible stub definition of request_module().
+- Replaced most occurrences of "VIA Cyrix III / C3" with "VIA C3".
+
+Version 2.1.2, 2001-09-05
+- Added MODULE_LICENSE() tag, for compatibility with the tainted/
+ non-tainted kernel stuff being put into 2.4.9-ac and modutils.
+- VIA C3 support is not "preliminary" any more. Testing has revealed
+ that the reserved bits in the C3's EVNTSEL1 have no function and
+ need not be preserved. The driver now fills these bits with zeroes.
+ (Thanks to Dave Jones @ SuSE for running these tests.)
+- Minor bug fix in the perfctr interrupt assembly code.
+ (Inherited from the 2.4 kernel. Fixed in 2.4.9-ac4.)
+
+Version 2.1.1, 2001-08-28
+- Preliminary recognition of Pentium 4 processors, including
+ checking the IA32_MISC_ENABLE MSR.
+- Moved %cr4 access functions from to
+ x86_compat.h, to work around changes in 2.4.9-ac3.
+- More %cr4 cleanups possible since the removal of dodgy_tsc()
+ in Version 2.1: moved {set,clear}_in_cr4_local() into x86.c,
+ and eliminated the set_in_cr4() compat macro.
+- Fixed a bug in x86.c:finalise_backpatching(): the fake cstatus
+ mustn't include i-mode counters unless we have PCINT support.
+ Failure to check this caused fatal init-time oopses in some
+ configs (CONFIG_X86_UP_APIC set but no local APIC in the CPU).
+- Minor comment updates in x86.c due to AMD #22007 Revision J.
+- Removed '%' before 'cr4' in printouts from x86_tests.c, to
+ avoid the '%' being mutated by log-reading user-space code.
+
+Version 2.1, 2001-08-19
+- Fixed a call backpatching bug, caused by an incompatibility
+ between the 2.4 and 2.2 kernels' xchg() macros. The 2.2 version
+ lacks a "volatile" causing gcc to remove the entire statement
+ if xchg() is used for side-effect only. Reverted to a plain
+ assignment, which is safe since the 2.0.1 backpatching changes.
+- Fixed a bug where an attempt to use /proc/<pid>/perfctr on an
+ unsupported processor would cause a (well-behaved) kernel oops,
+ due to calling a NULL function pointer in x86.c. vperfctr_open()
+ now returns -ENODEV if virtual.c hasn't been initialised.
+- Removed the WinChip configuration option, the dodgy_tsc() callback,
+ and the clr_cap_tsc() x86_compat macro. WinChip users should configure
+ for generic 586 or less and use the kernel's "notsc" boot parameter.
+ This cleans up the driver and the 2.2 kernel patches, at the expense
+ of more code in the 2.2 kernel patches to implement "notsc" support.
+- Minor cleanup: moved version number definition from init.c to
+ a separate file, version.h.
+
+Version 2.0.1, 2001-08-14
+- The unsynchronised backpatching in x86.c didn't work on SMP,
+ due to Pentium III erratum E49, and similar errata for other
+ P6 processors. (The change in 2.0-pre6 was insufficient.)
+ x86.c now finalises the backpatching at driver init time,
+ by "priming" the relevant code paths. To make this feasible,
+ the isuspend() and iresume() methods are now merged into
+ the other high-level methods; virtual.c became a bit cleaner.
+- Removed obsolete "WinChip pmc_map[] must be identity" check.
+
+Version 2.0, 2001-08-08
+- Resurrected partial support for interrupt-mode virtual perfctrs.
+ virtual.c permits a single i-mode perfctr, in addition to TSC
+ and a number of a-mode perfctrs. BUG: The i-mode PMC must be last,
+ which constrains CPUs like the P6 where we currently restrict
+ the pmc_map[] to be the identity mapping.
(Not a problem for + K7 since it is symmetric, or P4 since it is expected to use a + non-identity pmc_map[].) + New perfctr_cpu_ireload() procedure to force reload of i-mode + PMCs from their start values before resuming. Currently, this + just invalidates the CPU cache, which forces the following + iresume() and resume() to do the right thing. + perfctr_cpu_update_control() now calls setup_imode_start_values() + to "prime" i-mode PMCs from the control.ireset[] array. +- Bug fix in perfctr_cpu_update_control(): start by clearing cstatus. + Prevents a failed attempt to update the control from leaving the + object in a state with old cstatus != 0 but new control. + +Version 2.0-pre7, 2001-08-07 +- Cleaned up the driver's debugging code (virtual, x86). +- Internal driver rearrangements. The low-level driver (x86) now handles + sampling/suspending/resuming counters. Merged counter state (sums and + start values) and CPU control data to a single "CPU state" object. + This simplifies the high-level drivers, and permits some optimisations + in the low-level driver by avoiding the need to buffer tsc/pmc samples + in memory before updating the accumulated sums (not yet implemented). +- Removed the read_counters, write_control, disable_rdpmc, and enable_rdpmc + methods from , since they have been obsoleted by the + new suspend/resume/sample methods. +- Rearranged the 'cstatus' encoding slightly by putting 'nractrs' in + the low 7 bits; this was done because 'nractrs' is retrieved more + often than 'nrctrs'. +- Removed the obsolete 'status' field from vperfctr_state. Exported + 'cstatus' and its access methods to user-space. (Remove the + control.tsc_on/nractrs/nrictrs fields entirely?) +- Removed WinChip "fake TSC" support. The user-space library can now + sample with slightly less overhead on sane processors. +- WinChip and VIA C3 now use p5mmx_read_counters() instead of their + own versions. 
+ +Version 2.0-pre6, 2001-07-27 +- New patches for kernels 2.4.6, 2.4.7, and 2.4.7-ac1. +- Sampling bug fix for SMP. Normally processes are suspended and + resumed many times per second, but on SMP machines it is possible + for a process to run for a long time without being suspended. + Since sampling is performed at the suspend and resume actions, + a performance counter may wrap around more than once between + sampling points. When this occurs, the accumulated counts will + be highly variable and much lower than expected. + A software timer is now used to ensure that sampling deadlines + aren't missed on SMP machines. (The timer is run by the same code + which runs the ITIMER_VIRTUAL interval timer.) +- Bug fix in the x86 "redirect call" backpatching routine. To be + SMP safe, a bus-locked write to the code must be used. +- Bug fix in the internal debugging code (CONFIG_PERFCTR_DEBUG). + The "shadow" data structure used to detect if a process' perfctr + pointer has been clobbered could cause lockups with SMP kernels. + Rewrote the code to be simpler and more robust. +- Minor performance tweak for the P5/P5MMX read counters procedures, + to work around the P5's cache which doesn't allocate a cache line + on a write miss. +- To avoid undetected data layout mismatches, the user-space library + now checks the data layout version field in a virtual perfctr when + it is being mmap:ed into the user's address space. +- A few minor cleanups. + +Version 2.0-pre5, 2001-06-11 +- Internally use a single 'cstatus' field instead of the three + tsc_on/nractrs/nrictrs fields. Should reduce overhead slightly. +- Reorder the fields in cpu_control so that 'cstatus' and other + frequently used fields get small offsets -- avoids some disp32 + addressing modes in timing-critical code. +- Fixed a bug in p6_iresume where it forgot to invalidate the + EVNTSEL cache, causing p6_write_control to fail to reload the + MSRs. (K7 had a similar bug.) 
Since i-mode support is disabled + at the moment, no-one was actually bitten by this. +- Fixed another iresume/write_control cache invalidation bug where a + switch to an "uninitialised" CPU would fail to initialise the MSRs. +- Added a CONFIG_PERFCTR_DEBUG option to enable internal consistency + checks. Currently, this checks that a task's vperfctr pointer + isn't clobbered behind our backs, that resume and suspend for + a vperfctr are performed on the same CPU, and that the EVNTSEL + cache is semi-consistent when reloading is optimised away. + ("semi" because it only checks that the cache agrees with the + user's control data, and not that the cache agrees with the MSRs.) +- Minor cleanups. + +Version 2.0-pre4, 2001-04-30 +- Cleanups in x86.c. #defines introduced for magic constants. + More sharing of procedures between different CPU drivers. + Fixed a bug where k7_iresume() could cause k7_write_control() + to fail to reload the correct EVNTSELs. + The WinChip C6/2/3 driver now "fakes" an incrementing TSC. +- General cleanups: s/__inline__/inline/ following Linux kernel + coding standards, and renamed the low-level control objects to + cpu_control to distinguish them from {v,g}perfctr_control objects. +- O_CREAT is now interpreted when /proc/self/perfctr is opened: + if the vperfctr does not exist, then it is created; if the + vperfctr does exist, then EEXIST is returned (unfortunately + O_EXCL doesn't work, since it's intercepted by the VFS layer). + "perfex -i" uses this to avoid having to create a vperfctr when + only an INFO command is to be issued. + libperfctr.c:vperfctr_open() uses this to decide whether to + UNLINK the newly opened vperfctr in case of errors or not. +- Cleaned up virtual.c's 2.4/2.2 VFS interface code a little, + and eliminated the OWNER_THIS_MODULE compat macro. +- Added MOD_{INC,DEC}_USE_COUNTs to virtual.c's file_operations + open and release procedures for 2.2 kernels. 
+ This should + simulate 2.4's fops_get/put at ->open() and ->release(). + +Version 2.0-pre3, 2001-04-17 +- Interrupt-mode virtual perfctrs are temporarily disabled since + x86.c doesn't yet detect which PMC overflowed. The old API + could be made to work, but it was broken anyway. +- Integrated the new P4-ready data structures and APIs. + The driver compiles but the user-space stuff hasn't been + updated yet, so there may be some remaining bugs. + + I have not yet committed to all details of this API. Some + things, like accumulating counters in virtual.c and global.c, + are uglier now, and going from a single "status == nrctrs" + field to three separate fields (tsc_on, nrctrs, nrictrs) + cannot be good for performance. + + In the new API the control information is split in separate + arrays depending on their use, i.e. a struct-of-arrays layout + instead of an array-of-struct layout. The advantage of the + struct-of-arrays layout is that it should cause fewer cache + lines to be touched at the performance-critical operations. + The disadvantage is that the layout changes whenever the + number of array elements has to be increased -- as is the + case for the future Pentium 4 support (18 counters). + +Version 2.0-pre2, 2001-04-07 +- Removed automatic inheritance of per-process virtual perfctrs + across fork(). Unless wait4() is modified, it's difficult to + communicate the final values back to the parent: the now + abandoned code did this in a way which made it impossible + to distinguish one child's final counts from another's. + Inheritance can be implemented in user-space anyway, so the + loss is not great. The interface between the driver and the rest + of the kernel is now smaller and simpler than before. +- Simulating cpu_khz by a macro in very old kernels broke since + there's also a struct field with that name :-( Instead of + putting the ugly workaround back in, I decided to drop support + for kernels older than 2.2.16. 
+- Preliminary support for the VIA C3 processor -- the C3 is + apparently a faster version of the VIA Cyrix III. +- Added rdtsc cost deduction to the init tests code, and changed + it to output per-instruction costs as well. +- More cleanups, making 2.2 compatibility crud less visible. + +Version 2.0-pre1, 2001-03-25 +- First round of API and coding changes/cleanups for version 2.0: + made perfctr_info.version a string, moved some perfctr_info inits + to x86.c and eliminated some redundant variables, removed dead VFS + code from virtual.c, removed obsolete K7 tests from x86_tests.c, + removed mmu_cr4_features wrappers from x86_compat.h, minor cleanup + in virtual_stub.c. +- Fixed an include file problem which made some C compilers (not gcc) + fail when compiling user-space applications using the driver. +- Added missing EXPORT_SYMBOL declarations needed by the UP-APIC PM + code when the driver is built as a module. +- Preliminary changes in x86.c to deal with UP-APIC power management + issues in 2.4-ac kernels. The PM callback is only a stub for now. + +Version 1.9, 2001-02-13 +- Fixed compilation problems for 2.2 and SMP kernels. +- Found updated documentation on "VIA Cyrix III". Apparently, there + are two distinct chips: the older Joshua (a Cyrix design) and the + newer Samuel (a Centaur design). Our current code supported Joshua, + but mistook Samuel for Joshua. Corrected the identification of Samuel + and added explicit support for it. Samuel's EVNTSEL1 is not well- + documented, so there are some new Samuel-specific tests in x86_tests.c. +- Added preliminary interrupt-mode support for AMD K7. +- Small tweaks to virtual.c's interrupt handling. + +Version 1.8, 2001-01-23 +- Added preliminary interrupt-mode support to virtual perfctrs. + Currently for P6 only, and the local APIC must have been enabled. + Tested on 2.4.0-ac10 with CONFIG_X86_UP_APIC=y. 
+ When an i-mode vperfctr interrupts on overflow, the counters are + suspended and a user-specified signal is sent to the process. The + user's signal handler can read the trap pc from the mmap:ed vperfctr, + and should then issue an IRESUME ioctl to restart the counters. + The next version will support buffering and automatic restart. +- Some cleanups in the x86.c init and exit code. Removed the implicit + smp_call_function() calls from x86_compat.h. + +Version 1.7, 2001-01-01 +- Updated Makefile for 2.4.0-test13-pre3 Rules.make changes. +- Removed PERFCTR_ATTACH ioctl from /dev/perfctr, making the + vperfctrs only accessible via /proc/self/perfctr. Removed + the "attach" code from virtual.c, and temporarily commented + out the "vperfctr fs" code. Moved /dev/perfctr initialisation + and implementation from init.c to global.c. +- Eliminated CONFIG_VPERFCTR_PROC, making /proc/pid/perfctr + mandatory if CONFIG_PERFCTR_VIRTUAL is set. +- Some 2.2/2.4 compatibility cleanups. +- VIA Cyrix III detection bug fix. Contrary to VIA's documentation, + the Cyrix III vendor field is Centaur, not Cyrix. + +Version 1.6, 2000-11-21 +- Preliminary implementation of /proc/pid/perfctr. Seems to work, + but virtual.c and virtual_stub.c is again filled with + #if LINUX_VERSION_CODE crap which will need to be cleaned up. + The INFO ioctl is now implemented by vperfctrs too, to avoid the + need for opening /dev/perfctr. +- virtual.c now puts the perfctr pointer in filp->private_data + instead of inode->u.generic_ip. The main reason for this change + is that proc-fs places a dentry pointer in inode->u.generic_ip. +- sys_vperfctr_control() no longer resets the virtual TSC + if it already is active. The virtual TSC therefore runs + continuously from its first activation until the process + stops or unlinks its vperfctrs. +- Updates for 2.4.0-test11pre6. Use 2.4-style cpu_has_XXX + feature testing macros. 
Updated x86_compat.h to implement + missing cpu_has_mmx and cpu_has_msr, and compatibility + macros for 2.2. Changed vperfctr_fs_read_super() to use + new_inode(sb) instead of get_empty_inode() + some init code. +- Updates for 2.4.0-test9. Fixed x86_compat.h for cpu_khz change. + Since drivers/Makefile was converted to the new list style, + it became more difficult to handle CONFIG_PERFCTR=m. Changed + Config.in to set CONFIG_KPERFCTR=y when CONFIG_PERFCTR != n, + resulting in a much cleaner kernel patch for 2.4.0-test9. +- Removed d_alloc_root wrapper since 2.2 doesn't need it any more. +- When building for 2.2.18pre, use some of its 2.4 compatibility + features (module_init, module_exit and DECLARE_MUTEX). +- Updates for 2.4.0-test8: repaired kernel patch for new parameter + in do_fork, and fixed CLONE_PERFCTR conflict with CLONE_THREAD. + +Version 1.5, 2000-09-03 +- Dropped support for intermediate 2.3 and early 2.4.0-test kernels. + The code now supports kernels 2.2.xx and 2.4.0-test7 or later only. + Cleanups in compat.h and virtual.c. +- Rewrote the Makefile to use object file lists instead of conditionals. + This gets slightly hairy since kernel extensions are needed even + when the driver proper is built as a module. +- Removed the definition of CONFIG_PERFCTR_X86 from Config.in. + Use the 2.4 standard CONFIG_X86 instead. The 2.2.xx kernel + patches now define CONFIG_X86 in arch/i386/config.in. +- Cleaned up the vperfctr inheritance filter. Instead of setting + a disable flag (CLONE_KTHREAD) when kernel-internal threads are + created, I now set CLONE_PERFCTR in sys_fork and sys_vfork. +- /dev/perfctr no longer accepts the SAMPLE and UNLINK ioctls. + All operations pertaining to a process' virtual perfctrs must + be applied to the fd returned from the ATTACH ioctl. +- Removed the remote-control features from the virtual perfctrs. + Significant simplifications in virtual.c. Removed some now + unused stuff from compat.h and virtual_stub.c. 
+ +Version 1.4, 2000-08-11 +- Fixed a memory leak bug in virtual.c. An extraneous dget() in + get_vperfctr_filp() prevented reclaiming the dentry and inode + allocated for a vperfctr file. +- Major changes to the VFS interface in virtual.c. Starting with + 2.4.0-test6, inode->i_sb == NULL no longer works. Added code to + register a "vperfctr" fs and define a superblock and a mount point. + Completely rewrote the dentry init code. Most of the new code is + adapted from fs/pipe.c, with simplifications and macros to continue + supporting 2.2.x kernels. `ls -l /proc/*/fd/' now prints recognizable + names for vperfctr files. +- Cleaned up virtual.c slightly. Removed "#if 1" tests around the + vperfctr inheritance code. Rewrote vperfctr_alloc and vperfctr_free + to use the virt_to_page and {Set,Clear}PageReserved macros; + also updated compat.h to provide these for older kernels. +- Updated for 2.4.0-test3: a dummy `open' file operation is no longer + required by drivers/char/misc.c. +- Updated for `owner' field in file_operations added in 2.4.0-test2. + Removed MOD_{INC,DEC}_USE_COUNT from init.c (except when compiling + for 2.2.x) and virtual.c. Added MOD_{INC,DEC}_USE_COUNT to the + reserve/release functions in x86.c -- needed because the driver + may be active even if no open file refers to it. Using can_unload + in the module struct instead is possible but not as tidy. + +Version 1.3, 2000-06-29 +- Implemented inheritance for virtual perfctrs: fork() copies the + evntsel data to the child, exit() stops the child's counters but + does not detach the vperfctr object, and wait() adds the child's + counters to the parent's `children' counters. + Added a CLONE_KTHREAD flag to prevent inheritance to threads + created implicitly by request_module() and kernel_thread(). +- Fixed a half-broken printk() in x86_tests.c. +- Added checks to virtual.c to prevent the remote-control interface + from trying to activate dead vperfctrs. 
+- Updated vperfctr_attach() for changes in 2.3.99-pre7 and 2.4.0-test2. +- Fixed a problem introduced in 1.2 which caused linker errors if + CONFIG_PERFCTR=m and CONFIG_PERFCTR_INIT_TESTS=y. +- Export CPU kHz via a new field in PERFCTR_INFO ioctl, to enable + user-space to map accumulated TSC counts to actual time. + +Version 1.2, 2000-05-24 +- Added support for generic x86 processors with a time-stamp counter + but no performance-monitoring counters. By using the driver to + virtualise the TSC, accurate cycle-count measurements are now + possible on PMC-less processors like the AMD K6. +- Removed some of the special-casing of the x86 time-stamp counter. + It's now "just another counter", except that no evntsel is + needed to enable it. +- WinChip bug fix: the "fake TSC" code would increment an + uninitialised counter. +- Reorganised the x86 driver. Moved the optional init-time testing + code to a separate source file. +- Miscellaneous code cleanups and naming convention changes. + +Version 1.1, 2000-05-13 +- vperfctr_attach() now accepts pid 0 as an alias for the current + process. This reduces the number of getpid() calls needed in + the user-space library. (Suggested by Ulrich Drepper.) +- Added support for the VIA Cyrix III processor. +- Tuned the x86 driver interface. Replaced function pointers + with stubs which rewrite callers to invoke the correct callees. +- Added ARRAY_SIZE definition to compat.h for 2.2.x builds. +- Updated for 2.3.48 inode changes. +- Moved code closer to 2.3.x coding standards. Removed init_module + and cleanup_module, added __exit, module_init, and module_exit, + and extended "compat.h" accordingly. Cleaned up + and a little. + +Version 1.0, 2000-01-31 +- Prepared the driver to cope with non-x86 architectures: + - Moved generic parts of to . + - Merged driver's private "x86.h" into . 
+ - Config.in now defines CONFIG_PERFCTR_${ARCH}, and Makefile uses + it to select appropriate arch-dependent object files +- The driver now reads the low 32 bits of the counters, + instead of 40 or 48 bits zero-extended to 64 bits. + Sums are still 64 bits. This was done to reduce the number + of cache lines needed for certain data structures, to + simplify and improve the performance of the sampling + procedures, and to change 64+(64-64) arithmetic to 64+(32-32) + for the benefit of gcc on x86. This change doesn't reduce + precision, as long as no event occurs more than 2^32 times + between two sampling points. +- PERFCTR_GLOBAL_READ now forces all CPUs to be sampled, if the + sampling timer isn't running. + +Version 0.11, 2000-01-30 +- Added a missing EXPORT_SYMBOL which prevented the driver + from being built as a module in SMP kernels. +- Support for the CPU sampling instructions (i.e. RDPMC and + RDTSC on x86) is now announced explicitly by PERFCTR_INFO. +- The x86 hardware driver now keeps CR4.PCE globally enabled. + There are two reasons for this. First, the cost of toggling + this flag at process suspend/resume is high. Second, changes + in kernel 2.3.40 imply that any processor's %cr4 may be updated + asynchronously from the global variable mmu_cr4_features. + +Version 0.10, 2000-01-23 +- Added support for global-mode perfctrs (global.c). +- There is now a config option controlling whether to + perform init-time hardware tests or not. +- Added a hardware reserve/release mechanism so that multiple + high-level services don't simultaneously use the hardware. +- The driver is now officially device . +- Tuned the 64-bit tsc/msr/pmc read operations in x86.c. +- Support for virtual perfctrs can now be enabled or disabled + via CONFIG_PERFCTR_VIRTUAL. +- Added support for the WinChip 3 processor. +- Split the code into several files: x86.c (x86 drivers), + virtual.c (virtualised perfctrs), setup.c (boot-time actions), + init.c (driver top-level and init code). 
+ +Version 0.9, 2000-01-02 +- The driver can now be built as a module. +- Dropped sys_perfctr() system call and went back to using a + /dev/perfctr character device. Generic operations are now + ioctl commands on /dev/perfctr, and control operations on + virtual perfctrs are ioctl commands on their file descriptors. + Initially this change was done because new system calls in 2.3.x + made maintenance and binary compatibility with 2.2.x hard, but + the new API is actually cleaner than the previous system call. +- Moved this code from arch/i386/kernel/ to drivers/perfctr/. + +Version 0.8, 1999-11-14 +- Made the process management callback functions inline to + reduce scheduling overhead for processes not using perfctrs. +- Changed the 'status' field to contain the number of active + counters. Changed read_counters, write_control, and accumulate + to use this information to avoid unnecessary work. +- Fixed a bug in k7_check_control() which caused it to + require all four counters to be enabled. +- Fixed sys_perfctr() to return -ENODEV instead of -ENOSYS + if the processor doesn't support perfctrs. +- Some code cleanups. +- Evntsel MSRs are updated lazily, and counters are not written to. + + The following table lists the costs (in cycles) of various + instructions which access the counter or evntsel registers. + The table was derived from data collected by init-time tests + run by previous versions of this driver. + + Processor P5 P5MMX PII PIII K7 + Clock freq. (MHz) 133 233 266 450 500 + + RDPMC n/a 14 31 36 13 + RDMSR (counter) 29 28 81 80 52 + WRMSR (counter) 35 37 97 115 80 + WRMSR (evntsel) 33 37 88 105 232 + + Several things are apparent from this table: + + 1. It's much cheaper to use RDPMC than RDMSR to read the counters. + 2. It's much more expensive to reset a counter than to read it. + 3. It's expensive to write to an evntsel register. + + As of version 0.8, this driver uses the following strategies: + * The evntsel registers are updated lazily. 
A per_cpu_control[] + array caches the contents of each CPU's evntsel registers, + and only when a process requires a different setup are the + evntsel registers written to. In most cases, this eliminates the + need to reprogram the evntsel registers when switching processes. + The older drivers would write to the evntsel registers both at + process suspend and resume. + * The counter registers are read both at process resume and suspend, + and the difference is added to the process' accumulated counters. + The older drivers would reset the counters at resume, read them + at suspend, and add the values read to the accumulated counters. + * Only those registers enabled by the user's control information + are manipulated, instead of blindly manipulating all of them. + +Version 0.7 1999-10-25 +- The init-time checks in version 0.6 of this driver showed that + RDMSR is a lot slower than RDPMC for reading the PMCs. The driver + now uses RDPMC instead of RDMSR whenever possible. +- Added an mmap() operation to perfctr files. This allows any client + to read the accumulated counter state without making a system call. + The old "sync to user-provided buffer" method has been removed, + as it entailed additional copy operations and only worked for the + "active" process. The PERFCTR_READ operation has been replaced + by a simpler PERFCTR_SAMPLE operation, for the benefit of pre-MMX + Intel P5 processors which cannot sample counters in user-mode. + This rewrite actually simplified the code. +- The AMD K7 should now be supported correctly. The init-time checks + in version 0.6 of this driver revealed that each K7 counter has + its own ENable bit. (Thanks to Nathan Slingerland for running the + test and reporting the results to me.) +- Plugged a potential memory leak in perfctr_attach_task(). +- No longer piggyback on prctl(); sys_perfctr() is a real system call. +- Some code cleanups. 
+ +Version 0.6 1999-09-08 +- Temporarily added some init-time code that checks the + costs of RDPMC/RDMSR/WRMSR operations applied to perfctr MSRs, + the semantics of the ENable bit on the Athlon, and gets + the boot-time value of the WinChip CESR register. + This code can be turned off by #defining INIT_DEBUG to 0. +- Preliminary support for the AMD K7 Athlon processor. +- The code will now build in both 2.3.x and 2.2.x kernels. + +Version 0.5 1999-08-29 +- The user-space buffer is updated whenever state.status changes, + even when a remote command triggers the change. +- Reworked and simplified the high-level code. All accesses + now require an attached file in order to implement proper + accounting and synchronisation. The only exception is UNLINK: + a process may always UNLINK its own PMCs. +- Fixed counting bug in sys_perfctr_read(). +- Improved support for the Intel Pentium III. +- Another WinChip fix: fake TSC update at process resume. +- The code should now be safe for 'gcc -fstrict-aliasing'. + +Version 0.4 1999-07-31 +- Implemented PERFCTR_ATTACH and PERFCTR_{READ,CONTROL,STOP,UNLINK} + on attached perfctrs. An attached perfctr is represented as a file. +- Fixed an error in the WinChip-specific code. +- Perfctrs now survive exec(). + +Version 0.3 1999-07-22 +- Interface now via sys_prctl() instead of /dev/perfctr. +- Added NYI stubs for accessing other processes' perfctrs. +- Moved to dynamic allocation of a task's perfctr state. +- Minor code cleanups. + +Version 0.2 1999-06-07 +- Added support for WinChip CPUs. +- Restart counters from zero, not their previous values. This + corrected a problem for Intel P6 (WRMSR writes 32 bits to a PERFCTR + MSR and then sign-extends to 40 bits), and also simplified the code. +- Added support for syncing the kernel's counter values to a user- + provided buffer each time a process is resumed. 
This feature, and + the fact that the driver enables RDPMC in processes using PMCs, + allows user-level computation of a process' accumulated counter + values without incurring the overhead of making a system call. + +Version 0.1 1999-05-30 +- First public release. Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat24.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/compat24.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat24.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,97 @@ +/* $Id: compat24.h,v 1.22.2.1 2004/07/26 14:05:49 mikpe Exp $ + * Performance-monitoring counters driver. + * Compatibility definitions for 2.4 kernels. + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include /* for remap_page_range() [redefined here] */ + +#include "cpumask.h" + +/* 2.4.18-redhat had BUG_ON() before 2.4.19 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,19) && !defined(BUG_ON) +#define BUG_ON(condition) do { if ((condition) != 0) BUG(); } while(0) +#endif + +/* 2.4.18-redhat had set_cpus_allowed() before 2.4.21-pre5 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) && !defined(HAVE_SET_CPUS_ALLOWED) +#if defined(CONFIG_SMP) +extern void set_cpus_allowed(struct task_struct*, unsigned long); +#else +#define set_cpus_allowed(tsk, mask) do{}while(0) +#endif +#endif + +/* 2.4.20-8-redhat added cpu_online() */ +#if !defined(cpu_online) +#define cpu_online(cpu) (cpu_online_map & (1UL << (cpu))) +#endif + +/* 2.4.20-8-redhat added put_task_struct() */ +#if defined(put_task_struct) /* RH 2.4.20-8 */ +#define EXPORT_SYMBOL___put_task_struct EXPORT_SYMBOL(__put_task_struct) +#else /* standard 2.4 */ +#define put_task_struct(tsk) free_task_struct((tsk)) +#define EXPORT_SYMBOL___put_task_struct /*empty*/ +#endif + +/* remap_page_range() changed in 2.5.3-pre1 and 2.4.20-8-redhat */ +#if 
!defined(HAVE_5ARG_REMAP_PAGE_RANGE) +static inline int perfctr_remap_page_range(struct vm_area_struct *vma, unsigned long from, unsigned long to, unsigned long size, pgprot_t prot) +{ + return remap_page_range(from, to, size, prot); +} +#undef remap_page_range +#define remap_page_range(vma,from,to,size,prot) perfctr_remap_page_range((vma),(from),(to),(size),(prot)) +#endif + +/* 2.4.22-rc1 added EXPORT_SYMBOL(mmu_cr4_features) */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22) || defined(HAVE_EXPORT_mmu_cr4_features) +#define EXPORT_SYMBOL_mmu_cr4_features /*empty*/ +#else +#define EXPORT_SYMBOL_mmu_cr4_features EXPORT_SYMBOL(mmu_cr4_features) +#endif + +/* not in 2.4 proper, but some people use 2.4 with preemption patches */ +#ifdef CONFIG_PREEMPT +#error "not yet ported to 2.4+PREEMPT" +#endif +#ifndef preempt_disable +#define preempt_disable() do{}while(0) +#define preempt_enable() do{}while(0) +#endif + +#ifdef MODULE +#define __module_get(module) do { if ((module)) __MOD_INC_USE_COUNT((module)); } while(0) +#define module_put(module) do { if ((module)) __MOD_DEC_USE_COUNT((module)); } while(0) +#else +#define __module_get(module) do{}while(0) +#define module_put(module) do{}while(0) +#endif + +#define MODULE_ALIAS(alias) /*empty*/ + +/* introduced in 2.5.64; backported to 2.4.22-1.2115.nptl (FC1) */ +static inline int +perfctr_on_each_cpu(void (*func) (void *info), void *info, + int retry, int wait) +{ + int ret = 0; + + preempt_disable(); + ret = smp_call_function(func, info, retry, wait); + func(info); + preempt_enable(); + return ret; +} +#undef on_each_cpu +#define on_each_cpu(f,i,r,w) perfctr_on_each_cpu((f),(i),(r),(w)) + +/* 2.6.4 added 'noinline' */ +#if !defined(noinline) +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 +#define noinline __attribute__((noinline)) +#else +#define noinline /* unimplemented */ +#endif +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/init.c 
=================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/init.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/init.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,216 @@ +/* $Id: init.c,v 1.68 2004/01/11 22:12:09 mikpe Exp $ + * Performance-monitoring counters driver. + * Top-level initialisation code. + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "virtual.h" +#include "global.h" +#include "version.h" +#include "marshal.h" + +MODULE_AUTHOR("Mikael Pettersson "); +MODULE_DESCRIPTION("Performance-monitoring counters driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("char-major-10-182"); +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,63) +EXPORT_NO_SYMBOLS; +#endif + +#ifdef CONFIG_PERFCTR_DEBUG +#define VERSION_DEBUG " DEBUG" +#else +#define VERSION_DEBUG +#endif + +struct perfctr_info perfctr_info = { + .abi_version = PERFCTR_ABI_VERSION, + .driver_version = VERSION VERSION_DEBUG, +}; + +char *perfctr_cpu_name __initdata; + +int sys_perfctr_abi(unsigned int *argp) +{ + if( put_user(PERFCTR_ABI_VERSION, argp) ) + return -EFAULT; + return 0; +} + +int sys_perfctr_info(struct perfctr_struct_buf *argp) +{ + return perfctr_copy_to_user(argp, &perfctr_info, &perfctr_info_sdesc); +} + +static int cpus_copy_to_user(const cpumask_t *cpus, struct perfctr_cpu_mask *argp) +{ + const unsigned int k_nrwords = PERFCTR_CPUMASK_NRLONGS*(sizeof(long)/sizeof(int)); + unsigned int u_nrwords; + unsigned int ui, ki, j; + + if( get_user(u_nrwords, &argp->nrwords) ) + return -EFAULT; + if( put_user(k_nrwords, &argp->nrwords) ) + return -EFAULT; + if( u_nrwords < k_nrwords ) + return -EOVERFLOW; + for(ui = 0, ki = 0; ki < PERFCTR_CPUMASK_NRLONGS; ++ki) { + unsigned long mask = cpus_addr(*cpus)[ki]; + for(j = 0; j < 
sizeof(long)/sizeof(int); ++j) { + if( put_user((unsigned int)mask, &argp->mask[ui]) ) + return -EFAULT; + ++ui; + mask = (mask >> (8*sizeof(int)-1)) >> 1; + } + } + return 0; +} + +int sys_perfctr_cpus(struct perfctr_cpu_mask *argp) +{ + cpumask_t cpus = cpu_online_map; + return cpus_copy_to_user(&cpus, argp); +} + +int sys_perfctr_cpus_forbidden(struct perfctr_cpu_mask *argp) +{ + cpumask_t cpus = perfctr_cpus_forbidden_mask; + return cpus_copy_to_user(&cpus, argp); +} + +#ifdef CONFIG_IA32_EMULATION +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) +static int perfctr_ioctl32_handler(unsigned int fd, unsigned int cmd, unsigned long arg, struct file *filp) +{ + /* filp->f_op->ioctl is known to exist; see sys32_ioctl() */ + return filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); +} +#else +#define perfctr_ioctl32_handler 0 +#endif + +static void __init perfctr_register_ioctl32_conversions(void) +{ + int err; + + err = register_ioctl32_conversion(PERFCTR_ABI, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(PERFCTR_INFO, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(PERFCTR_CPUS, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(PERFCTR_CPUS_FORBIDDEN, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_CREAT, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_OPEN, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_READ_SUM, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_UNLINK, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_CONTROL, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_IRESUME, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(VPERFCTR_READ_CONTROL, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(GPERFCTR_CONTROL, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(GPERFCTR_READ, perfctr_ioctl32_handler); + err |= 
register_ioctl32_conversion(GPERFCTR_STOP, perfctr_ioctl32_handler); + err |= register_ioctl32_conversion(GPERFCTR_START, perfctr_ioctl32_handler); + if( err ) + printk(KERN_ERR "perfctr: register_ioctl32_conversion() failed\n"); +} + +static void __exit perfctr_unregister_ioctl32_conversions(void) +{ + unregister_ioctl32_conversion(PERFCTR_ABI); + unregister_ioctl32_conversion(PERFCTR_INFO); + unregister_ioctl32_conversion(PERFCTR_CPUS); + unregister_ioctl32_conversion(PERFCTR_CPUS_FORBIDDEN); + unregister_ioctl32_conversion(VPERFCTR_CREAT); + unregister_ioctl32_conversion(VPERFCTR_OPEN); + unregister_ioctl32_conversion(VPERFCTR_READ_SUM); + unregister_ioctl32_conversion(VPERFCTR_UNLINK); + unregister_ioctl32_conversion(VPERFCTR_CONTROL); + unregister_ioctl32_conversion(VPERFCTR_IRESUME); + unregister_ioctl32_conversion(VPERFCTR_READ_CONTROL); + unregister_ioctl32_conversion(GPERFCTR_CONTROL); + unregister_ioctl32_conversion(GPERFCTR_READ); + unregister_ioctl32_conversion(GPERFCTR_STOP); + unregister_ioctl32_conversion(GPERFCTR_START); +} + +#else +#define perfctr_register_ioctl32_conversions() do{}while(0) +#define perfctr_unregister_ioctl32_conversions() do{}while(0) +#endif + +static int dev_perfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch( cmd ) { + case PERFCTR_ABI: + return sys_perfctr_abi((unsigned int*)arg); + case PERFCTR_INFO: + return sys_perfctr_info((struct perfctr_struct_buf*)arg); + case PERFCTR_CPUS: + return sys_perfctr_cpus((struct perfctr_cpu_mask*)arg); + case PERFCTR_CPUS_FORBIDDEN: + return sys_perfctr_cpus_forbidden((struct perfctr_cpu_mask*)arg); + case VPERFCTR_CREAT: + return vperfctr_attach((int)arg, 1); + case VPERFCTR_OPEN: + return vperfctr_attach((int)arg, 0); + default: + return gperfctr_ioctl(inode, filp, cmd, arg); + } + return -EINVAL; +} + +static struct file_operations dev_perfctr_file_ops = { + .owner = THIS_MODULE, + .ioctl = dev_perfctr_ioctl, +}; + +static struct 
miscdevice dev_perfctr = { + .minor = 182, + .name = "perfctr", + .fops = &dev_perfctr_file_ops, +}; + +int __init perfctr_init(void) +{ + int err; + if( (err = perfctr_cpu_init()) != 0 ) { + printk(KERN_INFO "perfctr: not supported by this processor\n"); + return err; + } + if( (err = vperfctr_init()) != 0 ) + return err; + gperfctr_init(); + if( (err = misc_register(&dev_perfctr)) != 0 ) { + printk(KERN_ERR "/dev/perfctr: failed to register, errno %d\n", + -err); + return err; + } + perfctr_register_ioctl32_conversions(); + printk(KERN_INFO "perfctr: driver %s, cpu type %s at %u kHz\n", + perfctr_info.driver_version, + perfctr_cpu_name, + perfctr_info.cpu_khz); + return 0; +} + +void __exit perfctr_exit(void) +{ + perfctr_unregister_ioctl32_conversions(); + misc_deregister(&dev_perfctr); + vperfctr_exit(); + perfctr_cpu_exit(); +} + +module_init(perfctr_init) +module_exit(perfctr_exit) Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,15 @@ +/* $Id: virtual.h,v 1.11 2003/10/04 20:29:43 mikpe Exp $ + * Virtual per-process performance counters. 
+ * + * Copyright (C) 1999-2003 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_VIRTUAL +extern int vperfctr_attach(int, int); +extern int vperfctr_init(void); +extern void vperfctr_exit(void); +#else +static inline int vperfctr_attach(int tid, int creat) { return -EINVAL; } +static inline int vperfctr_init(void) { return 0; } +static inline void vperfctr_exit(void) { } +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/marshal.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,722 @@ +/* $Id: marshal.c,v 1.6.2.1 2004/08/02 22:24:58 mikpe Exp $ + * Performance-monitoring counters driver. + * Structure marshalling support. + * + * Copyright (C) 2003-2004 Mikael Pettersson + */ +#ifdef __KERNEL__ +#include +struct inode; +#include +#include +#include +#include +#include +#include +#else /* !__KERNEL__ */ +#define CONFIG_KPERFCTR +#include +#include +#include +#include +#include +#define put_user(w, p) (*(p) = (w), 0) +#define get_user(w, p) ((w) = *(p), 0) +#endif /* !__KERNEL__ */ + +#include "marshal.h" + +/**************************************************************** + * * + * Struct encoding support. 
* + * * + ****************************************************************/ + +static void stream_write(struct perfctr_marshal_stream *stream, unsigned int word) +{ + if( !stream->error ) { + if( stream->pos >= stream->size ) + stream->error = -EOVERFLOW; + else if( put_user(word, &stream->buffer[stream->pos]) ) + stream->error = -EFAULT; + } + ++stream->pos; +} + +static void encode_field(const void *address, + const struct perfctr_field_desc *field, + struct perfctr_marshal_stream *stream) +{ + unsigned int base_type = PERFCTR_TYPE_BASE(field->type); + unsigned int nr_items = PERFCTR_TYPE_NRITEMS(field->type); + unsigned int tag = field->tag; + const char *pointer = (const char*)address + field->offset; + unsigned int uint32_val; + union { + unsigned long long ull; + unsigned int ui[2]; + } uint64_val; + unsigned int i = 0; + + do { + if( base_type == PERFCTR_TYPE_UINT64 ) { + uint64_val.ull = *(unsigned long long*)pointer; + pointer += sizeof(long long); + if( !uint64_val.ull ) + continue; + stream_write(stream, PERFCTR_HEADER(PERFCTR_HEADER_UINT64, tag, i)); + stream_write(stream, uint64_val.ui[0]); + stream_write(stream, uint64_val.ui[1]); + } else { /* PERFCTR_TYPE_BYTES4 */ + memcpy(&uint32_val, pointer, sizeof(int)); + pointer += sizeof(int); + if( !uint32_val ) + continue; + stream_write(stream, PERFCTR_HEADER(PERFCTR_HEADER_UINT32, tag, i)); + stream_write(stream, uint32_val); + } + } while( ++i < nr_items ); +} + +void perfctr_encode_struct(const void *address, + const struct perfctr_struct_desc *sdesc, + struct perfctr_marshal_stream *stream) +{ + unsigned int i; + + for(i = 0; i < sdesc->nrfields; ++i) + encode_field(address, &sdesc->fields[i], stream); + for(i = 0; i < sdesc->nrsubs; ++i) { + const struct perfctr_sub_struct_desc *sub = &sdesc->subs[i]; + perfctr_encode_struct((char*)address + sub->offset, sub->sdesc, stream); + } +} + +/**************************************************************** + * * + * Struct decoding support. 
* + * * + ****************************************************************/ + +static int stream_read(struct perfctr_marshal_stream *stream, unsigned int *word) +{ + if( stream->pos >= stream->size ) + return 0; + if( get_user(*word, &stream->buffer[stream->pos]) ) + return -EFAULT; + ++stream->pos; + return 1; +} + +static const struct perfctr_field_desc* +find_field(unsigned int *struct_offset, + const struct perfctr_struct_desc *sdesc, + unsigned int tag) +{ + unsigned int low, high, mid, i; + const struct perfctr_field_desc *field; + const struct perfctr_sub_struct_desc *sub; + + low = 0; + high = sdesc->nrfields; /* [low,high[ */ + while( low < high ) { + mid = (low + high) / 2; + field = &sdesc->fields[mid]; + if( field->tag == tag ) + return field; + if( field->tag < tag ) + low = mid + 1; + else + high = mid; + } + for(i = 0; i < sdesc->nrsubs; ++i) { + sub = &sdesc->subs[i]; + field = find_field(struct_offset, sub->sdesc, tag); + if( field ) { + *struct_offset += sub->offset; + return field; + } + } + return 0; +} + +int perfctr_decode_struct(void *address, + const struct perfctr_struct_desc *sdesc, + struct perfctr_marshal_stream *stream) +{ + unsigned int header; + int err; + const struct perfctr_field_desc *field; + unsigned int struct_offset; + union { + unsigned long long ull; + unsigned int ui[2]; + } val; + char *target; + unsigned int itemnr; + + for(;;) { + err = stream_read(stream, &header); + if( err <= 0 ) + return err; + struct_offset = 0; + field = find_field(&struct_offset, sdesc, PERFCTR_HEADER_TAG(header)); + if( !field ) + goto err_eproto; + /* a 64-bit datum must have a 64-bit target field */ + if( PERFCTR_HEADER_TYPE(header) != PERFCTR_HEADER_UINT32 && + PERFCTR_TYPE_BASE(field->type) != PERFCTR_TYPE_UINT64 ) + goto err_eproto; + err = stream_read(stream, &val.ui[0]); + if( err <= 0 ) + goto err_err; + target = (char*)address + struct_offset + field->offset; + itemnr = PERFCTR_HEADER_ITEMNR(header); + if( itemnr >= 
PERFCTR_TYPE_NRITEMS(field->type) ) + goto err_eproto; + if( PERFCTR_TYPE_BASE(field->type) == PERFCTR_TYPE_UINT64 ) { + /* a 64-bit field must have a 64-bit datum */ + if( PERFCTR_HEADER_TYPE(header) == PERFCTR_HEADER_UINT32 ) + goto err_eproto; + err = stream_read(stream, &val.ui[1]); + if( err <= 0 ) + goto err_err; + ((unsigned long long*)target)[itemnr] = val.ull; + } else + memcpy(&((unsigned int*)target)[itemnr], &val.ui[0], sizeof(int)); + } + err_err: /* err ? err : -EPROTO */ + if( err ) + return err; + err_eproto: /* saves object code over inlining it */ + return -EPROTO; +} + +/**************************************************************** + * * + * Structure descriptors. * + * * + ****************************************************************/ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define STRUCT_ARRAY_SIZE(TYPE, MEMBER) ARRAY_SIZE(((TYPE*)0)->MEMBER) + +#if defined(__i386__) || defined(__x86_64__) + +#define PERFCTR_TAG_CPU_CONTROL_TSC_ON 32 +#define PERFCTR_TAG_CPU_CONTROL_NRACTRS 33 +#define PERFCTR_TAG_CPU_CONTROL_NRICTRS 34 +#define PERFCTR_TAG_CPU_CONTROL_PMC_MAP 35 +#define PERFCTR_TAG_CPU_CONTROL_EVNTSEL 36 +#define PERFCTR_TAG_CPU_CONTROL_IRESET 37 +#define PERFCTR_TAG_CPU_CONTROL_P4_ESCR 38 +#define PERFCTR_TAG_CPU_CONTROL_P4_PE 39 +#define PERFCTR_TAG_CPU_CONTROL_P4_PMV 40 +#define PERFCTR_TAG_CPU_CONTROL_RSVD1 41 +#define PERFCTR_TAG_CPU_CONTROL_RSVD2 42 +#define PERFCTR_TAG_CPU_CONTROL_RSVD3 43 +#define PERFCTR_TAG_CPU_CONTROL_RSVD4 44 +#define PERFCTR_CPU_CONTROL_NRFIELDS_0 (7 + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, pmc_map) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, evntsel) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, ireset)) +#define PERFCTR_CPU_CONTROL_NRFIELDS_1 (2 + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, p4.escr)) +#define PERFCTR_CPU_CONTROL_NRFIELDS (PERFCTR_CPU_CONTROL_NRFIELDS_0 + PERFCTR_CPU_CONTROL_NRFIELDS_1) + +#define PERFCTR_TAG_SUM_CTRS_TSC 48 +#define PERFCTR_TAG_SUM_CTRS_PMC 
49 +#define PERFCTR_SUM_CTRS_NRFIELDS (1 + STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs, pmc)) + +static const struct perfctr_field_desc perfctr_sum_ctrs_fields[] = { + { .offset = offsetof(struct perfctr_sum_ctrs, tsc), + .tag = PERFCTR_TAG_SUM_CTRS_TSC, + .type = PERFCTR_TYPE_UINT64 }, + { .offset = offsetof(struct perfctr_sum_ctrs, pmc), + .tag = PERFCTR_TAG_SUM_CTRS_PMC, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs,pmc), + PERFCTR_TYPE_UINT64) }, +}; + +const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc = { + .total_sizeof = sizeof(struct perfctr_sum_ctrs), + .total_nrfields = PERFCTR_SUM_CTRS_NRFIELDS, + .nrfields = ARRAY_SIZE(perfctr_sum_ctrs_fields), + .fields = perfctr_sum_ctrs_fields, +}; + +static const struct perfctr_field_desc perfctr_cpu_control_fields[] = { + { .offset = offsetof(struct perfctr_cpu_control, tsc_on), + .tag = PERFCTR_TAG_CPU_CONTROL_TSC_ON, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, nractrs), + .tag = PERFCTR_TAG_CPU_CONTROL_NRACTRS, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, nrictrs), + .tag = PERFCTR_TAG_CPU_CONTROL_NRICTRS, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, pmc_map), + .tag = PERFCTR_TAG_CPU_CONTROL_PMC_MAP, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,pmc_map), + PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_cpu_control, evntsel), + .tag = PERFCTR_TAG_CPU_CONTROL_EVNTSEL, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,evntsel), + PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_cpu_control, ireset), + .tag = PERFCTR_TAG_CPU_CONTROL_IRESET, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,ireset), + PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_cpu_control, p4.escr), + .tag = PERFCTR_TAG_CPU_CONTROL_P4_ESCR, + .type = 
PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,p4.escr), + PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_cpu_control, p4.pebs_enable), + .tag = PERFCTR_TAG_CPU_CONTROL_P4_PE, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, p4.pebs_matrix_vert), + .tag = PERFCTR_TAG_CPU_CONTROL_P4_PMV, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved1), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD1, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved2), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved3), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved4), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +const struct perfctr_struct_desc perfctr_cpu_control_sdesc = { + .total_sizeof = sizeof(struct perfctr_cpu_control), + .total_nrfields = PERFCTR_CPU_CONTROL_NRFIELDS, + .nrfields = ARRAY_SIZE(perfctr_cpu_control_fields), + .fields = perfctr_cpu_control_fields, +}; + +#endif /* __i386__ || __x86_64__ */ + +#if defined(__powerpc__) /* XXX: can be merged with x86/amd64 */ + +#define PERFCTR_TAG_CPU_CONTROL_TSC_ON 32 +#define PERFCTR_TAG_CPU_CONTROL_NRACTRS 33 +#define PERFCTR_TAG_CPU_CONTROL_NRICTRS 34 +#define PERFCTR_TAG_CPU_CONTROL_PMC_MAP 35 +#define PERFCTR_TAG_CPU_CONTROL_EVNTSEL 36 +#define PERFCTR_TAG_CPU_CONTROL_IRESET 37 +#define PERFCTR_TAG_CPU_CONTROL_PPC_MMCR0 38 +#define PERFCTR_TAG_CPU_CONTROL_PPC_MMCR2 39 +/* 40: unused */ +#define PERFCTR_TAG_CPU_CONTROL_RSVD1 41 +#define PERFCTR_TAG_CPU_CONTROL_RSVD2 42 +#define PERFCTR_TAG_CPU_CONTROL_RSVD3 43 +#define PERFCTR_TAG_CPU_CONTROL_RSVD4 44 +#define PERFCTR_CPU_CONTROL_NRFIELDS_0 (7 + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, pmc_map) + STRUCT_ARRAY_SIZE(struct 
perfctr_cpu_control, evntsel) + STRUCT_ARRAY_SIZE(struct perfctr_cpu_control, ireset)) +#ifdef __powerpc__ +#define PERFCTR_CPU_CONTROL_NRFIELDS_1 2 +#endif +#define PERFCTR_CPU_CONTROL_NRFIELDS (PERFCTR_CPU_CONTROL_NRFIELDS_0 + PERFCTR_CPU_CONTROL_NRFIELDS_1) + +#define PERFCTR_TAG_SUM_CTRS_TSC 48 +#define PERFCTR_TAG_SUM_CTRS_PMC 49 +#define PERFCTR_SUM_CTRS_NRFIELDS (1 + STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs, pmc)) + +static const struct perfctr_field_desc perfctr_sum_ctrs_fields[] = { + { .offset = offsetof(struct perfctr_sum_ctrs, tsc), + .tag = PERFCTR_TAG_SUM_CTRS_TSC, + .type = PERFCTR_TYPE_UINT64 }, + { .offset = offsetof(struct perfctr_sum_ctrs, pmc), + .tag = PERFCTR_TAG_SUM_CTRS_PMC, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_sum_ctrs,pmc), + PERFCTR_TYPE_UINT64) }, +}; + +const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc = { + .total_sizeof = sizeof(struct perfctr_sum_ctrs), + .total_nrfields = PERFCTR_SUM_CTRS_NRFIELDS, + .nrfields = ARRAY_SIZE(perfctr_sum_ctrs_fields), + .fields = perfctr_sum_ctrs_fields, +}; + +static const struct perfctr_field_desc perfctr_cpu_control_fields[] = { + { .offset = offsetof(struct perfctr_cpu_control, tsc_on), + .tag = PERFCTR_TAG_CPU_CONTROL_TSC_ON, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, nractrs), + .tag = PERFCTR_TAG_CPU_CONTROL_NRACTRS, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, nrictrs), + .tag = PERFCTR_TAG_CPU_CONTROL_NRICTRS, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, pmc_map), + .tag = PERFCTR_TAG_CPU_CONTROL_PMC_MAP, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,pmc_map), + PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_cpu_control, evntsel), + .tag = PERFCTR_TAG_CPU_CONTROL_EVNTSEL, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,evntsel), + PERFCTR_TYPE_BYTES4) }, + { .offset = 
offsetof(struct perfctr_cpu_control, ireset), + .tag = PERFCTR_TAG_CPU_CONTROL_IRESET, + .type = PERFCTR_TYPE_ARRAY(STRUCT_ARRAY_SIZE(struct perfctr_cpu_control,ireset), + PERFCTR_TYPE_BYTES4) }, +#ifdef __powerpc__ + { .offset = offsetof(struct perfctr_cpu_control, ppc.mmcr0), + .tag = PERFCTR_TAG_CPU_CONTROL_PPC_MMCR0, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, ppc.mmcr2), + .tag = PERFCTR_TAG_CPU_CONTROL_PPC_MMCR2, + .type = PERFCTR_TYPE_BYTES4 }, +#endif /* __powerpc__ */ + { .offset = offsetof(struct perfctr_cpu_control, _reserved1), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD1, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved2), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved3), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_cpu_control, _reserved4), + .tag = PERFCTR_TAG_CPU_CONTROL_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +const struct perfctr_struct_desc perfctr_cpu_control_sdesc = { + .total_sizeof = sizeof(struct perfctr_cpu_control), + .total_nrfields = PERFCTR_CPU_CONTROL_NRFIELDS, + .nrfields = ARRAY_SIZE(perfctr_cpu_control_fields), + .fields = perfctr_cpu_control_fields, +}; + +#endif /* __powerpc__ */ + +#define PERFCTR_TAG_INFO_ABI_VERSION 0 +#define PERFCTR_TAG_INFO_DRIVER_VERSION 1 +#define PERFCTR_TAG_INFO_CPU_TYPE 2 +#define PERFCTR_TAG_INFO_CPU_FEATURES 3 +#define PERFCTR_TAG_INFO_CPU_KHZ 4 +#define PERFCTR_TAG_INFO_TSC_TO_CPU_MULT 5 +#define PERFCTR_TAG_INFO_RSVD2 6 +#define PERFCTR_TAG_INFO_RSVD3 7 +#define PERFCTR_TAG_INFO_RSVD4 8 +#define PERFCTR_INFO_NRFIELDS (8 + sizeof(((struct perfctr_info*)0)->driver_version)/sizeof(int)) + +#define VPERFCTR_TAG_CONTROL_SIGNO 9 +#define VPERFCTR_TAG_CONTROL_PRESERVE 10 +#define VPERFCTR_TAG_CONTROL_RSVD1 11 +#define VPERFCTR_TAG_CONTROL_RSVD2 12 +#define 
VPERFCTR_TAG_CONTROL_RSVD3 13 +#define VPERFCTR_TAG_CONTROL_RSVD4 14 +#define VPERFCTR_CONTROL_NRFIELDS (6 + PERFCTR_CPU_CONTROL_NRFIELDS) + +#define GPERFCTR_TAG_CPU_CONTROL_CPU 15 +#define GPERFCTR_TAG_CPU_CONTROL_RSVD1 16 +#define GPERFCTR_TAG_CPU_CONTROL_RSVD2 17 +#define GPERFCTR_TAG_CPU_CONTROL_RSVD3 18 +#define GPERFCTR_TAG_CPU_CONTROL_RSVD4 19 +#define GPERFCTR_CPU_CONTROL_NRFIELDS (5 + PERFCTR_CPU_CONTROL_NRFIELDS) + +#define GPERFCTR_TAG_CPU_STATE_CPU 20 +#define GPERFCTR_TAG_CPU_STATE_RSVD1 21 +#define GPERFCTR_TAG_CPU_STATE_RSVD2 22 +#define GPERFCTR_TAG_CPU_STATE_RSVD3 23 +#define GPERFCTR_TAG_CPU_STATE_RSVD4 24 +#define GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS 5 +#define GPERFCTR_CPU_STATE_NRFIELDS (GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS + PERFCTR_CPU_CONTROL_NRFIELDS + PERFCTR_SUM_CTRS_NRFIELDS) + +static const struct perfctr_field_desc perfctr_info_fields[] = { + { .offset = offsetof(struct perfctr_info, abi_version), + .tag = PERFCTR_TAG_INFO_ABI_VERSION, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, driver_version), + .tag = PERFCTR_TAG_INFO_DRIVER_VERSION, + .type = PERFCTR_TYPE_ARRAY(sizeof(((struct perfctr_info*)0)->driver_version)/sizeof(int), PERFCTR_TYPE_BYTES4) }, + { .offset = offsetof(struct perfctr_info, cpu_type), + .tag = PERFCTR_TAG_INFO_CPU_TYPE, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, cpu_features), + .tag = PERFCTR_TAG_INFO_CPU_FEATURES, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, cpu_khz), + .tag = PERFCTR_TAG_INFO_CPU_KHZ, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, tsc_to_cpu_mult), + .tag = PERFCTR_TAG_INFO_TSC_TO_CPU_MULT, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, _reserved2), + .tag = PERFCTR_TAG_INFO_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct perfctr_info, _reserved3), + .tag = PERFCTR_TAG_INFO_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { 
.offset = offsetof(struct perfctr_info, _reserved4), + .tag = PERFCTR_TAG_INFO_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +const struct perfctr_struct_desc perfctr_info_sdesc = { + .total_sizeof = sizeof(struct perfctr_info), + .total_nrfields = PERFCTR_INFO_NRFIELDS, + .nrfields = ARRAY_SIZE(perfctr_info_fields), + .fields = perfctr_info_fields, +}; + +#if defined(CONFIG_PERFCTR_VIRTUAL) || !defined(__KERNEL__) +static const struct perfctr_field_desc vperfctr_control_fields[] = { + { .offset = offsetof(struct vperfctr_control, si_signo), + .tag = VPERFCTR_TAG_CONTROL_SIGNO, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct vperfctr_control, preserve), + .tag = VPERFCTR_TAG_CONTROL_PRESERVE, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct vperfctr_control, _reserved1), + .tag = VPERFCTR_TAG_CONTROL_RSVD1, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct vperfctr_control, _reserved2), + .tag = VPERFCTR_TAG_CONTROL_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct vperfctr_control, _reserved3), + .tag = VPERFCTR_TAG_CONTROL_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct vperfctr_control, _reserved4), + .tag = VPERFCTR_TAG_CONTROL_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +static const struct perfctr_sub_struct_desc vperfctr_control_subs[] = { + { .offset = offsetof(struct vperfctr_control, cpu_control), + .sdesc = &perfctr_cpu_control_sdesc }, +}; + +const struct perfctr_struct_desc vperfctr_control_sdesc = { + .total_sizeof = sizeof(struct vperfctr_control), + .total_nrfields = VPERFCTR_CONTROL_NRFIELDS, + .nrfields = ARRAY_SIZE(vperfctr_control_fields), + .fields = vperfctr_control_fields, + .nrsubs = ARRAY_SIZE(vperfctr_control_subs), + .subs = vperfctr_control_subs, +}; +#endif /* CONFIG_PERFCTR_VIRTUAL || !__KERNEL__ */ + +#if defined(CONFIG_PERFCTR_GLOBAL) || !defined(__KERNEL__) +static const struct perfctr_field_desc gperfctr_cpu_control_fields[] = { + { .offset = 
offsetof(struct gperfctr_cpu_control, cpu), + .tag = GPERFCTR_TAG_CPU_CONTROL_CPU, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_control, _reserved1), + .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD1, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_control, _reserved2), + .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_control, _reserved3), + .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_control, _reserved4), + .tag = GPERFCTR_TAG_CPU_CONTROL_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +static const struct perfctr_sub_struct_desc gperfctr_cpu_control_subs[] = { + { .offset = offsetof(struct gperfctr_cpu_control, cpu_control), + .sdesc = &perfctr_cpu_control_sdesc }, +}; + +const struct perfctr_struct_desc gperfctr_cpu_control_sdesc = { + .total_sizeof = sizeof(struct gperfctr_cpu_control), + .total_nrfields = GPERFCTR_CPU_CONTROL_NRFIELDS, + .nrfields = ARRAY_SIZE(gperfctr_cpu_control_fields), + .fields = gperfctr_cpu_control_fields, + .nrsubs = ARRAY_SIZE(gperfctr_cpu_control_subs), + .subs = gperfctr_cpu_control_subs, +}; + +static const struct perfctr_field_desc gperfctr_cpu_state_fields[] = { + { .offset = offsetof(struct gperfctr_cpu_state, cpu), + .tag = GPERFCTR_TAG_CPU_STATE_CPU, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_state, _reserved1), + .tag = GPERFCTR_TAG_CPU_STATE_RSVD1, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_state, _reserved2), + .tag = GPERFCTR_TAG_CPU_STATE_RSVD2, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_state, _reserved3), + .tag = GPERFCTR_TAG_CPU_STATE_RSVD3, + .type = PERFCTR_TYPE_BYTES4 }, + { .offset = offsetof(struct gperfctr_cpu_state, _reserved4), + .tag = GPERFCTR_TAG_CPU_STATE_RSVD4, + .type = PERFCTR_TYPE_BYTES4 }, +}; + +static const struct 
perfctr_sub_struct_desc gperfctr_cpu_state_subs[] = { + { .offset = offsetof(struct gperfctr_cpu_state, cpu_control), + .sdesc = &perfctr_cpu_control_sdesc }, + { .offset = offsetof(struct gperfctr_cpu_state, sum), + .sdesc = &perfctr_sum_ctrs_sdesc }, +}; + +const struct perfctr_struct_desc gperfctr_cpu_state_only_cpu_sdesc = { + .total_sizeof = sizeof(struct gperfctr_cpu_state), + .total_nrfields = GPERFCTR_CPU_STATE_ONLY_CPU_NRFIELDS, + .nrfields = ARRAY_SIZE(gperfctr_cpu_state_fields), + .fields = gperfctr_cpu_state_fields, +}; + +const struct perfctr_struct_desc gperfctr_cpu_state_sdesc = { + .total_sizeof = sizeof(struct gperfctr_cpu_state), + .total_nrfields = GPERFCTR_CPU_STATE_NRFIELDS, + .nrfields = ARRAY_SIZE(gperfctr_cpu_state_fields), + .fields = gperfctr_cpu_state_fields, + .nrsubs = ARRAY_SIZE(gperfctr_cpu_state_subs), + .subs = gperfctr_cpu_state_subs, +}; +#endif /* CONFIG_PERFCTR_GLOBAL || !__KERNEL__ */ + +#ifdef __KERNEL__ + +int perfctr_copy_from_user(void *struct_address, + struct perfctr_struct_buf *argp, + const struct perfctr_struct_desc *sdesc) +{ + struct perfctr_marshal_stream stream; + + if( get_user(stream.size, &argp->rdsize) ) + return -EFAULT; + stream.buffer = argp->buffer; + stream.pos = 0; + stream.error = 0; + memset(struct_address, 0, sdesc->total_sizeof); + return perfctr_decode_struct(struct_address, sdesc, &stream); +} + +int perfctr_copy_to_user(struct perfctr_struct_buf *argp, + void *struct_address, + const struct perfctr_struct_desc *sdesc) +{ + struct perfctr_marshal_stream stream; + + if( get_user(stream.size, &argp->wrsize) ) + return -EFAULT; + stream.buffer = argp->buffer; + stream.pos = 0; + stream.error = 0; + perfctr_encode_struct(struct_address, sdesc, &stream); + if( stream.error ) + return stream.error; + if( put_user(stream.pos, &argp->rdsize) ) + return -EFAULT; + return 0; +} + +#else /* !__KERNEL__ */ + +#define sdesc_bufsize(sdesc) ((sdesc)->total_nrfields + (sdesc)->total_sizeof/sizeof(int)) + +static 
int common_ioctl_w(const void *arg, + const struct perfctr_struct_desc *sdesc, + struct perfctr_struct_buf *buf, + unsigned int bufsize) +{ + struct perfctr_marshal_stream stream; + + stream.size = bufsize; + stream.buffer = buf->buffer; + stream.pos = 0; + stream.error = 0; + perfctr_encode_struct(arg, sdesc, &stream); + if( stream.error ) { + errno = -stream.error; + return -1; + } + buf->rdsize = stream.pos; + return 0; +} + +int perfctr_ioctl_w(int fd, unsigned int cmd, const void *arg, + const struct perfctr_struct_desc *sdesc) +{ + unsigned int bufsize = sdesc_bufsize(sdesc); + union { + struct perfctr_struct_buf buf; + struct { + unsigned int rdsize; + unsigned int wrsize; + unsigned int buffer[bufsize]; + } buf_bufsize; + } u; + int err; + + err = common_ioctl_w(arg, sdesc, &u.buf, bufsize); + if( err < 0 ) + return err; + u.buf.wrsize = 0; + return ioctl(fd, cmd, &u.buf); +} + +static int common_ioctl_r(int fd, unsigned int cmd, void *res, + const struct perfctr_struct_desc *sdesc, + struct perfctr_struct_buf *buf) +{ + struct perfctr_marshal_stream stream; + int err; + + if( ioctl(fd, cmd, buf) < 0 ) + return -1; + stream.size = buf->rdsize; + stream.buffer = buf->buffer; + stream.pos = 0; + stream.error = 0; + memset(res, 0, sdesc->total_sizeof); + err = perfctr_decode_struct(res, sdesc, &stream); + if( err < 0 ) { + errno = -err; + return -1; + } + return 0; +} + +int perfctr_ioctl_r(int fd, unsigned int cmd, void *res, + const struct perfctr_struct_desc *sdesc) +{ + unsigned int bufsize = sdesc_bufsize(sdesc); + union { + struct perfctr_struct_buf buf; + struct { + unsigned int rdsize; + unsigned int wrsize; + unsigned int buffer[bufsize]; + } buf_bufsize; + } u; + + u.buf.rdsize = 0; + u.buf.wrsize = bufsize; + return common_ioctl_r(fd, cmd, res, sdesc, &u.buf); +} + +int perfctr_ioctl_wr(int fd, unsigned int cmd, void *argres, + const struct perfctr_struct_desc *arg_sdesc, + const struct perfctr_struct_desc *res_sdesc) +{ + unsigned int arg_bufsize = 
sdesc_bufsize(arg_sdesc); + unsigned int res_bufsize = sdesc_bufsize(res_sdesc); + unsigned int bufsize = arg_bufsize > res_bufsize ? arg_bufsize : res_bufsize; + union { + struct perfctr_struct_buf buf; + struct { + unsigned int rdsize; + unsigned int wrsize; + unsigned int buffer[bufsize]; + } buf_bufsize; + } u; + int err; + + err = common_ioctl_w(argres, arg_sdesc, &u.buf, arg_bufsize); + if( err < 0 ) + return err; + u.buf.wrsize = res_bufsize; + return common_ioctl_r(fd, cmd, argres, res_sdesc, &u.buf); +} + +#endif /* !__KERNEL__ */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_compat.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_compat.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_compat.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,62 @@ +/* $Id: ppc_compat.h,v 1.1.2.1 2004/06/21 22:32:14 mikpe Exp $ + * Performance-monitoring counters driver. + * PPC32-specific compatibility definitions for 2.4/2.6 kernels. + * + * Copyright (C) 2004 Mikael Pettersson + */ + +#define SPRN_MMCR0 0x3B8 /* 604 and up */ +#define SPRN_PMC1 0x3B9 /* 604 and up */ +#define SPRN_PMC2 0x3BA /* 604 and up */ +#define SPRN_SIA 0x3BB /* 604 and up */ +#define SPRN_MMCR1 0x3BC /* 604e and up */ +#define SPRN_PMC3 0x3BD /* 604e and up */ +#define SPRN_PMC4 0x3BE /* 604e and up */ +#define SPRN_MMCR2 0x3B0 /* 7400 and up */ +#define SPRN_BAMR 0x3B7 /* 7400 and up */ +#define SPRN_PMC5 0x3B1 /* 7450 and up */ +#define SPRN_PMC6 0x3B2 /* 7450 and up */ + +/* MMCR0 layout (74xx terminology) */ +#define MMCR0_FC 0x80000000 /* Freeze counters unconditionally. */ +#define MMCR0_FCS 0x40000000 /* Freeze counters while MSR[PR]=0 (supervisor mode). */ +#define MMCR0_FCP 0x20000000 /* Freeze counters while MSR[PR]=1 (user mode). */ +#define MMCR0_FCM1 0x10000000 /* Freeze counters while MSR[PM]=1. 
*/ +#define MMCR0_FCM0 0x08000000 /* Freeze counters while MSR[PM]=0. */ +#define MMCR0_PMXE 0x04000000 /* Enable performance monitor exceptions. + * Cleared by hardware when a PM exception occurs. + * 604: PMXE is not cleared by hardware. + */ +#define MMCR0_FCECE 0x02000000 /* Freeze counters on enabled condition or event. + * FCECE is treated as 0 if TRIGGER is 1. + * 74xx: FC is set when the event occurs. + * 604/750: ineffective when PMXE=0. + */ +#define MMCR0_TBSEL 0x01800000 /* Time base lower (TBL) bit selector. + * 00: bit 31, 01: bit 23, 10: bit 19, 11: bit 15. + */ +#define MMCR0_TBEE 0x00400000 /* Enable event on TBL bit transition from 0 to 1. */ +#define MMCR0_THRESHOLD 0x003F0000 /* Threshold value for certain events. */ +#define MMCR0_PMC1CE 0x00008000 /* Enable event on PMC1 overflow. */ +#define MMCR0_PMCjCE 0x00004000 /* Enable event on PMC2-PMC6 overflow. + * 604/750: Overrides FCECE (DISCOUNT). + */ +#define MMCR0_TRIGGER 0x00002000 /* Disable PMC2-PMC6 until PMC1 overflow or other event. + * 74xx: cleared by hardware when the event occurs. + */ +#define MMCR0_PMC1SEL 0x00001FB0 /* PMC1 event selector, 7 bits. */ +#define MMCR0_PMC2SEL 0x0000003F /* PMC2 event selector, 6 bits. */ + +/* MMCR1 layout (604e-7457) */ +#define MMCR1_PMC3SEL 0xF8000000 /* PMC3 event selector, 5 bits. */ +#define MMCR1_PMC4SEL 0x07B00000 /* PMC4 event selector, 5 bits. */ +#define MMCR1_PMC5SEL 0x003E0000 /* PMC5 event selector, 5 bits. (745x only) */ +#define MMCR1_PMC6SEL 0x0001F800 /* PMC6 event selector, 6 bits. (745x only) */ +#define MMCR1__RESERVED 0x000007FF /* should be zero */ + +/* MMCR2 layout (7400-7457) */ +#define MMCR2_THRESHMULT 0x80000000 /* MMCR0[THRESHOLD] multiplier. */ +#define MMCR2_SMCNTEN 0x40000000 /* 7400/7410 only, should be zero. */ +#define MMCR2_SMINTEN 0x20000000 /* 7400/7410 only, should be zero. 
*/ +#define MMCR2__RESERVED 0x1FFFFFFF /* should be zero */ +#define MMCR2_RESERVED (MMCR2_SMCNTEN | MMCR2_SMINTEN | MMCR2__RESERVED) Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/compat.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/compat.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,23 @@ +/* $Id: compat.h,v 1.42 2004/05/02 22:52:13 mikpe Exp $ + * Performance-monitoring counters driver. + * Compatibility definitions for 2.6 kernels. + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +#include "compat24.h" +#else + +#include "cpumask.h" + +#define EXPORT_SYMBOL_mmu_cr4_features EXPORT_SYMBOL(mmu_cr4_features) +#define EXPORT_SYMBOL___put_task_struct EXPORT_SYMBOL(__put_task_struct) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) /* names changed in 2.6.4-rc2 */ +#define sysdev_register(dev) sys_device_register((dev)) +#define sysdev_unregister(dev) sys_device_unregister((dev)) +#endif + +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Config.in =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Config.in 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Config.in 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,15 @@ +# $Id: Config.in,v 1.15 2002/11/25 13:01:46 mikpe Exp $ +# Performance-monitoring counters driver configuration +# + +mainmenu_option next_comment +comment 'Performance-monitoring counters support' +tristate 'Performance-monitoring counters support' CONFIG_PERFCTR +if [ "$CONFIG_PERFCTR" != "n" ]; then + define_bool CONFIG_KPERFCTR y + bool ' Additional internal consistency checks' 
CONFIG_PERFCTR_DEBUG + bool ' Init-time hardware tests' CONFIG_PERFCTR_INIT_TESTS + bool ' Virtual performance counters support' CONFIG_PERFCTR_VIRTUAL $CONFIG_PERFCTR + bool ' Global performance counters support' CONFIG_PERFCTR_GLOBAL $CONFIG_PERFCTR +fi +endmenu Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/global.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,244 @@ +/* $Id: global.c,v 1.38 2004/01/25 14:45:35 mikpe Exp $ + * Global-mode performance-monitoring counters via /dev/perfctr. + * + * Copyright (C) 2000-2003 Mikael Pettersson + * + * XXX: Doesn't do any authentication yet. Should we limit control + * to root, or base it on having write access to /dev/perfctr? + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "global.h" +#include "marshal.h" + +static const char this_service[] = __FILE__; +static int hardware_is_ours = 0; +static struct timer_list sampling_timer; +static DECLARE_MUTEX(control_mutex); +static unsigned int nr_active_cpus = 0; + +struct gperfctr { + struct perfctr_cpu_state cpu_state; + spinlock_t lock; +} ____cacheline_aligned; + +static struct gperfctr per_cpu_gperfctr[NR_CPUS] __cacheline_aligned; + +static int reserve_hardware(void) +{ + const char *other; + + if( hardware_is_ours ) + return 0; + other = perfctr_cpu_reserve(this_service); + if( other ) { + printk(KERN_ERR __FILE__ ":%s: failed because hardware is taken by '%s'\n", + __FUNCTION__, other); + return -EBUSY; + } + hardware_is_ours = 1; + __module_get(THIS_MODULE); + return 0; +} + +static void release_hardware(void) +{ + int i; + + nr_active_cpus = 0; + if( hardware_is_ours ) { + hardware_is_ours = 0; + 
del_timer(&sampling_timer); + sampling_timer.data = 0; + perfctr_cpu_release(this_service); + module_put(THIS_MODULE); + for(i = 0; i < NR_CPUS; ++i) + per_cpu_gperfctr[i].cpu_state.cstatus = 0; + } +} + +static void sample_this_cpu(void *unused) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. */ + struct gperfctr *perfctr; + + perfctr = &per_cpu_gperfctr[smp_processor_id()]; + if( !perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + return; + spin_lock(&perfctr->lock); + perfctr_cpu_sample(&perfctr->cpu_state); + spin_unlock(&perfctr->lock); +} + +static void sample_all_cpus(void) +{ + on_each_cpu(sample_this_cpu, NULL, 1, 1); +} + +static void sampling_timer_function(unsigned long interval) +{ + sample_all_cpus(); + sampling_timer.expires = jiffies + interval; + add_timer(&sampling_timer); +} + +static unsigned long usectojiffies(unsigned long usec) +{ + usec += 1000000 / HZ - 1; + usec /= 1000000 / HZ; + return usec; +} + +static void start_sampling_timer(unsigned long interval_usec) +{ + if( interval_usec > 0 ) { + unsigned long interval = usectojiffies(interval_usec); + init_timer(&sampling_timer); + sampling_timer.function = sampling_timer_function; + sampling_timer.data = interval; + sampling_timer.expires = jiffies + interval; + add_timer(&sampling_timer); + } +} + +static void start_this_cpu(void *unused) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. 
*/ + struct gperfctr *perfctr; + + perfctr = &per_cpu_gperfctr[smp_processor_id()]; + if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + perfctr_cpu_resume(&perfctr->cpu_state); +} + +static void start_all_cpus(void) +{ + on_each_cpu(start_this_cpu, NULL, 1, 1); +} + +static int gperfctr_control(struct perfctr_struct_buf *argp) +{ + int ret; + struct gperfctr *perfctr; + struct gperfctr_cpu_control cpu_control; + + ret = perfctr_copy_from_user(&cpu_control, argp, &gperfctr_cpu_control_sdesc); + if( ret ) + return ret; + if( cpu_control.cpu >= NR_CPUS || + !cpu_online(cpu_control.cpu) || + perfctr_cpu_is_forbidden(cpu_control.cpu) ) + return -EINVAL; + /* we don't permit i-mode counters */ + if( cpu_control.cpu_control.nrictrs != 0 ) + return -EPERM; + down(&control_mutex); + ret = -EBUSY; + if( hardware_is_ours ) + goto out_up; /* you have to stop them first */ + perfctr = &per_cpu_gperfctr[cpu_control.cpu]; + spin_lock(&perfctr->lock); + perfctr->cpu_state.tsc_start = 0; + perfctr->cpu_state.tsc_sum = 0; + memset(&perfctr->cpu_state.pmc, 0, sizeof perfctr->cpu_state.pmc); + perfctr->cpu_state.control = cpu_control.cpu_control; + ret = perfctr_cpu_update_control(&perfctr->cpu_state, 1); + spin_unlock(&perfctr->lock); + if( ret < 0 ) + goto out_up; + if( perfctr_cstatus_enabled(perfctr->cpu_state.cstatus) ) + ++nr_active_cpus; + ret = nr_active_cpus; + out_up: + up(&control_mutex); + return ret; +} + +static int gperfctr_start(unsigned int interval_usec) +{ + int ret; + + if( interval_usec < 10000 ) + return -EINVAL; + down(&control_mutex); + ret = nr_active_cpus; + if( ret > 0 ) { + if( reserve_hardware() < 0 ) { + ret = -EBUSY; + } else { + start_all_cpus(); + start_sampling_timer(interval_usec); + } + } + up(&control_mutex); + return ret; +} + +static int gperfctr_stop(void) +{ + down(&control_mutex); + release_hardware(); + up(&control_mutex); + return 0; +} + +static int gperfctr_read(struct perfctr_struct_buf *argp) +{ + struct gperfctr *perfctr; + 
struct gperfctr_cpu_state state; + int err; + + // XXX: sample_all_cpus() ??? + err = perfctr_copy_from_user(&state, argp, &gperfctr_cpu_state_only_cpu_sdesc); + if( err ) + return err; + if( state.cpu >= NR_CPUS || !cpu_online(state.cpu) ) + return -EINVAL; + perfctr = &per_cpu_gperfctr[state.cpu]; + spin_lock(&perfctr->lock); + state.cpu_control = perfctr->cpu_state.control; + //state.sum = perfctr->cpu_state.sum; + { + int j; + state.sum.tsc = perfctr->cpu_state.tsc_sum; + for(j = 0; j < ARRAY_SIZE(state.sum.pmc); ++j) + state.sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; + } + spin_unlock(&perfctr->lock); + return perfctr_copy_to_user(argp, &state, &gperfctr_cpu_state_sdesc); +} + +int gperfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + switch( cmd ) { + case GPERFCTR_CONTROL: + return gperfctr_control((struct perfctr_struct_buf*)arg); + case GPERFCTR_READ: + return gperfctr_read((struct perfctr_struct_buf*)arg); + case GPERFCTR_STOP: + return gperfctr_stop(); + case GPERFCTR_START: + return gperfctr_start(arg); + } + return -EINVAL; +} + +void __init gperfctr_init(void) +{ + int i; + + for(i = 0; i < NR_CPUS; ++i) + per_cpu_gperfctr[i].lock = SPIN_LOCK_UNLOCKED; +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_tests.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,14 @@ +/* $Id: x86_64_tests.h,v 1.1 2003/05/14 21:51:57 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86_64-specific init-time tests. 
+ * + * Copyright (C) 2003 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_INIT_TESTS +extern void perfctr_k8_init_tests(void); +extern void perfctr_generic_init_tests(void); +#else +#define perfctr_k8_init_tests() +#define perfctr_generic_init_tests() +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Kconfig =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Kconfig 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Kconfig 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,79 @@ +# $Id: Kconfig,v 1.7 2003/05/14 21:51:32 mikpe Exp $ +# Performance-monitoring counters driver configuration +# + +menu "Performance-monitoring counters support" + +config PERFCTR + tristate "Performance monitoring counters support" + help + This driver provides access to the performance-monitoring counter + registers available in some (but not all) modern processors. + These special-purpose registers can be programmed to count low-level + performance-related events which occur during program execution, + such as cache misses, pipeline stalls, etc. + + You can safely say Y here, even if you intend to run the kernel + on a processor without performance-monitoring counters. + + You can also say M here to compile the driver as a module; the + module will be called `perfctr'. + +config KPERFCTR + bool + depends on PERFCTR + default y + +config PERFCTR_DEBUG + bool "Additional internal consistency checks" + depends on PERFCTR + help + This option enables additional internal consistency checking in + the perfctr driver. The scope of these checks is unspecified and + may vary between different versions of the driver. + + Enabling this option will reduce performance, so say N unless you + are debugging the driver. 
+ +config PERFCTR_INIT_TESTS + bool "Init-time hardware tests" + depends on PERFCTR + help + This option makes the driver perform additional hardware tests + during initialisation, and log their results in the kernel's + message buffer. For most supported processors, these tests simply + measure the runtime overheads of performance counter operations. + + If you have a less well-known processor (one not listed in the + etc/costs/ directory in the user-space package), you should enable + this option and email the results to the perfctr developers. + + If unsure, say N. + +config PERFCTR_VIRTUAL + bool "Virtual performance counters support" + depends on PERFCTR + help + The processor's performance-monitoring counters are special-purpose + global registers. This option adds support for virtual per-process + performance-monitoring counters which only run when the process + to which they belong is executing. This improves the accuracy of + performance measurements by reducing "noise" from other processes. + + Say Y. + +config PERFCTR_GLOBAL + bool "Global performance counters support" + depends on PERFCTR + help + This option adds driver support for global-mode (system-wide) + performance-monitoring counters. In this mode, the driver allows + each performance-monitoring counter on each processor to be + controlled and read. The driver provides a sampling timer to + maintain 64-bit accumulated event counts. + + Global-mode performance counters cannot be used if some process + is currently using virtual-mode performance counters, and vice versa. + + Say Y. 
+endmenu Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/marshal.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/marshal.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,104 @@ +/* $Id: marshal.h,v 1.1 2003/08/19 13:37:07 mikpe Exp $ + * Performance-monitoring counters driver. + * Structure marshalling support. + * + * Copyright (C) 2003 Mikael Pettersson + */ + +/* + * Each encoded datum starts with a 32-bit header word, containing + * the datum's type (1 bit: UINT32 or UINT64), the target's field + * tag (16 bits), and the target field's array index (15 bits). + * + * After the header follows the datum's value, in one (for UINT32) + * or two (for UINT64) words. Multi-word values are emitted in + * native word order. + * + * To encode a struct, encode each field with a non-zero value, + * and place the encodings in sequence. The field order is arbitrary. + * + * To decode an encoded struct, first memset() the target struct + * to zero. Then decode each encoded field in the sequence and + * update the corresponding field in the target struct. + */ +#define PERFCTR_HEADER(TYPE,TAG,ITEMNR) (((TAG)<<16)|((ITEMNR)<<1)|(TYPE)) +#define PERFCTR_HEADER_TYPE(H) ((H) & 0x1) +#define PERFCTR_HEADER_ITEMNR(H) (((H) >> 1) & 0x7FFF) +#define PERFCTR_HEADER_TAG(H) ((H) >> 16) + +#define PERFCTR_HEADER_UINT32 0 +#define PERFCTR_HEADER_UINT64 1 + +/* + * A field descriptor describes a struct field to the + * encoding and decoding procedures. + * + * To keep the descriptors small, field tags and array sizes + * are currently restricted to 8 and 7 bits, respectively. + * This does not change the encoded format. 
+ */ +struct perfctr_field_desc { + unsigned short offset; /* offsetof() for this field */ + unsigned char tag; /* identifying tag in encoded format */ + unsigned char type; /* base type (1 bit), array size - 1 (7 bits) */ +}; + +#define PERFCTR_TYPE_ARRAY(N,T) ((((N) - 1) << 1) | (T)) +#define PERFCTR_TYPE_BASE(T) ((T) & 0x1) +#define PERFCTR_TYPE_NRITEMS(T) (((T) >> 1) + 1) + +#define PERFCTR_TYPE_BYTES4 0 /* uint32 or char[4] */ +#define PERFCTR_TYPE_UINT64 1 /* long long */ + +struct perfctr_struct_desc { + unsigned short total_sizeof; /* for buffer allocation and decode memset() */ + unsigned short total_nrfields; /* for buffer allocation */ + unsigned short nrfields; + unsigned short nrsubs; + /* Note: the fields must be in ascending tag order */ + const struct perfctr_field_desc *fields; + const struct perfctr_sub_struct_desc { + unsigned short offset; + const struct perfctr_struct_desc *sdesc; + } *subs; +}; + +struct perfctr_marshal_stream { + unsigned int size; + unsigned int *buffer; + unsigned int pos; + unsigned int error; +}; + +extern void perfctr_encode_struct(const void *address, + const struct perfctr_struct_desc *sdesc, + struct perfctr_marshal_stream *stream); + +extern int perfctr_decode_struct(void *address, + const struct perfctr_struct_desc *sdesc, + struct perfctr_marshal_stream *stream); + +extern const struct perfctr_struct_desc perfctr_sum_ctrs_sdesc; +extern const struct perfctr_struct_desc perfctr_cpu_control_sdesc; +extern const struct perfctr_struct_desc perfctr_info_sdesc; +extern const struct perfctr_struct_desc vperfctr_control_sdesc; +extern const struct perfctr_struct_desc gperfctr_cpu_control_sdesc; +extern const struct perfctr_struct_desc gperfctr_cpu_state_only_cpu_sdesc; +extern const struct perfctr_struct_desc gperfctr_cpu_state_sdesc; + +#ifdef __KERNEL__ +extern int perfctr_copy_to_user(struct perfctr_struct_buf *argp, + void *struct_address, + const struct perfctr_struct_desc *sdesc); +extern int 
perfctr_copy_from_user(void *struct_address, + struct perfctr_struct_buf *argp, + const struct perfctr_struct_desc *sdesc); +#else +extern int perfctr_ioctl_w(int fd, unsigned int cmd, const void *arg, + const struct perfctr_struct_desc *sdesc); +extern int perfctr_ioctl_r(int fd, unsigned int cmd, void *res, + const struct perfctr_struct_desc *sdesc); +extern int perfctr_ioctl_wr(int fd, unsigned int cmd, void *argres, + const struct perfctr_struct_desc *arg_sdesc, + const struct perfctr_struct_desc *res_sdesc); +#endif /* __KERNEL__ */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual_stub.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual_stub.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual_stub.c 2004-11-18 23:52:29.000000000 -0500 @@ -0,0 +1,67 @@ +/* $Id: virtual_stub.c,v 1.26 2003/10/04 22:53:42 mikpe Exp $ + * Kernel stub used to support virtual perfctrs when the + * perfctr driver is built as a module. + * + * Copyright (C) 2000-2003 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include "compat.h" + +static void bug_void_perfctr(struct vperfctr *perfctr) +{ + current->thread.perfctr = NULL; + BUG(); +} + +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED +static void bug_set_cpus_allowed(struct task_struct *owner, struct vperfctr *perfctr, cpumask_t new_mask) +{ + owner->thread.perfctr = NULL; + BUG(); +} +#endif + +struct vperfctr_stub vperfctr_stub = { + .exit = bug_void_perfctr, + .suspend = bug_void_perfctr, + .resume = bug_void_perfctr, + .sample = bug_void_perfctr, +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + .set_cpus_allowed = bug_set_cpus_allowed, +#endif +}; + +/* + * exit_thread() calls __vperfctr_exit() via vperfctr_stub.exit(). 
+ * If the process' reference was the last reference to this + * vperfctr object, and this was the last live vperfctr object, + * then the perfctr module's use count will drop to zero. + * This is Ok, except for the fact that code is still running + * in the module (pending returns back to exit_thread()). This + * could race with rmmod in a preemptive UP kernel, leading to + * code running in freed memory. The race also exists in SMP + * kernels, but the time window is extremely small. + * + * Since exit() isn't performance-critical, we wrap the call to + * vperfctr_stub.exit() with code to increment the module's use + * count before the call, and decrement it again afterwards. Thus, + * the final drop to zero occurs here and not in the module itself. + * (All other code paths that drop the use count do so via a file + * object, and VFS in 2.4+ kernels also refcount the module.) + */ +void _vperfctr_exit(struct vperfctr *perfctr) +{ + __module_get(vperfctr_stub.owner); + vperfctr_stub.exit(perfctr); + module_put(vperfctr_stub.owner); +} + +EXPORT_SYMBOL(vperfctr_stub); + +#include /* for 2.4.15 and up, except 2.4.20-8-redhat */ +#include /* for 2.5.32 and up, and 2.4.20-8-redhat */ +EXPORT_SYMBOL(ptrace_check_attach); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_tests.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,292 @@ +/* $Id: ppc_tests.c,v 1.1.2.3 2004/07/27 16:42:03 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional PPC32-specific init-time tests. 
+ * + * Copyright (C) 2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include /* for tb_ticks_per_jiffy */ +#include "compat.h" +#include "ppc_compat.h" +#include "ppc_tests.h" + +#define NITER 256 +#define X2(S) S"; "S +#define X8(S) X2(X2(X2(S))) + +static void __init do_read_tbl(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mftbl %0") : "=r"(dummy)); +} + +static void __init do_read_pmc1(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC1)) : "=r"(dummy)); +} + +static void __init do_read_pmc2(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC2)) : "=r"(dummy)); +} + +static void __init do_read_pmc3(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC3)) : "=r"(dummy)); +} + +static void __init do_read_pmc4(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_PMC4)) : "=r"(dummy)); +} + +static void __init do_read_mmcr0(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_MMCR0)) : "=r"(dummy)); +} + +static void __init do_read_mmcr1(unsigned int unused) +{ + unsigned int i, dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mfspr %0," __stringify(SPRN_MMCR1)) : "=r"(dummy)); +} + +static void __init do_write_pmc2(unsigned int arg) +{ + unsigned int i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mtspr " __stringify(SPRN_PMC2) ",%0") : : "r"(arg)); +} + +static void __init do_write_pmc3(unsigned int arg) +{ + unsigned int i; + for(i = 0; i < NITER/8; ++i) + __asm__ 
__volatile__(X8("mtspr " __stringify(SPRN_PMC3) ",%0") : : "r"(arg)); +} + +static void __init do_write_pmc4(unsigned int arg) +{ + unsigned int i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mtspr " __stringify(SPRN_PMC4) ",%0") : : "r"(arg)); +} + +static void __init do_write_mmcr1(unsigned int arg) +{ + unsigned int i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mtspr " __stringify(SPRN_MMCR1) ",%0") : : "r"(arg)); +} + +static void __init do_write_mmcr0(unsigned int arg) +{ + unsigned int i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("mtspr " __stringify(SPRN_MMCR0) ",%0") : : "r"(arg)); +} + +static void __init do_empty_loop(unsigned int unused) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__("" : : ); +} + +static unsigned __init run(void (*doit)(unsigned int), unsigned int arg) +{ + unsigned int start, stop; + start = mfspr(SPRN_PMC1); + (*doit)(arg); /* should take < 2^32 cycles to complete */ + stop = mfspr(SPRN_PMC1); + return stop - start; +} + +static void __init init_tests_message(void) +{ + unsigned int pvr = mfspr(SPRN_PVR); + printk(KERN_INFO "Please email the following PERFCTR INIT lines " + "to mikpe@csd.uu.se\n" + KERN_INFO "To remove this message, rebuild the driver " + "with CONFIG_PERFCTR_INIT_TESTS=n\n"); + printk(KERN_INFO "PERFCTR INIT: PVR 0x%08x, CPU clock %u kHz, TB clock %u kHz\n", + pvr, + perfctr_info.cpu_khz, + tb_ticks_per_jiffy*(HZ/10)/(1000/10)); +} + +static void __init clear(int have_mmcr1) +{ + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_PMC1, 0); + mtspr(SPRN_PMC2, 0); + if (have_mmcr1) { + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_PMC3, 0); + mtspr(SPRN_PMC4, 0); + } +} + +static void __init check_fcece(unsigned int pmc1ce) +{ + unsigned int mmcr0; + + /* + * This test checks if MMCR0[FC] is set after PMC1 overflows + * when MMCR0[FCECE] is set. + * 74xx documentation states this behaviour, while documentation + * for 604/750 processors doesn't mention this at all. 
+ * + * Also output the value of PMC1 shortly after the overflow. + * This tells us if PMC1 really was frozen. On 604/750, it may not + * freeze since we don't enable PMIs. [No freeze confirmed on 750.] + * + * When pmc1ce == 0, MMCR0[PMC1CE] is zero. It's unclear whether + * this masks all PMC1 overflow events or just PMC1 PMIs. + * + * PMC1 counts processor cycles, with 100 to go before overflowing. + * FCECE is set. + * PMC1CE is clear if !pmc1ce, otherwise set. + */ + mtspr(SPRN_PMC1, 0x80000000-100); + mmcr0 = (1<<(31-6)) | (0x01 << 6); + if (pmc1ce) + mmcr0 |= (1<<(31-16)); + mtspr(SPRN_MMCR0, mmcr0); + do { + do_empty_loop(0); + } while (!(mfspr(SPRN_PMC1) & 0x80000000)); + do_empty_loop(0); + printk(KERN_INFO "PERFCTR INIT: %s(%u): MMCR0[FC] is %u, PMC1 is %#x\n", + __FUNCTION__, pmc1ce, + !!(mfspr(SPRN_MMCR0) & (1<<(31-0))), mfspr(SPRN_PMC1)); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_PMC1, 0); +} + +static void __init check_trigger(unsigned int pmc1ce) +{ + unsigned int mmcr0; + + /* + * This test checks if MMCR0[TRIGGER] is reset after PMC1 overflows. + * 74xx documentation states this behaviour, while documentation + * for 604/750 processors doesn't mention this at all. + * [No reset confirmed on 750.] + * + * Also output the values of PMC1 and PMC2 shortly after the overflow. + * PMC2 should be equal to PMC1-0x80000000. + * + * When pmc1ce == 0, MMCR0[PMC1CE] is zero. It's unclear whether + * this masks all PMC1 overflow events or just PMC1 PMIs. + * + * PMC1 counts processor cycles, with 100 to go before overflowing. + * PMC2 counts processor cycles, starting from 0. + * TRIGGER is set, so PMC2 doesn't start until PMC1 overflows. + * PMC1CE is clear if !pmc1ce, otherwise set. 
+ */ + mtspr(SPRN_PMC2, 0); + mtspr(SPRN_PMC1, 0x80000000-100); + mmcr0 = (1<<(31-18)) | (0x01 << 6) | (0x01 << 0); + if (pmc1ce) + mmcr0 |= (1<<(31-16)); + mtspr(SPRN_MMCR0, mmcr0); + do { + do_empty_loop(0); + } while (!(mfspr(SPRN_PMC1) & 0x80000000)); + do_empty_loop(0); + printk(KERN_INFO "PERFCTR INIT: %s(%u): MMCR0[TRIGGER] is %u, PMC1 is %#x, PMC2 is %#x\n", + __FUNCTION__, pmc1ce, + !!(mfspr(SPRN_MMCR0) & (1<<(31-18))), mfspr(SPRN_PMC1), mfspr(SPRN_PMC2)); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_PMC1, 0); + mtspr(SPRN_PMC2, 0); +} + +static void __init +measure_overheads(int have_mmcr1) +{ + int i; + unsigned int mmcr0, loop, ticks[12]; + const char *name[12]; + + clear(have_mmcr1); + + /* PMC1 = "processor cycles", + PMC2 = "completed instructions", + not disabled in any mode, + no interrupts */ + mmcr0 = (0x01 << 6) | (0x02 << 0); + mtspr(SPRN_MMCR0, mmcr0); + + name[0] = "mftbl"; + ticks[0] = run(do_read_tbl, 0); + name[1] = "mfspr (pmc1)"; + ticks[1] = run(do_read_pmc1, 0); + name[2] = "mfspr (pmc2)"; + ticks[2] = run(do_read_pmc2, 0); + name[3] = "mfspr (pmc3)"; + ticks[3] = have_mmcr1 ? run(do_read_pmc3, 0) : 0; + name[4] = "mfspr (pmc4)"; + ticks[4] = have_mmcr1 ? run(do_read_pmc4, 0) : 0; + name[5] = "mfspr (mmcr0)"; + ticks[5] = run(do_read_mmcr0, 0); + name[6] = "mfspr (mmcr1)"; + ticks[6] = have_mmcr1 ? run(do_read_mmcr1, 0) : 0; + name[7] = "mtspr (pmc2)"; + ticks[7] = run(do_write_pmc2, 0); + name[8] = "mtspr (pmc3)"; + ticks[8] = have_mmcr1 ? run(do_write_pmc3, 0) : 0; + name[9] = "mtspr (pmc4)"; + ticks[9] = have_mmcr1 ? run(do_write_pmc4, 0) : 0; + name[10] = "mtspr (mmcr1)"; + ticks[10] = have_mmcr1 ? 
run(do_write_mmcr1, 0) : 0; + name[11] = "mtspr (mmcr0)"; + ticks[11] = run(do_write_mmcr0, mmcr0); + + loop = run(do_empty_loop, 0); + + clear(have_mmcr1); + + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); + printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); + for(i = 0; i < ARRAY_SIZE(ticks); ++i) { + unsigned int x; + if (!ticks[i]) + continue; + x = ((ticks[i] - loop) * 10) / NITER; + printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", + name[i], x/10, x%10, ticks[i]); + } + check_fcece(0); + check_fcece(1); + check_trigger(0); + check_trigger(1); +} + +void __init perfctr_ppc_init_tests(int have_mmcr1) +{ + preempt_disable(); + measure_overheads(have_mmcr1); + preempt_enable(); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_setup.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_setup.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_setup.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,116 @@ +/* $Id: x86_setup.c,v 1.47.2.2 2004/08/02 19:38:51 mikpe Exp $ + * Performance-monitoring counters driver. + * x86/x86_64-specific kernel-resident code. + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "x86_compat.h" +#include "compat.h" + +/* XXX: belongs to a virtual_compat.c file */ +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED && defined(CONFIG_PERFCTR_VIRTUAL) && LINUX_VERSION_CODE < KERNEL_VERSION(2,4,21) && !defined(HAVE_SET_CPUS_ALLOWED) +/** + * set_cpus_allowed() - change a given task's processor affinity + * @p: task to bind + * @new_mask: bitmask of allowed processors + * + * Upon return, the task is running on a legal processor. 
Note the caller + * must have a valid reference to the task: it must not exit() prematurely. + * This call can sleep; do not hold locks on call. + */ +void set_cpus_allowed(struct task_struct *p, unsigned long new_mask) +{ + new_mask &= cpu_online_map; + BUG_ON(!new_mask); + + /* This must be our own, safe, call from sys_vperfctr_control(). */ + + p->cpus_allowed = new_mask; + + /* + * If the task is on a no-longer-allowed processor, we need to move + * it. If the task is not current, then set need_resched and send + * its processor an IPI to reschedule. + */ + if (!(p->cpus_runnable & p->cpus_allowed)) { + if (p != current) { + p->need_resched = 1; + smp_send_reschedule(p->processor); + } + /* + * Wait until we are on a legal processor. If the task is + * current, then we should be on a legal processor the next + * time we reschedule. Otherwise, we need to wait for the IPI. + */ + while (!(p->cpus_runnable & p->cpus_allowed)) + schedule(); + } +} +EXPORT_SYMBOL(set_cpus_allowed); +#endif + +#ifdef CONFIG_X86_LOCAL_APIC +static void perfctr_default_ihandler(unsigned long pc) +{ +} + +static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; + +asmlinkage void smp_perfctr_interrupt(struct pt_regs *regs) +{ + /* PREEMPT note: invoked via an interrupt gate, which + masks interrupts. We're still on the originating CPU. */ + /* XXX: recursive interrupts? delay the ACK, mask LVTPC, or queue? */ + ack_APIC_irq(); + irq_enter(); + (*perfctr_ihandler)(instruction_pointer(regs)); + irq_exit(); +} + +void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) +{ + perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; +} +#endif + +#ifdef __x86_64__ +extern unsigned int cpu_khz; +#else +extern unsigned long cpu_khz; +#endif + +/* Wrapper to avoid namespace clash in RedHat 8.0's 2.4.18-14 kernel. 
 */ +unsigned int perfctr_cpu_khz(void) +{ + return cpu_khz; +} + +#ifdef CONFIG_PERFCTR_MODULE +EXPORT_SYMBOL_mmu_cr4_features; +EXPORT_SYMBOL(perfctr_cpu_khz); + +#ifdef CONFIG_X86_LOCAL_APIC +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) +EXPORT_SYMBOL(nmi_perfctr_msr); +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,67) && defined(CONFIG_PM) +EXPORT_SYMBOL(apic_pm_register); +EXPORT_SYMBOL(apic_pm_unregister); +EXPORT_SYMBOL(nmi_pmdev); +#endif + +EXPORT_SYMBOL(perfctr_cpu_set_ihandler); +#endif /* CONFIG_X86_LOCAL_APIC */ + +#endif /* MODULE */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/global.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/global.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,17 @@ +/* $Id: global.h,v 1.7 2003/10/02 20:04:35 mikpe Exp $ + * Global-mode performance-monitoring counters. + * + * Copyright (C) 2000-2003 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_GLOBAL +extern int gperfctr_ioctl(struct inode*, struct file*, unsigned int, unsigned long); +extern void gperfctr_init(void); +#else +static inline int gperfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -EINVAL; +} +static inline void gperfctr_init(void) { } +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/cpumask.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/cpumask.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/cpumask.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,81 @@ +/* $Id: cpumask.h,v 1.6.2.1 2004/07/12 21:09:45 mikpe Exp $ + * Performance-monitoring counters driver. 
+ * Partial simulation of cpumask_t on non-cpumask_t kernels. + * Extension to allow inspecting a cpumask_t as array of ulong. + * Appropriate definition of perfctr_cpus_forbidden_mask. + * + * Copyright (C) 2003-2004 Mikael Pettersson + */ + +/* 2.6.0-test4 changed set-of-CPUs values from ulong to cpumask_t */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + +#if !defined(PERFCTR_HAVE_CPUMASK_T) && !defined(HAVE_CPUMASK_T) +typedef unsigned long cpumask_t; +#endif + +/* RH/FC1 kernel 2.4.22-1.2115.nptl added cpumask_t, but with + an incomplete API and a broken cpus_and() [misspelled parameter + in its body]. Sigh. + Assume cpumask_t is unsigned long and use our own code. */ +#undef cpu_set +#define cpu_set(cpu, map) atomic_set_mask((1UL << (cpu)), &(map)) +#undef cpu_isset +#define cpu_isset(cpu, map) ((map) & (1UL << (cpu))) +#undef cpus_and +#define cpus_and(dst,src1,src2) do { (dst) = (src1) & (src2); } while(0) +#undef cpus_clear +#define cpus_clear(map) do { (map) = 0UL; } while(0) +#undef cpus_complement +#define cpus_complement(map) do { (map) = ~(map); } while(0) +#undef cpus_empty +#define cpus_empty(map) ((map) == 0UL) +#undef cpus_equal +#define cpus_equal(map1, map2) ((map1) == (map2)) +#undef cpus_addr +#define cpus_addr(map) (&(map)) + +#undef CPU_MASK_NONE +#define CPU_MASK_NONE 0UL + +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,1) + +/* 2.6.1-rc1 introduced cpus_addr() */ +#ifdef CPU_ARRAY_SIZE +#define cpus_addr(map) ((map).mask) +#else +#define cpus_addr(map) (&(map)) +#endif + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) && !defined(cpus_andnot) +#define cpus_andnot(dst, src1, src2) \ +do { \ + cpumask_t _tmp2; \ + _tmp2 = (src2); \ + cpus_complement(_tmp2); \ + cpus_and((dst), (src1), _tmp2); \ +} while(0) +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,8) && !defined(CONFIG_SMP) +#undef cpu_online_map +#define cpu_online_map cpumask_of_cpu(0) +#endif + +#ifdef CPU_ARRAY_SIZE +#define PERFCTR_CPUMASK_NRLONGS 
CPU_ARRAY_SIZE +#else +#define PERFCTR_CPUMASK_NRLONGS 1 +#endif + +/* `perfctr_cpus_forbidden_mask' used to be defined in , + but cpumask_t compatibility issues forced it to be moved here. */ +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED +extern cpumask_t perfctr_cpus_forbidden_mask; +#define perfctr_cpu_is_forbidden(cpu) cpu_isset((cpu), perfctr_cpus_forbidden_mask) +#else +#define perfctr_cpus_forbidden_mask CPU_MASK_NONE +#define perfctr_cpu_is_forbidden(cpu) 0 /* cpu_isset() needs an lvalue :-( */ +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_compat.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_compat.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_compat.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,41 @@ +/* $Id: x86_compat.h,v 1.33 2004/02/29 16:03:03 mikpe Exp $ + * Performance-monitoring counters driver. + * x86/x86_64-specific compatibility definitions for 2.4/2.6 kernels. + * + * Copyright (C) 2000-2004 Mikael Pettersson + */ +#include +#include + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,18) + +/* missing from */ +#define cpu_has_msr boot_cpu_has(X86_FEATURE_MSR) + +#else /* 2.4 */ + +/* missing from */ +#ifndef cpu_has_mmx /* added in 2.4.22-pre3 */ +#define cpu_has_mmx (test_bit(X86_FEATURE_MMX, boot_cpu_data.x86_capability)) +#endif +#define cpu_has_msr (test_bit(X86_FEATURE_MSR, boot_cpu_data.x86_capability)) +#ifndef cpu_has_ht /* added in 2.4.22-pre3 */ +#define cpu_has_ht (test_bit(28, boot_cpu_data.x86_capability)) +#endif + +#endif /* 2.4 */ + +/* irq_enter() and irq_exit() take two parameters in 2.4. However, + we only use them to disable preemption in the interrupt handler, + which isn't needed in non-preemptive 2.4 kernels. 
*/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +#ifdef CONFIG_PREEMPT +#error "not yet ported to 2.4+PREEMPT" +#endif +#undef irq_enter +#undef irq_exit +#define irq_enter() do{}while(0) +#define irq_exit() do{}while(0) +#endif + +extern unsigned int perfctr_cpu_khz(void); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile24 =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/Makefile24 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/Makefile24 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,39 @@ +# $Id: Makefile24,v 1.7.2.1 2004/08/02 22:24:58 mikpe Exp $ +# Performance-monitoring counters driver Makefile for 2.4 kernels. + +# construct various object file lists: +# kernel-objs-y kernel objects exporting symbols +# y-objs-y kernel objects not exporting symbols +# m-objs-m perfctr.o if driver is module, empty otherwise +# driver-objs-y objects for perfctr.o module, or empty + +# This also covers x86_64. 
+driver-objs-$(CONFIG_X86) := x86.o +tests-objs-$(CONFIG_X86) := x86_tests.o +kernel-objs-$(CONFIG_X86) := x86_setup.o + +driver-objs-$(CONFIG_PPC32) := ppc.o +tests-objs-$(CONFIG_PPC32) := ppc_tests.o +kernel-objs-$(CONFIG_PPC32) := ppc_setup.o + +driver-objs-y += init.o marshal.o +driver-objs-$(CONFIG_PERFCTR_INIT_TESTS) += $(tests-objs-y) +driver-objs-$(CONFIG_PERFCTR_VIRTUAL) += virtual.o +stub-objs-$(CONFIG_PERFCTR)-$(CONFIG_PERFCTR_VIRTUAL) := virtual_stub.o +driver-objs-$(CONFIG_PERFCTR_GLOBAL) += global.o +m-objs-$(CONFIG_PERFCTR) := perfctr.o +y-objs-$(CONFIG_PERFCTR) := $(driver-objs-y) +kernel-objs-y += $(stub-objs-m-y) + +perfctr-objs := $(driver-objs-y) +obj-m += $(m-objs-m) + +export-objs := $(kernel-objs-y) +O_TARGET := kperfctr.o +obj-y := $(kernel-objs-y) $(y-objs-y) +list-multi := perfctr.o + +include $(TOPDIR)/Rules.make + +perfctr.o: $(perfctr-objs) + $(LD) -r -o $@ $(perfctr-objs) Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_tests.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,310 @@ +/* $Id: x86_tests.c,v 1.23.2.5 2004/08/02 22:24:58 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86/x86_64-specific init-time tests. 
+ * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#undef MSR_P6_PERFCTR0 +#undef MSR_P4_IQ_CCCR0 +#undef MSR_P4_CRU_ESCR0 +#include +#include +#include "x86_compat.h" +#include "x86_tests.h" + +#define MSR_P5_CESR 0x11 +#define MSR_P5_CTR0 0x12 +#define P5_CESR_VAL (0x16 | (3<<6)) +#define MSR_P6_PERFCTR0 0xC1 +#define MSR_P6_EVNTSEL0 0x186 +#define P6_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) +#define MSR_K7_EVNTSEL0 0xC0010000 +#define MSR_K7_PERFCTR0 0xC0010004 +#define K7_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) +#define VC3_EVNTSEL1_VAL 0xC0 +#define MSR_P4_IQ_COUNTER0 0x30C +#define MSR_P4_IQ_CCCR0 0x36C +#define MSR_P4_CRU_ESCR0 0x3B8 +#define P4_CRU_ESCR0_VAL ((2<<25) | (1<<9) | (0x3<<2)) +#define P4_IQ_CCCR0_VAL ((0x3<<16) | (4<<13) | (1<<12)) + +#define NITER 64 +#define X2(S) S";"S +#define X8(S) X2(X2(X2(S))) + +#ifdef __x86_64__ +#define CR4MOV "movq" +#else +#define CR4MOV "movl" +#endif + +#ifndef CONFIG_X86_LOCAL_APIC +#undef apic_write +#define apic_write(reg,vector) do{}while(0) +#endif + +#if !defined(__x86_64__) +/* Avoid speculative execution by the CPU */ +extern inline void sync_core(void) +{ + int tmp; + asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); +} +#endif + +static void __init do_rdpmc(unsigned pmc, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdpmc") : : "c"(pmc) : "eax", "edx"); +} + +static void __init do_rdmsr(unsigned msr, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); +} + +static void __init do_wrmsr(unsigned msr, unsigned data) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); +} + +static void __init do_rdcr4(unsigned unused1, unsigned unused2) +{ + unsigned i; + unsigned long dummy; + for(i = 0; i < NITER/8; 
++i) + __asm__ __volatile__(X8(CR4MOV" %%cr4,%0") : "=r"(dummy)); +} + +static void __init do_wrcr4(unsigned cr4, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8(CR4MOV" %0,%%cr4") : : "r"((long)cr4)); +} + +static void __init do_rdtsc(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); +} + +static void __init do_wrlvtpc(unsigned val, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) { + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + } +} + +static void __init do_sync_core(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) { + sync_core(); + sync_core(); + sync_core(); + sync_core(); + sync_core(); + sync_core(); + sync_core(); + sync_core(); + } +} + +static void __init do_empty_loop(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__("" : : "c"(0)); +} + +static unsigned __init run(void (*doit)(unsigned, unsigned), + unsigned arg1, unsigned arg2) +{ + unsigned start, dummy, stop; + sync_core(); + rdtsc(start, dummy); + (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ + sync_core(); + rdtsc(stop, dummy); + return stop - start; +} + +static void __init init_tests_message(void) +{ + printk(KERN_INFO "Please email the following PERFCTR INIT lines " + "to mikpe@csd.uu.se\n" + KERN_INFO "To remove this message, rebuild the driver " + "with CONFIG_PERFCTR_INIT_TESTS=n\n"); + printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %u kHz\n", + current_cpu_data.x86_vendor, + current_cpu_data.x86, + current_cpu_data.x86_model, + current_cpu_data.x86_mask, + perfctr_cpu_khz()); +} + +static void __init 
+measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0, + unsigned msr_cccr, unsigned cccr_val) +{ + int i; + unsigned int loop, ticks[13]; + const char *name[13]; + + if (msr_evntsel0) + wrmsr(msr_evntsel0, 0, 0); + if (msr_cccr) + wrmsr(msr_cccr, 0, 0); + + name[0] = "rdtsc"; + ticks[0] = run(do_rdtsc, 0, 0); + name[1] = "rdpmc"; + ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + ? run(do_rdpmc,1,0) : 0; + name[2] = "rdmsr (counter)"; + ticks[2] = msr_perfctr0 ? run(do_rdmsr, msr_perfctr0, 0) : 0; + name[3] = msr_cccr ? "rdmsr (escr)" : "rdmsr (evntsel)"; + ticks[3] = msr_evntsel0 ? run(do_rdmsr, msr_evntsel0, 0) : 0; + name[4] = "wrmsr (counter)"; + ticks[4] = msr_perfctr0 ? run(do_wrmsr, msr_perfctr0, 0) : 0; + name[5] = msr_cccr ? "wrmsr (escr)" : "wrmsr (evntsel)"; + ticks[5] = msr_evntsel0 ? run(do_wrmsr, msr_evntsel0, evntsel0) : 0; + name[6] = "read cr4"; + ticks[6] = run(do_rdcr4, 0, 0); + name[7] = "write cr4"; + ticks[7] = run(do_wrcr4, read_cr4(), 0); + name[8] = "rdpmc (fast)"; + ticks[8] = msr_cccr ? run(do_rdpmc, 0x80000001, 0) : 0; + name[9] = "rdmsr (cccr)"; + ticks[9] = msr_cccr ? run(do_rdmsr, msr_cccr, 0) : 0; + name[10] = "wrmsr (cccr)"; + ticks[10] = msr_cccr ? run(do_wrmsr, msr_cccr, cccr_val) : 0; + name[11] = "write LVTPC"; + ticks[11] = (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + ? 
run(do_wrlvtpc, APIC_DM_NMI|APIC_LVT_MASKED, 0) : 0; + name[12] = "sync_core"; + ticks[12] = run(do_sync_core, 0, 0); + + loop = run(do_empty_loop, 0, 0); + + if (msr_evntsel0) + wrmsr(msr_evntsel0, 0, 0); + if (msr_cccr) + wrmsr(msr_cccr, 0, 0); + + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); + printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); + for(i = 0; i < ARRAY_SIZE(ticks); ++i) { + unsigned int x; + if (!ticks[i]) + continue; + x = ((ticks[i] - loop) * 10) / NITER; + printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", + name[i], x/10, x%10, ticks[i]); + } +} + +#ifndef __x86_64__ +static inline void perfctr_p5_init_tests(void) +{ + measure_overheads(MSR_P5_CESR, P5_CESR_VAL, MSR_P5_CTR0, 0, 0); +} + +static inline void perfctr_p6_init_tests(void) +{ + measure_overheads(MSR_P6_EVNTSEL0, P6_EVNTSEL0_VAL, MSR_P6_PERFCTR0, 0, 0); +} + +#if !defined(CONFIG_X86_TSC) +static inline void perfctr_c6_init_tests(void) +{ + unsigned int cesr, dummy; + + rdmsr(MSR_P5_CESR, cesr, dummy); + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: boot CESR == %#08x\n", cesr); +} +#endif + +static inline void perfctr_vc3_init_tests(void) +{ + measure_overheads(MSR_P6_EVNTSEL0+1, VC3_EVNTSEL1_VAL, MSR_P6_PERFCTR0+1, 0, 0); +} +#endif /* !__x86_64__ */ + +static inline void perfctr_p4_init_tests(void) +{ + measure_overheads(MSR_P4_CRU_ESCR0, P4_CRU_ESCR0_VAL, MSR_P4_IQ_COUNTER0, + MSR_P4_IQ_CCCR0, P4_IQ_CCCR0_VAL); +} + +static inline void perfctr_k7_init_tests(void) +{ + measure_overheads(MSR_K7_EVNTSEL0, K7_EVNTSEL0_VAL, MSR_K7_PERFCTR0, 0, 0); +} + +static inline void perfctr_generic_init_tests(void) +{ + measure_overheads(0, 0, 0, 0, 0); +} + +enum perfctr_x86_tests_type perfctr_x86_tests_type __initdata = PTT_UNKNOWN; + +void __init perfctr_x86_init_tests(void) +{ + switch (perfctr_x86_tests_type) { +#ifndef __x86_64__ + case PTT_P5: /* Intel P5, P5MMX; Cyrix 6x86MX, MII, III */ + 
perfctr_p5_init_tests(); + break; + case PTT_P6: /* Intel PPro, PII, PIII, PENTM */ + perfctr_p6_init_tests(); + break; +#if !defined(CONFIG_X86_TSC) + case PTT_WINCHIP: /* WinChip C6, 2, 3 */ + perfctr_c6_init_tests(); + break; +#endif + case PTT_VC3: /* VIA C3 */ + perfctr_vc3_init_tests(); + break; +#endif /* !__x86_64__ */ + case PTT_P4: /* Intel P4 */ + perfctr_p4_init_tests(); + break; + case PTT_AMD: /* AMD K7, K8 */ + perfctr_k7_init_tests(); + break; + case PTT_GENERIC: + perfctr_generic_init_tests(); + break; + default: + printk(KERN_INFO "%s: unknown CPU type %u\n", + __FUNCTION__, perfctr_x86_tests_type); + break; + } +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,925 @@ +/* $Id: ppc.c,v 1.3.2.8 2004/10/19 15:18:21 mikpe Exp $ + * PPC32 performance-monitoring counters driver. + * + * Copyright (C) 2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include /* tb_ticks_per_jiffy, get_tbl() */ + +#include "compat.h" +#include "ppc_compat.h" +#include "ppc_tests.h" + +/* Support for lazy evntsel and perfctr SPR updates. */ +struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ + union { + unsigned int id; /* cache owner id */ + } k1; + /* Physically indexed cache of the MMCRs. */ + unsigned int ppc_mmcr[3]; +} ____cacheline_aligned; +static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; +#define get_cpu_cache() (&per_cpu_cache[smp_processor_id()]) + +/* Structure for counter snapshots, as 32-bit values. 
*/ +struct perfctr_low_ctrs { + unsigned int tsc; + unsigned int pmc[6]; +}; + +enum pm_type { + PM_NONE, + PM_604, + PM_604e, + PM_750, /* XXX: Minor event set diffs between IBM and Moto. */ + PM_7400, + PM_7450, +}; +static enum pm_type pm_type; + +/* Bits users shouldn't set in control.ppc.mmcr0: + * - PMXE because we don't yet support overflow interrupts + * - PMC1SEL/PMC2SEL because event selectors are in control.evntsel[] + */ +#define MMCR0_RESERVED (MMCR0_PMXE | MMCR0_PMC1SEL | MMCR0_PMC2SEL) + +static unsigned int new_id(void) +{ + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static unsigned int counter; + int id; + + spin_lock(&lock); + id = ++counter; + spin_unlock(&lock); + return id; +} + +#ifndef PERFCTR_INTERRUPT_SUPPORT +#define perfctr_cstatus_has_ictrs(cstatus) 0 +#endif + +#if defined(CONFIG_SMP) && defined(PERFCTR_INTERRUPT_SUPPORT) + +static inline void +set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) +{ + state->k1.isuspend_cpu = cpu; +} + +static inline int +is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) +{ + return state->k1.isuspend_cpu == cpu; +} + +static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) +{ + state->k1.isuspend_cpu = NR_CPUS; +} + +#else +static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { } +static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; } +static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } +#endif + +/* The ppc driver internally uses cstatus & (1<<30) to record that + a context has an asynchronously changing MMCR0. */ +static inline unsigned int perfctr_cstatus_set_mmcr0_quirk(unsigned int cstatus) +{ + return cstatus | (1 << 30); +} + +static inline int perfctr_cstatus_has_mmcr0_quirk(unsigned int cstatus) +{ + return cstatus & (1 << 30); +} + +/**************************************************************** + * * + * Driver procedures. 
* + * * + ****************************************************************/ + +/* + * The PowerPC 604/750/74xx family. + * + * Common features + * --------------- + * - Per counter event selection data in subfields of control registers. + * MMCR0 contains both global control and PMC1/PMC2 event selectors. + * - Overflow interrupt support is present in all processors, but an + * erratum makes it difficult to use in 750/7400/7410 processors. + * - There is no concept of per-counter qualifiers: + * - User-mode/supervisor-mode restrictions are global. + * - Two groups of counters, PMC1 and PMC2-PMC. Each group + * has a single overflow interrupt/event enable/disable flag. + * - The instructions used to read (mfspr) and write (mtspr) the control + * and counter registers (SPRs) only support hardcoded register numbers. + * There is no support for accessing an SPR via a runtime value. + * - Each counter supports its own unique set of events. However, events + * 0-1 are common for PMC1-PMC4, and events 2-4 are common for PMC1-PMC4. + * - There is no separate high-resolution core clock counter. + * The time-base counter is available, but it typically runs an order of + * magnitude slower than the core clock. + * Any performance counter can be programmed to count core clocks, but + * doing this (a) reserves one PMC, and (b) needs indirect accesses + * since the SPR number in general isn't known at compile-time. + * + * Driver notes + * ------------ + * - The driver currently does not support performance monitor interrupts, + * mostly because of the 750/7400/7410 erratum. Working around it would + * require disabling the decrementer interrupt, reserving a performance + * counter and setting it up for TBL bit-flip events, and having the PMI + * handler invoke the decrementer handler. + * + * 604 + * --- + * 604 has MMCR0, PMC1, PMC2, SIA, and SDA. + * + * MMCR0[THRESHOLD] is not automatically multiplied. 
+ * + * On the 604, software must always reset MMCR0[ENINT] after + * taking a PMI. This is not the case for the 604e. + * + * 604e + * ---- + * 604e adds MMCR1, PMC3, and PMC4. + * Bus-to-core multiplier is available via HID1[PLL_CFG]. + * + * MMCR0[THRESHOLD] is automatically multiplied by 4. + * + * When the 604e vectors to the PMI handler, it automatically + * clears any pending PMIs. Unlike the 604, the 604e does not + * require MMCR0[ENINT] to be cleared (and possibly reset) + * before external interrupts can be re-enabled. + * + * 750 + * --- + * 750 adds user-readable MMCRn/PMCn/SIA registers, and removes SDA. + * + * MMCR0[THRESHOLD] is not automatically multiplied. + * + * Motorola MPC750UM.pdf, page C-78, states: "The performance monitor + * of the MPC755 functions the same as that of the MPC750, (...), except + * that for both the MPC750 and MPC755, no combination of the thermal + * assist unit, the decrementer register, and the performance monitor + * can be used at any one time. If exceptions for any two of these + * functional blocks are enabled together, multiple exceptions caused + * by any of these three blocks cause unpredictable results." + * + * IBM 750CXe_Err_DD2X.pdf, Erratum #13, states that a PMI which + * occurs immediately after a delayed decrementer exception can + * corrupt SRR0, causing the processor to hang. It also states that + * PMIs via TB bit transitions can be used to simulate the decrementer. + * + * 750FX adds dual-PLL support and programmable core frequency switching. + * + * 74xx + * ---- + * 7400 adds MMCR2 and BAMR. + * + * MMCR0[THRESHOLD] is multiplied by 2 or 32, as specified + * by MMCR2[THRESHMULT]. + * + * 74xx changes the semantics of several MMCR0 control bits, + * compared to 604/750. + * + * PPC7410 Erratum No. 10: Like the MPC750 TAU/DECR/PMI erratum. + * Erratum No. 14 marks TAU as unsupported in 7410, but this leaves + * perfmon and decrementer interrupts as being mutually exclusive. 
+ * Affects PPC7410 1.0-1.2 (PVR 0x800C1100-0x800C1102). 1.3 and up + * (PVR 0x800C1103 up) are Ok. + * + * 7450 adds PMC5 and PMC6. + * + * 7455/7445 V3.3 (PVR 80010303) and later use the 7457 PLL table, + * earlier revisions use the 7450 PLL table + */ + +static inline unsigned int read_pmc(unsigned int pmc) +{ + switch (pmc) { + default: /* impossible, but silences gcc warning */ + case 0: + return mfspr(SPRN_PMC1); + case 1: + return mfspr(SPRN_PMC2); + case 2: + return mfspr(SPRN_PMC3); + case 3: + return mfspr(SPRN_PMC4); + case 4: + return mfspr(SPRN_PMC5); + case 5: + return mfspr(SPRN_PMC6); + } +} + +static void ppc_read_counters(struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + ctrs->tsc = get_tbl(); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->pmc[i].map; + ctrs->pmc[i] = read_pmc(pmc); + } +} + +static unsigned int pmc_max_event(unsigned int pmc) +{ + switch (pmc) { + default: /* impossible, but silences gcc warning */ + case 0: + return 127; + case 1: + return 63; + case 2: + return 31; + case 3: + return 31; + case 4: + return 31; + case 5: + return 63; + } +} + +static unsigned int get_nr_pmcs(void) +{ + switch (pm_type) { + case PM_7450: + return 6; + case PM_7400: + case PM_750: + case PM_604e: + return 4; + case PM_604: + return 2; + default: /* PM_NONE, but silences gcc warning */ + return 0; + } +} + +static int ppc_check_control(struct perfctr_cpu_state *state) +{ + unsigned int i, nrctrs, pmc_mask, pmc; + unsigned int nr_pmcs, evntsel[6]; + + nr_pmcs = get_nr_pmcs(); + nrctrs = state->control.nractrs; + if (state->control.nrictrs || nrctrs > nr_pmcs) + return -EINVAL; + + pmc_mask = 0; + memset(evntsel, 0, sizeof evntsel); + for(i = 0; i < nrctrs; ++i) { + pmc = state->control.pmc_map[i]; + state->pmc[i].map = pmc; + if (pmc >= nr_pmcs || (pmc_mask & 
(1<control.evntsel[i]; + if (evntsel[pmc] > pmc_max_event(pmc)) + return -EINVAL; + } + + switch (pm_type) { + case PM_7450: + case PM_7400: + if (state->control.ppc.mmcr2 & MMCR2_RESERVED) + return -EINVAL; + state->ppc_mmcr[2] = state->control.ppc.mmcr2; + break; + default: + if (state->control.ppc.mmcr2) + return -EINVAL; + state->ppc_mmcr[2] = 0; + } + + if (state->control.ppc.mmcr0 & MMCR0_RESERVED) + return -EINVAL; + state->ppc_mmcr[0] = (state->control.ppc.mmcr0 + | (evntsel[0] << (31-25)) + | (evntsel[1] << (31-31))); + + state->ppc_mmcr[1] = (( evntsel[2] << (31-4)) + | (evntsel[3] << (31-9)) + | (evntsel[4] << (31-14)) + | (evntsel[5] << (31-20))); + + state->k1.id = new_id(); + + /* + * MMCR0[FC] and MMCR0[TRIGGER] may change on 74xx if FCECE or + * TRIGGER is set. At suspends we must read MMCR0 back into + * the state and the cache and then freeze the counters, and + * at resumes we must unfreeze the counters and reload MMCR0. + */ + switch (pm_type) { + case PM_7450: + case PM_7400: + if (state->ppc_mmcr[0] & (MMCR0_FCECE | MMCR0_TRIGGER)) + state->cstatus = perfctr_cstatus_set_mmcr0_quirk(state->cstatus); + default: + ; + } + + return 0; +} + +#ifdef PERFCTR_INTERRUPT_SUPPORT +static void ppc_isuspend(struct perfctr_cpu_state *state) +{ + // XXX +} + +static void ppc_iresume(const struct perfctr_cpu_state *state) +{ + // XXX +} +#endif + +static void ppc_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int value; + + cache = get_cpu_cache(); + if (cache->k1.id == state->k1.id) + return; + /* + * Order matters here: update threshmult and event + * selectors before updating global control, which + * potentially enables PMIs. + * + * Since mtspr doesn't accept a runtime value for the + * SPR number, unroll the loop so each mtspr targets + * a constant SPR. + * + * For processors without MMCR2, we ensure that the + * cache and the state indicate the same value for it, + * preventing any actual mtspr to it. 
Ditto for MMCR1. + */ + value = state->ppc_mmcr[2]; + if (value != cache->ppc_mmcr[2]) { + cache->ppc_mmcr[2] = value; + mtspr(SPRN_MMCR2, value); + } + value = state->ppc_mmcr[1]; + if (value != cache->ppc_mmcr[1]) { + cache->ppc_mmcr[1] = value; + mtspr(SPRN_MMCR1, value); + } + value = state->ppc_mmcr[0]; + if (value != cache->ppc_mmcr[0]) { + cache->ppc_mmcr[0] = value; + mtspr(SPRN_MMCR0, value); + } + cache->k1.id = state->k1.id; +} + +static void ppc_clear_counters(void) +{ + switch (pm_type) { + case PM_7450: + case PM_7400: + mtspr(SPRN_MMCR2, 0); + mtspr(SPRN_BAMR, 0); + case PM_750: + case PM_604e: + mtspr(SPRN_MMCR1, 0); + case PM_604: + mtspr(SPRN_MMCR0, 0); + case PM_NONE: + ; + } + switch (pm_type) { + case PM_7450: + mtspr(SPRN_PMC6, 0); + mtspr(SPRN_PMC5, 0); + case PM_7400: + case PM_750: + case PM_604e: + mtspr(SPRN_PMC4, 0); + mtspr(SPRN_PMC3, 0); + case PM_604: + mtspr(SPRN_PMC2, 0); + mtspr(SPRN_PMC1, 0); + case PM_NONE: + ; + } +} + +/* + * Driver methods, internal and exported. + */ + +static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) +{ + return ppc_write_control(state); +} + +static void perfctr_cpu_read_counters(struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + return ppc_read_counters(state, ctrs); +} + +#ifdef PERFCTR_INTERRUPT_SUPPORT +static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) +{ + return ppc_isuspend(state); +} + +static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) +{ + return ppc_iresume(state); +} + +/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to + bypass internal caching and force a reload if the I-mode PMCs. 
*/ +void perfctr_cpu_ireload(struct perfctr_cpu_state *state) +{ +#ifdef CONFIG_SMP + clear_isuspend_cpu(state); +#else + get_cpu_cache()->k1.id = 0; +#endif +} + +/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ +unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, pmc, pmc_mask; + + cstatus = state->cstatus; + pmc = perfctr_cstatus_nractrs(cstatus); + nrctrs = perfctr_cstatus_nrctrs(cstatus); + + for(pmc_mask = 0; pmc < nrctrs; ++pmc) { + if ((int)state->pmc[pmc].start < 0) { /* PPC-specific */ + /* XXX: "+=" to correct for overshots */ + state->pmc[pmc].start = state->control.ireset[pmc]; + pmc_mask |= (1 << pmc); + } + } + /* XXX: if pmc_mask == 0, then it must have been a TBL bit flip */ + /* XXX: HW cleared MMCR0[ENINT]. We presumably cleared the entire + MMCR0, so the re-enable occurs automatically later, no? */ + return pmc_mask; +} + +static inline int check_ireset(const struct perfctr_cpu_state *state) +{ + unsigned int nrctrs, i; + + i = state->control.nractrs; + nrctrs = i + state->control.nrictrs; + for(; i < nrctrs; ++i) + if (state->control.ireset[i] < 0) /* PPC-specific */ + return -EINVAL; + return 0; +} + +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + state->pmc[i].start = state->control.ireset[i]; +} + +#else /* PERFCTR_INTERRUPT_SUPPORT */ +static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { } +static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { } +static inline int check_ireset(const struct perfctr_cpu_state *state) { return 0; } +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) { } +#endif /* PERFCTR_INTERRUPT_SUPPORT */ + +static int check_control(struct perfctr_cpu_state 
*state) +{ + return ppc_check_control(state); +} + +int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) +{ + int err; + + clear_isuspend_cpu(state); + state->cstatus = 0; + + /* disallow i-mode counters if we cannot catch the interrupts */ + if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + && state->control.nrictrs) + return -EPERM; + + err = check_ireset(state); + if (err < 0) + return err; + err = check_control(state); /* may initialise state->cstatus */ + if (err < 0) + return err; + state->cstatus |= perfctr_mk_cstatus(state->control.tsc_on, + state->control.nractrs, + state->control.nrictrs); + setup_imode_start_values(state); + return 0; +} + +void perfctr_cpu_suspend(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + if (perfctr_cstatus_has_mmcr0_quirk(state->cstatus)) { + unsigned int mmcr0 = mfspr(SPRN_MMCR0); + mtspr(SPRN_MMCR0, mmcr0 | MMCR0_FC); + get_cpu_cache()->ppc_mmcr[0] = mmcr0 | MMCR0_FC; + state->ppc_mmcr[0] = mmcr0; + } + if (perfctr_cstatus_has_ictrs(state->cstatus)) + perfctr_cpu_isuspend(state); + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + state->tsc_sum += now.tsc - state->tsc_start; + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; +} + +void perfctr_cpu_resume(struct perfctr_cpu_state *state) +{ + if (perfctr_cstatus_has_ictrs(state->cstatus)) + perfctr_cpu_iresume(state); + if (perfctr_cstatus_has_mmcr0_quirk(state->cstatus)) + get_cpu_cache()->k1.id = 0; /* force reload of MMCR0 */ + perfctr_cpu_write_control(state); + //perfctr_cpu_read_counters(state, &state->start); + { + struct perfctr_low_ctrs now; + unsigned int i, cstatus, nrctrs; + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + state->tsc_start = now.tsc; + nrctrs = 
perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) + state->pmc[i].start = now.pmc[i]; + } + /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ +} + +void perfctr_cpu_sample(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) { + state->tsc_sum += now.tsc - state->tsc_start; + state->tsc_start = now.tsc; + } + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) { + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; + state->pmc[i].start = now.pmc[i]; + } +} + +static void perfctr_cpu_clear_counters(void) +{ + struct per_cpu_cache *cache; + + cache = get_cpu_cache(); + memset(cache, 0, sizeof *cache); + cache->k1.id = -1; + + ppc_clear_counters(); +} + +/**************************************************************** + * * + * Processor detection and initialisation procedures. * + * * + ****************************************************************/ + +/* Derive CPU core frequency from TB frequency and PLL_CFG. */ + +enum pll_type { + PLL_NONE, /* for e.g. 604 which has no HID1[PLL_CFG] */ + PLL_604e, + PLL_750, + PLL_750FX, + PLL_7400, + PLL_7450, + PLL_7457, +}; + +/* These are the known bus-to-core ratios, indexed by PLL_CFG. + Multiplied by 2 since half-multiplier steps are present. 
*/ + +static unsigned char cfg_ratio_604e[16] __initdata = { // *2 + 2, 2, 14, 2, 4, 13, 5, 9, + 6, 11, 8, 10, 3, 12, 7, 0 +}; + +static unsigned char cfg_ratio_750[16] __initdata = { // *2 + 5, 15, 14, 2, 4, 13, 20, 9, // 0b0110 is 18 if L1_TSTCLK=0, but that is abnormal + 6, 11, 8, 10, 16, 12, 7, 0 +}; + +static unsigned char cfg_ratio_750FX[32] __initdata = { // *2 + 0, 0, 2, 2, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 22, 24, 26, + 28, 30, 32, 34, 36, 38, 40, 0 +}; + +static unsigned char cfg_ratio_7400[16] __initdata = { // *2 + 18, 15, 14, 2, 4, 13, 5, 9, + 6, 11, 8, 10, 16, 12, 7, 0 +}; + +static unsigned char cfg_ratio_7450[32] __initdata = { // *2 + 1, 0, 15, 30, 14, 0, 2, 0, + 4, 0, 13, 26, 5, 0, 9, 18, + 6, 0, 11, 22, 8, 20, 10, 24, + 16, 28, 12, 32, 7, 0, 0, 0 +}; + +static unsigned char cfg_ratio_7457[32] __initdata = { // *2 + 23, 34, 15, 30, 14, 36, 2, 40, + 4, 42, 13, 26, 17, 48, 19, 18, + 6, 21, 11, 22, 8, 20, 10, 24, + 16, 28, 12, 32, 27, 56, 0, 25 +}; + +static unsigned int __init tb_to_core_ratio(enum pll_type pll_type) +{ + unsigned char *cfg_ratio; + unsigned int shift = 28, mask = 0xF, hid1, pll_cfg, ratio; + + switch (pll_type) { + case PLL_604e: + cfg_ratio = cfg_ratio_604e; + break; + case PLL_750: + cfg_ratio = cfg_ratio_750; + break; + case PLL_750FX: + cfg_ratio = cfg_ratio_750FX; + hid1 = mfspr(SPRN_HID1); + switch ((hid1 >> 16) & 0x3) { /* HID1[PI0,PS] */ + case 0: /* PLL0 with external config */ + shift = 31-4; /* access HID1[PCE] */ + break; + case 2: /* PLL0 with internal config */ + shift = 31-20; /* access HID1[PC0] */ + break; + case 1: case 3: /* PLL1 */ + shift = 31-28; /* access HID1[PC1] */ + break; + } + mask = 0x1F; + break; + case PLL_7400: + cfg_ratio = cfg_ratio_7400; + break; + case PLL_7450: + cfg_ratio = cfg_ratio_7450; + shift = 12; + mask = 0x1F; + break; + case PLL_7457: + cfg_ratio = cfg_ratio_7457; + shift = 12; + mask = 0x1F; + break; + default: + return 0; + } + hid1 = 
mfspr(SPRN_HID1); + pll_cfg = (hid1 >> shift) & mask; + ratio = cfg_ratio[pll_cfg]; + if (!ratio) + printk(KERN_WARNING "perfctr: unknown PLL_CFG 0x%x\n", pll_cfg); + return (4/2) * ratio; +} + +static unsigned int __init pll_to_core_khz(enum pll_type pll_type) +{ + unsigned int tb_to_core = tb_to_core_ratio(pll_type); + perfctr_info.tsc_to_cpu_mult = tb_to_core; + return tb_ticks_per_jiffy * tb_to_core * (HZ/10) / (1000/10); +} + +/* Extract core and timebase frequencies from Open Firmware. */ + +static unsigned int __init of_to_core_khz(void) +{ + struct device_node *cpu; + unsigned int *fp, core, tb; + + cpu = find_type_devices("cpu"); + if (!cpu) + return 0; + fp = (unsigned int*)get_property(cpu, "clock-frequency", NULL); + if (!fp || !(core = *fp)) + return 0; + fp = (unsigned int*)get_property(cpu, "timebase-frequency", NULL); + if (!fp || !(tb = *fp)) + return 0; + perfctr_info.tsc_to_cpu_mult = core / tb; + return core / 1000; +} + +static unsigned int __init detect_cpu_khz(enum pll_type pll_type) +{ + unsigned int khz; + + khz = pll_to_core_khz(pll_type); + if (khz) + return khz; + + khz = of_to_core_khz(); + if (khz) + return khz; + + printk(KERN_WARNING "perfctr: unable to determine CPU speed\n"); + return 0; +} + +static int __init known_init(void) +{ + static char known_name[] __initdata = "PowerPC 60x/7xx/74xx"; + unsigned int features; + enum pll_type pll_type; + unsigned int pvr; + int have_mmcr1; + + features = PERFCTR_FEATURE_RDTSC | PERFCTR_FEATURE_RDPMC; + have_mmcr1 = 1; + pvr = mfspr(SPRN_PVR); + switch (PVR_VER(pvr)) { + case 0x0004: /* 604 */ + pm_type = PM_604; + pll_type = PLL_NONE; + features = PERFCTR_FEATURE_RDTSC; + have_mmcr1 = 0; + break; + case 0x0009: /* 604e; */ + case 0x000A: /* 604ev */ + pm_type = PM_604e; + pll_type = PLL_604e; + features = PERFCTR_FEATURE_RDTSC; + break; + case 0x0008: /* 750/740 */ + pm_type = PM_750; + pll_type = PLL_750; + break; + case 0x7000: case 0x7001: /* IBM750FX */ + case 0x7002: /* IBM750GX */ + 
pm_type = PM_750; + pll_type = PLL_750FX; + break; + case 0x000C: /* 7400 */ + pm_type = PM_7400; + pll_type = PLL_7400; + break; + case 0x800C: /* 7410 */ + pm_type = PM_7400; + pll_type = PLL_7400; + break; + case 0x8000: /* 7451/7441 */ + pm_type = PM_7450; + pll_type = PLL_7450; + break; + case 0x8001: /* 7455/7445 */ + pm_type = PM_7450; + pll_type = ((pvr & 0xFFFF) < 0x0303) ? PLL_7450 : PLL_7457; + break; + case 0x8002: /* 7457/7447 */ + pm_type = PM_7450; + pll_type = PLL_7457; + break; + default: + return -ENODEV; + } + perfctr_info.cpu_features = features; + perfctr_info.cpu_type = 0; /* user-space should inspect PVR */ + perfctr_cpu_name = known_name; + perfctr_info.cpu_khz = detect_cpu_khz(pll_type); + perfctr_ppc_init_tests(have_mmcr1); + return 0; +} + +static int __init unknown_init(void) +{ + static char unknown_name[] __initdata = "Generic PowerPC with TB"; + unsigned int khz; + + khz = detect_cpu_khz(PLL_NONE); + if (!khz) + return -ENODEV; + perfctr_info.cpu_features = PERFCTR_FEATURE_RDTSC; + perfctr_info.cpu_type = 0; + perfctr_cpu_name = unknown_name; + perfctr_info.cpu_khz = khz; + pm_type = PM_NONE; + return 0; +} + +static void perfctr_cpu_clear_one(void *ignore) +{ + /* PREEMPT note: when called via on_each_cpu(), + this is in IRQ context with preemption disabled. */ + perfctr_cpu_clear_counters(); +} + +static void perfctr_cpu_reset(void) +{ + on_each_cpu(perfctr_cpu_clear_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); +} + +int __init perfctr_cpu_init(void) +{ + int err; + + perfctr_info.cpu_features = 0; + + err = known_init(); + if (err) { + err = unknown_init(); + if (err) + goto out; + } + + perfctr_cpu_reset(); + out: + return err; +} + +void __exit perfctr_cpu_exit(void) +{ + perfctr_cpu_reset(); +} + +/**************************************************************** + * * + * Hardware reservation. 
* + * * + ****************************************************************/ + +static DECLARE_MUTEX(mutex); +static const char *current_service = 0; + +const char *perfctr_cpu_reserve(const char *service) +{ + const char *ret; + + down(&mutex); + ret = current_service; + if (!ret) + { + current_service = service; + __module_get(THIS_MODULE); + } + up(&mutex); + return ret; +} + +void perfctr_cpu_release(const char *service) +{ + down(&mutex); + if (service != current_service) { + printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", + __FUNCTION__, service, current_service); + } else { + /* power down the counters */ + perfctr_cpu_reset(); + current_service = 0; + module_put(THIS_MODULE); + } + up(&mutex); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/virtual.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/virtual.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,1049 @@ +/* $Id: virtual.c,v 1.88.2.2 2004/10/19 15:23:43 mikpe Exp $ + * Virtual per-process performance counters. + * + * Copyright (C) 1999-2003 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include /* for unlikely() in 2.4.18 and older */ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "compat.h" +#include "virtual.h" +#include "marshal.h" + +/**************************************************************** + * * + * Data types and macros. 
* + * * + ****************************************************************/ + +struct vperfctr { +/* User-visible fields: (must be first for mmap()) */ + struct perfctr_cpu_state cpu_state; +/* Kernel-private fields: */ + int si_signo; + atomic_t count; + spinlock_t owner_lock; + struct task_struct *owner; + /* sampling_timer and bad_cpus_allowed are frequently + accessed, so they get to share a cache line */ + unsigned int sampling_timer ____cacheline_aligned; +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + atomic_t bad_cpus_allowed; +#endif +#if 0 && defined(CONFIG_PERFCTR_DEBUG) + unsigned start_smp_id; + unsigned suspended; +#endif +#if PERFCTR_INTERRUPT_SUPPORT + unsigned int iresume_cstatus; +#endif +}; +#define IS_RUNNING(perfctr) perfctr_cstatus_enabled((perfctr)->cpu_state.cstatus) + +/* XXX: disabled: called from switch_to() where printk() is disallowed */ +#if 0 && defined(CONFIG_PERFCTR_DEBUG) +#define debug_free(perfctr) \ +do { \ + int i; \ + for(i = 0; i < PAGE_SIZE/sizeof(int); ++i) \ + ((int*)(perfctr))[i] = 0xfedac0ed; \ +} while( 0 ) +#define debug_init(perfctr) do { (perfctr)->suspended = 1; } while( 0 ) +#define debug_suspend(perfctr) \ +do { \ + if( (perfctr)->suspended ) \ + printk(KERN_ERR "%s: BUG! suspending non-running perfctr (pid %d, comm %s)\n", \ + __FUNCTION__, current->pid, current->comm); \ + (perfctr)->suspended = 1; \ +} while( 0 ) +#define debug_resume(perfctr) \ +do { \ + if( !(perfctr)->suspended ) \ + printk(KERN_ERR "%s: BUG! resuming non-suspended perfctr (pid %d, comm %s)\n", \ + __FUNCTION__, current->pid, current->comm); \ + (perfctr)->suspended = 0; \ +} while( 0 ) +#define debug_check_smp_id(perfctr) \ +do { \ + if( (perfctr)->start_smp_id != smp_processor_id() ) { \ + printk(KERN_ERR "%s: BUG! 
current cpu %u differs from start cpu %u (pid %d, comm %s)\n", \ + __FUNCTION__, smp_processor_id(), (perfctr)->start_smp_id, \ + current->pid, current->comm); \ + return; \ + } \ +} while( 0 ) +#define debug_set_smp_id(perfctr) \ + do { (perfctr)->start_smp_id = smp_processor_id(); } while( 0 ) +#else /* CONFIG_PERFCTR_DEBUG */ +#define debug_free(perfctr) do{}while(0) +#define debug_init(perfctr) do{}while(0) +#define debug_suspend(perfctr) do{}while(0) +#define debug_resume(perfctr) do{}while(0) +#define debug_check_smp_id(perfctr) do{}while(0) +#define debug_set_smp_id(perfctr) do{}while(0) +#endif /* CONFIG_PERFCTR_DEBUG */ + +#if PERFCTR_INTERRUPT_SUPPORT + +static void vperfctr_ihandler(unsigned long pc); + +static inline void vperfctr_set_ihandler(void) +{ + perfctr_cpu_set_ihandler(vperfctr_ihandler); +} + +static inline void vperfctr_clear_iresume_cstatus(struct vperfctr *perfctr) +{ + perfctr->iresume_cstatus = 0; +} + +#else +static inline void vperfctr_set_ihandler(void) { } +static inline void vperfctr_clear_iresume_cstatus(struct vperfctr *perfctr) { } +#endif + +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + +static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr) +{ + atomic_set(&perfctr->bad_cpus_allowed, 0); +} + +/* Concurrent set_cpus_allowed() is possible. The only lock it + can take is the task lock, so we have to take it as well. + task_lock/unlock also disables/enables preemption. */ + +static inline void vperfctr_task_lock(struct task_struct *p) +{ + task_lock(p); +} + +static inline void vperfctr_task_unlock(struct task_struct *p) +{ + task_unlock(p); +} + +#else /* !PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED */ + +static inline void vperfctr_init_bad_cpus_allowed(struct vperfctr *perfctr) { } + +/* Concurrent set_cpus_allowed() is impossible or irrelevant. + Disabling and enabling preemption suffices for an atomic region. 
*/ + +static inline void vperfctr_task_lock(struct task_struct *p) +{ + preempt_disable(); +} + +static inline void vperfctr_task_unlock(struct task_struct *p) +{ + preempt_enable(); +} + +#endif /* !PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED */ + +/**************************************************************** + * * + * Resource management. * + * * + ****************************************************************/ + +/* XXX: perhaps relax this to number of _live_ perfctrs */ +static DECLARE_MUTEX(nrctrs_mutex); +static int nrctrs; +static const char this_service[] = __FILE__; + +static int inc_nrctrs(void) +{ + const char *other; + + other = NULL; + down(&nrctrs_mutex); + if( ++nrctrs == 1 ) { + other = perfctr_cpu_reserve(this_service); + if( other ) + nrctrs = 0; + } + up(&nrctrs_mutex); + if( other ) { + printk(KERN_ERR __FILE__ + ": cannot operate, perfctr hardware taken by '%s'\n", + other); + return -EBUSY; + } + vperfctr_set_ihandler(); + return 0; +} + +static void dec_nrctrs(void) +{ + down(&nrctrs_mutex); + if( --nrctrs == 0 ) + perfctr_cpu_release(this_service); + up(&nrctrs_mutex); +} + +static struct vperfctr *vperfctr_alloc(void) +{ + unsigned long page; + + if( inc_nrctrs() != 0 ) + return ERR_PTR(-EBUSY); + page = get_zeroed_page(GFP_KERNEL); + if( !page ) { + dec_nrctrs(); + return ERR_PTR(-ENOMEM); + } + SetPageReserved(virt_to_page(page)); + return (struct vperfctr*) page; +} + +static void vperfctr_free(struct vperfctr *perfctr) +{ + debug_free(perfctr); + ClearPageReserved(virt_to_page(perfctr)); + free_page((unsigned long)perfctr); + dec_nrctrs(); +} + +static struct vperfctr *get_empty_vperfctr(void) +{ + struct vperfctr *perfctr = vperfctr_alloc(); + if( !IS_ERR(perfctr) ) { + atomic_set(&perfctr->count, 1); + vperfctr_init_bad_cpus_allowed(perfctr); + spin_lock_init(&perfctr->owner_lock); + debug_init(perfctr); + } + return perfctr; +} + +static void put_vperfctr(struct vperfctr *perfctr) +{ + if( atomic_dec_and_test(&perfctr->count) ) + 
vperfctr_free(perfctr); +} + +/**************************************************************** + * * + * Basic counter operations. * + * These must all be called by the owner process only. * + * These must all be called with preemption disabled. * + * * + ****************************************************************/ + +/* PRE: IS_RUNNING(perfctr) + * Suspend the counters. + * XXX: When called from switch_to(), perfctr belongs to 'prev' + * but current is 'next'. Debug messages will refer to 'next'... + */ +static inline void vperfctr_suspend(struct vperfctr *perfctr) +{ + debug_suspend(perfctr); + debug_check_smp_id(perfctr); + perfctr_cpu_suspend(&perfctr->cpu_state); +} + +static inline void vperfctr_reset_sampling_timer(struct vperfctr *perfctr) +{ + /* XXX: base the value on perfctr_info.cpu_khz instead! */ + perfctr->sampling_timer = HZ/2; +} + +/* PRE: perfctr == current->thread.perfctr && IS_RUNNING(perfctr) + * Restart the counters. + */ +static inline void vperfctr_resume(struct vperfctr *perfctr) +{ + debug_resume(perfctr); + perfctr_cpu_resume(&perfctr->cpu_state); + vperfctr_reset_sampling_timer(perfctr); + debug_set_smp_id(perfctr); +} + +/* Sample the counters but do not suspend them. */ +static void vperfctr_sample(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) { + debug_check_smp_id(perfctr); + perfctr_cpu_sample(&perfctr->cpu_state); + vperfctr_reset_sampling_timer(perfctr); + } +} + +#if PERFCTR_INTERRUPT_SUPPORT +/* vperfctr interrupt handler (XXX: add buffering support) */ +/* PREEMPT note: called in IRQ context with preemption disabled. */ +static void vperfctr_ihandler(unsigned long pc) +{ + struct task_struct *tsk = current; + struct vperfctr *perfctr; + unsigned int pmc_mask; + siginfo_t si; + + perfctr = tsk->thread.perfctr; + if( !perfctr ) { + printk(KERN_ERR "%s: BUG! 
pid %d has no vperfctr\n", + __FUNCTION__, tsk->pid); + return; + } + if( !perfctr_cstatus_has_ictrs(perfctr->cpu_state.cstatus) ) { + printk(KERN_ERR "%s: BUG! vperfctr has cstatus %#x (pid %d, comm %s)\n", + __FUNCTION__, perfctr->cpu_state.cstatus, tsk->pid, tsk->comm); + return; + } + vperfctr_suspend(perfctr); + pmc_mask = perfctr_cpu_identify_overflow(&perfctr->cpu_state); + if( !pmc_mask ) { + printk(KERN_ERR "%s: BUG! pid %d has unidentifiable overflow source\n", + __FUNCTION__, tsk->pid); + return; + } + /* suspend a-mode and i-mode PMCs, leaving only TSC on */ + /* XXX: some people also want to suspend the TSC */ + perfctr->iresume_cstatus = perfctr->cpu_state.cstatus; + if( perfctr_cstatus_has_tsc(perfctr->iresume_cstatus) ) { + perfctr->cpu_state.cstatus = perfctr_mk_cstatus(1, 0, 0); + vperfctr_resume(perfctr); + } else + perfctr->cpu_state.cstatus = 0; + si.si_signo = perfctr->si_signo; + si.si_errno = 0; + si.si_code = SI_PMC_OVF; + si.si_pmc_ovf_mask = pmc_mask; + if( !send_sig_info(si.si_signo, &si, tsk) ) + send_sig(si.si_signo, tsk, 1); +} +#endif + +/**************************************************************** + * * + * Process management operations. * + * These must all, with the exception of vperfctr_unlink() * + * and __vperfctr_set_cpus_allowed(), be called by the owner * + * process only. * + * * + ****************************************************************/ + +/* Called from exit_thread() or sys_vperfctr_unlink(). + * If the counters are running, stop them and sample their final values. + * Detach the vperfctr object from its owner task. + * PREEMPT note: exit_thread() does not run with preemption disabled. 
+ */ +static void vperfctr_unlink(struct task_struct *owner, struct vperfctr *perfctr) +{ + /* this synchronises with vperfctr_ioctl() */ + spin_lock(&perfctr->owner_lock); + perfctr->owner = NULL; + spin_unlock(&perfctr->owner_lock); + + /* perfctr suspend+detach must be atomic wrt process suspend */ + /* this also synchronises with perfctr_set_cpus_allowed() */ + vperfctr_task_lock(owner); + if( IS_RUNNING(perfctr) && owner == current ) + vperfctr_suspend(perfctr); + owner->thread.perfctr = NULL; + vperfctr_task_unlock(owner); + + perfctr->cpu_state.cstatus = 0; + vperfctr_clear_iresume_cstatus(perfctr); + put_vperfctr(perfctr); +} + +void __vperfctr_exit(struct vperfctr *perfctr) +{ + vperfctr_unlink(current, perfctr); +} + +/* schedule() --> switch_to() --> .. --> __vperfctr_suspend(). + * If the counters are running, suspend them. + * PREEMPT note: switch_to() runs with preemption disabled. + */ +void __vperfctr_suspend(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) + vperfctr_suspend(perfctr); +} + +/* schedule() --> switch_to() --> .. --> __vperfctr_resume(). + * PRE: perfctr == current->thread.perfctr + * If the counters are runnable, resume them. + * PREEMPT note: switch_to() runs with preemption disabled. + */ +void __vperfctr_resume(struct vperfctr *perfctr) +{ + if( IS_RUNNING(perfctr) ) { +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + if( unlikely(atomic_read(&perfctr->bad_cpus_allowed)) && + perfctr_cstatus_nrctrs(perfctr->cpu_state.cstatus) ) { + perfctr->cpu_state.cstatus = 0; + vperfctr_clear_iresume_cstatus(perfctr); + BUG_ON(current->state != TASK_RUNNING); + send_sig(SIGILL, current, 1); + return; + } +#endif + vperfctr_resume(perfctr); + } +} + +/* Called from update_one_process() [triggered by timer interrupt]. + * PRE: perfctr == current->thread.perfctr. + * Sample the counters but do not suspend them. + * Needed to avoid precision loss due to multiple counter + * wraparounds between resume/suspend for CPU-bound processes. 
+ * PREEMPT note: called in IRQ context with preemption disabled. + */ +void __vperfctr_sample(struct vperfctr *perfctr) +{ + if( --perfctr->sampling_timer == 0 ) + vperfctr_sample(perfctr); +} + +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED +/* Called from set_cpus_allowed(). + * PRE: current holds task_lock(owner) + * PRE: owner->thread.perfctr == perfctr + */ +void __vperfctr_set_cpus_allowed(struct task_struct *owner, + struct vperfctr *perfctr, + cpumask_t new_mask) +{ + cpumask_t tmp; + + cpus_and(tmp, new_mask, perfctr_cpus_forbidden_mask); + if( !cpus_empty(tmp) ) { + atomic_set(&perfctr->bad_cpus_allowed, 1); + printk(KERN_WARNING "perfctr: process %d (comm %s) issued unsafe" + " set_cpus_allowed() on process %d (comm %s)\n", + current->pid, current->comm, owner->pid, owner->comm); + } else + atomic_set(&perfctr->bad_cpus_allowed, 0); +} +#endif + +/**************************************************************** + * * + * Virtual perfctr "system calls". * + * These can be called by the owner process (tsk == current), * + * a monitor process which has the owner under ptrace ATTACH * + * control (tsk && tsk != current), or anyone with a handle to * + * an unlinked perfctr (!tsk). 
* + * * + ****************************************************************/ + +static int sys_vperfctr_control(struct vperfctr *perfctr, + struct perfctr_struct_buf *argp, + struct task_struct *tsk) +{ + struct vperfctr_control control; + int err; + unsigned int next_cstatus; + unsigned int nrctrs, i; + + if( !tsk ) + return -ESRCH; /* attempt to update unlinked perfctr */ + + err = perfctr_copy_from_user(&control, argp, &vperfctr_control_sdesc); + if( err ) + return err; + + if( control.cpu_control.nractrs || control.cpu_control.nrictrs ) { + cpumask_t old_mask, new_mask; + + old_mask = tsk->cpus_allowed; + cpus_andnot(new_mask, old_mask, perfctr_cpus_forbidden_mask); + + if( cpus_empty(new_mask) ) + return -EINVAL; + if( !cpus_equal(new_mask, old_mask) ) + set_cpus_allowed(tsk, new_mask); + } + + /* PREEMPT note: preemption is disabled over the entire + region since we're updating an active perfctr. */ + preempt_disable(); + if( IS_RUNNING(perfctr) ) { + if( tsk == current ) + vperfctr_suspend(perfctr); + perfctr->cpu_state.cstatus = 0; + vperfctr_clear_iresume_cstatus(perfctr); + } + perfctr->cpu_state.control = control.cpu_control; + /* remote access note: perfctr_cpu_update_control() is ok */ + err = perfctr_cpu_update_control(&perfctr->cpu_state, 0); + if( err < 0 ) + goto out; + next_cstatus = perfctr->cpu_state.cstatus; + if( !perfctr_cstatus_enabled(next_cstatus) ) + goto out; + + /* XXX: validate si_signo? 
*/ + perfctr->si_signo = control.si_signo; + + if( !perfctr_cstatus_has_tsc(next_cstatus) ) + perfctr->cpu_state.tsc_sum = 0; + + nrctrs = perfctr_cstatus_nrctrs(next_cstatus); + for(i = 0; i < nrctrs; ++i) + if( !(control.preserve & (1<cpu_state.pmc[i].sum = 0; + + if( tsk == current ) + vperfctr_resume(perfctr); + + out: + preempt_enable(); + return err; +} + +static int sys_vperfctr_iresume(struct vperfctr *perfctr, const struct task_struct *tsk) +{ +#if PERFCTR_INTERRUPT_SUPPORT + unsigned int iresume_cstatus; + + if( !tsk ) + return -ESRCH; /* attempt to update unlinked perfctr */ + + iresume_cstatus = perfctr->iresume_cstatus; + if( !perfctr_cstatus_has_ictrs(iresume_cstatus) ) + return -EPERM; + + /* PREEMPT note: preemption is disabled over the entire + region because we're updating an active perfctr. */ + preempt_disable(); + + if( IS_RUNNING(perfctr) && tsk == current ) + vperfctr_suspend(perfctr); + + perfctr->cpu_state.cstatus = iresume_cstatus; + perfctr->iresume_cstatus = 0; + + /* remote access note: perfctr_cpu_ireload() is ok */ + perfctr_cpu_ireload(&perfctr->cpu_state); + + if( tsk == current ) + vperfctr_resume(perfctr); + + preempt_enable(); + + return 0; +#else + return -ENOSYS; +#endif +} + +static int sys_vperfctr_unlink(struct vperfctr *perfctr, struct task_struct *tsk) +{ + if( tsk ) + vperfctr_unlink(tsk, perfctr); + return 0; +} + +static int sys_vperfctr_read_sum(struct vperfctr *perfctr, + struct perfctr_struct_buf *argp, + const struct task_struct *tsk) +{ + struct perfctr_sum_ctrs sum; + + if( tsk == current ) { + preempt_disable(); + vperfctr_sample(perfctr); + } + //sum = perfctr->cpu_state.sum; + { + int j; + sum.tsc = perfctr->cpu_state.tsc_sum; + for(j = 0; j < ARRAY_SIZE(sum.pmc); ++j) + sum.pmc[j] = perfctr->cpu_state.pmc[j].sum; + } + if( tsk == current ) + preempt_enable(); + return perfctr_copy_to_user(argp, &sum, &perfctr_sum_ctrs_sdesc); +} + +static int sys_vperfctr_read_control(struct vperfctr *perfctr, + struct 
perfctr_struct_buf *argp, + const struct task_struct *tsk) +{ + struct vperfctr_control control; + + /* PREEMPT note: While we're reading our own control, another + process may ptrace ATTACH to us and update our control. + Disable preemption to ensure we get a consistent copy. + Not needed for other cases since the perfctr is either + unlinked or its owner is ptrace ATTACH suspended by us. */ + if( tsk == current ) + preempt_disable(); + control.si_signo = perfctr->si_signo; + control.cpu_control = perfctr->cpu_state.control; + if( tsk == current ) + preempt_enable(); + control.preserve = 0; + return perfctr_copy_to_user(argp, &control, &vperfctr_control_sdesc); +} + +/**************************************************************** + * * + * Virtual perfctr file operations. * + * * + ****************************************************************/ + +static int vperfctr_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct vperfctr *perfctr; + + /* Only allow read-only mapping of first page. 
*/ + if( (vma->vm_end - vma->vm_start) != PAGE_SIZE || + vma->vm_pgoff != 0 || + (pgprot_val(vma->vm_page_prot) & _PAGE_RW) || + (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) ) + return -EPERM; + perfctr = filp->private_data; + if( !perfctr ) + return -EPERM; + return remap_page_range(vma, vma->vm_start, virt_to_phys(perfctr), + PAGE_SIZE, vma->vm_page_prot); +} + +static int vperfctr_release(struct inode *inode, struct file *filp) +{ + struct vperfctr *perfctr = filp->private_data; + filp->private_data = NULL; + if( perfctr ) + put_vperfctr(perfctr); + return 0; +} + +static int vperfctr_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct vperfctr *perfctr; + struct task_struct *tsk; + int ret; + + switch( cmd ) { + case PERFCTR_ABI: + return sys_perfctr_abi((unsigned int*)arg); + case PERFCTR_INFO: + return sys_perfctr_info((struct perfctr_struct_buf*)arg); + case PERFCTR_CPUS: + return sys_perfctr_cpus((struct perfctr_cpu_mask*)arg); + case PERFCTR_CPUS_FORBIDDEN: + return sys_perfctr_cpus_forbidden((struct perfctr_cpu_mask*)arg); + } + perfctr = filp->private_data; + if( !perfctr ) + return -EINVAL; + tsk = current; + if( perfctr != current->thread.perfctr ) { + /* this synchronises with vperfctr_unlink() and itself */ + spin_lock(&perfctr->owner_lock); + tsk = perfctr->owner; + if( tsk ) + get_task_struct(tsk); + spin_unlock(&perfctr->owner_lock); + if( tsk ) { + ret = ptrace_check_attach(tsk, 0); + if( ret < 0 ) + goto out; + } + } + switch( cmd ) { + case VPERFCTR_CONTROL: + ret = sys_vperfctr_control(perfctr, (struct perfctr_struct_buf*)arg, tsk); + break; + case VPERFCTR_UNLINK: + ret = sys_vperfctr_unlink(perfctr, tsk); + break; + case VPERFCTR_READ_SUM: + ret = sys_vperfctr_read_sum(perfctr, (struct perfctr_struct_buf*)arg, tsk); + break; + case VPERFCTR_IRESUME: + ret = sys_vperfctr_iresume(perfctr, tsk); + break; + case VPERFCTR_READ_CONTROL: + ret = sys_vperfctr_read_control(perfctr, (struct 
perfctr_struct_buf*)arg, tsk); + break; + default: + ret = -EINVAL; + } + out: + if( tsk && tsk != current ) + put_task_struct(tsk); + return ret; +} + +static struct file_operations vperfctr_file_ops = { + .owner = THIS_MODULE, + .mmap = vperfctr_mmap, + .release = vperfctr_release, + .ioctl = vperfctr_ioctl, +}; + +/**************************************************************** + * * + * File system for virtual perfctrs. Based on pipefs. * + * * + ****************************************************************/ + +#define VPERFCTRFS_MAGIC (('V'<<24)|('P'<<16)|('M'<<8)|('C')) + +/* The code to set up a `struct file_system_type' for a pseudo fs + is unfortunately not the same in 2.4 and 2.6. */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +#include /* needed for 2.6, included by fs.h in 2.4 */ + +/* 2.6 doesn't EXPORT_SYMBOL() fs/libfs.c:get_sb_pseudo(). + This is a verbatim copy, only renamed. */ +#ifdef MODULE +static +struct super_block * +perfctr_get_sb_pseudo(struct file_system_type *fs_type, char *name, + struct super_operations *ops, unsigned long magic) +{ + struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + static struct super_operations default_ops = {.statfs = simple_statfs}; + struct dentry *dentry; + struct inode *root; + struct qstr d_name = {.name = name, .len = strlen(name)}; + + if (IS_ERR(s)) + return s; + + s->s_flags = MS_NOUSER; + s->s_maxbytes = ~0ULL; + s->s_blocksize = 1024; + s->s_blocksize_bits = 10; + s->s_magic = magic; + s->s_op = ops ? 
ops : &default_ops; + root = new_inode(s); + if (!root) + goto Enomem; + root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; + root->i_uid = root->i_gid = 0; + root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + dentry = d_alloc(NULL, &d_name); + if (!dentry) { + iput(root); + goto Enomem; + } + dentry->d_sb = s; + dentry->d_parent = dentry; + d_instantiate(dentry, root); + s->s_root = dentry; + s->s_flags |= MS_ACTIVE; + return s; + +Enomem: + up_write(&s->s_umount); + deactivate_super(s); + return ERR_PTR(-ENOMEM); +} +#undef get_sb_pseudo +#define get_sb_pseudo perfctr_get_sb_pseudo +#endif /* MODULE */ + +static struct super_block * +vperfctrfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "vperfctr:", NULL, VPERFCTRFS_MAGIC); +} + +static struct file_system_type vperfctrfs_type = { + .name = "vperfctrfs", + .get_sb = vperfctrfs_get_sb, + .kill_sb = kill_anon_super, +}; + +#else /* 2.4 */ + +static int vperfctrfs_statfs(struct super_block *sb, struct statfs *buf) +{ + buf->f_type = VPERFCTRFS_MAGIC; + buf->f_bsize = 1024; + buf->f_namelen = 255; + return 0; +} + +static struct super_operations vperfctrfs_ops = { + .statfs = vperfctrfs_statfs, +}; + +static struct super_block* +vperfctrfs_read_super(struct super_block *sb, void *data, int silent) +{ + static const struct qstr d_name = { "vperfctrfs:", 11, 0 }; + struct dentry *dentry; + struct inode *root; + + root = new_inode(sb); + if( !root ) + return NULL; + root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; + root->i_uid = root->i_gid = 0; + root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = VPERFCTRFS_MAGIC; + sb->s_op = &vperfctrfs_ops; /* XXX: check if 2.4 really needs this */ + sb->s_root = dentry = d_alloc(NULL, &d_name); + if( !dentry ) { + iput(root); + return NULL; + } + dentry->d_sb = sb; + dentry->d_parent = dentry; + d_instantiate(dentry, 
root); + return sb; +} + +/* DECLARE_FSTYPE() hides 'owner: THIS_MODULE'. kern_mount() increments + owner's use count, and since we're not unmountable from user-space, + the module can't be unloaded because it's use count is >= 1. + So we declare the file_system_type manually without the owner field. */ +static struct file_system_type vperfctrfs_type = { + .name = "vperfctrfs", + .read_super = vperfctrfs_read_super, + .fs_flags = FS_NOMOUNT, +}; + +#endif /* 2.4 */ + +/* XXX: check if s/vperfctr_mnt/vperfctrfs_type.kern_mnt/ would work */ +static struct vfsmount *vperfctr_mnt; + +static int __init vperfctrfs_init(void) +{ + int err = register_filesystem(&vperfctrfs_type); + if( !err ) { + vperfctr_mnt = kern_mount(&vperfctrfs_type); + if( !IS_ERR(vperfctr_mnt) ) + return 0; + err = PTR_ERR(vperfctr_mnt); + unregister_filesystem(&vperfctrfs_type); + } + return err; +} + +static void __exit vperfctrfs_exit(void) +{ + unregister_filesystem(&vperfctrfs_type); + mntput(vperfctr_mnt); +} + +static struct inode *vperfctr_get_inode(void) +{ + struct inode *inode; + + inode = new_inode(vperfctr_mnt->mnt_sb); + if( !inode ) + return NULL; + inode->i_fop = &vperfctr_file_ops; + inode->i_state = I_DIRTY; + inode->i_mode = S_IFCHR | S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blksize = 0; + return inode; +} + +static int vperfctrfs_delete_dentry(struct dentry *dentry) +{ + return 1; +} + +static struct dentry_operations vperfctrfs_dentry_operations = { + .d_delete = vperfctrfs_delete_dentry, +}; + +static struct dentry *vperfctr_d_alloc_root(struct inode *inode) +{ + struct qstr this; + char name[32]; + struct dentry *dentry; + + sprintf(name, "[%lu]", inode->i_ino); + this.name = name; + this.len = strlen(name); + this.hash = inode->i_ino; /* will go */ + dentry = d_alloc(vperfctr_mnt->mnt_sb->s_root, &this); + if( dentry ) { + dentry->d_op = 
&vperfctrfs_dentry_operations; + d_add(dentry, inode); + } + return dentry; +} + +static struct file *vperfctr_get_filp(void) +{ + struct file *filp; + struct inode *inode; + struct dentry *dentry; + + filp = get_empty_filp(); + if( !filp ) + goto out; + inode = vperfctr_get_inode(); + if( !inode ) + goto out_filp; + dentry = vperfctr_d_alloc_root(inode); + if( !dentry ) + goto out_inode; + + filp->f_vfsmnt = mntget(vperfctr_mnt); + filp->f_dentry = dentry; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,2) + filp->f_mapping = dentry->d_inode->i_mapping; +#endif + + filp->f_pos = 0; + filp->f_flags = 0; + filp->f_op = fops_get(&vperfctr_file_ops); /* fops_get() for MODULE */ + filp->f_mode = FMODE_READ; + filp->f_version = 0; + + return filp; + + out_inode: + iput(inode); + out_filp: + put_filp(filp); /* doesn't run ->release() like fput() does */ + out: + return NULL; +} + +/* tid is the actual task/thread id (née pid, stored as ->pid), + pid/tgid is that 2.6 thread group id crap (stored as ->tgid) */ +int vperfctr_attach(int tid, int creat) +{ + struct file *filp; + struct task_struct *tsk; + struct vperfctr *perfctr; + int err; + int fd; + + filp = vperfctr_get_filp(); + if( !filp ) + return -ENOMEM; + err = fd = get_unused_fd(); + if( err < 0 ) + goto err_filp; + perfctr = NULL; + if( creat ) { + perfctr = get_empty_vperfctr(); /* may sleep */ + if( IS_ERR(perfctr) ) { + err = PTR_ERR(perfctr); + goto err_fd; + } + } + tsk = current; + if( tid != 0 && tid != tsk->pid ) { /* remote? 
*/ + read_lock(&tasklist_lock); + tsk = find_task_by_pid(tid); + if( tsk ) + get_task_struct(tsk); + read_unlock(&tasklist_lock); + err = -ESRCH; + if( !tsk ) + goto err_perfctr; + err = ptrace_check_attach(tsk, 0); + if( err < 0 ) + goto err_tsk; + } + if( creat ) { + /* check+install must be atomic to prevent remote-control races */ + vperfctr_task_lock(tsk); + if( !tsk->thread.perfctr ) { + perfctr->owner = tsk; + tsk->thread.perfctr = perfctr; + err = 0; + } else + err = -EEXIST; + vperfctr_task_unlock(tsk); + if( err ) + goto err_tsk; + } else { + perfctr = tsk->thread.perfctr; + /* PERFCTR_ABI and PERFCTR_INFO don't need the perfctr. + Hence no non-NULL check here. */ + } + filp->private_data = perfctr; + if( perfctr ) + atomic_inc(&perfctr->count); + if( tsk != current ) + put_task_struct(tsk); + fd_install(fd, filp); + return fd; + err_tsk: + if( tsk != current ) + put_task_struct(tsk); + err_perfctr: + if( perfctr ) /* can only occur if creat != 0 */ + put_vperfctr(perfctr); + err_fd: + put_unused_fd(fd); + err_filp: + fput(filp); + return err; +} + +/**************************************************************** + * * + * module_init/exit * + * * + ****************************************************************/ + +#ifdef MODULE +static struct vperfctr_stub off; + +static void vperfctr_stub_init(void) +{ + off = vperfctr_stub; + vperfctr_stub.owner = THIS_MODULE; + vperfctr_stub.exit = __vperfctr_exit; + vperfctr_stub.suspend = __vperfctr_suspend; + vperfctr_stub.resume = __vperfctr_resume; + vperfctr_stub.sample = __vperfctr_sample; +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + vperfctr_stub.set_cpus_allowed = __vperfctr_set_cpus_allowed; +#endif +} + +static void vperfctr_stub_exit(void) +{ + vperfctr_stub = off; +} +#else +static inline void vperfctr_stub_init(void) { } +static inline void vperfctr_stub_exit(void) { } +#endif /* MODULE */ + +int __init vperfctr_init(void) +{ + int err = vperfctrfs_init(); + if( err ) + return err; + 
vperfctr_stub_init(); + return 0; +} + +void __exit vperfctr_exit(void) +{ + vperfctrfs_exit(); + vperfctr_stub_exit(); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_tests.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_tests.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,30 @@ +/* $Id: x86_tests.h,v 1.8.2.2 2004/08/02 15:53:19 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86/x86_64-specific init-time tests. + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ + +/* 'enum perfctr_x86_tests_type' classifies CPUs according + to relevance for perfctr_x86_init_tests(). */ +enum perfctr_x86_tests_type { + PTT_UNKNOWN, + PTT_GENERIC, + PTT_P5, + PTT_P6, + PTT_P4, + PTT_AMD, + PTT_WINCHIP, + PTT_VC3, +}; + +extern enum perfctr_x86_tests_type perfctr_x86_tests_type; + +static inline void perfctr_set_tests_type(enum perfctr_x86_tests_type t) +{ +#ifdef CONFIG_PERFCTR_INIT_TESTS + perfctr_x86_tests_type = t; +#endif +} + +extern void perfctr_x86_init_tests(void); Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_setup.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_setup.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_setup.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,40 @@ +/* $Id: ppc_setup.c,v 1.1 2004/01/12 01:59:11 mikpe Exp $ + * Performance-monitoring counters driver. + * PPC32-specific kernel-resident code. 
+ * + * Copyright (C) 2004 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include "ppc_compat.h" +#include "compat.h" + +#if PERFCTR_INTERRUPT_SUPPORT +static void perfctr_default_ihandler(unsigned long pc) +{ +} + +static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; + +void do_perfctr_interrupt(struct pt_regs *regs) +{ + preempt_disable(); + (*perfctr_ihandler)(regs->nip); + preempt_enable_no_resched(); +} + +void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) +{ + perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; +} + +#ifdef CONFIG_PERFCTR_MODULE +EXPORT_SYMBOL(perfctr_cpu_set_ihandler); +#endif /* MODULE */ +#endif /* PERFCTR_INTERRUPT_SUPPORT */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/ppc_tests.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/ppc_tests.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,12 @@ +/* $Id: ppc_tests.h,v 1.1.2.1 2004/06/21 22:33:35 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional PPC32-specific init-time tests. 
+ * + * Copyright (C) 2004 Mikael Pettersson + */ + +#ifdef CONFIG_PERFCTR_INIT_TESTS +extern void perfctr_ppc_init_tests(int have_mmcr1); +#else +static inline void perfctr_ppc_init_tests(int have_mmcr1) { } +#endif Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/version.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/version.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/version.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1 @@ +#define VERSION "2.6.10.2" Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_tests.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_tests.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,174 @@ +/* $Id: x86_64_tests.c,v 1.3 2004/02/21 11:04:46 mikpe Exp $ + * Performance-monitoring counters driver. + * Optional x86_64-specific init-time tests. 
+ * + * Copyright (C) 2003-2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include +#include +#include +#include +#include "x86_64_compat.h" +#include "x86_64_tests.h" + +#define MSR_K8_EVNTSEL0 0xC0010000 +#define MSR_K8_PERFCTR0 0xC0010004 +#define K8_EVNTSEL0_VAL (0xC0 | (3<<16) | (1<<22)) + +#define NITER 64 +#define X2(S) S";"S +#define X8(S) X2(X2(X2(S))) + +static void __init do_rdpmc(unsigned pmc, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdpmc") : : "c"(pmc) : "eax", "edx"); +} + +static void __init do_rdmsr(unsigned msr, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdmsr") : : "c"(msr) : "eax", "edx"); +} + +static void __init do_wrmsr(unsigned msr, unsigned data) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("wrmsr") : : "c"(msr), "a"(data), "d"(0)); +} + +static void __init do_rdcr4(unsigned unused1, unsigned unused2) +{ + unsigned i; + unsigned long dummy; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("movq %%cr4,%0") : "=r"(dummy)); +} + +static void __init do_wrcr4(unsigned cr4, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("movq %0,%%cr4") : : "r"((long)cr4)); +} + +static void __init do_rdtsc(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) + __asm__ __volatile__(X8("rdtsc") : : : "eax", "edx"); +} + +static void __init do_wrlvtpc(unsigned val, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) { + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + apic_write(APIC_LVTPC, val); + } +} + +static void __init do_empty_loop(unsigned unused1, unsigned unused2) +{ + unsigned i; + for(i = 0; i < NITER/8; ++i) 
+ __asm__ __volatile__("" : : "c"(0)); +} + +static unsigned __init run(void (*doit)(unsigned, unsigned), + unsigned arg1, unsigned arg2) +{ + unsigned start, dummy, stop; + rdtsc(start, dummy); + (*doit)(arg1, arg2); /* should take < 2^32 cycles to complete */ + rdtsc(stop, dummy); + return stop - start; +} + +static void __init init_tests_message(void) +{ + printk(KERN_INFO "Please email the following PERFCTR INIT lines " + "to mikpe@csd.uu.se\n" + KERN_INFO "To remove this message, rebuild the driver " + "with CONFIG_PERFCTR_INIT_TESTS=n\n"); + printk(KERN_INFO "PERFCTR INIT: vendor %u, family %u, model %u, stepping %u, clock %u kHz\n", + current_cpu_data.x86_vendor, + current_cpu_data.x86, + current_cpu_data.x86_model, + current_cpu_data.x86_mask, + perfctr_cpu_khz()); +} + +static void __init +measure_overheads(unsigned msr_evntsel0, unsigned evntsel0, unsigned msr_perfctr0) +{ + int i; + unsigned int loop, ticks[9]; + const char *name[9]; + + if( msr_evntsel0 ) + wrmsr(msr_evntsel0, 0, 0); + + name[0] = "rdtsc"; + ticks[0] = run(do_rdtsc, 0, 0); + name[1] = "rdpmc"; + ticks[1] = (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + ? run(do_rdpmc,1,0) : 0; + name[2] = "rdmsr (counter)"; + ticks[2] = msr_perfctr0 ? run(do_rdmsr, msr_perfctr0, 0) : 0; + name[3] = "rdmsr (evntsel)"; + ticks[3] = msr_evntsel0 ? run(do_rdmsr, msr_evntsel0, 0) : 0; + name[4] = "wrmsr (counter)"; + ticks[4] = msr_perfctr0 ? run(do_wrmsr, msr_perfctr0, 0) : 0; + name[5] = "wrmsr (evntsel)"; + ticks[5] = msr_evntsel0 ? run(do_wrmsr, msr_evntsel0, evntsel0) : 0; + name[6] = "read cr4"; + ticks[6] = run(do_rdcr4, 0, 0); + name[7] = "write cr4"; + ticks[7] = run(do_wrcr4, read_cr4(), 0); + name[8] = "write LVTPC"; + ticks[8] = (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + ? 
run(do_wrlvtpc, APIC_DM_NMI|APIC_LVT_MASKED, 0) : 0; + + loop = run(do_empty_loop, 0, 0); + + if( msr_evntsel0 ) + wrmsr(msr_evntsel0, 0, 0); + + init_tests_message(); + printk(KERN_INFO "PERFCTR INIT: NITER == %u\n", NITER); + printk(KERN_INFO "PERFCTR INIT: loop overhead is %u cycles\n", loop); + for(i = 0; i < ARRAY_SIZE(ticks); ++i) { + unsigned int x; + if( !ticks[i] ) + continue; + x = ((ticks[i] - loop) * 10) / NITER; + printk(KERN_INFO "PERFCTR INIT: %s cost is %u.%u cycles (%u total)\n", + name[i], x/10, x%10, ticks[i]); + } +} + +void __init perfctr_k8_init_tests(void) +{ + measure_overheads(MSR_K8_EVNTSEL0, K8_EVNTSEL0_VAL, MSR_K8_PERFCTR0); +} + +void __init perfctr_generic_init_tests(void) +{ + measure_overheads(0, 0, 0); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_setup.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64_setup.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64_setup.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,63 @@ +/* $Id: x86_64_setup.c,v 1.9 2004/02/21 11:56:53 mikpe Exp $ + * Performance-monitoring counters driver. + * x86_86-specific kernel-resident code. + * + * Copyright (C) 2003-2004 Mikael Pettersson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "x86_64_compat.h" +#include "compat.h" + +static void perfctr_default_ihandler(unsigned long pc) +{ +} + +static perfctr_ihandler_t perfctr_ihandler = perfctr_default_ihandler; + +asmlinkage void smp_perfctr_interrupt(struct pt_regs *regs) +{ + /* PREEMPT note: invoked via an interrupt gate, which + masks interrupts. We're still on the originating CPU. 
*/ + ack_APIC_irq(); + irq_enter(); + (*perfctr_ihandler)(regs->rip); + irq_exit(); +} + +void perfctr_cpu_set_ihandler(perfctr_ihandler_t ihandler) +{ + perfctr_ihandler = ihandler ? ihandler : perfctr_default_ihandler; +} + +extern unsigned int cpu_khz; + +/* Wrapper to avoid namespace clash in RedHat 8.0's 2.4.18-14 kernel. */ +unsigned int perfctr_cpu_khz(void) +{ + return cpu_khz; +} + +#ifdef CONFIG_PERFCTR_MODULE +EXPORT_SYMBOL_mmu_cr4_features; +EXPORT_SYMBOL(perfctr_cpu_khz); + +EXPORT_SYMBOL(nmi_perfctr_msr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) && defined(CONFIG_PM) +EXPORT_SYMBOL(apic_pm_register); +EXPORT_SYMBOL(apic_pm_unregister); +EXPORT_SYMBOL(nmi_pmdev); +#endif + +EXPORT_SYMBOL(perfctr_cpu_set_ihandler); + +#endif /* MODULE */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86_64.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86_64.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,776 @@ +/* $Id: x86_64.c,v 1.22.2.1 2004/05/29 22:25:22 mikpe Exp $ + * x86_64 performance-monitoring counters driver. + * + * Copyright (C) 2003-2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include + +#include +#include +#include +struct hw_interrupt_type; +#include + +#include "compat.h" +#include "x86_compat.h" +#include "x86_tests.h" + +/* Support for lazy evntsel and perfctr MSR updates. 
*/ +struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ + union { + unsigned int id; /* cache owner id */ + } k1; + struct { + /* NOTE: these caches have physical indices, not virtual */ + unsigned int evntsel[4]; + } control; +} ____cacheline_aligned; +static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; + +/* Structure for counter snapshots, as 32-bit values. */ +struct perfctr_low_ctrs { + unsigned int tsc; + unsigned int pmc[4]; +}; + +/* AMD K8 */ +#define MSR_K8_EVNTSEL0 0xC0010000 /* .. 0xC0010003 */ +#define MSR_K8_PERFCTR0 0xC0010004 /* .. 0xC0010007 */ +#define K8_EVNTSEL_ENABLE 0x00400000 +#define K8_EVNTSEL_INT 0x00100000 +#define K8_EVNTSEL_CPL 0x00030000 +#define K8_EVNTSEL_RESERVED 0x00280000 + +#define rdpmc_low(ctr,low) \ + __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") + +static void clear_msr_range(unsigned int base, unsigned int n) +{ + unsigned int i; + + for(i = 0; i < n; ++i) + wrmsr(base+i, 0, 0); +} + +static inline void set_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() | mask); +} + +static inline void clear_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() & ~mask); +} + +static unsigned int new_id(void) +{ + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + static unsigned int counter; + int id; + + spin_lock(&lock); + id = ++counter; + spin_unlock(&lock); + return id; +} + +#if defined(CONFIG_SMP) + +static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, + int cpu) +{ + state->k1.isuspend_cpu = cpu; +} + +static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, + int cpu) +{ + return state->k1.isuspend_cpu == cpu; +} + +static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) +{ + state->k1.isuspend_cpu = NR_CPUS; +} + +#else +static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, + int cpu) { } +static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, + int cpu) { return 1; } +static inline void 
clear_isuspend_cpu(struct perfctr_cpu_state *state) { } +#endif + +/* XXX: disabled: called from switch_to() where printk() is disallowed */ +#if 0 && defined(CONFIG_PERFCTR_DEBUG) +static void debug_evntsel_cache(const struct perfctr_cpu_state *state, + const struct per_cpu_cache *cache) +{ + unsigned int nrctrs, i; + + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + unsigned int pmc = state->control.pmc_map[i]; + if( evntsel != cache->control.evntsel[pmc] ) { + printk(KERN_ERR "perfctr: (pid %d, comm %s) " + "evntsel[%u] is %#x, should be %#x\n", + current->pid, current->comm, + i, cache->control.evntsel[pmc], evntsel); + return; + } + } +} +#else +static inline void debug_evntsel_cache(const struct perfctr_cpu_state *s, + const struct per_cpu_cache *c) +{ } +#endif + +/**************************************************************** + * * + * Driver procedures. * + * * + ****************************************************************/ + +static void perfctr_cpu_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->pmc[i].map; + rdpmc_low(pmc, ctrs->pmc[i]); + } +} + +static int k8_check_control(struct perfctr_cpu_state *state) +{ + unsigned int evntsel, i, nractrs, nrctrs, pmc_mask, pmc; + + nractrs = state->control.nractrs; + nrctrs = nractrs + state->control.nrictrs; + if( nrctrs < nractrs || nrctrs > 4 ) + return -EINVAL; + + pmc_mask = 0; + for(i = 0; i < nrctrs; ++i) { + pmc = state->control.pmc_map[i]; + state->pmc[i].map = pmc; + if( pmc >= 4 || (pmc_mask & (1<control.evntsel[i]; + /* protect reserved bits */ + if( evntsel & K8_EVNTSEL_RESERVED ) + return -EPERM; + /* ENable bit must be set in each evntsel 
*/ + if( !(evntsel & K8_EVNTSEL_ENABLE) ) + return -EINVAL; + /* the CPL field must be non-zero */ + if( !(evntsel & K8_EVNTSEL_CPL) ) + return -EINVAL; + /* INT bit must be off for a-mode and on for i-mode counters */ + if( evntsel & K8_EVNTSEL_INT ) { + if( i < nractrs ) + return -EINVAL; + } else { + if( i >= nractrs ) + return -EINVAL; + } + } + state->k1.id = new_id(); + return 0; +} + +static void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int cstatus, nrctrs, i; + int cpu; + + cpu = smp_processor_id(); + cache = &per_cpu_cache[cpu]; + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { + unsigned int pmc, now; + pmc = state->pmc[i].map; + cache->control.evntsel[pmc] = 0; + wrmsr(MSR_K8_EVNTSEL0+pmc, 0, 0); + rdpmc_low(pmc, now); + state->pmc[i].sum += now - state->pmc[i].start; + state->pmc[i].start = now; + } + /* cache->k1.id is still == state->k1.id */ + set_isuspend_cpu(state, cpu); +} + +static void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int cstatus, nrctrs, i; + int cpu; + + cpu = smp_processor_id(); + cache = &per_cpu_cache[cpu]; + if( cache->k1.id == state->k1.id ) { + cache->k1.id = 0; /* force reload of cleared EVNTSELs */ + if( is_isuspend_cpu(state, cpu) ) + return; /* skip reload of PERFCTRs */ + } + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { + unsigned int pmc = state->pmc[i].map; + /* If the control wasn't ours we must disable the evntsels + before reinitialising the counters, to prevent unexpected + counter increments and missed overflow interrupts. 
*/ + if( cache->control.evntsel[pmc] ) { + cache->control.evntsel[pmc] = 0; + wrmsr(MSR_K8_EVNTSEL0+pmc, 0, 0); + } + wrmsr(MSR_K8_PERFCTR0+pmc, state->pmc[i].start, -1); + } + /* cache->k1.id remains != state->k1.id */ +} + +static void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int nrctrs, i; + + cache = &per_cpu_cache[smp_processor_id()]; + if( cache->k1.id == state->k1.id ) { + debug_evntsel_cache(state, cache); + return; + } + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + unsigned int pmc = state->pmc[i].map; + if( evntsel != cache->control.evntsel[pmc] ) { + cache->control.evntsel[pmc] = evntsel; + wrmsr(MSR_K8_EVNTSEL0+pmc, evntsel, 0); + } + } + cache->k1.id = state->k1.id; +} + +static void k8_clear_counters(void) +{ + clear_msr_range(MSR_K8_EVNTSEL0, 4+4); +} + +/* + * Generic driver for any x86-64 with a working TSC. + * (Mainly for testing with Screwdriver.) + */ + +static int generic_check_control(struct perfctr_cpu_state *state) +{ + if( state->control.nractrs || state->control.nrictrs ) + return -EINVAL; + return 0; +} + +static void generic_clear_counters(void) +{ +} + +/* + * Driver methods, internal and exported. + */ + +/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to + bypass internal caching and force a reload if the I-mode PMCs. 
*/ +void perfctr_cpu_ireload(struct perfctr_cpu_state *state) +{ +#ifdef CONFIG_SMP + clear_isuspend_cpu(state); +#else + per_cpu_cache[smp_processor_id()].k1.id = 0; +#endif +} + +/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ +unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, pmc, pmc_mask; + + cstatus = state->cstatus; + pmc = perfctr_cstatus_nractrs(cstatus); + nrctrs = perfctr_cstatus_nrctrs(cstatus); + + for(pmc_mask = 0; pmc < nrctrs; ++pmc) { + if( (int)state->pmc[pmc].start >= 0 ) { /* XXX: ">" ? */ + /* XXX: "+=" to correct for overshots */ + state->pmc[pmc].start = state->control.ireset[pmc]; + pmc_mask |= (1 << pmc); + } + } + return pmc_mask; +} + +static inline int check_ireset(const struct perfctr_cpu_state *state) +{ + unsigned int nrctrs, i; + + i = state->control.nractrs; + nrctrs = i + state->control.nrictrs; + for(; i < nrctrs; ++i) + if( state->control.ireset[i] >= 0 ) + return -EINVAL; + return 0; +} + +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + state->pmc[i].start = state->control.ireset[i]; +} + +static inline void debug_no_imode(const struct perfctr_cpu_state *state) +{ +#ifdef CONFIG_PERFCTR_DEBUG + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + printk(KERN_ERR "perfctr: BUG! 
updating control in" + " perfctr %p on cpu %u while it has cstatus %x" + " (pid %d, comm %s)\n", + state, smp_processor_id(), state->cstatus, + current->pid, current->comm); +#endif +} + +static int (*check_control)(struct perfctr_cpu_state*); +int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) +{ + int err; + + debug_no_imode(state); + clear_isuspend_cpu(state); + state->cstatus = 0; + + /* disallow i-mode counters if we cannot catch the interrupts */ + if( !(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + && state->control.nrictrs ) + return -EPERM; + + err = check_control(state); + if( err < 0 ) + return err; + err = check_ireset(state); + if( err < 0 ) + return err; + state->cstatus = perfctr_mk_cstatus(state->control.tsc_on, + state->control.nractrs, + state->control.nrictrs); + setup_imode_start_values(state); + return 0; +} + +void perfctr_cpu_suspend(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + perfctr_cpu_isuspend(state); + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + state->tsc_sum += now.tsc - state->tsc_start; + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; +} + +void perfctr_cpu_resume(struct perfctr_cpu_state *state) +{ + if( perfctr_cstatus_has_ictrs(state->cstatus) ) + perfctr_cpu_iresume(state); + perfctr_cpu_write_control(state); + //perfctr_cpu_read_counters(state, &state->start); + { + struct perfctr_low_ctrs now; + unsigned int i, cstatus, nrctrs; + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) + state->tsc_start = now.tsc; + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) + state->pmc[i].start = now.pmc[i]; + } + /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; 
*/ +} + +void perfctr_cpu_sample(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if( perfctr_cstatus_has_tsc(cstatus) ) { + state->tsc_sum += now.tsc - state->tsc_start; + state->tsc_start = now.tsc; + } + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) { + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; + state->pmc[i].start = now.pmc[i]; + } +} + +static void (*clear_counters)(void); +static void perfctr_cpu_clear_counters(void) +{ + return clear_counters(); +} + +/**************************************************************** + * * + * Processor detection and initialisation procedures. * + * * + ****************************************************************/ + +static int __init amd_init(void) +{ + static char k8_name[] __initdata = "AMD K8"; + static char k8c_name[] __initdata = "AMD K8C"; + + if( !cpu_has_tsc ) + return -ENODEV; + if( boot_cpu_data.x86 != 15 ) + return -ENODEV; + if( (boot_cpu_data.x86_model > 5) || + (boot_cpu_data.x86_model >= 4 && boot_cpu_data.x86_mask >= 8) ) { + perfctr_info.cpu_type = PERFCTR_X86_AMD_K8C; + perfctr_cpu_name = k8c_name; + } else { + perfctr_info.cpu_type = PERFCTR_X86_AMD_K8; + perfctr_cpu_name = k8_name; + } + check_control = k8_check_control; + clear_counters = k8_clear_counters; + if( cpu_has_apic ) + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + return 0; +} + +/* For testing on Screwdriver. 
*/ +static int __init generic_init(void) +{ + static char generic_name[] __initdata = "Generic x86-64 with TSC"; + if( !cpu_has_tsc ) + return -ENODEV; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + perfctr_info.cpu_type = PERFCTR_X86_GENERIC; + perfctr_cpu_name = generic_name; + check_control = generic_check_control; + clear_counters = generic_clear_counters; + return 0; +} + +static void perfctr_cpu_init_one(void *ignore) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. */ + perfctr_cpu_clear_counters(); + if( cpu_has_apic ) + apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + set_in_cr4_local(X86_CR4_PCE); +} + +static void perfctr_cpu_exit_one(void *ignore) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. */ + perfctr_cpu_clear_counters(); + if( cpu_has_apic ) + apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + clear_in_cr4_local(X86_CR4_PCE); +} + +#if defined(CONFIG_PM) + +static void perfctr_pm_suspend(void) +{ + /* XXX: clear control registers */ + printk("perfctr: PM suspend\n"); +} + +static void perfctr_pm_resume(void) +{ + /* XXX: reload control registers */ + printk("perfctr: PM resume\n"); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,71) + +#include + +static int perfctr_device_suspend(struct sys_device *dev, u32 state) +{ + perfctr_pm_suspend(); + return 0; +} + +static int perfctr_device_resume(struct sys_device *dev) +{ + perfctr_pm_resume(); + return 0; +} + +static struct sysdev_class perfctr_sysclass = { + set_kset_name("perfctr"), + .resume = perfctr_device_resume, + .suspend = perfctr_device_suspend, +}; + +static struct sys_device device_perfctr = { + .id = 0, + .cls = &perfctr_sysclass, +}; + +static void x86_pm_init(void) +{ + if( sysdev_class_register(&perfctr_sysclass) == 0 ) + 
sysdev_register(&device_perfctr); +} + +static void x86_pm_exit(void) +{ + sysdev_unregister(&device_perfctr); + sysdev_class_unregister(&perfctr_sysclass); +} + +#else /* 2.4 kernel */ + +static int x86_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + switch( rqst ) { + case PM_SUSPEND: + perfctr_pm_suspend(); + break; + case PM_RESUME: + perfctr_pm_resume(); + break; + } + return 0; +} + +static struct pm_dev *x86_pmdev; + +static void x86_pm_init(void) +{ + x86_pmdev = apic_pm_register(PM_SYS_DEV, 0, x86_pm_callback); +} + +static void x86_pm_exit(void) +{ + if( x86_pmdev ) { + apic_pm_unregister(x86_pmdev); + x86_pmdev = NULL; + } +} + +#endif /* 2.4 kernel */ + +#else + +static inline void x86_pm_init(void) { } +static inline void x86_pm_exit(void) { } + +#endif /* CONFIG_PM */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) +static void disable_lapic_nmi_watchdog(void) +{ +#ifdef CONFIG_PM + if( nmi_pmdev ) { + apic_pm_unregister(nmi_pmdev); + nmi_pmdev = 0; + } +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) +static int reserve_lapic_nmi(void) +{ + int ret = 0; + if( nmi_perfctr_msr ) { + nmi_perfctr_msr = 0; + disable_lapic_nmi_watchdog(); + ret = 1; + } + return ret; +} + +static inline void release_lapic_nmi(void) { } +#endif + +static void do_init_tests(void) +{ +#ifdef CONFIG_PERFCTR_INIT_TESTS + if( reserve_lapic_nmi() >= 0 ) { + perfctr_x86_init_tests(); + release_lapic_nmi(); + } +#endif +} + +static void invalidate_per_cpu_cache(void) +{ + /* + * per_cpu_cache[] is initialised to contain "impossible" + * evntsel values guaranteed to differ from anything accepted + * by perfctr_cpu_update_control(). This way, initialisation of + * a CPU's evntsel MSRs will happen automatically the first time + * perfctr_cpu_write_control() executes on it. + * All-bits-one works for all currently supported processors. + * The memset also sets the ids to -1, which is intentional. 
+ */ + memset(per_cpu_cache, ~0, sizeof per_cpu_cache); +} + +int __init perfctr_cpu_init(void) +{ + int err = -ENODEV; + + preempt_disable(); + + /* RDPMC and RDTSC are on by default. They will be disabled + by the init procedures if necessary. */ + perfctr_info.cpu_features = PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_RDTSC; + + switch( boot_cpu_data.x86_vendor ) { + case X86_VENDOR_AMD: + err = amd_init(); + break; + } + if( err ) { + err = generic_init(); /* last resort */ + if( err ) + goto out; + } + do_init_tests(); +#if 0 + /* + * Put the hardware in a sane state: + * - clear perfctr MSRs + * - set up APIC_LVTPC + * - set CR4.PCE [on permanently due to __flush_tlb_global()] + * - install our default interrupt handler + */ + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features |= X86_CR4_PCE; + perfctr_cpu_init_one(NULL); + smp_call_function(perfctr_cpu_init_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + /* + * Fix up the connection to the local APIC: + * - disable and disconnect the NMI watchdog + * - register our PM callback + */ + disable_nmi_watchdog(); + x86_pm_init(); +#endif + + invalidate_per_cpu_cache(); + + perfctr_info.cpu_khz = perfctr_cpu_khz(); + perfctr_info.tsc_to_cpu_mult = 1; + + out: + preempt_enable(); + return err; +} + +void __exit perfctr_cpu_exit(void) +{ +#if 0 + preempt_disable(); + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features &= ~X86_CR4_PCE; + perfctr_cpu_exit_one(NULL); + smp_call_function(perfctr_cpu_exit_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_exit(); + /* XXX: restart nmi watchdog? */ + preempt_enable(); +#endif +} + +/**************************************************************** + * * + * Hardware reservation. 
* + * * + ****************************************************************/ + +static DECLARE_MUTEX(mutex); +static const char *current_service = 0; + +const char *perfctr_cpu_reserve(const char *service) +{ + const char *ret; + + down(&mutex); + ret = current_service; + if( ret ) + goto out_up; + ret = "unknown driver (oprofile?)"; + if( reserve_lapic_nmi() < 0 ) + goto out_up; + current_service = service; + __module_get(THIS_MODULE); + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features |= X86_CR4_PCE; + on_each_cpu(perfctr_cpu_init_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_init(); + ret = NULL; + out_up: + up(&mutex); + return ret; +} + +void perfctr_cpu_release(const char *service) +{ + down(&mutex); + if( service != current_service ) { + printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", + __FUNCTION__, service, current_service); + goto out_up; + } + /* power down the counters */ + invalidate_per_cpu_cache(); + if( perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC ) + mmu_cr4_features &= ~X86_CR4_PCE; + on_each_cpu(perfctr_cpu_exit_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_exit(); + current_service = 0; + release_lapic_nmi(); + module_put(THIS_MODULE); + out_up: + up(&mutex); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/drivers/perfctr/x86.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/drivers/perfctr/x86.c 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,1720 @@ +/* $Id: x86.c,v 1.127.2.13 2004/09/14 17:56:42 mikpe Exp $ + * x86/x86_64 performance-monitoring counters driver. 
+ * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#include +#define __NO_VERSION__ +#include +#include +#include +#include +#include + +#include +#undef MSR_P6_PERFCTR0 +#undef MSR_IA32_MISC_ENABLE +#include +#include +struct hw_interrupt_type; +#include + +#include "compat.h" +#include "x86_compat.h" +#include "x86_tests.h" + +/* Support for lazy evntsel and perfctr MSR updates. */ +struct per_cpu_cache { /* roughly a subset of perfctr_cpu_state */ + union { + unsigned int p5_cesr; + unsigned int id; /* cache owner id */ + } k1; + struct { + /* NOTE: these caches have physical indices, not virtual */ + unsigned int evntsel[18]; + unsigned int escr[0x3E2-0x3A0]; + unsigned int pebs_enable; + unsigned int pebs_matrix_vert; + } control; +} ____cacheline_aligned; +static struct per_cpu_cache per_cpu_cache[NR_CPUS] __cacheline_aligned; +#define __get_cpu_cache(cpu) (&per_cpu_cache[cpu]) +#define get_cpu_cache() __get_cpu_cache(smp_processor_id()) + +/* Structure for counter snapshots, as 32-bit values. */ +struct perfctr_low_ctrs { + unsigned int tsc; + unsigned int pmc[18]; +}; + +/* Intel P5, Cyrix 6x86MX/MII/III, Centaur WinChip C6/2/3 */ +#define MSR_P5_CESR 0x11 +#define MSR_P5_CTR0 0x12 /* .. 0x13 */ +#define P5_CESR_CPL 0x00C0 +#define P5_CESR_RESERVED (~0x01FF) +#define MII_CESR_RESERVED (~0x05FF) +#define C6_CESR_RESERVED (~0x00FF) + +/* Intel P6, VIA C3 */ +#define MSR_P6_PERFCTR0 0xC1 /* .. 0xC2 */ +#define MSR_P6_EVNTSEL0 0x186 /* .. 0x187 */ +#define P6_EVNTSEL_ENABLE 0x00400000 +#define P6_EVNTSEL_INT 0x00100000 +#define P6_EVNTSEL_CPL 0x00030000 +#define P6_EVNTSEL_RESERVED 0x00280000 +#define VC3_EVNTSEL1_RESERVED (~0x1FF) + +/* AMD K7 */ +#define MSR_K7_EVNTSEL0 0xC0010000 /* .. 0xC0010003 */ +#define MSR_K7_PERFCTR0 0xC0010004 /* .. 
0xC0010007 */ + +/* Intel P4, Intel Pentium M */ +#define MSR_IA32_MISC_ENABLE 0x1A0 +#define MSR_IA32_MISC_ENABLE_PERF_AVAIL (1<<7) /* read-only status bit */ +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1<<12) /* read-only status bit */ + +/* Intel P4 */ +#define MSR_P4_PERFCTR0 0x300 /* .. 0x311 */ +#define MSR_P4_CCCR0 0x360 /* .. 0x371 */ +#define MSR_P4_ESCR0 0x3A0 /* .. 0x3E1, with some gaps */ + +#define MSR_P4_PEBS_ENABLE 0x3F1 +#define P4_PE_REPLAY_TAG_BITS 0x00000607 +#define P4_PE_UOP_TAG 0x01000000 +#define P4_PE_RESERVED 0xFEFFF9F8 /* only allow ReplayTagging */ + +#define MSR_P4_PEBS_MATRIX_VERT 0x3F2 +#define P4_PMV_REPLAY_TAG_BITS 0x00000003 +#define P4_PMV_RESERVED 0xFFFFFFFC + +#define P4_CCCR_OVF 0x80000000 +#define P4_CCCR_CASCADE 0x40000000 +#define P4_CCCR_OVF_PMI_T1 0x08000000 +#define P4_CCCR_OVF_PMI_T0 0x04000000 +#define P4_CCCR_FORCE_OVF 0x02000000 +#define P4_CCCR_ACTIVE_THREAD 0x00030000 +#define P4_CCCR_ENABLE 0x00001000 +#define P4_CCCR_ESCR_SELECT(X) (((X) >> 13) & 0x7) +#define P4_CCCR_EXTENDED_CASCADE 0x00000800 +#define P4_CCCR_RESERVED (0x300007FF|P4_CCCR_OVF|P4_CCCR_OVF_PMI_T1) + +#define P4_ESCR_CPL_T1 0x00000003 +#define P4_ESCR_CPL_T0 0x0000000C +#define P4_ESCR_TAG_ENABLE 0x00000010 +#define P4_ESCR_RESERVED (0x80000000) + +#define P4_FAST_RDPMC 0x80000000 +#define P4_MASK_FAST_RDPMC 0x0000001F /* we only need low 5 bits */ + +#define rdmsr_low(msr,low) \ + __asm__ __volatile__("rdmsr" : "=a"(low) : "c"(msr) : "edx") +#define rdpmc_low(ctr,low) \ + __asm__ __volatile__("rdpmc" : "=a"(low) : "c"(ctr) : "edx") + +static void clear_msr_range(unsigned int base, unsigned int n) +{ + unsigned int i; + + for(i = 0; i < n; ++i) + wrmsr(base+i, 0, 0); +} + +static inline void set_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() | mask); +} + +static inline void clear_in_cr4_local(unsigned int mask) +{ + write_cr4(read_cr4() & ~mask); +} + +static unsigned int new_id(void) +{ + static spinlock_t lock = SPIN_LOCK_UNLOCKED; + 
static unsigned int counter; + int id; + + spin_lock(&lock); + id = ++counter; + spin_unlock(&lock); + return id; +} + +#if !defined(CONFIG_X86_LOCAL_APIC) +#define perfctr_cstatus_has_ictrs(cstatus) 0 +#undef cpu_has_apic +#define cpu_has_apic 0 +#undef apic_write +#define apic_write(reg,vector) do{}while(0) +#endif + +#if defined(CONFIG_SMP) + +static inline void +set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) +{ + state->k1.isuspend_cpu = cpu; +} + +static inline int +is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) +{ + return state->k1.isuspend_cpu == cpu; +} + +static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) +{ + state->k1.isuspend_cpu = NR_CPUS; +} + +#else +static inline void set_isuspend_cpu(struct perfctr_cpu_state *state, int cpu) { } +static inline int is_isuspend_cpu(const struct perfctr_cpu_state *state, int cpu) { return 1; } +static inline void clear_isuspend_cpu(struct perfctr_cpu_state *state) { } +#endif + +/**************************************************************** + * * + * Driver procedures. * + * * + ****************************************************************/ + +/* + * Intel P5 family (Pentium, family code 5). + * - One TSC and two 40-bit PMCs. + * - A single 32-bit CESR (MSR 0x11) controls both PMCs. + * CESR has two halves, each controlling one PMC. + * To keep the API reasonably clean, the user puts 16 bits of + * control data in each counter's evntsel; the driver combines + * these to a single 32-bit CESR value. + * - Overflow interrupts are not available. + * - Pentium MMX added the RDPMC instruction. RDPMC has lower + * overhead than RDMSR and it can be used in user-mode code. + * - The MMX events are not symmetric: some events are only available + * for some PMC, and some event codes denote different events + * depending on which PMCs they control. 
+ */ + +/* shared with MII and C6 */ +static int p5_like_check_control(struct perfctr_cpu_state *state, + unsigned int reserved_bits, int is_c6) +{ + unsigned short cesr_half[2]; + unsigned int pmc, evntsel, i; + + if (state->control.nrictrs != 0 || state->control.nractrs > 2) + return -EINVAL; + cesr_half[0] = 0; + cesr_half[1] = 0; + for(i = 0; i < state->control.nractrs; ++i) { + pmc = state->control.pmc_map[i]; + state->pmc[i].map = pmc; + if (pmc > 1 || cesr_half[pmc] != 0) + return -EINVAL; + evntsel = state->control.evntsel[i]; + /* protect reserved bits */ + if ((evntsel & reserved_bits) != 0) + return -EPERM; + /* the CPL field (if defined) must be non-zero */ + if (!is_c6 && !(evntsel & P5_CESR_CPL)) + return -EINVAL; + cesr_half[pmc] = evntsel; + } + state->k1.id = (cesr_half[1] << 16) | cesr_half[0]; + return 0; +} + +static int p5_check_control(struct perfctr_cpu_state *state, int is_global) +{ + return p5_like_check_control(state, P5_CESR_RESERVED, 0); +} + +/* shared with MII but not C6 */ +static void p5_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int cesr; + + cesr = state->k1.id; + if (!cesr) /* no PMC is on (this test doesn't work on C6) */ + return; + cache = get_cpu_cache(); + if (cache->k1.p5_cesr != cesr) { + cache->k1.p5_cesr = cesr; + wrmsr(MSR_P5_CESR, cesr, 0); + } +} + +static void p5_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + /* The P5 doesn't allocate a cache line on a write miss, so do + a dummy read to avoid a write miss here _and_ a read miss + later in our caller. 
*/ + asm("" : : "r"(ctrs->tsc)); + + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->pmc[i].map; + rdmsr_low(MSR_P5_CTR0+pmc, ctrs->pmc[i]); + } +} + +/* used by all except pre-MMX P5 */ +static void rdpmc_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs *ctrs) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + rdtscl(ctrs->tsc); + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc = state->pmc[i].map; + rdpmc_low(pmc, ctrs->pmc[i]); + } +} + +/* shared with MII and C6 */ +static void p5_clear_counters(void) +{ + clear_msr_range(MSR_P5_CESR, 1+2); +} + +/* + * Cyrix 6x86/MII/III. + * - Same MSR assignments as P5 MMX. Has RDPMC and two 48-bit PMCs. + * - Event codes and CESR formatting as in the plain P5 subset. + * - Many but not all P5 MMX event codes are implemented. + * - Cyrix adds a few more event codes. The event code is widened + * to 7 bits, and Cyrix puts the high bit in CESR bit 10 + * (and CESR bit 26 for PMC1). + */ + +static int mii_check_control(struct perfctr_cpu_state *state, int is_global) +{ + return p5_like_check_control(state, MII_CESR_RESERVED, 0); +} + +/* + * Centaur WinChip C6/2/3. + * - Same MSR assignments as P5 MMX. Has RDPMC and two 40-bit PMCs. + * - CESR is formatted with two halves, like P5. However, there + * are no defined control fields for e.g. CPL selection, and + * there is no defined method for stopping the counters. + * - Only a few event codes are defined. + * - The 64-bit TSC is synthesised from the low 32 bits of the + * two PMCs, and CESR has to be set up appropriately. + * Reprogramming CESR causes RDTSC to yield invalid results. + * (The C6 may also hang in this case, due to C6 erratum I-13.) 
+ * Therefore, using the PMCs on any of these processors requires + * that the TSC is not accessed at all: + * 1. The kernel must be configured or a TSC-less processor, i.e. + * generic 586 or less. + * 2. The "notsc" boot parameter must be passed to the kernel. + * 3. User-space libraries and code must also be configured and + * compiled for a generic 586 or less. + */ + +#if !defined(CONFIG_X86_TSC) +static int c6_check_control(struct perfctr_cpu_state *state, int is_global) +{ + if (state->control.tsc_on) + return -EINVAL; + return p5_like_check_control(state, C6_CESR_RESERVED, 1); +} + +static void c6_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int cesr; + + if (perfctr_cstatus_nractrs(state->cstatus) == 0) /* no PMC is on */ + return; + cache = get_cpu_cache(); + cesr = state->k1.id; + if (cache->k1.p5_cesr != cesr) { + cache->k1.p5_cesr = cesr; + wrmsr(MSR_P5_CESR, cesr, 0); + } +} +#endif + +/* + * Intel P6 family (Pentium Pro, Pentium II, and Pentium III cores, + * and Xeon and Celeron versions of Pentium II and III cores). + * - One TSC and two 40-bit PMCs. + * - One 32-bit EVNTSEL MSR for each PMC. + * - EVNTSEL0 contains a global enable/disable bit. + * That bit is reserved in EVNTSEL1. + * - Each EVNTSEL contains a CPL field. + * - Overflow interrupts are possible, but requires that the + * local APIC is available. Some Mobile P6s have no local APIC. + * - The PMCs cannot be initialised with arbitrary values, since + * wrmsr fills the high bits by sign-extending from bit 31. + * - Most events are symmetric, but a few are not. + */ + +/* shared with K7 */ +static int p6_like_check_control(struct perfctr_cpu_state *state, int is_k7) +{ + unsigned int evntsel, i, nractrs, nrctrs, pmc_mask, pmc; + + nractrs = state->control.nractrs; + nrctrs = nractrs + state->control.nrictrs; + if (nrctrs < nractrs || nrctrs > (is_k7 ? 
4 : 2)) + return -EINVAL; + + pmc_mask = 0; + for(i = 0; i < nrctrs; ++i) { + pmc = state->control.pmc_map[i]; + state->pmc[i].map = pmc; + if (pmc >= (is_k7 ? 4 : 2) || (pmc_mask & (1<control.evntsel[i]; + /* protect reserved bits */ + if (evntsel & P6_EVNTSEL_RESERVED) + return -EPERM; + /* check ENable bit */ + if (is_k7) { + /* ENable bit must be set in each evntsel */ + if (!(evntsel & P6_EVNTSEL_ENABLE)) + return -EINVAL; + } else { + /* only evntsel[0] has the ENable bit */ + if (evntsel & P6_EVNTSEL_ENABLE) { + if (pmc > 0) + return -EPERM; + } else { + if (pmc == 0) + return -EINVAL; + } + } + /* the CPL field must be non-zero */ + if (!(evntsel & P6_EVNTSEL_CPL)) + return -EINVAL; + /* INT bit must be off for a-mode and on for i-mode counters */ + if (evntsel & P6_EVNTSEL_INT) { + if (i < nractrs) + return -EINVAL; + } else { + if (i >= nractrs) + return -EINVAL; + } + } + state->k1.id = new_id(); + return 0; +} + +static int p6_check_control(struct perfctr_cpu_state *state, int is_global) +{ + return p6_like_check_control(state, 0); +} + +#ifdef CONFIG_X86_LOCAL_APIC +/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ +/* shared with K7 and P4 */ +static void p6_like_isuspend(struct perfctr_cpu_state *state, + unsigned int msr_evntsel0) +{ + struct per_cpu_cache *cache; + unsigned int cstatus, nrctrs, i; + int cpu; + + cpu = smp_processor_id(); + set_isuspend_cpu(state, cpu); /* early to limit cpu's live range */ + cache = __get_cpu_cache(cpu); + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { + unsigned int pmc_raw, pmc_idx, now; + pmc_raw = state->pmc[i].map; + /* Note: P4_MASK_FAST_RDPMC is a no-op for P6 and K7. + We don't need to make it into a parameter. */ + pmc_idx = pmc_raw & P4_MASK_FAST_RDPMC; + cache->control.evntsel[pmc_idx] = 0; + /* On P4 this intensionally also clears the CCCR.OVF flag. 
*/ + wrmsr(msr_evntsel0+pmc_idx, 0, 0); + /* P4 erratum N17 does not apply since we read only low 32 bits. */ + rdpmc_low(pmc_raw, now); + state->pmc[i].sum += now - state->pmc[i].start; + state->pmc[i].start = now; + } + /* cache->k1.id is still == state->k1.id */ +} + +/* PRE: perfctr_cstatus_has_ictrs(state->cstatus) != 0 */ +/* shared with K7 and P4 */ +static void p6_like_iresume(const struct perfctr_cpu_state *state, + unsigned int msr_evntsel0, + unsigned int msr_perfctr0) +{ + struct per_cpu_cache *cache; + unsigned int cstatus, nrctrs, i; + int cpu; + + cpu = smp_processor_id(); + cache = __get_cpu_cache(cpu); + if (cache->k1.id == state->k1.id) { + cache->k1.id = 0; /* force reload of cleared EVNTSELs */ + if (is_isuspend_cpu(state, cpu)) + return; /* skip reload of PERFCTRs */ + } + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) { + /* Note: P4_MASK_FAST_RDPMC is a no-op for P6 and K7. + We don't need to make it into a parameter. */ + unsigned int pmc = state->pmc[i].map & P4_MASK_FAST_RDPMC; + /* If the control wasn't ours we must disable the evntsels + before reinitialising the counters, to prevent unexpected + counter increments and missed overflow interrupts. */ + if (cache->control.evntsel[pmc]) { + cache->control.evntsel[pmc] = 0; + wrmsr(msr_evntsel0+pmc, 0, 0); + } + /* P4 erratum N15 does not apply since the CCCR is disabled. 
*/ + wrmsr(msr_perfctr0+pmc, state->pmc[i].start, -1); + } + /* cache->k1.id remains != state->k1.id */ +} + +static void p6_isuspend(struct perfctr_cpu_state *state) +{ + p6_like_isuspend(state, MSR_P6_EVNTSEL0); +} + +static void p6_iresume(const struct perfctr_cpu_state *state) +{ + p6_like_iresume(state, MSR_P6_EVNTSEL0, MSR_P6_PERFCTR0); +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +/* shared with K7 and VC3 */ +static void p6_like_write_control(const struct perfctr_cpu_state *state, + unsigned int msr_evntsel0) +{ + struct per_cpu_cache *cache; + unsigned int nrctrs, i; + + cache = get_cpu_cache(); + if (cache->k1.id == state->k1.id) + return; + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int evntsel = state->control.evntsel[i]; + unsigned int pmc = state->pmc[i].map; + if (evntsel != cache->control.evntsel[pmc]) { + cache->control.evntsel[pmc] = evntsel; + wrmsr(msr_evntsel0+pmc, evntsel, 0); + } + } + cache->k1.id = state->k1.id; +} + +/* shared with VC3, Generic*/ +static void p6_write_control(const struct perfctr_cpu_state *state) +{ + p6_like_write_control(state, MSR_P6_EVNTSEL0); +} + +static void p6_clear_counters(void) +{ + clear_msr_range(MSR_P6_EVNTSEL0, 2); + clear_msr_range(MSR_P6_PERFCTR0, 2); +} + +/* + * AMD K7 family (Athlon, Duron). + * - Somewhat similar to the Intel P6 family. + * - Four 48-bit PMCs. + * - Four 32-bit EVNTSEL MSRs with similar layout as in P6. + * - Completely different MSR assignments :-( + * - Fewer countable events defined :-( + * - The events appear to be completely symmetric. + * - The EVNTSEL MSRs are symmetric since each has its own enable bit. + * - Publicly available documentation is incomplete. + * - K7 model 1 does not have a local APIC. AMD Document #22007 + * Revision J hints that it may use debug interrupts instead. + * + * The K8 has the same hardware layout as the K7. It also has + * better documentation and a different set of available events. 
+ */ + +static int k7_check_control(struct perfctr_cpu_state *state, int is_global) +{ + return p6_like_check_control(state, 1); +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void k7_isuspend(struct perfctr_cpu_state *state) +{ + p6_like_isuspend(state, MSR_K7_EVNTSEL0); +} + +static void k7_iresume(const struct perfctr_cpu_state *state) +{ + p6_like_iresume(state, MSR_K7_EVNTSEL0, MSR_K7_PERFCTR0); +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +static void k7_write_control(const struct perfctr_cpu_state *state) +{ + p6_like_write_control(state, MSR_K7_EVNTSEL0); +} + +static void k7_clear_counters(void) +{ + clear_msr_range(MSR_K7_EVNTSEL0, 4+4); +} + +/* + * VIA C3 family. + * - A Centaur design somewhat similar to the P6/Celeron. + * - PERFCTR0 is an alias for the TSC, and EVNTSEL0 is read-only. + * - PERFCTR1 is 32 bits wide. + * - EVNTSEL1 has no defined control fields, and there is no + * defined method for stopping the counter. + * - According to testing, the reserved fields in EVNTSEL1 have + * no function. We always fill them with zeroes. + * - Only a few event codes are defined. + * - No local APIC or interrupt-mode support. + * - pmc_map[0] must be 1, if nractrs == 1. + */ +static int vc3_check_control(struct perfctr_cpu_state *state, int is_global) +{ + if (state->control.nrictrs || state->control.nractrs > 1) + return -EINVAL; + if (state->control.nractrs == 1) { + if (state->control.pmc_map[0] != 1) + return -EINVAL; + state->pmc[0].map = 1; + if (state->control.evntsel[0] & VC3_EVNTSEL1_RESERVED) + return -EPERM; + state->k1.id = state->control.evntsel[0]; + } else + state->k1.id = 0; + return 0; +} + +static void vc3_clear_counters(void) +{ + /* Not documented, but seems to be default after boot. */ + wrmsr(MSR_P6_EVNTSEL0+1, 0x00070079, 0); +} + +/* + * Intel Pentium 4. + * Current implementation restrictions: + * - No DS/PEBS support. + * + * Known quirks: + * - OVF_PMI+FORCE_OVF counters must have an ireset value of -1. 
+ * This allows the regular overflow check to also handle FORCE_OVF + * counters. Not having this restriction would lead to MAJOR + * complications in the driver's "detect overflow counters" code. + * There is no loss of functionality since the ireset value doesn't + * affect the counter's PMI rate for FORCE_OVF counters. + * - In experiments with FORCE_OVF counters, and regular OVF_PMI + * counters with small ireset values between -8 and -1, it appears + * that the faulting instruction is subjected to a new PMI before + * it can complete, ad infinitum. This occurs even though the driver + * clears the CCCR (and in testing also the ESCR) and invokes a + * user-space signal handler before restoring the CCCR and resuming + * the instruction. + */ + +/* + * Table 15-4 in the IA32 Volume 3 manual contains a 18x8 entry mapping + * from counter/CCCR number (0-17) and ESCR SELECT value (0-7) to the + * actual ESCR MSR number. This mapping contains some repeated patterns, + * so we can compact it to a 4x8 table of MSR offsets: + * + * 1. CCCRs 16 and 17 are mapped just like CCCRs 13 and 14, respectively. + * Thus, we only consider the 16 CCCRs 0-15. + * 2. The CCCRs are organised in pairs, and both CCCRs in a pair use the + * same mapping. Thus, we only consider the 8 pairs 0-7. + * 3. In each pair of pairs, the second odd-numbered pair has the same domain + * as the first even-numbered pair, and the range is 1+ the range of the + * the first even-numbered pair. For example, CCCR(0) and (1) map ESCR + * SELECT(7) to 0x3A0, and CCCR(2) and (3) map it to 0x3A1. + * The only exception is that pair (7) [CCCRs 14 and 15] does not have + * ESCR SELECT(3) in its domain, like pair (6) [CCCRs 12 and 13] has. + * NOTE: Revisions of IA32 Volume 3 older than #245472-007 had an error + * in this table: CCCRs 12, 13, and 16 had their mappings for ESCR SELECT + * values 2 and 3 swapped. + * 4. All MSR numbers are on the form 0x3??. 
Instead of storing these as + * 16-bit numbers, the table only stores the 8-bit offsets from 0x300. + */ + +static const unsigned char p4_cccr_escr_map[4][8] = { + /* 0x00 and 0x01 as is, 0x02 and 0x03 are +1 */ + [0x00/4] { [7] 0xA0, + [6] 0xA2, + [2] 0xAA, + [4] 0xAC, + [0] 0xB2, + [1] 0xB4, + [3] 0xB6, + [5] 0xC8, }, + /* 0x04 and 0x05 as is, 0x06 and 0x07 are +1 */ + [0x04/4] { [0] 0xC0, + [2] 0xC2, + [1] 0xC4, }, + /* 0x08 and 0x09 as is, 0x0A and 0x0B are +1 */ + [0x08/4] { [1] 0xA4, + [0] 0xA6, + [5] 0xA8, + [2] 0xAE, + [3] 0xB0, }, + /* 0x0C, 0x0D, and 0x10 as is, + 0x0E, 0x0F, and 0x11 are +1 except [3] is not in the domain */ + [0x0C/4] { [4] 0xB8, + [5] 0xCC, + [6] 0xE0, + [0] 0xBA, + [2] 0xBC, + [3] 0xBE, + [1] 0xCA, }, +}; + +static unsigned int p4_escr_addr(unsigned int pmc, unsigned int cccr_val) +{ + unsigned int escr_select, pair, escr_offset; + + escr_select = P4_CCCR_ESCR_SELECT(cccr_val); + if (pmc > 0x11) + return 0; /* pmc range error */ + if (pmc > 0x0F) + pmc -= 3; /* 0 <= pmc <= 0x0F */ + pair = pmc / 2; /* 0 <= pair <= 7 */ + escr_offset = p4_cccr_escr_map[pair / 2][escr_select]; + if (!escr_offset || (pair == 7 && escr_select == 3)) + return 0; /* ESCR SELECT range error */ + return escr_offset + (pair & 1) + 0x300; +}; + +static int p4_IQ_ESCR_ok; /* only models <= 2 can use IQ_ESCR{0,1} */ +static int p4_is_ht; /* affects several CCCR & ESCR fields */ +static int p4_extended_cascade_ok; /* only models >= 2 can use extended cascading */ + +static int p4_check_control(struct perfctr_cpu_state *state, int is_global) +{ + unsigned int i, nractrs, nrctrs, pmc_mask; + + nractrs = state->control.nractrs; + nrctrs = nractrs + state->control.nrictrs; + if (nrctrs < nractrs || nrctrs > 18) + return -EINVAL; + + pmc_mask = 0; + for(i = 0; i < nrctrs; ++i) { + unsigned int pmc, cccr_val, escr_val, escr_addr; + /* check that pmc_map[] is well-defined; + pmc_map[i] is what we pass to RDPMC, the PMC itself + is extracted by masking off the FAST_RDPMC 
flag */ + pmc = state->control.pmc_map[i] & ~P4_FAST_RDPMC; + state->pmc[i].map = state->control.pmc_map[i]; + if (pmc >= 18 || (pmc_mask & (1<control.evntsel[i]; + if (cccr_val & P4_CCCR_RESERVED) + return -EPERM; + if (cccr_val & P4_CCCR_EXTENDED_CASCADE) { + if (!p4_extended_cascade_ok) + return -EPERM; + if (!(pmc == 12 || pmc >= 15)) + return -EPERM; + } + if ((cccr_val & P4_CCCR_ACTIVE_THREAD) != P4_CCCR_ACTIVE_THREAD && !p4_is_ht) + return -EINVAL; + if (!(cccr_val & (P4_CCCR_ENABLE | P4_CCCR_CASCADE | P4_CCCR_EXTENDED_CASCADE))) + return -EINVAL; + if (cccr_val & P4_CCCR_OVF_PMI_T0) { + if (i < nractrs) + return -EINVAL; + if ((cccr_val & P4_CCCR_FORCE_OVF) && + state->control.ireset[i] != -1) + return -EINVAL; + } else { + if (i >= nractrs) + return -EINVAL; + } + /* check ESCR contents */ + escr_val = state->control.p4.escr[i]; + if (escr_val & P4_ESCR_RESERVED) + return -EPERM; + if ((escr_val & P4_ESCR_CPL_T1) && (!p4_is_ht || !is_global)) + return -EINVAL; + /* compute and cache ESCR address */ + escr_addr = p4_escr_addr(pmc, cccr_val); + if (!escr_addr) + return -EINVAL; /* ESCR SELECT range error */ + /* IQ_ESCR0 and IQ_ESCR1 only exist in models <= 2 */ + if ((escr_addr & ~0x001) == 0x3BA && !p4_IQ_ESCR_ok) + return -EINVAL; + /* XXX: Two counters could map to the same ESCR. Should we + check that they use the same ESCR value? 
*/ + state->p4_escr_map[i] = escr_addr - MSR_P4_ESCR0; + } + /* check ReplayTagging control (PEBS_ENABLE and PEBS_MATRIX_VERT) */ + if (state->control.p4.pebs_enable) { + if (!nrctrs) + return -EPERM; + if (state->control.p4.pebs_enable & P4_PE_RESERVED) + return -EPERM; + if (!(state->control.p4.pebs_enable & P4_PE_UOP_TAG)) + return -EINVAL; + if (!(state->control.p4.pebs_enable & P4_PE_REPLAY_TAG_BITS)) + return -EINVAL; + if (state->control.p4.pebs_matrix_vert & P4_PMV_RESERVED) + return -EPERM; + if (!(state->control.p4.pebs_matrix_vert & P4_PMV_REPLAY_TAG_BITS)) + return -EINVAL; + } else if (state->control.p4.pebs_matrix_vert) + return -EPERM; + state->k1.id = new_id(); + return 0; +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void p4_isuspend(struct perfctr_cpu_state *state) +{ + return p6_like_isuspend(state, MSR_P4_CCCR0); +} + +static void p4_iresume(const struct perfctr_cpu_state *state) +{ + return p6_like_iresume(state, MSR_P4_CCCR0, MSR_P4_PERFCTR0); +} +#endif /* CONFIG_X86_LOCAL_APIC */ + +static void p4_write_control(const struct perfctr_cpu_state *state) +{ + struct per_cpu_cache *cache; + unsigned int nrctrs, i; + + /* XXX: temporary debug check */ + if (cpu_isset(smp_processor_id(), perfctr_cpus_forbidden_mask) && + perfctr_cstatus_nrctrs(state->cstatus)) + printk(KERN_ERR "%s: BUG! 
CPU %u is in the forbidden set\n", + __FUNCTION__, smp_processor_id()); + cache = get_cpu_cache(); + if (cache->k1.id == state->k1.id) + return; + nrctrs = perfctr_cstatus_nrctrs(state->cstatus); + for(i = 0; i < nrctrs; ++i) { + unsigned int escr_val, escr_off, cccr_val, pmc; + escr_val = state->control.p4.escr[i]; + escr_off = state->p4_escr_map[i]; + if (escr_val != cache->control.escr[escr_off]) { + cache->control.escr[escr_off] = escr_val; + wrmsr(MSR_P4_ESCR0+escr_off, escr_val, 0); + } + cccr_val = state->control.evntsel[i]; + pmc = state->pmc[i].map & P4_MASK_FAST_RDPMC; + if (cccr_val != cache->control.evntsel[pmc]) { + cache->control.evntsel[pmc] = cccr_val; + wrmsr(MSR_P4_CCCR0+pmc, cccr_val, 0); + } + } + if (state->control.p4.pebs_enable != cache->control.pebs_enable) { + cache->control.pebs_enable = state->control.p4.pebs_enable; + wrmsr(MSR_P4_PEBS_ENABLE, state->control.p4.pebs_enable, 0); + } + if (state->control.p4.pebs_matrix_vert != cache->control.pebs_matrix_vert) { + cache->control.pebs_matrix_vert = state->control.p4.pebs_matrix_vert; + wrmsr(MSR_P4_PEBS_MATRIX_VERT, state->control.p4.pebs_matrix_vert, 0); + } + cache->k1.id = state->k1.id; +} + +static void p4_clear_counters(void) +{ + /* MSR 0x3F0 seems to have a default value of 0xFC00, but current + docs doesn't fully define it, so leave it alone for now. */ + /* clear PEBS_ENABLE and PEBS_MATRIX_VERT; they handle both PEBS + and ReplayTagging, and should exist even if PEBS is disabled */ + clear_msr_range(0x3F1, 2); + clear_msr_range(0x3A0, 26); + if (p4_IQ_ESCR_ok) + clear_msr_range(0x3BA, 2); + clear_msr_range(0x3BC, 3); + clear_msr_range(0x3C0, 6); + clear_msr_range(0x3C8, 6); + clear_msr_range(0x3E0, 2); + clear_msr_range(MSR_P4_CCCR0, 18); + clear_msr_range(MSR_P4_PERFCTR0, 18); +} + +/* + * Generic driver for any x86 with a working TSC. 
+ */ + +static int generic_check_control(struct perfctr_cpu_state *state, int is_global) +{ + if (state->control.nractrs || state->control.nrictrs) + return -EINVAL; + return 0; +} + +static void generic_clear_counters(void) +{ +} + +/* + * Driver methods, internal and exported. + * + * Frequently called functions (write_control, read_counters, + * isuspend and iresume) are back-patched to invoke the correct + * processor-specific methods directly, thereby saving the + * overheads of indirect function calls. + * + * Backpatchable call sites must have been "finalised" after + * initialisation. The reason for this is that unsynchronised code + * modification doesn't work in multiprocessor systems, due to + * Intel P6 errata. Consequently, all backpatchable call sites + * must be known and local to this file. + * + * Backpatchable calls must initially be to 'noinline' stubs. + * Otherwise the compiler may inline the stubs, which breaks + * redirect_call() and finalise_backpatching(). + */ + +static int redirect_call_disable; + +static noinline void redirect_call(void *ra, void *to) +{ + /* XXX: make this function __init later */ + if (redirect_call_disable) + printk(KERN_ERR __FILE__ ":%s: unresolved call to %p at %p\n", + __FUNCTION__, to, ra); + /* we can only redirect `call near relative' instructions */ + if (*((unsigned char*)ra - 5) != 0xE8) { + printk(KERN_WARNING __FILE__ ":%s: unable to redirect caller %p to %p\n", + __FUNCTION__, ra, to); + return; + } + *(int*)((char*)ra - 4) = (char*)to - (char*)ra; +} + +static void (*write_control)(const struct perfctr_cpu_state*); +static noinline void perfctr_cpu_write_control(const struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), write_control); + return write_control(state); +} + +static void (*read_counters)(const struct perfctr_cpu_state*, + struct perfctr_low_ctrs*); +static noinline void perfctr_cpu_read_counters(const struct perfctr_cpu_state *state, + struct perfctr_low_ctrs 
*ctrs) +{ + redirect_call(__builtin_return_address(0), read_counters); + return read_counters(state, ctrs); +} + +#ifdef CONFIG_X86_LOCAL_APIC +static void (*cpu_isuspend)(struct perfctr_cpu_state*); +static noinline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), cpu_isuspend); + return cpu_isuspend(state); +} + +static void (*cpu_iresume)(const struct perfctr_cpu_state*); +static noinline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) +{ + redirect_call(__builtin_return_address(0), cpu_iresume); + return cpu_iresume(state); +} + +/* Call perfctr_cpu_ireload() just before perfctr_cpu_resume() to + bypass internal caching and force a reload if the I-mode PMCs. */ +void perfctr_cpu_ireload(struct perfctr_cpu_state *state) +{ +#ifdef CONFIG_SMP + clear_isuspend_cpu(state); +#else + get_cpu_cache()->k1.id = 0; +#endif +} + +/* PRE: the counters have been suspended and sampled by perfctr_cpu_suspend() */ +static int lvtpc_reinit_needed; +unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, pmc, pmc_mask; + + cstatus = state->cstatus; + pmc = perfctr_cstatus_nractrs(cstatus); + nrctrs = perfctr_cstatus_nrctrs(cstatus); + + for(pmc_mask = 0; pmc < nrctrs; ++pmc) { + if ((int)state->pmc[pmc].start >= 0) { /* XXX: ">" ? */ + /* XXX: "+=" to correct for overshots */ + state->pmc[pmc].start = state->control.ireset[pmc]; + pmc_mask |= (1 << pmc); + /* On a P4 we should now clear the OVF flag in the + counter's CCCR. However, p4_isuspend() already + did that as a side-effect of clearing the CCCR + in order to stop the i-mode counters. 
*/ + } + } + if (lvtpc_reinit_needed) + apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); + return pmc_mask; +} + +static inline int check_ireset(const struct perfctr_cpu_state *state) +{ + unsigned int nrctrs, i; + + i = state->control.nractrs; + nrctrs = i + state->control.nrictrs; + for(; i < nrctrs; ++i) + if (state->control.ireset[i] >= 0) + return -EINVAL; + return 0; +} + +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) +{ + unsigned int cstatus, nrctrs, i; + + cstatus = state->cstatus; + nrctrs = perfctr_cstatus_nrctrs(cstatus); + for(i = perfctr_cstatus_nractrs(cstatus); i < nrctrs; ++i) + state->pmc[i].start = state->control.ireset[i]; +} + +static inline void debug_no_imode(const struct perfctr_cpu_state *state) +{ +#ifdef CONFIG_PERFCTR_DEBUG + if (perfctr_cstatus_has_ictrs(state->cstatus)) + printk(KERN_ERR "perfctr/x86.c: BUG! updating control in" + " perfctr %p on cpu %u while it has cstatus %x" + " (pid %d, comm %s)\n", + state, smp_processor_id(), state->cstatus, + current->pid, current->comm); +#endif +} + +#else /* CONFIG_X86_LOCAL_APIC */ +static inline void perfctr_cpu_isuspend(struct perfctr_cpu_state *state) { } +static inline void perfctr_cpu_iresume(const struct perfctr_cpu_state *state) { } +static inline int check_ireset(const struct perfctr_cpu_state *state) { return 0; } +static inline void setup_imode_start_values(struct perfctr_cpu_state *state) { } +static inline void debug_no_imode(const struct perfctr_cpu_state *state) { } +#endif /* CONFIG_X86_LOCAL_APIC */ + +static int (*check_control)(struct perfctr_cpu_state*, int); +int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global) +{ + int err; + + debug_no_imode(state); + clear_isuspend_cpu(state); + state->cstatus = 0; + + /* disallow i-mode counters if we cannot catch the interrupts */ + if (!(perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + && state->control.nrictrs) + return -EPERM; + + err = check_control(state, is_global); + if 
(err < 0) + return err; + err = check_ireset(state); + if (err < 0) + return err; + state->cstatus = perfctr_mk_cstatus(state->control.tsc_on, + state->control.nractrs, + state->control.nrictrs); + setup_imode_start_values(state); + return 0; +} + +void perfctr_cpu_suspend(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + if (perfctr_cstatus_has_ictrs(state->cstatus)) + perfctr_cpu_isuspend(state); + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + state->tsc_sum += now.tsc - state->tsc_start; + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; + /* perfctr_cpu_disable_rdpmc(); */ /* not for x86 */ +} + +void perfctr_cpu_resume(struct perfctr_cpu_state *state) +{ + if (perfctr_cstatus_has_ictrs(state->cstatus)) + perfctr_cpu_iresume(state); + /* perfctr_cpu_enable_rdpmc(); */ /* not for x86 or global-mode */ + perfctr_cpu_write_control(state); + //perfctr_cpu_read_counters(state, &state->start); + { + struct perfctr_low_ctrs now; + unsigned int i, cstatus, nrctrs; + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) + state->tsc_start = now.tsc; + nrctrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nrctrs; ++i) + state->pmc[i].start = now.pmc[i]; + } + /* XXX: if (SMP && start.tsc == now.tsc) ++now.tsc; */ +} + +void perfctr_cpu_sample(struct perfctr_cpu_state *state) +{ + unsigned int i, cstatus, nractrs; + struct perfctr_low_ctrs now; + + perfctr_cpu_read_counters(state, &now); + cstatus = state->cstatus; + if (perfctr_cstatus_has_tsc(cstatus)) { + state->tsc_sum += now.tsc - state->tsc_start; + state->tsc_start = now.tsc; + } + nractrs = perfctr_cstatus_nractrs(cstatus); + for(i = 0; i < nractrs; ++i) { + state->pmc[i].sum += now.pmc[i] - state->pmc[i].start; + state->pmc[i].start = now.pmc[i]; + } +} 
+ +static void (*clear_counters)(void); +static void perfctr_cpu_clear_counters(void) +{ + return clear_counters(); +} + +/**************************************************************** + * * + * Processor detection and initialisation procedures. * + * * + ****************************************************************/ + +static inline void clear_perfctr_cpus_forbidden_mask(void) +{ +#if !defined(perfctr_cpus_forbidden_mask) + cpus_clear(perfctr_cpus_forbidden_mask); +#endif +} + +static inline void set_perfctr_cpus_forbidden_mask(cpumask_t mask) +{ +#if !defined(perfctr_cpus_forbidden_mask) + perfctr_cpus_forbidden_mask = mask; +#endif +} + +/* see comment above at redirect_call() */ +static void __init finalise_backpatching(void) +{ + struct per_cpu_cache *cache; + struct perfctr_cpu_state state; + cpumask_t old_mask; + + old_mask = perfctr_cpus_forbidden_mask; + clear_perfctr_cpus_forbidden_mask(); + + cache = get_cpu_cache(); + memset(cache, 0, sizeof *cache); + memset(&state, 0, sizeof state); + state.cstatus = + (perfctr_info.cpu_features & PERFCTR_FEATURE_PCINT) + ? __perfctr_mk_cstatus(0, 1, 0, 0) + : 0; + perfctr_cpu_sample(&state); + perfctr_cpu_resume(&state); + perfctr_cpu_suspend(&state); + + set_perfctr_cpus_forbidden_mask(old_mask); + + redirect_call_disable = 1; +} + +#ifdef CONFIG_SMP + +cpumask_t perfctr_cpus_forbidden_mask; + +static void __init p4_ht_mask_setup_cpu(void *forbidden) +{ + unsigned int local_apic_physical_id = cpuid_ebx(1) >> 24; + unsigned int logical_processor_id = local_apic_physical_id & 1; + if (logical_processor_id != 0) + /* We rely on cpu_set() being atomic! 
*/ + cpu_set(smp_processor_id(), *(cpumask_t*)forbidden); +} + +static int __init p4_ht_smp_init(void) +{ + cpumask_t forbidden; + unsigned int cpu; + + cpus_clear(forbidden); + smp_call_function(p4_ht_mask_setup_cpu, &forbidden, 1, 1); + p4_ht_mask_setup_cpu(&forbidden); + if (cpus_empty(forbidden)) + return 0; + perfctr_cpus_forbidden_mask = forbidden; + printk(KERN_INFO "perfctr/x86.c: hyper-threaded P4s detected:" + " restricting access for CPUs"); + for(cpu = 0; cpu < NR_CPUS; ++cpu) + if (cpu_isset(cpu, forbidden)) + printk(" %u", cpu); + printk("\n"); + return 0; +} +#else /* SMP */ +#define p4_ht_smp_init() (0) +#endif /* SMP */ + +static int __init p4_ht_init(void) +{ + unsigned int nr_siblings; + + if (!cpu_has_ht) + return 0; + nr_siblings = (cpuid_ebx(1) >> 16) & 0xFF; + if (nr_siblings > 2) { + printk(KERN_WARNING "perfctr/x86.c: hyper-threaded P4s detected:" + " unsupported number of siblings: %u -- bailing out\n", + nr_siblings); + return -ENODEV; + } + if (nr_siblings < 2) + return 0; + p4_is_ht = 1; /* needed even in a UP kernel */ + return p4_ht_smp_init(); +} + +static int __init intel_init(void) +{ + static char p5_name[] __initdata = "Intel P5"; + static char p6_name[] __initdata = "Intel P6"; + static char p4_name[] __initdata = "Intel P4"; + unsigned int misc_enable; + + if (!cpu_has_tsc) + return -ENODEV; + switch (current_cpu_data.x86) { + case 5: + if (cpu_has_mmx) { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5MMX; + read_counters = rdpmc_read_counters; + + /* Avoid Pentium Erratum 74. 
*/ + if (current_cpu_data.x86_model == 4 && + (current_cpu_data.x86_mask == 4 || + (current_cpu_data.x86_mask == 3 && + ((cpuid_eax(1) >> 12) & 0x3) == 1))) + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + } else { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P5; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + read_counters = p5_read_counters; + } + perfctr_set_tests_type(PTT_P5); + perfctr_cpu_name = p5_name; + write_control = p5_write_control; + check_control = p5_check_control; + clear_counters = p5_clear_counters; + return 0; + case 6: + if (current_cpu_data.x86_model == 9 || + current_cpu_data.x86_model == 13) { /* Pentium M */ + /* Pentium M added the MISC_ENABLE MSR from P4. */ + rdmsr_low(MSR_IA32_MISC_ENABLE, misc_enable); + if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL)) + break; + /* Erratum Y3 probably does not apply since we + read only the low 32 bits. */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_PENTM; + } else if (current_cpu_data.x86_model >= 7) { /* PIII */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_PIII; + } else if (current_cpu_data.x86_model >= 3) { /* PII or Celeron */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_PII; + } else { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P6; + + /* Avoid Pentium Pro Erratum 26. 
*/ + if (current_cpu_data.x86_mask < 9) + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + } + perfctr_set_tests_type(PTT_P6); + perfctr_cpu_name = p6_name; + read_counters = rdpmc_read_counters; + write_control = p6_write_control; + check_control = p6_check_control; + clear_counters = p6_clear_counters; +#ifdef CONFIG_X86_LOCAL_APIC + if (cpu_has_apic) { + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + cpu_isuspend = p6_isuspend; + cpu_iresume = p6_iresume; + /* P-M apparently inherited P4's LVTPC auto-masking :-( */ + if (current_cpu_data.x86_model == 9 || + current_cpu_data.x86_model == 13) + lvtpc_reinit_needed = 1; + } +#endif + return 0; + case 15: /* Pentium 4 */ + rdmsr_low(MSR_IA32_MISC_ENABLE, misc_enable); + if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL)) + break; + if (p4_ht_init() != 0) + break; + if (current_cpu_data.x86_model <= 2) + p4_IQ_ESCR_ok = 1; + if (current_cpu_data.x86_model >= 2) + p4_extended_cascade_ok = 1; + if (current_cpu_data.x86_model >= 3) { + /* Model 3 removes IQ_ESCR{0,1} and adds one event. */ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4M3; + } else if (current_cpu_data.x86_model >= 2) { + /* Model 2 changed the ESCR Event Mask programming + details for several events. 
*/ + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4M2; + } else { + perfctr_info.cpu_type = PERFCTR_X86_INTEL_P4; + } + perfctr_set_tests_type(PTT_P4); + perfctr_cpu_name = p4_name; + read_counters = rdpmc_read_counters; + write_control = p4_write_control; + check_control = p4_check_control; + clear_counters = p4_clear_counters; +#ifdef CONFIG_X86_LOCAL_APIC + if (cpu_has_apic) { + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + cpu_isuspend = p4_isuspend; + cpu_iresume = p4_iresume; + lvtpc_reinit_needed = 1; + } +#endif + return 0; + } + return -ENODEV; +} + +static int __init amd_init(void) +{ + static char amd_name[] __initdata = "AMD K7/K8"; + + if (!cpu_has_tsc) + return -ENODEV; + switch (current_cpu_data.x86) { + case 6: /* K7 */ + perfctr_info.cpu_type = PERFCTR_X86_AMD_K7; + break; + case 15: /* K8. Like a K7 with a different event set. */ + if ((current_cpu_data.x86_model > 5) || + (current_cpu_data.x86_model >= 4 && current_cpu_data.x86_mask >= 8)) { + perfctr_info.cpu_type = PERFCTR_X86_AMD_K8C; + } else { + perfctr_info.cpu_type = PERFCTR_X86_AMD_K8; + } + break; + default: + return -ENODEV; + } + perfctr_set_tests_type(PTT_AMD); + perfctr_cpu_name = amd_name; + read_counters = rdpmc_read_counters; + write_control = k7_write_control; + check_control = k7_check_control; + clear_counters = k7_clear_counters; +#ifdef CONFIG_X86_LOCAL_APIC + if (cpu_has_apic) { + perfctr_info.cpu_features |= PERFCTR_FEATURE_PCINT; + cpu_isuspend = k7_isuspend; + cpu_iresume = k7_iresume; + } +#endif + return 0; +} + +static int __init cyrix_init(void) +{ + static char mii_name[] __initdata = "Cyrix 6x86MX/MII/III"; + if (!cpu_has_tsc) + return -ENODEV; + switch (current_cpu_data.x86) { + case 6: /* 6x86MX, MII, or III */ + perfctr_info.cpu_type = PERFCTR_X86_CYRIX_MII; + perfctr_set_tests_type(PTT_P5); + perfctr_cpu_name = mii_name; + read_counters = rdpmc_read_counters; + write_control = p5_write_control; + check_control = mii_check_control; + clear_counters = 
p5_clear_counters; + return 0; + } + return -ENODEV; +} + +static int __init centaur_init(void) +{ +#if !defined(CONFIG_X86_TSC) + static char winchip_name[] __initdata = "WinChip C6/2/3"; +#endif + static char vc3_name[] __initdata = "VIA C3"; + switch (current_cpu_data.x86) { +#if !defined(CONFIG_X86_TSC) + case 5: + switch (current_cpu_data.x86_model) { + case 4: /* WinChip C6 */ + perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_C6; + break; + case 8: /* WinChip 2, 2A, or 2B */ + case 9: /* WinChip 3, a 2A with larger cache and lower voltage */ + perfctr_info.cpu_type = PERFCTR_X86_WINCHIP_2; + break; + default: + return -ENODEV; + } + perfctr_set_tests_type(PTT_WINCHIP); + perfctr_cpu_name = winchip_name; + /* + * TSC must be inaccessible for perfctrs to work. + */ + if (!(read_cr4() & X86_CR4_TSD) || cpu_has_tsc) + return -ENODEV; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDTSC; + read_counters = rdpmc_read_counters; + write_control = c6_write_control; + check_control = c6_check_control; + clear_counters = p5_clear_counters; + return 0; +#endif + case 6: /* VIA C3 */ + if (!cpu_has_tsc) + return -ENODEV; + switch (current_cpu_data.x86_model) { + case 6: /* Cyrix III */ + case 7: /* Samuel 2, Ezra (steppings >= 8) */ + case 8: /* Ezra-T */ + case 9: /* Antaur/Nehemiah */ + break; + default: + return -ENODEV; + } + perfctr_info.cpu_type = PERFCTR_X86_VIA_C3; + perfctr_set_tests_type(PTT_VC3); + perfctr_cpu_name = vc3_name; + read_counters = rdpmc_read_counters; + write_control = p6_write_control; + check_control = vc3_check_control; + clear_counters = vc3_clear_counters; + return 0; + } + return -ENODEV; +} + +static int __init generic_init(void) +{ + static char generic_name[] __initdata = "Generic x86 with TSC"; + if (!cpu_has_tsc) + return -ENODEV; + perfctr_info.cpu_features &= ~PERFCTR_FEATURE_RDPMC; + perfctr_info.cpu_type = PERFCTR_X86_GENERIC; + perfctr_set_tests_type(PTT_GENERIC); + perfctr_cpu_name = generic_name; + check_control = 
generic_check_control; + write_control = p6_write_control; + read_counters = rdpmc_read_counters; + clear_counters = generic_clear_counters; + return 0; +} + +static void perfctr_cpu_invalidate_cache(void) +{ + /* + * per_cpu_cache[] is initialised to contain "impossible" + * evntsel values guaranteed to differ from anything accepted + * by perfctr_cpu_update_control(). + * All-bits-one works for all currently supported processors. + * The memset also sets the ids to -1, which is intentional. + */ + memset(get_cpu_cache(), ~0, sizeof(struct per_cpu_cache)); +} + +static void perfctr_cpu_init_one(void *ignore) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. */ + perfctr_cpu_clear_counters(); + perfctr_cpu_invalidate_cache(); + if (cpu_has_apic) + apic_write(APIC_LVTPC, LOCAL_PERFCTR_VECTOR); + if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + set_in_cr4_local(X86_CR4_PCE); +} + +static void perfctr_cpu_exit_one(void *ignore) +{ + /* PREEMPT note: when called via smp_call_function(), + this is in IRQ context with preemption disabled. 
*/ + perfctr_cpu_clear_counters(); + perfctr_cpu_invalidate_cache(); + if (cpu_has_apic) + apic_write(APIC_LVTPC, APIC_DM_NMI | APIC_LVT_MASKED); + if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + clear_in_cr4_local(X86_CR4_PCE); +} + +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PM) + +static void perfctr_pm_suspend(void) +{ + /* XXX: clear control registers */ + printk("perfctr/x86: PM suspend\n"); +} + +static void perfctr_pm_resume(void) +{ + /* XXX: reload control registers */ + printk("perfctr/x86: PM resume\n"); +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,71) + +#include + +static int perfctr_device_suspend(struct sys_device *dev, u32 state) +{ + perfctr_pm_suspend(); + return 0; +} + +static int perfctr_device_resume(struct sys_device *dev) +{ + perfctr_pm_resume(); + return 0; +} + +static struct sysdev_class perfctr_sysclass = { + set_kset_name("perfctr"), + .resume = perfctr_device_resume, + .suspend = perfctr_device_suspend, +}; + +static struct sys_device device_perfctr = { + .id = 0, + .cls = &perfctr_sysclass, +}; + +static void x86_pm_init(void) +{ + if (sysdev_class_register(&perfctr_sysclass) == 0) + sysdev_register(&device_perfctr); +} + +static void x86_pm_exit(void) +{ + sysdev_unregister(&device_perfctr); + sysdev_class_unregister(&perfctr_sysclass); +} + +#else /* 2.4 kernel */ + +static int x86_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data) +{ + switch (rqst) { + case PM_SUSPEND: + perfctr_pm_suspend(); + break; + case PM_RESUME: + perfctr_pm_resume(); + break; + } + return 0; +} + +static struct pm_dev *x86_pmdev; + +static void x86_pm_init(void) +{ + x86_pmdev = apic_pm_register(PM_SYS_DEV, 0, x86_pm_callback); +} + +static void x86_pm_exit(void) +{ + if (x86_pmdev) { + apic_pm_unregister(x86_pmdev); + x86_pmdev = NULL; + } +} + +#endif /* 2.4 kernel */ + +#else + +static inline void x86_pm_init(void) { } +static inline void x86_pm_exit(void) { } + +#endif /* CONFIG_X86_LOCAL_APIC && CONFIG_PM */ + 
+#ifdef CONFIG_X86_LOCAL_APIC + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,67) +static void disable_lapic_nmi_watchdog(void) +{ +#ifdef CONFIG_PM + if (nmi_pmdev) { + apic_pm_unregister(nmi_pmdev); + nmi_pmdev = 0; + } +#endif +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) +static int reserve_lapic_nmi(void) +{ + int ret = 0; + if (nmi_perfctr_msr) { + nmi_perfctr_msr = 0; + disable_lapic_nmi_watchdog(); + ret = 1; + } + return ret; +} + +static inline void release_lapic_nmi(void) { } +#endif + +#else +static inline int reserve_lapic_nmi(void) { return 0; } +static inline void release_lapic_nmi(void) { } +#endif + +static void do_init_tests(void) +{ +#ifdef CONFIG_PERFCTR_INIT_TESTS + if (reserve_lapic_nmi() >= 0) { + perfctr_x86_init_tests(); + release_lapic_nmi(); + } +#endif +} + +int __init perfctr_cpu_init(void) +{ + int err = -ENODEV; + + preempt_disable(); + + /* RDPMC and RDTSC are on by default. They will be disabled + by the init procedures if necessary. */ + perfctr_info.cpu_features = PERFCTR_FEATURE_RDPMC | PERFCTR_FEATURE_RDTSC; + + if (cpu_has_msr) { + switch (current_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + err = intel_init(); + break; + case X86_VENDOR_AMD: + err = amd_init(); + break; + case X86_VENDOR_CYRIX: + err = cyrix_init(); + break; + case X86_VENDOR_CENTAUR: + err = centaur_init(); + } + } + if (err) { + err = generic_init(); /* last resort */ + if (err) + goto out; + } + do_init_tests(); + finalise_backpatching(); + + perfctr_info.cpu_khz = perfctr_cpu_khz(); + perfctr_info.tsc_to_cpu_mult = 1; + + out: + preempt_enable(); + return err; +} + +void __exit perfctr_cpu_exit(void) +{ +} + +/**************************************************************** + * * + * Hardware reservation. 
* + * * + ****************************************************************/ + +static DECLARE_MUTEX(mutex); +static const char *current_service = 0; + +const char *perfctr_cpu_reserve(const char *service) +{ + const char *ret; + + down(&mutex); + ret = current_service; + if (ret) + goto out_up; + ret = "unknown driver (oprofile?)"; + if (reserve_lapic_nmi() < 0) + goto out_up; + current_service = service; + __module_get(THIS_MODULE); + if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + mmu_cr4_features |= X86_CR4_PCE; + on_each_cpu(perfctr_cpu_init_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_init(); + ret = NULL; + out_up: + up(&mutex); + return ret; +} + +void perfctr_cpu_release(const char *service) +{ + down(&mutex); + if (service != current_service) { + printk(KERN_ERR "%s: attempt by %s to release while reserved by %s\n", + __FUNCTION__, service, current_service); + goto out_up; + } + /* power down the counters */ + if (perfctr_info.cpu_features & PERFCTR_FEATURE_RDPMC) + mmu_cr4_features &= ~X86_CR4_PCE; + on_each_cpu(perfctr_cpu_exit_one, NULL, 1, 1); + perfctr_cpu_set_ihandler(NULL); + x86_pm_exit(); + current_service = 0; + release_lapic_nmi(); + module_put(THIS_MODULE); + out_up: + up(&mutex); +} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/apic.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/apic.h 2004-04-03 22:38:23.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/apic.h 2004-11-18 20:59:11.000000000 -0500 @@ -99,6 +99,8 @@ #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +extern unsigned int nmi_perfctr_msr; + #endif /* CONFIG_X86_LOCAL_APIC */ #endif /* __ASM_APIC_H */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-default/irq_vectors.h =================================================================== --- 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-default/irq_vectors.h 2004-11-11 10:27:51.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-default/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 @@ -59,14 +59,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-pc9800/irq_vectors.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-pc9800/irq_vectors.h 2004-04-03 22:36:16.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-pc9800/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 @@ -59,14 +59,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-visws/irq_vectors.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/mach-visws/irq_vectors.h 2004-04-03 22:36:18.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/mach-visws/irq_vectors.h 2004-11-18 20:59:11.000000000 -0500 @@ -35,14 +35,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef +#define FIRST_SYSTEM_VECTOR 0xee #define TIMER_IRQ 0 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/processor.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/processor.h 2004-11-11 10:27:40.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/processor.h 2004-11-18 20:59:11.000000000 -0500 @@ -427,6 +427,8 @@ unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *io_bitmap_ptr; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD { \ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/perfctr.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-i386/perfctr.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-i386/perfctr.h 2004-11-18 20:59:11.000000000 
-0500 @@ -0,0 +1,189 @@ +/* $Id: perfctr.h,v 1.48.2.4 2004/08/02 22:24:58 mikpe Exp $ + * x86/x86_64 Performance-Monitoring Counters driver + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#ifndef _ASM_I386_PERFCTR_H +#define _ASM_I386_PERFCTR_H + +struct perfctr_sum_ctrs { + unsigned long long tsc; + unsigned long long pmc[18]; +}; + +struct perfctr_cpu_control { + unsigned int tsc_on; + unsigned int nractrs; /* # of a-mode counters */ + unsigned int nrictrs; /* # of i-mode counters */ + unsigned int pmc_map[18]; + unsigned int evntsel[18]; /* one per counter, even on P5 */ + struct { + unsigned int escr[18]; + unsigned int pebs_enable; /* for replay tagging */ + unsigned int pebs_matrix_vert; /* for replay tagging */ + } p4; + int ireset[18]; /* < 0, for i-mode counters */ + unsigned int _reserved1; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +struct perfctr_cpu_state { + unsigned int cstatus; + struct { /* k1 is opaque in the user ABI */ + unsigned int id; + int isuspend_cpu; + } k1; + /* The two tsc fields must be inlined. Placing them in a + sub-struct causes unwanted internal padding on x86-64. 
*/ + unsigned int tsc_start; + unsigned long long tsc_sum; + struct { + unsigned int map; + unsigned int start; + unsigned long long sum; + } pmc[18]; /* the size is not part of the user ABI */ +#ifdef __KERNEL__ + struct perfctr_cpu_control control; + unsigned int p4_escr_map[18]; +#endif +}; + +/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs + which should have less overhead in most cases */ + +static inline +unsigned int __perfctr_mk_cstatus(unsigned int tsc_on, unsigned int have_ictrs, + unsigned int nrictrs, unsigned int nractrs) +{ + return (tsc_on<<31) | (have_ictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs; +} + +static inline +unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, + unsigned int nrictrs) +{ + return __perfctr_mk_cstatus(tsc_on, nrictrs, nrictrs, nractrs); +} + +static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus) +{ + return cstatus; +} + +static inline int perfctr_cstatus_has_tsc(unsigned int cstatus) +{ + return (int)cstatus < 0; /* test and jump on sign */ +} + +static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus) +{ + return cstatus & 0x7F; /* and with imm8 */ +} + +static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus) +{ + return (cstatus >> 8) & 0x7F; +} + +static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus) +{ + return cstatus & (0x7F << 16); +} + +/* + * 'struct siginfo' support for perfctr overflow signals. + * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask + * describing which perfctrs overflowed is put in si_pmc_ovf_mask. + * A bitmask is used since more than one perfctr can have overflowed + * by the time the interrupt handler runs. + * + * glibc's doesn't seem to define __SI_FAULT or __SI_CODE(), + * and including as well may cause redefinition errors, + * so the user and kernel values are different #defines here. 
+ */ +#ifdef __KERNEL__ +#define SI_PMC_OVF (__SI_FAULT|'P') +#else +#define SI_PMC_OVF ('P') +#endif +#define si_pmc_ovf_mask _sifields._pad[0] /* XXX: use an unsigned field later */ + +/* version number for user-visible CPU-specific data */ +#define PERFCTR_CPU_VERSION 0x0501 /* 5.1 */ + +#ifdef __KERNEL__ + +#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE) + +/* Driver init/exit. */ +extern int perfctr_cpu_init(void); +extern void perfctr_cpu_exit(void); + +/* CPU type name. */ +extern char *perfctr_cpu_name; + +/* Hardware reservation. */ +extern const char *perfctr_cpu_reserve(const char *service); +extern void perfctr_cpu_release(const char *service); + +/* PRE: state has no running interrupt-mode counters. + Check that the new control data is valid. + Update the driver's private control data. + is_global should be zero for per-process counters and non-zero + for global-mode counters. This matters for HT P4s, alas. + Returns a negative error code if the control data is invalid. */ +extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global); + +/* Read a-mode counters. Subtract from start and accumulate into sums. + Must be called with preemption disabled. */ +extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); + +/* Write control registers. Read a-mode counters into start. + Must be called with preemption disabled. */ +extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); + +/* Perform an efficient combined suspend/resume operation. + Must be called with preemption disabled. */ +extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); + +/* The type of a perfctr overflow interrupt handler. + It will be called in IRQ context, with preemption disabled. */ +typedef void (*perfctr_ihandler_t)(unsigned long pc); + +#if defined(CONFIG_X86_LOCAL_APIC) +#define PERFCTR_INTERRUPT_SUPPORT 1 +#endif + +/* Operations related to overflow interrupt handling. 
*/ +#ifdef CONFIG_X86_LOCAL_APIC +extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); +extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); +extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); +#else +static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } +#endif + +#if defined(CONFIG_SMP) +/* CPUs in `perfctr_cpus_forbidden_mask' must not use the + performance-monitoring counters. TSC use is unrestricted. + This is needed to prevent resource conflicts on hyper-threaded P4s. + The declaration of `perfctr_cpus_forbidden_mask' is in the driver's + private compat.h, since it needs to handle cpumask_t incompatibilities. */ +#define PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED 1 +#endif + +#endif /* CONFIG_PERFCTR */ + +#if defined(CONFIG_KPERFCTR) && defined(CONFIG_X86_LOCAL_APIC) +asmlinkage void perfctr_interrupt(struct pt_regs*); +#define perfctr_vector_init() \ + set_intr_gate(LOCAL_PERFCTR_VECTOR, perfctr_interrupt) +#else +#define perfctr_vector_init() do{}while(0) +#endif + +#endif /* __KERNEL__ */ + +#endif /* _ASM_I386_PERFCTR_H */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/processor.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-ppc/processor.h 2004-11-11 10:27:19.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/processor.h 2004-11-18 20:59:11.000000000 -0500 @@ -119,6 +119,7 @@ unsigned long vrsave; int used_vr; /* set if process has used altivec */ #endif /* CONFIG_ALTIVEC */ + struct vperfctr *perfctr; /* performance counters */ }; #define ARCH_MIN_TASKALIGN 16 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/perfctr.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-ppc/perfctr.h 1969-12-31 19:00:00.000000000 -0500 +++ 
linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-ppc/perfctr.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,171 @@ +/* $Id: perfctr.h,v 1.3.2.2 2004/06/21 22:38:30 mikpe Exp $ + * PPC32 Performance-Monitoring Counters driver + * + * Copyright (C) 2004 Mikael Pettersson + */ +#ifndef _ASM_PPC_PERFCTR_H +#define _ASM_PPC_PERFCTR_H + +/* perfctr_info.cpu_type values */ +#define PERFCTR_PPC_GENERIC 0 +#define PERFCTR_PPC_604 1 +#define PERFCTR_PPC_604e 2 +#define PERFCTR_PPC_750 3 +#define PERFCTR_PPC_7400 4 +#define PERFCTR_PPC_7450 5 + +struct perfctr_sum_ctrs { + unsigned long long tsc; + unsigned long long pmc[6]; +}; + +struct perfctr_cpu_control { + unsigned int tsc_on; + unsigned int nractrs; /* # of a-mode counters */ + unsigned int nrictrs; /* # of i-mode counters */ + unsigned int pmc_map[6]; + unsigned int evntsel[6]; /* one per counter, even on P5 */ + int ireset[6]; /* [0,0x7fffffff], for i-mode counters */ + struct { + unsigned int mmcr0; /* sans PMC{1,2}SEL */ + unsigned int mmcr2; /* only THRESHMULT */ + /* IABR/DABR/BAMR not supported */ + } ppc; + unsigned int _reserved1; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +struct perfctr_cpu_state { + unsigned int cstatus; + struct { /* k1 is opaque in the user ABI */ + unsigned int id; + int isuspend_cpu; + } k1; + /* The two tsc fields must be inlined. Placing them in a + sub-struct causes unwanted internal padding on x86-64. 
*/ + unsigned int tsc_start; + unsigned long long tsc_sum; + struct { + unsigned int map; + unsigned int start; + unsigned long long sum; + } pmc[6]; /* the size is not part of the user ABI */ +#ifdef __KERNEL__ + unsigned int ppc_mmcr[3]; + struct perfctr_cpu_control control; +#endif +}; + +/* cstatus is a re-encoding of control.tsc_on/nractrs/nrictrs + which should have less overhead in most cases */ +/* XXX: ppc driver internally also uses cstatus&(1<<30) */ + +static inline +unsigned int perfctr_mk_cstatus(unsigned int tsc_on, unsigned int nractrs, + unsigned int nrictrs) +{ + return (tsc_on<<31) | (nrictrs<<16) | ((nractrs+nrictrs)<<8) | nractrs; +} + +static inline unsigned int perfctr_cstatus_enabled(unsigned int cstatus) +{ + return cstatus; +} + +static inline int perfctr_cstatus_has_tsc(unsigned int cstatus) +{ + return (int)cstatus < 0; /* test and jump on sign */ +} + +static inline unsigned int perfctr_cstatus_nractrs(unsigned int cstatus) +{ + return cstatus & 0x7F; /* and with imm8 */ +} + +static inline unsigned int perfctr_cstatus_nrctrs(unsigned int cstatus) +{ + return (cstatus >> 8) & 0x7F; +} + +static inline unsigned int perfctr_cstatus_has_ictrs(unsigned int cstatus) +{ + return cstatus & (0x7F << 16); +} + +/* + * 'struct siginfo' support for perfctr overflow signals. + * In unbuffered mode, si_code is set to SI_PMC_OVF and a bitmask + * describing which perfctrs overflowed is put in si_pmc_ovf_mask. + * A bitmask is used since more than one perfctr can have overflowed + * by the time the interrupt handler runs. + * + * glibc's doesn't seem to define __SI_FAULT or __SI_CODE(), + * and including as well may cause redefinition errors, + * so the user and kernel values are different #defines here. 
+ */ +#ifdef __KERNEL__ +#define SI_PMC_OVF (__SI_FAULT|'P') +#else +#define SI_PMC_OVF ('P') +#endif +#define si_pmc_ovf_mask _sifields._pad[0] /* XXX: use an unsigned field later */ + +/* version number for user-visible CPU-specific data */ +#define PERFCTR_CPU_VERSION 0 /* XXX: not yet cast in stone */ + +#ifdef __KERNEL__ + +#if defined(CONFIG_PERFCTR) || defined(CONFIG_PERFCTR_MODULE) + +/* Driver init/exit. */ +extern int perfctr_cpu_init(void); +extern void perfctr_cpu_exit(void); + +/* CPU type name. */ +extern char *perfctr_cpu_name; + +/* Hardware reservation. */ +extern const char *perfctr_cpu_reserve(const char *service); +extern void perfctr_cpu_release(const char *service); + +/* PRE: state has no running interrupt-mode counters. + Check that the new control data is valid. + Update the driver's private control data. + Returns a negative error code if the control data is invalid. */ +extern int perfctr_cpu_update_control(struct perfctr_cpu_state *state, int is_global); + +/* Read a-mode counters. Subtract from start and accumulate into sums. + Must be called with preemption disabled. */ +extern void perfctr_cpu_suspend(struct perfctr_cpu_state *state); + +/* Write control registers. Read a-mode counters into start. + Must be called with preemption disabled. */ +extern void perfctr_cpu_resume(struct perfctr_cpu_state *state); + +/* Perform an efficient combined suspend/resume operation. + Must be called with preemption disabled. */ +extern void perfctr_cpu_sample(struct perfctr_cpu_state *state); + +/* The type of a perfctr overflow interrupt handler. + It will be called in IRQ context, with preemption disabled. */ +typedef void (*perfctr_ihandler_t)(unsigned long pc); + +/* XXX: The hardware supports overflow interrupts, but the driver + does not yet enable this due to an erratum in 750/7400/7410. 
*/ +//#define PERFCTR_INTERRUPT_SUPPORT 1 + +#ifdef PERFCTR_INTERRUPT_SUPPORT +extern void perfctr_cpu_set_ihandler(perfctr_ihandler_t); +extern void perfctr_cpu_ireload(struct perfctr_cpu_state*); +extern unsigned int perfctr_cpu_identify_overflow(struct perfctr_cpu_state*); +#else +static inline void perfctr_cpu_set_ihandler(perfctr_ihandler_t x) { } +#endif + +#endif /* CONFIG_PERFCTR */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_PPC_PERFCTR_H */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apic.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/apic.h 2004-11-11 10:28:46.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/apic.h 2004-11-18 20:59:11.000000000 -0500 @@ -96,6 +96,8 @@ #define NMI_LOCAL_APIC 2 #define NMI_INVALID 3 +extern unsigned int nmi_perfctr_msr; + #endif /* CONFIG_X86_LOCAL_APIC */ #define esr_disable 0 Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/hw_irq.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/hw_irq.h 2004-11-11 10:28:31.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/hw_irq.h 2004-11-18 20:59:11.000000000 -0500 @@ -65,14 +65,15 @@ * sources per level' errata. */ #define LOCAL_TIMER_VECTOR 0xef +#define LOCAL_PERFCTR_VECTOR 0xee /* - * First APIC vector available to drivers: (vectors 0x30-0xee) + * First APIC vector available to drivers: (vectors 0x30-0xed) * we start at 0x31 to spread out vectors evenly between priority * levels. 
(0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR 0x31 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in irq.h */ #ifndef __ASSEMBLY__ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/irq.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/irq.h 2004-11-11 10:28:46.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/irq.h 2004-11-18 20:59:11.000000000 -0500 @@ -29,7 +29,7 @@ */ #define NR_VECTORS 256 -#define FIRST_SYSTEM_VECTOR 0xef /* duplicated in hw_irq.h */ +#define FIRST_SYSTEM_VECTOR 0xee /* duplicated in hw_irq.h */ #ifdef CONFIG_PCI_USE_VECTOR #define NR_IRQS FIRST_SYSTEM_VECTOR Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/processor.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/processor.h 2004-11-11 10:27:40.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/processor.h 2004-11-18 20:59:11.000000000 -0500 @@ -258,6 +258,8 @@ unsigned long *io_bitmap_ptr; /* cached TLS descriptors. 
*/ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; +/* performance counters */ + struct vperfctr *perfctr; }; #define INIT_THREAD {} Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/perfctr.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/asm-x86_64/perfctr.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/asm-x86_64/perfctr.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1 @@ +#include Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/perfctr.h =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/include/linux/perfctr.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/include/linux/perfctr.h 2004-11-18 20:59:11.000000000 -0500 @@ -0,0 +1,246 @@ +/* $Id: perfctr.h,v 1.69 2004/02/20 21:31:02 mikpe Exp $ + * Performance-Monitoring Counters driver + * + * Copyright (C) 1999-2004 Mikael Pettersson + */ +#ifndef _LINUX_PERFCTR_H +#define _LINUX_PERFCTR_H + +#ifdef CONFIG_KPERFCTR /* don't break archs without */ + +#include + +struct perfctr_info { + unsigned int abi_version; + char driver_version[32]; + unsigned int cpu_type; + unsigned int cpu_features; + unsigned int cpu_khz; + unsigned int tsc_to_cpu_mult; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +struct perfctr_cpu_mask { + unsigned int nrwords; + unsigned int mask[1]; /* actually 'nrwords' */ +}; + +/* abi_version values: Lower 16 bits contain the CPU data version, upper + 16 bits contain the API version. Each half has a major version in its + upper 8 bits, and a minor version in its lower 8 bits. 
*/ +#define PERFCTR_API_VERSION 0x0501 /* 5.1 */ +#define PERFCTR_ABI_VERSION ((PERFCTR_API_VERSION<<16)|PERFCTR_CPU_VERSION) + +/* cpu_type values */ +#define PERFCTR_X86_GENERIC 0 /* any x86 with rdtsc */ +#define PERFCTR_X86_INTEL_P5 1 /* no rdpmc */ +#define PERFCTR_X86_INTEL_P5MMX 2 +#define PERFCTR_X86_INTEL_P6 3 +#define PERFCTR_X86_INTEL_PII 4 +#define PERFCTR_X86_INTEL_PIII 5 +#define PERFCTR_X86_CYRIX_MII 6 +#define PERFCTR_X86_WINCHIP_C6 7 /* no rdtsc */ +#define PERFCTR_X86_WINCHIP_2 8 /* no rdtsc */ +#define PERFCTR_X86_AMD_K7 9 +#define PERFCTR_X86_VIA_C3 10 /* no pmc0 */ +#define PERFCTR_X86_INTEL_P4 11 /* model 0 and 1 */ +#define PERFCTR_X86_INTEL_P4M2 12 /* model 2 */ +#define PERFCTR_X86_AMD_K8 13 +#define PERFCTR_X86_INTEL_PENTM 14 /* Pentium M */ +#define PERFCTR_X86_AMD_K8C 15 /* Revision C */ +#define PERFCTR_X86_INTEL_P4M3 16 /* model 3 and above */ + +/* cpu_features flag bits */ +#define PERFCTR_FEATURE_RDPMC 0x01 +#define PERFCTR_FEATURE_RDTSC 0x02 +#define PERFCTR_FEATURE_PCINT 0x04 + +/* user's view of mmap:ed virtual perfctr */ +struct vperfctr_state { + struct perfctr_cpu_state cpu_state; +}; + +/* parameter in VPERFCTR_CONTROL command */ +struct vperfctr_control { + int si_signo; + struct perfctr_cpu_control cpu_control; + unsigned int preserve; + unsigned int _reserved1; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +/* parameter in GPERFCTR_CONTROL command */ +struct gperfctr_cpu_control { + unsigned int cpu; + struct perfctr_cpu_control cpu_control; + unsigned int _reserved1; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +/* returned by GPERFCTR_READ command */ +struct gperfctr_cpu_state { + unsigned int cpu; + struct perfctr_cpu_control cpu_control; + struct perfctr_sum_ctrs sum; + unsigned int _reserved1; + unsigned int _reserved2; + unsigned int _reserved3; + unsigned int _reserved4; +}; + +/* buffer for encodings of most of the above structs */ 
+struct perfctr_struct_buf { + unsigned int rdsize; + unsigned int wrsize; + unsigned int buffer[1]; /* actually 'max(rdsize,wrsize)' */ +}; + +#include +#define _PERFCTR_IOCTL 0xD0 /* 'P'+128, currently unassigned */ + +#define PERFCTR_ABI _IOR(_PERFCTR_IOCTL,0,unsigned int) +#define PERFCTR_INFO _IOR(_PERFCTR_IOCTL,1,struct perfctr_struct_buf) +#define PERFCTR_CPUS _IOWR(_PERFCTR_IOCTL,2,struct perfctr_cpu_mask) +#define PERFCTR_CPUS_FORBIDDEN _IOWR(_PERFCTR_IOCTL,3,struct perfctr_cpu_mask) +#define VPERFCTR_CREAT _IO(_PERFCTR_IOCTL,6)/*int tid*/ +#define VPERFCTR_OPEN _IO(_PERFCTR_IOCTL,7)/*int tid*/ + +#define VPERFCTR_READ_SUM _IOR(_PERFCTR_IOCTL,8,struct perfctr_struct_buf) +#define VPERFCTR_UNLINK _IO(_PERFCTR_IOCTL,9) +#define VPERFCTR_CONTROL _IOW(_PERFCTR_IOCTL,10,struct perfctr_struct_buf) +#define VPERFCTR_IRESUME _IO(_PERFCTR_IOCTL,11) +#define VPERFCTR_READ_CONTROL _IOR(_PERFCTR_IOCTL,12,struct perfctr_struct_buf) + +#define GPERFCTR_CONTROL _IOWR(_PERFCTR_IOCTL,16,struct perfctr_struct_buf) +#define GPERFCTR_READ _IOWR(_PERFCTR_IOCTL,17,struct perfctr_struct_buf) +#define GPERFCTR_STOP _IO(_PERFCTR_IOCTL,18) +#define GPERFCTR_START _IO(_PERFCTR_IOCTL,19)/*unsigned int*/ + +#ifdef __KERNEL__ +extern struct perfctr_info perfctr_info; +extern int sys_perfctr_abi(unsigned int*); +extern int sys_perfctr_info(struct perfctr_struct_buf*); +extern int sys_perfctr_cpus(struct perfctr_cpu_mask*); +extern int sys_perfctr_cpus_forbidden(struct perfctr_cpu_mask*); +#endif /* __KERNEL__ */ + +#endif /* CONFIG_KPERFCTR */ + +#ifdef __KERNEL__ + +/* Needed for perfctr_set_cpus_allowed() prototype. */ +#include +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) && !defined(HAVE_CPUMASK_T) +typedef unsigned long cpumask_t; +#define PERFCTR_HAVE_CPUMASK_T +#endif + +#ifdef CONFIG_PERFCTR_VIRTUAL + +/* + * Virtual per-process performance-monitoring counters. 
+ */ +struct vperfctr; /* opaque */ + +/* process management operations */ +extern struct vperfctr *__vperfctr_copy(struct vperfctr*); +extern void __vperfctr_exit(struct vperfctr*); +extern void __vperfctr_suspend(struct vperfctr*); +extern void __vperfctr_resume(struct vperfctr*); +extern void __vperfctr_sample(struct vperfctr*); +extern void __vperfctr_set_cpus_allowed(struct task_struct*, struct vperfctr*, cpumask_t); + +#ifdef CONFIG_PERFCTR_MODULE +extern struct vperfctr_stub { + struct module *owner; + void (*exit)(struct vperfctr*); + void (*suspend)(struct vperfctr*); + void (*resume)(struct vperfctr*); + void (*sample)(struct vperfctr*); +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + void (*set_cpus_allowed)(struct task_struct*, struct vperfctr*, cpumask_t); +#endif +} vperfctr_stub; +extern void _vperfctr_exit(struct vperfctr*); +#define _vperfctr_suspend(x) vperfctr_stub.suspend((x)) +#define _vperfctr_resume(x) vperfctr_stub.resume((x)) +#define _vperfctr_sample(x) vperfctr_stub.sample((x)) +#define _vperfctr_set_cpus_allowed(x,y,z) (*vperfctr_stub.set_cpus_allowed)((x),(y),(z)) +#else /* !CONFIG_PERFCTR_MODULE */ +#define _vperfctr_exit(x) __vperfctr_exit((x)) +#define _vperfctr_suspend(x) __vperfctr_suspend((x)) +#define _vperfctr_resume(x) __vperfctr_resume((x)) +#define _vperfctr_sample(x) __vperfctr_sample((x)) +#define _vperfctr_set_cpus_allowed(x,y,z) __vperfctr_set_cpus_allowed((x),(y),(z)) +#endif /* CONFIG_PERFCTR_MODULE */ + +static inline void perfctr_copy_thread(struct thread_struct *thread) +{ + thread->perfctr = NULL; +} + +static inline void perfctr_exit_thread(struct thread_struct *thread) +{ + struct vperfctr *perfctr; + perfctr = thread->perfctr; + if( perfctr ) + _vperfctr_exit(perfctr); +} + +static inline void perfctr_suspend_thread(struct thread_struct *prev) +{ + struct vperfctr *perfctr; + perfctr = prev->perfctr; + if( perfctr ) + _vperfctr_suspend(perfctr); +} + +static inline void perfctr_resume_thread(struct thread_struct *next) 
+{ + struct vperfctr *perfctr; + perfctr = next->perfctr; + if( perfctr ) + _vperfctr_resume(perfctr); +} + +static inline void perfctr_sample_thread(struct thread_struct *thread) +{ + struct vperfctr *perfctr; + perfctr = thread->perfctr; + if( perfctr ) + _vperfctr_sample(perfctr); +} + +static inline void perfctr_set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) +{ +#if PERFCTR_CPUS_FORBIDDEN_MASK_NEEDED + struct vperfctr *perfctr; + + task_lock(p); + perfctr = p->thread.perfctr; + if( perfctr ) + _vperfctr_set_cpus_allowed(p, perfctr, new_mask); + task_unlock(p); +#endif +} + +#else /* !CONFIG_PERFCTR_VIRTUAL */ + +static inline void perfctr_copy_thread(struct thread_struct *t) { } +static inline void perfctr_exit_thread(struct thread_struct *t) { } +static inline void perfctr_suspend_thread(struct thread_struct *t) { } +static inline void perfctr_resume_thread(struct thread_struct *t) { } +static inline void perfctr_sample_thread(struct thread_struct *t) { } +static inline void perfctr_set_cpus_allowed(struct task_struct *p, cpumask_t m) { } + +#endif /* CONFIG_PERFCTR_VIRTUAL */ + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_PERFCTR_H */ Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sched.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/sched.c 2004-11-11 10:28:49.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/sched.c 2004-11-18 20:59:11.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -3304,6 +3305,8 @@ migration_req_t req; runqueue_t *rq; + perfctr_set_cpus_allowed(p, new_mask); + rq = task_rq_lock(p, &flags); if (any_online_cpu(new_mask) == NR_CPUS) { ret = -EINVAL; Index: linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/timer.c =================================================================== --- linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891.orig/kernel/timer.c 
2004-11-11 10:28:46.000000000 -0500 +++ linux-2.6.5-SLES9_SP1_BRANCH_2004111114454891/kernel/timer.c 2004-11-18 20:59:11.000000000 -0500 @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -844,6 +845,7 @@ do_process_times(p, user, system); do_it_virt(p, user); do_it_prof(p); + perfctr_sample_thread(&p->thread); } /*