2 From: Ray Bryant <raybry@sgi.com>
4 This version of the lockmeter patch has been updated to work with
5 the out-of-line spinlocks that were recently merged into the mainline.
6 Basically what is done here is that if CONFIG_LOCKMETER is set, then the
7 lock routines in kernel/spinlock.c are not used and the corresponding
8 versions in kernel/lockmeter.c are used instead. The former set of
9 lock routines call _raw_ spin lock code defined by the architecture;
10 the latter call _metered_ versions of the spinlock routines, which in
11 turn call the _raw_ routines. The versions in the two files will have
12 to be kept in sync manually, but given that the lock APIs don't change
13 very often, that should be ok. The lockmeter.c versions are written
14 so that the address of the original caller is passed to the _metered_
15 lock routines; otherwise all lock requests would look like they were
16 coming from the lockmeter.c versions of the lock routine.
18 I've had trouble testing this on an Altix so have really only tested this
19 on i386. But the changes are almost exclusively in machine independent
20 code so that should be ok. I did do some fixup in the Sparc64 arch files,
21 so someone with such a box should test this stuff.
23 The existing lockstat routine will work with this patch, although we have
24 had to add some "#include <errno.h>" lines to that program to get it to
25 compile properly. An updated version of lockstat will be posted to
26 oss.sgi.com in the near future.
28 Signed-off-by: Ray Bryant <raybry@sgi.com>
30 =========================================================================
33 ia64 CONFIG_LOCKMETER fix
35 From: John Hawkes <hawkes@babylon.engr.sgi.com>
37 The 2.6.3-mm4 patch seems to have the CONFIG_LOCKMETER in the wrong spot
38 for ia64. When I make this change, I can build and run an ia64
46 From: Alexander Nyberg <alexn@telia.com>
48 This is basically a cut and paste from i386 code. At some places however
49 some unresolved addresses at places like [0x1000211eb38] shows up, which is
50 a bit weird. I'm hoping for a comment from any of the SGI guys, as the
51 code is so similar to i386 I don't know if problem lies below or in the
53 Signed-off-by: Andrew Morton <akpm@osdl.org>
54 Index: linux/arch/i386/Kconfig.debug
55 ===================================================================
56 --- linux.orig/arch/i386/Kconfig.debug
57 +++ linux/arch/i386/Kconfig.debug
58 @@ -67,6 +67,13 @@ config SCHEDSTATS
59 application, you can say N to avoid the very slight overhead
63 + bool "Kernel lock metering"
66 + Say Y to enable kernel lock metering, which adds overhead to SMP locks,
67 + but allows you to see various statistics using the lockstat command.
69 config X86_FIND_SMP_CONFIG
71 depends on X86_LOCAL_APIC || X86_VOYAGER
72 Index: linux/arch/i386/lib/dec_and_lock.c
73 ===================================================================
74 --- linux.orig/arch/i386/lib/dec_and_lock.c
75 +++ linux/arch/i386/lib/dec_and_lock.c
77 #include <linux/spinlock.h>
78 #include <asm/atomic.h>
80 +#ifndef ATOMIC_DEC_AND_LOCK
81 int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
84 @@ -38,3 +39,5 @@ slow_path:
90 Index: linux/arch/ia64/Kconfig.debug
91 ===================================================================
92 --- linux.orig/arch/ia64/Kconfig.debug
93 +++ linux/arch/ia64/Kconfig.debug
94 @@ -72,4 +72,11 @@ config SYSVIPC_COMPAT
95 depends on COMPAT && SYSVIPC
99 + bool "Kernel lock metering"
102 + Say Y to enable kernel lock metering, which adds overhead to SMP locks,
103 + but allows you to see various statistics using the lockstat command.
106 Index: linux/arch/ia64/lib/dec_and_lock.c
107 ===================================================================
108 --- linux.orig/arch/ia64/lib/dec_and_lock.c
109 +++ linux/arch/ia64/lib/dec_and_lock.c
111 #include <linux/spinlock.h>
112 #include <asm/atomic.h>
114 +#ifndef CONFIG_LOCKMETER
116 * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these
117 * operations have to be done atomically, so that the count doesn't drop to zero without
118 @@ -40,3 +41,4 @@ atomic_dec_and_lock (atomic_t *refcount,
121 EXPORT_SYMBOL(atomic_dec_and_lock);
123 Index: linux/arch/sparc64/Kconfig.debug
124 ===================================================================
125 --- linux.orig/arch/sparc64/Kconfig.debug
126 +++ linux/arch/sparc64/Kconfig.debug
127 @@ -33,12 +33,19 @@ config DEBUG_BOOTMEM
128 depends on DEBUG_KERNEL
129 bool "Debug BOOTMEM initialization"
132 + bool "Kernel lock metering"
133 + depends on SMP && !PREEMPT
135 + Say Y to enable kernel lock metering, which adds overhead to SMP locks,
136 + but allows you to see various statistics using the lockstat command.
138 # We have a custom atomic_dec_and_lock() implementation but it's not
139 # compatible with spinlock debugging so we need to fall back on
140 # the generic version in that case.
143 - depends on SMP && !DEBUG_SPINLOCK
144 + depends on SMP && !DEBUG_SPINLOCK && !LOCKMETER
148 Index: linux/arch/x86_64/Kconfig.debug
149 ===================================================================
150 --- linux.orig/arch/x86_64/Kconfig.debug
151 +++ linux/arch/x86_64/Kconfig.debug
152 @@ -66,4 +66,11 @@ config IOMMU_LEAK
153 #config X86_REMOTE_DEBUG
154 # bool "kgdb debugging stub"
157 + bool "Kernel lock metering"
160 + Say Y to enable kernel lock metering, which adds overhead to SMP locks,
161 + but allows you to see various statistics using the lockstat command.
164 Index: linux/arch/x86_64/lib/dec_and_lock.c
165 ===================================================================
166 --- linux.orig/arch/x86_64/lib/dec_and_lock.c
167 +++ linux/arch/x86_64/lib/dec_and_lock.c
169 #include <linux/spinlock.h>
170 #include <asm/atomic.h>
172 +#ifndef ATOMIC_DEC_AND_LOCK
173 int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
176 @@ -38,3 +39,4 @@ slow_path:
181 Index: linux/fs/proc/proc_misc.c
182 ===================================================================
183 --- linux.orig/fs/proc/proc_misc.c
184 +++ linux/fs/proc/proc_misc.c
185 @@ -578,6 +578,36 @@ static void create_seq_entry(char *name,
186 entry->proc_fops = f;
189 +#ifdef CONFIG_LOCKMETER
190 +extern ssize_t get_lockmeter_info(char *, size_t, loff_t *);
191 +extern ssize_t put_lockmeter_info(const char *, size_t);
192 +extern int get_lockmeter_info_size(void);
195 + * This function accesses lock metering information.
197 +static ssize_t read_lockmeter(struct file *file, char *buf,
198 + size_t count, loff_t *ppos)
200 + return get_lockmeter_info(buf, count, ppos);
204 + * Writing to /proc/lockmeter resets the counters
206 +static ssize_t write_lockmeter(struct file * file, const char * buf,
207 + size_t count, loff_t *ppos)
209 + return put_lockmeter_info(buf, count);
212 +static struct file_operations proc_lockmeter_operations = {
214 + read: read_lockmeter,
215 + write: write_lockmeter,
217 +#endif /* CONFIG_LOCKMETER */
219 void __init proc_misc_init(void)
221 struct proc_dir_entry *entry;
222 @@ -638,6 +668,13 @@ void __init proc_misc_init(void)
224 entry->proc_fops = &proc_sysrq_trigger_operations;
226 +#ifdef CONFIG_LOCKMETER
227 + entry = create_proc_entry("lockmeter", S_IWUSR | S_IRUGO, NULL);
229 + entry->proc_fops = &proc_lockmeter_operations;
230 + entry->size = get_lockmeter_info_size();
235 extern struct file_operations ppc_htab_operations;
236 Index: linux/include/asm-alpha/lockmeter.h
237 ===================================================================
238 --- linux.orig/include/asm-alpha/lockmeter.h
239 +++ linux/include/asm-alpha/lockmeter.h
242 + * Written by John Hawkes (hawkes@sgi.com)
243 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
245 + * Modified by Peter Rival (frival@zk3.dec.com)
248 +#ifndef _ALPHA_LOCKMETER_H
249 +#define _ALPHA_LOCKMETER_H
251 +#include <asm/hwrpb.h>
252 +#define CPU_CYCLE_FREQUENCY hwrpb->cycle_freq
254 +#define get_cycles64() get_cycles()
256 +#define THIS_CPU_NUMBER smp_processor_id()
258 +#include <linux/version.h>
260 +#define SPINLOCK_MAGIC_INIT /**/
263 + * Macros to cache and retrieve an index value inside of a lock
264 + * these macros assume that there are less than 65536 simultaneous
265 + * (read mode) holders of a rwlock.
266 + * We also assume that the hash table has less than 32767 entries.
267 + * the high order bit is used for write locking a rw_lock
268 + * Note: although these defines and macros are the same as what is being used
269 + * in include/asm-i386/lockmeter.h, they are present here to easily
270 + * allow an alternate Alpha implementation.
273 + * instrumented spinlock structure -- never used to allocate storage
274 + * only used in macros below to overlay a spinlock_t
276 +typedef struct inst_spinlock_s {
277 + /* remember, Alpha is little endian */
278 + unsigned short lock;
279 + unsigned short index;
281 +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv
282 +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index
285 + * macros to cache and retrieve an index value in a read/write lock
286 + * as well as the cpu where a reader busy period started
287 + * we use the 2nd word (the debug word) for this, so require the
288 + * debug word to be present
291 + * instrumented rwlock structure -- never used to allocate storage
292 + * only used in macros below to overlay a rwlock_t
294 +typedef struct inst_rwlock_s {
296 + unsigned short index;
297 + unsigned short cpu;
299 +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv
300 +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index
301 +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv
302 +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu
305 + * return true if rwlock is write locked
306 + * (note that other lock attempts can cause the lock value to be negative)
308 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) (((inst_rwlock_t *)rwlock_ptr)->lock & 1)
309 +#define IABS(x) ((x) > 0 ? (x) : -(x))
311 +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr)
312 +extern inline int rwlock_readers(rwlock_t *rwlock_ptr)
314 + int tmp = (int) ((inst_rwlock_t *)rwlock_ptr)->lock;
315 + /* readers subtract 2, so we have to: */
316 + /* - andnot off a possible writer (bit 0) */
317 + /* - get the absolute value */
318 + /* - divide by 2 (right shift by one) */
319 + /* to find the number of readers */
320 + if (tmp == 0) return(0);
321 + else return(IABS(tmp & ~1)>>1);
324 +#endif /* _ALPHA_LOCKMETER_H */
325 Index: linux/include/asm-alpha/spinlock.h
326 ===================================================================
327 --- linux.orig/include/asm-alpha/spinlock.h
328 +++ linux/include/asm-alpha/spinlock.h
330 #include <linux/kernel.h>
331 #include <asm/current.h>
333 +#ifdef CONFIG_LOCKMETER
334 +#undef DEBUG_SPINLOCK
339 * Simple spin lock operations. There are two variants, one clears IRQ's
340 @@ -96,9 +100,18 @@ static inline int _raw_spin_trylock(spin
343 volatile unsigned int write_lock:1, read_counter:31;
344 +#ifdef CONFIG_LOCKMETER
345 + /* required for LOCKMETER since all bits in lock are used */
346 + /* need this storage for CPU and lock INDEX ............. */
349 } /*__attribute__((aligned(32)))*/ rwlock_t;
351 +#ifdef CONFIG_LOCKMETER
352 +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 }
354 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
357 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
358 #define rwlock_is_locked(x) (*(volatile int *)(x) != 0)
359 @@ -193,4 +206,41 @@ static inline void _raw_read_unlock(rwlo
360 : "m" (*lock) : "memory");
363 +#ifdef CONFIG_LOCKMETER
364 +static inline int _raw_write_trylock(rwlock_t *lock)
368 + __asm__ __volatile__(
375 + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result)
376 + : "m" (*(volatile int *)lock)
382 +static inline int _raw_read_trylock(rwlock_t *lock)
384 + unsigned long temp,result;
386 + __asm__ __volatile__(
393 + : "=m" (*(volatile int *)lock), "=&r" (temp), "=&r" (result)
394 + : "m" (*(volatile int *)lock)
398 +#endif /* CONFIG_LOCKMETER */
400 #endif /* _ALPHA_SPINLOCK_H */
401 Index: linux/include/asm-i386/lockmeter.h
402 ===================================================================
403 --- linux.orig/include/asm-i386/lockmeter.h
404 +++ linux/include/asm-i386/lockmeter.h
407 + * Copyright (C) 1999,2000 Silicon Graphics, Inc.
409 + * Written by John Hawkes (hawkes@sgi.com)
410 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
412 + * Modified by Ray Bryant (raybry@us.ibm.com)
413 + * Changes Copyright (C) 2000 IBM, Inc.
414 + * Added save of index in spinlock_t to improve efficiency
415 + * of "hold" time reporting for spinlocks.
416 + * Added support for hold time statistics for read and write
418 + * Moved machine dependent code here from include/lockmeter.h.
422 +#ifndef _I386_LOCKMETER_H
423 +#define _I386_LOCKMETER_H
425 +#include <asm/spinlock.h>
426 +#include <asm/rwlock.h>
428 +#include <linux/version.h>
431 +extern unsigned long cpu_khz;
432 +#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000)
434 +#define CPU_CYCLE_FREQUENCY 450000000
437 +#define THIS_CPU_NUMBER smp_processor_id()
440 + * macros to cache and retrieve an index value inside of a spin lock
441 + * these macros assume that there are less than 65536 simultaneous
442 + * (read mode) holders of a rwlock. Not normally a problem!!
443 + * we also assume that the hash table has less than 65535 entries.
446 + * instrumented spinlock structure -- never used to allocate storage
447 + * only used in macros below to overlay a spinlock_t
449 +typedef struct inst_spinlock_s {
450 + /* remember, Intel is little endian */
451 + unsigned short lock;
452 + unsigned short index;
454 +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv
455 +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index
458 + * macros to cache and retrieve an index value in a read/write lock
459 + * as well as the cpu where a reader busy period started
460 + * we use the 2nd word (the debug word) for this, so require the
461 + * debug word to be present
464 + * instrumented rwlock structure -- never used to allocate storage
465 + * only used in macros below to overlay a rwlock_t
467 +typedef struct inst_rwlock_s {
469 + unsigned short index;
470 + unsigned short cpu;
472 +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv
473 +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index
474 +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv
475 +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu
478 + * return the number of readers for a rwlock_t
480 +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr)
482 +extern inline int rwlock_readers(rwlock_t *rwlock_ptr)
484 + int tmp = (int) rwlock_ptr->lock;
485 + /* read and write lock attempts may cause the lock value to temporarily */
486 + /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */
487 + /* is -1 because it was write locked and somebody tried to read lock it */
488 + /* or if it is -1 because it was read locked and somebody tried to write*/
489 + /* lock it. ........................................................... */
491 + tmp = (int) rwlock_ptr->lock;
493 + if (tmp == 0) return(0);
494 + else return(RW_LOCK_BIAS-tmp);
498 + * return true if rwlock is write locked
499 + * (note that other lock attempts can cause the lock value to be negative)
501 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0)
502 +#define IABS(x) ((x) > 0 ? (x) : -(x))
503 +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0)
505 +/* this is a lot of typing just to get gcc to emit "rdtsc" */
506 +static inline long long get_cycles64 (void)
516 + rdtsc(longlong.intint.eax,longlong.intint.edx);
517 + return longlong.intlong;
520 +#endif /* _I386_LOCKMETER_H */
521 Index: linux/include/asm-i386/spinlock.h
522 ===================================================================
523 --- linux.orig/include/asm-i386/spinlock.h
524 +++ linux/include/asm-i386/spinlock.h
525 @@ -163,6 +163,11 @@ static inline void _raw_spin_lock_flags
528 volatile unsigned int lock;
529 +#ifdef CONFIG_LOCKMETER
530 + /* required for LOCKMETER since all bits in lock are used */
531 + /* and we need this storage for CPU and lock INDEX */
532 + unsigned lockmeter_magic;
534 #ifdef CONFIG_DEBUG_SPINLOCK
537 @@ -170,11 +175,19 @@ typedef struct {
539 #define RWLOCK_MAGIC 0xdeaf1eed
541 +#ifdef CONFIG_LOCKMETER
542 +#ifdef CONFIG_DEBUG_SPINLOCK
543 +#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC
545 +#define RWLOCK_MAGIC_INIT , 0
547 +#else /* !CONFIG_LOCKMETER */
548 #ifdef CONFIG_DEBUG_SPINLOCK
549 #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
551 #define RWLOCK_MAGIC_INIT /* */
553 +#endif /* !CONFIG_LOCKMETER */
555 #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
557 @@ -212,6 +225,16 @@ static inline void _raw_write_lock(rwloc
558 #define _raw_read_unlock(rw) asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
559 #define _raw_write_unlock(rw) asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
561 +static inline int _raw_read_trylock(rwlock_t *lock)
563 + atomic_t *count = (atomic_t *)lock;
565 + if (atomic_read(count) >= 0)
571 static inline int _raw_write_trylock(rwlock_t *lock)
573 atomic_t *count = (atomic_t *)lock;
574 @@ -221,4 +244,47 @@ static inline int _raw_write_trylock(rwl
578 +#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK)
579 +extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc);
580 +extern void _metered_spin_unlock(spinlock_t *lock);
583 + * Matches what is in arch/i386/lib/dec_and_lock.c, except this one is
584 + * "static inline" so that the spin_lock(), if actually invoked, is charged
585 + * against the real caller, not against the catch-all atomic_dec_and_lock
587 +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
593 + counter = atomic_read(atomic);
594 + newcount = counter-1;
599 + asm volatile("lock; cmpxchgl %1,%2"
601 + :"r" (newcount), "m" (atomic->counter), "0" (counter));
603 + /* If the above failed, "eax" will have changed */
604 + if (newcount != counter)
610 + _metered_spin_lock(lock, __builtin_return_address(0));
611 + if (atomic_dec_and_test(atomic))
613 + _metered_spin_unlock(lock);
618 +#define ATOMIC_DEC_AND_LOCK
621 #endif /* __ASM_SPINLOCK_H */
622 Index: linux/include/asm-ia64/lockmeter.h
623 ===================================================================
624 --- linux.orig/include/asm-ia64/lockmeter.h
625 +++ linux/include/asm-ia64/lockmeter.h
628 + * Copyright (C) 1999,2000 Silicon Graphics, Inc.
630 + * Written by John Hawkes (hawkes@sgi.com)
631 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
634 +#ifndef _IA64_LOCKMETER_H
635 +#define _IA64_LOCKMETER_H
637 +#ifdef local_cpu_data
638 +#define CPU_CYCLE_FREQUENCY local_cpu_data->itc_freq
640 +#define CPU_CYCLE_FREQUENCY my_cpu_data.itc_freq
642 +#define get_cycles64() get_cycles()
644 +#define THIS_CPU_NUMBER smp_processor_id()
647 + * macros to cache and retrieve an index value inside of a lock
648 + * these macros assume that there are less than 65536 simultaneous
649 + * (read mode) holders of a rwlock.
650 + * we also assume that the hash table has less than 32767 entries.
653 + * instrumented spinlock structure -- never used to allocate storage
654 + * only used in macros below to overlay a spinlock_t
656 +typedef struct inst_spinlock_s {
657 + /* remember, Intel is little endian */
658 + volatile unsigned short lock;
659 + volatile unsigned short index;
661 +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv
662 +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index
665 + * macros to cache and retrieve an index value in a read/write lock
666 + * as well as the cpu where a reader busy period started
667 + * we use the 2nd word (the debug word) for this, so require the
668 + * debug word to be present
671 + * instrumented rwlock structure -- never used to allocate storage
672 + * only used in macros below to overlay a rwlock_t
674 +typedef struct inst_rwlock_s {
675 + volatile int read_counter:31;
676 + volatile int write_lock:1;
677 + volatile unsigned short index;
678 + volatile unsigned short cpu;
680 +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv
681 +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index
682 +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv
683 +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu
686 + * return the number of readers for a rwlock_t
688 +#define RWLOCK_READERS(rwlock_ptr) ((rwlock_ptr)->read_counter)
691 + * return true if rwlock is write locked
692 + * (note that other lock attempts can cause the lock value to be negative)
694 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->write_lock)
695 +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->read_counter)
697 +#endif /* _IA64_LOCKMETER_H */
699 Index: linux/include/asm-ia64/spinlock.h
700 ===================================================================
701 --- linux.orig/include/asm-ia64/spinlock.h
702 +++ linux/include/asm-ia64/spinlock.h
703 @@ -116,8 +116,18 @@ do { \
705 volatile unsigned int read_counter : 31;
706 volatile unsigned int write_lock : 1;
707 +#ifdef CONFIG_LOCKMETER
708 + /* required for LOCKMETER since all bits in lock are used */
709 + /* and we need this storage for CPU and lock INDEX */
710 + unsigned lockmeter_magic;
714 +#ifdef CONFIG_LOCKMETER
715 +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0 }
717 #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
720 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
721 #define rwlock_is_locked(x) (*(volatile int *) (x) != 0)
722 @@ -133,6 +143,48 @@ do { \
726 +#ifdef CONFIG_LOCKMETER
728 + * HACK: This works, but still have a timing window that affects performance:
729 + * we see that no one owns the Write lock, then someone else grabs for Write
730 + * lock before we do a read_lock().
731 + * This means that on rare occasions our read_lock() will stall and spin-wait
732 + * until we acquire for Read, instead of simply returning a trylock failure.
734 +static inline int _raw_read_trylock(rwlock_t *rw)
736 + if (rw->write_lock) {
739 + _raw_read_lock(rw);
744 +static inline int _raw_write_trylock(rwlock_t *rw)
746 + if (!(rw->write_lock)) {
747 + /* isn't currently write-locked... that looks promising... */
748 + if (test_and_set_bit(31, rw) == 0) {
749 + /* now it is write-locked by me... */
750 + if (rw->read_counter) {
751 + /* really read-locked, so release write-lock and fail */
754 + /* we've got the write-lock, no read-lockers... success! */
762 + /* falls through ... fails to write-lock */
768 #define _raw_read_unlock(rw) \
770 rwlock_t *__read_lock_ptr = (rw); \
771 @@ -196,4 +248,25 @@ do { \
772 clear_bit(31, (x)); \
775 +#ifdef CONFIG_LOCKMETER
776 +extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc);
777 +extern void _metered_spin_unlock(spinlock_t *lock);
780 + * Use a less efficient, and inline, atomic_dec_and_lock() if lockmetering
781 + * so we can see the callerPC of who is actually doing the spin_lock().
782 + * Otherwise, all we see is the generic rollup of all locks done by
783 + * atomic_dec_and_lock().
785 +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
787 + _metered_spin_lock(lock, __builtin_return_address(0));
788 + if (atomic_dec_and_test(atomic))
790 + _metered_spin_unlock(lock);
793 +#define ATOMIC_DEC_AND_LOCK
796 #endif /* _ASM_IA64_SPINLOCK_H */
797 Index: linux/include/asm-mips/lockmeter.h
798 ===================================================================
799 --- linux.orig/include/asm-mips/lockmeter.h
800 +++ linux/include/asm-mips/lockmeter.h
803 + * Copyright (C) 1999,2000 Silicon Graphics, Inc.
805 + * Written by John Hawkes (hawkes@sgi.com)
806 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
807 + * Ported to mips32 for Asita Technologies
808 + * by D.J. Barrow ( dj.barrow@asitatechnologies.com )
810 +#ifndef _ASM_LOCKMETER_H
811 +#define _ASM_LOCKMETER_H
813 +/* do_gettimeoffset is a function pointer on mips */
814 +/* & it is not included by <linux/time.h> */
815 +#include <asm/time.h>
816 +#include <linux/time.h>
817 +#include <asm/div64.h>
819 +#define SPINLOCK_MAGIC_INIT /* */
821 +#define CPU_CYCLE_FREQUENCY get_cpu_cycle_frequency()
823 +#define THIS_CPU_NUMBER smp_processor_id()
825 +static uint32_t cpu_cycle_frequency = 0;
827 +static uint32_t get_cpu_cycle_frequency(void)
829 + /* a total hack, slow and invasive, but ... it works */
831 + uint32_t start_cycles;
834 + if (cpu_cycle_frequency == 0) { /* uninitialized */
835 + do_gettimeofday(&tv);
836 + sec = tv.tv_sec; /* set up to catch the tv_sec rollover */
837 + while (sec == tv.tv_sec) { do_gettimeofday(&tv); }
838 + sec = tv.tv_sec; /* rolled over to a new sec value */
839 + start_cycles = get_cycles();
840 + while (sec == tv.tv_sec) { do_gettimeofday(&tv); }
841 + cpu_cycle_frequency = get_cycles() - start_cycles;
844 + return cpu_cycle_frequency;
847 +extern struct timeval xtime;
849 +static uint64_t get_cycles64(void)
851 + static uint64_t last_get_cycles64 = 0;
854 + unsigned long usec, usec_offset;
857 + sec = xtime.tv_sec;
858 + usec = xtime.tv_usec;
859 + usec_offset = do_gettimeoffset();
860 + if ((xtime.tv_sec != sec) ||
861 + (xtime.tv_usec != usec)||
862 + (usec_offset >= 20000))
865 + ret = ((uint64_t)(usec + usec_offset) * cpu_cycle_frequency);
866 + /* We can't do a normal 64 bit division on mips without libgcc.a */
867 + do_div(ret,1000000);
868 + ret += ((uint64_t)sec * cpu_cycle_frequency);
870 + /* XXX why does time go backwards? do_gettimeoffset? general time adj? */
871 + if (ret <= last_get_cycles64)
872 + ret = last_get_cycles64+1;
873 + last_get_cycles64 = ret;
879 + * macros to cache and retrieve an index value inside of a lock
880 + * these macros assume that there are less than 65536 simultaneous
881 + * (read mode) holders of a rwlock.
882 + * we also assume that the hash table has less than 32767 entries.
883 + * the high order bit is used for write locking a rw_lock
885 +#define INDEX_MASK 0x7FFF0000
886 +#define READERS_MASK 0x0000FFFF
887 +#define INDEX_SHIFT 16
888 +#define PUT_INDEX(lockp,index) \
889 + lockp->lock = (((lockp->lock) & ~INDEX_MASK) | (index) << INDEX_SHIFT)
890 +#define GET_INDEX(lockp) \
891 + (((lockp->lock) & INDEX_MASK) >> INDEX_SHIFT)
894 + * macros to cache and retrieve an index value in a read/write lock
895 + * as well as the cpu where a reader busy period started
896 + * we use the 2nd word (the debug word) for this, so require the
897 + * debug word to be present
900 + * instrumented rwlock structure -- never used to allocate storage
901 + * only used in macros below to overlay a rwlock_t
903 +typedef struct inst_rwlock_s {
905 + unsigned short index;
906 + unsigned short cpu;
908 +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv
909 +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index
910 +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv
911 +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu
914 + * return the number of readers for a rwlock_t
916 +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr)
918 +extern inline int rwlock_readers(rwlock_t *rwlock_ptr)
920 + int tmp = (int) rwlock_ptr->lock;
921 + return (tmp >= 0) ? tmp : 0;
924 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock < 0)
925 +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock > 0)
927 +#endif /* _ASM_LOCKMETER_H */
928 Index: linux/include/asm-mips/spinlock.h
929 ===================================================================
930 --- linux.orig/include/asm-mips/spinlock.h
931 +++ linux/include/asm-mips/spinlock.h
932 @@ -92,9 +92,18 @@ static inline unsigned int _raw_spin_try
935 volatile unsigned int lock;
936 +#ifdef CONFIG_LOCKMETER
937 + /* required for LOCKMETER since all bits in lock are used */
938 + /* and we need this storage for CPU and lock INDEX */
939 + unsigned lockmeter_magic;
943 +#ifdef CONFIG_LOCKMETER
944 +#define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0 }
946 #define RW_LOCK_UNLOCKED (rwlock_t) { 0 }
949 #define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
951 Index: linux/include/asm-sparc64/lockmeter.h
952 ===================================================================
953 --- linux.orig/include/asm-sparc64/lockmeter.h
954 +++ linux/include/asm-sparc64/lockmeter.h
957 + * Copyright (C) 2000 Anton Blanchard (anton@linuxcare.com)
958 + * Copyright (C) 2003 David S. Miller (davem@redhat.com)
961 +#ifndef _SPARC64_LOCKMETER_H
962 +#define _SPARC64_LOCKMETER_H
964 +#include <linux/smp.h>
965 +#include <asm/spinlock.h>
966 +#include <asm/timer.h>
967 +#include <asm/timex.h>
969 +/* Actually, this is not the CPU frequency but the system tick
970 + * frequency which is good enough for lock metering.
972 +#define CPU_CYCLE_FREQUENCY (timer_tick_offset * HZ)
973 +#define THIS_CPU_NUMBER smp_processor_id()
975 +#define PUT_INDEX(lock_ptr,indexv) (lock_ptr)->index = (indexv)
976 +#define GET_INDEX(lock_ptr) (lock_ptr)->index
978 +#define PUT_RWINDEX(rwlock_ptr,indexv) (rwlock_ptr)->index = (indexv)
979 +#define GET_RWINDEX(rwlock_ptr) (rwlock_ptr)->index
980 +#define PUT_RW_CPU(rwlock_ptr,cpuv) (rwlock_ptr)->cpu = (cpuv)
981 +#define GET_RW_CPU(rwlock_ptr) (rwlock_ptr)->cpu
983 +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr)
985 +extern inline int rwlock_readers(rwlock_t *rwlock_ptr)
987 + signed int tmp = rwlock_ptr->lock;
995 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) < 0)
996 +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((signed int)((rwlock_ptr)->lock) > 0)
998 +#define get_cycles64() get_cycles()
1000 +#endif /* _SPARC64_LOCKMETER_H */
1001 Index: linux/include/asm-x86_64/lockmeter.h
1002 ===================================================================
1003 --- linux.orig/include/asm-x86_64/lockmeter.h
1004 +++ linux/include/asm-x86_64/lockmeter.h
1007 + * Copyright (C) 1999,2000 Silicon Graphics, Inc.
1009 + * Written by John Hawkes (hawkes@sgi.com)
1010 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
1012 + * Modified by Ray Bryant (raybry@us.ibm.com)
1013 + * Changes Copyright (C) 2000 IBM, Inc.
1014 + * Added save of index in spinlock_t to improve efficiency
1015 + * of "hold" time reporting for spinlocks.
1016 + * Added support for hold time statistics for read and write
1018 + * Moved machine dependent code here from include/lockmeter.h.
1022 +#ifndef _X8664_LOCKMETER_H
1023 +#define _X8664_LOCKMETER_H
1025 +#include <asm/spinlock.h>
1026 +#include <asm/rwlock.h>
1028 +#include <linux/version.h>
1031 +extern unsigned int cpu_khz;
1032 +#define CPU_CYCLE_FREQUENCY (cpu_khz * 1000)
1034 +#define CPU_CYCLE_FREQUENCY 450000000
1037 +#define THIS_CPU_NUMBER smp_processor_id()
1040 + * macros to cache and retrieve an index value inside of a spin lock
1041 + * these macros assume that there are less than 65536 simultaneous
1042 + * (read mode) holders of a rwlock. Not normally a problem!!
1043 + * we also assume that the hash table has less than 65535 entries.
1046 + * instrumented spinlock structure -- never used to allocate storage
1047 + * only used in macros below to overlay a spinlock_t
1049 +typedef struct inst_spinlock_s {
1050 + /* remember, Intel is little endian */
1051 + unsigned short lock;
1052 + unsigned short index;
1054 +#define PUT_INDEX(lock_ptr,indexv) ((inst_spinlock_t *)(lock_ptr))->index = indexv
1055 +#define GET_INDEX(lock_ptr) ((inst_spinlock_t *)(lock_ptr))->index
1058 + * macros to cache and retrieve an index value in a read/write lock
1059 + * as well as the cpu where a reader busy period started
1060 + * we use the 2nd word (the debug word) for this, so require the
1061 + * debug word to be present
1064 + * instrumented rwlock structure -- never used to allocate storage
1065 + * only used in macros below to overlay a rwlock_t
1067 +typedef struct inst_rwlock_s {
1068 + volatile int lock;
1069 + unsigned short index;
1070 + unsigned short cpu;
1072 +#define PUT_RWINDEX(rwlock_ptr,indexv) ((inst_rwlock_t *)(rwlock_ptr))->index = indexv
1073 +#define GET_RWINDEX(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->index
1074 +#define PUT_RW_CPU(rwlock_ptr,cpuv) ((inst_rwlock_t *)(rwlock_ptr))->cpu = cpuv
1075 +#define GET_RW_CPU(rwlock_ptr) ((inst_rwlock_t *)(rwlock_ptr))->cpu
1078 + * return the number of readers for a rwlock_t
1080 +#define RWLOCK_READERS(rwlock_ptr) rwlock_readers(rwlock_ptr)
1082 +extern inline int rwlock_readers(rwlock_t *rwlock_ptr)
1084 + int tmp = (int) rwlock_ptr->lock;
1085 + /* read and write lock attempts may cause the lock value to temporarily */
1086 + /* be negative. Until it is >= 0 we know nothing (i. e. can't tell if */
1087 + /* it is -1 because it was write locked and somebody tried to read lock it */
1088 + /* or if it is -1 because it was read locked and somebody tried to write*/
1089 + /* lock it. ........................................................... */
1091 + tmp = (int) rwlock_ptr->lock;
1092 + } while (tmp < 0);
1093 + if (tmp == 0) return(0);
1094 + else return(RW_LOCK_BIAS-tmp);
1098 + * return true if rwlock is write locked
1099 + * (note that other lock attempts can cause the lock value to be negative)
1101 +#define RWLOCK_IS_WRITE_LOCKED(rwlock_ptr) ((rwlock_ptr)->lock <= 0)
1102 +#define IABS(x) ((x) > 0 ? (x) : -(x))
1103 +#define RWLOCK_IS_READ_LOCKED(rwlock_ptr) ((IABS((rwlock_ptr)->lock) % RW_LOCK_BIAS) != 0)
1105 +#define get_cycles64() get_cycles()
1107 +#endif /* _X8664_LOCKMETER_H */
1108 Index: linux/include/asm-x86_64/spinlock.h
1109 ===================================================================
1110 --- linux.orig/include/asm-x86_64/spinlock.h
1111 +++ linux/include/asm-x86_64/spinlock.h
1112 @@ -136,6 +136,11 @@ static inline void _raw_spin_lock(spinlo
1115 volatile unsigned int lock;
1116 +#ifdef CONFIG_LOCKMETER
1117 + /* required for LOCKMETER since all bits in lock are used */
1118 + /* and we need this storage for CPU and lock INDEX */
1119 + unsigned lockmeter_magic;
1121 #ifdef CONFIG_DEBUG_SPINLOCK
1124 @@ -143,11 +148,19 @@ typedef struct {
1126 #define RWLOCK_MAGIC 0xdeaf1eed
1128 +#ifdef CONFIG_LOCKMETER
1129 +#ifdef CONFIG_DEBUG_SPINLOCK
1130 +#define RWLOCK_MAGIC_INIT , 0, RWLOCK_MAGIC
1132 +#define RWLOCK_MAGIC_INIT , 0
1134 +#else /* !CONFIG_LOCKMETER */
1135 #ifdef CONFIG_DEBUG_SPINLOCK
1136 #define RWLOCK_MAGIC_INIT , RWLOCK_MAGIC
1138 #define RWLOCK_MAGIC_INIT /* */
1140 +#endif /* !CONFIG_LOCKMETER */
1142 #define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
1144 @@ -194,4 +207,47 @@ static inline int _raw_write_trylock(rwl
1148 +#if defined(CONFIG_LOCKMETER) && defined(CONFIG_HAVE_DEC_LOCK)
1149 +extern void _metered_spin_lock (spinlock_t *lock, void *caller_pc);
1150 +extern void _metered_spin_unlock(spinlock_t *lock);
1153 + * Matches what is in arch/x86_64/lib/dec_and_lock.c, except this one is
1154 + * "static inline" so that the spin_lock(), if actually invoked, is charged
1155 + * against the real caller, not against the catch-all atomic_dec_and_lock
1157 +static inline int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
1163 + counter = atomic_read(atomic);
1164 + newcount = counter-1;
1169 + asm volatile("lock; cmpxchgl %1,%2"
1171 + :"r" (newcount), "m" (atomic->counter), "0" (counter));
1173 + /* If the above failed, "eax" will have changed */
1174 + if (newcount != counter)
1179 + preempt_disable();
1180 + _metered_spin_lock(lock, __builtin_return_address(0));
1181 + if (atomic_dec_and_test(atomic))
1183 + _metered_spin_unlock(lock);
1188 +#define ATOMIC_DEC_AND_LOCK
1191 #endif /* __ASM_SPINLOCK_H */
1192 Index: linux/include/linux/lockmeter.h
1193 ===================================================================
1194 --- linux.orig/include/linux/lockmeter.h
1195 +++ linux/include/linux/lockmeter.h
1198 + * Copyright (C) 1999-2002 Silicon Graphics, Inc.
1200 + * Written by John Hawkes (hawkes@sgi.com)
1201 + * Based on klstat.h by Jack Steiner (steiner@sgi.com)
1203 + * Modified by Ray Bryant (raybry@us.ibm.com) Feb-Apr 2000
1204 + * Changes Copyright (C) 2000 IBM, Inc.
1205 + * Added save of index in spinlock_t to improve efficiency
1206 + * of "hold" time reporting for spinlocks
1207 + * Added support for hold time statistics for read and write
1209 + * Moved machine dependent code to include/asm/lockmeter.h.
1213 +#ifndef _LINUX_LOCKMETER_H
1214 +#define _LINUX_LOCKMETER_H
1217 +/*---------------------------------------------------
1218 + * architecture-independent lockmeter.h
1219 + *-------------------------------------------------*/
1222 + * raybry -- version 2: added efficient hold time statistics
1223 + * requires lstat recompile, so flagged as new version
1224 + * raybry -- version 3: added global reader lock data
1225 + * hawkes -- version 4: removed some unnecessary fields to simplify mips64 port
1227 +#define LSTAT_VERSION 5
1229 +int lstat_update(void*, void*, int);
1230 +int lstat_update_time(void*, void*, int, uint32_t);
1233 + * Currently, the mips64 and sparc64 kernels talk to a 32-bit lockstat, so we
1234 + * need to force compatibility in the inter-communication data structure.
1237 +#if defined(CONFIG_MIPS32_COMPAT)
1238 +#define TIME_T uint32_t
1239 +#elif defined(CONFIG_SPARC) || defined(CONFIG_SPARC64)
1240 +#define TIME_T uint64_t
1242 +#define TIME_T time_t
1245 +#if defined(__KERNEL__) || (!defined(CONFIG_MIPS32_COMPAT) && !defined(CONFIG_SPARC) && !defined(CONFIG_SPARC64)) || (_MIPS_SZLONG==32)
1246 +#define POINTER void *
1248 +#define POINTER int64_t
1252 + * Values for the "action" parameter passed to lstat_update.
1253 + * ZZZ - do we want a try-success status here???
1255 +#define LSTAT_ACT_NO_WAIT 0
1256 +#define LSTAT_ACT_SPIN 1
1257 +#define LSTAT_ACT_REJECT 2
1258 +#define LSTAT_ACT_WW_SPIN 3
1259 +#define LSTAT_ACT_SLEPT 4 /* UNUSED */
1261 +#define LSTAT_ACT_MAX_VALUES 4 /* NOTE: Increase to 5 if use ACT_SLEPT */
1264 + * Special values for the low 2 bits of an RA passed to
1267 +/* we use these values to figure out what kind of lock data */
1268 +/* is stored in the statistics table entry at index ....... */
1269 +#define LSTAT_RA_SPIN 0 /* spin lock data */
1270 +#define LSTAT_RA_READ 1 /* read lock statistics */
1271 +#define LSTAT_RA_SEMA 2 /* RESERVED */
1272 +#define LSTAT_RA_WRITE 3 /* write lock statistics*/
1274 +#define LSTAT_RA(n) \
1275 + ((void*)( ((unsigned long) caller_pc & ~3) | n) )
1278 + * Constants used for lock addresses in the lstat_directory
1279 + * to indicate special values of the lock address.
1281 +#define LSTAT_MULTI_LOCK_ADDRESS NULL
1284 + * Maximum size of the lockstats tables. Increase this value
1285 + * if it's not big enough. (Nothing bad happens if it's not
1286 + * big enough although some locks will not be monitored.)
1287 + * We record overflows of this quantity in lstat_control.dir_overflows
1289 + * Note: The max value here must fit into the field set
1290 + * and obtained by the macro's PUT_INDEX() and GET_INDEX().
1291 + * This value depends on how many bits are available in the
1292 + * lock word in the particular machine implementation we are on.
1294 +#define LSTAT_MAX_STAT_INDEX 2000
1297 + * Size and mask for the hash table into the directory.
1299 +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */
1300 +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1)
1302 +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK)
1305 + * This defines an entry in the lockstat directory. It contains
1306 + * information about a lock being monitored.
1307 + * A directory entry only contains the lock identification -
1308 + * counts on usage of the lock are kept elsewhere in a per-cpu
1309 + * data structure to minimize cache line pinging.
1312 + POINTER caller_ra; /* RA of code that set lock */
1313 + POINTER lock_ptr; /* lock address */
1314 + ushort next_stat_index; /* Used to link multiple locks that have the same hash table value */
1315 +} lstat_directory_entry_t;
1318 + * A multi-dimensioned array used to contain counts for lock accesses.
1319 + * The array is 3-dimensional:
1320 + * - CPU number. Keep from thrashing cache lines between CPUs
1321 + * - Directory entry index. Identifies the lock
1322 + * - Action. Indicates what kind of contention occurred on an
1323 + * access to the lock.
1325 + * The index of an entry in the directory is the same as the 2nd index
1326 + * of the entry in the counts array.
1329 + * This table contains data for spin_locks, write locks, and read locks
1330 + * Not all data is used for all cases. In particular, the hold time
1331 + * information is not stored here for read locks since that is a global
1332 + * (e. g. cannot be separated out by return address) quantity.
1333 + * See the lstat_read_lock_counts_t structure for the global read lock
1337 + uint64_t cum_wait_ticks; /* sum of wait times */
1338 + /* for write locks, sum of time a */
1339 + /* writer is waiting for a reader */
1340 + int64_t cum_hold_ticks; /* cumulative sum of holds */
1341 + /* not used for read mode locks */
1342 + /* must be signed. ............... */
1343 + uint32_t max_wait_ticks; /* max waiting time */
1344 + uint32_t max_hold_ticks; /* max holding time */
1345 + uint64_t cum_wait_ww_ticks; /* sum times writer waits on writer*/
1346 + uint32_t max_wait_ww_ticks; /* max wait time writer vs writer */
1347 + /* prev 2 only used for write locks*/
1348 + uint32_t acquire_time; /* time lock acquired this CPU */
1349 + uint32_t count[LSTAT_ACT_MAX_VALUES];
1350 +} lstat_lock_counts_t;
1352 +typedef lstat_lock_counts_t lstat_cpu_counts_t[LSTAT_MAX_STAT_INDEX];
1355 + * User request to:
1356 + * - turn statistic collection on/off, or to reset
1358 +#define LSTAT_OFF 0
1360 +#define LSTAT_RESET 2
1361 +#define LSTAT_RELEASE 3
1363 +#define LSTAT_MAX_READ_LOCK_INDEX 1000
1365 + POINTER lock_ptr; /* address of lock for output stats */
1366 + uint32_t read_lock_count;
1367 + int64_t cum_hold_ticks; /* sum of read lock hold times over */
1368 + /* all callers. ....................*/
1369 + uint32_t write_index; /* last write lock hash table index */
1370 + uint32_t busy_periods; /* count of busy periods ended this */
1371 + uint64_t start_busy; /* time this busy period started. ..*/
1372 + uint64_t busy_ticks; /* sum of busy periods this lock. ..*/
1373 + uint64_t max_busy; /* longest busy period for this lock*/
1374 + uint32_t max_readers; /* maximum number of readers ...... */
1375 +#ifdef USER_MODE_TESTING
1376 + rwlock_t entry_lock; /* lock for this read lock entry... */
1377 + /* avoid having more than one rdr at*/
1378 + /* needed for user space testing... */
1379 + /* not needed for kernel 'cause it */
1380 + /* is non-preemptive. ............. */
1382 +} lstat_read_lock_counts_t;
1383 +typedef lstat_read_lock_counts_t lstat_read_lock_cpu_counts_t[LSTAT_MAX_READ_LOCK_INDEX];
1385 +#if defined(__KERNEL__) || defined(USER_MODE_TESTING)
1387 +#ifndef USER_MODE_TESTING
1388 +#include <asm/lockmeter.h>
1390 +#include "asm_newlockmeter.h"
1394 + * Size and mask for the hash table into the directory.
1396 +#define LSTAT_HASH_TABLE_SIZE 4096 /* must be 2**N */
1397 +#define LSTAT_HASH_TABLE_MASK (LSTAT_HASH_TABLE_SIZE-1)
1399 +#define DIRHASH(ra) ((unsigned long)(ra)>>2 & LSTAT_HASH_TABLE_MASK)
1402 + * This version eliminates the per processor lock stack. What we do is to
1403 + * store the index of the lock hash structure in unused bits in the lock
1404 + * itself. Then on unlock we can find the statistics record without doing
1405 + * any additional hash or lock stack lookup. This works for spin_locks.
1406 + * Hold time reporting is now basically as cheap as wait time reporting
1407 + * so we ignore the difference between LSTAT_ON_HOLD and LSTAT_ON_WAIT
1408 + * as in version 1.1.* of lockmeter.
1410 + * For rw_locks, we store the index of a global reader stats structure in
1411 + * the lock and the writer index is stored in the latter structure.
1412 + * For read mode locks we hash at the time of the lock to find an entry
1413 + * in the directory for reader wait time and the like.
1414 + * At unlock time for read mode locks, we update just the global structure
1415 + * so we don't need to know the reader directory index value at unlock time.
1420 + * Protocol to change lstat_control.state
1421 + * This is complicated because we don't want the cum_hold_time for
1422 + * a rw_lock to be decremented in _read_lock_ without making sure it
1423 + * is incremented in _read_unlock_ and vice versa. So here is the
1424 + * way we change the state of lstat_control.state:
1425 + * I. To Turn Statistics On
1426 + * After allocating storage, set lstat_control.state non-zero.
1427 + * This works because we don't start updating statistics for in use
1428 + * locks until the reader lock count goes to zero.
1429 + * II. To Turn Statistics Off:
1430 + * (0) Disable interrupts on this CPU
1431 + * (1) Seize the lstat_control.directory_lock
1432 + * (2) Obtain the current value of lstat_control.next_free_read_lock_index
1433 + * (3) Store a zero in lstat_control.state.
1434 + * (4) Release the lstat_control.directory_lock
1435 + * (5) For each lock in the read lock list up to the saved value
1436 + * (well, -1) of the next_free_read_lock_index, do the following:
1437 + * (a) Check validity of the stored lock address
1438 + * by making sure that the word at the saved addr
1439 + * has an index that matches this entry. If not
1440 + * valid, then skip this entry.
1441 + * (b) If there is a write lock already set on this lock,
1442 + * skip to (d) below.
1443 + * (c) Set a non-metered write lock on the lock
1444 + * (d) set the cached INDEX in the lock to zero
1445 + * (e) Release the non-metered write lock.
1446 + * (6) Re-enable interrupts
1448 + * These rules ensure that a read lock will not have its statistics
1449 + * partially updated even though the global lock recording state has
1450 + * changed. See put_lockmeter_info() for implementation.
1452 + * The reason for (b) is that there may be write locks set on the
1453 + * syscall path to put_lockmeter_info() from user space. If we do
1454 + * not do this check, then we can deadlock. A similar problem would
1455 + * occur if the lock was read locked by the current CPU. At the
1456 + * moment this does not appear to happen.
1460 + * Main control structure for lockstat. Used to turn statistics on/off
1461 + * and to maintain directory info.
1465 + spinlock_t control_lock; /* used to serialize turning statistics on/off */
1466 + spinlock_t directory_lock; /* for serialize adding entries to directory */
1467 + volatile int next_free_dir_index;/* next free entry in the directory */
1468 + /* FIXME not all of these fields are used / needed .............. */
1469 + /* the following fields represent data since */
1470 + /* first "lstat on" or most recent "lstat reset" */
1471 + TIME_T first_started_time; /* time when measurement first enabled */
1472 + TIME_T started_time; /* time when measurement last started */
1473 + TIME_T ending_time; /* time when measurement last disabled */
1474 + uint64_t started_cycles64; /* cycles when measurement last started */
1475 + uint64_t ending_cycles64; /* cycles when measurement last disabled */
1476 + uint64_t enabled_cycles64; /* total cycles with measurement enabled */
1477 + int intervals; /* number of measurement intervals recorded */
1478 + /* i. e. number of times did lstat on;lstat off */
1479 + lstat_directory_entry_t *dir; /* directory */
1480 + int dir_overflow; /* count of times ran out of space in directory */
1481 + int rwlock_overflow; /* count of times we couldn't allocate a rw block*/
1482 + ushort *hashtab; /* hash table for quick dir scans */
1483 + lstat_cpu_counts_t *counts[NR_CPUS]; /* Array of pointers to per-cpu stats */
1484 + int next_free_read_lock_index; /* next rwlock reader (global) stats block */
1485 + lstat_read_lock_cpu_counts_t *read_lock_counts[NR_CPUS]; /* per cpu read lock stats */
1488 +#endif /* defined(__KERNEL__) || defined(USER_MODE_TESTING) */
1491 + short lstat_version; /* version of the data */
1492 + short state; /* the current state is returned */
1493 + int maxcpus; /* Number of cpus present */
1494 + int next_free_dir_index; /* index of the next free directory entry */
1495 + TIME_T first_started_time; /* when measurement enabled for first time */
1496 + TIME_T started_time; /* time in secs since 1969 when stats last turned on */
1497 + TIME_T ending_time; /* time in secs since 1969 when stats last turned off */
1498 + uint32_t cycleval; /* cycles per second */
1500 + void *kernel_magic_addr; /* address of kernel_magic */
1501 + void *kernel_end_addr; /* contents of kernel magic (points to "end") */
1503 + int next_free_read_lock_index; /* index of next (global) read lock stats struct */
1504 + uint64_t started_cycles64; /* cycles when measurement last started */
1505 + uint64_t ending_cycles64; /* cycles when stats last turned off */
1506 + uint64_t enabled_cycles64; /* total cycles with measurement enabled */
1507 + int intervals; /* number of measurement intervals recorded */
1508 + /* i.e. number of times we did lstat on;lstat off*/
1509 + int dir_overflow; /* number of times we wanted more space in directory */
1510 + int rwlock_overflow; /* # of times we wanted more space in read_locks_count */
1511 + struct new_utsname uts; /* info about machine where stats are measured */
1512 + /* -T option of lockstat allows data to be */
1513 + /* moved to another machine. ................. */
1514 +} lstat_user_request_t;
1516 +#endif /* _LINUX_LOCKMETER_H */
1517 Index: linux/include/linux/spinlock.h
1518 ===================================================================
1519 --- linux.orig/include/linux/spinlock.h
1520 +++ linux/include/linux/spinlock.h
1521 @@ -74,7 +74,16 @@ void __lockfunc _write_unlock_irqrestore
1522 void __lockfunc _write_unlock_irq(rwlock_t *lock);
1523 void __lockfunc _write_unlock_bh(rwlock_t *lock);
1524 int __lockfunc _spin_trylock_bh(spinlock_t *lock);
1525 -int in_lock_functions(unsigned long addr);
1527 +static inline int in_lock_functions(unsigned long addr)
1529 + /* Linker adds these: start and end of __lockfunc functions */
1530 + extern char __lock_text_start[], __lock_text_end[];
1532 + return addr >= (unsigned long)__lock_text_start
1533 + && addr < (unsigned long)__lock_text_end;
1538 #define in_lock_functions(ADDR) 0
1539 @@ -472,17 +481,6 @@ do { \
1540 1 : ({local_irq_restore(flags); 0;}); \
1543 -#ifdef CONFIG_LOCKMETER
1544 -extern void _metered_spin_lock (spinlock_t *lock);
1545 -extern void _metered_spin_unlock (spinlock_t *lock);
1546 -extern int _metered_spin_trylock(spinlock_t *lock);
1547 -extern void _metered_read_lock (rwlock_t *lock);
1548 -extern void _metered_read_unlock (rwlock_t *lock);
1549 -extern void _metered_write_lock (rwlock_t *lock);
1550 -extern void _metered_write_unlock (rwlock_t *lock);
1551 -extern int _metered_write_trylock(rwlock_t *lock);
1554 /* "lock on reference count zero" */
1555 #ifndef ATOMIC_DEC_AND_LOCK
1556 #include <asm/atomic.h>
1557 @@ -558,5 +556,4 @@ static inline int bit_spin_is_locked(int
1562 #endif /* __LINUX_SPINLOCK_H */
1563 Index: linux/kernel/Makefile
1564 ===================================================================
1565 --- linux.orig/kernel/Makefile
1566 +++ linux/kernel/Makefile
1567 @@ -11,7 +11,12 @@ obj-y = sched.o fork.o exec_domain.o
1569 obj-$(CONFIG_FUTEX) += futex.o
1570 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
1571 +ifneq ($(CONFIG_LOCKMETER),y)
1572 obj-$(CONFIG_SMP) += cpu.o spinlock.o
1574 +obj-$(CONFIG_SMP) += cpu.o
1575 +obj-$(CONFIG_LOCKMETER) += lockmeter.o
1577 obj-$(CONFIG_UID16) += uid16.o
1578 obj-$(CONFIG_MODULES) += module.o module-verify.o
1579 obj-$(CONFIG_MODULE_SIG) += module-verify-sig.o
1580 Index: linux/kernel/lockmeter.c
1581 ===================================================================
1582 --- linux.orig/kernel/lockmeter.c
1583 +++ linux/kernel/lockmeter.c
1586 + * Copyright (C) 1999,2000 Silicon Graphics, Inc.
1588 + * Written by John Hawkes (hawkes@sgi.com)
1589 + * Based on klstat.c by Jack Steiner (steiner@sgi.com)
1591 + * Modified by Ray Bryant (raybry@us.ibm.com)
1592 + * Changes Copyright (C) 2000 IBM, Inc.
1593 + * Added save of index in spinlock_t to improve efficiency
1594 + * of "hold" time reporting for spinlocks
1595 + * Added support for hold time statistics for read and write
1598 + * Modified by Ray Bryant (raybry@sgi.com)
1599 + * Changes Copyright (C) 2004, Silicon Graphics, Inc.
1600 + * Fix to work with out-of-line spinlocks.
1603 +#include <linux/config.h>
1604 +#include <linux/linkage.h>
1605 +#include <linux/preempt.h>
1606 +#include <linux/interrupt.h>
1607 +#include <linux/module.h>
1608 +#include <linux/types.h>
1609 +#include <linux/errno.h>
1610 +#include <linux/slab.h>
1611 +#include <linux/sched.h>
1612 +#include <linux/smp.h>
1613 +#include <linux/threads.h>
1614 +#include <linux/version.h>
1615 +#include <linux/vmalloc.h>
1616 +#include <linux/spinlock.h>
1617 +#include <linux/utsname.h>
1618 +#include <linux/module.h>
1619 +#include <asm/system.h>
1620 +#include <asm/uaccess.h>
1622 +#include <linux/lockmeter.h>
1624 +#define ASSERT(cond)
1625 +#define bzero(loc,size) memset(loc,0,size)
1627 +/*<---------------------------------------------------*/
1629 +/*>---------------------------------------------------*/
1631 +static lstat_control_t lstat_control __cacheline_aligned =
1632 + { LSTAT_OFF, SPIN_LOCK_UNLOCKED, SPIN_LOCK_UNLOCKED,
1633 + 19 * 0, NR_CPUS * 0, 0, NR_CPUS * 0 };
1635 +static ushort lstat_make_dir_entry(void *, void *);
1640 + * Given a RA, locate the directory entry for the lock.
1643 +lstat_lookup(void *lock_ptr, void *caller_ra)
1646 + lstat_directory_entry_t *dirp;
1648 + dirp = lstat_control.dir;
1650 + index = lstat_control.hashtab[DIRHASH(caller_ra)];
1651 + while (dirp[index].caller_ra != caller_ra) {
1653 + return lstat_make_dir_entry(lock_ptr, caller_ra);
1655 + index = dirp[index].next_stat_index;
1658 + if (dirp[index].lock_ptr != NULL && dirp[index].lock_ptr != lock_ptr) {
1659 + dirp[index].lock_ptr = NULL;
1666 + * lstat_make_dir_entry
1667 + * Called to add a new lock to the lock directory.
1670 +lstat_make_dir_entry(void *lock_ptr, void *caller_ra)
1672 + lstat_directory_entry_t *dirp;
1673 + ushort index, hindex;
1674 + unsigned long flags;
1676 + /* lock the table without recursively reentering this metering code */
1677 + local_irq_save(flags);
1678 + _raw_spin_lock(&lstat_control.directory_lock);
1680 + hindex = DIRHASH(caller_ra);
1681 + index = lstat_control.hashtab[hindex];
1682 + dirp = lstat_control.dir;
1683 + while (index && dirp[index].caller_ra != caller_ra)
1684 + index = dirp[index].next_stat_index;
1687 + if (lstat_control.next_free_dir_index < LSTAT_MAX_STAT_INDEX) {
1688 + index = lstat_control.next_free_dir_index++;
1689 + lstat_control.dir[index].caller_ra = caller_ra;
1690 + lstat_control.dir[index].lock_ptr = lock_ptr;
1691 + lstat_control.dir[index].next_stat_index =
1692 + lstat_control.hashtab[hindex];
1693 + lstat_control.hashtab[hindex] = index;
1695 + lstat_control.dir_overflow++;
1698 + _raw_spin_unlock(&lstat_control.directory_lock);
1699 + local_irq_restore(flags);
1704 +lstat_update(void *lock_ptr, void *caller_ra, int action)
1709 + ASSERT(action < LSTAT_ACT_MAX_VALUES);
1711 + if (lstat_control.state == LSTAT_OFF)
1714 + index = lstat_lookup(lock_ptr, caller_ra);
1715 + cpu = THIS_CPU_NUMBER;
1716 + (*lstat_control.counts[cpu])[index].count[action]++;
1717 + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles();
1723 +lstat_update_time(void *lock_ptr, void *caller_ra, int action, uint32_t ticks)
1728 + ASSERT(action < LSTAT_ACT_MAX_VALUES);
1730 + if (lstat_control.state == LSTAT_OFF)
1733 + index = lstat_lookup(lock_ptr, caller_ra);
1734 + cpu = THIS_CPU_NUMBER;
1735 + (*lstat_control.counts[cpu])[index].count[action]++;
1736 + (*lstat_control.counts[cpu])[index].cum_wait_ticks += (uint64_t) ticks;
1737 + if ((*lstat_control.counts[cpu])[index].max_wait_ticks < ticks)
1738 + (*lstat_control.counts[cpu])[index].max_wait_ticks = ticks;
1740 + (*lstat_control.counts[cpu])[index].acquire_time = get_cycles();
1746 +_metered_spin_lock(spinlock_t * lock_ptr, void *caller_pc)
1748 + if (lstat_control.state == LSTAT_OFF) {
1749 + _raw_spin_lock(lock_ptr); /* do the real lock */
1750 + PUT_INDEX(lock_ptr, 0); /* clean index in case lockmetering */
1751 + /* gets turned on before unlock */
1753 + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN);
1756 + if (_raw_spin_trylock(lock_ptr)) {
1757 + index = lstat_update(lock_ptr, this_pc,
1758 + LSTAT_ACT_NO_WAIT);
1760 + uint32_t start_cycles = get_cycles();
1761 + _raw_spin_lock(lock_ptr); /* do the real lock */
1762 + index = lstat_update_time(lock_ptr, this_pc,
1763 + LSTAT_ACT_SPIN, get_cycles() - start_cycles);
1765 + /* save the index in the lock itself for use in spin unlock */
1766 + PUT_INDEX(lock_ptr, index);
1769 +/* some archs require this for atomic_dec_and_lock in modules */
1770 +EXPORT_SYMBOL(_metered_spin_lock);
1773 +_metered_spin_lock_flags(spinlock_t * lock_ptr, unsigned long flags,
1776 + if (lstat_control.state == LSTAT_OFF) {
1777 + _raw_spin_lock(lock_ptr); /* do the real lock */
1778 + PUT_INDEX(lock_ptr, 0); /* clean index in case lockmetering */
1779 + /* gets turned on before unlock */
1781 + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN);
1784 + if (_raw_spin_trylock(lock_ptr)) {
1785 + index = lstat_update(lock_ptr, this_pc,
1786 + LSTAT_ACT_NO_WAIT);
1788 + uint32_t start_cycles = get_cycles();
1789 + /* do the real lock */
1790 + _raw_spin_lock_flags(lock_ptr, flags);
1791 + index = lstat_update_time(lock_ptr, this_pc,
1792 + LSTAT_ACT_SPIN, get_cycles() - start_cycles);
1794 + /* save the index in the lock itself for use in spin unlock */
1795 + PUT_INDEX(lock_ptr, index);
1800 +_metered_spin_trylock(spinlock_t * lock_ptr, void *caller_pc)
1802 + if (lstat_control.state == LSTAT_OFF) {
1803 + return _raw_spin_trylock(lock_ptr);
1806 + void *this_pc = LSTAT_RA(LSTAT_RA_SPIN);
1808 + if ((retval = _raw_spin_trylock(lock_ptr))) {
1809 + int index = lstat_update(lock_ptr, this_pc,
1810 + LSTAT_ACT_NO_WAIT);
1812 + * save the index in the lock itself for use in spin
1815 + PUT_INDEX(lock_ptr, index);
1817 + lstat_update(lock_ptr, this_pc, LSTAT_ACT_REJECT);
1825 +_metered_spin_unlock(spinlock_t * lock_ptr)
1829 + if (lstat_control.state != LSTAT_OFF) {
1830 + index = GET_INDEX(lock_ptr);
1832 + * If statistics were turned off when we set the lock,
1833 + * then the index can be zero. If that is the case,
1834 + * then collect no stats on this call.
1837 + uint32_t hold_time;
1838 + int cpu = THIS_CPU_NUMBER;
1839 + hold_time = get_cycles() -
1840 + (*lstat_control.counts[cpu])[index].acquire_time;
1841 + (*lstat_control.counts[cpu])[index].cum_hold_ticks +=
1842 + (uint64_t) hold_time;
1843 + if ((*lstat_control.counts[cpu])[index].max_hold_ticks <
1845 + (*lstat_control.counts[cpu])[index].
1846 + max_hold_ticks = hold_time;
1850 + /* make sure we don't have a stale index value saved */
1851 + PUT_INDEX(lock_ptr, 0);
1852 + _raw_spin_unlock(lock_ptr); /* do the real unlock */
1854 +/* some archs require this for atomic_dec_and_lock in modules*/
1855 +EXPORT_SYMBOL(_metered_spin_unlock);
1858 + * allocate the next global read lock structure and store its index
1859 + * in the rwlock at "lock_ptr".
1862 +alloc_rwlock_struct(rwlock_t * rwlock_ptr)
1865 + unsigned long flags;
1866 + int cpu = THIS_CPU_NUMBER;
1868 + /* If we've already overflowed, then do a quick exit */
1869 + if (lstat_control.next_free_read_lock_index >
1870 + LSTAT_MAX_READ_LOCK_INDEX) {
1871 + lstat_control.rwlock_overflow++;
1875 + local_irq_save(flags);
1876 + _raw_spin_lock(&lstat_control.directory_lock);
1878 + /* It is possible this changed while we were waiting for the directory_lock */
1879 + if (lstat_control.state == LSTAT_OFF) {
1884 + /* It is possible someone else got here first and set the index */
1885 + if ((index = GET_RWINDEX(rwlock_ptr)) == 0) {
1887 + * we can't turn on read stats for this lock while there are
1888 + * readers (this would mess up the running hold time sum at
1891 + if (RWLOCK_READERS(rwlock_ptr) != 0) {
1897 + * if stats are turned on after being off, we may need to
1898 + * return an old index from when the statistics were on last
1901 + for (index = 1; index < lstat_control.next_free_read_lock_index;
1903 + if ((*lstat_control.read_lock_counts[cpu])[index].
1904 + lock_ptr == rwlock_ptr)
1905 + goto put_index_and_unlock;
1907 + /* allocate the next global read lock structure */
1908 + if (lstat_control.next_free_read_lock_index >=
1909 + LSTAT_MAX_READ_LOCK_INDEX) {
1910 + lstat_control.rwlock_overflow++;
1914 + index = lstat_control.next_free_read_lock_index++;
1917 + * initialize the global read stats data structure for each
1920 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
1921 + (*lstat_control.read_lock_counts[cpu])[index].lock_ptr =
1924 +put_index_and_unlock:
1925 + /* store the index for the read lock structure into the lock */
1926 + PUT_RWINDEX(rwlock_ptr, index);
1930 + _raw_spin_unlock(&lstat_control.directory_lock);
1931 + local_irq_restore(flags);
1936 +_metered_read_lock(rwlock_t * rwlock_ptr, void *caller_pc)
1939 + uint32_t start_cycles;
1942 + unsigned long flags;
1943 + int readers_before, readers_after;
1944 + uint64_t cycles64;
1946 + if (lstat_control.state == LSTAT_OFF) {
1947 + _raw_read_lock(rwlock_ptr);
1948 + /* clean index in case lockmetering turns on before an unlock */
1949 + PUT_RWINDEX(rwlock_ptr, 0);
1953 + this_pc = LSTAT_RA(LSTAT_RA_READ);
1954 + cpu = THIS_CPU_NUMBER;
1955 + index = GET_RWINDEX(rwlock_ptr);
1957 + /* allocate the global stats entry for this lock, if needed */
1959 + index = alloc_rwlock_struct(rwlock_ptr);
1961 + readers_before = RWLOCK_READERS(rwlock_ptr);
1962 + if (_raw_read_trylock(rwlock_ptr)) {
1964 + * We have decremented the lock to count a new reader,
1965 + * and have confirmed that no writer has it locked.
1967 + /* update statistics if enabled */
1969 + local_irq_save(flags);
1970 + lstat_update((void *) rwlock_ptr, this_pc,
1971 + LSTAT_ACT_NO_WAIT);
1972 + /* preserve value of TSC so cum_hold_ticks and start_busy use same value */
1973 + cycles64 = get_cycles64();
1974 + (*lstat_control.read_lock_counts[cpu])[index].
1975 + cum_hold_ticks -= cycles64;
1977 + /* record time and cpu of start of busy period */
1978 + /* this is not perfect (some race conditions are possible) */
1979 + if (readers_before == 0) {
1980 + (*lstat_control.read_lock_counts[cpu])[index].
1981 + start_busy = cycles64;
1982 + PUT_RW_CPU(rwlock_ptr, cpu);
1984 + readers_after = RWLOCK_READERS(rwlock_ptr);
1985 + if (readers_after >
1986 + (*lstat_control.read_lock_counts[cpu])[index].
1988 + (*lstat_control.read_lock_counts[cpu])[index].
1989 + max_readers = readers_after;
1990 + local_irq_restore(flags);
1995 + /* If we get here, then we could not quickly grab the read lock */
1997 + start_cycles = get_cycles(); /* start counting the wait time */
1999 + /* Now spin until read_lock is successful */
2000 + _raw_read_lock(rwlock_ptr);
2002 + lstat_update_time((void *) rwlock_ptr, this_pc, LSTAT_ACT_SPIN,
2003 + get_cycles() - start_cycles);
2005 + /* update statistics if they are enabled for this lock */
2007 + local_irq_save(flags);
2008 + cycles64 = get_cycles64();
2009 + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks -=
2012 + /* this is not perfect (some race conditions are possible) */
2013 + if (readers_before == 0) {
2014 + (*lstat_control.read_lock_counts[cpu])[index].
2015 + start_busy = cycles64;
2016 + PUT_RW_CPU(rwlock_ptr, cpu);
2018 + readers_after = RWLOCK_READERS(rwlock_ptr);
2019 + if (readers_after >
2020 + (*lstat_control.read_lock_counts[cpu])[index].max_readers)
2021 + (*lstat_control.read_lock_counts[cpu])[index].
2022 + max_readers = readers_after;
2023 + local_irq_restore(flags);
2028 +_metered_read_unlock(rwlock_t * rwlock_ptr)
2032 + unsigned long flags;
2033 + uint64_t busy_length;
2034 + uint64_t cycles64;
2036 + if (lstat_control.state == LSTAT_OFF) {
2037 + _raw_read_unlock(rwlock_ptr);
2041 + index = GET_RWINDEX(rwlock_ptr);
2042 + cpu = THIS_CPU_NUMBER;
2045 + local_irq_save(flags);
2047 + * preserve value of TSC so cum_hold_ticks and busy_ticks are
2050 + cycles64 = get_cycles64();
2051 + (*lstat_control.read_lock_counts[cpu])[index].cum_hold_ticks +=
2053 + (*lstat_control.read_lock_counts[cpu])[index].read_lock_count++;
2056 + * once again, this is not perfect (some race conditions are
2059 + if (RWLOCK_READERS(rwlock_ptr) == 1) {
2060 + int cpu1 = GET_RW_CPU(rwlock_ptr);
2061 + uint64_t last_start_busy =
2062 + (*lstat_control.read_lock_counts[cpu1])[index].
2064 + (*lstat_control.read_lock_counts[cpu])[index].
2066 + if (cycles64 > last_start_busy) {
2067 + busy_length = cycles64 - last_start_busy;
2068 + (*lstat_control.read_lock_counts[cpu])[index].
2069 + busy_ticks += busy_length;
2072 + read_lock_counts[cpu])[index].
2075 + read_lock_counts[cpu])[index].
2076 + max_busy = busy_length;
2079 + local_irq_restore(flags);
2081 + _raw_read_unlock(rwlock_ptr);
2085 +_metered_write_lock(rwlock_t * rwlock_ptr, void *caller_pc)
2087 + uint32_t start_cycles;
2089 + uint32_t spin_ticks = 0; /* in anticipation of a potential wait */
2091 + int write_index = 0;
2094 + writer_writer_conflict,
2095 + writer_reader_conflict
2096 + } why_wait = writer_writer_conflict;
2098 + if (lstat_control.state == LSTAT_OFF) {
2099 + _raw_write_lock(rwlock_ptr);
2100 + /* clean index in case lockmetering turns on before an unlock */
2101 + PUT_RWINDEX(rwlock_ptr, 0);
2105 + this_pc = LSTAT_RA(LSTAT_RA_WRITE);
2106 + cpu = THIS_CPU_NUMBER;
2107 + index = GET_RWINDEX(rwlock_ptr);
2109 + /* allocate the global stats entry for this lock, if needed */
2111 + index = alloc_rwlock_struct(rwlock_ptr);
2114 + if (_raw_write_trylock(rwlock_ptr)) {
2115 + /* We acquired the lock on the first try */
2116 + write_index = lstat_update((void *) rwlock_ptr, this_pc,
2117 + LSTAT_ACT_NO_WAIT);
2118 + /* save the write_index for use in unlock if stats enabled */
2120 + (*lstat_control.read_lock_counts[cpu])[index].
2121 + write_index = write_index;
2125 + /* If we get here, then we could not quickly grab the write lock */
2126 + start_cycles = get_cycles(); /* start counting the wait time */
2128 + why_wait = RWLOCK_READERS(rwlock_ptr) ?
2129 + writer_reader_conflict : writer_writer_conflict;
2131 + /* Now set the lock and wait for conflicts to disappear */
2132 + _raw_write_lock(rwlock_ptr);
2134 + spin_ticks = get_cycles() - start_cycles;
2136 + /* update stats -- if enabled */
2137 + if (index > 0 && spin_ticks) {
2138 + if (why_wait == writer_reader_conflict) {
2139 + /* waited due to a reader holding the lock */
2140 + write_index = lstat_update_time((void *)rwlock_ptr,
2141 + this_pc, LSTAT_ACT_SPIN, spin_ticks);
2144 + * waited due to another writer holding the lock
2146 + write_index = lstat_update_time((void *)rwlock_ptr,
2147 + this_pc, LSTAT_ACT_WW_SPIN, spin_ticks);
2148 + (*lstat_control.counts[cpu])[write_index].
2149 + cum_wait_ww_ticks += spin_ticks;
2151 + (*lstat_control.counts[cpu])[write_index].
2152 + max_wait_ww_ticks) {
2153 + (*lstat_control.counts[cpu])[write_index].
2154 + max_wait_ww_ticks = spin_ticks;
2158 + /* save the directory index for use on write_unlock */
2159 + (*lstat_control.read_lock_counts[cpu])[index].
2160 + write_index = write_index;
2165 +_metered_write_unlock(rwlock_t * rwlock_ptr)
2170 + uint32_t hold_time;
2172 + if (lstat_control.state == LSTAT_OFF) {
2173 + _raw_write_unlock(rwlock_ptr);
2177 + cpu = THIS_CPU_NUMBER;
2178 + index = GET_RWINDEX(rwlock_ptr);
2180 + /* update statistics if stats enabled for this lock */
2183 + (*lstat_control.read_lock_counts[cpu])[index].write_index;
2185 + hold_time = get_cycles() -
2186 + (*lstat_control.counts[cpu])[write_index].acquire_time;
2187 + (*lstat_control.counts[cpu])[write_index].cum_hold_ticks +=
2188 + (uint64_t) hold_time;
2189 + if ((*lstat_control.counts[cpu])[write_index].max_hold_ticks <
2191 + (*lstat_control.counts[cpu])[write_index].
2192 + max_hold_ticks = hold_time;
2194 + _raw_write_unlock(rwlock_ptr);
2198 +_metered_write_trylock(rwlock_t * rwlock_ptr, void *caller_pc)
2201 + void *this_pc = LSTAT_RA(LSTAT_RA_WRITE);
2203 + if ((retval = _raw_write_trylock(rwlock_ptr))) {
2204 + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_NO_WAIT);
2206 + lstat_update(rwlock_ptr, this_pc, LSTAT_ACT_REJECT);
2213 +init_control_space(void)
2215 + /* Set all control space pointers to null and indices to "empty" */
2219 + * Access CPU_CYCLE_FREQUENCY at the outset, which in some
2220 + * architectures may trigger a runtime calculation that uses a
2221 + * spinlock. Let's do this before lockmetering is turned on.
2223 + if (CPU_CYCLE_FREQUENCY == 0)
2226 + lstat_control.hashtab = NULL;
2227 + lstat_control.dir = NULL;
2228 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
2229 + lstat_control.counts[cpu] = NULL;
2230 + lstat_control.read_lock_counts[cpu] = NULL;
2235 +reset_lstat_data(void)
2240 + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */
2241 + lstat_control.next_free_read_lock_index = 1;
2242 + lstat_control.dir_overflow = 0;
2243 + lstat_control.rwlock_overflow = 0;
2245 + lstat_control.started_cycles64 = 0;
2246 + lstat_control.ending_cycles64 = 0;
2247 + lstat_control.enabled_cycles64 = 0;
2248 + lstat_control.first_started_time = 0;
2249 + lstat_control.started_time = 0;
2250 + lstat_control.ending_time = 0;
2251 + lstat_control.intervals = 0;
2254 + * paranoia -- in case someone does a "lockstat reset" before
2257 + if (lstat_control.hashtab) {
2258 + bzero(lstat_control.hashtab,
2259 + LSTAT_HASH_TABLE_SIZE * sizeof (short));
2260 + bzero(lstat_control.dir, LSTAT_MAX_STAT_INDEX *
2261 + sizeof (lstat_directory_entry_t));
2263 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2264 + bzero(lstat_control.counts[cpu],
2265 + sizeof (lstat_cpu_counts_t));
2266 + bzero(lstat_control.read_lock_counts[cpu],
2267 + sizeof (lstat_read_lock_cpu_counts_t));
2271 + _raw_spin_unlock(&lstat_control.directory_lock);
2272 + local_irq_restore(flags);
2278 +release_control_space(void)
2281 + * Called when either (1) allocation of kmem
2282 + * or (2) when user writes LSTAT_RELEASE to /proc/lockmeter.
2283 + * Assume that all pointers have been initialized to zero,
2284 + * i.e., nonzero pointers are valid addresses.
2288 + if (lstat_control.hashtab) {
2289 + kfree(lstat_control.hashtab);
2290 + lstat_control.hashtab = NULL;
2293 + if (lstat_control.dir) {
2294 + vfree(lstat_control.dir);
2295 + lstat_control.dir = NULL;
2298 + for (cpu = 0; cpu < NR_CPUS; cpu++) {
2299 + if (lstat_control.counts[cpu]) {
2300 + vfree(lstat_control.counts[cpu]);
2301 + lstat_control.counts[cpu] = NULL;
2303 + if (lstat_control.read_lock_counts[cpu]) {
2304 + kfree(lstat_control.read_lock_counts[cpu]);
2305 + lstat_control.read_lock_counts[cpu] = NULL;
2311 +get_lockmeter_info_size(void)
2313 + return sizeof (lstat_user_request_t)
2314 + + num_online_cpus() * sizeof (lstat_cpu_counts_t)
2315 + + num_online_cpus() * sizeof (lstat_read_lock_cpu_counts_t)
2316 + + (LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t));
2320 +get_lockmeter_info(char *buffer, size_t max_len, loff_t * last_index)
2322 + lstat_user_request_t req;
2323 + struct timeval tv;
2324 + ssize_t next_ret_bcount;
2325 + ssize_t actual_ret_bcount = 0;
2328 + *last_index = 0; /* a one-shot read */
2330 + req.lstat_version = LSTAT_VERSION;
2331 + req.state = lstat_control.state;
2332 + req.maxcpus = num_online_cpus();
2333 + req.cycleval = CPU_CYCLE_FREQUENCY;
2335 + req.kernel_magic_addr = (void *) &_etext;
2336 + req.kernel_end_addr = (void *) &_etext;
2338 + req.uts = system_utsname;
2339 + req.intervals = lstat_control.intervals;
2341 + req.first_started_time = lstat_control.first_started_time;
2342 + req.started_time = lstat_control.started_time;
2343 + req.started_cycles64 = lstat_control.started_cycles64;
2345 + req.next_free_dir_index = lstat_control.next_free_dir_index;
2346 + req.next_free_read_lock_index = lstat_control.next_free_read_lock_index;
2347 + req.dir_overflow = lstat_control.dir_overflow;
2348 + req.rwlock_overflow = lstat_control.rwlock_overflow;
2350 + if (lstat_control.state == LSTAT_OFF) {
2351 + if (req.intervals == 0) {
2352 + /* measurement is off and no valid data present */
2353 + next_ret_bcount = sizeof (lstat_user_request_t);
2354 + req.enabled_cycles64 = 0;
2356 + if ((actual_ret_bcount + next_ret_bcount) > max_len)
2357 + return actual_ret_bcount;
2359 + copy_to_user(buffer, (void *) &req, next_ret_bcount);
2360 + actual_ret_bcount += next_ret_bcount;
2361 + return actual_ret_bcount;
2364 + * measurement is off but valid data present
2365 + * fetch time info from lstat_control
2367 + req.ending_time = lstat_control.ending_time;
2368 + req.ending_cycles64 = lstat_control.ending_cycles64;
2369 + req.enabled_cycles64 = lstat_control.enabled_cycles64;
2373 + * this must be a read while data active--use current time,
2376 + do_gettimeofday(&tv);
2377 + req.ending_time = tv.tv_sec;
2378 + req.ending_cycles64 = get_cycles64();
2379 + req.enabled_cycles64 = req.ending_cycles64 -
2380 + req.started_cycles64 + lstat_control.enabled_cycles64;
2383 + next_ret_bcount = sizeof (lstat_user_request_t);
2384 + if ((actual_ret_bcount + next_ret_bcount) > max_len)
2385 + return actual_ret_bcount;
2387 + copy_to_user(buffer, (void *) &req, next_ret_bcount);
2388 + actual_ret_bcount += next_ret_bcount;
2390 + if (!lstat_control.counts[0]) /* not initialized? */
2391 + return actual_ret_bcount;
2393 + next_ret_bcount = sizeof (lstat_cpu_counts_t);
2394 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2395 + if ((actual_ret_bcount + next_ret_bcount) > max_len)
2396 + return actual_ret_bcount; /* leave early */
2397 + copy_to_user(buffer + actual_ret_bcount,
2398 + lstat_control.counts[cpu], next_ret_bcount);
2399 + actual_ret_bcount += next_ret_bcount;
2402 + next_ret_bcount = LSTAT_MAX_STAT_INDEX *
2403 + sizeof (lstat_directory_entry_t);
2404 + if (((actual_ret_bcount + next_ret_bcount) > max_len)
2405 + || !lstat_control.dir)
2406 + return actual_ret_bcount; /* leave early */
2408 + copy_to_user(buffer + actual_ret_bcount, lstat_control.dir,
2410 + actual_ret_bcount += next_ret_bcount;
2412 + next_ret_bcount = sizeof (lstat_read_lock_cpu_counts_t);
2413 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2414 + if (actual_ret_bcount + next_ret_bcount > max_len)
2415 + return actual_ret_bcount;
2416 + copy_to_user(buffer + actual_ret_bcount,
2417 + lstat_control.read_lock_counts[cpu],
2419 + actual_ret_bcount += next_ret_bcount;
2422 + return actual_ret_bcount;
2426 + * Writing to the /proc lockmeter node enables or disables metering,
2427 + * based upon the first byte of the "written" data.
2428 + * The following values are defined:
2429 + * LSTAT_ON: 1st call: allocates storage, initializes and turns on measurement
2430 + * subsequent calls just turn on measurement
2431 + * LSTAT_OFF: turns off measurement
2432 + * LSTAT_RESET: resets statistics
2433 + * LSTAT_RELEASE: releases statistics storage
2435 + * This allows one to accumulate statistics over several lockstat runs:
2439 + * ...repeat above as desired...
2441 + * ...now start a new set of measurements...
2448 +put_lockmeter_info(const char *buffer, size_t len)
2451 + int dirsize, countsize, read_lock_countsize, hashsize;
2454 + int i, read_lock_blocks;
2455 + unsigned long flags;
2456 + rwlock_t *lock_ptr;
2457 + struct timeval tv;
2462 + _raw_spin_lock(&lstat_control.control_lock);
2464 + get_user(put_char, buffer);
2465 + switch (put_char) {
2468 + if (lstat_control.state != LSTAT_OFF) {
2470 + * To avoid seeing read lock hold times in an
2471 + * inconsistent state, we have to follow this protocol
2472 + * to turn off statistics
2474 + local_irq_save(flags);
2476 + * getting this lock will stop any read lock block
2479 + _raw_spin_lock(&lstat_control.directory_lock);
2481 + * keep any more read lock blocks from being
2484 + lstat_control.state = LSTAT_OFF;
2485 + /* record how many read lock blocks there are */
2486 + read_lock_blocks =
2487 + lstat_control.next_free_read_lock_index;
2488 + _raw_spin_unlock(&lstat_control.directory_lock);
2489 + /* now go through the list of read locks */
2490 + cpu = THIS_CPU_NUMBER;
2491 + for (i = 1; i < read_lock_blocks; i++) {
2493 + (*lstat_control.read_lock_counts[cpu])[i].
2495 + /* is this saved lock address still valid? */
2496 + if (GET_RWINDEX(lock_ptr) == i) {
2498 + * lock address appears to still be
2499 + * valid because we only hold one lock
2500 + * at a time, this can't cause a
2501 + * deadlock unless this is a lock held
2502 + * as part of the current system call
2503 + * path. At the moment there
2504 + * are no READ mode locks held to get
2505 + * here from user space, so we solve
2506 + * this by skipping locks held in
2509 + if (RWLOCK_IS_WRITE_LOCKED(lock_ptr)) {
2510 + PUT_RWINDEX(lock_ptr, 0);
2514 + * now we know there are no read
2515 + * holders of this lock! stop
2516 + * statistics collection for this
2519 + _raw_write_lock(lock_ptr);
2520 + PUT_RWINDEX(lock_ptr, 0);
2521 + _raw_write_unlock(lock_ptr);
2524 + * it may still be possible for the hold time
2525 + * sum to be negative e.g. if a lock is
2526 + * reallocated while "busy" we will have to fix
2527 + * this up in the data reduction program.
2530 + local_irq_restore(flags);
2531 + lstat_control.intervals++;
2532 + lstat_control.ending_cycles64 = get_cycles64();
2533 + lstat_control.enabled_cycles64 +=
2534 + lstat_control.ending_cycles64 -
2535 + lstat_control.started_cycles64;
2536 + do_gettimeofday(&tv);
2537 + lstat_control.ending_time = tv.tv_sec;
2539 + * don't deallocate the structures -- we may do a
2540 + * lockstat on to add to the data that is already
2541 + * there. Use LSTAT_RELEASE to release storage
2544 + error = -EBUSY; /* already OFF */
2549 + if (lstat_control.state == LSTAT_OFF) {
2550 +#ifdef DEBUG_LOCKMETER
2551 + printk("put_lockmeter_info(cpu=%d): LSTAT_ON\n",
2554 + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */
2556 + dirsize = LSTAT_MAX_STAT_INDEX *
2557 + sizeof (lstat_directory_entry_t);
2559 + (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort);
2560 + countsize = sizeof (lstat_cpu_counts_t);
2561 + read_lock_countsize =
2562 + sizeof (lstat_read_lock_cpu_counts_t);
2563 +#ifdef DEBUG_LOCKMETER
2564 + printk(" dirsize:%d", dirsize);
2565 + printk(" hashsize:%d", hashsize);
2566 + printk(" countsize:%d", countsize);
2567 + printk(" read_lock_countsize:%d\n",
2568 + read_lock_countsize);
2570 +#ifdef DEBUG_LOCKMETER
2573 + unsigned long cycles;
2574 + uint64_t cycles64;
2576 + do_gettimeofday(&tv);
2579 + do_gettimeofday(&tv);
2580 + } while (secs == tv.tv_sec);
2581 + cycles = get_cycles();
2582 + cycles64 = get_cycles64();
2585 + do_gettimeofday(&tv);
2586 + } while (secs == tv.tv_sec);
2587 + cycles = get_cycles() - cycles;
2588 + cycles64 = get_cycles64() - cycles;
2589 + printk("lockmeter: cycleFrequency:%d "
2590 + "cycles:%d cycles64:%d\n",
2591 + CPU_CYCLE_FREQUENCY, cycles, cycles64);
2596 + * if this is the first call, allocate storage and
2599 + if (!lstat_control.hashtab) {
2601 + spin_lock_init(&lstat_control.directory_lock);
2603 + /* guarantee all pointers at zero */
2604 + init_control_space();
2606 + lstat_control.hashtab =
2607 + kmalloc(hashsize, GFP_KERNEL);
2608 + if (!lstat_control.hashtab) {
2610 +#ifdef DEBUG_LOCKMETER
2611 + printk("!!error kmalloc of hashtab\n");
2614 + lstat_control.dir = vmalloc(dirsize);
2615 + if (!lstat_control.dir) {
2617 +#ifdef DEBUG_LOCKMETER
2618 + printk("!!error kmalloc of dir\n");
2622 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2623 + lstat_control.counts[cpu] =
2624 + vmalloc(countsize);
2625 + if (!lstat_control.counts[cpu]) {
2627 +#ifdef DEBUG_LOCKMETER
2628 + printk("!!error vmalloc of "
2629 + "counts[%d]\n", cpu);
2632 + lstat_control.read_lock_counts[cpu] =
2633 + (lstat_read_lock_cpu_counts_t *)
2634 + kmalloc(read_lock_countsize,
2636 + if (!lstat_control.
2637 + read_lock_counts[cpu]) {
2639 +#ifdef DEBUG_LOCKMETER
2640 + printk("!!error kmalloc of "
2641 + "read_lock_counts[%d]\n",
2650 + * One or more kmalloc failures -- free
2653 + release_control_space();
2656 + if (!reset_lstat_data()) {
2662 + * record starting and ending times and the
2665 + if (lstat_control.intervals == 0) {
2666 + do_gettimeofday(&tv);
2667 + lstat_control.first_started_time =
2670 + lstat_control.started_cycles64 = get_cycles64();
2671 + do_gettimeofday(&tv);
2672 + lstat_control.started_time = tv.tv_sec;
2674 + lstat_control.state = LSTAT_ON;
2677 + error = -EBUSY; /* already ON */
2682 + if (lstat_control.state == LSTAT_OFF) {
2683 + if (!reset_lstat_data())
2686 + error = -EBUSY; /* still on; can't reset */
2690 + case LSTAT_RELEASE:
2691 + if (lstat_control.state == LSTAT_OFF) {
2692 + release_control_space();
2693 + lstat_control.intervals = 0;
2694 + lstat_control.enabled_cycles64 = 0;
2704 + _raw_spin_unlock(&lstat_control.control_lock);
2705 + return error ? error : len;
2708 +#ifdef USER_MODE_TESTING
2709 +/* following used for user mode testing */
2713 + int dirsize, hashsize, countsize, read_lock_countsize, cpu;
2715 + printf("lstat_control is at %x size=%d\n", &lstat_control,
2716 + sizeof (lstat_control));
2717 + printf("sizeof(spinlock_t)=%d\n", sizeof (spinlock_t));
2718 + lstat_control.state = LSTAT_ON;
2720 + lstat_control.directory_lock = SPIN_LOCK_UNLOCKED;
2721 + lstat_control.next_free_dir_index = 1; /* 0 is for overflows */
2722 + lstat_control.next_free_read_lock_index = 1;
2724 + dirsize = LSTAT_MAX_STAT_INDEX * sizeof (lstat_directory_entry_t);
2725 + hashsize = (1 + LSTAT_HASH_TABLE_SIZE) * sizeof (ushort);
2726 + countsize = sizeof (lstat_cpu_counts_t);
2727 + read_lock_countsize = sizeof (lstat_read_lock_cpu_counts_t);
2729 + lstat_control.hashtab = (ushort *) malloc(hashsize);
2731 + if (lstat_control.hashtab == 0) {
2732 + printf("malloc failure for at line %d in lockmeter.c\n",
2737 + lstat_control.dir = (lstat_directory_entry_t *) malloc(dirsize);
2739 + if (lstat_control.dir == 0) {
2740 + printf("malloc failure for at line %d in lockmeter.c\n", cpu,
2745 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2747 + j = (int) (lstat_control.counts[cpu] =
2748 + (lstat_cpu_counts_t *) malloc(countsize));
2749 + k = (int) (lstat_control.read_lock_counts[cpu] =
2750 + (lstat_read_lock_cpu_counts_t *)
2751 + malloc(read_lock_countsize));
2753 + printf("malloc failure for cpu=%d at line %d in "
2754 + "lockmeter.c\n", cpu, __LINE__);
2759 + memset(lstat_control.hashtab, 0, hashsize);
2760 + memset(lstat_control.dir, 0, dirsize);
2762 + for (cpu = 0; cpu < num_online_cpus(); cpu++) {
2763 + memset(lstat_control.counts[cpu], 0, countsize);
2764 + memset(lstat_control.read_lock_counts[cpu], 0,
2765 + read_lock_countsize);
2771 +.globl __write_lock_failed \
2772 +__write_lock_failed: \
2773 + " LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax) \
2774 +1: cmpl $" RW_LOCK_BIAS_STR ",(%eax) \
2777 + " LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax) \
2778 + jnz __write_lock_failed \
2783 +.globl __read_lock_failed \
2784 +__read_lock_failed: \
2785 + lock ; incl (%eax) \
2786 +1: cmpl $1,(%eax) \
2789 + lock ; decl (%eax) \
2790 + js __read_lock_failed \
2796 + * these definitions need to match what is in kernel/spinlock.c
2797 + * except for the fact that calls to _raw_ routines are replaced by
2798 + * corresponding calls to the _metered_ routines
2802 + * Generic declaration of the raw read_trylock() function,
2803 + * architectures are supposed to optimize this:
2805 +int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
2807 + _metered_read_lock(lock, __builtin_return_address(0));
2810 +EXPORT_SYMBOL(generic_raw_read_trylock);
2812 +int __lockfunc _spin_trylock(spinlock_t *lock)
2814 + preempt_disable();
2815 + if (_metered_spin_trylock(lock, __builtin_return_address(0)))
2821 +EXPORT_SYMBOL(_spin_trylock);
2823 +int __lockfunc _write_trylock(rwlock_t *lock)
2825 + preempt_disable();
2826 + if (_metered_write_trylock(lock, __builtin_return_address(0)))
2832 +EXPORT_SYMBOL(_write_trylock);
2834 +#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
2836 + * This could be a long-held lock. If another CPU holds it for a long time,
2837 + * and that CPU is not asked to reschedule then *this* CPU will spin on the
2838 + * lock for a long time, even if *this* CPU is asked to reschedule.
2840 + * So what we do here, in the slow (contended) path is to spin on the lock by
2841 + * hand while permitting preemption.
2843 + * Called inside preempt_disable().
2845 +static inline void __preempt_spin_lock(spinlock_t *lock, void *caller_pc)
2847 + if (preempt_count() > 1) {
2848 + _metered_spin_lock(lock, caller_pc);
2854 + while (spin_is_locked(lock))
2856 + preempt_disable();
2857 + } while (!_metered_spin_trylock(lock, caller_pc));
2860 +void __lockfunc _spin_lock(spinlock_t *lock)
2862 + preempt_disable();
2863 + if (unlikely(!_metered_spin_trylock(lock, __builtin_return_address(0))))
2864 + __preempt_spin_lock(lock, __builtin_return_address(0));
2867 +static inline void __preempt_write_lock(rwlock_t *lock, void *caller_pc)
2869 + if (preempt_count() > 1) {
2870 + _metered_write_lock(lock, caller_pc);
2876 + while (rwlock_is_locked(lock))
2878 + preempt_disable();
2879 + } while (!_metered_write_trylock(lock,caller_pc));
2882 +void __lockfunc _write_lock(rwlock_t *lock)
2884 + preempt_disable();
2885 + if (unlikely(!_metered_write_trylock(lock, __builtin_return_address(0))))
2886 + __preempt_write_lock(lock, __builtin_return_address(0));
2889 +void __lockfunc _spin_lock(spinlock_t *lock)
2891 + preempt_disable();
2892 + _metered_spin_lock(lock, __builtin_return_address(0));
2895 +void __lockfunc _write_lock(rwlock_t *lock)
2897 + preempt_disable();
2898 + _metered_write_lock(lock, __builtin_return_address(0));
2901 +EXPORT_SYMBOL(_spin_lock);
2902 +EXPORT_SYMBOL(_write_lock);
2904 +void __lockfunc _read_lock(rwlock_t *lock)
2906 + preempt_disable();
2907 + _metered_read_lock(lock, __builtin_return_address(0));
2909 +EXPORT_SYMBOL(_read_lock);
2911 +void __lockfunc _spin_unlock(spinlock_t *lock)
2913 + _metered_spin_unlock(lock);
2916 +EXPORT_SYMBOL(_spin_unlock);
2918 +void __lockfunc _write_unlock(rwlock_t *lock)
2920 + _metered_write_unlock(lock);
2923 +EXPORT_SYMBOL(_write_unlock);
2925 +void __lockfunc _read_unlock(rwlock_t *lock)
2927 + _metered_read_unlock(lock);
2930 +EXPORT_SYMBOL(_read_unlock);
2932 +unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
2934 + unsigned long flags;
2936 + local_irq_save(flags);
2937 + preempt_disable();
2938 + _metered_spin_lock_flags(lock, flags, __builtin_return_address(0));
2941 +EXPORT_SYMBOL(_spin_lock_irqsave);
2943 +void __lockfunc _spin_lock_irq(spinlock_t *lock)
2945 + local_irq_disable();
2946 + preempt_disable();
2947 + _metered_spin_lock(lock, __builtin_return_address(0));
2949 +EXPORT_SYMBOL(_spin_lock_irq);
2951 +void __lockfunc _spin_lock_bh(spinlock_t *lock)
2953 + local_bh_disable();
2954 + preempt_disable();
2955 + _metered_spin_lock(lock, __builtin_return_address(0));
2957 +EXPORT_SYMBOL(_spin_lock_bh);
2959 +unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
2961 + unsigned long flags;
2963 + local_irq_save(flags);
2964 + preempt_disable();
2965 + _metered_read_lock(lock, __builtin_return_address(0));
2968 +EXPORT_SYMBOL(_read_lock_irqsave);
2970 +void __lockfunc _read_lock_irq(rwlock_t *lock)
2972 + local_irq_disable();
2973 + preempt_disable();
2974 + _metered_read_lock(lock, __builtin_return_address(0));
2976 +EXPORT_SYMBOL(_read_lock_irq);
2978 +void __lockfunc _read_lock_bh(rwlock_t *lock)
2980 + local_bh_disable();
2981 + preempt_disable();
2982 + _metered_read_lock(lock, __builtin_return_address(0));
2984 +EXPORT_SYMBOL(_read_lock_bh);
2986 +unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
2988 + unsigned long flags;
2990 + local_irq_save(flags);
2991 + preempt_disable();
2992 + _metered_write_lock(lock, __builtin_return_address(0));
2995 +EXPORT_SYMBOL(_write_lock_irqsave);
2997 +void __lockfunc _write_lock_irq(rwlock_t *lock)
2999 + local_irq_disable();
3000 + preempt_disable();
3001 + _metered_write_lock(lock, __builtin_return_address(0));
3003 +EXPORT_SYMBOL(_write_lock_irq);
3005 +void __lockfunc _write_lock_bh(rwlock_t *lock)
3007 + local_bh_disable();
3008 + preempt_disable();
3009 + _metered_write_lock(lock, __builtin_return_address(0));
3011 +EXPORT_SYMBOL(_write_lock_bh);
3013 +void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
3015 + _metered_spin_unlock(lock);
3016 + local_irq_restore(flags);
3019 +EXPORT_SYMBOL(_spin_unlock_irqrestore);
3021 +void __lockfunc _spin_unlock_irq(spinlock_t *lock)
3023 + _metered_spin_unlock(lock);
3024 + local_irq_enable();
3027 +EXPORT_SYMBOL(_spin_unlock_irq);
3029 +void __lockfunc _spin_unlock_bh(spinlock_t *lock)
3031 + _metered_spin_unlock(lock);
3033 + local_bh_enable();
3035 +EXPORT_SYMBOL(_spin_unlock_bh);
3037 +void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
3039 + _metered_read_unlock(lock);
3040 + local_irq_restore(flags);
3043 +EXPORT_SYMBOL(_read_unlock_irqrestore);
3045 +void __lockfunc _read_unlock_irq(rwlock_t *lock)
3047 + _metered_read_unlock(lock);
3048 + local_irq_enable();
3051 +EXPORT_SYMBOL(_read_unlock_irq);
3053 +void __lockfunc _read_unlock_bh(rwlock_t *lock)
3055 + _metered_read_unlock(lock);
3057 + local_bh_enable();
3059 +EXPORT_SYMBOL(_read_unlock_bh);
3061 +void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
3063 + _metered_write_unlock(lock);
3064 + local_irq_restore(flags);
3067 +EXPORT_SYMBOL(_write_unlock_irqrestore);
3069 +void __lockfunc _write_unlock_irq(rwlock_t *lock)
3071 + _metered_write_unlock(lock);
3072 + local_irq_enable();
3075 +EXPORT_SYMBOL(_write_unlock_irq);
3077 +void __lockfunc _write_unlock_bh(rwlock_t *lock)
3079 + _metered_write_unlock(lock);
3081 + local_bh_enable();
3083 +EXPORT_SYMBOL(_write_unlock_bh);
3085 +int __lockfunc _spin_trylock_bh(spinlock_t *lock)
3087 + local_bh_disable();
3088 + preempt_disable();
3089 + if (_metered_spin_trylock(lock, __builtin_return_address(0)))
3093 + local_bh_enable();
3096 +EXPORT_SYMBOL(_spin_trylock_bh);