1 Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig
\r
2 ===================================================================
\r
3 Index: linux-2.6.9/drivers/scsi/Kconfig
4 ===================================================================
5 --- linux-2.6.9.orig/drivers/scsi/Kconfig 2007-07-23 14:19:13.000000000 +0400
6 +++ linux-2.6.9/drivers/scsi/Kconfig 2007-07-26 14:16:36.000000000 +0400
7 @@ -61,6 +61,14 @@ config SCSI_DUMP
12 + bool "Enable SCSI disk I/O stats"
13 + depends on BLK_DEV_SD
16 + This enables SCSI disk I/O stats collection. You must also enable
17 + /proc file system support if you want this feature.
20 tristate "SCSI tape support"
22 Index: linux-2.6.9/drivers/scsi/scsi_proc.c
23 ===================================================================
24 --- linux-2.6.9.orig/drivers/scsi/scsi_proc.c 2007-03-13 02:47:28.000000000 +0300
25 +++ linux-2.6.9/drivers/scsi/scsi_proc.c 2007-07-26 14:16:36.000000000 +0400
27 /* 4K page size, but our output routines, use some slack for overruns */
28 #define PROC_BLOCK_SIZE (3*1024)
30 -static struct proc_dir_entry *proc_scsi;
31 +struct proc_dir_entry *proc_scsi;
32 +EXPORT_SYMBOL(proc_scsi);
34 /* Protect sht->present and sht->proc_dir */
35 static DECLARE_MUTEX(global_host_template_sem);
36 Index: linux-2.6.9/drivers/scsi/sd.c
37 ===================================================================
38 --- linux-2.6.9.orig/drivers/scsi/sd.c 2007-03-13 02:47:27.000000000 +0300
39 +++ linux-2.6.9/drivers/scsi/sd.c 2007-07-28 14:55:56.000000000 +0400
42 #include "scsi_logging.h"
44 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
45 +# include <linux/proc_fs.h>
46 +# include <linux/seq_file.h>
49 + unsigned long long iostat_size;
50 + unsigned long long iostat_count;
53 +#define IOSTAT_NCOUNTERS 16
55 + iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
56 + iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
57 + struct timeval iostat_timeval;
59 + /* queue depth: how well the pipe is filled up */
60 + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
61 + unsigned long long iostat_queue_ticks_sum;
62 + unsigned long iostat_queue_depth;
63 + unsigned long iostat_queue_stamp;
65 + /* seeks: how linear the traffic is */
66 + unsigned long long iostat_next_sector;
67 + unsigned long long iostat_seek_sectors;
68 + unsigned long long iostat_seeks;
69 + unsigned long long iostat_sectors;
70 + unsigned long long iostat_reqs;
71 + unsigned long iostat_read_reqs;
72 + unsigned long iostat_write_reqs;
74 + /* process time: how long it takes to process requests */
75 + unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
76 + unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
78 + /* queue time: how long requests spent in the elevator's queue */
79 + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
80 + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
82 + char iostat_name[32];
84 + /* must be the last field, as it's used to know size to be memset'ed */
85 + spinlock_t iostat_lock;
86 +} ____cacheline_aligned_in_smp iostat_stats_t;
88 +iostat_stats_t **sd_iostats;
89 +struct proc_dir_entry *sd_iostats_procdir;
90 +char sd_iostats_procdir_name[] = "sd_iostats";
92 +extern void sd_iostats_init(void);
93 +extern void sd_iostats_init_disk(struct gendisk *);
94 +extern void sd_iostats_fini(void);
95 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
96 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
98 +static inline void sd_iostats_init(void) {}
99 +static inline void sd_iostats_init_disk(struct gendisk *disk) {}
100 +static inline void sd_iostats_fini(void) {}
101 +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
102 +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
106 * More than enough for everybody ;) The huge number of majors
107 * is a leftover from 16bit dev_t days, we don't really need that
110 #define SD_MAX_DISKS (((26 * 26) + 26 + 1) * 26)
112 +#define SD_STATS 256
114 * Time out in seconds for disks and Magneto-opticals (which are slower).
116 @@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c
117 SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
118 disk->disk_name, (unsigned long long)block));
120 + sd_iostats_start_req(SCpnt);
123 * If we have a 1K hardware sectorsize, prevent access to single
124 * 512 byte sectors. In theory we could handle this - in fact
125 @@ -474,6 +538,7 @@ static int sd_open(struct inode *inode,
126 scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
129 + sd_iostats_init_disk(disk);
133 @@ -849,6 +914,9 @@ static void sd_rw_intr(struct scsi_cmnd
138 + sd_iostats_finish_req(SCpnt);
141 * This calls the generic completion function, now that we know
142 * how many actual sectors finished, and how many sectors we need
143 @@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d
147 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
149 +sd_iostats_seq_show(struct seq_file *seq, void *v)
151 + struct timeval now;
152 + struct gendisk *disk = seq->private;
153 + iostat_stats_t *stats;
154 + unsigned long long read_len;
155 + unsigned long long read_len_tot;
156 + unsigned long read_num;
157 + unsigned long read_num_tot;
158 + unsigned long long write_len;
159 + unsigned long long write_len_tot;
160 + unsigned long write_num;
161 + unsigned long write_num_tot;
165 + if (sd_iostats == NULL) {
166 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
170 + stats = sd_iostats[scsi_disk(disk)->index];
171 + if (stats == NULL) {
172 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
176 + do_gettimeofday(&now);
177 + now.tv_sec -= stats->iostat_timeval.tv_sec;
178 + now.tv_usec -= stats->iostat_timeval.tv_usec;
179 + if (now.tv_usec < 0) {
180 + now.tv_usec += 1000000;
184 + /* this sampling races with updates */
185 + seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
186 + (unsigned long) scsi_disk(disk)->index,
187 + now.tv_sec, now.tv_usec);
189 + for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
190 + if (stats->iostat_read_histogram[i].iostat_count != 0 ||
191 + stats->iostat_write_histogram[i].iostat_count != 0)
195 + seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
196 + "reads", "total", "writes", "total");
198 + read_len_tot = write_len_tot = 0;
199 + read_num_tot = write_num_tot = 0;
200 + for (i = 0; i <= maxi; i++) {
201 + read_len = stats->iostat_read_histogram[i].iostat_size;
202 + read_len_tot += read_len;
203 + read_num = stats->iostat_read_histogram[i].iostat_count;
204 + read_num_tot += read_num;
206 + write_len = stats->iostat_write_histogram[i].iostat_size;
207 + write_len_tot += write_len;
208 + write_num = stats->iostat_write_histogram[i].iostat_count;
209 + write_num_tot += write_num;
211 + seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
212 + 512<<i, read_num, read_len, write_num, write_len);
215 + seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
216 + read_num_tot, read_len_tot,
217 + write_num_tot, write_len_tot);
219 + seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
220 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
221 + unsigned long long ticks, percent;
222 + ticks = stats->iostat_queue_ticks[i];
225 + percent = stats->iostat_queue_ticks[i] * 100;
226 + do_div(percent, stats->iostat_queue_ticks_sum);
227 + seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
230 + if (stats->iostat_reqs != 0) {
231 + unsigned long long aveseek = 0, percent = 0;
233 + if (stats->iostat_seeks) {
234 + aveseek = stats->iostat_seek_sectors;
235 + do_div(aveseek, stats->iostat_seeks);
236 + percent = stats->iostat_seeks * 100;
237 + do_div(percent, stats->iostat_reqs);
240 + seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
241 + "%llu sectors in ave, %llu%% of all reqs\n",
242 + stats->iostat_sectors, stats->iostat_reqs,
243 + stats->iostat_seeks, aveseek, percent);
246 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
247 + "%", "writes", "%"); /* column titles are emitted through %s, so a single '%' prints one percent sign */
248 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
249 + unsigned long read_percent = 0, write_percent = 0;
250 + if (stats->iostat_wtime[i] == 0 &&
251 + stats->iostat_rtime[i] == 0)
253 + if (stats->iostat_read_reqs)
254 + read_percent = stats->iostat_rtime[i] * 100 /
255 + stats->iostat_read_reqs;
256 + if (stats->iostat_write_reqs)
257 + write_percent = stats->iostat_wtime[i] * 100 /
258 + stats->iostat_write_reqs;
259 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
260 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
261 + stats->iostat_rtime[i], read_percent,
262 + stats->iostat_wtime[i], write_percent);
265 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
266 + "%", "writes", "%"); /* column titles are emitted through %s, so a single '%' prints one percent sign */
267 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
268 + unsigned long read_percent = 0, write_percent = 0;
269 + if (stats->iostat_wtime_in_queue[i] == 0 &&
270 + stats->iostat_rtime_in_queue[i] == 0)
272 + if (stats->iostat_read_reqs)
273 + read_percent = stats->iostat_rtime_in_queue[i] * 100 /
274 + stats->iostat_read_reqs;
275 + if (stats->iostat_write_reqs)
276 + write_percent = stats->iostat_wtime_in_queue[i] * 100 /
277 + stats->iostat_write_reqs;
278 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
279 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
280 + stats->iostat_rtime_in_queue[i],
282 + stats->iostat_wtime_in_queue[i],
290 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
292 + return (*pos == 0) ? (void *)1 : NULL;
296 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
303 +sd_iostats_seq_stop(struct seq_file *p, void *v)
307 +static struct seq_operations sd_iostats_seqops = {
308 + .start = sd_iostats_seq_start,
309 + .stop = sd_iostats_seq_stop,
310 + .next = sd_iostats_seq_next,
311 + .show = sd_iostats_seq_show,
315 +sd_iostats_seq_open (struct inode *inode, struct file *file)
319 + rc = seq_open(file, &sd_iostats_seqops);
323 + ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
328 +sd_iostats_seq_write(struct file *file, const char *buffer,
329 + size_t len, loff_t *off)
331 + struct seq_file *seq = file->private_data;
332 + struct gendisk *disk = seq->private;
333 + iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index];
334 + unsigned long flags;
335 + unsigned long qdepth;
338 + spin_lock_irqsave (&stats->iostat_lock, flags);
339 + qdepth = stats->iostat_queue_depth;
340 + memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
341 + do_gettimeofday(&stats->iostat_timeval);
342 + stats->iostat_queue_stamp = jiffies;
343 + stats->iostat_queue_depth = qdepth;
344 + spin_unlock_irqrestore (&stats->iostat_lock, flags);
349 +static struct file_operations sd_iostats_proc_fops = {
350 + .owner = THIS_MODULE,
351 + .open = sd_iostats_seq_open,
353 + .write = sd_iostats_seq_write,
354 + .llseek = seq_lseek,
355 + .release = seq_release,
358 +extern struct proc_dir_entry *proc_scsi;
361 +sd_iostats_init(void)
365 + sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
366 + if (sd_iostats == NULL) {
367 + printk(KERN_WARNING "Can't keep sd iostats: "
368 + "ENOMEM allocating stats array size %zu\n", /* the argument is a size_t, so %zu rather than %d */
369 + SD_STATS * sizeof(iostat_stats_t *));
373 + for (i = 0; i < SD_STATS; i++)
374 + sd_iostats[i] = NULL;
376 + if (proc_scsi == NULL) {
377 + printk(KERN_WARNING "No access to sd iostats: "
378 + "proc_scsi is NULL\n");
382 + sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
383 + S_IFDIR | S_IRUGO | S_IXUGO,
385 + if (sd_iostats_procdir == NULL) {
386 + printk(KERN_WARNING "No access to sd iostats: "
387 + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
393 +sd_iostats_init_disk(struct gendisk *disk)
395 + struct proc_dir_entry *pde;
396 + unsigned long flags;
397 + iostat_stats_t *stats;
399 + if (sd_iostats == NULL || sd_iostats_procdir == NULL)
402 + if (scsi_disk(disk)->index >= SD_STATS) { /* sd_iostats[] holds SD_STATS slots, so valid indices are 0..SD_STATS-1 */
403 + printk(KERN_ERR "sd_iostats_init_disk: "
404 + "unexpected disk index %d(%d)\n",
405 + scsi_disk(disk)->index, SD_STATS);
409 + if (sd_iostats[scsi_disk(disk)->index] != NULL)
412 + stats = kmalloc(sizeof(*stats), GFP_KERNEL);
413 + if (stats == NULL) {
414 + printk(KERN_WARNING "Can't keep %s iostats: "
415 + "ENOMEM allocating stats size %zu\n", /* sizeof yields a size_t, so %zu rather than %d */
416 + disk->disk_name, sizeof(*stats));
420 + memset (stats, 0, sizeof(*stats));
421 + do_gettimeofday(&stats->iostat_timeval);
422 + stats->iostat_queue_stamp = jiffies;
423 + spin_lock_init(&stats->iostat_lock);
426 + spin_lock_irqsave(&stats->iostat_lock, flags); /* NOTE(review): this lock lives inside the freshly allocated, not yet published stats object, so it does not appear to serialize two callers racing for the same sd_iostats[] slot - confirm */
428 + if (sd_iostats[scsi_disk(disk)->index] != NULL) { /* re-check: the slot may have been filled since the test done before the allocation */
429 + spin_unlock_irqrestore(&stats->iostat_lock, flags);
434 + sd_iostats[scsi_disk(disk)->index] = stats; /* publish the new stats object in the per-disk table */
436 + spin_unlock_irqrestore(&stats->iostat_lock, flags);
438 + strncpy(stats->iostat_name, disk->disk_name,
439 + sizeof(stats->iostat_name)-1);
441 + pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR,
442 + sd_iostats_procdir);
444 + printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
445 + sd_iostats_procdir_name, disk->disk_name);
447 + pde->proc_fops = &sd_iostats_proc_fops;
452 +void sd_iostats_fini(void)
456 + if (sd_iostats == NULL)
459 + for (i = 0; i < SD_STATS; i++) {
460 + if (sd_iostats[i] == NULL)
462 + if (sd_iostats_procdir != NULL)
463 + remove_proc_entry(sd_iostats[i]->iostat_name,
464 + sd_iostats_procdir);
465 + kfree(sd_iostats[i]);
468 + if (proc_scsi != NULL && sd_iostats_procdir != NULL)
469 + remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
471 + sd_iostats_procdir = NULL;
476 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
478 + struct request *rq = SCpnt->request;
479 + iostat_stats_t *stats;
480 + unsigned long *tcounter;
483 + unsigned long irqflags;
486 + disk = scsi_disk(rq->rq_disk)->index;
488 + if (sd_iostats == NULL)
491 + if (disk < 0 || disk >= SD_STATS) {
492 + printk(KERN_ERR "sd_iostats_bump: unexpected disk index "
493 + "%d([0-%d])\n", disk, SD_STATS);
497 + stats = sd_iostats[disk];
501 + tmp = jiffies - rq->start_time;
502 + for (tbucket = 0; tmp > 1; tbucket++)
504 + if (tbucket >= IOSTAT_NCOUNTERS)
505 + tbucket = IOSTAT_NCOUNTERS - 1;
506 + //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
508 + tcounter = rq_data_dir(rq) == WRITE ?
509 + &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
511 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
513 + /* update delay stats */
516 + /* update queue depth stats */
517 + i = stats->iostat_queue_depth;
518 + if (i >= IOSTAT_NCOUNTERS)
519 + i = IOSTAT_NCOUNTERS - 1;
520 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
521 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
522 + stats->iostat_queue_depth--;
524 + /* update seek stats. XXX: not sure about nr_sectors */
525 + stats->iostat_sectors += rq->nr_sectors;
526 + stats->iostat_reqs++;
527 + if (rq->sector != stats->iostat_next_sector) {
528 + stats->iostat_seek_sectors +=
529 + rq->sector > stats->iostat_next_sector ?
530 + rq->sector - stats->iostat_next_sector :
531 + stats->iostat_next_sector - rq->sector;
532 + stats->iostat_seeks++;
534 + stats->iostat_next_sector = rq->sector + rq->nr_sectors;
536 + stats->iostat_queue_stamp = jiffies;
538 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
541 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
543 + struct request *rq = SCpnt->request;
544 + iostat_stats_t *stats;
545 + iostat_counter_t *counter;
549 + unsigned long irqflags;
553 + disk = scsi_disk(rq->rq_disk)->index;
555 + if (sd_iostats == NULL)
558 + if (disk < 0 || disk >= SD_STATS) {
559 + printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n",
564 + stats = sd_iostats[disk];
568 + nsect = SCpnt->request_bufflen >> 9;
569 + for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
572 + if (bucket >= IOSTAT_NCOUNTERS) {
573 + printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
577 + counter = rq_data_dir(rq) == WRITE ?
578 + &stats->iostat_write_histogram[bucket] :
579 + &stats->iostat_read_histogram[bucket];
581 + tmp = jiffies - rq->start_time;
582 + for (tbucket = 0; tmp > 1; tbucket++)
584 + if (tbucket >= IOSTAT_NCOUNTERS)
585 + tbucket = IOSTAT_NCOUNTERS - 1;
586 + //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
588 + /* an ugly hack to know exact processing time. the right
589 + * solution is to add one more field to struct request
590 + * hopefully it will break nothing ... */
591 + rq->start_time = jiffies;
593 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
595 + /* update queue depth stats */
596 + i = stats->iostat_queue_depth;
597 + if (i >= IOSTAT_NCOUNTERS)
598 + i = IOSTAT_NCOUNTERS - 1;
599 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
600 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
601 + stats->iostat_queue_depth++;
603 + /* update delay stats */
604 + if (rq_data_dir(rq) == WRITE) {
605 + stats->iostat_wtime_in_queue[tbucket]++;
606 + stats->iostat_write_reqs++;
608 + stats->iostat_rtime_in_queue[tbucket]++;
609 + stats->iostat_read_reqs++;
612 + /* update size stats */
613 + counter->iostat_size += nsect;
614 + counter->iostat_count++;
616 + stats->iostat_queue_stamp = jiffies;
618 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
623 * init_sd - entry point for this driver (both when built in or when
625 @@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d
626 static int __init init_sd(void)
631 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
633 @@ -1594,7 +2138,10 @@ static int __init init_sd(void)
637 - return scsi_register_driver(&sd_template.gendrv);
638 + rc = scsi_register_driver(&sd_template.gendrv);
645 @@ -1608,6 +2155,7 @@ static void __exit exit_sd(void)
647 SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
650 scsi_unregister_driver(&sd_template.gendrv);
651 for (i = 0; i < SD_MAJORS; i++)
652 unregister_blkdev(sd_major(i), "sd");