1 Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig
\r
2 ===================================================================
\r
3 Index: linux-2.6.9/drivers/scsi/Kconfig
4 ===================================================================
5 --- linux-2.6.9.orig/drivers/scsi/Kconfig 2007-07-23 14:19:13.000000000 +0400
6 +++ linux-2.6.9/drivers/scsi/Kconfig 2007-07-26 14:16:36.000000000 +0400
7 @@ -61,6 +61,14 @@ config SCSI_DUMP
12 + bool "Enable SCSI disk I/O stats"
13 + depends on BLK_DEV_SD
16 + This enables collection of SCSI disk I/O statistics. The statistics
17 + are reported through /proc, so /proc file system support is also needed.
20 tristate "SCSI tape support"
22 Index: linux-2.6.9/drivers/scsi/scsi_proc.c
23 ===================================================================
24 --- linux-2.6.9.orig/drivers/scsi/scsi_proc.c 2007-03-13 02:47:28.000000000 +0300
25 +++ linux-2.6.9/drivers/scsi/scsi_proc.c 2007-07-26 14:16:36.000000000 +0400
27 /* 4K page size, but our output routines, use some slack for overruns */
28 #define PROC_BLOCK_SIZE (3*1024)
30 -static struct proc_dir_entry *proc_scsi;
31 +struct proc_dir_entry *proc_scsi;
32 +EXPORT_SYMBOL(proc_scsi);
34 /* Protect sht->present and sht->proc_dir */
35 static DECLARE_MUTEX(global_host_template_sem);
36 Index: linux-2.6.9/drivers/scsi/sd.c
37 ===================================================================
38 --- linux-2.6.9.orig/drivers/scsi/sd.c 2007-03-13 02:47:27.000000000 +0300
39 +++ linux-2.6.9/drivers/scsi/sd.c 2007-07-28 14:55:56.000000000 +0400
42 #include "scsi_logging.h"
44 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
45 +# include <linux/proc_fs.h>
46 +# include <linux/seq_file.h>
49 + unsigned long long iostat_size;
50 + unsigned long long iostat_count;
53 +#define IOSTAT_NCOUNTERS 16
55 + iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
56 + iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
57 + struct timeval iostat_timeval;
59 + /* queue depth: how well the pipe is filled up */
60 + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
61 + unsigned long long iostat_queue_ticks_sum;
62 + unsigned long iostat_queue_depth;
63 + unsigned long iostat_queue_stamp;
65 + /* seeks: how linear the traffic is */
66 + unsigned long long iostat_next_sector;
67 + unsigned long long iostat_seek_sectors;
68 + unsigned long long iostat_seeks;
69 + unsigned long long iostat_sectors;
70 + unsigned long long iostat_reqs;
71 + unsigned long iostat_read_reqs;
72 + unsigned long iostat_write_reqs;
74 + /* process time: how long it takes to process requests */
75 + unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
76 + unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
78 + /* queue time: how long requests spent in the elevator's queue */
79 + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
80 + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
82 + char iostat_name[32];
84 + /* must be the last field: its offset is the byte count memset on reset */
85 + spinlock_t iostat_lock;
86 +} ____cacheline_aligned_in_smp iostat_stats_t;
88 +iostat_stats_t **sd_iostats;
89 +struct proc_dir_entry *sd_iostats_procdir;
90 +char sd_iostats_procdir_name[] = "sd_iostats";
92 +extern void sd_iostats_init(void);
93 +extern void sd_iostats_init_disk(struct gendisk *);
94 +extern void sd_iostats_fini(void);
95 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
96 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
98 +static inline void sd_iostats_init(void) {}
99 +static inline void sd_iostats_init_disk(struct gendisk *disk) {}
100 +static inline void sd_iostats_fini(void) {}
101 +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
102 +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
106 * More than enough for everybody ;) The huge number of majors
107 * is a leftover from 16bit dev_t days, we don't really need that
110 #define SD_MAX_DISKS (((26 * 26) + 26 + 1) * 26)
112 +#define SD_STATS 256
114 * Time out in seconds for disks and Magneto-opticals (which are slower).
116 @@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c
117 SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
118 disk->disk_name, (unsigned long long)block));
120 + sd_iostats_start_req(SCpnt);
123 * If we have a 1K hardware sectorsize, prevent access to single
124 * 512 byte sectors. In theory we could handle this - in fact
125 @@ -474,6 +538,7 @@ static int sd_open(struct inode *inode,
126 scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
129 + sd_iostats_init_disk(disk);
133 @@ -849,6 +914,7 @@ static void sd_rw_intr(struct scsi_cmnd
137 + sd_iostats_finish_req(SCpnt);
138 scsi_io_completion(SCpnt, good_bytes);
141 @@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d
145 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
147 +sd_iostats_seq_show(struct seq_file *seq, void *v)
149 + struct timeval now;
150 + struct gendisk *disk = seq->private;
151 + iostat_stats_t *stats;
152 + unsigned long long read_len;
153 + unsigned long long read_len_tot;
154 + unsigned long read_num;
155 + unsigned long read_num_tot;
156 + unsigned long long write_len;
157 + unsigned long long write_len_tot;
158 + unsigned long write_num;
159 + unsigned long write_num_tot;
163 + if (sd_iostats == NULL) {
164 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
168 + stats = sd_iostats[scsi_disk(disk)->index];
169 + if (stats == NULL) {
170 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
174 + do_gettimeofday(&now);
175 + now.tv_sec -= stats->iostat_timeval.tv_sec;
176 + now.tv_usec -= stats->iostat_timeval.tv_usec;
177 + if (now.tv_usec < 0) {
178 + now.tv_usec += 1000000;
182 + /* this sampling races with updates */
183 + seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
184 + (unsigned long) scsi_disk(disk)->index,
185 + now.tv_sec, now.tv_usec);
187 + for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
188 + if (stats->iostat_read_histogram[i].iostat_count != 0 ||
189 + stats->iostat_write_histogram[i].iostat_count != 0)
193 + seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
194 + "reads", "total", "writes", "total");
196 + read_len_tot = write_len_tot = 0;
197 + read_num_tot = write_num_tot = 0;
198 + for (i = 0; i <= maxi; i++) {
199 + read_len = stats->iostat_read_histogram[i].iostat_size;
200 + read_len_tot += read_len;
201 + read_num = stats->iostat_read_histogram[i].iostat_count;
202 + read_num_tot += read_num;
204 + write_len = stats->iostat_write_histogram[i].iostat_size;
205 + write_len_tot += write_len;
206 + write_num = stats->iostat_write_histogram[i].iostat_count;
207 + write_num_tot += write_num;
209 + seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
210 + 512<<i, read_num, read_len, write_num, write_len);
213 + seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
214 + read_num_tot, read_len_tot,
215 + write_num_tot, write_len_tot);
217 + seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
218 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
219 + unsigned long long ticks, percent;
220 + ticks = stats->iostat_queue_ticks[i];
223 + percent = stats->iostat_queue_ticks[i] * 100;
224 + do_div(percent, stats->iostat_queue_ticks_sum);
225 + seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
228 + if (stats->iostat_reqs != 0) {
229 + unsigned long long aveseek = 0, percent = 0;
231 + if (stats->iostat_seeks) {
232 + aveseek = stats->iostat_seek_sectors;
233 + do_div(aveseek, stats->iostat_seeks);
234 + percent = stats->iostat_seeks * 100;
235 + do_div(percent, stats->iostat_reqs);
238 + seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
239 + "%llu sectors in ave, %llu%% of all reqs\n",
240 + stats->iostat_sectors, stats->iostat_reqs,
241 + stats->iostat_seeks, aveseek, percent);
244 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
245 + "%%", "writes", "%%");
246 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
247 + unsigned long read_percent = 0, write_percent = 0;
248 + if (stats->iostat_wtime[i] == 0 &&
249 + stats->iostat_rtime[i] == 0)
251 + if (stats->iostat_read_reqs)
252 + read_percent = stats->iostat_rtime[i] * 100 /
253 + stats->iostat_read_reqs;
254 + if (stats->iostat_write_reqs)
255 + write_percent = stats->iostat_wtime[i] * 100 /
256 + stats->iostat_write_reqs;
257 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
258 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
259 + stats->iostat_rtime[i], read_percent,
260 + stats->iostat_wtime[i], write_percent);
263 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
264 + "%%", "writes", "%%");
265 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
266 + unsigned long read_percent = 0, write_percent = 0;
267 + if (stats->iostat_wtime_in_queue[i] == 0 &&
268 + stats->iostat_rtime_in_queue[i] == 0)
270 + if (stats->iostat_read_reqs)
271 + read_percent = stats->iostat_rtime_in_queue[i] * 100 /
272 + stats->iostat_read_reqs;
273 + if (stats->iostat_write_reqs)
274 + write_percent = stats->iostat_wtime_in_queue[i] * 100 /
275 + stats->iostat_write_reqs;
276 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
277 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
278 + stats->iostat_rtime_in_queue[i],
280 + stats->iostat_wtime_in_queue[i],
288 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
290 + return (*pos == 0) ? (void *)1 : NULL;
294 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
301 +sd_iostats_seq_stop(struct seq_file *p, void *v)
305 +static struct seq_operations sd_iostats_seqops = {
306 + .start = sd_iostats_seq_start,
307 + .stop = sd_iostats_seq_stop,
308 + .next = sd_iostats_seq_next,
309 + .show = sd_iostats_seq_show,
313 +sd_iostats_seq_open (struct inode *inode, struct file *file)
317 + rc = seq_open(file, &sd_iostats_seqops);
321 + ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
326 +sd_iostats_seq_write(struct file *file, const char *buffer,
327 + size_t len, loff_t *off)
329 + struct seq_file *seq = file->private_data;
330 + struct gendisk *disk = seq->private;
331 + iostat_stats_t *stats = sd_iostats[scsi_disk(disk)->index];
332 + unsigned long flags;
333 + unsigned long qdepth;
336 + spin_lock_irqsave (&stats->iostat_lock, flags);
337 + qdepth = stats->iostat_queue_depth;
338 + memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
339 + do_gettimeofday(&stats->iostat_timeval);
340 + stats->iostat_queue_stamp = jiffies;
341 + stats->iostat_queue_depth = qdepth;
342 + spin_unlock_irqrestore (&stats->iostat_lock, flags);
347 +static struct file_operations sd_iostats_proc_fops = {
348 + .owner = THIS_MODULE,
349 + .open = sd_iostats_seq_open,
351 + .write = sd_iostats_seq_write,
352 + .llseek = seq_lseek,
353 + .release = seq_release,
356 +extern struct proc_dir_entry *proc_scsi;
359 +sd_iostats_init(void)
363 + sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
364 + if (sd_iostats == NULL) {
365 + printk(KERN_WARNING "Can't keep sd iostats: "
366 + "ENOMEM allocating stats array size %d\n",
367 + SD_STATS * sizeof(iostat_stats_t *));
371 + for (i = 0; i < SD_STATS; i++)
372 + sd_iostats[i] = NULL;
374 + if (proc_scsi == NULL) {
375 + printk(KERN_WARNING "No access to sd iostats: "
376 + "proc_scsi is NULL\n");
380 + sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
381 + S_IFDIR | S_IRUGO | S_IXUGO,
383 + if (sd_iostats_procdir == NULL) {
384 + printk(KERN_WARNING "No access to sd iostats: "
385 + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
391 +sd_iostats_init_disk(struct gendisk *disk)
393 + struct proc_dir_entry *pde;
394 + unsigned long flags;
395 + iostat_stats_t *stats;
397 + if (sd_iostats == NULL || sd_iostats_procdir == NULL)
400 + if (scsi_disk(disk)->index >= SD_STATS) { /* valid: 0..SD_STATS-1 */
401 + printk(KERN_ERR "sd_iostats_init_disk: "
402 + "unexpected disk index %d(%d)\n",
403 + scsi_disk(disk)->index, SD_STATS);
407 + if (sd_iostats[scsi_disk(disk)->index] != NULL)
410 + stats = kmalloc(sizeof(*stats), GFP_KERNEL);
411 + if (stats == NULL) {
412 + printk(KERN_WARNING "Can't keep %s iostats: "
413 + "ENOMEM allocating stats size %d\n",
414 + disk->disk_name, sizeof(*stats));
418 + memset (stats, 0, sizeof(*stats));
419 + do_gettimeofday(&stats->iostat_timeval);
420 + stats->iostat_queue_stamp = jiffies;
421 + spin_lock_init(&stats->iostat_lock);
424 + spin_lock_irqsave(&stats->iostat_lock, flags);
426 + if (sd_iostats[scsi_disk(disk)->index] != NULL) {
427 + spin_unlock_irqrestore(&stats->iostat_lock, flags);
432 + sd_iostats[scsi_disk(disk)->index] = stats;
434 + spin_unlock_irqrestore(&stats->iostat_lock, flags);
436 + strncpy(stats->iostat_name, disk->disk_name,
437 + sizeof(stats->iostat_name)-1);
439 + pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR,
440 + sd_iostats_procdir);
442 + printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
443 + sd_iostats_procdir_name, disk->disk_name);
445 + pde->proc_fops = &sd_iostats_proc_fops;
450 +void sd_iostats_fini(void)
454 + if (sd_iostats == NULL)
457 + for (i = 0; i < SD_STATS; i++) {
458 + if (sd_iostats[i] == NULL)
460 + if (sd_iostats_procdir != NULL)
461 + remove_proc_entry(sd_iostats[i]->iostat_name,
462 + sd_iostats_procdir);
463 + kfree(sd_iostats[i]);
466 + if (proc_scsi != NULL && sd_iostats_procdir != NULL)
467 + remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
469 + sd_iostats_procdir = NULL;
474 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
476 + struct request *rq = SCpnt->request;
477 + iostat_stats_t *stats;
478 + unsigned long *tcounter;
481 + unsigned long irqflags;
484 + disk = scsi_disk(rq->rq_disk)->index;
486 + if (sd_iostats == NULL)
489 + if (disk < 0 || disk >= SD_STATS) {
490 + printk(KERN_ERR "sd_iostats_bump: unexpected disk index "
491 + "%d([0-%d])\n", disk, SD_STATS);
495 + stats = sd_iostats[disk];
499 + tmp = jiffies - rq->start_time;
500 + for (tbucket = 0; tmp > 1; tbucket++)
502 + if (tbucket >= IOSTAT_NCOUNTERS)
503 + tbucket = IOSTAT_NCOUNTERS - 1;
504 + //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
506 + tcounter = rq_data_dir(rq) == WRITE ?
507 + &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
509 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
511 + /* update delay stats */
514 + /* update queue depth stats */
515 + i = stats->iostat_queue_depth;
516 + if (i >= IOSTAT_NCOUNTERS)
517 + i = IOSTAT_NCOUNTERS - 1;
518 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
519 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
520 + stats->iostat_queue_depth--;
522 + /* update seek stats. XXX: not sure about nr_sectors */
523 + stats->iostat_sectors += rq->nr_sectors;
524 + stats->iostat_reqs++;
525 + if (rq->sector != stats->iostat_next_sector) {
526 + stats->iostat_seek_sectors +=
527 + rq->sector > stats->iostat_next_sector ?
528 + rq->sector - stats->iostat_next_sector :
529 + stats->iostat_next_sector - rq->sector;
530 + stats->iostat_seeks++;
532 + stats->iostat_next_sector = rq->sector + rq->nr_sectors;
534 + stats->iostat_queue_stamp = jiffies;
536 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
539 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
541 + struct request *rq = SCpnt->request;
542 + iostat_stats_t *stats;
543 + iostat_counter_t *counter;
547 + unsigned long irqflags;
551 + disk = scsi_disk(rq->rq_disk)->index;
553 + if (sd_iostats == NULL)
556 + if (disk < 0 || disk >= SD_STATS) {
557 + printk(KERN_ERR "sd_iostats_bump: unexpected disk index %d([0-%d])\n",
562 + stats = sd_iostats[disk];
566 + nsect = SCpnt->request_bufflen >> 9;
567 + for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
570 + if (bucket >= IOSTAT_NCOUNTERS) {
571 + printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
575 + counter = rq_data_dir(rq) == WRITE ?
576 + &stats->iostat_write_histogram[bucket] :
577 + &stats->iostat_read_histogram[bucket];
579 + tmp = jiffies - rq->start_time;
580 + for (tbucket = 0; tmp > 1; tbucket++)
582 + if (tbucket >= IOSTAT_NCOUNTERS)
583 + tbucket = IOSTAT_NCOUNTERS - 1;
584 + //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
586 + /* An ugly hack to measure the exact processing time. The right
587 + * solution is to add one more field to struct request;
588 + * hopefully overwriting start_time will break nothing ... */
589 + rq->start_time = jiffies;
591 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
593 + /* update queue depth stats */
594 + i = stats->iostat_queue_depth;
595 + if (i >= IOSTAT_NCOUNTERS)
596 + i = IOSTAT_NCOUNTERS - 1;
597 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
598 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
599 + stats->iostat_queue_depth++;
601 + /* update delay stats */
602 + if (rq_data_dir(rq) == WRITE) {
603 + stats->iostat_wtime_in_queue[tbucket]++;
604 + stats->iostat_write_reqs++;
606 + stats->iostat_rtime_in_queue[tbucket]++;
607 + stats->iostat_read_reqs++;
610 + /* update size stats */
611 + counter->iostat_size += nsect;
612 + counter->iostat_count++;
614 + stats->iostat_queue_stamp = jiffies;
616 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
621 * init_sd - entry point for this driver (both when built in or when
623 @@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d
624 static int __init init_sd(void)
629 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
631 @@ -1594,7 +2138,10 @@ static int __init init_sd(void)
635 - return scsi_register_driver(&sd_template.gendrv);
636 + rc = scsi_register_driver(&sd_template.gendrv);
643 @@ -1608,6 +2155,7 @@ static void __exit exit_sd(void)
645 SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
648 scsi_unregister_driver(&sd_template.gendrv);
649 for (i = 0; i < SD_MAJORS; i++)
650 unregister_blkdev(sd_major(i), "sd");