1 Index: linux-2.6.5-7.311/drivers/scsi/Kconfig
2 ===================================================================
3 --- linux-2.6.5-7.311.orig/drivers/scsi/Kconfig
4 +++ linux-2.6.5-7.311/drivers/scsi/Kconfig
5 @@ -67,6 +67,14 @@ config SCSI_DUMP
6 polling I/O. If it doesn't, LKCD will fall back to ordinary
10 + bool "Enable SCSI disk I/O stats"
11 + depends on BLK_DEV_SD
14 + This enables collection of per-disk SCSI I/O statistics. The statistics
15 + are exported through /proc, so /proc file system support must also be enabled.
18 tristate "SCSI tape support"
20 Index: linux-2.6.5-7.311/drivers/scsi/scsi_proc.c
21 ===================================================================
22 --- linux-2.6.5-7.311.orig/drivers/scsi/scsi_proc.c
23 +++ linux-2.6.5-7.311/drivers/scsi/scsi_proc.c
25 /* 4K page size, but our output routines, use some slack for overruns */
26 #define PROC_BLOCK_SIZE (3*1024)
28 -static struct proc_dir_entry *proc_scsi;
29 +struct proc_dir_entry *proc_scsi;
30 +EXPORT_SYMBOL(proc_scsi);
32 /* Protect sht->present and sht->proc_dir */
33 static DECLARE_MUTEX(global_host_template_sem);
34 Index: linux-2.6.5-7.311/drivers/scsi/sd.c
35 ===================================================================
36 --- linux-2.6.5-7.311.orig/drivers/scsi/sd.c
37 +++ linux-2.6.5-7.311/drivers/scsi/sd.c
40 #define SD_DISKS 32768 /* anything between 256 and 262144 */
42 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
43 +# include <linux/proc_fs.h>
44 +# include <linux/seq_file.h>
47 + unsigned long long iostat_size;
48 + unsigned long long iostat_count;
51 +#define IOSTAT_NCOUNTERS 16
53 + iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
54 + iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
55 + struct timeval iostat_timeval;
57 + /* queue depth: how well the pipe is filled up */
58 + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
59 + unsigned long long iostat_queue_ticks_sum;
60 + unsigned long iostat_queue_depth;
61 + unsigned long iostat_queue_stamp;
63 + /* seeks: how linear the traffic is */
64 + unsigned long long iostat_next_sector;
65 + unsigned long long iostat_seek_sectors;
66 + unsigned long long iostat_seeks;
67 + unsigned long long iostat_sectors;
68 + unsigned long long iostat_reqs;
69 + unsigned long iostat_read_reqs;
70 + unsigned long iostat_write_reqs;
72 + /* process time: how long it takes to process requests */
73 + unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
74 + unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
76 + /* queue time: how long requests spent in the elevator's queue */
77 + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
78 + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
80 + /* must be the last field: a stats reset memsets everything before it */
81 + spinlock_t iostat_lock;
82 +} ____cacheline_aligned_in_smp iostat_stats_t;
84 +struct proc_dir_entry *sd_iostats_procdir = NULL;
85 +char sd_iostats_procdir_name[] = "sd_iostats";
86 +static struct file_operations sd_iostats_proc_fops;
88 +extern void sd_iostats_init(void);
89 +extern void sd_iostats_fini(void);
90 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
91 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
93 +static inline void sd_iostats_init(void) {}
94 +static inline void sd_iostats_fini(void) {}
95 +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
96 +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
100 * Time out in seconds for disks and Magneto-opticals (which are slower).
102 @@ -96,6 +153,9 @@ struct scsi_disk {
104 unsigned WCE : 1; /* state of disk WCE bit */
105 unsigned RCD : 1; /* state of disk RCD bit, unused */
106 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
107 + iostat_stats_t *stats; /* scsi disk statistics */
112 @@ -384,6 +444,8 @@ queue:
113 SCpnt->allowed = SD_MAX_RETRIES;
114 SCpnt->timeout_per_command = timeout;
116 + sd_iostats_start_req(SCpnt);
119 * This is the completion routine we use. This is matched in terms
120 * of capability to this function.
121 @@ -884,6 +946,9 @@ static void sd_rw_intr(struct scsi_cmnd
126 + sd_iostats_finish_req(SCpnt);
129 * This calls the generic completion function, now that we know
130 * how many actual sectors finished, and how many sectors we need
131 @@ -1527,6 +1592,36 @@ static int sd_probe(struct device *dev)
132 if (!sdkp->device_ready || sdp->host->no_partition_check)
133 gd->flags |= GENHD_FL_NO_PARTITION_CHECK;
135 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
136 + sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
137 + if (!sdkp->stats) {
138 + printk(KERN_WARNING "cannot allocate iostat structure for"
139 + "%s\n", gd->disk_name);
141 + do_gettimeofday(&sdkp->stats->iostat_timeval);
142 + sdkp->stats->iostat_queue_stamp = jiffies;
143 + spin_lock_init(&sdkp->stats->iostat_lock);
144 + if (sd_iostats_procdir) {
145 + struct proc_dir_entry *pde;
146 + pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
147 + sd_iostats_procdir);
149 + printk(KERN_WARNING "Can't create /proc/scsi/"
151 + sd_iostats_procdir_name,
153 + kfree(sdkp->stats);
154 + sdkp->stats = NULL;
156 + pde->proc_fops = &sd_iostats_proc_fops;
160 + kfree(sdkp->stats);
161 + sdkp->stats = NULL;
165 dev_set_drvdata(dev, sdkp);
168 @@ -1574,7 +1669,14 @@ static int sd_remove(struct device *dev)
169 static void scsi_disk_release(struct kobject *kobj)
171 struct scsi_disk *sdkp = to_scsi_disk(kobj);
174 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
176 + remove_proc_entry(sdkp->disk->disk_name, sd_iostats_procdir);
177 + kfree(sdkp->stats);
178 + sdkp->stats = NULL;
181 put_disk(sdkp->disk);
183 spin_lock(&sd_index_lock);
184 @@ -1605,6 +1707,366 @@ static void sd_shutdown(struct device *d
188 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
190 +sd_iostats_seq_show(struct seq_file *seq, void *v)
192 + struct timeval now;
193 + struct gendisk *disk = seq->private;
194 + iostat_stats_t *stats;
195 + unsigned long long read_len;
196 + unsigned long long read_len_tot;
197 + unsigned long read_num;
198 + unsigned long read_num_tot;
199 + unsigned long long write_len;
200 + unsigned long long write_len_tot;
201 + unsigned long write_num;
202 + unsigned long write_num_tot;
206 + stats = scsi_disk(disk)->stats;
207 + if (stats == NULL) {
208 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
212 + do_gettimeofday(&now);
213 + now.tv_sec -= stats->iostat_timeval.tv_sec;
214 + now.tv_usec -= stats->iostat_timeval.tv_usec;
215 + if (now.tv_usec < 0) {
216 + now.tv_usec += 1000000;
220 + /* this sampling races with updates */
221 + seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
222 + (unsigned long) scsi_disk(disk)->index,
223 + now.tv_sec, now.tv_usec);
225 + for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
226 + if (stats->iostat_read_histogram[i].iostat_count != 0 ||
227 + stats->iostat_write_histogram[i].iostat_count != 0)
231 + seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
232 + "reads", "total", "writes", "total");
234 + read_len_tot = write_len_tot = 0;
235 + read_num_tot = write_num_tot = 0;
236 + for (i = 0; i <= maxi; i++) {
237 + read_len = stats->iostat_read_histogram[i].iostat_size;
238 + read_len_tot += read_len;
239 + read_num = stats->iostat_read_histogram[i].iostat_count;
240 + read_num_tot += read_num;
242 + write_len = stats->iostat_write_histogram[i].iostat_size;
243 + write_len_tot += write_len;
244 + write_num = stats->iostat_write_histogram[i].iostat_count;
245 + write_num_tot += write_num;
247 + seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
248 + 512<<i, read_num, read_len, write_num, write_len);
251 + seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
252 + read_num_tot, read_len_tot,
253 + write_num_tot, write_len_tot);
255 + seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
256 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
257 + unsigned long long ticks, percent;
258 + ticks = stats->iostat_queue_ticks[i];
261 + percent = stats->iostat_queue_ticks[i] * 100;
262 + do_div(percent, stats->iostat_queue_ticks_sum);
263 + seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
266 + if (stats->iostat_reqs != 0) {
267 + unsigned long long aveseek = 0, percent = 0;
269 + if (stats->iostat_seeks) {
270 + aveseek = stats->iostat_seek_sectors;
271 + do_div(aveseek, stats->iostat_seeks);
272 + percent = stats->iostat_seeks * 100;
273 + do_div(percent, stats->iostat_reqs);
276 + seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
277 + "%llu sectors in ave, %llu%% of all reqs\n",
278 + stats->iostat_sectors, stats->iostat_reqs,
279 + stats->iostat_seeks, aveseek, percent);
282 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
283 + "%%", "writes", "%%");
284 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
285 + unsigned long read_percent = 0, write_percent = 0;
286 + if (stats->iostat_wtime[i] == 0 &&
287 + stats->iostat_rtime[i] == 0)
289 + if (stats->iostat_read_reqs)
290 + read_percent = stats->iostat_rtime[i] * 100 /
291 + stats->iostat_read_reqs;
292 + if (stats->iostat_write_reqs)
293 + write_percent = stats->iostat_wtime[i] * 100 /
294 + stats->iostat_write_reqs;
295 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
296 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
297 + stats->iostat_rtime[i], read_percent,
298 + stats->iostat_wtime[i], write_percent);
301 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
302 + "%%", "writes", "%%");
303 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
304 + unsigned long read_percent = 0, write_percent = 0;
305 + if (stats->iostat_wtime_in_queue[i] == 0 &&
306 + stats->iostat_rtime_in_queue[i] == 0)
308 + if (stats->iostat_read_reqs)
309 + read_percent = stats->iostat_rtime_in_queue[i] * 100 /
310 + stats->iostat_read_reqs;
311 + if (stats->iostat_write_reqs)
312 + write_percent = stats->iostat_wtime_in_queue[i] * 100 /
313 + stats->iostat_write_reqs;
314 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
315 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
316 + stats->iostat_rtime_in_queue[i],
318 + stats->iostat_wtime_in_queue[i],
326 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
328 + return (*pos == 0) ? (void *)1 : NULL;
332 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
339 +sd_iostats_seq_stop(struct seq_file *p, void *v)
343 +static struct seq_operations sd_iostats_seqops = {
344 + .start = sd_iostats_seq_start,
345 + .stop = sd_iostats_seq_stop,
346 + .next = sd_iostats_seq_next,
347 + .show = sd_iostats_seq_show,
351 +sd_iostats_seq_open (struct inode *inode, struct file *file)
355 + rc = seq_open(file, &sd_iostats_seqops);
359 + ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
364 +sd_iostats_seq_write(struct file *file, const char *buffer,
365 + size_t len, loff_t *off)
367 + struct seq_file *seq = file->private_data;
368 + struct gendisk *disk = seq->private;
369 + iostat_stats_t *stats = scsi_disk(disk)->stats;
370 + unsigned long flags;
371 + unsigned long qdepth;
374 + spin_lock_irqsave (&stats->iostat_lock, flags);
375 + qdepth = stats->iostat_queue_depth;
376 + memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
377 + do_gettimeofday(&stats->iostat_timeval);
378 + stats->iostat_queue_stamp = jiffies;
379 + stats->iostat_queue_depth = qdepth;
380 + spin_unlock_irqrestore (&stats->iostat_lock, flags);
385 +static struct file_operations sd_iostats_proc_fops = {
386 + .owner = THIS_MODULE,
387 + .open = sd_iostats_seq_open,
389 + .write = sd_iostats_seq_write,
390 + .llseek = seq_lseek,
391 + .release = seq_release,
394 +extern struct proc_dir_entry *proc_scsi;
397 +sd_iostats_init(void)
399 + if (proc_scsi == NULL) {
400 + printk(KERN_WARNING "No access to sd iostats: "
401 + "proc_scsi is NULL\n");
405 + sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
406 + S_IFDIR | S_IRUGO | S_IXUGO,
408 + if (sd_iostats_procdir == NULL) {
409 + printk(KERN_WARNING "No access to sd iostats: "
410 + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
415 +void sd_iostats_fini(void)
417 + if (proc_scsi != NULL && sd_iostats_procdir != NULL)
418 + remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
420 + sd_iostats_procdir = NULL;
423 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
425 + struct request *rq = SCpnt->request;
426 + iostat_stats_t *stats;
427 + unsigned long *tcounter;
430 + unsigned long irqflags;
433 + stats = scsi_disk(rq->rq_disk)->stats;
437 + tmp = jiffies - rq->start_time;
438 + for (tbucket = 0; tmp > 1; tbucket++)
440 + if (tbucket >= IOSTAT_NCOUNTERS)
441 + tbucket = IOSTAT_NCOUNTERS - 1;
442 + //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
444 + tcounter = rq_data_dir(rq) == WRITE ?
445 + &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
447 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
449 + /* update delay stats */
452 + /* update queue depth stats */
453 + i = stats->iostat_queue_depth;
454 + if (i >= IOSTAT_NCOUNTERS)
455 + i = IOSTAT_NCOUNTERS - 1;
456 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
457 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
458 + BUG_ON(stats->iostat_queue_depth == 0);
459 + stats->iostat_queue_depth--;
461 + /* update seek stats. XXX: not sure about nr_sectors */
462 + stats->iostat_sectors += rq->nr_sectors;
463 + stats->iostat_reqs++;
464 + if (rq->sector != stats->iostat_next_sector) {
465 + stats->iostat_seek_sectors +=
466 + rq->sector > stats->iostat_next_sector ?
467 + rq->sector - stats->iostat_next_sector :
468 + stats->iostat_next_sector - rq->sector;
469 + stats->iostat_seeks++;
471 + stats->iostat_next_sector = rq->sector + rq->nr_sectors;
473 + stats->iostat_queue_stamp = jiffies;
475 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
478 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
480 + struct request *rq = SCpnt->request;
481 + iostat_stats_t *stats;
482 + iostat_counter_t *counter;
486 + unsigned long irqflags;
490 + stats = scsi_disk(rq->rq_disk)->stats;
494 + nsect = SCpnt->request_bufflen >> 9;
495 + for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
498 + if (bucket >= IOSTAT_NCOUNTERS) {
499 + printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
503 + counter = rq_data_dir(rq) == WRITE ?
504 + &stats->iostat_write_histogram[bucket] :
505 + &stats->iostat_read_histogram[bucket];
507 + tmp = jiffies - rq->start_time;
508 + for (tbucket = 0; tmp > 1; tbucket++)
510 + if (tbucket >= IOSTAT_NCOUNTERS)
511 + tbucket = IOSTAT_NCOUNTERS - 1;
512 + //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
514 + /* An ugly hack to measure the exact processing time: reuse
515 + * rq->start_time as the dispatch timestamp. The right solution is to
516 + * add a field to struct request; hopefully this breaks nothing. */
517 + rq->start_time = jiffies;
519 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
521 + /* update queue depth stats */
522 + i = stats->iostat_queue_depth;
523 + if (i >= IOSTAT_NCOUNTERS)
524 + i = IOSTAT_NCOUNTERS - 1;
525 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
526 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
527 + stats->iostat_queue_depth++;
529 + /* update delay stats */
530 + if (rq_data_dir(rq) == WRITE) {
531 + stats->iostat_wtime_in_queue[tbucket]++;
532 + stats->iostat_write_reqs++;
534 + stats->iostat_rtime_in_queue[tbucket]++;
535 + stats->iostat_read_reqs++;
538 + /* update size stats */
539 + counter->iostat_size += nsect;
540 + counter->iostat_count++;
542 + stats->iostat_queue_stamp = jiffies;
544 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
549 * init_sd - entry point for this driver (both when built in or when
551 @@ -1614,6 +2076,7 @@ static void sd_shutdown(struct device *d
552 static int __init init_sd(void)
557 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
559 @@ -1624,7 +2087,11 @@ static int __init init_sd(void)
563 - return scsi_register_driver(&sd_template.gendrv);
565 + rc = scsi_register_driver(&sd_template.gendrv);
572 @@ -1641,6 +2108,7 @@ static void __exit exit_sd(void)
573 scsi_unregister_driver(&sd_template.gendrv);
574 for (i = 0; i < SD_MAJORS; i++)
575 unregister_blkdev(sd_major(i), "sd");
579 MODULE_LICENSE("GPL");