1 Export more statistics from the SCSI layer.
3 A nice-to-have patch, but not required for Lustre to work.
5 Index: linux-2.6.22.19/drivers/scsi/Kconfig
6 ===================================================================
7 --- linux-2.6.22.19.orig/drivers/scsi/Kconfig
8 +++ linux-2.6.22.19/drivers/scsi/Kconfig
9 @@ -76,6 +76,14 @@ config BLK_DEV_SD
10 In this case, do not compile the driver for your SCSI host adapter
11 (below) as a module either.
14 + bool "Enable SCSI disk I/O stats"
15 + depends on BLK_DEV_SD
18 + This enables SCSI disk I/O stats collection. You must also enable
19 + /proc file system support if you want this feature.
22 tristate "SCSI tape support"
24 Index: linux-2.6.22.19/drivers/scsi/scsi_proc.c
25 ===================================================================
26 --- linux-2.6.22.19.orig/drivers/scsi/scsi_proc.c
27 +++ linux-2.6.22.19/drivers/scsi/scsi_proc.c
29 /* 4K page size, but our output routines, use some slack for overruns */
30 #define PROC_BLOCK_SIZE (3*1024)
32 -static struct proc_dir_entry *proc_scsi;
33 +struct proc_dir_entry *proc_scsi;
34 +EXPORT_SYMBOL(proc_scsi);
36 /* Protect sht->present and sht->proc_dir */
37 static DEFINE_MUTEX(global_host_template_mutex);
38 Index: linux-2.6.22.19/drivers/scsi/sd.c
39 ===================================================================
40 --- linux-2.6.22.19.orig/drivers/scsi/sd.c
41 +++ linux-2.6.22.19/drivers/scsi/sd.c
42 @@ -94,6 +94,24 @@ static DEFINE_SPINLOCK(sd_index_lock);
43 * object after last put) */
44 static DEFINE_MUTEX(sd_ref_mutex);
46 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
47 +# include <linux/proc_fs.h>
48 +# include <linux/seq_file.h>
49 +struct proc_dir_entry *sd_iostats_procdir = NULL;
50 +char sd_iostats_procdir_name[] = "sd_iostats";
51 +static struct file_operations sd_iostats_proc_fops;
53 +extern void sd_iostats_init(void);
54 +extern void sd_iostats_fini(void);
55 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
56 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
58 +static inline void sd_iostats_init(void) {}
59 +static inline void sd_iostats_fini(void) {}
60 +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
61 +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
64 static const char *sd_cache_types[] = {
65 "write through", "none", "write back",
66 "write back, no read (daft)"
67 @@ -498,6 +516,8 @@ static int sd_init_command(struct scsi_c
69 SCpnt->done = sd_rw_intr;
71 + sd_iostats_start_req(SCpnt);
74 * This indicates that the command is ready from our end to be
76 @@ -980,6 +1000,7 @@ static void sd_rw_intr(struct scsi_cmnd
80 + sd_iostats_finish_req(SCpnt);
81 scsi_io_completion(SCpnt, good_bytes);
84 @@ -1666,6 +1687,36 @@ static int sd_probe(struct device *dev)
86 gd->flags |= GENHD_FL_REMOVABLE;
88 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
89 + sdkp->stats = kzalloc(sizeof(iostat_stats_t), GFP_KERNEL);
91 + printk(KERN_WARNING "cannot allocate iostat structure for"
92 + "%s\n", gd->disk_name);
94 + do_gettimeofday(&sdkp->stats->iostat_timeval);
95 + sdkp->stats->iostat_queue_stamp = jiffies;
96 + spin_lock_init(&sdkp->stats->iostat_lock);
97 + if (sd_iostats_procdir) {
98 + struct proc_dir_entry *pde;
99 + pde = create_proc_entry(gd->disk_name, S_IRUGO | S_IWUSR,
100 + sd_iostats_procdir);
102 + printk(KERN_WARNING "Can't create /proc/scsi/"
104 + sd_iostats_procdir_name,
106 + kfree(sdkp->stats);
107 + sdkp->stats = NULL;
109 + pde->proc_fops = &sd_iostats_proc_fops;
113 + kfree(sdkp->stats);
114 + sdkp->stats = NULL;
118 dev_set_drvdata(dev, sdkp);
121 @@ -1709,6 +1760,366 @@ static int sd_remove(struct device *dev)
125 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
127 +sd_iostats_seq_show(struct seq_file *seq, void *v)
129 + struct timeval now;
130 + struct gendisk *disk = seq->private;
131 + iostat_stats_t *stats;
132 + unsigned long long read_len;
133 + unsigned long long read_len_tot;
134 + unsigned long read_num;
135 + unsigned long read_num_tot;
136 + unsigned long long write_len;
137 + unsigned long long write_len_tot;
138 + unsigned long write_num;
139 + unsigned long write_num_tot;
143 + stats = scsi_disk(disk)->stats;
144 + if (stats == NULL) {
145 + printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
149 + do_gettimeofday(&now);
150 + now.tv_sec -= stats->iostat_timeval.tv_sec;
151 + now.tv_usec -= stats->iostat_timeval.tv_usec;
152 + if (now.tv_usec < 0) {
153 + now.tv_usec += 1000000;
157 + /* this sampling races with updates */
158 + seq_printf(seq, "index: %lu snapshot_time: %lu.%06lu\n",
159 + (unsigned long) scsi_disk(disk)->index,
160 + now.tv_sec, now.tv_usec);
162 + for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)
163 + if (stats->iostat_read_histogram[i].iostat_count != 0 ||
164 + stats->iostat_write_histogram[i].iostat_count != 0)
168 + seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
169 + "reads", "total", "writes", "total");
171 + read_len_tot = write_len_tot = 0;
172 + read_num_tot = write_num_tot = 0;
173 + for (i = 0; i <= maxi; i++) {
174 + read_len = stats->iostat_read_histogram[i].iostat_size;
175 + read_len_tot += read_len;
176 + read_num = stats->iostat_read_histogram[i].iostat_count;
177 + read_num_tot += read_num;
179 + write_len = stats->iostat_write_histogram[i].iostat_size;
180 + write_len_tot += write_len;
181 + write_num = stats->iostat_write_histogram[i].iostat_count;
182 + write_num_tot += write_num;
184 + seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
185 + 512<<i, read_num, read_len, write_num, write_len);
188 + seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
189 + read_num_tot, read_len_tot,
190 + write_num_tot, write_len_tot);
192 + seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
193 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
194 + unsigned long long ticks, percent;
195 + ticks = stats->iostat_queue_ticks[i];
198 + percent = stats->iostat_queue_ticks[i] * 100;
199 + do_div(percent, stats->iostat_queue_ticks_sum);
200 + seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
203 + if (stats->iostat_reqs != 0) {
204 + unsigned long long aveseek = 0, percent = 0;
206 + if (stats->iostat_seeks) {
207 + aveseek = stats->iostat_seek_sectors;
208 + do_div(aveseek, stats->iostat_seeks);
209 + percent = stats->iostat_seeks * 100;
210 + do_div(percent, stats->iostat_reqs);
213 + seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
214 + "%llu sectors in ave, %llu%% of all reqs\n",
215 + stats->iostat_sectors, stats->iostat_reqs,
216 + stats->iostat_seeks, aveseek, percent);
219 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
220 + "%%", "writes", "%%");
221 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
222 + unsigned long read_percent = 0, write_percent = 0;
223 + if (stats->iostat_wtime[i] == 0 &&
224 + stats->iostat_rtime[i] == 0)
226 + if (stats->iostat_read_reqs)
227 + read_percent = stats->iostat_rtime[i] * 100 /
228 + stats->iostat_read_reqs;
229 + if (stats->iostat_write_reqs)
230 + write_percent = stats->iostat_wtime[i] * 100 /
231 + stats->iostat_write_reqs;
232 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
233 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
234 + stats->iostat_rtime[i], read_percent,
235 + stats->iostat_wtime[i], write_percent);
238 + seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
239 + "%%", "writes", "%%");
240 + for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
241 + unsigned long read_percent = 0, write_percent = 0;
242 + if (stats->iostat_wtime_in_queue[i] == 0 &&
243 + stats->iostat_rtime_in_queue[i] == 0)
245 + if (stats->iostat_read_reqs)
246 + read_percent = stats->iostat_rtime_in_queue[i] * 100 /
247 + stats->iostat_read_reqs;
248 + if (stats->iostat_write_reqs)
249 + write_percent = stats->iostat_wtime_in_queue[i] * 100 /
250 + stats->iostat_write_reqs;
251 + seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
252 + jiffies_to_msecs(((1UL << i) >> 1) << 1),
253 + stats->iostat_rtime_in_queue[i],
255 + stats->iostat_wtime_in_queue[i],
263 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
265 + return (*pos == 0) ? (void *)1 : NULL;
269 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
276 +sd_iostats_seq_stop(struct seq_file *p, void *v)
280 +static struct seq_operations sd_iostats_seqops = {
281 + .start = sd_iostats_seq_start,
282 + .stop = sd_iostats_seq_stop,
283 + .next = sd_iostats_seq_next,
284 + .show = sd_iostats_seq_show,
288 +sd_iostats_seq_open (struct inode *inode, struct file *file)
292 + rc = seq_open(file, &sd_iostats_seqops);
296 + ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
301 +sd_iostats_seq_write(struct file *file, const char *buffer,
302 + size_t len, loff_t *off)
304 + struct seq_file *seq = file->private_data;
305 + struct gendisk *disk = seq->private;
306 + iostat_stats_t *stats = scsi_disk(disk)->stats;
307 + unsigned long flags;
308 + unsigned long qdepth;
311 + spin_lock_irqsave (&stats->iostat_lock, flags);
312 + qdepth = stats->iostat_queue_depth;
313 + memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));
314 + do_gettimeofday(&stats->iostat_timeval);
315 + stats->iostat_queue_stamp = jiffies;
316 + stats->iostat_queue_depth = qdepth;
317 + spin_unlock_irqrestore (&stats->iostat_lock, flags);
322 +static struct file_operations sd_iostats_proc_fops = {
323 + .owner = THIS_MODULE,
324 + .open = sd_iostats_seq_open,
326 + .write = sd_iostats_seq_write,
327 + .llseek = seq_lseek,
328 + .release = seq_release,
331 +extern struct proc_dir_entry *proc_scsi;
334 +sd_iostats_init(void)
336 + if (proc_scsi == NULL) {
337 + printk(KERN_WARNING "No access to sd iostats: "
338 + "proc_scsi is NULL\n");
342 + sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
343 + S_IFDIR | S_IRUGO | S_IXUGO,
345 + if (sd_iostats_procdir == NULL) {
346 + printk(KERN_WARNING "No access to sd iostats: "
347 + "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
352 +void sd_iostats_fini(void)
354 + if (proc_scsi != NULL && sd_iostats_procdir != NULL)
355 + remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
357 + sd_iostats_procdir = NULL;
360 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)
362 + struct request *rq = SCpnt->request;
363 + iostat_stats_t *stats;
364 + unsigned long *tcounter;
367 + unsigned long irqflags;
370 + stats = scsi_disk(rq->rq_disk)->stats;
374 + tmp = jiffies - rq->start_time;
375 + for (tbucket = 0; tmp > 1; tbucket++)
377 + if (tbucket >= IOSTAT_NCOUNTERS)
378 + tbucket = IOSTAT_NCOUNTERS - 1;
379 + //printk("%u ticks in D to %u\n", jiffies - rq->start_time, tbucket);
381 + tcounter = rq_data_dir(rq) == WRITE ?
382 + &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
384 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
386 + /* update delay stats */
389 + /* update queue depth stats */
390 + i = stats->iostat_queue_depth;
391 + if (i >= IOSTAT_NCOUNTERS)
392 + i = IOSTAT_NCOUNTERS - 1;
393 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
394 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
395 + BUG_ON(stats->iostat_queue_depth == 0);
396 + stats->iostat_queue_depth--;
398 + /* update seek stats. XXX: not sure about nr_sectors */
399 + stats->iostat_sectors += rq->nr_sectors;
400 + stats->iostat_reqs++;
401 + if (rq->sector != stats->iostat_next_sector) {
402 + stats->iostat_seek_sectors +=
403 + rq->sector > stats->iostat_next_sector ?
404 + rq->sector - stats->iostat_next_sector :
405 + stats->iostat_next_sector - rq->sector;
406 + stats->iostat_seeks++;
408 + stats->iostat_next_sector = rq->sector + rq->nr_sectors;
410 + stats->iostat_queue_stamp = jiffies;
412 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
415 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt)
417 + struct request *rq = SCpnt->request;
418 + iostat_stats_t *stats;
419 + iostat_counter_t *counter;
423 + unsigned long irqflags;
427 + stats = scsi_disk(rq->rq_disk)->stats;
431 + nsect = SCpnt->request_bufflen >> 9;
432 + for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
435 + if (bucket >= IOSTAT_NCOUNTERS) {
436 + printk (KERN_ERR "sd_iostats_bump: nsect %d too big\n", nsect);
440 + counter = rq_data_dir(rq) == WRITE ?
441 + &stats->iostat_write_histogram[bucket] :
442 + &stats->iostat_read_histogram[bucket];
444 + tmp = jiffies - rq->start_time;
445 + for (tbucket = 0; tmp > 1; tbucket++)
447 + if (tbucket >= IOSTAT_NCOUNTERS)
448 + tbucket = IOSTAT_NCOUNTERS - 1;
449 + //printk("%u ticks in Q to %u\n", jiffies - rq->start_time, tbucket);
451 + /* An ugly hack to get the exact processing time: rq->start_time
452 + * is overwritten below. The right solution is to add one more
453 + * field to struct request; hopefully this breaks nothing ... */
454 + rq->start_time = jiffies;
456 + spin_lock_irqsave(&stats->iostat_lock, irqflags);
458 + /* update queue depth stats */
459 + i = stats->iostat_queue_depth;
460 + if (i >= IOSTAT_NCOUNTERS)
461 + i = IOSTAT_NCOUNTERS - 1;
462 + stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
463 + stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
464 + stats->iostat_queue_depth++;
466 + /* update delay stats */
467 + if (rq_data_dir(rq) == WRITE) {
468 + stats->iostat_wtime_in_queue[tbucket]++;
469 + stats->iostat_write_reqs++;
471 + stats->iostat_rtime_in_queue[tbucket]++;
472 + stats->iostat_read_reqs++;
475 + /* update size stats */
476 + counter->iostat_size += nsect;
477 + counter->iostat_count++;
479 + stats->iostat_queue_stamp = jiffies;
481 + spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
486 * scsi_disk_release - Called to free the scsi_disk structure
487 * @cdev: pointer to embedded class device
488 @@ -1727,10 +2138,16 @@ static void scsi_disk_release(struct cla
489 idr_remove(&sd_index_idr, sdkp->index);
490 spin_unlock(&sd_index_lock);
492 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
494 + remove_proc_entry(disk->disk_name, sd_iostats_procdir);
495 + kfree(sdkp->stats);
496 + sdkp->stats = NULL;
499 disk->private_data = NULL;
501 put_device(&sdkp->device->sdev_gendev);
506 @@ -1845,6 +2262,8 @@ static int __init init_sd(void)
512 err = class_register(&sd_disk_class);
515 @@ -1860,6 +2279,7 @@ err_out_class:
517 for (i = 0; i < SD_MAJORS; i++)
518 unregister_blkdev(sd_major(i), "sd");
523 Index: linux-2.6.22.19/include/scsi/sd.h
524 ===================================================================
525 --- linux-2.6.22.19.orig/include/scsi/sd.h
526 +++ linux-2.6.22.19/include/scsi/sd.h
529 #define SD_BUF_SIZE 512
531 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
533 + unsigned long long iostat_size;
534 + unsigned long long iostat_count;
537 +#define IOSTAT_NCOUNTERS 16
539 + iostat_counter_t iostat_read_histogram[IOSTAT_NCOUNTERS];
540 + iostat_counter_t iostat_write_histogram[IOSTAT_NCOUNTERS];
541 + struct timeval iostat_timeval;
543 + /* queue depth: how well the pipe is filled up */
544 + unsigned long long iostat_queue_ticks[IOSTAT_NCOUNTERS];
545 + unsigned long long iostat_queue_ticks_sum;
546 + unsigned long iostat_queue_depth;
547 + unsigned long iostat_queue_stamp;
549 + /* seeks: how linear the traffic is */
550 + unsigned long long iostat_next_sector;
551 + unsigned long long iostat_seek_sectors;
552 + unsigned long long iostat_seeks;
553 + unsigned long long iostat_sectors;
554 + unsigned long long iostat_reqs;
555 + unsigned long iostat_read_reqs;
556 + unsigned long iostat_write_reqs;
558 + /* process time: how long it takes to process requests */
559 + unsigned long iostat_rtime[IOSTAT_NCOUNTERS];
560 + unsigned long iostat_wtime[IOSTAT_NCOUNTERS];
562 + /* queue time: how long requests spent in the elevator's queue */
563 + unsigned long iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
564 + unsigned long iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
566 + /* must be the last field: its offset is the size that gets memset on reset */
567 + spinlock_t iostat_lock;
568 +} ____cacheline_aligned_in_smp iostat_stats_t;
572 struct scsi_driver *driver; /* always &sd_template */
573 struct scsi_device *device;
574 @@ -44,6 +84,9 @@ struct scsi_disk {
575 unsigned WCE : 1; /* state of disk WCE bit */
576 unsigned RCD : 1; /* state of disk RCD bit, unused */
577 unsigned DPOFUA : 1; /* state of disk DPOFUA bit */
578 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
579 + iostat_stats_t *stats; /* scsi disk statistics */
582 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)