Whamcloud - gitweb
b=16395
[fs/lustre-release.git] / lustre / kernel_patches / patches / sd_iostats-2.6-rhel5.patch
1 Index: linux-2.6.9-5.0.3.EL/drivers/scsi/Kconfig\r
2 ===================================================================\r
3 Index: linux-2.6.9/drivers/scsi/Kconfig
4 ===================================================================
5 --- linux-2.6.9.orig/drivers/scsi/Kconfig       2007-07-23 14:19:13.000000000 +0400
6 +++ linux-2.6.9/drivers/scsi/Kconfig    2007-07-26 14:16:36.000000000 +0400
7 @@ -61,6 +61,14 @@ config SCSI_DUMP
8         help
9            SCSI dump support
10  
11 +config SD_IOSTATS
12 +   bool "Enable SCSI disk I/O stats"
13 +   depends on BLK_DEV_SD
14 +   default y
15 +   ---help---
16 +     This enables SCSI disk I/O stats collection.  You must also enable
17 +     /proc file system support if you want this feature.
18 +
19  config CHR_DEV_ST
20         tristate "SCSI tape support"
21         depends on SCSI
22 Index: linux-2.6.9/drivers/scsi/scsi_proc.c
23 ===================================================================
24 --- linux-2.6.9.orig/drivers/scsi/scsi_proc.c   2007-03-13 02:47:28.000000000 +0300
25 +++ linux-2.6.9/drivers/scsi/scsi_proc.c        2007-07-26 14:16:36.000000000 +0400
26 @@ -38,7 +38,8 @@
27  /* 4K page size, but our output routines, use some slack for overruns */
28  #define PROC_BLOCK_SIZE (3*1024)
29  
30 -static struct proc_dir_entry *proc_scsi;
31 +struct proc_dir_entry *proc_scsi;
32 +EXPORT_SYMBOL(proc_scsi);
33  
34  /* Protect sht->present and sht->proc_dir */
35  static DECLARE_MUTEX(global_host_template_sem);
36 Index: linux-2.6.9/drivers/scsi/sd.c
37 ===================================================================
38 --- linux-2.6.9.orig/drivers/scsi/sd.c  2007-03-13 02:47:27.000000000 +0300
39 +++ linux-2.6.9/drivers/scsi/sd.c       2007-07-28 14:55:56.000000000 +0400
40 @@ -63,6 +63,67 @@
41  
42  #include "scsi_logging.h"
43  
44 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
45 +# include <linux/proc_fs.h>
46 +# include <linux/seq_file.h>
47 +
48 +typedef struct {    /* one bucket of a request-size histogram */
49 +        unsigned long long iostat_size;   /* sum of request sizes, in 512-byte sectors */
50 +        unsigned long long iostat_count;  /* number of requests counted in this bucket */
51 +} iostat_counter_t;
52 +
53 +#define IOSTAT_NCOUNTERS 16
54 +typedef struct {    /* statistics of one disk; instances hang off sd_iostats[] */
55 +        iostat_counter_t        iostat_read_histogram[IOSTAT_NCOUNTERS];  /* bucket = log2(sectors) */
56 +        iostat_counter_t        iostat_write_histogram[IOSTAT_NCOUNTERS]; /* bucket = log2(sectors) */
57 +        struct timeval          iostat_timeval; /* time of creation or of the last reset */
58 +
59 +       /* queue depth: how well the pipe is filled up */
60 +       unsigned long long      iostat_queue_ticks[IOSTAT_NCOUNTERS]; /* jiffies spent at each depth */
61 +       unsigned long long      iostat_queue_ticks_sum;
62 +       unsigned long           iostat_queue_depth;  /* requests started but not yet finished */
63 +       unsigned long           iostat_queue_stamp;  /* jiffies of the last depth accounting */
64 +
65 +       /* seeks: how linear the traffic is */
66 +       unsigned long long      iostat_next_sector;  /* sector right after the last finished request */
67 +       unsigned long long      iostat_seek_sectors; /* summed distance of all seeks */
68 +       unsigned long long      iostat_seeks;
69 +       unsigned long long      iostat_sectors;
70 +       unsigned long long      iostat_reqs;         /* counted at completion */
71 +       unsigned long           iostat_read_reqs;    /* counted when the request is started */
72 +       unsigned long           iostat_write_reqs;   /* counted when the request is started */
73 +
74 +       /* process time: how long it takes to process requests (log2-of-jiffies buckets) */
75 +       unsigned long           iostat_rtime[IOSTAT_NCOUNTERS];
76 +       unsigned long           iostat_wtime[IOSTAT_NCOUNTERS];
77 +
78 +       /* queue time: how long process spent in elevator's queue (log2-of-jiffies buckets) */
79 +       unsigned long           iostat_rtime_in_queue[IOSTAT_NCOUNTERS];
80 +       unsigned long           iostat_wtime_in_queue[IOSTAT_NCOUNTERS];
81 +
82 +       char                    iostat_name[32];     /* name of the /proc entry (copy of disk_name) */
83 +
84 +       /* must stay the last field: a reset zeroes offsetof(..., iostat_lock) bytes */
85 +       spinlock_t              iostat_lock;
86 +}  ____cacheline_aligned_in_smp iostat_stats_t;
87 +
88 +iostat_stats_t       **sd_iostats;
89 +struct proc_dir_entry *sd_iostats_procdir;
90 +char                   sd_iostats_procdir_name[] = "sd_iostats";
91 +
92 +extern void sd_iostats_init(void);
93 +extern void sd_iostats_init_disk(struct gendisk *);
94 +extern void sd_iostats_fini(void);
95 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt);
96 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt);
97 +#else
98 +static inline void sd_iostats_init(void) {}
99 +static inline void sd_iostats_init_disk(struct gendisk *disk) {}
100 +static inline void sd_iostats_fini(void) {}
101 +static inline void sd_iostats_start_req(struct scsi_cmnd *SCpnt) {}
102 +static inline void sd_iostats_finish_req(struct scsi_cmnd *SCpnt) {}
103 +#endif
104 +
105  /*
106   * More than enough for everybody ;)  The huge number of majors
107   * is a leftover from 16bit dev_t days, we don't really need that
108 @@ -76,6 +137,7 @@
109   */
110  #define SD_MAX_DISKS   (((26 * 26) + 26 + 1) * 26)
111  
112 +#define SD_STATS 256
113  /*
114   * Time out in seconds for disks and Magneto-opticals (which are slower).
115   */
116 @@ -278,6 +340,8 @@ static int sd_init_command(struct scsi_c
117         SCSI_LOG_HLQUEUE(2, printk("%s : block=%llu\n",
118                                    disk->disk_name, (unsigned long long)block));
119  
120 +       sd_iostats_start_req(SCpnt);
121 +
122         /*
123          * If we have a 1K hardware sectorsize, prevent access to single
124          * 512 byte sectors.  In theory we could handle this - in fact
125 @@ -474,6 +538,7 @@ static int sd_open(struct inode *inode, 
126                         scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
127         }
128  
129 +   sd_iostats_init_disk(disk);
130         return 0;
131  
132  error_out:
133 @@ -849,6 +914,7 @@ static void sd_rw_intr(struct scsi_cmnd 
134                 break;
135         }
136   out:
137 +       sd_iostats_finish_req(SCpnt);
138         scsi_io_completion(SCpnt, good_bytes);
139  }
140
141 @@ -1575,6 +1643,481 @@ static void sd_shutdown(struct device *d
142         sd_sync_cache(sdp);
143  }      
144  
145 +#if (defined(CONFIG_SD_IOSTATS) && defined(CONFIG_PROC_FS))
146 +static int
147 +sd_iostats_seq_show(struct seq_file *seq, void *v)
148 +{
149 +       struct timeval     now;
150 +       struct gendisk *disk = seq->private;
151 +       iostat_stats_t    *stats;
152 +       unsigned long long read_len;
153 +       unsigned long long read_len_tot;
154 +       unsigned long      read_num;
155 +       unsigned long      read_num_tot;
156 +       unsigned long long write_len;
157 +       unsigned long long write_len_tot;
158 +       unsigned long      write_num;
159 +       unsigned long      write_num_tot;
160 +       int                i;
161 +       int                maxi;
162 +       /* seq_file ->show(): print every table of the disk kept in seq->private */
163 +       if (sd_iostats == NULL) {
164 +               printk(KERN_ERR "sd_iostats_seq_show: NULL stats array\n");
165 +               BUG();
166 +       }
167 +
168 +       stats = sd_iostats[scsi_disk(disk)->index];
169 +       if (stats == NULL) {
170 +               printk(KERN_ERR "sd_iostats_seq_show: NULL stats entry\n");
171 +               BUG();
172 +       }
173 +
174 +       do_gettimeofday(&now);  /* snapshot_time = time elapsed since the last reset */
175 +       now.tv_sec -= stats->iostat_timeval.tv_sec;
176 +       now.tv_usec -= stats->iostat_timeval.tv_usec;
177 +       if (now.tv_usec < 0) {
178 +               now.tv_usec += 1000000;
179 +               now.tv_sec--;
180 +       }
181 +
182 +       /* no lock is taken in this function: the sampling races with updates */
183 +       seq_printf(seq, "index:        %lu   snapshot_time:         %lu.%06lu\n",
184 +                       (unsigned long) scsi_disk(disk)->index,
185 +                       now.tv_sec, now.tv_usec);
186 +
187 +       for (i = IOSTAT_NCOUNTERS - 1; i > 0; i--)      /* find the last used size bucket */
188 +               if (stats->iostat_read_histogram[i].iostat_count != 0 ||
189 +                               stats->iostat_write_histogram[i].iostat_count != 0)
190 +                       break;
191 +       maxi = i;
192 +
193 +       seq_printf(seq, "%8s %8s %12s %8s %12s\n", "size",
194 +                       "reads", "total", "writes", "total");
195 +
196 +       read_len_tot = write_len_tot = 0;
197 +       read_num_tot = write_num_tot = 0;
198 +       for (i = 0; i <= maxi; i++) {
199 +               read_len = stats->iostat_read_histogram[i].iostat_size;
200 +               read_len_tot += read_len;
201 +               read_num = stats->iostat_read_histogram[i].iostat_count;
202 +               read_num_tot += read_num;
203 +
204 +               write_len = stats->iostat_write_histogram[i].iostat_size;
205 +               write_len_tot += write_len;
206 +               write_num = stats->iostat_write_histogram[i].iostat_count;
207 +               write_num_tot += write_num;
208 +
209 +               seq_printf (seq, "%8d %8lu %12llu %8lu %12llu\n",
210 +                               512<<i, read_num, read_len, write_num, write_len);
211 +       }
212 +
213 +       seq_printf(seq, "%8s %8lu %12llu %8lu %12llu\n\n", "total",
214 +                       read_num_tot, read_len_tot,
215 +                       write_num_tot, write_len_tot);
216 +
217 +       seq_printf(seq, "%8s %8s %8s\n", "qdepth", "ticks", "%");
218 +       for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
219 +               unsigned long long ticks, percent;
220 +               ticks = stats->iostat_queue_ticks[i];
221 +               if (ticks == 0)
222 +                       continue;
223 +               percent = stats->iostat_queue_ticks[i] * 100;
224 +               do_div(percent, stats->iostat_queue_ticks_sum);
225 +               seq_printf(seq, "%8d %8llu %8llu\n", i, ticks, percent);
226 +       }
227 +
228 +       if (stats->iostat_reqs != 0) {
229 +               unsigned long long aveseek = 0, percent = 0;
230 +
231 +               if (stats->iostat_seeks) {
232 +                       aveseek = stats->iostat_seek_sectors;
233 +                       do_div(aveseek, stats->iostat_seeks);
234 +                       percent = stats->iostat_seeks * 100;
235 +                       do_div(percent, stats->iostat_reqs);
236 +               }
237 +
238 +               seq_printf(seq, "\n%llu sectors in %llu reqs: %llu seek(s) over "
239 +                               "%llu sectors in ave, %llu%% of all reqs\n",
240 +                               stats->iostat_sectors, stats->iostat_reqs,
241 +                               stats->iostat_seeks, aveseek, percent);
242 +       }
243 +
244 +       seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "process time", "reads",
245 +                       "%", "writes", "%");    /* %s arguments are not formats: no "%%" */
246 +       for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
247 +               unsigned long read_percent = 0, write_percent = 0;
248 +               if (stats->iostat_wtime[i] == 0 &&
249 +                               stats->iostat_rtime[i] == 0)
250 +                       continue;
251 +               if (stats->iostat_read_reqs)
252 +                       read_percent = stats->iostat_rtime[i] * 100 /
253 +                               stats->iostat_read_reqs;
254 +               if (stats->iostat_write_reqs)
255 +                       write_percent = stats->iostat_wtime[i] * 100 /
256 +                               stats->iostat_write_reqs;
257 +               seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
258 +                               jiffies_to_msecs(((1UL << i) >> 1) << 1),
259 +                               stats->iostat_rtime[i], read_percent,
260 +                               stats->iostat_wtime[i], write_percent);
261 +       }
262 +
263 +       seq_printf(seq, "\n%16s %8s %8s %8s %8s\n", "time in queue", "reads",
264 +                       "%", "writes", "%");    /* %s arguments are not formats: no "%%" */
265 +       for (i = 0; i < IOSTAT_NCOUNTERS; i++) {
266 +               unsigned long read_percent = 0, write_percent = 0;
267 +               if (stats->iostat_wtime_in_queue[i] == 0 &&
268 +                               stats->iostat_rtime_in_queue[i] == 0)
269 +                       continue;
270 +               if (stats->iostat_read_reqs)
271 +                       read_percent = stats->iostat_rtime_in_queue[i] * 100 /
272 +                               stats->iostat_read_reqs;
273 +               if (stats->iostat_write_reqs)
274 +                       write_percent = stats->iostat_wtime_in_queue[i] * 100 /
275 +                               stats->iostat_write_reqs;
276 +               seq_printf(seq, "%16u %8lu %8lu %8lu %8lu\n",
277 +                               jiffies_to_msecs(((1UL << i) >> 1) << 1),
278 +                               stats->iostat_rtime_in_queue[i],
279 +                               read_percent,
280 +                               stats->iostat_wtime_in_queue[i],
281 +                               write_percent);
282 +       }
283 +
284 +       return 0;
285 +}
286 +
287 +static void *      /* seq_file ->start(): the whole report is a single record */
288 +sd_iostats_seq_start(struct seq_file *p, loff_t *pos)
289 +{
290 +       return (*pos == 0) ? (void *)1 : NULL;  /* non-NULL dummy token only at position 0 */
291 +}
292 +
293 +static void *      /* seq_file ->next(): there is never a second record */
294 +sd_iostats_seq_next(struct seq_file *p, void *v, loff_t *pos)
295 +{
296 +       ++*pos;         /* move past 0 so that a later start() returns NULL */
297 +       return NULL;
298 +}
299 +
300 +static void        /* seq_file ->stop(): start() acquires nothing, so nothing to undo */
301 +sd_iostats_seq_stop(struct seq_file *p, void *v)
302 +{
303 +}
304 +
305 +static struct seq_operations sd_iostats_seqops = {     /* passed to seq_open() in sd_iostats_seq_open() */
306 +       .start = sd_iostats_seq_start,
307 +       .stop  = sd_iostats_seq_stop,
308 +       .next  = sd_iostats_seq_next,
309 +       .show  = sd_iostats_seq_show,
310 +};
311 +
312 +static int         /* ->open of the per-disk proc file; returns 0 or the seq_open() error */
313 +sd_iostats_seq_open (struct inode *inode, struct file *file)
314 +{
315 +       int                    rc;
316 +
317 +       rc = seq_open(file, &sd_iostats_seqops);
318 +       if (rc != 0)
319 +               return rc;
320 +       /* pass the gendisk stored in the proc entry on to the seq_file callbacks */
321 +       ((struct seq_file *)file->private_data)->private = PDE(inode)->data;
322 +       return 0;
323 +}
324 +
325 +static ssize_t     /* ->write of the proc file: resets the disk's statistics */
326 +sd_iostats_seq_write(struct file *file, const char *buffer,
327 +                     size_t len, loff_t *off)
328 +{
329 +       struct seq_file   *seq = file->private_data;
330 +       struct gendisk *disk = seq->private;
331 +       iostat_stats_t    *stats = sd_iostats[scsi_disk(disk)->index];
332 +       unsigned long      flags;
333 +       unsigned long      qdepth;
334 +
335 +       /* the written data is never read; only the in-flight depth survives the reset */
336 +       spin_lock_irqsave (&stats->iostat_lock, flags);
337 +       qdepth = stats->iostat_queue_depth;
338 +       memset (stats, 0, offsetof(iostat_stats_t, iostat_lock));       /* everything before the lock */
339 +       do_gettimeofday(&stats->iostat_timeval);
340 +       stats->iostat_queue_stamp = jiffies;
341 +       stats->iostat_queue_depth = qdepth;
342 +       spin_unlock_irqrestore (&stats->iostat_lock, flags);
343 +
344 +       return len;     /* report the whole buffer as consumed */
345 +}
346 +
347 +static struct file_operations sd_iostats_proc_fops = { /* installed on each per-disk proc entry */
348 +       .owner   = THIS_MODULE,
349 +       .open    = sd_iostats_seq_open,
350 +       .read    = seq_read,
351 +       .write   = sd_iostats_seq_write,
352 +       .llseek  = seq_lseek,
353 +       .release = seq_release,
354 +};
355 +
356 +extern struct proc_dir_entry *proc_scsi;
357 +
358 +void       /* allocate the stats pointer table and create /proc/scsi/sd_iostats */
359 +sd_iostats_init(void)
360 +{
361 +       int    i;
362 +
363 +       sd_iostats = kmalloc(SD_STATS * sizeof(iostat_stats_t *), GFP_KERNEL);
364 +       if (sd_iostats == NULL) {
365 +               printk(KERN_WARNING "Can't keep sd iostats: "
366 +                       "ENOMEM allocating stats array size %zu\n",
367 +                       SD_STATS * sizeof(iostat_stats_t *));   /* size_t needs %zu */
368 +               return;
369 +       }
370 +
371 +       for (i = 0; i < SD_STATS; i++)          /* slots are filled by sd_iostats_init_disk() */
372 +               sd_iostats[i] = NULL;
373 +
374 +       if (proc_scsi == NULL) {
375 +               printk(KERN_WARNING "No access to sd iostats: "
376 +                       "proc_scsi is NULL\n");
377 +               return;
378 +       }
379 +
380 +       sd_iostats_procdir = create_proc_entry(sd_iostats_procdir_name,
381 +                       S_IFDIR | S_IRUGO | S_IXUGO,
382 +                       proc_scsi);
383 +       if (sd_iostats_procdir == NULL) {
384 +               printk(KERN_WARNING "No access to sd iostats: "
385 +                       "can't create /proc/scsi/%s\n", sd_iostats_procdir_name);
386 +               return;
387 +        }
388 +}
389 +
390 +void       /* on open (called from sd_open): allocate stats and a proc file for @disk once */
391 +sd_iostats_init_disk(struct gendisk *disk)
392 +{
393 +       struct proc_dir_entry *pde;
394 +       unsigned long          flags;
395 +       iostat_stats_t        *stats;
396 +
397 +       if (sd_iostats == NULL || sd_iostats_procdir == NULL)
398 +               return;
399 +
400 +       if (scsi_disk(disk)->index >= SD_STATS) {       /* valid slots are 0..SD_STATS-1 */
401 +               printk(KERN_ERR "sd_iostats_init_disk: "
402 +                       "unexpected disk index %d(%d)\n",
403 +                       scsi_disk(disk)->index, SD_STATS);
404 +               return;
405 +       }
406 +
407 +       if (sd_iostats[scsi_disk(disk)->index] != NULL) /* already set up by an earlier open */
408 +               return;
409 +
410 +       stats = kmalloc(sizeof(*stats), GFP_KERNEL);
411 +       if (stats == NULL) {
412 +               printk(KERN_WARNING "Can't keep %s iostats: "
413 +                       "ENOMEM allocating stats size %zu\n",
414 +                       disk->disk_name, sizeof(*stats));       /* size_t needs %zu */
415 +               return;
416 +       }
417 +
418 +       memset (stats, 0, sizeof(*stats));
419 +       do_gettimeofday(&stats->iostat_timeval);
420 +       stats->iostat_queue_stamp = jiffies;
421 +       spin_lock_init(&stats->iostat_lock);
422 +       /* NOTE(review): the lock below belongs to the object allocated just above,
423 +        * so it cannot serialize two racing openers of the same disk */
424 +       spin_lock_irqsave(&stats->iostat_lock, flags);
425 +
426 +       if (sd_iostats[scsi_disk(disk)->index] != NULL) {
427 +               spin_unlock_irqrestore(&stats->iostat_lock, flags);
428 +               kfree (stats);
429 +               return;
430 +       }
431 +
432 +       sd_iostats[scsi_disk(disk)->index] = stats;
433 +
434 +       spin_unlock_irqrestore(&stats->iostat_lock, flags);
435 +
436 +       strncpy(stats->iostat_name, disk->disk_name,    /* buffer was zeroed: stays terminated */
437 +               sizeof(stats->iostat_name)-1);
438 +
439 +       pde = create_proc_entry(stats->iostat_name, S_IRUGO | S_IWUSR,
440 +                               sd_iostats_procdir);
441 +       if (pde == NULL) {
442 +               printk(KERN_WARNING "Can't create /proc/scsi/%s/%s\n",
443 +                       sd_iostats_procdir_name, disk->disk_name);
444 +       } else {
445 +               pde->proc_fops = &sd_iostats_proc_fops;
446 +               pde->data = disk;       /* read back through PDE(inode)->data on open */
447 +       }
448 +}
449 +
450 +void sd_iostats_fini(void)     /* remove all proc entries and free every stats object */
451 +{
452 +       int  i;
453 +
454 +       if (sd_iostats == NULL)         /* nothing was set up, or already torn down */
455 +               return;
456 +
457 +       for (i = 0; i < SD_STATS; i++) {
458 +               if (sd_iostats[i] == NULL)
459 +                       continue;
460 +               if (sd_iostats_procdir != NULL)
461 +                       remove_proc_entry(sd_iostats[i]->iostat_name,
462 +                                               sd_iostats_procdir);
463 +               kfree(sd_iostats[i]);
464 +       }
465 +
466 +       if (proc_scsi != NULL && sd_iostats_procdir != NULL)
467 +               remove_proc_entry(sd_iostats_procdir_name, proc_scsi);
468 +
469 +       sd_iostats_procdir = NULL;
470 +       kfree(sd_iostats);
471 +       sd_iostats = NULL;              /* makes the start/finish/init_disk hooks return early */
472 +}
473 +
474 +void sd_iostats_finish_req(struct scsi_cmnd *SCpnt)    /* account a completed request (from sd_rw_intr) */
475 +{
476 +       struct request          *rq = SCpnt->request;
477 +       iostat_stats_t          *stats;
478 +       unsigned long           *tcounter;
479 +       int                     tbucket;
480 +       int                     tmp;
481 +       unsigned long           irqflags;
482 +       int                     disk, i;
483 +
484 +       disk = scsi_disk(rq->rq_disk)->index;
485 +
486 +       if (sd_iostats == NULL)
487 +               return;
488 +
489 +       /* disks past SD_STATS are legal (see SD_MAX_DISKS) but get no
490 +        * stats slot from sd_iostats_init_disk(); skip them instead of
491 +        * taking the machine down with BUG() on their first request */
492 +       if (disk < 0 || disk >= SD_STATS)
493 +               return;
494 +
495 +       stats = sd_iostats[disk];
496 +       if (stats == NULL)
497 +               return;
498 +
499 +       tmp = jiffies - rq->start_time;
500 +       for (tbucket = 0; tmp > 1; tbucket++)
501 +               tmp >>= 1;
502 +       if (tbucket >= IOSTAT_NCOUNTERS)
503 +               tbucket = IOSTAT_NCOUNTERS - 1;
504 +       /* tbucket: log2 of the jiffies since rq->start_time, clamped to the last bucket */
505 +
506 +       tcounter = rq_data_dir(rq) == WRITE ?
507 +               &stats->iostat_wtime[tbucket] : &stats->iostat_rtime[tbucket];
508 +
509 +       spin_lock_irqsave(&stats->iostat_lock, irqflags);
510 +
511 +       /* update delay stats */
512 +       (*tcounter)++;
513 +
514 +       /* update queue depth stats */
515 +       i = stats->iostat_queue_depth;
516 +       if (i >= IOSTAT_NCOUNTERS)
517 +               i = IOSTAT_NCOUNTERS - 1;
518 +       stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
519 +       stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
520 +       if (stats->iostat_queue_depth > 0)      /* a wrap would turn i negative next time */
521 +               stats->iostat_queue_depth--;
522 +       /* update seek stats. XXX: not sure about nr_sectors */
523 +       stats->iostat_sectors += rq->nr_sectors;
524 +       stats->iostat_reqs++;
525 +       if (rq->sector != stats->iostat_next_sector) {
526 +               stats->iostat_seek_sectors +=
527 +                       rq->sector > stats->iostat_next_sector ?
528 +                       rq->sector - stats->iostat_next_sector :
529 +                       stats->iostat_next_sector - rq->sector;
530 +               stats->iostat_seeks++;
531 +       }
532 +       stats->iostat_next_sector = rq->sector + rq->nr_sectors;
533 +
534 +       stats->iostat_queue_stamp = jiffies;
535 +
536 +       spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
537 +}
538 +
539 +void sd_iostats_start_req(struct scsi_cmnd *SCpnt)     /* account a request being issued (from sd_init_command) */
540 +{
541 +       struct request          *rq = SCpnt->request;
542 +       iostat_stats_t          *stats;
543 +       iostat_counter_t        *counter;
544 +       int                     bucket;
545 +       int                     tbucket;
546 +       int                     tmp;
547 +       unsigned long           irqflags;
548 +       int                     disk, i;
549 +       int                     nsect;
550 +
551 +       disk = scsi_disk(rq->rq_disk)->index;
552 +
553 +       if (sd_iostats == NULL)
554 +               return;
555 +
556 +       /* disks past SD_STATS are legal (see SD_MAX_DISKS) but get no
557 +        * stats slot from sd_iostats_init_disk(); skip them instead of
558 +        * taking the machine down with BUG() on their first request */
559 +       if (disk < 0 || disk >= SD_STATS)
560 +               return;
561 +
562 +       stats = sd_iostats[disk];
563 +       if (stats == NULL)
564 +               return;
565 +
566 +       nsect = SCpnt->request_bufflen >> 9;
567 +       for (bucket = 0, tmp = nsect; tmp > 1; bucket++)
568 +               tmp >>= 1;
569 +
570 +       /* an oversized request is counted in the last bucket, like
571 +        * tbucket below; statistics must never crash the kernel */
572 +       if (bucket >= IOSTAT_NCOUNTERS)
573 +               bucket = IOSTAT_NCOUNTERS - 1;
574 +
575 +       counter = rq_data_dir(rq) == WRITE ?
576 +               &stats->iostat_write_histogram[bucket] :
577 +               &stats->iostat_read_histogram[bucket];
578 +
579 +       tmp = jiffies - rq->start_time;
580 +       for (tbucket = 0; tmp > 1; tbucket++)
581 +               tmp >>= 1;
582 +       if (tbucket >= IOSTAT_NCOUNTERS)
583 +               tbucket = IOSTAT_NCOUNTERS - 1;
584 +       /* tbucket: log2 of the jiffies since rq->start_time, clamped to the last bucket */
585 +
586 +       /* an ugly hack to know exact processing time. the right
587 +        * solution is to add one more field to struct request
588 +        * hopefully it will break nothing ... */
589 +       rq->start_time = jiffies;
590 +
591 +       spin_lock_irqsave(&stats->iostat_lock, irqflags);
592 +
593 +       /* update queue depth stats */
594 +       i = stats->iostat_queue_depth;
595 +       if (i >= IOSTAT_NCOUNTERS)
596 +               i = IOSTAT_NCOUNTERS - 1;
597 +       stats->iostat_queue_ticks[i] += jiffies - stats->iostat_queue_stamp;
598 +       stats->iostat_queue_ticks_sum += jiffies - stats->iostat_queue_stamp;
599 +       stats->iostat_queue_depth++;
600 +
601 +       /* update delay stats */
602 +       if (rq_data_dir(rq) == WRITE) {
603 +               stats->iostat_wtime_in_queue[tbucket]++;
604 +               stats->iostat_write_reqs++;
605 +       } else {
606 +               stats->iostat_rtime_in_queue[tbucket]++;
607 +               stats->iostat_read_reqs++;
608 +       }
609 +
610 +       /* update size stats */
611 +       counter->iostat_size += nsect;
612 +       counter->iostat_count++;
613 +
614 +       stats->iostat_queue_stamp = jiffies;
615 +
616 +       spin_unlock_irqrestore(&stats->iostat_lock, irqflags);
617 +}
618 +#endif
619 +
620  /**
621   *     init_sd - entry point for this driver (both when built in or when
622   *     a module).
623 @@ -1584,6 +2127,7 @@ static void sd_shutdown(struct device *d
624  static int __init init_sd(void)
625  {
626         int majors = 0, i;
627 +   int rc = 0;
628  
629         SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
630  
631 @@ -1594,7 +2138,10 @@ static int __init init_sd(void)
632         if (!majors)
633                 return -ENODEV;
634  
635 -       return scsi_register_driver(&sd_template.gendrv);
636 +   rc = scsi_register_driver(&sd_template.gendrv);
637 +   if (rc == 0)
638 +      sd_iostats_init();
639 +   return rc;
640  }
641  
642  /**
643 @@ -1608,6 +2155,7 @@ static void __exit exit_sd(void)
644  
645         SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
646  
647 +   sd_iostats_fini();
648         scsi_unregister_driver(&sd_template.gendrv);
649         for (i = 0; i < SD_MAJORS; i++)
650                 unregister_blkdev(sd_major(i), "sd");