static char *osd_zerocopy_tag = "zerocopy";
+
+/*
+ * Account for the start of one I/O in the device's brw statistics.
+ *
+ * The read or write in-flight counter (selected by \a rw) is incremented
+ * and then sampled with a separate atomic_read(); that sample, \a npages
+ * and \a discont_pages are tallied into the matching read/write histograms.
+ *
+ * \param osd		device whose od_brw_stats histograms are updated
+ * \param rw		READ selects the read counters, anything else write
+ * \param npages	page count, tallied through lprocfs_oh_tally_log2()
+ * \param discont_pages	discontiguous page count, tallied as-is
+ */
+static void record_start_io(struct osd_device *osd, int rw, int npages,
+			    int discont_pages)
+{
+	struct obd_histogram *hist = osd->od_brw_stats.hist;
+	int in_flight;
+	int rpc_idx;
+	int pages_idx;
+	int discont_idx;
+
+	if (rw == READ) {
+		atomic_inc(&osd->od_r_in_flight);
+		in_flight = atomic_read(&osd->od_r_in_flight);
+		rpc_idx = BRW_R_RPC_HIST;
+		pages_idx = BRW_R_PAGES;
+		discont_idx = BRW_R_DISCONT_PAGES;
+	} else {
+		atomic_inc(&osd->od_w_in_flight);
+		in_flight = atomic_read(&osd->od_w_in_flight);
+		rpc_idx = BRW_W_RPC_HIST;
+		pages_idx = BRW_W_PAGES;
+		discont_idx = BRW_W_DISCONT_PAGES;
+	}
+
+	lprocfs_oh_tally(&hist[rpc_idx], in_flight);
+	lprocfs_oh_tally_log2(&hist[pages_idx], npages);
+	lprocfs_oh_tally(&hist[discont_idx], discont_pages);
+}
+
+/*
+ * Account for the completion of one I/O in the device's brw statistics.
+ *
+ * The read or write in-flight counter (selected by \a rw) is decremented.
+ * \a disksize is tallied only when positive and \a elapsed only when
+ * non-zero, so callers may pass 0 for a value they did not measure.
+ *
+ * \param osd		device whose od_brw_stats histograms are updated
+ * \param rw		READ selects the read counters, anything else write
+ * \param elapsed	I/O duration; 0 means "do not record"
+ * \param disksize	I/O size; values <= 0 are not recorded
+ */
+static void record_end_io(struct osd_device *osd, int rw,
+			  unsigned long elapsed, int disksize)
+{
+	struct obd_histogram *hist = osd->od_brw_stats.hist;
+	int size_idx;
+	int time_idx;
+
+	if (rw == READ) {
+		atomic_dec(&osd->od_r_in_flight);
+		size_idx = BRW_R_DISK_IOSIZE;
+		time_idx = BRW_R_IO_TIME;
+	} else {
+		atomic_dec(&osd->od_w_in_flight);
+		size_idx = BRW_W_DISK_IOSIZE;
+		time_idx = BRW_W_IO_TIME;
+	}
+
+	if (disksize > 0)
+		lprocfs_oh_tally_log2(&hist[size_idx], disksize);
+	if (elapsed != 0)
+		lprocfs_oh_tally_log2(&hist[time_idx], elapsed);
+}
+
static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
struct lu_buf *buf, loff_t *pos,
struct lustre_capa *capa)
uint64_t old_size;
int size = buf->lb_len;
int rc;
+ unsigned long start;
LASSERT(dt_object_exists(dt));
LASSERT(obj->oo_db);
+ start = cfs_time_current();
+
read_lock(&obj->oo_attr_lock);
old_size = obj->oo_attr.la_size;
read_unlock(&obj->oo_attr_lock);
size = old_size - *pos;
}
+ record_start_io(osd, READ, (size >> PAGE_CACHE_SHIFT), 0);
+
rc = -dmu_read(osd->od_os, obj->oo_db->db_object, *pos, size,
buf->lb_buf, DMU_READ_PREFETCH);
+
+ record_end_io(osd, READ, cfs_time_current() - start, size);
if (rc == 0) {
rc = size;
*pos += size;
-
- /* XXX: workaround for bug in HEAD: fsfilt_ldiskfs_read() returns
- * requested number of bytes, not actually read ones */
- if (S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr))
- rc = buf->lb_len;
}
return rc;
}
struct osd_thandle *oh;
uint64_t offset = *pos;
int rc;
+
ENTRY;
LASSERT(dt_object_exists(dt));
LASSERT(th != NULL);
oh = container_of0(th, struct osd_thandle, ot_super);
+ record_start_io(osd, WRITE, (buf->lb_len >> PAGE_CACHE_SHIFT), 0);
+
dmu_write(osd->od_os, obj->oo_db->db_object, offset,
(uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
write_lock(&obj->oo_attr_lock);
rc = buf->lb_len;
out:
+ record_end_io(osd, WRITE, 0, buf->lb_len);
+
RETURN(rc);
}
int i, rc, flags = 0;
bool ignore_quota = false, synced = false;
long long space = 0;
+ struct page *last_page = NULL;
+ unsigned long discont_pages = 0;
ENTRY;
LASSERT(dt_object_exists(dt));
oh = container_of0(th, struct osd_thandle, ot_super);
for (i = 0; i < npages; i++) {
+ if (last_page && lnb[i].lnb_page->index != (last_page->index + 1))
+ ++discont_pages;
+ last_page = lnb[i].lnb_page;
if (lnb[i].lnb_rc)
/* ENOSPC, network RPC error, etc.
* We don't want to book space for pages which will be
CDEBUG(D_QUOTA, "writting %d pages, reserving "LPD64"K of quota "
"space\n", npages, space);
+ record_start_io(osd, WRITE, npages, discont_pages);
retry:
/* acquire quota space if needed */
rc = osd_declare_quota(env, osd, obj->oo_attr.la_uid,
struct osd_thandle *oh;
uint64_t new_size = 0;
int i, rc = 0;
+ unsigned long iosize = 0;
ENTRY;
LASSERT(dt_object_exists(dt));
if (new_size < lnb[i].lnb_file_offset + lnb[i].lnb_len)
new_size = lnb[i].lnb_file_offset + lnb[i].lnb_len;
+ iosize += lnb[i].lnb_len;
}
if (unlikely(new_size == 0)) {
th->th_local = 1;
/* it is important to return 0 even when all lnb_rc == -ENOSPC
* since ofd_commitrw_write() retries several times on ENOSPC */
+ record_end_io(osd, WRITE, 0, 0);
RETURN(0);
}
write_unlock(&obj->oo_attr_lock);
}
+ record_end_io(osd, WRITE, 0, iosize);
+
RETURN(rc);
}
struct niobuf_local *lnb, int npages)
{
struct osd_object *obj = osd_dt_obj(dt);
+ struct osd_device *osd = osd_obj2dev(obj);
struct lu_buf buf;
loff_t offset;
int i;
+ unsigned long start;
+ unsigned long size = 0;
LASSERT(dt_object_exists(dt));
LASSERT(obj->oo_db);
+ start = cfs_time_current();
+
+ record_start_io(osd, READ, npages, 0);
+
for (i = 0; i < npages; i++) {
buf.lb_buf = kmap(lnb[i].lnb_page);
buf.lb_len = lnb[i].lnb_len;
lnb[i].lnb_rc = osd_read(env, dt, &buf, &offset, NULL);
kunmap(lnb[i].lnb_page);
+ size += lnb[i].lnb_rc;
+
if (lnb[i].lnb_rc < buf.lb_len) {
/* all subsequent rc should be 0 */
while (++i < npages)
}
}
+ record_end_io(osd, READ, cfs_time_current() - start, size);
+
return 0;
}
#ifdef LPROCFS
+/*
+ * Integer percentage of a relative to b; evaluates to 0 when b is 0.
+ * Arguments and the whole expansion are parenthesized so that compound
+ * expressions expand with the intended precedence.
+ */
+#define pct(a, b) ((b) ? (a) * 100 / (b) : 0)
+
+/*
+ * Print one read/write histogram pair as a table into \a seq.
+ *
+ * Each printed row shows, for reads and for writes, the bucket count, its
+ * percentage of the total and the cumulative percentage.  Leading buckets
+ * are skipped while both cumulative counts are still zero, and printing
+ * stops once both cumulative counts have reached their totals.
+ *
+ * \param seq	seq_file to print into
+ * \param name	table title, printed in the header row
+ * \param units	unit label, printed in both the read and write headers
+ * \param read	histogram shown in the read columns
+ * \param write	histogram shown in the write columns
+ * \param scale	0 labels each row with the raw bucket index; otherwise
+ *		bucket i is labelled (scale << i), switching to a "K"
+ *		suffix from bucket 10 and an "M" suffix from bucket 20
+ */
+static void display_brw_stats(struct seq_file *seq, char *name, char *units,
+			      struct obd_histogram *read,
+			      struct obd_histogram *write, int scale)
+{
+	unsigned long read_tot, write_tot, r, w, read_cum = 0, write_cum = 0;
+	int i;
+
+	seq_printf(seq, "\n%26s read | write\n", " ");
+	seq_printf(seq, "%-22s %-5s %% cum %% | %-11s %% cum %%\n",
+		   name, units, units);
+
+	read_tot = lprocfs_oh_sum(read);
+	write_tot = lprocfs_oh_sum(write);
+	for (i = 0; i < OBD_HIST_MAX; i++) {
+		r = read->oh_buckets[i];
+		w = write->oh_buckets[i];
+		read_cum += r;
+		write_cum += w;
+		/* nothing recorded up to and including this bucket */
+		if (read_cum == 0 && write_cum == 0)
+			continue;
+
+		/* row label: bucket index, or scaled value with K/M suffix */
+		if (!scale)
+			seq_printf(seq, "%u", i);
+		else if (i < 10)
+			seq_printf(seq, "%u", scale << i);
+		else if (i < 20)
+			seq_printf(seq, "%uK", scale << (i-10));
+		else
+			seq_printf(seq, "%uM", scale << (i-20));
+
+		seq_printf(seq, ":\t\t%10lu %3lu %3lu | %4lu %3lu %3lu\n",
+			   r, pct(r, read_tot), pct(read_cum, read_tot),
+			   w, pct(w, write_tot), pct(write_cum, write_tot));
+
+		/* every remaining bucket is empty for both directions */
+		if (read_cum == read_tot && write_cum == write_tot)
+			break;
+	}
+}
+
+/*
+ * Print the full brw statistics report into \a seq: a snapshot timestamp
+ * followed by one read/write table per histogram pair.
+ *
+ * \param seq		seq_file to print into
+ * \param brw_stats	statistics whose hist[] array is displayed
+ */
+static void brw_stats_show(struct seq_file *seq, struct brw_stats *brw_stats)
+{
+	struct obd_histogram *hist = brw_stats->hist;
+	struct timeval tv;
+
+	/* the timestamp and the histograms are sampled without any
+	 * synchronization against concurrent updates */
+	do_gettimeofday(&tv);
+	seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+		   tv.tv_sec, tv.tv_usec);
+
+	display_brw_stats(seq, "pages per bulk r/w", "rpcs",
+			  &hist[BRW_R_PAGES], &hist[BRW_W_PAGES], 1);
+
+	display_brw_stats(seq, "discontiguous pages", "rpcs",
+			  &hist[BRW_R_DISCONT_PAGES],
+			  &hist[BRW_W_DISCONT_PAGES], 0);
+#if 0
+	/* disabled tables, kept for reference */
+	display_brw_stats(seq, "discontiguous blocks", "rpcs",
+			  &hist[BRW_R_DISCONT_BLOCKS],
+			  &hist[BRW_W_DISCONT_BLOCKS], 0);
+
+	display_brw_stats(seq, "disk fragmented I/Os", "ios",
+			  &hist[BRW_R_DIO_FRAGS],
+			  &hist[BRW_W_DIO_FRAGS], 0);
+#endif
+	display_brw_stats(seq, "disk I/Os in flight", "ios",
+			  &hist[BRW_R_RPC_HIST], &hist[BRW_W_RPC_HIST], 0);
+
+	/* row labels for this table are scaled by 1000 / HZ */
+	display_brw_stats(seq, "I/O time (1/1000s)", "ios",
+			  &hist[BRW_R_IO_TIME], &hist[BRW_W_IO_TIME],
+			  1000 / HZ);
+
+	display_brw_stats(seq, "disk I/O size", "ios",
+			  &hist[BRW_R_DISK_IOSIZE],
+			  &hist[BRW_W_DISK_IOSIZE], 1);
+}
+
+#undef pct
+
+/*
+ * seq_file show handler: prints the brw statistics of the osd_device
+ * stored in seq->private.  Always returns 0.
+ */
+static int osd_brw_stats_seq_show(struct seq_file *seq, void *v)
+{
+	struct osd_device *dev = seq->private;
+	struct brw_stats *stats = &dev->od_brw_stats;
+
+	brw_stats_show(seq, stats);
+	return 0;
+}
+
+/*
+ * Write handler: any write clears all BRW_LAST histograms of the
+ * osd_device reached through the file's seq_file.  The written data in
+ * \a buf is not examined; \a len is returned unchanged.
+ */
+static ssize_t osd_brw_stats_seq_write(struct file *file, const char *buf,
+				       size_t len, loff_t *off)
+{
+	struct seq_file *sf = file->private_data;
+	struct osd_device *dev = sf->private;
+	struct obd_histogram *hist = dev->od_brw_stats.hist;
+	int idx;
+
+	for (idx = 0; idx < BRW_LAST; idx++)
+		lprocfs_oh_clear(&hist[idx]);
+
+	return len;
+}
+
+LPROC_SEQ_FOPS(osd_brw_stats);
+
static int osd_stats_init(struct osd_device *osd)
{
int result;
LPROCFS_CNTR_AVGMINMAX,
"thandle_closing", "usec");
#endif
+ result = lprocfs_seq_create(osd->od_proc_entry, "brw_stats",
+ 0644, &osd_brw_stats_fops, osd);
} else {
result = -ENOMEM;
}