iobuf->dr_elapsed_valid = 0;
LASSERT(iobuf->dr_dev == d);
LASSERT(iobuf->dr_frags > 0);
- lprocfs_oh_tally(&d->od_brw_stats.hist[BRW_R_DIO_FRAGS+rw],
+ lprocfs_oh_tally(&d->od_brw_stats.bs_hist[BRW_R_DIO_FRAGS + rw],
iobuf->dr_frags);
- lprocfs_oh_tally_log2(&d->od_brw_stats.hist[BRW_R_IO_TIME+rw],
+ lprocfs_oh_tally_log2(&d->od_brw_stats.bs_hist[BRW_R_IO_TIME+rw],
ktime_to_ms(iobuf->dr_elapsed));
}
}
*/
if (unlikely(iobuf == NULL)) {
- CERROR("***** bio->bi_private is NULL! This should never happen. Normally, I would crash here, but instead I will dump the bio contents to the console. Please report this to <https://jira.whamcloud.com/> , along with any interesting messages leading up to this point (like SCSI errors, perhaps). Because bi_private is NULL, I can't wake up the thread that initiated this IO - you will probably have to reboot this node.\n");
+ CERROR("***** bio->bi_private is NULL! Dump the bio contents to the console. Please report this to <https://jira.whamcloud.com/>, and probably have to reboot this node.\n");
CERROR("bi_next: %p, bi_flags: %lx, " __stringify(bi_opf)
": %x, bi_vcnt: %d, bi_idx: %d, bi->size: %d, bi_end_io: %p, bi_cnt: %d, bi_private: %p\n",
bio->bi_next, (unsigned long)bio->bi_flags,
static void record_start_io(struct osd_iobuf *iobuf, int size)
{
- struct osd_device *osd = iobuf->dr_dev;
- struct obd_histogram *h = osd->od_brw_stats.hist;
+ struct osd_device *osd = iobuf->dr_dev;
+ struct obd_histogram *h = osd->od_brw_stats.bs_hist;
iobuf->dr_frags++;
atomic_inc(&iobuf->dr_numreqs);
{
struct blk_integrity *bi = bdev_get_integrity(bdev);
struct bio_integrity_payload *bip = bio->bi_integrity;
- struct niobuf_local *lnb;
+ struct niobuf_local *lnb = NULL;
unsigned short sector_size = blk_integrity_interval(bi);
void *bio_prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
struct bio_vec *bv;
sector_t sector = bio_start_sector(bio);
- unsigned int sectors, total;
+ unsigned int i, sectors, total;
DECLARE_BVEC_ITER_ALL(iter_all);
__u16 *expected_guard;
int rc;
total = 0;
bio_for_each_segment_all(bv, bio, iter_all) {
- lnb = iobuf->dr_lnbs[index];
+ for (i = index; i < iobuf->dr_npages; i++) {
+ if (iobuf->dr_pages[i] == bv->bv_page) {
+ lnb = iobuf->dr_lnbs[i];
+ break;
+ }
+ }
+ if (!lnb)
+ continue;
expected_guard = lnb->lnb_guards;
sectors = bv->bv_len / sector_size;
if (lnb->lnb_guard_rpc) {
total += sectors * bi->tuple_size;
LASSERT(total <= bip_size(bio->bi_integrity));
index++;
+ lnb = NULL;
}
return 0;
}
for (page_idx = page_idx_start, block_idx = start_blocks;
block_idx < block_idx_end; page_idx++,
block_idx += blocks_left_page) {
+             /* For cases where the filesystem's blocksize is not the
+ * same as PAGE_SIZE (e.g. ARM with PAGE_SIZE=64KB and
+ * blocksize=4KB), there will be multiple blocks to
+ * read/write per page. Also, the start and end block may
+ * not be aligned to the start and end of the page, so the
+ * first page may skip some blocks at the start ("i != 0",
+ * "blocks_left_page" is reduced), and the last page may
+ * skip some blocks at the end (limited by "count").
+ */
page = pages[page_idx];
LASSERT(page_idx < iobuf->dr_npages);
i = block_idx % blocks_per_page;
blocks_left_page = blocks_per_page - i;
- for (page_offset = i * blocksize; i < blocks_left_page;
+ if (block_idx + blocks_left_page > block_idx_end)
+ blocks_left_page = block_idx_end - block_idx;
+ page_offset = i * blocksize;
+ for (i = 0; i < blocks_left_page;
i += nblocks, page_offset += blocksize * nblocks) {
nblocks = 1;
/* ignore quota for the whole request if any page is from
* client cache or written by root.
*
- * XXX once we drop the 1.8 client support, the checking
- * for whether page is from cache can be simplified as:
- * !(lnb[i].flags & OBD_BRW_SYNC)
- *
* XXX we could handle this on per-lnb basis as done by
* grant.
*/
if ((lnb[i].lnb_flags & OBD_BRW_NOQUOTA) ||
- (lnb[i].lnb_flags & (OBD_BRW_FROM_GRANT | OBD_BRW_SYNC)) ==
- OBD_BRW_FROM_GRANT)
+ (lnb[i].lnb_flags & OBD_BRW_SYS_RESOURCE) ||
+ !(lnb[i].lnb_flags & OBD_BRW_SYNC))
declare_flags |= OSD_QID_FORCE;
/*
* split more than once, but this is really rare.
*/
if (LDISKFS_I(inode)->i_flags & LDISKFS_EXTENTS_FL) {
+             /*
+              * Many concurrent threads may grow the tree by the time
+              * our transaction starts, so consider 2 as the minimum depth.
+              */
depth = ext_depth(inode);
+ depth = min(max(depth, 1) + 1, LDISKFS_MAX_EXTENT_DEPTH);
if (extents <= 1) {
credits += depth * 2 * extents;
new_meta = depth;
new_meta = DIV_ROUND_UP(new_blocks,
LDISKFS_ADDR_PER_BLOCK(inode->i_sb)) + 4;
credits += new_meta;
- depth = 3;
}
dirty_groups += (extents + new_meta);
credits += dirty_groups;
/* we can't dirty more gd blocks than exist */
- if (extents > LDISKFS_SB(osd_sb(osd))->s_gdb_count)
+ if (dirty_groups > LDISKFS_SB(osd_sb(osd))->s_gdb_count)
credits += LDISKFS_SB(osd_sb(osd))->s_gdb_count;
else
credits += dirty_groups;
* level.
*/
depth = inode != NULL ? ext_depth(inode) : 0;
- depth = max(depth, 1) + 1;
+ depth = min(max(depth, 1) + 3, LDISKFS_MAX_EXTENT_DEPTH);
credits = depth;
/* if not append, then split may need to modify
* existing blocks moving entries into the new ones
blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff;
/* Create and mark new extents as either zero or unwritten */
- flags = osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ?
+ flags = (osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ||
+ !ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)) ?
LDISKFS_GET_BLOCKS_CREATE_ZERO :
LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT;
#ifndef HAVE_LDISKFS_GET_BLOCKS_KEEP_SIZE
#endif
inode_lock(inode);
- /*
- * We only support preallocation for extent-based file only.
- */
- if (!(ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)))
- GOTO(out, rc = -EOPNOTSUPP);
-
if (!(mode & FALLOC_FL_KEEP_SIZE) && (end > i_size_read(inode) ||
end > LDISKFS_I(inode)->i_disksize)) {
new_size = end;
}
}
+/*
+ * For a partial-page punch, flush the punched range to disk immediately
+ * so stale cached data cannot survive the hole-punch.
+ *
+ * Note: [start, end] is inclusive — the caller passes the last byte of
+ * the punched range (end - 1), not one-past-the-end.
+ */
+static void osd_partial_page_flush_punch(struct osd_device *d,
+					 struct inode *inode, loff_t start,
+					 loff_t end)
+{
+	if (osd_use_page_cache(d)) {
+		/* Page cache in use: start writeback of the range; the
+		 * cached pages remain valid, so no wait/invalidate needed.
+		 */
+		filemap_fdatawrite_range(inode->i_mapping, start, end);
+	} else {
+		/* Notice we use the "wait" version to ensure I/O is complete
+		 * before dropping the pages, since direct disk writes will
+		 * bypass the page cache afterwards.
+		 */
+		filemap_write_and_wait_range(inode->i_mapping, start,
+					     end);
+		/* end is inclusive, so its page index needs no adjustment */
+		invalidate_mapping_pages(inode->i_mapping, start >> PAGE_SHIFT,
+					 end >> PAGE_SHIFT);
+	}
+}
+
/*
* For a partial-page truncate, flush the page to disk immediately to
* avoid data corruption during direct disk write. b=17397
struct file *file = osd_quasi_file(env, inode);
file->f_op->fallocate(file, mode, start, end - start);
- osd_partial_page_flush(d, inode, start);
- osd_partial_page_flush(d, inode, end - 1);
+ osd_partial_page_flush_punch(d, inode, start, end - 1);
}
void osd_process_truncates(const struct lu_env *env, struct list_head *list)