From: Andreas Dilger Date: Thu, 5 Nov 2015 18:47:06 +0000 (+0000) Subject: Revert "LU-4865 zfs: grow block size by write pattern" X-Git-Tag: 2.7.63~8 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F53%2F17053%2F4;p=fs%2Flustre-release.git Revert "LU-4865 zfs: grow block size by write pattern" This reverts commit 3e4369135127b350dbc26a4a5dc94cfa46e394cf. This has shown problems in testing and may be the cause of LU-7392. Change-Id: I664f7f8c943d8a90f2d2a9845aea2636535d6b1e Reviewed-on: http://review.whamcloud.com/17053 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Nunez Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index 85e8310..8299128 100644 --- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -682,51 +682,6 @@ retry: RETURN(rc); } -/** - * Policy to grow ZFS block size by write pattern. - * For sequential write, it grows block size gradually until it reaches the - * maximum blocksize the dataset can support. Otherwise, it will just use - * the maximum block size. 
- */ -static int osd_grow_blocksize(struct osd_object *obj, struct osd_thandle *oh, - uint64_t start, uint64_t end) -{ - struct osd_device *osd = osd_obj2dev(obj); - dmu_buf_impl_t *db = (dmu_buf_impl_t *)obj->oo_db; - dnode_t *dn; - uint32_t blksz; - int rc = 0; - ENTRY; - - DB_DNODE_ENTER(db); - dn = DB_DNODE(db); - - if (dn->dn_maxblkid > 0) /* can't change block size */ - GOTO(out, rc); - - blksz = dn->dn_datablksz; - if (blksz >= osd->od_max_blksz) - GOTO(out, rc); - - /* now ZFS can support up to 16MB block size, and if the write - * is sequential, it just increases the block size gradually */ - if (start <= blksz) { /* sequential */ - blksz = (uint32_t)min_t(uint64_t, osd->od_max_blksz, end); - if (!is_power_of_2(blksz)) - blksz = size_roundup_power2(blksz); - } else { /* otherwise, use maximum block size */ - blksz = osd->od_max_blksz; - } - - if (blksz > dn->dn_datablksz) - rc = -dmu_object_set_blocksize(osd->od_os, dn->dn_object, - blksz, 0, oh->ot_tx); - EXIT; -out: - DB_DNODE_EXIT(db); - return rc; -} - static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, struct niobuf_local *lnb, int npages, struct thandle *th) @@ -745,14 +700,6 @@ static int osd_write_commit(const struct lu_env *env, struct dt_object *dt, LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); - /* adjust block size. Assume the buffers are sorted. 
*/ -	rc = osd_grow_blocksize(obj, oh, lnb[0].lnb_file_offset, -			       lnb[npages - 1].lnb_file_offset + -			       lnb[npages - 1].lnb_len); -	if (rc < 0) /* ignore the error */ -		CDEBUG(D_INODE, "obj "DFID": change block size error rc=%d\n", -		       PFID(lu_object_fid(&dt->do_lu)), rc); - 	for (i = 0; i < npages; i++) { 		CDEBUG(D_INODE, "write %u bytes at %u\n", 			(unsigned) lnb[i].lnb_len, diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index cc364a5..5c2d1f5 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -1303,11 +1303,16 @@ static dmu_buf_t *osd_mkreg(const struct lu_env *env, struct osd_object *obj, if (rc) return ERR_PTR(rc); + /* + * XXX: This heuristic is non-optimal. It would be better to + * increase the blocksize up to osd->od_max_blksz during the write. + * This is exactly how the ZPL behaves and it ensures that the right + * blocksize is selected based on the file size rather than + * making broad assumptions based on the osd type. + */ if (!lu_device_is_md(osd2lu_dev(osd))) { - /* uses 4K as default block size because clients write data - * with page size that is 4K at minimum */ rc = -dmu_object_set_blocksize(osd->od_os, db->db_object, - 4096, 0, oh->ot_tx); + osd->od_max_blksz, 0, oh->ot_tx); if (unlikely(rc)) { CERROR("%s: can't change blocksize: %d\n", osd->od_svname, rc);