Fallocate might introduce unwritten extents, and writing
data to them will trigger an extent split, so we should reserve
credits for this case. To avoid a complicated calculation,
we just use the normal credits calculation if the extent is mapped
as unwritten.
See comments in ext4:
If we add a single extent, then in the worse case, each tree
level index/leaf need to be changed in case of the tree split.
If more extents are inserted, they could cause the whole tree
split more than once, but this is really rare.
Lustre always reserved credits as in the single-extent case, which is wrong.
Also fix the indirect blocks calculation.
Signed-off-by: Wang Shilong <wshilong@ddn.com>
Change-Id: I9b67ec7b002711f040f46d0c77a645bb6f57a7de
Reviewed-on: https://review.whamcloud.com/43994
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
/* ext_depth() */
#include <ldiskfs/ldiskfs_extents.h>
/* ext_depth() */
#include <ldiskfs/ldiskfs_extents.h>
+#include <ldiskfs/ldiskfs.h>
static inline bool osd_use_page_cache(struct osd_device *d)
{
static inline bool osd_use_page_cache(struct osd_device *d)
{
struct osd_fextent {
sector_t start;
sector_t end;
/*
 * Cached mapping state for a file extent as reported by FIEMAP.
 * @start/@end are the extent boundaries in blocks; @flags carries the
 * FIEMAP_EXTENT_* flags (notably FIEMAP_EXTENT_UNWRITTEN, checked by
 * the write-commit path so unwritten extents are not skipped, since
 * converting them may require an extent split); @mapped records whether
 * the offset is backed by an allocated extent.
 */
struct osd_fextent {
	sector_t start;
	sector_t end;
	unsigned int flags;	/* FIEMAP_EXTENT_* flags from fe_flags */
	unsigned int mapped:1;
};
unsigned int mapped:1;
};
return 0;
start = fe.fe_logical >> inode->i_blkbits;
return 0;
start = fe.fe_logical >> inode->i_blkbits;
+ cached_extent->flags = fe.fe_flags;
if (fei.fi_extents_mapped == 0) {
/* a special case - no extent found at this offset and forward.
* we can consider this as a hole to EOF. it's safe to cache
if (fei.fi_extents_mapped == 0) {
/* a special case - no extent found at this offset and forward.
* we can consider this as a hole to EOF. it's safe to cache
const struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct osd_thandle *oh;
const struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct osd_thandle *oh;
- int extents = 0;
- int depth;
+ int extents = 0, new_meta = 0;
+ int depth, new_blocks = 0;
int i;
int dirty_groups = 0;
int rc = 0;
int i;
int dirty_groups = 0;
int rc = 0;
OBD_BRW_FROM_GRANT)
declare_flags |= OSD_QID_FORCE;
OBD_BRW_FROM_GRANT)
declare_flags |= OSD_QID_FORCE;
- if (osd_is_mapped(dt, lnb[i].lnb_file_offset, &mapped)) {
+ /*
+ * Converting an unwritten extent may require an extent split,
+ * so we cannot skip it.
+ */
+ if (osd_is_mapped(dt, lnb[i].lnb_file_offset, &mapped) &&
+ !(mapped.flags & FIEMAP_EXTENT_UNWRITTEN)) {
lnb[i].lnb_flags |= OBD_BRW_MAPPED;
continue;
}
lnb[i].lnb_flags |= OBD_BRW_MAPPED;
continue;
}
}
/* count only unmapped changes */
}
/* count only unmapped changes */
if (lnb[i].lnb_file_offset != extent.end || extent.end == 0) {
if (extent.end != 0)
extents += (extent.end - extent.start +
if (lnb[i].lnb_file_offset != extent.end || extent.end == 0) {
if (extent.end != 0)
extents += (extent.end - extent.start +
if (extents > MAX_EXTENTS_PER_WRITE)
extents = MAX_EXTENTS_PER_WRITE;
if (extents > MAX_EXTENTS_PER_WRITE)
extents = MAX_EXTENTS_PER_WRITE;
- dirty_groups = extents;
- /*
- * each extent can go into new leaf causing a split
- * 5 is max tree depth: inode + 4 index blocks
- * with blockmaps, depth is 3 at most
+ /**
+ * If we add a single extent, then in the worse case, each tree
+ * level index/leaf need to be changed in case of the tree split.
+ * If more extents are inserted, they could cause the whole tree
+ * split more than once, but this is really rare.
*/
if (LDISKFS_I(inode)->i_flags & LDISKFS_EXTENTS_FL) {
*/
if (LDISKFS_I(inode)->i_flags & LDISKFS_EXTENTS_FL) {
- /*
- * many concurrent threads may grow tree by the time
- * our transaction starts. so, consider 2 is a min depth
- */
depth = ext_depth(inode);
depth = ext_depth(inode);
- depth = max(depth, 1) + 1;
- dirty_groups += depth;
- credits += depth * 2 * extents;
+ if (extents <= 1) {
+ credits += depth * 2 * extents;
+ new_meta = depth;
+ } else {
+ credits += depth * 3 * extents;
+ new_meta = depth * 2 * extents;
+ }
+ /*
+ * With N contiguous data blocks, we need at most
+ * N/LDISKFS_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+ * 2 dindirect blocks, and 1 tindirect block
+ */
+ new_meta = DIV_ROUND_UP(new_blocks,
+ LDISKFS_ADDR_PER_BLOCK(inode->i_sb)) + 4;
+ credits += new_meta;
- dirty_groups += depth;
- credits += depth * extents;
+ dirty_groups += (extents + new_meta);
oh->oh_declared_ext = extents;
/* quota space for metadata blocks */
oh->oh_declared_ext = extents;
/* quota space for metadata blocks */
- quota_space += depth * extents * LDISKFS_BLOCK_SIZE(osd_sb(osd));
+ quota_space += new_meta * LDISKFS_BLOCK_SIZE(osd_sb(osd));
/* quota space should be reported in 1K blocks */
quota_space = toqb(quota_space);
/* quota space should be reported in 1K blocks */
quota_space = toqb(quota_space);