*
*/
+#define DEBUG_SUBSYSTEM S_OSD
+
/* prerequisite for linux/xattr.h */
#include <linux/types.h>
/* prerequisite for linux/xattr.h */
}
+/*
+ * Declare the block quota that a later fallocate on @dt will need.
+ *
+ * Any mode bit other than FALLOC_FL_KEEP_SIZE is rejected. Otherwise an
+ * amount is built from a fixed metadata estimate plus the byte count
+ * (end - start), and is declared through osd_declare_inode_qid() for the
+ * uid, gid and projid of the object's inode.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	object to be preallocated
+ * \param[in] start	first byte of the range
+ * \param[in] end	end of the range; only (end - start) is used here
+ * \param[in] mode	fallocate mode bits
+ * \param[in] th	transaction handle embedded in a struct osd_thandle
+ *
+ * \retval -EOPNOTSUPP	if a mode bit other than FALLOC_FL_KEEP_SIZE is set
+ * \retval		otherwise the value of osd_declare_inode_qid()
+ */
static int osd_declare_fallocate(const struct lu_env *env,
- struct dt_object *dt, struct thandle *th)
+ struct dt_object *dt, __u64 start, __u64 end,
+ int mode, struct thandle *th)
{
- struct osd_thandle *oh;
- struct inode *inode;
+ struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super);
+ struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
+ struct inode *inode = osd_dt_obj(dt)->oo_inode;
+ long long quota_space = 0;
+ /* 5 is max tree depth. (inode + 4 index blocks) */
+ int depth = 5;
int rc;
+
ENTRY;
- LASSERT(th);
- oh = container_of(th, struct osd_thandle, ot_super);
+ /*
+ * Only standard preallocation (mode == 0) and FALLOC_FL_KEEP_SIZE
+ * are supported now. All other mode options are rejected.
+ */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ RETURN(-EOPNOTSUPP);
- osd_trans_declare_op(env, oh, OSD_OT_PREALLOC,
- osd_dto_credits_noquota[DTO_WRITE_BLOCK]);
- inode = osd_dt_obj(dt)->oo_inode;
+ LASSERT(th);
LASSERT(inode);
+ /* Quota space for metadata blocks: one page plus one block per
+ * tree level. An approximate metadata estimate should be good enough.
+ */
+ quota_space += PAGE_SIZE;
+ quota_space += depth * LDISKFS_BLOCK_SIZE(osd_sb(osd));
+
+ /* Quota space should be reported in 1K blocks.
+ * NOTE(review): LDISKFS_META_TRANS_BLOCKS() is added without passing
+ * through toqb(); confirm its unit matches the other two terms.
+ */
+ quota_space = toqb(quota_space) + toqb(end - start) +
+ LDISKFS_META_TRANS_BLOCKS(inode->i_sb);
+
+ /* We don't need to reserve credits for the whole fallocate here.
+ * We reserve space only for metadata. Fallocate credits are
+ * extended as required.
+ */
rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode),
- i_projid_read(inode), 0, oh, osd_dt_obj(dt),
- NULL, OSD_QID_BLK);
+ i_projid_read(inode), quota_space, oh,
+ osd_dt_obj(dt), NULL, OSD_QID_BLK);
RETURN(rc);
}
+/*
+ * Estimate the journal credits for inserting one extent into the extent
+ * tree of @inode. Borrowed from ext4_chunk_trans_blocks().
+ *
+ * The estimate adds up:
+ *  - index blocks: twice the current extent tree depth;
+ *  - bitmap blocks: index blocks + 1, capped at s_groups_count;
+ *  - group descriptor blocks: index blocks + 1, capped at s_gdb_count;
+ *  - LDISKFS_META_TRANS_BLOCKS() for super block, inode, quota and
+ *    xattr blocks.
+ *
+ * @nrblocks is accepted but not used by the calculation.
+ */
+static int osd_chunk_trans_blocks(struct inode *inode, int nrblocks)
+{
+	int index_credits = 2 * ext_depth(inode);
+	ldiskfs_group_t bitmap_credits = index_credits + 1;
+	int gdesc_credits = bitmap_credits;
+
+	/* Neither count can exceed what the filesystem actually has. */
+	if (bitmap_credits > LDISKFS_SB(inode->i_sb)->s_groups_count)
+		bitmap_credits = LDISKFS_SB(inode->i_sb)->s_groups_count;
+	if (gdesc_credits > LDISKFS_SB(inode->i_sb)->s_gdb_count)
+		gdesc_credits = LDISKFS_SB(inode->i_sb)->s_gdb_count;
+
+	/* Index + bitmap + descriptor blocks, plus the fixed metadata part. */
+	return index_credits + bitmap_credits + gdesc_credits +
+	       LDISKFS_META_TRANS_BLOCKS(inode->i_sb);
+}
+
+/*
+ * Make sure @handle carries at least @needed journal credits.
+ *
+ * Nothing is done when ldiskfs_handle_has_enough_credits() is satisfied.
+ * Otherwise the handle is extended by the shortfall relative to
+ * handle->h_buffer_credits. A zero or negative result of
+ * ldiskfs_journal_extend() is returned as is; a positive result makes
+ * the handle restart with @needed credits.
+ *
+ * \retval 0	enough credits already, or the extend call returned 0
+ * \retval <0	negative value returned by ldiskfs_journal_extend()
+ * \retval	otherwise the value of ldiskfs_journal_restart()
+ */
+static int osd_extend_restart_trans(handle_t *handle, int needed)
+{
+	int rc = 0;
+
+	if (!ldiskfs_handle_has_enough_credits(handle, needed)) {
+		rc = ldiskfs_journal_extend(handle,
+					    needed - handle->h_buffer_credits);
+		/* Positive means the extend did not happen: restart. */
+		if (rc > 0)
+			rc = ldiskfs_journal_restart(handle, needed);
+	}
+
+	return rc;
+}
+
+/*
+ * Preallocate blocks for the byte range @start..@end of object @dt.
+ *
+ * The range is converted to logical blocks and ldiskfs_map_blocks() is
+ * called with LDISKFS_GET_BLOCKS_CREATE in a loop until the whole range
+ * is mapped or a call returns <= 0. Before each call the current journal
+ * handle is extended or restarted to hold the credits computed by
+ * osd_chunk_trans_blocks(). The inode is locked from inode_lock() to
+ * the "out" label.
+ *
+ * Unless FALLOC_FL_KEEP_SIZE is set, a range ending beyond the current
+ * i_size or i_disksize grows the inode size as blocks are mapped. With
+ * FALLOC_FL_KEEP_SIZE the size is left alone and blocks mapped beyond
+ * i_size set LDISKFS_INODE_EOFBLOCKS.
+ *
+ * On a non-negative result the handle is finally extended or restarted
+ * to the credit count saved from the osd_thandle on entry.
+ *
+ * NOTE(review): oh->ot_credits is read in the declarations, before
+ * LASSERT(th) runs.
+ * NOTE(review): a zero return from ldiskfs_map_blocks() ends the loop
+ * without an error code even if part of the range is unmapped.
+ *
+ * \retval -EOPNOTSUPP	the inode is not extent-based
+ * \retval <0		error from inode_newsize_ok(), credit extension or
+ *			ldiskfs_map_blocks()
+ * \retval		otherwise the value of the final
+ *			osd_extend_restart_trans() call
+ */
static int osd_fallocate(const struct lu_env *env, struct dt_object *dt,
__u64 start, __u64 end, int mode, struct thandle *th)
{
+ struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super);
+ handle_t *handle = ldiskfs_journal_current_handle();
+ unsigned int save_credits = oh->ot_credits;
struct osd_object *obj = osd_dt_obj(dt);
struct inode *inode = obj->oo_inode;
- struct file *file;
+ struct ldiskfs_map_blocks map;
+ unsigned int credits;
+ ldiskfs_lblk_t blen;
+ ldiskfs_lblk_t boff;
+ loff_t new_size = 0;
+ int depth = 0;
+ int flags;
int rc = 0;
ENTRY;
+
/*
* Only mode == 0 (which is standard prealloc) is supported now.
* Rest of mode options is not supported yet.
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
LASSERT(inode != NULL);
+
+ CDEBUG(D_INODE, "fallocate: inode #%lu: start %llu end %llu mode %d\n",
+ inode->i_ino, start, end, mode);
+
dquot_initialize(inode);
LASSERT(th);
- osd_trans_exec_op(env, th, OSD_OT_PREALLOC);
+ boff = start >> inode->i_blkbits;
+ blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff;
+
+ flags = LDISKFS_GET_BLOCKS_CREATE;
+ if (mode & FALLOC_FL_KEEP_SIZE)
+ flags |= LDISKFS_GET_BLOCKS_KEEP_SIZE;
+
+ inode_lock(inode);
/*
- * Because f_op->fallocate() does not have an inode arg
+ * Preallocation is supported for extent-based files only.
*/
- file = osd_quasi_file(env, inode);
- rc = file->f_op->fallocate(file, mode, start, end - start);
+ if (!(ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)))
+ GOTO(out, rc = -EOPNOTSUPP);
+
+ /* The file will grow: validate the new size before allocating. */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && (end > i_size_read(inode) ||
+ end > LDISKFS_I(inode)->i_disksize)) {
+ new_size = end;
+ rc = inode_newsize_ok(inode, new_size);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ inode_dio_wait(inode);
+
+ map.m_lblk = boff;
+ map.m_len = blen;
+
+ /*
+ * Don't normalize the request if it can fit in one extent so
+ * that it doesn't get unnecessarily split into multiple
+ * extents.
+ */
+ if (blen <= EXT_UNWRITTEN_MAX_LEN)
+ flags |= LDISKFS_GET_BLOCKS_NO_NORMALIZE;
+
+ /*
+ * Credits to insert 1 extent into the extent tree.
+ */
+ credits = osd_chunk_trans_blocks(inode, blen);
+ depth = ext_depth(inode);
+
+ while (rc >= 0 && blen) {
+ loff_t epos;
+
+ /*
+ * Recalculate credits when extent tree depth changes.
+ */
+ if (depth != ext_depth(inode)) {
+ credits = osd_chunk_trans_blocks(inode, blen);
+ depth = ext_depth(inode);
+ }
+
+ /* TODO: quota check */
+ rc = osd_extend_restart_trans(handle, credits);
+ if (rc)
+ break;
+
+ rc = ldiskfs_map_blocks(handle, inode, &map, flags);
+ if (rc <= 0) {
+ CDEBUG(D_INODE,
+ "inode #%lu: block %u: len %u: ldiskfs_map_blocks returned %d\n",
+ inode->i_ino, map.m_lblk, map.m_len, rc);
+ ldiskfs_mark_inode_dirty(handle, inode);
+ break;
+ }
+
+ /* rc blocks were mapped: advance past them and shrink the rest. */
+ map.m_lblk += rc;
+ map.m_len = blen = blen - rc;
+ epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ inode->i_ctime = current_time(inode);
+ if (new_size) {
+ /* Never grow the size past the requested end. */
+ if (epos > end)
+ epos = end;
+ if (ldiskfs_update_inode_size(inode, epos) & 0x1)
+ inode->i_mtime = inode->i_ctime;
+ } else {
+ /* Size is kept: flag blocks mapped beyond i_size. */
+ if (epos > inode->i_size)
+ ldiskfs_set_inode_flag(inode,
+ LDISKFS_INODE_EOFBLOCKS);
+ }
+
+ ldiskfs_mark_inode_dirty(handle, inode);
+ }
+
+out:
+ inode_unlock(inode);
+
+ /* extend credits if needed for operations such as attribute set */
+ if (rc >= 0)
+ rc = osd_extend_restart_trans(handle, save_credits);
RETURN(rc);
}
"user write success, but expect EDQUOT"
}
+# Check block hard-limit enforcement for space reserved with fallocate.
+#
+# usage: check_write_fallocate <testfile> <qtype> <limit>
+#   testfile  file to preallocate (fallocate is run through $RUNAS)
+#   qtype     quota type name; its first letter is passed to quota_error
+#   limit     block hard limit in MiB
+#
+# First preallocates limit/2 MiB, which must succeed, then asks for
+# limit+1 MiB, which must fail; quota_error is called when either
+# expectation is not met.
+check_write_fallocate() {
+	local testfile="$1"
+	local qtype="$2"
+	local limit=$3
+	local short_qtype=${qtype:0:1}
+	# keep the counter local so it does not leak into the caller's scope
+	local count
+
+	count=$((limit/2))
+	log "Write ${count}MiB Using Fallocate"
+	$RUNAS fallocate -l${count}MiB "$testfile" ||
+		quota_error $short_qtype $TSTUSR "Write ${count}MiB fail"
+
+	cancel_lru_locks osc
+	sync; sync_all_data || true
+	sleep 2
+
+	count=$((limit + 1))
+	log "Write ${count}MiB Using Fallocate"
+	$RUNAS fallocate -l${count}MiB "$testfile" &&
+		quota_error $short_qtype $TSTUSR \
+			"Write success, expect EDQUOT" || true
+}
+
# test block hardlimit
test_1a() {
local limit=10 # 10M
}
run_test 1g "Quota pools: Block hard limit with wide striping"
+# Block hard limit test where the space is consumed with fallocate
+# instead of regular writes. Runs only on ldiskfs OSTs.
+#
+# Sets a 10M user block hard limit for $TSTUSR, checks through
+# check_write_fallocate that preallocation below the limit succeeds and
+# above it fails, then verifies the usage returns to 0 after the file
+# is deleted.
+test_1h() {
+	local limit=10 # 10M
+	local testfile="$DIR/$tdir/$tfile-0"
+
+	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
+	# NOTE(review): the gate tests 2.13.50 but the message says 2.13.53;
+	# confirm which version is intended and make the two agree.
+	[ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
+		skip "Need OST version at least 2.13.53"
+
+	setup_quota_test || error "setup quota failed with $?"
+	trap cleanup_quota_test EXIT
+
+	# enable ost quota
+	set_ost_qtype $QTYPE || error "enable ost quota failed"
+
+	# test for user
+	log "User quota (block hardlimit:$limit MB)"
+	$LFS setquota -u $TSTUSR -b 0 -B ${limit}M -i 0 -I 0 $DIR ||
+		error "set user quota failed"
+
+	# make sure the system is clean
+	local used=$(getquota -u $TSTUSR global curspace)
+	[ $used -ne 0 ] && error "Used space($used) for user $TSTUSR isn't 0."
+
+	$LFS setstripe "$testfile" -c 1 || error "setstripe $testfile failed"
+	# use the portable ':' owner/group separator rather than '.'
+	chown $TSTUSR:$TSTUSR "$testfile" || error "chown $testfile failed"
+
+	check_write_fallocate "$testfile" "user" $limit
+
+	rm -f "$testfile"
+	wait_delete_completed || error "wait_delete_completed failed"
+	sync_all_data || true
+	used=$(getquota -u $TSTUSR global curspace)
+	[ $used -ne 0 ] && quota_error u $TSTUSR \
+		"user quota isn't released after deletion"
+	resetquota -u $TSTUSR
+}
+run_test 1h "Block hard limit test using fallocate"
+
# test inode hardlimit
test_2() {
local TESTFILE="$DIR/$tdir/$tfile-0"
}
run_test 150d "Verify fallocate Size and Blocks - Non zero start"
+# Verify that fallocate can reserve 90% of the available OST space.
+# Runs only on ldiskfs OSTs.
+#
+# The file is striped over all OSTs. The request size is 90% of the
+# smallest "Available" value printed by 'lfs df' for an OST, multiplied
+# by OSTCOUNT, in KiB. After fallocate the file size must equal the
+# request and the allocated blocks must cover it.
+test_150e() {
+	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
+	[ $OST1_VERSION -ge $(version_code 2.13.55) ] ||
+		skip "Need OST version at least 2.13.55"
+
+	echo "df before:"
+	$LFS df
+	$LFS setstripe -c${OSTCOUNT} "$DIR/$tfile" ||
+		error "$LFS setstripe -c${OSTCOUNT} $DIR/$tfile failed"
+
+	# Find OST with Minimum Size (kept local so it does not leak out)
+	local min_size_ost=$($LFS df | awk "/$FSNAME-OST/ { print \$4 }" |
+		sort -un | head -1)
+
+	# Get 90% of the available space
+	local space=$(((min_size_ost * 90)/100 * OSTCOUNT))
+
+	fallocate -l${space}k "$DIR/$tfile" ||
+		error "fallocate ${space}k $DIR/$tfile failed"
+	echo "'fallocate -l ${space}k $DIR/$tfile' succeeded"
+
+	# get size immediately after fallocate. This should be correctly
+	# updated
+	local size=$(stat -c '%s' "$DIR/$tfile")
+	# '%b * %B' prints "<blocks> * <block size>", evaluated as bytes
+	local used=$(( $(stat -c '%b * %B' "$DIR/$tfile") / 1024))
+
+	# Sleep for a while for statfs to get updated. And not pull from cache.
+	sleep 2
+
+	echo "df after fallocate:"
+	$LFS df
+
+	(( size / 1024 == space )) || error "size $size != requested $space"
+	[ "$ost1_FSTYPE" != ldiskfs ] || (( used >= space )) ||
+		error "used $used < space $space"
+
+	rm "$DIR/$tfile" || error "rm failed"
+	sync
+	wait_delete_completed
+
+	echo "df after unlink:"
+	$LFS df
+}
+run_test 150e "Verify 90% of available OST space consumed by fallocate"
+
#LU-2902 roc_hit was not able to read all values from lproc
function roc_hit_init() {
local list=$(comma_list $(osts_nodes))