int osc_disconnect(struct obd_export *exp);
int osc_punch_send(struct obd_export *exp, struct obdo *oa,
obd_enqueue_update_f upcall, void *cookie);
+int osc_fallocate_base(struct obd_export *exp, struct obdo *oa,
+ obd_enqueue_update_f upcall, void *cookie, int mode);
/* osc_io.c */
int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
const struct cl_io_slice *slice);
int osc_io_lru_reserve(const struct lu_env *env, const struct cl_io_slice *ios,
loff_t pos, size_t count);
+int osc_punch_start(const struct lu_env *env, struct cl_io *io,
+ struct cl_object *obj);
/* osc_lock.c */
void osc_lock_to_lockless(const struct lu_env *env, struct osc_lock *ols,
#include <obd_class.h>
#include <lustre_osc.h>
+#include <linux/falloc.h>
#include <uapi/linux/lustre/lustre_param.h>
#include "mdc_internal.h"
&oio->oi_trunc);
if (rc < 0)
return rc;
+ } else if (cl_io_is_fallocate(io) &&
+ io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE) {
+ rc = osc_punch_start(env, io, obj);
+ if (rc < 0)
+ return rc;
}
- if (cl_io_is_fallocate(io))
- return -EOPNOTSUPP;
-
if (oio->oi_lockless == 0) {
cl_object_attr_lock(obj);
rc = cl_object_attr_get(env, obj, attr);
return rc;
}
- if (!(ia_avalid & ATTR_SIZE))
+ if (!(ia_avalid & ATTR_SIZE) && !cl_io_is_fallocate(io))
return 0;
memset(oa, 0, sizeof(*oa));
oa->o_mtime = attr->cat_mtime;
oa->o_atime = attr->cat_atime;
oa->o_ctime = attr->cat_ctime;
-
- oa->o_size = size;
- oa->o_blocks = OBD_OBJECT_EOF;
oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLATIME |
OBD_MD_FLCTIME | OBD_MD_FLMTIME | OBD_MD_FLSIZE |
OBD_MD_FLBLOCKS;
+
if (oio->oi_lockless) {
oa->o_flags = OBD_FL_SRVLOCK;
oa->o_valid |= OBD_MD_FLFLAGS;
}
init_completion(&cbargs->opc_sync);
+ if (cl_io_is_fallocate(io)) {
+ int falloc_mode = io->u.ci_setattr.sa_falloc_mode;
- rc = osc_punch_send(osc_export(cl2osc(obj)), oa,
- mdc_async_upcall, cbargs);
+ oa->o_size = io->u.ci_setattr.sa_falloc_offset;
+ oa->o_blocks = io->u.ci_setattr.sa_falloc_end;
+ rc = osc_fallocate_base(osc_export(cl2osc(obj)), oa,
+ mdc_async_upcall, cbargs, falloc_mode);
+ } else {
+ oa->o_size = size;
+ oa->o_blocks = OBD_OBJECT_EOF;
+ rc = osc_punch_send(osc_export(cl2osc(obj)), oa,
+ mdc_async_upcall, cbargs);
+ }
cbargs->opc_rpc_sent = rc == 0;
return rc;
}
OST_PUNCH, mdt_punch_hdl,
mdt_hp_punch),
TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SYNC, mdt_data_sync),
+TGT_OST_HDL(HAS_BODY | HAS_REPLY | IS_MUTABLE, OST_FALLOCATE,
+ mdt_fallocate_hdl),
TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SEEK, tgt_lseek),
};
LPROC_MDT_IO_WRITE,
LPROC_MDT_IO_PUNCH,
LPROC_MDT_MIGRATE,
+ LPROC_MDT_FALLOCATE,
LPROC_MDT_LAST,
};
struct niobuf_remote *rnb, int npages,
struct niobuf_local *lnb, int old_rc);
int mdt_punch_hdl(struct tgt_session_info *tsi);
+int mdt_fallocate_hdl(struct tgt_session_info *tsi);
int mdt_glimpse_enqueue(struct mdt_thread_info *mti, struct ldlm_namespace *ns,
struct ldlm_lock **lockp, __u64 flags);
int mdt_brw_enqueue(struct mdt_thread_info *info, struct ldlm_namespace *ns,
int mdt_dom_disk_lvbo_update(const struct lu_env *env, struct mdt_object *mo,
struct ldlm_resource *res, bool increase_only);
void mdt_dom_obj_lvb_update(const struct lu_env *env, struct mdt_object *mo,
- bool increase_only);
+ struct obdo *oa, bool increase_only);
int mdt_dom_lvb_alloc(struct ldlm_resource *res);
static inline bool mdt_dom_check_for_discard(struct mdt_thread_info *mti,
#define DEBUG_SUBSYSTEM S_FILTER
#include <dt_object.h>
+#include <linux/falloc.h>
+
#include "mdt_internal.h"
/* functions below are stubs for now, they will be implemented with
}
void mdt_dom_obj_lvb_update(const struct lu_env *env, struct mdt_object *mo,
- bool increase_only)
+ struct obdo *oa, bool increase_only)
{
struct mdt_device *mdt = mdt_dev(mo->mot_obj.lo_dev);
struct ldlm_res_id resid;
return;
/* Update lvbo data if exists. */
- if (mdt_dom_lvb_is_valid(res))
+ if (mdt_dom_lvb_is_valid(res)) {
mdt_dom_disk_lvbo_update(env, mo, res, increase_only);
+ if (oa) {
+ struct ost_lvb *res_lvb = res->lr_lvb_data;
+
+ lock_res(res);
+ oa->o_valid |= OBD_MD_FLBLOCKS | OBD_MD_FLSIZE |
+ OBD_MD_FLMTIME | OBD_MD_FLATIME |
+ OBD_MD_FLCTIME;
+ oa->o_blocks = res_lvb->lvb_blocks;
+ oa->o_size = res_lvb->lvb_size;
+ oa->o_atime = res_lvb->lvb_atime;
+ oa->o_mtime = res_lvb->lvb_mtime;
+ oa->o_ctime = res_lvb->lvb_ctime;
+ unlock_res(res);
+ }
+ }
ldlm_resource_putref(res);
}
else
obdo_from_la(oa, la, LA_GID | LA_UID);
- mdt_dom_obj_lvb_update(env, mo, false);
+ mdt_dom_obj_lvb_update(env, mo, NULL, false);
/* don't report overquota flag if we failed before reaching
* commit */
if (old_rc == 0 && (rc == 0 || rc == -EDQUOT)) {
* atime and we should update the lvb so that other glimpses
* will also get the updated value. bug 5972 */
if (oa)
- mdt_dom_obj_lvb_update(env, mo, true);
+ mdt_dom_obj_lvb_update(env, mo, NULL, true);
rc = mdt_commitrw_read(env, mdt, mo, objcount, npages, lnb);
if (old_rc)
rc = old_rc;
RETURN(rc);
}
+/**
+ * Apply a fallocate request and an attribute update to \a dob inside a
+ * single transaction on \a dt.
+ *
+ * Both updates are declared before the transaction is started; the object
+ * is write-locked only while dt_falloc() and dt_attr_set() run.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dt	device on which the transaction is created
+ * \param[in] dob	object to operate on; must exist
+ * \param[in] start	range start, passed through to dt_falloc()
+ * \param[in] end	range end, passed through to dt_falloc()
+ * \param[in] mode	fallocate mode flags, passed through to dt_falloc()
+ * \param[in] la	attributes handed to dt_attr_set()
+ *
+ * \retval 0		if successful
+ * \retval -ENOENT	if \a dob does not exist
+ * \retval negative	error from transaction create/declare/start,
+ *			dt_falloc() or dt_attr_set()
+ */
+int mdt_object_fallocate(const struct lu_env *env, struct dt_device *dt,
+			 struct dt_object *dob, __u64 start, __u64 end,
+			 int mode, struct lu_attr *la)
+{
+	struct thandle *handle;
+	int rc;
+
+	ENTRY;
+
+	if (!dt_object_exists(dob))
+		RETURN(-ENOENT);
+
+	handle = dt_trans_create(env, dt);
+	if (IS_ERR(handle))
+		RETURN(PTR_ERR(handle));
+
+	/* declare every update that will be made under this handle */
+	rc = dt_declare_attr_set(env, dob, la, handle);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	rc = dt_declare_fallocate(env, dob, start, end, mode, handle);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	tgt_vbr_obj_set(env, dob);
+	rc = dt_trans_start(env, dt, handle);
+	if (rc != 0)
+		GOTO(stop, rc);
+
+	/* execute: space change first, then the attributes */
+	dt_write_lock(env, dob, 0);
+	rc = dt_falloc(env, dob, start, end, mode, handle);
+	if (rc != 0)
+		GOTO(unlock, rc);
+	rc = dt_attr_set(env, dob, la, handle);
+	if (rc != 0)
+		GOTO(unlock, rc);
+unlock:
+	dt_write_unlock(env, dob);
+stop:
+	/* record the outcome in the handle before stopping it */
+	handle->th_result = rc;
+	dt_trans_stop(env, dt, handle);
+	RETURN(rc);
+}
+
+/**
+ * MDT request handler for OST_FALLOCATE RPC.
+ *
+ * This is part of request processing. Validate request fields,
+ * preallocate (or punch, for FALLOC_FL_PUNCH_HOLE) the given MDT object
+ * and pack the reply.
+ *
+ * The byte range travels in the request obdo: o_size carries the start
+ * and o_blocks the end, so both OBD_MD_FLSIZE and OBD_MD_FLBLOCKS must be
+ * set in o_valid.  If the request carries OBD_FL_SRVLOCK, a PW data lock
+ * is taken here and dropped before returning.  On success the reply obdo
+ * is handed to mdt_dom_obj_lvb_update() and the LPROC_MDT_FALLOCATE
+ * counter is updated with the elapsed time.
+ *
+ * \param[in] tsi	target session environment for this request
+ *
+ * \retval 0				if successful
+ * \retval err_serious(-ENOMEM)	reply body is not available
+ * \retval err_serious(-EPROTO)	start/end are not both flagged valid
+ * \retval -EOPNOTSUPP		mode has bits besides KEEP_SIZE/PUNCH_HOLE
+ * \retval -ENOENT			object does not exist
+ * \retval -EPERM			object is a directory
+ * \retval negative value		other error from lock, lookup or fallocate
+ */
+int mdt_fallocate_hdl(struct tgt_session_info *tsi)
+{
+	struct obdo *oa = &tsi->tsi_ost_body->oa;
+	struct ptlrpc_request *req = tgt_ses_req(tsi);
+	struct ost_body *repbody;
+	struct mdt_thread_info *info;
+	struct ldlm_namespace *ns = tsi->tsi_tgt->lut_obd->obd_namespace;
+	struct obd_export *exp = tsi->tsi_exp;
+	struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev);
+	struct mdt_object *mo;
+	struct dt_object *dob;
+	struct lu_attr *la;
+	__u64 flags = 0;
+	struct lustre_handle lh = { 0, };
+	int rc, mode;
+	__u64 start, end;
+	bool srvlock;
+	ktime_t kstart = ktime_get();
+
+	/* pairs with the RETURN()/EXIT used on every path below */
+	ENTRY;
+
+	repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
+	if (repbody == NULL)
+		RETURN(err_serious(-ENOMEM));
+
+	/*
+	 * fallocate start and end are passed in o_size, o_blocks
+	 * on the wire.
+	 */
+	if ((oa->o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
+	    (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
+		RETURN(err_serious(-EPROTO));
+
+	start = oa->o_size;
+	end = oa->o_blocks;
+	mode = oa->o_falloc_mode;
+
+	/* start/end are unsigned 64-bit, print them as such */
+	CDEBUG(D_INODE,
+	       "fallocate: "DFID", mode = %#x, start = %llu, end = %llu\n",
+	       PFID(&tsi->tsi_fid), mode, start, end);
+
+	/*
+	 * mode == 0 (which is standard prealloc) and PUNCH are supported.
+	 * The rest of the mode options are not supported yet.
+	 */
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+		RETURN(-EOPNOTSUPP);
+
+	/* a PUNCH request without KEEP_SIZE is fixed up rather than refused */
+	if (mode & FALLOC_FL_PUNCH_HOLE && !(mode & FALLOC_FL_KEEP_SIZE)) {
+		CWARN("%s: PUNCH mode misses KEEP_SIZE flag, setting it\n",
+		      tsi->tsi_tgt->lut_obd->obd_name);
+		mode |= FALLOC_FL_KEEP_SIZE;
+	}
+
+	/* from here on every exit must go through "out" to release info */
+	info = tsi2mdt_info(tsi);
+	la = &info->mti_attr.ma_attr;
+
+	repbody->oa.o_oi = oa->o_oi;
+	repbody->oa.o_valid = OBD_MD_FLID;
+
+	srvlock = oa->o_valid & OBD_MD_FLFLAGS &&
+		  oa->o_flags & OBD_FL_SRVLOCK;
+
+	/* client asked the server to take the data lock on its behalf */
+	if (srvlock) {
+		rc = tgt_mdt_data_lock(ns, &tsi->tsi_resid, &lh, LCK_PW,
+				       &flags);
+		if (rc != 0)
+			GOTO(out, rc);
+	}
+
+	mo = mdt_object_find(tsi->tsi_env, mdt, &tsi->tsi_fid);
+	if (IS_ERR(mo))
+		GOTO(out_unlock, rc = PTR_ERR(mo));
+
+	if (!mdt_object_exists(mo))
+		GOTO(out_put, rc = -ENOENT);
+
+	/* Shouldn't happen on dirs */
+	if (S_ISDIR(lu_object_attr(&mo->mot_obj))) {
+		rc = -EPERM;
+		CERROR("%s: fallocate on dir "DFID": rc = %d\n",
+		       exp->exp_obd->obd_name, PFID(&tsi->tsi_fid), rc);
+		GOTO(out_put, rc);
+	}
+
+	/* only the timestamps are taken from the request obdo */
+	la_from_obdo(la, oa, OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME);
+
+	mdt_dom_write_lock(mo);
+	dob = mdt_obj2dt(mo);
+
+	if (la->la_valid & (LA_ATIME | LA_MTIME | LA_CTIME))
+		tgt_fmd_update(exp, &tsi->tsi_fid, req->rq_xid);
+
+	rc = mdt_object_fallocate(tsi->tsi_env, mdt->mdt_bottom, dob, start,
+				  end, mode, la);
+	mdt_dom_write_unlock(mo);
+	if (rc)
+		GOTO(out_put, rc);
+
+	/* refresh the LVB and hand the reply obdo over to be filled from it */
+	mdt_dom_obj_lvb_update(tsi->tsi_env, mo, &repbody->oa, false);
+
+	mdt_counter_incr(req, LPROC_MDT_FALLOCATE,
+			 ktime_us_delta(ktime_get(), kstart));
+
+	EXIT;
+out_put:
+	lu_object_put(tsi->tsi_env, &mo->mot_obj);
+out_unlock:
+	if (srvlock)
+		tgt_data_unlock(&lh, LCK_PW);
+out:
+	mdt_thread_info_fini(info);
+	return rc;
+}
+
int mdt_object_punch(const struct lu_env *env, struct dt_device *dt,
struct dt_object *dob, __u64 start, __u64 end,
struct lu_attr *la)
if (rc)
GOTO(out_put, rc);
- mdt_dom_obj_lvb_update(tsi->tsi_env, mo, false);
+ mdt_dom_obj_lvb_update(tsi->tsi_env, mo, &repbody->oa, false);
+
mdt_counter_incr(req, LPROC_MDT_IO_PUNCH,
ktime_us_delta(ktime_get(), kstart));
EXIT;
[LPROC_MDT_IO_WRITE] = "write_bytes",
[LPROC_MDT_IO_PUNCH] = "punch",
[LPROC_MDT_MIGRATE] = "migrate",
+ [LPROC_MDT_FALLOCATE] = "fallocate",
};
void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset)
if (rc != 0)
GOTO(out_unlock, rc);
- mdt_dom_obj_lvb_update(info->mti_env, mo, false);
+ mdt_dom_obj_lvb_update(info->mti_env, mo, NULL, false);
EXIT;
out_unlock:
mdt_reint_striped_unlock(info, mo, lh, einfo, rc);
* if server doesn't support fallocate punch, we also need these data to be
* flushed first to prevent re-ordering with the punch
*/
-static int osc_punch_start(const struct lu_env *env, struct cl_io *io,
- struct cl_object *obj)
+int osc_punch_start(const struct lu_env *env, struct cl_io *io,
+ struct cl_object *obj)
{
struct osc_object *osc = cl2osc(obj);
pgoff_t pg_start = cl_index(obj, io->u.ci_setattr.sa_falloc_offset);
osc);
RETURN(0);
}
+EXPORT_SYMBOL(osc_punch_start);
static int osc_io_setattr_start(const struct lu_env *env,
const struct cl_io_slice *slice)
ptlrpc_request_set_replen(req);
- req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret;
+ req->rq_interpret_reply = osc_setattr_interpret;
BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args));
sa = ptlrpc_req_async_args(sa, req);
sa->sa_oa = oa;
RETURN(0);
}
+EXPORT_SYMBOL(osc_fallocate_base);
static int osc_sync_interpret(const struct lu_env *env,
struct ptlrpc_request *req, void *args, int rc)
blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff;
/* Create and mark new extents as either zero or unwritten */
- flags = osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ?
+ flags = (osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ||
+ !ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)) ?
LDISKFS_GET_BLOCKS_CREATE_ZERO :
LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT;
#ifndef HAVE_LDISKFS_GET_BLOCKS_KEEP_SIZE
#endif
inode_lock(inode);
- /*
- * We only support preallocation for extent-based file only.
- */
- if (!(ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)))
- GOTO(out, rc = -EOPNOTSUPP);
-
if (!(mode & FALLOC_FL_KEEP_SIZE) && (end > i_size_read(inode) ||
end > LDISKFS_I(inode)->i_disksize)) {
new_size = end;
{
SANITY_ONLY=${SANITY_ONLY:-"36 39 40 41 42d 42e 43 46 56r 101e 119a \
131 150a 155a 155b 155c 155d 207 241 251"}
+ # Fallocate tests
+ (( $MDS1_VERSION >= $(version_code 2.14.52) )) &&
+ SANITY_ONLY+=" 150b 150bb 150c 150d 150f 150g"
+
SANITY_REPEAT=${SANITY_REPEAT:-1}
# XXX: to fix 45. Add 42a, c when LU-9693 fixed.
# Add 42b when LU-6493 fixed
test_150c() {
check_set_fallocate_or_skip
+ local striping="-c2"
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed"
fallocate -l ${OSTCOUNT}m $DIR/$tfile || error "fallocate failed"
- sync; sync_all_data
- cancel_lru_locks $OSC
- sleep 5
- bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
- want=$((OSTCOUNT * 1048576))
+ local bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
+ local want=$((OSTCOUNT * 1048576))
# Must allocate all requested space, not more than 5% extra
(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
error "bytes $bytes is not $want"
rm -f $DIR/$tfile
- # verify fallocate on PFL file
- $LFS setstripe -E1M -c1 -E16M -c3 -Eeof -c 4 $DIR/$tfile ||
+
+ echo "verify fallocate on PFL file"
+
+ [[ "x$DOM" == "xyes" ]] && striping="-L mdt"
+
+ $LFS setstripe -E1M $striping -E16M -c3 -Eeof -c 4 $DIR/$tfile ||
error "Create $DIR/$tfile failed"
- fallocate -l $((1048576 * 1024)) $DIR/$tfile ||
+ fallocate -l $((1048576 * 512)) $DIR/$tfile ||
error "fallocate failed"
- sync; sync_all_data
- cancel_lru_locks $OSC
- sleep 5
- local bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
- local want=$((1024 * 1048576))
+ bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
+ want=$((512 * 1048576))
# Must allocate all requested space, not more than 5% extra
(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
test_150d() {
check_set_fallocate_or_skip
+ local striping="-c2"
+
+ [[ "x$DOM" == "xyes" ]] && striping="-L mdt"
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
- $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed"
+ $LFS setstripe -E1M $striping -E eof -c $OSTCOUNT -S1M $DIR/$tdir ||
+ error "setstripe failed"
fallocate -o 1G -l ${OSTCOUNT}m $DIR/$tdir || error "fallocate failed"
- sync; sync_all_data
- cancel_lru_locks $OSC
- sleep 5
local bytes=$(($(stat -c '%b * %B' $DIR/$tdir)))
local want=$((OSTCOUNT * 1048576))
check_set_fallocate_or_skip
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
+ [[ "x$DOM" == "xyes" ]] &&
+ $LFS setstripe -E1M -L mdt -E eof $DIR/$tfile
+
echo "Verify fallocate punch: Range within the file range"
yes 'A' | dd of=$DIR/$tfile bs=4096 count=5 ||
error "dd failed for bs 4096 and count 5"
check_set_fallocate_or_skip
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
- $LFS setstripe -c${OSTCOUNT} $DIR/$tfile ||
- error "$LFS setstripe -c${OSTCOUNT} $DIR/$tfile failed"
+ if [[ "x$DOM" == "xyes" ]]; then
+ $LFS setstripe -E2M -L mdt -E eof -c${OSTCOUNT} $DIR/$tfile ||
+ error "$LFS setstripe DoM + ${OSTCOUNT} OST failed"
+ else
+ $LFS setstripe -c${OSTCOUNT} $DIR/$tfile ||
+ error "$LFS setstripe -c${OSTCOUNT} $DIR/$tfile failed"
+ fi
# Get 100MB per OST of the available space to reduce run time
# else 60% of the available space if we are running SLOW tests