Patch adds support for fiemap to DoM files.
Server part:
- modify MDS_GET_INFO handler to return FIEMAP like
OST_GET_INFO does
- mdt_fiemap_get() to process fiemap request
Client part:
- rewrite lov_object_fiemap() to support DoM component
- rework fiemap_for_stripe() to work with both DoM and
RAID0 layouts
- use initialized layout entries to get subobject and
get rid of lov_find_subobj() used by fiemap only
- fix issue with wrong resume entry/stripe count
- mdc_object_fiemap() as implementation of .coo_fiemap
cl_object_operations to send and receive fiemap request
- treat LOV subdev errors as UNKNOWN extent
- rework FID2PATH layout description to be compatible with
other GET_INFO keys (no protocol changes)
- add sanity.sh test_130i for DoM fiemap with resuming
To indicate an MDT device, one extra bit is taken from the stripe
number bits and given to the device number. As a result, the total
absolute stripe count is limited to 32768 in a fiemap report.
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: I9b6df04fd62d773aec2d916440ba08dfea06faa4
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/55221
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
extern struct req_format RQF_MDS_CONNECT;
extern struct req_format RQF_MDS_DISCONNECT;
extern struct req_format RQF_MDS_GET_INFO;
+extern struct req_format RQF_MDS_FID2PATH;
extern struct req_format RQF_MDS_READPAGE;
extern struct req_format RQF_MDS_REINT;
extern struct req_format RQF_MDS_REINT_CREATE;
static inline int get_fe_device(struct fiemap_extent *fe)
{
- return fe->fe_device & 0xffff;
+ return fe->fe_device & 0x1ffff;
}
static inline void set_fe_device(struct fiemap_extent *fe, int devno)
{
- fe->fe_device = (fe->fe_device & 0xffff0000) | (devno & 0xffff);
+ fe->fe_device = (fe->fe_device & 0xfffe0000) | (devno & 0x1ffff);
}
static inline int get_fe_stripenr(struct fiemap_extent *fe)
{
- return fe->fe_device >> 16;
+ return fe->fe_device >> 17;
}
static inline void set_fe_stripenr(struct fiemap_extent *fe, int nr)
{
- fe->fe_device = (fe->fe_device & 0xffff) | (nr << 16);
+ fe->fe_device = (fe->fe_device & 0x1ffff) | (nr << 17);
}
static inline void set_fe_device_stripenr(struct fiemap_extent *fe, int devno,
int nr)
{
- fe->fe_device = (nr << 16) | (devno & 0xffff);
+ fe->fe_device = (nr << 17) | (devno & 0x1ffff);
}
static inline __kernel_size_t fiemap_count_to_size(__kernel_size_t extent_count)
struct lov_layout_raid0 lo_dom_r0;
struct lovsub_object *lo_dom;
struct lov_oinfo *lo_loi;
+ unsigned short lo_mdt_idx;
};
struct lov_layout_entry {
lle->lle_dom.lo_dom_r0.lo_nr = 1;
lle->lle_dom.lo_dom_r0.lo_sub = &lle->lle_dom.lo_dom;
lle->lle_dom.lo_loi = loi;
+ lle->lle_dom.lo_mdt_idx = idx;
rc = lov_page_slice_fixup(lov, clo);
RETURN(rc);
/**
* We calculate on which OST the mapping will end. If the length of mapping
- * is greater than (stripe_size * stripe_count) then the last_stripe will
+ * is greater than (stripe_size * stripe_count) then the last_stripe
* will be one just before start_stripe. Else we check if the mapping
* intersects each OST and find last_stripe.
* This function returns the last_stripe and also sets the stripe_count
int last_stripe;
int i, j;
+ if (lsme_is_dom(lsme)) {
+ *stripe_count = 1;
+ return start_stripe;
+ }
+
init_stripe = lov_stripe_number(lsm, index, ext->e_start);
- if (ext->e_end - ext->e_start >
- lsme->lsme_stripe_size * lsme->lsme_stripe_count) {
- if (init_stripe == start_stripe) {
- last_stripe = (start_stripe < 1) ?
- lsme->lsme_stripe_count - 1 : start_stripe - 1;
- *stripe_count = lsme->lsme_stripe_count;
- } else if (init_stripe < start_stripe) {
+ if (ext->e_end - ext->e_start > stripe_width(lsm, index)) {
+ if (init_stripe <= start_stripe) {
last_stripe = (init_stripe < 1) ?
lsme->lsme_stripe_count - 1 : init_stripe - 1;
*stripe_count = lsme->lsme_stripe_count -
} else {
for (j = 0, i = start_stripe; j < lsme->lsme_stripe_count;
i = (i + 1) % lsme->lsme_stripe_count, j++) {
- if (!lov_stripe_intersects(lsm, index, i, ext, NULL,
+ if (!lov_stripe_intersects(lsm, index, i, ext, NULL,
NULL))
break;
if ((start_stripe != init_stripe) && (i == init_stripe))
local_end < lun_end) {
fm_end_offset = local_end;
} else {
+ int stripes = lsme_is_dom(lsme) ? 1 : lsme->lsme_stripe_count;
+
/* This is a special value to indicate that caller should
* calculate offset in next stripe. */
fm_end_offset = 0;
- *start_stripe = (stripe_no + 1) % lsme->lsme_stripe_count;
+ *start_stripe = (stripe_no + 1) % stripes;
}
return fm_end_offset;
bool fs_enough; /* enough for this call */
};
-static struct cl_object *lov_find_subobj(const struct lu_env *env,
- struct lov_object *lov,
- struct lov_stripe_md *lsm,
- int index)
+static int fiemap_unknown(struct fiemap_state *fs, u64 obd_start, u64 obd_end)
{
- struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev);
- struct lov_thread_info *lti = lov_env_info(env);
- struct lu_fid *ofid = &lti->lti_fid;
- struct lov_oinfo *oinfo;
- struct cl_device *subdev;
- int entry = lov_comp_entry(index);
- int stripe = lov_comp_stripe(index);
- int ost_idx;
- int rc;
- struct cl_object *result;
-
- if (lov->lo_type != LLT_COMP)
- GOTO(out, result = NULL);
-
- if (entry >= lsm->lsm_entry_count ||
- stripe >= lsm->lsm_entries[entry]->lsme_stripe_count)
- GOTO(out, result = NULL);
-
- oinfo = lsm->lsm_entries[entry]->lsme_oinfo[stripe];
- ost_idx = oinfo->loi_ost_idx;
- rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx);
- if (rc != 0)
- GOTO(out, result = NULL);
-
- subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
- result = lov_sub_find(env, subdev, ofid, NULL);
-out:
- if (result == NULL)
- result = ERR_PTR(-EINVAL);
- return result;
+ /* If OST is inactive or layout is not supported or available
+ * then return extent with UNKNOWN flag.
+ */
+ fs->fs_fm->fm_mapped_extents = 1;
+ if (fs->fs_fm->fm_extent_count) {
+ fs->fs_fm->fm_extents[0].fe_logical = obd_start;
+ fs->fs_fm->fm_extents[0].fe_length = obd_end - obd_start + 1;
+ fs->fs_fm->fm_extents[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+ }
+ fs->fs_device_done = true;
+ return 1;
}
static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj,
struct lov_stripe_md *lsm, struct fiemap *fiemap,
size_t *buflen, struct ll_fiemap_info_key *fmkey,
- int index, int stripe_last, int stripeno,
+ int index, int stripe_last, const int stripeno,
struct fiemap_state *fs)
{
- struct lov_stripe_md_entry *lsme = lsm->lsm_entries[index];
- struct cl_object *subobj;
- struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
- struct fiemap_extent *fm_ext = &fs->fs_fm->fm_extents[0];
+ struct lov_object *lo = cl2lov(obj);
+ struct lov_layout_entry *lle = lov_entry(lo, index);
+ struct lov_stripe_md_entry *lsme = lle->lle_lsme;
+ struct cl_object *subobj = NULL;
+ struct fiemap *fsm = fs->fs_fm;
+ struct fiemap_extent *fm_ext = &fsm->fm_extents[0];
u64 req_fm_len; /* max requested extent coverage */
u64 len_mapped_single_call;
u64 obd_start;
u64 obd_end;
unsigned int ext_count;
- /* EOF for object */
- bool ost_eof = false;
- /* done with required mapping for this OST? */
- bool ost_done = false;
- int ost_index;
+ int devnr = 0;
int rc = 0;
- fs->fs_device_done = false;
/* Find out range of mapping on this stripe */
if ((lov_stripe_intersects(lsm, index, stripeno, &fs->fs_ext,
&obd_start, &obd_end)) == 0)
return 0;
- if (lov_oinfo_is_dummy(lsme->lsme_oinfo[stripeno]))
- return -EIO;
-
/* If this is a continuation FIEMAP call and we are on
* starting stripe then obd_start needs to be set to
* end_offset */
obd_start)
return 0;
+ fs->fs_device_done = false;
req_fm_len = obd_end - obd_start + 1;
- fs->fs_fm->fm_length = 0;
+ fsm->fm_length = 0;
len_mapped_single_call = 0;
- /* find lobsub object */
- subobj = lov_find_subobj(env, cl2lov(obj), lsm,
- lov_comp_index(index, stripeno));
- if (IS_ERR(subobj))
- return PTR_ERR(subobj);
+ if (lo->lo_type != LLT_COMP || !lle->lle_valid) {
+ ext_count = fiemap_unknown(fs, obd_start, obd_end);
+ GOTO(out_unknown, rc = -EOPNOTSUPP);
+ }
+
+ switch (lle->lle_type) {
+ case LOV_PATTERN_RAID0:
+ {
+ struct lov_device *lov = lov_object_dev(lo);
+ const struct lov_layout_raid0 *r0 = &lle->lle_raid0;
+ struct lov_oinfo *oinfo;
+
+ if (stripeno >= r0->lo_nr)
+ RETURN(-EINVAL);
+ subobj = lovsub2cl(r0->lo_sub[stripeno]);
+ oinfo = lsme->lsme_oinfo[stripeno];
+ if (lov_oinfo_is_dummy(oinfo))
+ RETURN(-EIO);
+ devnr = oinfo->loi_ost_idx;
+ if (devnr < 0 || devnr >= lov_targets_nr(lov))
+ RETURN(-EINVAL);
+ if (!lov->ld_lov->lov_tgts[devnr]->ltd_active) {
+ ext_count = fiemap_unknown(fs, obd_start, obd_end);
+ GOTO(out_unknown, rc = -ENODEV);
+ }
+ break;
+ }
+ case LOV_PATTERN_MDT:
+ {
+ const struct lov_layout_dom *dom = &lle->lle_dom;
+
+ subobj = lovsub2cl(dom->lo_dom);
+ if (lov_oinfo_is_dummy(dom->lo_loi))
+ RETURN(-EIO);
+ devnr = dom->lo_mdt_idx | 0x10000ULL;
+ break;
+ }
+ default:
+ ext_count = fiemap_unknown(fs, obd_start, obd_end);
+ GOTO(out_unknown, rc = -EOPNOTSUPP);
+ }
+
+ if (!subobj)
+ RETURN(-EINVAL);
+
/* If the output buffer is very large and the objects have many
* extents we may need to loop on a single OST repeatedly */
- do {
+ while (!fs->fs_device_done) {
if (fiemap->fm_extent_count > 0) {
/* Don't get too many extents. */
if (fs->fs_cur_extent + fs->fs_cnt_need >
}
obd_start += len_mapped_single_call;
- fs->fs_fm->fm_length = req_fm_len - len_mapped_single_call;
- req_fm_len = fs->fs_fm->fm_length;
+ fsm->fm_length = req_fm_len - len_mapped_single_call;
+ req_fm_len = fsm->fm_length;
/**
* If we've collected enough extent map, we'd request 1 more,
* to see whether we coincidentally finished all available
* extent map, so that FIEMAP_EXTENT_LAST would be set.
*/
- fs->fs_fm->fm_extent_count = fs->fs_enough ?
- 1 : fs->fs_cnt_need;
- fs->fs_fm->fm_mapped_extents = 0;
- fs->fs_fm->fm_flags = fiemap->fm_flags;
-
- ost_index = lsme->lsme_oinfo[stripeno]->loi_ost_idx;
-
- if (ost_index < 0 || ost_index >= lov->desc.ld_tgt_count)
- GOTO(obj_put, rc = -EINVAL);
- /* If OST is inactive, return extent with UNKNOWN flag. */
- if (!lov->lov_tgts[ost_index]->ltd_active) {
-
- fs->fs_fm->fm_mapped_extents = 1;
- if (fs->fs_fm->fm_extent_count == 0)
- goto inactive_tgt;
-
- fm_ext[0].fe_logical = obd_start;
- fm_ext[0].fe_length = obd_end - obd_start + 1;
- fm_ext[0].fe_flags |=
- FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
-
- goto inactive_tgt;
+ fsm->fm_extent_count = fs->fs_enough ? 1 : fs->fs_cnt_need;
+ fsm->fm_mapped_extents = 0;
+ fsm->fm_flags = fiemap->fm_flags;
+ fsm->fm_start = obd_start;
+ fsm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ fmkey->lfik_fiemap = *fsm;
+ *buflen = fiemap_count_to_size(fsm->fm_extent_count);
+ rc = cl_object_fiemap(env, subobj, fmkey, fsm, buflen);
+ if (rc) {
+ /* Can we report as UNKNOWN all subdev error? */
+ ext_count = fiemap_unknown(fs, obd_start, obd_end);
+ GOTO(out_unknown, rc);
}
-
- fs->fs_fm->fm_start = obd_start;
- fs->fs_fm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
- memcpy(&fmkey->lfik_fiemap, fs->fs_fm, sizeof(*fs->fs_fm));
- *buflen = fiemap_count_to_size(fs->fs_fm->fm_extent_count);
-
- rc = cl_object_fiemap(env, subobj, fmkey, fs->fs_fm, buflen);
- if (rc != 0)
- GOTO(obj_put, rc);
-inactive_tgt:
- ext_count = fs->fs_fm->fm_mapped_extents;
+ ext_count = fsm->fm_mapped_extents;
if (ext_count == 0) {
- ost_done = true;
fs->fs_device_done = true;
/* If last stripe has hold at the end,
* we need to return */
if (stripeno == fs->fs_last_stripe) {
fiemap->fm_mapped_extents = 0;
fs->fs_finish_stripe = true;
- GOTO(obj_put, rc);
+ RETURN(0);
}
break;
} else if (fs->fs_enough) {
* We've collected enough extents and there are
* more extents after it.
*/
- GOTO(obj_put, rc);
+ RETURN(0);
}
/* If we just need num of extents, got to next device */
obd_start;
/* Have we finished mapping on this device? */
- if (req_fm_len <= len_mapped_single_call) {
- ost_done = true;
+ if (req_fm_len <= len_mapped_single_call)
fs->fs_device_done = true;
- }
/* Clear the EXTENT_LAST flag which can be present on
* the last extent */
if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST)
fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST;
+
if (lov_stripe_size(lsm, index,
fm_ext[ext_count - 1].fe_logical +
fm_ext[ext_count - 1].fe_length,
stripeno) >= fmkey->lfik_oa.o_size) {
- ost_eof = true;
fs->fs_device_done = true;
}
-
- fiemap_prepare_and_copy_exts(fiemap, fm_ext, ost_index,
+out_unknown:
+ fiemap_prepare_and_copy_exts(fiemap, fm_ext, devnr,
ext_count, fs->fs_cur_extent,
stripe_last + stripeno);
fs->fs_cur_extent += ext_count;
/* Ran out of available extents? */
if (fs->fs_cur_extent >= fiemap->fm_extent_count)
fs->fs_enough = true;
- } while (!ost_done && !ost_eof);
+ }
if (stripeno == fs->fs_last_stripe)
fs->fs_finish_stripe = true;
-obj_put:
- cl_object_put(env, subobj);
-
- return rc;
+ return 0;
}
/**
GOTO(out_lsm, rc = -EOPNOTSUPP);
}
- /* No support for DOM layout yet. */
- if (lsme_is_dom(lsm->lsm_entries[0]))
- GOTO(out_lsm, rc = -EOPNOTSUPP);
-
if (lsm->lsm_is_released) {
if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
/**
if (whole_start > fmkey->lfik_oa.o_size)
GOTO(out_fm_local, rc = -EINVAL);
whole_end = (fiemap->fm_length == OBD_OBJECT_EOF) ?
- fmkey->lfik_oa.o_size + 1 :
- whole_start + fiemap->fm_length;
+ fmkey->lfik_oa.o_size + 1 :
+ whole_start + fiemap->fm_length;
/**
* If fiemap->fm_length != OBD_OBJECT_EOF but whole_end exceeds file
* size
end_entry = lsm->lsm_entry_count - 1;
cur_stripe = 0;
for (entry = 0; entry <= end_entry; entry++) {
+ int stripes;
+
lsme = lsm->lsm_entries[entry];
- if (cur_stripe + lsme->lsme_stripe_count >= stripe_last) {
+ stripes = lsme_is_dom(lsme) ? 1 : lsme->lsme_stripe_count;
+ if (cur_stripe + stripes > stripe_last) {
start_entry = entry;
start_stripe = stripe_last - cur_stripe;
break;
}
-
- cur_stripe += lsme->lsme_stripe_count;
+ cur_stripe += stripes;
}
+
if (start_entry == -1) {
CERROR(DFID": FIEMAP does not init start entry, cur_stripe=%d, "
"stripe_last=%d\n", PFID(lu_object_fid(&obj->co_lu)),
range.e_end = whole_end;
for (entry = start_entry; entry <= end_entry; entry++) {
+ int stripes;
+
/* remeber to update stripe_last accordingly */
lsme = lsm->lsm_entries[entry];
+ stripes = lsme_is_dom(lsme) ? 1 : lsme->lsme_stripe_count;
/* FLR could contain component holes between entries */
if (!lsme_inited(lsme)) {
- stripe_last += lsme->lsme_stripe_count;
+ stripe_last += stripes;
resume = false;
continue;
}
if (!lu_extent_is_overlapped(&range, &lsme->lsme_extent)) {
- stripe_last += lsme->lsme_stripe_count;
+ stripe_last += stripes;
resume = false;
continue;
}
/* Check each stripe */
for (cur_stripe = fs.fs_start_stripe; stripe_count > 0;
- --stripe_count,
- cur_stripe = (cur_stripe + 1) % lsme->lsme_stripe_count) {
+ --stripe_count, cur_stripe = (cur_stripe + 1) % stripes) {
/* reset fs_finish_stripe */
fs.fs_finish_stripe = false;
rc = fiemap_for_stripe(env, obj, lsm, fiemap, buflen,
if (fs.fs_finish_stripe)
break;
} /* for each stripe */
- stripe_last += lsme->lsme_stripe_count;
+ stripe_last += stripes;
} /* for covering layout component entry */
finish:
fiemap->fm_mapped_extents = fs.fs_cur_extent;
out_fm_local:
OBD_FREE_LARGE(fm_local, buffer_size);
-
out_lsm:
lov_lsm_put(lsm);
return rc;
u64 loc_start, loc_end;
if (!lu_extent_is_overlapped(ext, &entry->lsme_extent))
- return 0;
+ return 0;
if (!obd_start)
obd_start = &loc_start;
}
static void mdc_build_res_name(struct osc_object *osc,
- struct ldlm_res_id *resname)
+ struct ldlm_res_id *resname)
{
fid_build_reg_res_name(lu_object_fid(osc2lu(osc)), resname);
}
RETURN(mdc_dlm_canceling(env, lock));
}
+static int mdc_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+ struct ll_fiemap_info_key *fmkey,
+ struct fiemap *fiemap, size_t *buflen)
+{
+ struct osc_thread_info *info = osc_env_info(env);
+ struct osc_object *osc = cl2osc(obj);
+ struct obd_export *exp = osc_export(osc);
+ struct lustre_handle lockh;
+ enum ldlm_mode mode = LCK_MINMODE;
+ struct ptlrpc_request *req;
+ struct fiemap *repbuf;
+ struct ll_fiemap_info_key *rq_fmkey;
+ char *fmbuf;
+ __u64 flags;
+ int rc;
+ /*
+ * .coo_fiemap implementation for MDC objects.
+ *
+ * Copies \a fmkey and *buflen bytes of \a fiemap into an
+ * RQF_OST_GET_INFO_FIEMAP request packed as MDS_GET_INFO, waits for
+ * the reply and copies *buflen bytes of the RMF_FIEMAP_VAL reply
+ * buffer back into \a fiemap.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated,
+ * -EPROTO if the reply has no RMF_FIEMAP_VAL buffer, or the error
+ * returned by ptlrpc_request_pack() / ptlrpc_queue_wait().
+ */
+ ENTRY;
+
+ fmkey->lfik_oa.o_oi = osc->oo_oinfo->loi_oi;
+ /*
+ * With FIEMAP_FLAG_SYNC try to match a cached PR/PW/GROUP lock.
+ * The outcome is passed to the server through OBD_FL_SRVLOCK.
+ */
+ if (fmkey->lfik_fiemap.fm_flags & FIEMAP_FLAG_SYNC) {
+ struct ldlm_res_id *resid = &osc_env_info(env)->oti_resname;
+ union ldlm_policy_data *policy = &info->oti_policy;
+
+ mdc_build_res_name(osc, resid);
+ mdc_lock_build_policy(env, NULL, policy);
+ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_LVB_READY;
+ mode = mdc_dom_lock_match(env, exp, resid, LDLM_IBITS, policy,
+ LCK_PR | LCK_PW | LCK_GROUP,
+ &flags, osc, &lockh, 0);
+ fmkey->lfik_oa.o_valid |= OBD_MD_FLFLAGS;
+ if (mode) { /* lock is cached on client */
+ fmkey->lfik_oa.o_flags &= ~OBD_FL_SRVLOCK;
+ if (mode != LCK_PR) { /* swap the reference to PR so drop_lock can always decref LCK_PR */
+ ldlm_lock_addref(&lockh, LCK_PR);
+ ldlm_lock_decref(&lockh, mode);
+ }
+ } else {
+ /* no cached lock, needs acquire lock on server side */
+ fmkey->lfik_oa.o_flags |= OBD_FL_SRVLOCK;
+ }
+ }
+ /* the OST fiemap request format is used here, packed with the MDS version and opcode below */
+ req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+ &RQF_OST_GET_INFO_FIEMAP);
+ if (!req)
+ GOTO(drop_lock, rc = -ENOMEM);
+
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_KEY, RCL_CLIENT,
+ sizeof(*fmkey));
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_CLIENT,
+ *buflen);
+ req_capsule_set_size(&req->rq_pill, &RMF_FIEMAP_VAL, RCL_SERVER,
+ *buflen);
+
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO);
+ if (rc != 0) {
+ ptlrpc_request_free(req); /* request was never packed, so it is freed rather than put */
+ GOTO(drop_lock, rc);
+ }
+ rq_fmkey = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_KEY);
+ *rq_fmkey = *fmkey;
+ fmbuf = req_capsule_client_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ memcpy(fmbuf, fiemap, *buflen);
+ ptlrpc_request_set_replen(req);
+
+ rc = ptlrpc_queue_wait(req);
+ if (rc)
+ GOTO(fini_req, rc);
+
+ repbuf = req_capsule_server_get(&req->rq_pill, &RMF_FIEMAP_VAL);
+ if (!repbuf)
+ GOTO(fini_req, rc = -EPROTO);
+ memcpy(fiemap, repbuf, *buflen);
+
+fini_req:
+ ptlrpc_req_put(req);
+drop_lock:
+ if (mode) /* mode stays LCK_MINMODE unless a cached lock was matched above */
+ ldlm_lock_decref(&lockh, LCK_PR);
+ RETURN(rc);
+}
+
static const struct cl_object_operations mdc_ops = {
.coo_page_init = osc_page_init,
.coo_lock_init = mdc_lock_init,
.coo_glimpse = osc_object_glimpse,
.coo_req_attr_set = mdc_req_attr_set,
.coo_prune = mdc_object_prune,
- .coo_object_flush = mdc_object_flush
+ .coo_object_flush = mdc_object_flush,
+ .coo_fiemap = mdc_object_fiemap,
};
static const struct osc_object_operations mdc_object_ops = {
u32 keylen, void *key,
u32 vallen, void *val)
{
- struct obd_import *imp = class_exp2cliimp(exp);
- struct ptlrpc_request *req;
- char *tmp;
- int rc = -EINVAL;
- ENTRY;
+ struct obd_import *imp = class_exp2cliimp(exp);
+ struct ptlrpc_request *req;
+ char *tmp;
+ int rc = -EINVAL;
- req = ptlrpc_request_alloc(imp, &RQF_MDS_GET_INFO);
- if (req == NULL)
- RETURN(-ENOMEM);
+ ENTRY;
+
+ req = ptlrpc_request_alloc(imp, &RQF_MDS_FID2PATH);
+ if (req == NULL)
+ RETURN(-ENOMEM);
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_KEY,
- RCL_CLIENT, keylen);
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VALLEN,
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_KEY,
+ RCL_CLIENT, keylen);
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VALLEN,
RCL_CLIENT, sizeof(vallen));
- rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO);
- if (rc) {
- ptlrpc_request_free(req);
- RETURN(rc);
- }
+ rc = ptlrpc_request_pack(req, LUSTRE_MDS_VERSION, MDS_GET_INFO);
+ if (rc) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_KEY);
- memcpy(tmp, key, keylen);
- tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_VALLEN);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_KEY);
+ memcpy(tmp, key, keylen);
+ tmp = req_capsule_client_get(&req->rq_pill, &RMF_GETINFO_VALLEN);
memcpy(tmp, &vallen, sizeof(vallen));
- req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VAL,
- RCL_SERVER, vallen);
- ptlrpc_request_set_replen(req);
+ req_capsule_set_size(&req->rq_pill, &RMF_GETINFO_VAL,
+ RCL_SERVER, vallen);
+ ptlrpc_request_set_replen(req);
/* if server failed to resolve FID, and OI scrub not able to fix it, it
* will return -EINPROGRESS, ptlrpc_queue_wait() will keep retrying,
int mdt_get_info(struct tgt_session_info *tsi)
{
- char *key;
- int keylen;
- __u32 *vallen;
- void *valout;
- int rc;
+ char *key;
+ int keylen;
+ int rc;
ENTRY;
key = req_capsule_client_get(tsi->tsi_pill, &RMF_GETINFO_KEY);
- if (key == NULL) {
- CDEBUG(D_IOCTL, "No GETINFO key\n");
- RETURN(err_serious(-EFAULT));
+ if (!key) {
+ DEBUG_REQ(D_IOCTL, tgt_ses_req(tsi), "no GETINFO key");
+ RETURN(err_serious(-EPROTO));
}
keylen = req_capsule_get_size(tsi->tsi_pill, &RMF_GETINFO_KEY,
RCL_CLIENT);
-
- vallen = req_capsule_client_get(tsi->tsi_pill, &RMF_GETINFO_VALLEN);
- if (vallen == NULL) {
- CDEBUG(D_IOCTL, "%s: cannot get RMF_GETINFO_VALLEN buffer\n",
- tgt_name(tsi->tsi_tgt));
- RETURN(err_serious(-EFAULT));
- }
-
- req_capsule_set_size(tsi->tsi_pill, &RMF_GETINFO_VAL, RCL_SERVER,
- *vallen);
- rc = req_capsule_server_pack(tsi->tsi_pill);
- if (rc)
- RETURN(err_serious(rc));
-
- valout = req_capsule_server_get(tsi->tsi_pill, &RMF_GETINFO_VAL);
- if (valout == NULL) {
- CDEBUG(D_IOCTL, "%s: cannot get get-info RPC out buffer\n",
- tgt_name(tsi->tsi_tgt));
- RETURN(err_serious(-EFAULT));
- }
-
if (KEY_IS(KEY_FID2PATH)) {
- struct mdt_thread_info *info = tsi2mdt_info(tsi);
+ struct mdt_thread_info *info;
+ __u32 *vallen;
+ void *valout;
+
+ req_capsule_extend(tsi->tsi_pill, &RQF_MDS_FID2PATH);
+ vallen = req_capsule_client_get(tsi->tsi_pill,
+ &RMF_GETINFO_VALLEN);
+ if (!vallen) {
+ CDEBUG(D_IOCTL,
+ "%s: cannot get RMF_GETINFO_VALLEN buffer\n",
+ tgt_name(tsi->tsi_tgt));
+ RETURN(err_serious(-EPROTO));
+ }
+ req_capsule_set_size(tsi->tsi_pill, &RMF_GETINFO_VAL,
+ RCL_SERVER, *vallen);
+ rc = req_capsule_server_pack(tsi->tsi_pill);
+ if (rc)
+ RETURN(err_serious(rc));
+
+ valout = req_capsule_server_get(tsi->tsi_pill,
+ &RMF_GETINFO_VAL);
+ if (!valout) {
+ CDEBUG(D_IOCTL,
+ "%s: cannot get get-info RPC out buffer\n",
+ tgt_name(tsi->tsi_tgt));
+ RETURN(-ENOMEM);
+ }
+ info = tsi2mdt_info(tsi);
rc = mdt_rpc_fid2path(info, key, keylen, valout, *vallen);
mdt_thread_info_fini(info);
+ } else if (KEY_IS(KEY_FIEMAP)) {
+ rc = mdt_fiemap_get(tsi);
} else {
- rc = -EINVAL;
+ rc = err_serious(-EOPNOTSUPP);
}
RETURN(rc);
}
ktime_t kstart);
int mdt_punch_hdl(struct tgt_session_info *tsi);
int mdt_fallocate_hdl(struct tgt_session_info *tsi);
+int mdt_fiemap_get(struct tgt_session_info *tsi);
int mdt_glimpse_enqueue(struct mdt_thread_info *mti, struct ldlm_namespace *ns,
struct ldlm_lock **lockp, __u64 flags);
int mdt_brw_enqueue(struct mdt_thread_info *info, struct ldlm_namespace *ns,
return rc;
}
+/*
+ * Check if there is any sparse area in DoM segment.
+ *
+ * Walks the fm_mapped_extents entries of \a fiemap in array order and keeps
+ * in 'begin' the end offset of the previous extent (fm_start initially).
+ *
+ * \retval non-zero if an extent starts beyond 'begin', i.e. a gap precedes
+ * it, or if the last extent ends before fm_start + fm_length
+ * \retval 0 otherwise
+ *
+ * NOTE(review): fm_start + fm_length is not checked for wrap-around;
+ * confirm that callers never pass a length that overflows the sum.
+ */
+static int dom_has_zero_regions(struct fiemap *fiemap)
+{
+ struct fiemap_extent *fiemap_start = fiemap->fm_extents;
+ __u64 begin = fiemap->fm_start;
+ unsigned int i;
+
+ for (i = 0; i < fiemap->fm_mapped_extents; i++) {
+ if (fiemap_start[i].fe_logical > begin)
+ return true;
+ begin = fiemap_start[i].fe_logical + fiemap_start[i].fe_length;
+ }
+ return begin < (fiemap->fm_start + fiemap->fm_length);
+}
+/*
+ * Find the MDT object for \a fid and fill \a fiemap through dt_fiemap_get()
+ * while the object is held under mdt_dom_read_lock().
+ *
+ * \retval result of dt_fiemap_get() when the object passes all checks
+ * \retval -ENOENT if the object does not exist
+ * \retval -EREMOTE if the object is remote
+ * \retval -EBADF if the object is not a regular file
+ * \retval PTR_ERR() value if mdt_object_find() fails
+ */
+int mdt_dom_fiemap(const struct lu_env *env, struct mdt_device *mdt,
+ const struct lu_fid *fid, struct fiemap *fiemap)
+{
+ struct mdt_object *mo;
+ int rc;
+
+ ENTRY;
+
+ mo = mdt_object_find(env, mdt, fid);
+ if (IS_ERR(mo))
+ RETURN(PTR_ERR(mo));
+
+ mdt_dom_read_lock(mo);
+ if (!mdt_object_exists(mo))
+ GOTO(out, rc = -ENOENT);
+ if (mdt_object_remote(mo))
+ GOTO(out, rc = -EREMOTE);
+ if (!S_ISREG(lu_object_attr(&mo->mot_obj)))
+ GOTO(out, rc = -EBADF);
+
+ rc = dt_fiemap_get(env, mdt_obj2dt(mo), fiemap);
+out:
+ mdt_dom_read_unlock(mo); /* every path after the lock is taken leaves through here */
+ lu_object_put(env, &mo->mot_obj);
+ RETURN(rc);
+}
+/**
+ * Get FIEMAP (FIle Extent MAPping) for object with the given FID.
+ *
+ * This function returns a list of extents which describes how a file's
+ * blocks are laid out on the disk.
+ *
+ * The reply buffer is sized from the extent count found in the request key.
+ * The mapping is first fetched without a DLM lock. If the client asked for
+ * server-side locking (OBD_FL_SRVLOCK) and the result contains sparse
+ * areas, a PR data lock is taken, the mapping is fetched again under it and
+ * the lock is released.
+ *
+ * \param[in] tsi	target session environment for this request
+ *
+ * \retval		0 if \a fiemap is filled with data successfully
+ * \retval		negative value on error
+ */
+int mdt_fiemap_get(struct tgt_session_info *tsi)
+{
+	struct ldlm_namespace *ns = tsi->tsi_tgt->lut_obd->obd_namespace;
+	struct obd_export *exp = tsi->tsi_exp;
+	struct mdt_device *mdt = mdt_dev(exp->exp_obd->obd_lu_dev);
+	struct ll_fiemap_info_key *fm_key;
+	struct obdo *oa;
+	struct fiemap *fiemap;
+	const struct lu_fid *fid;
+	int rlen, rc;
+	bool srvlock;
+
+	ENTRY;
+
+	req_capsule_extend(tsi->tsi_pill, &RQF_OST_GET_INFO_FIEMAP);
+
+	fm_key = req_capsule_client_get(tsi->tsi_pill, &RMF_FIEMAP_KEY);
+	if (!fm_key)
+		RETURN(err_serious(-EPROTO));
+
+	/* reply carries as many extents as the client asked for */
+	rlen = fiemap_count_to_size(fm_key->lfik_fiemap.fm_extent_count);
+	req_capsule_set_size(tsi->tsi_pill, &RMF_FIEMAP_VAL, RCL_SERVER, rlen);
+	rc = req_capsule_server_pack(tsi->tsi_pill);
+	if (rc)
+		RETURN(err_serious(rc));
+
+	fiemap = req_capsule_server_get(tsi->tsi_pill, &RMF_FIEMAP_VAL);
+	if (!fiemap)
+		RETURN(-ENOMEM);
+
+	oa = &fm_key->lfik_oa;
+	rc = tgt_validate_obdo(tsi, oa);
+	if (rc)
+		RETURN(rc);
+
+	fid = &oa->o_oi.oi_fid;
+	/* seed the reply with the request header (start, length, flags) */
+	*fiemap = fm_key->lfik_fiemap;
+
+	CDEBUG(D_INODE, "get FIEMAP of object "DFID"\n", PFID(fid));
+	rc = mdt_dom_fiemap(tsi->tsi_env, mdt, fid, fiemap);
+	if (rc)
+		RETURN(rc);
+
+	srvlock = (exp_connect_flags(exp) & OBD_CONNECT_SRVLOCK) &&
+		  oa->o_valid & OBD_MD_FLFLAGS &&
+		  oa->o_flags & OBD_FL_SRVLOCK;
+
+	if (srvlock && dom_has_zero_regions(fiemap)) {
+		__u64 flg = 0;
+		struct lustre_handle lh = { 0 };
+
+		CDEBUG(D_OTHER, "FIEMAP: lock "DFID" due to sparse areas\n",
+		       PFID(fid));
+		fid_build_reg_res_name(fid, &tsi->tsi_resid);
+		rc = tgt_mdt_data_lock(ns, &tsi->tsi_resid, &lh, LCK_PR, &flg);
+		if (!rc) {
+			rc = mdt_dom_fiemap(tsi->tsi_env, mdt, fid, fiemap);
+			/* drop the PR reference taken above, otherwise the
+			 * lock is leaked on every such request
+			 */
+			tgt_data_unlock(&lh, LCK_PR);
+		}
+	}
+
+	RETURN(rc);
+}
+
static int mdt_object_punch(const struct lu_env *env, struct dt_device *dt,
struct dt_object *dob, __u64 start, __u64 end,
struct lu_attr *la)
RETURN_EXIT;
}
+
static const struct req_msg_field *mds_getinfo_client[] = {
&RMF_PTLRPC_BODY,
&RMF_GETINFO_KEY,
+};
+
+static const struct req_msg_field *mds_fid2path_client[] = {
+ &RMF_PTLRPC_BODY,
+ &RMF_GETINFO_KEY,
&RMF_GETINFO_VALLEN
};
&RQF_MDS_CONNECT,
&RQF_MDS_DISCONNECT,
&RQF_MDS_GET_INFO,
+ &RQF_MDS_FID2PATH,
&RQF_MDS_GET_ROOT,
&RQF_MDS_STATFS,
&RQF_MDS_STATFS_NEW,
mds_getinfo_server);
EXPORT_SYMBOL(RQF_MDS_GET_INFO);
+struct req_format RQF_MDS_FID2PATH =
+ DEFINE_REQ_FMT0("MDS_FID2PATH", mds_fid2path_client,
+ mds_getinfo_server);
+EXPORT_SYMBOL(RQF_MDS_FID2PATH);
+
struct req_format RQF_MDS_BATCH =
DEFINE_REQ_FMT0("MDS_BATCH", mds_batch_client,
mds_batch_server);
}
run_test 130h "FIEMAP deadlock"
+test_130i() {
+ (( $MDS1_VERSION >= $(version_code 2.15.63.195) )) ||
+ skip "Need MDS version at least 2.15.63.195 for DoM support"
+ local filefrag_op=$(filefrag -l 2>&1 | grep "invalid option")
+ [[ -z "$filefrag_op" ]] || skip_env "filefrag missing Lustre support"
+ [[ "$ost1_FSTYPE" != "zfs" ]] ||
+ skip "LU-1941: FIEMAP unimplemented on ZFS"
+
+ local dom_file=$DIR/$tfile
+
+ stack_trap "rm -f $dom_file"
+
+ $LFS setstripe -E 1M -L mdt -E -1 -c2 -S 131072 -o1,0 $dom_file ||
+ error "setstripe on $dom_file"
+
+ local blks=$((128 * 3))
+ local expected=$(((blks / 3) * 4))
+
+ for ((i = 0; i < $blks; i++)); do
+ dd if=/dev/zero of=$dom_file count=1 bs=4k seek=$((2 * i)) \
+ conv=notrunc > /dev/null 2>&1 ||
+ error "dd failed to $dom_file"
+ done
+
+ filefrag -ves $dom_file | (head -7; echo ; tail -5)
+ (( ! ${PIPESTATUS[0]} )) || error "filefrag $dom_file failed"
+
+ filefrag_op=$(filefrag -ve -k $dom_file |
+ sed -n '/ext:/,/found/{/ext:/d; /found/d; p}')
+
+ local last_lun=$(echo $filefrag_op | cut -d: -f5)
+ local lun_len=0
+ local num_luns=1
+
+ while IFS=$'\n' read line; do
+ local frag_lun=$(echo $line | cut -d: -f5)
+ local ext_len=$(echo $line | cut -d: -f4)
+
+ if (( $frag_lun != $last_lun )); then
+ if (( lun_len != expected )); then
+ error "dev #$last_lun: $lun_len != $expected"
+ else
+ (( num_luns += 1 ))
+ lun_len=0
+ fi
+ fi
+ (( lun_len += ext_len ))
+ last_lun=$frag_lun
+ done <<< "$filefrag_op"
+
+ if (( num_luns != 3 )); then
+ error "num devices: $num_luns, but 3 expected"
+ fi
+ if (( lun_len != expected )); then
+ error "dev #$last_lun: $lun_len != $expected"
+ fi
+ echo "FIEMAP on DoM file succeeded"
+}
+run_test 130i "FIEMAP (DoM file)"
+
# Test for writev/readv
test_131a() {
rwv -f $DIR/$tfile -w -n 3 524288 1048576 1572864 ||