+ struct lov_device *dev = lu2lov_dev(lov2lu(lov)->lo_dev);
+ struct lov_thread_info *lti = lov_env_info(env);
+ struct lu_fid *ofid = <i->lti_fid;
+ struct lov_oinfo *oinfo;
+ struct cl_device *subdev;
+ int entry = lov_comp_entry(index);
+ int stripe = lov_comp_stripe(index);
+ int ost_idx;
+ int rc;
+ struct cl_object *result;
+
+ if (lov->lo_type != LLT_COMP)
+ GOTO(out, result = NULL);
+
+ if (entry >= lsm->lsm_entry_count ||
+ stripe >= lsm->lsm_entries[entry]->lsme_stripe_count)
+ GOTO(out, result = NULL);
+
+ oinfo = lsm->lsm_entries[entry]->lsme_oinfo[stripe];
+ ost_idx = oinfo->loi_ost_idx;
+ rc = ostid_to_fid(ofid, &oinfo->loi_oi, ost_idx);
+ if (rc != 0)
+ GOTO(out, result = NULL);
+
+ subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
+ result = lov_sub_find(env, subdev, ofid, NULL);
+out:
+ if (result == NULL)
+ result = ERR_PTR(-EINVAL);
+ return result;
+}
+
+/**
+ * Collect the FIEMAP extents of a single stripe of one layout component.
+ *
+ * Computes the part of fs->fs_ext that falls on stripe @stripeno of component
+ * @index, looks up the sub-object backing that stripe and calls
+ * cl_object_fiemap() on it, repeating the call until the requested range is
+ * covered, the object end is reached, or no more extents are wanted. Extents
+ * returned by each call are copied into @fiemap with
+ * fiemap_prepare_and_copy_exts().
+ *
+ * \param env [in] lustre environment
+ * \param obj [in] file object
+ * \param lsm [in] striping metadata of @obj
+ * \param fiemap [in/out] caller's fiemap buffer; fm_extent_count and
+ * fm_flags are read, mapped extents are copied in
+ * \param buflen [out] overwritten before every sub-object call with the
+ * size needed for the extents requested in that call
+ * \param fmkey [in/out] request key; lfik_fiemap is overwritten with the
+ * per-call header and lfik_oa.o_size is read
+ * \param index [in] index of the component in lsm->lsm_entries
+ * \param stripe_last [in] added to @stripeno and handed to
+ * fiemap_prepare_and_copy_exts()
+ * \param stripeno [in] stripe number inside the component
+ * \param fs [in/out] iteration state shared with the caller
+ *
+ * \retval 0 success, including the case where nothing had to
+ * be mapped on this stripe
+ * \retval -EIO the stripe has a dummy object info
+ * \retval -EINVAL the OST index of the stripe is out of range
+ * \retval other error from lov_find_subobj(), or the non-zero result
+ * of cl_object_fiemap() returned unchanged
+ */
+ static int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj,
+ struct lov_stripe_md *lsm, struct fiemap *fiemap,
+ size_t *buflen, struct ll_fiemap_info_key *fmkey,
+ int index, int stripe_last, int stripeno,
+ struct fiemap_state *fs)
+{
+ struct lov_stripe_md_entry *lsme = lsm->lsm_entries[index];
+ struct cl_object *subobj;
+ struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
+ struct fiemap_extent *fm_ext = &fs->fs_fm->fm_extents[0];
+ u64 req_fm_len; /* max requested extent coverage */
+ u64 len_mapped_single_call;
+ u64 obd_start;
+ u64 obd_end;
+ unsigned int ext_count;
+ /* EOF for object */
+ bool ost_eof = false;
+ /* done with required mapping for this OST? */
+ bool ost_done = false;
+ int ost_index;
+ int rc = 0;
+
+ fs->fs_device_done = false;
+ /* Find out range of mapping on this stripe */
+ if ((lov_stripe_intersects(lsm, index, stripeno, &fs->fs_ext,
+ &obd_start, &obd_end)) == 0)
+ return 0;
+
+ if (lov_oinfo_is_dummy(lsme->lsme_oinfo[stripeno]))
+ return -EIO;
+
+ /* If this is a continuation FIEMAP call and we are on the
+ * starting stripe then obd_start needs to be set to
+ * end_offset */
+ if (fs->fs_end_offset != 0 && stripeno == fs->fs_start_stripe)
+ obd_start = fs->fs_end_offset;
+
+ /* Nothing left to map on this stripe: the start already sits at the
+ * stripe offset corresponding to the end of the requested extent */
+ if (lov_size_to_stripe(lsm, index, fs->fs_ext.e_end, stripeno) ==
+ obd_start)
+ return 0;
+
+ req_fm_len = obd_end - obd_start + 1;
+ fs->fs_fm->fm_length = 0;
+ len_mapped_single_call = 0;
+
+ /* find the sub-object backing this stripe */
+ subobj = lov_find_subobj(env, cl2lov(obj), lsm,
+ lov_comp_index(index, stripeno));
+ if (IS_ERR(subobj))
+ return PTR_ERR(subobj);
+ /* From here on every exit must go through obj_put so that the
+ * sub-object is released with cl_object_put().
+ *
+ * If the output buffer is very large and the objects have many
+ * extents we may need to loop on a single OST repeatedly */
+ do {
+ if (fiemap->fm_extent_count > 0) {
+ /* Don't get too many extents. */
+ if (fs->fs_cur_extent + fs->fs_cnt_need >
+ fiemap->fm_extent_count)
+ fs->fs_cnt_need = fiemap->fm_extent_count -
+ fs->fs_cur_extent;
+ }
+
+ /* Advance past what the previous iteration mapped and shrink
+ * the remaining request length accordingly */
+ obd_start += len_mapped_single_call;
+ fs->fs_fm->fm_length = req_fm_len - len_mapped_single_call;
+ req_fm_len = fs->fs_fm->fm_length;
+ /**
+ * If we've collected enough extent map, we'd request 1 more,
+ * to see whether we coincidentally finished all available
+ * extent map, so that FIEMAP_EXTENT_LAST would be set.
+ */
+ fs->fs_fm->fm_extent_count = fs->fs_enough ?
+ 1 : fs->fs_cnt_need;
+ fs->fs_fm->fm_mapped_extents = 0;
+ fs->fs_fm->fm_flags = fiemap->fm_flags;
+
+ ost_index = lsme->lsme_oinfo[stripeno]->loi_ost_idx;
+
+ if (ost_index < 0 || ost_index >= lov->desc.ld_tgt_count)
+ GOTO(obj_put, rc = -EINVAL);
+ /* If OST is inactive, return extent with UNKNOWN flag. */
+ if (!lov->lov_tgts[ost_index]->ltd_active) {
+ fs->fs_fm->fm_flags |= FIEMAP_EXTENT_LAST;
+ fs->fs_fm->fm_mapped_extents = 1;
+
+ fm_ext[0].fe_logical = obd_start;
+ fm_ext[0].fe_length = obd_end - obd_start + 1;
+ fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+
+ /* skip the sub-object call, process the fake extent */
+ goto inactive_tgt;
+ }
+
+ fs->fs_fm->fm_start = obd_start;
+ fs->fs_fm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ memcpy(&fmkey->lfik_fiemap, fs->fs_fm, sizeof(*fs->fs_fm));
+ *buflen = fiemap_count_to_size(fs->fs_fm->fm_extent_count);
+
+ rc = cl_object_fiemap(env, subobj, fmkey, fs->fs_fm, buflen);
+ if (rc != 0)
+ GOTO(obj_put, rc);
+inactive_tgt:
+ ext_count = fs->fs_fm->fm_mapped_extents;
+ if (ext_count == 0) {
+ ost_done = true;
+ fs->fs_device_done = true;
+ /* If the last stripe has a hole at the end,
+ * we need to return */
+ if (stripeno == fs->fs_last_stripe) {
+ fiemap->fm_mapped_extents = 0;
+ fs->fs_finish_stripe = true;
+ GOTO(obj_put, rc);
+ }
+ break;
+ } else if (fs->fs_enough) {
+ /*
+ * We've collected enough extents and there are
+ * more extents after it.
+ */
+ GOTO(obj_put, rc);
+ }
+
+ /* If we just need the number of extents, go to next device */
+ if (fiemap->fm_extent_count == 0) {
+ fs->fs_cur_extent += ext_count;
+ break;
+ }
+
+ /* prepare to copy retrieved map extents */
+ len_mapped_single_call = fm_ext[ext_count - 1].fe_logical +
+ fm_ext[ext_count - 1].fe_length -
+ obd_start;
+
+ /* Have we finished mapping on this device? */
+ if (req_fm_len <= len_mapped_single_call) {
+ ost_done = true;
+ fs->fs_device_done = true;
+ }
+
+ /* Clear the EXTENT_LAST flag which can be present on
+ * the last extent */
+ if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST)
+ fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST;
+ /* The end of the last returned extent, converted with
+ * lov_stripe_size(), reaches o_size: treat as object EOF */
+ if (lov_stripe_size(lsm, index,
+ fm_ext[ext_count - 1].fe_logical +
+ fm_ext[ext_count - 1].fe_length,
+ stripeno) >= fmkey->lfik_oa.o_size) {
+ ost_eof = true;
+ fs->fs_device_done = true;
+ }
+
+ fiemap_prepare_and_copy_exts(fiemap, fm_ext, ost_index,
+ ext_count, fs->fs_cur_extent,
+ stripe_last + stripeno);
+ fs->fs_cur_extent += ext_count;
+
+ /* Ran out of available extents? */
+ if (fs->fs_cur_extent >= fiemap->fm_extent_count)
+ fs->fs_enough = true;
+ } while (!ost_done && !ost_eof);
+
+ /* Tell the caller that the last stripe of the range was processed */
+ if (stripeno == fs->fs_last_stripe)
+ fs->fs_finish_stripe = true;
+obj_put:
+ cl_object_put(env, subobj);
+
+ return rc;
+}
+
+/**
+ * Break down the FIEMAP request and send appropriate calls to individual OSTs.
+ * This also handles the restarting of FIEMAP calls in case mapping overflows
+ * the available number of extents in single call.
+ *
+ * The function walks the layout components overlapping the requested range
+ * and calls fiemap_for_stripe() for every stripe involved. A temporary
+ * fiemap buffer of at most FIEMAP_BUFFER_SIZE bytes is allocated for the
+ * per-stripe calls and freed before returning.
+ *
+ * \param env [in] lustre environment
+ * \param obj [in] file object
+ * \param fmkey [in] fiemap request header and other info
+ * \param fiemap [out] fiemap buffer holding retrieved map extents
+ * \param buflen [in/out] max buffer length of @fiemap, when iterate
+ * each OST, it is used to limit max map needed
+ *
+ * \retval 0 success (also when the object has no layout)
+ * \retval -ENOTSUPP multi-stripe or multi-component layout requested
+ * without FIEMAP_FLAG_DEVICE_ORDER, or DOM layout
+ * \retval -ENOMEM the temporary fiemap buffer cannot be allocated
+ * \retval -EINVAL fm_start is beyond the file size, or the resume
+ * position cannot be located in the layout
+ * \retval < 0 error returned by fiemap_for_stripe()
+ */
+ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
+			      struct ll_fiemap_info_key *fmkey,
+			      struct fiemap *fiemap, size_t *buflen)
+{
+	struct lov_stripe_md_entry *lsme;
+	struct lov_stripe_md *lsm;
+	struct fiemap *fm_local = NULL;
+	loff_t whole_start;
+	loff_t whole_end;
+	int entry;
+	int start_entry = -1;
+	int end_entry;
+	int cur_stripe = 0;
+	int stripe_count;
+	unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
+	int rc = 0;
+	struct fiemap_state fs = { 0 };
+	struct lu_extent range;
+	int cur_ext;
+	int stripe_last;
+	int start_stripe = 0;
+	bool resume = false;
+	ENTRY;
+
+	lsm = lov_lsm_addref(cl2lov(obj));
+	if (lsm == NULL) {
+		/* no extent: there is no object for mapping */
+		fiemap->fm_mapped_extents = 0;
+		RETURN(0);
+	}
+
+	if (!(fiemap->fm_flags & FIEMAP_FLAG_DEVICE_ORDER)) {
+		/**
+		 * If the entry count > 1 or stripe_count > 1 and the
+		 * application does not understand DEVICE_ORDER flag,
+		 * it cannot interpret the extents correctly.
+		 */
+		if (lsm->lsm_entry_count > 1 ||
+		    (lsm->lsm_entry_count == 1 &&
+		     lsm->lsm_entries[0]->lsme_stripe_count > 1))
+			GOTO(out_lsm, rc = -ENOTSUPP);
+	}
+
+	/* No support for DOM layout yet. */
+	if (lsme_is_dom(lsm->lsm_entries[0]))
+		GOTO(out_lsm, rc = -ENOTSUPP);
+
+	if (lsm->lsm_is_released) {
+		if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
+			/**
+			 * released file, return a minimal FIEMAP if
+			 * request fits in file-size.
+			 */
+			fiemap->fm_mapped_extents = 1;
+			fiemap->fm_extents[0].fe_logical = fiemap->fm_start;
+			if (fiemap->fm_start + fiemap->fm_length <
+			    fmkey->lfik_oa.o_size)
+				fiemap->fm_extents[0].fe_length =
+					fiemap->fm_length;
+			else
+				fiemap->fm_extents[0].fe_length =
+					fmkey->lfik_oa.o_size -
+					fiemap->fm_start;
+			fiemap->fm_extents[0].fe_flags |=
+				FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
+		}
+		GOTO(out_lsm, rc = 0);
+	}
+
+	/* Shrink the local buffer when fm_extent_count extents need less. */
+	if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
+		buffer_size = fiemap_count_to_size(fiemap->fm_extent_count);
+
+	OBD_ALLOC_LARGE(fm_local, buffer_size);
+	if (fm_local == NULL)
+		GOTO(out_lsm, rc = -ENOMEM);
+
+	/**
+	 * Requested extent count exceeds the fiemap buffer size, shrink our
+	 * ambition.
+	 */
+	if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
+		fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
+
+	fs.fs_enough = false;
+	fs.fs_cur_extent = 0;
+	fs.fs_fm = fm_local;
+	fs.fs_cnt_need = fiemap_size_to_count(buffer_size);
+
+	whole_start = fiemap->fm_start;
+	/* whole_start is beyond the end of the file */
+	if (whole_start > fmkey->lfik_oa.o_size)
+		GOTO(out_fm_local, rc = -EINVAL);
+	whole_end = (fiemap->fm_length == OBD_OBJECT_EOF) ?
+			fmkey->lfik_oa.o_size + 1 :
+			whole_start + fiemap->fm_length;
+	/**
+	 * If fiemap->fm_length != OBD_OBJECT_EOF but whole_end exceeds file
+	 * size
+	 */
+	if (whole_end > fmkey->lfik_oa.o_size + 1)
+		whole_end = fmkey->lfik_oa.o_size + 1;
+
+	/**
+	 * the high 16bits of fe_device remember which stripe the last
+	 * call has been arrived, we'd continue from there in this call.
+	 */
+	if (fiemap->fm_extent_count && fiemap->fm_extents[0].fe_logical)
+		resume = true;
+	stripe_last = get_fe_stripenr(&fiemap->fm_extents[0]);
+	/**
+	 * stripe_last records stripe number we've been processed in the last
+	 * call
+	 */
+	end_entry = lsm->lsm_entry_count - 1;
+	cur_stripe = 0;
+	for (entry = 0; entry <= end_entry; entry++) {
+		lsme = lsm->lsm_entries[entry];
+		if (cur_stripe + lsme->lsme_stripe_count >= stripe_last) {
+			start_entry = entry;
+			start_stripe = stripe_last - cur_stripe;
+			break;
+		}
+
+		cur_stripe += lsme->lsme_stripe_count;
+	}
+	if (start_entry == -1) {
+		CERROR(DFID": FIEMAP does not init start entry, cur_stripe=%d, "
+		       "stripe_last=%d\n", PFID(lu_object_fid(&obj->co_lu)),
+		       cur_stripe, stripe_last);
+		GOTO(out_fm_local, rc = -EINVAL);
+	}
+	/**
+	 * @start_entry & @start_stripe record the position of fiemap
+	 * resumption, @stripe_last keeps recording the absolute position
+	 * we are processing. @resume indicates we'd honor @start_stripe.
+	 */
+
+	range.e_start = whole_start;
+	range.e_end = whole_end;
+
+	for (entry = start_entry; entry <= end_entry; entry++) {
+		/* remember to update stripe_last accordingly */
+		lsme = lsm->lsm_entries[entry];
+
+		/* FLR could contain component holes between entries */
+		if (!lsme_inited(lsme)) {
+			stripe_last += lsme->lsme_stripe_count;
+			resume = false;
+			continue;
+		}
+
+		if (!lu_extent_is_overlapped(&range, &lsme->lsme_extent)) {
+			stripe_last += lsme->lsme_stripe_count;
+			resume = false;
+			continue;
+		}
+
+		/* prepare for a component entry iteration */
+		if (lsme->lsme_extent.e_start > whole_start)
+			fs.fs_ext.e_start = lsme->lsme_extent.e_start;
+		else
+			fs.fs_ext.e_start = whole_start;
+		if (lsme->lsme_extent.e_end > whole_end)
+			fs.fs_ext.e_end = whole_end;
+		else
+			fs.fs_ext.e_end = lsme->lsme_extent.e_end;
+
+		/* Calculate start stripe, last stripe and length of mapping */
+		if (resume) {
+			fs.fs_start_stripe = start_stripe;
+			/* put stripe_last to the first stripe of the comp */
+			stripe_last -= start_stripe;
+			resume = false;
+		} else {
+			fs.fs_start_stripe = lov_stripe_number(lsm, entry,
+							fs.fs_ext.e_start);
+		}
+		fs.fs_last_stripe = fiemap_calc_last_stripe(lsm, entry,
+						&fs.fs_ext, fs.fs_start_stripe,
+						&stripe_count);
+		/**
+		 * A new mirror component is under process, reset
+		 * fs.fs_end_offset and then fiemap_for_stripe() starts from
+		 * the overlapping extent, otherwise starts from
+		 * fs.fs_end_offset.
+		 */
+		if (entry > start_entry && lsme->lsme_extent.e_start == 0) {
+			/* new mirror */
+			fs.fs_end_offset = 0;
+		} else {
+			fs.fs_end_offset = fiemap_calc_fm_end_offset(fiemap,
+						lsm, entry, &fs.fs_ext,
+						&fs.fs_start_stripe);
+		}
+
+		/* Check each stripe */
+		for (cur_stripe = fs.fs_start_stripe; stripe_count > 0;
+		     --stripe_count,
+		     cur_stripe = (cur_stripe + 1) % lsme->lsme_stripe_count) {
+			/* reset fs_finish_stripe */
+			fs.fs_finish_stripe = false;
+			rc = fiemap_for_stripe(env, obj, lsm, fiemap, buflen,
+					       fmkey, entry, stripe_last,
+					       cur_stripe, &fs);
+			if (rc < 0)
+				GOTO(out_fm_local, rc);
+			if (fs.fs_enough) {
+				stripe_last += cur_stripe;
+				GOTO(finish, rc);
+			}
+			if (fs.fs_finish_stripe)
+				break;
+		} /* for each stripe */
+		stripe_last += lsme->lsme_stripe_count;
+	} /* for covering layout component entry */
+
+finish:
+	if (fs.fs_cur_extent > 0)
+		cur_ext = fs.fs_cur_extent - 1;
+	else
+		cur_ext = 0;
+
+	/* done all the processing */
+	if (entry > end_entry)
+		fiemap->fm_extents[cur_ext].fe_flags |= FIEMAP_EXTENT_LAST;
+
+	/* Indicate that we are returning device offsets unless file just has
+	 * single stripe */
+	if (lsm->lsm_entry_count > 1 ||
+	    (lsm->lsm_entry_count == 1 &&
+	     lsm->lsm_entries[0]->lsme_stripe_count > 1))
+		fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
+
+	fiemap->fm_mapped_extents = fs.fs_cur_extent;
+out_fm_local:
+	OBD_FREE_LARGE(fm_local, buffer_size);
+
+out_lsm:
+	lov_lsm_put(lsm);
+	RETURN(rc);
+}
+
+/**
+ * Copy the striping information of @obj to the user buffer @lum.
+ *
+ * Takes a reference on the object's layout for the duration of the
+ * lov_getstripe() call and drops it afterwards.
+ *
+ * \retval -ENODATA the object has no layout
+ * \retval other result of lov_getstripe()
+ */
+ static int lov_object_getstripe(const struct lu_env *env, struct cl_object *obj,
+				 struct lov_user_md __user *lum, size_t size)
+{
+	struct lov_object *lov_obj = cl2lov(obj);
+	struct lov_stripe_md *layout;
+	int result;
+	ENTRY;
+
+	layout = lov_lsm_addref(lov_obj);
+	if (!layout)
+		RETURN(-ENODATA);
+
+	result = lov_getstripe(env, cl2lov(obj), layout, lum, size);
+	lov_lsm_put(layout);
+
+	RETURN(result);
+}
+
+/**
+ * Describe the layout of @obj in @cl and pack it into cl->cl_buf.
+ *
+ * Without a layout only cl_size (0) and cl_layout_gen
+ * (CL_LAYOUT_GEN_EMPTY) are filled in and 0 is returned. Otherwise the
+ * size, generation, released and composite attributes are copied from the
+ * layout and the value of lov_lsm_pack() is returned.
+ */
+ static int lov_object_layout_get(const struct lu_env *env,
+				  struct cl_object *obj,
+				  struct cl_layout *cl)
+{
+	struct lov_stripe_md *layout = lov_lsm_addref(cl2lov(obj));
+	struct lu_buf *dst = &cl->cl_buf;
+	ssize_t packed;
+	ENTRY;
+
+	if (!layout) {
+		cl->cl_size = 0;
+		cl->cl_layout_gen = CL_LAYOUT_GEN_EMPTY;
+
+		RETURN(0);
+	}
+
+	cl->cl_size = lov_comp_md_size(layout);
+	cl->cl_layout_gen = layout->lsm_layout_gen;
+	cl->cl_is_released = layout->lsm_is_released;
+	cl->cl_is_composite = lsm_is_composite(layout->lsm_magic);
+
+	/* pack first, then drop the reference taken above */
+	packed = lov_lsm_pack(layout, dst->lb_buf, dst->lb_len);
+	lov_lsm_put(layout);
+
+	/* return error or number of bytes */
+	RETURN(packed);
+}
+
+/**
+ * Return lsm_maxbytes of the layout of @obj, or LLONG_MAX when the object
+ * has no layout.
+ */
+ static loff_t lov_object_maxbytes(struct cl_object *obj)
+{
+	struct lov_stripe_md *layout = lov_lsm_addref(cl2lov(obj));
+	loff_t limit = LLONG_MAX;
+
+	if (layout != NULL) {
+		limit = layout->lsm_maxbytes;
+		lov_lsm_put(layout);
+	}
+
+	return limit;
+}
+
+static int lov_object_flush(const struct lu_env *env, struct cl_object *obj,
+ struct ldlm_lock *lock)
+{
+ return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_flush, true, env, obj,
+ lock);