+#define FIEMAP_BUFFER_SIZE 4096
+
+/**
+ * Non-zero fe_logical indicates that this is a continuation FIEMAP
+ * call. The local end offset and the device are sent in the first
+ * fm_extent. This function calculates the stripe number from the index.
+ * This function returns a stripe_no on which mapping is to be restarted.
+ *
+ * This function returns fm_end_offset which is the in-OST offset at which
+ * mapping should be restarted. If fm_end_offset=0 is returned then caller
+ * will re-calculate proper offset in next stripe.
+ * Note that the first extent is passed to lov_get_info via the value field.
+ *
+ * \param fiemap fiemap request header
+ * \param lsm striping information for the file
+ * \param fm_start logical start of mapping
+ * \param fm_end logical end of mapping
+ * \param start_stripe starting stripe will be returned in this
+ */
+obd_size fiemap_calc_fm_end_offset(struct ll_user_fiemap *fiemap,
+ struct lov_stripe_md *lsm, obd_size fm_start,
+ obd_size fm_end, int *start_stripe)
+{
+ obd_size local_end = fiemap->fm_extents[0].fe_logical;
+ obd_off lun_start, lun_end;
+ obd_size fm_end_offset;
+ int stripe_no = -1, i;
+
+ if (fiemap->fm_extent_count == 0 ||
+ fiemap->fm_extents[0].fe_logical == 0)
+ return 0;
+
+ /* Find out stripe_no from ost_index saved in the fe_device */
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ if (lsm->lsm_oinfo[i]->loi_ost_idx ==
+ fiemap->fm_extents[0].fe_device) {
+ stripe_no = i;
+ break;
+ }
+ }
+
+ /* If we have finished mapping on previous device, shift logical
+ * offset to start of next device */
+ if ((lov_stripe_intersects(lsm, stripe_no, fm_start, fm_end,
+ &lun_start, &lun_end)) != 0 &&
+ local_end < lun_end) {
+ fm_end_offset = local_end;
+ *start_stripe = stripe_no;
+ } else {
+ /* This is a special value to indicate that caller should
+ * calculate offset in next stripe. */
+ fm_end_offset = 0;
+ *start_stripe = (stripe_no + 1) % lsm->lsm_stripe_count;
+ }
+
+ return fm_end_offset;
+}
+
+/**
+ * We calculate on which OST the mapping will end. If the length of mapping
+ * is greater than (stripe_size * stripe_count) then the last_stripe will
+ * will be one just before start_stripe. Else we check if the mapping
+ * intersects each OST and find last_stripe.
+ * This function returns the last_stripe and also sets the stripe_count
+ * over which the mapping is spread
+ *
+ * \param lsm striping information for the file
+ * \param fm_start logical start of mapping
+ * \param fm_end logical end of mapping
+ * \param start_stripe starting stripe of the mapping
+ * \param stripe_count the number of stripes across which to map is returned
+ *
+ * \retval last_stripe return the last stripe of the mapping
+ */
+int fiemap_calc_last_stripe(struct lov_stripe_md *lsm, obd_size fm_start,
+ obd_size fm_end, int start_stripe,
+ int *stripe_count)
+{
+ int last_stripe;
+ obd_off obd_start, obd_end;
+ int i, j;
+
+ if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
+ last_stripe = (start_stripe < 1 ? lsm->lsm_stripe_count - 1 :
+ start_stripe - 1);
+ *stripe_count = lsm->lsm_stripe_count;
+ } else {
+ for (j = 0, i = start_stripe; j < lsm->lsm_stripe_count;
+ i = (i + 1) % lsm->lsm_stripe_count, j++) {
+ if ((lov_stripe_intersects(lsm, i, fm_start, fm_end,
+ &obd_start, &obd_end)) == 0)
+ break;
+ }
+ *stripe_count = j;
+ last_stripe = (start_stripe + j - 1) %lsm->lsm_stripe_count;
+ }
+
+ return last_stripe;
+}
+
+/**
+ * Set fe_device and copy extents from local buffer into main return buffer.
+ *
+ * \param fiemap fiemap request header
+ * \param lcl_fm_ext array of local fiemap extents to be copied
+ * \param ost_index OST index to be written into the fm_device field for each
+ extent
+ * \param ext_count number of extents to be copied
+ * \param current_extent where to start copying in main extent array
+ */
+void fiemap_prepare_and_copy_exts(struct ll_user_fiemap *fiemap,
+ struct ll_fiemap_extent *lcl_fm_ext,
+ int ost_index, unsigned int ext_count,
+ int current_extent)
+{
+ char *to;
+ int ext;
+
+ for (ext = 0; ext < ext_count; ext++) {
+ lcl_fm_ext[ext].fe_device = ost_index;
+ lcl_fm_ext[ext].fe_flags |= FIEMAP_EXTENT_NET;
+ }
+
+ /* Copy fm_extent's from fm_local to return buffer */
+ to = (char *)fiemap + fiemap_count_to_size(current_extent);
+ memcpy(to, lcl_fm_ext, ext_count * sizeof(struct ll_fiemap_extent));
+}
+
+/**
+ * Break down the FIEMAP request and send appropriate calls to individual OSTs.
+ * This also handles the restarting of FIEMAP calls in case mapping overflows
+ * the available number of extents in single call.
+ */
+static int lov_fiemap(struct lov_obd *lov, __u32 keylen, void *key,
+ __u32 *vallen, void *val, struct lov_stripe_md *lsm)
+{
+ struct ll_fiemap_info_key *fm_key = key;
+ struct ll_user_fiemap *fiemap = val;
+ struct ll_user_fiemap *fm_local = NULL;
+ struct ll_fiemap_extent *lcl_fm_ext;
+ int count_local;
+ unsigned int get_num_extents = 0;
+ int ost_index = 0, actual_start_stripe, start_stripe;
+ obd_size fm_start, fm_end, fm_length, fm_end_offset = 0;
+ obd_size curr_loc;
+ int current_extent = 0, rc = 0, i;
+ int ost_eof = 0; /* EOF for object */
+ int ost_done = 0; /* done with required mapping for this OST? */
+ int last_stripe;
+ int cur_stripe = 0, cur_stripe_wrap = 0, stripe_count;
+ unsigned int buffer_size = FIEMAP_BUFFER_SIZE;
+
+ if (lsm == NULL)
+ GOTO(out, rc = 0);
+
+ if (fiemap_count_to_size(fm_key->fiemap.fm_extent_count) < buffer_size)
+ buffer_size = fiemap_count_to_size(fm_key->fiemap.fm_extent_count);
+
+ OBD_ALLOC(fm_local, buffer_size);
+ if (fm_local == NULL)
+ GOTO(out, rc = -ENOMEM);
+ lcl_fm_ext = &fm_local->fm_extents[0];
+
+ count_local = fiemap_size_to_count(buffer_size);
+
+ memcpy(fiemap, &fm_key->fiemap, sizeof(*fiemap));
+ fm_start = fiemap->fm_start;
+ fm_length = fiemap->fm_length;
+ /* Calculate start stripe, last stripe and length of mapping */
+ actual_start_stripe = start_stripe = lov_stripe_number(lsm, fm_start);
+ fm_end = (fm_length == ~0ULL ? fm_key->oa.o_size :
+ fm_start + fm_length - 1);
+ /* If fm_length != ~0ULL but fm_start+fm_length-1 exceeds file size */
+ if (fm_end > fm_key->oa.o_size)
+ fm_end = fm_key->oa.o_size;
+
+ last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
+ actual_start_stripe, &stripe_count);
+
+ fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end,
+ &start_stripe);
+
+ if (fiemap->fm_extent_count == 0) {
+ get_num_extents = 1;
+ count_local = 0;
+ }
+
+ /* Check each stripe */
+ for (cur_stripe = start_stripe, i = 0; i < stripe_count;
+ i++, cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
+ obd_size req_fm_len; /* Stores length of required mapping */
+ obd_size len_mapped_single_call;
+ obd_off lun_start, lun_end, obd_object_end;
+ unsigned int ext_count;
+
+ cur_stripe_wrap = cur_stripe;
+
+ /* Find out range of mapping on this stripe */
+ if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
+ &lun_start, &obd_object_end)) == 0)
+ continue;
+
+ /* If this is a continuation FIEMAP call and we are on
+ * starting stripe then lun_start needs to be set to
+ * fm_end_offset */
+ if (fm_end_offset != 0 && cur_stripe == start_stripe)
+ lun_start = fm_end_offset;
+
+ if (fm_length != ~0ULL) {
+ /* Handle fm_start + fm_length overflow */
+ if (fm_start + fm_length < fm_start)
+ fm_length = ~0ULL - fm_start;
+ lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
+ cur_stripe);
+ } else {
+ lun_end = ~0ULL;
+ }
+
+ if (lun_start == lun_end)
+ continue;
+
+ req_fm_len = obd_object_end - lun_start;
+ fm_local->fm_length = 0;
+ len_mapped_single_call = 0;
+
+ /* If the output buffer is very large and the objects have many
+ * extents we may need to loop on a single OST repeatedly */
+ ost_eof = 0;
+ ost_done = 0;
+ do {
+ if (get_num_extents == 0) {
+ /* Don't get too many extents. */
+ if (current_extent + count_local >
+ fiemap->fm_extent_count)
+ count_local = fiemap->fm_extent_count -
+ current_extent;
+ }
+
+ lun_start += len_mapped_single_call;
+ fm_local->fm_length = req_fm_len - len_mapped_single_call;
+ req_fm_len = fm_local->fm_length;
+ fm_local->fm_extent_count = count_local;
+ fm_local->fm_mapped_extents = 0;
+ fm_local->fm_flags = fiemap->fm_flags;
+
+ fm_key->oa.o_id = lsm->lsm_oinfo[cur_stripe]->loi_id;
+ ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
+
+ if (ost_index < 0 || ost_index >=lov->desc.ld_tgt_count)
+ GOTO(out, rc = -EINVAL);
+
+ /* If OST is inactive, return extent with UNKNOWN flag */
+ if (lov && !lov->lov_tgts[ost_index]->ltd_active) {
+ fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
+ fm_local->fm_mapped_extents = 1;
+
+ lcl_fm_ext[0].fe_logical = lun_start;
+ lcl_fm_ext[0].fe_length = obd_object_end -
+ lun_start;
+ lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+
+ goto inactive_tgt;
+ }
+
+ fm_local->fm_start = lun_start;
+ fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+ memcpy(&fm_key->fiemap, fm_local, sizeof(*fm_local));
+ *vallen=fiemap_count_to_size(fm_local->fm_extent_count);
+ rc = obd_get_info(lov->lov_tgts[ost_index]->ltd_exp,
+ keylen, key, vallen, fm_local, lsm);
+ if (rc != 0)
+ GOTO(out, rc);
+
+inactive_tgt:
+ ext_count = fm_local->fm_mapped_extents;
+ if (ext_count == 0) {
+ ost_done = 1;
+ /* If last stripe has hole at the end,
+ * then we need to return */
+ if (cur_stripe_wrap == last_stripe) {
+ fiemap->fm_mapped_extents = 0;
+ goto finish;
+ }
+ break;
+ }
+
+ /* If we just need num of extents then go to next device */
+ if (get_num_extents) {
+ current_extent += ext_count;
+ break;
+ }
+
+ len_mapped_single_call = lcl_fm_ext[ext_count-1].fe_logical -
+ lun_start + lcl_fm_ext[ext_count - 1].fe_length;
+
+ /* Have we finished mapping on this device? */
+ if (req_fm_len <= len_mapped_single_call)
+ ost_done = 1;
+
+ /* Clear the EXTENT_LAST flag which can be present on
+ * last extent */
+ if (lcl_fm_ext[ext_count-1].fe_flags & FIEMAP_EXTENT_LAST)
+ lcl_fm_ext[ext_count - 1].fe_flags &=
+ ~FIEMAP_EXTENT_LAST;
+
+ curr_loc = lov_stripe_size(lsm,
+ lcl_fm_ext[ext_count - 1].fe_logical+
+ lcl_fm_ext[ext_count - 1].fe_length,
+ cur_stripe);
+ if (curr_loc >= fm_key->oa.o_size)
+ ost_eof = 1;
+
+ fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
+ ost_index, ext_count,
+ current_extent);
+
+ current_extent += ext_count;
+
+ /* Ran out of available extents? */
+ if (current_extent >= fiemap->fm_extent_count)
+ goto finish;
+ } while (ost_done == 0 && ost_eof == 0);
+
+ if (cur_stripe_wrap == last_stripe)
+ goto finish;
+ }
+
+finish:
+ /* Indicate that we are returning device offsets unless file just has
+ * single stripe */
+ if (lsm->lsm_stripe_count > 1)
+ fiemap->fm_flags |= FIEMAP_FLAG_DEVICE_ORDER;
+
+ if (get_num_extents)
+ goto skip_last_device_calc;
+
+ /* Check if we have reached the last stripe and whether mapping for that
+ * stripe is done. */
+ if (cur_stripe_wrap == last_stripe) {
+ if (ost_done || ost_eof)
+ fiemap->fm_extents[current_extent - 1].fe_flags |=
+ FIEMAP_EXTENT_LAST;
+ }
+
+skip_last_device_calc:
+ fiemap->fm_mapped_extents = current_extent;
+
+out:
+ OBD_FREE(fm_local, buffer_size);
+ return rc;
+}
+