Whamcloud - gitweb
LU-8773 llite: refactor lov_object_fiemap()
[fs/lustre-release.git] / lustre / lov / lov_object.c
index f731146..7ee0ebc 100644 (file)
  *
  * You should have received a copy of the GNU General Public License
  * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
  *
  * GPL HEADER END
  */
@@ -27,7 +23,7 @@
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2015, Intel Corporation.
+ * Copyright (c) 2011, 2016, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -80,8 +76,6 @@ struct lov_layout_operations {
                             struct cl_object *obj, struct cl_io *io);
         int  (*llo_getattr)(const struct lu_env *env, struct cl_object *obj,
                             struct cl_attr *attr);
-       int  (*llo_find_cbdata)(const struct lu_env *env, struct cl_object *obj,
-                               ldlm_iterator_t iter, void *data);
 };
 
 static int lov_layout_wait(const struct lu_env *env, struct lov_object *lov);
@@ -197,8 +191,8 @@ static int lov_init_sub(const struct lu_env *env, struct lov_object *lov,
                }
 
                LU_OBJECT_DEBUG(mask, env, &stripe->co_lu,
-                               "stripe %d is already owned.\n", idx);
-               LU_OBJECT_DEBUG(mask, env, old_obj, "owned.\n");
+                               "stripe %d is already owned.", idx);
+               LU_OBJECT_DEBUG(mask, env, old_obj, "owned.");
                LU_OBJECT_HEADER(mask, env, lov2lu(lov), "try to own.\n");
                cl_object_put(env, stripe);
        }
@@ -273,6 +267,12 @@ static int lov_init_raid0(const struct lu_env *env, struct lov_device *dev,
                        if (result != 0)
                                GOTO(out, result);
 
+                       if (dev->ld_target[ost_idx] == NULL) {
+                               CERROR("%s: OST %04x is not initialized\n",
+                                      lov2obd(dev->ld_lov)->obd_name, ost_idx);
+                               GOTO(out, result = -EIO);
+                       }
+
                        subdev = lovsub2cl_dev(dev->ld_target[ost_idx]);
                        subconf->u.coc_oinfo = oinfo;
                        LASSERTF(subdev != NULL, "not init ost %d\n", ost_idx);
@@ -593,37 +593,6 @@ static int lov_attr_get_raid0(const struct lu_env *env, struct cl_object *obj,
        RETURN(result);
 }
 
-static int lov_find_cbdata_empty(const struct lu_env *env,
-                                struct cl_object *obj, ldlm_iterator_t iter,
-                                void *data)
-{
-       return 0;
-}
-
-static int lov_find_cbdata_raid0(const struct lu_env *env,
-                                struct cl_object *obj, ldlm_iterator_t iter,
-                                void *data)
-{
-       struct lov_object       *lov = cl2lov(obj);
-       struct lov_layout_raid0 *r0 = lov_r0(lov);
-       struct cl_object        *subobj;
-       int                     i;
-       int                     rc = 0;
-
-       for (i = 0; i < r0->lo_nr; ++i) {
-               if (r0->lo_sub[i] == NULL)
-                       continue;
-
-               subobj = lovsub2cl(r0->lo_sub[i]);
-
-               rc = cl_object_find_cbdata(env, subobj, iter, data);
-               if (rc != 0)
-                       break;
-       }
-
-       return rc;
-}
-
 const static struct lov_layout_operations lov_dispatch[] = {
         [LLT_EMPTY] = {
                 .llo_init      = lov_init_empty,
@@ -635,7 +604,6 @@ const static struct lov_layout_operations lov_dispatch[] = {
                 .llo_lock_init = lov_lock_init_empty,
                 .llo_io_init   = lov_io_init_empty,
                .llo_getattr   = lov_attr_get_empty,
-               .llo_find_cbdata = lov_find_cbdata_empty
         },
         [LLT_RAID0] = {
                 .llo_init      = lov_init_raid0,
@@ -647,7 +615,6 @@ const static struct lov_layout_operations lov_dispatch[] = {
                 .llo_lock_init = lov_lock_init_raid0,
                 .llo_io_init   = lov_io_init_raid0,
                .llo_getattr   = lov_attr_get_raid0,
-               .llo_find_cbdata = lov_find_cbdata_raid0
        },
         [LLT_RELEASED] = {
                 .llo_init      = lov_init_released,
@@ -659,21 +626,20 @@ const static struct lov_layout_operations lov_dispatch[] = {
                 .llo_lock_init = lov_lock_init_empty,
                 .llo_io_init   = lov_io_init_released,
                .llo_getattr   = lov_attr_get_empty,
-               .llo_find_cbdata = lov_find_cbdata_empty
         }
 };
 
 /**
  * Performs a double-dispatch based on the layout type of an object.
  */
-#define LOV_2DISPATCH_NOLOCK(obj, op, ...)                              \
-({                                                                      \
-        struct lov_object                      *__obj = (obj);          \
-        enum lov_layout_type                    __llt;                  \
-                                                                        \
-        __llt = __obj->lo_type;                                         \
-        LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch));        \
-        lov_dispatch[__llt].op(__VA_ARGS__);                            \
+#define LOV_2DISPATCH_NOLOCK(obj, op, ...)             \
+({                                                     \
+       struct lov_object *__obj = (obj);               \
+       enum lov_layout_type __llt;                     \
+                                                       \
+       __llt = __obj->lo_type;                         \
+       LASSERT(__llt < ARRAY_SIZE(lov_dispatch));      \
+       lov_dispatch[__llt].op(__VA_ARGS__);            \
 })
 
 /**
@@ -690,12 +656,16 @@ static enum lov_layout_type lov_type(struct lov_stripe_md *lsm)
 
 static inline void lov_conf_freeze(struct lov_object *lov)
 {
+       CDEBUG(D_INODE, "To take share lov(%p) owner %p/%p\n",
+               lov, lov->lo_owner, current);
        if (lov->lo_owner != current)
                down_read(&lov->lo_type_guard);
 }
 
 static inline void lov_conf_thaw(struct lov_object *lov)
 {
+       CDEBUG(D_INODE, "To release share lov(%p) owner %p/%p\n",
+               lov, lov->lo_owner, current);
        if (lov->lo_owner != current)
                up_read(&lov->lo_type_guard);
 }
@@ -727,7 +697,7 @@ do {                                                                    \
                                                                         \
        lov_conf_freeze(__obj);                                         \
         __llt = __obj->lo_type;                                         \
-        LASSERT(0 <= __llt && __llt < ARRAY_SIZE(lov_dispatch));        \
+       LASSERT(__llt < ARRAY_SIZE(lov_dispatch));                      \
         lov_dispatch[__llt].op(__VA_ARGS__);                            \
        lov_conf_thaw(__obj);                                           \
 } while (0)
@@ -738,10 +708,14 @@ static void lov_conf_lock(struct lov_object *lov)
        down_write(&lov->lo_type_guard);
        LASSERT(lov->lo_owner == NULL);
        lov->lo_owner = current;
+       CDEBUG(D_INODE, "Took exclusive lov(%p) owner %p\n",
+               lov, lov->lo_owner);
 }
 
 static void lov_conf_unlock(struct lov_object *lov)
 {
+       CDEBUG(D_INODE, "To release exclusive lov(%p) owner %p\n",
+               lov, lov->lo_owner);
        lov->lo_owner = NULL;
        up_write(&lov->lo_type_guard);
 }
@@ -770,22 +744,19 @@ static int lov_layout_change(const struct lu_env *unused,
        union lov_layout_state *state = &lov->u;
        const struct lov_layout_operations *old_ops;
        const struct lov_layout_operations *new_ops;
-       void *cookie;
+       struct lov_device *lov_dev = lov_object_dev(lov);
        struct lu_env *env;
-       int refcheck;
+       __u16 refcheck;
        int rc;
        ENTRY;
 
-       LASSERT(0 <= lov->lo_type && lov->lo_type < ARRAY_SIZE(lov_dispatch));
+       LASSERT(lov->lo_type < ARRAY_SIZE(lov_dispatch));
 
-       cookie = cl_env_reenter();
        env = cl_env_get(&refcheck);
-       if (IS_ERR(env)) {
-               cl_env_reexit(cookie);
+       if (IS_ERR(env))
                RETURN(PTR_ERR(env));
-       }
 
-       LASSERT(0 <= llt && llt < ARRAY_SIZE(lov_dispatch));
+       LASSERT(llt < ARRAY_SIZE(lov_dispatch));
 
        CDEBUG(D_INODE, DFID" from %s to %s\n",
               PFID(lu_object_fid(lov2lu(lov))),
@@ -806,14 +777,21 @@ static int lov_layout_change(const struct lu_env *unused,
 
        LASSERT(atomic_read(&lov->lo_active_ios) == 0);
 
+       CDEBUG(D_INODE, DFID "Apply new layout lov %p, type %d\n",
+              PFID(lu_object_fid(lov2lu(lov))), lov, llt);
+
        lov->lo_type = LLT_EMPTY;
 
        /* page bufsize fixup */
        cl_object_header(&lov->lo_cl)->coh_page_bufsize -=
                lov_page_slice_fixup(lov, NULL);
 
-       rc = new_ops->llo_init(env, lov_object_dev(lov), lov, lsm, conf, state);
+       rc = new_ops->llo_init(env, lov_dev, lov, lsm, conf, state);
        if (rc != 0) {
+               struct obd_device *obd = lov2obd(lov_dev->ld_lov);
+
+               CERROR("%s: cannot apply new layout on "DFID" : rc = %d\n",
+                      obd->obd_name, PFID(lu_object_fid(lov2lu(lov))), rc);
                new_ops->llo_delete(env, lov, state);
                new_ops->llo_fini(env, lov, state);
                /* this file becomes an EMPTY file. */
@@ -825,8 +803,6 @@ static int lov_layout_change(const struct lu_env *unused,
 
 out:
        cl_env_put(env, &refcheck);
-       cl_env_reexit(cookie);
-
        RETURN(rc);
 }
 
@@ -979,6 +955,11 @@ int lov_io_init(const struct lu_env *env, struct cl_object *obj,
                struct cl_io *io)
 {
        CL_IO_SLICE_CLEAN(lov_env_io(env), lis_cl);
+
+       CDEBUG(D_INODE, DFID "io %p type %d ignore/verify layout %d/%d\n",
+              PFID(lu_object_fid(&obj->co_lu)), io, io->ci_type,
+              io->ci_ignore_layout, io->ci_verify_layout);
+
        return LOV_2DISPATCH_MAYLOCK(cl2lov(obj), llo_io_init,
                                     !io->ci_ignore_layout, env, obj, io);
 }
@@ -1031,12 +1012,12 @@ int lov_lock_init(const struct lu_env *env, struct cl_object *obj,
  * \retval last_stripe         return the last stripe of the mapping
  */
 static int fiemap_calc_last_stripe(struct lov_stripe_md *lsm,
-                                  loff_t fm_start, loff_t fm_end,
+                                  u64 fm_start, u64 fm_end,
                                   int start_stripe, int *stripe_count)
 {
        int last_stripe;
-       loff_t obd_start;
-       loff_t obd_end;
+       u64 obd_start;
+       u64 obd_end;
        int i, j;
 
        if (fm_end - fm_start > lsm->lsm_stripe_size * lsm->lsm_stripe_count) {
@@ -1104,15 +1085,15 @@ static void fiemap_prepare_and_copy_exts(struct fiemap *fiemap,
  * \param fm_end [in]          logical end of mapping
  * \param start_stripe [out]   starting stripe will be returned in this
  */
-static loff_t fiemap_calc_fm_end_offset(struct fiemap *fiemap,
-                                       struct lov_stripe_md *lsm,
-                                       loff_t fm_start, loff_t fm_end,
-                                       int *start_stripe)
+static u64 fiemap_calc_fm_end_offset(struct fiemap *fiemap,
+                                    struct lov_stripe_md *lsm,
+                                    u64 fm_start, u64 fm_end,
+                                    int *start_stripe)
 {
-       loff_t local_end = fiemap->fm_extents[0].fe_logical;
-       loff_t lun_start;
-       loff_t lun_end;
-       loff_t fm_end_offset;
+       u64 local_end = fiemap->fm_extents[0].fe_logical;
+       u64 lun_start;
+       u64 lun_end;
+       u64 fm_end_offset;
        int stripe_no = -1;
        int i;
 
@@ -1153,6 +1134,188 @@ static loff_t fiemap_calc_fm_end_offset(struct fiemap *fiemap,
        return fm_end_offset;
 }
 
+struct fiemap_state {  /* FIEMAP progress shared across per-stripe calls */
+       struct fiemap   *fs_fm;         /* local buffer handed to sub-objects */
+       u64             fs_start;       /* requested start (fiemap->fm_start) */
+       u64             fs_length;      /* requested length; ~0ULL: up to size */
+       u64             fs_end;         /* end of mapping, capped at file size */
+       u64             fs_end_offset;  /* resume offset of a continued call */
+       int             fs_cur_extent;  /* extents collected so far */
+       int             fs_cnt_need;    /* extents to request per sub-call */
+       int             fs_start_stripe; /* stripe the mapping starts on */
+       int             fs_last_stripe; /* last stripe of the mapping */
+       bool            fs_device_done; /* mapping done on current stripe */
+       bool            fs_finish;      /* caller must stop the stripe loop */
+       bool            fs_enough;      /* no extent slots left in request */
+};
+/* Collect extents for stripe @stripeno into @fiemap; progress kept in @fs. */
+int fiemap_for_stripe(const struct lu_env *env, struct cl_object *obj,
+                     struct lov_stripe_md *lsm,
+                     struct fiemap *fiemap, size_t *buflen,
+                     struct ll_fiemap_info_key *fmkey, int stripeno,
+                     struct fiemap_state *fs)
+{
+       struct cl_object *subobj;
+       struct lov_obd *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
+       struct fiemap_extent *fm_ext = &fs->fs_fm->fm_extents[0];
+       u64 req_fm_len; /* Stores length of required mapping */
+       u64 len_mapped_single_call;
+       u64 lun_start;
+       u64 lun_end;
+       u64 obd_object_end;
+       unsigned int ext_count;
+       /* EOF for object */
+       bool ost_eof = false;
+       /* done with required mapping for this OST? */
+       bool ost_done = false;
+       int ost_index;
+       int rc = 0;
+
+       fs->fs_device_done = false; /* reset on every per-stripe call */
+       /* Find out range of mapping on this stripe */
+       if ((lov_stripe_intersects(lsm, stripeno, fs->fs_start, fs->fs_end,
+                                  &lun_start, &obd_object_end)) == 0)
+               return 0;
+
+       if (lov_oinfo_is_dummy(lsm->lsm_oinfo[stripeno]))
+               return -EIO;
+
+       /* If this is a continuation FIEMAP call and we are on
+        * starting stripe then lun_start needs to be set to
+        * end_offset */
+       if (fs->fs_end_offset != 0 && stripeno == fs->fs_start_stripe)
+               lun_start = fs->fs_end_offset;
+
+       lun_end = fs->fs_length;
+       if (lun_end != ~0ULL) {
+               /* Handle fs->fs_start + fs->fs_length overflow */
+               if (fs->fs_start + fs->fs_length < fs->fs_start)
+                       fs->fs_length = ~0ULL - fs->fs_start;
+               lun_end = lov_size_to_stripe(lsm, fs->fs_start + fs->fs_length,
+                                            stripeno);
+       }
+
+       if (lun_start == lun_end)
+               return 0; /* empty range: nothing to map on this stripe */
+
+       req_fm_len = obd_object_end - lun_start;
+       fs->fs_fm->fm_length = 0;
+       len_mapped_single_call = 0;
+
+       /* find the sub-object backing this stripe */
+       subobj = lov_find_subobj(env, cl2lov(obj), lsm, stripeno);
+       if (IS_ERR(subobj))
+               return PTR_ERR(subobj);
+       /* If the output buffer is very large and the objects have many
+        * extents we may need to loop on a single OST repeatedly */
+       do {
+               if (fiemap->fm_extent_count > 0) {
+                       /* Don't get too many extents. */
+                       if (fs->fs_cur_extent + fs->fs_cnt_need >
+                           fiemap->fm_extent_count)
+                               fs->fs_cnt_need = fiemap->fm_extent_count -
+                                                 fs->fs_cur_extent;
+               }
+
+               lun_start += len_mapped_single_call;
+               fs->fs_fm->fm_length = req_fm_len - len_mapped_single_call;
+               req_fm_len = fs->fs_fm->fm_length;
+               fs->fs_fm->fm_extent_count = fs->fs_enough ?
+                                            1 : fs->fs_cnt_need;
+               fs->fs_fm->fm_mapped_extents = 0;
+               fs->fs_fm->fm_flags = fiemap->fm_flags;
+
+               ost_index = lsm->lsm_oinfo[stripeno]->loi_ost_idx;
+
+               if (ost_index < 0 || ost_index >= lov->desc.ld_tgt_count)
+                       GOTO(obj_put, rc = -EINVAL);
+               /* If OST is inactive, return extent with UNKNOWN flag. */
+               if (!lov->lov_tgts[ost_index]->ltd_active) {
+                       fs->fs_fm->fm_flags |= FIEMAP_EXTENT_LAST;
+                       fs->fs_fm->fm_mapped_extents = 1;
+
+                       fm_ext[0].fe_logical = lun_start;
+                       fm_ext[0].fe_length = obd_object_end - lun_start;
+                       fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
+
+                       goto inactive_tgt;
+               }
+
+               fs->fs_fm->fm_start = lun_start;
+               fs->fs_fm->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
+               memcpy(&fmkey->lfik_fiemap, fs->fs_fm, sizeof(*fs->fs_fm));
+               *buflen = fiemap_count_to_size(fs->fs_fm->fm_extent_count);
+
+               rc = cl_object_fiemap(env, subobj, fmkey, fs->fs_fm, buflen);
+               if (rc != 0)
+                       GOTO(obj_put, rc);
+inactive_tgt:
+               ext_count = fs->fs_fm->fm_mapped_extents;
+               if (ext_count == 0) {
+                       ost_done = true;
+                       fs->fs_device_done = true;
+                       /* If last stripe has a hole at the end,
+                        * we need to return */
+                       if (stripeno == fs->fs_last_stripe) {
+                               fiemap->fm_mapped_extents = 0;
+                               fs->fs_finish = true;
+                               GOTO(obj_put, rc);
+                       }
+                       break;
+               } else if (fs->fs_enough) {
+                       /*
+                        * We've collected enough extents and there are
+                        * more extents after it.
+                        */
+                       fs->fs_finish = true;
+                       GOTO(obj_put, rc);
+               }
+
+               /* If we just need num of extents, go to next device */
+               if (fiemap->fm_extent_count == 0) {
+                       fs->fs_cur_extent += ext_count;
+                       break;
+               }
+
+               /* prepare to copy retrieved map extents */
+               len_mapped_single_call = fm_ext[ext_count - 1].fe_logical +
+                                        fm_ext[ext_count - 1].fe_length -
+                                        lun_start;
+
+               /* Have we finished mapping on this device? */
+               if (req_fm_len <= len_mapped_single_call) {
+                       ost_done = true;
+                       fs->fs_device_done = true;
+               }
+
+               /* Clear the EXTENT_LAST flag which can be present on
+                * the last extent */
+               if (fm_ext[ext_count - 1].fe_flags & FIEMAP_EXTENT_LAST)
+                       fm_ext[ext_count - 1].fe_flags &= ~FIEMAP_EXTENT_LAST;
+               if (lov_stripe_size(lsm, fm_ext[ext_count - 1].fe_logical +
+                                        fm_ext[ext_count - 1].fe_length,
+                                   stripeno) >= fmkey->lfik_oa.o_size) {
+                       ost_eof = true;
+                       fs->fs_device_done = true;
+               }
+
+               fiemap_prepare_and_copy_exts(fiemap, fm_ext, ost_index,
+                                            ext_count, fs->fs_cur_extent);
+               fs->fs_cur_extent += ext_count;
+
+               /* Ran out of available extents? */
+               if (fs->fs_cur_extent >= fiemap->fm_extent_count)
+                       fs->fs_enough = true;
+       } while (!ost_done && !ost_eof);
+
+       if (stripeno == fs->fs_last_stripe)
+               fs->fs_finish = true; /* last stripe handled: stop the caller */
+obj_put:
+       cl_object_put(env, subobj);
+
+       return rc;
+}
+
 /**
  * Break down the FIEMAP request and send appropriate calls to individual OSTs.
  * This also handles the restarting of FIEMAP calls in case mapping overflows
@@ -1172,30 +1335,12 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
                             struct fiemap *fiemap, size_t *buflen)
 {
        struct lov_stripe_md    *lsm;
-       struct cl_object        *subobj = NULL;
-       struct lov_obd          *lov = lu2lov_dev(obj->co_lu.lo_dev)->ld_lov;
        struct fiemap           *fm_local = NULL;
-       struct fiemap_extent    *lcl_fm_ext;
-       loff_t                  fm_start;
-       loff_t                  fm_end;
-       loff_t                  fm_length;
-       loff_t                  fm_end_offset;
-       int                     count_local;
-       int                     ost_index = 0;
-       int                     start_stripe;
-       int                     current_extent = 0;
-       int                     rc = 0;
-       int                     last_stripe;
-       int                     cur_stripe = 0;
-       int                     cur_stripe_wrap = 0;
+       int                     cur_stripe;
        int                     stripe_count;
        unsigned int            buffer_size = FIEMAP_BUFFER_SIZE;
-       /* Whether have we collected enough extents */
-       bool                    enough = false;
-       /* EOF for object */
-       bool                    ost_eof = false;
-       /* done with required mapping for this OST? */
-       bool                    ost_done = false;
+       int                     rc = 0;
+       struct fiemap_state fs = { 0 };
        ENTRY;
 
        lsm = lov_lsm_addref(cl2lov(obj));
@@ -1208,7 +1353,7 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
         */
        if (lsm->lsm_stripe_count > 1 && !(fiemap->fm_flags &
                                           FIEMAP_FLAG_DEVICE_ORDER))
-               GOTO(out, rc = -ENOTSUPP);
+               GOTO(out_lsm, rc = -ENOTSUPP);
 
        if (lsm_is_released(lsm)) {
                if (fiemap->fm_start < fmkey->lfik_oa.o_size) {
@@ -1229,7 +1374,7 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
                        fiemap->fm_extents[0].fe_flags |=
                                FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_LAST;
                }
-               GOTO(out, rc = 0);
+               GOTO(out_lsm, rc = 0);
        }
 
        if (fiemap_count_to_size(fiemap->fm_extent_count) < buffer_size)
@@ -1237,26 +1382,35 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
 
        OBD_ALLOC_LARGE(fm_local, buffer_size);
        if (fm_local == NULL)
-               GOTO(out, rc = -ENOMEM);
-       lcl_fm_ext = &fm_local->fm_extents[0];
-       count_local = fiemap_size_to_count(buffer_size);
+               GOTO(out_lsm, rc = -ENOMEM);
+
+       fs.fs_fm = fm_local;
+       fs.fs_cnt_need = fiemap_size_to_count(buffer_size);
 
-       fm_start = fiemap->fm_start;
-       fm_length = fiemap->fm_length;
+       fs.fs_start = fiemap->fm_start;
+       /* fs.fs_start is beyond the end of the file */
+       if (fs.fs_start > fmkey->lfik_oa.o_size)
+               GOTO(out_fm_local, rc = -EINVAL);
+
+       fs.fs_length = fiemap->fm_length;
        /* Calculate start stripe, last stripe and length of mapping */
-       start_stripe = lov_stripe_number(lsm, fm_start);
-       fm_end = (fm_length == ~0ULL) ? fmkey->lfik_oa.o_size :
-                                       fm_start + fm_length - 1;
-       /* If fm_length != ~0ULL but fm_start_fm_length-1 exceeds file size */
-       if (fm_end > fmkey->lfik_oa.o_size)
-               fm_end = fmkey->lfik_oa.o_size;
-
-       last_stripe = fiemap_calc_last_stripe(lsm, fm_start, fm_end,
-                                             start_stripe, &stripe_count);
-       fm_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fm_start, fm_end,
-                                                 &start_stripe);
-       if (fm_end_offset == -EINVAL)
-               GOTO(out, rc = -EINVAL);
+       fs.fs_start_stripe = lov_stripe_number(lsm, fs.fs_start);
+       fs.fs_end = (fs.fs_length == ~0ULL) ? fmkey->lfik_oa.o_size :
+                                             fs.fs_start + fs.fs_length - 1;
+       /* If fs_length != ~0ULL but fs_start+fs_length-1 exceeds file size */
+       if (fs.fs_end > fmkey->lfik_oa.o_size) {
+               fs.fs_end = fmkey->lfik_oa.o_size;
+               fs.fs_length = fs.fs_end - fs.fs_start;
+       }
+
+       fs.fs_last_stripe = fiemap_calc_last_stripe(lsm, fs.fs_start, fs.fs_end,
+                                                   fs.fs_start_stripe,
+                                                   &stripe_count);
+       fs.fs_end_offset = fiemap_calc_fm_end_offset(fiemap, lsm, fs.fs_start,
+                                                    fs.fs_end,
+                                                    &fs.fs_start_stripe);
+       if (fs.fs_end_offset == -EINVAL)
+               GOTO(out_fm_local, rc = -EINVAL);
 
        /**
         * Requested extent count exceeds the fiemap buffer size, shrink our
@@ -1265,171 +1419,24 @@ static int lov_object_fiemap(const struct lu_env *env, struct cl_object *obj,
        if (fiemap_count_to_size(fiemap->fm_extent_count) > *buflen)
                fiemap->fm_extent_count = fiemap_size_to_count(*buflen);
        if (fiemap->fm_extent_count == 0)
-               count_local = 0;
+               fs.fs_cnt_need = 0;
+
+       fs.fs_finish = false;
+       fs.fs_enough = false;
+       fs.fs_cur_extent = 0;
 
        /* Check each stripe */
-       for (cur_stripe = start_stripe; stripe_count > 0;
+       for (cur_stripe = fs.fs_start_stripe; stripe_count > 0;
             --stripe_count,
             cur_stripe = (cur_stripe + 1) % lsm->lsm_stripe_count) {
-               loff_t req_fm_len; /* Stores length of required mapping */
-               loff_t len_mapped_single_call;
-               loff_t lun_start;
-               loff_t lun_end;
-               loff_t obd_object_end;
-               unsigned int ext_count;
-
-               cur_stripe_wrap = cur_stripe;
-
-               /* Find out range of mapping on this stripe */
-               if ((lov_stripe_intersects(lsm, cur_stripe, fm_start, fm_end,
-                                          &lun_start, &obd_object_end)) == 0)
-                       continue;
-
-               if (lov_oinfo_is_dummy(lsm->lsm_oinfo[cur_stripe]))
-                       GOTO(out, rc = -EIO);
-
-               /* If this is a continuation FIEMAP call and we are on
-                * starting stripe then lun_start needs to be set to
-                * fm_end_offset */
-               if (fm_end_offset != 0 && cur_stripe == start_stripe)
-                       lun_start = fm_end_offset;
-
-               if (fm_length != ~0ULL) {
-                       /* Handle fm_start + fm_length overflow */
-                       if (fm_start + fm_length < fm_start)
-                               fm_length = ~0ULL - fm_start;
-                       lun_end = lov_size_to_stripe(lsm, fm_start + fm_length,
-                                                    cur_stripe);
-               } else {
-                       lun_end = ~0ULL;
-               }
-
-               if (lun_start == lun_end)
-                       continue;
-
-               req_fm_len = obd_object_end - lun_start;
-               fm_local->fm_length = 0;
-               len_mapped_single_call = 0;
-
-               /* find lobsub object */
-               subobj = lov_find_subobj(env, cl2lov(obj), lsm,
-                                            cur_stripe);
-               if (IS_ERR(subobj))
-                       GOTO(out, rc = PTR_ERR(subobj));
-               /* If the output buffer is very large and the objects have many
-                * extents we may need to loop on a single OST repeatedly */
-               ost_eof = false;
-               ost_done = false;
-               do {
-                       if (fiemap->fm_extent_count > 0) {
-                               /* Don't get too many extents. */
-                               if (current_extent + count_local >
-                                   fiemap->fm_extent_count)
-                                       count_local = fiemap->fm_extent_count -
-                                                     current_extent;
-                       }
-
-                       lun_start += len_mapped_single_call;
-                       fm_local->fm_length = req_fm_len -
-                                             len_mapped_single_call;
-                       req_fm_len = fm_local->fm_length;
-                       fm_local->fm_extent_count = enough ? 1 : count_local;
-                       fm_local->fm_mapped_extents = 0;
-                       fm_local->fm_flags = fiemap->fm_flags;
-
-                       ost_index = lsm->lsm_oinfo[cur_stripe]->loi_ost_idx;
-
-                       if (ost_index < 0 ||
-                           ost_index >= lov->desc.ld_tgt_count)
-                               GOTO(obj_put, rc = -EINVAL);
-                       /* If OST is inactive, return extent with UNKNOWN
-                        * flag. */
-                       if (!lov->lov_tgts[ost_index]->ltd_active) {
-                               fm_local->fm_flags |= FIEMAP_EXTENT_LAST;
-                               fm_local->fm_mapped_extents = 1;
-
-                               lcl_fm_ext[0].fe_logical = lun_start;
-                               lcl_fm_ext[0].fe_length = obd_object_end -
-                                                         lun_start;
-                               lcl_fm_ext[0].fe_flags |= FIEMAP_EXTENT_UNKNOWN;
-
-                               goto inactive_tgt;
-                       }
-
-                       fm_local->fm_start = lun_start;
-                       fm_local->fm_flags &= ~FIEMAP_FLAG_DEVICE_ORDER;
-                       memcpy(&fmkey->lfik_fiemap, fm_local,
-                              sizeof(*fm_local));
-                       *buflen = fiemap_count_to_size(
-                                               fm_local->fm_extent_count);
-
-                       rc = cl_object_fiemap(env, subobj, fmkey, fm_local,
-                                             buflen);
-                       if (rc != 0)
-                               GOTO(obj_put, rc);
-inactive_tgt:
-                       ext_count = fm_local->fm_mapped_extents;
-                       if (ext_count == 0) {
-                               ost_done = true;
-                               /* If last stripe has hold at the end,
-                                * we need to return */
-                               if (cur_stripe_wrap == last_stripe) {
-                                       fiemap->fm_mapped_extents = 0;
-                                       goto finish;
-                               }
-                               break;
-                       } else if (enough) {
-                               /*
-                                * We've collected enough extents and there are
-                                * more extents after it.
-                                */
-                               goto finish;
-                       }
-
-                       /* If we just need num of extents, got to next device */
-                       if (fiemap->fm_extent_count == 0) {
-                               current_extent += ext_count;
-                               break;
-                       }
-
-                       /* prepare to copy retrived map extents */
-                       len_mapped_single_call =
-                               lcl_fm_ext[ext_count - 1].fe_logical -
-                               lun_start + lcl_fm_ext[ext_count - 1].fe_length;
-
-                       /* Have we finished mapping on this device? */
-                       if (req_fm_len <= len_mapped_single_call)
-                               ost_done = true;
-
-                       /* Clear the EXTENT_LAST flag which can be present on
-                        * the last extent */
-                       if (lcl_fm_ext[ext_count - 1].fe_flags &
-                           FIEMAP_EXTENT_LAST)
-                               lcl_fm_ext[ext_count - 1].fe_flags &=
-                                                       ~FIEMAP_EXTENT_LAST;
-                       if (lov_stripe_size(lsm,
-                                       lcl_fm_ext[ext_count - 1].fe_logical +
-                                       lcl_fm_ext[ext_count - 1].fe_length,
-                                       cur_stripe) >= fmkey->lfik_oa.o_size)
-                               ost_eof = true;
-
-                       fiemap_prepare_and_copy_exts(fiemap, lcl_fm_ext,
-                                                    ost_index, ext_count,
-                                                    current_extent);
-                       current_extent += ext_count;
-
-                       /* Ran out of available extents? */
-                       if (current_extent >= fiemap->fm_extent_count)
-                               enough = true;
-               } while (!ost_done && !ost_eof);
-
-               cl_object_put(env, subobj);
-               subobj = NULL;
-
-               if (cur_stripe_wrap == last_stripe)
-                       goto finish;
+               rc = fiemap_for_stripe(env, obj, lsm, fiemap, buflen, fmkey,
+                                      cur_stripe, &fs);
+               if (rc < 0)
+                       GOTO(out_fm_local, rc);
+               if (fs.fs_finish)
+                       break;
        } /* for each stripe */
-finish:
+
        /* Indicate that we are returning device offsets unless file just has
         * single stripe */
        if (lsm->lsm_stripe_count > 1)
@@ -1440,18 +1447,15 @@ finish:
 
        /* Check if we have reached the last stripe and whether mapping for that
         * stripe is done. */
-       if ((cur_stripe_wrap == last_stripe) && (ost_done || ost_eof))
-               fiemap->fm_extents[current_extent - 1].fe_flags |=
+       if ((cur_stripe == fs.fs_last_stripe) && fs.fs_device_done)
+               fiemap->fm_extents[fs.fs_cur_extent - 1].fe_flags |=
                                                             FIEMAP_EXTENT_LAST;
 skip_last_device_calc:
-       fiemap->fm_mapped_extents = current_extent;
-obj_put:
-       if (subobj != NULL)
-               cl_object_put(env, subobj);
-out:
-       if (fm_local != NULL)
-               OBD_FREE_LARGE(fm_local, buffer_size);
+       fiemap->fm_mapped_extents = fs.fs_cur_extent;
+out_fm_local:
+       OBD_FREE_LARGE(fm_local, buffer_size);
 
+out_lsm:
        lov_lsm_put(lsm);
 
        return rc;
@@ -1516,18 +1520,6 @@ static loff_t lov_object_maxbytes(struct cl_object *obj)
        return maxbytes;
 }
 
-static int lov_object_find_cbdata(const struct lu_env *env,
-                                 struct cl_object *obj, ldlm_iterator_t iter,
-                                 void *data)
-{
-       int rc;
-       ENTRY;
-
-       /* call cl_object_find_cbdata for sub obj */
-       rc = LOV_2DISPATCH(cl2lov(obj), llo_find_cbdata, env, obj, iter, data);
-       RETURN(rc);
-}
-
 static const struct cl_object_operations lov_ops = {
        .coo_page_init    = lov_page_init,
        .coo_lock_init    = lov_lock_init,
@@ -1538,7 +1530,6 @@ static const struct cl_object_operations lov_ops = {
        .coo_getstripe    = lov_object_getstripe,
        .coo_layout_get   = lov_object_layout_get,
        .coo_maxbytes     = lov_object_maxbytes,
-       .coo_find_cbdata  = lov_object_find_cbdata,
        .coo_fiemap       = lov_object_fiemap,
 };