+/*
+ * Prune and kill every sub-object attached to a RAID0 layout entry.
+ *
+ * Each populated slot of the entry's sub-object array is passed to
+ * cl_object_prune() and then to lov_subobject_kill(). Nothing is done
+ * when the array itself is NULL.
+ */
+static void lov_delete_raid0(const struct lu_env *env, struct lov_object *lov,
+			     struct lov_layout_entry *lle)
+{
+	struct lov_layout_raid0 *raid0 = &lle->lle_raid0;
+
+	ENTRY;
+
+	if (raid0->lo_sub != NULL) {
+		int slot;
+
+		for (slot = 0; slot < raid0->lo_nr; slot++) {
+			struct lovsub_object *sub = raid0->lo_sub[slot];
+
+			/* empty slot, nothing to release */
+			if (sub == NULL)
+				continue;
+
+			cl_object_prune(env, &sub->lso_cl);
+			/*
+			 * The top-level object is leaving the cache, so its
+			 * sub-objects have to leave with it.
+			 */
+			lov_subobject_kill(env, lov, raid0, sub, slot);
+		}
+	}
+
+	EXIT;
+}
+
+/*
+ * Free the sub-object pointer array of a RAID0 layout entry.
+ *
+ * The array is sized as lo_nr pointers; after freeing, lo_sub is reset
+ * to NULL. A NULL lo_sub is left untouched.
+ */
+static void lov_fini_raid0(const struct lu_env *env,
+			   struct lov_layout_entry *lle)
+{
+	struct lov_layout_raid0 *raid0 = &lle->lle_raid0;
+
+	if (raid0->lo_sub == NULL)
+		return;
+
+	OBD_FREE_LARGE(raid0->lo_sub,
+		       raid0->lo_nr * sizeof(raid0->lo_sub[0]));
+	raid0->lo_sub = NULL;
+}
+
+/*
+ * Print every sub-object slot of a RAID0 layout entry through printer \a p.
+ *
+ * Populated slots are printed with lu_object_print(); empty slots emit a
+ * "sub N absent" line instead. Always returns 0.
+ */
+static int lov_print_raid0(const struct lu_env *env, void *cookie,
+			   lu_printer_t p, const struct lov_layout_entry *lle)
+{
+	const struct lov_layout_raid0 *raid0 = &lle->lle_raid0;
+	int slot;
+
+	for (slot = 0; slot < raid0->lo_nr; slot++) {
+		struct lovsub_object *sub = raid0->lo_sub[slot];
+
+		if (sub == NULL) {
+			(*p)(env, cookie, "sub %d absent\n", slot);
+			continue;
+		}
+
+		lu_object_print(env, cookie, p, lovsub2lu(sub));
+	}
+
+	return 0;
+}
+
+/*
+ * Get the attributes of a RAID0 layout entry, using the copy cached in the
+ * entry when it is marked valid.
+ *
+ * On a cache miss the attributes are rebuilt from lov_merge_lvb_kms() and
+ * cached. \a *lov_attr is set only on success.
+ *
+ * Returns 0 on success or the error returned by lov_merge_lvb_kms().
+ */
+static int lov_attr_get_raid0(const struct lu_env *env, struct lov_object *lov,
+			      unsigned int index, struct lov_layout_entry *lle,
+			      struct cl_attr **lov_attr)
+{
+	struct lov_layout_raid0 *raid0 = &lle->lle_raid0;
+	struct lov_stripe_md *lsm = lov->lo_lsm;
+	struct ost_lvb *lvb = &lov_env_info(env)->lti_lvb;
+	struct cl_attr *cached = &raid0->lo_attr;
+	__u64 kms = 0;
+	int rc;
+
+	/* fast path: attributes were already merged and cached */
+	if (raid0->lo_attr_valid) {
+		*lov_attr = cached;
+		return 0;
+	}
+
+	memset(lvb, 0, sizeof(*lvb));
+
+	/*
+	 * XXX: sanity:test_39m shows that timestamps can be negative,
+	 * how can it be? Seed them with the smallest possible value.
+	 */
+	lvb->lvb_atime = LLONG_MIN;
+	lvb->lvb_ctime = LLONG_MIN;
+	lvb->lvb_mtime = LLONG_MIN;
+
+	/*
+	 * XXX this should become a loop over the sub-objects calling
+	 * cl_object_attr_get() on each of them. For now the old lov
+	 * code is reused.
+	 *
+	 * XXX the lsm spin-lock is taken only to keep lov_merge_lvb_kms()
+	 * happy. It is not really needed, because new code protects the
+	 * consistency of sub-object attributes with the ->coh_attr_guard
+	 * spin-lock.
+	 */
+	lov_stripe_lock(lsm);
+	rc = lov_merge_lvb_kms(lsm, index, lvb, &kms);
+	lov_stripe_unlock(lsm);
+	if (rc != 0)
+		return rc;
+
+	cl_lvb2attr(cached, lvb);
+	cached->cat_kms = kms;
+	raid0->lo_attr_valid = 1;
+	*lov_attr = cached;
+
+	return 0;
+}
+
+/* Per-entry methods used for composite layout entries of RAID0 type. */
+static struct lov_comp_layout_entry_ops raid0_ops = {
+	.lco_init	= lov_init_raid0,
+	.lco_fini	= lov_fini_raid0,
+	.lco_getattr	= lov_attr_get_raid0,
+};
+
+/*
+ * Get the attributes of a DoM layout entry, using the copy cached in the
+ * entry when it is marked valid.
+ *
+ * On a cache miss the attributes are taken from the LVB stored in the
+ * entry's lov_oinfo, the size is clamped to the component end, and the
+ * result is cached.
+ *
+ * Returns 0 on success, or the error encoded in the LVB block count.
+ */
+static int lov_attr_get_dom(const struct lu_env *env, struct lov_object *lov,
+			    unsigned int index, struct lov_layout_entry *lle,
+			    struct cl_attr **lov_attr)
+{
+	struct lov_layout_dom *dom = &lle->lle_dom;
+	struct lov_oinfo *oinfo = dom->lo_loi;
+	struct cl_attr *cached = &dom->lo_dom_r0.lo_attr;
+
+	if (!dom->lo_dom_r0.lo_attr_valid) {
+		if (OST_LVB_IS_ERR(oinfo->loi_lvb.lvb_blocks))
+			return OST_LVB_GET_ERR(oinfo->loi_lvb.lvb_blocks);
+
+		cl_lvb2attr(cached, &oinfo->loi_lvb);
+
+		/*
+		 * After a client's setattr RPC the DoM component size can
+		 * exceed the stripe size, so nothing beyond the component
+		 * end is counted. The alternative would be to enforce that
+		 * limit on the server and forbid the overflow there.
+		 */
+		if (cached->cat_size > lle->lle_extent->e_end)
+			cached->cat_size = lle->lle_extent->e_end;
+
+		cached->cat_kms = cached->cat_size;
+		dom->lo_dom_r0.lo_attr_valid = 1;
+	}
+
+	*lov_attr = cached;
+
+	return 0;
+}
+
+/**
+ * Lookup FLD to get the MDS index of the given DOM object FID and find
+ * the matching MDC device in the LOV device's MD target array.
+ *
+ * \param[in] ld	LOV device
+ * \param[in] fid	FID to lookup
+ * \param[out] nr	index in MDC array to return back
+ *
+ * \retval 0		and \a nr filled with the MDC array index if successful
+ * \retval -EINVAL	if no initialized MDC target has the MDS index found
+ * \retval negative	error returned by fld_client_lookup()
+ */
+static int lov_fld_lookup(struct lov_device *ld, const struct lu_fid *fid,
+			  __u32 *nr)
+{
+	__u32 mds_idx;
+	int slot, rc;
+
+	ENTRY;
+
+	rc = fld_client_lookup(&ld->ld_lmv->u.lmv.lmv_fld, fid_seq(fid),
+			       &mds_idx, LU_SEQ_RANGE_MDT, NULL);
+	if (rc) {
+		CERROR("%s: error while looking for mds number. Seq %#llx"
+		       ", err = %d\n", lu_dev_name(cl2lu_dev(&ld->ld_cl)),
+		       fid_seq(fid), rc);
+		RETURN(rc);
+	}
+
+	CDEBUG(D_INODE, "FLD lookup got mds #%x for fid="DFID"\n",
+	       mds_idx, PFID(fid));
+
+	/* scan the array for a set-up MDC device serving that MDS */
+	for (slot = 0; slot < ld->ld_md_tgts_nr; slot++) {
+		if (ld->ld_md_tgts[slot].ldm_mdc == NULL)
+			continue;
+
+		if (ld->ld_md_tgts[slot].ldm_idx == mds_idx) {
+			*nr = slot;
+			RETURN(0);
+		}
+	}
+
+	CERROR("%s: cannot find corresponding MDC device for mds #%x "
+	       "for fid="DFID"\n", lu_dev_name(cl2lu_dev(&ld->ld_cl)),
+	       mds_idx, PFID(fid));
+	RETURN(-EINVAL);
+}
+
+/**
+ * Implementation of lov_comp_layout_entry_ops::lco_init for DOM object.
+ *
+ * Init the DOM object for the first time. It prepares also RAID0 entry
+ * for it to use in common methods with ordinary RAID0 layout entries.
+ *
+ * \param[in] env	execution environment
+ * \param[in] dev	LOV device
+ * \param[in] lov	LOV object
+ * \param[in] index	Composite layout entry index in LSM, asserted to be 0
+ * \param[in] conf	object configuration (not used by this method)
+ * \param[in,out] lle	Composite LOV layout entry to set up
+ *
+ * \retval		result of lov_page_slice_fixup() once the sub-object
+ *			has been attached to \a lle
+ * \retval -ENOMEM	if the lov_oinfo cannot be allocated
+ * \retval negative	error from lov_fld_lookup(), lov_sub_find() or
+ *			lov_init_sub()
+ */
+static int lov_init_dom(const struct lu_env *env, struct lov_device *dev,
+			struct lov_object *lov, unsigned int index,
+			const struct cl_object_conf *conf,
+			struct lov_layout_entry *lle)
+{
+	struct lov_thread_info *lti = lov_env_info(env);
+	struct lov_stripe_md_entry *lsme = lov_lse(lov, index);
+	struct cl_object *clo;
+	struct lu_object *o = lov2lu(lov);
+	const struct lu_fid *fid = lu_object_fid(o);
+	struct cl_device *mdcdev;
+	struct lov_oinfo *loi = NULL;
+	/* per-thread scratch configuration handed to lov_sub_find() */
+	struct cl_object_conf *sconf = &lti->lti_stripe_conf;
+	int rc;
+	__u32 idx = 0;
+
+	ENTRY;
+
+	LASSERT(index == 0);
+
+	/* find proper MDS device */
+	rc = lov_fld_lookup(dev, fid, &idx);
+	if (rc)
+		RETURN(rc);
+
+	LASSERTF(dev->ld_md_tgts[idx].ldm_mdc != NULL,
+		 "LOV md target[%u] is NULL\n", idx);
+
+	/* check lsm is DOM, more checks are needed */
+	LASSERT(lsme->lsme_stripe_count == 0);
+
+	/*
+	 * Create lower cl_objects.
+	 */
+	mdcdev = dev->ld_md_tgts[idx].ldm_mdc;
+
+	LASSERTF(mdcdev != NULL, "non-initialized mdc subdev\n");
+
+	/* DoM object has no oinfo in LSM entry, create it exclusively */
+	OBD_SLAB_ALLOC_PTR_GFP(loi, lov_oinfo_slab, GFP_NOFS);
+	if (loi == NULL)
+		RETURN(-ENOMEM);
+
+	fid_to_ostid(fid, &loi->loi_oi);
+
+	sconf->u.coc_oinfo = loi;
+again:
+	clo = lov_sub_find(env, mdcdev, fid, sconf);
+	if (IS_ERR(clo))
+		GOTO(out, rc = PTR_ERR(clo));
+
+	rc = lov_init_sub(env, lov, clo, loi, lov_comp_index(index, 0));
+	if (rc == -EAGAIN) /* try again */
+		goto again;
+	else if (rc != 0)
+		GOTO(out, rc);
+
+	/*
+	 * Expose the single DoM sub-object through the embedded RAID0
+	 * state: one slot whose array is the lo_dom pointer itself.
+	 * From here on the entry owns loi; lov_fini_dom() frees it.
+	 */
+	lle->lle_dom.lo_dom = cl2lovsub(clo);
+	spin_lock_init(&lle->lle_dom.lo_dom_r0.lo_sub_lock);
+	lle->lle_dom.lo_dom_r0.lo_nr = 1;
+	lle->lle_dom.lo_dom_r0.lo_sub = &lle->lle_dom.lo_dom;
+	lle->lle_dom.lo_loi = loi;
+
+	rc = lov_page_slice_fixup(lov, clo);
+	RETURN(rc);
+
+out:
+	/* the entry never took ownership of loi, release it here */
+	if (loi != NULL)
+		OBD_SLAB_FREE_PTR(loi, lov_oinfo_slab);
+	RETURN(rc);
+}
+
+/**
+ * Implementation of lov_comp_layout_entry_ops::lco_fini for DOM object.
+ *
+ * Clear the DOM sub-object pointer kept in the layout entry and free the
+ * lov_oinfo attached to it, which comes from lov_oinfo_slab.
+ *
+ * \param[in] env	execution environment
+ * \param[in] lle	Composite LOV layout entry
+ */
+static void lov_fini_dom(const struct lu_env *env,
+			 struct lov_layout_entry *lle)
+{
+	lle->lle_dom.lo_dom = NULL;
+
+	if (lle->lle_dom.lo_loi != NULL) {
+		OBD_SLAB_FREE_PTR(lle->lle_dom.lo_loi, lov_oinfo_slab);
+		/*
+		 * Do not leave a dangling pointer behind, so that a repeated
+		 * call cannot free the same lov_oinfo twice.
+		 */
+		lle->lle_dom.lo_loi = NULL;
+	}
+}
+
+/* Per-entry methods used for composite layout entries of DoM (MDT) type. */
+static struct lov_comp_layout_entry_ops dom_ops = {
+	.lco_init	= lov_init_dom,
+	.lco_fini	= lov_fini_dom,
+	.lco_getattr	= lov_attr_get_dom,
+};
+
+/*
+ * Initialize the composite layout state of \a lov from \a lsm.
+ *
+ * Takes a reference on \a lsm, allocates the mirror and entry arrays,
+ * fills in the type, ops, extent and validity of every entry, groups
+ * consecutive entries with the same mirror id into mirror entries, calls
+ * the per-entry lco_init method for every entry whose component is
+ * inited, and finally selects the preferred mirror.
+ *
+ * Returns 0 on success; -EINVAL when the FLR state and the mirror count
+ * disagree; -ENOMEM when an array cannot be allocated; -EIO for an
+ * unknown entry type; or the negative value returned by an lco_init
+ * method. Positive lco_init results are folded to 0.
+ *
+ * NOTE(review): the early RETURN paths taken after lsm_addref() and the
+ * first OBD_ALLOC() release nothing here - presumably the caller's
+ * fini path does; confirm.
+ */
+static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
+ struct lov_object *lov, struct lov_stripe_md *lsm,
+ const struct cl_object_conf *conf,
+ union lov_layout_state *state)
+{
+ struct lov_layout_composite *comp = &state->composite;
+ struct lov_layout_entry *lle;
+ struct lov_mirror_entry *lre;
+ unsigned int entry_count;
+ unsigned int psz = 0;
+ unsigned int mirror_count;
+ int flr_state = lsm->lsm_flags & LCM_FL_FLR_MASK;
+ int result = 0;
+ unsigned int seq;
+ int i, j;
+
+ ENTRY;
+
+ LASSERT(lsm->lsm_entry_count > 0);
+ LASSERT(lov->lo_lsm == NULL);
+ lov->lo_lsm = lsm_addref(lsm);
+ lov->lo_layout_invalid = true;
+
+ dump_lsm(D_INODE, lsm);
+
+ entry_count = lsm->lsm_entry_count;
+
+ spin_lock_init(&comp->lo_write_lock);
+ comp->lo_flags = lsm->lsm_flags;
+ /* lo_mirror_count is kept one larger than lsm_mirror_count */
+ comp->lo_mirror_count = lsm->lsm_mirror_count + 1;
+ comp->lo_entry_count = lsm->lsm_entry_count;
+ /* -1 means "not chosen yet"; decided near the end of this function */
+ comp->lo_preferred_mirror = -1;
+
+ /* presumably equi() tests logical equivalence, which would reject both
+ * non-FLR with several mirrors and FLR with one - TODO confirm */
+ if (equi(flr_state == LCM_FL_NONE, comp->lo_mirror_count > 1))
+ RETURN(-EINVAL);
+
+ OBD_ALLOC(comp->lo_mirrors,
+ comp->lo_mirror_count * sizeof(*comp->lo_mirrors));
+ if (comp->lo_mirrors == NULL)
+ RETURN(-ENOMEM);
+
+ OBD_ALLOC(comp->lo_entries, entry_count * sizeof(*comp->lo_entries));
+ if (comp->lo_entries == NULL)
+ RETURN(-ENOMEM);
+
+ /* Initiate all entry types and extents data at first */
+ /* i walks the entries, j is the mirror currently being filled and
+ * mirror_count counts the mirrors seen so far */
+ for (i = 0, j = 0, mirror_count = 1; i < entry_count; i++) {
+ int mirror_id = 0;
+
+ lle = &comp->lo_entries[i];
+
+ lle->lle_lsme = lsm->lsm_entries[i];
+ lle->lle_type = lov_entry_type(lle->lle_lsme);
+ switch (lle->lle_type) {
+ case LOV_PATTERN_RAID0:
+ lle->lle_comp_ops = &raid0_ops;
+ break;
+ case LOV_PATTERN_MDT:
+ lle->lle_comp_ops = &dom_ops;
+ break;
+ default:
+ CERROR("%s: unknown composite layout entry type %i\n",
+ lov2obd(dev->ld_lov)->obd_name,
+ lsm->lsm_entries[i]->lsme_pattern);
+ dump_lsm(D_ERROR, lsm);
+ RETURN(-EIO);
+ }
+
+ lle->lle_extent = &lle->lle_lsme->lsme_extent;
+ /* an entry is valid unless its component is flagged stale */
+ lle->lle_valid = !(lle->lle_lsme->lsme_flags & LCME_FL_STALE);
+
+ /* without FLR every entry stays in mirror id 0 */
+ if (flr_state != LCM_FL_NONE)
+ mirror_id = mirror_id_of(lle->lle_lsme->lsme_id);
+
+ lre = &comp->lo_mirrors[j];
+ if (i > 0) {
+ /* same mirror as the previous entry: extend it */
+ if (mirror_id == lre->lre_mirror_id) {
+ lre->lre_valid |= lle->lle_valid;
+ lre->lre_stale |= !lle->lle_valid;
+ lre->lre_end = i;
+ continue;
+ }
+
+ /* new mirror detected, assume that the mirrors
+ * are sorted in layout */
+ ++mirror_count;
+ ++j;
+ /* more mirrors than allocated slots: stop here, the
+ * sanity check below reports the mismatch */
+ if (j >= comp->lo_mirror_count)
+ break;
+
+ lre = &comp->lo_mirrors[j];
+ }
+
+ /* entries must be sorted by mirrors */
+ /* first entry of a mirror: start a new mirror record */
+ lre->lre_mirror_id = mirror_id;
+ lre->lre_start = lre->lre_end = i;
+ lre->lre_preferred = !!(lle->lle_lsme->lsme_flags &
+ LCME_FL_PREF_RD);
+ lre->lre_valid = lle->lle_valid;
+ lre->lre_stale = !lle->lle_valid;
+ }
+
+ /* sanity check for FLR */
+ /* NOTE(review): the message prints lo_mirror_count + 1 although the
+ * comparison is against lo_mirror_count - confirm this is intended */
+ if (mirror_count != comp->lo_mirror_count) {
+ CDEBUG(D_INODE, DFID
+ " doesn't have the # of mirrors it claims, %u/%u\n",
+ PFID(lu_object_fid(lov2lu(lov))), mirror_count,
+ comp->lo_mirror_count + 1);
+
+ GOTO(out, result = -EINVAL);
+ }
+
+ lov_foreach_layout_entry(lov, lle) {
+ int index = lov_layout_entry_index(lov, lle);
+
+ /**
+ * If the component has not been init-ed on MDS side, for
+ * PFL layout, we'd know that the components beyond this one
+ * will be dynamically init-ed later on file write/trunc ops.
+ */
+ if (!lsme_inited(lle->lle_lsme))
+ continue;
+
+ result = lle->lle_comp_ops->lco_init(env, dev, lov, index,
+ conf, lle);
+ /* on failure the loop stops, but the mirror selection below
+ * still runs before the error is returned */
+ if (result < 0)
+ break;
+
+ /* every successful lco_init must return the same value */
+ LASSERT(ergo(psz > 0, psz == result));
+ psz = result;
+ }
+
+ /* the common lco_init result is added to the page buffer size */
+ if (psz > 0)
+ cl_object_header(&lov->lo_cl)->coh_page_bufsize += psz;
+
+ /* decide the preferred mirror. It uses the hash value of lov_object
+ * so that different clients would use different mirrors for read. */
+ /* mirror_count is reused here to count the non-stale mirrors */
+ mirror_count = 0;
+ seq = hash_long((unsigned long)lov, 8);
+ for (i = 0; i < comp->lo_mirror_count; i++) {
+ unsigned int idx = (i + seq) % comp->lo_mirror_count;
+
+ lre = lov_mirror_entry(lov, idx);
+ if (lre->lre_stale)
+ continue;
+
+ mirror_count++; /* valid mirror */
+
+ /* take the first non-stale mirror; a later one flagged as
+ * preferred overrides the earlier choice */
+ if (lre->lre_preferred || comp->lo_preferred_mirror < 0)
+ comp->lo_preferred_mirror = idx;
+ }
+ /* every mirror is stale: fall back to mirror 0 */
+ if (!mirror_count) {
+ CDEBUG(D_INODE, DFID
+ " doesn't have any valid mirrors\n",
+ PFID(lu_object_fid(lov2lu(lov))));
+
+ comp->lo_preferred_mirror = 0;
+ }
+
+ LASSERT(comp->lo_preferred_mirror >= 0);
+
+ EXIT;
+out:
+ /* a positive result is the value returned by lco_init, not an error */
+ return result > 0 ? 0 : result;
+}
+
+/*
+ * Layout init method for the empty layout: there is no state to set up,
+ * so none of the arguments is used and 0 is always returned.
+ */
+static int lov_init_empty(const struct lu_env *env,
+			  struct lov_device *dev,
+			  struct lov_object *lov,
+			  struct lov_stripe_md *lsm,
+			  const struct cl_object_conf *conf,
+			  union lov_layout_state *state)
+{
+	return 0;
+}
+
+/*
+ * Layout init method for a released file.
+ *
+ * Asserts that \a lsm is present and marked released and that \a lov has
+ * no lsm attached yet, then stores a new reference to \a lsm in the
+ * object. Always returns 0.
+ */
+static int lov_init_released(const struct lu_env *env,
+			     struct lov_device *dev, struct lov_object *lov,
+			     struct lov_stripe_md *lsm,
+			     const struct cl_object_conf *conf,
+			     union lov_layout_state *state)
+{
+	LASSERT(lsm != NULL);
+	LASSERT(lsm->lsm_is_released);
+	LASSERT(lov->lo_lsm == NULL);
+
+	/* the object keeps its own reference on the stripe metadata */
+	lov->lo_lsm = lsm_addref(lsm);
+
+	return 0;
+}
+
+static int lov_init_foreign(const struct lu_env *env,
+ struct lov_device *dev, struct lov_object *lov,
+ struct lov_stripe_md *lsm,
+ const struct cl_object_conf *conf,