+ /**
+ * primary with all OSTs are available, this is the perfect
+ * 1st pick.
+ */
+ if (lo->ldo_mirrors[index].lme_primary)
+ break;
+ } /* for all mirrors */
+
+ /* failed to pick a sound mirror, lower our expectation */
+ if (picked < 0)
+ picked = second_pick;
+ if (picked < 0)
+ picked = third_pick;
+ if (picked < 0)
+ RETURN(-ENODATA);
+
+ RETURN(picked);
+}
+
+/**
+ * Collect the components that need instantiating before a resync.
+ *
+ * Every stale mirror of \a lo is walked from its first component; the walk
+ * of a mirror stops at the first component that does not overlap \a extent.
+ * Each overlapping component is counted, and the index of every one that is
+ * not initialized yet is appended to info->lti_comp_idx[] (info->lti_count
+ * is advanced accordingly).
+ *
+ * \param[in] env	execution environment, used to reach the thread info
+ * \param[in] lo	object whose mirrors are inspected
+ * \param[in] extent	range that the resync has to cover
+ *
+ * \retval 0		at least one component of a stale mirror overlaps
+ *			\a extent
+ * \retval -EALREADY	no component of any stale mirror overlaps \a extent
+ */
+static int lod_prepare_resync(const struct lu_env *env, struct lod_object *lo,
+			      struct lu_extent *extent)
+{
+	struct lod_thread_info *info = lod_env_info(env);
+	struct lod_layout_component *comp;
+	unsigned int overlapped = 0;
+	int mirror;
+
+	CDEBUG(D_LAYOUT,
+	       DFID": instantiate all stale components in "DEXT"\n",
+	       PFID(lod_object_fid(lo)), PEXT(extent));
+
+	/* mirrors that are not stale are left alone */
+	for (mirror = 0; mirror < lo->ldo_mirror_count; mirror++) {
+		if (lo->ldo_mirrors[mirror].lme_stale) {
+			lod_foreach_mirror_comp(comp, lo, mirror) {
+				/* first component outside the extent ends
+				 * the walk of this mirror */
+				if (!lu_extent_is_overlapped(extent,
+							&comp->llc_extent))
+					break;
+
+				overlapped++;
+
+				/* only uninitialized components are queued */
+				if (!lod_comp_inited(comp)) {
+					CDEBUG(D_LAYOUT,
+					       "resync instantiate %d / %d\n",
+					       mirror,
+					       lod_comp_index(lo, comp));
+					info->lti_comp_idx[info->lti_count] =
+						lod_comp_index(lo, comp);
+					info->lti_count++;
+				}
+			}
+		}
+	}
+
+	if (overlapped == 0)
+		return -EALREADY;
+
+	return 0;
+}
+/**
+ * Declare the layout update for a mirrored file that is in the
+ * LCM_FL_RDONLY state (asserted on entry).
+ *
+ * mlc->mlc_opc must be MD_LAYOUT_WRITE or MD_LAYOUT_RESYNC (asserted).
+ *
+ * MD_LAYOUT_WRITE: a mirror is chosen with lod_primary_pick(), the
+ * overlapping components of the other mirrors are passed to
+ * lod_stale_components(), the not yet initialized components of the chosen
+ * mirror that overlap [0, end of intent) are queued in info->lti_comp_idx[],
+ * and lo->ldo_flr_state becomes LCM_FL_WRITE_PENDING.
+ *
+ * MD_LAYOUT_RESYNC: the extent [0, e_end) covered by the leading initialized
+ * components of the non-stale mirrors is computed, lod_prepare_resync()
+ * queues the components to instantiate, and lo->ldo_flr_state becomes
+ * LCM_FL_SYNC_PENDING.
+ *
+ * Afterwards lod_obj_inc_layout_gen() is called, and the instantiation of
+ * the queued components plus an LA_LAYOUT_VERSION attribute update are
+ * declared against \a th.
+ *
+ * \param[in] env execution environment
+ * \param[in,out] lo object whose in-memory layout state is modified
+ * \param[in] mlc layout change request; mlc_intent is read for MD_LAYOUT_WRITE
+ * \param[in] th transaction handle handed to the declare calls
+ *
+ * \retval 0 on success
+ * \retval negative value returned by lod_primary_pick() (returned directly,
+ * without the cleanup at the out label), lod_prepare_resync(),
+ * lod_declare_instantiate_components() or lod_declare_attr_set();
+ * for the last three lod_striping_free() is called before returning
+ */
+static int lod_declare_update_rdonly(const struct lu_env *env,
+ struct lod_object *lo, struct md_layout_change *mlc,
+ struct thandle *th)
+{
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_attr *layout_attr = &info->lti_layout_attr;
+ struct lod_layout_component *lod_comp;
+ struct lu_extent extent = { 0 }; /* RESYNC grows e_end from 0 below */
+ int rc;
+ ENTRY;
+
+ LASSERT(lo->ldo_flr_state == LCM_FL_RDONLY);
+ LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
+ mlc->mlc_opc == MD_LAYOUT_RESYNC);
+ LASSERT(lo->ldo_mirror_count > 0);
+
+ if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+ struct layout_intent *layout = mlc->mlc_intent;
+ int picked; /* index into lo->ldo_mirrors[], negative on error */
+
+ extent = layout->li_extent;
+ CDEBUG(D_LAYOUT, DFID": trying to write :"DEXT"\n",
+ PFID(lod_object_fid(lo)), PEXT(&extent));
+
+ picked = lod_primary_pick(env, lo, &extent);
+ if (picked < 0)
+ RETURN(picked); /* leaves directly, the out label is not reached */
+
+ CDEBUG(D_LAYOUT, DFID": picked mirror id %u as primary\n",
+ PFID(lod_object_fid(lo)),
+ lo->ldo_mirrors[picked].lme_id);
+
+ if (layout->li_opc == LAYOUT_INTENT_TRUNC) {
+ /**
+ * For truncate the intent extent is [0, size): the
+ * components that have to be staled are the ones
+ * overlapping [size, eof), so shift the extent there.
+ * e_start keeps the original e_end so that it can be
+ * restored afterwards.
+ */
+ extent.e_start = extent.e_end;
+ extent.e_end = OBD_OBJECT_EOF;
+ }
+
+ /* stale overlapping components from other mirrors */
+ lod_stale_components(lo, picked, &extent);
+
+ /* restore truncate intent extent: e_start still holds the
+ * end offset that was saved above */
+ if (layout->li_opc == LAYOUT_INTENT_TRUNC)
+ extent.e_end = extent.e_start;
+
+ /* instantiate components for the picked mirror, start from 0:
+ * every component overlapping [0, e_end) that is not
+ * initialized yet is queued in info->lti_comp_idx[] */
+ extent.e_start = 0;
+
+ lod_foreach_mirror_comp(lod_comp, lo, picked) {
+ if (!lu_extent_is_overlapped(&extent,
+ &lod_comp->llc_extent))
+ break; /* first non-overlapping component ends the walk */
+
+ if (lod_comp_inited(lod_comp))
+ continue;
+
+ info->lti_comp_idx[info->lti_count++] =
+ lod_comp_index(lo, lod_comp);
+ }
+
+ lo->ldo_flr_state = LCM_FL_WRITE_PENDING;
+ } else { /* MD_LAYOUT_RESYNC */
+ int i;
+
+ /**
+ * The file could contain multiple non-stale mirrors and any
+ * of them could be picked as the primary mirror, so the
+ * resync range has to cover all of them: for each non-stale
+ * mirror walk its leading initialized components and keep
+ * the largest end offset seen in extent.e_end (e_start
+ * stays 0 from the initializer).
+ */
+ for (i = 0; i < lo->ldo_mirror_count; i++) {
+ if (lo->ldo_mirrors[i].lme_stale)
+ continue;
+
+ lod_foreach_mirror_comp(lod_comp, lo, i) {
+ if (!lod_comp_inited(lod_comp))
+ break;
+
+ if (extent.e_end < lod_comp->llc_extent.e_end)
+ extent.e_end =
+ lod_comp->llc_extent.e_end;
+ }
+ }
+
+ rc = lod_prepare_resync(env, lo, &extent);
+ if (rc)
+ GOTO(out, rc);
+ /* change the file state to SYNC_PENDING */
+ lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
+ }
+
+ /* Reset the layout version once it's becoming too large.
+ * This way it can make sure that the layout version is
+ * monotonically increased in this writing era.
+ * The generation is incremented first; if it then exceeds half of
+ * LCME_ID_MAX it is replaced by a random value masked to its low
+ * 16 bits. */
+ lod_obj_inc_layout_gen(lo);
+ if (lo->ldo_layout_gen > (LCME_ID_MAX >> 1)) {
+ __u32 layout_version;
+
+ cfs_get_random_bytes(&layout_version, sizeof(layout_version));
+ lo->ldo_layout_gen = layout_version & 0xffff;
+ }
+
+ rc = lod_declare_instantiate_components(env, lo, th);
+ if (rc)
+ GOTO(out, rc);
+
+ layout_attr->la_valid = LA_LAYOUT_VERSION;
+ layout_attr->la_layout_version = 0; /* set current version */
+ if (mlc->mlc_opc == MD_LAYOUT_RESYNC)
+ layout_attr->la_layout_version = LU_LAYOUT_RESYNC;
+ rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+ if (rc)
+ GOTO(out, rc);
+
+out:
+ if (rc)
+ lod_striping_free(env, lo); /* NOTE(review): presumably drops the in-memory layout changed above - confirm */
+ RETURN(rc);
+}
+
+static int lod_declare_update_write_pending(const struct lu_env *env,
+ struct lod_object *lo, struct md_layout_change *mlc,
+ struct thandle *th)
+{
+ struct lod_thread_info *info = lod_env_info(env);
+ struct lu_attr *layout_attr = &info->lti_layout_attr;
+ struct lod_layout_component *lod_comp;
+ struct lu_extent extent = { 0 };
+ int primary = -1;
+ int i;
+ int rc;
+ ENTRY;
+
+ LASSERT(lo->ldo_flr_state == LCM_FL_WRITE_PENDING);
+ LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
+ mlc->mlc_opc == MD_LAYOUT_RESYNC);
+
+ /* look for the primary mirror */
+ for (i = 0; i < lo->ldo_mirror_count; i++) {
+ if (lo->ldo_mirrors[i].lme_stale)
+ continue;
+
+ LASSERTF(primary < 0, DFID " has multiple primary: %u / %u",
+ PFID(lod_object_fid(lo)),
+ lo->ldo_mirrors[i].lme_id,
+ lo->ldo_mirrors[primary].lme_id);
+
+ primary = i;
+ }
+ if (primary < 0) {
+ CERROR(DFID ": doesn't have a primary mirror\n",
+ PFID(lod_object_fid(lo)));
+ GOTO(out, rc = -ENODATA);
+ }
+
+ CDEBUG(D_LAYOUT, DFID": found primary %u\n",
+ PFID(lod_object_fid(lo)), lo->ldo_mirrors[primary].lme_id);
+
+ LASSERT(!lo->ldo_mirrors[primary].lme_stale);
+
+ /* for LAYOUT_WRITE opc, it has to do the following operations:
+ * 1. stale overlapping componets from stale mirrors;
+ * 2. instantiate components of the primary mirror;
+ * 3. transfter layout version to all objects of the primary;
+ *
+ * for LAYOUT_RESYNC opc, it will do:
+ * 1. instantiate components of all stale mirrors;
+ * 2. transfer layout version to all objects to close write era. */
+
+ if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+ LASSERT(mlc->mlc_intent != NULL);
+
+ extent = mlc->mlc_intent->li_extent;
+
+ CDEBUG(D_LAYOUT, DFID": intent to write: "DEXT"\n",
+ PFID(lod_object_fid(lo)), PEXT(&extent));
+
+ if (mlc->mlc_intent->li_opc == LAYOUT_INTENT_TRUNC) {
+ /**
+ * trunc transfers [0, size) in the intent extent, we'd
+ * stale components overlapping [size, eof).
+ */
+ extent.e_start = extent.e_end;
+ extent.e_end = OBD_OBJECT_EOF;
+ }
+ /* 1. stale overlapping components */
+ lod_stale_components(lo, primary, &extent);
+
+ /* 2. find out the components need instantiating.
+ * instantiate [0, mlc->mlc_intent->e_end) */
+
+ /* restore truncate intent extent */
+ if (mlc->mlc_intent->li_opc == LAYOUT_INTENT_TRUNC)
+ extent.e_end = extent.e_start;
+ extent.e_start = 0;
+
+ lod_foreach_mirror_comp(lod_comp, lo, primary) {
+ if (!lu_extent_is_overlapped(&extent,
+ &lod_comp->llc_extent))
+ break;
+
+ if (lod_comp_inited(lod_comp))
+ continue;
+
+ CDEBUG(D_LAYOUT, "write instantiate %d / %d\n",
+ primary, lod_comp_index(lo, lod_comp));
+ info->lti_comp_idx[info->lti_count++] =
+ lod_comp_index(lo, lod_comp);
+ }
+ } else { /* MD_LAYOUT_RESYNC */
+ lod_foreach_mirror_comp(lod_comp, lo, primary) {
+ if (!lod_comp_inited(lod_comp))
+ break;
+
+ extent.e_end = lod_comp->llc_extent.e_end;
+ }
+
+ rc = lod_prepare_resync(env, lo, &extent);