int llapi_heat_set(int fd, __u64 flags);
int llapi_layout_sanity(struct llapi_layout *layout, bool incomplete, bool flr);
void llapi_layout_sanity_perror(int error);
+int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size);
/** @} llapi */
le16_to_cpu(comp->lcm_entry_count) - 1); \
entry++)
-int lod_erase_dom_stripe(struct lov_comp_md_v1 *comp_v1)
+int lod_erase_dom_stripe(struct lov_comp_md_v1 *comp_v1,
+ struct lov_comp_md_entry_v1 *dom_ent)
{
- struct lov_comp_md_entry_v1 *ent, *dom_ent;
+ struct lov_comp_md_entry_v1 *ent;
__u16 entries;
__u32 dom_off, dom_size, comp_size;
void *blob_src, *blob_dst;
return -EFBIG;
comp_size = le32_to_cpu(comp_v1->lcm_size);
- dom_ent = &comp_v1->lcm_entries[0];
dom_off = le32_to_cpu(dom_ent->lcme_offset);
dom_size = le32_to_cpu(dom_ent->lcme_size);
return -ERESTART;
}
-int lod_fix_dom_stripe(struct lod_device *d, struct lov_comp_md_v1 *comp_v1)
+int lod_fix_dom_stripe(struct lod_device *d, struct lov_comp_md_v1 *comp_v1,
+ struct lov_comp_md_entry_v1 *dom_ent)
{
- struct lov_comp_md_entry_v1 *ent, *dom_ent;
+ struct lov_comp_md_entry_v1 *ent;
struct lu_extent *dom_ext, *ext;
struct lov_user_md_v1 *lum;
__u32 stripe_size;
__u16 mid, dom_mid;
int rc = 0;
- dom_ent = &comp_v1->lcm_entries[0];
dom_ext = &dom_ent->lcme_extent;
dom_mid = mirror_id_of(le32_to_cpu(dom_ent->lcme_id));
stripe_size = d->lod_dom_max_stripesize;
if (stripe_size == 0) {
/* DoM component size is zero due to server setting,
* remove it from the layout */
- rc = lod_erase_dom_stripe(comp_v1);
+ rc = lod_erase_dom_stripe(comp_v1, dom_ent);
} else {
/* Update DoM extent end finally */
dom_ext->e_end = cpu_to_le64(stripe_size);
lum = tmp.lb_buf;
if (lov_pattern(le32_to_cpu(lum->lmm_pattern)) ==
LOV_PATTERN_MDT) {
- /* DoM component can be only the first stripe */
+ /* DoM component must be the first in a mirror */
if (le64_to_cpu(ext->e_start) > 0) {
CDEBUG(D_LAYOUT, "invalid DoM component "
"with %llu extent start\n",
"%u is bigger than MDT limit %u, check "
"dom_max_stripesize parameter\n",
stripe_size, d->lod_dom_max_stripesize);
- rc = lod_fix_dom_stripe(d, comp_v1);
+ rc = lod_fix_dom_stripe(d, comp_v1, ent);
if (rc == -ERESTART) {
/* DoM entry was removed, re-check
* new layout from start */
struct lov_comp_md_v1 *cur_lcm;
struct lov_comp_md_v1 *merge_lcm;
struct lov_comp_md_entry_v1 *lcme;
+ struct lov_mds_md_v1 *lmm;
size_t size = 0;
size_t offset;
__u16 cur_entry_count;
__u16 mirror_id = 0;
__u32 mirror_count;
int rc, i;
+ bool merge_has_dom;
+
ENTRY;
merge_lcm = mbuf->lb_buf;
}
mirror_id = mirror_id_of(id) + 1;
+
+ /* check if first entry in new layout is DOM; lcme_offset is a
+ * little-endian field, so convert it before using it as an offset */
+ lmm = (struct lov_mds_md_v1 *)((char *)merge_lcm +
+ le32_to_cpu(merge_lcm->lcm_entries[0].lcme_offset));
+ merge_has_dom = lov_pattern(le32_to_cpu(lmm->lmm_pattern)) ==
+ LOV_PATTERN_MDT;
+
for (i = 0; i < merge_entry_count; i++) {
struct lov_comp_md_entry_v1 *merge_lcme;
*lcme = *merge_lcme;
lcme->lcme_offset = cpu_to_le32(offset);
+ if (merge_has_dom && i == 0)
+ lcme->lcme_flags |= cpu_to_le32(LCME_FL_STALE);
id = pflr_id(mirror_id, i + 1);
lcme->lcme_id = cpu_to_le32(id);
struct cl_device *mdcdev;
struct lov_oinfo *loi = NULL;
struct cl_object_conf *sconf = <i->lti_stripe_conf;
-
int rc;
__u32 idx = 0;
ENTRY;
- LASSERT(index == 0);
+ /* DOM entry may have a non-zero index due to FLR, but its extent must start at 0 */
+ if (unlikely(lle->lle_extent->e_start != 0)) {
+ CERROR("%s: DOM entry must be the first stripe in a mirror\n",
+ lov2obd(dev->ld_lov)->obd_name);
+ dump_lsm(D_ERROR, lov->lo_lsm);
+ RETURN(-EINVAL);
+ }
/* find proper MDS device */
rc = lov_fld_lookup(dev, fid, &idx);
int result = 0;
unsigned int seq;
int i, j;
+ /* extent end of the first DoM entry seen; 64-bit so that it can be
+ * compared against the extent end of later DoM entries */
+ __u64 dom_size = 0;
ENTRY;
lle->lle_comp_ops = &raid0_ops;
break;
case LOV_PATTERN_MDT:
+ /* Allowed to have several DOM stripes in different
+ * mirrors with the same DoM size.
+ */
+ if (!dom_size) {
+ dom_size = lle->lle_lsme->lsme_extent.e_end;
+ } else if (dom_size !=
+ lle->lle_lsme->lsme_extent.e_end) {
+ CERROR("%s: DOM entries with different sizes\n",
+ lov2obd(dev->ld_lov)->obd_name);
+ dump_lsm(D_ERROR, lsm);
+ RETURN(-EINVAL);
+ }
lle->lle_comp_ops = &dom_ops;
break;
default:
struct lov_layout_entry *entry;
lov_foreach_layout_entry(lov, entry)
- entry->lle_comp_ops->lco_fini(env, entry);
+ if (entry->lle_comp_ops)
+ entry->lle_comp_ops->lco_fini(env, entry);
OBD_FREE(comp->lo_entries,
comp->lo_entry_count * sizeof(*comp->lo_entries));
return 0;
}
+static int mdd_dom_data_truncate(const struct lu_env *env,
+ struct mdd_device *mdd, struct mdd_object *mo);
+
static int mdd_xattr_split(const struct lu_env *env, struct md_object *md_obj,
struct md_rejig_data *mrd)
{
struct lov_comp_md_v1 *lcm;
struct thandle *handle;
int rc;
+ bool dom_stripe = false;
+
ENTRY;
rc = lu_fid_cmp(mdo2fid(obj), mdo2fid(vic));
if (rc < 0)
GOTO(out, rc);
+ dom_stripe = mdd_lmm_dom_size(buf_vic->lb_buf) > 0;
+
rc = mdd_declare_xattr_set(env, mdd, obj, buf, XATTR_NAME_LOV,
LU_XATTR_SPLIT, handle);
if (rc)
mdd_obj_dev_name(obj), PFID(mdo2fid(obj)), rc2);
}
out:
- mdd_trans_stop(env, mdd, rc, handle);
+ rc = mdd_trans_stop(env, mdd, rc, handle);
+
+ /* Truncate local DOM data if all went well */
+ if (!rc && dom_stripe)
+ mdd_dom_data_truncate(env, mdd, obj);
+
mdd_write_unlock(env, obj);
mdd_write_unlock(env, vic);
lu_buf_free(buf_save);
struct lov_comp_md_v1 *comp_v1;
struct lov_mds_md *v1;
__u32 off;
+ /* set only once an entry with the MDT pattern is actually found */
+ bool has_dom = false;
int i;
if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_COMP_V1)
v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
/* DoM entry is the first entry always */
- if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) != LOV_PATTERN_MDT)
+ if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) != LOV_PATTERN_MDT &&
+ le16_to_cpu(comp_v1->lcm_mirror_count) == 0)
return LMM_NO_DOM;
- for (i = 1; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
+ for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
int j;
off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
+ if (lov_pattern(le32_to_cpu(v1->lmm_pattern)) ==
+ LOV_PATTERN_MDT)
+ has_dom = true;
+
for (j = 0; j < le16_to_cpu(v1->lmm_stripe_count); j++) {
/* if there is any object on OST */
if (le32_to_cpu(v1->lmm_objects[j].l_ost_idx) !=
return LMM_DOM_OST;
}
}
- return LMM_DOM_ONLY;
+ return has_dom ? LMM_DOM_ONLY : LMM_NO_DOM;
}
static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm)
$LFS migrate -c2 $dom ||
error "failed to migrate to the new composite layout"
- [ $($LFS getstripe -L $dom) == 'mdt' ] &&
+ [ $($LFS getstripe -L $dom) != 'mdt' ] ||
error "MDT stripe was not removed"
cancel_lru_locks mdc
local new_md5=$(md5sum $dom)
- [ "$old_md5" != "$new_md5" ] &&
+ [ "$old_md5" == "$new_md5" ] ||
error "$old_md5 != $new_md5"
# Skip free space checks with ZFS
cancel_lru_locks mdc
local new_md5=$(md5sum $dom)
- [ "$old_md5" != "$new_md5" ] &&
+ [ "$old_md5" == "$new_md5" ] ||
error "$old_md5 != $new_md5"
# Skip free space checks with ZFS
}
run_test 272c "DoM migration: DOM file to the OST-striped file (composite)"
+# Test 272d: create a file with an MDT (DoM) first component, add a mirror
+# that has only OST components, resync, then split off and destroy mirror 1.
+# Afterwards the layout must no longer be reported as 'mdt', the data
+# checksum must be unchanged and, on non-ZFS backends, the MDT free space
+# must have grown.
+test_272d() {
+ [ $MDS1_VERSION -lt $(version_code 2.12.55) ] &&
+ skip "Need MDS version at least 2.12.55"
+
+ local dom=$DIR/$tdir/$tfile
+ mkdir -p $DIR/$tdir
+ # composite layout: [0, 1M) with the 'mdt' pattern, the rest with -c1
+ $LFS setstripe -E 1M -L mdt -E -1 -c1 $dom
+
+ # derive the MDT name and test facet from the file's MDT index
+ local mdtidx=$($LFS getstripe -m $dom)
+ local mdtname=MDT$(printf %04x $mdtidx)
+ local facet=mds$((mdtidx + 1))
+
+ dd if=/dev/urandom of=$dom bs=2M count=1 oflag=direct ||
+ error "failed to write data into $dom"
+ local old_md5=$(md5sum $dom)
+ cancel_lru_locks mdc
+ # MDT free space before mirroring, compared against mdtfree2 below
+ local mdtfree1=$(do_facet $facet \
+ lctl get_param -n osd*.*$mdtname.kbytesfree)
+
+ # add a second mirror without an 'mdt' component, sync it, then
+ # destroy mirror 1
+ $LFS mirror extend -N -E 2M -c1 -E -1 -c2 $dom ||
+ error "failed mirroring to the new composite layout"
+ $LFS mirror resync $dom ||
+ error "failed mirror resync"
+ $LFS mirror split --mirror-id 1 -d $dom ||
+ error "failed mirror split"
+
+ [ $($LFS getstripe -L $dom) != 'mdt' ] ||
+ error "MDT stripe was not removed"
+
+ # the file content must be the same after the layout change
+ cancel_lru_locks mdc
+ local new_md5=$(md5sum $dom)
+ [ "$old_md5" == "$new_md5" ] ||
+ error "$old_md5 != $new_md5"
+
+ # Skip free space checks with ZFS
+ if [ "$(facet_fstype $facet)" != "zfs" ]; then
+ local mdtfree2=$(do_facet $facet \
+ lctl get_param -n osd*.*$mdtname.kbytesfree)
+ [ $mdtfree2 -gt $mdtfree1 ] ||
+ error "MDS space is not freed after DOM mirror deletion"
+ fi
+ return 0
+}
+run_test 272d "DoM mirroring: OST-striped mirror to DOM file"
+
+# Test 272e: create a 2-stripe OST file, add a mirror whose first component
+# uses the 'mdt' pattern, resync, then split off and destroy mirror 1 and
+# verify that the data checksum is unchanged.
+test_272e() {
+ [ $MDS1_VERSION -lt $(version_code 2.12.55) ] &&
+ skip "Need MDS version at least 2.12.55"
+
+ local dom=$DIR/$tdir/$tfile
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -c 2 $dom
+
+ dd if=/dev/urandom of=$dom bs=512K count=1 oflag=direct ||
+ error "failed to write data into $dom"
+ local old_md5=$(md5sum $dom)
+ cancel_lru_locks mdc
+
+ # add a mirror with [0, 1M) on the 'mdt' pattern and the rest on -c2,
+ # sync it, then destroy mirror 1
+ $LFS mirror extend -N -E 1M -L mdt -E eof -c2 $dom ||
+ error "failed mirroring to the DOM layout"
+ $LFS mirror resync $dom ||
+ error "failed mirror resync"
+ $LFS mirror split --mirror-id 1 -d $dom ||
+ error "failed mirror split"
+
+ # NOTE(review): mirror 1 is presumably the original OST-striped
+ # mirror, so the remaining layout would be the DoM one - confirm that
+ # this check and its error message are what is intended here
+ [ $($LFS getstripe -L $dom) != 'mdt' ] ||
+ error "MDT stripe was not removed"
+
+ # the file content must be the same after the layout change
+ cancel_lru_locks mdc
+ local new_md5=$(md5sum $dom)
+ [ "$old_md5" == "$new_md5" ] ||
+ error "$old_md5 != $new_md5"
+
+ return 0
+}
+run_test 272e "DoM mirroring: DOM mirror to the OST-striped file"
+
+# Test 272f: create a 2-stripe OST file, migrate it with 'lfs migrate' to a
+# composite layout whose first component uses the 'mdt' pattern, and verify
+# that the data checksum is unchanged.
+test_272f() {
+	[ $MDS1_VERSION -lt $(version_code 2.12.55) ] &&
+		skip "Need MDS version at least 2.12.55"
+
+	local dom=$DIR/$tdir/$tfile
+	mkdir -p $DIR/$tdir
+	$LFS setstripe -c 2 $dom
+
+	dd if=/dev/urandom of=$dom bs=512K count=1 oflag=direct ||
+		error "failed to write data into $dom"
+	local old_md5=$(md5sum $dom)
+	cancel_lru_locks mdc
+
+	$LFS migrate -E 1M -L mdt -E eof -c2 -v $dom ||
+		error "failed migrating to the DOM file"
+
+	# cancel cached MDC locks before computing the checksum again
+	cancel_lru_locks mdc
+	local new_md5=$(md5sum $dom)
+	# same "expected condition || error" form as the other 272 tests
+	[ "$old_md5" == "$new_md5" ] ||
+		error "$old_md5 != $new_md5"
+
+	return 0
+}
+run_test 272f "DoM migration: OST-striped file to DOM file"
+
test_273a() {
[ $MDS1_VERSION -lt $(version_code 2.11.50) ] &&
skip "Need MDS version at least 2.11.50"
static int lfs_pcc_state(int argc, char **argv);
static int lfs_pcc(int argc, char **argv);
static int lfs_pcc_list_commands(int argc, char **argv);
+static int lfs_migrate_to_dom(int fd, int fdv, char *name,
+ __u64 migration_flags,
+ struct llapi_stripe_param *param,
+ struct llapi_layout *layout);
enum setstripe_origin {
SO_SETSTRIPE,
struct llapi_stripe_param *param,
struct llapi_layout *layout)
{
+ struct llapi_layout *existing;
+ uint64_t dom_new, dom_cur;
int fd = -1;
int fdv = -1;
int rc;
if (rc < 0)
goto out;
+ rc = llapi_layout_dom_size(layout, &dom_new);
+ if (rc) {
+ error_loc = "cannot get new layout DoM size";
+ goto out;
+ }
+ /* special case for migration to DOM layout */
+ existing = llapi_layout_get_by_fd(fd, 0);
+ if (!existing) {
+ /* rc is still 0 at this point, so turn the failure into a
+ * negative error code instead of reporting success */
+ rc = errno ? -errno : -EINVAL;
+ error_loc = "cannot get existing layout";
+ goto out;
+ }
+
+ rc = llapi_layout_dom_size(existing, &dom_cur);
+ /* the existing layout is needed only for its DoM size */
+ llapi_layout_free(existing);
+ if (rc) {
+ error_loc = "cannot get current layout DoM size";
+ goto out;
+ }
+
+ /* if file has DoM layout already then migration is possible to
+ * the new layout with the same DoM component via swap layout,
+ * if new layout used bigger DOM size, then mirroring is used
+ */
+ if (dom_new > dom_cur) {
+ rc = lfs_migrate_to_dom(fd, fdv, name, migration_flags, param,
+ layout);
+ if (rc)
+ error_loc = "cannot migrate to DOM layout";
+ goto out_closed;
+ }
+
+
if (!(migration_flags & MIGRATION_NONBLOCK)) {
/* Blocking mode (forced if servers do not support file lease).
* It is also the default mode, since we cannot distinguish
if (fdv >= 0)
close(fdv);
-
+out_closed:
if (rc < 0)
fprintf(stderr, "error: %s: %s: %s: %s\n",
progname, name, error_loc, strerror(-rc));
return rc;
}
+static inline
+int lfs_mirror_resync_file(const char *fname, struct ll_ioc_lease *ioc,
+ __u16 *mirror_ids, int ids_nr);
+
+/**
+ * Migrate a file to a new layout by mirroring instead of layout swap.
+ *
+ * Takes a read lease on \a fd, releases it with LL_LEASE_LAYOUT_MERGE so
+ * that the layout of \a fdv is merged into the file, resyncs the file by
+ * name and finally destroys mirror 1.
+ * Both descriptors are closed on every path before returning.
+ *
+ * \param[in] fd		descriptor of the file being migrated
+ * \param[in] fdv		descriptor whose layout is merged into \a fd
+ * \param[in] name		file name, used for resync, split and messages
+ * \param[in] migration_flags	MIGRATION_VERBOSE prints \a name on success
+ * \param[in] param		not used by this function
+ * \param[in] layout		not used by this function
+ *
+ * \retval 0 on success, otherwise the error code of the step that failed
+ *	   (an error message is printed when it is negative)
+ */
+static int lfs_migrate_to_dom(int fd, int fdv, char *name,
+			      __u64 migration_flags,
+			      struct llapi_stripe_param *param,
+			      struct llapi_layout *layout)
+{
+	struct ll_ioc_lease *data = NULL;
+	int rc;
+
+	rc = llapi_lease_acquire(fd, LL_LEASE_RDLCK);
+	if (rc < 0) {
+		error_loc = "cannot get lease";
+		goto out_close;
+	}
+
+	/* Atomically put lease, merge layouts, resync and close. */
+	data = calloc(1, offsetof(typeof(*data), lil_ids[1024]));
+	if (!data) {
+		/* rc still holds the non-negative lease result here, so
+		 * set a real error or the failure would look like success
+		 */
+		rc = -ENOMEM;
+		error_loc = "memory allocation";
+		goto out_close;
+	}
+	data->lil_mode = LL_LEASE_UNLCK;
+	data->lil_flags = LL_LEASE_LAYOUT_MERGE;
+	data->lil_count = 1;
+	data->lil_ids[0] = fdv;
+	rc = llapi_lease_set(fd, data);
+	if (rc < 0) {
+		error_loc = "cannot merge layout";
+		goto out_close;
+	} else if (rc == 0) {
+		rc = -EBUSY;
+		error_loc = "lost lease lock";
+		goto out_close;
+	}
+	/* descriptors are not needed any more, the rest works by name */
+	close(fd);
+	close(fdv);
+
+	rc = lfs_mirror_resync_file(name, data, NULL, 0);
+	if (rc) {
+		error_loc = "cannot resync file";
+		goto out;
+	}
+
+	/* delete first mirror now */
+	rc = mirror_split(name, 1, NULL, MF_DESTROY, NULL);
+	if (rc < 0)
+		error_loc = "cannot delete old layout";
+	/* fd and fdv are already closed, skip out_close */
+	goto out;
+
+out_close:
+	close(fd);
+	close(fdv);
+out:
+	if (rc < 0)
+		fprintf(stderr, "error: %s: %s: %s: %s\n",
+			progname, name, error_loc, strerror(-rc));
+	else if (migration_flags & MIGRATION_VERBOSE)
+		printf("%s\n", name);
+	free(data);
+	return rc;
+}
+
/**
* Parse a string containing an target index list into an array of integers.
*
LSE_FLAGS,
LSE_DOM_EXTENSION,
LSE_DOM_EXTENSION_FOLLOWING,
- LSE_DOM_FLR,
+ LSE_DOM_FIRST,
LSE_SET_COMP_START,
LSE_NOT_ZERO_LENGTH_EXTENDABLE,
LSE_END_NOT_GREATER,
"DoM components can't be extension space",
[LSE_DOM_EXTENSION_FOLLOWING] =
"DoM components cannot be followed by extension space",
- [LSE_DOM_FLR] =
- "FLR and DoM are not supported together",
+ [LSE_DOM_FIRST] =
+ "DoM component should be the first one in a file/mirror",
[LSE_SET_COMP_START] =
"Must set previous component extent before adding next",
[LSE_NOT_ZERO_LENGTH_EXTENDABLE] =
goto out_err;
}
- /* DoM and FLR are not supported together */
- if (args->lsa_flr && first_comp) {
- args->lsa_rc = LSE_DOM_FLR;
- errno = ENOTSUP;
+ /* DoM should be the first component in a mirror */
+ if (!first_comp) {
+ args->lsa_rc = LSE_DOM_FIRST;
+ errno = EINVAL;
goto out_err;
}
}
return rc;
}
+
+/**
+ * Get the DoM size of a layout from its first component.
+ *
+ * A NULL or non-composite layout, or a layout whose first component does
+ * not use the MDT pattern, has no DoM part and yields a size of 0.
+ * Otherwise \a size receives the extent end of the first component.
+ * The current component of \a layout is moved to the first one.
+ *
+ * \param[in]  layout	layout to inspect, may be NULL
+ * \param[out] size	DoM size; not written when an error is returned
+ *			before the extent is read
+ *
+ * \retval 0		on success
+ * \retval -ERANGE	the MDT component extent does not start at 0
+ * \retval -errno	a layout accessor failed
+ */
+int llapi_layout_dom_size(struct llapi_layout *layout, uint64_t *size)
+{
+	uint64_t comp_pattern;
+	uint64_t ext_start;
+
+	/* nothing to look at for missing or plain layouts */
+	if (layout == NULL || !llapi_layout_is_composite(layout))
+		goto no_dom;
+
+	if (llapi_layout_comp_use(layout, LLAPI_LAYOUT_COMP_USE_FIRST) != 0)
+		return -errno;
+
+	if (llapi_layout_pattern_get(layout, &comp_pattern) != 0)
+		return -errno;
+
+	/* both spellings of the MDT pattern are accepted */
+	if (comp_pattern != LOV_PATTERN_MDT &&
+	    comp_pattern != LLAPI_LAYOUT_MDT)
+		goto no_dom;
+
+	if (llapi_layout_comp_extent_get(layout, &ext_start, size) != 0)
+		return -errno;
+
+	return ext_start != 0 ? -ERANGE : 0;
+
+no_dom:
+	*size = 0;
+	return 0;
+}
+