From aea66b7d3fdc37d61704bd5df2a25a4747a6cce5 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Mon, 25 Oct 2021 16:45:29 +0800 Subject: [PATCH] LU-14642 flr: abolish MDS transfer layout version to OST Stop setting the layout version on OST objects from the MDS. Instead, client write requests carry the new layout version; the OST object rejects writes that carry an older layout version and updates its own layout version accordingly. Signed-off-by: Bobi Jam Change-Id: I655044f69a4509a2b0cfe99f86de2ce4ee846979 --- lustre/include/obd_support.h | 2 +- lustre/include/uapi/linux/lustre/lustre_user.h | 13 ++- lustre/llite/file.c | 28 ++--- lustre/lod/lod_internal.h | 4 +- lustre/lod/lod_object.c | 106 ------------------- lustre/lod/lod_qos.c | 1 + lustre/mdd/mdd_object.c | 2 +- lustre/ofd/ofd_internal.h | 17 +++ lustre/ofd/ofd_io.c | 28 ++--- lustre/ofd/ofd_objects.c | 16 +-- lustre/osp/osp_sync.c | 7 -- lustre/tests/sanity-flr.sh | 137 ++++++++++++++++++++++--- 12 files changed, 176 insertions(+), 185 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4322ae2..7990639 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -683,7 +683,7 @@ extern char obd_jobid_var[]; /* FLR */ #define OBD_FAIL_FLR_GLIMPSE_IMMUTABLE 0x1A00 #define OBD_FAIL_FLR_LV_DELAY 0x1A01 -#define OBD_FAIL_FLR_LV_INC 0x1A02 +#define OBD_FAIL_FLR_LV_INC 0x1A02 /* not used */ #define OBD_FAIL_FLR_RANDOM_PICK_MIRROR 0x1A03 /* DT */ diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index aefead0..138a935 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -886,10 +886,6 @@ enum lov_comp_md_entry_flags { #define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \ LCME_FL_EXTENSION) -/* the highest bit in obdo::o_layout_version is used to mark if the file is - * being resynced. 
*/ -#define LU_LAYOUT_RESYNC LCME_FL_NEG - /* lcme_id can be specified as certain flags, and the the first * bit of lcme_id is used to indicate that the ID is representing * certain LCME_FL_* but not a real ID. Which implies we can have @@ -901,6 +897,15 @@ enum lcme_id { LCME_ID_NOT_ID = LCME_FL_NEG }; +/* layout version equals to lcme_id, except some bits have special meanings */ +enum layout_version_flags { + /* layout version reaches the high water mark to be increased to + * circularly reuse the smallest value */ + LU_LAYOUT_HIGEN = 0x40000000, + /* the highest bit is used to mark if the file is being resynced */ + LU_LAYOUT_RESYNC = 0x80000000, +}; + #define LCME_ID_MASK LCME_ID_MAX struct lov_comp_md_entry_v1 { diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 28d5226..5284b92 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -3624,6 +3624,7 @@ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc, bool lease_broken = false; fmode_t fmode = 0; enum mds_op_bias bias = 0; + int fdv; struct file *layout_file = NULL; void *data = NULL; size_t data_size = 0; @@ -3660,16 +3661,15 @@ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc, bias = MDS_CLOSE_RESYNC_DONE; break; case LL_LEASE_LAYOUT_MERGE: { - int fd; if (ioc->lil_count != 1) GOTO(out_lease_close, rc = -EINVAL); arg += sizeof(*ioc); - if (copy_from_user(&fd, (void __user *)arg, sizeof(__u32))) + if (copy_from_user(&fdv, (void __user *)arg, sizeof(__u32))) GOTO(out_lease_close, rc = -EFAULT); - layout_file = fget(fd); + layout_file = fget(fdv); if (!layout_file) GOTO(out_lease_close, rc = -EBADF); @@ -3682,7 +3682,6 @@ static long ll_file_unlock_lease(struct file *file, struct ll_ioc_lease *ioc, break; } case LL_LEASE_LAYOUT_SPLIT: { - int fdv; int mirror_id; if (ioc->lil_count != 2) @@ -3753,28 +3752,23 @@ out_lease_close: EXIT; out: - switch (ioc->lil_flags) { - case LL_LEASE_RESYNC_DONE: - if (data) - OBD_FREE(data, data_size); - 
break; - case LL_LEASE_LAYOUT_MERGE: - case LL_LEASE_LAYOUT_SPLIT: - if (layout_file) - fput(layout_file); + if (ioc->lil_flags == LL_LEASE_RESYNC_DONE && data) + OBD_FREE(data, data_size); - ll_layout_refresh(inode, &fd->fd_layout_version); - break; - case LL_LEASE_PCC_ATTACH: + if (layout_file) + fput(layout_file); + + if (ioc->lil_flags == LL_LEASE_PCC_ATTACH) { if (!rc) rc = rc2; rc = pcc_readwrite_attach_fini(file, inode, param.pa_layout_gen, lease_broken, rc, attached); - break; } + ll_layout_refresh(inode, &fd->fd_layout_version); + if (!rc) rc = ll_lease_type_from_fmode(fmode); RETURN(rc); diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index 61d5199..77987e7 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -390,8 +390,10 @@ static inline void lod_obj_inc_layout_gen(struct lod_object *lo) lo->ldo_layout_gen++; lo->ldo_layout_gen |= preserve; /* Zero is not a valid generation */ - if (unlikely((lo->ldo_layout_gen & LCME_ID_MASK) == 0)) + if (unlikely((lo->ldo_layout_gen & LCME_ID_MASK) == 0)) { lo->ldo_layout_gen++; + lo->ldo_layout_gen &= ~LU_LAYOUT_RESYNC; + } } struct lod_it { diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 1003ad1..999dfc8 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3277,7 +3277,6 @@ static int lod_declare_layout_merge(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); - struct lu_attr *layout_attr = &info->lti_layout_attr; struct lu_buf *buf = &info->lti_buf; struct lod_object *lo = lod_dt_obj(dt); struct lov_comp_md_v1 *lcm; @@ -3424,20 +3423,6 @@ static int lod_declare_layout_merge(const struct lu_env *env, lod_obj_inc_layout_gen(lo); lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen); - /* transfer layout version to OST objects. 
*/ - if (lo->ldo_mirror_count > 1) { - struct lod_obj_stripe_cb_data data = { {0} }; - - layout_attr->la_valid = LA_LAYOUT_VERSION; - layout_attr->la_layout_version = 0; - data.locd_attr = layout_attr; - data.locd_declare = true; - data.locd_stripe_cb = lod_obj_stripe_attr_set_cb; - rc = lod_obj_for_each_stripe(env, lo, th, &data); - if (rc) - GOTO(out, rc); - } - rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf, XATTR_NAME_LOV, LU_XATTR_REPLACE, th); @@ -3453,8 +3438,6 @@ static int lod_declare_layout_split(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *mbuf, struct thandle *th) { - struct lod_thread_info *info = lod_env_info(env); - struct lu_attr *layout_attr = &info->lti_layout_attr; struct lod_object *lo = lod_dt_obj(dt); struct lov_comp_md_v1 *lcm = mbuf->lb_buf; int rc; @@ -3468,21 +3451,6 @@ static int lod_declare_layout_split(const struct lu_env *env, /* fix on-disk layout gen */ lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen); - - /* transfer layout version to OST objects. 
*/ - if (lo->ldo_mirror_count > 1) { - struct lod_obj_stripe_cb_data data = { {0} }; - - layout_attr->la_valid = LA_LAYOUT_VERSION; - layout_attr->la_layout_version = 0; - data.locd_attr = layout_attr; - data.locd_declare = true; - data.locd_stripe_cb = lod_obj_stripe_attr_set_cb; - rc = lod_obj_for_each_stripe(env, lo, th, &data); - if (rc) - RETURN(rc); - } - rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), mbuf, XATTR_NAME_LOV, LU_XATTR_REPLACE, th); RETURN(rc); @@ -4783,9 +4751,7 @@ static int lod_xattr_set(const struct lu_env *env, const char *name, int fl, struct thandle *th) { struct dt_object *next = dt_object_child(dt); - struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr; struct lod_object *lo = lod_dt_obj(dt); - struct lod_obj_stripe_cb_data data = { {0} }; int rc = 0; ENTRY; @@ -4888,21 +4854,6 @@ static int lod_xattr_set(const struct lu_env *env, rc = lod_striping_reload(env, lo, buf, LVF_ALL_STALE); if (rc) RETURN(rc); - - if (lo->ldo_mirror_count > 1 && - layout_attr->la_valid & LA_LAYOUT_VERSION) { - /* mirror split */ - layout_attr->la_layout_version = - lo->ldo_layout_gen; - data.locd_attr = layout_attr; - data.locd_declare = false; - data.locd_stripe_cb = - lod_obj_stripe_attr_set_cb; - rc = lod_obj_for_each_stripe(env, lo, th, - &data); - if (rc) - RETURN(rc); - } } else if (fl & LU_XATTR_PURGE) { rc = lod_layout_purge(env, dt, buf, th); } else if (dt_object_remote(dt)) { @@ -4935,21 +4886,6 @@ static int lod_xattr_set(const struct lu_env *env, rc = lod_striped_create(env, dt, NULL, NULL, th); if (rc) RETURN(rc); - - if (fl & LU_XATTR_MERGE && lo->ldo_mirror_count > 1 && - layout_attr->la_valid & LA_LAYOUT_VERSION) { - /* mirror merge exec phase */ - layout_attr->la_layout_version = - lo->ldo_layout_gen; - data.locd_attr = layout_attr; - data.locd_declare = false; - data.locd_stripe_cb = - lod_obj_stripe_attr_set_cb; - rc = lod_obj_for_each_stripe(env, lo, th, - &data); - if (rc) - RETURN(rc); - } } RETURN(rc); } else if 
(strcmp(name, XATTR_NAME_FID) == 0) { @@ -7671,7 +7607,6 @@ static int lod_declare_update_rdonly(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); - struct lu_attr *layout_attr = &info->lti_layout_attr; struct lod_layout_component *lod_comp; struct lu_extent extent = { 0 }; int rc; @@ -7782,25 +7717,11 @@ static int lod_declare_update_rdonly(const struct lu_env *env, * This way it can make sure that the layout version is * monotonously increased in this writing era. */ lod_obj_inc_layout_gen(lo); - if (lo->ldo_layout_gen > (LCME_ID_MAX >> 1)) { - __u32 layout_version; - - get_random_bytes(&layout_version, sizeof(layout_version)); - lo->ldo_layout_gen = layout_version & 0xffff; - } rc = lod_declare_instantiate_components(env, lo, th, 0); if (rc) GOTO(out, rc); - layout_attr->la_valid = LA_LAYOUT_VERSION; - layout_attr->la_layout_version = 0; /* set current version */ - if (mlc->mlc_opc == MD_LAYOUT_RESYNC) - layout_attr->la_layout_version = LU_LAYOUT_RESYNC; - rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); - if (rc) - GOTO(out, rc); - out: if (rc) lod_striping_free(env, lo); @@ -7812,7 +7733,6 @@ static int lod_declare_update_write_pending(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); - struct lu_attr *layout_attr = &info->lti_layout_attr; struct lod_layout_component *lod_comp; struct lu_extent extent = { 0 }; int primary = -1; @@ -7944,19 +7864,6 @@ static int lod_declare_update_write_pending(const struct lu_env *env, if (rc) GOTO(out, rc); - /* 3. transfer layout version to OST objects. - * transfer new layout version to OST objects so that stale writes - * can be denied. It also ends an era of writing by setting - * LU_LAYOUT_RESYNC. Normal client can never use this bit to - * send write RPC; only resync RPCs could do it. 
*/ - layout_attr->la_valid = LA_LAYOUT_VERSION; - layout_attr->la_layout_version = 0; /* set current version */ - if (mlc->mlc_opc == MD_LAYOUT_RESYNC) - layout_attr->la_layout_version = LU_LAYOUT_RESYNC; - rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); - if (rc) - GOTO(out, rc); - lod_obj_inc_layout_gen(lo); out: if (rc) @@ -7969,7 +7876,6 @@ static int lod_declare_update_sync_pending(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); - struct lu_attr *layout_attr = &info->lti_layout_attr; unsigned sync_components = 0; unsigned resync_components = 0; int i; @@ -8042,12 +7948,6 @@ static int lod_declare_update_sync_pending(const struct lu_env *env, lo->ldo_flr_state = LCM_FL_RDONLY; lod_obj_inc_layout_gen(lo); - layout_attr->la_valid = LA_LAYOUT_VERSION; - layout_attr->la_layout_version = 0; /* set current version */ - rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); - if (rc) - GOTO(out, rc); - info->lti_buf.lb_len = lod_comp_md_size(lo, false); rc = lod_sub_declare_xattr_set(env, lod_object_child(lo), &info->lti_buf, XATTR_NAME_LOV, 0, th); @@ -8721,8 +8621,6 @@ static int lod_layout_change(const struct lu_env *env, struct dt_object *dt, struct md_layout_change *mlc, struct thandle *th) { struct lu_attr *attr = &lod_env_info(env)->lti_attr; - struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr; - struct lod_object *lo = lod_dt_obj(dt); int rc; ENTRY; @@ -8734,10 +8632,6 @@ static int lod_layout_change(const struct lu_env *env, struct dt_object *dt, } rc = lod_striped_create(env, dt, attr, NULL, th); - if (!rc && layout_attr->la_valid & LA_LAYOUT_VERSION) { - layout_attr->la_layout_version |= lo->ldo_layout_gen; - rc = lod_attr_set(env, dt, layout_attr, th); - } RETURN(rc); } diff --git a/lustre/lod/lod_qos.c b/lustre/lod/lod_qos.c index f6571f3..fbc3fa2 100644 --- a/lustre/lod/lod_qos.c +++ b/lustre/lod/lod_qos.c @@ -2602,6 +2602,7 @@ put_ldts: } lod_comp->llc_stripe_count 
= 0; } else { + lod_comp->llc_layout_gen = 0; lod_comp->llc_stripe = stripe; lod_comp->llc_ost_indices = ost_indices; lod_comp->llc_stripes_allocated = stripe_len; diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index e266977..3b80b57 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -1648,7 +1648,7 @@ static int mdd_xattr_merge(const struct lu_env *env, struct md_object *md_obj, GOTO(out, rc); rc = mdo_xattr_del(env, vic, XATTR_NAME_LOV, handle); - if (rc) /* wtf? */ + if (rc) GOTO(out_restore, rc); (void)mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, obj, handle, diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 12da352..f147664 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -504,4 +504,21 @@ static inline int ofd_validate_seq(struct obd_export *exp, __u64 seq) return 0; } +/* whether the requested IO carries an older layout version than the one on + * disk. */ +static inline bool ofd_layout_version_less(__u32 req_version, + __u32 ondisk_version) +{ + __u32 req = req_version & ~LU_LAYOUT_RESYNC; + __u32 ondisk = ondisk_version & ~LU_LAYOUT_RESYNC; + + /** + * request layout version could be circularly increased to the smallest + * value, in that case @req < @ondisk but @req does not have the + * LU_LAYOUT_HIGEN bit set while @ondisk does. 
+ */ + return (req < ondisk) && + ((req & LU_LAYOUT_HIGEN) == (ondisk & LU_LAYOUT_HIGEN)); +} + #endif /* _OFD_INTERNAL_H */ diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 3b96d63..0d7a61b 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -439,7 +439,6 @@ int ofd_verify_ff(const struct lu_env *env, struct ofd_object *fo, int ofd_verify_layout_version(const struct lu_env *env, struct ofd_object *fo, const struct obdo *oa) { - __u32 layout_version; int rc; ENTRY; @@ -449,25 +448,18 @@ int ofd_verify_layout_version(const struct lu_env *env, rc = ofd_object_ff_load(env, fo); if (rc < 0) { if (rc == -ENODATA) - rc = -EINPROGRESS; + rc = 0; GOTO(out, rc); } - layout_version = fo->ofo_ff.ff_layout_version; - if (oa->o_layout_version >= layout_version && - oa->o_layout_version <= layout_version + fo->ofo_ff.ff_range) - GOTO(out, rc = 0); - - /* normal traffic, decide if to return ESTALE or EINPROGRESS */ - layout_version &= ~LU_LAYOUT_RESYNC; - - /* this update is not legitimate */ - if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) <= layout_version) - GOTO(out, rc = -ESTALE); - - /* layout version may not be transmitted yet */ - if ((oa->o_layout_version & ~LU_LAYOUT_RESYNC) > layout_version) - GOTO(out, rc = -EINPROGRESS); + /** + * this update is not legitimate, whose layout version is older than + * that on the disk. 
+ */ + if (ofd_layout_version_less(oa->o_layout_version, + fo->ofo_ff.ff_layout_version + + fo->ofo_ff.ff_range)) + RETURN(-ESTALE); EXIT; @@ -794,8 +786,6 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, ofd_object_put(env, fo); GOTO(out, rc); } - - oa->o_valid &= ~OBD_MD_LAYOUT_VERSION; } if (ptlrpc_connection_is_local(exp->exp_connection)) diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 41fe74b..17e7f30 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -592,23 +592,15 @@ int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, PFID(lu_object_fid(&fo->ofo_obj.do_lu)), ff->ff_layout_version, oa->o_layout_version); - /* only the MDS has the authority to update layout version */ - if (!(exp_connect_flags(ofd_info(env)->fti_exp) & - OBD_CONNECT_MDS)) { - CERROR(DFID": update layout version from client\n", - PFID(&fo->ofo_ff.ff_parent)); - - RETURN(-EPERM); - } - if (ff->ff_layout_version & LU_LAYOUT_RESYNC) { /* this opens a new era of writing */ ff->ff_layout_version = 0; ff->ff_range = 0; } - /* it's not allowed to change it to a smaller value */ - if (oa->o_layout_version < ff->ff_layout_version) + /*it's not allowed to change it to a smaller value */ + if (ofd_layout_version_less(oa->o_layout_version, + ff->ff_layout_version)) RETURN(-EINVAL); if (ff->ff_layout_version == 0 || @@ -919,8 +911,6 @@ int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo, rc = ofd_verify_layout_version(env, fo, oa); if (rc) GOTO(unlock, rc); - - oa->o_valid &= ~OBD_MD_LAYOUT_VERSION; } if (oa->o_valid & OBD_MD_FLFLAGS && oa->o_flags & LUSTRE_ENCRYPT_FL) { diff --git a/lustre/osp/osp_sync.c b/lustre/osp/osp_sync.c index 2f1db6b..8d3ea09 100644 --- a/lustre/osp/osp_sync.c +++ b/lustre/osp/osp_sync.c @@ -767,13 +767,6 @@ static int osp_sync_new_setattr_job(struct osp_device *d, else body->oa.o_valid |= rec->lsr_valid; - if (body->oa.o_valid & OBD_MD_LAYOUT_VERSION) { - 
OBD_FAIL_TIMEOUT(OBD_FAIL_FLR_LV_DELAY, cfs_fail_val); - if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_FLR_LV_INC))) - body->oa.o_layout_version = LU_LAYOUT_RESYNC | - (body->oa.o_layout_version + 1); - } - osp_sync_send_new_rpc(d, llh, h, req); RETURN(0); } diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 26356bd..2661dfc 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -1405,16 +1405,30 @@ test_35() { } run_test 35 "allow to write to mirrored files" +get_file_layout_version() { + local tf=$1 + local flv=$($LFS getstripe $tf | awk '/lcm_layout_gen/{print $2}') + + echo -n $flv +} + +get_ost_layout_version() { + local tf=$1 + local olv=$($MULTIOP $tf oXc | awk '/ostlayoutversion/{print $2}') + + echo -n $olv +} + verify_ost_layout_version() { local tf=$1 # get file layout version - local flv=$($LFS getstripe $tf | awk '/lcm_layout_gen/{print $2}') + local flv=$(get_file_layout_version $tf) # layout version from OST objects - local olv=$($MULTIOP $tf oXc | awk '/ostlayoutversion/{print $2}') + local olv=$(get_ost_layout_version $tf) - [ $flv -eq $olv ] || error "layout version mismatch: $flv vs. $olv" + (( flv >= olv )) || error "layout version mismatch: $flv vs. 
$olv" } create_file_36() { @@ -1429,7 +1443,7 @@ create_file_36() { done } -test_36() { +test_36a() { local tf=$DIR/$tfile stack_trap "rm -f $tf $tf-2 $tf-3" @@ -1464,27 +1478,112 @@ test_36() { local st=$(date +%s) $MULTIOP $tf-2 oO_WRONLY:w1024Yc || error "write mirrored file error" - [ $(date +%s) -ge $((st+delay_sec)) ] || - error "write finished before layout version is transmitted" - # verify OST layout version verify_ost_layout_version $tf do_facet $mds_facet $LCTL set_param fail_loc=0 +} +run_test 36a "write to mirrored files" - # test case 3 - mds_idx=mds$(($($LFS getstripe -m $tf-3) + 1)) +test_36b() { + local tf=$DIR/$tfile - #define OBD_FAIL_FLR_LV_INC 0x1A02 - do_facet $mds_facet $LCTL set_param fail_loc=0x1A02 + (( OSTCOUNT < 2 )) && skip "need >= 2 OSTs" && return - # write open file should return error - $MULTIOP $tf-3 oO_WRONLY:O_SYNC:w1024c && - error "write a mirrored file succeeded" || true + # create 2 mirrors using different OSTs + $LFS setstripe -N -c1 -i0 --flags=prefer -N -c1 -i1 $tf || + error "create mirrored file" - do_facet $mds_facet $LCTL set_param fail_loc=0 + # write 1M data to one mirror + dd if=/dev/zero of=$tf bs=1M count=1 || error "write file error" + sync + + # set prefer mirror to another mirror + $LFS setstripe --comp-set -I0x10001 --comp-flags=^prefer $tf || + error "clear prefer mirror error" + $LFS setstripe --comp-set -I0x20002 --comp-flags=prefer $tf || + error "set prefer mirror error" + + # the second write should not hang + dd if=/dev/zero of=$tf bs=1M count=1 || error "write file error" +} +run_test 36b "write should not hung when prefered mirror is stale" + +test_36c() { + local tf=$DIR/$tfile + + (( OSTCOUNT < 2 )) && skip "need >= 2 OSTs" && return + + # create 2 mirrors using different OSTs + $LFS setstripe -N -c1 -i0 --flags=prefer -N -c1 -i1 $tf || + error "create mirrored file" + + # write it in the background + dd if=/dev/zero of=$tf bs=1M count=600 & + local pid=$! 
+ + sleep 1 + + $LFS setstripe --comp-set -I0x10001 --comp-flags=^prefer $tf || + error "clear prefer mirror error" + $LFS setstripe --comp-set -I0x20002 --comp-flags=prefer $tf || + error "set prefer mirror error" + + wait $pid +} +run_test 36c "change prefer mirror during write shouldn't hung" + +test_36d() { + local tf=$DIR/$tfile + + echo " ** create $tf" + $LFS mirror create -N $tf || error "create $tf failed" + + for i in 1 2; do + echo " ** mirror extend $tf" + $LFS mirror extend -N $tf || error "mirror extend $tf failed" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + done + + for i in 1 2; do + echo " ** write $tf" + dd if=/dev/zero of=$tf bs=1k count=1 || error "write $tf failed" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + (( flv == olv )) || + error "write update OST layout failed $flv/$olv" + done + + echo " ** resync $tf" + $LFS mirror resync $tf || error "mirror resync $tf failed" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + + for i in 1 2; do + echo " ** truncate $tf" + $TRUNCATE $tf $((1024 * 1024)) || error "truncate $tf fails" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + (( flv == olv || flv == olv + 1 )) || + error "truncate update OST layout failed $flv/$olv" + done + + for i in 1 2; do + echo " ** write $tf" + dd if=/dev/zero of=$tf bs=1k count=1 || error "write $tf failed" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + (( flv == olv )) || + error "write update OST layout failed $flv/$olv" + done } -run_test 36 "write to mirrored files" +run_test 36d "write/punch FLR file update OST layout version" create_files_37() { local tf @@ -2524,6 +2623,8 @@ run_test 50A "mirror split update layout generation" test_50a() { $LCTL get_param osc.*.import | grep -q 
'connect_flags:.*seek' || skip "OST does not support SEEK_HOLE" + [ "$FSTYPE" != "zfs" ] || + skip "lseek for ZFS is not accurate if obj is not committed" local file=$DIR/$tdir/$tfile local offset @@ -2622,6 +2723,8 @@ run_test 50a "mirror extend/copy preserves sparseness" test_50b() { $LCTL get_param osc.*.import | grep -q 'connect_flags:.*seek' || skip "OST does not support SEEK_HOLE" + [ "$FSTYPE" != "zfs" ] || + skip "lseek for ZFS is not accurate if obj is not committed" local file=$DIR/$tdir/$tfile local offset @@ -2734,6 +2837,8 @@ test_50d() { skip "OST does not support SEEK_HOLE" (( $LINUX_VERSION_CODE > $(version_code 3.0.0) )) || skip "client kernel does not support SEEK_HOLE" + [ "$FSTYPE" != "zfs" ] || + skip "lseek for ZFS is not accurate if obj is not committed" local file=$DIR/$tdir/$tfile local offset -- 1.8.3.1