From b09d33ed8e47c33b3c2af7b8d70f78ff72882f20 Mon Sep 17 00:00:00 2001 From: Qian Yingjin Date: Fri, 5 Feb 2021 11:48:26 +0800 Subject: [PATCH] LU-10499 pcc: check first before set PCC-RO on a file In this patch, MDT takes a CR layout lock against the file object first to check whether the file is already PCC-RO cached. If so, return immediately; Otherwise, take an EX lock on the file to update the FLR PCC-RO state accordingly. By this check, it can avoid heavy lock contention and unnecessary revocation of the layout lock granted to the other clients when multiple processes from many clients perform read-only attach on a shared file simultaneously. EX-bug-id: EX-2455 Test-Parameters: clientcount=3 testlist=sanity-pcc,sanity-pcc,sanity-pcc Signed-off-by: Qian Yingjin Change-Id: If59315abe444917f8a890b60a38c239b8ee045bf Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54370 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Li Xi Reviewed-by: Oleg Drokin Reviewed-by: Andreas Dilger --- lustre/include/dt_object.h | 26 +++++++++++++++++++++ lustre/include/md_object.h | 14 +++++++++++ lustre/include/obd_support.h | 1 + lustre/llite/pcc.c | 11 +++++++-- lustre/lod/lod_object.c | 15 ++++++++++++ lustre/mdd/mdd_internal.h | 7 ++++++ lustre/mdd/mdd_object.c | 11 +++++++++ lustre/mdt/mdt_handler.c | 55 ++++++++++++++++++++++++++++++++++++++++---- lustre/tests/sanity-pcc.sh | 46 ++++++++++++++++++++++++++++++++++++ 9 files changed, 179 insertions(+), 7 deletions(-) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 80940a9..1c0238b 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1069,6 +1069,22 @@ struct dt_object_operations { int (*do_layout_change)(const struct lu_env *env, struct dt_object *dt, struct md_layout_change *mlc, struct thandle *th); + + /** + * Check whether the file is in PCC-RO state. 
+ * + * \param[in] env execution environment + * \param[in] dt DT object + * \param[in] mlc data structure to describe the changes to + * the DT object's layout + * + * \retval 0 if the file is not in PCC-RO state + * \retval -EALREADY if the file is already PCC-RO cached + * \retval -ne other negative errno on failure + */ + int (*do_layout_pccro_check)(const struct lu_env *env, + struct dt_object *dt, + struct md_layout_change *mlc); }; enum dt_bufs_type { @@ -2925,6 +2941,16 @@ static inline int dt_layout_change(const struct lu_env *env, return o->do_ops->do_layout_change(env, o, mlc, th); } +static inline int dt_layout_pccro_check(const struct lu_env *env, + struct dt_object *o, + struct md_layout_change *mlc) +{ + LASSERT(o); + LASSERT(o->do_ops); + LASSERT(o->do_ops->do_layout_pccro_check); + return o->do_ops->do_layout_pccro_check(env, o, mlc); +} + struct dt_find_hint { struct lu_fid *dfh_fid; struct dt_device *dfh_dt; diff --git a/lustre/include/md_object.h b/lustre/include/md_object.h index a3133f4..c2cc292 100644 --- a/lustre/include/md_object.h +++ b/lustre/include/md_object.h @@ -313,6 +313,12 @@ struct md_object_operations { int (*moo_layout_change)(const struct lu_env *env, struct md_object *obj, struct md_layout_change *layout); + /** + * Check whether the file is in PCC-RO state. 
+ */ + int (*moo_layout_pccro_check)(const struct lu_env *env, + struct md_object *obj, + struct md_layout_change *layout); }; /** @@ -531,6 +537,14 @@ static inline int mo_layout_change(const struct lu_env *env, return m->mo_ops->moo_layout_change(env, m, layout); } +static inline int mo_layout_pccro_check(const struct lu_env *env, + struct md_object *m, + struct md_layout_change *layout) +{ + LASSERT(m->mo_ops->moo_layout_pccro_check); + return m->mo_ops->moo_layout_pccro_check(env, m, layout); +} + static inline int mo_swap_layouts(const struct lu_env *env, struct md_object *o1, struct md_object *o2, __u64 dv1, __u64 dv2, __u64 flags) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 623215c..74a2245 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -259,6 +259,7 @@ extern bool obd_enable_health_write; #define OBD_FAIL_MDS_LOD_CREATE_PAUSE 0x173 #define OBD_FAIL_MDS_CONNECT_VS_EVICT 0x174 #define OBD_FAIL_MDS_DELAY_OPEN 0x175 +#define OBD_FAIL_MDS_LL_PCCRO 0x176 /* CMD */ #define OBD_FAIL_MDS_IS_SUBDIR_NET 0x180 diff --git a/lustre/llite/pcc.c b/lustre/llite/pcc.c index eb09814..3ddb2a5 100644 --- a/lustre/llite/pcc.c +++ b/lustre/llite/pcc.c @@ -3518,8 +3518,12 @@ static int pcc_readonly_ioctl_attach(struct file *file, RETURN(-EOPNOTSUPP); rc = pcc_attach_allowed_check(inode); - if (rc) + if (rc) { + CDEBUG(D_CACHE, + "PCC-RO caching for "DFID" not allowed, rc = %d\n", + PFID(ll_inode2fid(inode)), rc); RETURN(rc); + } rc = pcc_layout_rdonly_set(inode, &gen); if (rc) @@ -3538,8 +3542,11 @@ static int pcc_readonly_ioctl_attach(struct file *file, pcc_inode_lock(inode); old_cred = override_creds(super->pccs_cred); lli->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING; - if (gen != ll_layout_version_get(lli)) + if (gen != ll_layout_version_get(lli)) { + CDEBUG(D_CACHE, "L.Gen mismatch %u:%u\n", + gen, ll_layout_version_get(lli)); GOTO(out_put_unlock, rc = -ESTALE); + } pcci = ll_i2pcci(inode); if (!pcci) { diff 
--git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index f488a30..5f9a360 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -8021,6 +8021,20 @@ static int lod_prepare_resync(const struct lu_env *env, struct lod_object *lo, return need_sync ? 0 : -EALREADY; } +static int lod_layout_pccro_check(const struct lu_env *env, + struct dt_object *dt, + struct md_layout_change *mlc) +{ + struct lod_object *lo = lod_dt_obj(dt); + int rc; + + rc = lod_striping_load(env, lo); + if (rc) + return rc; + + return lo->ldo_flr_state & LCM_FL_PCC_RDONLY ? -EALREADY : 0; +} + static struct lod_layout_component * lod_locate_comp_hsm(struct lod_object *lo, int *hsm_mirror_id) { @@ -9423,6 +9437,7 @@ const struct dt_object_operations lod_obj_ops = { .do_invalidate = lod_invalidate, .do_declare_layout_change = lod_declare_layout_change, .do_layout_change = lod_layout_change, + .do_layout_pccro_check = lod_layout_pccro_check, }; /** diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index b51b8fa..8707d54 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -757,6 +757,13 @@ mdo_layout_change(const struct lu_env *env, struct mdd_object *obj, return dt_layout_change(env, mdd_object_child(obj), mlc, handle); } +static inline int +mdo_layout_pccro_check(const struct lu_env *env, struct mdd_object *obj, + struct md_layout_change *mlc) +{ + return dt_layout_pccro_check(env, mdd_object_child(obj), mlc); +} + static inline int mdo_declare_index_insert(const struct lu_env *env, struct mdd_object *obj, const struct lu_fid *fid, __u32 type, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index 130b4cb..8165c07 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -3439,6 +3439,16 @@ out: /* Update the layout for PCC-RO. 
*/ static int +mdd_layout_pccro_check(const struct lu_env *env, struct md_object *o, + struct md_layout_change *mlc) +{ + return mdo_layout_pccro_check(env, md2mdd_obj(o), mlc); +} + +/** + * Update the layout for PCC-RO. + */ +static int mdd_layout_update_pccro(const struct lu_env *env, struct md_object *o, struct md_layout_change *mlc) { @@ -4221,4 +4231,5 @@ const struct md_object_operations mdd_obj_ops = { .moo_object_lock = mdd_object_lock, .moo_object_unlock = mdd_object_unlock, .moo_layout_change = mdd_layout_change, + .moo_layout_pccro_check = mdd_layout_pccro_check, }; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index b6ab5f3..baeadcf 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -1794,6 +1794,8 @@ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj, rc = mdt_object_lock(info, obj, lhc, lockpart, LCK_EX); if (rc) RETURN(rc); + + CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_LL_PCCRO, cfs_fail_val); } mutex_lock(&obj->mot_som_mutex); @@ -4862,6 +4864,35 @@ out_shrink: return rc; } +static int mdt_layout_change_pccro(struct mdt_thread_info *info, + struct mdt_object *obj, + struct mdt_lock_handle *lhc, + struct md_layout_change *layout) +{ + int rc; + + ENTRY; + + if (!mdt_object_exists(obj)) + RETURN(-ENOENT); + + if (!S_ISREG(lu_object_attr(&obj->mot_obj))) + RETURN(-EINVAL); + + rc = mdt_object_lock(info, obj, lhc, MDS_INODELOCK_LAYOUT, LCK_CR); + if (rc) + RETURN(rc); + + rc = mo_layout_pccro_check(info->mti_env, + mdt_object_child(obj), layout); + if (rc == -EALREADY) + RETURN(0); + + mdt_object_unlock(info, obj, lhc, 1); + rc = mdt_layout_change(info, obj, lhc, layout); + RETURN(rc); +} + static int mdt_intent_layout(enum ldlm_intent_flags it_opc, struct mdt_thread_info *info, struct ldlm_lock **lockp, @@ -4984,11 +5015,25 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc, mdt_intent_fixup_resent(info, *lockp, lhc, flags); (*lockp)->l_lvb_type = LVB_T_LAYOUT; - /* - * Instantiate some 
layout components, if @buf contains lovea, then it's - * a replay of the layout intent write RPC. - */ - rc = mdt_layout_change(info, obj, lhc, &layout); + if (intent->lai_opc == LAYOUT_INTENT_PCCRO_SET) + /* + * Take a CR layout lock against the file object first to check + * whether the file is already PCC-RO cached. If so, return + * immediately; Otherwise, take an EX layout lock on the file + * to update the FLR PCC-RO state accordingly. By this check, + * it can avoid heavy lock contention and unnecessary revocation + * of the layout lock granted to the other clients when multiple + * processes from many clients perform read-only attach on a + * shared file object simultaneously. + */ + rc = mdt_layout_change_pccro(info, obj, lhc, &layout); + else + /* + * Instantiate some layout components, if @buf contains lovea, + * then it's a replay of the layout intent write RPC. + */ + rc = mdt_layout_change(info, obj, lhc, &layout); + ldlm_rep->lock_policy_res2 = clear_serious(rc); if (lustre_handle_is_used(&lhc->mlh_reg_lh)) { diff --git a/lustre/tests/sanity-pcc.sh b/lustre/tests/sanity-pcc.sh index 2bcdd5f..820b10a 100755 --- a/lustre/tests/sanity-pcc.sh +++ b/lustre/tests/sanity-pcc.sh @@ -219,6 +219,9 @@ setup_loopdev() { stack_trap "do_facet $facet rm -rf $mntpt" EXIT do_facet $facet dd if=/dev/zero of=$file bs=1M count=$size stack_trap "do_facet $facet rm -f $file" EXIT + do_facet $facet mount + do_facet $facet $UMOUNT $mntpt + do_facet $facet mount do_facet $facet mkfs.ext4 $file || error "mkfs.ext4 $file failed" do_facet $facet file $file @@ -2984,6 +2987,49 @@ test_36b() { } run_test 36b "Stale RO-PCC copy should be deleted after remove the PCC backend" +test_37() { + local loopfile="$TMP/$tfile" + local loopfile2="$TMP/$tfile.2" + local mntpt="/mnt/pcc.$tdir" + local mntpt2="/mnt/pcc.$tdir.2" + local file=$DIR/$tdir/$tfile + local file2=$DIR2/$tdir/$tfile + + $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro || + skip "Server does not support PCC-RO" + 
+ mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed" + touch $file + + setup_loopdev client $loopfile $mntpt 50 + setup_loopdev client $loopfile2 $mntpt2 50 + $LCTL pcc add $MOUNT $mntpt -p \ + "projid={2} roid=$HSM_ARCHIVE_NUMBER auto_attach=0 pccro=1" || + error "failed to config PCC for $MOUNT $mntpt" + $LCTL pcc add $MOUNT2 $mntpt2 -p \ + "projid={2} roid=$HSM_ARCHIVE_NUMBER auto_attach=0 pccro=1" || + error "failed to config PCC for $MOUNT2 $mntpt2" + $LCTL pcc list $MOUNT + $LCTL pcc list $MOUNT2 + + cancel_lru_locks mdc +#define CFS_FAIL_ONCE | OBD_FAIL_MDS_LL_PCCRO + $LCTL set_param -n fail_loc=0x80000176 fail_val=10 + $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file & + sleep 2 + $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file2 + wait + $LFS pcc state $file + $LFS pcc state $file2 + + check_lpcc_state $file "readonly" client + check_lpcc_state $file2 "readonly" client + + $LCTL pcc clear $MOUNT + $LCTL pcc clear $MOUNT2 +} +run_test 37 "Multiple readers on a shared file with PCC-RO mode" + test_41() { local loopfile="$TMP/$tfile" local mntpt="/mnt/pcc.$tdir" -- 1.8.3.1