From 7a797388645449838f21c558ff618e9e2f88ef7e Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Tue, 24 Nov 2020 00:34:11 -0500 Subject: [PATCH] LU-13974 llog: check stale osp object The logic of osp_attr_get has 2 paths: 1) return attributes from a cache for a healthy osp object; 2) make an out update request and return attributes for a stale osp object, after which the object loses its stale state. When an out update request with llog writes fails, the osp object becomes stale. But the llog handle stays inconsistent (bitmap, count, last_index), and the next llog_add->llog_osd_write_rec calls dt_attr_get, gets attributes, makes the osp object valid, and uses wrong llog handle data. The result is an index jump in the llog file - recX, recX+2. This causes an error during update log processing if a failover takes place. The fix adds a dt_object_stale function to check the osp object. llog_osd_write_rec checks it and returns ESTALE, so llog_add fails with an ESTALE error and doesn't corrupt the update log. Lustre-change: https://review.whamcloud.com/40742 Lustre-commit: 82c6e42d6137f39a1f2394b7bc6e8d600eb36181 HPE-bug-id: LUS-9030 Signed-off-by: Alexander Boyko Change-Id: Iadf53fd816e1c5bde0a19d4c537f0408796c864a Reviewed-by: Oleg Drokin Signed-off-by: Mikhail Pershin Reviewed-by: Andreas Dilger Reviewed-on: https://review.whamcloud.com/46802 Tested-by: jenkins Tested-by: Maloo --- lustre/include/dt_object.h | 21 +++++++++++++++++++++ lustre/obdclass/llog_osd.c | 3 +++ lustre/osd-ldiskfs/osd_handler.c | 5 +++++ lustre/osd-zfs/osd_object.c | 6 ++++++ lustre/osp/osp_internal.h | 1 + lustre/osp/osp_md_object.c | 1 + lustre/osp/osp_object.c | 11 +++++++++++ 7 files changed, 48 insertions(+) diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index e24d3d5..26fed07 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1090,6 +1090,18 @@ struct dt_object_operations { int (*do_invalidate)(const struct lu_env *env, struct dt_object *dt); /** + * Check object stale state. + * + * OSP only. 
+ * + * \param[in] dt object + * + * \retval true for stale object + * \retval false for not stale object + */ + bool (*do_check_stale)(struct dt_object *dt); + + /** * Declare intention to instaintiate extended layout component. * * \param[in] env execution environment @@ -2438,6 +2450,15 @@ static inline int dt_write_locked(const struct lu_env *env, return dt->do_ops->do_write_locked(env, dt); } +static inline bool dt_object_stale(struct dt_object *dt) +{ + LASSERT(dt); + LASSERT(dt->do_ops); + LASSERT(dt->do_ops->do_check_stale); + + return dt->do_ops->do_check_stale(dt); +} + static inline int dt_declare_attr_get(const struct lu_env *env, struct dt_object *dt) { diff --git a/lustre/obdclass/llog_osd.c b/lustre/obdclass/llog_osd.c index 2336b38..11d1ff9 100644 --- a/lustre/obdclass/llog_osd.c +++ b/lustre/obdclass/llog_osd.c @@ -418,6 +418,9 @@ static int llog_osd_write_rec(const struct lu_env *env, LASSERT(llh->llh_size == reclen); } + /* return error if osp object is stale */ + if (idx != LLOG_HEADER_IDX && dt_object_stale(o)) + RETURN(-ESTALE); rc = dt_attr_get(env, o, &lgi->lgi_attr); if (rc) RETURN(rc); diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index a0cf303..2789af5 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -5027,6 +5027,10 @@ static int osd_invalidate(const struct lu_env *env, struct dt_object *dt) return 0; } +static bool osd_check_stale(struct dt_object *dt) +{ + return false; +} /* * Index operations. 
*/ @@ -5215,6 +5219,7 @@ static const struct dt_object_operations osd_obj_ops = { .do_xattr_list = osd_xattr_list, .do_object_sync = osd_object_sync, .do_invalidate = osd_invalidate, + .do_check_stale = osd_check_stale, }; static const struct dt_object_operations osd_obj_otable_it_ops = { diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index c037708..15d9599 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -2151,6 +2151,11 @@ static int osd_invalidate(const struct lu_env *env, struct dt_object *dt) return 0; } +static bool osd_check_stale(struct dt_object *dt) +{ + return false; +} + static struct dt_object_operations osd_obj_ops = { .do_read_lock = osd_read_lock, .do_write_lock = osd_write_lock, @@ -2178,6 +2183,7 @@ static struct dt_object_operations osd_obj_ops = { .do_xattr_list = osd_xattr_list, .do_object_sync = osd_object_sync, .do_invalidate = osd_invalidate, + .do_check_stale = osd_check_stale, }; static struct lu_object_operations osd_lu_obj_ops = { diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index af10f60..7415889 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -819,6 +819,7 @@ int osp_declare_xattr_del(const struct lu_env *env, struct dt_object *dt, int osp_xattr_del(const struct lu_env *env, struct dt_object *dt, const char *name, struct thandle *th); int osp_invalidate(const struct lu_env *env, struct dt_object *dt); +bool osp_check_stale(struct dt_object *dt); void osp_obj_invalidate_cache(struct osp_object *obj); int osp_trans_stop(const struct lu_env *env, struct dt_device *dt, diff --git a/lustre/osp/osp_md_object.c b/lustre/osp/osp_md_object.c index d565f0c..892e9b3 100644 --- a/lustre/osp/osp_md_object.c +++ b/lustre/osp/osp_md_object.c @@ -1110,6 +1110,7 @@ struct dt_object_operations osp_md_obj_ops = { .do_object_lock = osp_md_object_lock, .do_object_unlock = osp_md_object_unlock, .do_invalidate = osp_invalidate, + .do_check_stale = 
osp_check_stale, }; /** diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 2854d23..906f9a1 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -1368,6 +1368,17 @@ int osp_invalidate(const struct lu_env *env, struct dt_object *dt) RETURN(0); } +bool osp_check_stale(struct dt_object *dt) +{ + struct osp_object *obj = dt2osp_obj(dt); + + if (is_ost_obj(&dt->do_lu) && obj->opo_non_exist) + return true; + + return obj->opo_stale; +} + + /** * Implement OSP layer dt_object_operations::do_declare_create() interface. * -- 1.8.3.1