From 373b76b345b5eaa21b201315075f01ccfc408189 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 21 Mar 2024 10:09:34 +1100 Subject: [PATCH] LU-17658 fid: check on disk sequence before allocating to osp If we lose the commit to update seq_srv on ofd/ost, the available super-sequence range is not updated, and the sequence server of ofd could then assign the same sequence again to a different osp, causing filesystem corruption. To address this, a new dt_device_operations->dt_last_seq_get() is added to iterate the currently known sequence dirs under /O and return the highest one. Before using the super-sequence range read from seq_srv, we use the new interface to double-check it, and either advance the start of the current range or get a new range if necessary. Change-Id: I49a11bb3b5e476e55c5835b05392c9567aeeb4ce Signed-off-by: Li Dongyang Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54474 Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/fid/fid_handler.c | 21 ++++++++++++++ lustre/fid/fid_store.c | 1 + lustre/include/dt_object.h | 24 ++++++++++++++++ lustre/osd-ldiskfs/osd_handler.c | 1 + lustre/osd-ldiskfs/osd_scrub.c | 61 ++++++++++++++++++++++++++++++++++++++++ lustre/osd-ldiskfs/osd_scrub.h | 2 ++ lustre/osd-zfs/osd_handler.c | 1 + lustre/osd-zfs/osd_internal.h | 2 ++ lustre/osd-zfs/osd_scrub.c | 40 ++++++++++++++++++++++++++ 9 files changed, 153 insertions(+) diff --git a/lustre/fid/fid_handler.c b/lustre/fid/fid_handler.c index d831465..1bd9470 100644 --- a/lustre/fid/fid_handler.c +++ b/lustre/fid/fid_handler.c @@ -310,6 +310,7 @@ static int __seq_server_alloc_meta(struct lu_server_seq *seq, LASSERT(lu_seq_range_is_sane(space)); +restart: rc = seq_server_check_and_alloc_super(env, seq); if (rc < 0) { if (rc == -EINPROGRESS) { @@ -328,6 +329,26 @@ static int __seq_server_alloc_meta(struct lu_server_seq *seq, if (seq->lss_set_width) { rc = range_alloc_set(env, out, seq); } 
else { + __u64 last_seq; + + rc = dt_last_seq_get(env, seq->lss_dev, &last_seq); + if (!rc) { + if (last_seq + 1 >= space->lsr_end) { + LCONSOLE_INFO("%s: On disk last known sequence %#llx beyond super-sequence " + DRANGE", getting new super-sequence\n", + seq->lss_name, last_seq, + PRANGE(space)); + space->lsr_start = space->lsr_end; + GOTO(restart, rc); + } + if (last_seq >= space->lsr_start) { + LCONSOLE_INFO("%s: On disk last known sequence %#llx within super-sequence " + DRANGE", updating super-sequence\n", + seq->lss_name, last_seq, + PRANGE(space)); + space->lsr_start = last_seq + 1; + } + } range_alloc(out, space, seq->lss_width); rc = seq_store_update(env, seq, NULL, 1); } diff --git a/lustre/fid/fid_store.c b/lustre/fid/fid_store.c index 1c8f088..aef6ee7 100644 --- a/lustre/fid/fid_store.c +++ b/lustre/fid/fid_store.c @@ -205,6 +205,7 @@ int seq_store_init(struct lu_server_seq *seq, dt_obj = dt_find_or_create(env, dt, &fid, &dof, &attr); if (!IS_ERR(dt_obj)) { seq->lss_obj = dt_obj; + seq->lss_dev = dt; rc = 0; } else { CERROR("%s: Can't find \"%s\" obj %d\n", diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 90f27c3..2d7c73d 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -304,6 +304,20 @@ struct dt_device_operations { int (*dt_reserve_or_free_quota)(const struct lu_env *env, struct dt_device *dev, struct lquota_id_info *qi); + + /** + * Return last known sequence number from disk. 
+ * + * \param[in] env execution environment for this thread + * \param[in] dev dt device + * \param[out] seq last known sequence on disk + * + * \retval 0 on success + * \retval negative negated errno on error + */ + int (*dt_last_seq_get)(const struct lu_env *env, + struct dt_device *dev, + __u64 *seq); }; struct dt_index_features { @@ -3026,6 +3040,16 @@ static inline int dt_reserve_or_free_quota(const struct lu_env *env, return dev->dd_ops->dt_reserve_or_free_quota(env, dev, qi); } +static inline int dt_last_seq_get(const struct lu_env *env, + struct dt_device *dev, + __u64 *seq) +{ + LASSERT(dev); + LASSERT(dev->dd_ops); + LASSERT(dev->dd_ops->dt_last_seq_get); + return dev->dd_ops->dt_last_seq_get(env, dev, seq); +} + static inline int dt_lookup(const struct lu_env *env, struct dt_object *dt, struct dt_rec *rec, diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 4049cdd..a621e80 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2796,6 +2796,7 @@ static const struct dt_device_operations osd_dt_ops = { .dt_ro = osd_ro, .dt_commit_async = osd_commit_async, .dt_reserve_or_free_quota = osd_reserve_or_free_quota, + .dt_last_seq_get = osd_last_seq_get, }; static void osd_inode_getattr(const struct lu_env *env, diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 33ecf15..1b463cf 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -3527,3 +3527,64 @@ static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev) return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode, osd_scan_O_seq); } + +static int osd_seq_dir_helper(const struct lu_env *env, + struct osd_device *osd, struct inode *dir, + struct osd_it_ea *oie) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lu_fid *fid = &info->oti_fid; + struct inode *inode; + struct osd_inode_id id; + char *name = NULL; + __u64 seq; + int rc = 0; + + ENTRY; 
+ + osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN); + inode = osd_iget(info, osd, &id, 0); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + if (!S_ISDIR(inode->i_mode)) + GOTO(out, rc); /* rc is still 0: not a directory */ + + OBD_ALLOC(name, oie->oie_dirent->oied_namelen + 1); + if (name == NULL) + GOTO(out, rc = -ENOMEM); + memcpy(name, oie->oie_dirent->oied_name, + oie->oie_dirent->oied_namelen); + name[oie->oie_dirent->oied_namelen] = '\0'; + /* like osd-zfs, ignore names that are not hex sequence numbers */ + if (kstrtoull(name, 16, &seq) == 0 && + seq >= FID_SEQ_NORMAL && seq > fid_seq(fid)) + fid->f_seq = seq; + + OBD_FREE(name, oie->oie_dirent->oied_namelen + 1); +out: + iput(inode); + RETURN(rc); +} + +int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt, + __u64 *seq) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct osd_device *osd = osd_dt_dev(dt); + struct lu_fid *fid = &info->oti_fid; + int rc; + + ENTRY; + + if (!osd->od_is_ost) + RETURN(-EINVAL); + + fid_zero(fid); + rc = osd_scan_dir(env, osd, osd->od_ost_map->om_root->d_inode, + osd_seq_dir_helper); + if (!rc) + *seq = fid_seq(fid); + + RETURN(rc); +} diff --git a/lustre/osd-ldiskfs/osd_scrub.h b/lustre/osd-ldiskfs/osd_scrub.h index f877888..b61cab5 100644 --- a/lustre/osd-ldiskfs/osd_scrub.h +++ b/lustre/osd-ldiskfs/osd_scrub.h @@ -53,4 +53,6 @@ struct osd_scrub { time64_t os_bad_oimap_time; }; +int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt, + __u64 *seq); #endif /* _OSD_SCRUB_H */ diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 50849e8..9ab85fd 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -734,6 +734,7 @@ static const struct dt_device_operations osd_dt_ops = { .dt_commit_async = osd_commit_async, .dt_ro = osd_ro, .dt_reserve_or_free_quota = osd_reserve_or_free_quota, + .dt_last_seq_get = osd_last_seq_get, }; static void *osd_key_init(const struct lu_context *ctx, diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 8fd2b89..66f0721 100644 
--- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -710,6 +710,8 @@ int osd_oii_insert(const struct lu_env *env, struct osd_device *dev, const struct lu_fid *fid, uint64_t oid, bool insert); int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, uint64_t *oid); +int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt, + __u64 *seq); /** * Basic transaction credit op diff --git a/lustre/osd-zfs/osd_scrub.c b/lustre/osd-zfs/osd_scrub.c index 0380b22..0e3fd99 100644 --- a/lustre/osd-zfs/osd_scrub.c +++ b/lustre/osd-zfs/osd_scrub.c @@ -2426,3 +2426,43 @@ static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev) { return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_O_seq); } + +static int osd_seq_dir_helper(const struct lu_env *env, + struct osd_device *osd, uint64_t dir_oid, + struct osd_zap_it *ozi) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lu_fid *fid = &info->oti_fid; + __u64 seq; + + /* DTTOIF() is already a host-order mode, do not byte-swap it */ + if (!S_ISDIR(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))) + return 0; + + if (kstrtoull(ozi->ozi_name, 16, &seq) == 0 && + seq >= FID_SEQ_NORMAL && seq > fid_seq(fid)) + fid->f_seq = seq; + + return 0; +} + +int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt, + __u64 *seq) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct osd_device *osd = osd_dt_dev(dt); + struct lu_fid *fid = &info->oti_fid; + int rc; + + ENTRY; + + if (!osd->od_is_ost) + RETURN(-EINVAL); + + fid_zero(fid); + rc = osd_scan_dir(env, osd, osd->od_O_id, osd_seq_dir_helper); + if (!rc) + *seq = fid_seq(fid); + + RETURN(rc); +} -- 1.8.3.1