Whamcloud - gitweb
LU-17658 fid: check on disk sequence before allocating to osp 74/54474/12
author Li Dongyang <dongyangli@ddn.com>
Wed, 20 Mar 2024 23:09:34 +0000 (10:09 +1100)
committer Oleg Drokin <green@whamcloud.com>
Tue, 27 May 2025 04:03:56 +0000 (04:03 +0000)
If we lose the commit that updates seq_srv on ofd/ost, the available
super-sequence range is not updated, and the sequence server of ofd
could assign the same sequence again to a different osp,
causing filesystem corruption.

To address this, a new dt_device_operations->dt_last_seq_get()
is added to iterate the current known sequence dirs under /O
and return the latest one. Before using the super-sequence range
read from seq_srv, we use the new interface to double-check it and
either update the current range or get a new range if necessary.

Change-Id: I49a11bb3b5e476e55c5835b05392c9567aeeb4ce
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54474
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/fid/fid_handler.c
lustre/fid/fid_store.c
lustre/include/dt_object.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-ldiskfs/osd_scrub.h
lustre/osd-zfs/osd_handler.c
lustre/osd-zfs/osd_internal.h
lustre/osd-zfs/osd_scrub.c

index d831465..1bd9470 100644 (file)
@@ -310,6 +310,7 @@ static int __seq_server_alloc_meta(struct lu_server_seq *seq,
 
        LASSERT(lu_seq_range_is_sane(space));
 
+restart:
        rc = seq_server_check_and_alloc_super(env, seq);
        if (rc < 0) {
                if (rc == -EINPROGRESS) {
@@ -328,6 +329,26 @@ static int __seq_server_alloc_meta(struct lu_server_seq *seq,
        if (seq->lss_set_width) {
                rc = range_alloc_set(env, out, seq);
        } else {
+               __u64 last_seq;
+
+               rc = dt_last_seq_get(env, seq->lss_dev, &last_seq);
+               if (!rc) {
+                       if (last_seq + 1 >= space->lsr_end) {
+                               LCONSOLE_INFO("%s: On disk last known sequence %#llx beyond super-sequence "
+                                             DRANGE", getting new super-sequence\n",
+                                             seq->lss_name, last_seq,
+                                             PRANGE(space));
+                               space->lsr_start = space->lsr_end;
+                               GOTO(restart, rc);
+                       }
+                       if (last_seq >= space->lsr_start) {
+                               LCONSOLE_INFO("%s: On disk last known sequence %#llx within super-sequence "
+                                             DRANGE", updating super-sequence\n",
+                                             seq->lss_name, last_seq,
+                                             PRANGE(space));
+                               space->lsr_start = last_seq + 1;
+                       }
+               }
                range_alloc(out, space, seq->lss_width);
                rc = seq_store_update(env, seq, NULL, 1);
        }
index 1c8f088..aef6ee7 100644 (file)
@@ -205,6 +205,7 @@ int seq_store_init(struct lu_server_seq *seq,
        dt_obj = dt_find_or_create(env, dt, &fid, &dof, &attr);
        if (!IS_ERR(dt_obj)) {
                seq->lss_obj = dt_obj;
+               seq->lss_dev = dt;
                rc = 0;
        } else {
                CERROR("%s: Can't find \"%s\" obj %d\n",
index 90f27c3..2d7c73d 100644 (file)
@@ -304,6 +304,20 @@ struct dt_device_operations {
        int   (*dt_reserve_or_free_quota)(const struct lu_env *env,
                                          struct dt_device *dev,
                                          struct lquota_id_info *qi);
+
+       /**
+        * Return last known sequence number from disk.
+        *
+        * \param[in] env       execution environment for this thread
+        * \param[in] dev       dt device
+        * \param[out] seq      last known sequence on disk
+        *
+        * \retval 0            on success
+        * \retval negative     negated errno on error
+        */
+       int   (*dt_last_seq_get)(const struct lu_env *env,
+                                struct dt_device *dev,
+                                __u64 *seq);
 };
 
 struct dt_index_features {
@@ -3026,6 +3040,16 @@ static inline int dt_reserve_or_free_quota(const struct lu_env *env,
        return dev->dd_ops->dt_reserve_or_free_quota(env, dev, qi);
 }
 
+/**
+ * Return the last known sequence number on disk by calling the
+ * device's dt_last_seq_get() method.
+ *
+ * Asserts that \a dev, its operations table and the dt_last_seq_get
+ * method are all non-NULL, then forwards the arguments unchanged.
+ *
+ * \param[in] env       execution environment for this thread
+ * \param[in] dev       dt device
+ * \param[out] seq      last known sequence on disk
+ *
+ * \retval              the value returned by
+ *                      dev->dd_ops->dt_last_seq_get()
+ */
+static inline int dt_last_seq_get(const struct lu_env *env,
+                                 struct dt_device *dev,
+                                 __u64 *seq)
+{
+       LASSERT(dev);
+       LASSERT(dev->dd_ops);
+       LASSERT(dev->dd_ops->dt_last_seq_get);
+       return dev->dd_ops->dt_last_seq_get(env, dev, seq);
+}
+
 static inline int dt_lookup(const struct lu_env *env,
                            struct dt_object *dt,
                            struct dt_rec *rec,
index 4049cdd..a621e80 100644 (file)
@@ -2796,6 +2796,7 @@ static const struct dt_device_operations osd_dt_ops = {
        .dt_ro                    = osd_ro,
        .dt_commit_async          = osd_commit_async,
        .dt_reserve_or_free_quota = osd_reserve_or_free_quota,
+       .dt_last_seq_get          = osd_last_seq_get,
 };
 
 static void osd_inode_getattr(const struct lu_env *env,
index 33ecf15..1b463cf 100644 (file)
@@ -3527,3 +3527,64 @@ static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
        return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,
                            osd_scan_O_seq);
 }
+
+/**
+ * Directory-scan callback that tracks the highest sequence number seen.
+ *
+ * Passed to osd_scan_dir() by osd_last_seq_get(). For the directory
+ * entry in \a oie, the entry name is parsed as a hexadecimal number; if
+ * it is at least FID_SEQ_NORMAL and larger than the value currently held
+ * in the thread info's oti_fid, it replaces that value. Entries that are
+ * not directories are ignored.
+ *
+ * \param[in] env       execution environment for this thread
+ * \param[in] osd       osd device, used to load the entry's inode
+ * \param[in] dir       not referenced by this helper
+ * \param[in] oie       iterator positioned on the entry to examine
+ *
+ * \retval 0            entry skipped or processed
+ * \retval negative     osd_iget() failure, -ENOMEM, or the error
+ *                      returned by kstrtoull() for a non-hex name
+ *
+ * NOTE(review): a kstrtoull() failure is returned to the caller; how
+ * osd_scan_dir() reacts to a non-zero return is not visible here --
+ * confirm a non-hex directory name cannot abort the whole scan.
+ */
+static int osd_seq_dir_helper(const struct lu_env *env,
+                              struct osd_device *osd, struct inode *dir,
+                              struct osd_it_ea *oie)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       /* running maximum; zeroed by osd_last_seq_get() before the scan */
+       struct lu_fid *fid = &info->oti_fid;
+       struct inode *inode;
+       struct osd_inode_id id;
+       char *name = NULL;
+       __u64 seq;
+       int rc = 0;
+
+       ENTRY;
+
+       /* load the inode so the entry type can be checked via i_mode */
+       osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, osd, &id, 0);
+       if (IS_ERR(inode))
+               RETURN(PTR_ERR(inode));
+
+       /* only directories are considered; rc is still 0 here */
+       if (!S_ISDIR(inode->i_mode))
+               GOTO(out, rc);
+
+       /*
+        * Copy the name into a NUL-terminated buffer before parsing;
+        * presumably oied_name is length-delimited only -- TODO confirm.
+        */
+       OBD_ALLOC(name, oie->oie_dirent->oied_namelen + 1);
+       if (name == NULL)
+               GOTO(out, rc = -ENOMEM);
+       memcpy(name, oie->oie_dirent->oied_name,
+              oie->oie_dirent->oied_namelen);
+       name[oie->oie_dirent->oied_namelen] = '\0';
+
+       /* directory names are parsed as base-16 sequence numbers */
+       rc = kstrtoull(name, 16, &seq);
+       if (!rc && seq >= FID_SEQ_NORMAL && seq > fid_seq(fid))
+               fid->f_seq = seq;
+
+       OBD_FREE(name, oie->oie_dirent->oied_namelen + 1);
+out:
+       /* drop the reference taken by osd_iget() */
+       iput(inode);
+       RETURN(rc);
+}
+
+/**
+ * Return the highest sequence number found among the directories under
+ * the OST object root (osd->od_ost_map->om_root).
+ *
+ * Zeroes the thread info's oti_fid, scans the root directory with
+ * osd_seq_dir_helper(), which records the largest qualifying sequence
+ * in that fid, and copies the result to \a seq on success.
+ *
+ * \param[in] env       execution environment for this thread
+ * \param[in] dt        dt device
+ * \param[out] seq      last known sequence on disk; written only when
+ *                      0 is returned
+ *
+ * \retval 0            on success
+ * \retval -EINVAL      the device is not an OST (od_is_ost is unset)
+ * \retval negative     error returned by osd_scan_dir()
+ */
+int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt,
+                    __u64 *seq)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct osd_device *osd = osd_dt_dev(dt);
+       /*
+        * Shared with osd_seq_dir_helper(); relies on osd_oti_get(env)
+        * returning the same thread info there -- TODO confirm.
+        */
+       struct lu_fid *fid = &info->oti_fid;
+       int rc;
+
+       ENTRY;
+
+       if (!osd->od_is_ost)
+               RETURN(-EINVAL);
+
+       /* reset the running maximum before the scan */
+       fid_zero(fid);
+       rc = osd_scan_dir(env, osd, osd->od_ost_map->om_root->d_inode,
+                         osd_seq_dir_helper);
+       if (!rc)
+               *seq = fid_seq(fid);
+
+       RETURN(rc);
+}
index f877888..b61cab5 100644 (file)
@@ -53,4 +53,6 @@ struct osd_scrub {
        time64_t                os_bad_oimap_time;
 };
 
+int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt,
+                    __u64 *seq);
 #endif /* _OSD_SCRUB_H */
index 50849e8..9ab85fd 100644 (file)
@@ -734,6 +734,7 @@ static const struct dt_device_operations osd_dt_ops = {
        .dt_commit_async          = osd_commit_async,
        .dt_ro                    = osd_ro,
        .dt_reserve_or_free_quota = osd_reserve_or_free_quota,
+       .dt_last_seq_get          = osd_last_seq_get,
 };
 
 static void *osd_key_init(const struct lu_context *ctx,
index 8fd2b89..66f0721 100644 (file)
@@ -710,6 +710,8 @@ int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
                   const struct lu_fid *fid, uint64_t oid, bool insert);
 int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
                   uint64_t *oid);
+int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt,
+                    __u64 *seq);
 
 /**
  * Basic transaction credit op
index 0380b22..0e3fd99 100644 (file)
@@ -2426,3 +2426,43 @@ static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
 {
        return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_O_seq);
 }
+
+/**
+ * Directory-scan callback that tracks the highest sequence number seen.
+ *
+ * Passed to osd_scan_dir() by osd_last_seq_get(). If the ZAP entry in
+ * \a ozi is a directory, its name is parsed as a hexadecimal number; if
+ * that number is at least FID_SEQ_NORMAL and larger than the value
+ * currently held in the thread info's oti_fid, it replaces that value.
+ *
+ * \param[in] env       execution environment for this thread
+ * \param[in] osd       not referenced by this helper
+ * \param[in] dir_oid   not referenced by this helper
+ * \param[in] ozi       iterator positioned on the entry to examine
+ *
+ * \retval 0            always; names that fail to parse are skipped
+ */
+static int osd_seq_dir_helper(const struct lu_env *env,
+                             struct osd_device *osd, uint64_t dir_oid,
+                             struct osd_zap_it *ozi)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       /* running maximum; zeroed by osd_last_seq_get() before the scan */
+       struct lu_fid *fid = &info->oti_fid;
+       __u64 seq;
+       int rc;
+
+       /* entry type comes from the dirent itself; non-directories skipped */
+       if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+               return 0;
+
+       /* directory names are parsed as base-16 sequence numbers */
+       rc = kstrtoull(ozi->ozi_name, 16, &seq);
+       if (!rc && seq >= FID_SEQ_NORMAL && seq > fid_seq(fid))
+               fid->f_seq = seq;
+
+       /* a kstrtoull() error is deliberately not propagated */
+       return 0;
+}
+
+/**
+ * Return the highest sequence number found among the directories under
+ * the object identified by osd->od_O_id.
+ *
+ * Zeroes the thread info's oti_fid, scans that directory with
+ * osd_seq_dir_helper(), which records the largest qualifying sequence
+ * in that fid, and copies the result to \a seq on success.
+ *
+ * \param[in] env       execution environment for this thread
+ * \param[in] dt        dt device
+ * \param[out] seq      last known sequence on disk; written only when
+ *                      0 is returned
+ *
+ * \retval 0            on success
+ * \retval -EINVAL      the device is not an OST (od_is_ost is unset)
+ * \retval negative     error returned by osd_scan_dir()
+ */
+int osd_last_seq_get(const struct lu_env *env, struct dt_device *dt,
+                    __u64 *seq)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct osd_device *osd = osd_dt_dev(dt);
+       /*
+        * Shared with osd_seq_dir_helper(); relies on osd_oti_get(env)
+        * returning the same thread info there -- TODO confirm.
+        */
+       struct lu_fid *fid = &info->oti_fid;
+       int rc;
+
+       ENTRY;
+
+       if (!osd->od_is_ost)
+               RETURN(-EINVAL);
+
+       /* reset the running maximum before the scan */
+       fid_zero(fid);
+       rc = osd_scan_dir(env, osd, osd->od_O_id, osd_seq_dir_helper);
+       if (!rc)
+               *seq = fid_seq(fid);
+
+       RETURN(rc);
+}