Whamcloud - gitweb
LU-17393 osd: recreate LAST_ID for local seq 98/53898/10
authorHongchao Zhang <hongchao@whamcloud.com>
Tue, 19 Mar 2024 04:19:42 +0000 (12:19 +0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 10 Jun 2024 06:09:59 +0000 (06:09 +0000)
The file at /O/seq/LAST_ID in the sequences used by local storage
is not fixed by LFSCK currently; this patch adds support to
scan the local storage sequences under the root object directory "/O"
and recreate or fix it accordingly.

Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Change-Id: I840a0fcfa207528c5a0e9f0c87df8b4745bba671
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53898
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
12 files changed:
lustre/include/lustre_scrub.h
lustre/include/obd.h
lustre/include/uapi/linux/lustre/lustre_fid.h
lustre/mdt/mdt_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_io.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-zfs/osd_internal.h
lustre/osd-zfs/osd_scrub.c
lustre/ptlrpc/nodemap_storage.c
lustre/tests/sanity-scrub.sh
lustre/utils/ll_decode_filter_fid.c

index 29a3a23..001c50e 100644 (file)
@@ -171,6 +171,12 @@ struct lustre_scrub {
        __u64                   os_new_checked;
        __u64                   os_pos_current;
        __u32                   os_start_flags;
+
+       /* FIDs with maximum OID in local storage */
+       __u32                   os_ls_size;
+       __u32                   os_ls_count;
+       struct lu_fid           *os_ls_fids;
+
        /* Some of these bits can be set by different threads so
         * all updates must be protected by ->os_lock to avoid
         * racing read-modify-write cycles causing corruption.
index 2070da7..9a28fff 100644 (file)
@@ -638,7 +638,8 @@ struct obd_device {
                obd_process_conf:1,     /* device is processing mgs config */
                obd_checksum_dump:1,    /* dump pages upon cksum error */
                obd_dynamic_nids:1,     /* Allow dynamic NIDs on device */
-               obd_read_only:1;        /* device is read-only */
+               obd_read_only:1,        /* device is read-only */
+               obd_need_scrub:1;       /* device need scrub */
 #ifdef HAVE_SERVER_SUPPORT
        /* no committed-transno notification */
        unsigned long                   obd_no_transno:1;
index a57cc0c..b0b0455 100644 (file)
@@ -363,6 +363,17 @@ static inline bool fid_is_sane(const struct lu_fid *fid)
                        fid_seq_is_rsvd(fid_seq(fid)));
 }
 
+/*
+ * Tell whether \a seq is one of the sequences treated as local storage:
+ * FID_SEQ_LLOG, FID_SEQ_LLOG_NAME, FID_SEQ_LOCAL_NAME or FID_SEQ_QUOTA.
+ */
+static inline bool fid_seq_is_local_storage(__u64 seq)
+{
+       switch (seq) {
+       case FID_SEQ_LLOG:
+       case FID_SEQ_LLOG_NAME:
+       case FID_SEQ_LOCAL_NAME:
+       case FID_SEQ_QUOTA:
+               return true;
+       default:
+               return false;
+       }
+}
+
+/* Tell whether the sequence of \a fid is a local storage sequence. */
+static inline bool fid_is_local_storage(const struct lu_fid *fid)
+{
+       const __u64 seq = fid->f_seq;
+
+       return fid_seq_is_local_storage(seq);
+}
+
 static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
 {
        return !memcmp(f0, f1, sizeof(*f0));
index ab01713..1ed1cef 100644 (file)
@@ -7951,9 +7951,21 @@ static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt)
 
        if (!mdt->mdt_skip_lfsck && !mdt->mdt_bottom->dd_rdonly) {
                struct lfsck_start_param lsp;
+               struct lfsck_start start;
 
                lsp.lsp_start = NULL;
                lsp.lsp_index_valid = 0;
+
+               if (dt2lu_dev(mdt->mdt_bottom)->ld_obd &&
+                   dt2lu_dev(mdt->mdt_bottom)->ld_obd->obd_need_scrub) {
+                       memset(&start, 0, sizeof(start));
+                       start.ls_version = LFSCK_VERSION_V1;
+                       start.ls_active = LFSCK_TYPE_SCRUB;
+                       start.ls_flags = LPF_RESET;
+
+                       lsp.lsp_start = &start;
+               }
+
                rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
                                                           OBD_IOC_START_LFSCK,
                                                           0, &lsp);
index 0325e66..c3e72a8 100644 (file)
@@ -768,6 +768,7 @@ struct osd_thread_info {
 
        struct osd_it_ea_dirent *oti_seq_dirent;
        struct osd_it_ea_dirent *oti_dir_dirent;
+       struct inode            *oti_lastid_inode;
 
        struct osd_lookup_cache_object oti_cobj; /* cache object id */
        struct osd_lookup_cache *oti_lookup_cache;
@@ -1223,6 +1224,9 @@ int osd_calc_bkmap_credits(struct super_block *sb, struct inode *inode,
                           const int blocks);
 
 int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
+int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf,
+                     int bufsize, int write_NUL, loff_t *offs,
+                     handle_t *handle);
 
 static inline
 struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
index 1eea451..e939264 100644 (file)
@@ -1904,11 +1904,10 @@ static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen)
        return 0;
 }
 
-static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
-                                   int bufsize, int write_NUL, loff_t *offs,
-                                   handle_t *handle)
+int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf,
+                     int bufsize, int write_NUL, loff_t *offs,
+                     handle_t *handle)
 {
-       struct inode *inode = osd_dt_obj(dt)->oo_inode;
        struct buffer_head *bh        = NULL;
        loff_t              offset    = *offs;
        loff_t              new_size  = i_size_read(inode);
@@ -1964,7 +1963,6 @@ static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
                              offset, block, bufsize, *offs);
 
                if (IS_ERR_OR_NULL(bh)) {
-                       struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
                        int flags = LDISKFS_GET_BLOCKS_CREATE;
 
                        /* while the file system is being mounted, avoid
@@ -2055,6 +2053,17 @@ static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
        return err;
 }
 
+/*
+ * Write a record into the inode backing \a dt.
+ *
+ * Thin wrapper: resolves the OSD device and the backing inode of \a dt and
+ * forwards all other arguments unchanged to osd_ldiskfs_write().
+ */
+static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
+                                   int bufsize, int write_NUL, loff_t *offs,
+                                   handle_t *handle)
+{
+       struct osd_object *obj = osd_dt_obj(dt);
+
+       return osd_ldiskfs_write(osd_obj2dev(obj), obj->oo_inode, buf, bufsize,
+                                write_NUL, offs, handle);
+}
+
 static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                         const struct lu_buf *buf, loff_t *pos,
                         struct thandle *handle)
index 47164f8..5daff18 100644 (file)
@@ -648,10 +648,13 @@ static int osd_scrub_get_fid(struct osd_thread_info *info,
 
 static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
                        struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
-                       struct super_block *sb, bool scrub)
+                       struct super_block *sb, bool is_scrub)
 {
+       struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
        struct inode *inode;
+       int           index;
        int           rc;
+
        ENTRY;
 
        /* Not handle the backend root object and agent parent object.
@@ -690,15 +693,24 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
        if (dev->od_is_ost && S_ISREG(inode->i_mode) && inode->i_nlink > 1)
                dev->od_scrub.os_scrub.os_has_ml_file = 1;
 
-       if (scrub &&
+       if (is_scrub &&
            ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
                /* Only skip it for the first OI scrub accessing. */
                ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
                GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
        }
 
-       rc = osd_scrub_get_fid(info, dev, inode, fid, scrub);
+       rc = osd_scrub_get_fid(info, dev, inode, fid, is_scrub);
+       if (rc >= 0 && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) {
+               index = 0;
+               for (index = 0; index < scrub->os_ls_count; index++)
+                       if (scrub->os_ls_fids[index].f_seq == fid->f_seq)
+                               break;
 
+               if (index < scrub->os_ls_count &&
+                   scrub->os_ls_fids[index].f_oid < fid->f_oid)
+                       scrub->os_ls_fids[index].f_oid = fid->f_oid;
+       }
        GOTO(put, rc);
 
 put:
@@ -1200,6 +1212,11 @@ static int osd_otable_it_preload(const struct lu_env *env,
 static int osd_scan_ml_file_main(const struct lu_env *env,
                                 struct osd_device *dev);
 
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev);
+
+static int osd_scan_last_id_main(const struct lu_env *env,
+                                struct osd_device *dev);
+
 static int osd_scrub_main(void *args)
 {
        struct lu_env env;
@@ -1240,6 +1257,16 @@ static int osd_scrub_main(void *args)
               scrub->os_pos_current,
               scrub->os_file.sf_param & SP_DRYRUN ? " dryrun mode" : "");
 
+       scrub->os_ls_count = 0;
+       scrub->os_ls_size = 4;
+       OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid));
+       if (scrub->os_ls_fids == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = osd_scan_O_main(&env, dev);
+       if (rc)
+               GOTO(out, rc);
+
        rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, false);
        if (unlikely(rc == SCRUB_IT_CRASH)) {
                spin_lock(&scrub->os_lock);
@@ -1251,9 +1278,13 @@ static int osd_scrub_main(void *args)
        if (scrub->os_has_ml_file) {
                ret = osd_scan_ml_file_main(&env, dev);
                if (ret != 0)
-                       rc = ret;
+                       GOTO(out, rc = ret);
        }
 
+       ret = osd_scan_last_id_main(&env, dev);
+       if (ret != 0)
+               rc = ret;
+
        GOTO(post, rc);
 
 post:
@@ -1268,6 +1299,15 @@ post:
 
 
 out:
+       if (scrub->os_ls_fids) {
+               OBD_FREE(scrub->os_ls_fids,
+                        scrub->os_ls_size * sizeof(struct lu_fid));
+
+               scrub->os_ls_size = 0;
+               scrub->os_ls_count = 0;
+               scrub->os_ls_fids = NULL;
+       }
+
        osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
        lu_env_fini(&env);
 
@@ -3196,3 +3236,308 @@ static int osd_scan_ml_file_main(const struct lu_env *env,
        return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,
                            osd_scan_ml_file_seq);
 }
+
+/* Directory entry name of the last-id file handled by the scan code below */
+#define LASTID "LAST_ID"
+
+/*
+ * Store \a lastid_known, converted to little-endian, at offset 0 of
+ * \a inode inside a journal transaction started here.
+ *
+ * Returns the journal start error, or else the result of
+ * osd_ldiskfs_write().
+ */
+static int osd_update_lastid(struct osd_device *dev, struct inode *inode,
+                            __u64 lastid_known)
+{
+       __u64 disk_id = cpu_to_le64(lastid_known);
+       loff_t pos = 0;
+       handle_t *jh;
+       int rc;
+
+       ENTRY;
+
+       jh = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
+                                 osd_dto_credits_noquota[DTO_WRITE_BLOCK]);
+       if (IS_ERR(jh))
+               RETURN(PTR_ERR(jh));
+
+       rc = osd_ldiskfs_write(dev, inode, &disk_id, sizeof(disk_id), 0, &pos,
+                              jh);
+       /* the inode is marked dirty whether or not the write succeeded */
+       mark_inode_dirty(inode);
+       ldiskfs_journal_stop(jh);
+       RETURN(rc);
+}
+
+/*
+ * Create a regular file named LASTID (mode 0644) in \a dir and store
+ * \a lastid_known in its first 8 bytes.
+ *
+ * The value is written little-endian, matching osd_update_lastid() and the
+ * le64_to_cpu() conversion applied when the file is read back.
+ *
+ * Returns 0 on success or a negative errno. The sb_start_write() taken
+ * here is always paired with sb_end_write(), including when the journal
+ * handle cannot be started.
+ */
+static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev,
+                            struct inode *dir, __u64 lastid_known)
+{
+       handle_t *th;
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct dentry *d_lastid;
+       struct inode *i_lastid = NULL;
+       loff_t offset = 0;
+       __u64 lastid;
+       int credits = LDISKFS_DATA_TRANS_BLOCKS(dir->i_sb) +
+                       LDISKFS_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+                       osd_dto_credits_noquota[DTO_WRITE_BLOCK];
+       int rc;
+
+       ENTRY;
+
+       sb_start_write(dir->i_sb);
+       th = osd_journal_start_sb(dir->i_sb, LDISKFS_HT_MISC, credits);
+       if (IS_ERR(th)) {
+               /* out_stop skips the journal stop for an error handle but
+                * still releases the sb_start_write() taken above
+                */
+               GOTO(out_stop, rc = PTR_ERR(th));
+       }
+
+       i_lastid = ldiskfs_create_inode(th, dir, (S_IFREG | 0644), NULL);
+       if (IS_ERR(i_lastid))
+               GOTO(out_stop, rc = PTR_ERR(i_lastid));
+
+       unlock_new_inode(i_lastid);
+
+       d_lastid = osd_child_dentry_by_inode(env, dir, LASTID, strlen(LASTID));
+       rc = osd_ldiskfs_add_entry(info, dev, th, d_lastid, i_lastid, NULL);
+       if (rc)
+               GOTO(out_stop, rc);
+
+       lastid = cpu_to_le64(lastid_known);
+       rc = osd_ldiskfs_write(dev, i_lastid, &lastid, sizeof(lastid), 0,
+                              &offset, th);
+       if (rc)
+               GOTO(out_stop, rc);
+       mark_inode_dirty(i_lastid);
+
+       ldiskfs_journal_stop(th);
+       th = NULL;
+       sb_end_write(dir->i_sb);
+       GOTO(out, rc = 0);
+
+out_stop:
+       if (!IS_ERR_OR_NULL(th))
+               ldiskfs_journal_stop(th);
+       sb_end_write(dir->i_sb);
+
+out:
+       if (!IS_ERR_OR_NULL(i_lastid))
+               iput(i_lastid);
+       RETURN(rc);
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of a sequence directory.
+ *
+ * Sub-directories are ignored. A non-directory entry named LASTID has its
+ * inode handed over (with the reference taken here) through
+ * info->oti_lastid_inode; any other non-directory entry is only logged.
+ *
+ * Returns 0, or the osd_iget() error.
+ */
+static int osd_scan_lastid_dir(const struct lu_env *env, struct osd_device *dev,
+                              struct inode *dir, struct osd_it_ea *oie)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct osd_it_ea_dirent *ent = oie->oie_dirent;
+       struct osd_inode_id id;
+       struct inode *inode;
+       bool is_lastid;
+       int rc = 0;
+
+       ENTRY;
+
+       osd_id_gen(&id, ent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, dev, &id);
+       if (IS_ERR(inode))
+               RETURN(PTR_ERR(inode));
+
+       if (S_ISDIR(inode->i_mode))
+               GOTO(out, rc = 0);
+
+       is_lastid = ent->oied_namelen == strlen(LASTID) &&
+                   strncmp(ent->oied_name, LASTID, ent->oied_namelen) == 0;
+       if (!is_lastid) {
+               CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n",
+                      osd_name(dev), info->oti_seq_dirent->oied_name,
+                      ent->oied_name);
+               GOTO(out, rc = 0);
+       }
+
+       /* keep the inode reference; the consumer of oti_lastid_inode drops it */
+       info->oti_lastid_inode = inode;
+       RETURN(0);
+
+out:
+       iput(inode);
+       RETURN(rc);
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of the scanned root directory.
+ *
+ * For a sub-directory whose name parses to a local storage sequence, look
+ * for its LASTID file and, unless SP_DRYRUN is set:
+ *  - create LASTID inside that sequence directory when it is missing;
+ *  - otherwise make its LMA self FID [seq:0:0] and raise the stored value
+ *    to the largest OID recorded for the sequence in scrub->os_ls_fids.
+ *
+ * The entry name is parsed as hexadecimal first; a result below 0x1F is
+ * re-parsed as decimal.
+ *
+ * \a dir is the directory being scanned; it is not used here because
+ * LASTID lives in the sequence directory \a inode looked up from \a oie.
+ *
+ * Returns 0 on success or when the entry is skipped, negative errno
+ * otherwise.
+ */
+static int osd_scan_lastid_seq(const struct lu_env *env, struct osd_device *dev,
+                              struct inode *dir, struct osd_it_ea *oie)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_ost_attrs *lma = &info->oti_ost_attrs;
+       struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
+       struct inode *inode;
+       struct osd_inode_id id;
+       __u64 seq;
+       __u64 lastid;
+       __u64 lastid_known;
+       loff_t offset = 0;
+       int index;
+       int rc;
+
+       ENTRY;
+
+       osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, dev, &id);
+       if (IS_ERR(inode))
+               RETURN(PTR_ERR(inode));
+
+       if (!S_ISDIR(inode->i_mode))
+               GOTO(out, rc = 0);
+
+       rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
+       if (rc)
+               GOTO(out, rc);
+
+       if (seq < 0x1F) {
+               rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
+               if (rc)
+                       GOTO(out, rc);
+       }
+
+       if (!fid_seq_is_local_storage(seq))
+               GOTO(out, rc = 0);
+
+       /* osd_scan_lastid_dir() reports LASTID via oti_lastid_inode */
+       info->oti_lastid_inode = NULL;
+       info->oti_seq_dirent = oie->oie_dirent;
+       rc = osd_scan_dir(env, dev, inode, osd_scan_lastid_dir);
+       info->oti_seq_dirent = NULL;
+
+       if (rc)
+               GOTO(out, rc);
+
+       if (scrub->os_file.sf_param & SP_DRYRUN)
+               GOTO(out, rc = 0);
+
+       for (index = 0; index < scrub->os_ls_count; index++)
+               if (scrub->os_ls_fids[index].f_seq == seq)
+                       break;
+
+       if (unlikely(index >= scrub->os_ls_count)) {
+               CDEBUG(D_LFSCK,
+                      "%s: can't find seq %llu, it's modified during scrub?\n",
+                      osd_name(dev), seq);
+               /* rc is 0 here: an unrecorded sequence is skipped */
+               GOTO(out, rc);
+       }
+
+       lastid_known = scrub->os_ls_fids[index].f_oid;
+       if (!info->oti_lastid_inode) {
+               /* create LASTID in the sequence directory that was just
+                * searched, not in the directory this entry came from
+                */
+               rc = osd_create_lastid(env, dev, inode, lastid_known);
+               GOTO(out, rc);
+       }
+
+       rc = osd_get_lma(info, info->oti_lastid_inode, &info->oti_obj_dentry,
+                        lma);
+       if (rc && rc != -ENODATA) {
+               CDEBUG(D_LFSCK, "%s: failed to get the xattr %s for O/%s/%s\n",
+                      osd_name(dev), XATTR_NAME_LMA,
+                      oie->oie_dirent->oied_name, LASTID);
+               GOTO(out, rc);
+       }
+
+       /* missing LMA, or a self FID other than [seq:0:0]: rewrite it */
+       if (rc != 0 || lma->loa_lma.lma_self_fid.f_seq != seq ||
+           lma->loa_lma.lma_self_fid.f_oid != 0 ||
+           lma->loa_lma.lma_self_fid.f_ver != 0) {
+               lma->loa_lma.lma_self_fid.f_seq = seq;
+               lma->loa_lma.lma_self_fid.f_oid = 0;
+               lma->loa_lma.lma_self_fid.f_ver = 0;
+
+               rc = __osd_xattr_set(info, info->oti_lastid_inode,
+                                    XATTR_NAME_LMA, lma, sizeof(*lma),
+                                    rc == -ENODATA ?
+                                               XATTR_CREATE : XATTR_REPLACE);
+               if (rc)
+                       GOTO(out, rc);
+       }
+
+       /* a file too short to hold the value counts as last id 0 */
+       spin_lock(&info->oti_lastid_inode->i_lock);
+       if (i_size_read(info->oti_lastid_inode) < sizeof(lastid)) {
+               spin_unlock(&info->oti_lastid_inode->i_lock);
+               lastid = 0;
+       } else {
+               spin_unlock(&info->oti_lastid_inode->i_lock);
+
+               rc = osd_ldiskfs_read(info->oti_lastid_inode, &lastid,
+                                     sizeof(lastid), &offset);
+               if (rc < 0)
+                       GOTO(out, rc);
+
+               if (rc < sizeof(lastid))
+                       lastid = 0;
+               else
+                       lastid = le64_to_cpu(lastid);
+       }
+
+       /* the stored value is only ever raised, never lowered */
+       if (lastid < lastid_known)
+               rc = osd_update_lastid(dev, info->oti_lastid_inode,
+                                      lastid_known);
+
+out:
+       if (info->oti_lastid_inode) {
+               iput(info->oti_lastid_inode);
+               info->oti_lastid_inode = NULL;
+       }
+
+       iput(inode);
+       RETURN(rc);
+}
+
+/*
+ * Run osd_scan_lastid_seq() on every entry of the directory referenced by
+ * dev->od_ost_map->om_root and return the result of osd_scan_dir().
+ */
+static int osd_scan_last_id_main(const struct lu_env *env,
+                                struct osd_device *dev)
+{
+       struct inode *root = dev->od_ost_map->om_root->d_inode;
+
+       return osd_scan_dir(env, dev, root, osd_scan_lastid_seq);
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of the scanned root directory.
+ *
+ * Every sub-directory whose name parses to a local storage sequence gets a
+ * slot in scrub->os_ls_fids with f_seq set; the array grows by 4 entries
+ * whenever it is full. The entry name is parsed as hexadecimal first; a
+ * result below 0x1F is re-parsed as decimal.
+ *
+ * Returns 0 on success or when the entry is skipped, negative errno
+ * otherwise. On -ENOMEM the array, os_ls_size and os_ls_count are left
+ * untouched so they stay consistent with each other.
+ */
+static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev,
+                         struct inode *dir, struct osd_it_ea *oie)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
+       struct inode *inode;
+       struct osd_inode_id id;
+       struct lu_fid *fids;
+       __u64 seq;
+       int rc;
+
+       ENTRY;
+
+       osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+       inode = osd_iget(info, dev, &id);
+       if (IS_ERR(inode))
+               RETURN(PTR_ERR(inode));
+
+       if (!S_ISDIR(inode->i_mode))
+               GOTO(out, rc = 0);
+
+       rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
+       if (rc)
+               GOTO(out, rc);
+
+       if (seq < 0x1F) {
+               rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
+               if (rc)
+                       GOTO(out, rc);
+       }
+
+       if (!fid_seq_is_local_storage(seq))
+               GOTO(out, rc = 0);
+
+       /* grow before claiming a slot, so a failed allocation cannot leave
+        * os_ls_count larger than os_ls_size
+        */
+       if (unlikely(scrub->os_ls_count >= scrub->os_ls_size)) {
+               OBD_ALLOC(fids,
+                         sizeof(struct lu_fid) * (scrub->os_ls_size + 4));
+               if (fids == NULL)
+                       GOTO(out, rc = -ENOMEM);
+
+               memcpy(fids, scrub->os_ls_fids,
+                      sizeof(struct lu_fid) * scrub->os_ls_size);
+               OBD_FREE(scrub->os_ls_fids,
+                        sizeof(struct lu_fid) * scrub->os_ls_size);
+
+               scrub->os_ls_size += 4;
+               scrub->os_ls_fids = fids;
+       }
+
+       scrub->os_ls_fids[scrub->os_ls_count].f_seq = seq;
+       scrub->os_ls_count++;
+
+out:
+       iput(inode);
+       RETURN(rc);
+}
+
+/*
+ * Run osd_scan_O_seq() on every entry of the directory referenced by
+ * dev->od_ost_map->om_root and return the result of osd_scan_dir().
+ */
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
+{
+       struct inode *root = dev->od_ost_map->om_root->d_inode;
+
+       return osd_scan_dir(env, dev, root, osd_scan_O_seq);
+}
index 827135e..6a6deb9 100644 (file)
@@ -276,6 +276,7 @@ struct osd_thread_info {
 
        char                    *oti_seq_name;
        char                    *oti_dir_name;
+       uint64_t                oti_lastid_oid;
 };
 
 extern struct lu_context_key osd_key;
index bddf41c..87456cb 100644 (file)
@@ -157,6 +157,7 @@ osd_scrub_check_update(const struct lu_env *env, struct osd_device *dev,
        dnode_t *dn = NULL;
        uint64_t oid2;
        int ops = DTO_INDEX_UPDATE;
+       int index;
        int rc;
 
        ENTRY;
@@ -267,6 +268,17 @@ out:
                sa_handle_destroy(hdl);
        }
 
+       if (!rc && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) {
+               index = 0;
+               for (index = 0; index < scrub->os_ls_count; index++)
+                       if (scrub->os_ls_fids[index].f_seq == fid->f_seq)
+                               break;
+
+               if (index < scrub->os_ls_count &&
+                   scrub->os_ls_fids[index].f_oid < fid->f_oid)
+                       scrub->os_ls_fids[index].f_oid = fid->f_oid;
+       }
+
 cleanup:
        if (nvbuf)
                nvlist_free(nvbuf);
@@ -464,6 +476,9 @@ static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev,
 
 static int osd_scan_ml_file_main(const struct lu_env *env,
                                 struct osd_device *dev);
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev);
+static int osd_scan_lastid_main(const struct lu_env *env,
+                               struct osd_device *dev);
 
 static int osd_scrub_main(void *args)
 {
@@ -506,6 +521,16 @@ static int osd_scrub_main(void *args)
               scrub->os_name, scrub->os_start_flags,
               scrub->os_pos_current);
 
+       scrub->os_ls_count = 0;
+       scrub->os_ls_size = 4;
+       OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid));
+       if (scrub->os_ls_fids == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = osd_scan_O_main(&env, dev);
+       if (rc)
+               GOTO(out, rc);
+
        fid = &osd_oti_get(&env)->oti_fid;
        while (!rc && !kthread_should_stop()) {
                rc = osd_scrub_next(&env, dev, fid, &oid);
@@ -535,11 +560,24 @@ post:
                        rc = ret;
        }
 
+       ret = osd_scan_lastid_main(&env, dev);
+       if (ret != 0)
+               rc = ret;
+
        rc = scrub_thread_post(&env, &dev->od_scrub, rc);
        CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d\n",
               scrub->os_name, scrub->os_pos_current, rc);
 
 out:
+       if (scrub->os_ls_fids) {
+               OBD_FREE(scrub->os_ls_fids,
+                        scrub->os_ls_size * sizeof(struct lu_fid));
+
+               scrub->os_ls_size = 0;
+               scrub->os_ls_count = 0;
+               scrub->os_ls_fids = NULL;
+       }
+
        while (!list_empty(&scrub->os_inconsistent_items)) {
                struct osd_inconsistent_item *oii;
 
@@ -2045,3 +2083,359 @@ static int osd_scan_ml_file_main(const struct lu_env *env,
 {
        return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_ml_file_seq);
 }
+
+/* ZAP entry name of the last-id file handled by the scan code below */
+#define LASTID "LAST_ID"
+
+/*
+ * Create the LASTID object in the directory described by \a ozi and store
+ * \a lastid_known (little-endian) at offset 0 of the new object.
+ *
+ * The FID of the new object is read from info->oti_mdt_attrs.lma_self_fid,
+ * so that field has to be filled in before this function is called.
+ *
+ * Returns 0 on success, negative errno otherwise.
+ */
+static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev,
+                            struct osd_zap_it *ozi, __u64 lastid_known)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+       struct lu_attr *la = &info->oti_la;
+       struct luz_direntry *zde = &info->oti_zde;
+       uint64_t dir = ozi->ozi_zde.lzd_reg.zde_dnode;
+       dmu_tx_t *tx = NULL;
+       nvlist_t *nvbuf = NULL;
+       dnode_t *dn = NULL;
+       /* must start out NULL: "out" tests it on every early failure */
+       sa_handle_t *hdl = NULL;
+       __u64 lastid;
+       int num = sizeof(*zde) / 8;
+       int rc = 0;
+
+       ENTRY;
+
+       tx = dmu_tx_create(dev->od_os);
+       if (!tx)
+               GOTO(out, rc = -ENOMEM);
+
+       dmu_tx_hold_sa_create(tx, osd_find_dnsize(dev, OSD_BASE_EA_IN_BONUS));
+       dmu_tx_hold_zap(tx, dir, FALSE, NULL);
+
+       rc = -dmu_tx_assign(tx, TXG_WAIT);
+       if (rc)
+               GOTO(abort, rc);
+
+       memset(&zde->lzd_reg, 0, sizeof(zde->lzd_reg));
+       zde->lzd_reg.zde_type = IFTODT(S_IFREG);
+       zde->lzd_fid = lma->lma_self_fid;
+
+       rc = -nvlist_alloc(&nvbuf, NV_UNIQUE_NAME, KM_SLEEP);
+       if (rc)
+               GOTO(abort, rc);
+
+       lustre_lma_init(lma, &zde->lzd_fid, 0, 0);
+       lustre_lma_swab(lma);
+       rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA, (uchar_t *)lma,
+                                   sizeof(*lma));
+       if (rc)
+               GOTO(abort, rc);
+
+       la->la_valid = LA_TYPE | LA_MODE;
+       la->la_mode = (DTTOIF(zde->lzd_reg.zde_type) & S_IFMT) | 0644;
+
+       rc = __osd_object_create(env, dev, NULL, &zde->lzd_fid, &dn, tx, la);
+       if (rc)
+               GOTO(abort, rc);
+
+       zde->lzd_reg.zde_dnode = dn->dn_object;
+       rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL,
+                           SA_HDL_PRIVATE, &hdl);
+       if (rc)
+               GOTO(abort, rc);
+
+       rc = __osd_attr_init(env, dev, NULL, hdl, tx, la, dir, nvbuf);
+       if (rc)
+               GOTO(abort, rc);
+
+       sa_handle_destroy(hdl);
+       hdl = NULL;
+
+       /* NOTE(review): this hold is added after dmu_tx_assign() - confirm */
+       dmu_tx_hold_write_by_dnode(tx, dn, 0, sizeof(lastid_known));
+
+       lastid = cpu_to_le64(lastid_known);
+       dmu_write_by_dnode(dn, 0, sizeof(lastid), &lastid, tx);
+
+       rc = osd_zap_add(dev, dir, NULL, LASTID, strlen(LASTID), num,
+                        (void *)zde, tx);
+       if (rc)
+               GOTO(abort, rc);
+
+       dmu_tx_commit(tx);
+       GOTO(out, rc);
+
+abort:
+       if (dn)
+               dmu_object_free(dev->od_os, dn->dn_object, tx);
+
+       dmu_tx_abort(tx);
+
+out:
+       if (hdl)
+               sa_handle_destroy(hdl);
+       if (dn)
+               osd_dnode_rele(dn);
+       if (nvbuf)
+               nvlist_free(nvbuf);
+
+       return rc;
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of a sequence directory.
+ *
+ * Entries that are not regular files are ignored. A regular file named
+ * LASTID has its dnode number stored in info->oti_lastid_oid; any other
+ * regular file is only logged. Always returns 0.
+ */
+static int osd_scan_lastid_dir(const struct lu_env *env,
+                              struct osd_device *dev, uint64_t dir_oid,
+                              struct osd_zap_it *ozi)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       bool is_reg;
+
+       ENTRY;
+
+       is_reg = S_ISREG(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type)));
+       if (!is_reg)
+               RETURN(0);
+
+       if (strcmp(ozi->ozi_name, LASTID) == 0)
+               info->oti_lastid_oid = ozi->ozi_zde.lzd_reg.zde_dnode;
+       else
+               CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n",
+                      osd_name(dev), info->oti_seq_name, ozi->ozi_name);
+
+       RETURN(0);
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of the scanned root directory.
+ *
+ * For a sub-directory whose name parses to a local storage sequence, look
+ * for its LASTID object and:
+ *  - create it when it is missing (skipped when SP_DRYRUN is set);
+ *  - otherwise, when the stored value is below the largest OID recorded
+ *    for the sequence in scrub->os_ls_fids, or the LMA self FID is not
+ *    [seq:0:0], rewrite them (skipped when SP_DRYRUN is set).
+ *
+ * The entry name is parsed as hexadecimal first; a result below 0x1F is
+ * re-parsed as decimal.
+ *
+ * Returns 0 on success or when the entry is skipped, -ERANGE when the
+ * sequence was not recorded by the earlier scan, other negative errno on
+ * failure.
+ */
+static int osd_scan_lastid_seq(const struct lu_env *env,
+                              struct osd_device *dev, uint64_t dir_oid,
+                              struct osd_zap_it *ozi)
+{
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+       struct lustre_mdt_attrs *cur = NULL;
+       struct lu_buf *lb = &info->oti_xattr_lbuf;
+       struct lustre_scrub *scrub = &dev->od_scrub;
+       dnode_t *dn = NULL;
+       dmu_tx_t *tx = NULL;
+       nvlist_t *nvbuf = NULL;
+       sa_handle_t *hdl = NULL;
+       uint64_t blocks;
+       uint32_t blksize;
+       uint32_t sz_lma;
+       size_t size = 0;
+       __u64 seq;
+       __u64 lastid;
+       __u64 lastid_known;
+       bool need_update = false;
+       int index;
+       int rc;
+
+       ENTRY;
+
+       if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+               RETURN(0);
+
+       rc = kstrtoull(ozi->ozi_name, 16, &seq);
+       if (rc)
+               RETURN(rc);
+
+       if (seq < 0x1F) {
+               rc = kstrtoull(ozi->ozi_name, 10, &seq);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       if (!fid_seq_is_local_storage(seq))
+               GOTO(out, rc = 0);
+
+       /* osd_scan_lastid_dir() reports LASTID via oti_lastid_oid */
+       info->oti_lastid_oid = 0;
+       info->oti_seq_name = ozi->ozi_name;
+       rc = osd_scan_dir(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode,
+                         osd_scan_lastid_dir);
+       if (rc)
+               GOTO(out, rc);
+
+       for (index = 0; index < scrub->os_ls_count; index++)
+               if (scrub->os_ls_fids[index].f_seq == seq)
+                       break;
+
+       if (unlikely(index >= scrub->os_ls_count)) {
+               CDEBUG(D_LFSCK,
+                      "%s: can't find seq %llu, it's modified during scrub?\n",
+                      osd_name(dev), seq);
+               GOTO(out, rc = -ERANGE);
+       }
+
+       lastid_known = scrub->os_ls_fids[index].f_oid;
+
+       if (info->oti_lastid_oid == 0) {
+               /* a dry run must not create anything */
+               if (scrub->os_file.sf_param & SP_DRYRUN)
+                       GOTO(out, rc = 0);
+
+               lma->lma_self_fid.f_seq = seq;
+               lma->lma_self_fid.f_oid = 0;
+               lma->lma_self_fid.f_ver = 0;
+
+               rc = osd_create_lastid(env, dev, ozi, lastid_known);
+               GOTO(out, rc);
+       }
+
+       rc = __osd_obj2dnode(dev->od_os, info->oti_lastid_oid, &dn);
+       if (rc)
+               GOTO(out, rc);
+
+       rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL,
+                           SA_HDL_PRIVATE, &hdl);
+       if (rc)
+               GOTO(out, rc);
+
+       lastid = 0;
+       sa_object_size(hdl, &blksize, &blocks);
+       if (blocks > 0) {
+               rc = osd_dmu_read(dev, dn, 0, sizeof(lastid), (char *) &lastid,
+                                 0);
+               if (rc)
+                       GOTO(out, rc);
+
+               lastid = le64_to_cpu(lastid);
+               /* same test as the write below: only raise the value */
+               if (lastid < lastid_known)
+                       need_update = true;
+       } else {
+               need_update = true;
+       }
+
+       rc = __osd_xattr_load(dev, hdl, &nvbuf);
+       if (rc)
+               GOTO(out, rc);
+
+       /* "cur" points into nvbuf's own storage; "lma" stays on the
+        * per-thread buffer so it remains valid after nvlist_remove()
+        */
+       rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA,
+                                      (uchar_t **) &cur, &sz_lma);
+       if (rc != 0 && rc != -ENOENT)
+               GOTO(out, rc);
+
+       if (rc == -ENOENT || sz_lma < sizeof(*cur) ||
+           cur->lma_self_fid.f_seq != seq ||
+           cur->lma_self_fid.f_oid != 0 || cur->lma_self_fid.f_ver != 0) {
+               memset(lma, 0, sizeof(*lma));
+               if (!rc) {
+                       /* copy the old attributes out before they are freed */
+                       if (sz_lma >= sizeof(*lma))
+                               memcpy(lma, cur, sizeof(*lma));
+
+                       rc = -nvlist_remove(nvbuf, XATTR_NAME_LMA,
+                                           DATA_TYPE_BYTE_ARRAY);
+                       if (rc)
+                               GOTO(out, rc);
+               }
+
+               need_update = true;
+               lma->lma_self_fid.f_seq = seq;
+               lma->lma_self_fid.f_oid = 0;
+               lma->lma_self_fid.f_ver = 0;
+
+               /* store the structure itself, not the pointer to it */
+               rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA,
+                                           (uchar_t *) lma, sizeof(*lma));
+               if (rc)
+                       GOTO(out, rc);
+       }
+
+       if (!need_update)
+               GOTO(out, rc);
+
+       if (scrub->os_file.sf_param & SP_DRYRUN)
+               GOTO(out, rc = 0);
+
+       /* pack the xattrs before a transaction exists, so that a failure
+        * here has no transaction to clean up
+        */
+       rc = -nvlist_size(nvbuf, &size, NV_ENCODE_XDR);
+       if (rc)
+               GOTO(out, rc);
+
+       lu_buf_check_and_alloc(lb, size);
+       if (lb->lb_buf == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = -nvlist_pack(nvbuf, (char **)&lb->lb_buf, &size, NV_ENCODE_XDR,
+                         KM_SLEEP);
+       if (rc)
+               GOTO(out, rc);
+
+       tx = dmu_tx_create(dev->od_os);
+       if (!tx)
+               GOTO(out, rc = -ENOMEM);
+
+       dmu_tx_hold_zap_by_dnode(tx, dn, TRUE, NULL);
+       if (lastid < lastid_known)
+               dmu_tx_hold_write_by_dnode(tx, dn, 0, sizeof(lastid));
+
+       rc = -dmu_tx_assign(tx, TXG_WAIT);
+       if (rc)
+               GOTO(abort, rc);
+
+       /* NOTE(review): the packed xattr buffer is written to
+        * SA_ZPL_SIZE(dev), unchanged from the original code - confirm
+        * that this is the intended SA attribute
+        */
+       rc = -sa_update(hdl, SA_ZPL_SIZE(dev), lb->lb_buf, size, tx);
+       if (rc)
+               GOTO(abort, rc);
+
+       if (lastid < lastid_known) {
+               lastid = cpu_to_le64(lastid_known);
+               dmu_write_by_dnode(dn, 0, sizeof(lastid),
+                                  (const char *) &lastid, tx);
+       }
+
+       dmu_tx_commit(tx);
+       GOTO(out, rc);
+
+abort:
+       dmu_tx_abort(tx);
+
+out:
+       if (nvbuf)
+               nvlist_free(nvbuf);
+
+       if (hdl)
+               sa_handle_destroy(hdl);
+
+       if (dn)
+               osd_dnode_rele(dn);
+
+       RETURN(rc);
+}
+
+/*
+ * Run osd_scan_lastid_seq() on every entry of the directory object
+ * dev->od_O_id and return the result of osd_scan_dir().
+ */
+static int osd_scan_lastid_main(const struct lu_env *env,
+                               struct osd_device *dev)
+{
+       int rc;
+
+       rc = osd_scan_dir(env, dev, dev->od_O_id, osd_scan_lastid_seq);
+       return rc;
+}
+
+/*
+ * osd_scan_dir() callback run on each entry of the scanned root directory.
+ *
+ * Every sub-directory whose name parses to a local storage sequence gets a
+ * slot in scrub->os_ls_fids with f_seq set; the array grows by 4 entries
+ * whenever it is full. The entry name is parsed as hexadecimal first; a
+ * result below 0x1F is re-parsed as decimal.
+ *
+ * Returns 0 on success or when the entry is skipped, negative errno
+ * otherwise. On -ENOMEM the array, os_ls_size and os_ls_count are left
+ * untouched so they stay consistent with each other.
+ */
+static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev,
+                         uint64_t dir_oid, struct osd_zap_it *ozi)
+{
+       struct lustre_scrub *scrub = &dev->od_scrub;
+       struct lu_fid *fids;
+       __u64 seq;
+       int rc;
+
+       ENTRY;
+
+       if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+               RETURN(0);
+
+       rc = kstrtoull(ozi->ozi_name, 16, &seq);
+       if (rc)
+               RETURN(rc);
+
+       if (seq < 0x1F) {
+               rc = kstrtoull(ozi->ozi_name, 10, &seq);
+               if (rc)
+                       RETURN(rc);
+       }
+
+       if (!fid_seq_is_local_storage(seq))
+               GOTO(out, rc = 0);
+
+       /* grow before claiming a slot, so a failed allocation cannot leave
+        * os_ls_count larger than os_ls_size
+        */
+       if (unlikely(scrub->os_ls_count >= scrub->os_ls_size)) {
+               OBD_ALLOC(fids,
+                         sizeof(struct lu_fid) * (scrub->os_ls_size + 4));
+               if (fids == NULL)
+                       GOTO(out, rc = -ENOMEM);
+
+               memcpy(fids, scrub->os_ls_fids,
+                      sizeof(struct lu_fid) * scrub->os_ls_size);
+               OBD_FREE(scrub->os_ls_fids,
+                        sizeof(struct lu_fid) * scrub->os_ls_size);
+
+               scrub->os_ls_size += 4;
+               scrub->os_ls_fids = fids;
+       }
+
+       scrub->os_ls_fids[scrub->os_ls_count].f_seq = seq;
+       scrub->os_ls_count++;
+
+out:
+       RETURN(rc);
+}
+
+/*
+ * Run osd_scan_O_seq() on every entry of the directory object
+ * dev->od_O_id and return the result of osd_scan_dir().
+ */
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
+{
+       int rc;
+
+       rc = osd_scan_dir(env, dev, dev->od_O_id, osd_scan_O_seq);
+       return rc;
+}
index 430dca0..2d41c89 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/types.h>
 #include <uapi/linux/lnet/lnet-types.h>
 #include <uapi/linux/lustre/lustre_idl.h>
+#include <uapi/linux/lustre/lustre_ioctl.h>
 #include <uapi/linux/lustre/lustre_disk.h>
 #include <dt_object.h>
 #include <lu_object.h>
@@ -245,12 +246,26 @@ again:
                }
        }
 
+retry:
        nm_obj = local_index_find_or_create(env, los, root_obj,
                                                LUSTRE_NODEMAP_NAME,
                                                S_IFREG | S_IRUGO | S_IWUSR,
                                                &dt_nodemap_features);
-       if (IS_ERR(nm_obj))
+       if (IS_ERR(nm_obj)) {
+               if (PTR_ERR(nm_obj) == -EEXIST && rc != -ENOENT &&
+                   los->los_last_oid < (tfid.f_oid - 1)) {
+                       if (dt2lu_dev(dev)->ld_obd)
+                               dt2lu_dev(dev)->ld_obd->obd_need_scrub = 1;
+
+                       mutex_lock(&los->los_id_lock);
+                       los->los_last_oid = tfid.f_oid - 1;
+                       mutex_unlock(&los->los_id_lock);
+
+                       goto retry;
+               }
+
                GOTO(out_root, nm_obj);
+       }
 
        if (nm_obj->do_index_ops == NULL) {
                rc = nm_obj->do_ops->do_index_try(env, nm_obj,
index a710be8..1292e7f 100755 (executable)
@@ -1483,6 +1483,71 @@ test_21() {
 }
 run_test 21 "don't hang MDS recovery when failed to get update log"
 
+test_22() {
+       #FID_SEQ_LLOG = 1
+       #FID_SEQ_LLOG_NAME = 10
+       #FID_SEQ_LOCAL_NAME = 0x200000003,
+       local s_llog="1"
+       local s_llog_name="10"
+       local s_local="200000003"
+       local lma
+       local fid
+
+       stopall
+
+       # remove the LASTID
+       mount_fstype mds1 || error "(1) Fail to mount mds1"
+       mntpt=$(facet_mntpt mds1)
+
+       do_facet mds1 rm -f "$mntpt/O/$s_llog/LAST_ID"
+       do_facet mds1 rm -f "$mntpt/O/$s_llog_name/LAST_ID"
+       do_facet mds1 rm -f "$mntpt/O/$s_local_name/LAST_ID"
+
+       unmount_fstype mds1 || error "(2) Fail to umount mds1"
+
+       $LCTL set_param debug=-1
+       $LCTL dk > /dev/null
+       start mds1 $(mdsdevname 1) > /dev/null || {
+               $LCTL dk > /tmp/log
+               error "(3) Fail to start mds1"
+       }
+       $START_SCRUB -r || error "(4) Fail to start OI scrub on MDT!"
+
+       wait_update_facet mds1 "$LCTL get_param -n \
+               osd-*.$(facet_svc mds1).oi_scrub |
+               awk '/^status/ { print \\\$2 }'" "completed" 6 ||
+               error "(5) Expected '$expected' on mds1"
+
+       stop mds1
+
+       mount_fstype mds1 || error "(6) Fail to mount mds1 again"
+       do_facet mds1 stat "$mntpt/O/$s_llog/LAST_ID" ||
+               error "(7) LAST_ID is not recreated for LLOG"
+       lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog/LAST_ID)
+       fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+       [ "$fid" == "[0x1:0x0:0x0]" ] ||
+               error "(8) the LMA of the LAST_ID is incorrect"
+
+       do_facet mds1 stat "$mntpt/O/$s_llog_name/LAST_ID" ||
+               error "(8) LAST_ID is not recreated for LLOG_NAME"
+       lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog_name/LAST_ID)
+       fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+       [ "$fid" == "[0xa:0x0:0x0]" ] ||
+               error "(8) the LMA of the LAST_ID is incorrect"
+
+       do_facet mds1 stat "$mntpt/O/$s_local/LAST_ID" ||
+               error "(9) LAST_ID is not recreated for LOCAL_NAME"
+       lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_local/LAST_ID)
+       fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+       [ "$fid" == "[0x200000003:0x0:0x0]" ] ||
+               error "(8) the LMA of the LAST_ID is incorrect"
+
+       unmount_fstype mds1 || error "(10) Fail to umount mds1 again"
+
+       start mds1 $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
+               error "(11) Fail to start mds1"
+}
+run_test 22 "LFSCK can recreate or fix the LASTID on MDT/OST"
 
 # restore MDS/OST size
 MDSSIZE=${SAVED_MDSSIZE}
index f395849..e80417b 100644 (file)
@@ -93,9 +93,10 @@ int main(int argc, char *argv[])
                                struct lustre_ost_attrs *loa = (void *)buf;
                                int rc1;
 
+                               memset(loa, 0, sizeof(*loa));
                                rc1 = getxattr(argv[i], "trusted.lma", loa,
                                               sizeof(*loa));
-                               if (rc1 < sizeof(*loa)) {
+                               if (rc1 < sizeof(struct lustre_mdt_attrs)) {
                                        fprintf(stderr,
                                                "%s: error reading fid: %s\n",
                                                argv[i], strerror(ENODATA));
@@ -105,7 +106,8 @@ int main(int argc, char *argv[])
                                }
 
                                lustre_loa_swab(loa);
-                               if (!(loa->loa_lma.lma_compat &
+                               if (rc1 > sizeof(struct lustre_mdt_attrs) &&
+                                   !(loa->loa_lma.lma_compat &
                                      LMAC_STRIPE_INFO)) {
                                        fprintf(stderr,
                                                "%s: not stripe info: %s\n",
@@ -115,9 +117,10 @@ int main(int argc, char *argv[])
                                        continue;
                                }
 
-                               printf("%s: parent="DFID" stripe=%u "
+                               printf("%s: fid="DFID" parent="DFID" stripe=%u "
                                       "stripe_size=%u stripe_count=%u",
                                       argv[i],
+                                      PFID(&loa->loa_lma.lma_self_fid),
                                       (unsigned long long)loa->loa_parent_fid.f_seq,
                                       loa->loa_parent_fid.f_oid, 0, /* ver */
                                       loa->loa_parent_fid.f_stripe_idx &