From: Hongchao Zhang Date: Tue, 19 Mar 2024 04:19:42 +0000 (+0800) Subject: LU-17393 osd: recreate LAST_ID for local seq X-Git-Tag: 2.15.64~105 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F98%2F53898%2F10;p=fs%2Flustre-release.git LU-17393 osd: recreate LAST_ID for local seq The file at /O/seq/LAST_ID in the sequences used by local storage is not currently fixed by LFSCK. This patch adds support to scan the local storage sequences under the root object directory "/O" and recreate or fix it accordingly. Signed-off-by: Hongchao Zhang Change-Id: I840a0fcfa207528c5a0e9f0c87df8b4745bba671 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53898 Reviewed-by: Andreas Dilger Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/include/lustre_scrub.h b/lustre/include/lustre_scrub.h index 29a3a23..001c50e 100644 --- a/lustre/include/lustre_scrub.h +++ b/lustre/include/lustre_scrub.h @@ -171,6 +171,12 @@ struct lustre_scrub { __u64 os_new_checked; __u64 os_pos_current; __u32 os_start_flags; + + /* FIDs with maximum OID in local storage */ + __u32 os_ls_size; + __u32 os_ls_count; + struct lu_fid *os_ls_fids; + /* Some of these bits can be set by different threads so * all updates must be protected by ->os_lock to avoid * racing read-modify-write cycles causing corruption. 
diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 2070da7..9a28fff 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -638,7 +638,8 @@ struct obd_device { obd_process_conf:1, /* device is processing mgs config */ obd_checksum_dump:1, /* dump pages upon cksum error */ obd_dynamic_nids:1, /* Allow dynamic NIDs on device */ - obd_read_only:1; /* device is read-only */ + obd_read_only:1, /* device is read-only */ + obd_need_scrub:1; /* device need scrub */ #ifdef HAVE_SERVER_SUPPORT /* no committed-transno notification */ unsigned long obd_no_transno:1; diff --git a/lustre/include/uapi/linux/lustre/lustre_fid.h b/lustre/include/uapi/linux/lustre/lustre_fid.h index a57cc0c..b0b0455 100644 --- a/lustre/include/uapi/linux/lustre/lustre_fid.h +++ b/lustre/include/uapi/linux/lustre/lustre_fid.h @@ -363,6 +363,17 @@ static inline bool fid_is_sane(const struct lu_fid *fid) fid_seq_is_rsvd(fid_seq(fid))); } +static inline bool fid_seq_is_local_storage(__u64 seq) +{ + return seq == FID_SEQ_LLOG || seq == FID_SEQ_LLOG_NAME || + seq == FID_SEQ_LOCAL_NAME || seq == FID_SEQ_QUOTA; +} + +static inline bool fid_is_local_storage(const struct lu_fid *fid) +{ + return fid_seq_is_local_storage(fid->f_seq); +} + static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1) { return !memcmp(f0, f1, sizeof(*f0)); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index ab01713..1ed1cef 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -7951,9 +7951,21 @@ static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) if (!mdt->mdt_skip_lfsck && !mdt->mdt_bottom->dd_rdonly) { struct lfsck_start_param lsp; + struct lfsck_start start; lsp.lsp_start = NULL; lsp.lsp_index_valid = 0; + + if (dt2lu_dev(mdt->mdt_bottom)->ld_obd && + dt2lu_dev(mdt->mdt_bottom)->ld_obd->obd_need_scrub) { + memset(&start, 0, sizeof(start)); + start.ls_version = LFSCK_VERSION_V1; + start.ls_active = LFSCK_TYPE_SCRUB; + 
start.ls_flags = LPF_RESET; + + lsp.lsp_start = &start; + } + rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child, OBD_IOC_START_LFSCK, 0, &lsp); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 0325e66..c3e72a8 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -768,6 +768,7 @@ struct osd_thread_info { struct osd_it_ea_dirent *oti_seq_dirent; struct osd_it_ea_dirent *oti_dir_dirent; + struct inode *oti_lastid_inode; struct osd_lookup_cache_object oti_cobj; /* cache object id */ struct osd_lookup_cache *oti_lookup_cache; @@ -1223,6 +1224,9 @@ int osd_calc_bkmap_credits(struct super_block *sb, struct inode *inode, const int blocks); int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); +int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf, + int bufsize, int write_NUL, loff_t *offs, + handle_t *handle); static inline struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 1eea451..e939264 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1904,11 +1904,10 @@ static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen) return 0; } -static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf, - int bufsize, int write_NUL, loff_t *offs, - handle_t *handle) +int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf, + int bufsize, int write_NUL, loff_t *offs, + handle_t *handle) { - struct inode *inode = osd_dt_obj(dt)->oo_inode; struct buffer_head *bh = NULL; loff_t offset = *offs; loff_t new_size = i_size_read(inode); @@ -1964,7 +1963,6 @@ static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf, offset, block, bufsize, *offs); if (IS_ERR_OR_NULL(bh)) { - struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt)); int flags = LDISKFS_GET_BLOCKS_CREATE; /* while the 
file system is being mounted, avoid @@ -2055,6 +2053,17 @@ static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf, return err; } +static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf, + int bufsize, int write_NUL, loff_t *offs, + handle_t *handle) +{ + struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt)); + struct inode *inode = osd_dt_obj(dt)->oo_inode; + + return osd_ldiskfs_write(osd, inode, buf, bufsize, write_NUL, offs, + handle); +} + static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, loff_t *pos, struct thandle *handle) diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 47164f8..5daff18 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -648,10 +648,13 @@ static int osd_scrub_get_fid(struct osd_thread_info *info, static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev, struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos, - struct super_block *sb, bool scrub) + struct super_block *sb, bool is_scrub) { + struct lustre_scrub *scrub = &dev->od_scrub.os_scrub; struct inode *inode; + int index; int rc; + ENTRY; /* Not handle the backend root object and agent parent object. @@ -690,15 +693,24 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev, if (dev->od_is_ost && S_ISREG(inode->i_mode) && inode->i_nlink > 1) dev->od_scrub.os_scrub.os_has_ml_file = 1; - if (scrub && + if (is_scrub && ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) { /* Only skip it for the first OI scrub accessing. 
*/ ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB); GOTO(put, rc = SCRUB_NEXT_NOSCRUB); } - rc = osd_scrub_get_fid(info, dev, inode, fid, scrub); + rc = osd_scrub_get_fid(info, dev, inode, fid, is_scrub); + if (rc >= 0 && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) { + index = 0; + for (index = 0; index < scrub->os_ls_count; index++) + if (scrub->os_ls_fids[index].f_seq == fid->f_seq) + break; + if (index < scrub->os_ls_count && + scrub->os_ls_fids[index].f_oid < fid->f_oid) + scrub->os_ls_fids[index].f_oid = fid->f_oid; + } GOTO(put, rc); put: @@ -1200,6 +1212,11 @@ static int osd_otable_it_preload(const struct lu_env *env, static int osd_scan_ml_file_main(const struct lu_env *env, struct osd_device *dev); +static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev); + +static int osd_scan_last_id_main(const struct lu_env *env, + struct osd_device *dev); + static int osd_scrub_main(void *args) { struct lu_env env; @@ -1240,6 +1257,16 @@ static int osd_scrub_main(void *args) scrub->os_pos_current, scrub->os_file.sf_param & SP_DRYRUN ? 
" dryrun mode" : ""); + scrub->os_ls_count = 0; + scrub->os_ls_size = 4; + OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid)); + if (scrub->os_ls_fids == NULL) + GOTO(out, rc = -ENOMEM); + + rc = osd_scan_O_main(&env, dev); + if (rc) + GOTO(out, rc); + rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, false); if (unlikely(rc == SCRUB_IT_CRASH)) { spin_lock(&scrub->os_lock); @@ -1251,9 +1278,13 @@ static int osd_scrub_main(void *args) if (scrub->os_has_ml_file) { ret = osd_scan_ml_file_main(&env, dev); if (ret != 0) - rc = ret; + GOTO(out, rc = ret); } + ret = osd_scan_last_id_main(&env, dev); + if (ret != 0) + rc = ret; + GOTO(post, rc); post: @@ -1268,6 +1299,15 @@ post: out: + if (scrub->os_ls_fids) { + OBD_FREE(scrub->os_ls_fids, + scrub->os_ls_size * sizeof(struct lu_fid)); + + scrub->os_ls_size = 0; + scrub->os_ls_count = 0; + scrub->os_ls_fids = NULL; + } + osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items); lu_env_fini(&env); @@ -3196,3 +3236,308 @@ static int osd_scan_ml_file_main(const struct lu_env *env, return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode, osd_scan_ml_file_seq); } + +#define LASTID "LAST_ID" + +static int osd_update_lastid(struct osd_device *dev, struct inode *inode, + __u64 lastid_known) +{ + handle_t *th; + loff_t offset = 0; + __u64 lastid; + int rc; + + ENTRY; + + th = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC, + osd_dto_credits_noquota[DTO_WRITE_BLOCK]); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + lastid = cpu_to_le64(lastid_known); + rc = osd_ldiskfs_write(dev, inode, &lastid, sizeof(lastid), 0, &offset, + th); + mark_inode_dirty(inode); + ldiskfs_journal_stop(th); + RETURN(rc); +} + +static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev, + struct inode *dir, __u64 lastid_known) +{ + handle_t *th; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *d_lastid; + struct inode *i_lastid; + loff_t offset = 0; + int credits = 
LDISKFS_DATA_TRANS_BLOCKS(dir->i_sb) + + LDISKFS_INDEX_EXTRA_TRANS_BLOCKS + 3 + + osd_dto_credits_noquota[DTO_WRITE_BLOCK]; + int rc; + + ENTRY; + + sb_start_write(dir->i_sb); + th = osd_journal_start_sb(dir->i_sb, LDISKFS_HT_MISC, credits); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + i_lastid = ldiskfs_create_inode(th, dir, (S_IFREG | 0644), NULL); + if (IS_ERR(i_lastid)) + GOTO(out_stop, rc = PTR_ERR(i_lastid)); + + unlock_new_inode(i_lastid); + + d_lastid = osd_child_dentry_by_inode(env, dir, LASTID, strlen(LASTID)); + rc = osd_ldiskfs_add_entry(info, dev, th, d_lastid, i_lastid, NULL); + if (rc) + GOTO(out_stop, rc); + + rc = osd_ldiskfs_write(dev, i_lastid, &lastid_known, + sizeof(lastid_known), 0, &offset, th); + if (rc) + GOTO(out_stop, rc); + mark_inode_dirty(i_lastid); + + ldiskfs_journal_stop(th); + th = NULL; + sb_end_write(dir->i_sb); + GOTO(out, rc = 0); + +out_stop: + if (!IS_ERR_OR_NULL(th)) + ldiskfs_journal_stop(th); + sb_end_write(dir->i_sb); + +out: + if (!IS_ERR_OR_NULL(i_lastid)) + iput(i_lastid); + RETURN(rc); +} + +static int osd_scan_lastid_dir(const struct lu_env *env, struct osd_device *dev, + struct inode *dir, struct osd_it_ea *oie) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct inode *inode; + struct osd_inode_id id; + int rc = 0; + + ENTRY; + + osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN); + inode = osd_iget(info, dev, &id); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + if (S_ISDIR(inode->i_mode)) + GOTO(out, rc = 0); + + if (strlen(LASTID) != oie->oie_dirent->oied_namelen || + strncmp(oie->oie_dirent->oied_name, LASTID, + oie->oie_dirent->oied_namelen) != 0) { + CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n", + osd_name(dev), info->oti_seq_dirent->oied_name, + oie->oie_dirent->oied_name); + GOTO(out, rc = 0); + } + + info->oti_lastid_inode = inode; + RETURN(0); + +out: + iput(inode); + RETURN(rc); +} + +static int osd_scan_lastid_seq(const struct lu_env *env, struct osd_device *dev, + struct 
inode *dir, struct osd_it_ea *oie) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_ost_attrs *lma = &info->oti_ost_attrs; + struct lustre_scrub *scrub = &dev->od_scrub.os_scrub; + struct inode *inode; + struct osd_inode_id id; + __u64 seq; + __u64 lastid; + __u64 lastid_known; + loff_t offset = 0; + int index; + int rc; + + ENTRY; + + osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN); + inode = osd_iget(info, dev, &id); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + if (!S_ISDIR(inode->i_mode)) + GOTO(out, rc = 0); + + rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq); + if (rc) + GOTO(out, rc); + + if (seq < 0x1F) { + rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq); + if (rc) + GOTO(out, rc); + } + + if (!fid_seq_is_local_storage(seq)) + GOTO(out, rc = 0); + + info->oti_lastid_inode = NULL; + info->oti_seq_dirent = oie->oie_dirent; + rc = osd_scan_dir(env, dev, inode, osd_scan_lastid_dir); + info->oti_seq_dirent = NULL; + + if (rc) + GOTO(out, rc); + + if (scrub->os_file.sf_param & SP_DRYRUN) + GOTO(out, rc = 0); + + for (index = 0; index < scrub->os_ls_count; index++) + if (scrub->os_ls_fids[index].f_seq == seq) + break; + + if (unlikely(index >= scrub->os_ls_count)) { + CDEBUG(D_LFSCK, + "%s: can't find seq %llu, it's modified during scrub?\n", + osd_name(dev), seq); + GOTO(out, rc); + } + + lastid_known = scrub->os_ls_fids[index].f_oid; + if (!info->oti_lastid_inode) { + rc = osd_create_lastid(env, dev, dir, lastid_known); + GOTO(out, rc); + } + + rc = osd_get_lma(info, info->oti_lastid_inode, &info->oti_obj_dentry, + lma); + if (rc && rc != -ENODATA) { + CDEBUG(D_LFSCK, "%s: failed to get the xattr %s for O/%s/%s\n", + osd_name(dev), XATTR_NAME_LMA, + oie->oie_dirent->oied_name, LASTID); + GOTO(out, rc); + } + + if (rc != 0 || lma->loa_lma.lma_self_fid.f_seq != seq || + lma->loa_lma.lma_self_fid.f_oid != 0 || + lma->loa_lma.lma_self_fid.f_ver != 0) { + lma->loa_lma.lma_self_fid.f_seq = seq; + 
lma->loa_lma.lma_self_fid.f_oid = 0; + lma->loa_lma.lma_self_fid.f_ver = 0; + + rc = __osd_xattr_set(info, info->oti_lastid_inode, + XATTR_NAME_LMA, lma, sizeof(*lma), + rc == -ENODATA ? + XATTR_CREATE : XATTR_REPLACE); + if (rc) + GOTO(out, rc); + } + + spin_lock(&info->oti_lastid_inode->i_lock); + if (i_size_read(info->oti_lastid_inode) < sizeof(lastid)) { + spin_unlock(&info->oti_lastid_inode->i_lock); + lastid = 0; + } else { + spin_unlock(&info->oti_lastid_inode->i_lock); + + rc = osd_ldiskfs_read(info->oti_lastid_inode, &lastid, + sizeof(lastid), &offset); + if (rc < 0) + GOTO(out, rc); + + if (rc < sizeof(lastid)) + lastid = 0; + else + lastid = le64_to_cpu(lastid); + } + + if (lastid < lastid_known) + rc = osd_update_lastid(dev, info->oti_lastid_inode, + lastid_known); + +out: + if (info->oti_lastid_inode) { + iput(info->oti_lastid_inode); + info->oti_lastid_inode = NULL; + } + + iput(inode); + RETURN(rc); +} + +static int osd_scan_last_id_main(const struct lu_env *env, + struct osd_device *dev) +{ + return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode, + osd_scan_lastid_seq); +} + +static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev, + struct inode *dir, struct osd_it_ea *oie) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_scrub *scrub = &dev->od_scrub.os_scrub; + struct inode *inode; + struct osd_inode_id id; + struct lu_fid *fids; + __u64 seq; + int rc; + + ENTRY; + + osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN); + inode = osd_iget(info, dev, &id); + if (IS_ERR(inode)) + RETURN(PTR_ERR(inode)); + + if (!S_ISDIR(inode->i_mode)) + GOTO(out, rc = 0); + + rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq); + if (rc) + GOTO(out, rc); + + if (seq < 0x1F) { + rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq); + if (rc) + GOTO(out, rc); + } + + if (!fid_seq_is_local_storage(seq)) + GOTO(out, rc = 0); + + scrub->os_ls_count++; + if (unlikely(scrub->os_ls_count > scrub->os_ls_size)) { + 
OBD_ALLOC(fids, + sizeof(struct lu_fid) * (scrub->os_ls_size + 4)); + if (fids == NULL) + GOTO(out, -ENOMEM); + + memcpy(fids, scrub->os_ls_fids, + sizeof(struct lu_fid) * scrub->os_ls_size); + OBD_FREE(scrub->os_ls_fids, + sizeof(struct lu_fid) * scrub->os_ls_size); + + scrub->os_ls_size += 4; + scrub->os_ls_fids = fids; + } + + scrub->os_ls_fids[scrub->os_ls_count - 1].f_seq = seq; + +out: + iput(inode); + RETURN(rc); +} + +static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev) +{ + return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode, + osd_scan_O_seq); +} diff --git a/lustre/osd-zfs/osd_internal.h b/lustre/osd-zfs/osd_internal.h index 827135e..6a6deb9 100644 --- a/lustre/osd-zfs/osd_internal.h +++ b/lustre/osd-zfs/osd_internal.h @@ -276,6 +276,7 @@ struct osd_thread_info { char *oti_seq_name; char *oti_dir_name; + uint64_t oti_lastid_oid; }; extern struct lu_context_key osd_key; diff --git a/lustre/osd-zfs/osd_scrub.c b/lustre/osd-zfs/osd_scrub.c index bddf41c..87456cb 100644 --- a/lustre/osd-zfs/osd_scrub.c +++ b/lustre/osd-zfs/osd_scrub.c @@ -157,6 +157,7 @@ osd_scrub_check_update(const struct lu_env *env, struct osd_device *dev, dnode_t *dn = NULL; uint64_t oid2; int ops = DTO_INDEX_UPDATE; + int index; int rc; ENTRY; @@ -267,6 +268,17 @@ out: sa_handle_destroy(hdl); } + if (!rc && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) { + index = 0; + for (index = 0; index < scrub->os_ls_count; index++) + if (scrub->os_ls_fids[index].f_seq == fid->f_seq) + break; + + if (index < scrub->os_ls_count && + scrub->os_ls_fids[index].f_oid < fid->f_oid) + scrub->os_ls_fids[index].f_oid = fid->f_oid; + } + cleanup: if (nvbuf) nvlist_free(nvbuf); @@ -464,6 +476,9 @@ static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev, static int osd_scan_ml_file_main(const struct lu_env *env, struct osd_device *dev); +static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev); +static int 
osd_scan_lastid_main(const struct lu_env *env, + struct osd_device *dev); static int osd_scrub_main(void *args) { @@ -506,6 +521,16 @@ static int osd_scrub_main(void *args) scrub->os_name, scrub->os_start_flags, scrub->os_pos_current); + scrub->os_ls_count = 0; + scrub->os_ls_size = 4; + OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid)); + if (scrub->os_ls_fids == NULL) + GOTO(out, rc = -ENOMEM); + + rc = osd_scan_O_main(&env, dev); + if (rc) + GOTO(out, rc); + fid = &osd_oti_get(&env)->oti_fid; while (!rc && !kthread_should_stop()) { rc = osd_scrub_next(&env, dev, fid, &oid); @@ -535,11 +560,24 @@ post: rc = ret; } + ret = osd_scan_lastid_main(&env, dev); + if (ret != 0) + rc = ret; + rc = scrub_thread_post(&env, &dev->od_scrub, rc); CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d\n", scrub->os_name, scrub->os_pos_current, rc); out: + if (scrub->os_ls_fids) { + OBD_FREE(scrub->os_ls_fids, + scrub->os_ls_size * sizeof(struct lu_fid)); + + scrub->os_ls_size = 0; + scrub->os_ls_count = 0; + scrub->os_ls_fids = NULL; + } + while (!list_empty(&scrub->os_inconsistent_items)) { struct osd_inconsistent_item *oii; @@ -2045,3 +2083,359 @@ static int osd_scan_ml_file_main(const struct lu_env *env, { return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_ml_file_seq); } + +#define LASTID "LAST_ID" + +static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev, + struct osd_zap_it *ozi, __u64 lastid_known) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + struct lu_attr *la = &info->oti_la; + struct luz_direntry *zde = &info->oti_zde; + uint64_t dir = ozi->ozi_zde.lzd_reg.zde_dnode; + dmu_tx_t *tx = NULL; + nvlist_t *nvbuf = NULL; + dnode_t *dn = NULL; + sa_handle_t *hdl; + __u64 lastid; + int num = sizeof(*zde) / 8; + int rc = 0; + + ENTRY; + + tx = dmu_tx_create(dev->od_os); + if (!tx) + GOTO(out, rc = -ENOMEM); + + dmu_tx_hold_sa_create(tx, osd_find_dnsize(dev, 
OSD_BASE_EA_IN_BONUS)); + dmu_tx_hold_zap(tx, dir, FALSE, NULL); + + rc = -dmu_tx_assign(tx, TXG_WAIT); + if (rc) + GOTO(abort, rc); + + memset(&zde->lzd_reg, 0, sizeof(zde->lzd_reg)); + zde->lzd_reg.zde_type = IFTODT(S_IFREG); + zde->lzd_fid = lma->lma_self_fid; + + rc = -nvlist_alloc(&nvbuf, NV_UNIQUE_NAME, KM_SLEEP); + if (rc) + GOTO(abort, rc); + + lustre_lma_init(lma, &zde->lzd_fid, 0, 0); + lustre_lma_swab(lma); + rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA, (uchar_t *)lma, + sizeof(*lma)); + if (rc) + GOTO(abort, rc); + + la->la_valid = LA_TYPE | LA_MODE; + la->la_mode = (DTTOIF(zde->lzd_reg.zde_type) & S_IFMT) | 0644; + + rc = __osd_object_create(env, dev, NULL, &zde->lzd_fid, &dn, tx, la); + if (rc) + GOTO(abort, rc); + + zde->lzd_reg.zde_dnode = dn->dn_object; + rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL, + SA_HDL_PRIVATE, &hdl); + if (rc) + GOTO(abort, rc); + + rc = __osd_attr_init(env, dev, NULL, hdl, tx, la, dir, nvbuf); + if (rc) + GOTO(abort, rc); + + sa_handle_destroy(hdl); + hdl = NULL; + + dmu_tx_hold_write_by_dnode(tx, dn, 0, sizeof(lastid_known)); + + lastid = cpu_to_le64(lastid_known); + dmu_write_by_dnode(dn, 0, sizeof(lastid), &lastid, tx); + + rc = osd_zap_add(dev, dir, NULL, LASTID, strlen(LASTID), num, + (void *)zde, tx); + if (rc) + GOTO(abort, tx); + + dmu_tx_commit(tx); + GOTO(out, rc); + +abort: + if (dn) + dmu_object_free(dev->od_os, dn->dn_object, tx); + + dmu_tx_abort(tx); + +out: + if (hdl) + sa_handle_destroy(hdl); + if (dn) + osd_dnode_rele(dn); + if (nvbuf) + nvlist_free(nvbuf); + + return rc; +} + +static int osd_scan_lastid_dir(const struct lu_env *env, + struct osd_device *dev, uint64_t dir_oid, + struct osd_zap_it *ozi) +{ + struct osd_thread_info *info = osd_oti_get(env); + + ENTRY; + + if (!S_ISREG(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type)))) + RETURN(0); + + if (strcmp(ozi->ozi_name, LASTID) != 0) { + CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n", + osd_name(dev), info->oti_seq_name, 
ozi->ozi_name); + RETURN(0); + } + + info->oti_lastid_oid = ozi->ozi_zde.lzd_reg.zde_dnode; + RETURN(0); +} + +static int osd_scan_lastid_seq(const struct lu_env *env, + struct osd_device *dev, uint64_t dir_oid, + struct osd_zap_it *ozi) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs; + struct lu_buf *lb = &info->oti_xattr_lbuf; + struct lustre_scrub *scrub = &dev->od_scrub; + dnode_t *dn = NULL; + dmu_tx_t *tx = NULL; + nvlist_t *nvbuf = NULL; + sa_handle_t *hdl = NULL; + uint64_t blocks; + uint32_t blksize; + uint32_t sz_lma; + size_t size = 0; + __u64 seq; + __u64 lastid; + __u64 lastid_known; + bool need_update = false; + int index; + int rc; + + ENTRY; + + if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type)))) + RETURN(0); + + rc = kstrtoull(ozi->ozi_name, 16, &seq); + if (rc) + RETURN(rc); + + if (seq < 0x1F) { + rc = kstrtoull(ozi->ozi_name, 10, &seq); + if (rc) + RETURN(rc); + } + + if (!fid_seq_is_local_storage(seq)) + GOTO(out, rc = 0); + + info->oti_lastid_oid = 0; + info->oti_seq_name = ozi->ozi_name; + rc = osd_scan_dir(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode, + osd_scan_lastid_dir); + if (rc) + GOTO(out, rc); + + for (index = 0; index < scrub->os_ls_count; index++) + if (scrub->os_ls_fids[index].f_seq == seq) + break; + + if (unlikely(index >= scrub->os_ls_count)) { + CDEBUG(D_LFSCK, + "%s: can't find seq %llu, it's modified during scrub?\n", + osd_name(dev), seq); + GOTO(out, rc = -ERANGE); + } + + lastid_known = scrub->os_ls_fids[index].f_oid; + + if (info->oti_lastid_oid == 0) { + lma->lma_self_fid.f_seq = seq; + lma->lma_self_fid.f_oid = 0; + lma->lma_self_fid.f_ver = 0; + + rc = osd_create_lastid(env, dev, ozi, lastid_known); + GOTO(out, rc); + } + + rc = __osd_obj2dnode(dev->od_os, info->oti_lastid_oid, &dn); + if (rc) + GOTO(out, rc); + + rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL, + SA_HDL_PRIVATE, &hdl); + if (rc) + GOTO(out, rc); + + lastid = 0; + 
sa_object_size(hdl, &blksize, &blocks); + if (blocks > 0) { + rc = osd_dmu_read(dev, dn, 0, sizeof(lastid), (char *) &lastid, + 0); + if (rc) + GOTO(out, rc); + + lastid = le64_to_cpu(lastid); + if (lastid <= lastid_known) + need_update = true; + } else { + need_update = true; + } + + rc = __osd_xattr_load(dev, hdl, &nvbuf); + if (rc) + GOTO(out, rc); + + rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, (uchar_t **) &lma, + &sz_lma); + if (rc != 0 && rc != -ENOENT) + GOTO(out, rc); + + if (rc == -ENOENT || lma->lma_self_fid.f_seq != seq || + lma->lma_self_fid.f_oid != 0 || lma->lma_self_fid.f_ver != 0) { + if (!rc) { + rc = -nvlist_remove(nvbuf, XATTR_NAME_LMA, + DATA_TYPE_BYTE_ARRAY); + if (rc) + GOTO(out, rc); + } + + need_update = true; + lma->lma_self_fid.f_seq = seq; + lma->lma_self_fid.f_oid = 0; + lma->lma_self_fid.f_ver = 0; + + rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA, + (uchar_t *) &lma, sizeof(lma)); + if (rc) + GOTO(out, rc); + } + + if (!need_update) + GOTO(out, rc); + + if (scrub->os_file.sf_param & SP_DRYRUN) + GOTO(out, rc = 0); + + tx = dmu_tx_create(dev->od_os); + if (!tx) + GOTO(out, rc = -ENOMEM); + + dmu_tx_hold_zap_by_dnode(tx, dn, TRUE, NULL); + if (lastid < lastid_known) + dmu_tx_hold_write_by_dnode(tx, dn, 0, sizeof(lastid)); + + rc = -dmu_tx_assign(tx, TXG_WAIT); + if (rc) + GOTO(abort, rc); + + rc = -nvlist_size(nvbuf, &size, NV_ENCODE_XDR); + if (rc) + GOTO(abort, rc); + + lu_buf_check_and_alloc(lb, size); + if (lb->lb_buf == NULL) + GOTO(out, rc = -ENOMEM); + + rc = -nvlist_pack(nvbuf, (char **)&lb->lb_buf, &size, NV_ENCODE_XDR, + KM_SLEEP); + if (rc) + GOTO(abort, rc); + + rc = -sa_update(hdl, SA_ZPL_SIZE(dev), lb->lb_buf, size, tx); + if (rc) + GOTO(abort, rc); + + if (lastid < lastid_known) { + lastid = cpu_to_le64(lastid_known); + dmu_write_by_dnode(dn, 0, sizeof(lastid), + (const char *) &lastid, tx); + } + + dmu_tx_commit(tx); + GOTO(out, rc); + +abort: + dmu_tx_abort(tx); + +out: + if (hdl) + sa_handle_destroy(hdl); 
+ + if (dn) + osd_dnode_rele(dn); + + RETURN(rc); +} + +static int osd_scan_lastid_main(const struct lu_env *env, + struct osd_device *dev) +{ + return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_lastid_seq); +} + +static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev, + uint64_t dir_oid, struct osd_zap_it *ozi) +{ + struct lustre_scrub *scrub = &dev->od_scrub; + struct lu_fid *fids; + __u64 seq; + int rc; + + ENTRY; + + if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type)))) + RETURN(0); + + rc = kstrtoull(ozi->ozi_name, 16, &seq); + if (rc) + RETURN(rc); + + if (seq < 0x1F) { + rc = kstrtoull(ozi->ozi_name, 10, &seq); + if (rc) + RETURN(rc); + } + + if (!fid_seq_is_local_storage(seq)) + GOTO(out, rc = 0); + + scrub->os_ls_count++; + if (unlikely(scrub->os_ls_count > scrub->os_ls_size)) { + OBD_ALLOC(fids, + sizeof(struct lu_fid) * (scrub->os_ls_size + 4)); + if (fids == NULL) + GOTO(out, -ENOMEM); + + memcpy(fids, scrub->os_ls_fids, + sizeof(struct lu_fid) * scrub->os_ls_size); + OBD_FREE(scrub->os_ls_fids, + sizeof(struct lu_fid) * scrub->os_ls_size); + + scrub->os_ls_size += 4; + scrub->os_ls_fids = fids; + } + + scrub->os_ls_fids[scrub->os_ls_count - 1].f_seq = seq; + +out: + RETURN(rc); +} + +static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev) +{ + return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_O_seq); +} diff --git a/lustre/ptlrpc/nodemap_storage.c b/lustre/ptlrpc/nodemap_storage.c index 430dca0..2d41c89 100644 --- a/lustre/ptlrpc/nodemap_storage.c +++ b/lustre/ptlrpc/nodemap_storage.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -245,12 +246,26 @@ again: } } +retry: nm_obj = local_index_find_or_create(env, los, root_obj, LUSTRE_NODEMAP_NAME, S_IFREG | S_IRUGO | S_IWUSR, &dt_nodemap_features); - if (IS_ERR(nm_obj)) + if (IS_ERR(nm_obj)) { + if (PTR_ERR(nm_obj) == -EEXIST && rc != -ENOENT && + los->los_last_oid < (tfid.f_oid - 1)) { + if 
(dt2lu_dev(dev)->ld_obd) + dt2lu_dev(dev)->ld_obd->obd_need_scrub = 1; + + mutex_lock(&los->los_id_lock); + los->los_last_oid = tfid.f_oid - 1; + mutex_unlock(&los->los_id_lock); + + goto retry; + } + GOTO(out_root, nm_obj); + } if (nm_obj->do_index_ops == NULL) { rc = nm_obj->do_ops->do_index_try(env, nm_obj, diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index a710be8..1292e7f 100755 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -1483,6 +1483,71 @@ test_21() { } run_test 21 "don't hang MDS recovery when failed to get update log" +test_22() { + #FID_SEQ_LLOG = 1 + #FID_SEQ_LLOG_NAME = 10 + #FID_SEQ_LOCAL_NAME = 0x200000003, + local s_llog="1" + local s_llog_name="10" + local s_local="200000003" + local lma + local fid + + stopall + + # remove the LASTID + mount_fstype mds1 || error "(1) Fail to mount mds1" + mntpt=$(facet_mntpt mds1) + + do_facet mds1 rm -f "$mntpt/O/$s_llog/LAST_ID" + do_facet mds1 rm -f "$mntpt/O/$s_llog_name/LAST_ID" + do_facet mds1 rm -f "$mntpt/O/$s_local_name/LAST_ID" + + unmount_fstype mds1 || error "(2) Fail to umount mds1" + + $LCTL set_param debug=-1 + $LCTL dk > /dev/null + start mds1 $(mdsdevname 1) > /dev/null || { + $LCTL dk > /tmp/log + error "(3) Fail to start mds1" + } + $START_SCRUB -r || error "(4) Fail to start OI scrub on MDT!" 
+ + wait_update_facet mds1 "$LCTL get_param -n \ + osd-*.$(facet_svc mds1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 6 || + error "(5) Expected '$expected' on mds1" + + stop mds1 + + mount_fstype mds1 || error "(6) Fail to mount mds1 again" + do_facet mds1 stat "$mntpt/O/$s_llog/LAST_ID" || + error "(7) LAST_ID is not recreated for LLOG" + lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog/LAST_ID) + fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma) + [ "$fid" == "[0x1:0x0:0x0]" ] || + error "(8) the LMA of the LAST_ID is incorrect" + + do_facet mds1 stat "$mntpt/O/$s_llog_name/LAST_ID" || + error "(8) LAST_ID is not recreated for LLOG_NAME" + lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog_name/LAST_ID) + fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma) + [ "$fid" == "[0xa:0x0:0x0]" ] || + error "(8) the LMA of the LAST_ID is incorrect" + + do_facet mds1 stat "$mntpt/O/$s_local/LAST_ID" || + error "(9) LAST_ID is not recreated for LOCAL_NAME" + lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_local/LAST_ID) + fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma) + [ "$fid" == "[0x200000003:0x0:0x0]" ] || + error "(8) the LMA of the LAST_ID is incorrect" + + unmount_fstype mds1 || error "(10) Fail to umount mds1 again" + + start mds1 $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null || + error "(11) Fail to start mds1" +} +run_test 22 "LFSCK can recreate or fix the LASTID on MDT/OST" # restore MDS/OST size MDSSIZE=${SAVED_MDSSIZE} diff --git a/lustre/utils/ll_decode_filter_fid.c b/lustre/utils/ll_decode_filter_fid.c index f395849..e80417b 100644 --- a/lustre/utils/ll_decode_filter_fid.c +++ b/lustre/utils/ll_decode_filter_fid.c @@ -93,9 +93,10 @@ int main(int argc, char *argv[]) struct lustre_ost_attrs *loa = (void *)buf; int rc1; + memset(loa, 0, sizeof(*loa)); rc1 = getxattr(argv[i], "trusted.lma", loa, sizeof(*loa)); - if (rc1 < sizeof(*loa)) { + if (rc1 < sizeof(struct lustre_mdt_attrs)) { fprintf(stderr, "%s: error reading fid: 
%s\n", argv[i], strerror(ENODATA)); @@ -105,7 +106,8 @@ int main(int argc, char *argv[]) } lustre_loa_swab(loa); - if (!(loa->loa_lma.lma_compat & + if (rc1 > sizeof(struct lustre_mdt_attrs) && + !(loa->loa_lma.lma_compat & LMAC_STRIPE_INFO)) { fprintf(stderr, "%s: not stripe info: %s\n", @@ -115,9 +117,10 @@ int main(int argc, char *argv[]) continue; } - printf("%s: parent="DFID" stripe=%u " + printf("%s: fid="DFID" parent="DFID" stripe=%u " "stripe_size=%u stripe_count=%u", argv[i], + PFID(&loa->loa_lma.lma_self_fid), (unsigned long long)loa->loa_parent_fid.f_seq, loa->loa_parent_fid.f_oid, 0, /* ver */ loa->loa_parent_fid.f_stripe_idx &