__u64 os_new_checked;
__u64 os_pos_current;
__u32 os_start_flags;
+
+ /* FIDs with maximum OID in local storage */
+ __u32 os_ls_size;
+ __u32 os_ls_count;
+ struct lu_fid *os_ls_fids;
+
/* Some of these bits can be set by different threads so
* all updates must be protected by ->os_lock to avoid
* racing read-modify-write cycles causing corruption.
obd_process_conf:1, /* device is processing mgs config */
obd_checksum_dump:1, /* dump pages upon cksum error */
obd_dynamic_nids:1, /* Allow dynamic NIDs on device */
- obd_read_only:1; /* device is read-only */
+ obd_read_only:1, /* device is read-only */
+ obd_need_scrub:1; /* device need scrub */
#ifdef HAVE_SERVER_SUPPORT
/* no committed-transno notification */
unsigned long obd_no_transno:1;
fid_seq_is_rsvd(fid_seq(fid)));
}
+/**
+ * Tell whether \a seq is one of the sequences treated as local storage
+ * here: FID_SEQ_LLOG, FID_SEQ_LLOG_NAME, FID_SEQ_LOCAL_NAME or
+ * FID_SEQ_QUOTA.
+ *
+ * \retval true		\a seq is one of the four sequences above
+ * \retval false	otherwise
+ */
+static inline bool fid_seq_is_local_storage(__u64 seq)
+{
+	switch (seq) {
+	case FID_SEQ_LLOG:
+	case FID_SEQ_LLOG_NAME:
+	case FID_SEQ_LOCAL_NAME:
+	case FID_SEQ_QUOTA:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/**
+ * FID flavour of fid_seq_is_local_storage(): only the sequence part of
+ * \a fid is examined.
+ */
+static inline bool fid_is_local_storage(const struct lu_fid *fid)
+{
+	__u64 seq = fid->f_seq;
+
+	return fid_seq_is_local_storage(seq);
+}
+
static inline bool lu_fid_eq(const struct lu_fid *f0, const struct lu_fid *f1)
{
return !memcmp(f0, f1, sizeof(*f0));
if (!mdt->mdt_skip_lfsck && !mdt->mdt_bottom->dd_rdonly) {
struct lfsck_start_param lsp;
+ struct lfsck_start start;
lsp.lsp_start = NULL;
lsp.lsp_index_valid = 0;
+
+ if (dt2lu_dev(mdt->mdt_bottom)->ld_obd &&
+ dt2lu_dev(mdt->mdt_bottom)->ld_obd->obd_need_scrub) {
+ memset(&start, 0, sizeof(start));
+ start.ls_version = LFSCK_VERSION_V1;
+ start.ls_active = LFSCK_TYPE_SCRUB;
+ start.ls_flags = LPF_RESET;
+
+ lsp.lsp_start = &start;
+ }
+
rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
OBD_IOC_START_LFSCK,
0, &lsp);
struct osd_it_ea_dirent *oti_seq_dirent;
struct osd_it_ea_dirent *oti_dir_dirent;
+ struct inode *oti_lastid_inode;
struct osd_lookup_cache_object oti_cobj; /* cache object id */
struct osd_lookup_cache *oti_lookup_cache;
const int blocks);
int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
+int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf,
+ int bufsize, int write_NUL, loff_t *offs,
+ handle_t *handle);
static inline
struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
return 0;
}
-static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
- int bufsize, int write_NUL, loff_t *offs,
- handle_t *handle)
+int osd_ldiskfs_write(struct osd_device *osd, struct inode *inode, void *buf,
+ int bufsize, int write_NUL, loff_t *offs,
+ handle_t *handle)
{
- struct inode *inode = osd_dt_obj(dt)->oo_inode;
struct buffer_head *bh = NULL;
loff_t offset = *offs;
loff_t new_size = i_size_read(inode);
offset, block, bufsize, *offs);
if (IS_ERR_OR_NULL(bh)) {
- struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
int flags = LDISKFS_GET_BLOCKS_CREATE;
/* while the file system is being mounted, avoid
return err;
}
+/*
+ * dt_object flavour of osd_ldiskfs_write(): look up the OSD device and the
+ * backing inode of \a dt and forward every other argument unchanged.
+ */
+static int osd_ldiskfs_write_record(struct dt_object *dt, void *buf,
+				    int bufsize, int write_NUL, loff_t *offs,
+				    handle_t *handle)
+{
+	struct osd_object *obj = osd_dt_obj(dt);
+
+	return osd_ldiskfs_write(osd_obj2dev(obj), obj->oo_inode, buf,
+				 bufsize, write_NUL, offs, handle);
+}
+
static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
const struct lu_buf *buf, loff_t *pos,
struct thandle *handle)
static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
- struct super_block *sb, bool scrub)
+ struct super_block *sb, bool is_scrub)
{
+ struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
struct inode *inode;
+ int index;
int rc;
+
ENTRY;
/* Not handle the backend root object and agent parent object.
if (dev->od_is_ost && S_ISREG(inode->i_mode) && inode->i_nlink > 1)
dev->od_scrub.os_scrub.os_has_ml_file = 1;
- if (scrub &&
+ if (is_scrub &&
ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
/* Only skip it for the first OI scrub accessing. */
ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
}
- rc = osd_scrub_get_fid(info, dev, inode, fid, scrub);
+ rc = osd_scrub_get_fid(info, dev, inode, fid, is_scrub);
+ if (rc >= 0 && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) {
+ index = 0;
+ for (index = 0; index < scrub->os_ls_count; index++)
+ if (scrub->os_ls_fids[index].f_seq == fid->f_seq)
+ break;
+ if (index < scrub->os_ls_count &&
+ scrub->os_ls_fids[index].f_oid < fid->f_oid)
+ scrub->os_ls_fids[index].f_oid = fid->f_oid;
+ }
GOTO(put, rc);
put:
static int osd_scan_ml_file_main(const struct lu_env *env,
struct osd_device *dev);
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev);
+
+static int osd_scan_last_id_main(const struct lu_env *env,
+ struct osd_device *dev);
+
static int osd_scrub_main(void *args)
{
struct lu_env env;
scrub->os_pos_current,
scrub->os_file.sf_param & SP_DRYRUN ? " dryrun mode" : "");
+ scrub->os_ls_count = 0;
+ scrub->os_ls_size = 4;
+ OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid));
+ if (scrub->os_ls_fids == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = osd_scan_O_main(&env, dev);
+ if (rc)
+ GOTO(out, rc);
+
rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, false);
if (unlikely(rc == SCRUB_IT_CRASH)) {
spin_lock(&scrub->os_lock);
if (scrub->os_has_ml_file) {
ret = osd_scan_ml_file_main(&env, dev);
if (ret != 0)
- rc = ret;
+ GOTO(out, rc = ret);
}
+ ret = osd_scan_last_id_main(&env, dev);
+ if (ret != 0)
+ rc = ret;
+
GOTO(post, rc);
post:
out:
+ if (scrub->os_ls_fids) {
+ OBD_FREE(scrub->os_ls_fids,
+ scrub->os_ls_size * sizeof(struct lu_fid));
+
+ scrub->os_ls_size = 0;
+ scrub->os_ls_count = 0;
+ scrub->os_ls_fids = NULL;
+ }
+
osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
lu_env_fini(&env);
return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,
osd_scan_ml_file_seq);
}
+
+#define LASTID "LAST_ID"
+
+/*
+ * Overwrite the first 8 bytes of the LAST_ID file \a inode with
+ * \a lastid_known, stored little-endian, inside a journal handle of its own.
+ *
+ * \retval	result of osd_ldiskfs_write(), or the error from starting
+ *		the journal handle
+ */
+static int osd_update_lastid(struct osd_device *dev, struct inode *inode,
+			     __u64 lastid_known)
+{
+	__u64 disk_id = cpu_to_le64(lastid_known);
+	loff_t pos = 0;
+	handle_t *jh;
+	int rc;
+
+	ENTRY;
+
+	jh = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
+				  osd_dto_credits_noquota[DTO_WRITE_BLOCK]);
+	if (IS_ERR(jh))
+		RETURN(PTR_ERR(jh));
+
+	rc = osd_ldiskfs_write(dev, inode, &disk_id, sizeof(disk_id), 0, &pos,
+			       jh);
+	/* the inode is flagged dirty whatever the write returned */
+	mark_inode_dirty(inode);
+	ldiskfs_journal_stop(jh);
+
+	RETURN(rc);
+}
+
+/*
+ * Create the LAST_ID file under \a dir and store \a lastid_known in it.
+ *
+ * A new regular inode (mode 0644) is created, linked into \a dir under the
+ * name LASTID and its first 8 bytes are set to \a lastid_known. The value is
+ * stored little-endian, matching osd_update_lastid() and the le64_to_cpu()
+ * done by the reader. Everything happens inside one journal handle,
+ * bracketed by sb_start_write()/sb_end_write().
+ *
+ * \retval 0		success
+ * \retval negative	errno from journal start, inode creation, the
+ *			directory insert or the write
+ */
+static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev,
+			     struct inode *dir, __u64 lastid_known)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct inode *i_lastid = NULL;
+	struct dentry *d_lastid;
+	handle_t *th;
+	loff_t offset = 0;
+	__u64 lastid = cpu_to_le64(lastid_known);
+	int credits = LDISKFS_DATA_TRANS_BLOCKS(dir->i_sb) +
+		      LDISKFS_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+		      osd_dto_credits_noquota[DTO_WRITE_BLOCK];
+	int rc;
+
+	ENTRY;
+
+	sb_start_write(dir->i_sb);
+	th = osd_journal_start_sb(dir->i_sb, LDISKFS_HT_MISC, credits);
+	/* sb_start_write() must be balanced even if no handle was started */
+	if (IS_ERR(th))
+		GOTO(out_end_write, rc = PTR_ERR(th));
+
+	i_lastid = ldiskfs_create_inode(th, dir, (S_IFREG | 0644), NULL);
+	if (IS_ERR(i_lastid))
+		GOTO(out_stop, rc = PTR_ERR(i_lastid));
+
+	unlock_new_inode(i_lastid);
+
+	d_lastid = osd_child_dentry_by_inode(env, dir, LASTID, strlen(LASTID));
+	rc = osd_ldiskfs_add_entry(info, dev, th, d_lastid, i_lastid, NULL);
+	if (rc)
+		GOTO(out_stop, rc);
+
+	rc = osd_ldiskfs_write(dev, i_lastid, &lastid, sizeof(lastid), 0,
+			       &offset, th);
+	if (rc)
+		GOTO(out_stop, rc);
+	mark_inode_dirty(i_lastid);
+
+out_stop:
+	ldiskfs_journal_stop(th);
+
+out_end_write:
+	sb_end_write(dir->i_sb);
+
+	/* i_lastid is NULL or an ERR_PTR when no inode was created */
+	if (!IS_ERR_OR_NULL(i_lastid))
+		iput(i_lastid);
+	RETURN(rc);
+}
+
+/*
+ * osd_scan_dir() callback for one entry of a sequence directory.
+ *
+ * Sub-directories are skipped. A non-directory entry named LASTID is kept:
+ * its inode, with the reference taken by osd_iget(), is stored in
+ * info->oti_lastid_inode for the caller to use and release. Any other
+ * non-directory entry is only reported through CDEBUG.
+ *
+ * \retval 0		entry handled (whether or not it was LAST_ID)
+ * \retval negative	osd_iget() failed
+ */
+static int osd_scan_lastid_dir(const struct lu_env *env, struct osd_device *dev,
+			       struct inode *dir, struct osd_it_ea *oie)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct osd_it_ea_dirent *ent = oie->oie_dirent;
+	struct osd_inode_id id;
+	struct inode *child;
+	bool is_lastid;
+	int rc = 0;
+
+	ENTRY;
+
+	osd_id_gen(&id, ent->oied_ino, OSD_OII_NOGEN);
+	child = osd_iget(info, dev, &id);
+	if (IS_ERR(child))
+		RETURN(PTR_ERR(child));
+
+	if (S_ISDIR(child->i_mode))
+		GOTO(out, rc = 0);
+
+	is_lastid = strlen(LASTID) == ent->oied_namelen &&
+		    strncmp(ent->oied_name, LASTID, ent->oied_namelen) == 0;
+	if (!is_lastid) {
+		CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n",
+		       osd_name(dev), info->oti_seq_dirent->oied_name,
+		       ent->oied_name);
+		GOTO(out, rc = 0);
+	}
+
+	/* hand the inode reference over to the caller */
+	info->oti_lastid_inode = child;
+	RETURN(0);
+
+out:
+	iput(child);
+	RETURN(rc);
+}
+
+/*
+ * osd_scan_dir() callback for one entry of the directory being scanned by
+ * osd_scan_last_id_main(): verify or repair the LAST_ID file of one
+ * local-storage sequence directory.
+ *
+ * The entry name is parsed as the sequence number (hex first; values below
+ * 0x1F are re-parsed as decimal). For a local-storage sequence the directory
+ * is scanned for LAST_ID, then:
+ *  - in dry-run mode nothing is modified;
+ *  - if LAST_ID is missing it is created with the highest OID recorded in
+ *    scrub->os_ls_fids[] for this sequence;
+ *  - otherwise its LMA is rewritten when the self FID is not [seq:0:0], and
+ *    the stored value is raised when it is below the recorded OID.
+ *
+ * \retval 0		entry handled or skipped
+ * \retval negative	errno on failure
+ */
+static int osd_scan_lastid_seq(const struct lu_env *env, struct osd_device *dev,
+			       struct inode *dir, struct osd_it_ea *oie)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lustre_ost_attrs *lma = &info->oti_ost_attrs;
+	struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
+	struct inode *inode;
+	struct osd_inode_id id;
+	__u64 seq;
+	__u64 lastid;
+	__u64 lastid_known;
+	loff_t offset = 0;
+	int index;
+	int rc;
+
+	ENTRY;
+
+	osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+	inode = osd_iget(info, dev, &id);
+	if (IS_ERR(inode))
+		RETURN(PTR_ERR(inode));
+
+	if (!S_ISDIR(inode->i_mode))
+		GOTO(out, rc = 0);
+
+	rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
+	if (rc)
+		GOTO(out, rc);
+
+	/* small values are re-read as decimal (e.g. "10" is sequence 10) */
+	if (seq < 0x1F) {
+		rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+	if (!fid_seq_is_local_storage(seq))
+		GOTO(out, rc = 0);
+
+	info->oti_lastid_inode = NULL;
+	info->oti_seq_dirent = oie->oie_dirent;
+	rc = osd_scan_dir(env, dev, inode, osd_scan_lastid_dir);
+	info->oti_seq_dirent = NULL;
+
+	if (rc)
+		GOTO(out, rc);
+
+	if (scrub->os_file.sf_param & SP_DRYRUN)
+		GOTO(out, rc = 0);
+
+	for (index = 0; index < scrub->os_ls_count; index++)
+		if (scrub->os_ls_fids[index].f_seq == seq)
+			break;
+
+	if (unlikely(index >= scrub->os_ls_count)) {
+		CDEBUG(D_LFSCK,
+		       "%s: can't find seq %llu, it's modified during scrub?\n",
+		       osd_name(dev), seq);
+		/* not treated as an error here, the sequence is just skipped */
+		GOTO(out, rc = 0);
+	}
+
+	lastid_known = scrub->os_ls_fids[index].f_oid;
+	if (!info->oti_lastid_inode) {
+		rc = osd_create_lastid(env, dev, dir, lastid_known);
+		GOTO(out, rc);
+	}
+
+	rc = osd_get_lma(info, info->oti_lastid_inode, &info->oti_obj_dentry,
+			 lma);
+	if (rc && rc != -ENODATA) {
+		CDEBUG(D_LFSCK, "%s: failed to get the xattr %s for O/%s/%s\n",
+		       osd_name(dev), XATTR_NAME_LMA,
+		       oie->oie_dirent->oied_name, LASTID);
+		GOTO(out, rc);
+	}
+
+	/* LMA missing (-ENODATA) or self FID is not [seq:0:0]: rewrite it */
+	if (rc != 0 || lma->loa_lma.lma_self_fid.f_seq != seq ||
+	    lma->loa_lma.lma_self_fid.f_oid != 0 ||
+	    lma->loa_lma.lma_self_fid.f_ver != 0) {
+		lma->loa_lma.lma_self_fid.f_seq = seq;
+		lma->loa_lma.lma_self_fid.f_oid = 0;
+		lma->loa_lma.lma_self_fid.f_ver = 0;
+
+		rc = __osd_xattr_set(info, info->oti_lastid_inode,
+				     XATTR_NAME_LMA, lma, sizeof(*lma),
+				     rc == -ENODATA ?
+				     XATTR_CREATE : XATTR_REPLACE);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+	spin_lock(&info->oti_lastid_inode->i_lock);
+	if (i_size_read(info->oti_lastid_inode) < sizeof(lastid)) {
+		spin_unlock(&info->oti_lastid_inode->i_lock);
+		lastid = 0;
+	} else {
+		spin_unlock(&info->oti_lastid_inode->i_lock);
+
+		rc = osd_ldiskfs_read(info->oti_lastid_inode, &lastid,
+				      sizeof(lastid), &offset);
+		if (rc < 0)
+			GOTO(out, rc);
+
+		/* a short read is handled like an empty file */
+		if ((size_t)rc < sizeof(lastid))
+			lastid = 0;
+		else
+			lastid = le64_to_cpu(lastid);
+
+		/*
+		 * rc holds the non-negative read result at this point; do not
+		 * let it escape as the callback result when LAST_ID needs no
+		 * update.
+		 * NOTE(review): assumes osd_scan_dir() expects 0/-errno from
+		 * its callback - confirm.
+		 */
+		rc = 0;
+	}
+
+	if (lastid < lastid_known)
+		rc = osd_update_lastid(dev, info->oti_lastid_inode,
+				       lastid_known);
+
+out:
+	/* drop the reference taken by osd_scan_lastid_dir(), if any */
+	if (info->oti_lastid_inode) {
+		iput(info->oti_lastid_inode);
+		info->oti_lastid_inode = NULL;
+	}
+
+	iput(inode);
+	RETURN(rc);
+}
+
+/*
+ * Run osd_scan_lastid_seq() on every entry of the directory referenced by
+ * dev->od_ost_map->om_root and return what osd_scan_dir() returns.
+ */
+static int osd_scan_last_id_main(const struct lu_env *env,
+				 struct osd_device *dev)
+{
+	struct inode *root = dev->od_ost_map->om_root->d_inode;
+
+	return osd_scan_dir(env, dev, root, osd_scan_lastid_seq);
+}
+
+/*
+ * osd_scan_dir() callback for one scanned entry: register a local-storage
+ * sequence in scrub->os_ls_fids[].
+ *
+ * The entry must be a directory whose name parses as a sequence number (hex
+ * first; values below 0x1F are re-parsed as decimal). For a local-storage
+ * sequence a new slot is appended to scrub->os_ls_fids[], growing the array
+ * by 4 slots when it is full. Only f_seq of the new slot is written here.
+ * NOTE(review): f_oid of the new slot relies on OBD_ALLOC() returning zeroed
+ * memory - confirm.
+ *
+ * \retval 0		entry recorded or skipped
+ * \retval -ENOMEM	the array could not be grown
+ * \retval negative	other errno from osd_iget() or kstrtoull()
+ */
+static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev,
+			  struct inode *dir, struct osd_it_ea *oie)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
+	struct inode *inode;
+	struct osd_inode_id id;
+	struct lu_fid *fids;
+	__u64 seq;
+	int rc;
+
+	ENTRY;
+
+	osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
+	inode = osd_iget(info, dev, &id);
+	if (IS_ERR(inode))
+		RETURN(PTR_ERR(inode));
+
+	if (!S_ISDIR(inode->i_mode))
+		GOTO(out, rc = 0);
+
+	rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
+	if (rc)
+		GOTO(out, rc);
+
+	if (seq < 0x1F) {
+		rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
+		if (rc)
+			GOTO(out, rc);
+	}
+
+	if (!fid_seq_is_local_storage(seq))
+		GOTO(out, rc = 0);
+
+	/*
+	 * Grow first and count afterwards, so a failed allocation leaves
+	 * os_ls_count <= os_ls_size and is reported to the caller.
+	 */
+	if (unlikely(scrub->os_ls_count >= scrub->os_ls_size)) {
+		OBD_ALLOC(fids,
+			  sizeof(struct lu_fid) * (scrub->os_ls_size + 4));
+		if (fids == NULL)
+			GOTO(out, rc = -ENOMEM);
+
+		memcpy(fids, scrub->os_ls_fids,
+		       sizeof(struct lu_fid) * scrub->os_ls_size);
+		OBD_FREE(scrub->os_ls_fids,
+			 sizeof(struct lu_fid) * scrub->os_ls_size);
+
+		scrub->os_ls_size += 4;
+		scrub->os_ls_fids = fids;
+	}
+
+	scrub->os_ls_fids[scrub->os_ls_count].f_seq = seq;
+	scrub->os_ls_count++;
+
+out:
+	iput(inode);
+	RETURN(rc);
+}
+
+/*
+ * Run osd_scan_O_seq() on every entry of the directory referenced by
+ * dev->od_ost_map->om_root and return what osd_scan_dir() returns.
+ */
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
+{
+	struct inode *root = dev->od_ost_map->om_root->d_inode;
+
+	return osd_scan_dir(env, dev, root, osd_scan_O_seq);
+}
char *oti_seq_name;
char *oti_dir_name;
+ uint64_t oti_lastid_oid;
};
extern struct lu_context_key osd_key;
dnode_t *dn = NULL;
uint64_t oid2;
int ops = DTO_INDEX_UPDATE;
+ int index;
int rc;
ENTRY;
sa_handle_destroy(hdl);
}
+ if (!rc && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) {
+ index = 0;
+ for (index = 0; index < scrub->os_ls_count; index++)
+ if (scrub->os_ls_fids[index].f_seq == fid->f_seq)
+ break;
+
+ if (index < scrub->os_ls_count &&
+ scrub->os_ls_fids[index].f_oid < fid->f_oid)
+ scrub->os_ls_fids[index].f_oid = fid->f_oid;
+ }
+
cleanup:
if (nvbuf)
nvlist_free(nvbuf);
static int osd_scan_ml_file_main(const struct lu_env *env,
struct osd_device *dev);
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev);
+static int osd_scan_lastid_main(const struct lu_env *env,
+ struct osd_device *dev);
static int osd_scrub_main(void *args)
{
scrub->os_name, scrub->os_start_flags,
scrub->os_pos_current);
+ scrub->os_ls_count = 0;
+ scrub->os_ls_size = 4;
+ OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid));
+ if (scrub->os_ls_fids == NULL)
+ GOTO(out, rc = -ENOMEM);
+
+ rc = osd_scan_O_main(&env, dev);
+ if (rc)
+ GOTO(out, rc);
+
fid = &osd_oti_get(&env)->oti_fid;
while (!rc && !kthread_should_stop()) {
rc = osd_scrub_next(&env, dev, fid, &oid);
rc = ret;
}
+ ret = osd_scan_lastid_main(&env, dev);
+ if (ret != 0)
+ rc = ret;
+
rc = scrub_thread_post(&env, &dev->od_scrub, rc);
CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d\n",
scrub->os_name, scrub->os_pos_current, rc);
out:
+ if (scrub->os_ls_fids) {
+ OBD_FREE(scrub->os_ls_fids,
+ scrub->os_ls_size * sizeof(struct lu_fid));
+
+ scrub->os_ls_size = 0;
+ scrub->os_ls_count = 0;
+ scrub->os_ls_fids = NULL;
+ }
+
while (!list_empty(&scrub->os_inconsistent_items)) {
struct osd_inconsistent_item *oii;
{
return osd_scan_dir(env, dev, dev->od_O_id, osd_scan_ml_file_seq);
}
+
+#define LASTID "LAST_ID"
+
+/*
+ * Create the LAST_ID object in the sequence directory described by \a ozi
+ * and store \a lastid_known (little-endian) in its first 8 bytes.
+ *
+ * The caller is expected to have filled info->oti_mdt_attrs.lma_self_fid
+ * with the FID to use: it is copied into the directory entry and into the
+ * LMA xattr of the new object. Object creation, attribute initialisation,
+ * the data write and the ZAP insert all happen in one DMU transaction.
+ *
+ * \retval 0		success
+ * \retval negative	errno on failure; the transaction is aborted
+ */
+static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev,
+			     struct osd_zap_it *ozi, __u64 lastid_known)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+	struct lu_attr *la = &info->oti_la;
+	struct luz_direntry *zde = &info->oti_zde;
+	uint64_t dir = ozi->ozi_zde.lzd_reg.zde_dnode;
+	dmu_tx_t *tx = NULL;
+	nvlist_t *nvbuf = NULL;
+	dnode_t *dn = NULL;
+	/* must start as NULL: the common exit path tests it */
+	sa_handle_t *hdl = NULL;
+	__u64 lastid;
+	int num = sizeof(*zde) / 8;
+	int rc = 0;
+
+	ENTRY;
+
+	tx = dmu_tx_create(dev->od_os);
+	if (!tx)
+		GOTO(out, rc = -ENOMEM);
+
+	dmu_tx_hold_sa_create(tx, osd_find_dnsize(dev, OSD_BASE_EA_IN_BONUS));
+	dmu_tx_hold_zap(tx, dir, FALSE, NULL);
+	/*
+	 * All holds have to be declared before dmu_tx_assign(). The dnode
+	 * does not exist yet, so declare the data write against
+	 * DMU_NEW_OBJECT.
+	 */
+	dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, sizeof(lastid));
+
+	rc = -dmu_tx_assign(tx, TXG_WAIT);
+	if (rc)
+		GOTO(abort, rc);
+
+	memset(&zde->lzd_reg, 0, sizeof(zde->lzd_reg));
+	zde->lzd_reg.zde_type = IFTODT(S_IFREG);
+	zde->lzd_fid = lma->lma_self_fid;
+
+	rc = -nvlist_alloc(&nvbuf, NV_UNIQUE_NAME, KM_SLEEP);
+	if (rc)
+		GOTO(abort, rc);
+
+	/* the LMA is swabbed before it is stored in the nvlist */
+	lustre_lma_init(lma, &zde->lzd_fid, 0, 0);
+	lustre_lma_swab(lma);
+	rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA, (uchar_t *)lma,
+				    sizeof(*lma));
+	if (rc)
+		GOTO(abort, rc);
+
+	la->la_valid = LA_TYPE | LA_MODE;
+	la->la_mode = (DTTOIF(zde->lzd_reg.zde_type) & S_IFMT) | 0644;
+
+	rc = __osd_object_create(env, dev, NULL, &zde->lzd_fid, &dn, tx, la);
+	if (rc)
+		GOTO(abort, rc);
+
+	zde->lzd_reg.zde_dnode = dn->dn_object;
+	rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL,
+			    SA_HDL_PRIVATE, &hdl);
+	if (rc)
+		GOTO(abort, rc);
+
+	rc = __osd_attr_init(env, dev, NULL, hdl, tx, la, dir, nvbuf);
+	if (rc)
+		GOTO(abort, rc);
+
+	sa_handle_destroy(hdl);
+	hdl = NULL;
+
+	lastid = cpu_to_le64(lastid_known);
+	dmu_write_by_dnode(dn, 0, sizeof(lastid), &lastid, tx);
+
+	rc = osd_zap_add(dev, dir, NULL, LASTID, strlen(LASTID), num,
+			 (void *)zde, tx);
+	if (rc)
+		GOTO(abort, rc);
+
+	dmu_tx_commit(tx);
+	GOTO(out, rc);
+
+abort:
+	/* give back the object if it was already allocated in this tx */
+	if (dn)
+		dmu_object_free(dev->od_os, dn->dn_object, tx);
+
+	dmu_tx_abort(tx);
+
+out:
+	if (hdl)
+		sa_handle_destroy(hdl);
+	if (dn)
+		osd_dnode_rele(dn);
+	if (nvbuf)
+		nvlist_free(nvbuf);
+
+	return rc;
+}
+
+/*
+ * osd_scan_dir() callback for one entry of a sequence directory.
+ *
+ * Entries that are not regular files are ignored. A regular file named
+ * LASTID has its dnode number saved in info->oti_lastid_oid; any other
+ * regular file is only reported through CDEBUG. Always returns 0.
+ */
+static int osd_scan_lastid_dir(const struct lu_env *env,
+			       struct osd_device *dev, uint64_t dir_oid,
+			       struct osd_zap_it *ozi)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	bool is_reg;
+
+	ENTRY;
+
+	is_reg = S_ISREG(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type)));
+	if (is_reg) {
+		if (strcmp(ozi->ozi_name, LASTID) == 0)
+			info->oti_lastid_oid = ozi->ozi_zde.lzd_reg.zde_dnode;
+		else
+			CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n",
+			       osd_name(dev), info->oti_seq_name,
+			       ozi->ozi_name);
+	}
+
+	RETURN(0);
+}
+
+/*
+ * osd_scan_dir() callback for one scanned entry: verify or repair the
+ * LAST_ID object of one local-storage sequence directory.
+ *
+ * The entry name is parsed as the sequence number (hex first; values below
+ * 0x1F are re-parsed as decimal). For a local-storage sequence the directory
+ * is scanned for LAST_ID, then:
+ *  - if LAST_ID is missing it is created (unless in dry-run mode) with the
+ *    highest OID recorded in scrub->os_ls_fids[] for this sequence;
+ *  - otherwise the LMA xattr is rebuilt when it is missing, too short, or
+ *    its self FID is not [seq:0:0], and the stored value is raised when it
+ *    is below the recorded OID. Nothing is written in dry-run mode.
+ *
+ * \retval 0		entry handled or skipped
+ * \retval -ERANGE	the sequence is not in scrub->os_ls_fids[]
+ * \retval negative	other errno on failure
+ */
+static int osd_scan_lastid_seq(const struct lu_env *env,
+			       struct osd_device *dev, uint64_t dir_oid,
+			       struct osd_zap_it *ozi)
+{
+	struct osd_thread_info *info = osd_oti_get(env);
+	struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+	struct lu_buf *lb = &info->oti_xattr_lbuf;
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	dnode_t *dn = NULL;
+	dmu_tx_t *tx = NULL;
+	nvlist_t *nvbuf = NULL;
+	sa_handle_t *hdl = NULL;
+	uchar_t *raw_lma = NULL;
+	uint64_t blocks;
+	uint32_t blksize;
+	uint32_t sz_lma = 0;
+	size_t size = 0;
+	__u64 seq;
+	__u64 lastid;
+	__u64 lastid_known;
+	bool need_update = false;
+	bool lma_loaded = false;
+	bool lma_valid = false;
+	int index;
+	int rc;
+
+	ENTRY;
+
+	if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+		RETURN(0);
+
+	rc = kstrtoull(ozi->ozi_name, 16, &seq);
+	if (rc)
+		RETURN(rc);
+
+	/* small values are re-read as decimal (e.g. "10" is sequence 10) */
+	if (seq < 0x1F) {
+		rc = kstrtoull(ozi->ozi_name, 10, &seq);
+		if (rc)
+			RETURN(rc);
+	}
+
+	if (!fid_seq_is_local_storage(seq))
+		GOTO(out, rc = 0);
+
+	info->oti_lastid_oid = 0;
+	info->oti_seq_name = ozi->ozi_name;
+	rc = osd_scan_dir(env, dev, ozi->ozi_zde.lzd_reg.zde_dnode,
+			  osd_scan_lastid_dir);
+	if (rc)
+		GOTO(out, rc);
+
+	for (index = 0; index < scrub->os_ls_count; index++)
+		if (scrub->os_ls_fids[index].f_seq == seq)
+			break;
+
+	if (unlikely(index >= scrub->os_ls_count)) {
+		CDEBUG(D_LFSCK,
+		       "%s: can't find seq %llu, it's modified during scrub?\n",
+		       osd_name(dev), seq);
+		GOTO(out, rc = -ERANGE);
+	}
+
+	lastid_known = scrub->os_ls_fids[index].f_oid;
+
+	if (info->oti_lastid_oid == 0) {
+		/* a dry run must not create anything */
+		if (scrub->os_file.sf_param & SP_DRYRUN)
+			GOTO(out, rc = 0);
+
+		/* osd_create_lastid() takes the FID from oti_mdt_attrs */
+		lma->lma_self_fid.f_seq = seq;
+		lma->lma_self_fid.f_oid = 0;
+		lma->lma_self_fid.f_ver = 0;
+
+		rc = osd_create_lastid(env, dev, ozi, lastid_known);
+		GOTO(out, rc);
+	}
+
+	rc = __osd_obj2dnode(dev->od_os, info->oti_lastid_oid, &dn);
+	if (rc)
+		GOTO(out, rc);
+
+	rc = -sa_handle_get(dev->od_os, dn->dn_object, NULL,
+			    SA_HDL_PRIVATE, &hdl);
+	if (rc)
+		GOTO(out, rc);
+
+	/* an object without data blocks is treated as holding 0 */
+	lastid = 0;
+	sa_object_size(hdl, &blksize, &blocks);
+	if (blocks > 0) {
+		rc = osd_dmu_read(dev, dn, 0, sizeof(lastid), (char *) &lastid,
+				  0);
+		if (rc)
+			GOTO(out, rc);
+
+		lastid = le64_to_cpu(lastid);
+	}
+
+	/* same condition as the data write below */
+	if (lastid < lastid_known)
+		need_update = true;
+
+	rc = __osd_xattr_load(dev, hdl, &nvbuf);
+	if (rc)
+		GOTO(out, rc);
+
+	/*
+	 * The lookup returns a pointer into memory owned by nvbuf, so use a
+	 * separate pointer and copy the value out; "lma" must keep pointing
+	 * at the per-thread buffer.
+	 */
+	rc = -nvlist_lookup_byte_array(nvbuf, XATTR_NAME_LMA, &raw_lma,
+				       &sz_lma);
+	if (rc != 0 && rc != -ENOENT)
+		GOTO(out, rc);
+
+	if (rc == 0 && sz_lma >= sizeof(*lma)) {
+		memcpy(lma, raw_lma, sizeof(*lma));
+		lustre_lma_swab(lma);
+		lma_loaded = true;
+		lma_valid = lma->lma_self_fid.f_seq == seq &&
+			    lma->lma_self_fid.f_oid == 0 &&
+			    lma->lma_self_fid.f_ver == 0;
+	}
+
+	if (!lma_valid) {
+		if (rc == 0) {
+			/* raw_lma is dangling once the pair is removed */
+			rc = -nvlist_remove(nvbuf, XATTR_NAME_LMA,
+					    DATA_TYPE_BYTE_ARRAY);
+			if (rc)
+				GOTO(out, rc);
+		}
+
+		/* no usable LMA on disk: start from an all-zero one */
+		if (!lma_loaded)
+			memset(lma, 0, sizeof(*lma));
+
+		lma->lma_self_fid.f_seq = seq;
+		lma->lma_self_fid.f_oid = 0;
+		lma->lma_self_fid.f_ver = 0;
+		lustre_lma_swab(lma);
+
+		rc = -nvlist_add_byte_array(nvbuf, XATTR_NAME_LMA,
+					    (uchar_t *)lma, sizeof(*lma));
+		if (rc)
+			GOTO(out, rc);
+
+		need_update = true;
+	}
+
+	if (!need_update)
+		GOTO(out, rc = 0);
+
+	if (scrub->os_file.sf_param & SP_DRYRUN)
+		GOTO(out, rc = 0);
+
+	tx = dmu_tx_create(dev->od_os);
+	if (!tx)
+		GOTO(out, rc = -ENOMEM);
+
+	/*
+	 * NOTE(review): the update below goes through sa_update(); confirm
+	 * whether dmu_tx_hold_sa() should be declared here instead of (or in
+	 * addition to) the ZAP hold.
+	 */
+	dmu_tx_hold_zap_by_dnode(tx, dn, TRUE, NULL);
+	if (lastid < lastid_known)
+		dmu_tx_hold_write_by_dnode(tx, dn, 0, sizeof(lastid));
+
+	rc = -dmu_tx_assign(tx, TXG_WAIT);
+	if (rc)
+		GOTO(abort, rc);
+
+	rc = -nvlist_size(nvbuf, &size, NV_ENCODE_XDR);
+	if (rc)
+		GOTO(abort, rc);
+
+	lu_buf_check_and_alloc(lb, size);
+	/* the tx is already assigned, so it has to be aborted on failure */
+	if (lb->lb_buf == NULL)
+		GOTO(abort, rc = -ENOMEM);
+
+	rc = -nvlist_pack(nvbuf, (char **)&lb->lb_buf, &size, NV_ENCODE_XDR,
+			  KM_SLEEP);
+	if (rc)
+		GOTO(abort, rc);
+
+	/* the packed nvlist is the xattr blob, not the file size */
+	rc = -sa_update(hdl, SA_ZPL_DXATTR(dev), lb->lb_buf, size, tx);
+	if (rc)
+		GOTO(abort, rc);
+
+	if (lastid < lastid_known) {
+		lastid = cpu_to_le64(lastid_known);
+		dmu_write_by_dnode(dn, 0, sizeof(lastid),
+				   (const char *) &lastid, tx);
+	}
+
+	dmu_tx_commit(tx);
+	GOTO(out, rc);
+
+abort:
+	dmu_tx_abort(tx);
+
+out:
+	if (nvbuf)
+		nvlist_free(nvbuf);
+
+	if (hdl)
+		sa_handle_destroy(hdl);
+
+	if (dn)
+		osd_dnode_rele(dn);
+
+	RETURN(rc);
+}
+
+/*
+ * Run osd_scan_lastid_seq() on every entry of the directory object
+ * dev->od_O_id and return what osd_scan_dir() returns.
+ */
+static int osd_scan_lastid_main(const struct lu_env *env,
+				struct osd_device *dev)
+{
+	int rc = osd_scan_dir(env, dev, dev->od_O_id, osd_scan_lastid_seq);
+
+	return rc;
+}
+
+/*
+ * osd_scan_dir() callback for one scanned entry: register a local-storage
+ * sequence in scrub->os_ls_fids[].
+ *
+ * The entry must be a directory whose name parses as a sequence number (hex
+ * first; values below 0x1F are re-parsed as decimal). For a local-storage
+ * sequence a new slot is appended to scrub->os_ls_fids[], growing the array
+ * by 4 slots when it is full. Only f_seq of the new slot is written here.
+ * NOTE(review): f_oid of the new slot relies on OBD_ALLOC() returning zeroed
+ * memory - confirm.
+ *
+ * \retval 0		entry recorded or skipped
+ * \retval -ENOMEM	the array could not be grown
+ * \retval negative	errno from kstrtoull()
+ */
+static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev,
+			  uint64_t dir_oid, struct osd_zap_it *ozi)
+{
+	struct lustre_scrub *scrub = &dev->od_scrub;
+	struct lu_fid *fids;
+	__u64 seq;
+	int rc;
+
+	ENTRY;
+
+	if (!S_ISDIR(cpu_to_le16(DTTOIF(ozi->ozi_zde.lzd_reg.zde_type))))
+		RETURN(0);
+
+	rc = kstrtoull(ozi->ozi_name, 16, &seq);
+	if (rc)
+		RETURN(rc);
+
+	if (seq < 0x1F) {
+		rc = kstrtoull(ozi->ozi_name, 10, &seq);
+		if (rc)
+			RETURN(rc);
+	}
+
+	if (!fid_seq_is_local_storage(seq))
+		GOTO(out, rc = 0);
+
+	/*
+	 * Grow first and count afterwards, so a failed allocation leaves
+	 * os_ls_count <= os_ls_size and is reported to the caller.
+	 */
+	if (unlikely(scrub->os_ls_count >= scrub->os_ls_size)) {
+		OBD_ALLOC(fids,
+			  sizeof(struct lu_fid) * (scrub->os_ls_size + 4));
+		if (fids == NULL)
+			GOTO(out, rc = -ENOMEM);
+
+		memcpy(fids, scrub->os_ls_fids,
+		       sizeof(struct lu_fid) * scrub->os_ls_size);
+		OBD_FREE(scrub->os_ls_fids,
+			 sizeof(struct lu_fid) * scrub->os_ls_size);
+
+		scrub->os_ls_size += 4;
+		scrub->os_ls_fids = fids;
+	}
+
+	scrub->os_ls_fids[scrub->os_ls_count].f_seq = seq;
+	scrub->os_ls_count++;
+
+out:
+	RETURN(rc);
+}
+
+/*
+ * Run osd_scan_O_seq() on every entry of the directory object dev->od_O_id
+ * and return what osd_scan_dir() returns.
+ */
+static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
+{
+	int rc = osd_scan_dir(env, dev, dev->od_O_id, osd_scan_O_seq);
+
+	return rc;
+}
#include <linux/types.h>
#include <uapi/linux/lnet/lnet-types.h>
#include <uapi/linux/lustre/lustre_idl.h>
+#include <uapi/linux/lustre/lustre_ioctl.h>
#include <uapi/linux/lustre/lustre_disk.h>
#include <dt_object.h>
#include <lu_object.h>
}
}
+retry:
nm_obj = local_index_find_or_create(env, los, root_obj,
LUSTRE_NODEMAP_NAME,
S_IFREG | S_IRUGO | S_IWUSR,
&dt_nodemap_features);
- if (IS_ERR(nm_obj))
+ if (IS_ERR(nm_obj)) {
+ if (PTR_ERR(nm_obj) == -EEXIST && rc != -ENOENT &&
+ los->los_last_oid < (tfid.f_oid - 1)) {
+ if (dt2lu_dev(dev)->ld_obd)
+ dt2lu_dev(dev)->ld_obd->obd_need_scrub = 1;
+
+ mutex_lock(&los->los_id_lock);
+ los->los_last_oid = tfid.f_oid - 1;
+ mutex_unlock(&los->los_id_lock);
+
+ goto retry;
+ }
+
GOTO(out_root, nm_obj);
+ }
if (nm_obj->do_index_ops == NULL) {
rc = nm_obj->do_ops->do_index_try(env, nm_obj,
}
run_test 21 "don't hang MDS recovery when failed to get update log"
+# Remove LAST_ID from the LLOG, LLOG_NAME and LOCAL_NAME sequence
+# directories of mds1, run OI scrub, then check that each LAST_ID file
+# exists again and that its LMA self FID is [seq:0x0:0x0].
+test_22() {
+	#FID_SEQ_LLOG = 1
+	#FID_SEQ_LLOG_NAME = 10
+	#FID_SEQ_LOCAL_NAME = 0x200000003,
+	local s_llog="1"
+	local s_llog_name="10"
+	local s_local="200000003"
+	local mntpt
+	local lma
+	local fid
+
+	stopall
+
+	# remove the LASTID
+	mount_fstype mds1 || error "(1) Fail to mount mds1"
+	mntpt=$(facet_mntpt mds1)
+
+	do_facet mds1 rm -f "$mntpt/O/$s_llog/LAST_ID"
+	do_facet mds1 rm -f "$mntpt/O/$s_llog_name/LAST_ID"
+	do_facet mds1 rm -f "$mntpt/O/$s_local/LAST_ID"
+
+	unmount_fstype mds1 || error "(2) Fail to umount mds1"
+
+	$LCTL set_param debug=-1
+	$LCTL dk > /dev/null
+	start mds1 $(mdsdevname 1) > /dev/null || {
+		$LCTL dk > /tmp/log
+		error "(3) Fail to start mds1"
+	}
+	$START_SCRUB -r || error "(4) Fail to start OI scrub on MDT!"
+
+	wait_update_facet mds1 "$LCTL get_param -n \
+		osd-*.$(facet_svc mds1).oi_scrub |
+		awk '/^status/ { print \\\$2 }'" "completed" 6 ||
+		error "(5) Expected 'completed' on mds1"
+
+	stop mds1
+
+	mount_fstype mds1 || error "(6) Fail to mount mds1 again"
+	do_facet mds1 stat "$mntpt/O/$s_llog/LAST_ID" ||
+		error "(7) LAST_ID is not recreated for LLOG"
+	lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog/LAST_ID)
+	fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+	[ "$fid" == "[0x1:0x0:0x0]" ] ||
+		error "(8) the LMA of the LLOG LAST_ID is incorrect"
+
+	do_facet mds1 stat "$mntpt/O/$s_llog_name/LAST_ID" ||
+		error "(9) LAST_ID is not recreated for LLOG_NAME"
+	lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_llog_name/LAST_ID)
+	fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+	[ "$fid" == "[0xa:0x0:0x0]" ] ||
+		error "(10) the LMA of the LLOG_NAME LAST_ID is incorrect"
+
+	do_facet mds1 stat "$mntpt/O/$s_local/LAST_ID" ||
+		error "(11) LAST_ID is not recreated for LOCAL_NAME"
+	lma=$(do_facet mds1 $LL_DECODE_FILTER_FID $mntpt/O/$s_local/LAST_ID)
+	fid=$(sed -e 's/.*fid=//' -e 's/ .*//' <<< $lma)
+	[ "$fid" == "[0x200000003:0x0:0x0]" ] ||
+		error "(12) the LMA of the LOCAL_NAME LAST_ID is incorrect"
+
+	unmount_fstype mds1 || error "(13) Fail to umount mds1 again"
+
+	start mds1 $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB > /dev/null ||
+		error "(14) Fail to start mds1"
+}
+run_test 22 "LFSCK can recreate or fix the LASTID on MDT/OST"
# restore MDS/OST size
MDSSIZE=${SAVED_MDSSIZE}
struct lustre_ost_attrs *loa = (void *)buf;
int rc1;
+ memset(loa, 0, sizeof(*loa));
rc1 = getxattr(argv[i], "trusted.lma", loa,
sizeof(*loa));
- if (rc1 < sizeof(*loa)) {
+ if (rc1 < sizeof(struct lustre_mdt_attrs)) {
fprintf(stderr,
"%s: error reading fid: %s\n",
argv[i], strerror(ENODATA));
}
lustre_loa_swab(loa);
- if (!(loa->loa_lma.lma_compat &
+ if (rc1 > sizeof(struct lustre_mdt_attrs) &&
+ !(loa->loa_lma.lma_compat &
LMAC_STRIPE_INFO)) {
fprintf(stderr,
"%s: not stripe info: %s\n",
continue;
}
- printf("%s: parent="DFID" stripe=%u "
+ printf("%s: fid="DFID" parent="DFID" stripe=%u "
"stripe_size=%u stripe_count=%u",
argv[i],
+ PFID(&loa->loa_lma.lma_self_fid),
(unsigned long long)loa->loa_parent_fid.f_seq,
loa->loa_parent_fid.f_oid, 0, /* ver */
loa->loa_parent_fid.f_stripe_idx &