RETURN(rc);
}
+/**
+ * Move an object that was found under /lost+found back to its proper
+ * place in the OST object name space.
+ *
+ * For a LAST_ID FID the target is the entry "LAST_ID" under the sequence
+ * root (osd_seq->oos_root); otherwise it is the entry generated by
+ * osd_oid_name() under the sub-directory osd_seq->oos_dirs[] selected by
+ * the low bits of the object ID.
+ *
+ * If the target name is already in use, nothing is moved. In that case
+ * the source is unlinked only when it looks never modified (zero size
+ * with both S_ISUID and S_ISGID set), and 0 is returned.
+ *
+ * \param[in] info		per-thread scratch data; oti_child_dentry and
+ *				oti_ostid are overwritten
+ * \param[in] osd		the OSD device that owns both directories
+ * \param[in] src_parent	directory inode currently holding \a src_child
+ * \param[in] src_child	dentry of the object to be moved
+ * \param[in] fid		FID used to locate the target slot
+ *
+ * \retval 0		the entry was moved, or the target name is occupied
+ * \retval -ENOENT	\a src_child is no longer found under \a src_parent
+ * \retval negative	other failure from sequence loading, journal start,
+ *			entry deletion or entry insertion
+ */
+int osd_obj_map_recover(struct osd_thread_info *info,
+			struct osd_device *osd,
+			struct inode *src_parent,
+			struct dentry *src_child,
+			const struct lu_fid *fid)
+{
+	struct osd_obj_seq *osd_seq;
+	struct dentry *tgt_parent;
+	struct dentry *tgt_child = &info->oti_child_dentry;
+	struct inode *dir;
+	struct inode *inode = src_child->d_inode;
+	struct ost_id *ostid = &info->oti_ostid;
+	handle_t *jh;
+	struct ldiskfs_dir_entry_2 *de;
+	struct buffer_head *bh;
+	char name[32];
+	int dirn;
+	int rc = 0;
+	ENTRY;
+
+	if (fid_is_last_id(fid)) {
+		osd_seq = osd_seq_load(info, osd, fid_seq(fid));
+		if (IS_ERR(osd_seq))
+			RETURN(PTR_ERR(osd_seq));
+
+		tgt_parent = osd_seq->oos_root;
+		tgt_child->d_name.name = "LAST_ID";
+		tgt_child->d_name.len = strlen("LAST_ID");
+	} else {
+		fid_to_ostid(fid, ostid);
+		osd_seq = osd_seq_load(info, osd, ostid_seq(ostid));
+		if (IS_ERR(osd_seq))
+			RETURN(PTR_ERR(osd_seq));
+
+		/* The mask only works as a modulo if oos_subdir_count is a
+		 * power of two - NOTE(review): confirm against osd_seq_load. */
+		dirn = ostid_id(ostid) & (osd_seq->oos_subdir_count - 1);
+		tgt_parent = osd_seq->oos_dirs[dirn];
+		osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
+		tgt_child->d_name.name = name;
+		tgt_child->d_name.len = strlen(name);
+	}
+	LASSERT(tgt_parent != NULL);
+
+	dir = tgt_parent->d_inode;
+	tgt_child->d_name.hash = 0;
+	tgt_child->d_parent = tgt_parent;
+	tgt_child->d_inode = inode;
+
+	/* The non-initialized src_child may be destroyed. */
+	jh = ldiskfs_journal_start_sb(osd_sb(osd),
+				osd_dto_credits_noquota[DTO_INDEX_DELETE] +
+				osd_dto_credits_noquota[DTO_INDEX_INSERT] +
+				osd_dto_credits_noquota[DTO_OBJECT_DELETE]);
+	if (IS_ERR(jh))
+		RETURN(PTR_ERR(jh));
+
+	ll_vfs_dq_init(src_parent);
+	ll_vfs_dq_init(dir);
+
+	/* Source directory first, then target directory; both exits below
+	 * release them in the reverse order. */
+	mutex_lock(&src_parent->i_mutex);
+	mutex_lock(&dir->i_mutex);
+	bh = osd_ldiskfs_find_entry(dir, tgt_child, &de, NULL);
+	if (bh != NULL) {
+		/* XXX: If some other object occupied the same slot. And If such
+		 *	inode is zero-sized and with SUID+SGID, then means it is
+		 *	a new created one. Maybe we can remove it and insert the
+		 *	original one back to the /O/<seq>/d<x>. But there are
+		 *	something to be considered:
+		 *
+		 *	1) The OST-object under /lost+found has crashed LMA.
+		 *	   So it should not conflict with the current one.
+		 *
+		 *	2) There are race conditions that: someone may just want
+		 *	   to modify the current one. Even if the OI scrub takes
+		 *	   the object lock when remove the current one, it still
+		 *	   cause the modification to be lost because the target
+		 *	   has been removed when the RPC service thread waiting
+		 *	   for the lock.
+		 *
+		 *	So keep it there before we have suitable solution. */
+		brelse(bh);
+
+		/* The target directory is not used any more on this path and
+		 * the "unlock_src" exit only drops src_parent->i_mutex, so
+		 * dir->i_mutex must be released here or it would be leaked. */
+		mutex_unlock(&dir->i_mutex);
+
+		/* If the src object has never been modified, then remove it. */
+		if (inode->i_size == 0 && inode->i_mode & S_ISUID &&
+		    inode->i_mode & S_ISGID)
+			ll_vfs_unlink(src_parent, src_child, osd->od_mnt);
+		GOTO(unlock_src, rc = 0);
+	}
+
+	bh = osd_ldiskfs_find_entry(src_parent, src_child, &de, NULL);
+	if (unlikely(bh == NULL))
+		GOTO(unlock, rc = -ENOENT);
+
+	rc = ldiskfs_delete_entry(jh, src_parent, de, bh);
+	brelse(bh);
+	if (rc != 0)
+		GOTO(unlock, rc);
+
+	rc = osd_ldiskfs_add_entry(jh, tgt_child, inode, NULL);
+
+	GOTO(unlock, rc);
+
+unlock:
+	mutex_unlock(&dir->i_mutex);
+
+unlock_src:
+	mutex_unlock(&src_parent->i_mutex);
+	ldiskfs_journal_stop(jh);
+	return rc;
+}
+
static struct dentry *
osd_object_spec_find(struct osd_thread_info *info, struct osd_device *osd,
const struct lu_fid *fid, char **name)
return 0;
}
-static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
- struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
- struct super_block *sb, bool scrub)
+static int osd_scrub_get_fid(struct osd_thread_info *info,
+ struct osd_device *dev, struct inode *inode,
+ struct lu_fid *fid, bool scrub)
{
- struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
- struct inode *inode;
- int rc = 0;
- bool has_lma = false;
- ENTRY;
-
- osd_id_gen(lid, pos, OSD_OII_NOGEN);
- inode = osd_iget(info, dev, lid);
- if (IS_ERR(inode)) {
- rc = PTR_ERR(inode);
- /* The inode may be removed after bitmap searching, or the
- * file is new created without inode initialized yet. */
- if (rc == -ENOENT || rc == -ESTALE)
- RETURN(SCRUB_NEXT_CONTINUE);
-
- CERROR("%.16s: fail to read inode, ino# = %u, rc = %d\n",
- LDISKFS_SB(sb)->s_es->s_volume_name, pos, rc);
- RETURN(rc);
- }
-
- /* If the inode has no OI mapping, then it is special locally used,
- * should be invisible to OI scrub or up layer LFSCK. */
- if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI))
- GOTO(put, rc = SCRUB_NEXT_CONTINUE);
-
- if (scrub &&
- ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
- /* Only skip it for the first OI scrub accessing. */
- ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
- GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
- }
+ struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
+ int rc;
+ bool has_lma = false;
rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
if (rc == 0) {
if (lma->lma_compat & LMAC_NOT_IN_OI) {
ldiskfs_set_inode_state(inode,
LDISKFS_STATE_LUSTRE_NO_OI);
- GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+ return SCRUB_NEXT_CONTINUE;
}
- if (fid_is_llog(&lma->lma_self_fid))
- GOTO(put, rc = SCRUB_NEXT_CONTINUE);
-
*fid = lma->lma_self_fid;
if (fid_is_internal(&lma->lma_self_fid)) {
if (!scrub)
rc = SCRUB_NEXT_CONTINUE;
- GOTO(put, rc);
+ return rc;
}
if (!scrub)
- GOTO(put, rc);
+ return 0;
if (fid_is_namespace_visible(fid) && !fid_is_norm(fid))
- GOTO(put, rc);
+ return 0;
- if (lma->lma_compat & LMAC_FID_ON_OST || fid_is_last_id(fid))
- GOTO(put, rc = SCRUB_NEXT_OSTOBJ);
+ if (lma->lma_compat & LMAC_FID_ON_OST)
+ return SCRUB_NEXT_OSTOBJ;
- if (fid_is_idif(fid))
- GOTO(put, rc = SCRUB_NEXT_OSTOBJ_OLD);
+ if (fid_is_idif(fid) || fid_is_last_id(fid))
+ return SCRUB_NEXT_OSTOBJ_OLD;
if (lma->lma_incompat & LMAI_AGENT)
- GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+ return SCRUB_NEXT_CONTINUE;
/* Here, it may be MDT-object, or may be 2.4 OST-object.
* Fall through. */
if (scrub)
/* It is old 2.x (x <= 3) or 1.8 OST-object. */
rc = SCRUB_NEXT_OSTOBJ_OLD;
- GOTO(put, rc);
+ return rc;
}
if (rc > 0) {
else
/* It is 2.4 OST-object. */
rc = SCRUB_NEXT_OSTOBJ_OLD;
- GOTO(put, rc);
+ return rc;
}
if (rc != -ENODATA)
- GOTO(put, rc);
+ return rc;
if (!has_lma) {
if (dev->od_handle_nolma) {
* generate its FID, ignore it directly. */
rc = SCRUB_NEXT_CONTINUE;
}
- GOTO(put, rc);
+ return rc;
}
/* For OI scrub case only: the object has LMA but has no ff
rc = osd_scrub_check_local_fldb(info, dev, fid);
}
+ return rc;
+}
+
+static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
+ struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
+ struct super_block *sb, bool scrub)
+{
+ struct inode *inode;
+ int rc;
+ ENTRY;
+
+ osd_id_gen(lid, pos, OSD_OII_NOGEN);
+ inode = osd_iget(info, dev, lid);
+ if (IS_ERR(inode)) {
+ rc = PTR_ERR(inode);
+ /* The inode may be removed after bitmap searching, or the
+ * file is new created without inode initialized yet. */
+ if (rc == -ENOENT || rc == -ESTALE)
+ RETURN(SCRUB_NEXT_CONTINUE);
+
+ CERROR("%.16s: fail to read inode, ino# = %u, rc = %d\n",
+ LDISKFS_SB(sb)->s_es->s_volume_name, pos, rc);
+ RETURN(rc);
+ }
+
+ /* If the inode has no OI mapping, then it is special locally used,
+ * should be invisible to OI scrub or up layer LFSCK. */
+ if (ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NO_OI))
+ GOTO(put, rc = SCRUB_NEXT_CONTINUE);
+
+ if (scrub &&
+ ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
+ /* Only skip it for the first OI scrub accessing. */
+ ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
+ GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
+ }
+
+ rc = osd_scrub_get_fid(info, dev, inode, fid, scrub);
+
GOTO(put, rc);
put:
static int osd_ios_varfid_fill(void *buf, const char *name, int namelen,
loff_t offset, __u64 ino, unsigned d_type);
+static int osd_ios_lf_fill(void *buf, const char *name, int namelen,
+ loff_t offset, __u64 ino, unsigned d_type);
static int
osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
OLF_SCAN_SUBITEMS = 0x0001,
OLF_HIDE_FID = 0x0002,
OLF_SHOW_NAME = 0x0004,
+ OLF_NO_OI = 0x0008,
};
struct osd_lf_map {
{ "LAST_GROUP", { FID_SEQ_LOCAL_FILE, OFD_LAST_GROUP_OID, 0 },
OLF_SHOW_NAME, NULL, NULL },
+ /* lost+found */
+ { "lost+found", { 0, 0, 0 }, OLF_SCAN_SUBITEMS | OLF_NO_OI,
+ osd_ios_general_scan, osd_ios_lf_fill },
+
{ NULL, { 0, 0, 0 }, 0, NULL, NULL }
};
RETURN(rc);
}
+/**
+ * Directory-entry callback used while scanning /lost+found.
+ *
+ * Names starting with '.' are ignored. A sub-directory is registered via
+ * osd_ios_new_item() with osd_ios_general_scan() and this same callback.
+ * A regular file for which osd_scrub_get_fid() reports an OST-object
+ * (SCRUB_NEXT_OSTOBJ or SCRUB_NEXT_OSTOBJ_OLD) is handed to
+ * osd_obj_map_recover() to be moved back to its proper /O/<seq>/d<x>.
+ * Any other entry is left untouched.
+ *
+ * The os_lf_scanned, os_lf_repaired and os_lf_failed counters in the
+ * device's scrub state are updated along the way.
+ *
+ * \param[in] buf	the struct osd_ios_filldir_buf for this scan
+ * \param[in] name	entry name (printed with an explicit length)
+ * \param[in] namelen	length of \a name
+ * \param[in] offset	unused
+ * \param[in] ino	unused
+ * \param[in] d_type	unused
+ *
+ * \retval 0		always, so that one bad entry does not stop the scan
+ */
+static int osd_ios_lf_fill(void *buf, const char *name, int namelen,
+			   loff_t offset, __u64 ino, unsigned d_type)
+{
+	struct osd_ios_filldir_buf *ctx = buf;
+	struct osd_thread_info *oti = ctx->oifb_info;
+	struct osd_device *osd = ctx->oifb_dev;
+	struct osd_scrub *stats = &osd->od_scrub;
+	struct lu_fid *self_fid = &oti->oti_fid;
+	struct dentry *lf_dentry = ctx->oifb_dentry;
+	struct inode *lf_inode = lf_dentry->d_inode;
+	struct dentry *entry;
+	struct inode *obj;
+	int rc;
+	ENTRY;
+
+	/* skip any '.' started names */
+	if (name[0] == '.')
+		RETURN(0);
+
+	stats->os_lf_scanned++;
+	entry = osd_ios_lookup_one_len(name, lf_dentry, namelen);
+	if (IS_ERR(entry)) {
+		CWARN("%s: cannot lookup child '%.*s': rc = %d\n",
+		      osd_name(osd), namelen, name, (int)PTR_ERR(entry));
+		RETURN(0);
+	}
+
+	obj = entry->d_inode;
+
+	/* Directories are not handled here, only queued for scanning. */
+	if (S_ISDIR(obj->i_mode)) {
+		rc = osd_ios_new_item(osd, entry, osd_ios_general_scan,
+				      osd_ios_lf_fill);
+		if (rc != 0)
+			CWARN("%s: cannot add child '%.*s': rc = %d\n",
+			      osd_name(osd), namelen, name, rc);
+		GOTO(put, rc);
+	}
+
+	/* Only regular files can be OST-objects. */
+	if (!S_ISREG(obj->i_mode))
+		GOTO(put, rc = 0);
+
+	rc = osd_scrub_get_fid(oti, osd, obj, self_fid, true);
+
+	/* XXX: For MDT-objects, we can move them from /lost+found to namespace
+	 *	visible place, such as the /ROOT/.lustre/lost+found, then LFSCK
+	 *	can process them further. */
+	if (rc != SCRUB_NEXT_OSTOBJ && rc != SCRUB_NEXT_OSTOBJ_OLD)
+		GOTO(put, rc);
+
+	rc = osd_obj_map_recover(oti, osd, lf_inode, entry, self_fid);
+	if (rc != 0) {
+		CWARN("%s: cannot rename for '%.*s' "DFID": rc = %d\n",
+		      osd_name(osd), namelen, name, PFID(self_fid), rc);
+	} else {
+		CDEBUG(D_LFSCK, "recovered '%.*s' ["DFID"] from "
+		       "/lost+found.\n", namelen, name, PFID(self_fid));
+		stats->os_lf_repaired++;
+	}
+
+	GOTO(put, rc);
+
+put:
+	/* Positive SCRUB_NEXT_* results are not failures. */
+	if (rc < 0)
+		stats->os_lf_failed++;
+	dput(entry);
+	/* skip the failure to make the scanning to continue. */
+	return 0;
+}
+
static int osd_ios_varfid_fill(void *buf, const char *name, int namelen,
loff_t offset, __u64 ino, unsigned d_type)
{
if (IS_ERR(child))
RETURN(PTR_ERR(child));
- rc = osd_ios_scan_one(fill_buf->oifb_info, dev, child->d_inode,
- &map->olm_fid, map->olm_flags);
+ if (!(map->olm_flags & OLF_NO_OI))
+ rc = osd_ios_scan_one(fill_buf->oifb_info, dev, child->d_inode,
+ &map->olm_fid, map->olm_flags);
if (rc == 0 && map->olm_flags & OLF_SCAN_SUBITEMS)
rc = osd_ios_new_item(dev, child, map->olm_scandir,
map->olm_filldir);
"run_time: %u seconds\n"
"average_speed: "LPU64" objects/sec\n"
"real-time_speed: "LPU64" objects/sec\n"
- "current_position: %u\n",
- rtime, speed, new_checked, scrub->os_pos_current);
+ "current_position: %u\n"
+ "lf_scanned: "LPU64"\n"
+ "lf_reparied: "LPU64"\n"
+ "lf_failed: "LPU64"\n",
+ rtime, speed, new_checked, scrub->os_pos_current,
+ scrub->os_lf_scanned, scrub->os_lf_repaired,
+ scrub->os_lf_failed);
} else {
if (sf->sf_run_time != 0)
do_div(speed, sf->sf_run_time);
"run_time: %u seconds\n"
"average_speed: "LPU64" objects/sec\n"
"real-time_speed: N/A\n"
- "current_position: N/A\n",
- sf->sf_run_time, speed);
+ "current_position: N/A\n"
+ "lf_scanned: "LPU64"\n"
+ "lf_reparied: "LPU64"\n"
+ "lf_failed: "LPU64"\n",
+ sf->sf_run_time, speed, scrub->os_lf_scanned,
+ scrub->os_lf_repaired, scrub->os_lf_failed);
}
if (rc <= 0)
goto out;