struct task_struct *os_task;
struct list_head os_inconsistent_items;
+ /* once inconsistent mapping can't be fixed, put into this list */
+ struct list_head os_stale_items;
/* write lock for scrub prep/update/post/checkpoint,
* read lock for scrub dump. */
#define OBD_FAIL_OSD_REF_DEL 0x19c
#define OBD_FAIL_OSD_OI_ENOSPC 0x19d
#define OBD_FAIL_OSD_DOTDOT_ENOSPC 0x19e
+#define OBD_FAIL_OSD_SCRUB_STALE 0x19f
#define OBD_FAIL_OFD_SET_OID 0x1e0
iput(inode);
osd_add_oi_cache(oti, dev, id, fid);
- osd_oii_insert(dev, fid, id, true);
+ osd_scrub_oi_insert(dev, fid, id, true);
oclb->oclb_found = true;
return 1;
RETURN(rc);
}
+/**
+ * Check whether \a fid has an item on one of the OI scrub item lists.
+ *
+ * \a scrub keeps two lists: os_inconsistent_items holds the mappings waiting
+ * to be fixed, while os_stale_items holds the mappings that failed to be
+ * fixed.
+ *
+ * \param[in] scrub	scrub instance; its os_lock is held while scanning
+ * \param[in] list	list to search, items are linked through oii_list
+ * \param[in] fid	FID to look for
+ *
+ * \retval true		an item with the same FID is linked on \a list
+ * \retval false	\a list is empty or holds no item for \a fid
+ */
+static bool fid_in_scrub_list(struct lustre_scrub *scrub,
+			      const struct list_head *list,
+			      const struct lu_fid *fid)
+{
+	struct osd_inconsistent_item *item;
+	bool found = false;
+
+	/* unlocked peek: do not take os_lock when there is nothing to scan */
+	if (list_empty(list))
+		return found;
+
+	spin_lock(&scrub->os_lock);
+	list_for_each_entry(item, list, oii_list) {
+		if (!lu_fid_eq(fid, &item->oii_cache.oic_fid))
+			continue;
+
+		found = true;
+		break;
+	}
+	spin_unlock(&scrub->os_lock);
+
+	return found;
+}
+
static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
const struct lu_fid *fid,
const struct lu_object_conf *conf)
bool trusted = true;
bool updated = false;
bool checked = false;
+ bool stale = false;
ENTRY;
* do_create->osd_oi_insert().
*/
if (conf && conf->loc_flags & LOC_F_NEW)
- GOTO(out, result = 0);
+ RETURN(0);
/* Search order: 1. per-thread cache. */
if (lu_fid_eq(fid, &oic->oic_fid) && likely(oic->oic_dev == dev)) {
goto iget;
}
+ /* Search order: 2. OI scrub pending list. */
id = &info->oti_id;
memset(id, 0, sizeof(struct osd_inode_id));
- if (!list_empty(&scrub->os_inconsistent_items)) {
- /* Search order: 2. OI scrub pending list. */
- result = osd_oii_lookup(dev, fid, id);
- if (!result)
- goto iget;
- }
+ if (fid_in_scrub_list(scrub, &scrub->os_inconsistent_items, fid))
+ RETURN(-EINPROGRESS);
+
+ stale = fid_in_scrub_list(scrub, &scrub->os_stale_items, fid);
+ if (stale && OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE))
+ RETURN(-ESTALE);
/*
* The OI mapping in the OI file can be updated by the OI scrub
goto check_lma;
}
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE))
+ goto trigger;
+
result = PTR_ERR(inode);
if (result == -ENOENT || result == -ESTALE)
GOTO(out, result = 0);
GOTO(out, result);
trigger:
+ /* don't trigger repeatedly for stale mapping */
+ if (stale)
+ GOTO(out, result = -ESTALE);
+
/*
* We still have chance to get the valid inode: for the
* object which is referenced by remote name entry, the
goto join;
if (IS_ERR_OR_NULL(inode) || result) {
- osd_oii_insert(dev, fid, id, result == -ENOENT);
+ osd_scrub_oi_insert(dev, fid, id, result == -ENOENT);
GOTO(out, result = -EINPROGRESS);
}
LASSERT(remote);
LASSERT(obj->oo_inode == inode);
- osd_oii_insert(dev, fid, id, true);
+ osd_scrub_oi_insert(dev, fid, id, true);
goto found;
}
}
join:
+ if (IS_ERR_OR_NULL(inode) || result)
+ osd_scrub_oi_insert(dev, fid, id, result == -ENOENT);
+
rc1 = osd_scrub_start(env, dev, flags);
CDEBUG_LIMIT(D_LFSCK | D_CONSOLE | D_WARNING,
"%s: trigger OI scrub by RPC for "DFID"/%u with flags %#x: rc = %d\n",
if (rc1 && rc1 != -EALREADY)
GOTO(out, result = -EREMCHG);
- if (IS_ERR_OR_NULL(inode) || result) {
- osd_oii_insert(dev, fid, id, result == -ENOENT);
+ if (IS_ERR_OR_NULL(inode) || result)
GOTO(out, result = -EINPROGRESS);
- }
LASSERT(remote);
LASSERT(obj->oo_inode == inode);
-
- osd_oii_insert(dev, fid, id, true);
goto found;
check_lma:
GOTO(out, result = (!obj->oo_hl_head ? -ENOMEM : 0));
out:
+ if (!result && stale)
+ osd_scrub_oi_resurrect(scrub, fid);
+
if (result || !obj->oo_inode) {
if (!IS_ERR_OR_NULL(inode))
iput(inode);
}
}
- rc = osd_oii_insert(dev, fid, id, insert);
+ rc = osd_scrub_oi_insert(dev, fid, id, insert);
/*
* There is race condition between osd_oi_lookup and OI scrub.
* The OI scrub finished just after osd_oi_lookup() failure.
struct osd_inconsistent_item {
/* link into lustre_scrub::os_inconsistent_items,
* protected by lustre_scrub::os_lock. */
- struct list_head oii_list;
+ struct list_head oii_list;
/* The right FID <=> ino#/gen mapping. */
- struct osd_idmap_cache oii_cache;
+ struct osd_idmap_cache oii_cache;
- unsigned int oii_insert:1; /* insert or update mapping. */
+ unsigned int oii_insert:1; /* insert or update mapping. */
};
struct osd_otable_cache {
int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
bool restored);
void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
-int osd_oii_insert(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id, int insert);
-int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id);
+int osd_scrub_oi_insert(struct osd_device *dev, const struct lu_fid *fid,
+ struct osd_inode_id *id, int insert);
+void osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
+ const struct lu_fid *fid);
void osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
struct dentry *osd_lookup_one_len_unlocked(struct osd_device *dev,
struct osd_inode_id *oi_id = &info->oti_id2;
int rc = 0;
+ CDEBUG(D_INODE, "insert OI for "DFID"\n", PFID(fid));
+
if (unlikely(fid_is_last_id(fid)))
return osd_obj_spec_insert(info, osd, fid, id, th);
{
struct lu_fid *oi_fid = &info->oti_fid2;
+ CDEBUG(D_INODE, "delete OI for "DFID"\n", PFID(fid));
+
/* clear idmap cache */
if (lu_fid_eq(fid, &info->oti_cache.oic_fid))
fid_zero(&info->oti_cache.oic_fid);
struct osd_inode_id *oi_id = &info->oti_id2;
int rc = 0;
+ CDEBUG(D_INODE, "update OI for "DFID"\n", PFID(fid));
+
if (unlikely(fid_is_last_id(fid)))
return osd_obj_spec_update(info, osd, fid, id, th);
#define OSD_OTABLE_MAX_HASH 0x00000000ffffffffULL
+/* high priority inconsistent items list APIs */
+#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60
+
+/**
+ * Queue a FID <=> ino#/gen mapping on lustre_scrub::os_inconsistent_items so
+ * that the OI scrub thread handles it in priority.
+ *
+ * When the scrub is in partial-scan mode the bad OI mapping counter is decayed
+ * and bumped first; once it exceeds od_full_scrub_threshold_rate a full scrub
+ * is requested. This accounting happens before the duplicate check, so a FID
+ * that is already queued still counts.
+ *
+ * \param[in] dev	OSD device owning the scrub instance
+ * \param[in] fid	FID of the mapping
+ * \param[in] id	inode ino#/generation of the mapping
+ * \param[in] insert	stored in oii_insert: insert (non-zero) or update the
+ *			mapping
+ *
+ * \retval 0		item queued, or \a fid was already on the list
+ * \retval -ENOMEM	the item could not be allocated
+ */
+int osd_scrub_oi_insert(struct osd_device *dev, const struct lu_fid *fid,
+			struct osd_inode_id *id, int insert)
+{
+	struct osd_scrub *oscrub = &dev->od_scrub;
+	struct lustre_scrub *lscrub = &oscrub->os_scrub;
+	struct osd_inconsistent_item *new_item;
+	struct osd_inconsistent_item *cur;
+	bool was_empty;
+
+	ENTRY;
+
+	/* allocate before taking os_lock; freed again below on duplicate */
+	OBD_ALLOC_PTR(new_item);
+	if (unlikely(!new_item))
+		RETURN(-ENOMEM);
+
+	INIT_LIST_HEAD(&new_item->oii_list);
+	new_item->oii_cache.oic_dev = dev;
+	new_item->oii_cache.oic_fid = *fid;
+	new_item->oii_cache.oic_lid = *id;
+	new_item->oii_insert = insert;
+
+	spin_lock(&lscrub->os_lock);
+	if (lscrub->os_partial_scan) {
+		__u64 now = ktime_get_real_seconds();
+
+		/* Halve the stale error count once per elapsed decay
+		 * interval, until either no errors are left or the
+		 * timestamp has caught up with the current interval.
+		 */
+		while (unlikely(oscrub->os_bad_oimap_count > 0 &&
+				oscrub->os_bad_oimap_time +
+				SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) {
+			oscrub->os_bad_oimap_count >>= 1;
+			oscrub->os_bad_oimap_time +=
+				SCRUB_BAD_OIMAP_DECAY_INTERVAL;
+		}
+
+		oscrub->os_bad_oimap_time = now;
+		oscrub->os_bad_oimap_count++;
+		if (oscrub->os_bad_oimap_count >
+		    dev->od_full_scrub_threshold_rate)
+			lscrub->os_full_scrub = 1;
+	}
+
+	/* a wakeup is only issued on the empty -> non-empty transition */
+	was_empty = list_empty(&lscrub->os_inconsistent_items);
+	list_for_each_entry(cur, &lscrub->os_inconsistent_items, oii_list) {
+		if (!lu_fid_eq(fid, &cur->oii_cache.oic_fid))
+			continue;
+
+		/* already queued, drop the new item */
+		spin_unlock(&lscrub->os_lock);
+		OBD_FREE_PTR(new_item);
+		RETURN(0);
+	}
+
+	list_add_tail(&new_item->oii_list, &lscrub->os_inconsistent_items);
+	spin_unlock(&lscrub->os_lock);
+
+	if (was_empty)
+		wake_up_var(lscrub);
+
+	RETURN(0);
+}
+
+/* If an item could not be repaired, move it to the tail of the os_stale_items
+ * list to avoid triggering scrub repeatedly.
+ *
+ * \a oii is unlinked from the list it is currently on and re-linked on
+ * scrub->os_stale_items in one step under scrub->os_lock. The item is not
+ * freed here.
+ */
+static inline void osd_scrub_oi_mark_stale(struct lustre_scrub *scrub,
+ struct osd_inconsistent_item *oii)
+{
+ spin_lock(&scrub->os_lock);
+ list_move_tail(&oii->oii_list, &scrub->os_stale_items);
+ spin_unlock(&scrub->os_lock);
+}
+
+/* The OI mapping of \a fid may have been marked stale; once the mapping is
+ * usable again, drop its item from the os_stale_items list.
+ *
+ * Only the first item matching \a fid is unlinked and freed. Nothing is done
+ * when the stale list is empty or holds no item for \a fid.
+ */
+void osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
+			    const struct lu_fid *fid)
+{
+	struct osd_inconsistent_item *cur;
+
+	/* unlocked peek: the common case is an empty stale list */
+	if (list_empty(&scrub->os_stale_items))
+		return;
+
+	spin_lock(&scrub->os_lock);
+	list_for_each_entry(cur, &scrub->os_stale_items, oii_list) {
+		if (!lu_fid_eq(fid, &cur->oii_cache.oic_fid))
+			continue;
+
+		/* leave the loop right away, \a cur is gone after this */
+		list_del(&cur->oii_list);
+		OBD_FREE_PTR(cur);
+		break;
+	}
+	spin_unlock(&scrub->os_lock);
+}
+
+/* Unlink and free every item linked on \a list (through oii_list), holding
+ * scrub->os_lock for the whole drain. \a list is empty on return.
+ */
+static void osd_scrub_ois_fini(struct lustre_scrub *scrub,
+			       struct list_head *list)
+{
+	struct osd_inconsistent_item *item;
+
+	spin_lock(&scrub->os_lock);
+	while (!list_empty(list)) {
+		item = list_entry(list->next, struct osd_inconsistent_item,
+				  oii_list);
+		list_del(&item->oii_list);
+		OBD_FREE_PTR(item);
+	}
+	spin_unlock(&scrub->os_lock);
+}
+
static inline int osd_scrub_has_window(struct lustre_scrub *scrub,
struct osd_otable_cache *ooc)
{
if (val < 0)
GOTO(out, rc = val);
- if (scrub->os_in_prior)
+ if (scrub->os_in_prior) {
oii = list_entry(oic, struct osd_inconsistent_item,
oii_cache);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE))
+ GOTO(out, rc = -ESTALE);
+ }
if (lid->oii_ino < sf->sf_pos_latest_start && oii == NULL)
GOTO(out, rc = 0);
if (sf->sf_pos_first_inconsistent == 0 ||
sf->sf_pos_first_inconsistent > lid->oii_ino)
sf->sf_pos_first_inconsistent = lid->oii_ino;
+ if (oii) {
+ osd_scrub_oi_mark_stale(scrub, oii);
+ CDEBUG(D_LFSCK,
+ "%s: fix inconsistent OI "DFID" -> %u/%u failed: %d\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen, rc);
+ }
} else {
+ if (!oii && !OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE)) {
+ osd_scrub_oi_resurrect(scrub, fid);
+ CDEBUG(D_LFSCK,
+ "%s: resurrect OI "DFID" -> %u/%u\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen);
+ } else if (oii) {
+ /* release fixed inconsistent item */
+ CDEBUG(D_LFSCK,
+ "%s: inconsistent OI "DFID" -> %u/%u fixed\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen);
+ spin_lock(&scrub->os_lock);
+ list_del_init(&oii->oii_list);
+ spin_unlock(&scrub->os_lock);
+
+ OBD_FREE_PTR(oii);
+ }
rc = 0;
}
/* There may be conflict unlink during the OI scrub,
* if happend, then remove the new added OI mapping. */
- if (ops == DTO_INDEX_INSERT && inode != NULL && !IS_ERR(inode) &&
+ if (ops == DTO_INDEX_INSERT && !IS_ERR_OR_NULL(inode) &&
unlikely(ldiskfs_test_inode_state(inode,
LDISKFS_STATE_LUSTRE_DESTROY)))
osd_scrub_refresh_mapping(info, dev, fid, lid,
(val == SCRUB_NEXT_OSTOBJ ||
val == SCRUB_NEXT_OSTOBJ_OLD) ?
OI_KNOWN_ON_OST : 0, NULL);
+
up_write(&scrub->os_rwsem);
- if (!IS_ERR(inode))
+ if (!IS_ERR_OR_NULL(inode))
iput(inode);
- if (oii != NULL) {
- spin_lock(&scrub->os_lock);
- if (likely(!list_empty(&oii->oii_list)))
- list_del(&oii->oii_list);
- spin_unlock(&scrub->os_lock);
-
- OBD_FREE_PTR(oii);
- }
-
RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
}
osd_scrub2name(scrub), scrub->os_pos_current, rc);
out:
- while (!list_empty(&scrub->os_inconsistent_items)) {
- struct osd_inconsistent_item *oii;
-
- oii = list_entry(scrub->os_inconsistent_items.next,
- struct osd_inconsistent_item, oii_list);
- list_del_init(&oii->oii_list);
- OBD_FREE_PTR(oii);
- }
-
+ osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
lu_env_fini(&env);
noenv:
spin_unlock(&scrub->os_lock);
scrub_stop(scrub);
mutex_unlock(&dev->od_otable_mutex);
+
+ osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
+ osd_scrub_ois_fini(scrub, &scrub->os_stale_items);
}
/* OI scrub setup/cleanup */
init_rwsem(&scrub->os_rwsem);
spin_lock_init(&scrub->os_lock);
INIT_LIST_HEAD(&scrub->os_inconsistent_items);
+ INIT_LIST_HEAD(&scrub->os_stale_items);
scrub->os_name = osd_name(dev);
scrub->os_auto_scrub_interval = interval;
}
};
-/* high priority inconsistent items list APIs */
-
-#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60
-
-int osd_oii_insert(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id, int insert)
-{
- struct osd_inconsistent_item *oii;
- struct osd_scrub *oscrub = &dev->od_scrub;
- struct lustre_scrub *lscrub = &oscrub->os_scrub;
- int wakeup = 0;
- ENTRY;
-
- OBD_ALLOC_PTR(oii);
- if (unlikely(oii == NULL))
- RETURN(-ENOMEM);
-
- INIT_LIST_HEAD(&oii->oii_list);
- oii->oii_cache.oic_fid = *fid;
- oii->oii_cache.oic_lid = *id;
- oii->oii_cache.oic_dev = dev;
- oii->oii_insert = insert;
-
- spin_lock(&lscrub->os_lock);
- if (lscrub->os_partial_scan) {
- __u64 now = ktime_get_real_seconds();
-
- /* If there haven't been errors in a long time,
- * decay old count until either the errors are
- * gone or we reach the current interval. */
- while (unlikely(oscrub->os_bad_oimap_count > 0 &&
- oscrub->os_bad_oimap_time +
- SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) {
- oscrub->os_bad_oimap_count >>= 1;
- oscrub->os_bad_oimap_time +=
- SCRUB_BAD_OIMAP_DECAY_INTERVAL;
- }
-
- oscrub->os_bad_oimap_time = now;
- if (++oscrub->os_bad_oimap_count >
- dev->od_full_scrub_threshold_rate)
- lscrub->os_full_scrub = 1;
- }
-
- if (!lscrub->os_running) {
- spin_unlock(&lscrub->os_lock);
- OBD_FREE_PTR(oii);
- RETURN(-EAGAIN);
- }
-
- if (list_empty(&lscrub->os_inconsistent_items))
- wakeup = 1;
- list_add_tail(&oii->oii_list, &lscrub->os_inconsistent_items);
- spin_unlock(&lscrub->os_lock);
-
- if (wakeup)
- wake_up_var(lscrub);
-
- RETURN(0);
-}
-
-int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id)
-{
- struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct osd_inconsistent_item *oii;
- ENTRY;
-
- spin_lock(&scrub->os_lock);
- list_for_each_entry(oii, &scrub->os_inconsistent_items, oii_list) {
- if (lu_fid_eq(fid, &oii->oii_cache.oic_fid)) {
- *id = oii->oii_cache.oic_lid;
- spin_unlock(&scrub->os_lock);
- RETURN(0);
- }
- }
- spin_unlock(&scrub->os_lock);
-
- RETURN(-ENOENT);
-}
-
void osd_scrub_dump(struct seq_file *m, struct osd_device *dev)
{
struct osd_scrub *scrub = &dev->od_scrub;
}
run_test 1c "LFSCK can find out and repair lost FID-in-dirent"
-test_1d() {
- [ $MDS1_VERSION -lt $(version_code 2.13.57) ] &&
- skip "MDS older than 2.13.57"
- [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
-
- check_mount_and_prep
-
- touch $DIR/$tdir/$tfile
- mkdir $DIR/$tdir/subdir
- $LFS mkdir -i 1 $DIR/$tdir/remotedir
- $LFS path2fid $DIR/$tdir
- ll_decode_linkea $DIR/$tdir/$tfile
- ll_decode_linkea $DIR/$tdir/subdir
- ll_decode_linkea $DIR/$tdir/remotedir
-
- local mntpt=$(facet_mntpt mds1)
-
- # unlink OI files to remove the stale entry
- local saved_opts=$MDS_MOUNT_OPTS
-
- stopall
- mount_fstype mds1 $mntpt
- # increase $tdir FID oid in LMA
- do_facet mds1 "getfattr -d -m trusted.lma -e hex \
- --absolute-names $mntpt/ROOT/$tdir | \
- sed -E 's/0(.{8})$/1\1/' | setfattr --restore=-"
- unmount_fstype mds1 $mntpt
- setupall
-
- # the FID oid in LMA was increased above, and it's not in OI table,
- # run scrub first to generate mapping in OI, so the following namespace
- # check can fix linkea correctly, this is not necessary normally.
- do_facet mds1 $LCTL lfsck_start -M ${MDT_DEV} -t scrub ||
- error "failed to start LFSCK for scrub!"
- wait_update_facet mds1 "$LCTL get_param -n \
- osd-*.$(facet_svc mds1).oi_scrub |
- awk '/^status/ { print \\\$2 }'" "completed" 32 ||
- error "unexpected status"
-
- $START_NAMESPACE -r -A || error "fail to start LFSCK for namespace!"
- wait_update_facet mds1 "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "unexpected status"
- }
- $LFS path2fid $DIR/$tdir
- ll_decode_linkea $DIR/$tdir/$tfile
- ll_decode_linkea $DIR/$tdir/subdir
- ll_decode_linkea $DIR/$tdir/remotedir
-
- local pfid
- local fid
-
- fid=$($LFS path2fid $DIR/$tdir)
- for f in $tfile subdir remotedir; do
- pfid=$(ll_decode_linkea $DIR/$tdir/$f |
- awk '/pfid/ { print $3 }')
- pfid=${pfid%,}
- [ "$pfid" == "$fid" ] || error "$fid in LMA != $pfid in linkea"
- done
-}
-run_test 1d "LFSCK can fix mismatch of FID in LMA and FID in child linkea"
-
test_2a() {
lfsck_prep 1 1
}
run_test 19 "LFSCK can fix multiple linked files on OST"
+# Check that an OI mapping which scrub fails to repair does not trigger the
+# OI scrub again on every later lookup: the second stat must fail without
+# restarting scrub, i.e. time_since_latest_start keeps growing.
+test_20() {
+	[ "$mds1_FSTYPE" == "ldiskfs" ] || skip "ldiskfs only test"
+
+	check_mount_and_prep
+
+	rm -rf $DIR/$tdir
+	mkdir_on_mdt0 $DIR/$tdir || error "mkdir $tdir failed"
+	#define OBD_FAIL_OSD_FID_MAPPING 0x193
+	do_facet mds1 $LCTL set_param fail_loc=0x193
+	chmod 757 $DIR/$tdir || error "chmod $tdir failed"
+
+	# restart the MDS with auto scrub enabled so that the lookup below
+	# is what triggers OI scrub
+	stop mds1
+	start mds1 $(mdsdevname 1) $MOUNT_OPTS_NOSCRUB ||
+		error "start mds1 failed"
+	scrub_enable_auto
+
+	local sec
+
+	#define OBD_FAIL_OSD_SCRUB_STALE 0x19f
+	do_facet mds1 $LCTL set_param fail_loc=0x19f
+	stat $DIR/$tdir && error "stat $tdir should fail"
+	sec=$(scrub_status 1 | awk '/^time_since_latest_start/ { print $2 }')
+	echo "OI scrub was triggerred $sec seconds ago"
+
+	sleep 5
+	# OI mappings are irreparable, which shouldn't trigger scrub again
+	stat $DIR/$tdir && error "stat $tdir should fail"
+	sec=$(scrub_status 1 | awk '/^time_since_latest_start/ { print $2 }')
+	echo "OI scrub was triggerred $sec seconds ago"
+	# at least the 5 seconds slept above must have passed since the
+	# latest scrub start, otherwise the second stat restarted it
+	(( sec >= 5 )) || error "OI scrub retriggered"
+}
+run_test 20 "Don't trigger OI scrub for irreparable oi repeatedly"
+
# restore MDS/OST size
MDSSIZE=${SAVED_MDSSIZE}
OSTSIZE=${SAVED_OSTSIZE}