#define OSD_OTABLE_MAX_HASH 0x00000000ffffffffULL
+/* high priority inconsistent items list APIs
+ *
+ * SCRUB_BAD_OIMAP_DECAY_INTERVAL is expressed in seconds:
+ * osd_scrub_oi_insert() halves os_bad_oimap_count once for every such
+ * interval that has elapsed since os_bad_oimap_time.
+ */
+#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60
+
+/**
+ * Queue an OI mapping for prioritized repair by the OI scrub thread.
+ *
+ * A new item describing \a fid -> \a id is appended to the tail of the
+ * scrub's os_inconsistent_items list, unless an item with the same FID is
+ * already queued there, in which case nothing is added. If the list was
+ * empty beforehand, wake_up_var() is called on the scrub once the item has
+ * been queued.
+ *
+ * While os_partial_scan is set, every call is also accounted as a bad OI
+ * mapping: the counter is first halved once per
+ * SCRUB_BAD_OIMAP_DECAY_INTERVAL elapsed since the previous report, then
+ * incremented, and os_full_scrub is set as soon as the counter exceeds
+ * dev->od_full_scrub_threshold_rate.
+ *
+ * \param[in] dev	device owning the scrub state
+ * \param[in] fid	FID of the inconsistent object
+ * \param[in] id	inode id copied into the queued item
+ * \param[in] insert	value stored in the item's oii_insert field
+ *
+ * \retval 0		the item was queued, or \a fid was already queued
+ * \retval -ENOMEM	the item could not be allocated
+ */
+int osd_scrub_oi_insert(struct osd_device *dev, const struct lu_fid *fid,
+			struct osd_inode_id *id, int insert)
+{
+	struct osd_scrub *oscrub = &dev->od_scrub;
+	struct lustre_scrub *lscrub = &oscrub->os_scrub;
+	struct osd_inconsistent_item *queued;
+	struct osd_inconsistent_item *item;
+	int was_empty;
+
+	ENTRY;
+
+	/* the item is allocated and filled in before os_lock is taken */
+	OBD_ALLOC_PTR(item);
+	if (unlikely(!item))
+		RETURN(-ENOMEM);
+
+	INIT_LIST_HEAD(&item->oii_list);
+	item->oii_insert = insert;
+	item->oii_cache.oic_dev = dev;
+	item->oii_cache.oic_lid = *id;
+	item->oii_cache.oic_fid = *fid;
+
+	spin_lock(&lscrub->os_lock);
+	if (lscrub->os_partial_scan) {
+		__u64 now = ktime_get_real_seconds();
+
+		/* After a long quiet period, age the old count: halve it
+		 * once per elapsed interval until it drops to zero or the
+		 * timestamp catches up with the current interval.
+		 */
+		while (unlikely(oscrub->os_bad_oimap_count > 0 &&
+				oscrub->os_bad_oimap_time +
+				SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) {
+			oscrub->os_bad_oimap_count >>= 1;
+			oscrub->os_bad_oimap_time +=
+				SCRUB_BAD_OIMAP_DECAY_INTERVAL;
+		}
+
+		oscrub->os_bad_oimap_time = now;
+		oscrub->os_bad_oimap_count++;
+		if (oscrub->os_bad_oimap_count >
+		    dev->od_full_scrub_threshold_rate)
+			lscrub->os_full_scrub = 1;
+	}
+
+	/* A wakeup is only issued for the first item put on an empty list.
+	 * Walking an empty list is a no-op, so the duplicate scan below
+	 * needs no separate emptiness branch.
+	 */
+	was_empty = list_empty(&lscrub->os_inconsistent_items);
+	list_for_each_entry(queued, &lscrub->os_inconsistent_items,
+			    oii_list) {
+		if (!lu_fid_eq(fid, &queued->oii_cache.oic_fid))
+			continue;
+
+		/* this FID is already queued: discard the new item */
+		spin_unlock(&lscrub->os_lock);
+		OBD_FREE_PTR(item);
+		RETURN(0);
+	}
+
+	list_add_tail(&item->oii_list, &lscrub->os_inconsistent_items);
+	spin_unlock(&lscrub->os_lock);
+
+	if (was_empty)
+		wake_up_var(lscrub);
+
+	RETURN(0);
+}
+
+/* Park \a oii on the scrub's os_stale_items list.
+ *
+ * Meant for an item that could not be repaired, so that it does not keep
+ * triggering scrub repeatedly. With os_lock held, the item is moved from
+ * its current list to the tail of scrub->os_stale_items.
+ */
+static inline void osd_scrub_oi_mark_stale(struct lustre_scrub *scrub,
+					   struct osd_inconsistent_item *oii)
+{
+	struct list_head *stale = &scrub->os_stale_items;
+
+	spin_lock(&scrub->os_lock);
+	list_move_tail(&oii->oii_list, stale);
+	spin_unlock(&scrub->os_lock);
+}
+
+/* Forget the stale record of \a fid, if one exists.
+ *
+ * The OI of \a fid may have been marked stale earlier; once its mapping
+ * has been scrubbed, the first item on os_stale_items whose FID equals
+ * \a fid is unlinked and freed. Nothing happens when no such item exists.
+ */
+void osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
+			    const struct lu_fid *fid)
+{
+	struct osd_inconsistent_item *pos;
+
+	/* emptiness is tested before os_lock is taken; an empty list means
+	 * there is nothing to resurrect
+	 */
+	if (list_empty(&scrub->os_stale_items))
+		return;
+
+	spin_lock(&scrub->os_lock);
+	list_for_each_entry(pos, &scrub->os_stale_items, oii_list) {
+		if (!lu_fid_eq(fid, &pos->oii_cache.oic_fid))
+			continue;
+
+		/* only the first match is removed; stop walking the list
+		 * right away since the current entry has just been freed
+		 */
+		list_del(&pos->oii_list);
+		OBD_FREE_PTR(pos);
+		break;
+	}
+	spin_unlock(&scrub->os_lock);
+}
+
+/* Release every item linked on \a list.
+ *
+ * With scrub->os_lock held, each osd_inconsistent_item found on \a list is
+ * unlinked and freed in turn, leaving \a list empty on return.
+ */
+static void osd_scrub_ois_fini(struct lustre_scrub *scrub,
+			       struct list_head *list)
+{
+	struct osd_inconsistent_item *victim;
+
+	spin_lock(&scrub->os_lock);
+	/* always take the head entry, so no lookahead cursor is needed */
+	while (!list_empty(list)) {
+		victim = list_entry(list->next, struct osd_inconsistent_item,
+				    oii_list);
+		list_del(&victim->oii_list);
+		OBD_FREE_PTR(victim);
+	}
+	spin_unlock(&scrub->os_lock);
+}
+
static inline int osd_scrub_has_window(struct lustre_scrub *scrub,
struct osd_otable_cache *ooc)
{
if (val < 0)
GOTO(out, rc = val);
- if (scrub->os_in_prior)
+ if (scrub->os_in_prior) {
oii = list_entry(oic, struct osd_inconsistent_item,
oii_cache);
+ if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE))
+ GOTO(out, rc = -ESTALE);
+ }
if (lid->oii_ino < sf->sf_pos_latest_start && oii == NULL)
GOTO(out, rc = 0);
if (sf->sf_pos_first_inconsistent == 0 ||
sf->sf_pos_first_inconsistent > lid->oii_ino)
sf->sf_pos_first_inconsistent = lid->oii_ino;
+ if (oii) {
+ osd_scrub_oi_mark_stale(scrub, oii);
+ CDEBUG(D_LFSCK,
+ "%s: fix inconsistent OI "DFID" -> %u/%u failed: %d\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen, rc);
+ }
} else {
+ if (!oii && !OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE)) {
+ osd_scrub_oi_resurrect(scrub, fid);
+ CDEBUG(D_LFSCK,
+ "%s: resurrect OI "DFID" -> %u/%u\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen);
+ } else if (oii) {
+ /* release fixed inconsistent item */
+ CDEBUG(D_LFSCK,
+ "%s: inconsistent OI "DFID" -> %u/%u fixed\n",
+ osd_dev2name(dev), PFID(fid), lid->oii_ino,
+ lid->oii_gen);
+ spin_lock(&scrub->os_lock);
+ list_del_init(&oii->oii_list);
+ spin_unlock(&scrub->os_lock);
+
+ OBD_FREE_PTR(oii);
+ }
rc = 0;
}
/* There may be conflict unlink during the OI scrub,
* if happend, then remove the new added OI mapping. */
- if (ops == DTO_INDEX_INSERT && inode != NULL && !IS_ERR(inode) &&
+ if (ops == DTO_INDEX_INSERT && !IS_ERR_OR_NULL(inode) &&
unlikely(ldiskfs_test_inode_state(inode,
LDISKFS_STATE_LUSTRE_DESTROY)))
osd_scrub_refresh_mapping(info, dev, fid, lid,
(val == SCRUB_NEXT_OSTOBJ ||
val == SCRUB_NEXT_OSTOBJ_OLD) ?
OI_KNOWN_ON_OST : 0, NULL);
+
up_write(&scrub->os_rwsem);
- if (!IS_ERR(inode))
+ if (!IS_ERR_OR_NULL(inode))
iput(inode);
- if (oii != NULL) {
- spin_lock(&scrub->os_lock);
- if (likely(!list_empty(&oii->oii_list)))
- list_del(&oii->oii_list);
- spin_unlock(&scrub->os_lock);
-
- OBD_FREE_PTR(oii);
- }
-
RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
}
osd_scrub2name(scrub), scrub->os_pos_current, rc);
out:
- while (!list_empty(&scrub->os_inconsistent_items)) {
- struct osd_inconsistent_item *oii;
-
- oii = list_entry(scrub->os_inconsistent_items.next,
- struct osd_inconsistent_item, oii_list);
- list_del_init(&oii->oii_list);
- OBD_FREE_PTR(oii);
- }
-
+ osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
lu_env_fini(&env);
noenv:
spin_unlock(&scrub->os_lock);
scrub_stop(scrub);
mutex_unlock(&dev->od_otable_mutex);
+
+ osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
+ osd_scrub_ois_fini(scrub, &scrub->os_stale_items);
}
/* OI scrub setup/cleanup */
init_rwsem(&scrub->os_rwsem);
spin_lock_init(&scrub->os_lock);
INIT_LIST_HEAD(&scrub->os_inconsistent_items);
+ INIT_LIST_HEAD(&scrub->os_stale_items);
scrub->os_name = osd_name(dev);
scrub->os_auto_scrub_interval = interval;
}
};
-/* high priority inconsistent items list APIs */
-
-#define SCRUB_BAD_OIMAP_DECAY_INTERVAL 60
-
-int osd_oii_insert(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id, int insert)
-{
- struct osd_inconsistent_item *oii;
- struct osd_scrub *oscrub = &dev->od_scrub;
- struct lustre_scrub *lscrub = &oscrub->os_scrub;
- int wakeup = 0;
- ENTRY;
-
- OBD_ALLOC_PTR(oii);
- if (unlikely(oii == NULL))
- RETURN(-ENOMEM);
-
- INIT_LIST_HEAD(&oii->oii_list);
- oii->oii_cache.oic_fid = *fid;
- oii->oii_cache.oic_lid = *id;
- oii->oii_cache.oic_dev = dev;
- oii->oii_insert = insert;
-
- spin_lock(&lscrub->os_lock);
- if (lscrub->os_partial_scan) {
- __u64 now = ktime_get_real_seconds();
-
- /* If there haven't been errors in a long time,
- * decay old count until either the errors are
- * gone or we reach the current interval. */
- while (unlikely(oscrub->os_bad_oimap_count > 0 &&
- oscrub->os_bad_oimap_time +
- SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) {
- oscrub->os_bad_oimap_count >>= 1;
- oscrub->os_bad_oimap_time +=
- SCRUB_BAD_OIMAP_DECAY_INTERVAL;
- }
-
- oscrub->os_bad_oimap_time = now;
- if (++oscrub->os_bad_oimap_count >
- dev->od_full_scrub_threshold_rate)
- lscrub->os_full_scrub = 1;
- }
-
- if (!lscrub->os_running) {
- spin_unlock(&lscrub->os_lock);
- OBD_FREE_PTR(oii);
- RETURN(-EAGAIN);
- }
-
- if (list_empty(&lscrub->os_inconsistent_items))
- wakeup = 1;
- list_add_tail(&oii->oii_list, &lscrub->os_inconsistent_items);
- spin_unlock(&lscrub->os_lock);
-
- if (wakeup)
- wake_up_var(lscrub);
-
- RETURN(0);
-}
-
-int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
- struct osd_inode_id *id)
-{
- struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
- struct osd_inconsistent_item *oii;
- ENTRY;
-
- spin_lock(&scrub->os_lock);
- list_for_each_entry(oii, &scrub->os_inconsistent_items, oii_list) {
- if (lu_fid_eq(fid, &oii->oii_cache.oic_fid)) {
- *id = oii->oii_cache.oic_lid;
- spin_unlock(&scrub->os_lock);
- RETURN(0);
- }
- }
- spin_unlock(&scrub->os_lock);
-
- RETURN(-ENOENT);
-}
-
void osd_scrub_dump(struct seq_file *m, struct osd_device *dev)
{
struct osd_scrub *scrub = &dev->od_scrub;