Whamcloud - gitweb
LU-17040 scrub: inconsistent item 97/51997/7
authorAlexander Boyko <alexander.boyko@hpe.com>
Thu, 17 Aug 2023 10:03:07 +0000 (06:03 -0400)
committerOleg Drokin <green@whamcloud.com>
Wed, 25 Oct 2023 18:03:26 +0000 (18:03 +0000)
When the OI does not include the fid, scrub will attempt to
fix it with a zero inode number. There is a
low chance that the fid would be found during a full inode
scan, but the inode scan requires an empty inconsistent
list. With repeated EINPROGRESS replies, the inconsistent list is
never empty.

Move fids with zero inode numbers to the stale list.

1. scrub fix to print only real OI resurrections and
   skip unrelated ones
2. out_handle debug message for when dt_locate() of a fid fails
3. debug for out requests when they were interrupted

HPE-bug-id: LUS-10780
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: Iad9e9cba90b4648eb0fe8fa6c99984ada60fde70
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51997
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_scrub.c
lustre/osp/osp_trans.c
lustre/target/out_handler.c

index 7f50f3d..3051ab1 100644 (file)
@@ -891,7 +891,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
 void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
 int osd_scrub_oi_insert(struct osd_device *dev, const struct lu_fid *fid,
                         struct osd_inode_id *id, int insert);
-void osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
+bool osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
                            const struct lu_fid *fid);
 void osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
 
index f16376f..f99798c 100644 (file)
@@ -138,23 +138,27 @@ static inline void osd_scrub_oi_mark_stale(struct lustre_scrub *scrub,
 /* OI of \a fid may be marked stale, and if its mapping is scrubbed, remove it
  * from os_stale_items list.
  */
-void osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
+bool osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
                            const struct lu_fid *fid)
 {
        struct osd_inconsistent_item *oii;
+       bool resurrected = false;
 
        if (list_empty(&scrub->os_stale_items))
-               return;
+               return resurrected;
 
        spin_lock(&scrub->os_lock);
        list_for_each_entry(oii, &scrub->os_stale_items, oii_list) {
                if (lu_fid_eq(fid, &oii->oii_cache.oic_fid)) {
                        list_del(&oii->oii_list);
                        OBD_FREE_PTR(oii);
+                       resurrected = true;
                        break;
                }
        }
        spin_unlock(&scrub->os_lock);
+
+       return resurrected;
 }
 
 static void osd_scrub_ois_fini(struct lustre_scrub *scrub,
@@ -317,7 +321,9 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
        }
 
        if (lid->oii_ino < sf->sf_pos_latest_start && !oii)
-               GOTO(out, rc = 0);
+               GOTO(skip, rc = 0);
+       if (lid->oii_ino < LDISKFS_FIRST_INO(osd_sb(dev)))
+               GOTO(out, rc = -ENOENT);
 
        if (fid_is_igif(fid))
                sf->sf_items_igif++;
@@ -355,7 +361,7 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
                        GOTO(out, rc);
 
                if (bad_inode)
-                       GOTO(out, rc = 0);
+                       GOTO(skip, rc = 0);
 
                if (val == SCRUB_NEXT_OSTOBJ)
                        sf->sf_flags |= SF_INCONSISTENT;
@@ -385,7 +391,7 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
 
                /* if new inode is bad, keep existing mapping */
                if (bad_inode)
-                       GOTO(out, rc = 0);
+                       GOTO(skip, rc = 0);
 
                /* verify existing mapping */
                inode2 = osd_iget(info, dev, lid2);
@@ -412,7 +418,7 @@ osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
                      inode->i_mtime.tv_sec == inode2->i_mtime.tv_sec) ||
                     inode->i_mtime.tv_sec < inode2->i_mtime.tv_sec)) {
                        iput(inode2);
-                       GOTO(out, rc);
+                       GOTO(skip, rc);
                }
                iput(inode2);
 delete:
@@ -451,38 +457,41 @@ delete:
 out:
        if (rc < 0) {
                sf->sf_items_failed++;
-               if (sf->sf_pos_first_inconsistent == 0 ||
-                   sf->sf_pos_first_inconsistent > lid->oii_ino)
+               if (lid->oii_ino >= LDISKFS_FIRST_INO(osd_sb(dev)) &&
+                   (sf->sf_pos_first_inconsistent == 0 ||
+                   sf->sf_pos_first_inconsistent > lid->oii_ino))
                        sf->sf_pos_first_inconsistent = lid->oii_ino;
-               if (oii) {
-                       osd_scrub_oi_mark_stale(scrub, oii);
-                       CDEBUG(D_LFSCK,
-                              "%s: fix inconsistent OI "DFID" -> %u/%u failed: %d\n",
-                              osd_dev2name(dev), PFID(fid), lid->oii_ino,
-                              lid->oii_gen, rc);
-               }
        } else {
                if (!oii && !CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE)) {
-                       osd_scrub_oi_resurrect(scrub, fid);
-                       CDEBUG(D_LFSCK,
-                              "%s: resurrect OI "DFID" -> %u/%u\n",
-                              osd_dev2name(dev), PFID(fid), lid->oii_ino,
-                              lid->oii_gen);
+                       if (osd_scrub_oi_resurrect(scrub, fid))
+                               CDEBUG(D_LFSCK,
+                                      "%s: resurrect OI "DFID" -> %u/%u\n",
+                                      osd_dev2name(dev), PFID(fid),
+                                      lid->oii_ino, lid->oii_gen);
                } else if (oii) {
                        /* release fixed inconsistent item */
                        CDEBUG(D_LFSCK,
                               "%s: inconsistent OI "DFID" -> %u/%u %s\n",
                               osd_dev2name(dev), PFID(fid), lid->oii_ino,
-                              lid->oii_gen, ops == DTO_INDEX_DELETE ?
-                              "deleted" : "fixed");
+                              lid->oii_gen, bad_inode ? "deleted" : "fixed");
                        spin_lock(&scrub->os_lock);
                        list_del_init(&oii->oii_list);
                        spin_unlock(&scrub->os_lock);
 
                        OBD_FREE_PTR(oii);
+                       oii = NULL;
                }
                rc = 0;
        }
+skip:
+       if (oii) {
+               /* something strange with item, moving to stale */
+               osd_scrub_oi_mark_stale(scrub, oii);
+               CDEBUG(D_LFSCK,
+                      "%s: fix inconsistent OI "DFID" -> %u/%u failed: %d\n",
+                      osd_dev2name(dev), PFID(fid), lid->oii_ino,
+                      lid->oii_gen, rc);
+       }
        up_write(&scrub->os_rwsem);
 
        if (!IS_ERR_OR_NULL(inode))
index e06b21a..dba32bc 100644 (file)
@@ -634,6 +634,13 @@ static int osp_update_interpret(const struct lu_env *env,
        if (env == NULL)
                env = oaua->oaua_update_env;
 
+       if (req->rq_intr && req->rq_nr_resend != 0) {
+               struct osp_update_request_sub   *ours;
+               DEBUG_REQ(D_HA, req, "dumping out request\n");
+               list_for_each_entry(ours, &our->our_req_list, ours_list) {
+                       object_update_request_dump(ours->ours_req, D_HA);
+               }
+       }
        oaua->oaua_update = NULL;
        oth = our->our_th;
        if (oaua->oaua_flow_control) {
index ca215a3..7958a0d 100644 (file)
@@ -1113,9 +1113,16 @@ int out_handle(struct tgt_session_info *tsi)
 
                        dt_obj = dt_locate_at(env, dt, &update->ou_fid,
                                dt->dd_lu_dev.ld_site->ls_top_dev, &conf);
-                       if (IS_ERR(dt_obj))
-                               GOTO(out, rc = PTR_ERR(dt_obj));
-
+                       if (IS_ERR(dt_obj)) {
+                               rc = PTR_ERR(dt_obj);
+                               CDEBUG(D_HA,
+                                      "%s: opc: 0x%x locate error fid"\
+                                      DFID": rc = %d\n",
+                                      tgt_name(tsi->tsi_tgt),
+                                      update->ou_type,
+                                      PFID(&update->ou_fid), rc);
+                               GOTO(out, rc);
+                       }
                        if (dt->dd_record_fid_accessed) {
                                struct lfsck_req_local *lrl = &tti->tti_lrl;