Whamcloud - gitweb
LU-2059 llog: MGC to use OSD API for backup logs
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_scrub.c
index ed2c27f..98a3b24 100644 (file)
@@ -35,9 +35,6 @@
  * Author: Fan Yong <yong.fan@whamcloud.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <lustre/lustre_idl.h>
@@ -100,7 +97,8 @@ static int osd_scrub_refresh_mapping(struct osd_thread_info *info,
        ENTRY;
 
        fid_cpu_to_be(oi_fid, fid);
-       osd_id_pack(oi_id, id);
+       if (id != NULL)
+               osd_id_pack(oi_id, id);
        jh = ldiskfs_journal_start_sb(osd_sb(dev),
                                      osd_dto_credits_noquota[ops]);
        if (IS_ERR(jh)) {
@@ -519,6 +517,9 @@ iget:
                        sf->sf_items_updated_prior++;
                else
                        sf->sf_items_updated++;
+
+               /* The target has been changed and needs to be reloaded. */
+               lu_object_purge(info->oti_env, osd2lu_dev(dev), fid);
        }
 
        GOTO(out, rc);
@@ -698,7 +699,8 @@ static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
        rc = osd_get_lma(info, inode, &info->oti_obj_dentry, lma);
        if (rc == 0) {
                if (fid_is_llog(&lma->lma_self_fid) ||
-                   (!scrub && fid_is_internal(&lma->lma_self_fid)))
+                   (!scrub && fid_is_internal(&lma->lma_self_fid)) ||
+                   (scrub && (lma->lma_incompat & LMAI_AGENT)))
                        rc = SCRUB_NEXT_CONTINUE;
                else
                        *fid = lma->lma_self_fid;
@@ -972,18 +974,10 @@ static int osd_inode_iteration(struct osd_thread_info *info,
                                brelse(param.bitmap);
                                RETURN(rc);
                        }
-
-                       if (preload && dev->od_otable_it->ooi_stopping) {
-                               brelse(param.bitmap);
-                               RETURN(0);
-                       }
                }
 
 next_group:
                brelse(param.bitmap);
-
-               if (preload && dev->od_otable_it->ooi_stopping)
-                       RETURN(0);
        }
 
        if (*pos > limit)
@@ -1027,8 +1021,7 @@ static int osd_scrub_main(void *args)
        int                   rc;
        ENTRY;
 
-       cfs_daemonize("OI_scrub");
-       rc = lu_env_init(&env, LCT_DT_THREAD);
+       rc = lu_env_init(&env, LCT_LOCAL);
        if (rc != 0) {
                CERROR("%.16s: OI scrub, fail to init env, rc = %d\n",
                       LDISKFS_SB(sb)->s_es->s_volume_name, rc);
@@ -1137,7 +1130,7 @@ static const struct osd_lf_map osd_lf_maps[] = {
                osd_ios_general_scan, osd_ios_varfid_fill },
 
        /* PENDING */
-       { "PENDING", { FID_SEQ_LOCAL_FILE, MDD_ORPHAN_OID, 0 }, 0, NULL, NULL },
+       { "PENDING", { 0, 0, 0 }, 0, NULL, NULL },
 
        /* ROOT */
        { "ROOT", { FID_SEQ_ROOT, 1, 0 },
@@ -1158,8 +1151,7 @@ static const struct osd_lf_map osd_lf_maps[] = {
                NULL, NULL },
 
        /* lfsck_bookmark */
-       { "lfsck_bookmark", { FID_SEQ_LOCAL_FILE, LFSCK_BOOKMARK_OID, 0 }, 0,
-               NULL, NULL },
+       { "lfsck_bookmark", { 0, 0, 0 }, 0, NULL, NULL },
 
        /* lov_objid */
        { LOV_OBJID, { FID_SEQ_LOCAL_FILE, MDD_LOV_OBJ_OID, 0 }, OLF_SHOW_NAME,
@@ -1185,17 +1177,12 @@ static const struct osd_lf_map osd_lf_maps[] = {
        { "seq_srv", { FID_SEQ_LOCAL_FILE, FID_SEQ_SRV_OID, 0 },
                OLF_SHOW_NAME, NULL, NULL },
 
-       /* LAST_GROUP */
-       { "LAST_GROUP", { FID_SEQ_LOCAL_FILE, OFD_LAST_GROUP_OID, 0 },
-               OLF_SHOW_NAME, NULL, NULL },
-
        /* health_check */
        { HEALTH_CHECK, { FID_SEQ_LOCAL_FILE, OFD_HEALTH_CHECK_OID, 0 },
                OLF_SHOW_NAME, NULL, NULL },
 
        /* lfsck_namespace */
-       { "lfsck_namespace", { FID_SEQ_LOCAL_FILE, LFSCK_BOOKMARK_OID, 0 }, 0,
-               NULL, NULL },
+       { "lfsck_namespace", { 0, 0, 0 }, 0, NULL, NULL },
 
        /* OBJECTS, upgrade from old device */
        { OBJECTS, { 0, 0, 0 }, OLF_SCAN_SUBITEMS, osd_ios_OBJECTS_scan, NULL },
@@ -1206,6 +1193,10 @@ static const struct osd_lf_map osd_lf_maps[] = {
        /* lquota_v2.group, upgrade from old device */
        { "lquota_v2.group", { 0, 0, 0 }, 0, NULL, NULL },
 
+       /* LAST_GROUP, upgrade from old device */
+       { "LAST_GROUP", { FID_SEQ_LOCAL_FILE, OFD_LAST_GROUP_OID, 0 },
+               OLF_SHOW_NAME, NULL, NULL },
+
        { NULL, { 0, 0, 0 }, 0, NULL, NULL }
 };
 
@@ -1464,7 +1455,31 @@ osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
                        }
                }
        } else {
-               rc = osd_ios_scan_one(info, dev, child->d_inode, NULL, 0);
+               /* For lustre-2.x (x <= 3), ".lustre" has NO FID-in-LMA,
+                * so the client will get an IGIF for the ".lustre" object
+                * when the MDT restarts.
+                *
+                * From the OI scrub view, when the MDT is upgraded to
+                * Lustre-2.4, it does not know whether some old clients have
+                * cached the ".lustre" IGIF during the upgrade. Two choices:
+                *
+                * 1) Generate IGIF-in-LMA and IGIF-in-OI for ".lustre".
+                *    This allows old connected clients to access ".lustre"
+                *    with the cached IGIF, but it makes "fid_is_dot_lustre()"
+                *    checks elsewhere on the MDT fail.
+                *
+                * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
+                *    for ".lustre" regardless of whether some clients have
+                *    cached the ".lustre" IGIF. This keeps the check
+                *    "fid_is_dot_lustre()" working on the MDT, although old
+                *    connected clients can no longer access ".lustre" with
+                *    the cached IGIF.
+                *
+                * It is rare for old connected clients to access
+                * ".lustre" with a cached IGIF, so we prefer
+                * solution 2). */
+               rc = osd_ios_scan_one(info, dev, child->d_inode,
+                                     &LU_DOT_LUSTRE_FID, 0);
                dput(child);
        }
 
@@ -1507,11 +1522,12 @@ osd_ios_OBJECTS_scan(struct osd_thread_info *info, struct osd_device *dev,
 static int osd_initial_OI_scrub(struct osd_thread_info *info,
                                struct osd_device *dev)
 {
-       struct osd_ios_item *item    = NULL;
-       scandir_t            scandir = osd_ios_general_scan;
-       filldir_t            filldir = osd_ios_root_fill;
-       struct dentry       *dentry  = osd_sb(dev)->s_root;
-       int                  rc;
+       struct osd_ios_item     *item    = NULL;
+       scandir_t                scandir = osd_ios_general_scan;
+       filldir_t                filldir = osd_ios_root_fill;
+       struct dentry           *dentry  = osd_sb(dev)->s_root;
+       const struct osd_lf_map *map     = osd_lf_maps;
+       int                      rc;
        ENTRY;
 
        while (1) {
@@ -1545,7 +1561,32 @@ static int osd_initial_OI_scrub(struct osd_thread_info *info,
                OBD_FREE_PTR(item);
        }
 
-       RETURN(rc);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* It may be the case that an object has been removed but its OI
+        * mapping is still in the OI file, such as "CATALOGS" after an MDT
+        * file-level backup/restore. So clean up the stale OI mappings. */
+       while (map->olm_name != NULL) {
+               struct dentry *child;
+
+               if (fid_is_zero(&map->olm_fid)) {
+                       map++;
+                       continue;
+               }
+
+               child = osd_ios_lookup_one_len(map->olm_name,
+                                              osd_sb(dev)->s_root,
+                                              strlen(map->olm_name));
+               if (!IS_ERR(child))
+                       dput(child);
+               else if (PTR_ERR(child) == -ENOENT)
+                       osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
+                                                 NULL, DTO_INDEX_DELETE);
+               map++;
+       }
+
+       RETURN(0);
 }
 
 char *osd_lf_fid2name(const struct lu_fid *fid)
@@ -1597,8 +1638,8 @@ again:
 
        scrub->os_start_flags = flags;
        thread_set_flags(thread, 0);
-       rc = cfs_create_thread(osd_scrub_main, dev, 0);
-       if (rc < 0) {
+       rc = PTR_ERR(kthread_run(osd_scrub_main, dev, "OI_scrub"));
+       if (IS_ERR_VALUE(rc)) {
                CERROR("%.16s: cannot start iteration thread, rc = %d\n",
                       LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc);
                RETURN(rc);
@@ -1755,6 +1796,13 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
                        rc = osd_scrub_start(dev);
        }
 
+       /* It is possible that dcache entries may keep objects after they are
+        * deleted by OSD. While this looks safe, it can cause object data to
+        * stay until umount, causing failures in tests that calculate free
+        * space, e.g. replay-ost-single. Since those dcache entries are not
+        * used anymore, just free them here after use. */
+       shrink_dcache_sb(sb);
+
        RETURN(rc);
 }
 
@@ -1799,7 +1847,6 @@ static struct dt_it *osd_otable_it_init(const struct lu_env *env,
 
        dev->od_otable_it = it;
        it->ooi_dev = dev;
-       it->ooi_pid = cfs_curproc_pid();
        it->ooi_cache.ooc_consumer_idx = -1;
        if (flags & DOIF_OUTUSED)
                it->ooi_used_outside = 1;
@@ -1851,30 +1898,8 @@ static int osd_otable_it_get(const struct lu_env *env,
        return 0;
 }
 
-/**
- * It is hack here:
- *
- * Sometimes the otable-based iteration driver (LFSCK) may be blocked in OSD
- * layer when someone wants to stop/pause the iteration. Under such case, we
- * need some mechanism to notify the event and wakeup the blocker.
- */
 static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di)
 {
-       struct osd_otable_it *it  = (struct osd_otable_it *)di;
-       struct osd_device    *dev = it->ooi_dev;
-
-       /* od_otable_mutex: prevent curcurrent init/fini */
-       mutex_lock(&dev->od_otable_mutex);
-       if (it->ooi_pid == cfs_curproc_pid()) {
-               dev->od_scrub.os_paused = 1;
-       } else {
-               struct ptlrpc_thread *thread = &dev->od_scrub.os_thread;
-
-               it->ooi_stopping = 1;
-               if (it->ooi_waiting)
-                       cfs_waitq_broadcast(&thread->t_ctl_waitq);
-       }
-       mutex_unlock(&dev->od_otable_mutex);
 }
 
 static inline int
@@ -1882,7 +1907,7 @@ osd_otable_it_wakeup(struct osd_scrub *scrub, struct osd_otable_it *it)
 {
        spin_lock(&scrub->os_lock);
        if (it->ooi_cache.ooc_pos_preload < scrub->os_pos_current ||
-           scrub->os_waiting || it->ooi_stopping ||
+           scrub->os_waiting ||
            !thread_is_running(&scrub->os_thread))
                it->ooi_waiting = 0;
        else
@@ -1938,9 +1963,6 @@ again:
        if (!thread_is_running(thread) && !it->ooi_used_outside)
                RETURN(1);
 
-       if (it->ooi_stopping)
-               RETURN(0);
-
        rc = osd_otable_it_preload(env, it);
        if (rc >= 0)
                goto again;