Whamcloud - gitweb
LU-14687 llite: Return errors for aio
[fs/lustre-release.git] / lustre / osd-zfs / osd_scrub.c
index dd0eddd..f8fb843 100644 (file)
 #define OSD_OTABLE_MAX_HASH            ((1ULL << 48) - 1)
 #define OTABLE_PREFETCH                        256
 
-#define DTO_INDEX_INSERT               1
-#define DTO_INDEX_DELETE               2
-#define DTO_INDEX_UPDATE               3
-
 static inline bool osd_scrub_has_window(struct osd_otable_it *it)
 {
        return it->ooi_prefetched < OTABLE_PREFETCH;
@@ -71,11 +67,11 @@ static inline bool osd_scrub_has_window(struct osd_otable_it *it)
  * \retval   0, changed successfully
  * \retval -ve, on error
  */
-static int osd_scrub_refresh_mapping(const struct lu_env *env,
-                                    struct osd_device *dev,
-                                    const struct lu_fid *fid,
-                                    uint64_t oid, int ops,
-                                    bool force, const char *name)
+int osd_scrub_refresh_mapping(const struct lu_env *env,
+                             struct osd_device *dev,
+                             const struct lu_fid *fid,
+                             uint64_t oid, enum dt_txn_op ops,
+                             bool force, const char *name)
 {
        struct osd_thread_info *info = osd_oti_get(env);
        struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
@@ -196,7 +192,10 @@ zget:
                        GOTO(out, rc);
                }
 
+               spin_lock(&scrub->os_lock);
                scrub->os_full_speed = 1;
+               spin_unlock(&scrub->os_lock);
+
                sf->sf_flags |= SF_INCONSISTENT;
        } else if (oid == oid2) {
                GOTO(out, rc = 0);
@@ -227,7 +226,9 @@ zget:
                }
 
 update:
+               spin_lock(&scrub->os_lock);
                scrub->os_full_speed = 1;
+               spin_unlock(&scrub->os_lock);
                sf->sf_flags |= SF_INCONSISTENT;
        }
 
@@ -278,7 +279,6 @@ out:
 static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
 {
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
        struct scrub_file *sf = &scrub->os_file;
        __u32 flags = scrub->os_start_flags;
        int rc;
@@ -304,6 +304,7 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
        if (flags & SS_RESET)
                scrub_file_reset(scrub, dev->od_uuid, 0);
 
+       spin_lock(&scrub->os_lock);
        scrub->os_partial_scan = 0;
        if (flags & SS_AUTO_FULL) {
                scrub->os_full_speed = 1;
@@ -315,7 +316,6 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
                scrub->os_full_speed = 0;
        }
 
-       spin_lock(&scrub->os_lock);
        scrub->os_in_prior = 0;
        scrub->os_waiting = 0;
        scrub->os_paused = 0;
@@ -332,15 +332,15 @@ static int osd_scrub_prep(const struct lu_env *env, struct osd_device *dev)
 
        scrub->os_pos_current = sf->sf_pos_latest_start;
        sf->sf_status = SS_SCANNING;
-       sf->sf_time_latest_start = cfs_time_current_sec();
+       sf->sf_time_latest_start = ktime_get_real_seconds();
        sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
        sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1;
        rc = scrub_file_store(env, scrub);
        if (!rc) {
                spin_lock(&scrub->os_lock);
-               thread_set_flags(thread, SVC_RUNNING);
+               scrub->os_running = 1;
                spin_unlock(&scrub->os_lock);
-               wake_up_all(&thread->t_ctl_waitq);
+               wake_up_var(scrub);
        }
        up_write(&scrub->os_rwsem);
 
@@ -360,14 +360,14 @@ static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev,
 
        down_write(&scrub->os_rwsem);
        spin_lock(&scrub->os_lock);
-       thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+       scrub->os_running = 0;
        spin_unlock(&scrub->os_lock);
        if (scrub->os_new_checked > 0) {
                sf->sf_items_checked += scrub->os_new_checked;
                scrub->os_new_checked = 0;
                sf->sf_pos_last_checkpoint = scrub->os_pos_current;
        }
-       sf->sf_time_last_checkpoint = cfs_time_current_sec();
+       sf->sf_time_last_checkpoint = ktime_get_real_seconds();
        if (result > 0) {
                sf->sf_status = SS_COMPLETED;
                if (!(sf->sf_param & SP_DRYRUN)) {
@@ -385,8 +385,9 @@ static int osd_scrub_post(const struct lu_env *env, struct osd_device *dev,
        } else {
                sf->sf_status = SS_FAILED;
        }
-       sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
-                                           scrub->os_time_last_checkpoint);
+       sf->sf_run_time += ktime_get_seconds() -
+                          scrub->os_time_last_checkpoint;
+
        rc = scrub_file_store(env, scrub);
        up_write(&scrub->os_rwsem);
 
@@ -401,7 +402,7 @@ osd_scrub_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
        spin_lock(&scrub->os_lock);
        if (osd_scrub_has_window(it) ||
            !list_empty(&scrub->os_inconsistent_items) ||
-           it->ooi_waiting || !thread_is_running(&scrub->os_thread))
+           it->ooi_waiting || kthread_should_stop())
                scrub->os_waiting = 0;
        else
                scrub->os_waiting = 1;
@@ -413,9 +414,7 @@ osd_scrub_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
 static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev,
                          struct lu_fid *fid, uint64_t *oid)
 {
-       struct l_wait_info lwi = { 0 };
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
        struct osd_otable_it *it = dev->od_otable_it;
        struct lustre_mdt_attrs *lma = NULL;
        nvlist_t *nvbuf = NULL;
@@ -424,20 +423,19 @@ static int osd_scrub_next(const struct lu_env *env, struct osd_device *dev,
        ENTRY;
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0) {
-               lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val), NULL, NULL);
-               if (likely(lwi.lwi_timeout > 0)) {
-                       l_wait_event(thread->t_ctl_waitq,
-                               !list_empty(&scrub->os_inconsistent_items) ||
-                               !thread_is_running(thread),
-                               &lwi);
-                       if (unlikely(!thread_is_running(thread)))
-                               RETURN(SCRUB_NEXT_EXIT);
-               }
+               wait_var_event_timeout(
+                       scrub,
+                       !list_empty(&scrub->os_inconsistent_items) ||
+                       kthread_should_stop(),
+                       cfs_time_seconds(cfs_fail_val));
+
+               if (kthread_should_stop())
+                       RETURN(SCRUB_NEXT_EXIT);
        }
 
        if (OBD_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
                spin_lock(&scrub->os_lock);
-               thread_set_flags(thread, SVC_STOPPING);
+               scrub->os_running = 0;
                spin_unlock(&scrub->os_lock);
                RETURN(SCRUB_NEXT_CRASH);
        }
@@ -469,14 +467,10 @@ again:
                spin_unlock(&scrub->os_lock);
        }
 
-       if (!scrub->os_full_speed && !osd_scrub_has_window(it)) {
-               memset(&lwi, 0, sizeof(lwi));
-               l_wait_event(thread->t_ctl_waitq,
-                            osd_scrub_wakeup(scrub, it),
-                            &lwi);
-       }
+       if (!scrub->os_full_speed && !osd_scrub_has_window(it))
+               wait_var_event(scrub, osd_scrub_wakeup(scrub, it));
 
-       if (unlikely(!thread_is_running(thread)))
+       if (kthread_should_stop())
                GOTO(out, rc = SCRUB_NEXT_EXIT);
 
        rc = -dmu_object_next(dev->od_os, &scrub->os_pos_current, B_FALSE, 0);
@@ -509,7 +503,7 @@ again:
                it->ooi_prefetched++;
                if (it->ooi_waiting) {
                        it->ooi_waiting = 0;
-                       wake_up_all(&thread->t_ctl_waitq);
+                       wake_up_var(scrub);
                }
                spin_unlock(&scrub->os_lock);
        }
@@ -527,7 +521,6 @@ static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev,
                          const struct lu_fid *fid, uint64_t oid, int rc)
 {
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
        struct osd_otable_it *it = dev->od_otable_it;
 
        rc = osd_scrub_check_update(env, dev, fid, oid, rc);
@@ -537,12 +530,14 @@ static int osd_scrub_exec(const struct lu_env *env, struct osd_device *dev,
                        it->ooi_prefetched++;
                        if (it->ooi_waiting) {
                                it->ooi_waiting = 0;
-                               wake_up_all(&thread->t_ctl_waitq);
+                               wake_up_var(scrub);
                        }
                        spin_unlock(&scrub->os_lock);
                }
        } else {
+               spin_lock(&scrub->os_lock);
                scrub->os_in_prior = 0;
+               spin_unlock(&scrub->os_lock);
        }
 
        if (rc)
@@ -563,7 +558,6 @@ static int osd_scrub_main(void *args)
        struct lu_env env;
        struct osd_device *dev = (struct osd_device *)args;
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
        struct lu_fid *fid;
        uint64_t oid;
        int rc = 0;
@@ -584,13 +578,13 @@ static int osd_scrub_main(void *args)
        }
 
        if (!scrub->os_full_speed) {
-               struct l_wait_info lwi = { 0 };
                struct osd_otable_it *it = dev->od_otable_it;
 
-               l_wait_event(thread->t_ctl_waitq,
-                            it->ooi_user_ready || !thread_is_running(thread),
-                            &lwi);
-               if (unlikely(!thread_is_running(thread)))
+               wait_var_event(scrub,
+                              it->ooi_user_ready ||
+                              kthread_should_stop());
+
+               if (kthread_should_stop())
                        GOTO(post, rc = 0);
 
                scrub->os_pos_current = it->ooi_pos;
@@ -601,14 +595,14 @@ static int osd_scrub_main(void *args)
               scrub->os_pos_current);
 
        fid = &osd_oti_get(&env)->oti_fid;
-       while (!rc && thread_is_running(thread)) {
+       while (!rc && !kthread_should_stop()) {
                rc = osd_scrub_next(&env, dev, fid, &oid);
                switch (rc) {
                case SCRUB_NEXT_EXIT:
                        GOTO(post, rc = 0);
                case SCRUB_NEXT_CRASH:
                        spin_lock(&scrub->os_lock);
-                       thread_set_flags(&scrub->os_thread, SVC_STOPPING);
+                       scrub->os_running = 0;
                        spin_unlock(&scrub->os_lock);
                        GOTO(out, rc = -EINVAL);
                case SCRUB_NEXT_FATAL:
@@ -641,9 +635,12 @@ out:
 
 noenv:
        spin_lock(&scrub->os_lock);
-       thread_set_flags(thread, SVC_STOPPED);
-       wake_up_all(&thread->t_ctl_waitq);
+       scrub->os_running = 0;
        spin_unlock(&scrub->os_lock);
+       if (xchg(&scrub->os_task, NULL) == NULL)
+               /* scrub_stop is waiting, we need to synchronize */
+               wait_var_event(scrub, kthread_should_stop());
+       wake_up_var(scrub);
        return rc;
 }
 
@@ -700,7 +697,7 @@ static const struct osd_lf_map osd_lf_maps[] = {
 
        /* PENDING */
        {
-               .olm_name               = "PENDING",
+               .olm_name               = MDT_ORPHAN_DIR,
        },
 
        /* ROOT */
@@ -1272,8 +1269,7 @@ static int osd_ios_ROOT_sd(const struct lu_env *env, struct osd_device *dev,
                                    sizeof(*zde) / 8, (void *)zde);
                if (rc) {
                        if (rc != -ENOENT)
-                               CWARN("%s: initial OI scrub failed to find"
-                                     "the entry %s under .lustre: rc = %d\n",
+                               CWARN("%s: initial OI scrub failed to find the entry %s under .lustre: rc = %d\n",
                                      osd_name(dev), map->olm_name, rc);
                        else if (!fid_is_zero(&map->olm_fid))
                                /* Try to remove the stale OI mapping. */
@@ -1386,14 +1382,16 @@ int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
        RETURN(rc == -EALREADY ? 0 : rc);
 }
 
-static void osd_scrub_stop(struct osd_device *dev)
+void osd_scrub_stop(struct osd_device *dev)
 {
        struct lustre_scrub *scrub = &dev->od_scrub;
        ENTRY;
 
        /* od_otable_sem: prevent concurrent start/stop */
        down(&dev->od_otable_sem);
+       spin_lock(&scrub->os_lock);
        scrub->os_paused = 1;
+       spin_unlock(&scrub->os_lock);
        scrub_stop(scrub);
        up(&dev->od_otable_sem);
 
@@ -1404,7 +1402,8 @@ static void osd_scrub_stop(struct osd_device *dev)
 
 static const char osd_scrub_name[] = "OI_scrub";
 
-int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
+int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
+                   bool resetoi)
 {
        struct osd_thread_info *info = osd_oti_get(env);
        struct lustre_scrub *scrub = &dev->od_scrub;
@@ -1416,11 +1415,10 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        bool dirty = false;
        ENTRY;
 
-       memcpy(dev->od_uuid,
+       memcpy(dev->od_uuid.b,
               &dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid,
               sizeof(dsl_dataset_phys(dev->od_os->os_dsl_dataset)->ds_guid));
        memset(&dev->od_scrub, 0, sizeof(struct lustre_scrub));
-       init_waitqueue_head(&scrub->os_thread.t_ctl_waitq);
        init_rwsem(&scrub->os_rwsem);
        spin_lock_init(&scrub->os_lock);
        INIT_LIST_HEAD(&scrub->os_inconsistent_items);
@@ -1447,6 +1445,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        if (IS_ERR_OR_NULL(obj))
                RETURN(obj ? PTR_ERR(obj) : -ENOENT);
 
+       obj->do_body_ops = &osd_body_scrub_ops;
        scrub->os_obj = obj;
        rc = scrub_file_load(env, scrub);
        if (rc == -ENOENT || rc == -EFAULT) {
@@ -1455,29 +1454,12 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        } else if (rc < 0) {
                GOTO(cleanup_obj, rc);
        } else {
-               if (memcmp(sf->sf_uuid, dev->od_uuid, 16) != 0) {
-                       struct obd_uuid *old_uuid;
-                       struct obd_uuid *new_uuid;
-
-                       OBD_ALLOC_PTR(old_uuid);
-                       OBD_ALLOC_PTR(new_uuid);
-                       if (!old_uuid || !new_uuid) {
-                               CERROR("%s: UUID has been changed, but"
-                                      "failed to allocate RAM for report\n",
-                                      osd_name(dev));
-                       } else {
-                               class_uuid_unparse(sf->sf_uuid, old_uuid);
-                               class_uuid_unparse(dev->od_uuid, new_uuid);
-                               CDEBUG(D_LFSCK, "%s: UUID has been changed "
-                                      "from %s to %s\n", osd_name(dev),
-                                      old_uuid->uuid, new_uuid->uuid);
-                       }
+               if (!uuid_equal(&sf->sf_uuid, &dev->od_uuid)) {
+                       CDEBUG(D_LFSCK,
+                              "%s: UUID has been changed from %pU to %pU\n",
+                              osd_name(dev), &sf->sf_uuid, &dev->od_uuid);
                        scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT);
                        dirty = true;
-                       if (old_uuid)
-                               OBD_FREE_PTR(old_uuid);
-                       if (new_uuid)
-                               OBD_FREE_PTR(new_uuid);
                } else if (sf->sf_status == SS_SCANNING) {
                        sf->sf_status = SS_CRASHED;
                        dirty = true;
@@ -1504,7 +1486,7 @@ int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
        }
 
        /* Initialize OI files. */
-       rc = osd_oi_init(env, dev);
+       rc = osd_oi_init(env, dev, resetoi);
        if (rc < 0)
                GOTO(cleanup_obj, rc);
 
@@ -1682,7 +1664,7 @@ osd_otable_it_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
 {
        spin_lock(&scrub->os_lock);
        if (it->ooi_pos < scrub->os_pos_current || scrub->os_waiting ||
-           !thread_is_running(&scrub->os_thread))
+           !scrub->os_running)
                it->ooi_waiting = 0;
        else
                it->ooi_waiting = 1;
@@ -1696,12 +1678,10 @@ static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di)
        struct osd_otable_it *it = (struct osd_otable_it *)di;
        struct osd_device *dev = it->ooi_dev;
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
-       struct l_wait_info lwi = { 0 };
        struct lustre_mdt_attrs *lma = NULL;
        nvlist_t *nvbuf = NULL;
-       int size = 0;
-       int rc;
+       int rc, size = 0;
+       bool locked;
        ENTRY;
 
        LASSERT(it->ooi_user_ready);
@@ -1719,11 +1699,10 @@ again:
        }
 
        if (it->ooi_pos >= scrub->os_pos_current)
-               l_wait_event(thread->t_ctl_waitq,
-                            osd_otable_it_wakeup(scrub, it),
-                            &lwi);
+               wait_var_event(scrub,
+                              osd_otable_it_wakeup(scrub, it));
 
-       if (!thread_is_running(thread) && !it->ooi_used_outside)
+       if (!scrub->os_running && !it->ooi_used_outside)
                GOTO(out, rc = 1);
 
        rc = -dmu_object_next(dev->od_os, &it->ooi_pos, B_FALSE, 0);
@@ -1738,16 +1717,20 @@ again:
 
        rc = __osd_xattr_load_by_oid(dev, it->ooi_pos, &nvbuf);
 
-       if (!scrub->os_full_speed)
+       locked = false;
+       if (!scrub->os_full_speed) {
                spin_lock(&scrub->os_lock);
+               locked = true;
+       }
        it->ooi_prefetched--;
        if (!scrub->os_full_speed) {
                if (scrub->os_waiting) {
                        scrub->os_waiting = 0;
-                       wake_up_all(&thread->t_ctl_waitq);
+                       wake_up_var(scrub);
                }
-               spin_unlock(&scrub->os_lock);
        }
+       if (locked)
+               spin_unlock(&scrub->os_lock);
 
        if (rc == -ENOENT || rc == -EEXIST || rc == -ENODATA)
                goto again;
@@ -1839,7 +1822,7 @@ static int osd_otable_it_load(const struct lu_env *env,
        it->ooi_prefetched_dnode = 0;
        it->ooi_user_ready = 1;
        if (!scrub->os_full_speed)
-               wake_up_all(&scrub->os_thread.t_ctl_waitq);
+               wake_up_var(scrub);
 
        /* Unplug OSD layer iteration by the first next() call. */
        rc = osd_otable_it_next(env, (struct dt_it *)it);
@@ -1875,7 +1858,6 @@ int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
                   const struct lu_fid *fid, uint64_t oid, bool insert)
 {
        struct lustre_scrub *scrub = &dev->od_scrub;
-       struct ptlrpc_thread *thread = &scrub->os_thread;
        struct osd_inconsistent_item *oii;
        bool wakeup = false;
        ENTRY;
@@ -1892,7 +1874,7 @@ int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
        oii->oii_insert = insert;
 
        spin_lock(&scrub->os_lock);
-       if (unlikely(!thread_is_running(thread))) {
+       if (!scrub->os_running) {
                spin_unlock(&scrub->os_lock);
                OBD_FREE_PTR(oii);
                RETURN(-EAGAIN);
@@ -1904,7 +1886,7 @@ int osd_oii_insert(const struct lu_env *env, struct osd_device *dev,
        spin_unlock(&scrub->os_lock);
 
        if (wakeup)
-               wake_up_all(&thread->t_ctl_waitq);
+               wake_up_var(scrub);
 
        RETURN(0);
 }