Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / lfsck / lfsck_engine.c
index ab0e6ff..606ea88 100644 (file)
@@ -187,7 +187,7 @@ static int lfsck_needs_scan_dir(const struct lu_env *env,
                                GOTO(out, rc = 0);
                }
 
-               dt_read_lock(env, obj, MOR_TGT_CHILD);
+               dt_read_lock(env, obj, DT_TGT_CHILD);
                if (unlikely(lfsck_is_dead_obj(obj))) {
                        dt_read_unlock(env, obj);
 
@@ -234,7 +234,7 @@ static int lfsck_load_stripe_lmv(const struct lu_env *env,
        LASSERT(lfsck->li_obj_dir == NULL);
        LASSERT(lfsck->li_lmv == NULL);
 
-       rc = lfsck_read_stripe_lmv(env, obj, lmv);
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv);
        if (rc == -ENODATA) {
                lfsck->li_obj_dir = lfsck_object_get(obj);
 
@@ -960,8 +960,19 @@ static int lfsck_master_oit_engine(const struct lu_env *env,
                                goto checkpoint;
                }
 
-               if (dt_object_exists(target))
-                       rc = lfsck_exec_oit(env, lfsck, target);
+               if (dt_object_exists(target)) {
+                       struct lu_attr la = { .la_valid = 0 };
+
+                       rc = dt_attr_get(env, target, &la);
+                       if (likely(!rc && (!(la.la_valid & LA_FLAGS) ||
+                                          !(la.la_flags & LUSTRE_ORPHAN_FL))))
+                               rc = lfsck_exec_oit(env, lfsck, target);
+                       else
+                               CDEBUG(D_INFO,
+                                      "%s: orphan "DFID", %llx/%x: rc = %d\n",
+                                      lfsck_lfsck2name(lfsck), PFID(fid),
+                                      la.la_valid, la.la_flags, rc);
+               }
 
                lfsck_object_put(env, target);
                if (rc != 0 && bk->lb_param & LPF_FAILOUT)
@@ -1016,6 +1027,10 @@ int lfsck_master_engine(void *args)
        int                       rc;
        ENTRY;
 
+       spin_lock(&lfsck->li_lock);
+       lfsck->li_task = current;
+       spin_unlock(&lfsck->li_lock);
+
        /* There will be some object verification during the LFSCK start,
         * such as the subsequent lfsck_verify_lpf(). Trigger the low layer
         * OI scrub before that to handle the potential inconsistency. */
@@ -1059,6 +1074,11 @@ int lfsck_master_engine(void *args)
               current_pid());
 
        spin_lock(&lfsck->li_lock);
+       if (unlikely(!thread_is_starting(thread))) {
+               spin_unlock(&lfsck->li_lock);
+               GOTO(fini_oit, rc = 0);
+       }
+
        thread_set_flags(thread, SVC_RUNNING);
        spin_unlock(&lfsck->li_lock);
        wake_up_all(&thread->t_ctl_waitq);
@@ -1107,6 +1127,7 @@ fini_oit:
 fini_args:
        spin_lock(&lfsck->li_lock);
        thread_set_flags(thread, SVC_STOPPED);
+       lfsck->li_task = NULL;
        spin_unlock(&lfsck->li_lock);
        wake_up_all(&thread->t_ctl_waitq);
        lfsck_thread_args_fini(lta);
@@ -1225,7 +1246,7 @@ again:
        }
        spin_unlock(&ltds->ltd_lock);
 
-       rc = ptlrpc_set_wait(set);
+       rc = ptlrpc_set_wait(env, set);
        if (rc < 0) {
                ptlrpc_set_destroy(set);
                RETURN(rc);
@@ -1320,7 +1341,7 @@ static int lfsck_assistant_notify_others(const struct lu_env *env,
                up_read(&ltds->ltd_rw_sem);
 
                /* Sync up */
-               rc = ptlrpc_set_wait(set);
+               rc = ptlrpc_set_wait(env, set);
                if (rc < 0) {
                        ptlrpc_set_destroy(set);
                        RETURN(rc);
@@ -1452,7 +1473,7 @@ again:
                }
                spin_unlock(&ltds->ltd_lock);
 
-               rc = ptlrpc_set_wait(set);
+               rc = ptlrpc_set_wait(env, set);
                if (rc < 0) {
                        ptlrpc_set_destroy(set);
                        RETURN(rc);
@@ -1526,7 +1547,7 @@ again:
                break;
        }
 
-       rc1 = ptlrpc_set_wait(set);
+       rc1 = ptlrpc_set_wait(env, set);
        ptlrpc_set_destroy(set);
 
        RETURN(rc != 0 ? rc : rc1);
@@ -1596,7 +1617,7 @@ int lfsck_assistant_engine(void *args)
                while (!list_empty(&lad->lad_req_list)) {
                        bool wakeup = false;
 
-                       if (unlikely(lad->lad_exit ||
+                       if (unlikely(test_bit(LAD_EXIT, &lad->lad_flags) ||
                                     !thread_is_running(mthread)))
                                GOTO(cleanup, rc = lad->lad_post_result);
 
@@ -1628,25 +1649,25 @@ int lfsck_assistant_engine(void *args)
 
                l_wait_event(athread->t_ctl_waitq,
                             !lfsck_assistant_req_empty(lad) ||
-                            lad->lad_exit ||
-                            lad->lad_to_post ||
-                            lad->lad_to_double_scan,
+                            test_bit(LAD_EXIT, &lad->lad_flags) ||
+                            test_bit(LAD_TO_POST, &lad->lad_flags) ||
+                            test_bit(LAD_TO_DOUBLE_SCAN, &lad->lad_flags),
                             &lwi);
 
-               if (unlikely(lad->lad_exit))
+               if (unlikely(test_bit(LAD_EXIT, &lad->lad_flags)))
                        GOTO(cleanup, rc = lad->lad_post_result);
 
                if (!list_empty(&lad->lad_req_list))
                        continue;
 
-               if (lad->lad_to_post) {
+               if (test_bit(LAD_TO_POST, &lad->lad_flags)) {
                        CDEBUG(D_LFSCK, "%s: %s LFSCK assistant thread post\n",
                               lfsck_lfsck2name(lfsck), lad->lad_name);
 
-                       if (unlikely(lad->lad_exit))
+                       if (unlikely(test_bit(LAD_EXIT, &lad->lad_flags)))
                                GOTO(cleanup, rc = lad->lad_post_result);
 
-                       lad->lad_to_post = 0;
+                       clear_bit(LAD_TO_POST, &lad->lad_flags);
                        LASSERT(lad->lad_post_result > 0);
 
                        /* Wakeup the master engine to go ahead. */
@@ -1663,10 +1684,10 @@ int lfsck_assistant_engine(void *args)
                               lad->lad_name, rc);
                }
 
-               if (lad->lad_to_double_scan) {
-                       lad->lad_to_double_scan = 0;
+               if (test_bit(LAD_TO_DOUBLE_SCAN, &lad->lad_flags)) {
+                       clear_bit(LAD_TO_DOUBLE_SCAN, &lad->lad_flags);
                        atomic_inc(&lfsck->li_double_scan_count);
-                       lad->lad_in_double_scan = 1;
+                       set_bit(LAD_IN_DOUBLE_SCAN, &lad->lad_flags);
                        wake_up_all(&mthread->t_ctl_waitq);
 
                        com->lc_new_checked = 0;
@@ -1690,7 +1711,7 @@ int lfsck_assistant_engine(void *args)
                        if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
                                GOTO(cleanup, rc = 0);
 
-                       while (lad->lad_in_double_scan) {
+                       while (test_bit(LAD_IN_DOUBLE_SCAN, &lad->lad_flags)) {
                                rc = lfsck_assistant_query_others(env, com);
                                if (lfsck_phase2_next_ready(lad))
                                        goto p2_next;
@@ -1705,12 +1726,13 @@ int lfsck_assistant_engine(void *args)
                                                           NULL, NULL);
                                rc = l_wait_event(athread->t_ctl_waitq,
                                        lfsck_phase2_next_ready(lad) ||
-                                       lad->lad_exit ||
+                                       test_bit(LAD_EXIT, &lad->lad_flags) ||
                                        !thread_is_running(mthread),
                                        &lwi);
 
-                               if (unlikely(lad->lad_exit ||
-                                            !thread_is_running(mthread)))
+                               if (unlikely(
+                                       test_bit(LAD_EXIT, &lad->lad_flags) ||
+                                       !thread_is_running(mthread)))
                                        GOTO(cleanup, rc = 0);
 
                                if (rc == -ETIMEDOUT)
@@ -1724,8 +1746,9 @@ p2_next:
                                if (rc != 0)
                                        GOTO(cleanup, rc);
 
-                               if (unlikely(lad->lad_exit ||
-                                            !thread_is_running(mthread)))
+                               if (unlikely(
+                                       test_bit(LAD_EXIT, &lad->lad_flags) ||
+                                       !thread_is_running(mthread)))
                                        GOTO(cleanup, rc = 0);
                        }
                }
@@ -1737,7 +1760,7 @@ cleanup:
        if (rc < 0)
                lad->lad_assistant_status = rc;
 
-       if (lad->lad_exit && lad->lad_post_result <= 0)
+       if (test_bit(LAD_EXIT, &lad->lad_flags) && lad->lad_post_result <= 0)
                lao->la_fill_pos(env, com, &lfsck->li_pos_checkpoint);
 
        thread_set_flags(athread, SVC_STOPPING);
@@ -1753,9 +1776,6 @@ cleanup:
        }
        spin_unlock(&lad->lad_lock);
 
-       LASSERTF(lad->lad_prefetched == 0, "unmatched prefeteched objs %d\n",
-                lad->lad_prefetched);
-
        memset(lr, 0, sizeof(*lr));
        if (rc > 0) {
                lr->lr_event = LE_PHASE2_DONE;
@@ -1814,8 +1834,8 @@ cleanup:
        /* In the forced-exit case, some requests may simply be freed without
         * verification; those objects should be re-handled on the next run.
         * So do not update the on-disk trace file in that case. */
-       if (lad->lad_in_double_scan) {
-               if (!lad->lad_exit)
+       if (test_bit(LAD_IN_DOUBLE_SCAN, &lad->lad_flags)) {
+               if (!test_bit(LAD_EXIT, &lad->lad_flags))
                        rc1 = lao->la_double_scan_result(env, com, rc);
 
                CDEBUG(D_LFSCK, "%s: LFSCK assistant phase2 scan "
@@ -1824,13 +1844,12 @@ cleanup:
        }
 
 fini:
-       if (lad->lad_in_double_scan)
+       if (test_bit(LAD_IN_DOUBLE_SCAN, &lad->lad_flags))
                atomic_dec(&lfsck->li_double_scan_count);
 
        spin_lock(&lad->lad_lock);
        lad->lad_assistant_status = (rc1 != 0 ? rc1 : rc);
        thread_set_flags(athread, SVC_STOPPED);
-       wake_up_all(&mthread->t_ctl_waitq);
        lad->lad_task = NULL;
        spin_unlock(&lad->lad_lock);
 
@@ -1839,6 +1858,7 @@ fini:
               lad->lad_assistant_status);
 
        lfsck_thread_args_fini(lta);
+       wake_up_all(&mthread->t_ctl_waitq);
 
        return rc;
 }