Whamcloud - gitweb
LU-3950 lfsck: control all LFSCK nodes via single command (2) 57/9257/5
author Fan Yong <fan.yong@intel.com>
Tue, 11 Feb 2014 04:54:06 +0000 (12:54 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 25 Feb 2014 00:17:03 +0000 (00:17 +0000)
The single command should work not only for layout LFSCK, but also for
other LFSCK components, such as namespace LFSCK, OI scrub on each node,
and DNE LFSCK in the future.

Introduce another lfsck_start option "-o" to enable orphan handling.
Currently it is used for handling orphan OST-objects. When it is enabled,
the layout LFSCK will be triggered on all servers by default.

Code cleanup and more log information.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: Iaed9ee61d3d0fced32f9dd6b2a7f6663de6d2dc7
Reviewed-on: http://review.whamcloud.com/9257
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
14 files changed:
lustre/include/lu_target.h
lustre/include/lustre/lustre_lfsck_user.h
lustre/include/lustre_lfsck.h
lustre/lfsck/lfsck_engine.c
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/mdt/mdt_handler.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_obd.c
lustre/target/tgt_handler.c
lustre/tests/sanity-lfsck.sh
lustre/utils/lctl.c
lustre/utils/lustre_lfsck.c

index f2ab070..34b8fcb 100644 (file)
@@ -282,9 +282,6 @@ void tgt_brw_unlock(struct obd_ioobj *obj, struct niobuf_remote *niob,
 int tgt_brw_read(struct tgt_session_info *tsi);
 int tgt_brw_write(struct tgt_session_info *tsi);
 int tgt_hpreq_handler(struct ptlrpc_request *req);
-void tgt_register_lfsck_start(int (*start)(const struct lu_env *,
-                                          struct dt_device *,
-                                          struct lfsck_start_param *));
 void tgt_register_lfsck_in_notify(int (*notify)(const struct lu_env *,
                                                struct dt_device *,
                                                struct lfsck_request *));
index 2232ea7..7c9751b 100644 (file)
@@ -43,11 +43,14 @@ enum lfsck_param_flags {
        /* Dryrun mode, only check without modification */
        LPF_DRYRUN      = 0x0004,
 
-       /* Start/stop LFSCK on all MDT devices. */
-       LPF_ALL_MDT     = 0x0008,
+       /* LFSCK runs on all targets. */
+       LPF_ALL_TGT     = 0x0008,
 
-       /* Broadcast the command to other MDTs. */
+       /* Broadcast the command to other MDTs. Only valid on the sponsor MDT */
        LPF_BROADCAST   = 0x0010,
+
+       /* Handle orphan objects. */
+       LPF_ORPHAN      = 0x0020,
 };
 
 enum lfsck_type {
index e992346..1209bc2 100644 (file)
@@ -111,7 +111,6 @@ enum lfsck_status {
 };
 
 struct lfsck_start_param {
-       struct ldlm_namespace   *lsp_namespace;
        struct lfsck_start      *lsp_start;
        __u32                    lsp_index;
        unsigned int             lsp_index_valid:1;
@@ -126,17 +125,19 @@ enum lfsck_events {
        LE_STOP                 = 6,
        LE_QUERY                = 7,
        LE_FID_ACCESSED         = 8,
+       LE_PEER_EXIT            = 9,
 };
 
 enum lfsck_event_flags {
        LEF_TO_OST              = 0x00000001,
        LEF_FROM_OST            = 0x00000002,
-       LEF_FORCE_STOP          = 0x00000004,
 };
 
 typedef int (*lfsck_out_notify)(const struct lu_env *env, void *data,
                                enum lfsck_events event);
 
+int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
+                            struct ldlm_namespace *ns);
 int lfsck_register(const struct lu_env *env, struct dt_device *key,
                   struct dt_device *next, struct obd_device *obd,
                   lfsck_out_notify notify, void *notify_data, bool master);
index 6f0ec24..e31faf3 100644 (file)
@@ -374,6 +374,7 @@ int lfsck_master_engine(void *args)
        struct dt_object         *oit_obj  = lfsck->li_obj_oit;
        const struct dt_it_ops   *oit_iops = &oit_obj->do_index_ops->dio_it;
        struct dt_it             *oit_di;
+       struct l_wait_info        lwi      = { 0 };
        int                       rc;
        ENTRY;
 
@@ -406,6 +407,13 @@ int lfsck_master_engine(void *args)
        spin_unlock(&lfsck->li_lock);
        wake_up_all(&thread->t_ctl_waitq);
 
+       l_wait_event(thread->t_ctl_waitq,
+                    lfsck->li_start_unplug ||
+                    !thread_is_running(thread),
+                    &lwi);
+       if (!thread_is_running(thread))
+               GOTO(fini_oit, rc = 0);
+
        if (!cfs_list_empty(&lfsck->li_list_scan) ||
            cfs_list_empty(&lfsck->li_list_double_scan))
                rc = lfsck_master_oit_engine(env, lfsck);
index 6aef96f..59215b8 100644 (file)
@@ -286,6 +286,11 @@ struct lfsck_operations {
                          int result,
                          bool init);
 
+       int (*lfsck_interpret)(const struct lu_env *env,
+                              struct ptlrpc_request *req,
+                              void *args,
+                              int rc);
+
        int (*lfsck_dump)(const struct lu_env *env,
                          struct lfsck_component *com,
                          char *buf,
@@ -492,7 +497,8 @@ struct lfsck_instance {
        unsigned int              li_oit_over:1, /* oit is finished. */
                                  li_drop_dryrun:1, /* Ever dryrun, not now. */
                                  li_master:1, /* Master instance or not. */
-                                 li_current_oit_processed:1;
+                                 li_current_oit_processed:1,
+                                 li_start_unplug:1;
 };
 
 enum lfsck_linkea_flags {
@@ -508,6 +514,8 @@ struct lfsck_async_interpret_args {
        struct lfsck_tgt_descs          *laia_ltds;
        struct lfsck_tgt_desc           *laia_ltd;
        struct lfsck_request            *laia_lr;
+       int                              laia_result;
+       unsigned int                     laia_shared:1;
 };
 
 struct lfsck_thread_args {
@@ -540,6 +548,7 @@ struct lfsck_thread_info {
        char                    lti_key[NAME_MAX + 16];
        struct lfsck_request    lti_lr;
        struct lfsck_async_interpret_args lti_laia;
+       struct lfsck_start      lti_start;
        struct lfsck_stop       lti_stop;
        ldlm_policy_data_t      lti_policy;
        struct ldlm_res_id      lti_resid;
index 9dd4d7f..251088a 100644 (file)
@@ -1120,15 +1120,17 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                if (rc != 0) {
                        struct lfsck_layout *lo = com->lc_file_ram;
 
+                       CERROR("%s: fail to notify %s %x for layout start: "
+                              "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck),
+                              (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
+                              ltd->ltd_index, rc);
                        lo->ll_flags |= LF_INCOMPLETE;
-                       lfsck_tgt_put(ltd);
                        break;
                }
 
                spin_lock(&ltds->ltd_lock);
                if (ltd->ltd_dead || ltd->ltd_layout_done) {
                        spin_unlock(&ltds->ltd_lock);
-                       lfsck_tgt_put(ltd);
                        break;
                }
 
@@ -1148,17 +1150,17 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                                              &llmd->llmd_mdt_phase1_list);
                }
                spin_unlock(&ltds->ltd_lock);
-               lfsck_tgt_put(ltd);
                break;
        case LE_STOP:
        case LE_PHASE1_DONE:
        case LE_PHASE2_DONE:
-               if (rc != 0)
-                       CERROR("%s: fail to notify %s %x for layout: "
-                              "event = %d, rc = %d\n",
-                              lfsck_lfsck2name(com->lc_lfsck),
-                              (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
-                              ltd->ltd_index, lr->lr_event, rc);
+       case LE_PEER_EXIT:
+               if (rc != 0 && rc != -EALREADY)
+                       CWARN("%s: fail to notify %s %x for layout: "
+                             "event = %d, rc = %d\n",
+                             lfsck_lfsck2name(com->lc_lfsck),
+                             (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
+                             ltd->ltd_index, lr->lr_event, rc);
                break;
        case LE_QUERY: {
                struct lfsck_reply *reply;
@@ -1168,7 +1170,6 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                        list_del_init(&ltd->ltd_layout_phase_list);
                        list_del_init(&ltd->ltd_layout_list);
                        spin_unlock(&ltds->ltd_lock);
-                       lfsck_tgt_put(ltd);
                        break;
                }
 
@@ -1182,7 +1183,6 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                        list_del_init(&ltd->ltd_layout_phase_list);
                        list_del_init(&ltd->ltd_layout_list);
                        spin_unlock(&ltds->ltd_lock);
-                       lfsck_tgt_put(ltd);
                        break;
                }
 
@@ -1212,7 +1212,6 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                        spin_unlock(&ltds->ltd_lock);
                        break;
                }
-               lfsck_tgt_put(ltd);
                break;
        }
        default:
@@ -1221,7 +1220,10 @@ static int lfsck_layout_master_async_interpret(const struct lu_env *env,
                break;
        }
 
-       lfsck_component_put(env, com);
+       if (!laia->laia_shared) {
+               lfsck_tgt_put(ltd);
+               lfsck_component_put(env, com);
+       }
 
        return 0;
 }
@@ -1238,7 +1240,6 @@ static int lfsck_layout_master_query_others(const struct lu_env *env,
        struct lfsck_tgt_descs            *ltds;
        struct lfsck_tgt_desc             *ltd;
        struct list_head                  *head;
-       __u32                              cnt   = 0;
        int                                rc    = 0;
        int                                rc1   = 0;
        ENTRY;
@@ -1254,6 +1255,7 @@ static int lfsck_layout_master_query_others(const struct lu_env *env,
        lr->lr_active = LT_LAYOUT;
        laia->laia_com = com;
        laia->laia_lr = lr;
+       laia->laia_shared = 0;
 
        if (!list_empty(&llmd->llmd_mdt_phase1_list)) {
                ltds = &lfsck->li_mdt_descs;
@@ -1292,20 +1294,15 @@ again:
                               ltd->ltd_index, rc);
                        lfsck_tgt_put(ltd);
                        rc1 = rc;
-               } else {
-                       cnt++;
                }
                spin_lock(&ltds->ltd_lock);
        }
        spin_unlock(&ltds->ltd_lock);
 
-       if (cnt > 0) {
-               rc = ptlrpc_set_wait(set);
-               if (rc < 0) {
-                       ptlrpc_set_destroy(set);
-                       RETURN(rc);
-               }
-               cnt = 0;
+       rc = ptlrpc_set_wait(set);
+       if (rc < 0) {
+               ptlrpc_set_destroy(set);
+               RETURN(rc);
        }
 
        if (!(lr->lr_flags & LEF_TO_OST) &&
@@ -1327,21 +1324,20 @@ lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
 
 static int lfsck_layout_master_notify_others(const struct lu_env *env,
                                             struct lfsck_component *com,
-                                            struct lfsck_request *lr,
-                                            __u32 flags)
+                                            struct lfsck_request *lr)
 {
        struct lfsck_thread_info          *info  = lfsck_env_info(env);
        struct lfsck_async_interpret_args *laia  = &info->lti_laia;
        struct lfsck_instance             *lfsck = com->lc_lfsck;
        struct lfsck_layout_master_data   *llmd  = com->lc_data;
        struct lfsck_layout               *lo    = com->lc_file_ram;
+       struct lfsck_bookmark             *bk    = &lfsck->li_bookmark_ram;
        struct ptlrpc_request_set         *set;
        struct lfsck_tgt_descs            *ltds;
        struct lfsck_tgt_desc             *ltd;
        struct lfsck_tgt_desc             *next;
        struct list_head                  *head;
        __u32                              idx;
-       __u32                              cnt   = 0;
        int                                rc    = 0;
        ENTRY;
 
@@ -1349,17 +1345,15 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env,
        if (set == NULL)
                RETURN(-ENOMEM);
 
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
        lr->lr_active = LT_LAYOUT;
        laia->laia_com = com;
        laia->laia_lr = lr;
-       lr->lr_flags = 0;
+       laia->laia_shared = 0;
        switch (lr->lr_event) {
        case LE_START:
-               /* Notify OSTs firstly, then other MDTs if needed. */
-               lr->lr_flags |= LEF_TO_OST;
+               /* Notify OSTs firstly, then handle other MDTs if needed. */
                ltds = &lfsck->li_ost_descs;
-
-lable1:
                laia->laia_ltds = ltds;
                down_read(&ltds->ltd_rw_sem);
                cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
@@ -1379,34 +1373,21 @@ lable1:
                                       "MDT", idx, rc);
                                lfsck_tgt_put(ltd);
                                lo->ll_flags |= LF_INCOMPLETE;
-                       } else {
-                               cnt++;
                        }
                }
                up_read(&ltds->ltd_rw_sem);
 
                /* Sync up */
-               if (cnt > 0) {
-                       rc = ptlrpc_set_wait(set);
-                       if (rc < 0) {
-                               ptlrpc_set_destroy(set);
-                               RETURN(rc);
-                       }
-                       cnt = 0;
+               rc = ptlrpc_set_wait(set);
+               if (rc < 0) {
+                       ptlrpc_set_destroy(set);
+                       RETURN(rc);
                }
 
-               if (!(flags & LPF_ALL_MDT))
+               if (!(bk->lb_param & LPF_ALL_TGT))
                        break;
 
-               ltds = &lfsck->li_mdt_descs;
-               /* The sponsor broadcasts the request to other MDTs. */
-               if (flags & LPF_BROADCAST) {
-                       flags &= ~LPF_ALL_MDT;
-                       lr->lr_flags &= ~LEF_TO_OST;
-                       goto lable1;
-               }
-
-               /* non-sponsors link other MDT targets locallly. */
+               /* link other MDT targets locallly. */
                spin_lock(&ltds->ltd_lock);
                cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
                        ltd = LTD_TGT(ltds, idx);
@@ -1421,40 +1402,37 @@ lable1:
                                      &llmd->llmd_mdt_phase1_list);
                }
                spin_unlock(&ltds->ltd_lock);
-
                break;
        case LE_STOP:
-               if (flags & LPF_BROADCAST)
-                       lr->lr_flags |= LEF_FORCE_STOP;
        case LE_PHASE2_DONE:
-               /* Notify other MDTs if needed, then the OSTs. */
-               if (flags & LPF_ALL_MDT) {
-                       /* The sponsor broadcasts the request to other MDTs. */
-                       if (flags & LPF_BROADCAST) {
-                               lr->lr_flags &= ~LEF_TO_OST;
-                               head = &llmd->llmd_mdt_list;
-                               ltds = &lfsck->li_mdt_descs;
-                               goto lable3;
-                       }
-
-                       /* non-sponsors unlink other MDT targets locallly. */
+       case LE_PEER_EXIT: {
+               /* Handle other MDTs firstly if needed, then notify the OSTs. */
+               if (bk->lb_param & LPF_ALL_TGT) {
+                       head = &llmd->llmd_mdt_list;
                        ltds = &lfsck->li_mdt_descs;
-                       spin_lock(&ltds->ltd_lock);
-                       list_for_each_entry_safe(ltd, next,
-                                                &llmd->llmd_mdt_list,
-                                                ltd_layout_list) {
-                               list_del_init(&ltd->ltd_layout_phase_list);
-                               list_del_init(&ltd->ltd_layout_list);
+                       if (lr->lr_event == LE_STOP) {
+                               /* unlink other MDT targets locallly. */
+                               spin_lock(&ltds->ltd_lock);
+                               list_for_each_entry_safe(ltd, next, head,
+                                                        ltd_layout_list) {
+                                       list_del_init(&ltd->ltd_layout_phase_list);
+                                       list_del_init(&ltd->ltd_layout_list);
+                               }
+                               spin_unlock(&ltds->ltd_lock);
+
+                               lr->lr_flags |= LEF_TO_OST;
+                               head = &llmd->llmd_ost_list;
+                               ltds = &lfsck->li_ost_descs;
+                       } else {
+                               lr->lr_flags &= ~LEF_TO_OST;
                        }
-                       spin_unlock(&ltds->ltd_lock);
+               } else {
+                       lr->lr_flags |= LEF_TO_OST;
+                       head = &llmd->llmd_ost_list;
+                       ltds = &lfsck->li_ost_descs;
                }
 
-lable2:
-               lr->lr_flags |= LEF_TO_OST;
-               head = &llmd->llmd_ost_list;
-               ltds = &lfsck->li_ost_descs;
-
-lable3:
+again:
                laia->laia_ltds = ltds;
                spin_lock(&ltds->ltd_lock);
                while (!list_empty(head)) {
@@ -1463,41 +1441,40 @@ lable3:
                        if (!list_empty(&ltd->ltd_layout_phase_list))
                                list_del_init(&ltd->ltd_layout_phase_list);
                        list_del_init(&ltd->ltd_layout_list);
+                       atomic_inc(&ltd->ltd_ref);
                        laia->laia_ltd = ltd;
                        spin_unlock(&ltds->ltd_lock);
                        rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
                                        lfsck_layout_master_async_interpret,
                                        laia, LFSCK_NOTIFY);
-                       if (rc != 0)
+                       if (rc != 0) {
                                CERROR("%s: fail to notify %s %x for layout "
                                       "stop/phase2: rc = %d\n",
                                       lfsck_lfsck2name(lfsck),
                                       (lr->lr_flags & LEF_TO_OST) ? "OST" :
                                       "MDT", ltd->ltd_index, rc);
-                       else
-                               cnt++;
+                               lfsck_tgt_put(ltd);
+                       }
                        spin_lock(&ltds->ltd_lock);
                }
                spin_unlock(&ltds->ltd_lock);
 
-               if (!(flags & LPF_BROADCAST))
-                       break;
-
-               /* Sync up */
-               if (cnt > 0) {
-                       rc = ptlrpc_set_wait(set);
-                       if (rc < 0) {
-                               ptlrpc_set_destroy(set);
-                               RETURN(rc);
-                       }
-                       cnt = 0;
+               rc = ptlrpc_set_wait(set);
+               if (rc < 0) {
+                       ptlrpc_set_destroy(set);
+                       RETURN(rc);
                }
 
-               flags &= ~LPF_BROADCAST;
-               goto lable2;
+               if (!(lr->lr_flags & LEF_TO_OST)) {
+                       lr->lr_flags |= LEF_TO_OST;
+                       head = &llmd->llmd_ost_list;
+                       ltds = &lfsck->li_ost_descs;
+                       goto again;
+               }
+               break;
+       }
        case LE_PHASE1_DONE:
                llmd->llmd_touch_gen++;
-               lr->lr_flags &= ~LEF_TO_OST;
                ltds = &lfsck->li_mdt_descs;
                laia->laia_ltds = ltds;
                spin_lock(&ltds->ltd_lock);
@@ -1512,18 +1489,19 @@ lable3:
                        list_del_init(&ltd->ltd_layout_phase_list);
                        list_add_tail(&ltd->ltd_layout_phase_list,
                                      &llmd->llmd_mdt_phase1_list);
+                       atomic_inc(&ltd->ltd_ref);
                        laia->laia_ltd = ltd;
                        spin_unlock(&ltds->ltd_lock);
                        rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
                                        lfsck_layout_master_async_interpret,
                                        laia, LFSCK_NOTIFY);
-                       if (rc != 0)
+                       if (rc != 0) {
                                CERROR("%s: fail to notify MDT %x for layout "
                                       "phase1 done: rc = %d\n",
                                       lfsck_lfsck2name(lfsck),
                                       ltd->ltd_index, rc);
-                       else
-                               cnt++;
+                               lfsck_tgt_put(ltd);
+                       }
                        spin_lock(&ltds->ltd_lock);
                }
                spin_unlock(&ltds->ltd_lock);
@@ -1535,14 +1513,9 @@ lable3:
                break;
        }
 
-       if (cnt > 0)
-               rc = ptlrpc_set_wait(set);
+       rc = ptlrpc_set_wait(set);
        ptlrpc_set_destroy(set);
 
-       if (rc == 0 && lr->lr_event == LE_START &&
-           list_empty(&llmd->llmd_ost_list))
-               rc = -ENODEV;
-
        RETURN(rc);
 }
 
@@ -2250,6 +2223,8 @@ out:
                 * mark the LFSCK as INCOMPLETE. */
                if (rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -ETIMEDOUT ||
                    rc == -EHOSTDOWN || rc == -EHOSTUNREACH) {
+                       CERROR("%s: Fail to take with OST %x: rc = %d.\n",
+                              lfsck_lfsck2name(lfsck), llr->llr_ost_idx, rc);
                        lo->ll_flags |= LF_INCOMPLETE;
                        lo->ll_objs_skipped++;
                        rc = 0;
@@ -2284,26 +2259,21 @@ static int lfsck_layout_assistant(void *args)
        struct l_wait_info               lwi     = { 0 };
        int                              rc      = 0;
        int                              rc1     = 0;
-       __u32                            flags;
        ENTRY;
 
-       if (lta->lta_lsp->lsp_start != NULL)
-               flags  = lta->lta_lsp->lsp_start->ls_flags;
-       else
-               flags = bk->lb_param;
        memset(lr, 0, sizeof(*lr));
        lr->lr_event = LE_START;
-       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
        lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
                       LSV_ASYNC_WINDOWS;
        lr->lr_speed = bk->lb_speed_limit;
        lr->lr_version = bk->lb_version;
        lr->lr_param = bk->lb_param;
        lr->lr_async_windows = bk->lb_async_windows;
+       lr->lr_flags = LEF_TO_OST;
        if (pos->lp_oit_cookie <= 1)
                lr->lr_param |= LPF_RESET;
 
-       rc = lfsck_layout_master_notify_others(env, com, lr, flags);
+       rc = lfsck_layout_master_notify_others(env, com, lr);
        if (rc != 0) {
                CERROR("%s: fail to notify others for layout start: rc = %d\n",
                       lfsck_lfsck2name(lfsck), rc);
@@ -2368,10 +2338,9 @@ static int lfsck_layout_assistant(void *args)
                        LASSERT(llmd->llmd_post_result > 0);
 
                        memset(lr, 0, sizeof(*lr));
-                       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
                        lr->lr_event = LE_PHASE1_DONE;
                        lr->lr_status = llmd->llmd_post_result;
-                       rc = lfsck_layout_master_notify_others(env, com, lr, 0);
+                       rc = lfsck_layout_master_notify_others(env, com, lr);
                        if (rc != 0)
                                CERROR("%s: failed to notify others "
                                       "for layout post: rc = %d\n",
@@ -2477,34 +2446,43 @@ cleanup1:
 
 cleanup2:
        memset(lr, 0, sizeof(*lr));
-       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
        if (rc > 0) {
                lr->lr_event = LE_PHASE2_DONE;
-               flags = 0;
                lr->lr_status = rc;
        } else if (rc == 0) {
-               lr->lr_event = LE_STOP;
-               if (lfsck->li_status == LS_PAUSED ||
-                   lfsck->li_status == LS_CO_PAUSED) {
-                       flags = 0;
-                       lr->lr_status = LS_CO_PAUSED;
-               } else if (lfsck->li_status == LS_STOPPED ||
-                        lfsck->li_status == LS_CO_STOPPED) {
-                       flags = lfsck->li_flags;
-                       if (flags & LPF_BROADCAST)
-                               lr->lr_status = LS_STOPPED;
-                       else
-                               lr->lr_status = LS_CO_STOPPED;
+               if (lfsck->li_flags & LPF_ALL_TGT) {
+                       lr->lr_event = LE_STOP;
+                       lr->lr_status = LS_STOPPED;
                } else {
-                       LBUG();
+                       lr->lr_event = LE_PEER_EXIT;
+                       switch (lfsck->li_status) {
+                       case LS_PAUSED:
+                       case LS_CO_PAUSED:
+                               lr->lr_status = LS_CO_PAUSED;
+                               break;
+                       case LS_STOPPED:
+                       case LS_CO_STOPPED:
+                               lr->lr_status = LS_CO_STOPPED;
+                               break;
+                       default:
+                               CERROR("%s: unknown status: rc = %d\n",
+                                      lfsck_lfsck2name(lfsck),
+                                      lfsck->li_status);
+                               lr->lr_status = LS_CO_FAILED;
+                               break;
+                       }
                }
        } else {
-               lr->lr_event = LE_STOP;
-               flags = 0;
-               lr->lr_status = LS_CO_FAILED;
+               if (lfsck->li_flags & LPF_ALL_TGT) {
+                       lr->lr_event = LE_STOP;
+                       lr->lr_status = LS_FAILED;
+               } else {
+                       lr->lr_event = LE_PEER_EXIT;
+                       lr->lr_status = LS_CO_FAILED;
+               }
        }
 
-       rc1 = lfsck_layout_master_notify_others(env, com, lr, flags);
+       rc1 = lfsck_layout_master_notify_others(env, com, lr);
        if (rc1 != 0) {
                CERROR("%s: failed to notify others for layout quit: rc = %d\n",
                       lfsck_lfsck2name(lfsck), rc1);
@@ -2640,7 +2618,6 @@ lfsck_layout_slave_query_master(const struct lu_env *env,
        struct lfsck_layout_slave_target *llst;
        struct obd_export                *exp;
        struct ptlrpc_request_set        *set;
-       int                               cnt   = 0;
        int                               rc    = 0;
        int                               rc1   = 0;
        ENTRY;
@@ -2687,15 +2664,12 @@ lfsck_layout_slave_query_master(const struct lu_env *env,
                        rc1 = rc;
                        lfsck_layout_llst_put(llst);
                        class_export_put(exp);
-               } else {
-                       cnt++;
                }
                spin_lock(&llsd->llsd_lock);
        }
        spin_unlock(&llsd->llsd_lock);
 
-       if (cnt > 0)
-               rc = ptlrpc_set_wait(set);
+       rc = ptlrpc_set_wait(set);
        ptlrpc_set_destroy(set);
 
        RETURN(rc1 != 0 ? rc1 : rc);
@@ -2712,7 +2686,6 @@ lfsck_layout_slave_notify_master(const struct lu_env *env,
        struct lfsck_layout_slave_target *llst;
        struct obd_export                *exp;
        struct ptlrpc_request_set        *set;
-       int                               cnt   = 0;
        int                               rc;
        ENTRY;
 
@@ -2756,17 +2729,13 @@ lfsck_layout_slave_notify_master(const struct lu_env *env,
                        CERROR("%s: slave fail to notify %s for layout: "
                               "rc = %d\n", lfsck_lfsck2name(lfsck),
                               exp->exp_obd->obd_name, rc);
-               else
-                       cnt++;
                lfsck_layout_llst_put(llst);
                class_export_put(exp);
                spin_lock(&llsd->llsd_lock);
        }
        spin_unlock(&llsd->llsd_lock);
 
-       if (cnt > 0)
-               rc = ptlrpc_set_wait(set);
-
+       ptlrpc_set_wait(set);
        ptlrpc_set_destroy(set);
 
        RETURN_EXIT;
@@ -2894,7 +2863,8 @@ static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
 }
 
 static int lfsck_layout_prep(const struct lu_env *env,
-                            struct lfsck_component *com)
+                            struct lfsck_component *com,
+                            struct lfsck_start *start)
 {
        struct lfsck_instance   *lfsck  = com->lc_lfsck;
        struct lfsck_layout     *lo     = com->lc_file_ram;
@@ -2903,7 +2873,9 @@ static int lfsck_layout_prep(const struct lu_env *env,
        fid_zero(&pos->lp_dir_parent);
        pos->lp_dir_cookie = 0;
        if (lo->ll_status == LS_COMPLETED ||
-           lo->ll_status == LS_PARTIAL) {
+           lo->ll_status == LS_PARTIAL ||
+           /* To handle orphan, must scan from the beginning. */
+           (start != NULL && start->ls_flags & LPF_ORPHAN)) {
                int rc;
 
                rc = lfsck_layout_reset(env, com, false);
@@ -2912,9 +2884,7 @@ static int lfsck_layout_prep(const struct lu_env *env,
        }
 
        down_write(&com->lc_sem);
-
        lo->ll_time_latest_start = cfs_time_current_sec();
-
        spin_lock(&lfsck->li_lock);
        if (lo->ll_flags & LF_SCANNED_ONCE) {
                if (!lfsck->li_drop_dryrun ||
@@ -2948,7 +2918,6 @@ static int lfsck_layout_prep(const struct lu_env *env,
                        pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
        }
        spin_unlock(&lfsck->li_lock);
-
        up_write(&com->lc_sem);
 
        return 0;
@@ -2958,17 +2927,16 @@ static int lfsck_layout_slave_prep(const struct lu_env *env,
                                   struct lfsck_component *com,
                                   struct lfsck_start_param *lsp)
 {
-       struct lfsck_layout             *lo     = com->lc_file_ram;
        struct lfsck_layout_slave_data  *llsd   = com->lc_data;
+       struct lfsck_start              *start  = lsp->lsp_start;
        int                              rc;
 
-       rc = lfsck_layout_prep(env, com);
-       if (rc != 0 || lo->ll_status != LS_SCANNING_PHASE1 ||
-           !lsp->lsp_index_valid)
+       rc = lfsck_layout_prep(env, com, start);
+       if (rc != 0 || !lsp->lsp_index_valid)
                return rc;
 
        rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
-       if (rc == 0 && !(lo->ll_flags & LF_INCOMPLETE)) {
+       if (rc == 0 && start != NULL && start->ls_flags & LPF_ORPHAN) {
                LASSERT(!llsd->llsd_rbtree_valid);
 
                write_lock(&llsd->llsd_rb_lock);
@@ -2991,7 +2959,7 @@ static int lfsck_layout_master_prep(const struct lu_env *env,
        long                             rc;
        ENTRY;
 
-       rc = lfsck_layout_prep(env, com);
+       rc = lfsck_layout_prep(env, com, lsp->lsp_start);
        if (rc != 0)
                RETURN(rc);
 
@@ -3094,6 +3062,9 @@ static int lfsck_layout_scan_stripes(const struct lu_env *env,
                ostid_to_fid(fid, oi, index);
                tgt = lfsck_tgt_get(ltds, index);
                if (unlikely(tgt == NULL)) {
+                       CERROR("%s: Cannot talk with OST %x which is not join "
+                              "the layout LFSCK.\n",
+                              lfsck_lfsck2name(lfsck), index);
                        lo->ll_flags |= LF_INCOMPLETE;
                        goto next;
                }
@@ -3755,6 +3726,7 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env,
 
        if (unlikely(lo->ll_status != LS_SCANNING_PHASE2)) {
                lfsck_rbtree_cleanup(env, com);
+               lfsck_layout_slave_notify_master(env, com, LE_PHASE2_DONE, 0);
                RETURN(0);
        }
 
@@ -3800,6 +3772,7 @@ done:
        rc = lfsck_layout_double_scan_result(env, com, rc);
 
        lfsck_rbtree_cleanup(env, com);
+       lfsck_layout_slave_notify_master(env, com, LE_PHASE2_DONE, rc);
        if (atomic_dec_and_test(&lfsck->li_double_scan_count))
                wake_up_all(&lfsck->li_thread.t_ctl_waitq);
 
@@ -3913,11 +3886,12 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env,
        struct lfsck_layout_master_data *llmd  = com->lc_data;
        struct lfsck_tgt_descs          *ltds;
        struct lfsck_tgt_desc           *ltd;
+       bool                             fail  = false;
        ENTRY;
 
        if (lr->lr_event != LE_PHASE1_DONE &&
            lr->lr_event != LE_PHASE2_DONE &&
-           lr->lr_event != LE_STOP)
+           lr->lr_event != LE_PEER_EXIT)
                RETURN(-EINVAL);
 
        if (lr->lr_flags & LEF_FROM_OST)
@@ -3938,7 +3912,12 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env,
                if (lr->lr_status <= 0) {
                        ltd->ltd_layout_done = 1;
                        list_del_init(&ltd->ltd_layout_list);
+                       CWARN("%s: %s %x failed/stopped at phase1: rc = %d.\n",
+                             lfsck_lfsck2name(lfsck),
+                             (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
+                             ltd->ltd_index, lr->lr_status);
                        lo->ll_flags |= LF_INCOMPLETE;
+                       fail = true;
                        break;
                }
 
@@ -3960,23 +3939,29 @@ static int lfsck_layout_master_in_notify(const struct lu_env *env,
                ltd->ltd_layout_done = 1;
                list_del_init(&ltd->ltd_layout_list);
                break;
-       case LE_STOP:
+       case LE_PEER_EXIT:
+               fail = true;
                ltd->ltd_layout_done = 1;
                list_del_init(&ltd->ltd_layout_list);
-               if (!(lr->lr_flags & LEF_FORCE_STOP))
+               if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) {
+                       CWARN("%s: the peer %s %x exit layout LFSCK.\n",
+                             lfsck_lfsck2name(lfsck),
+                             (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
+                             ltd->ltd_index);
                        lo->ll_flags |= LF_INCOMPLETE;
+               }
                break;
        default:
                break;
        }
        spin_unlock(&ltds->ltd_lock);
 
-       if (lr->lr_flags & LEF_FORCE_STOP) {
+       if (fail && lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT) {
                struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
 
                memset(stop, 0, sizeof(*stop));
                stop->ls_status = lr->lr_status;
-               stop->ls_flags = lr->lr_param;
+               stop->ls_flags = lr->lr_param & ~LPF_BROADCAST;
                lfsck_stop(env, lfsck->li_bottom, stop);
        } else if (lfsck_layout_master_to_orphan(llmd)) {
                wake_up_all(&llmd->llmd_thread.t_ctl_waitq);
@@ -4000,8 +3985,7 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env,
                RETURN(0);
        }
 
-       if (lr->lr_event != LE_PHASE2_DONE &&
-           lr->lr_event != LE_STOP)
+       if (lr->lr_event != LE_PHASE2_DONE && lr->lr_event != LE_PEER_EXIT)
                RETURN(-EINVAL);
 
        llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index);
@@ -4009,23 +3993,17 @@ static int lfsck_layout_slave_in_notify(const struct lu_env *env,
                RETURN(-ENODEV);
 
        lfsck_layout_llst_put(llst);
-       if (list_empty(&llsd->llsd_master_list)) {
-               switch (lr->lr_event) {
-               case LE_PHASE2_DONE:
-                       wake_up_all(&lfsck->li_thread.t_ctl_waitq);
-                       break;
-               case LE_STOP: {
-                       struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
+       if (list_empty(&llsd->llsd_master_list))
+               wake_up_all(&lfsck->li_thread.t_ctl_waitq);
 
-                       memset(stop, 0, sizeof(*stop));
-                       stop->ls_status = lr->lr_status;
-                       stop->ls_flags = lr->lr_param;
-                       lfsck_stop(env, lfsck->li_bottom, stop);
-                       break;
-               }
-               default:
-                       break;
-               }
+       if (lr->lr_event == LE_PEER_EXIT &&
+           lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT) {
+               struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
+
+               memset(stop, 0, sizeof(*stop));
+               stop->ls_status = lr->lr_status;
+               stop->ls_flags = lr->lr_param & ~LPF_BROADCAST;
+               lfsck_stop(env, lfsck->li_bottom, stop);
        }
 
        RETURN(0);
@@ -4051,36 +4029,43 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env,
        struct lfsck_instance             *lfsck = com->lc_lfsck;
        int                                rc;
 
-       LASSERT(list_empty(&ltd->ltd_layout_list));
-       LASSERT(list_empty(&ltd->ltd_layout_phase_list));
+       spin_lock(&ltds->ltd_lock);
+       if (list_empty(&ltd->ltd_layout_list)) {
+               LASSERT(list_empty(&ltd->ltd_layout_phase_list));
+               spin_unlock(&ltds->ltd_lock);
+
+               return 0;
+       }
+
+       list_del_init(&ltd->ltd_layout_phase_list);
+       list_del_init(&ltd->ltd_layout_list);
+       spin_unlock(&ltds->ltd_lock);
 
        memset(lr, 0, sizeof(*lr));
        lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
-       lr->lr_event = LE_STOP;
+       lr->lr_event = LE_PEER_EXIT;
        lr->lr_active = LT_LAYOUT;
-       if (ltds == &lfsck->li_ost_descs) {
+       lr->lr_status = LS_CO_PAUSED;
+       if (ltds == &lfsck->li_ost_descs)
                lr->lr_flags = LEF_TO_OST;
-       } else {
-               if (ltd->ltd_index == lfsck_dev_idx(lfsck->li_bottom))
-                       return 0;
-
-               lr->lr_flags = 0;
-       }
-       lr->lr_status = LS_CO_STOPPED;
 
        laia->laia_com = com;
        laia->laia_ltds = ltds;
+       atomic_inc(&ltd->ltd_ref);
        laia->laia_ltd = ltd;
        laia->laia_lr = lr;
+       laia->laia_shared = 0;
 
        rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
                                 lfsck_layout_master_async_interpret,
                                 laia, LFSCK_NOTIFY);
-       if (rc != 0)
+       if (rc != 0) {
                CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n",
                       lfsck_lfsck2name(lfsck),
                       (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
                       ltd->ltd_index, rc);
+               lfsck_tgt_put(ltd);
+       }
 
        return rc;
 }
@@ -4098,7 +4083,8 @@ static int lfsck_layout_slave_join(const struct lu_env *env,
        ENTRY;
 
        if (!lsp->lsp_index_valid || start == NULL ||
-           !(start->ls_flags & LPF_ALL_MDT))
+           !(start->ls_flags & LPF_ALL_TGT) ||
+           !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT))
                RETURN(-EALREADY);
 
        spin_unlock(&lfsck->li_lock);
@@ -4124,6 +4110,7 @@ static struct lfsck_operations lfsck_layout_master_ops = {
        .lfsck_exec_oit         = lfsck_layout_master_exec_oit,
        .lfsck_exec_dir         = lfsck_layout_exec_dir,
        .lfsck_post             = lfsck_layout_master_post,
+       .lfsck_interpret        = lfsck_layout_master_async_interpret,
        .lfsck_dump             = lfsck_layout_dump,
        .lfsck_double_scan      = lfsck_layout_master_double_scan,
        .lfsck_data_release     = lfsck_layout_master_data_release,
index 3fa1f93..c2f2b50 100644 (file)
@@ -966,6 +966,32 @@ int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
        return result;
 }
 
+/*
+ * Hand a shared async result to every component on the scan list and then
+ * on the double-scan list that provides an lfsck_interpret method.
+ *
+ * Both lists are walked under lfsck->li_lock, and laia_com in @args is
+ * pointed at each component right before its method is called. @args must
+ * have laia_shared set.
+ */
+static void lfsck_interpret(const struct lu_env *env,
+                           struct lfsck_instance *lfsck,
+                           struct ptlrpc_request *req, void *args, int result)
+{
+       struct lfsck_async_interpret_args *shared = args;
+       struct list_head                  *heads[2];
+       struct lfsck_component            *pos;
+       int                                i;
+
+       LASSERT(shared->laia_shared);
+
+       /* Components still scanning come first, then those in double scan. */
+       heads[0] = &lfsck->li_list_scan;
+       heads[1] = &lfsck->li_list_double_scan;
+
+       spin_lock(&lfsck->li_lock);
+       for (i = 0; i < 2; i++) {
+               list_for_each_entry(pos, heads[i], lc_link) {
+                       if (pos->lc_ops->lfsck_interpret == NULL)
+                               continue;
+
+                       shared->laia_com = pos;
+                       pos->lc_ops->lfsck_interpret(env, req, shared, result);
+               }
+       }
+       spin_unlock(&lfsck->li_lock);
+}
+
 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
 {
        struct lfsck_component *com;
@@ -991,46 +1017,46 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
        return rc1 != 0 ? rc1 : rc;
 }
 
-int lfsck_stop_notify(const struct lu_env *env, struct lfsck_instance *lfsck,
-                     struct lfsck_tgt_descs *ltds, struct lfsck_tgt_desc *ltd)
+/*
+ * Ask the component of the given @type, looked up on the scan list and then
+ * on the double-scan list, to notify target @ltd through its
+ * lfsck_stop_notify method, and wait for the request set to complete.
+ *
+ * Returns 0 when there is no such component or it has no such method,
+ * -ENOMEM when the request set cannot be allocated, otherwise the result of
+ * the component's lfsck_stop_notify method or of ptlrpc_set_wait().
+ */
+static int lfsck_stop_notify(const struct lu_env *env,
+                            struct lfsck_instance *lfsck,
+                            struct lfsck_tgt_descs *ltds,
+                            struct lfsck_tgt_desc *ltd, __u16 type)
 {
        struct ptlrpc_request_set *set;
        struct lfsck_component    *com;
-       int                        cnt = 0;
        int                        rc  = 0;
-       int                        rc1 = 0;
+       ENTRY;
 
-       set = ptlrpc_prep_set();
-       if (set == NULL)
-               return -ENOMEM;
+       spin_lock(&lfsck->li_lock);
+       com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan);
+       if (com == NULL)
+               com = __lfsck_component_find(lfsck, type,
+                                            &lfsck->li_list_double_scan);
+       if (com != NULL)
+               lfsck_component_get(com);
+       /* The lookup is done and a reference is held: release li_lock. */
+       spin_unlock(&lfsck->li_lock);
 
-       list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
+       if (com != NULL) {
                if (com->lc_ops->lfsck_stop_notify != NULL) {
-                       rc = com->lc_ops->lfsck_stop_notify(env, com, ltds,
-                                                           ltd, set);
-                       if (rc != 0)
-                               rc1 = rc;
-                       else
-                               cnt++;
-               }
-       }
+                       set = ptlrpc_prep_set();
+                       if (set == NULL) {
+                               lfsck_component_put(env, com);
+
+                               RETURN(-ENOMEM);
+                       }
 
-       list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
-               if (com->lc_ops->lfsck_stop_notify != NULL) {
                        rc = com->lc_ops->lfsck_stop_notify(env, com, ltds,
                                                            ltd, set);
-                       if (rc != 0)
-                               rc1 = rc;
-                       else
-                               cnt++;
+                       if (rc == 0)
+                               rc = ptlrpc_set_wait(set);
+
+                       ptlrpc_set_destroy(set);
                }
-       }
 
-       if (cnt > 0)
-               rc = ptlrpc_set_wait(set);
-       ptlrpc_set_destroy(set);
+               lfsck_component_put(env, com);
+       }
 
-       return rc1 != 0 ? rc1 : rc;
+       RETURN(rc);
 }
 
 void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
@@ -1051,6 +1077,23 @@ void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
        }
 }
 
+/*
+ * Reply interpreter for requests that are not bound to a single component.
+ *
+ * The owning LFSCK instance is derived from laia_ltds, which is taken to be
+ * the instance's li_mdt_descs member. The result is passed on through
+ * lfsck_interpret() and the target reference carried in laia_ltd is dropped.
+ * A non-zero @rc is stored in laia_result unless -EALREADY is already
+ * recorded there. Always returns 0.
+ */
+static int lfsck_async_interpret(const struct lu_env *env,
+                                struct ptlrpc_request *req,
+                                void *args, int rc)
+{
+       struct lfsck_async_interpret_args *ctx = args;
+       struct lfsck_instance             *owner;
+
+       owner = container_of0(ctx->laia_ltds, struct lfsck_instance,
+                             li_mdt_descs);
+       lfsck_interpret(env, owner, req, ctx, rc);
+       lfsck_tgt_put(ctx->laia_ltd);
+
+       /* Keep a recorded -EALREADY; otherwise remember the failure. */
+       if (rc == 0 || ctx->laia_result == -EALREADY)
+               return 0;
+
+       ctx->laia_result = rc;
+
+       return 0;
+}
+
 int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
                        struct lfsck_request *lr,
                        struct ptlrpc_request_set *set,
@@ -1096,7 +1139,8 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
 
        laia = ptlrpc_req_async_args(req);
        *laia = *(struct lfsck_async_interpret_args *)args;
-       lfsck_component_get(laia->laia_com);
+       if (laia->laia_com != NULL)
+               lfsck_component_get(laia->laia_com);
        req->rq_interpret_reply = interpreter;
        ptlrpc_set_add_req(set, req);
 
@@ -1256,6 +1300,195 @@ int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
 }
 EXPORT_SYMBOL(lfsck_dump);
 
+/*
+ * Send an LE_STOP notification to every MDT set in the li_mdt_descs target
+ * bitmap and wait for all replies.
+ *
+ * The caller must have LPF_BROADCAST set in stop->ls_flags. A send failure
+ * for one target is reported through lfsck_interpret() and logged, and the
+ * loop carries on with the remaining targets. -EALREADY is not treated as a
+ * failure.
+ *
+ * Returns -ENOMEM if the request set cannot be allocated; otherwise the
+ * wait/reply error if there is one, else the last send error, else 0.
+ */
+static int lfsck_stop_all(const struct lu_env *env,
+                         struct lfsck_instance *lfsck,
+                         struct lfsck_stop *stop)
+{
+       struct lfsck_thread_info          *info    = lfsck_env_info(env);
+       struct lfsck_async_interpret_args *laia    = &info->lti_laia;
+       struct lfsck_request              *lr      = &info->lti_lr;
+       struct lfsck_bookmark             *bk      = &lfsck->li_bookmark_ram;
+       struct lfsck_tgt_descs            *ltds    = &lfsck->li_mdt_descs;
+       struct lfsck_tgt_desc             *ltd;
+       struct ptlrpc_request_set         *set;
+       __u32                              idx;
+       int                                send_rc = 0;
+       int                                rc      = 0;
+       ENTRY;
+
+       LASSERT(stop->ls_flags & LPF_BROADCAST);
+
+       set = ptlrpc_prep_set();
+       if (unlikely(set == NULL)) {
+               CERROR("%s: cannot allocate memory for stop LFSCK on "
+                      "all targets\n", lfsck_lfsck2name(lfsck));
+
+               RETURN(-ENOMEM);
+       }
+
+       /* Arguments shared by every request; only laia_ltd changes below. */
+       laia->laia_com = NULL;
+       laia->laia_ltds = ltds;
+       laia->laia_lr = lr;
+       laia->laia_result = 0;
+       laia->laia_shared = 1;
+
+       /* One request body is reused for all targets. */
+       memset(lr, 0, sizeof(*lr));
+       lr->lr_event = LE_STOP;
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+       lr->lr_status = stop->ls_status;
+       lr->lr_version = bk->lb_version;
+       lr->lr_active = LFSCK_TYPES_ALL;
+       lr->lr_param = stop->ls_flags;
+
+       down_read(&ltds->ltd_rw_sem);
+       cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+               ltd = lfsck_tgt_get(ltds, idx);
+               LASSERT(ltd != NULL);
+
+               laia->laia_ltd = ltd;
+               rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
+                                        lfsck_async_interpret, laia,
+                                        LFSCK_NOTIFY);
+               if (rc != 0) {
+                       /* Nothing was queued: report it and drop our ref. */
+                       lfsck_interpret(env, lfsck, NULL, laia, rc);
+                       lfsck_tgt_put(ltd);
+                       CWARN("%s: cannot notify MDT %x for LFSCK stop: "
+                             "rc = %d\n", lfsck_lfsck2name(lfsck), idx, rc);
+                       send_rc = rc;
+               }
+       }
+       up_read(&ltds->ltd_rw_sem);
+
+       rc = ptlrpc_set_wait(set);
+       ptlrpc_set_destroy(set);
+
+       if (rc == 0)
+               rc = laia->laia_result;
+
+       /* No wait/reply failure (or only -EALREADY): report send errors. */
+       if (rc == 0 || rc == -EALREADY)
+               RETURN(send_rc);
+
+       CWARN("%s: fail to stop LFSCK on some MDTs: rc = %d\n",
+             lfsck_lfsck2name(lfsck), rc);
+
+       RETURN(rc);
+}
+
+/*
+ * Send an LE_START notification to every MDT set in the li_mdt_descs target
+ * bitmap and wait for the replies. The request carries the bookmark's speed
+ * limit, version and async window settings plus the caller's active types
+ * and flags.
+ *
+ * The caller must have LPF_BROADCAST set in start->ls_flags. With
+ * LPF_FAILOUT set in the bookmark parameters any failure is returned to the
+ * caller, and a wait/reply failure other than -EALREADY also triggers
+ * lfsck_stop_all(). Without LPF_FAILOUT failures are only logged and 0 is
+ * returned.
+ */
+static int lfsck_start_all(const struct lu_env *env,
+                          struct lfsck_instance *lfsck,
+                          struct lfsck_start *start)
+{
+       struct lfsck_thread_info          *info   = lfsck_env_info(env);
+       struct lfsck_request              *lr     = &info->lti_lr;
+       struct lfsck_async_interpret_args *laia   = &info->lti_laia;
+       struct ptlrpc_request_set         *set;
+       struct lfsck_tgt_descs            *ltds   = &lfsck->li_mdt_descs;
+       struct lfsck_tgt_desc             *ltd;
+       struct lfsck_bookmark             *bk     = &lfsck->li_bookmark_ram;
+       __u32                              idx;
+       int                                rc     = 0;
+       ENTRY;
+
+       LASSERT(start->ls_flags & LPF_BROADCAST);
+
+       set = ptlrpc_prep_set();
+       if (unlikely(set == NULL)) {
+               if (bk->lb_param & LPF_FAILOUT) {
+                       CERROR("%s: cannot allocate memory for start LFSCK on "
+                              "all targets, failout.\n",
+                              lfsck_lfsck2name(lfsck));
+
+                       RETURN(-ENOMEM);
+               } else {
+                       CWARN("%s: cannot allocate memory for start LFSCK on "
+                             "all targets, partly scan.\n",
+                             lfsck_lfsck2name(lfsck));
+
+                       RETURN(0);
+               }
+       }
+
+       /* One request body is reused for all targets. */
+       memset(lr, 0, sizeof(*lr));
+       lr->lr_event = LE_START;
+       lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
+       lr->lr_speed = bk->lb_speed_limit;
+       lr->lr_version = bk->lb_version;
+       lr->lr_active = start->ls_active;
+       lr->lr_param = start->ls_flags;
+       lr->lr_async_windows = bk->lb_async_windows;
+       lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
+                      LSV_ASYNC_WINDOWS;
+
+       /* Arguments shared by every request; only laia_ltd changes below. */
+       laia->laia_com = NULL;
+       laia->laia_ltds = ltds;
+       laia->laia_lr = lr;
+       laia->laia_result = 0;
+       laia->laia_shared = 1;
+
+       down_read(&ltds->ltd_rw_sem);
+       cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+               ltd = lfsck_tgt_get(ltds, idx);
+               LASSERT(ltd != NULL);
+
+               laia->laia_ltd = ltd;
+               ltd->ltd_layout_done = 0;
+               rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
+                                        lfsck_async_interpret, laia,
+                                        LFSCK_NOTIFY);
+               if (rc != 0) {
+                       /* Nothing was queued: report it and drop our ref. */
+                       lfsck_interpret(env, lfsck, NULL, laia, rc);
+                       lfsck_tgt_put(ltd);
+                       if (bk->lb_param & LPF_FAILOUT) {
+                               CERROR("%s: cannot notify MDT %x for LFSCK "
+                                      "start, failout: rc = %d\n",
+                                      lfsck_lfsck2name(lfsck), idx, rc);
+                               break;
+                       } else {
+                               CWARN("%s: cannot notify MDT %x for LFSCK "
+                                     "start, partly scan: rc = %d\n",
+                                     lfsck_lfsck2name(lfsck), idx, rc);
+                               rc = 0;
+                       }
+               }
+       }
+       up_read(&ltds->ltd_rw_sem);
+
+       /* Only reachable on failout: a send failure ended the loop early. */
+       if (rc != 0) {
+               ptlrpc_set_destroy(set);
+
+               RETURN(rc);
+       }
+
+       rc = ptlrpc_set_wait(set);
+       ptlrpc_set_destroy(set);
+
+       if (rc == 0)
+               rc = laia->laia_result;
+
+       if (rc != 0) {
+               if (bk->lb_param & LPF_FAILOUT) {
+                       struct lfsck_stop *stop = &info->lti_stop;
+
+                       CERROR("%s: cannot start LFSCK on some MDTs, "
+                              "stop all: rc = %d\n",
+                              lfsck_lfsck2name(lfsck), rc);
+                       if (rc != -EALREADY) {
+                               /*
+                                * lti_stop is a reused per-thread buffer:
+                                * clear it so no stale field is sent along.
+                                */
+                               memset(stop, 0, sizeof(*stop));
+                               stop->ls_status = LS_FAILED;
+                               stop->ls_flags = LPF_ALL_TGT | LPF_BROADCAST;
+                               lfsck_stop_all(env, lfsck, stop);
+                       }
+               } else {
+                       CWARN("%s: cannot start LFSCK on some MDTs, "
+                             "partly scan: rc = %d\n",
+                             lfsck_lfsck2name(lfsck), rc);
+                       rc = 0;
+               }
+       }
+
+       RETURN(rc);
+}
+
 int lfsck_start(const struct lu_env *env, struct dt_device *key,
                struct lfsck_start_param *lsp)
 {
@@ -1277,6 +1510,10 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        if (unlikely(lfsck == NULL))
                RETURN(-ENODEV);
 
+       /* System is not ready, try again later. */
+       if (unlikely(lfsck->li_namespace == NULL))
+               GOTO(put, rc = -EAGAIN);
+
        /* start == NULL means auto trigger paused LFSCK. */
        if ((start == NULL) &&
            (cfs_list_empty(&lfsck->li_list_scan) ||
@@ -1318,9 +1555,9 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        }
        spin_unlock(&lfsck->li_lock);
 
-       lfsck->li_namespace = lsp->lsp_namespace;
        lfsck->li_status = 0;
        lfsck->li_oit_over = 0;
+       lfsck->li_start_unplug = 0;
        lfsck->li_drop_dryrun = 0;
        lfsck->li_new_scanned = 0;
 
@@ -1328,6 +1565,13 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
        if (start == NULL)
                goto trigger;
 
+       if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) {
+               CERROR("%s: only allow to specify '-A | -o' via MDS\n",
+                      lfsck_lfsck2name(lfsck));
+
+               GOTO(out, rc = -EPERM);
+       }
+
        start->ls_version = bk->lb_version;
        if (start->ls_valid & LSV_SPEED_LIMIT) {
                __lfsck_set_speed(lfsck, start->ls_speed_limit);
@@ -1373,13 +1617,23 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key,
                }
        }
 
-       if (bk->lb_param & LPF_ALL_MDT &&
-           !(start->ls_flags & LPF_ALL_MDT)) {
-               bk->lb_param &= ~LPF_ALL_MDT;
+       if (bk->lb_param & LPF_ALL_TGT &&
+           !(start->ls_flags & LPF_ALL_TGT)) {
+               bk->lb_param &= ~LPF_ALL_TGT;
+               dirty = true;
+       } else if (!(bk->lb_param & LPF_ALL_TGT) &&
+                  start->ls_flags & LPF_ALL_TGT) {
+               bk->lb_param |= LPF_ALL_TGT;
+               dirty = true;
+       }
+
+       if (bk->lb_param & LPF_ORPHAN &&
+           !(start->ls_flags & LPF_ORPHAN)) {
+               bk->lb_param &= ~LPF_ORPHAN;
                dirty = true;
-       } else if (!(bk->lb_param & LPF_ALL_MDT) &&
-                  start->ls_flags & LPF_ALL_MDT) {
-               bk->lb_param |= LPF_ALL_MDT;
+       } else if (!(bk->lb_param & LPF_ORPHAN) &&
+                  start->ls_flags & LPF_ORPHAN) {
+               bk->lb_param |= LPF_ORPHAN;
                dirty = true;
        }
 
@@ -1467,21 +1721,54 @@ trigger:
                CERROR("%s: cannot start LFSCK thread: rc = %ld\n",
                       lfsck_lfsck2name(lfsck), rc);
                lfsck_thread_args_fini(lta);
+
+               GOTO(out, rc);
+       }
+
+       l_wait_event(thread->t_ctl_waitq,
+                    thread_is_running(thread) ||
+                    thread_is_stopped(thread),
+                    &lwi);
+       if (start == NULL || !(start->ls_flags & LPF_BROADCAST)) {
+               lfsck->li_start_unplug = 1;
+               wake_up_all(&thread->t_ctl_waitq);
+
+               GOTO(out, rc = 0);
+       }
+
+       /* release lfsck::li_mutex to avoid deadlock. */
+       mutex_unlock(&lfsck->li_mutex);
+       rc = lfsck_start_all(env, lfsck, start);
+       if (rc != 0) {
+               spin_lock(&lfsck->li_lock);
+               if (thread_is_stopped(thread)) {
+                       spin_unlock(&lfsck->li_lock);
+               } else {
+                       lfsck->li_status = LS_FAILED;
+                       lfsck->li_flags = 0;
+                       thread_set_flags(thread, SVC_STOPPING);
+                       spin_unlock(&lfsck->li_lock);
+
+                       lfsck->li_start_unplug = 1;
+                       wake_up_all(&thread->t_ctl_waitq);
+                       l_wait_event(thread->t_ctl_waitq,
+                                    thread_is_stopped(thread),
+                                    &lwi);
+               }
        } else {
-               rc = 0;
-               l_wait_event(thread->t_ctl_waitq,
-                            thread_is_running(thread) ||
-                            thread_is_stopped(thread),
-                            &lwi);
+               lfsck->li_start_unplug = 1;
+               wake_up_all(&thread->t_ctl_waitq);
        }
 
-       GOTO(out, rc);
+       GOTO(put, rc);
 
 out:
        mutex_unlock(&lfsck->li_mutex);
+
 put:
        lfsck_instance_put(env, lfsck);
-       return (rc < 0 ? rc : 0);
+
+       return rc < 0 ? rc : 0;
 }
 EXPORT_SYMBOL(lfsck_start);
 
@@ -1492,6 +1779,7 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
        struct ptlrpc_thread    *thread;
        struct l_wait_info       lwi    = { 0 };
        int                      rc     = 0;
+       int                      rc1    = 0;
        ENTRY;
 
        lfsck = lfsck_instance_find(key, true, false);
@@ -1499,6 +1787,18 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key,
                RETURN(-ENODEV);
 
        thread = &lfsck->li_thread;
+       /* release lfsck::li_mutex to avoid deadlock. */
+       if (stop != NULL && stop->ls_flags & LPF_BROADCAST) {
+               if (!lfsck->li_master) {
+                       CERROR("%s: only allow to specify '-A' via MDS\n",
+                              lfsck_lfsck2name(lfsck));
+
+                       GOTO(out, rc = -EPERM);
+               }
+
+               rc1 = lfsck_stop_all(env, lfsck, stop);
+       }
+
        mutex_lock(&lfsck->li_mutex);
        spin_lock(&lfsck->li_lock);
        if (thread_is_init(thread) || thread_is_stopped(thread)) {
@@ -1528,41 +1828,67 @@ out:
        mutex_unlock(&lfsck->li_mutex);
        lfsck_instance_put(env, lfsck);
 
-       return rc;
+       return rc != 0 ? rc : rc1;
 }
 EXPORT_SYMBOL(lfsck_stop);
 
+/*
+ * Handle an LFSCK event notification described by @lr.
+ *
+ * LE_START and LE_STOP are turned into local lfsck_start()/lfsck_stop()
+ * calls with LPF_BROADCAST cleared from the flags. LE_PHASE1_DONE,
+ * LE_PHASE2_DONE, LE_FID_ACCESSED and LE_PEER_EXIT are passed to the
+ * lfsck_in_notify method of the component selected by lr->lr_active.
+ *
+ * Returns -EOPNOTSUPP for any other event or when the component is not
+ * found, -ENODEV when no LFSCK instance is found for @key, otherwise the
+ * result of the call the event was dispatched to.
+ */
 int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
                     struct lfsck_request *lr)
 {
-       struct lfsck_instance  *lfsck;
-       struct lfsck_component *com;
-       int                     rc;
+       int rc = -EOPNOTSUPP;
        ENTRY;
 
        switch (lr->lr_event) {
-       case LE_STOP:
+       case LE_START: {
+               struct lfsck_start       *start = &lfsck_env_info(env)->lti_start;
+               struct lfsck_start_param  lsp;
+
+               /* Rebuild the start request from the notification fields. */
+               memset(start, 0, sizeof(*start));
+               start->ls_valid = lr->lr_valid;
+               start->ls_speed_limit = lr->lr_speed;
+               start->ls_version = lr->lr_version;
+               start->ls_active = lr->lr_active;
+               start->ls_flags = lr->lr_param & ~LPF_BROADCAST;
+               start->ls_async_windows = lr->lr_async_windows;
+
+               /*
+                * NOTE(review): lsp is not zeroed and only these three
+                * members are set; confirm lfsck_start() reads no other
+                * member of struct lfsck_start_param.
+                */
+               lsp.lsp_start = start;
+               lsp.lsp_index = lr->lr_index;
+               lsp.lsp_index_valid = 1;
+               rc = lfsck_start(env, key, &lsp);
+               break;
+       }
+       case LE_STOP: {
+               struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
+
+               memset(stop, 0, sizeof(*stop));
+               stop->ls_status = lr->lr_status;
+               stop->ls_flags = lr->lr_param & ~LPF_BROADCAST;
+               rc = lfsck_stop(env, key, stop);
+               break;
+       }
        case LE_PHASE1_DONE:
        case LE_PHASE2_DONE:
        case LE_FID_ACCESSED:
-               break;
-       default:
-               RETURN(-EOPNOTSUPP);
-       }
+       case LE_PEER_EXIT: {
+               struct lfsck_instance  *lfsck;
+               struct lfsck_component *com;
 
-       lfsck = lfsck_instance_find(key, true, false);
-       if (unlikely(lfsck == NULL))
-               RETURN(-ENODEV);
+               lfsck = lfsck_instance_find(key, true, false);
+               if (unlikely(lfsck == NULL))
+                       RETURN(-ENODEV);
 
-       com = lfsck_component_find(lfsck, lr->lr_active);
-       if (likely(com != NULL)) {
-               rc = com->lc_ops->lfsck_in_notify(env, com, lr);
-               lfsck_component_put(env, com);
-       } else {
-               rc = -ENOTSUPP;
-       }
+               /* rc keeps its -EOPNOTSUPP default if no component matches. */
+               com = lfsck_component_find(lfsck, lr->lr_active);
+               if (likely(com != NULL)) {
+                       rc = com->lc_ops->lfsck_in_notify(env, com, lr);
+                       lfsck_component_put(env, com);
+               }
 
-       lfsck_instance_put(env, lfsck);
+               lfsck_instance_put(env, lfsck);
+               break;
+       }
+       default:
+               break;
+       }
 
        RETURN(rc);
 }
@@ -1594,6 +1920,23 @@ int lfsck_query(const struct lu_env *env, struct dt_device *key,
 }
 EXPORT_SYMBOL(lfsck_query);
 
+/*
+ * Record the LDLM namespace @ns in the LFSCK instance found for @key.
+ *
+ * Returns 0 on success, or -ENODEV when no instance is found for @key.
+ */
+int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
+                            struct ldlm_namespace *ns)
+{
+       struct lfsck_instance *instance;
+
+       instance = lfsck_instance_find(key, true, false);
+       if (unlikely(instance == NULL))
+               return -ENODEV;
+
+       instance->li_namespace = ns;
+       lfsck_instance_put(env, instance);
+
+       return 0;
+}
+EXPORT_SYMBOL(lfsck_register_namespace);
+
 int lfsck_register(const struct lu_env *env, struct dt_device *key,
                   struct dt_device *next, struct obd_device *obd,
                   lfsck_out_notify notify, void *notify_data, bool master)
@@ -1774,10 +2117,8 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
 {
        struct lfsck_instance   *lfsck;
        struct lfsck_tgt_descs  *ltds;
-       struct lfsck_tgt_desc   *ltd;
+       struct lfsck_tgt_desc   *ltd    = NULL;
        struct list_head        *head;
-       bool                     found = false;
-       bool                     stop  = false;
 
        if (for_ost)
                head = &lfsck_ost_orphan_list;
@@ -1806,7 +2147,6 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
                ltds = &lfsck->li_mdt_descs;
 
        down_write(&ltds->ltd_rw_sem);
-
        LASSERT(ltds->ltd_tgts_bitmap != NULL);
 
        if (unlikely(index >= ltds->ltd_tgts_bitmap->size))
@@ -1816,30 +2156,14 @@ void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
        if (unlikely(ltd == NULL))
                goto unlock;
 
-       found = true;
-       spin_lock(&ltds->ltd_lock);
-       ltd->ltd_dead = 1;
-       if (!list_empty(&ltd->ltd_layout_list)) {
-               list_del_init(&ltd->ltd_layout_phase_list);
-               list_del_init(&ltd->ltd_layout_list);
-               stop = true;
-       } else {
-               LASSERT(list_empty(&ltd->ltd_layout_phase_list));
-       }
-       spin_unlock(&ltds->ltd_lock);
-
-       if (stop && lfsck->li_master)
-               lfsck_stop_notify(env, lfsck, ltds, ltd);
-
        LASSERT(ltds->ltd_tgtnr > 0);
 
        ltds->ltd_tgtnr--;
        cfs_bitmap_clear(ltds->ltd_tgts_bitmap, index);
        LTD_TGT(ltds, index) = NULL;
-       lfsck_tgt_put(ltd);
 
 unlock:
-       if (!found) {
+       if (ltd == NULL) {
                if (for_ost)
                        head = &lfsck->li_ost_descs.ltd_orphan;
                else
@@ -1848,13 +2172,20 @@ unlock:
                list_for_each_entry(ltd, head, ltd_orphan_list) {
                        if (ltd->ltd_tgt == tgt) {
                                list_del_init(&ltd->ltd_orphan_list);
-                               lfsck_tgt_put(ltd);
                                break;
                        }
                }
        }
 
        up_write(&ltds->ltd_rw_sem);
+       if (ltd != NULL) {
+               spin_lock(&ltds->ltd_lock);
+               ltd->ltd_dead = 1;
+               spin_unlock(&ltds->ltd_lock);
+               lfsck_stop_notify(env, lfsck, ltds, ltd, LT_LAYOUT);
+               lfsck_tgt_put(ltd);
+       }
+
        lfsck_instance_put(env, lfsck);
 }
 EXPORT_SYMBOL(lfsck_del_target);
@@ -1868,7 +2199,6 @@ static int __init lfsck_init(void)
        lfsck_key_init_generic(&lfsck_thread_key, NULL);
        rc = lu_context_key_register(&lfsck_thread_key);
        if (rc == 0) {
-               tgt_register_lfsck_start(lfsck_start);
                tgt_register_lfsck_in_notify(lfsck_in_notify);
                tgt_register_lfsck_query(lfsck_query);
        }
index 17340d0..5aad150 100644 (file)
@@ -4749,7 +4749,9 @@ static int mdt_prepare(const struct lu_env *env,
        if (rc)
                RETURN(rc);
 
-       lsp.lsp_namespace = mdt->mdt_namespace;
+       rc = lfsck_register_namespace(env, mdt->mdt_bottom, mdt->mdt_namespace);
+       LASSERTF(rc == 0, "register namespace failed: rc = %d\n", rc);
+
        lsp.lsp_start = NULL;
        lsp.lsp_index_valid = 0;
        rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child,
@@ -5608,7 +5610,6 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                        break;
                }
 
-               lsp.lsp_namespace = mdt->mdt_namespace;
                lsp.lsp_start = (struct lfsck_start *)(data->ioc_inlbuf1);
                lsp.lsp_index_valid = 0;
                rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, &lsp);
@@ -5617,11 +5618,17 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
        case OBD_IOC_STOP_LFSCK: {
                struct md_device        *next = mdt->mdt_child;
                struct obd_ioctl_data   *data = karg;
-               struct lfsck_stop       *stop =
-                               (struct lfsck_stop *)(data->ioc_inlbuf1);
+               struct lfsck_stop        stop;
+
+               stop.ls_status = LS_STOPPED;
+               /* Old lfsck utils may pass NULL @stop. */
+               if (data->ioc_inlbuf1 == NULL)
+                       stop.ls_flags = 0;
+               else
+                       stop.ls_flags =
+                       ((struct lfsck_stop *)(data->ioc_inlbuf1))->ls_flags;
 
-               stop->ls_status = LS_STOPPED;
-               rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, stop);
+               rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, &stop);
                break;
        }
         case OBD_IOC_GET_OBJ_VERSION: {
index 6782e5d..24f767b 100644 (file)
@@ -442,7 +442,9 @@ static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev,
                RETURN(rc);
        }
 
-       lsp.lsp_namespace = ofd->ofd_namespace;
+       rc = lfsck_register_namespace(env, ofd->ofd_osd, ofd->ofd_namespace);
+       LASSERTF(rc == 0, "register namespace failed: rc = %d\n", rc);
+
        lsp.lsp_start = NULL;
        lsp.lsp_index_valid = 0;
        rc = lfsck_start(env, ofd->ofd_osd, &lsp);
index a5e7a2d..e3491db 100644 (file)
@@ -1023,7 +1023,6 @@ int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                        break;
                }
 
-               lsp.lsp_namespace = ofd->ofd_namespace;
                lsp.lsp_start = (struct lfsck_start *)(data->ioc_inlbuf1);
                lsp.lsp_index_valid = 0;
                rc = lfsck_start(&env, ofd->ofd_osd, &lsp);
@@ -1031,9 +1030,17 @@ int ofd_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
        }
        case OBD_IOC_STOP_LFSCK: {
                struct obd_ioctl_data *data = karg;
+               struct lfsck_stop      stop;
 
-               rc = lfsck_stop(&env, ofd->ofd_osd,
-                               (struct lfsck_stop *)(data->ioc_inlbuf1));
+               stop.ls_status = LS_STOPPED;
+               /* Old lfsck utils may pass NULL @stop. */
+               if (data->ioc_inlbuf1 == NULL)
+                       stop.ls_flags = 0;
+               else
+                       stop.ls_flags =
+                       ((struct lfsck_stop *)(data->ioc_inlbuf1))->ls_flags;
+
+               rc = lfsck_stop(&env, ofd->ofd_osd, &stop);
                break;
        }
        case OBD_IOC_GET_OBJ_VERSION:
index 0883c4f..a69d4c5 100644 (file)
@@ -1363,18 +1363,6 @@ TGT_SEC_HDL_VAR(0,       SEC_CTX_FINI,           tgt_sec_ctx_handle),
 };
 EXPORT_SYMBOL(tgt_sec_ctx_handlers);
 
-static int (*tgt_lfsck_start)(const struct lu_env *env,
-                             struct dt_device *key,
-                             struct lfsck_start_param *lsp) = NULL;
-
-void tgt_register_lfsck_start(int (*start)(const struct lu_env *,
-                                          struct dt_device *,
-                                          struct lfsck_start_param *))
-{
-       tgt_lfsck_start = start;
-}
-EXPORT_SYMBOL(tgt_register_lfsck_start);
-
 int (*tgt_lfsck_in_notify)(const struct lu_env *env,
                           struct dt_device *key,
                           struct lfsck_request *lr) = NULL;
@@ -1412,39 +1400,7 @@ static int tgt_handle_lfsck_notify(struct tgt_session_info *tsi)
        if (lr == NULL)
                RETURN(-EPROTO);
 
-       switch (lr->lr_event) {
-       case LE_START: {
-               struct lfsck_start       start;
-               struct lfsck_start_param lsp;
-
-               start.ls_valid = lr->lr_valid;
-               start.ls_speed_limit = lr->lr_speed;
-               start.ls_version = lr->lr_version;
-               start.ls_active = lr->lr_active;
-               start.ls_flags = lr->lr_param;
-               start.ls_async_windows = lr->lr_async_windows;
-
-               lsp.lsp_namespace = tsi->tsi_exp->exp_obd->obd_namespace;
-               lsp.lsp_start = &start;
-               lsp.lsp_index = lr->lr_index;
-               if (lr->lr_flags & LEF_TO_OST)
-                       lsp.lsp_index_valid = 1;
-               else
-                       lsp.lsp_index_valid = 0;
-               rc = tgt_lfsck_start(env, key, &lsp);
-               break;
-       }
-       case LE_STOP:
-       case LE_PHASE1_DONE:
-       case LE_PHASE2_DONE:
-               rc = tgt_lfsck_in_notify(env, key, lr);
-               break;
-       default:
-               CERROR("%s: unsupported lfsck_event: rc = %d\n",
-                      tgt_name(tsi->tsi_tgt), lr->lr_event);
-               rc = -EOPNOTSUPP;
-               break;
-       }
+       rc = tgt_lfsck_in_notify(env, key, lr);
 
        RETURN(rc);
 }
index 405422f..65b5e7c 100644 (file)
@@ -1182,43 +1182,81 @@ test_12() {
                createmany -o $DIR/${k}/f 100
        done
 
-       echo "Trigger LFSCK on all targets by single command (limited speed)."
-       do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
+       echo "Start namespace LFSCK on all targets by single command (-s 10)."
+       do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
                -s 10 || error "(2) Fail to start LFSCK on all devices!"
 
        echo "All the LFSCK targets should be in 'scanning-phase1' status."
        for k in $(seq $MDSCOUNT); do
                local STATUS=$(do_facet mds${k} $LCTL get_param -n \
-                               mdd.$(facet_svc mds${k}).lfsck_layout |
+                               mdd.$(facet_svc mds${k}).lfsck_namespace |
                                awk '/^status/ { print $2 }')
                [ "$STATUS" == "scanning-phase1" ] ||
                error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
        done
 
-       echo "Stop layout LFSCK on all targets by single lctl command."
+       echo "Stop namespace LFSCK on all targets by single lctl command."
        do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
                error "(4) Fail to stop LFSCK on all devices!"
 
        echo "All the LFSCK targets should be in 'stopped' status."
        for k in $(seq $MDSCOUNT); do
                local STATUS=$(do_facet mds${k} $LCTL get_param -n \
-                               mdd.$(facet_svc mds${k}).lfsck_layout |
+                               mdd.$(facet_svc mds${k}).lfsck_namespace |
                                awk '/^status/ { print $2 }')
                [ "$STATUS" == "stopped" ] ||
                        error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
        done
 
+       echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
+       do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
+               -s 0 -r || error "(6) Fail to start LFSCK on all devices!"
+
+       echo "All the LFSCK targets should be in 'completed' status."
+       for k in $(seq $MDSCOUNT); do
+               wait_update_facet mds${k} "$LCTL get_param -n \
+                       mdd.$(facet_svc mds${k}).lfsck_namespace |
+                       awk '/^status/ { print \\\$2 }'" "completed" 8 ||
+                       error "(7) MDS${k} is not the expected 'completed'"
+       done
+
+       echo "Start layout LFSCK on all targets by single command (-s 10)."
+       do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
+               -s 10 || error "(8) Fail to start LFSCK on all devices!"
+
+       echo "All the LFSCK targets should be in 'scanning-phase1' status."
+       for k in $(seq $MDSCOUNT); do
+               local STATUS=$(do_facet mds${k} $LCTL get_param -n \
+                               mdd.$(facet_svc mds${k}).lfsck_layout |
+                               awk '/^status/ { print $2 }')
+               [ "$STATUS" == "scanning-phase1" ] ||
+               error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
+       done
+
+       echo "Stop layout LFSCK on all targets by single lctl command."
+       do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
+               error "(10) Fail to stop LFSCK on all devices!"
+
+       echo "All the LFSCK targets should be in 'stopped' status."
+       for k in $(seq $MDSCOUNT); do
+               local STATUS=$(do_facet mds${k} $LCTL get_param -n \
+                               mdd.$(facet_svc mds${k}).lfsck_layout |
+                               awk '/^status/ { print $2 }')
+               [ "$STATUS" == "stopped" ] ||
+                       error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
+       done
+
        for k in $(seq $OSTCOUNT); do
                local STATUS=$(do_facet ost${k} $LCTL get_param -n \
                                obdfilter.$(facet_svc ost${k}).lfsck_layout |
                                awk '/^status/ { print $2 }')
                [ "$STATUS" == "stopped" ] ||
-                       error "(6) OST${k} Expect 'stopped', but got '$STATUS'"
+                       error "(12) OST${k} Expect 'stopped', but got '$STATUS'"
        done
 
-       echo "Re-trigger LFSCK on all targets by single command (full speed)."
+       echo "Re-start layout LFSCK on all targets by single command (-s 0)."
        do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t layout -A \
-               -s 0 || error "(7) Fail to start LFSCK on all devices!"
+               -s 0 -r || error "(13) Fail to start LFSCK on all devices!"
 
        echo "All the LFSCK targets should be in 'completed' status."
        for k in $(seq $MDSCOUNT); do
@@ -1228,7 +1266,7 @@ test_12() {
                wait_update_facet mds${k} "$LCTL get_param -n \
                        mdd.$(facet_svc mds${k}).lfsck_layout |
                        awk '/^status/ { print \\\$2 }'" "completed" 32 ||
-                       error "(8) MDS${k} is not the expected 'completed'"
+                       error "(14) MDS${k} is not the expected 'completed'"
        done
 }
 run_test 12 "single command to trigger LFSCK on all devices"
index c3fc3c4..9ee230c 100644 (file)
@@ -369,7 +369,7 @@ command_t cmdlist[] = {
         "                   [-n | --dryrun switch] [-r | --reset]\n"
         "                   [-s | --speed speed_limit] [-A | --all]\n"
         "                   [-t | --type lfsck_type[,lfsck_type...]]\n"
-        "                   [-w | --windows win_size]"},
+        "                   [-w | --windows win_size] [-o | --orphan]"},
        {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
         "usage: lfsck_stop <-M | --device [MDT,OST]_device>\n"
         "                  [-A | --all] [-h | --help]"},
index 0dd7018..4bf8b23 100644 (file)
@@ -55,6 +55,7 @@ static struct option long_opt_start[] = {
        {"all",         no_argument,       0, 'A'},
        {"type",        required_argument, 0, 't'},
        {"windows",     required_argument, 0, 'w'},
+       {"orphan",      no_argument,       0, 'o'},
        {0,             0,                 0,   0}
 };
 
@@ -99,7 +100,7 @@ static void usage_start(void)
                "            [-n | --dryrun switch] [-r | --reset]\n"
                "            [-s | --speed speed_limit] [-A | --all]\n"
                "            [-t | --type lfsck_type[,lfsck_type...]]\n"
-               "            [-w | --windows win_size]\n"
+               "            [-w | --windows win_size] [-o | --orphan]\n"
                "OPTIONS:\n"
                "-M: The device to start LFSCK/scrub on.\n"
                "-e: Error handle, 'continue'(default) or 'abort'.\n"
@@ -110,7 +111,8 @@ static void usage_start(void)
                    "'%d' means no limit (default).\n"
                "-A: Start LFSCK on all MDT devices.\n"
                "-t: The LFSCK type(s) to be started.\n"
-               "-w: The windows size for async requests pipeline.\n",
+               "-w: The windows size for async requests pipeline.\n"
+               "-o: handle orphan objects.\n",
                LFSCK_SPEED_NO_LIMIT);
 }
 
@@ -149,7 +151,7 @@ int jt_lfsck_start(int argc, char **argv)
        char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
        char device[MAX_OBD_NAME];
        struct lfsck_start start;
-       char *optstring = "M:e:hn:rs:At:w:";
+       char *optstring = "M:e:hn:rs:At:w:o";
        int opt, index, rc, val, i, type;
 
        memset(&data, 0, sizeof(data));
@@ -203,7 +205,7 @@ int jt_lfsck_start(int argc, char **argv)
                        start.ls_valid |= LSV_SPEED_LIMIT;
                        break;
                case 'A':
-                       start.ls_flags |= LPF_ALL_MDT | LPF_BROADCAST;
+                       start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
                        break;
                case 't': {
                        char *str = optarg, *p, c;
@@ -262,6 +264,10 @@ int jt_lfsck_start(int argc, char **argv)
                        start.ls_async_windows = val;
                        start.ls_valid |= LSV_ASYNC_WINDOWS;
                        break;
+               case 'o':
+                       start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST |
+                                         LPF_ORPHAN;
+                       break;
                default:
                        fprintf(stderr, "Invalid option, '-h' for help.\n");
                        return -EINVAL;
@@ -340,7 +346,7 @@ int jt_lfsck_stop(int argc, char **argv)
                                return rc;
                        break;
                case 'A':
-                       stop.ls_flags |= LPF_ALL_MDT | LPF_BROADCAST;
+                       stop.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
                        break;
                case 'h':
                        usage_stop();