4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/lfsck/lfsck_layout.c
28 * Author: Fan, Yong <fan.yong@intel.com>
32 # define EXPORT_SYMTAB
34 #define DEBUG_SUBSYSTEM S_LFSCK
36 #include <linux/bitops.h>
38 #include <lustre/lustre_idl.h>
39 #include <lu_object.h>
40 #include <dt_object.h>
41 #include <lustre_linkea.h>
42 #include <lustre_fid.h>
43 #include <lustre_lib.h>
44 #include <lustre_net.h>
45 #include <lustre/lustre_user.h>
46 #include <md_object.h>
47 #include <obd_class.h>
49 #include "lfsck_internal.h"
51 #define LFSCK_LAYOUT_MAGIC 0xB173AE14
53 static const char lfsck_layout_name[] = "lfsck_layout";
/*
 * In-memory state for the layout LFSCK component.
 * NOTE(review): this is a partial line-numbered dump; several members
 * and the closing braces of each struct fall on lines missing from view.
 */

/* Per-FID-sequence state on the OST (slave) side: tracks one object
 * sequence and its LAST_ID file. */
55 struct lfsck_layout_seq {
56 struct list_head lls_list;
/* highest object ID for this sequence learned so far during scanning */
59 __u64 lls_lastid_known;
/* the LAST_ID file object for this sequence */
60 struct dt_object *lls_lastid_obj;
/* set when the in-memory last ID must be written back to LAST_ID */
61 unsigned int lls_dirty:1;

/* One MDT (master) that this OST slave is serving layout verification
 * for; refcounted (llst_ref, see lfsck_layout_llst_put). */
64 struct lfsck_layout_slave_target {
65 /* link into lfsck_layout_slave_data::llsd_master_list. */
66 struct list_head llst_list;

/* Slave-side (OST) component data: per-sequence list plus the list of
 * interested masters.  Both lists are guarded by llsd_lock (declared on
 * a line missing from this dump — see the spin_lock(&llsd->llsd_lock)
 * callers below). */
72 struct lfsck_layout_slave_data {
73 /* list for lfsck_layout_seq */
74 struct list_head llsd_seq_list;

76 /* list for the masters involve layout verification. */
77 struct list_head llsd_master_list;

/* A pre-fetched MDT-object plus its attributes, shared (refcounted via
 * llo_ref) by the requests that reference it. */
82 struct lfsck_layout_object {
83 struct dt_object *llo_obj;
84 struct lu_attr llo_attr;

/* One verification request: check OST-object llr_child against stripe
 * llr_lov_idx of MDT-object llr_parent. */
89 struct lfsck_layout_req {
90 struct list_head llr_list;
91 struct lfsck_layout_object *llr_parent;
92 struct dt_object *llr_child;
94 __u32 llr_lov_idx; /* offset in LOV EA */

/* Master-side (MDT) component data: the request queue consumed by the
 * assistant thread, plus per-target bookkeeping lists. */
97 struct lfsck_layout_master_data {
99 struct list_head llmd_req_list;

101 /* list for the ost targets involve layout verification. */
102 struct list_head llmd_ost_list;

104 /* list for the ost targets in phase1 scanning. */
105 struct list_head llmd_ost_phase1_list;

/* NOTE(review): comment below says "phase1" but the list name says
 * phase2 — presumably a stale copy/paste in the original. */
107 /* list for the ost targets in phase1 scanning. */
108 struct list_head llmd_ost_phase2_list;

110 /* list for the mdt targets involve layout verification. */
111 struct list_head llmd_mdt_list;

113 /* list for the mdt targets in phase1 scanning. */
114 struct list_head llmd_mdt_phase1_list;

/* Same stale "phase1" comment as above; this is the phase2 list. */
116 /* list for the mdt targets in phase1 scanning. */
117 struct list_head llmd_mdt_phase2_list;

/* assistant thread handle and its control state */
119 struct ptlrpc_thread llmd_thread;
/* generation counter used to detect already-touched targets when
 * walking the phase lists (see master_query_others) */
120 __u32 llmd_touch_gen;
122 int llmd_assistant_status;
123 int llmd_post_result;
124 unsigned int llmd_to_post:1,
125 llmd_to_double_scan:1,
126 llmd_in_double_scan:1,

/* Arguments carried through a slave-side async RPC. */
130 struct lfsck_layout_slave_async_args {
131 struct obd_export *llsaa_exp;
132 struct lfsck_component *llsaa_com;
133 struct lfsck_layout_slave_target *llsaa_llst;
/*
 * Allocate and initialize an lfsck_layout_object wrapping @obj, with
 * its attributes pre-fetched (refcount starts at 1).  Returns
 * ERR_PTR(-ENOMEM) on allocation failure.  NOTE(review): the
 * allocation itself and the error path after dt_attr_get() fall on
 * lines missing from this dump.
 */
136 static struct lfsck_layout_object *
137 lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj,
140 struct lfsck_layout_object *llo;
145 return ERR_PTR(-ENOMEM);
/* cache the object attributes for later verification */
147 rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA);
/* take a reference on the underlying lu_object for llo_obj */
154 lu_object_get(&obj->do_lu);
156 /* The gen can be used to check whether some others have changed the
157 * file layout after LFSCK pre-fetching but before real verification. */
159 atomic_set(&llo->llo_ref, 1);

/*
 * Drop one reference on a slave target; on the last put the target must
 * already be off the master list (asserted), then it is freed (free on
 * a missing line).
 */
165 lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst)
167 if (atomic_dec_and_test(&llst->llst_ref)) {
168 LASSERT(list_empty(&llst->llst_list));

/*
 * Register master @index in llsd_master_list, unless an entry with the
 * same index already exists (duplicate path is on missing lines —
 * presumably the new entry is freed and the function returns).
 */
175 lfsck_layout_llst_add(struct lfsck_layout_slave_data *llsd, __u32 index)
177 struct lfsck_layout_slave_target *llst;
178 struct lfsck_layout_slave_target *tmp;
185 INIT_LIST_HEAD(&llst->llst_list);
187 llst->llst_index = index;
188 atomic_set(&llst->llst_ref, 1);
/* scan for a duplicate under the list lock before inserting */
190 spin_lock(&llsd->llsd_lock);
191 list_for_each_entry(tmp, &llsd->llsd_master_list, llst_list) {
192 if (tmp->llst_index == index) {
198 list_add_tail(&llst->llst_list, &llsd->llsd_master_list);
199 spin_unlock(&llsd->llsd_lock);

/*
 * Unhook @llst from the master list (if still linked) and drop the
 * list's reference on it.
 */
208 lfsck_layout_llst_del(struct lfsck_layout_slave_data *llsd,
209 struct lfsck_layout_slave_target *llst)
213 spin_lock(&llsd->llsd_lock);
214 if (!list_empty(&llst->llst_list)) {
215 list_del_init(&llst->llst_list);
218 spin_unlock(&llsd->llsd_lock);
/* drop the reference that the list held */
221 lfsck_layout_llst_put(llst);

/*
 * Find the slave target with the given master index, remove it from the
 * list, and return it (caller inherits the list's reference).  Returns
 * NULL (on a missing line, presumably) when not found.
 */
224 static inline struct lfsck_layout_slave_target *
225 lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd,
228 struct lfsck_layout_slave_target *llst;
230 spin_lock(&llsd->llsd_lock);
231 list_for_each_entry(llst, &llsd->llsd_master_list, llst_list) {
232 if (llst->llst_index == index) {
233 list_del_init(&llst->llst_list);
234 spin_unlock(&llsd->llsd_lock);
239 spin_unlock(&llsd->llsd_lock);

/*
 * Drop one reference on a layout object; the last put releases the
 * wrapped dt_object (and frees the wrapper, on a missing line).
 */
244 static inline void lfsck_layout_object_put(const struct lu_env *env,
245 struct lfsck_layout_object *llo)
247 if (atomic_dec_and_test(&llo->llo_ref)) {
248 lfsck_object_put(env, llo->llo_obj);
/*
 * Build a verification request binding OST-object @child (stripe
 * @lov_idx on OST @ost_idx) to its MDT-object @parent.  Takes an extra
 * reference on @parent; returns ERR_PTR(-ENOMEM) if allocation fails.
 */
253 static struct lfsck_layout_req *
254 lfsck_layout_req_init(struct lfsck_layout_object *parent,
255 struct dt_object *child, __u32 ost_idx, __u32 lov_idx)
257 struct lfsck_layout_req *llr;
261 return ERR_PTR(-ENOMEM);
263 INIT_LIST_HEAD(&llr->llr_list);
/* the request pins the parent object until req_fini */
264 atomic_inc(&parent->llo_ref);
265 llr->llr_parent = parent;
266 llr->llr_child = child;
267 llr->llr_ost_idx = ost_idx;
268 llr->llr_lov_idx = lov_idx;

/*
 * Release a request: drop the child object reference and the parent
 * wrapper reference taken in req_init (the request itself is freed on a
 * line missing from this dump).
 */
273 static inline void lfsck_layout_req_fini(const struct lu_env *env,
274 struct lfsck_layout_req *llr)
276 lu_object_put(env, &llr->llr_child->do_lu);
277 lfsck_layout_object_put(env, llr->llr_parent);

/*
 * Test, under llmd_lock, whether the assistant's request queue is
 * empty.  NOTE(review): the true-branch return falls on a missing line.
 */
281 static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd)
285 spin_lock(&llmd->llmd_lock);
286 if (list_empty(&llmd->llmd_req_list))
288 spin_unlock(&llmd->llmd_lock);
/*
 * Fetch the LOV EA (layout xattr) of @obj into @buf, growing the buffer
 * as needed: a first dt_xattr_get() may report the buffer is too small,
 * in which case the required size is probed with LU_BUF_NULL and the
 * buffer is (re)allocated before retrying (retry on missing lines).
 * @buflen is updated to the new buffer capacity.
 */
293 static int lfsck_layout_get_lovea(const struct lu_env *env,
294 struct dt_object *obj,
295 struct lu_buf *buf, ssize_t *buflen)
300 rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA);
/* probe the actual EA size without copying the data */
302 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
307 lu_buf_realloc(buf, rc);
309 *buflen = buf->lb_len;
/* realloc failed: out of memory */
311 if (buf->lb_buf == NULL)
/* first use of a not-yet-allocated buffer */
323 if (unlikely(buf->lb_buf == NULL)) {
324 lu_buf_alloc(buf, rc);
326 *buflen = buf->lb_len;
328 if (buf->lb_buf == NULL)

/*
 * Sanity-check a LOV EA header: accept only the V1/V3 magics and the
 * RAID0 pattern.  (Return values for the reject paths fall on missing
 * lines.)
 */
337 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
342 magic = le32_to_cpu(lmm->lmm_magic);
343 /* If magic crashed, keep it there. Sometime later, during OST-object
344 * orphan handling, if some OST-object(s) back-point to it, it can be
345 * verified and repaired. */
346 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
349 patten = le32_to_cpu(lmm->lmm_pattern);
350 /* XXX: currently, we only support LOV_PATTERN_RAID0. */
351 if (patten != LOV_PATTERN_RAID0)
/*
 * Convert an on-disk (little-endian) lfsck_layout trace record @src
 * into host byte order in @des, field by field.  Must be kept in exact
 * mirror with lfsck_layout_cpu_to_le() below.
 */
357 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
358 const struct lfsck_layout *src)
362 des->ll_magic = le32_to_cpu(src->ll_magic);
363 des->ll_status = le32_to_cpu(src->ll_status);
364 des->ll_flags = le32_to_cpu(src->ll_flags);
365 des->ll_success_count = le32_to_cpu(src->ll_success_count);
366 des->ll_run_time_phase1 = le32_to_cpu(src->ll_run_time_phase1);
367 des->ll_run_time_phase2 = le32_to_cpu(src->ll_run_time_phase2);
368 des->ll_time_last_complete = le64_to_cpu(src->ll_time_last_complete);
369 des->ll_time_latest_start = le64_to_cpu(src->ll_time_latest_start);
370 des->ll_time_last_checkpoint =
371 le64_to_cpu(src->ll_time_last_checkpoint);
372 des->ll_pos_latest_start = le64_to_cpu(src->ll_pos_latest_start);
373 des->ll_pos_last_checkpoint = le64_to_cpu(src->ll_pos_last_checkpoint);
374 des->ll_pos_first_inconsistent =
375 le64_to_cpu(src->ll_pos_first_inconsistent);
376 des->ll_objs_checked_phase1 = le64_to_cpu(src->ll_objs_checked_phase1);
377 des->ll_objs_failed_phase1 = le64_to_cpu(src->ll_objs_failed_phase1);
378 des->ll_objs_checked_phase2 = le64_to_cpu(src->ll_objs_checked_phase2);
379 des->ll_objs_failed_phase2 = le64_to_cpu(src->ll_objs_failed_phase2);
/* per-inconsistency-type repair counters */
380 for (i = 0; i < LLIT_MAX; i++)
381 des->ll_objs_repaired[i] =
382 le64_to_cpu(src->ll_objs_repaired[i]);
383 des->ll_objs_skipped = le64_to_cpu(src->ll_objs_skipped);

/*
 * Inverse of lfsck_layout_le_to_cpu(): convert a host-order record to
 * the little-endian on-disk format before writing it out.
 */
386 static void lfsck_layout_cpu_to_le(struct lfsck_layout *des,
387 const struct lfsck_layout *src)
391 des->ll_magic = cpu_to_le32(src->ll_magic);
392 des->ll_status = cpu_to_le32(src->ll_status);
393 des->ll_flags = cpu_to_le32(src->ll_flags);
394 des->ll_success_count = cpu_to_le32(src->ll_success_count);
395 des->ll_run_time_phase1 = cpu_to_le32(src->ll_run_time_phase1);
396 des->ll_run_time_phase2 = cpu_to_le32(src->ll_run_time_phase2);
397 des->ll_time_last_complete = cpu_to_le64(src->ll_time_last_complete);
398 des->ll_time_latest_start = cpu_to_le64(src->ll_time_latest_start);
399 des->ll_time_last_checkpoint =
400 cpu_to_le64(src->ll_time_last_checkpoint);
401 des->ll_pos_latest_start = cpu_to_le64(src->ll_pos_latest_start);
402 des->ll_pos_last_checkpoint = cpu_to_le64(src->ll_pos_last_checkpoint);
403 des->ll_pos_first_inconsistent =
404 cpu_to_le64(src->ll_pos_first_inconsistent);
405 des->ll_objs_checked_phase1 = cpu_to_le64(src->ll_objs_checked_phase1);
406 des->ll_objs_failed_phase1 = cpu_to_le64(src->ll_objs_failed_phase1);
407 des->ll_objs_checked_phase2 = cpu_to_le64(src->ll_objs_checked_phase2);
408 des->ll_objs_failed_phase2 = cpu_to_le64(src->ll_objs_failed_phase2);
409 for (i = 0; i < LLIT_MAX; i++)
410 des->ll_objs_repaired[i] =
411 cpu_to_le64(src->ll_objs_repaired[i]);
412 des->ll_objs_skipped = cpu_to_le64(src->ll_objs_skipped);
416 * \retval +ve: the lfsck_layout is broken, the caller should reset it.
417 * \retval 0: succeed.
418 * \retval -ve: failed cases.
/*
 * Read the persistent lfsck_layout trace record from the component's
 * trace object, convert it to host order, and validate the magic.
 * A short read or a bad magic is reported as "to be reset" (+ve return,
 * on lines missing from this dump).
 */
420 static int lfsck_layout_load(const struct lu_env *env,
421 struct lfsck_component *com)
423 struct lfsck_layout *lo = com->lc_file_ram;
424 const struct dt_body_operations *dbo = com->lc_obj->do_body_ops;
425 ssize_t size = com->lc_file_size;
/* read the raw little-endian record into lc_file_disk */
429 rc = dbo->dbo_read(env, com->lc_obj,
430 lfsck_buf_get(env, com->lc_file_disk, size), &pos,
435 CWARN("%s: failed to load lfsck_layout: rc = %d\n",
436 lfsck_lfsck2name(com->lc_lfsck), rc);
/* partial read: record on disk is corrupt */
438 } else if (rc != size) {
439 CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n",
440 lfsck_lfsck2name(com->lc_lfsck), rc);
444 lfsck_layout_le_to_cpu(lo, com->lc_file_disk);
445 if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) {
446 CWARN("%s: invalid lfsck_layout magic %#x != %#x, "
447 "to be reset\n", lfsck_lfsck2name(com->lc_lfsck),
448 lo->ll_magic, LFSCK_LAYOUT_MAGIC);

/*
 * Persist the in-memory lfsck_layout record: convert to little-endian,
 * then write it inside a local transaction on the bottom device
 * (declare -> start -> write -> stop).
 */
455 static int lfsck_layout_store(const struct lu_env *env,
456 struct lfsck_component *com)
458 struct dt_object *obj = com->lc_obj;
459 struct lfsck_instance *lfsck = com->lc_lfsck;
460 struct lfsck_layout *lo = com->lc_file_disk;
461 struct thandle *handle;
462 ssize_t size = com->lc_file_size;
/* serialize lc_file_ram into the on-disk (LE) buffer */
467 lfsck_layout_cpu_to_le(lo, com->lc_file_ram);
468 handle = dt_trans_create(env, lfsck->li_bottom);
469 if (IS_ERR(handle)) {
470 rc = PTR_ERR(handle);
471 CERROR("%s: fail to create trans for storing lfsck_layout: "
472 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
476 rc = dt_declare_record_write(env, obj, size, pos, handle);
478 CERROR("%s: fail to declare trans for storing lfsck_layout(1): "
479 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
483 rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
485 CERROR("%s: fail to start trans for storing lfsck_layout: "
486 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
490 rc = dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos,
493 CERROR("%s: fail to store lfsck_layout(1): size = %d, "
494 "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc);
/* always stop the transaction, success or failure */
499 dt_trans_stop(env, lfsck->li_bottom, handle);

/*
 * Initialize a fresh trace record (magic + LS_INIT status) and store it
 * under the component semaphore.
 */
504 static int lfsck_layout_init(const struct lu_env *env,
505 struct lfsck_component *com)
507 struct lfsck_layout *lo = com->lc_file_ram;
510 memset(lo, 0, com->lc_file_size);
511 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
512 lo->ll_status = LS_INIT;
513 down_write(&com->lc_sem);
514 rc = lfsck_layout_store(env, com);
515 up_write(&com->lc_sem);
/*
 * Decide whether @fid belongs to an OST-object: first ask the FLD
 * server for the sequence's range type; if that is inconclusive, fall
 * back to the object's LMA xattr (LMAC_FID_ON_OST compat flag), and
 * finally probe for a filter_fid ("fid") xattr, which only OST-objects
 * carry.  Returns 1 for OST-object, 0 otherwise (tail on missing
 * lines).
 */
520 static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt,
521 struct dt_object *obj, const struct lu_fid *fid)
523 struct seq_server_site *ss = lu_site2seq(dt->dd_lu_dev.ld_site);
524 struct lu_seq_range range = { 0 };
525 struct lustre_mdt_attrs *lma;
528 fld_range_set_any(&range);
529 rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), &range);
531 if (fld_range_is_ost(&range))
/* FLD did not settle it: inspect the object's LMA */
537 lma = &lfsck_env_info(env)->lti_lma;
538 rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)),
539 XATTR_NAME_LMA, BYPASS_CAPA);
540 if (rc == sizeof(*lma)) {
541 lustre_lma_swab(lma);
543 return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0;
/* last resort: only OST-objects have the "fid" (filter_fid) xattr */
546 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA);

/*
 * Look up the per-sequence state for @seq.  The list is kept sorted by
 * sequence, so the scan can stop early once lls_seq exceeds @seq
 * (return statements on missing lines).
 */
551 static struct lfsck_layout_seq *
552 lfsck_layout_seq_lookup(struct lfsck_layout_slave_data *llsd, __u64 seq)
554 struct lfsck_layout_seq *lls;
556 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
557 if (lls->lls_seq == seq)
560 if (lls->lls_seq > seq)

/*
 * Insert @lls into llsd_seq_list, preserving ascending lls_seq order:
 * find the first entry with a larger sequence and add before it.
 */
568 lfsck_layout_seq_insert(struct lfsck_layout_slave_data *llsd,
569 struct lfsck_layout_seq *lls)
571 struct lfsck_layout_seq *tmp;
572 struct list_head *pos = &llsd->llsd_seq_list;
574 list_for_each_entry(tmp, &llsd->llsd_seq_list, lls_list) {
575 if (lls->lls_seq < tmp->lls_seq) {
576 pos = &tmp->lls_list;
/* list_add_tail before 'pos' keeps the list sorted */
580 list_add_tail(&lls->lls_list, pos);
/*
 * (Re)create a missing LAST_ID file for the sequence of @obj: declare
 * create + record write, then under the object write lock create the
 * regular file and write an initial last-id value.  Skipped entirely in
 * dry-run mode.
 */
584 lfsck_layout_lastid_create(const struct lu_env *env,
585 struct lfsck_instance *lfsck,
586 struct dt_object *obj)
588 struct lfsck_thread_info *info = lfsck_env_info(env);
589 struct lu_attr *la = &info->lti_la;
590 struct dt_object_format *dof = &info->lti_dof;
591 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
592 struct dt_device *dt = lfsck->li_bottom;
599 CDEBUG(D_LFSCK, "To create LAST_ID for <seq> "LPX64"\n",
600 fid_seq(lfsck_dto2fid(obj)));
/* never modify the filesystem in dry-run mode */
602 if (bk->lb_param & LPF_DRYRUN)
605 memset(la, 0, sizeof(*la));
606 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
607 la->la_valid = LA_MODE | LA_UID | LA_GID;
608 dof->dof_type = dt_mode_to_dft(S_IFREG);
610 th = dt_trans_create(env, dt);
612 RETURN(rc = PTR_ERR(th));
614 rc = dt_declare_create(env, obj, la, NULL, dof, th);
618 rc = dt_declare_record_write(env, obj, sizeof(lastid), pos, th);
622 rc = dt_trans_start_local(env, dt, th);
626 dt_write_lock(env, obj, 0);
/* someone may have created it since we checked: only create if absent */
627 if (likely(!dt_object_exists(obj))) {
628 rc = dt_create(env, obj, la, NULL, dof, th);
630 rc = dt_record_write(env, obj,
631 lfsck_buf_get(env, &lastid, sizeof(lastid)),
634 dt_write_unlock(env, obj);
639 dt_trans_stop(env, dt, th);

/*
 * Re-read the LAST_ID file and reconcile with the in-memory state: if
 * the on-disk value is lower than the highest object ID already seen,
 * the LAST_ID file has crashed — adopt the known value, notify the
 * upper layer (LE_LASTID_REBUILDING) and set LF_CRASHED_LASTID.
 */
645 lfsck_layout_lastid_reload(const struct lu_env *env,
646 struct lfsck_component *com,
647 struct lfsck_layout_seq *lls)
653 dt_read_lock(env, lls->lls_lastid_obj, 0);
654 rc = dt_record_read(env, lls->lls_lastid_obj,
655 lfsck_buf_get(env, &lastid, sizeof(lastid)), &pos);
656 dt_read_unlock(env, lls->lls_lastid_obj);
657 if (unlikely(rc != 0))
/* on-disk value is little-endian */
660 lastid = le64_to_cpu(lastid);
661 if (lastid < lls->lls_lastid_known) {
662 struct lfsck_instance *lfsck = com->lc_lfsck;
663 struct lfsck_layout *lo = com->lc_file_ram;
665 lls->lls_lastid = lls->lls_lastid_known;
/* report the crash only once per run */
667 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
668 LASSERT(lfsck->li_out_notify != NULL);
670 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
671 LE_LASTID_REBUILDING);
672 lo->ll_flags |= LF_CRASHED_LASTID;
674 } else if (lastid >= lls->lls_lastid) {
675 lls->lls_lastid = lastid;
/*
 * Flush every dirty per-sequence last-id back to its LAST_ID file.
 * Clean entries are first re-checked via lastid_reload() because OFD
 * updates LAST_ID ahead of pre-creation, which can mask crashes (see
 * the long comment below).  Each write runs in its own local
 * transaction; dry-run mode skips the writes.
 */
683 lfsck_layout_lastid_store(const struct lu_env *env,
684 struct lfsck_component *com)
686 struct lfsck_instance *lfsck = com->lc_lfsck;
687 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
688 struct dt_device *dt = lfsck->li_bottom;
689 struct lfsck_layout_slave_data *llsd = com->lc_data;
690 struct lfsck_layout_seq *lls;
696 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
699 /* XXX: Add the code back if we really found related
700 * inconsistent cases in the future. */
702 if (!lls->lls_dirty) {
703 /* In OFD, before the pre-creation, the LAST_ID
704 * file will be updated firstly, which may hide
705 * some potential crashed cases. For example:
707 * The old obj1's ID is higher than old LAST_ID
708 * but lower than the new LAST_ID, but the LFSCK
709 * have not touch the obj1 until the OFD updated
710 * the LAST_ID. So the LFSCK does not regard it
711 * as crashed case. But when OFD does not create
712 * successfully, it will set the LAST_ID as the
713 * real created objects' ID, then LFSCK needs to
714 * found related inconsistency. */
715 rc = lfsck_layout_lastid_reload(env, com, lls);
/* still clean after reload: nothing to write for this seq */
716 if (likely(!lls->lls_dirty))
721 CDEBUG(D_LFSCK, "To sync the LAST_ID for <seq> "LPX64
722 " as <oid> "LPU64"\n", lls->lls_seq, lls->lls_lastid);
724 if (bk->lb_param & LPF_DRYRUN) {
729 th = dt_trans_create(env, dt);
732 CERROR("%s: (1) failed to store "LPX64": rc = %d\n",
733 lfsck_lfsck2name(com->lc_lfsck),
738 rc = dt_declare_record_write(env, lls->lls_lastid_obj,
739 sizeof(lastid), pos, th);
743 rc = dt_trans_start_local(env, dt, th);
/* write the value in on-disk (little-endian) order */
747 lastid = cpu_to_le64(lls->lls_lastid);
748 dt_write_lock(env, lls->lls_lastid_obj, 0);
749 rc = dt_record_write(env, lls->lls_lastid_obj,
750 lfsck_buf_get(env, &lastid,
751 sizeof(lastid)), &pos, th);
752 dt_write_unlock(env, lls->lls_lastid_obj);
757 dt_trans_stop(env, dt, th);
760 CERROR("%s: (2) failed to store "LPX64": rc = %d\n",
761 lfsck_lfsck2name(com->lc_lfsck),
/*
 * Locate and read the LAST_ID file for lls->lls_seq.  If the file does
 * not exist, mark LF_CRASHED_LASTID, notify the upper layer, optionally
 * pause for fault injection (OBD_FAIL_LFSCK_DELAY4), then recreate it.
 * Otherwise read the current last id; a short/failed read is also
 * treated as a crashed LAST_ID.  On success lls_lastid_obj holds the
 * located object.
 */
770 lfsck_layout_lastid_load(const struct lu_env *env,
771 struct lfsck_component *com,
772 struct lfsck_layout_seq *lls)
774 struct lfsck_instance *lfsck = com->lc_lfsck;
775 struct lfsck_layout *lo = com->lc_file_ram;
776 struct lu_fid *fid = &lfsck_env_info(env)->lti_fid;
777 struct dt_object *obj;
/* build the well-known LAST_ID FID for this sequence on this device */
782 lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck->li_bottom));
783 obj = dt_locate(env, lfsck->li_bottom, fid);
785 RETURN(PTR_ERR(obj));
787 /* LAST_ID crashed, to be rebuilt */
788 if (!dt_object_exists(obj)) {
789 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
790 LASSERT(lfsck->li_out_notify != NULL);
792 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
793 LE_LASTID_REBUILDING);
794 lo->ll_flags |= LF_CRASHED_LASTID;
/* fault-injection hook: optionally wait before rebuilding */
796 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) &&
798 struct l_wait_info lwi = LWI_TIMEOUT(
799 cfs_time_seconds(cfs_fail_val),
/* drop lc_sem while sleeping so others can make progress */
802 up_write(&com->lc_sem);
803 l_wait_event(lfsck->li_thread.t_ctl_waitq,
804 !thread_is_running(&lfsck->li_thread),
806 down_write(&com->lc_sem);
810 rc = lfsck_layout_lastid_create(env, lfsck, obj);
812 dt_read_lock(env, obj, 0);
813 rc = dt_read(env, obj,
814 lfsck_buf_get(env, &lls->lls_lastid, sizeof(__u64)),
816 dt_read_unlock(env, obj);
/* anything but a full 8-byte read (or clean 0) is a failure */
817 if (rc != 0 && rc != sizeof(__u64))
818 GOTO(out, rc = (rc > 0 ? -EFAULT : rc));
/* empty file: LAST_ID content lost — flag it crashed */
820 if (rc == 0 && !(lo->ll_flags & LF_CRASHED_LASTID)) {
821 LASSERT(lfsck->li_out_notify != NULL);
823 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
824 LE_LASTID_REBUILDING);
825 lo->ll_flags |= LF_CRASHED_LASTID;
828 lls->lls_lastid = le64_to_cpu(lls->lls_lastid);
/* error path: release the located object */
836 lfsck_object_put(env, obj);
838 lls->lls_lastid_obj = obj;
/*
 * Completion callback for async LFSCK RPCs sent by the layout master.
 * Dispatches on the original event (lr->lr_event, switch cases partly
 * on missing lines): for LE_START-style events it links the replying
 * target onto the OST/MDT work and phase-1 lists; for LE_QUERY it moves
 * the target between phase-1/phase-2 lists based on the slave's
 * reported status, or unlinks it entirely.  Drops the component
 * reference taken when the RPC was issued.
 */
843 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
844 struct ptlrpc_request *req,
847 struct lfsck_async_interpret_args *laia = args;
848 struct lfsck_component *com = laia->laia_com;
849 struct lfsck_layout_master_data *llmd = com->lc_data;
850 struct lfsck_tgt_descs *ltds = laia->laia_ltds;
851 struct lfsck_tgt_desc *ltd = laia->laia_ltd;
852 struct lfsck_request *lr = laia->laia_lr;
854 switch (lr->lr_event) {
857 struct lfsck_layout *lo = com->lc_file_ram;
/* a target failed to start: result will be partial */
859 lo->ll_flags |= LF_INCOMPLETE;
864 spin_lock(&ltds->ltd_lock);
/* target gone or already finished: do not re-link it */
865 if (ltd->ltd_dead || ltd->ltd_layout_done) {
866 spin_unlock(&ltds->ltd_lock);
/* link the target into the appropriate work + phase-1 lists */
871 if (lr->lr_flags & LEF_TO_OST) {
872 if (list_empty(&ltd->ltd_layout_list))
873 list_add_tail(&ltd->ltd_layout_list,
874 &llmd->llmd_ost_list);
875 if (list_empty(&ltd->ltd_layout_phase_list))
876 list_add_tail(&ltd->ltd_layout_phase_list,
877 &llmd->llmd_ost_phase1_list);
879 if (list_empty(&ltd->ltd_layout_list))
880 list_add_tail(&ltd->ltd_layout_list,
881 &llmd->llmd_mdt_list);
882 if (list_empty(&ltd->ltd_layout_phase_list))
883 list_add_tail(&ltd->ltd_layout_phase_list,
884 &llmd->llmd_mdt_phase1_list);
886 spin_unlock(&ltds->ltd_lock);
893 CERROR("%s: fail to notify %s %x for layout: "
894 "event = %d, rc = %d\n",
895 lfsck_lfsck2name(com->lc_lfsck),
896 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
897 ltd->ltd_index, lr->lr_event, rc);
/* LE_QUERY handling (case label on a missing line) */
900 struct lfsck_reply *reply;
/* query failed: forget this target for the current run */
903 spin_lock(&ltds->ltd_lock);
904 list_del_init(&ltd->ltd_layout_phase_list);
905 list_del_init(&ltd->ltd_layout_list);
906 spin_unlock(&ltds->ltd_lock);
911 reply = req_capsule_server_get(&req->rq_pill,
915 CERROR("%s: invalid return value: rc = %d\n",
916 lfsck_lfsck2name(com->lc_lfsck), rc);
917 spin_lock(&ltds->ltd_lock);
918 list_del_init(&ltd->ltd_layout_phase_list);
919 list_del_init(&ltd->ltd_layout_list);
920 spin_unlock(&ltds->ltd_lock);
925 switch (reply->lr_status) {
926 case LS_SCANNING_PHASE1:
928 case LS_SCANNING_PHASE2:
/* slave moved to phase 2: migrate it to the phase-2 list */
929 spin_lock(&ltds->ltd_lock);
930 list_del_init(&ltd->ltd_layout_phase_list);
931 if (ltd->ltd_dead || ltd->ltd_layout_done) {
932 spin_unlock(&ltds->ltd_lock);
936 if (lr->lr_flags & LEF_TO_OST)
937 list_add_tail(&ltd->ltd_layout_phase_list,
938 &llmd->llmd_ost_phase2_list);
940 list_add_tail(&ltd->ltd_layout_phase_list,
941 &llmd->llmd_mdt_phase2_list);
942 spin_unlock(&ltds->ltd_lock);
/* any other status: slave is done, unlink it everywhere */
945 spin_lock(&ltds->ltd_lock);
946 list_del_init(&ltd->ltd_layout_phase_list);
947 list_del_init(&ltd->ltd_layout_list);
948 spin_unlock(&ltds->ltd_lock);
955 CERROR("%s: unexpected event: rc = %d\n",
956 lfsck_lfsck2name(com->lc_lfsck), lr->lr_event);
/* release the reference pinned for this async request */
960 lfsck_component_put(env, com);
/*
 * Broadcast LE_QUERY to every target still in phase 1 — MDTs first,
 * then OSTs (the loop is re-entered with the OST list via lines missing
 * from this dump).  Targets are rotated to the list tail with a bumped
 * llmd_touch_gen so each is visited exactly once per pass; the async
 * replies are interpreted by lfsck_layout_master_async_interpret().
 */
965 static int lfsck_layout_master_query_others(const struct lu_env *env,
966 struct lfsck_component *com)
968 struct lfsck_thread_info *info = lfsck_env_info(env);
969 struct lfsck_request *lr = &info->lti_lr;
970 struct lfsck_async_interpret_args *laia = &info->lti_laia;
971 struct lfsck_instance *lfsck = com->lc_lfsck;
972 struct lfsck_layout_master_data *llmd = com->lc_data;
973 struct ptlrpc_request_set *set;
974 struct lfsck_tgt_descs *ltds;
975 struct lfsck_tgt_desc *ltd;
976 struct list_head *head;
982 set = ptlrpc_prep_set();
/* new generation marks which targets this pass has already touched */
986 llmd->llmd_touch_gen++;
987 memset(lr, 0, sizeof(*lr));
988 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
989 lr->lr_event = LE_QUERY;
990 lr->lr_active = LT_LAYOUT;
991 laia->laia_com = com;
/* query MDTs still in phase 1 first, then fall through to OSTs */
994 if (!list_empty(&llmd->llmd_mdt_phase1_list)) {
995 ltds = &lfsck->li_mdt_descs;
997 head = &llmd->llmd_mdt_phase1_list;
1001 ltds = &lfsck->li_ost_descs;
1002 lr->lr_flags = LEF_TO_OST;
1003 head = &llmd->llmd_ost_phase1_list;
1006 laia->laia_ltds = ltds;
1007 spin_lock(&ltds->ltd_lock);
1008 while (!list_empty(head)) {
1009 ltd = list_entry(head->next,
1010 struct lfsck_tgt_desc,
1011 ltd_layout_phase_list);
/* generation match: every entry has been visited this pass */
1012 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1015 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
/* rotate to tail so the loop terminates after one full cycle */
1016 list_del(&ltd->ltd_layout_phase_list);
1017 list_add_tail(&ltd->ltd_layout_phase_list, head);
/* pin the target across the async RPC */
1018 atomic_inc(&ltd->ltd_ref);
1019 laia->laia_ltd = ltd;
1020 spin_unlock(&ltds->ltd_lock);
1021 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1022 lfsck_layout_master_async_interpret,
1025 CERROR("%s: fail to query %s %x for layout: rc = %d\n",
1026 lfsck_lfsck2name(lfsck),
1027 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
1028 ltd->ltd_index, rc);
1034 spin_lock(&ltds->ltd_lock);
1036 spin_unlock(&ltds->ltd_lock);
/* wait for all queued queries of this round to complete */
1039 rc = ptlrpc_set_wait(set);
1041 ptlrpc_set_destroy(set);
/* after the MDT round, repeat for OSTs (jump target on missing line) */
1047 if (!(lr->lr_flags & LEF_TO_OST) &&
1048 list_empty(&llmd->llmd_mdt_phase1_list))
1051 ptlrpc_set_destroy(set);
1053 RETURN(rc1 != 0 ? rc1 : rc);

/*
 * The master may move to orphan handling once no MDT is still in
 * phase 1 and the OSTs are either all finished with phase 1 or some
 * have reached phase 2.
 */
1057 lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
1059 return list_empty(&llmd->llmd_mdt_phase1_list) &&
1060 (!list_empty(&llmd->llmd_ost_phase2_list) ||
1061 list_empty(&llmd->llmd_ost_phase1_list));
/*
 * Broadcast an LFSCK event (@lr) from the layout master to the involved
 * targets.  Dispatch on lr->lr_event (several case labels fall on lines
 * missing from this dump):
 *  - LE_START: notify all OSTs, then (for the sponsor) broadcast to the
 *    other MDTs or (for non-sponsors) just link them locally;
 *  - LE_STOP / LE_PHASE2_DONE: notify the listed MDTs then OSTs,
 *    draining the llmd lists as each target is contacted;
 *  - LE_PHASE1_DONE: notify the MDTs still in phase 1, using
 *    llmd_touch_gen rotation to visit each once.
 */
1064 static int lfsck_layout_master_notify_others(const struct lu_env *env,
1065 struct lfsck_component *com,
1066 struct lfsck_request *lr,
1069 struct lfsck_thread_info *info = lfsck_env_info(env);
1070 struct lfsck_async_interpret_args *laia = &info->lti_laia;
1071 struct lfsck_instance *lfsck = com->lc_lfsck;
1072 struct lfsck_layout_master_data *llmd = com->lc_data;
1073 struct lfsck_layout *lo = com->lc_file_ram;
1074 struct ptlrpc_request_set *set;
1075 struct lfsck_tgt_descs *ltds;
1076 struct lfsck_tgt_desc *ltd;
1077 struct lfsck_tgt_desc *next;
1078 struct list_head *head;
1084 set = ptlrpc_prep_set();
1088 lr->lr_active = LT_LAYOUT;
1089 laia->laia_com = com;
1092 switch (lr->lr_event) {
1094 /* Notify OSTs firstly, then other MDTs if needed. */
1095 lr->lr_flags |= LEF_TO_OST;
1096 ltds = &lfsck->li_ost_descs;
1099 laia->laia_ltds = ltds;
/* walk every registered target under the descriptor rw-sem */
1100 down_read(&ltds->ltd_rw_sem);
1101 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1102 ltd = lfsck_tgt_get(ltds, idx);
1103 LASSERT(ltd != NULL);
1105 laia->laia_ltd = ltd;
1106 ltd->ltd_layout_done = 0;
1107 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1108 lfsck_layout_master_async_interpret,
1109 laia, LFSCK_NOTIFY);
1111 CERROR("%s: fail to notify %s %x for layout "
1113 lfsck_lfsck2name(lfsck),
1114 (lr->lr_flags & LEF_TO_OST) ? "OST" :
/* a target we could not notify means the run is partial */
1117 lo->ll_flags |= LF_INCOMPLETE;
1122 up_read(&ltds->ltd_rw_sem);
1126 rc = ptlrpc_set_wait(set);
1128 ptlrpc_set_destroy(set);
/* only the sponsor needs to involve the other MDTs */
1134 if (!(flags & LPF_ALL_MDT))
1137 ltds = &lfsck->li_mdt_descs;
1138 /* The sponsor broadcasts the request to other MDTs. */
1139 if (flags & LPF_BROADCAST) {
1140 flags &= ~LPF_ALL_MDT;
1141 lr->lr_flags &= ~LEF_TO_OST;
1145 /* non-sponsors link other MDT targets locallly. */
1146 spin_lock(&ltds->ltd_lock);
1147 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1148 ltd = LTD_TGT(ltds, idx);
1149 LASSERT(ltd != NULL);
1151 if (!list_empty(&ltd->ltd_layout_list))
1154 list_add_tail(&ltd->ltd_layout_list,
1155 &llmd->llmd_mdt_list);
1156 list_add_tail(&ltd->ltd_layout_phase_list,
1157 &llmd->llmd_mdt_phase1_list);
1159 spin_unlock(&ltds->ltd_lock);
/* LE_STOP (case label on missing line); falls through to PHASE2_DONE */
1163 if (flags & LPF_BROADCAST)
1164 lr->lr_flags |= LEF_FORCE_STOP;
1165 case LE_PHASE2_DONE:
1166 /* Notify other MDTs if needed, then the OSTs. */
1167 if (flags & LPF_ALL_MDT) {
1168 /* The sponsor broadcasts the request to other MDTs. */
1169 if (flags & LPF_BROADCAST) {
1170 lr->lr_flags &= ~LEF_TO_OST;
1171 head = &llmd->llmd_mdt_list;
1172 ltds = &lfsck->li_mdt_descs;
1176 /* non-sponsors unlink other MDT targets locallly. */
1177 ltds = &lfsck->li_mdt_descs;
1178 spin_lock(&ltds->ltd_lock);
1179 list_for_each_entry_safe(ltd, next,
1180 &llmd->llmd_mdt_list,
1182 list_del_init(&ltd->ltd_layout_phase_list);
1183 list_del_init(&ltd->ltd_layout_list);
1185 spin_unlock(&ltds->ltd_lock);
/* then address the OST list */
1189 lr->lr_flags |= LEF_TO_OST;
1190 head = &llmd->llmd_ost_list;
1191 ltds = &lfsck->li_ost_descs;
1194 laia->laia_ltds = ltds;
/* drain 'head': unlink each target and send it the event */
1195 spin_lock(&ltds->ltd_lock);
1196 while (!list_empty(head)) {
1197 ltd = list_entry(head->next, struct lfsck_tgt_desc,
1199 if (!list_empty(&ltd->ltd_layout_phase_list))
1200 list_del_init(&ltd->ltd_layout_phase_list);
1201 list_del_init(&ltd->ltd_layout_list);
1202 laia->laia_ltd = ltd;
1203 spin_unlock(&ltds->ltd_lock);
1204 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1205 lfsck_layout_master_async_interpret,
1206 laia, LFSCK_NOTIFY);
1208 CERROR("%s: fail to notify %s %x for layout "
1209 "stop/phase2: rc = %d\n",
1210 lfsck_lfsck2name(lfsck),
1211 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1212 "MDT", ltd->ltd_index, rc);
1215 spin_lock(&ltds->ltd_lock);
1217 spin_unlock(&ltds->ltd_lock);
1219 if (!(flags & LPF_BROADCAST))
1224 rc = ptlrpc_set_wait(set);
1226 ptlrpc_set_destroy(set);
/* after the MDT broadcast round, loop back for the OSTs */
1232 flags &= ~LPF_BROADCAST;
1234 case LE_PHASE1_DONE:
/* visit each phase-1 MDT exactly once, using the generation trick */
1235 llmd->llmd_touch_gen++;
1236 lr->lr_flags &= ~LEF_TO_OST;
1237 ltds = &lfsck->li_mdt_descs;
1238 laia->laia_ltds = ltds;
1239 spin_lock(&ltds->ltd_lock);
1240 while (!list_empty(&llmd->llmd_mdt_phase1_list)) {
1241 ltd = list_entry(llmd->llmd_mdt_phase1_list.next,
1242 struct lfsck_tgt_desc,
1243 ltd_layout_phase_list);
1244 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1247 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1248 list_del_init(&ltd->ltd_layout_phase_list);
1249 list_add_tail(&ltd->ltd_layout_phase_list,
1250 &llmd->llmd_mdt_phase1_list);
1251 laia->laia_ltd = ltd;
1252 spin_unlock(&ltds->ltd_lock);
1253 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1254 lfsck_layout_master_async_interpret,
1255 laia, LFSCK_NOTIFY);
1257 CERROR("%s: fail to notify MDT %x for layout "
1258 "phase1 done: rc = %d\n",
1259 lfsck_lfsck2name(lfsck),
1260 ltd->ltd_index, rc);
1263 spin_lock(&ltds->ltd_lock);
1265 spin_unlock(&ltds->ltd_lock);
1268 CERROR("%s: unexpected LFSCK event: rc = %d\n",
1269 lfsck_lfsck2name(lfsck), lr->lr_event);
1275 rc = ptlrpc_set_wait(set);
1276 ptlrpc_set_destroy(set);
/* started but no OST joined: the scan cannot be complete */
1278 if (rc == 0 && lr->lr_event == LE_START &&
1279 list_empty(&llmd->llmd_ost_list))
/*
 * Record the outcome of the phase-2 (double) scan into the trace file:
 * accumulate phase-2 runtime and checked counts, set the final status
 * (COMPLETED/PARTIAL on success, the instance status or STOPPED on
 * rc == 0, FAILED otherwise), move the component to the idle list
 * unless paused, and persist — all under the component semaphore.
 * NOTE(review): the branch taken for rc > 0 vs rc == 0 is partly on
 * lines missing from this dump.
 */
1285 static int lfsck_layout_double_scan_result(const struct lu_env *env,
1286 struct lfsck_component *com,
1289 struct lfsck_instance *lfsck = com->lc_lfsck;
1290 struct lfsck_layout *lo = com->lc_file_ram;
1291 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1293 down_write(&com->lc_sem);
/* account the time spent since the last checkpoint into phase 2 */
1295 lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1296 HALF_SEC - lfsck->li_time_last_checkpoint);
1297 lo->ll_time_last_checkpoint = cfs_time_current_sec();
1298 lo->ll_objs_checked_phase2 += com->lc_new_checked;
1301 com->lc_journal = 0;
/* some target never participated: result is only partial */
1302 if (lo->ll_flags & LF_INCOMPLETE)
1303 lo->ll_status = LS_PARTIAL;
1305 lo->ll_status = LS_COMPLETED;
/* a real (non-dryrun) completed run clears the inconsistency flags */
1306 if (!(bk->lb_param & LPF_DRYRUN))
1307 lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
1308 lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
1309 lo->ll_success_count++;
1310 } else if (rc == 0) {
1311 lo->ll_status = lfsck->li_status;
1312 if (lo->ll_status == 0)
1313 lo->ll_status = LS_STOPPED;
1315 lo->ll_status = LS_FAILED;
/* paused components stay linked for resume; others go idle */
1318 if (lo->ll_status != LS_PAUSED) {
1319 spin_lock(&lfsck->li_lock);
1320 list_del_init(&com->lc_link);
1321 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1322 spin_unlock(&lfsck->li_lock);
1325 rc = lfsck_layout_store(env, com);
1327 up_write(&com->lc_sem);
/*
 * Take a server-side (local) EX IBITS lock with the given @bits on
 * @obj, storing the handle in @lh.  On enqueue failure the handle is
 * cleared so lfsck_layout_unlock() is always safe to call.
 */
1332 static int lfsck_layout_lock(const struct lu_env *env,
1333 struct lfsck_component *com,
1334 struct dt_object *obj,
1335 struct lustre_handle *lh, __u64 bits)
1337 struct lfsck_thread_info *info = lfsck_env_info(env);
1338 ldlm_policy_data_t *policy = &info->lti_policy;
1339 struct ldlm_res_id *resid = &info->lti_resid;
1340 struct lfsck_instance *lfsck = com->lc_lfsck;
1341 __u64 flags = LDLM_FL_ATOMIC_CB;
1344 LASSERT(lfsck->li_namespace != NULL);
1346 memset(policy, 0, sizeof(*policy));
1347 policy->l_inodebits.bits = bits;
/* resource name is derived from the object's FID */
1348 fid_build_reg_res_name(lfsck_dto2fid(obj), resid);
1349 rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS,
1350 policy, LCK_EX, &flags, ldlm_blocking_ast,
1351 ldlm_completion_ast, NULL, NULL, 0,
1352 LVB_T_NONE, NULL, lh);
1353 if (rc == ELDLM_OK) {
/* failure path: make the handle unmistakably unused */
1356 memset(lh, 0, sizeof(*lh));

/*
 * Release a lock taken by lfsck_layout_lock(); no-op on an unused
 * handle, and clears the handle afterwards.
 */
1363 static void lfsck_layout_unlock(struct lustre_handle *lh)
1365 if (lustre_handle_is_used(lh)) {
1366 ldlm_lock_decref(lh, LCK_EX);
1367 memset(lh, 0, sizeof(*lh));

/*
 * Stop @handle after recording @result in th_result, so the storage
 * layer can abort the transaction on failure; returns the combined
 * result (tail on missing lines).
 */
1371 static int lfsck_layout_trans_stop(const struct lu_env *env,
1372 struct dt_device *dev,
1373 struct thandle *handle, int result)
1377 handle->th_result = result;
1378 rc = dt_trans_stop(env, dev, handle);

/* Orphan OST-object scanning — placeholder, not yet implemented. */
1387 static int lfsck_layout_scan_orphan(const struct lu_env *env,
1388 struct lfsck_component *com,
1389 struct lfsck_tgt_desc *ltd)
1391 /* XXX: To be extended in other patch. */
1396 /* For the MDT-object with dangling reference, we need to re-create
1397 * the missed OST-object with the known FID/owner information. */
/*
 * Repair a dangling layout reference: under a layout+xattr ibits lock
 * on the parent MDT-object, create the missing OST-object @llr->llr_child
 * and stamp it with a filter_fid xattr pointing back at the parent
 * (f_ver carries the stripe index).  Runs in a single transaction;
 * bails out (rc = 1) if the parent is being destroyed concurrently.
 */
1398 static int lfsck_layout_recreate_ostobj(const struct lu_env *env,
1399 struct lfsck_component *com,
1400 struct lfsck_layout_req *llr,
1403 struct lfsck_thread_info *info = lfsck_env_info(env);
1404 struct filter_fid *pfid = &info->lti_new_pfid;
1405 struct dt_allocation_hint *hint = &info->lti_hint;
1406 struct dt_object *parent = llr->llr_parent->llo_obj;
1407 struct dt_object *child = llr->llr_child;
1408 struct dt_device *dev = lfsck_obj2dt_dev(child);
1409 const struct lu_fid *tfid = lu_object_fid(&parent->do_lu);
1410 struct thandle *handle;
1412 struct lustre_handle lh = { 0 };
1416 CDEBUG(D_LFSCK, "Repair dangling reference for: parent "DFID
1417 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1418 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1419 llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid);
/* block concurrent layout/xattr changes on the parent */
1421 rc = lfsck_layout_lock(env, com, parent, &lh,
1422 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1426 handle = dt_trans_create(env, dev);
1428 GOTO(unlock1, rc = PTR_ERR(handle));
1430 hint->dah_parent = NULL;
/* build the back-pointer: parent FID, with f_ver reused for the
 * stripe (LOV) index as filter_fid convention dictates here */
1432 pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
1433 pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
1434 pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
1435 buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
1437 rc = dt_declare_create(env, child, la, hint, NULL, handle);
1441 rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID,
1442 LU_XATTR_CREATE, handle);
1446 rc = dt_trans_start(env, dev, handle);
/* hold the parent read-locked so it cannot change underneath us */
1450 dt_read_lock(env, parent, 0);
1451 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1452 GOTO(unlock2, rc = 1);
1454 rc = dt_create(env, child, la, hint, NULL, handle);
1458 rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, LU_XATTR_CREATE,
1459 handle, BYPASS_CAPA);
1464 dt_read_unlock(env, parent);
1467 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1470 lfsck_layout_unlock(&lh);
/* Verify one MDT-object/OST-object pair from the request list: compare the
 * parent attributes with the child's, classify any inconsistency, and
 * (unless dry-run) repair it. Statistics are updated under lc_sem:
 * connection-class errors mark the scan LF_INCOMPLETE, other negative
 * results count as phase-1 failures, positive results count as repairs.
 * NOTE(review): the dry-run accounting and several RETURN/label lines are
 * elided in this view. */
1475 static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
1476 struct lfsck_component *com,
1477 struct lfsck_layout_req *llr)
1479 struct lfsck_layout *lo = com->lc_file_ram;
1480 struct lfsck_thread_info *info = lfsck_env_info(env);
1481 struct dt_object *parent = llr->llr_parent->llo_obj;
1482 struct dt_object *child = llr->llr_child;
1483 struct lu_attr *pla = &info->lti_la;
1484 struct lu_attr *cla = &info->lti_la2;
1485 struct lfsck_instance *lfsck = com->lc_lfsck;
1486 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1487 enum lfsck_layout_inconsistency_type type = LLIT_NONE;
1491 rc = dt_attr_get(env, parent, pla, BYPASS_CAPA);
1493 if (lu_object_is_dying(parent->do_lu.lo_header))
1499 rc = dt_attr_get(env, child, cla, BYPASS_CAPA);
/* Child missing while parent still references it: dangling reference. */
1500 if (rc == -ENOENT) {
1501 if (lu_object_is_dying(parent->do_lu.lo_header))
1504 type = LLIT_DANGLING;
1511 /* XXX: other inconsistency will be checked in other patches. */
1514 if (bk->lb_param & LPF_DRYRUN) {
1515 if (type != LLIT_NONE)
/* Re-create the missing OST-object with owner info copied from parent. */
1523 memset(cla, 0, sizeof(*cla));
1524 cla->la_uid = pla->la_uid;
1525 cla->la_gid = pla->la_gid;
1526 cla->la_mode = S_IFREG | 0666;
1527 cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
1528 LA_ATIME | LA_MTIME | LA_CTIME;
1529 rc = lfsck_layout_recreate_ostobj(env, com, llr, cla);
1532 /* XXX: other inconsistency will be fixed in other patches. */
1534 case LLIT_UNMATCHED_PAIR:
1536 case LLIT_MULTIPLE_REFERENCED:
1538 case LLIT_INCONSISTENT_OWNER:
1548 down_write(&com->lc_sem);
1550 /* If cannot touch the target server,
1551 * mark the LFSCK as INCOMPLETE. */
1552 if (rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -ETIMEDOUT ||
1553 rc == -EHOSTDOWN || rc == -EHOSTUNREACH) {
1554 lo->ll_flags |= LF_INCOMPLETE;
1555 lo->ll_objs_skipped++;
1558 lo->ll_objs_failed_phase1++;
1560 } else if (rc > 0) {
1561 LASSERTF(type > LLIT_NONE && type <= LLIT_MAX,
1562 "unknown type = %d\n", type);
/* Repair counters are indexed by inconsistency type (1-based). */
1564 lo->ll_objs_repaired[type - 1]++;
1566 up_write(&com->lc_sem);
/* Body of the LFSCK layout master assistant thread: notifies all involved
 * targets that the scan starts, drains the llmd_req_list pipeline filled by
 * the main engine (handling one request at a time), then drives the post
 * phase and the phase-2 double scan (querying/pulling OST status, scanning
 * orphans per target), and finally broadcasts the end event and updates the
 * assistant status before exiting.
 *
 * Fix(review): this extraction had HTML-entity corruption -- "&lt" had been
 * collapsed to "<", mangling "&lta->", "&ltds->" and "&ltd->"; those tokens
 * are restored below.
 * NOTE(review): many interior lines (labels, RETURNs, some declarations)
 * are elided in this view. */
1571 static int lfsck_layout_assistant(void *args)
1573 struct lfsck_thread_args *lta = args;
1574 struct lu_env *env = &lta->lta_env;
1575 struct lfsck_component *com = lta->lta_com;
1576 struct lfsck_instance *lfsck = lta->lta_lfsck;
1577 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1578 struct lfsck_position *pos = &com->lc_pos_start;
1579 struct lfsck_thread_info *info = lfsck_env_info(env);
1580 struct lfsck_request *lr = &info->lti_lr;
1581 struct lfsck_layout_master_data *llmd = com->lc_data;
1582 struct ptlrpc_thread *mthread = &lfsck->li_thread;
1583 struct ptlrpc_thread *athread = &llmd->llmd_thread;
1584 struct lfsck_layout_req *llr;
1585 struct l_wait_info lwi = { 0 };
/* Broadcast LE_START (with the bookmark parameters) to all targets. */
1591 if (lta->lta_lsp->lsp_start != NULL)
1592 flags = lta->lta_lsp->lsp_start->ls_flags;
1594 flags = bk->lb_param;
1595 memset(lr, 0, sizeof(*lr));
1596 lr->lr_event = LE_START;
1597 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1598 lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
1600 lr->lr_speed = bk->lb_speed_limit;
1601 lr->lr_version = bk->lb_version;
1602 lr->lr_param = bk->lb_param;
1603 lr->lr_async_windows = bk->lb_async_windows;
1604 if (pos->lp_oit_cookie <= 1)
1605 lr->lr_param |= LPF_RESET;
1607 rc = lfsck_layout_master_notify_others(env, com, lr, flags);
1609 CERROR("%s: fail to notify others for layout start: rc = %d\n",
1610 lfsck_lfsck2name(lfsck), rc);
/* Mark the assistant as running and wake the waiting master engine. */
1614 spin_lock(&llmd->llmd_lock);
1615 thread_set_flags(athread, SVC_RUNNING);
1616 spin_unlock(&llmd->llmd_lock);
1617 wake_up_all(&mthread->t_ctl_waitq);
/* Main pipeline: consume prefetched requests from the list head. */
1620 while (!list_empty(&llmd->llmd_req_list)) {
1621 bool wakeup = false;
1623 if (unlikely(llmd->llmd_exit))
1624 GOTO(cleanup1, rc = llmd->llmd_post_result);
1626 llr = list_entry(llmd->llmd_req_list.next,
1627 struct lfsck_layout_req,
1629 /* Only the lfsck_layout_assistant thread itself can
1630 * remove the "llr" from the head of the list, LFSCK
1631 * engine thread only inserts other new "lld" at the
1632 * end of the list. So it is safe to handle current
1633 * "llr" without the spin_lock. */
1634 rc = lfsck_layout_assistant_handle_one(env, com, llr);
1635 spin_lock(&llmd->llmd_lock);
1636 list_del_init(&llr->llr_list);
1637 if (bk->lb_async_windows != 0 &&
1638 llmd->llmd_prefetched >= bk->lb_async_windows)
1641 llmd->llmd_prefetched--;
1642 spin_unlock(&llmd->llmd_lock);
1644 wake_up_all(&mthread->t_ctl_waitq);
1646 lfsck_layout_req_fini(env, llr);
1647 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
1651 /* Wakeup the master engine if it is waiting in checkpoint. */
1652 wake_up_all(&mthread->t_ctl_waitq);
1654 l_wait_event(athread->t_ctl_waitq,
1655 !lfsck_layout_req_empty(llmd) ||
1657 llmd->llmd_to_post ||
1658 llmd->llmd_to_double_scan,
1661 if (unlikely(llmd->llmd_exit))
1662 GOTO(cleanup1, rc = llmd->llmd_post_result);
1664 if (!list_empty(&llmd->llmd_req_list))
/* Post phase: phase 1 is done, notify targets with LE_PHASE1_DONE. */
1667 if (llmd->llmd_to_post) {
1668 llmd->llmd_to_post = 0;
1669 LASSERT(llmd->llmd_post_result > 0);
1671 memset(lr, 0, sizeof(*lr));
1672 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1673 lr->lr_event = LE_PHASE1_DONE;
1674 lr->lr_status = llmd->llmd_post_result;
1675 rc = lfsck_layout_master_notify_others(env, com, lr, 0);
1677 CERROR("%s: failed to notify others "
1678 "for layout post: rc = %d\n",
1679 lfsck_lfsck2name(lfsck), rc);
1681 /* Wakeup the master engine to go ahead. */
1682 wake_up_all(&mthread->t_ctl_waitq);
1685 if (llmd->llmd_to_double_scan) {
1686 llmd->llmd_to_double_scan = 0;
1687 atomic_inc(&lfsck->li_double_scan_count);
1688 llmd->llmd_in_double_scan = 1;
1689 wake_up_all(&mthread->t_ctl_waitq);
1691 while (llmd->llmd_in_double_scan) {
1692 struct lfsck_tgt_descs *ltds =
1693 &lfsck->li_ost_descs;
1694 struct lfsck_tgt_desc *ltd;
1696 rc = lfsck_layout_master_query_others(env, com);
1697 if (lfsck_layout_master_to_orphan(llmd))
1703 /* Pull LFSCK status on related targets once
1704 * per 30 seconds if we are not notified. */
1705 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30),
1706 cfs_time_seconds(1),
1708 rc = l_wait_event(athread->t_ctl_waitq,
1709 lfsck_layout_master_to_orphan(llmd) ||
1711 !thread_is_running(mthread),
1714 if (unlikely(llmd->llmd_exit ||
1715 !thread_is_running(mthread)))
1716 GOTO(cleanup2, rc = 0);
1718 if (rc == -ETIMEDOUT)
/* Scan orphans on each target that has entered phase 2. */
1725 spin_lock(&ltds->ltd_lock);
1727 &llmd->llmd_ost_phase2_list)) {
1729 llmd->llmd_ost_phase2_list.next,
1730 struct lfsck_tgt_desc,
1731 ltd_layout_phase_list);
1733 &ltd->ltd_layout_phase_list);
1734 spin_unlock(&ltds->ltd_lock);
1736 rc = lfsck_layout_scan_orphan(env, com,
1739 bk->lb_param & LPF_FAILOUT)
1742 if (unlikely(llmd->llmd_exit ||
1743 !thread_is_running(mthread)))
1744 GOTO(cleanup2, rc = 0);
1746 spin_lock(&ltds->ltd_lock);
/* All targets past phase 1: the double scan succeeded. */
1749 if (list_empty(&llmd->llmd_ost_phase1_list)) {
1750 spin_unlock(&ltds->ltd_lock);
1751 GOTO(cleanup2, rc = 1);
1753 spin_unlock(&ltds->ltd_lock);
1759 /* Cleanup the unfinished requests. */
1760 spin_lock(&llmd->llmd_lock);
1762 llmd->llmd_assistant_status = rc;
1764 while (!list_empty(&llmd->llmd_req_list)) {
1765 llr = list_entry(llmd->llmd_req_list.next,
1766 struct lfsck_layout_req,
1768 list_del_init(&llr->llr_list);
1769 llmd->llmd_prefetched--;
/* Drop the lock while freeing: req_fini may block. */
1770 spin_unlock(&llmd->llmd_lock);
1771 lfsck_layout_req_fini(env, llr);
1772 spin_lock(&llmd->llmd_lock);
1774 spin_unlock(&llmd->llmd_lock);
1776 LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n",
1777 llmd->llmd_prefetched);
/* Broadcast the final event/status derived from how we stopped. */
1780 memset(lr, 0, sizeof(*lr));
1781 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1783 lr->lr_event = LE_PHASE2_DONE;
1786 } else if (rc == 0) {
1787 lr->lr_event = LE_STOP;
1788 if (lfsck->li_status == LS_PAUSED ||
1789 lfsck->li_status == LS_CO_PAUSED) {
1791 lr->lr_status = LS_CO_PAUSED;
1792 } else if (lfsck->li_status == LS_STOPPED ||
1793 lfsck->li_status == LS_CO_STOPPED) {
1794 flags = lfsck->li_flags;
1795 if (flags & LPF_BROADCAST)
1796 lr->lr_status = LS_STOPPED;
1798 lr->lr_status = LS_CO_STOPPED;
1803 lr->lr_event = LE_STOP;
1805 lr->lr_status = LS_CO_FAILED;
1808 rc1 = lfsck_layout_master_notify_others(env, com, lr, flags);
1810 CERROR("%s: failed to notify others for layout quit: rc = %d\n",
1811 lfsck_lfsck2name(lfsck), rc1);
1815 /* Under force exit case, some requests may be just freed without
1816 * verification, those objects should be re-handled when next run.
1817 * So not update the on-disk tracing file under such case. */
1818 if (!llmd->llmd_exit)
1819 rc1 = lfsck_layout_double_scan_result(env, com, rc);
1822 if (llmd->llmd_in_double_scan)
1823 atomic_dec(&lfsck->li_double_scan_count);
/* Publish the final status, mark the thread stopped, free the args. */
1825 spin_lock(&llmd->llmd_lock);
1826 llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc);
1827 thread_set_flags(athread, SVC_STOPPED);
1828 wake_up_all(&mthread->t_ctl_waitq);
1829 spin_unlock(&llmd->llmd_lock);
1830 lfsck_thread_args_fini(lta);
/* ptlrpc interpret callback for the slave's async LE_QUERY RPC: if the
 * master is no longer scanning (or the RPC failed -- presumably a target
 * crash, see the in-body comment), drop it from llsd_master_list. Always
 * releases the references taken when the request was packed. */
1836 lfsck_layout_slave_async_interpret(const struct lu_env *env,
1837 struct ptlrpc_request *req,
1840 struct lfsck_layout_slave_async_args *llsaa = args;
1841 struct obd_export *exp = llsaa->llsaa_exp;
1842 struct lfsck_component *com = llsaa->llsaa_com;
1843 struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst;
1844 struct lfsck_layout_slave_data *llsd = com->lc_data;
1848 /* It is quite probably caused by target crash,
1849 * to make the LFSCK can go ahead, assume that
1850 * the target finished the LFSCK prcoessing. */
1853 struct lfsck_reply *lr;
1855 lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY);
1856 if (lr->lr_status != LS_SCANNING_PHASE1 &&
1857 lr->lr_status != LS_SCANNING_PHASE2)
1861 lfsck_layout_llst_del(llsd, llst);
1862 lfsck_layout_llst_put(llst);
1863 lfsck_component_put(env, com);
1864 class_export_put(exp);
/* Pack an async LFSCK_QUERY RPC for one master target and add it to @set.
 * A component reference is taken for the interpret callback; the export
 * and llst references are consumed there as well.
 * NOTE(review): the copy of *lr into the request buffer and the RETURN
 * lines are elided in this view. */
1869 static int lfsck_layout_async_query(const struct lu_env *env,
1870 struct lfsck_component *com,
1871 struct obd_export *exp,
1872 struct lfsck_layout_slave_target *llst,
1873 struct lfsck_request *lr,
1874 struct ptlrpc_request_set *set)
1876 struct lfsck_layout_slave_async_args *llsaa;
1877 struct ptlrpc_request *req;
1878 struct lfsck_request *tmp;
1882 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_QUERY);
1886 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_QUERY);
1888 ptlrpc_request_free(req);
1892 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
1894 ptlrpc_request_set_replen(req);
/* Stash callback context in the request's async-args area. */
1896 llsaa = ptlrpc_req_async_args(req);
1897 llsaa->llsaa_exp = exp;
1898 llsaa->llsaa_com = lfsck_component_get(com);
1899 llsaa->llsaa_llst = llst;
1900 req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
1901 ptlrpc_set_add_req(set, req);
/* Pack an async LFSCK_NOTIFY RPC (no interpret callback needed -- it is
 * fire-and-forget apart from the set wait) and add it to @set.
 * NOTE(review): the copy of *lr into the request buffer is elided in
 * this view. */
1906 static int lfsck_layout_async_notify(const struct lu_env *env,
1907 struct obd_export *exp,
1908 struct lfsck_request *lr,
1909 struct ptlrpc_request_set *set)
1911 struct ptlrpc_request *req;
1912 struct lfsck_request *tmp;
1916 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_NOTIFY);
1920 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
1922 ptlrpc_request_free(req);
1926 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
1928 ptlrpc_request_set_replen(req);
1929 ptlrpc_set_add_req(set, req);
/* Slave side: send an async LE_QUERY to every master on llsd_master_list
 * and wait for all replies. The llsd_touch_gen counter detects when the
 * walk has wrapped around the list (each visited entry is re-queued at
 * the tail with the current generation stamped). */
1935 lfsck_layout_slave_query_master(const struct lu_env *env,
1936 struct lfsck_component *com)
1938 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
1939 struct lfsck_instance *lfsck = com->lc_lfsck;
1940 struct lfsck_layout_slave_data *llsd = com->lc_data;
1941 struct lfsck_layout_slave_target *llst;
1942 struct obd_export *exp;
1943 struct ptlrpc_request_set *set;
1949 set = ptlrpc_prep_set();
1953 memset(lr, 0, sizeof(*lr));
1954 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1955 lr->lr_event = LE_QUERY;
1956 lr->lr_active = LT_LAYOUT;
1958 llsd->llsd_touch_gen++;
1959 spin_lock(&llsd->llsd_lock);
1960 while (!list_empty(&llsd->llsd_master_list)) {
1961 llst = list_entry(llsd->llsd_master_list.next,
1962 struct lfsck_layout_slave_target,
/* Generation already current: we have visited the whole list. */
1964 if (llst->llst_gen == llsd->llsd_touch_gen)
1967 llst->llst_gen = llsd->llsd_touch_gen;
1968 list_del(&llst->llst_list);
1969 list_add_tail(&llst->llst_list,
1970 &llsd->llsd_master_list);
1971 atomic_inc(&llst->llst_ref);
1972 spin_unlock(&llsd->llsd_lock);
/* Find the LWP export towards this master; drop the target if gone. */
1974 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
1977 lfsck_layout_llst_del(llsd, llst);
1978 lfsck_layout_llst_put(llst);
1979 spin_lock(&llsd->llsd_lock);
1983 rc = lfsck_layout_async_query(env, com, exp, llst, lr, set);
1985 CERROR("%s: slave fail to query %s for layout: "
1986 "rc = %d\n", lfsck_lfsck2name(lfsck),
1987 exp->exp_obd->obd_name, rc);
1989 lfsck_layout_llst_put(llst);
1990 class_export_put(exp);
1994 spin_lock(&llsd->llsd_lock);
1996 spin_unlock(&llsd->llsd_lock);
1999 rc = ptlrpc_set_wait(set);
2000 ptlrpc_set_destroy(set);
2002 RETURN(rc1 != 0 ? rc1 : rc);
/* Slave side: broadcast @event/@result (flagged LEF_FROM_OST) to every
 * master on llsd_master_list via async LFSCK_NOTIFY RPCs, then wait for
 * the whole set. Uses the same llsd_touch_gen round-trip detection as
 * lfsck_layout_slave_query_master(). */
2006 lfsck_layout_slave_notify_master(const struct lu_env *env,
2007 struct lfsck_component *com,
2008 enum lfsck_events event, int result)
2010 struct lfsck_instance *lfsck = com->lc_lfsck;
2011 struct lfsck_layout_slave_data *llsd = com->lc_data;
2012 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
2013 struct lfsck_layout_slave_target *llst;
2014 struct obd_export *exp;
2015 struct ptlrpc_request_set *set;
2020 set = ptlrpc_prep_set();
2024 memset(lr, 0, sizeof(*lr));
2025 lr->lr_event = event;
2026 lr->lr_flags = LEF_FROM_OST;
2027 lr->lr_status = result;
2028 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2029 lr->lr_active = LT_LAYOUT;
2030 llsd->llsd_touch_gen++;
2031 spin_lock(&llsd->llsd_lock);
2032 while (!list_empty(&llsd->llsd_master_list)) {
2033 llst = list_entry(llsd->llsd_master_list.next,
2034 struct lfsck_layout_slave_target,
/* Generation already current: full list has been visited. */
2036 if (llst->llst_gen == llsd->llsd_touch_gen)
2039 llst->llst_gen = llsd->llsd_touch_gen;
2040 list_del(&llst->llst_list);
2041 list_add_tail(&llst->llst_list,
2042 &llsd->llsd_master_list);
2043 atomic_inc(&llst->llst_ref);
2044 spin_unlock(&llsd->llsd_lock);
2046 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
2049 lfsck_layout_llst_del(llsd, llst);
2050 lfsck_layout_llst_put(llst);
2051 spin_lock(&llsd->llsd_lock);
2055 rc = lfsck_layout_async_notify(env, exp, lr, set);
2057 CERROR("%s: slave fail to notify %s for layout: "
2058 "rc = %d\n", lfsck_lfsck2name(lfsck),
2059 exp->exp_obd->obd_name, rc);
2062 lfsck_layout_llst_put(llst);
2063 class_export_put(exp);
2064 spin_lock(&llsd->llsd_lock);
2066 spin_unlock(&llsd->llsd_lock);
2069 rc = ptlrpc_set_wait(set);
2071 ptlrpc_set_destroy(set);
/* Reset the in-RAM tracing file to LS_INIT and persist it. On a plain
 * reset (init == false) the lifetime counters ll_success_count and
 * ll_time_last_complete survive the memset; a full init wipes them too. */
2078 static int lfsck_layout_reset(const struct lu_env *env,
2079 struct lfsck_component *com, bool init)
2081 struct lfsck_layout *lo = com->lc_file_ram;
2084 down_write(&com->lc_sem);
2086 memset(lo, 0, com->lc_file_size);
2088 __u32 count = lo->ll_success_count;
2089 __u64 last_time = lo->ll_time_last_complete;
2091 memset(lo, 0, com->lc_file_size);
2092 lo->ll_success_count = count;
2093 lo->ll_time_last_complete = last_time;
2096 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
2097 lo->ll_status = LS_INIT;
2099 rc = lfsck_layout_store(env, com);
2100 up_write(&com->lc_sem);
/* Account one phase-1 failure (optionally also one newly-checked object)
 * and remember the OI iterator position of the first inconsistency so a
 * later run can restart from there. */
2105 static void lfsck_layout_fail(const struct lu_env *env,
2106 struct lfsck_component *com, bool new_checked)
2108 struct lfsck_layout *lo = com->lc_file_ram;
2110 down_write(&com->lc_sem);
2112 com->lc_new_checked++;
2113 lo->ll_objs_failed_phase1++;
2114 if (lo->ll_pos_first_inconsistent == 0) {
2115 struct lfsck_instance *lfsck = com->lc_lfsck;
2117 lo->ll_pos_first_inconsistent =
2118 lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
2121 up_write(&com->lc_sem);
/* Master checkpoint: wait until the assistant has drained llmd_req_list
 * (or either thread stops), then fold the current scan position, run
 * time and per-checkpoint counters into the tracing file and store it.
 * Skipped entirely when nothing new was checked and !init. */
2124 static int lfsck_layout_master_checkpoint(const struct lu_env *env,
2125 struct lfsck_component *com, bool init)
2127 struct lfsck_instance *lfsck = com->lc_lfsck;
2128 struct lfsck_layout *lo = com->lc_file_ram;
2129 struct lfsck_layout_master_data *llmd = com->lc_data;
2130 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2131 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2132 struct l_wait_info lwi = { 0 };
2135 if (com->lc_new_checked == 0 && !init)
2138 l_wait_event(mthread->t_ctl_waitq,
2139 list_empty(&llmd->llmd_req_list) ||
2140 !thread_is_running(mthread) ||
2141 thread_is_stopped(athread),
2144 if (!thread_is_running(mthread) || thread_is_stopped(athread))
2147 down_write(&com->lc_sem);
2149 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2151 lo->ll_pos_last_checkpoint =
2152 lfsck->li_pos_current.lp_oit_cookie;
/* HALF_SEC rounds the elapsed jiffies to the nearest second. */
2153 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2154 HALF_SEC - lfsck->li_time_last_checkpoint);
2155 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2156 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2157 com->lc_new_checked = 0;
2160 rc = lfsck_layout_store(env, com);
2161 up_write(&com->lc_sem);
/* Slave checkpoint: same bookkeeping as the master variant but with no
 * assistant thread to wait for -- just update position/time/counters in
 * the tracing file and store it. Skipped when nothing new and !init. */
2166 static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
2167 struct lfsck_component *com, bool init)
2169 struct lfsck_instance *lfsck = com->lc_lfsck;
2170 struct lfsck_layout *lo = com->lc_file_ram;
2173 if (com->lc_new_checked == 0 && !init)
2176 down_write(&com->lc_sem);
2179 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2181 lo->ll_pos_last_checkpoint =
2182 lfsck->li_pos_current.lp_oit_cookie;
2183 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2184 HALF_SEC - lfsck->li_time_last_checkpoint);
2185 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2186 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2187 com->lc_new_checked = 0;
2190 rc = lfsck_layout_store(env, com);
2192 up_write(&com->lc_sem);
/* Common prep for master and slave: choose the starting status and the
 * OI iterator cookie for this run. A completed/partial previous run is
 * reset first; a previously-scanned instance may jump straight to
 * phase 2 (or redo phase 1 from the first inconsistency when dry-run
 * results are being dropped); otherwise phase 1 resumes from the last
 * checkpoint. */
2197 static int lfsck_layout_prep(const struct lu_env *env,
2198 struct lfsck_component *com)
2200 struct lfsck_instance *lfsck = com->lc_lfsck;
2201 struct lfsck_layout *lo = com->lc_file_ram;
2202 struct lfsck_position *pos = &com->lc_pos_start;
2204 fid_zero(&pos->lp_dir_parent);
2205 pos->lp_dir_cookie = 0;
2206 if (lo->ll_status == LS_COMPLETED ||
2207 lo->ll_status == LS_PARTIAL) {
2210 rc = lfsck_layout_reset(env, com, false);
2215 down_write(&com->lc_sem);
2217 lo->ll_time_latest_start = cfs_time_current_sec();
2219 spin_lock(&lfsck->li_lock);
2220 if (lo->ll_flags & LF_SCANNED_ONCE) {
2221 if (!lfsck->li_drop_dryrun ||
2222 lo->ll_pos_first_inconsistent == 0) {
/* Already scanned once: go straight to double scan. */
2223 lo->ll_status = LS_SCANNING_PHASE2;
2224 list_del_init(&com->lc_link);
2225 list_add_tail(&com->lc_link,
2226 &lfsck->li_list_double_scan);
2227 pos->lp_oit_cookie = 0;
/* Dropping dry-run results: redo phase 1, clearing statistics. */
2231 lo->ll_status = LS_SCANNING_PHASE1;
2232 lo->ll_run_time_phase1 = 0;
2233 lo->ll_run_time_phase2 = 0;
2234 lo->ll_objs_checked_phase1 = 0;
2235 lo->ll_objs_checked_phase2 = 0;
2236 lo->ll_objs_failed_phase1 = 0;
2237 lo->ll_objs_failed_phase2 = 0;
2238 for (i = 0; i < LLIT_MAX; i++)
2239 lo->ll_objs_repaired[i] = 0;
2241 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2244 lo->ll_status = LS_SCANNING_PHASE1;
2245 if (!lfsck->li_drop_dryrun ||
2246 lo->ll_pos_first_inconsistent == 0)
2247 pos->lp_oit_cookie = lo->ll_pos_last_checkpoint + 1;
2249 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2251 spin_unlock(&lfsck->li_lock);
2253 up_write(&com->lc_sem);
/* Slave prep: run the common prep, then (when entering phase 1 with a
 * valid requesting-MDT index) register that master on the slave's
 * llsd_master_list for later query/notify traffic. */
2258 static int lfsck_layout_slave_prep(const struct lu_env *env,
2259 struct lfsck_component *com,
2260 struct lfsck_start_param *lsp)
2262 struct lfsck_layout *lo = com->lc_file_ram;
2263 struct lfsck_layout_slave_data *llsd = com->lc_data;
2266 /* XXX: For a new scanning, generate OST-objects
2267 * bitmap for orphan detection. */
2269 rc = lfsck_layout_prep(env, com);
2270 if (rc != 0 || lo->ll_status != LS_SCANNING_PHASE1 ||
2271 !lsp->lsp_index_valid)
2274 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
/* Master prep: run the common prep, reset the assistant bookkeeping in
 * llmd, then spawn the "lfsck_layout" assistant thread and wait until it
 * reports running (or stopped, in which case its status is returned). */
2279 static int lfsck_layout_master_prep(const struct lu_env *env,
2280 struct lfsck_component *com,
2281 struct lfsck_start_param *lsp)
2283 struct lfsck_instance *lfsck = com->lc_lfsck;
2284 struct lfsck_layout_master_data *llmd = com->lc_data;
2285 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2286 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2287 struct lfsck_thread_args *lta;
2291 rc = lfsck_layout_prep(env, com);
2295 llmd->llmd_assistant_status = 0;
2296 llmd->llmd_post_result = 0;
2297 llmd->llmd_to_post = 0;
2298 llmd->llmd_to_double_scan = 0;
2299 llmd->llmd_in_double_scan = 0;
2300 llmd->llmd_exit = 0;
2301 thread_set_flags(athread, 0);
2303 lta = lfsck_thread_args_init(lfsck, com, lsp);
2305 RETURN(PTR_ERR(lta));
2307 rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"));
2308 if (IS_ERR_VALUE(rc)) {
2309 CERROR("%s: Cannot start LFSCK layout assistant thread: "
2310 "rc = %ld\n", lfsck_lfsck2name(lfsck), rc);
2311 lfsck_thread_args_fini(lta);
2313 struct l_wait_info lwi = { 0 };
2315 l_wait_event(mthread->t_ctl_waitq,
2316 thread_is_running(athread) ||
2317 thread_is_stopped(athread),
2319 if (unlikely(!thread_is_running(athread)))
2320 rc = llmd->llmd_assistant_status;
/* Pre-fetch the attribute for each stripe in the given layout EA. */
/* For every stripe of @parent's LOV EA, locate the OST target, find the
 * child OST-object, declare the attr/filter_fid fetches, and queue a
 * lfsck_layout_req on llmd_req_list for the assistant thread; throttled
 * by the async window so no more than lb_async_windows requests are
 * prefetched at once.
 * NOTE(review): error labels and several cleanup lines are elided in
 * this view. */
2329 static int lfsck_layout_scan_stripes(const struct lu_env *env,
2330 struct lfsck_component *com,
2331 struct dt_object *parent,
2332 struct lov_mds_md_v1 *lmm)
2334 struct lfsck_thread_info *info = lfsck_env_info(env);
2335 struct lfsck_instance *lfsck = com->lc_lfsck;
2336 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2337 struct lfsck_layout *lo = com->lc_file_ram;
2338 struct lfsck_layout_master_data *llmd = com->lc_data;
2339 struct lfsck_layout_object *llo = NULL;
2340 struct lov_ost_data_v1 *objs;
2341 struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs;
2342 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2343 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2344 struct l_wait_info lwi = { 0 };
2352 buf = lfsck_buf_get(env, &info->lti_old_pfid,
2353 sizeof(struct filter_fid_old));
2354 count = le16_to_cpu(lmm->lmm_stripe_count);
2355 gen = le16_to_cpu(lmm->lmm_layout_gen);
/* V1 and V3 layouts differ only in where lmm_objects starts. */
2356 if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1)
2357 objs = &(lmm->lmm_objects[0]);
2359 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
2361 for (i = 0; i < count; i++, objs++) {
2362 struct lu_fid *fid = &info->lti_fid;
2363 struct ost_id *oi = &info->lti_oi;
2364 struct lfsck_layout_req *llr;
2365 struct lfsck_tgt_desc *tgt = NULL;
2366 struct dt_object *cobj = NULL;
2368 le32_to_cpu(objs->l_ost_idx);
2369 bool wakeup = false;
/* Throttle: wait until the async window has room (or threads stop). */
2371 l_wait_event(mthread->t_ctl_waitq,
2372 bk->lb_async_windows == 0 ||
2373 llmd->llmd_prefetched < bk->lb_async_windows ||
2374 !thread_is_running(mthread) ||
2375 thread_is_stopped(athread),
2378 if (unlikely(!thread_is_running(mthread)) ||
2379 thread_is_stopped(athread))
2382 ostid_le_to_cpu(&objs->l_ost_oi, oi);
2383 ostid_to_fid(fid, oi, index);
2384 tgt = lfsck_tgt_get(ltds, index);
2385 if (unlikely(tgt == NULL)) {
/* Unknown OST: cannot verify this stripe, scan is incomplete. */
2386 lo->ll_flags |= LF_INCOMPLETE;
2390 cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid);
2396 rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA);
2400 rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID,
2406 llo = lfsck_layout_object_init(env, parent, gen);
2413 llr = lfsck_layout_req_init(llo, cobj, index, i);
2420 spin_lock(&llmd->llmd_lock);
2421 if (llmd->llmd_assistant_status < 0) {
2422 spin_unlock(&llmd->llmd_lock);
2423 lfsck_layout_req_fini(env, llr);
2425 RETURN(llmd->llmd_assistant_status);
2428 list_add_tail(&llr->llr_list, &llmd->llmd_req_list);
2429 if (llmd->llmd_prefetched == 0)
2432 llmd->llmd_prefetched++;
2433 spin_unlock(&llmd->llmd_lock);
2435 wake_up_all(&athread->t_ctl_waitq);
/* Per-stripe failure accounting under lc_sem. */
2438 down_write(&com->lc_sem);
2439 com->lc_new_checked++;
2441 lo->ll_objs_failed_phase1++;
2442 up_write(&com->lc_sem);
2444 if (cobj != NULL && !IS_ERR(cobj))
2445 lu_object_put(env, &cobj->do_lu);
2447 if (likely(tgt != NULL))
2450 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
2457 if (llo != NULL && !IS_ERR(llo))
2458 lfsck_layout_object_put(env, llo);
/* For the given object, read its layout EA locally. For each stripe, pre-fetch
 * the OST-object's attribute and generate an structure lfsck_layout_req on the
 * list ::llmd_req_list.
 *
 * For each request on above list, the lfsck_layout_assistant thread compares
 * the OST side attribute with local attribute, if inconsistent, then repair it.
 *
 * All above processing is async mode with pipeline. */
/* Also verifies (and, unless dry-run, repairs under lock + transaction)
 * a mismatched lmm_oi in the LOV EA before handing the stripes to
 * lfsck_layout_scan_stripes().
 * NOTE(review): several GOTO/label and cleanup lines are elided in this
 * view. */
2471 static int lfsck_layout_master_exec_oit(const struct lu_env *env,
2472 struct lfsck_component *com,
2473 struct dt_object *obj)
2475 struct lfsck_thread_info *info = lfsck_env_info(env);
2476 struct ost_id *oi = &info->lti_oi;
2477 struct lfsck_layout *lo = com->lc_file_ram;
2478 struct lfsck_layout_master_data *llmd = com->lc_data;
2479 struct lfsck_instance *lfsck = com->lc_lfsck;
2480 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2481 struct thandle *handle = NULL;
2482 struct lu_buf *buf = &info->lti_big_buf;
2483 struct lov_mds_md_v1 *lmm = NULL;
2484 struct dt_device *dev = lfsck->li_bottom;
2485 struct lustre_handle lh = { 0 };
2486 ssize_t buflen = buf->lb_len;
2488 bool locked = false;
2489 bool stripe = false;
/* Only regular files carry a LOV EA worth checking. */
2492 if (!S_ISREG(lfsck_object_type(obj)))
2495 if (llmd->llmd_assistant_status < 0)
2496 GOTO(out, rc = -ESRCH);
/* Expected lmm_oi is derived from the object's own FID. */
2498 fid_to_lmm_oi(lfsck_dto2fid(obj), oi);
2499 lmm_oi_cpu_to_le(oi, oi);
2500 dt_read_lock(env, obj, 0);
2504 rc = lfsck_layout_get_lovea(env, obj, buf, &buflen);
2510 rc = lfsck_layout_verify_header(lmm);
2514 if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) == 0)
2515 GOTO(out, stripe = true);
2517 /* Inconsistent lmm_oi, should be repaired. */
2518 CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n",
2519 PFID(lfsck_dto2fid(obj)));
2521 if (bk->lb_param & LPF_DRYRUN) {
2522 down_write(&com->lc_sem);
2523 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
2524 up_write(&com->lc_sem);
2526 GOTO(out, stripe = true);
/* First pass: drop the read lock, take the LDLM lock, then retry. */
2529 if (!lustre_handle_is_used(&lh)) {
2530 dt_read_unlock(env, obj);
2532 buf->lb_len = buflen;
2533 rc = lfsck_layout_lock(env, com, obj, &lh,
2534 MDS_INODELOCK_LAYOUT |
2535 MDS_INODELOCK_XATTR);
2539 handle = dt_trans_create(env, dev);
2541 GOTO(out, rc = PTR_ERR(handle));
2543 rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
2544 LU_XATTR_REPLACE, handle);
2548 rc = dt_trans_start_local(env, dev, handle);
2552 dt_write_lock(env, obj, 0);
2559 rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV,
2560 LU_XATTR_REPLACE, handle, BYPASS_CAPA);
2564 down_write(&com->lc_sem);
2565 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
2566 up_write(&com->lc_sem);
2568 GOTO(out, stripe = true);
2572 if (lustre_handle_is_used(&lh))
2573 dt_write_unlock(env, obj);
2575 dt_read_unlock(env, obj);
2578 if (handle != NULL && !IS_ERR(handle))
2579 dt_trans_stop(env, dev, handle);
2581 lfsck_layout_unlock(&lh);
2583 rc = lfsck_layout_scan_stripes(env, com, obj, lmm);
2585 down_write(&com->lc_sem);
2586 com->lc_new_checked++;
2588 lo->ll_objs_failed_phase1++;
2589 up_write(&com->lc_sem);
2591 buf->lb_len = buflen;
/* Slave (OST) side per-object scan step: for each genuine OST-object,
 * track the highest object id seen per sequence in a lfsck_layout_seq,
 * loading its on-disk LAST_ID on first sight; if an id exceeds the
 * stored LAST_ID and a reload does not explain it, mark the LAST_ID
 * file crashed and notify for rebuilding. All under lc_sem. */
2596 static int lfsck_layout_slave_exec_oit(const struct lu_env *env,
2597 struct lfsck_component *com,
2598 struct dt_object *obj)
2600 struct lfsck_instance *lfsck = com->lc_lfsck;
2601 struct lfsck_layout *lo = com->lc_file_ram;
2602 const struct lu_fid *fid = lfsck_dto2fid(obj);
2603 struct lfsck_layout_slave_data *llsd = com->lc_data;
2604 struct lfsck_layout_seq *lls;
2610 /* XXX: Update OST-objects bitmap for orphan detection. */
2612 LASSERT(llsd != NULL);
2614 down_write(&com->lc_sem);
/* Skip FIDs that are not real OST-object FIDs (IDIF handled apart). */
2615 if (fid_is_idif(fid))
2617 else if (!fid_is_norm(fid) ||
2618 !fid_is_for_ostobj(env, lfsck->li_next, obj, fid))
2619 GOTO(unlock, rc = 0);
2622 com->lc_new_checked++;
2624 lls = lfsck_layout_seq_lookup(llsd, seq);
/* First object seen in this sequence: set up tracking + load LAST_ID. */
2627 if (unlikely(lls == NULL))
2628 GOTO(unlock, rc = -ENOMEM);
2630 INIT_LIST_HEAD(&lls->lls_list);
2632 rc = lfsck_layout_lastid_load(env, com, lls);
2634 lo->ll_objs_failed_phase1++;
2639 lfsck_layout_seq_insert(llsd, lls);
2642 if (unlikely(fid_is_last_id(fid)))
2643 GOTO(unlock, rc = 0);
2646 if (oid > lls->lls_lastid_known)
2647 lls->lls_lastid_known = oid;
2649 if (oid > lls->lls_lastid) {
2650 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
2651 /* OFD may create new objects during LFSCK scanning. */
2652 rc = lfsck_layout_lastid_reload(env, com, lls);
2653 if (unlikely(rc != 0))
2654 CWARN("%s: failed to reload LAST_ID for "LPX64
2656 lfsck_lfsck2name(com->lc_lfsck),
2658 if (oid <= lls->lls_lastid)
2659 GOTO(unlock, rc = 0);
2661 LASSERT(lfsck->li_out_notify != NULL);
2663 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
2664 LE_LASTID_REBUILDING);
2665 lo->ll_flags |= LF_CRASHED_LASTID;
2668 lls->lls_lastid = oid;
2672 GOTO(unlock, rc = 0);
2675 up_write(&com->lc_sem);
/* Directory-entry hook of the layout component: nothing to do, the
 * layout scan is driven purely by the OI iterator.
 * NOTE(review): the body/RETURN is elided in this view. */
2680 static int lfsck_layout_exec_dir(const struct lu_env *env,
2681 struct lfsck_component *com,
2682 struct dt_object *obj,
2683 struct lu_dirent *ent)
/* Master post (end of phase 1): hand @result to the assistant, wait for
 * it to drain the request list (or stop), then move the component to the
 * double-scan or idle list according to the outcome, fold the final
 * phase-1 statistics into the tracing file, and store it. */
2688 static int lfsck_layout_master_post(const struct lu_env *env,
2689 struct lfsck_component *com,
2690 int result, bool init)
2692 struct lfsck_instance *lfsck = com->lc_lfsck;
2693 struct lfsck_layout *lo = com->lc_file_ram;
2694 struct lfsck_layout_master_data *llmd = com->lc_data;
2695 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2696 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2697 struct l_wait_info lwi = { 0 };
2702 llmd->llmd_post_result = result;
2703 llmd->llmd_to_post = 1;
/* Non-positive result: tell the assistant to bail out entirely. */
2704 if (llmd->llmd_post_result <= 0)
2705 llmd->llmd_exit = 1;
2707 wake_up_all(&athread->t_ctl_waitq);
2708 l_wait_event(mthread->t_ctl_waitq,
2709 (result > 0 && list_empty(&llmd->llmd_req_list)) ||
2710 thread_is_stopped(athread),
2713 if (llmd->llmd_assistant_status < 0)
2714 result = llmd->llmd_assistant_status;
2716 down_write(&com->lc_sem);
2717 spin_lock(&lfsck->li_lock);
2718 /* When LFSCK failed, there may be some prefetched objects those are
2719 * not been processed yet, we do not know the exactly position, then
2720 * just restart from last check-point next time. */
2721 if (!init && !llmd->llmd_exit)
2722 lo->ll_pos_last_checkpoint =
2723 lfsck->li_pos_current.lp_oit_cookie;
2726 lo->ll_status = LS_SCANNING_PHASE2;
2727 lo->ll_flags |= LF_SCANNED_ONCE;
2728 lo->ll_flags &= ~LF_UPGRADE;
2729 list_del_init(&com->lc_link);
2730 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
2731 } else if (result == 0) {
2732 lo->ll_status = lfsck->li_status;
2733 if (lo->ll_status == 0)
2734 lo->ll_status = LS_STOPPED;
2735 if (lo->ll_status != LS_PAUSED) {
2736 list_del_init(&com->lc_link);
2737 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
2740 lo->ll_status = LS_FAILED;
2741 list_del_init(&com->lc_link);
2742 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
2744 spin_unlock(&lfsck->li_lock);
2747 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2748 HALF_SEC - lfsck->li_time_last_checkpoint);
2749 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2750 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2751 com->lc_new_checked = 0;
2754 rc = lfsck_layout_store(env, com);
2755 up_write(&com->lc_sem);
/*
 * Post-processing on an LFSCK slave (OST side) after layout phase-1.
 *
 * Stores the per-sequence LAST_ID files, updates the in-RAM trace file
 * state under com->lc_sem / lfsck->li_lock (mirroring the master-side
 * logic in lfsck_layout_master_post), notifies the upper layer through
 * li_out_notify, persists the trace file, and finally reports
 * LE_PHASE1_DONE to the LFSCK master.
 *
 * \param[in] env	lustre environment
 * \param[in] com	layout LFSCK component
 * \param[in] result	phase-1 result; > 0 success, 0 stopped/paused, < 0 failed
 * \param[in] init	true during (re)initialization
 *
 * NOTE(review): several lines (branch braces, the li_out_notify event
 * argument around original line 2808, ENTRY/RETURN) are elided from this
 * view of the file.
 */
2760 static int lfsck_layout_slave_post(const struct lu_env *env,
2761 struct lfsck_component *com,
2762 int result, bool init)
2764 struct lfsck_instance *lfsck = com->lc_lfsck;
2765 struct lfsck_layout *lo = com->lc_file_ram;
/* Flush any dirty per-sequence LAST_ID objects to disk first. */
2769 rc = lfsck_layout_lastid_store(env, com);
2773 LASSERT(lfsck->li_out_notify != NULL);
2775 down_write(&com->lc_sem);
2777 spin_lock(&lfsck->li_lock);
2779 lo->ll_pos_last_checkpoint =
2780 lfsck->li_pos_current.lp_oit_cookie;
/* Successful phase 1: move on to phase 2; a completed scan also
 * clears the crashed-LASTID and upgrade flags. */
2782 lo->ll_status = LS_SCANNING_PHASE2;
2783 lo->ll_flags |= LF_SCANNED_ONCE;
2784 if (lo->ll_flags & LF_CRASHED_LASTID) {
2786 lo->ll_flags &= ~LF_CRASHED_LASTID;
2788 lo->ll_flags &= ~LF_UPGRADE;
2789 list_del_init(&com->lc_link);
2790 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
2791 } else if (result == 0) {
/* Stopped or paused: inherit the instance status. */
2792 lo->ll_status = lfsck->li_status;
2793 if (lo->ll_status == 0)
2794 lo->ll_status = LS_STOPPED;
2795 if (lo->ll_status != LS_PAUSED) {
2796 list_del_init(&com->lc_link);
2797 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
/* Failure: park the component on the idle list. */
2800 lo->ll_status = LS_FAILED;
2801 list_del_init(&com->lc_link);
2802 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
2804 spin_unlock(&lfsck->li_lock);
/* Tell the upper layer about the state change.
 * NOTE(review): the event argument is on an elided line. */
2807 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
/* Accumulate phase-1 timing/statistics and persist the trace file. */
2811 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2812 HALF_SEC - lfsck->li_time_last_checkpoint);
2813 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2814 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2815 com->lc_new_checked = 0;
2818 rc = lfsck_layout_store(env, com);
2820 up_write(&com->lc_sem);
/* Report phase-1 completion (with the result code) to the master. */
2822 lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result);
/*
 * Dump the layout LFSCK state into \a buf (procfs-style text output).
 *
 * Under com->lc_sem (read), prints the component name, status, flags and
 * parameters, the three "time_since_*" values, the scan positions, the
 * repair statistics per lfsck_layout_inconsistency_type, and -- depending
 * on ll_status -- either live phase-1 speed figures plus the current
 * otable-iterator position, or the averaged post-scan figures.
 *
 * \param[in]  env	lustre environment
 * \param[in]  com	layout LFSCK component
 * \param[out] buf	output buffer
 * \param[in]  len	size of \a buf
 *
 * NOTE(review): many interstitial lines (buffer-advance arithmetic such
 * as "buf += rc; len -= rc;", format-string continuations, braces) are
 * elided from this view of the file.
 */
2827 static int lfsck_layout_dump(const struct lu_env *env,
2828 struct lfsck_component *com, char *buf, int len)
2830 struct lfsck_instance *lfsck = com->lc_lfsck;
2831 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2832 struct lfsck_layout *lo = com->lc_file_ram;
2837 down_read(&com->lc_sem);
/* Header: component name and current status. */
2838 rc = snprintf(buf, len,
2839 "name: lfsck_layout\n"
2845 lfsck_status2names(lo->ll_status));
/* Symbolic dumps of the flag and parameter bitmasks. */
2851 rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names,
2856 rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
/* Relative timestamps for the three bookkeeping times. */
2861 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete,
2862 "time_since_last_completed");
2866 rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start,
2867 "time_since_latest_start");
2871 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint,
2872 "time_since_last_checkpoint");
/* Scan positions (otable-iterator cookies). */
2876 rc = snprintf(buf, len,
2877 "latest_start_position: "LPU64"\n"
2878 "last_checkpoint_position: "LPU64"\n"
2879 "first_failure_position: "LPU64"\n",
2880 lo->ll_pos_latest_start,
2881 lo->ll_pos_last_checkpoint,
2882 lo->ll_pos_first_inconsistent);
/* Repair counters, indexed by LLIT_* - 1 (the enum is 1-based). */
2889 rc = snprintf(buf, len,
2890 "success_count: %u\n"
2891 "repaired_dangling: "LPU64"\n"
2892 "repaired_unmatched_pair: "LPU64"\n"
2893 "repaired_multiple_referenced: "LPU64"\n"
2894 "repaired_orphan: "LPU64"\n"
2895 "repaired_inconsistent_owner: "LPU64"\n"
2896 "repaired_others: "LPU64"\n"
2897 "skipped: "LPU64"\n"
2898 "failed_phase1: "LPU64"\n"
2899 "failed_phase2: "LPU64"\n",
2900 lo->ll_success_count,
2901 lo->ll_objs_repaired[LLIT_DANGLING - 1],
2902 lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1],
2903 lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1],
2904 lo->ll_objs_repaired[LLIT_ORPHAN - 1],
2905 lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1],
2906 lo->ll_objs_repaired[LLIT_OTHERS - 1],
2907 lo->ll_objs_skipped,
2908 lo->ll_objs_failed_phase1,
2909 lo->ll_objs_failed_phase2);
2916 if (lo->ll_status == LS_SCANNING_PHASE1) {
/* Live scan: compute instantaneous and average phase-1 speed
 * from the counters accumulated since the last checkpoint. */
2918 const struct dt_it_ops *iops;
2919 cfs_duration_t duration = cfs_time_current() -
2920 lfsck->li_time_last_checkpoint;
2921 __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
2922 __u64 speed = checked;
2923 __u64 new_checked = com->lc_new_checked * HZ;
2924 __u32 rtime = lo->ll_run_time_phase1 +
2925 cfs_duration_sec(duration + HALF_SEC);
/* do_div() because 64-bit division is not native on 32-bit
 * kernels; guards against duration/rtime == 0 are presumably
 * on the elided lines 2927/2929 -- confirm in full source. */
2928 do_div(new_checked, duration);
2930 do_div(speed, rtime);
2931 rc = snprintf(buf, len,
2932 "checked_phase1: "LPU64"\n"
2933 "checked_phase2: "LPU64"\n"
2934 "run_time_phase1: %u seconds\n"
2935 "run_time_phase2: %u seconds\n"
2936 "average_speed_phase1: "LPU64" items/sec\n"
2937 "average_speed_phase2: N/A\n"
2938 "real-time_speed_phase1: "LPU64" items/sec\n"
2939 "real-time_speed_phase2: N/A\n",
2941 lo->ll_objs_checked_phase2,
2943 lo->ll_run_time_phase2,
2952 LASSERT(lfsck->li_di_oit != NULL);
2954 iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
2956 /* The low layer otable-based iteration position may NOT
2957 * exactly match the layout-based directory traversal
2958 * cookie. Generally, it is not a serious issue. But the
2959 * caller should NOT make assumption on that. */
2960 pos = iops->store(env, lfsck->li_di_oit);
2961 if (!lfsck->li_current_oit_processed)
2963 rc = snprintf(buf, len, "current_position: "LPU64"\n", pos);
2970 /* XXX: LS_SCANNING_PHASE2 will be handled in the future. */
/* Not scanning: report averaged speeds over the recorded
 * phase run times. */
2971 __u64 speed1 = lo->ll_objs_checked_phase1;
2972 __u64 speed2 = lo->ll_objs_checked_phase2;
2974 if (lo->ll_run_time_phase1 != 0)
2975 do_div(speed1, lo->ll_run_time_phase1);
2976 if (lo->ll_run_time_phase2 != 0)
2977 do_div(speed2, lo->ll_run_time_phase2);
2978 rc = snprintf(buf, len,
2979 "checked_phase1: "LPU64"\n"
2980 "checked_phase2: "LPU64"\n"
2981 "run_time_phase1: %u seconds\n"
2982 "run_time_phase2: %u seconds\n"
2983 "average_speed_phase1: "LPU64" items/sec\n"
2984 "average_speed_phase2: "LPU64" objs/sec\n"
2985 "real-time_speed_phase1: N/A\n"
2986 "real-time_speed_phase2: N/A\n"
2987 "current_position: N/A\n",
2988 lo->ll_objs_checked_phase1,
2989 lo->ll_objs_checked_phase2,
2990 lo->ll_run_time_phase1,
2991 lo->ll_run_time_phase2,
3003 up_read(&com->lc_sem);
/*
 * Kick off phase-2 (double) scanning on the LFSCK master.
 *
 * Only meaningful when the trace file says LS_SCANNING_PHASE2; sets
 * llmd_to_double_scan, wakes the assistant thread, and waits until the
 * assistant either enters double-scan mode or stops.  A negative
 * assistant status is returned as the error.
 *
 * NOTE(review): the early-return for the status check (original line
 * ~3018) and the final return (~3029) are elided from this view.
 */
3008 static int lfsck_layout_master_double_scan(const struct lu_env *env,
3009 struct lfsck_component *com)
3011 struct lfsck_layout_master_data *llmd = com->lc_data;
3012 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3013 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3014 struct lfsck_layout *lo = com->lc_file_ram;
3015 struct l_wait_info lwi = { 0 };
3017 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3020 llmd->llmd_to_double_scan = 1;
3021 wake_up_all(&athread->t_ctl_waitq);
3022 l_wait_event(mthread->t_ctl_waitq,
3023 llmd->llmd_in_double_scan ||
3024 thread_is_stopped(athread),
3026 if (llmd->llmd_assistant_status < 0)
3027 return llmd->llmd_assistant_status;
/*
 * Phase-2 scanning on an LFSCK slave (OST side).
 *
 * When in LS_SCANNING_PHASE2, periodically re-queries the master(s) and
 * waits (30 second timeout per round) until either the local thread stops
 * or all masters have been removed from llsd_master_list; the outcome is
 * folded into the trace file via lfsck_layout_double_scan_result().
 * Also maintains lfsck->li_double_scan_count and wakes the main thread
 * when the last double-scanning component finishes.
 *
 * NOTE(review): the surrounding loop construct and several branches
 * (original lines ~3052-3083) are partially elided from this view.
 */
3032 static int lfsck_layout_slave_double_scan(const struct lu_env *env,
3033 struct lfsck_component *com)
3035 struct lfsck_instance *lfsck = com->lc_lfsck;
3036 struct lfsck_layout_slave_data *llsd = com->lc_data;
3037 struct lfsck_layout *lo = com->lc_file_ram;
3038 struct ptlrpc_thread *thread = &lfsck->li_thread;
3042 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3045 atomic_inc(&lfsck->li_double_scan_count);
/* Reset per-phase counters and schedule the next checkpoint. */
3047 com->lc_new_checked = 0;
3048 com->lc_new_scanned = 0;
3049 com->lc_time_last_checkpoint = cfs_time_current();
3050 com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
3051 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/* Poll the master(s) every 30 seconds until the master list is
 * empty or this thread is asked to stop. */
3054 struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30),
3057 rc = lfsck_layout_slave_query_master(env, com);
3058 if (list_empty(&llsd->llsd_master_list)) {
3059 if (unlikely(!thread_is_running(thread)))
3070 rc = l_wait_event(thread->t_ctl_waitq,
3071 !thread_is_running(thread) ||
3072 list_empty(&llsd->llsd_master_list),
3074 if (unlikely(!thread_is_running(thread)))
/* A timeout just re-enters the polling loop; any other wakeup
 * completes phase 2 (rc < 0 error, otherwise success). */
3077 if (rc == -ETIMEDOUT)
3080 GOTO(done, rc = (rc < 0 ? rc : 1));
3084 rc = lfsck_layout_double_scan_result(env, com, rc);
/* Last component done with double scan: wake the main thread. */
3086 if (atomic_dec_and_test(&lfsck->li_double_scan_count))
3087 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
/*
 * Release the master-side private data (struct lfsck_layout_master_data).
 *
 * Requires the assistant thread to be init or stopped and the request
 * list to be empty.  Detaches every target descriptor from the six
 * per-phase OST/MDT lists under the descriptor-set lock, then frees the
 * structure (the OBD_FREE_PTR and the ltd put/cleanup presumably sit on
 * the elided lines -- confirm in full source).
 */
3092 static void lfsck_layout_master_data_release(const struct lu_env *env,
3093 struct lfsck_component *com)
3095 struct lfsck_layout_master_data *llmd = com->lc_data;
3096 struct lfsck_instance *lfsck = com->lc_lfsck;
3097 struct lfsck_tgt_descs *ltds;
3098 struct lfsck_tgt_desc *ltd;
3099 struct lfsck_tgt_desc *next;
3101 LASSERT(llmd != NULL);
3102 LASSERT(thread_is_init(&llmd->llmd_thread) ||
3103 thread_is_stopped(&llmd->llmd_thread));
3104 LASSERT(list_empty(&llmd->llmd_req_list));
3106 com->lc_data = NULL;
/* Drain the OST phase-1/phase-2/main lists under the OST
 * descriptor-set lock.  NOTE(review): the switch to the MDT
 * descriptor set before the MDT lists below appears to be on
 * elided lines -- verify locking in the full source. */
3108 ltds = &lfsck->li_ost_descs;
3109 spin_lock(&ltds->ltd_lock);
3110 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase1_list,
3111 ltd_layout_phase_list) {
3112 list_del_init(&ltd->ltd_layout_phase_list);
3114 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase2_list,
3115 ltd_layout_phase_list) {
3116 list_del_init(&ltd->ltd_layout_phase_list);
3118 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_list,
3120 list_del_init(&ltd->ltd_layout_list);
/* Same draining for the MDT-side lists. */
3122 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list,
3123 ltd_layout_phase_list) {
3124 list_del_init(&ltd->ltd_layout_phase_list);
3126 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase2_list,
3127 ltd_layout_phase_list) {
3128 list_del_init(&ltd->ltd_layout_phase_list);
3130 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_list,
3132 list_del_init(&ltd->ltd_layout_list);
3134 spin_unlock(&ltds->ltd_lock);
/*
 * Release the slave-side private data (struct lfsck_layout_slave_data).
 *
 * Detaches and releases every per-sequence entry (dropping its LAST_ID
 * object reference) and every registered master target; the OBD_FREE
 * calls presumably sit on the elided lines -- confirm in full source.
 */
3139 static void lfsck_layout_slave_data_release(const struct lu_env *env,
3140 struct lfsck_component *com)
3142 struct lfsck_layout_slave_data *llsd = com->lc_data;
3143 struct lfsck_layout_seq *lls;
3144 struct lfsck_layout_seq *next;
3145 struct lfsck_layout_slave_target *llst;
3146 struct lfsck_layout_slave_target *tmp;
3148 LASSERT(llsd != NULL);
3150 com->lc_data = NULL;
/* Free each tracked object sequence and put its LAST_ID object. */
3152 list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
3154 list_del_init(&lls->lls_list);
3155 lfsck_object_put(env, lls->lls_lastid_obj);
/* Drop every master target still registered with this slave. */
3159 list_for_each_entry_safe(llst, tmp, &llsd->llsd_master_list,
3161 list_del_init(&llst->llst_list);
/*
 * Ask the master assistant thread to quit and wait for it.
 *
 * Sets llmd_exit, wakes the assistant, and blocks on the main thread's
 * waitq until the assistant is back in the init state or stopped.
 */
3168 static void lfsck_layout_master_quit(const struct lu_env *env,
3169 struct lfsck_component *com)
3171 struct lfsck_layout_master_data *llmd = com->lc_data;
3172 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3173 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3174 struct l_wait_info lwi = { 0 };
3176 llmd->llmd_exit = 1;
3177 wake_up_all(&athread->t_ctl_waitq);
3178 l_wait_event(mthread->t_ctl_waitq,
3179 thread_is_init(athread) ||
3180 thread_is_stopped(athread),
/*
 * Handle an incoming LFSCK event on the master from an OST or MDT.
 *
 * Accepts LE_PHASE1_DONE, LE_PHASE2_DONE and LE_STOP only.  Looks the
 * sender up in the OST or MDT descriptor set (per LEF_FROM_OST), moves
 * it between the phase lists or marks it done, and flags the scan
 * LF_INCOMPLETE when a target failed or stopped abnormally.  An
 * LEF_FORCE_STOP event stops the whole local LFSCK; otherwise the
 * assistant is woken if all targets are ready for orphan handling.
 *
 * NOTE(review): the early-return paths, the "ltd == NULL" branch and
 * some switch braces (e.g. around original lines 3198-3211, 3240) are
 * elided from this view of the file.
 */
3184 static int lfsck_layout_master_in_notify(const struct lu_env *env,
3185 struct lfsck_component *com,
3186 struct lfsck_request *lr)
3188 struct lfsck_instance *lfsck = com->lc_lfsck;
3189 struct lfsck_layout *lo = com->lc_file_ram;
3190 struct lfsck_layout_master_data *llmd = com->lc_data;
3191 struct lfsck_tgt_descs *ltds;
3192 struct lfsck_tgt_desc *ltd;
/* Only these three events are meaningful to the master. */
3195 if (lr->lr_event != LE_PHASE1_DONE &&
3196 lr->lr_event != LE_PHASE2_DONE &&
3197 lr->lr_event != LE_STOP)
/* Locate the sending target in the right descriptor set. */
3200 if (lr->lr_flags & LEF_FROM_OST)
3201 ltds = &lfsck->li_ost_descs;
3203 ltds = &lfsck->li_mdt_descs;
3204 spin_lock(&ltds->ltd_lock);
3205 ltd = LTD_TGT(ltds, lr->lr_index);
3207 spin_unlock(&ltds->ltd_lock);
3212 list_del_init(&ltd->ltd_layout_phase_list);
3213 switch (lr->lr_event) {
3214 case LE_PHASE1_DONE:
/* Target failed phase 1: drop it and mark the whole scan
 * incomplete. */
3215 if (lr->lr_status <= 0) {
3216 ltd->ltd_layout_done = 1;
3217 list_del_init(&ltd->ltd_layout_list);
3218 lo->ll_flags |= LF_INCOMPLETE;
/* Target finished phase 1 successfully: queue it on the
 * matching (OST or MDT) phase-2 list. */
3222 if (lr->lr_flags & LEF_FROM_OST) {
3223 if (list_empty(&ltd->ltd_layout_list))
3224 list_add_tail(&ltd->ltd_layout_list,
3225 &llmd->llmd_ost_list);
3226 list_add_tail(&ltd->ltd_layout_phase_list,
3227 &llmd->llmd_ost_phase2_list);
3229 if (list_empty(&ltd->ltd_layout_list))
3230 list_add_tail(&ltd->ltd_layout_list,
3231 &llmd->llmd_mdt_list);
3232 list_add_tail(&ltd->ltd_layout_phase_list,
3233 &llmd->llmd_mdt_phase2_list);
3236 case LE_PHASE2_DONE:
3237 ltd->ltd_layout_done = 1;
3238 list_del_init(&ltd->ltd_layout_list);
/* LE_STOP: target stopped; only a forced stop avoids the
 * incomplete flag (NOTE(review): the "case LE_STOP:" label is
 * on an elided line). */
3241 ltd->ltd_layout_done = 1;
3242 list_del_init(&ltd->ltd_layout_list);
3243 if (!(lr->lr_flags & LEF_FORCE_STOP))
3244 lo->ll_flags |= LF_INCOMPLETE;
3249 spin_unlock(&ltds->ltd_lock);
/* A forced stop tears down the local LFSCK; otherwise wake the
 * assistant if all targets are ready for orphan processing. */
3251 if (lr->lr_flags & LEF_FORCE_STOP) {
3252 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3254 memset(stop, 0, sizeof(*stop));
3255 stop->ls_status = lr->lr_status;
3256 stop->ls_flags = lr->lr_param;
3257 lfsck_stop(env, lfsck->li_bottom, stop);
3258 } else if (lfsck_layout_master_to_orphan(llmd)) {
3259 wake_up_all(&llmd->llmd_thread.t_ctl_waitq);
/*
 * Handle an incoming LFSCK event on a slave from its master.
 *
 * Accepts LE_PHASE2_DONE and LE_STOP.  Removes the sending master from
 * llsd_master_list; when the list becomes empty, LE_PHASE2_DONE wakes
 * the local thread, and LE_STOP stops the local LFSCK with the master's
 * status/flags.
 *
 * NOTE(review): the early-return paths, the "llst == NULL" branch and
 * the "case LE_STOP:" label appear to be on elided lines.
 */
3265 static int lfsck_layout_slave_in_notify(const struct lu_env *env,
3266 struct lfsck_component *com,
3267 struct lfsck_request *lr)
3269 struct lfsck_instance *lfsck = com->lc_lfsck;
3270 struct lfsck_layout_slave_data *llsd = com->lc_data;
3271 struct lfsck_layout_slave_target *llst;
3274 if (lr->lr_event != LE_PHASE2_DONE &&
3275 lr->lr_event != LE_STOP)
/* Drop the sending master's registration and its reference. */
3278 llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index);
3282 lfsck_layout_llst_put(llst);
/* Last master gone: finish phase 2 or stop, per the event. */
3283 if (list_empty(&llsd->llsd_master_list)) {
3284 switch (lr->lr_event) {
3285 case LE_PHASE2_DONE:
3286 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
3289 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3291 memset(stop, 0, sizeof(*stop));
3292 stop->ls_status = lr->lr_status;
3293 stop->ls_flags = lr->lr_param;
3294 lfsck_stop(env, lfsck->li_bottom, stop);
/* Return the layout component's current status (LS_* value from the
 * in-RAM trace file) for the generic LFSCK query path. */
3305 static int lfsck_layout_query(const struct lu_env *env,
3306 struct lfsck_component *com)
3308 struct lfsck_layout *lo = com->lc_file_ram;
3310 return lo->ll_status;
/*
 * Send an asynchronous LE_STOP (LS_CO_STOPPED) notification to one
 * target when the master's layout LFSCK stops.
 *
 * Builds an lfsck_request addressed to \a ltd, fills the async interpret
 * args, and queues the RPC on \a set via lfsck_async_request(); failures
 * are logged but the error code is (apparently) only reported via rc.
 *
 * \param[in] env	lustre environment
 * \param[in] com	layout LFSCK component
 * \param[in] ltds	descriptor set \a ltd belongs to (OST or MDT)
 * \param[in] ltd	the target to notify
 * \param[in] set	ptlrpc request set the async RPC is added to
 *
 * NOTE(review): the MDT branch of the LEF_TO_OST selection and the
 * early-return around original lines 3334-3339 are partially elided.
 */
3313 static int lfsck_layout_master_stop_notify(const struct lu_env *env,
3314 struct lfsck_component *com,
3315 struct lfsck_tgt_descs *ltds,
3316 struct lfsck_tgt_desc *ltd,
3317 struct ptlrpc_request_set *set)
3319 struct lfsck_thread_info *info = lfsck_env_info(env);
3320 struct lfsck_async_interpret_args *laia = &info->lti_laia;
3321 struct lfsck_request *lr = &info->lti_lr;
3322 struct lfsck_instance *lfsck = com->lc_lfsck;
/* The target must already be off all layout lists. */
3325 LASSERT(list_empty(&ltd->ltd_layout_list));
3326 LASSERT(list_empty(&ltd->ltd_layout_phase_list));
3328 memset(lr, 0, sizeof(*lr));
3329 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
3330 lr->lr_event = LE_STOP;
3331 lr->lr_active = LT_LAYOUT;
3332 if (ltds == &lfsck->li_ost_descs) {
3333 lr->lr_flags = LEF_TO_OST;
/* Do not notify ourselves. */
3335 if (ltd->ltd_index == lfsck_dev_idx(lfsck->li_bottom))
3340 lr->lr_status = LS_CO_STOPPED;
3342 laia->laia_com = com;
3343 laia->laia_ltds = ltds;
3344 laia->laia_ltd = ltd;
3347 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
3348 lfsck_layout_master_async_interpret,
3349 laia, LFSCK_NOTIFY);
3351 CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n",
3352 lfsck_lfsck2name(lfsck),
3353 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
3354 ltd->ltd_index, rc);
/* with lfsck::li_lock held */
/*
 * Let a remote MDT join an already-running slave-side layout LFSCK.
 *
 * Only applies when the start request carries a valid index and the
 * LPF_ALL_MDT flag.  Temporarily drops li_lock (the caller holds it, per
 * the note above) to register the requesting MDT in llsd_master_list;
 * if the local thread is found not running afterwards, the registration
 * is rolled back.
 *
 * NOTE(review): the early-return and the final return value are on
 * elided lines.
 */
3360 static int lfsck_layout_slave_join(const struct lu_env *env,
3361 struct lfsck_component *com,
3362 struct lfsck_start_param *lsp)
3364 struct lfsck_instance *lfsck = com->lc_lfsck;
3365 struct lfsck_layout_slave_data *llsd = com->lc_data;
3366 struct lfsck_layout_slave_target *llst;
3367 struct lfsck_start *start = lsp->lsp_start;
3371 if (!lsp->lsp_index_valid || start == NULL ||
3372 !(start->ls_flags & LPF_ALL_MDT))
/* llst_add may sleep/allocate, so drop the spinlock around it. */
3375 spin_unlock(&lfsck->li_lock);
3376 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
3377 spin_lock(&lfsck->li_lock);
/* Raced with a stop: undo the registration we just made. */
3378 if (rc == 0 && !thread_is_running(&lfsck->li_thread)) {
3379 spin_unlock(&lfsck->li_lock);
3380 llst = lfsck_layout_llst_find_and_del(llsd, lsp->lsp_index);
3382 lfsck_layout_llst_put(llst);
3383 spin_lock(&lfsck->li_lock);
/* lfsck_operations vtable for the layout component on the LFSCK master
 * (MDT side): adds the assistant-thread driven double scan, quit and
 * stop-notify hooks on top of the common callbacks. */
3390 static struct lfsck_operations lfsck_layout_master_ops = {
3391 .lfsck_reset = lfsck_layout_reset,
3392 .lfsck_fail = lfsck_layout_fail,
3393 .lfsck_checkpoint = lfsck_layout_master_checkpoint,
3394 .lfsck_prep = lfsck_layout_master_prep,
3395 .lfsck_exec_oit = lfsck_layout_master_exec_oit,
3396 .lfsck_exec_dir = lfsck_layout_exec_dir,
3397 .lfsck_post = lfsck_layout_master_post,
3398 .lfsck_dump = lfsck_layout_dump,
3399 .lfsck_double_scan = lfsck_layout_master_double_scan,
3400 .lfsck_data_release = lfsck_layout_master_data_release,
3401 .lfsck_quit = lfsck_layout_master_quit,
3402 .lfsck_in_notify = lfsck_layout_master_in_notify,
3403 .lfsck_query = lfsck_layout_query,
3404 .lfsck_stop_notify = lfsck_layout_master_stop_notify,
/* lfsck_operations vtable for the layout component on an LFSCK slave
 * (OST side): no quit/stop-notify hooks, but supports late join of
 * additional MDTs via .lfsck_join. */
3407 static struct lfsck_operations lfsck_layout_slave_ops = {
3408 .lfsck_reset = lfsck_layout_reset,
3409 .lfsck_fail = lfsck_layout_fail,
3410 .lfsck_checkpoint = lfsck_layout_slave_checkpoint,
3411 .lfsck_prep = lfsck_layout_slave_prep,
3412 .lfsck_exec_oit = lfsck_layout_slave_exec_oit,
3413 .lfsck_exec_dir = lfsck_layout_exec_dir,
3414 .lfsck_post = lfsck_layout_slave_post,
3415 .lfsck_dump = lfsck_layout_dump,
3416 .lfsck_double_scan = lfsck_layout_slave_double_scan,
3417 .lfsck_data_release = lfsck_layout_slave_data_release,
3418 .lfsck_in_notify = lfsck_layout_slave_in_notify,
3419 .lfsck_query = lfsck_layout_query,
3420 .lfsck_join = lfsck_layout_slave_join,
3423 int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
3425 struct lfsck_component *com;
3426 struct lfsck_layout *lo;
3427 struct dt_object *root = NULL;
3428 struct dt_object *obj;
3436 INIT_LIST_HEAD(&com->lc_link);
3437 INIT_LIST_HEAD(&com->lc_link_dir);
3438 init_rwsem(&com->lc_sem);
3439 atomic_set(&com->lc_ref, 1);
3440 com->lc_lfsck = lfsck;
3441 com->lc_type = LT_LAYOUT;
3442 if (lfsck->li_master) {
3443 struct lfsck_layout_master_data *llmd;
3445 com->lc_ops = &lfsck_layout_master_ops;
3446 OBD_ALLOC_PTR(llmd);
3448 GOTO(out, rc = -ENOMEM);
3450 INIT_LIST_HEAD(&llmd->llmd_req_list);
3451 spin_lock_init(&llmd->llmd_lock);
3452 INIT_LIST_HEAD(&llmd->llmd_ost_list);
3453 INIT_LIST_HEAD(&llmd->llmd_ost_phase1_list);
3454 INIT_LIST_HEAD(&llmd->llmd_ost_phase2_list);
3455 INIT_LIST_HEAD(&llmd->llmd_mdt_list);
3456 INIT_LIST_HEAD(&llmd->llmd_mdt_phase1_list);
3457 INIT_LIST_HEAD(&llmd->llmd_mdt_phase2_list);
3458 init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq);
3459 com->lc_data = llmd;
3461 struct lfsck_layout_slave_data *llsd;
3463 com->lc_ops = &lfsck_layout_slave_ops;
3464 OBD_ALLOC_PTR(llsd);
3466 GOTO(out, rc = -ENOMEM);
3468 INIT_LIST_HEAD(&llsd->llsd_seq_list);
3469 INIT_LIST_HEAD(&llsd->llsd_master_list);
3470 spin_lock_init(&llsd->llsd_lock);
3471 com->lc_data = llsd;
3473 com->lc_file_size = sizeof(*lo);
3474 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
3475 if (com->lc_file_ram == NULL)
3476 GOTO(out, rc = -ENOMEM);
3478 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
3479 if (com->lc_file_disk == NULL)
3480 GOTO(out, rc = -ENOMEM);
3482 root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
3484 GOTO(out, rc = PTR_ERR(root));
3486 if (unlikely(!dt_try_as_dir(env, root)))
3487 GOTO(out, rc = -ENOTDIR);
3489 obj = local_file_find_or_create(env, lfsck->li_los, root,
3491 S_IFREG | S_IRUGO | S_IWUSR);
3493 GOTO(out, rc = PTR_ERR(obj));
3496 rc = lfsck_layout_load(env, com);
3498 rc = lfsck_layout_reset(env, com, true);
3499 else if (rc == -ENOENT)
3500 rc = lfsck_layout_init(env, com);
3505 lo = com->lc_file_ram;
3506 switch (lo->ll_status) {
3512 spin_lock(&lfsck->li_lock);
3513 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3514 spin_unlock(&lfsck->li_lock);
3517 CERROR("%s: unknown lfsck_layout status: rc = %u\n",
3518 lfsck_lfsck2name(lfsck), lo->ll_status);
3520 case LS_SCANNING_PHASE1:
3521 case LS_SCANNING_PHASE2:
3522 /* No need to store the status to disk right now.
3523 * If the system crashed before the status stored,
3524 * it will be loaded back when next time. */
3525 lo->ll_status = LS_CRASHED;
3526 lo->ll_flags |= LF_INCOMPLETE;
3533 spin_lock(&lfsck->li_lock);
3534 list_add_tail(&com->lc_link, &lfsck->li_list_scan);
3535 spin_unlock(&lfsck->li_lock);
3539 if (lo->ll_flags & LF_CRASHED_LASTID) {
3540 LASSERT(lfsck->li_out_notify != NULL);
3542 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3543 LE_LASTID_REBUILDING);
3549 if (root != NULL && !IS_ERR(root))
3550 lu_object_put(env, &root->do_lu);
3553 lfsck_component_cleanup(env, com);