4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/lfsck/lfsck_layout.c
28 * Author: Fan, Yong <fan.yong@intel.com>
32 # define EXPORT_SYMTAB
34 #define DEBUG_SUBSYSTEM S_LFSCK
36 #include <linux/bitops.h>
38 #include <lustre/lustre_idl.h>
39 #include <lu_object.h>
40 #include <dt_object.h>
41 #include <lustre_linkea.h>
42 #include <lustre_fid.h>
43 #include <lustre_lib.h>
44 #include <lustre_net.h>
45 #include <lustre/lustre_user.h>
46 #include <md_object.h>
47 #include <obd_class.h>
49 #include "lfsck_internal.h"
/* Magic number identifying the on-disk lfsck_layout trace file. */
51 #define LFSCK_LAYOUT_MAGIC 0xB173AE14
/* Component name under which the layout LFSCK registers itself. */
53 static const char lfsck_layout_name[] = "lfsck_layout";
/* Per-FID-sequence state tracked by the slave (OST) side of the layout
 * LFSCK; linked into lfsck_layout_slave_data::llsd_seq_list.
 * NOTE(review): this listing is partial -- fields such as lls_seq and
 * lls_lastid used by the helpers below are not visible here. */
55 struct lfsck_layout_seq {
56 struct list_head lls_list;
/* Highest object ID actually seen for this sequence during the scan. */
59 __u64 lls_lastid_known;
/* The LAST_ID file object backing this sequence. */
60 struct dt_object *lls_lastid_obj;
/* Set when the in-memory LAST_ID differs from disk and needs a sync. */
61 unsigned int lls_dirty:1;
/* One LFSCK master (MDT) that this slave is serving.
 * NOTE(review): partial listing -- the index/refcount fields used by the
 * llst_* helpers below are not visible here. */
64 struct lfsck_layout_slave_target {
65 /* link into lfsck_layout_slave_data::llsd_master_list. */
66 struct list_head llst_list;
/* Slave-side (OST) private data for the layout LFSCK component. */
72 struct lfsck_layout_slave_data {
73 /* list for lfsck_layout_seq */
74 struct list_head llsd_seq_list;
76 /* list for the masters involved in layout verification. */
77 struct list_head llsd_master_list;
/* Refcounted wrapper around a pre-fetched MDT-object (file) under
 * verification, with a snapshot of its attributes.
 * NOTE(review): partial listing -- the llo_ref/generation fields used
 * elsewhere are not visible here. */
82 struct lfsck_layout_object {
83 struct dt_object *llo_obj;
84 struct lu_attr llo_attr;
/* A single parent (MDT-object) / child (OST-object) consistency-check
 * request queued on llmd_req_list for the layout assistant thread. */
89 struct lfsck_layout_req {
90 struct list_head llr_list;
91 struct lfsck_layout_object *llr_parent;
92 struct dt_object *llr_child;
94 __u32 llr_lov_idx; /* offset in LOV EA */
/* Master-side (MDT) private data for the layout LFSCK component:
 * the request queue for the assistant thread plus per-target tracking
 * lists for OSTs and other MDTs in each scanning phase. */
97 struct lfsck_layout_master_data {
99 struct list_head llmd_req_list;
101 /* list for the ost targets involved in layout verification. */
102 struct list_head llmd_ost_list;
104 /* list for the ost targets in phase1 scanning. */
105 struct list_head llmd_ost_phase1_list;
107 /* list for the ost targets in phase2 scanning. */
108 struct list_head llmd_ost_phase2_list;
110 /* list for the mdt targets involved in layout verification. */
111 struct list_head llmd_mdt_list;
113 /* list for the mdt targets in phase1 scanning. */
114 struct list_head llmd_mdt_phase1_list;
116 /* list for the mdt targets in phase2 scanning. */
117 struct list_head llmd_mdt_phase2_list;
119 struct ptlrpc_thread llmd_thread;
/* Generation bumped on each sweep so already-touched targets are skipped. */
120 __u32 llmd_touch_gen;
122 int llmd_assistant_status;
123 int llmd_post_result;
124 unsigned int llmd_to_post:1,
125 llmd_to_double_scan:1,
126 llmd_in_double_scan:1,
/* Context carried through an async RPC issued by the slave side. */
130 struct lfsck_layout_slave_async_args {
131 struct obd_export *llsaa_exp;
132 struct lfsck_component *llsaa_com;
133 struct lfsck_layout_slave_target *llsaa_llst;
/* Allocate an lfsck_layout_object wrapper for @obj: snapshot its
 * attributes, take a reference on the underlying lu_object and start the
 * wrapper refcount at 1.  Returns the wrapper or ERR_PTR(-ENOMEM).
 * NOTE(review): partial listing -- the allocation, error-path and return
 * statements are not fully visible here. */
136 static struct lfsck_layout_object *
137 lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj,
140 struct lfsck_layout_object *llo;
145 return ERR_PTR(-ENOMEM);
147 rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA);
154 lu_object_get(&obj->do_lu);
156 /* The gen can be used to check whether some others have changed the
157 * file layout after LFSCK pre-fetching but before real verification. */
159 atomic_set(&llo->llo_ref, 1);
/* Drop a reference on @llst; on the final put the target must already be
 * unlinked from the master list (asserted) before it is released.
 * NOTE(review): the freeing statement is not visible in this listing. */
165 lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst)
167 if (atomic_dec_and_test(&llst->llst_ref)) {
168 LASSERT(list_empty(&llst->llst_list));
/* Register master @index with this slave: allocate a slave-target record
 * (refcount 1) and append it to llsd_master_list under llsd_lock, unless
 * an entry with the same index is already present (duplicate path not
 * fully visible in this listing). */
175 lfsck_layout_llst_add(struct lfsck_layout_slave_data *llsd, __u32 index)
177 struct lfsck_layout_slave_target *llst;
178 struct lfsck_layout_slave_target *tmp;
185 INIT_LIST_HEAD(&llst->llst_list);
187 llst->llst_index = index;
188 atomic_set(&llst->llst_ref, 1);
190 spin_lock(&llsd->llsd_lock);
/* Scan for an existing record with the same master index. */
191 list_for_each_entry(tmp, &llsd->llsd_master_list, llst_list) {
192 if (tmp->llst_index == index) {
198 list_add_tail(&llst->llst_list, &llsd->llsd_master_list);
199 spin_unlock(&llsd->llsd_lock);
/* Unlink @llst from the master list (if still linked) under llsd_lock,
 * then drop the reference that the list held on it. */
208 lfsck_layout_llst_del(struct lfsck_layout_slave_data *llsd,
209 struct lfsck_layout_slave_target *llst)
213 spin_lock(&llsd->llsd_lock);
214 if (!list_empty(&llst->llst_list)) {
215 list_del_init(&llst->llst_list);
218 spin_unlock(&llsd->llsd_lock);
221 lfsck_layout_llst_put(llst);
/* Find the slave target registered for master @index, unlink it from the
 * master list and return it (the caller inherits the list's reference).
 * NOTE(review): the not-found return path is not visible in this listing;
 * presumably NULL -- confirm against the full source. */
224 static inline struct lfsck_layout_slave_target *
225 lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd,
228 struct lfsck_layout_slave_target *llst;
230 spin_lock(&llsd->llsd_lock);
231 list_for_each_entry(llst, &llsd->llsd_master_list, llst_list) {
232 if (llst->llst_index == index) {
233 list_del_init(&llst->llst_list);
234 spin_unlock(&llsd->llsd_lock);
239 spin_unlock(&llsd->llsd_lock);
/* Drop a reference on @llo; on the final put release the wrapped
 * dt_object (and, not visible here, the wrapper itself). */
244 static inline void lfsck_layout_object_put(const struct lu_env *env,
245 struct lfsck_layout_object *llo)
247 if (atomic_dec_and_test(&llo->llo_ref)) {
248 lfsck_object_put(env, llo->llo_obj);
/* Build a verification request pairing @parent (an extra reference is
 * taken on it) with OST-object @child at LOV-EA slot @lov_idx on OST
 * @ost_idx.  Returns the request or ERR_PTR(-ENOMEM). */
253 static struct lfsck_layout_req *
254 lfsck_layout_req_init(struct lfsck_layout_object *parent,
255 struct dt_object *child, __u32 ost_idx, __u32 lov_idx)
257 struct lfsck_layout_req *llr;
261 return ERR_PTR(-ENOMEM);
263 INIT_LIST_HEAD(&llr->llr_list);
264 atomic_inc(&parent->llo_ref);
265 llr->llr_parent = parent;
266 llr->llr_child = child;
267 llr->llr_ost_idx = ost_idx;
268 llr->llr_lov_idx = lov_idx;
/* Release the references a request holds: the child dt_object and the
 * parent wrapper (and, not visible here, free the request itself). */
273 static inline void lfsck_layout_req_fini(const struct lu_env *env,
274 struct lfsck_layout_req *llr)
276 lu_object_put(env, &llr->llr_child->do_lu);
277 lfsck_layout_object_put(env, llr->llr_parent);
/* Check, under llmd_lock, whether the assistant's request queue is empty.
 * NOTE(review): the return statements are not visible in this listing. */
281 static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd)
285 spin_lock(&llmd->llmd_lock);
286 if (list_empty(&llmd->llmd_req_list))
288 spin_unlock(&llmd->llmd_lock);
/* Read @obj's LOV EA into @buf, growing the buffer on demand: probing
 * with LU_BUF_NULL makes dt_xattr_get return the required size, after
 * which the buffer is (re)allocated and the read retried.  @buflen is
 * updated with the buffer capacity after any (re)allocation.
 * NOTE(review): the retry/return control flow between these calls is only
 * partially visible in this listing. */
293 static int lfsck_layout_get_lovea(const struct lu_env *env,
294 struct dt_object *obj,
295 struct lu_buf *buf, ssize_t *buflen)
300 rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA);
/* Probe for the needed size (buffer too small on the first attempt). */
302 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
307 lu_buf_realloc(buf, rc);
309 *buflen = buf->lb_len;
311 if (buf->lb_buf == NULL)
323 if (unlikely(buf->lb_buf == NULL)) {
324 lu_buf_alloc(buf, rc);
326 *buflen = buf->lb_len;
328 if (buf->lb_buf == NULL)
/* Sanity-check an on-disk LOV EA header: only LOV_MAGIC_V1/V3 magics and
 * the RAID0 pattern are accepted (return paths not visible here). */
337 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
342 magic = le32_to_cpu(lmm->lmm_magic);
343 /* If magic crashed, keep it there. Sometime later, during OST-object
344 * orphan handling, if some OST-object(s) back-point to it, it can be
345 * verified and repaired. */
346 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
349 patten = le32_to_cpu(lmm->lmm_pattern);
350 /* XXX: currently, we only support LOV_PATTERN_RAID0. */
351 if (patten != LOV_PATTERN_RAID0)
/* Convert an on-disk (little-endian) lfsck_layout record @src into CPU
 * byte order in @des, field by field. */
357 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
358 const struct lfsck_layout *src)
362 des->ll_magic = le32_to_cpu(src->ll_magic);
363 des->ll_status = le32_to_cpu(src->ll_status);
364 des->ll_flags = le32_to_cpu(src->ll_flags);
365 des->ll_success_count = le32_to_cpu(src->ll_success_count);
366 des->ll_run_time_phase1 = le32_to_cpu(src->ll_run_time_phase1);
367 des->ll_run_time_phase2 = le32_to_cpu(src->ll_run_time_phase2);
368 des->ll_time_last_complete = le64_to_cpu(src->ll_time_last_complete);
369 des->ll_time_latest_start = le64_to_cpu(src->ll_time_latest_start);
370 des->ll_time_last_checkpoint =
371 le64_to_cpu(src->ll_time_last_checkpoint);
372 des->ll_pos_latest_start = le64_to_cpu(src->ll_pos_latest_start);
373 des->ll_pos_last_checkpoint = le64_to_cpu(src->ll_pos_last_checkpoint);
374 des->ll_pos_first_inconsistent =
375 le64_to_cpu(src->ll_pos_first_inconsistent);
376 des->ll_objs_checked_phase1 = le64_to_cpu(src->ll_objs_checked_phase1);
377 des->ll_objs_failed_phase1 = le64_to_cpu(src->ll_objs_failed_phase1);
378 des->ll_objs_checked_phase2 = le64_to_cpu(src->ll_objs_checked_phase2);
379 des->ll_objs_failed_phase2 = le64_to_cpu(src->ll_objs_failed_phase2);
380 for (i = 0; i < LLIT_MAX; i++)
381 des->ll_objs_repaired[i] =
382 le64_to_cpu(src->ll_objs_repaired[i]);
383 des->ll_objs_skipped = le64_to_cpu(src->ll_objs_skipped);
/* Convert a CPU-order lfsck_layout record @src into the on-disk
 * little-endian form in @des; mirror image of lfsck_layout_le_to_cpu(). */
386 static void lfsck_layout_cpu_to_le(struct lfsck_layout *des,
387 const struct lfsck_layout *src)
391 des->ll_magic = cpu_to_le32(src->ll_magic);
392 des->ll_status = cpu_to_le32(src->ll_status);
393 des->ll_flags = cpu_to_le32(src->ll_flags);
394 des->ll_success_count = cpu_to_le32(src->ll_success_count);
395 des->ll_run_time_phase1 = cpu_to_le32(src->ll_run_time_phase1);
396 des->ll_run_time_phase2 = cpu_to_le32(src->ll_run_time_phase2);
397 des->ll_time_last_complete = cpu_to_le64(src->ll_time_last_complete);
398 des->ll_time_latest_start = cpu_to_le64(src->ll_time_latest_start);
399 des->ll_time_last_checkpoint =
400 cpu_to_le64(src->ll_time_last_checkpoint);
401 des->ll_pos_latest_start = cpu_to_le64(src->ll_pos_latest_start);
402 des->ll_pos_last_checkpoint = cpu_to_le64(src->ll_pos_last_checkpoint);
403 des->ll_pos_first_inconsistent =
404 cpu_to_le64(src->ll_pos_first_inconsistent);
405 des->ll_objs_checked_phase1 = cpu_to_le64(src->ll_objs_checked_phase1);
406 des->ll_objs_failed_phase1 = cpu_to_le64(src->ll_objs_failed_phase1);
407 des->ll_objs_checked_phase2 = cpu_to_le64(src->ll_objs_checked_phase2);
408 des->ll_objs_failed_phase2 = cpu_to_le64(src->ll_objs_failed_phase2);
409 for (i = 0; i < LLIT_MAX; i++)
410 des->ll_objs_repaired[i] =
411 cpu_to_le64(src->ll_objs_repaired[i]);
412 des->ll_objs_skipped = cpu_to_le64(src->ll_objs_skipped);
/* Load the persistent lfsck_layout trace file into com->lc_file_ram,
 * converting from little-endian and validating size and magic. */
416 * \retval +ve: the lfsck_layout is broken, the caller should reset it.
417 * \retval 0: succeed.
418 * \retval -ve: failed cases.
420 static int lfsck_layout_load(const struct lu_env *env,
421 struct lfsck_component *com)
423 struct lfsck_layout *lo = com->lc_file_ram;
424 const struct dt_body_operations *dbo = com->lc_obj->do_body_ops;
425 ssize_t size = com->lc_file_size;
429 rc = dbo->dbo_read(env, com->lc_obj,
430 lfsck_buf_get(env, com->lc_file_disk, size), &pos,
435 CWARN("%s: failed to load lfsck_layout: rc = %d\n",
436 lfsck_lfsck2name(com->lc_lfsck), rc);
/* A short read means the trace file is truncated/corrupted. */
438 } else if (rc != size) {
439 CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n",
440 lfsck_lfsck2name(com->lc_lfsck), rc);
444 lfsck_layout_le_to_cpu(lo, com->lc_file_disk);
445 if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) {
446 CWARN("%s: invalid lfsck_layout magic %#x != %#x, "
447 "to be reset\n", lfsck_lfsck2name(com->lc_lfsck),
448 lo->ll_magic, LFSCK_LAYOUT_MAGIC);
/* Persist com->lc_file_ram to the lfsck_layout trace file: convert to
 * little-endian, then declare + write the record inside one local
 * transaction on the bottom device. */
455 static int lfsck_layout_store(const struct lu_env *env,
456 struct lfsck_component *com)
458 struct dt_object *obj = com->lc_obj;
459 struct lfsck_instance *lfsck = com->lc_lfsck;
460 struct lfsck_layout *lo = com->lc_file_disk;
461 struct thandle *handle;
462 ssize_t size = com->lc_file_size;
467 lfsck_layout_cpu_to_le(lo, com->lc_file_ram);
468 handle = dt_trans_create(env, lfsck->li_bottom);
469 if (IS_ERR(handle)) {
470 rc = PTR_ERR(handle);
471 CERROR("%s: fail to create trans for storing lfsck_layout: "
472 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
476 rc = dt_declare_record_write(env, obj, size, pos, handle);
478 CERROR("%s: fail to declare trans for storing lfsck_layout(1): "
479 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
483 rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
485 CERROR("%s: fail to start trans for storing lfsck_layout: "
486 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
490 rc = dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos,
493 CERROR("%s: fail to store lfsck_layout(1): size = %d, "
494 "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc);
/* Common exit: stop the transaction on both success and error paths. */
499 dt_trans_stop(env, lfsck->li_bottom, handle);
/* Reset the in-memory lfsck_layout record to a pristine LS_INIT state
 * and persist it, serialized by the component semaphore. */
504 static int lfsck_layout_init(const struct lu_env *env,
505 struct lfsck_component *com)
507 struct lfsck_layout *lo = com->lc_file_ram;
510 memset(lo, 0, com->lc_file_size);
511 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
512 lo->ll_status = LS_INIT;
513 down_write(&com->lc_sem);
514 rc = lfsck_layout_store(env, com);
515 up_write(&com->lc_sem);
/* Decide whether @fid/@obj identifies an OST-object: first ask the FLD
 * whether the sequence belongs to OST space; otherwise fall back to the
 * LMA xattr's LMAC_FID_ON_OST flag, and finally probe for the
 * XATTR_NAME_FID filter fid (final return path not visible here). */
520 static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt,
521 struct dt_object *obj, const struct lu_fid *fid)
523 struct seq_server_site *ss = lu_site2seq(dt->dd_lu_dev.ld_site);
524 struct lu_seq_range range = { 0 };
525 struct lustre_mdt_attrs *lma;
528 fld_range_set_any(&range);
529 rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), &range);
531 if (fld_range_is_ost(&range))
537 lma = &lfsck_env_info(env)->lti_lma;
538 rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)),
539 XATTR_NAME_LMA, BYPASS_CAPA);
540 if (rc == sizeof(*lma)) {
541 lustre_lma_swab(lma);
543 return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0;
546 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA);
/* Look up the per-sequence record for @seq in llsd_seq_list.  The list is
 * kept sorted ascending by lls_seq (see lfsck_layout_seq_insert), so the
 * scan can stop early once a larger sequence is reached. */
551 static struct lfsck_layout_seq *
552 lfsck_layout_seq_lookup(struct lfsck_layout_slave_data *llsd, __u64 seq)
554 struct lfsck_layout_seq *lls;
556 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
557 if (lls->lls_seq == seq)
560 if (lls->lls_seq > seq)
/* Insert @lls into llsd_seq_list, keeping the list sorted ascending by
 * lls_seq: find the first entry with a larger sequence and add before it
 * (or at the tail if none is larger). */
568 lfsck_layout_seq_insert(struct lfsck_layout_slave_data *llsd,
569 struct lfsck_layout_seq *lls)
571 struct lfsck_layout_seq *tmp;
572 struct list_head *pos = &llsd->llsd_seq_list;
574 list_for_each_entry(tmp, &llsd->llsd_seq_list, lls_list) {
575 if (lls->lls_seq < tmp->lls_seq) {
576 pos = &tmp->lls_list;
580 list_add_tail(&lls->lls_list, pos);
/* (Re)create a missing LAST_ID file @obj for its sequence and write an
 * initial last-id value into it, inside one local transaction.  Skipped
 * entirely in dry-run mode.
 * NOTE(review): partial listing -- the lastid initialization and several
 * error branches are not visible here. */
584 lfsck_layout_lastid_create(const struct lu_env *env,
585 struct lfsck_instance *lfsck,
586 struct dt_object *obj)
588 struct lfsck_thread_info *info = lfsck_env_info(env);
589 struct lu_attr *la = &info->lti_la;
590 struct dt_object_format *dof = &info->lti_dof;
591 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
592 struct dt_device *dt = lfsck->li_bottom;
599 CDEBUG(D_LFSCK, "To create LAST_ID for <seq> "LPX64"\n",
600 fid_seq(lfsck_dto2fid(obj)));
602 if (bk->lb_param & LPF_DRYRUN)
605 memset(la, 0, sizeof(*la));
606 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
607 la->la_valid = LA_MODE | LA_UID | LA_GID;
608 dof->dof_type = dt_mode_to_dft(S_IFREG);
610 th = dt_trans_create(env, dt);
612 RETURN(rc = PTR_ERR(th));
614 rc = dt_declare_create(env, obj, la, NULL, dof, th);
618 rc = dt_declare_record_write(env, obj, sizeof(lastid), pos, th);
622 rc = dt_trans_start_local(env, dt, th);
/* Re-check existence under the write lock: someone may have raced us. */
626 dt_write_lock(env, obj, 0);
627 if (likely(!dt_object_exists(obj))) {
628 rc = dt_create(env, obj, la, NULL, dof, th);
630 rc = dt_record_write(env, obj,
631 lfsck_buf_get(env, &lastid, sizeof(lastid)),
634 dt_write_unlock(env, obj);
639 dt_trans_stop(env, dt, th);
/* Re-read the on-disk LAST_ID for @lls and reconcile it with the highest
 * object ID the scan has actually seen: if disk is behind, adopt the
 * known value and flag LF_CRASHED_LASTID (notifying the layer above);
 * if disk is ahead, adopt the disk value. */
645 lfsck_layout_lastid_reload(const struct lu_env *env,
646 struct lfsck_component *com,
647 struct lfsck_layout_seq *lls)
653 dt_read_lock(env, lls->lls_lastid_obj, 0);
654 rc = dt_record_read(env, lls->lls_lastid_obj,
655 lfsck_buf_get(env, &lastid, sizeof(lastid)), &pos);
656 dt_read_unlock(env, lls->lls_lastid_obj);
657 if (unlikely(rc != 0))
660 lastid = le64_to_cpu(lastid);
661 if (lastid < lls->lls_lastid_known) {
662 struct lfsck_instance *lfsck = com->lc_lfsck;
663 struct lfsck_layout *lo = com->lc_file_ram;
665 lls->lls_lastid = lls->lls_lastid_known;
/* First detection of a crashed LAST_ID: notify and mark it once. */
667 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
668 LASSERT(lfsck->li_out_notify != NULL);
670 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
671 LE_LASTID_REBUILDING);
672 lo->ll_flags |= LF_CRASHED_LASTID;
674 } else if (lastid >= lls->lls_lastid) {
675 lls->lls_lastid = lastid;
/* Walk every tracked sequence and sync its LAST_ID file with the
 * in-memory value: clean sequences are first re-verified against disk
 * (see the OFD pre-creation note below); dirty ones are rewritten inside
 * a local transaction.  Dry-run mode skips the writes.
 * NOTE(review): partial listing -- loop-exit and some error branches are
 * not visible here. */
683 lfsck_layout_lastid_store(const struct lu_env *env,
684 struct lfsck_component *com)
686 struct lfsck_instance *lfsck = com->lc_lfsck;
687 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
688 struct dt_device *dt = lfsck->li_bottom;
689 struct lfsck_layout_slave_data *llsd = com->lc_data;
690 struct lfsck_layout_seq *lls;
696 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
699 /* XXX: Add the code back if we really found related
700 * inconsistent cases in the future. */
702 if (!lls->lls_dirty) {
703 /* In OFD, before the pre-creation, the LAST_ID
704 * file will be updated firstly, which may hide
705 * some potential crashed cases. For example:
707 * The old obj1's ID is higher than old LAST_ID
708 * but lower than the new LAST_ID, but the LFSCK
709 * have not touch the obj1 until the OFD updated
710 * the LAST_ID. So the LFSCK does not regard it
711 * as crashed case. But when OFD does not create
712 * successfully, it will set the LAST_ID as the
713 * real created objects' ID, then LFSCK needs to
714 * find the related inconsistency. */
715 rc = lfsck_layout_lastid_reload(env, com, lls);
716 if (likely(!lls->lls_dirty))
721 CDEBUG(D_LFSCK, "To sync the LAST_ID for <seq> "LPX64
722 " as <oid> "LPU64"\n", lls->lls_seq, lls->lls_lastid);
724 if (bk->lb_param & LPF_DRYRUN) {
729 th = dt_trans_create(env, dt);
732 CERROR("%s: (1) failed to store "LPX64": rc = %d\n",
733 lfsck_lfsck2name(com->lc_lfsck),
738 rc = dt_declare_record_write(env, lls->lls_lastid_obj,
739 sizeof(lastid), pos, th);
743 rc = dt_trans_start_local(env, dt, th);
747 lastid = cpu_to_le64(lls->lls_lastid);
748 dt_write_lock(env, lls->lls_lastid_obj, 0);
749 rc = dt_record_write(env, lls->lls_lastid_obj,
750 lfsck_buf_get(env, &lastid,
751 sizeof(lastid)), &pos, th);
752 dt_write_unlock(env, lls->lls_lastid_obj);
757 dt_trans_stop(env, dt, th);
760 CERROR("%s: (2) failed to store "LPX64": rc = %d\n",
761 lfsck_lfsck2name(com->lc_lfsck),
/* Locate and read the LAST_ID file for @lls->lls_seq.  If the file does
 * not exist it is treated as a crashed LAST_ID: notify once, optionally
 * delay under the OBD_FAIL_LFSCK_DELAY4 fault-injection point, then
 * recreate it.  Otherwise read the value; a short read (rc == 0) also
 * counts as crashed.  On success the object reference is kept in
 * lls->lls_lastid_obj; on failure it is dropped.
 * NOTE(review): partial listing -- some branches/returns not visible. */
770 lfsck_layout_lastid_load(const struct lu_env *env,
771 struct lfsck_component *com,
772 struct lfsck_layout_seq *lls)
774 struct lfsck_instance *lfsck = com->lc_lfsck;
775 struct lfsck_layout *lo = com->lc_file_ram;
776 struct lu_fid *fid = &lfsck_env_info(env)->lti_fid;
777 struct dt_object *obj;
782 lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck->li_bottom));
783 obj = dt_locate(env, lfsck->li_bottom, fid);
785 RETURN(PTR_ERR(obj));
787 /* LAST_ID crashed, to be rebuilt */
788 if (!dt_object_exists(obj)) {
789 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
790 LASSERT(lfsck->li_out_notify != NULL);
792 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
793 LE_LASTID_REBUILDING);
794 lo->ll_flags |= LF_CRASHED_LASTID;
/* Fault-injection point: pause (droppping lc_sem) before the rebuild. */
796 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) &&
798 struct l_wait_info lwi = LWI_TIMEOUT(
799 cfs_time_seconds(cfs_fail_val),
802 up_write(&com->lc_sem);
803 l_wait_event(lfsck->li_thread.t_ctl_waitq,
804 !thread_is_running(&lfsck->li_thread),
806 down_write(&com->lc_sem);
810 rc = lfsck_layout_lastid_create(env, lfsck, obj);
812 dt_read_lock(env, obj, 0);
813 rc = dt_read(env, obj,
814 lfsck_buf_get(env, &lls->lls_lastid, sizeof(__u64)),
816 dt_read_unlock(env, obj);
817 if (rc != 0 && rc != sizeof(__u64))
818 GOTO(out, rc = (rc > 0 ? -EFAULT : rc));
/* rc == 0 means an empty LAST_ID file: also a crashed case. */
820 if (rc == 0 && !(lo->ll_flags & LF_CRASHED_LASTID)) {
821 LASSERT(lfsck->li_out_notify != NULL);
823 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
824 LE_LASTID_REBUILDING);
825 lo->ll_flags |= LF_CRASHED_LASTID;
828 lls->lls_lastid = le64_to_cpu(lls->lls_lastid);
836 lfsck_object_put(env, obj);
838 lls->lls_lastid_obj = obj;
/* Interpret callback for async LFSCK RPCs sent by the layout master.
 * For LE_START: on failure mark the scan LF_INCOMPLETE (OST case); on
 * success (re)link the target onto the per-type tracking/phase1 lists
 * unless it died or already finished.  For LE_QUERY: move the target to
 * the phase2 list when the peer reached phase2, or unlink it entirely
 * when it is done/failed.  Always drops the component reference taken
 * when the request was issued.
 * NOTE(review): partial listing -- case labels and some branches are not
 * visible here.  Fixed mis-encoded "&lt" sequences: "<ds->"/"<d->" were
 * corrupted forms of "&ltds->"/"&ltd->". */
843 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
844 struct ptlrpc_request *req,
847 struct lfsck_async_interpret_args *laia = args;
848 struct lfsck_component *com = laia->laia_com;
849 struct lfsck_layout_master_data *llmd = com->lc_data;
850 struct lfsck_tgt_descs *ltds = laia->laia_ltds;
851 struct lfsck_tgt_desc *ltd = laia->laia_ltd;
852 struct lfsck_request *lr = laia->laia_lr;
854 switch (lr->lr_event) {
857 struct lfsck_layout *lo = com->lc_file_ram;
/* The target could not be started: the scan result is incomplete. */
859 lo->ll_flags |= LF_INCOMPLETE;
864 spin_lock(&ltds->ltd_lock);
865 if (ltd->ltd_dead || ltd->ltd_layout_done) {
866 spin_unlock(&ltds->ltd_lock);
871 if (lr->lr_flags & LEF_TO_OST) {
872 if (list_empty(&ltd->ltd_layout_list))
873 list_add_tail(&ltd->ltd_layout_list,
874 &llmd->llmd_ost_list);
875 if (list_empty(&ltd->ltd_layout_phase_list))
876 list_add_tail(&ltd->ltd_layout_phase_list,
877 &llmd->llmd_ost_phase1_list);
879 if (list_empty(&ltd->ltd_layout_list))
880 list_add_tail(&ltd->ltd_layout_list,
881 &llmd->llmd_mdt_list);
882 if (list_empty(&ltd->ltd_layout_phase_list))
883 list_add_tail(&ltd->ltd_layout_phase_list,
884 &llmd->llmd_mdt_phase1_list);
886 spin_unlock(&ltds->ltd_lock);
893 CERROR("%s: fail to notify %s %x for layout: "
894 "event = %d, rc = %d\n",
895 lfsck_lfsck2name(com->lc_lfsck),
896 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
897 ltd->ltd_index, lr->lr_event, rc);
900 struct lfsck_reply *reply;
/* Query failed: forget about this target for the current run. */
903 spin_lock(&ltds->ltd_lock);
904 list_del_init(&ltd->ltd_layout_phase_list);
905 list_del_init(&ltd->ltd_layout_list);
906 spin_unlock(&ltds->ltd_lock);
911 reply = req_capsule_server_get(&req->rq_pill,
915 CERROR("%s: invalid return value: rc = %d\n",
916 lfsck_lfsck2name(com->lc_lfsck), rc);
917 spin_lock(&ltds->ltd_lock);
918 list_del_init(&ltd->ltd_layout_phase_list);
919 list_del_init(&ltd->ltd_layout_list);
920 spin_unlock(&ltds->ltd_lock);
925 switch (reply->lr_status) {
926 case LS_SCANNING_PHASE1:
928 case LS_SCANNING_PHASE2:
/* The peer moved on to phase2: migrate it to the phase2 list. */
929 spin_lock(&ltds->ltd_lock);
930 list_del_init(&ltd->ltd_layout_phase_list);
931 if (ltd->ltd_dead || ltd->ltd_layout_done) {
932 spin_unlock(&ltds->ltd_lock);
936 if (lr->lr_flags & LEF_TO_OST)
937 list_add_tail(&ltd->ltd_layout_phase_list,
938 &llmd->llmd_ost_phase2_list);
940 list_add_tail(&ltd->ltd_layout_phase_list,
941 &llmd->llmd_mdt_phase2_list);
942 spin_unlock(&ltds->ltd_lock);
945 spin_lock(&ltds->ltd_lock);
946 list_del_init(&ltd->ltd_layout_phase_list);
947 list_del_init(&ltd->ltd_layout_list);
948 spin_unlock(&ltds->ltd_lock);
955 CERROR("%s: unexpected event: rc = %d\n",
956 lfsck_lfsck2name(com->lc_lfsck), lr->lr_event);
960 lfsck_component_put(env, com);
/* Send an LE_QUERY RPC to every target still in phase1 (MDTs first, then
 * OSTs), collecting the replies via the async interpret callback.  Each
 * target is stamped with llmd_touch_gen and rotated to the list tail so
 * the sweep terminates; the lock is dropped around the RPC issue and a
 * reference is taken on the target for the in-flight request.
 * NOTE(review): partial listing -- loop-exit, restart-for-OSTs and error
 * branches are not visible here.  Fixed mis-encoded "&lt" sequences:
 * "<ds->"/"<d->" were corrupted forms of "&ltds->"/"&ltd->". */
965 static int lfsck_layout_master_query_others(const struct lu_env *env,
966 struct lfsck_component *com)
968 struct lfsck_thread_info *info = lfsck_env_info(env);
969 struct lfsck_request *lr = &info->lti_lr;
970 struct lfsck_async_interpret_args *laia = &info->lti_laia;
971 struct lfsck_instance *lfsck = com->lc_lfsck;
972 struct lfsck_layout_master_data *llmd = com->lc_data;
973 struct ptlrpc_request_set *set;
974 struct lfsck_tgt_descs *ltds;
975 struct lfsck_tgt_desc *ltd;
976 struct list_head *head;
982 set = ptlrpc_prep_set();
986 llmd->llmd_touch_gen++;
987 memset(lr, 0, sizeof(*lr));
988 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
989 lr->lr_event = LE_QUERY;
990 lr->lr_active = LT_LAYOUT;
991 laia->laia_com = com;
/* Query MDTs still in phase1 first; otherwise fall back to the OSTs. */
994 if (!list_empty(&llmd->llmd_mdt_phase1_list)) {
995 ltds = &lfsck->li_mdt_descs;
997 head = &llmd->llmd_mdt_phase1_list;
1001 ltds = &lfsck->li_ost_descs;
1002 lr->lr_flags = LEF_TO_OST;
1003 head = &llmd->llmd_ost_phase1_list;
1006 laia->laia_ltds = ltds;
1007 spin_lock(&ltds->ltd_lock);
1008 while (!list_empty(head)) {
1009 ltd = list_entry(head->next,
1010 struct lfsck_tgt_desc,
1011 ltd_layout_phase_list);
/* Already touched in this sweep: the whole list has been visited. */
1012 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1015 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1016 list_del(&ltd->ltd_layout_phase_list);
1017 list_add_tail(&ltd->ltd_layout_phase_list, head);
/* Pin the target across the (lock-dropped) async request. */
1018 atomic_inc(&ltd->ltd_ref);
1019 laia->laia_ltd = ltd;
1020 spin_unlock(&ltds->ltd_lock);
1021 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1022 lfsck_layout_master_async_interpret,
1025 CERROR("%s: fail to query %s %x for layout: rc = %d\n",
1026 lfsck_lfsck2name(lfsck),
1027 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
1028 ltd->ltd_index, rc);
1034 spin_lock(&ltds->ltd_lock);
1036 spin_unlock(&ltds->ltd_lock);
1039 rc = ptlrpc_set_wait(set);
1041 ptlrpc_set_destroy(set);
/* If the MDT pass emptied the phase1 list, repeat for the OSTs. */
1047 if (!(lr->lr_flags & LEF_TO_OST) &&
1048 list_empty(&llmd->llmd_mdt_phase1_list))
1051 ptlrpc_set_destroy(set);
1053 RETURN(rc1 != 0 ? rc1 : rc);
/* True when the master may start orphan handling: all other MDTs have
 * finished phase1, and either some OST already reached phase2 or no OST
 * is left in phase1 at all. */
1057 lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
1059 return list_empty(&llmd->llmd_mdt_phase1_list) &&
1060 (!list_empty(&llmd->llmd_ost_phase2_list) ||
1061 list_empty(&llmd->llmd_ost_phase1_list));
/* Broadcast the event in @lr (LE_START / LE_STOP / LE_PHASE2_DONE /
 * LE_PHASE1_DONE) from the layout master to the relevant targets.
 * LE_START notifies every OST (marking unreachable ones LF_INCOMPLETE)
 * and then, for the sponsor, other MDTs; non-sponsors only link the MDTs
 * locally.  LE_STOP forces the stop flag and falls through to the
 * stop/phase2 path, which drains the MDT and/or OST tracking lists while
 * sending the notification.  LE_PHASE1_DONE sweeps the MDT phase1 list
 * using llmd_touch_gen to terminate.  All RPCs complete through
 * lfsck_layout_master_async_interpret on a shared request set.
 * NOTE(review): partial listing -- case labels, loop exits and some error
 * branches are not visible here.  Fixed mis-encoded "&lt" sequences:
 * "<ds->"/"<d->" were corrupted forms of "&ltds->"/"&ltd->". */
1064 static int lfsck_layout_master_notify_others(const struct lu_env *env,
1065 struct lfsck_component *com,
1066 struct lfsck_request *lr,
1069 struct lfsck_thread_info *info = lfsck_env_info(env);
1070 struct lfsck_async_interpret_args *laia = &info->lti_laia;
1071 struct lfsck_instance *lfsck = com->lc_lfsck;
1072 struct lfsck_layout_master_data *llmd = com->lc_data;
1073 struct lfsck_layout *lo = com->lc_file_ram;
1074 struct ptlrpc_request_set *set;
1075 struct lfsck_tgt_descs *ltds;
1076 struct lfsck_tgt_desc *ltd;
1077 struct lfsck_tgt_desc *next;
1078 struct list_head *head;
1084 set = ptlrpc_prep_set();
1088 lr->lr_active = LT_LAYOUT;
1089 laia->laia_com = com;
1092 switch (lr->lr_event) {
1094 /* Notify OSTs firstly, then other MDTs if needed. */
1095 lr->lr_flags |= LEF_TO_OST;
1096 ltds = &lfsck->li_ost_descs;
1099 laia->laia_ltds = ltds;
1100 down_read(&ltds->ltd_rw_sem);
1101 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1102 ltd = lfsck_tgt_get(ltds, idx);
1103 LASSERT(ltd != NULL);
1105 laia->laia_ltd = ltd;
1106 ltd->ltd_layout_done = 0;
1107 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1108 lfsck_layout_master_async_interpret,
1109 laia, LFSCK_NOTIFY);
1111 CERROR("%s: fail to notify %s %x for layout "
1113 lfsck_lfsck2name(lfsck),
1114 (lr->lr_flags & LEF_TO_OST) ? "OST" :
/* A target we could not start leaves the scan incomplete. */
1117 lo->ll_flags |= LF_INCOMPLETE;
1122 up_read(&ltds->ltd_rw_sem);
1126 rc = ptlrpc_set_wait(set);
1128 ptlrpc_set_destroy(set);
1134 if (!(flags & LPF_ALL_MDT))
1137 ltds = &lfsck->li_mdt_descs;
1138 /* The sponsor broadcasts the request to other MDTs. */
1139 if (flags & LPF_BROADCAST) {
1140 flags &= ~LPF_ALL_MDT;
1141 lr->lr_flags &= ~LEF_TO_OST;
1145 /* non-sponsors link other MDT targets locally. */
1146 spin_lock(&ltds->ltd_lock);
1147 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1148 ltd = LTD_TGT(ltds, idx);
1149 LASSERT(ltd != NULL);
1151 if (!list_empty(&ltd->ltd_layout_list))
1154 list_add_tail(&ltd->ltd_layout_list,
1155 &llmd->llmd_mdt_list);
1156 list_add_tail(&ltd->ltd_layout_phase_list,
1157 &llmd->llmd_mdt_phase1_list);
1159 spin_unlock(&ltds->ltd_lock);
1163 if (flags & LPF_BROADCAST)
1164 lr->lr_flags |= LEF_FORCE_STOP;
/* fallthrough: LE_STOP shares the drain/notify path below. */
1165 case LE_PHASE2_DONE:
1166 /* Notify other MDTs if needed, then the OSTs. */
1167 if (flags & LPF_ALL_MDT) {
1168 /* The sponsor broadcasts the request to other MDTs. */
1169 if (flags & LPF_BROADCAST) {
1170 lr->lr_flags &= ~LEF_TO_OST;
1171 head = &llmd->llmd_mdt_list;
1172 ltds = &lfsck->li_mdt_descs;
1176 /* non-sponsors unlink other MDT targets locally. */
1177 ltds = &lfsck->li_mdt_descs;
1178 spin_lock(&ltds->ltd_lock);
1179 list_for_each_entry_safe(ltd, next,
1180 &llmd->llmd_mdt_list,
1182 list_del_init(&ltd->ltd_layout_phase_list);
1183 list_del_init(&ltd->ltd_layout_list);
1185 spin_unlock(&ltds->ltd_lock);
1189 lr->lr_flags |= LEF_TO_OST;
1190 head = &llmd->llmd_ost_list;
1191 ltds = &lfsck->li_ost_descs;
1194 laia->laia_ltds = ltds;
1195 spin_lock(&ltds->ltd_lock);
/* Drain the list: unlink each target, then notify it (lock dropped). */
1196 while (!list_empty(head)) {
1197 ltd = list_entry(head->next, struct lfsck_tgt_desc,
1199 if (!list_empty(&ltd->ltd_layout_phase_list))
1200 list_del_init(&ltd->ltd_layout_phase_list);
1201 list_del_init(&ltd->ltd_layout_list);
1202 laia->laia_ltd = ltd;
1203 spin_unlock(&ltds->ltd_lock);
1204 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1205 lfsck_layout_master_async_interpret,
1206 laia, LFSCK_NOTIFY);
1208 CERROR("%s: fail to notify %s %x for layout "
1209 "stop/phase2: rc = %d\n",
1210 lfsck_lfsck2name(lfsck),
1211 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1212 "MDT", ltd->ltd_index, rc);
1215 spin_lock(&ltds->ltd_lock);
1217 spin_unlock(&ltds->ltd_lock);
1219 if (!(flags & LPF_BROADCAST))
1224 rc = ptlrpc_set_wait(set);
1226 ptlrpc_set_destroy(set);
1232 flags &= ~LPF_BROADCAST;
1234 case LE_PHASE1_DONE:
1235 llmd->llmd_touch_gen++;
1236 lr->lr_flags &= ~LEF_TO_OST;
1237 ltds = &lfsck->li_mdt_descs;
1238 laia->laia_ltds = ltds;
1239 spin_lock(&ltds->ltd_lock);
1240 while (!list_empty(&llmd->llmd_mdt_phase1_list)) {
1241 ltd = list_entry(llmd->llmd_mdt_phase1_list.next,
1242 struct lfsck_tgt_desc,
1243 ltd_layout_phase_list);
/* Generation already matches: every entry has been visited. */
1244 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1247 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1248 list_del_init(&ltd->ltd_layout_phase_list);
1249 list_add_tail(&ltd->ltd_layout_phase_list,
1250 &llmd->llmd_mdt_phase1_list);
1251 laia->laia_ltd = ltd;
1252 spin_unlock(&ltds->ltd_lock);
1253 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1254 lfsck_layout_master_async_interpret,
1255 laia, LFSCK_NOTIFY);
1257 CERROR("%s: fail to notify MDT %x for layout "
1258 "phase1 done: rc = %d\n",
1259 lfsck_lfsck2name(lfsck),
1260 ltd->ltd_index, rc);
1263 spin_lock(&ltds->ltd_lock);
1265 spin_unlock(&ltds->ltd_lock);
1268 CERROR("%s: unexpected LFSCK event: rc = %d\n",
1269 lfsck_lfsck2name(lfsck), lr->lr_event);
1275 rc = ptlrpc_set_wait(set);
1276 ptlrpc_set_destroy(set);
/* Starting with no OSTs registered at all is meaningless. */
1278 if (rc == 0 && lr->lr_event == LE_START &&
1279 list_empty(&llmd->llmd_ost_list))
/* Record the outcome of the phase2 (double) scan into the trace file:
 * accumulate run time and checked counts, set the final status
 * (PARTIAL / COMPLETED / STOPPED / FAILED), move the component onto the
 * idle list unless paused, and persist the result.  Serialized by
 * com->lc_sem.  NOTE(review): the rc > 0 success condition and some
 * intermediate lines are not visible in this listing. */
1285 static int lfsck_layout_double_scan_result(const struct lu_env *env,
1286 struct lfsck_component *com,
1289 struct lfsck_instance *lfsck = com->lc_lfsck;
1290 struct lfsck_layout *lo = com->lc_file_ram;
1291 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1293 down_write(&com->lc_sem);
1295 lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1296 HALF_SEC - lfsck->li_time_last_checkpoint);
1297 lo->ll_time_last_checkpoint = cfs_time_current_sec();
1298 lo->ll_objs_checked_phase2 += com->lc_new_checked;
1301 com->lc_journal = 0;
1302 if (lo->ll_flags & LF_INCOMPLETE)
1303 lo->ll_status = LS_PARTIAL;
1305 lo->ll_status = LS_COMPLETED;
1306 if (!(bk->lb_param & LPF_DRYRUN))
1307 lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
1308 lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
1309 lo->ll_success_count++;
1310 } else if (rc == 0) {
1311 lo->ll_status = lfsck->li_status;
1312 if (lo->ll_status == 0)
1313 lo->ll_status = LS_STOPPED;
1315 lo->ll_status = LS_FAILED;
/* Unless paused, park the component on the idle list. */
1318 if (lo->ll_status != LS_PAUSED) {
1319 spin_lock(&lfsck->li_lock);
1320 list_del_init(&com->lc_link);
1321 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1322 spin_unlock(&lfsck->li_lock);
1325 rc = lfsck_layout_store(env, com);
1327 up_write(&com->lc_sem);
/* Take a local EX inodebits DLM lock (@bits) on @obj's resource, storing
 * the handle in @lh for lfsck_layout_unlock().
 * NOTE(review): partial listing -- on ELDLM_OK the handle is presumably
 * kept and on failure cleared/-EIO returned; confirm against full source. */
1332 static int lfsck_layout_lock(const struct lu_env *env,
1333 struct lfsck_component *com,
1334 struct dt_object *obj,
1335 struct lustre_handle *lh, __u64 bits)
1337 struct lfsck_thread_info *info = lfsck_env_info(env);
1338 ldlm_policy_data_t *policy = &info->lti_policy;
1339 struct ldlm_res_id *resid = &info->lti_resid;
1340 struct lfsck_instance *lfsck = com->lc_lfsck;
1341 __u64 flags = LDLM_FL_ATOMIC_CB;
1344 LASSERT(lfsck->li_namespace != NULL);
1346 memset(policy, 0, sizeof(*policy));
1347 policy->l_inodebits.bits = bits;
1348 fid_build_reg_res_name(lfsck_dto2fid(obj), resid);
1349 rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS,
1350 policy, LCK_EX, &flags, ldlm_blocking_ast,
1351 ldlm_completion_ast, NULL, NULL, 0,
1352 LVB_T_NONE, NULL, lh);
1353 if (rc == ELDLM_OK) {
1356 memset(lh, 0, sizeof(*lh));
/* Release the EX lock taken by lfsck_layout_lock() (no-op if the handle
 * is unused) and invalidate the handle. */
1363 static void lfsck_layout_unlock(struct lustre_handle *lh)
1365 if (lustre_handle_is_used(lh)) {
1366 ldlm_lock_decref(lh, LCK_EX);
1367 memset(lh, 0, sizeof(*lh));
/* Placeholder for OST-object orphan scanning on target @ltd; the real
 * implementation lands in a later patch. */
1371 static int lfsck_layout_scan_orphan(const struct lu_env *env,
1372 struct lfsck_component *com,
1373 struct lfsck_tgt_desc *ltd)
1375 /* XXX: To be extended in other patch. */
1380 static int lfsck_layout_assistant(void *args)
1382 struct lfsck_thread_args *lta = args;
1383 struct lu_env *env = <a->lta_env;
1384 struct lfsck_component *com = lta->lta_com;
1385 struct lfsck_instance *lfsck = lta->lta_lfsck;
1386 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1387 struct lfsck_position *pos = &com->lc_pos_start;
1388 struct lfsck_thread_info *info = lfsck_env_info(env);
1389 struct lfsck_request *lr = &info->lti_lr;
1390 struct lfsck_layout_master_data *llmd = com->lc_data;
1391 struct ptlrpc_thread *mthread = &lfsck->li_thread;
1392 struct ptlrpc_thread *athread = &llmd->llmd_thread;
1393 struct lfsck_layout_req *llr;
1394 struct l_wait_info lwi = { 0 };
1400 if (lta->lta_lsp->lsp_start != NULL)
1401 flags = lta->lta_lsp->lsp_start->ls_flags;
1403 flags = bk->lb_param;
1404 memset(lr, 0, sizeof(*lr));
1405 lr->lr_event = LE_START;
1406 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1407 lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
1409 lr->lr_speed = bk->lb_speed_limit;
1410 lr->lr_version = bk->lb_version;
1411 lr->lr_param = bk->lb_param;
1412 lr->lr_async_windows = bk->lb_async_windows;
1413 if (pos->lp_oit_cookie <= 1)
1414 lr->lr_param |= LPF_RESET;
1416 rc = lfsck_layout_master_notify_others(env, com, lr, flags);
1418 CERROR("%s: fail to notify others for layout start: rc = %d\n",
1419 lfsck_lfsck2name(lfsck), rc);
1423 spin_lock(&llmd->llmd_lock);
1424 thread_set_flags(athread, SVC_RUNNING);
1425 spin_unlock(&llmd->llmd_lock);
1426 wake_up_all(&mthread->t_ctl_waitq);
1429 while (!list_empty(&llmd->llmd_req_list)) {
1430 bool wakeup = false;
1432 if (unlikely(llmd->llmd_exit))
1433 GOTO(cleanup1, rc = llmd->llmd_post_result);
1435 /* XXX: To be extended in other patch.
1437 * Compare the OST side attribute with local attribute,
1438 * and fix it if found inconsistency. */
1440 spin_lock(&llmd->llmd_lock);
1441 llr = list_entry(llmd->llmd_req_list.next,
1442 struct lfsck_layout_req,
1444 list_del_init(&llr->llr_list);
1445 if (bk->lb_async_windows != 0 &&
1446 llmd->llmd_prefetched >= bk->lb_async_windows)
1449 llmd->llmd_prefetched--;
1450 spin_unlock(&llmd->llmd_lock);
1452 wake_up_all(&mthread->t_ctl_waitq);
1454 lfsck_layout_req_fini(env, llr);
1457 /* Wakeup the master engine if it is waiting in checkpoint. */
1458 wake_up_all(&mthread->t_ctl_waitq);
1460 l_wait_event(athread->t_ctl_waitq,
1461 !lfsck_layout_req_empty(llmd) ||
1463 llmd->llmd_to_post ||
1464 llmd->llmd_to_double_scan,
1467 if (unlikely(llmd->llmd_exit))
1468 GOTO(cleanup1, rc = llmd->llmd_post_result);
1470 if (!list_empty(&llmd->llmd_req_list))
1473 if (llmd->llmd_to_post) {
1474 llmd->llmd_to_post = 0;
1475 LASSERT(llmd->llmd_post_result > 0);
1477 memset(lr, 0, sizeof(*lr));
1478 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1479 lr->lr_event = LE_PHASE1_DONE;
1480 lr->lr_status = llmd->llmd_post_result;
1481 rc = lfsck_layout_master_notify_others(env, com, lr, 0);
1483 CERROR("%s: failed to notify others "
1484 "for layout post: rc = %d\n",
1485 lfsck_lfsck2name(lfsck), rc);
1487 /* Wakeup the master engine to go ahead. */
1488 wake_up_all(&mthread->t_ctl_waitq);
1491 if (llmd->llmd_to_double_scan) {
1492 llmd->llmd_to_double_scan = 0;
1493 atomic_inc(&lfsck->li_double_scan_count);
1494 llmd->llmd_in_double_scan = 1;
1495 wake_up_all(&mthread->t_ctl_waitq);
1497 while (llmd->llmd_in_double_scan) {
1498 struct lfsck_tgt_descs *ltds =
1499 &lfsck->li_ost_descs;
1500 struct lfsck_tgt_desc *ltd;
1502 rc = lfsck_layout_master_query_others(env, com);
1503 if (lfsck_layout_master_to_orphan(llmd))
1509 /* Pull LFSCK status on related targets once
1510 * per 30 seconds if we are not notified. */
1511 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30),
1512 cfs_time_seconds(1),
1514 rc = l_wait_event(athread->t_ctl_waitq,
1515 lfsck_layout_master_to_orphan(llmd) ||
1517 !thread_is_running(mthread),
1520 if (unlikely(llmd->llmd_exit ||
1521 !thread_is_running(mthread)))
1522 GOTO(cleanup2, rc = 0);
1524 if (rc == -ETIMEDOUT)
1531 spin_lock(<ds->ltd_lock);
1533 &llmd->llmd_ost_phase2_list)) {
1535 llmd->llmd_ost_phase2_list.next,
1536 struct lfsck_tgt_desc,
1537 ltd_layout_phase_list);
1539 <d->ltd_layout_phase_list);
1540 spin_unlock(<ds->ltd_lock);
1542 rc = lfsck_layout_scan_orphan(env, com,
1545 bk->lb_param & LPF_FAILOUT)
1548 if (unlikely(llmd->llmd_exit ||
1549 !thread_is_running(mthread)))
1550 GOTO(cleanup2, rc = 0);
1552 spin_lock(<ds->ltd_lock);
1555 if (list_empty(&llmd->llmd_ost_phase1_list)) {
1556 spin_unlock(<ds->ltd_lock);
1557 GOTO(cleanup2, rc = 1);
1559 spin_unlock(<ds->ltd_lock);
1565 /* Cleanup the unfinished requests. */
1566 spin_lock(&llmd->llmd_lock);
1567 while (!list_empty(&llmd->llmd_req_list)) {
1568 llr = list_entry(llmd->llmd_req_list.next,
1569 struct lfsck_layout_req,
1571 list_del_init(&llr->llr_list);
1572 llmd->llmd_prefetched--;
1573 spin_unlock(&llmd->llmd_lock);
1574 lfsck_layout_req_fini(env, llr);
1575 spin_lock(&llmd->llmd_lock);
1577 spin_unlock(&llmd->llmd_lock);
1579 LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n",
1580 llmd->llmd_prefetched);
1583 memset(lr, 0, sizeof(*lr));
1584 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1586 lr->lr_event = LE_PHASE2_DONE;
1589 } else if (rc == 0) {
1590 lr->lr_event = LE_STOP;
1591 if (lfsck->li_status == LS_PAUSED ||
1592 lfsck->li_status == LS_CO_PAUSED) {
1594 lr->lr_status = LS_CO_PAUSED;
1595 } else if (lfsck->li_status == LS_STOPPED ||
1596 lfsck->li_status == LS_CO_STOPPED) {
1597 flags = lfsck->li_flags;
1598 if (flags & LPF_BROADCAST)
1599 lr->lr_status = LS_STOPPED;
1601 lr->lr_status = LS_CO_STOPPED;
1606 lr->lr_event = LE_STOP;
1608 lr->lr_status = LS_CO_FAILED;
1611 rc1 = lfsck_layout_master_notify_others(env, com, lr, flags);
1613 CERROR("%s: failed to notify others for layout quit: rc = %d\n",
1614 lfsck_lfsck2name(lfsck), rc1);
1618 /* Under force exit case, some requests may be just freed without
1619 * verification, those objects should be re-handled when next run.
1620 * So not update the on-disk tracing file under such case. */
1621 if (!llmd->llmd_exit)
1622 rc1 = lfsck_layout_double_scan_result(env, com, rc);
1625 if (llmd->llmd_in_double_scan)
1626 atomic_dec(&lfsck->li_double_scan_count);
1628 spin_lock(&llmd->llmd_lock);
1629 llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc);
1630 thread_set_flags(athread, SVC_STOPPED);
1631 wake_up_all(&mthread->t_ctl_waitq);
1632 spin_unlock(&llmd->llmd_lock);
1633 lfsck_thread_args_fini(lta);
/* Interpret callback for the async LFSCK_QUERY RPC sent by the slave to a
 * master; drops the target/component/export references taken when the
 * request was packed. A master that is no longer scanning is removed from
 * the slave's master list. */
lfsck_layout_slave_async_interpret(const struct lu_env *env,
				   struct ptlrpc_request *req,
	struct lfsck_layout_slave_async_args *llsaa = args;
	struct obd_export *exp = llsaa->llsaa_exp;
	struct lfsck_component *com = llsaa->llsaa_com;
	struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst;
	struct lfsck_layout_slave_data *llsd = com->lc_data;

		/* It is quite probably caused by target crash,
		 * to make the LFSCK can go ahead, assume that
		 * the target finished the LFSCK processing. */

		struct lfsck_reply *lr;

		lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY);
		if (lr->lr_status != LS_SCANNING_PHASE1 &&
		    lr->lr_status != LS_SCANNING_PHASE2)

	lfsck_layout_llst_del(llsd, llst);
	lfsck_layout_llst_put(llst);
	lfsck_component_put(env, com);
	class_export_put(exp);
/* Pack and enqueue (on @set) an async LFSCK_QUERY RPC to the master behind
 * @exp; the reply is handled by lfsck_layout_slave_async_interpret().
 * Takes a reference on @com, stashed in the async args. */
static int lfsck_layout_async_query(const struct lu_env *env,
				    struct lfsck_component *com,
				    struct obd_export *exp,
				    struct lfsck_layout_slave_target *llst,
				    struct lfsck_request *lr,
				    struct ptlrpc_request_set *set)
	struct lfsck_layout_slave_async_args *llsaa;
	struct ptlrpc_request *req;
	struct lfsck_request *tmp;

	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_QUERY);

	rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_QUERY);
		ptlrpc_request_free(req);

	/* Copy the caller-prepared request body into the RPC. */
	tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
	ptlrpc_request_set_replen(req);

	/* Context for the interpret callback. */
	llsaa = ptlrpc_req_async_args(req);
	llsaa->llsaa_exp = exp;
	llsaa->llsaa_com = lfsck_component_get(com);
	llsaa->llsaa_llst = llst;
	req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
	ptlrpc_set_add_req(set, req);
/* Pack and enqueue (on @set) a one-way LFSCK_NOTIFY RPC carrying @lr to
 * the target behind @exp; no interpret callback is installed. */
static int lfsck_layout_async_notify(const struct lu_env *env,
				     struct obd_export *exp,
				     struct lfsck_request *lr,
				     struct ptlrpc_request_set *set)
	struct ptlrpc_request *req;
	struct lfsck_request *tmp;

	req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_NOTIFY);

	rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
		ptlrpc_request_free(req);

	/* Copy the caller-prepared request body into the RPC. */
	tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
	ptlrpc_request_set_replen(req);
	ptlrpc_set_add_req(set, req);
/* Slave side: asynchronously query every master on llsd_master_list about
 * its LFSCK status. Masters that cannot be reached are dropped from the
 * list; the touch generation guarantees each target is visited at most
 * once per pass (touched targets move to the list tail). */
lfsck_layout_slave_query_master(const struct lu_env *env,
				struct lfsck_component *com)
	struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout_slave_data *llsd = com->lc_data;
	struct lfsck_layout_slave_target *llst;
	struct obd_export *exp;
	struct ptlrpc_request_set *set;

	set = ptlrpc_prep_set();

	memset(lr, 0, sizeof(*lr));
	lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
	lr->lr_event = LE_QUERY;
	lr->lr_active = LT_LAYOUT;

	llsd->llsd_touch_gen++;
	spin_lock(&llsd->llsd_lock);
	while (!list_empty(&llsd->llsd_master_list)) {
		llst = list_entry(llsd->llsd_master_list.next,
				  struct lfsck_layout_slave_target,
		/* Already visited in this pass: the whole list is done. */
		if (llst->llst_gen == llsd->llsd_touch_gen)

		llst->llst_gen = llsd->llsd_touch_gen;
		list_del(&llst->llst_list);
		list_add_tail(&llst->llst_list,
			      &llsd->llsd_master_list);
		atomic_inc(&llst->llst_ref);
		spin_unlock(&llsd->llsd_lock);

		exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
			/* No usable connection to this master: forget it. */
			lfsck_layout_llst_del(llsd, llst);
			lfsck_layout_llst_put(llst);
			spin_lock(&llsd->llsd_lock);

		rc = lfsck_layout_async_query(env, com, exp, llst, lr, set);
			CERROR("%s: slave fail to query %s for layout: "
			       "rc = %d\n", lfsck_lfsck2name(lfsck),
			       exp->exp_obd->obd_name, rc);

			lfsck_layout_llst_put(llst);
			class_export_put(exp);

		spin_lock(&llsd->llsd_lock);
	spin_unlock(&llsd->llsd_lock);

	/* Wait for all queued queries, then release the set. */
	rc = ptlrpc_set_wait(set);
	ptlrpc_set_destroy(set);

	RETURN(rc1 != 0 ? rc1 : rc);
/* Slave side: asynchronously notify every master on llsd_master_list
 * about @event (e.g. LE_PHASE1_DONE) with @result as status. Same
 * touch-generation walk as lfsck_layout_slave_query_master(). */
lfsck_layout_slave_notify_master(const struct lu_env *env,
				 struct lfsck_component *com,
				 enum lfsck_events event, int result)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout_slave_data *llsd = com->lc_data;
	struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
	struct lfsck_layout_slave_target *llst;
	struct obd_export *exp;
	struct ptlrpc_request_set *set;

	set = ptlrpc_prep_set();

	memset(lr, 0, sizeof(*lr));
	lr->lr_event = event;
	lr->lr_flags = LEF_FROM_OST;
	lr->lr_status = result;
	lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
	lr->lr_active = LT_LAYOUT;
	llsd->llsd_touch_gen++;
	spin_lock(&llsd->llsd_lock);
	while (!list_empty(&llsd->llsd_master_list)) {
		llst = list_entry(llsd->llsd_master_list.next,
				  struct lfsck_layout_slave_target,
		/* Already visited in this pass: the whole list is done. */
		if (llst->llst_gen == llsd->llsd_touch_gen)

		llst->llst_gen = llsd->llsd_touch_gen;
		list_del(&llst->llst_list);
		list_add_tail(&llst->llst_list,
			      &llsd->llsd_master_list);
		atomic_inc(&llst->llst_ref);
		spin_unlock(&llsd->llsd_lock);

		exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
			/* No usable connection to this master: forget it. */
			lfsck_layout_llst_del(llsd, llst);
			lfsck_layout_llst_put(llst);
			spin_lock(&llsd->llsd_lock);

		rc = lfsck_layout_async_notify(env, exp, lr, set);
			CERROR("%s: slave fail to notify %s for layout: "
			       "rc = %d\n", lfsck_lfsck2name(lfsck),
			       exp->exp_obd->obd_name, rc);

		lfsck_layout_llst_put(llst);
		class_export_put(exp);
		spin_lock(&llsd->llsd_lock);
	spin_unlock(&llsd->llsd_lock);

	/* Wait for all queued notifications, then release the set. */
	rc = ptlrpc_set_wait(set);

	ptlrpc_set_destroy(set);
/* Reset the in-RAM layout trace file and persist it. With @init the whole
 * file is zeroed; otherwise the success count and last-complete time are
 * preserved across the reset. */
static int lfsck_layout_reset(const struct lu_env *env,
			      struct lfsck_component *com, bool init)
	struct lfsck_layout *lo = com->lc_file_ram;

	down_write(&com->lc_sem);
		memset(lo, 0, com->lc_file_size);
		/* Keep the historical counters across a non-init reset. */
		__u32 count = lo->ll_success_count;
		__u64 last_time = lo->ll_time_last_complete;

		memset(lo, 0, com->lc_file_size);
		lo->ll_success_count = count;
		lo->ll_time_last_complete = last_time;
	lo->ll_magic = LFSCK_LAYOUT_MAGIC;
	lo->ll_status = LS_INIT;

	rc = lfsck_layout_store(env, com);
	up_write(&com->lc_sem);
/* Account one phase1 failure. On the first inconsistency, remember the
 * current otable iteration position so a later (dryrun) re-scan can
 * restart from there. */
static void lfsck_layout_fail(const struct lu_env *env,
			      struct lfsck_component *com, bool new_checked)
	struct lfsck_layout *lo = com->lc_file_ram;

	down_write(&com->lc_sem);
		com->lc_new_checked++;
	lo->ll_objs_failed_phase1++;
	if (lo->ll_pos_first_inconsistent == 0) {
		struct lfsck_instance *lfsck = com->lc_lfsck;

		lo->ll_pos_first_inconsistent =
			lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
	up_write(&com->lc_sem);
/* Master checkpoint: wait until the assistant has drained the request
 * list, then flush the phase1 progress (position, run time, checked
 * count) to the on-disk trace file. With @init, record the latest start
 * position instead. Skipped when nothing new was checked. */
static int lfsck_layout_master_checkpoint(const struct lu_env *env,
					  struct lfsck_component *com, bool init)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_layout_master_data *llmd = com->lc_data;
	struct ptlrpc_thread *mthread = &lfsck->li_thread;
	struct ptlrpc_thread *athread = &llmd->llmd_thread;
	struct l_wait_info lwi = { 0 };

	if (com->lc_new_checked == 0 && !init)

	/* Let the assistant catch up before recording a position. */
	l_wait_event(mthread->t_ctl_waitq,
		     list_empty(&llmd->llmd_req_list) ||
		     !thread_is_running(mthread) ||
		     thread_is_stopped(athread),

	if (!thread_is_running(mthread) || thread_is_stopped(athread))

	down_write(&com->lc_sem);
		lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
		lo->ll_pos_last_checkpoint =
					lfsck->li_pos_current.lp_oit_cookie;
		lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		lo->ll_time_last_checkpoint = cfs_time_current_sec();
		lo->ll_objs_checked_phase1 += com->lc_new_checked;
		com->lc_new_checked = 0;

	rc = lfsck_layout_store(env, com);
	up_write(&com->lc_sem);
/* Slave checkpoint: same bookkeeping as the master variant but without an
 * assistant pipeline to drain. Skipped when nothing new was checked. */
static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
					 struct lfsck_component *com, bool init)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;

	if (com->lc_new_checked == 0 && !init)

	down_write(&com->lc_sem);
		lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
		lo->ll_pos_last_checkpoint =
					lfsck->li_pos_current.lp_oit_cookie;
		lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
		lo->ll_time_last_checkpoint = cfs_time_current_sec();
		lo->ll_objs_checked_phase1 += com->lc_new_checked;
		com->lc_new_checked = 0;

	rc = lfsck_layout_store(env, com);

	up_write(&com->lc_sem);
/* Common prep for the layout LFSCK: choose the restart position and the
 * next status (phase1 rescan vs. direct phase2 double scan) based on the
 * state recorded in the trace file. */
static int lfsck_layout_prep(const struct lu_env *env,
			     struct lfsck_component *com)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_position *pos = &com->lc_pos_start;

	fid_zero(&pos->lp_dir_parent);
	pos->lp_dir_cookie = 0;
	if (lo->ll_status == LS_COMPLETED ||
	    lo->ll_status == LS_PARTIAL) {
		/* A finished run is reset (non-init) before restarting. */
		rc = lfsck_layout_reset(env, com, false);

	down_write(&com->lc_sem);

	lo->ll_time_latest_start = cfs_time_current_sec();

	spin_lock(&lfsck->li_lock);
	if (lo->ll_flags & LF_SCANNED_ONCE) {
		if (!lfsck->li_drop_dryrun ||
		    lo->ll_pos_first_inconsistent == 0) {
			/* Phase1 already done once: go straight to the
			 * double scan. */
			lo->ll_status = LS_SCANNING_PHASE2;
			list_del_init(&com->lc_link);
			list_add_tail(&com->lc_link,
				      &lfsck->li_list_double_scan);
			pos->lp_oit_cookie = 0;
			/* Dryrun results dropped: redo phase1 from the
			 * first recorded inconsistency, counters reset. */
			lo->ll_status = LS_SCANNING_PHASE1;
			lo->ll_run_time_phase1 = 0;
			lo->ll_run_time_phase2 = 0;
			lo->ll_objs_checked_phase1 = 0;
			lo->ll_objs_checked_phase2 = 0;
			lo->ll_objs_failed_phase1 = 0;
			lo->ll_objs_failed_phase2 = 0;
			for (i = 0; i < LLIT_MAX; i++)
				lo->ll_objs_repaired[i] = 0;

			pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
		lo->ll_status = LS_SCANNING_PHASE1;
		if (!lfsck->li_drop_dryrun ||
		    lo->ll_pos_first_inconsistent == 0)
			pos->lp_oit_cookie = lo->ll_pos_last_checkpoint + 1;
			pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
	spin_unlock(&lfsck->li_lock);

	up_write(&com->lc_sem);
/* Slave prep: common prep plus, when a phase1 scan starts with a valid
 * requesting-master index, register that master for later status
 * queries/notifications. */
static int lfsck_layout_slave_prep(const struct lu_env *env,
				   struct lfsck_component *com,
				   struct lfsck_start_param *lsp)
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_layout_slave_data *llsd = com->lc_data;

	/* XXX: For a new scanning, generate OST-objects
	 * bitmap for orphan detection. */

	rc = lfsck_layout_prep(env, com);
	if (rc != 0 || lo->ll_status != LS_SCANNING_PHASE1 ||
	    !lsp->lsp_index_valid)

	rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
/* Master prep: common prep, reset the assistant state, then spawn the
 * assistant thread and wait until it is running (or failed to start). */
static int lfsck_layout_master_prep(const struct lu_env *env,
				    struct lfsck_component *com,
				    struct lfsck_start_param *lsp)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout_master_data *llmd = com->lc_data;
	struct ptlrpc_thread *mthread = &lfsck->li_thread;
	struct ptlrpc_thread *athread = &llmd->llmd_thread;
	struct lfsck_thread_args *lta;

	rc = lfsck_layout_prep(env, com);

	/* Fresh assistant state for this run. */
	llmd->llmd_assistant_status = 0;
	llmd->llmd_post_result = 0;
	llmd->llmd_to_post = 0;
	llmd->llmd_to_double_scan = 0;
	llmd->llmd_in_double_scan = 0;
	llmd->llmd_exit = 0;
	thread_set_flags(athread, 0);

	lta = lfsck_thread_args_init(lfsck, com, lsp);
		RETURN(PTR_ERR(lta));

	rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"));
	if (IS_ERR_VALUE(rc)) {
		CERROR("%s: Cannot start LFSCK layout assistant thread: "
		       "rc = %ld\n", lfsck_lfsck2name(lfsck), rc);
		lfsck_thread_args_fini(lta);
		struct l_wait_info lwi = { 0 };

		/* Wait until the assistant reports its state. */
		l_wait_event(mthread->t_ctl_waitq,
			     thread_is_running(athread) ||
			     thread_is_stopped(athread),
		if (unlikely(!thread_is_running(athread)))
			rc = llmd->llmd_assistant_status;
/* Pre-fetch the attribute for each stripe in the given layout EA. One
 * lfsck_layout_req per stripe is queued on llmd_req_list for the
 * assistant thread to verify asynchronously. */
static int lfsck_layout_scan_stripes(const struct lu_env *env,
				     struct lfsck_component *com,
				     struct dt_object *parent,
				     struct lov_mds_md_v1 *lmm)
	struct lfsck_thread_info *info = lfsck_env_info(env);
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_layout_master_data *llmd = com->lc_data;
	struct lfsck_layout_object *llo = NULL;
	struct lov_ost_data_v1 *objs;
	struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs;
	struct ptlrpc_thread *mthread = &lfsck->li_thread;
	struct ptlrpc_thread *athread = &llmd->llmd_thread;
	struct l_wait_info lwi = { 0 };

	buf = lfsck_buf_get(env, &info->lti_pfid,
			    sizeof(struct filter_fid_old));
	/* The LOV EA is stored little-endian on disk; V1 and V3 differ in
	 * the offset of the objects array. */
	count = le16_to_cpu(lmm->lmm_stripe_count);
	gen = le16_to_cpu(lmm->lmm_layout_gen);
	if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1)
		objs = &(lmm->lmm_objects[0]);
		objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];

	for (i = 0; i < count; i++, objs++) {
		struct lu_fid *fid = &info->lti_fid;
		struct ost_id *oi = &info->lti_oi;
		struct lfsck_layout_req *llr;
		struct lfsck_tgt_desc *tgt = NULL;
		struct dt_object *cobj = NULL;
				      le32_to_cpu(objs->l_ost_idx);
		bool wakeup = false;

		/* Throttle prefetching against the async window. */
		l_wait_event(mthread->t_ctl_waitq,
			     bk->lb_async_windows == 0 ||
			     llmd->llmd_prefetched < bk->lb_async_windows ||
			     !thread_is_running(mthread) ||
			     thread_is_stopped(athread),

		if (unlikely(!thread_is_running(mthread)) ||
			     thread_is_stopped(athread))

		ostid_le_to_cpu(&objs->l_ost_oi, oi);
		ostid_to_fid(fid, oi, index);
		tgt = lfsck_tgt_get(ltds, index);
		if (unlikely(tgt == NULL)) {
			/* Unknown OST index: the scan will be incomplete. */
			lo->ll_flags |= LF_INCOMPLETE;

		cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid);

		rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA);

		rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID,

		llo = lfsck_layout_object_init(env, parent, gen);

		llr = lfsck_layout_req_init(llo, cobj, index, i);

		spin_lock(&llmd->llmd_lock);
		if (llmd->llmd_assistant_status < 0) {
			/* Assistant already failed: drop the request. */
			spin_unlock(&llmd->llmd_lock);
			lfsck_layout_req_fini(env, llr);

			RETURN(llmd->llmd_assistant_status);

		list_add_tail(&llr->llr_list, &llmd->llmd_req_list);
		if (llmd->llmd_prefetched == 0)

		llmd->llmd_prefetched++;
		spin_unlock(&llmd->llmd_lock);
			wake_up_all(&athread->t_ctl_waitq);

		down_write(&com->lc_sem);
		com->lc_new_checked++;
			lo->ll_objs_failed_phase1++;
		up_write(&com->lc_sem);

		if (cobj != NULL && !IS_ERR(cobj))
			lu_object_put(env, &cobj->do_lu);

		if (likely(tgt != NULL))

		if (rc < 0 && bk->lb_param & LPF_FAILOUT)

	if (llo != NULL && !IS_ERR(llo))
		lfsck_layout_object_put(env, llo);
2266 /* For the given object, read its layout EA locally. For each stripe, pre-fetch
 * the OST-object's attribute and generate a structure lfsck_layout_req on the
2268 * list ::llmd_req_list.
2270 * For each request on above list, the lfsck_layout_assistant thread compares
2271 * the OST side attribute with local attribute, if inconsistent, then repair it.
2273 * All above processing is async mode with pipeline. */
/* Master OIT callback for one object: verify (and if needed repair) the
 * lmm_oi stored in the LOV EA against the object's own FID, then hand the
 * stripes to lfsck_layout_scan_stripes() for async verification. */
static int lfsck_layout_master_exec_oit(const struct lu_env *env,
					struct lfsck_component *com,
					struct dt_object *obj)
	struct lfsck_thread_info *info = lfsck_env_info(env);
	struct ost_id *oi = &info->lti_oi;
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_layout_master_data *llmd = com->lc_data;
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
	struct thandle *handle = NULL;
	struct lu_buf *buf = &info->lti_big_buf;
	struct lov_mds_md_v1 *lmm = NULL;
	struct dt_device *dev = lfsck->li_bottom;
	struct lustre_handle lh = { 0 };
	ssize_t buflen = buf->lb_len;
	bool locked = false;
	bool stripe = false;

	/* Only regular files carry a layout EA. */
	if (!S_ISREG(lfsck_object_type(obj)))

	if (llmd->llmd_assistant_status < 0)
		GOTO(out, rc = -ESRCH);

	/* Expected on-disk (little-endian) lmm_oi derived from the FID. */
	fid_to_lmm_oi(lfsck_dto2fid(obj), oi);
	lmm_oi_cpu_to_le(oi, oi);
	dt_read_lock(env, obj, 0);

	rc = lfsck_layout_get_lovea(env, obj, buf, &buflen);

	rc = lfsck_layout_verify_header(lmm);

	if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) == 0)
		GOTO(out, stripe = true);

	/* Inconsistent lmm_oi, should be repaired. */
	CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n",
	       PFID(lfsck_dto2fid(obj)));

	if (bk->lb_param & LPF_DRYRUN) {
		/* Dryrun: only account the would-be repair. */
		down_write(&com->lc_sem);
		lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
		up_write(&com->lc_sem);

		GOTO(out, stripe = true);

	if (!lustre_handle_is_used(&lh)) {
		dt_read_unlock(env, obj);
		buf->lb_len = buflen;
		/* Take the layout/xattr ibits lock before rewriting the EA. */
		rc = lfsck_layout_lock(env, com, obj, &lh,
				       MDS_INODELOCK_LAYOUT |
				       MDS_INODELOCK_XATTR);

	handle = dt_trans_create(env, dev);
		GOTO(out, rc = PTR_ERR(handle));

	rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
				  LU_XATTR_REPLACE, handle);

	rc = dt_trans_start_local(env, dev, handle);

	dt_write_lock(env, obj, 0);

	rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV,
			  LU_XATTR_REPLACE, handle, BYPASS_CAPA);

	down_write(&com->lc_sem);
	lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
	up_write(&com->lc_sem);

	GOTO(out, stripe = true);

	if (lustre_handle_is_used(&lh))
		dt_write_unlock(env, obj);
		dt_read_unlock(env, obj);

	if (handle != NULL && !IS_ERR(handle))
		dt_trans_stop(env, dev, handle);

	lfsck_layout_unlock(&lh);
		rc = lfsck_layout_scan_stripes(env, com, obj, lmm);

	down_write(&com->lc_sem);
	com->lc_new_checked++;
		lo->ll_objs_failed_phase1++;
	up_write(&com->lc_sem);

	buf->lb_len = buflen;
/* Slave (OST) OIT callback for one object: track the largest object id
 * seen per sequence; if it exceeds the recorded LAST_ID, reload the
 * LAST_ID file and, if still behind, request LAST_ID rebuilding. */
static int lfsck_layout_slave_exec_oit(const struct lu_env *env,
				       struct lfsck_component *com,
				       struct dt_object *obj)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;
	const struct lu_fid *fid = lfsck_dto2fid(obj);
	struct lfsck_layout_slave_data *llsd = com->lc_data;
	struct lfsck_layout_seq *lls;

	/* XXX: Update OST-objects bitmap for orphan detection. */

	LASSERT(llsd != NULL);

	down_write(&com->lc_sem);
	/* Only IDIF and normal OST-object FIDs are tracked. */
	if (fid_is_idif(fid))
	else if (!fid_is_norm(fid) ||
		 !fid_is_for_ostobj(env, lfsck->li_next, obj, fid))
		GOTO(unlock, rc = 0);

	com->lc_new_checked++;

	lls = lfsck_layout_seq_lookup(llsd, seq);
		if (unlikely(lls == NULL))
			GOTO(unlock, rc = -ENOMEM);

		/* First object seen in this sequence: load its LAST_ID. */
		INIT_LIST_HEAD(&lls->lls_list);

		rc = lfsck_layout_lastid_load(env, com, lls);
			lo->ll_objs_failed_phase1++;

		lfsck_layout_seq_insert(llsd, lls);

	/* The LAST_ID object itself is not an OST-object. */
	if (unlikely(fid_is_last_id(fid)))
		GOTO(unlock, rc = 0);

	if (oid > lls->lls_lastid_known)
		lls->lls_lastid_known = oid;

	if (oid > lls->lls_lastid) {
		if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
			/* OFD may create new objects during LFSCK scanning. */
			rc = lfsck_layout_lastid_reload(env, com, lls);
			if (unlikely(rc != 0))
				CWARN("%s: failed to reload LAST_ID for "LPX64
				      lfsck_lfsck2name(com->lc_lfsck),
			if (oid <= lls->lls_lastid)
				GOTO(unlock, rc = 0);

			LASSERT(lfsck->li_out_notify != NULL);

			/* LAST_ID is behind reality: ask for rebuilding. */
			lfsck->li_out_notify(env, lfsck->li_out_notify_data,
					     LE_LASTID_REBUILDING);
			lo->ll_flags |= LF_CRASHED_LASTID;

		lls->lls_lastid = oid;

	GOTO(unlock, rc = 0);

	up_write(&com->lc_sem);
/* Per-directory-entry hook for the layout component.
 * NOTE(review): the body is not visible in this chunk; from the signature
 * alone it appears to be a trivial handler — confirm against the full
 * source. */
static int lfsck_layout_exec_dir(const struct lu_env *env,
				 struct lfsck_component *com,
				 struct dt_object *obj,
				 struct lu_dirent *ent)
/* Master post-processing after phase1: hand @result to the assistant (or
 * force it to exit on failure), wait for the pipeline to settle, update
 * the component status/lists and flush the trace file. */
static int lfsck_layout_master_post(const struct lu_env *env,
				    struct lfsck_component *com,
				    int result, bool init)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;
	struct lfsck_layout_master_data *llmd = com->lc_data;
	struct ptlrpc_thread *mthread = &lfsck->li_thread;
	struct ptlrpc_thread *athread = &llmd->llmd_thread;
	struct l_wait_info lwi = { 0 };

	llmd->llmd_post_result = result;
	llmd->llmd_to_post = 1;
	/* Failure or stop: force the assistant to exit. */
	if (llmd->llmd_post_result <= 0)
		llmd->llmd_exit = 1;

	wake_up_all(&athread->t_ctl_waitq);
	l_wait_event(mthread->t_ctl_waitq,
		     (result > 0 && list_empty(&llmd->llmd_req_list)) ||
		     thread_is_stopped(athread),

	if (llmd->llmd_assistant_status < 0)
		result = llmd->llmd_assistant_status;

	down_write(&com->lc_sem);
	spin_lock(&lfsck->li_lock);
	/* When LFSCK failed, some prefetched objects may not have been
	 * processed yet; we do not know the exact position, so just restart
	 * from the last checkpoint next time. */
	if (!init && !llmd->llmd_exit)
		lo->ll_pos_last_checkpoint =
					lfsck->li_pos_current.lp_oit_cookie;

		lo->ll_status = LS_SCANNING_PHASE2;
		lo->ll_flags |= LF_SCANNED_ONCE;
		lo->ll_flags &= ~LF_UPGRADE;
		list_del_init(&com->lc_link);
		list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
	} else if (result == 0) {
		lo->ll_status = lfsck->li_status;
		if (lo->ll_status == 0)
			lo->ll_status = LS_STOPPED;
		if (lo->ll_status != LS_PAUSED) {
			list_del_init(&com->lc_link);
			list_add_tail(&com->lc_link, &lfsck->li_list_idle);
		lo->ll_status = LS_FAILED;
		list_del_init(&com->lc_link);
		list_add_tail(&com->lc_link, &lfsck->li_list_idle);
	spin_unlock(&lfsck->li_lock);

	lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
	lo->ll_time_last_checkpoint = cfs_time_current_sec();
	lo->ll_objs_checked_phase1 += com->lc_new_checked;
	com->lc_new_checked = 0;

	rc = lfsck_layout_store(env, com);
	up_write(&com->lc_sem);
/* Slave post-processing after phase1: store the LAST_ID files, update the
 * component status/lists, flush the trace file, then send LE_PHASE1_DONE
 * (with @result) to the registered master(s). */
static int lfsck_layout_slave_post(const struct lu_env *env,
				   struct lfsck_component *com,
				   int result, bool init)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_layout *lo = com->lc_file_ram;

	rc = lfsck_layout_lastid_store(env, com);

	LASSERT(lfsck->li_out_notify != NULL);

	down_write(&com->lc_sem);
	spin_lock(&lfsck->li_lock);
		lo->ll_pos_last_checkpoint =
					lfsck->li_pos_current.lp_oit_cookie;
		lo->ll_status = LS_SCANNING_PHASE2;
		lo->ll_flags |= LF_SCANNED_ONCE;
		if (lo->ll_flags & LF_CRASHED_LASTID) {
			/* LAST_ID files were rebuilt during phase1. */
			lo->ll_flags &= ~LF_CRASHED_LASTID;

		lo->ll_flags &= ~LF_UPGRADE;
		list_del_init(&com->lc_link);
		list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
	} else if (result == 0) {
		lo->ll_status = lfsck->li_status;
		if (lo->ll_status == 0)
			lo->ll_status = LS_STOPPED;
		if (lo->ll_status != LS_PAUSED) {
			list_del_init(&com->lc_link);
			list_add_tail(&com->lc_link, &lfsck->li_list_idle);
		lo->ll_status = LS_FAILED;
		list_del_init(&com->lc_link);
		list_add_tail(&com->lc_link, &lfsck->li_list_idle);
	spin_unlock(&lfsck->li_lock);

		lfsck->li_out_notify(env, lfsck->li_out_notify_data,

	lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				HALF_SEC - lfsck->li_time_last_checkpoint);
	lo->ll_time_last_checkpoint = cfs_time_current_sec();
	lo->ll_objs_checked_phase1 += com->lc_new_checked;
	com->lc_new_checked = 0;

	rc = lfsck_layout_store(env, com);

	up_write(&com->lc_sem);

	lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result);
/* Format the layout LFSCK state — status, flags, parameters, timestamps,
 * positions, repair statistics and speed estimation — into the
 * caller-provided buffer (e.g. for procfs reading). */
static int lfsck_layout_dump(const struct lu_env *env,
			     struct lfsck_component *com, char *buf, int len)
	struct lfsck_instance *lfsck = com->lc_lfsck;
	struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
	struct lfsck_layout *lo = com->lc_file_ram;

	down_read(&com->lc_sem);
	rc = snprintf(buf, len,
		      "name: lfsck_layout\n"
		      lfsck_status2names(lo->ll_status));

	rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names,

	rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,

	rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete,
			     "time_since_last_completed");

	rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start,
			     "time_since_latest_start");

	rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint,
			     "time_since_last_checkpoint");

	rc = snprintf(buf, len,
		      "latest_start_position: "LPU64"\n"
		      "last_checkpoint_position: "LPU64"\n"
		      "first_failure_position: "LPU64"\n",
		      lo->ll_pos_latest_start,
		      lo->ll_pos_last_checkpoint,
		      lo->ll_pos_first_inconsistent);

	rc = snprintf(buf, len,
		      "success_count: %u\n"
		      "repaired_dangling: "LPU64"\n"
		      "repaired_unmatched_pair: "LPU64"\n"
		      "repaired_multiple_referenced: "LPU64"\n"
		      "repaired_orphan: "LPU64"\n"
		      "repaired_inconsistent_owner: "LPU64"\n"
		      "repaired_others: "LPU64"\n"
		      "skipped: "LPU64"\n"
		      "failed_phase1: "LPU64"\n"
		      "failed_phase2: "LPU64"\n",
		      lo->ll_success_count,
		      lo->ll_objs_repaired[LLIT_DANGLING - 1],
		      lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1],
		      lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1],
		      lo->ll_objs_repaired[LLIT_ORPHAN - 1],
		      lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1],
		      lo->ll_objs_repaired[LLIT_OTHERS - 1],
		      lo->ll_objs_skipped,
		      lo->ll_objs_failed_phase1,
		      lo->ll_objs_failed_phase2);

	if (lo->ll_status == LS_SCANNING_PHASE1) {
		/* Live phase1: compute average and real-time speeds from the
		 * counters accumulated since the last checkpoint. */
		const struct dt_it_ops *iops;
		cfs_duration_t duration = cfs_time_current() -
					  lfsck->li_time_last_checkpoint;
		__u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
		__u64 speed = checked;
		__u64 new_checked = com->lc_new_checked * HZ;
		__u32 rtime = lo->ll_run_time_phase1 +
			      cfs_duration_sec(duration + HALF_SEC);

			do_div(new_checked, duration);

			do_div(speed, rtime);
		rc = snprintf(buf, len,
			      "checked_phase1: "LPU64"\n"
			      "checked_phase2: "LPU64"\n"
			      "run_time_phase1: %u seconds\n"
			      "run_time_phase2: %u seconds\n"
			      "average_speed_phase1: "LPU64" items/sec\n"
			      "average_speed_phase2: N/A\n"
			      "real-time_speed_phase1: "LPU64" items/sec\n"
			      "real-time_speed_phase2: N/A\n",
			      lo->ll_objs_checked_phase2,
			      lo->ll_run_time_phase2,

		LASSERT(lfsck->li_di_oit != NULL);

		iops = &lfsck->li_obj_oit->do_index_ops->dio_it;

		/* The low layer otable-based iteration position may NOT
		 * exactly match the layout-based directory traversal
		 * cookie. Generally, it is not a serious issue. But the
		 * caller should NOT make assumption on that. */
		pos = iops->store(env, lfsck->li_di_oit);
		if (!lfsck->li_current_oit_processed)
		rc = snprintf(buf, len, "current_position: "LPU64"\n", pos);

		/* XXX: LS_SCANNING_PHASE2 will be handled in the future. */
		__u64 speed1 = lo->ll_objs_checked_phase1;
		__u64 speed2 = lo->ll_objs_checked_phase2;

		if (lo->ll_run_time_phase1 != 0)
			do_div(speed1, lo->ll_run_time_phase1);
		if (lo->ll_run_time_phase2 != 0)
			do_div(speed2, lo->ll_run_time_phase2);
		rc = snprintf(buf, len,
			      "checked_phase1: "LPU64"\n"
			      "checked_phase2: "LPU64"\n"
			      "run_time_phase1: %u seconds\n"
			      "run_time_phase2: %u seconds\n"
			      "average_speed_phase1: "LPU64" items/sec\n"
			      "average_speed_phase2: "LPU64" objs/sec\n"
			      "real-time_speed_phase1: N/A\n"
			      "real-time_speed_phase2: N/A\n"
			      "current_position: N/A\n",
			      lo->ll_objs_checked_phase1,
			      lo->ll_objs_checked_phase2,
			      lo->ll_run_time_phase1,
			      lo->ll_run_time_phase2,

	up_read(&com->lc_sem);
/*
 * Start the master-side second-stage ("double") scan of the layout LFSCK.
 *
 * Only meaningful while the component status is LS_SCANNING_PHASE2.  The
 * actual scan is performed by the assistant thread: this function flags
 * llmd_to_double_scan, wakes the assistant, then blocks until the
 * assistant has entered double-scan mode (or has stopped), and finally
 * propagates any assistant-thread failure to the caller.
 */
2811 static int lfsck_layout_master_double_scan(const struct lu_env *env,
2812 struct lfsck_component *com)
2814 struct lfsck_layout_master_data *llmd = com->lc_data;
2815 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
2816 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2817 struct lfsck_layout *lo = com->lc_file_ram;
2818 struct l_wait_info lwi = { 0 };
/* Nothing to do unless we are entering the second scanning phase. */
2820 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
/* Hand the double scan over to the assistant thread... */
2823 llmd->llmd_to_double_scan = 1;
2824 wake_up_all(&athread->t_ctl_waitq);
/* ...and wait until it has picked the work up or has stopped. */
2825 l_wait_event(mthread->t_ctl_waitq,
2826 llmd->llmd_in_double_scan ||
2827 thread_is_stopped(athread),
/* Report an assistant-thread failure back to our caller. */
2829 if (llmd->llmd_assistant_status < 0)
2830 return llmd->llmd_assistant_status;
/*
 * Slave-side (OST) second-stage scan of the layout LFSCK.
 *
 * A slave has no scanning of its own to do in phase 2; instead it waits
 * until every master it is serving (tracked on llsd_master_list) has
 * finished.  The wait loop times out every 30 seconds and re-queries the
 * masters' status via lfsck_layout_slave_query_master(), so a master that
 * died without notifying us is eventually noticed.  The final result is
 * folded through lfsck_layout_double_scan_result(), and the instance-wide
 * li_double_scan_count is balanced on exit.
 */
2835 static int lfsck_layout_slave_double_scan(const struct lu_env *env,
2836 struct lfsck_component *com)
2838 struct lfsck_instance *lfsck = com->lc_lfsck;
2839 struct lfsck_layout_slave_data *llsd = com->lc_data;
2840 struct lfsck_layout *lo = com->lc_file_ram;
2841 struct ptlrpc_thread *thread = &lfsck->li_thread;
/* Nothing to do unless we are entering the second scanning phase. */
2845 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
2848 atomic_inc(&lfsck->li_double_scan_count);
/* Reset per-phase statistics and checkpoint bookkeeping. */
2850 com->lc_new_checked = 0;
2851 com->lc_new_scanned = 0;
2852 com->lc_time_last_checkpoint = cfs_time_current();
2853 com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
2854 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/* Poll interval: re-check the masters every 30 seconds. */
2857 struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30),
2860 rc = lfsck_layout_slave_query_master(env, com);
/* All masters done (or gone): nothing left to wait for. */
2861 if (list_empty(&llsd->llsd_master_list)) {
2862 if (unlikely(!thread_is_running(thread)))
/* Sleep until stopped, the master list drains, or the timeout fires. */
2873 rc = l_wait_event(thread->t_ctl_waitq,
2874 !thread_is_running(thread) ||
2875 list_empty(&llsd->llsd_master_list),
2877 if (unlikely(!thread_is_running(thread)))
/* Timeout is not an error here; loop back and query the masters again. */
2880 if (rc == -ETIMEDOUT)
2883 GOTO(done, rc = (rc < 0 ? rc : 1));
2887 rc = lfsck_layout_double_scan_result(env, com, rc);
/* Wake anyone waiting for all components to finish double scan. */
2889 if (atomic_dec_and_test(&lfsck->li_double_scan_count))
2890 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
/*
 * Release the master-side private data of the layout LFSCK component.
 *
 * Must only run once the assistant thread is init or stopped and the
 * request list is empty (asserted below).  Detaches com->lc_data, then,
 * under ltd_lock, unhooks every OST/MDT target descriptor from the six
 * per-phase tracking lists so no stale list linkage survives the release.
 * NOTE(review): the OST and MDT descriptor sets appear to share the single
 * li_ost_descs ltd_lock here — confirm against the lock's documented scope.
 */
2895 static void lfsck_layout_master_data_release(const struct lu_env *env,
2896 struct lfsck_component *com)
2898 struct lfsck_layout_master_data *llmd = com->lc_data;
2899 struct lfsck_instance *lfsck = com->lc_lfsck;
2900 struct lfsck_tgt_descs *ltds;
2901 struct lfsck_tgt_desc *ltd;
2902 struct lfsck_tgt_desc *next;
/* Release is only legal when the assistant is quiescent. */
2904 LASSERT(llmd != NULL);
2905 LASSERT(thread_is_init(&llmd->llmd_thread) ||
2906 thread_is_stopped(&llmd->llmd_thread));
2907 LASSERT(list_empty(&llmd->llmd_req_list));
2909 com->lc_data = NULL;
2911 ltds = &lfsck->li_ost_descs;
2912 spin_lock(&ltds->ltd_lock);
/* Drain the OST phase-1, phase-2 and general tracking lists. */
2913 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase1_list,
2914 ltd_layout_phase_list) {
2915 list_del_init(&ltd->ltd_layout_phase_list);
2917 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase2_list,
2918 ltd_layout_phase_list) {
2919 list_del_init(&ltd->ltd_layout_phase_list);
2921 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_list,
2923 list_del_init(&ltd->ltd_layout_list);
/* Likewise for the MDT tracking lists. */
2925 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list,
2926 ltd_layout_phase_list) {
2927 list_del_init(&ltd->ltd_layout_phase_list);
2929 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase2_list,
2930 ltd_layout_phase_list) {
2931 list_del_init(&ltd->ltd_layout_phase_list);
2933 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_list,
2935 list_del_init(&ltd->ltd_layout_list);
2937 spin_unlock(&ltds->ltd_lock);
/*
 * Release the slave-side private data of the layout LFSCK component.
 *
 * Detaches com->lc_data, drops the reference each lfsck_layout_seq entry
 * holds on its LAST_ID object, and empties the list of masters that were
 * driving this slave.
 */
2942 static void lfsck_layout_slave_data_release(const struct lu_env *env,
2943 struct lfsck_component *com)
2945 struct lfsck_layout_slave_data *llsd = com->lc_data;
2946 struct lfsck_layout_seq *lls;
2947 struct lfsck_layout_seq *next;
2948 struct lfsck_layout_slave_target *llst;
2949 struct lfsck_layout_slave_target *tmp;
2951 LASSERT(llsd != NULL);
2953 com->lc_data = NULL;
/* Tear down the per-sequence list; each entry pins a LAST_ID object. */
2955 list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
2957 list_del_init(&lls->lls_list);
2958 lfsck_object_put(env, lls->lls_lastid_obj);
/* Tear down the list of masters driving this slave. */
2962 list_for_each_entry_safe(llst, tmp, &llsd->llsd_master_list,
2964 list_del_init(&llst->llst_list);
/*
 * Ask the master-side assistant thread to exit and wait for it.
 *
 * Sets llmd_exit, wakes the assistant, and blocks on the main thread's
 * wait queue until the assistant reports init or stopped state.
 */
2971 static void lfsck_layout_master_quit(const struct lu_env *env,
2972 struct lfsck_component *com)
2974 struct lfsck_layout_master_data *llmd = com->lc_data;
2975 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
2976 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2977 struct l_wait_info lwi = { 0 };
2979 llmd->llmd_exit = 1;
2980 wake_up_all(&athread->t_ctl_waitq);
/* Wait until the assistant has actually wound down. */
2981 l_wait_event(mthread->t_ctl_waitq,
2982 thread_is_init(athread) ||
2983 thread_is_stopped(athread),
/*
 * Handle an inbound LFSCK event notification on the layout master.
 *
 * Accepts only LE_PHASE1_DONE, LE_PHASE2_DONE and LE_STOP events, sent by
 * an OST (LEF_FROM_OST) or another MDT.  Under the target table's
 * ltd_lock the sender's descriptor is moved between the per-phase
 * tracking lists according to the event:
 *  - LE_PHASE1_DONE with failure status: target is done, mark the whole
 *    scan LF_INCOMPLETE; on success the target migrates to the matching
 *    phase-2 list.
 *  - LE_PHASE2_DONE: target fully done.
 *  - LE_STOP (default case): target done; unless it was a forced stop,
 *    the scan becomes LF_INCOMPLETE.
 * A forced stop (LEF_FORCE_STOP) then stops the local LFSCK with the
 * sender's status/param; otherwise the assistant thread is woken if it
 * should move on to orphan handling.
 */
2987 static int lfsck_layout_master_in_notify(const struct lu_env *env,
2988 struct lfsck_component *com,
2989 struct lfsck_request *lr)
2991 struct lfsck_instance *lfsck = com->lc_lfsck;
2992 struct lfsck_layout *lo = com->lc_file_ram;
2993 struct lfsck_layout_master_data *llmd = com->lc_data;
2994 struct lfsck_tgt_descs *ltds;
2995 struct lfsck_tgt_desc *ltd;
/* Ignore any event type we do not handle. */
2998 if (lr->lr_event != LE_PHASE1_DONE &&
2999 lr->lr_event != LE_PHASE2_DONE &&
3000 lr->lr_event != LE_STOP)
/* Pick the OST or MDT target table depending on the sender. */
3003 if (lr->lr_flags & LEF_FROM_OST)
3004 ltds = &lfsck->li_ost_descs;
3006 ltds = &lfsck->li_mdt_descs;
3007 spin_lock(&ltds->ltd_lock);
3008 ltd = LTD_TGT(ltds, lr->lr_index);
3010 spin_unlock(&ltds->ltd_lock);
3015 list_del_init(&ltd->ltd_layout_phase_list);
3016 switch (lr->lr_event) {
3017 case LE_PHASE1_DONE:
/* Phase 1 failed on that target: it cannot take part in phase 2. */
3018 if (lr->lr_status <= 0) {
3019 ltd->ltd_layout_done = 1;
3020 list_del_init(&ltd->ltd_layout_list);
3021 lo->ll_flags |= LF_INCOMPLETE;
/* Phase 1 succeeded: queue the target on the proper phase-2 list. */
3025 if (lr->lr_flags & LEF_FROM_OST) {
3026 if (list_empty(&ltd->ltd_layout_list))
3027 list_add_tail(&ltd->ltd_layout_list,
3028 &llmd->llmd_ost_list);
3029 list_add_tail(&ltd->ltd_layout_phase_list,
3030 &llmd->llmd_ost_phase2_list);
3032 if (list_empty(&ltd->ltd_layout_list))
3033 list_add_tail(&ltd->ltd_layout_list,
3034 &llmd->llmd_mdt_list);
3035 list_add_tail(&ltd->ltd_layout_phase_list,
3036 &llmd->llmd_mdt_phase2_list);
3039 case LE_PHASE2_DONE:
3040 ltd->ltd_layout_done = 1;
3041 list_del_init(&ltd->ltd_layout_list);
/* LE_STOP: the target quit; only a forced stop keeps us "complete". */
3044 ltd->ltd_layout_done = 1;
3045 list_del_init(&ltd->ltd_layout_list);
3046 if (!(lr->lr_flags & LEF_FORCE_STOP))
3047 lo->ll_flags |= LF_INCOMPLETE;
3052 spin_unlock(&ltds->ltd_lock);
/* A forced stop propagates the remote status/param to the local stop. */
3054 if (lr->lr_flags & LEF_FORCE_STOP) {
3055 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3057 memset(stop, 0, sizeof(*stop));
3058 stop->ls_status = lr->lr_status;
3059 stop->ls_flags = lr->lr_param;
3060 lfsck_stop(env, lfsck->li_bottom, stop);
3061 } else if (lfsck_layout_master_to_orphan(llmd)) {
/* Ready for orphan handling: poke the assistant thread. */
3062 wake_up_all(&llmd->llmd_thread.t_ctl_waitq);
/*
 * Handle an inbound LFSCK event notification on a layout slave.
 *
 * Accepts only LE_PHASE2_DONE and LE_STOP.  The sending master is looked
 * up and removed from llsd_master_list; once no masters remain, a
 * LE_PHASE2_DONE wakes the local thread (its wait in
 * lfsck_layout_slave_double_scan() can finish), while LE_STOP stops the
 * local LFSCK with the master's status and parameters.
 */
3068 static int lfsck_layout_slave_in_notify(const struct lu_env *env,
3069 struct lfsck_component *com,
3070 struct lfsck_request *lr)
3072 struct lfsck_instance *lfsck = com->lc_lfsck;
3073 struct lfsck_layout_slave_data *llsd = com->lc_data;
3074 struct lfsck_layout_slave_target *llst;
/* Ignore any event type we do not handle. */
3077 if (lr->lr_event != LE_PHASE2_DONE &&
3078 lr->lr_event != LE_STOP)
/* Remove the sending master from our tracking list. */
3081 llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index);
3085 lfsck_layout_llst_put(llst);
/* Last master gone: either finish phase 2 or stop locally. */
3086 if (list_empty(&llsd->llsd_master_list)) {
3087 switch (lr->lr_event) {
3088 case LE_PHASE2_DONE:
3089 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
3092 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3094 memset(stop, 0, sizeof(*stop));
3095 stop->ls_status = lr->lr_status;
3096 stop->ls_flags = lr->lr_param;
3097 lfsck_stop(env, lfsck->li_bottom, stop);
/*
 * Report the current status (LS_* value) of the layout LFSCK component,
 * read from the in-RAM copy of the tracing file.
 */
3108 static int lfsck_layout_query(const struct lu_env *env,
3109 struct lfsck_component *com)
3111 struct lfsck_layout *lo = com->lc_file_ram;
3113 return lo->ll_status;
/*
 * Asynchronously notify one target (OST or MDT) that the layout LFSCK
 * master has stopped.
 *
 * Builds an LE_STOP / LS_CO_STOPPED lfsck_request identifying this device
 * and queues it on @set via lfsck_async_request(), with
 * lfsck_layout_master_async_interpret() as the RPC interpret callback.
 * The target must already be off the tracking lists (asserted below).
 * A send failure is only logged; the error is reported via rc.
 */
3116 static int lfsck_layout_master_stop_notify(const struct lu_env *env,
3117 struct lfsck_component *com,
3118 struct lfsck_tgt_descs *ltds,
3119 struct lfsck_tgt_desc *ltd,
3120 struct ptlrpc_request_set *set)
3122 struct lfsck_thread_info *info = lfsck_env_info(env);
3123 struct lfsck_async_interpret_args *laia = &info->lti_laia;
3124 struct lfsck_request *lr = &info->lti_lr;
3125 struct lfsck_instance *lfsck = com->lc_lfsck;
/* The target must already have been unhooked from the tracking lists. */
3128 LASSERT(list_empty(&ltd->ltd_layout_list));
3129 LASSERT(list_empty(&ltd->ltd_layout_phase_list));
/* Compose the co-stop notification. */
3131 memset(lr, 0, sizeof(*lr));
3132 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
3133 lr->lr_event = LE_STOP;
3134 lr->lr_active = LT_LAYOUT;
3135 if (ltds == &lfsck->li_ost_descs) {
3136 lr->lr_flags = LEF_TO_OST;
/* Do not notify ourselves. */
3138 if (ltd->ltd_index == lfsck_dev_idx(lfsck->li_bottom))
3143 lr->lr_status = LS_CO_STOPPED;
/* Context handed to the async-interpret callback. */
3145 laia->laia_com = com;
3146 laia->laia_ltds = ltds;
3147 laia->laia_ltd = ltd;
3150 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
3151 lfsck_layout_master_async_interpret,
3152 laia, LFSCK_NOTIFY);
3154 CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n",
3155 lfsck_lfsck2name(lfsck),
3156 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
3157 ltd->ltd_index, rc);
/*
 * Join this slave to a layout LFSCK driven by a remote MDT.
 *
 * Called with lfsck::li_lock held.  Only joins when the caller supplied a
 * valid remote index and the start request carries LPF_ALL_MDT.  The lock
 * is dropped around lfsck_layout_llst_add(); if the local LFSCK thread is
 * found not running after re-taking the lock, the just-added master entry
 * is rolled back again.
 */
3162 /* with lfsck::li_lock held */
3163 static int lfsck_layout_slave_join(const struct lu_env *env,
3164 struct lfsck_component *com,
3165 struct lfsck_start_param *lsp)
3167 struct lfsck_instance *lfsck = com->lc_lfsck;
3168 struct lfsck_layout_slave_data *llsd = com->lc_data;
3169 struct lfsck_layout_slave_target *llst;
3170 struct lfsck_start *start = lsp->lsp_start;
/* Only all-MDT scans with a known remote index are joinable. */
3174 if (!lsp->lsp_index_valid || start == NULL ||
3175 !(start->ls_flags & LPF_ALL_MDT))
/* llst_add may sleep/allocate, so drop the spinlock around it. */
3178 spin_unlock(&lfsck->li_lock);
3179 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
3180 spin_lock(&lfsck->li_lock);
/* Local thread died meanwhile: undo the registration. */
3181 if (rc == 0 && !thread_is_running(&lfsck->li_thread)) {
3182 spin_unlock(&lfsck->li_lock);
3183 llst = lfsck_layout_llst_find_and_del(llsd, lsp->lsp_index);
3185 lfsck_layout_llst_put(llst);
3186 spin_lock(&lfsck->li_lock);
/* Operation table for the layout LFSCK component on the master (MDT). */
3193 static struct lfsck_operations lfsck_layout_master_ops = {
3194 .lfsck_reset = lfsck_layout_reset,
3195 .lfsck_fail = lfsck_layout_fail,
3196 .lfsck_checkpoint = lfsck_layout_master_checkpoint,
3197 .lfsck_prep = lfsck_layout_master_prep,
3198 .lfsck_exec_oit = lfsck_layout_master_exec_oit,
3199 .lfsck_exec_dir = lfsck_layout_exec_dir,
3200 .lfsck_post = lfsck_layout_master_post,
3201 .lfsck_dump = lfsck_layout_dump,
3202 .lfsck_double_scan = lfsck_layout_master_double_scan,
3203 .lfsck_data_release = lfsck_layout_master_data_release,
3204 .lfsck_quit = lfsck_layout_master_quit,
3205 .lfsck_in_notify = lfsck_layout_master_in_notify,
3206 .lfsck_query = lfsck_layout_query,
3207 .lfsck_stop_notify = lfsck_layout_master_stop_notify,
/* Operation table for the layout LFSCK component on a slave (OST). */
3210 static struct lfsck_operations lfsck_layout_slave_ops = {
3211 .lfsck_reset = lfsck_layout_reset,
3212 .lfsck_fail = lfsck_layout_fail,
3213 .lfsck_checkpoint = lfsck_layout_slave_checkpoint,
3214 .lfsck_prep = lfsck_layout_slave_prep,
3215 .lfsck_exec_oit = lfsck_layout_slave_exec_oit,
3216 .lfsck_exec_dir = lfsck_layout_exec_dir,
3217 .lfsck_post = lfsck_layout_slave_post,
3218 .lfsck_dump = lfsck_layout_dump,
3219 .lfsck_double_scan = lfsck_layout_slave_double_scan,
3220 .lfsck_data_release = lfsck_layout_slave_data_release,
3221 .lfsck_in_notify = lfsck_layout_slave_in_notify,
3222 .lfsck_query = lfsck_layout_query,
3223 .lfsck_join = lfsck_layout_slave_join,
3226 int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
3228 struct lfsck_component *com;
3229 struct lfsck_layout *lo;
3230 struct dt_object *root = NULL;
3231 struct dt_object *obj;
3239 INIT_LIST_HEAD(&com->lc_link);
3240 INIT_LIST_HEAD(&com->lc_link_dir);
3241 init_rwsem(&com->lc_sem);
3242 atomic_set(&com->lc_ref, 1);
3243 com->lc_lfsck = lfsck;
3244 com->lc_type = LT_LAYOUT;
3245 if (lfsck->li_master) {
3246 struct lfsck_layout_master_data *llmd;
3248 com->lc_ops = &lfsck_layout_master_ops;
3249 OBD_ALLOC_PTR(llmd);
3251 GOTO(out, rc = -ENOMEM);
3253 INIT_LIST_HEAD(&llmd->llmd_req_list);
3254 spin_lock_init(&llmd->llmd_lock);
3255 INIT_LIST_HEAD(&llmd->llmd_ost_list);
3256 INIT_LIST_HEAD(&llmd->llmd_ost_phase1_list);
3257 INIT_LIST_HEAD(&llmd->llmd_ost_phase2_list);
3258 INIT_LIST_HEAD(&llmd->llmd_mdt_list);
3259 INIT_LIST_HEAD(&llmd->llmd_mdt_phase1_list);
3260 INIT_LIST_HEAD(&llmd->llmd_mdt_phase2_list);
3261 init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq);
3262 com->lc_data = llmd;
3264 struct lfsck_layout_slave_data *llsd;
3266 com->lc_ops = &lfsck_layout_slave_ops;
3267 OBD_ALLOC_PTR(llsd);
3269 GOTO(out, rc = -ENOMEM);
3271 INIT_LIST_HEAD(&llsd->llsd_seq_list);
3272 INIT_LIST_HEAD(&llsd->llsd_master_list);
3273 spin_lock_init(&llsd->llsd_lock);
3274 com->lc_data = llsd;
3276 com->lc_file_size = sizeof(*lo);
3277 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
3278 if (com->lc_file_ram == NULL)
3279 GOTO(out, rc = -ENOMEM);
3281 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
3282 if (com->lc_file_disk == NULL)
3283 GOTO(out, rc = -ENOMEM);
3285 root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
3287 GOTO(out, rc = PTR_ERR(root));
3289 if (unlikely(!dt_try_as_dir(env, root)))
3290 GOTO(out, rc = -ENOTDIR);
3292 obj = local_file_find_or_create(env, lfsck->li_los, root,
3294 S_IFREG | S_IRUGO | S_IWUSR);
3296 GOTO(out, rc = PTR_ERR(obj));
3299 rc = lfsck_layout_load(env, com);
3301 rc = lfsck_layout_reset(env, com, true);
3302 else if (rc == -ENOENT)
3303 rc = lfsck_layout_init(env, com);
3308 lo = com->lc_file_ram;
3309 switch (lo->ll_status) {
3315 spin_lock(&lfsck->li_lock);
3316 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3317 spin_unlock(&lfsck->li_lock);
3320 CERROR("%s: unknown lfsck_layout status: rc = %u\n",
3321 lfsck_lfsck2name(lfsck), lo->ll_status);
3323 case LS_SCANNING_PHASE1:
3324 case LS_SCANNING_PHASE2:
3325 /* No need to store the status to disk right now.
3326 * If the system crashed before the status stored,
3327 * it will be loaded back when next time. */
3328 lo->ll_status = LS_CRASHED;
3329 lo->ll_flags |= LF_INCOMPLETE;
3336 spin_lock(&lfsck->li_lock);
3337 list_add_tail(&com->lc_link, &lfsck->li_list_scan);
3338 spin_unlock(&lfsck->li_lock);
3342 if (lo->ll_flags & LF_CRASHED_LASTID) {
3343 LASSERT(lfsck->li_out_notify != NULL);
3345 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3346 LE_LASTID_REBUILDING);
3352 if (root != NULL && !IS_ERR(root))
3353 lu_object_put(env, &root->do_lu);
3356 lfsck_component_cleanup(env, com);