4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/lfsck/lfsck_layout.c
28 * Author: Fan, Yong <fan.yong@intel.com>
32 # define EXPORT_SYMTAB
34 #define DEBUG_SUBSYSTEM S_LFSCK
36 #include <linux/bitops.h>
38 #include <lustre/lustre_idl.h>
39 #include <lu_object.h>
40 #include <dt_object.h>
41 #include <lustre_linkea.h>
42 #include <lustre_fid.h>
43 #include <lustre_lib.h>
44 #include <lustre_net.h>
45 #include <lustre/lustre_user.h>
46 #include <md_object.h>
47 #include <obd_class.h>
49 #include "lfsck_internal.h"
51 #define LFSCK_LAYOUT_MAGIC 0xB173AE14
53 static const char lfsck_layout_name[] = "lfsck_layout";
/*
 * Per-FID-sequence bookkeeping for the layout LFSCK slave: caches the
 * LAST_ID object of a sequence and whether the cached last-id must be
 * written back (lls_dirty).
 * NOTE(review): this extract is incomplete -- fields used elsewhere
 * (lls_seq, lls_lastid) and the closing brace are not visible here; the
 * leading numbers on each line are extraction artifacts.
 */
55 struct lfsck_layout_seq {
56 struct list_head lls_list;
59 __u64 lls_lastid_known;
60 struct dt_object *lls_lastid_obj;
61 unsigned int lls_dirty:1;
/*
 * One entry per LFSCK master (MDT) tracked by this slave; linked on
 * lfsck_layout_slave_data::llsd_master_list.
 * NOTE(review): extract incomplete -- llst_index and llst_ref, used by the
 * helpers below, are not visible in this view.
 */
64 struct lfsck_layout_slave_target {
65 /* link into lfsck_layout_slave_data::llsd_master_list. */
66 struct list_head llst_list;
/*
 * Per-component state for the layout LFSCK slave side.
 * NOTE(review): extract incomplete -- the llsd_lock spinlock taken by the
 * llst_* helpers below is declared in a line missing from this view.
 */
72 struct lfsck_layout_slave_data {
73 /* list for lfsck_layout_seq */
74 struct list_head llsd_seq_list;
76 /* list for the masters involved in layout verification. */
77 struct list_head llsd_master_list;
/*
 * Reference-counted wrapper around a pre-fetched MDT-object and its cached
 * attributes; shared by the layout verification requests that point at the
 * same parent.  (llo_ref, used by init/put below, is not visible here.)
 */
82 struct lfsck_layout_object {
83 struct dt_object *llo_obj;
84 struct lu_attr llo_attr;
/*
 * A single layout verification request: one (parent MDT-object, child
 * OST-object) pair queued on llmd_req_list for the assistant thread.
 * (llr_ost_idx, assigned in lfsck_layout_req_init, is not visible here.)
 */
89 struct lfsck_layout_req {
90 struct list_head llr_list;
91 struct lfsck_layout_object *llr_parent;
92 struct dt_object *llr_child;
94 __u32 llr_lov_idx; /* offset in LOV EA */
/*
 * Per-component state for the layout LFSCK master (MDT side): the request
 * queue consumed by the assistant thread plus the OST/MDT target lists for
 * the phase-1 and phase-2 scans.
 * NOTE(review): extract incomplete -- llmd_lock and the remaining bit-field
 * members are not visible in this view.
 */
97 struct lfsck_layout_master_data {
99 struct list_head llmd_req_list;
101 /* list for the ost targets involved in layout verification. */
102 struct list_head llmd_ost_list;
104 /* list for the ost targets in phase1 scanning. */
105 struct list_head llmd_ost_phase1_list;
107 /* list for the ost targets in phase2 scanning. */
108 struct list_head llmd_ost_phase2_list;
110 /* list for the mdt targets involved in layout verification. */
111 struct list_head llmd_mdt_list;
113 /* list for the mdt targets in phase1 scanning. */
114 struct list_head llmd_mdt_phase1_list;
116 /* list for the mdt targets in phase2 scanning. */
117 struct list_head llmd_mdt_phase2_list;
119 struct ptlrpc_thread llmd_thread;
120 __u32 llmd_touch_gen;
122 int llmd_assistant_status;
123 int llmd_post_result;
124 unsigned int llmd_to_post:1,
125 llmd_to_double_scan:1,
126 llmd_in_double_scan:1,
/* Context carried through a slave-side asynchronous RPC completion. */
130 struct lfsck_layout_slave_async_args {
131 struct obd_export *llsaa_exp;
132 struct lfsck_component *llsaa_com;
133 struct lfsck_layout_slave_target *llsaa_llst;
/*
 * Allocate and initialize an lfsck_layout_object wrapping @obj: fetch the
 * object attributes into llo_attr, take a reference on the underlying
 * lu_object, and start the refcount at 1.
 * NOTE(review): extract incomplete -- the allocation call, the error paths
 * and the return of @llo are in lines missing from this view.
 */
136 static struct lfsck_layout_object *
137 lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj,
140 struct lfsck_layout_object *llo;
145 return ERR_PTR(-ENOMEM);
147 rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA);
154 lu_object_get(&obj->do_lu);
156 /* The gen can be used to check whether some others have changed the
157 * file layout after LFSCK pre-fetching but before real verification. */
159 atomic_set(&llo->llo_ref, 1);
/*
 * Drop a reference on @llst; on the last put the target must already be
 * off the master list (asserted) and is then freed (free call is in a
 * line missing from this extract).
 */
165 lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst)
167 if (atomic_dec_and_test(&llst->llst_ref)) {
168 LASSERT(list_empty(&llst->llst_list));
/*
 * Register master @index with this slave: allocate a new target entry and
 * append it to llsd_master_list under llsd_lock, unless an entry with the
 * same index already exists (duplicate handling is in lines missing from
 * this extract).
 */
175 lfsck_layout_llst_add(struct lfsck_layout_slave_data *llsd, __u32 index)
177 struct lfsck_layout_slave_target *llst;
178 struct lfsck_layout_slave_target *tmp;
185 INIT_LIST_HEAD(&llst->llst_list);
187 llst->llst_index = index;
188 atomic_set(&llst->llst_ref, 1);
190 spin_lock(&llsd->llsd_lock);
191 list_for_each_entry(tmp, &llsd->llsd_master_list, llst_list) {
192 if (tmp->llst_index == index) {
198 list_add_tail(&llst->llst_list, &llsd->llsd_master_list);
199 spin_unlock(&llsd->llsd_lock);
/*
 * Unlink @llst from the master list (if still linked) under llsd_lock,
 * then drop the list's reference via lfsck_layout_llst_put().
 */
208 lfsck_layout_llst_del(struct lfsck_layout_slave_data *llsd,
209 struct lfsck_layout_slave_target *llst)
213 spin_lock(&llsd->llsd_lock);
214 if (!list_empty(&llst->llst_list)) {
215 list_del_init(&llst->llst_list);
218 spin_unlock(&llsd->llsd_lock);
221 lfsck_layout_llst_put(llst);
/*
 * Find the slave target with the given index, remove it from the master
 * list under llsd_lock and return it (the found/not-found return lines are
 * missing from this extract; presumably NULL when no match -- verify
 * against upstream).
 */
224 static inline struct lfsck_layout_slave_target *
225 lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd,
228 struct lfsck_layout_slave_target *llst;
230 spin_lock(&llsd->llsd_lock);
231 list_for_each_entry(llst, &llsd->llsd_master_list, llst_list) {
232 if (llst->llst_index == index) {
233 list_del_init(&llst->llst_list);
234 spin_unlock(&llsd->llsd_lock);
239 spin_unlock(&llsd->llsd_lock);
/*
 * Drop a reference on @llo; on the last put release the wrapped dt_object
 * (the free of @llo itself is in a line missing from this extract).
 */
244 static inline void lfsck_layout_object_put(const struct lu_env *env,
245 struct lfsck_layout_object *llo)
247 if (atomic_dec_and_test(&llo->llo_ref)) {
248 lfsck_object_put(env, llo->llo_obj);
/*
 * Build a verification request pairing @parent (an MDT-object wrapper,
 * whose refcount is bumped here) with OST-object @child at the given OST
 * and LOV stripe indices.  Returns ERR_PTR(-ENOMEM) on allocation failure
 * (the allocation itself is in a line missing from this extract).
 */
253 static struct lfsck_layout_req *
254 lfsck_layout_req_init(struct lfsck_layout_object *parent,
255 struct dt_object *child, __u32 ost_idx, __u32 lov_idx)
257 struct lfsck_layout_req *llr;
261 return ERR_PTR(-ENOMEM);
263 INIT_LIST_HEAD(&llr->llr_list);
264 atomic_inc(&parent->llo_ref);
265 llr->llr_parent = parent;
266 llr->llr_child = child;
267 llr->llr_ost_idx = ost_idx;
268 llr->llr_lov_idx = lov_idx;
/*
 * Release a verification request: put the child OST-object and drop the
 * reference taken on the parent wrapper in lfsck_layout_req_init()
 * (the free of @llr itself is in a line missing from this extract).
 */
273 static inline void lfsck_layout_req_fini(const struct lu_env *env,
274 struct lfsck_layout_req *llr)
276 lu_object_put(env, &llr->llr_child->do_lu);
277 lfsck_layout_object_put(env, llr->llr_parent);
/* Test under llmd_lock whether the assistant's request queue is empty. */
281 static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd)
285 spin_lock(&llmd->llmd_lock);
286 if (list_empty(&llmd->llmd_req_list))
288 spin_unlock(&llmd->llmd_lock);
/*
 * Read the LOV EA (XATTR_NAME_LOV) of @obj into @buf, growing the buffer
 * as needed: on -ERANGE the EA size is probed with LU_BUF_NULL and the
 * buffer is reallocated; a NULL lb_buf after (re)allocation means ENOMEM.
 * @buflen is updated to the (new) buffer capacity.
 * NOTE(review): the retry/return logic between the visible lines is
 * missing from this extract.
 */
293 static int lfsck_layout_get_lovea(const struct lu_env *env,
294 struct dt_object *obj,
295 struct lu_buf *buf, ssize_t *buflen)
300 rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA);
302 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
307 lu_buf_realloc(buf, rc);
309 *buflen = buf->lb_len;
311 if (buf->lb_buf == NULL)
323 if (unlikely(buf->lb_buf == NULL)) {
324 lu_buf_alloc(buf, rc);
326 *buflen = buf->lb_len;
328 if (buf->lb_buf == NULL)
/*
 * Sanity-check an on-disk LOV EA header: accept only LOV_MAGIC_V1/V3 magic
 * and the LOV_PATTERN_RAID0 stripe pattern (return values for the reject
 * branches are in lines missing from this extract).
 * Note: "patten" is a pre-existing variable-name typo kept as-is.
 */
337 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
342 magic = le32_to_cpu(lmm->lmm_magic);
343 /* If magic crashed, keep it there. Sometime later, during OST-object
344 * orphan handling, if some OST-object(s) back-point to it, it can be
345 * verified and repaired. */
346 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
349 patten = le32_to_cpu(lmm->lmm_pattern);
350 /* XXX: currently, we only support LOV_PATTERN_RAID0. */
351 if (patten != LOV_PATTERN_RAID0)
/*
 * Convert an lfsck_layout record from on-disk little-endian form (@src)
 * into CPU byte order (@des), field by field.  Must mirror
 * lfsck_layout_cpu_to_le() below exactly.
 */
357 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
358 const struct lfsck_layout *src)
362 des->ll_magic = le32_to_cpu(src->ll_magic);
363 des->ll_status = le32_to_cpu(src->ll_status);
364 des->ll_flags = le32_to_cpu(src->ll_flags);
365 des->ll_success_count = le32_to_cpu(src->ll_success_count);
366 des->ll_run_time_phase1 = le32_to_cpu(src->ll_run_time_phase1);
367 des->ll_run_time_phase2 = le32_to_cpu(src->ll_run_time_phase2);
368 des->ll_time_last_complete = le64_to_cpu(src->ll_time_last_complete);
369 des->ll_time_latest_start = le64_to_cpu(src->ll_time_latest_start);
370 des->ll_time_last_checkpoint =
371 le64_to_cpu(src->ll_time_last_checkpoint);
372 des->ll_pos_latest_start = le64_to_cpu(src->ll_pos_latest_start);
373 des->ll_pos_last_checkpoint = le64_to_cpu(src->ll_pos_last_checkpoint);
374 des->ll_pos_first_inconsistent =
375 le64_to_cpu(src->ll_pos_first_inconsistent);
376 des->ll_objs_checked_phase1 = le64_to_cpu(src->ll_objs_checked_phase1);
377 des->ll_objs_failed_phase1 = le64_to_cpu(src->ll_objs_failed_phase1);
378 des->ll_objs_checked_phase2 = le64_to_cpu(src->ll_objs_checked_phase2);
379 des->ll_objs_failed_phase2 = le64_to_cpu(src->ll_objs_failed_phase2);
380 for (i = 0; i < LLIT_MAX; i++)
381 des->ll_objs_repaired[i] =
382 le64_to_cpu(src->ll_objs_repaired[i]);
383 des->ll_objs_skipped = le64_to_cpu(src->ll_objs_skipped);
/*
 * Convert an lfsck_layout record from CPU byte order (@src) into on-disk
 * little-endian form (@des); the exact inverse of lfsck_layout_le_to_cpu().
 */
386 static void lfsck_layout_cpu_to_le(struct lfsck_layout *des,
387 const struct lfsck_layout *src)
391 des->ll_magic = cpu_to_le32(src->ll_magic);
392 des->ll_status = cpu_to_le32(src->ll_status);
393 des->ll_flags = cpu_to_le32(src->ll_flags);
394 des->ll_success_count = cpu_to_le32(src->ll_success_count);
395 des->ll_run_time_phase1 = cpu_to_le32(src->ll_run_time_phase1);
396 des->ll_run_time_phase2 = cpu_to_le32(src->ll_run_time_phase2);
397 des->ll_time_last_complete = cpu_to_le64(src->ll_time_last_complete);
398 des->ll_time_latest_start = cpu_to_le64(src->ll_time_latest_start);
399 des->ll_time_last_checkpoint =
400 cpu_to_le64(src->ll_time_last_checkpoint);
401 des->ll_pos_latest_start = cpu_to_le64(src->ll_pos_latest_start);
402 des->ll_pos_last_checkpoint = cpu_to_le64(src->ll_pos_last_checkpoint);
403 des->ll_pos_first_inconsistent =
404 cpu_to_le64(src->ll_pos_first_inconsistent);
405 des->ll_objs_checked_phase1 = cpu_to_le64(src->ll_objs_checked_phase1);
406 des->ll_objs_failed_phase1 = cpu_to_le64(src->ll_objs_failed_phase1);
407 des->ll_objs_checked_phase2 = cpu_to_le64(src->ll_objs_checked_phase2);
408 des->ll_objs_failed_phase2 = cpu_to_le64(src->ll_objs_failed_phase2);
409 for (i = 0; i < LLIT_MAX; i++)
410 des->ll_objs_repaired[i] =
411 cpu_to_le64(src->ll_objs_repaired[i]);
412 des->ll_objs_skipped = cpu_to_le64(src->ll_objs_skipped);
/**
 * Load the persistent lfsck_layout trace file into RAM, byte-swap it and
 * validate the magic.
 *
 * \retval +ve: the lfsck_layout is broken, the caller should reset it.
 * \retval 0: succeed.
 * \retval -ve: failed cases.
 */
420 static int lfsck_layout_load(const struct lu_env *env,
421 struct lfsck_component *com)
423 struct lfsck_layout *lo = com->lc_file_ram;
424 const struct dt_body_operations *dbo = com->lc_obj->do_body_ops;
425 ssize_t size = com->lc_file_size;
429 rc = dbo->dbo_read(env, com->lc_obj,
430 lfsck_buf_get(env, com->lc_file_disk, size), &pos,
435 CWARN("%s: failed to load lfsck_layout: rc = %d\n",
436 lfsck_lfsck2name(com->lc_lfsck), rc);
438 } else if (rc != size) {
/* Short read: the trace file is truncated/corrupt -- caller resets it. */
439 CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n",
440 lfsck_lfsck2name(com->lc_lfsck), rc);
444 lfsck_layout_le_to_cpu(lo, com->lc_file_disk);
445 if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) {
446 CWARN("%s: invalid lfsck_layout magic %#x != %#x, "
447 "to be reset\n", lfsck_lfsck2name(com->lc_lfsck),
448 lo->ll_magic, LFSCK_LAYOUT_MAGIC);
/*
 * Persist the in-RAM lfsck_layout state: convert to little-endian into the
 * disk buffer, then write it to the trace file inside a local transaction
 * on the bottom device (declare -> start -> write -> stop).
 * NOTE(review): GOTO/RETURN lines between the visible statements are
 * missing from this extract.
 */
455 static int lfsck_layout_store(const struct lu_env *env,
456 struct lfsck_component *com)
458 struct dt_object *obj = com->lc_obj;
459 struct lfsck_instance *lfsck = com->lc_lfsck;
460 struct lfsck_layout *lo = com->lc_file_disk;
461 struct thandle *handle;
462 ssize_t size = com->lc_file_size;
467 lfsck_layout_cpu_to_le(lo, com->lc_file_ram);
468 handle = dt_trans_create(env, lfsck->li_bottom);
469 if (IS_ERR(handle)) {
470 rc = PTR_ERR(handle);
471 CERROR("%s: fail to create trans for storing lfsck_layout: "
472 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
476 rc = dt_declare_record_write(env, obj, size, pos, handle);
478 CERROR("%s: fail to declare trans for storing lfsck_layout(1): "
479 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
483 rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
485 CERROR("%s: fail to start trans for storing lfsck_layout: "
486 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
490 rc = dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos,
493 CERROR("%s: fail to store lfsck_layout(1): size = %d, "
494 "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc);
499 dt_trans_stop(env, lfsck->li_bottom, handle);
/*
 * Reset the layout component's trace state: zero the RAM copy, stamp the
 * magic and LS_INIT status, and write it out under lc_sem.
 */
504 static int lfsck_layout_init(const struct lu_env *env,
505 struct lfsck_component *com)
507 struct lfsck_layout *lo = com->lc_file_ram;
510 memset(lo, 0, com->lc_file_size);
511 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
512 lo->ll_status = LS_INIT;
513 down_write(&com->lc_sem);
514 rc = lfsck_layout_store(env, com);
515 up_write(&com->lc_sem);
/*
 * Decide whether @fid names an OST-object: first ask the FLD server for
 * the sequence's range type; if inconclusive, read the object's LMA xattr
 * and test LMAC_FID_ON_OST; finally fall back to probing for the
 * filter_fid (XATTR_NAME_FID) xattr.
 * NOTE(review): the return statements between the probes are missing from
 * this extract; presumably 1 = OST-object, 0 = not -- verify upstream.
 */
520 static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt,
521 struct dt_object *obj, const struct lu_fid *fid)
523 struct seq_server_site *ss = lu_site2seq(dt->dd_lu_dev.ld_site);
524 struct lu_seq_range range = { 0 };
525 struct lustre_mdt_attrs *lma;
528 fld_range_set_any(&range);
529 rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), &range);
531 if (fld_range_is_ost(&range))
537 lma = &lfsck_env_info(env)->lti_lma;
538 rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)),
539 XATTR_NAME_LMA, BYPASS_CAPA);
540 if (rc == sizeof(*lma)) {
541 lustre_lma_swab(lma);
543 return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0;
546 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA);
/*
 * Look up the per-sequence entry for @seq on the slave's sorted
 * llsd_seq_list; the list is kept in ascending lls_seq order, so the scan
 * can stop early once lls_seq exceeds @seq (return lines are missing from
 * this extract).
 */
551 static struct lfsck_layout_seq *
552 lfsck_layout_seq_lookup(struct lfsck_layout_slave_data *llsd, __u64 seq)
554 struct lfsck_layout_seq *lls;
556 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
557 if (lls->lls_seq == seq)
560 if (lls->lls_seq > seq)
/*
 * Insert @lls into llsd_seq_list keeping the list sorted by ascending
 * lls_seq: find the first entry with a larger sequence and add before it
 * (list_add_tail on that position), else append at the list tail.
 */
568 lfsck_layout_seq_insert(struct lfsck_layout_slave_data *llsd,
569 struct lfsck_layout_seq *lls)
571 struct lfsck_layout_seq *tmp;
572 struct list_head *pos = &llsd->llsd_seq_list;
574 list_for_each_entry(tmp, &llsd->llsd_seq_list, lls_list) {
575 if (lls->lls_seq < tmp->lls_seq) {
576 pos = &tmp->lls_list;
580 list_add_tail(&lls->lls_list, pos);
/*
 * Rebuild a missing LAST_ID file for the sequence of @obj: declare and run
 * a local transaction that creates the object (regular file, 0644-style
 * mode) and writes the initial last-id record, skipping the work entirely
 * in dry-run mode (LPF_DRYRUN).  The create/write are done under the
 * object write lock and only if the object still does not exist.
 * NOTE(review): extract incomplete -- lastid initialization, GOTO targets
 * and the final RETURN are in missing lines.
 */
584 lfsck_layout_lastid_create(const struct lu_env *env,
585 struct lfsck_instance *lfsck,
586 struct dt_object *obj)
588 struct lfsck_thread_info *info = lfsck_env_info(env);
589 struct lu_attr *la = &info->lti_la;
590 struct dt_object_format *dof = &info->lti_dof;
591 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
592 struct dt_device *dt = lfsck->li_bottom;
599 CDEBUG(D_LFSCK, "To create LAST_ID for <seq> "LPX64"\n",
600 fid_seq(lfsck_dto2fid(obj)));
602 if (bk->lb_param & LPF_DRYRUN)
605 memset(la, 0, sizeof(*la));
606 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
607 la->la_valid = LA_MODE | LA_UID | LA_GID;
608 dof->dof_type = dt_mode_to_dft(S_IFREG);
610 th = dt_trans_create(env, dt);
612 RETURN(rc = PTR_ERR(th));
614 rc = dt_declare_create(env, obj, la, NULL, dof, th);
618 rc = dt_declare_record_write(env, obj, sizeof(lastid), pos, th);
622 rc = dt_trans_start_local(env, dt, th);
626 dt_write_lock(env, obj, 0);
627 if (likely(!dt_object_exists(obj))) {
628 rc = dt_create(env, obj, la, NULL, dof, th);
630 rc = dt_record_write(env, obj,
631 lfsck_buf_get(env, &lastid, sizeof(lastid)),
634 dt_write_unlock(env, obj);
639 dt_trans_stop(env, dt, th);
/*
 * Re-read the on-disk LAST_ID for @lls and reconcile it with the largest
 * object id the scan has actually seen (lls_lastid_known).  If the stored
 * value is smaller, the LAST_ID file is stale: adopt the known value and,
 * on the first detection, notify the out-of-band listener and set
 * LF_CRASHED_LASTID; otherwise adopt the (larger) stored value.
 */
645 lfsck_layout_lastid_reload(const struct lu_env *env,
646 struct lfsck_component *com,
647 struct lfsck_layout_seq *lls)
653 dt_read_lock(env, lls->lls_lastid_obj, 0);
654 rc = dt_record_read(env, lls->lls_lastid_obj,
655 lfsck_buf_get(env, &lastid, sizeof(lastid)), &pos);
656 dt_read_unlock(env, lls->lls_lastid_obj);
657 if (unlikely(rc != 0))
660 lastid = le64_to_cpu(lastid);
661 if (lastid < lls->lls_lastid_known) {
662 struct lfsck_instance *lfsck = com->lc_lfsck;
663 struct lfsck_layout *lo = com->lc_file_ram;
665 lls->lls_lastid = lls->lls_lastid_known;
667 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
668 LASSERT(lfsck->li_out_notify != NULL);
670 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
671 LE_LASTID_REBUILDING);
672 lo->ll_flags |= LF_CRASHED_LASTID;
674 } else if (lastid >= lls->lls_lastid) {
675 lls->lls_lastid = lastid;
/*
 * Walk every tracked sequence and write back dirty LAST_ID values: clean
 * entries are first revalidated via lfsck_layout_lastid_reload() (OFD may
 * have bumped LAST_ID ahead of actual creates), dry-run mode skips the
 * write, and each dirty value is stored in its own local transaction
 * (declare -> start -> locked write -> stop).
 */
683 lfsck_layout_lastid_store(const struct lu_env *env,
684 struct lfsck_component *com)
686 struct lfsck_instance *lfsck = com->lc_lfsck;
687 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
688 struct dt_device *dt = lfsck->li_bottom;
689 struct lfsck_layout_slave_data *llsd = com->lc_data;
690 struct lfsck_layout_seq *lls;
696 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
699 /* XXX: Add the code back if we really found related
700 * inconsistent cases in the future. */
702 if (!lls->lls_dirty) {
703 /* In OFD, before the pre-creation, the LAST_ID
704 * file will be updated firstly, which may hide
705 * some potential crashed cases. For example:
707 * The old obj1's ID is higher than old LAST_ID
708 * but lower than the new LAST_ID, but the LFSCK
709 * have not touch the obj1 until the OFD updated
710 * the LAST_ID. So the LFSCK does not regard it
711 * as crashed case. But when OFD does not create
712 * successfully, it will set the LAST_ID as the
713 * real created objects' ID, then LFSCK needs to
714 * found related inconsistency. */
715 rc = lfsck_layout_lastid_reload(env, com, lls);
716 if (likely(!lls->lls_dirty))
721 CDEBUG(D_LFSCK, "To sync the LAST_ID for <seq> "LPX64
722 " as <oid> "LPU64"\n", lls->lls_seq, lls->lls_lastid);
724 if (bk->lb_param & LPF_DRYRUN) {
729 th = dt_trans_create(env, dt);
732 CERROR("%s: (1) failed to store "LPX64": rc = %d\n",
733 lfsck_lfsck2name(com->lc_lfsck),
738 rc = dt_declare_record_write(env, lls->lls_lastid_obj,
739 sizeof(lastid), pos, th);
743 rc = dt_trans_start_local(env, dt, th);
747 lastid = cpu_to_le64(lls->lls_lastid);
748 dt_write_lock(env, lls->lls_lastid_obj, 0);
749 rc = dt_record_write(env, lls->lls_lastid_obj,
750 lfsck_buf_get(env, &lastid,
751 sizeof(lastid)), &pos, th);
752 dt_write_unlock(env, lls->lls_lastid_obj);
757 dt_trans_stop(env, dt, th);
760 CERROR("%s: (2) failed to store "LPX64": rc = %d\n",
761 lfsck_lfsck2name(com->lc_lfsck),
/*
 * Locate and load the LAST_ID object for the sequence of @lls.  If the
 * object does not exist, flag LF_CRASHED_LASTID (notifying the listener
 * once), optionally pause under the OBD_FAIL_LFSCK_DELAY4 fault-injection
 * point, and recreate it; otherwise read the stored last-id (a zero-byte
 * read also counts as a crashed LAST_ID).  On success the object reference
 * is kept in lls_lastid_obj; on failure it is released.
 */
770 lfsck_layout_lastid_load(const struct lu_env *env,
771 struct lfsck_component *com,
772 struct lfsck_layout_seq *lls)
774 struct lfsck_instance *lfsck = com->lc_lfsck;
775 struct lfsck_layout *lo = com->lc_file_ram;
776 struct lu_fid *fid = &lfsck_env_info(env)->lti_fid;
777 struct dt_object *obj;
782 lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck->li_bottom));
783 obj = dt_locate(env, lfsck->li_bottom, fid);
785 RETURN(PTR_ERR(obj));
787 /* LAST_ID crashed, to be rebuilt */
788 if (!dt_object_exists(obj)) {
789 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
790 LASSERT(lfsck->li_out_notify != NULL);
792 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
793 LE_LASTID_REBUILDING);
794 lo->ll_flags |= LF_CRASHED_LASTID;
796 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) &&
798 struct l_wait_info lwi = LWI_TIMEOUT(
799 cfs_time_seconds(cfs_fail_val),
/* Drop lc_sem across the fault-injection wait to avoid blocking others. */
802 up_write(&com->lc_sem);
803 l_wait_event(lfsck->li_thread.t_ctl_waitq,
804 !thread_is_running(&lfsck->li_thread),
806 down_write(&com->lc_sem);
810 rc = lfsck_layout_lastid_create(env, lfsck, obj);
812 dt_read_lock(env, obj, 0);
813 rc = dt_read(env, obj,
814 lfsck_buf_get(env, &lls->lls_lastid, sizeof(__u64)),
816 dt_read_unlock(env, obj);
817 if (rc != 0 && rc != sizeof(__u64))
818 GOTO(out, rc = (rc > 0 ? -EFAULT : rc));
820 if (rc == 0 && !(lo->ll_flags & LF_CRASHED_LASTID)) {
821 LASSERT(lfsck->li_out_notify != NULL);
823 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
824 LE_LASTID_REBUILDING);
825 lo->ll_flags |= LF_CRASHED_LASTID;
828 lls->lls_lastid = le64_to_cpu(lls->lls_lastid);
836 lfsck_object_put(env, obj);
838 lls->lls_lastid_obj = obj;
/*
 * Completion callback for the master's async LFSCK RPCs.  Dispatches on
 * the original event: for start/notify failures it marks the scan
 * LF_INCOMPLETE or (on success) links the target onto the appropriate
 * OST/MDT and phase-1 lists; for LE_QUERY it reads the peer's reply status
 * and either moves the target to the phase-2 list (peer still scanning
 * phase 2) or unlinks it entirely.  Drops the component reference taken
 * when the RPC was issued.
 * NOTE(review): extract incomplete and text-mangled -- "&ltds"/"&ltd" were
 * corrupted to "<ds"/"<d" by the extraction; several case labels, braces
 * and returns are in missing lines.  Compare against upstream before use.
 */
843 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
844 struct ptlrpc_request *req,
847 struct lfsck_async_interpret_args *laia = args;
848 struct lfsck_component *com = laia->laia_com;
849 struct lfsck_layout_master_data *llmd = com->lc_data;
850 struct lfsck_tgt_descs *ltds = laia->laia_ltds;
851 struct lfsck_tgt_desc *ltd = laia->laia_ltd;
852 struct lfsck_request *lr = laia->laia_lr;
854 switch (lr->lr_event) {
857 struct lfsck_layout *lo = com->lc_file_ram;
859 lo->ll_flags |= LF_INCOMPLETE;
864 spin_lock(<ds->ltd_lock);
865 if (ltd->ltd_dead || ltd->ltd_layout_done) {
866 spin_unlock(<ds->ltd_lock);
871 if (lr->lr_flags & LEF_TO_OST) {
872 if (list_empty(<d->ltd_layout_list))
873 list_add_tail(<d->ltd_layout_list,
874 &llmd->llmd_ost_list);
875 if (list_empty(<d->ltd_layout_phase_list))
876 list_add_tail(<d->ltd_layout_phase_list,
877 &llmd->llmd_ost_phase1_list);
879 if (list_empty(<d->ltd_layout_list))
880 list_add_tail(<d->ltd_layout_list,
881 &llmd->llmd_mdt_list);
882 if (list_empty(<d->ltd_layout_phase_list))
883 list_add_tail(<d->ltd_layout_phase_list,
884 &llmd->llmd_mdt_phase1_list);
886 spin_unlock(<ds->ltd_lock);
893 CERROR("%s: fail to notify %s %x for layout: "
894 "event = %d, rc = %d\n",
895 lfsck_lfsck2name(com->lc_lfsck),
896 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
897 ltd->ltd_index, lr->lr_event, rc);
900 struct lfsck_reply *reply;
903 spin_lock(<ds->ltd_lock);
904 list_del_init(<d->ltd_layout_phase_list);
905 list_del_init(<d->ltd_layout_list);
906 spin_unlock(<ds->ltd_lock);
911 reply = req_capsule_server_get(&req->rq_pill,
915 CERROR("%s: invalid return value: rc = %d\n",
916 lfsck_lfsck2name(com->lc_lfsck), rc);
917 spin_lock(<ds->ltd_lock);
918 list_del_init(<d->ltd_layout_phase_list);
919 list_del_init(<d->ltd_layout_list);
920 spin_unlock(<ds->ltd_lock);
925 switch (reply->lr_status) {
926 case LS_SCANNING_PHASE1:
928 case LS_SCANNING_PHASE2:
929 spin_lock(<ds->ltd_lock);
930 list_del_init(<d->ltd_layout_phase_list);
931 if (ltd->ltd_dead || ltd->ltd_layout_done) {
932 spin_unlock(<ds->ltd_lock);
936 if (lr->lr_flags & LEF_TO_OST)
937 list_add_tail(<d->ltd_layout_phase_list,
938 &llmd->llmd_ost_phase2_list);
940 list_add_tail(<d->ltd_layout_phase_list,
941 &llmd->llmd_mdt_phase2_list);
942 spin_unlock(<ds->ltd_lock);
945 spin_lock(<ds->ltd_lock);
946 list_del_init(<d->ltd_layout_phase_list);
947 list_del_init(<d->ltd_layout_list);
948 spin_unlock(<ds->ltd_lock);
955 CERROR("%s: unexpected event: rc = %d\n",
956 lfsck_lfsck2name(com->lc_lfsck), lr->lr_event);
960 lfsck_component_put(env, com);
/*
 * Broadcast an LE_QUERY to every target still in phase-1: first the MDTs
 * (if any remain), then the OSTs.  Each target is stamped with the current
 * llmd_touch_gen and rotated to the list tail so the loop visits each one
 * exactly once per pass; replies are handled in
 * lfsck_layout_master_async_interpret().
 * NOTE(review): extract incomplete and text-mangled ("&ltds"/"&ltd"
 * corrupted to "<ds"/"<d"); reference-drop and goto lines are missing.
 */
965 static int lfsck_layout_master_query_others(const struct lu_env *env,
966 struct lfsck_component *com)
968 struct lfsck_thread_info *info = lfsck_env_info(env);
969 struct lfsck_request *lr = &info->lti_lr;
970 struct lfsck_async_interpret_args *laia = &info->lti_laia;
971 struct lfsck_instance *lfsck = com->lc_lfsck;
972 struct lfsck_layout_master_data *llmd = com->lc_data;
973 struct ptlrpc_request_set *set;
974 struct lfsck_tgt_descs *ltds;
975 struct lfsck_tgt_desc *ltd;
976 struct list_head *head;
982 set = ptlrpc_prep_set();
986 llmd->llmd_touch_gen++;
987 memset(lr, 0, sizeof(*lr));
988 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
989 lr->lr_event = LE_QUERY;
990 lr->lr_active = LT_LAYOUT;
991 laia->laia_com = com;
994 if (!list_empty(&llmd->llmd_mdt_phase1_list)) {
995 ltds = &lfsck->li_mdt_descs;
997 head = &llmd->llmd_mdt_phase1_list;
1001 ltds = &lfsck->li_ost_descs;
1002 lr->lr_flags = LEF_TO_OST;
1003 head = &llmd->llmd_ost_phase1_list;
1006 laia->laia_ltds = ltds;
1007 spin_lock(<ds->ltd_lock);
1008 while (!list_empty(head)) {
1009 ltd = list_entry(head->next,
1010 struct lfsck_tgt_desc,
1011 ltd_layout_phase_list);
/* Generation already current: every entry has been visited this pass. */
1012 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1015 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1016 list_del(<d->ltd_layout_phase_list);
1017 list_add_tail(<d->ltd_layout_phase_list, head);
1018 atomic_inc(<d->ltd_ref);
1019 laia->laia_ltd = ltd;
1020 spin_unlock(<ds->ltd_lock);
1021 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1022 lfsck_layout_master_async_interpret,
1025 CERROR("%s: fail to query %s %x for layout: rc = %d\n",
1026 lfsck_lfsck2name(lfsck),
1027 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
1028 ltd->ltd_index, rc);
1034 spin_lock(<ds->ltd_lock);
1036 spin_unlock(<ds->ltd_lock);
1039 rc = ptlrpc_set_wait(set);
1041 ptlrpc_set_destroy(set);
1047 if (!(lr->lr_flags & LEF_TO_OST) &&
1048 list_empty(&llmd->llmd_mdt_phase1_list))
1051 ptlrpc_set_destroy(set);
1053 RETURN(rc1 != 0 ? rc1 : rc);
/*
 * The master may move on to orphan handling once no MDT is still in
 * phase 1 and the OSTs have either reached phase 2 or finished phase 1.
 */
1057 lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
1059 return list_empty(&llmd->llmd_mdt_phase1_list) &&
1060 (!list_empty(&llmd->llmd_ost_phase2_list) ||
1061 list_empty(&llmd->llmd_ost_phase1_list));
/*
 * Broadcast an LFSCK event (@lr->lr_event) from the master to the relevant
 * targets.  LE_START: notify every OST (and, for a sponsor with
 * LPF_BROADCAST, the other MDTs; non-sponsors just link the MDTs locally).
 * LE_STOP / LE_PHASE2_DONE: unwind in the reverse order -- MDTs first when
 * LPF_ALL_MDT is set, then drain the OST list.  LE_PHASE1_DONE: walk the
 * MDT phase-1 list once per generation stamp.  Failures to reach a target
 * mark the scan LF_INCOMPLETE.
 * NOTE(review): extract incomplete and text-mangled -- "&ltds"/"&ltd"
 * corrupted to "<ds"/"<d"; case labels (e.g. LE_START/LE_STOP), braces and
 * GOTO lines are missing.  The fall-through from the LE_STOP handling into
 * LE_PHASE2_DONE at "case LE_PHASE2_DONE:" appears intentional -- verify
 * against upstream lfsck_layout.c.
 */
1064 static int lfsck_layout_master_notify_others(const struct lu_env *env,
1065 struct lfsck_component *com,
1066 struct lfsck_request *lr,
1069 struct lfsck_thread_info *info = lfsck_env_info(env);
1070 struct lfsck_async_interpret_args *laia = &info->lti_laia;
1071 struct lfsck_instance *lfsck = com->lc_lfsck;
1072 struct lfsck_layout_master_data *llmd = com->lc_data;
1073 struct lfsck_layout *lo = com->lc_file_ram;
1074 struct ptlrpc_request_set *set;
1075 struct lfsck_tgt_descs *ltds;
1076 struct lfsck_tgt_desc *ltd;
1077 struct lfsck_tgt_desc *next;
1078 struct list_head *head;
1084 set = ptlrpc_prep_set();
1088 lr->lr_active = LT_LAYOUT;
1089 laia->laia_com = com;
1092 switch (lr->lr_event) {
1094 /* Notify OSTs firstly, then other MDTs if needed. */
1095 lr->lr_flags |= LEF_TO_OST;
1096 ltds = &lfsck->li_ost_descs;
1099 laia->laia_ltds = ltds;
1100 down_read(<ds->ltd_rw_sem);
1101 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1102 ltd = lfsck_tgt_get(ltds, idx);
1103 LASSERT(ltd != NULL);
1105 laia->laia_ltd = ltd;
1106 ltd->ltd_layout_done = 0;
1107 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1108 lfsck_layout_master_async_interpret,
1109 laia, LFSCK_NOTIFY);
1111 CERROR("%s: fail to notify %s %x for layout "
1113 lfsck_lfsck2name(lfsck),
1114 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1117 lo->ll_flags |= LF_INCOMPLETE;
1122 up_read(<ds->ltd_rw_sem);
1126 rc = ptlrpc_set_wait(set);
1128 ptlrpc_set_destroy(set);
1134 if (!(flags & LPF_ALL_MDT))
1137 ltds = &lfsck->li_mdt_descs;
1138 /* The sponsor broadcasts the request to other MDTs. */
1139 if (flags & LPF_BROADCAST) {
1140 flags &= ~LPF_ALL_MDT;
1141 lr->lr_flags &= ~LEF_TO_OST;
1145 /* non-sponsors link other MDT targets locally. */
1146 spin_lock(<ds->ltd_lock);
1147 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1148 ltd = LTD_TGT(ltds, idx);
1149 LASSERT(ltd != NULL);
1151 if (!list_empty(<d->ltd_layout_list))
1154 list_add_tail(<d->ltd_layout_list,
1155 &llmd->llmd_mdt_list);
1156 list_add_tail(<d->ltd_layout_phase_list,
1157 &llmd->llmd_mdt_phase1_list);
1159 spin_unlock(<ds->ltd_lock);
1163 if (flags & LPF_BROADCAST)
1164 lr->lr_flags |= LEF_FORCE_STOP;
1165 case LE_PHASE2_DONE:
1166 /* Notify other MDTs if needed, then the OSTs. */
1167 if (flags & LPF_ALL_MDT) {
1168 /* The sponsor broadcasts the request to other MDTs. */
1169 if (flags & LPF_BROADCAST) {
1170 lr->lr_flags &= ~LEF_TO_OST;
1171 head = &llmd->llmd_mdt_list;
1172 ltds = &lfsck->li_mdt_descs;
1176 /* non-sponsors unlink other MDT targets locally. */
1177 ltds = &lfsck->li_mdt_descs;
1178 spin_lock(<ds->ltd_lock);
1179 list_for_each_entry_safe(ltd, next,
1180 &llmd->llmd_mdt_list,
1182 list_del_init(<d->ltd_layout_phase_list);
1183 list_del_init(<d->ltd_layout_list);
1185 spin_unlock(<ds->ltd_lock);
1189 lr->lr_flags |= LEF_TO_OST;
1190 head = &llmd->llmd_ost_list;
1191 ltds = &lfsck->li_ost_descs;
1194 laia->laia_ltds = ltds;
1195 spin_lock(<ds->ltd_lock);
1196 while (!list_empty(head)) {
1197 ltd = list_entry(head->next, struct lfsck_tgt_desc,
1199 if (!list_empty(<d->ltd_layout_phase_list))
1200 list_del_init(<d->ltd_layout_phase_list);
1201 list_del_init(<d->ltd_layout_list);
1202 laia->laia_ltd = ltd;
1203 spin_unlock(<ds->ltd_lock);
1204 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1205 lfsck_layout_master_async_interpret,
1206 laia, LFSCK_NOTIFY);
1208 CERROR("%s: fail to notify %s %x for layout "
1209 "stop/phase2: rc = %d\n",
1210 lfsck_lfsck2name(lfsck),
1211 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1212 "MDT", ltd->ltd_index, rc);
1215 spin_lock(<ds->ltd_lock);
1217 spin_unlock(<ds->ltd_lock);
1219 if (!(flags & LPF_BROADCAST))
1224 rc = ptlrpc_set_wait(set);
1226 ptlrpc_set_destroy(set);
1232 flags &= ~LPF_BROADCAST;
1234 case LE_PHASE1_DONE:
1235 llmd->llmd_touch_gen++;
1236 lr->lr_flags &= ~LEF_TO_OST;
1237 ltds = &lfsck->li_mdt_descs;
1238 laia->laia_ltds = ltds;
1239 spin_lock(<ds->ltd_lock);
1240 while (!list_empty(&llmd->llmd_mdt_phase1_list)) {
1241 ltd = list_entry(llmd->llmd_mdt_phase1_list.next,
1242 struct lfsck_tgt_desc,
1243 ltd_layout_phase_list);
1244 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1247 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1248 list_del_init(<d->ltd_layout_phase_list);
1249 list_add_tail(<d->ltd_layout_phase_list,
1250 &llmd->llmd_mdt_phase1_list);
1251 laia->laia_ltd = ltd;
1252 spin_unlock(<ds->ltd_lock);
1253 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1254 lfsck_layout_master_async_interpret,
1255 laia, LFSCK_NOTIFY);
1257 CERROR("%s: fail to notify MDT %x for layout "
1258 "phase1 done: rc = %d\n",
1259 lfsck_lfsck2name(lfsck),
1260 ltd->ltd_index, rc);
1263 spin_lock(<ds->ltd_lock);
1265 spin_unlock(<ds->ltd_lock);
1268 CERROR("%s: unexpected LFSCK event: rc = %d\n",
1269 lfsck_lfsck2name(lfsck), lr->lr_event);
1275 rc = ptlrpc_set_wait(set);
1276 ptlrpc_set_destroy(set);
1278 if (rc == 0 && lr->lr_event == LE_START &&
1279 list_empty(&llmd->llmd_ost_list))
/*
 * Record the outcome of the phase-2 (double) scan under lc_sem: update the
 * phase-2 run time, checkpoint time and checked count, then set the final
 * status -- LS_PARTIAL/LS_COMPLETED on success (clearing the scanned/
 * inconsistent flags unless dry-run), the instance status or LS_STOPPED
 * when @rc == 0, LS_FAILED otherwise.  Non-paused components are moved to
 * the idle list and the state is persisted.
 */
1285 static int lfsck_layout_double_scan_result(const struct lu_env *env,
1286 struct lfsck_component *com,
1289 struct lfsck_instance *lfsck = com->lc_lfsck;
1290 struct lfsck_layout *lo = com->lc_file_ram;
1291 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1293 down_write(&com->lc_sem);
1295 lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1296 HALF_SEC - lfsck->li_time_last_checkpoint);
1297 lo->ll_time_last_checkpoint = cfs_time_current_sec();
1298 lo->ll_objs_checked_phase2 += com->lc_new_checked;
1301 com->lc_journal = 0;
1302 if (lo->ll_flags & LF_INCOMPLETE)
1303 lo->ll_status = LS_PARTIAL;
1305 lo->ll_status = LS_COMPLETED;
1306 if (!(bk->lb_param & LPF_DRYRUN))
1307 lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
1308 lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
1309 lo->ll_success_count++;
1310 } else if (rc == 0) {
1311 lo->ll_status = lfsck->li_status;
1312 if (lo->ll_status == 0)
1313 lo->ll_status = LS_STOPPED;
1315 lo->ll_status = LS_FAILED;
1318 if (lo->ll_status != LS_PAUSED) {
1319 spin_lock(&lfsck->li_lock);
1320 list_del_init(&com->lc_link);
1321 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1322 spin_unlock(&lfsck->li_lock);
1325 rc = lfsck_layout_store(env, com);
1327 up_write(&com->lc_sem);
/*
 * Take a local EX ibits lock (@bits, e.g. LAYOUT|XATTR) on @obj so repairs
 * are serialized against client access.  On ELDLM_OK the handle @lh is
 * valid for lfsck_layout_unlock(); otherwise it is cleared (the non-OK
 * rc translation is in lines missing from this extract).
 */
1332 static int lfsck_layout_lock(const struct lu_env *env,
1333 struct lfsck_component *com,
1334 struct dt_object *obj,
1335 struct lustre_handle *lh, __u64 bits)
1337 struct lfsck_thread_info *info = lfsck_env_info(env);
1338 ldlm_policy_data_t *policy = &info->lti_policy;
1339 struct ldlm_res_id *resid = &info->lti_resid;
1340 struct lfsck_instance *lfsck = com->lc_lfsck;
1341 __u64 flags = LDLM_FL_ATOMIC_CB;
1344 LASSERT(lfsck->li_namespace != NULL);
1346 memset(policy, 0, sizeof(*policy));
1347 policy->l_inodebits.bits = bits;
1348 fid_build_reg_res_name(lfsck_dto2fid(obj), resid);
1349 rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS,
1350 policy, LCK_EX, &flags, ldlm_blocking_ast,
1351 ldlm_completion_ast, NULL, NULL, 0,
1352 LVB_T_NONE, NULL, lh);
1353 if (rc == ELDLM_OK) {
1356 memset(lh, 0, sizeof(*lh));
/*
 * Release a lock taken by lfsck_layout_lock() and invalidate the handle;
 * a no-op if the handle is unused.
 */
1363 static void lfsck_layout_unlock(struct lustre_handle *lh)
1365 if (lustre_handle_is_used(lh)) {
1366 ldlm_lock_decref(lh, LCK_EX);
1367 memset(lh, 0, sizeof(*lh));
/*
 * Stop @handle, propagating @result through th_result so the commit/abort
 * decision reflects the caller's outcome (the combined return-value logic
 * is in lines missing from this extract).
 */
1371 static int lfsck_layout_trans_stop(const struct lu_env *env,
1372 struct dt_device *dev,
1373 struct thandle *handle, int result)
1377 handle->th_result = result;
1378 rc = dt_trans_stop(env, dev, handle);
/* Placeholder: orphan OST-object scanning for @ltd. */
1387 static int lfsck_layout_scan_orphan(const struct lu_env *env,
1388 struct lfsck_component *com,
1389 struct lfsck_tgt_desc *ltd)
1391 /* XXX: To be extended in other patch. */
/* For the MDT-object with dangling reference, we need to re-create
 * the missed OST-object with the known FID/owner information.
 *
 * Holds the parent's LAYOUT|XATTR ibits lock for the duration, then in a
 * single transaction creates the child OST-object and sets its filter_fid
 * (parent FID with f_ver reused as the LOV stripe index) under the parent
 * read lock; bails out with rc = 1 if the parent is being destroyed.
 * NOTE(review): extract incomplete -- GOTO labels between declare/start
 * steps and the final RETURN are in missing lines. */
1398 static int lfsck_layout_recreate_ostobj(const struct lu_env *env,
1399 struct lfsck_component *com,
1400 struct lfsck_layout_req *llr,
1403 struct lfsck_thread_info *info = lfsck_env_info(env);
1404 struct filter_fid *pfid = &info->lti_new_pfid;
1405 struct dt_allocation_hint *hint = &info->lti_hint;
1406 struct dt_object *parent = llr->llr_parent->llo_obj;
1407 struct dt_object *child = llr->llr_child;
1408 struct dt_device *dev = lfsck_obj2dt_dev(child);
1409 const struct lu_fid *tfid = lu_object_fid(&parent->do_lu);
1410 struct thandle *handle;
1412 struct lustre_handle lh = { 0 };
1416 CDEBUG(D_LFSCK, "Repair dangling reference for: parent "DFID
1417 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1418 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1419 llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid);
1421 rc = lfsck_layout_lock(env, com, parent, &lh,
1422 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1426 handle = dt_trans_create(env, dev);
1428 GOTO(unlock1, rc = PTR_ERR(handle));
1430 hint->dah_parent = NULL;
/* f_ver carries the stripe index back to the OST, not a real version. */
1432 pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
1433 pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
1434 pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
1435 buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
1437 rc = dt_declare_create(env, child, la, hint, NULL, handle);
1441 rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID,
1442 LU_XATTR_CREATE, handle);
1446 rc = dt_trans_start(env, dev, handle);
1450 dt_read_lock(env, parent, 0);
1451 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1452 GOTO(unlock2, rc = 1);
1454 rc = dt_create(env, child, la, hint, NULL, handle);
1458 rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, LU_XATTR_CREATE,
1459 handle, BYPASS_CAPA);
1464 dt_read_unlock(env, parent);
1467 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1470 lfsck_layout_unlock(&lh);
1475 /* If the OST-object does not recognize the MDT-object as its parent, and
1476 * there is no other MDT-object claims as its parent, then just trust the
1477 * given MDT-object as its parent. So update the OST-object filter_fid. */
/*
 * Repair LLIT_UNMATCHED_PAIR: rewrite the child's XATTR_NAME_FID to point
 * at \a parent, and sync the child's uid/gid to the parent's (re-read
 * under the write lock so the very latest owner is used).
 *
 * NOTE(review): some lines (rc declaration, intermediate error checks,
 * the final return) are missing from this chunk.
 */
1478 static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env,
1479 struct lfsck_component *com,
1480 struct lfsck_layout_req *llr,
1481 const struct lu_attr *pla)
1483 struct lfsck_thread_info *info = lfsck_env_info(env);
1484 struct filter_fid *pfid = &info->lti_new_pfid;
1485 struct lu_attr *tla = &info->lti_la3;
1486 struct dt_object *parent = llr->llr_parent->llo_obj;
1487 struct dt_object *child = llr->llr_child;
1488 struct dt_device *dev = lfsck_obj2dt_dev(child);
1489 const struct lu_fid *tfid = lu_object_fid(&parent->do_lu);
1490 struct thandle *handle;
1492 struct lustre_handle lh = { 0 };
1496 CDEBUG(D_LFSCK, "Repair unmatched MDT-OST pair for: parent "DFID
1497 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1498 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1499 llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid);
/* Serialize against concurrent layout/xattr changes on the parent. */
1501 rc = lfsck_layout_lock(env, com, parent, &lh,
1502 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1506 handle = dt_trans_create(env, dev);
1508 GOTO(unlock1, rc = PTR_ERR(handle));
1510 pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
1511 pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
1512 /* The ff_parent->f_ver is not the real parent fid->f_ver. Instead,
1513 * it is the OST-object index in the parent MDT-object layout. */
1514 pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
1515 buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
/* Declare both updates before starting the transaction. */
1517 rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle);
1521 tla->la_valid = LA_UID | LA_GID;
1522 tla->la_uid = pla->la_uid;
1523 tla->la_gid = pla->la_gid;
1524 rc = dt_declare_attr_set(env, child, tla, handle);
1528 rc = dt_trans_start(env, dev, handle);
/* Write-lock the parent to stabilize it during the repair; bail out
 * (rc = 1) if it is being destroyed. */
1532 dt_write_lock(env, parent, 0);
1533 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1534 GOTO(unlock2, rc = 1);
1536 rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle,
1541 /* Get the latest parent's owner. */
1542 rc = dt_attr_get(env, parent, tla, BYPASS_CAPA);
1546 tla->la_valid = LA_UID | LA_GID;
1547 rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA);
1552 dt_write_unlock(env, parent);
1555 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1558 lfsck_layout_unlock(&lh);
1563 /* Check whether the OST-object correctly back points to the
1564 * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
/*
 * Returns LLIT_NONE-equivalent (0, on a path not fully visible here),
 * a positive lfsck_layout_inconsistency_type (LLIT_UNMATCHED_PAIR or
 * LLIT_MULTIPLE_REFERENCED), or a negative errno.
 *
 * \param[in] pfid   parent FID recorded in the OST-object's filter_fid
 * \param[in] cfid   the OST-object's own FID
 * \param[in] idx    stripe index recorded in the filter_fid (f_ver slot)
 *
 * NOTE(review): several declarations (rc, magic, count, i) and some
 * branch lines are missing from this chunk.
 */
1565 static int lfsck_layout_check_parent(const struct lu_env *env,
1566 struct lfsck_component *com,
1567 struct dt_object *parent,
1568 const struct lu_fid *pfid,
1569 const struct lu_fid *cfid,
1570 const struct lu_attr *pla,
1571 const struct lu_attr *cla,
1572 struct lfsck_layout_req *llr,
1573 struct lu_buf *lov_ea, __u32 idx)
1575 struct lfsck_thread_info *info = lfsck_env_info(env);
1576 struct lu_buf *buf = &info->lti_big_buf;
1577 struct dt_object *tobj;
1578 struct lov_mds_md_v1 *lmm;
1579 struct lov_ost_data_v1 *objs;
/* Empty pfid: if the object was never written it may simply predate the
 * pfid xattr; otherwise the pair is unmatched. */
1586 if (fid_is_zero(pfid)) {
1587 /* client never wrote. */
1588 if (cla->la_size == 0 && cla->la_blocks == 0)
1591 RETURN(LLIT_UNMATCHED_PAIR);
1594 if (unlikely(!fid_is_sane(pfid)))
1595 RETURN(LLIT_UNMATCHED_PAIR);
/* pfid matches the expected parent: also require the recorded stripe
 * index to match, else it is still an unmatched pair. */
1597 if (lu_fid_eq(pfid, lu_object_fid(&parent->do_lu))) {
1598 if (llr->llr_lov_idx == idx)
1601 RETURN(LLIT_UNMATCHED_PAIR);
/* pfid names some OTHER MDT-object: look it up and inspect its layout. */
1604 tobj = lfsck_object_find(env, com->lc_lfsck, pfid);
1606 RETURN(LLIT_UNMATCHED_PAIR);
1609 RETURN(PTR_ERR(tobj));
1611 if (!dt_object_exists(tobj))
1612 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1614 /* Load the tobj's layout EA, in spite of it is a local MDT-object or
1615 * remote one on another MDT. Then check whether the given OST-object
1616 * is in such layout. If yes, it is multiple referenced, otherwise it
1617 * is unmatched referenced case. */
1618 rc = lfsck_layout_get_lovea(env, tobj, buf, NULL);
1620 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1626 rc = lfsck_layout_verify_header(lmm);
1630 /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
1631 * been verified in lfsck_layout_verify_header() already. If some
1632 * new magic introduced in the future, then layout LFSCK needs to
1633 * be updated also. */
1634 magic = le32_to_cpu(lmm->lmm_magic);
1635 if (magic == LOV_MAGIC_V1) {
1636 objs = &(lmm->lmm_objects[0]);
1638 LASSERT(magic == LOV_MAGIC_V3);
1639 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
/* Walk tobj's stripes: if any stripe's FID equals cfid, the OST-object
 * is referenced by two layouts (multiple referenced). */
1642 count = le16_to_cpu(lmm->lmm_stripe_count);
1643 for (i = 0; i < count; i++, objs++) {
1644 struct lu_fid *tfid = &info->lti_fid2;
1645 struct ost_id *oi = &info->lti_oi;
1647 ostid_le_to_cpu(&objs->l_ost_oi, oi);
1648 ostid_to_fid(tfid, oi, le32_to_cpu(objs->l_ost_idx));
1649 if (lu_fid_eq(cfid, tfid)) {
1652 GOTO(out, rc = LLIT_MULTIPLE_REFERENCED);
1656 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1659 lfsck_object_put(env, tobj);
/*
 * Verify one MDT-object/OST-object pair (one lfsck_layout_req) and repair
 * any inconsistency found:
 *   1. fetch parent and child attributes;
 *   2. child ENOENT => LLIT_DANGLING (unless the parent itself is dying);
 *   3. read the child's XATTR_NAME_FID and cross-check the back-pointer
 *      via lfsck_layout_check_parent();
 *   4. unless dry-run, dispatch to the matching repair routine;
 *   5. update statistics under com->lc_sem.
 *
 * NOTE(review): various lines (rc/idx declarations, some branch bodies,
 * the final return) are missing from this chunk.
 */
1664 static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
1665 struct lfsck_component *com,
1666 struct lfsck_layout_req *llr)
1668 struct lfsck_layout *lo = com->lc_file_ram;
1669 struct lfsck_thread_info *info = lfsck_env_info(env);
1670 struct filter_fid_old *pea = &info->lti_old_pfid;
1671 struct lu_fid *pfid = &info->lti_fid;
1673 struct dt_object *parent = llr->llr_parent->llo_obj;
1674 struct dt_object *child = llr->llr_child;
1675 struct lu_attr *pla = &info->lti_la;
1676 struct lu_attr *cla = &info->lti_la2;
1677 struct lfsck_instance *lfsck = com->lc_lfsck;
1678 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1679 enum lfsck_layout_inconsistency_type type = LLIT_NONE;
1684 rc = dt_attr_get(env, parent, pla, BYPASS_CAPA);
1686 if (lu_object_is_dying(parent->do_lu.lo_header))
1692 rc = dt_attr_get(env, child, cla, BYPASS_CAPA);
/* Child object missing while the parent layout references it: dangling
 * reference, unless the parent is concurrently being destroyed. */
1693 if (rc == -ENOENT) {
1694 if (lu_object_is_dying(parent->do_lu.lo_header))
1697 type = LLIT_DANGLING;
/* The pfid xattr may be the old (larger) or current filter_fid layout;
 * any other size means a corrupt back-pointer => unmatched pair. */
1704 buf = lfsck_buf_get(env, pea, sizeof(struct filter_fid_old));
1705 rc= dt_xattr_get(env, child, buf, XATTR_NAME_FID, BYPASS_CAPA);
1706 if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) &&
1707 rc != sizeof(struct filter_fid))) {
1708 type = LLIT_UNMATCHED_PAIR;
1712 if (rc < 0 && rc != -ENODATA)
1715 if (rc == -ENODATA) {
1718 fid_le_to_cpu(pfid, &pea->ff_parent);
1719 /* OST-object does not save parent FID::f_ver, instead,
1720 * the OST-object index in the parent MDT-object layout
1721 * EA reuses the pfid->f_ver. */
1726 rc = lfsck_layout_check_parent(env, com, parent, pfid,
1727 lu_object_fid(&child->do_lu),
1728 pla, cla, llr, buf, idx);
1737 /* XXX: other inconsistency will be checked in other patches. */
/* Dry-run: only count, never modify. */
1740 if (bk->lb_param & LPF_DRYRUN) {
1741 if (type != LLIT_NONE)
/* Dangling repair: recreate the OST-object as a regular file owned by
 * the parent's uid/gid. */
1749 memset(cla, 0, sizeof(*cla));
1750 cla->la_uid = pla->la_uid;
1751 cla->la_gid = pla->la_gid;
1752 cla->la_mode = S_IFREG | 0666;
1753 cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
1754 LA_ATIME | LA_MTIME | LA_CTIME;
1755 rc = lfsck_layout_recreate_ostobj(env, com, llr, cla);
1757 case LLIT_UNMATCHED_PAIR:
1758 rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
1761 /* XXX: other inconsistency will be fixed in other patches. */
1763 case LLIT_MULTIPLE_REFERENCED:
1765 case LLIT_INCONSISTENT_OWNER:
/* Statistics are shared with other threads; take lc_sem for update. */
1775 down_write(&com->lc_sem);
1777 /* If cannot touch the target server,
1778 * mark the LFSCK as INCOMPLETE. */
1779 if (rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -ETIMEDOUT ||
1780 rc == -EHOSTDOWN || rc == -EHOSTUNREACH) {
1781 lo->ll_flags |= LF_INCOMPLETE;
1782 lo->ll_objs_skipped++;
1785 lo->ll_objs_failed_phase1++;
1787 } else if (rc > 0) {
1788 LASSERTF(type > LLIT_NONE && type <= LLIT_MAX,
1789 "unknown type = %d\n", type);
/* Repaired-counters are indexed by (type - 1) since LLIT_NONE == 0. */
1791 lo->ll_objs_repaired[type - 1]++;
1793 up_write(&com->lc_sem);
/*
 * Layout LFSCK assistant thread (master side).
 *
 * Pipeline consumer for the requests that the main engine prefetches onto
 * llmd->llmd_req_list. Life cycle:
 *   1. broadcast LE_START to the involved targets;
 *   2. mark itself SVC_RUNNING and loop: pop requests, verify/repair each
 *      via lfsck_layout_assistant_handle_one(), throttle the producer via
 *      llmd_prefetched vs. the async window;
 *   3. on llmd_to_post: broadcast LE_PHASE1_DONE;
 *   4. on llmd_to_double_scan: poll remote targets (30s interval) until
 *      all finish phase-1, running orphan scans per OST;
 *   5. cleanup: drain unhandled requests, broadcast the final event
 *      (LE_PHASE2_DONE / LE_STOP), record status, wake the master.
 *
 * NOTE(review): numerous lines (rc/rc1/flags declarations, some list
 * handling and label lines) are missing from this chunk; comments only
 * describe the visible flow.
 */
1798 static int lfsck_layout_assistant(void *args)
1800 struct lfsck_thread_args *lta = args;
1801 struct lu_env *env = &lta->lta_env;
1802 struct lfsck_component *com = lta->lta_com;
1803 struct lfsck_instance *lfsck = lta->lta_lfsck;
1804 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1805 struct lfsck_position *pos = &com->lc_pos_start;
1806 struct lfsck_thread_info *info = lfsck_env_info(env);
1807 struct lfsck_request *lr = &info->lti_lr;
1808 struct lfsck_layout_master_data *llmd = com->lc_data;
1809 struct ptlrpc_thread *mthread = &lfsck->li_thread;
1810 struct ptlrpc_thread *athread = &llmd->llmd_thread;
1811 struct lfsck_layout_req *llr;
1812 struct l_wait_info lwi = { 0 };
/* Build the LE_START request from the bookmark/start parameters and
 * notify all involved targets before scanning begins. */
1818 if (lta->lta_lsp->lsp_start != NULL)
1819 flags = lta->lta_lsp->lsp_start->ls_flags;
1821 flags = bk->lb_param;
1822 memset(lr, 0, sizeof(*lr));
1823 lr->lr_event = LE_START;
1824 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1825 lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
1827 lr->lr_speed = bk->lb_speed_limit;
1828 lr->lr_version = bk->lb_version;
1829 lr->lr_param = bk->lb_param;
1830 lr->lr_async_windows = bk->lb_async_windows;
1831 if (pos->lp_oit_cookie <= 1)
1832 lr->lr_param |= LPF_RESET;
1834 rc = lfsck_layout_master_notify_others(env, com, lr, flags);
1836 CERROR("%s: fail to notify others for layout start: rc = %d\n",
1837 lfsck_lfsck2name(lfsck), rc);
/* Signal the master engine that the assistant is up. */
1841 spin_lock(&llmd->llmd_lock);
1842 thread_set_flags(athread, SVC_RUNNING);
1843 spin_unlock(&llmd->llmd_lock);
1844 wake_up_all(&mthread->t_ctl_waitq);
1847 while (!list_empty(&llmd->llmd_req_list)) {
1848 bool wakeup = false;
1850 if (unlikely(llmd->llmd_exit))
1851 GOTO(cleanup1, rc = llmd->llmd_post_result);
1853 llr = list_entry(llmd->llmd_req_list.next,
1854 struct lfsck_layout_req,
1856 /* Only the lfsck_layout_assistant thread itself can
1857 * remove the "llr" from the head of the list, LFSCK
1858 * engine thread only inserts other new "lld" at the
1859 * end of the list. So it is safe to handle current
1860 * "llr" without the spin_lock. */
1861 rc = lfsck_layout_assistant_handle_one(env, com, llr);
1862 spin_lock(&llmd->llmd_lock);
1863 list_del_init(&llr->llr_list);
/* If the producer was throttled by a full async window, dropping below
 * the window must wake it. */
1864 if (bk->lb_async_windows != 0 &&
1865 llmd->llmd_prefetched >= bk->lb_async_windows)
1868 llmd->llmd_prefetched--;
1869 spin_unlock(&llmd->llmd_lock);
1871 wake_up_all(&mthread->t_ctl_waitq);
1873 lfsck_layout_req_fini(env, llr);
1874 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
1878 /* Wakeup the master engine if it is waiting in checkpoint. */
1879 wake_up_all(&mthread->t_ctl_waitq);
/* Idle: sleep until there is work, a post/double-scan transition, or
 * an exit notification. */
1881 l_wait_event(athread->t_ctl_waitq,
1882 !lfsck_layout_req_empty(llmd) ||
1884 llmd->llmd_to_post ||
1885 llmd->llmd_to_double_scan,
1888 if (unlikely(llmd->llmd_exit))
1889 GOTO(cleanup1, rc = llmd->llmd_post_result);
1891 if (!list_empty(&llmd->llmd_req_list))
/* Phase-1 done on this MDT: broadcast LE_PHASE1_DONE to targets. */
1894 if (llmd->llmd_to_post) {
1895 llmd->llmd_to_post = 0;
1896 LASSERT(llmd->llmd_post_result > 0);
1898 memset(lr, 0, sizeof(*lr));
1899 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1900 lr->lr_event = LE_PHASE1_DONE;
1901 lr->lr_status = llmd->llmd_post_result;
1902 rc = lfsck_layout_master_notify_others(env, com, lr, 0);
1904 CERROR("%s: failed to notify others "
1905 "for layout post: rc = %d\n",
1906 lfsck_lfsck2name(lfsck), rc);
1908 /* Wakeup the master engine to go ahead. */
1909 wake_up_all(&mthread->t_ctl_waitq);
1912 if (llmd->llmd_to_double_scan) {
1913 llmd->llmd_to_double_scan = 0;
1914 atomic_inc(&lfsck->li_double_scan_count);
1915 llmd->llmd_in_double_scan = 1;
1916 wake_up_all(&mthread->t_ctl_waitq);
1918 while (llmd->llmd_in_double_scan) {
1919 struct lfsck_tgt_descs *ltds =
1920 &lfsck->li_ost_descs;
1921 struct lfsck_tgt_desc *ltd;
1923 rc = lfsck_layout_master_query_others(env, com);
1924 if (lfsck_layout_master_to_orphan(llmd))
1930 /* Pull LFSCK status on related targets once
1931 * per 30 seconds if we are not notified. */
1932 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30),
1933 cfs_time_seconds(1),
1935 rc = l_wait_event(athread->t_ctl_waitq,
1936 lfsck_layout_master_to_orphan(llmd) ||
1938 !thread_is_running(mthread),
1941 if (unlikely(llmd->llmd_exit ||
1942 !thread_is_running(mthread)))
1943 GOTO(cleanup2, rc = 0);
1945 if (rc == -ETIMEDOUT)
/* Pop each OST that reached phase-2 and scan it for orphans. */
1952 spin_lock(&ltds->ltd_lock);
1954 &llmd->llmd_ost_phase2_list)) {
1956 llmd->llmd_ost_phase2_list.next,
1957 struct lfsck_tgt_desc,
1958 ltd_layout_phase_list);
1960 &ltd->ltd_layout_phase_list);
1961 spin_unlock(&ltds->ltd_lock);
1963 rc = lfsck_layout_scan_orphan(env, com,
1966 bk->lb_param & LPF_FAILOUT)
1969 if (unlikely(llmd->llmd_exit ||
1970 !thread_is_running(mthread)))
1971 GOTO(cleanup2, rc = 0);
1973 spin_lock(&ltds->ltd_lock);
/* All OSTs past phase-1: double scan finished successfully. */
1976 if (list_empty(&llmd->llmd_ost_phase1_list)) {
1977 spin_unlock(&ltds->ltd_lock);
1978 GOTO(cleanup2, rc = 1);
1980 spin_unlock(&ltds->ltd_lock);
1986 /* Cleanup the unfinished requests. */
1987 spin_lock(&llmd->llmd_lock);
1989 llmd->llmd_assistant_status = rc;
/* Drop the spinlock around req_fini (it may sleep/free), then retake. */
1991 while (!list_empty(&llmd->llmd_req_list)) {
1992 llr = list_entry(llmd->llmd_req_list.next,
1993 struct lfsck_layout_req,
1995 list_del_init(&llr->llr_list);
1996 llmd->llmd_prefetched--;
1997 spin_unlock(&llmd->llmd_lock);
1998 lfsck_layout_req_fini(env, llr);
1999 spin_lock(&llmd->llmd_lock);
2001 spin_unlock(&llmd->llmd_lock);
2003 LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n",
2004 llmd->llmd_prefetched);
/* Choose the final broadcast event from the exit reason:
 * rc > 0 => phase-2 completed; rc == 0 => stopped/paused (co- or local);
 * rc < 0 => failure. */
2007 memset(lr, 0, sizeof(*lr));
2008 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2010 lr->lr_event = LE_PHASE2_DONE;
2013 } else if (rc == 0) {
2014 lr->lr_event = LE_STOP;
2015 if (lfsck->li_status == LS_PAUSED ||
2016 lfsck->li_status == LS_CO_PAUSED) {
2018 lr->lr_status = LS_CO_PAUSED;
2019 } else if (lfsck->li_status == LS_STOPPED ||
2020 lfsck->li_status == LS_CO_STOPPED) {
2021 flags = lfsck->li_flags;
2022 if (flags & LPF_BROADCAST)
2023 lr->lr_status = LS_STOPPED;
2025 lr->lr_status = LS_CO_STOPPED;
2030 lr->lr_event = LE_STOP;
2032 lr->lr_status = LS_CO_FAILED;
2035 rc1 = lfsck_layout_master_notify_others(env, com, lr, flags);
2037 CERROR("%s: failed to notify others for layout quit: rc = %d\n",
2038 lfsck_lfsck2name(lfsck), rc1);
2042 /* Under force exit case, some requests may be just freed without
2043 * verification, those objects should be re-handled when next run.
2044 * So not update the on-disk tracing file under such case. */
2045 if (!llmd->llmd_exit)
2046 rc1 = lfsck_layout_double_scan_result(env, com, rc);
2049 if (llmd->llmd_in_double_scan)
2050 atomic_dec(&lfsck->li_double_scan_count);
/* Publish final status, mark SVC_STOPPED, wake the master, clean up. */
2052 spin_lock(&llmd->llmd_lock);
2053 llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc);
2054 thread_set_flags(athread, SVC_STOPPED);
2055 wake_up_all(&mthread->t_ctl_waitq);
2056 spin_unlock(&llmd->llmd_lock);
2057 lfsck_thread_args_fini(lta);
/*
 * Interpret callback for the slave's async LFSCK_QUERY RPC to a master.
 * If the RPC failed (probably a target crash) or the master is no longer
 * scanning, remove the master from the slave's tracking list so the
 * slave's LFSCK can proceed. Always drops the references taken when the
 * request was packed (llst, com, exp).
 *
 * NOTE(review): the function's return type line and some branch lines
 * are not visible in this chunk.
 */
2063 lfsck_layout_slave_async_interpret(const struct lu_env *env,
2064 struct ptlrpc_request *req,
2067 struct lfsck_layout_slave_async_args *llsaa = args;
2068 struct obd_export *exp = llsaa->llsaa_exp;
2069 struct lfsck_component *com = llsaa->llsaa_com;
2070 struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst;
2071 struct lfsck_layout_slave_data *llsd = com->lc_data;
2075 /* It is quite probably caused by target crash,
2076 * to make the LFSCK can go ahead, assume that
2077 * the target finished the LFSCK prcoessing. */
2080 struct lfsck_reply *lr;
2082 lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY);
2083 if (lr->lr_status != LS_SCANNING_PHASE1 &&
2084 lr->lr_status != LS_SCANNING_PHASE2)
/* Master no longer scanning (or unreachable): stop tracking it. */
2088 lfsck_layout_llst_del(llsd, llst);
2089 lfsck_layout_llst_put(llst);
2090 lfsck_component_put(env, com);
2091 class_export_put(exp);
/*
 * Pack an async LFSCK_QUERY RPC to the master represented by \a llst and
 * add it to \a set. The reply is handled by
 * lfsck_layout_slave_async_interpret(), which owns dropping the com/llst/
 * exp references stashed in the async args.
 *
 * NOTE(review): the copy of *lr into the request buffer and the final
 * return are on lines not visible in this chunk.
 */
2096 static int lfsck_layout_async_query(const struct lu_env *env,
2097 struct lfsck_component *com,
2098 struct obd_export *exp,
2099 struct lfsck_layout_slave_target *llst,
2100 struct lfsck_request *lr,
2101 struct ptlrpc_request_set *set)
2103 struct lfsck_layout_slave_async_args *llsaa;
2104 struct ptlrpc_request *req;
2105 struct lfsck_request *tmp;
2109 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_QUERY);
2113 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_QUERY);
2115 ptlrpc_request_free(req);
2119 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
2121 ptlrpc_request_set_replen(req);
/* Stash the references needed by the interpret callback. */
2123 llsaa = ptlrpc_req_async_args(req);
2124 llsaa->llsaa_exp = exp;
2125 llsaa->llsaa_com = lfsck_component_get(com);
2126 llsaa->llsaa_llst = llst;
2127 req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
2128 ptlrpc_set_add_req(set, req);
/*
 * Pack an async LFSCK_NOTIFY RPC (one-way event notification, no
 * interpret callback) to \a exp and add it to \a set.
 *
 * NOTE(review): the copy of *lr into the request buffer and the final
 * return are on lines not visible in this chunk.
 */
2133 static int lfsck_layout_async_notify(const struct lu_env *env,
2134 struct obd_export *exp,
2135 struct lfsck_request *lr,
2136 struct ptlrpc_request_set *set)
2138 struct ptlrpc_request *req;
2139 struct lfsck_request *tmp;
2143 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_NOTIFY);
2147 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
2149 ptlrpc_request_free(req);
2153 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
2155 ptlrpc_request_set_replen(req);
2156 ptlrpc_set_add_req(set, req);
/*
 * Slave side: send an LE_QUERY to every master currently on
 * llsd_master_list, then wait for all replies.
 *
 * The per-call llsd_touch_gen stamp plus the move-to-tail under
 * llsd_lock lets the loop visit each entry exactly once even though the
 * lock is dropped while packing RPCs.
 *
 * NOTE(review): the function's return type line, rc/rc1 declarations and
 * a few branch lines are not visible in this chunk.
 */
2162 lfsck_layout_slave_query_master(const struct lu_env *env,
2163 struct lfsck_component *com)
2165 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
2166 struct lfsck_instance *lfsck = com->lc_lfsck;
2167 struct lfsck_layout_slave_data *llsd = com->lc_data;
2168 struct lfsck_layout_slave_target *llst;
2169 struct obd_export *exp;
2170 struct ptlrpc_request_set *set;
2176 set = ptlrpc_prep_set();
2180 memset(lr, 0, sizeof(*lr));
2181 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2182 lr->lr_event = LE_QUERY;
2183 lr->lr_active = LT_LAYOUT;
2185 llsd->llsd_touch_gen++;
2186 spin_lock(&llsd->llsd_lock);
2187 while (!list_empty(&llsd->llsd_master_list)) {
2188 llst = list_entry(llsd->llsd_master_list.next,
2189 struct lfsck_layout_slave_target,
/* Head already carries the current generation: every entry visited. */
2191 if (llst->llst_gen == llsd->llsd_touch_gen)
2194 llst->llst_gen = llsd->llsd_touch_gen;
2195 list_del(&llst->llst_list);
2196 list_add_tail(&llst->llst_list,
2197 &llsd->llsd_master_list);
/* Hold a ref across the unlocked RPC-pack section. */
2198 atomic_inc(&llst->llst_ref);
2199 spin_unlock(&llsd->llsd_lock);
2201 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
/* No LWP export for this master: stop tracking it. */
2204 lfsck_layout_llst_del(llsd, llst);
2205 lfsck_layout_llst_put(llst);
2206 spin_lock(&llsd->llsd_lock);
2210 rc = lfsck_layout_async_query(env, com, exp, llst, lr, set);
2212 CERROR("%s: slave fail to query %s for layout: "
2213 "rc = %d\n", lfsck_lfsck2name(lfsck),
2214 exp->exp_obd->obd_name, rc);
2216 lfsck_layout_llst_put(llst);
2217 class_export_put(exp);
2221 spin_lock(&llsd->llsd_lock);
2223 spin_unlock(&llsd->llsd_lock);
/* Wait for all queued queries, then destroy the set. */
2226 rc = ptlrpc_set_wait(set);
2227 ptlrpc_set_destroy(set);
2229 RETURN(rc1 != 0 ? rc1 : rc);
/*
 * Slave side: broadcast \a event (with \a result as status, flagged
 * LEF_FROM_OST) to every master on llsd_master_list via async
 * LFSCK_NOTIFY RPCs, then wait for completion.
 *
 * Same touch-generation walk as lfsck_layout_slave_query_master(): stamp,
 * move to tail, drop the lock while packing the RPC.
 *
 * NOTE(review): the function's return type line, rc declaration and a
 * few branch/return lines are not visible in this chunk.
 */
2233 lfsck_layout_slave_notify_master(const struct lu_env *env,
2234 struct lfsck_component *com,
2235 enum lfsck_events event, int result)
2237 struct lfsck_instance *lfsck = com->lc_lfsck;
2238 struct lfsck_layout_slave_data *llsd = com->lc_data;
2239 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
2240 struct lfsck_layout_slave_target *llst;
2241 struct obd_export *exp;
2242 struct ptlrpc_request_set *set;
2247 set = ptlrpc_prep_set();
2251 memset(lr, 0, sizeof(*lr));
2252 lr->lr_event = event;
2253 lr->lr_flags = LEF_FROM_OST;
2254 lr->lr_status = result;
2255 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2256 lr->lr_active = LT_LAYOUT;
2257 llsd->llsd_touch_gen++;
2258 spin_lock(&llsd->llsd_lock);
2259 while (!list_empty(&llsd->llsd_master_list)) {
2260 llst = list_entry(llsd->llsd_master_list.next,
2261 struct lfsck_layout_slave_target,
/* Head already stamped with this generation: all entries visited. */
2263 if (llst->llst_gen == llsd->llsd_touch_gen)
2266 llst->llst_gen = llsd->llsd_touch_gen;
2267 list_del(&llst->llst_list);
2268 list_add_tail(&llst->llst_list,
2269 &llsd->llsd_master_list);
/* Hold a ref across the unlocked RPC-pack section. */
2270 atomic_inc(&llst->llst_ref);
2271 spin_unlock(&llsd->llsd_lock);
2273 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
/* No LWP export for this master: stop tracking it. */
2276 lfsck_layout_llst_del(llsd, llst);
2277 lfsck_layout_llst_put(llst);
2278 spin_lock(&llsd->llsd_lock);
2282 rc = lfsck_layout_async_notify(env, exp, lr, set);
2284 CERROR("%s: slave fail to notify %s for layout: "
2285 "rc = %d\n", lfsck_lfsck2name(lfsck),
2286 exp->exp_obd->obd_name, rc);
2289 lfsck_layout_llst_put(llst);
2290 class_export_put(exp);
2291 spin_lock(&llsd->llsd_lock);
2293 spin_unlock(&llsd->llsd_lock);
2296 rc = ptlrpc_set_wait(set);
2298 ptlrpc_set_destroy(set);
/*
 * Reset the in-RAM layout trace file and persist it.
 *
 * \param[in] init	true  => full zeroing (first-time initialization);
 *			false => preserve the success count and last
 *			completion time across the reset
 *
 * NOTE(review): the if/else structure around the two memset branches is
 * partially on lines not visible here.
 */
2305 static int lfsck_layout_reset(const struct lu_env *env,
2306 struct lfsck_component *com, bool init)
2308 struct lfsck_layout *lo = com->lc_file_ram;
2311 down_write(&com->lc_sem);
2313 memset(lo, 0, com->lc_file_size);
/* Non-init reset: carry the historical counters over the wipe. */
2315 __u32 count = lo->ll_success_count;
2316 __u64 last_time = lo->ll_time_last_complete;
2318 memset(lo, 0, com->lc_file_size);
2319 lo->ll_success_count = count;
2320 lo->ll_time_last_complete = last_time;
2323 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
2324 lo->ll_status = LS_INIT;
2326 rc = lfsck_layout_store(env, com);
2327 up_write(&com->lc_sem);
/*
 * Account one phase-1 failure. If this is the first inconsistency seen,
 * remember the current OI-iterator position so a later dry-run re-scan
 * can restart from it.
 *
 * \param[in] new_checked	also bump lc_new_checked (the object was
 *				newly examined, not just re-counted)
 */
2332 static void lfsck_layout_fail(const struct lu_env *env,
2333 struct lfsck_component *com, bool new_checked)
2335 struct lfsck_layout *lo = com->lc_file_ram;
2337 down_write(&com->lc_sem);
2339 com->lc_new_checked++;
2340 lo->ll_objs_failed_phase1++;
2341 if (lo->ll_pos_first_inconsistent == 0) {
2342 struct lfsck_instance *lfsck = com->lc_lfsck;
/* Record the OI iterator's current cookie via its store() method. */
2344 lo->ll_pos_first_inconsistent =
2345 lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
2348 up_write(&com->lc_sem);
/*
 * Master-side checkpoint: wait for the assistant to drain the request
 * list (so on-disk counters are consistent with handled requests), then
 * flush position/time/statistics to the trace file.
 *
 * \param[in] init	true => record the latest start position instead
 *			of a last-checkpoint update
 *
 * NOTE(review): the else branch pairing for the init test is partially
 * on lines not visible here.
 */
2351 static int lfsck_layout_master_checkpoint(const struct lu_env *env,
2352 struct lfsck_component *com, bool init)
2354 struct lfsck_instance *lfsck = com->lc_lfsck;
2355 struct lfsck_layout *lo = com->lc_file_ram;
2356 struct lfsck_layout_master_data *llmd = com->lc_data;
2357 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2358 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2359 struct l_wait_info lwi = { 0 };
/* Nothing new since the last checkpoint and not initializing: skip. */
2362 if (com->lc_new_checked == 0 && !init)
2365 l_wait_event(mthread->t_ctl_waitq,
2366 list_empty(&llmd->llmd_req_list) ||
2367 !thread_is_running(mthread) ||
2368 thread_is_stopped(athread),
2371 if (!thread_is_running(mthread) || thread_is_stopped(athread))
2374 down_write(&com->lc_sem);
2376 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2378 lo->ll_pos_last_checkpoint =
2379 lfsck->li_pos_current.lp_oit_cookie;
/* HALF_SEC rounds the elapsed time to the nearest second. */
2380 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2381 HALF_SEC - lfsck->li_time_last_checkpoint);
2382 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2383 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2384 com->lc_new_checked = 0;
2387 rc = lfsck_layout_store(env, com);
2388 up_write(&com->lc_sem);
/*
 * Slave-side checkpoint: same bookkeeping as the master variant but with
 * no assistant thread to wait for — just flush position/time/statistics
 * to the trace file.
 *
 * NOTE(review): the else branch pairing for the init test is partially
 * on lines not visible here.
 */
2393 static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
2394 struct lfsck_component *com, bool init)
2396 struct lfsck_instance *lfsck = com->lc_lfsck;
2397 struct lfsck_layout *lo = com->lc_file_ram;
2400 if (com->lc_new_checked == 0 && !init)
2403 down_write(&com->lc_sem);
2406 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2408 lo->ll_pos_last_checkpoint =
2409 lfsck->li_pos_current.lp_oit_cookie;
/* HALF_SEC rounds the elapsed time to the nearest second. */
2410 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2411 HALF_SEC - lfsck->li_time_last_checkpoint);
2412 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2413 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2414 com->lc_new_checked = 0;
2417 rc = lfsck_layout_store(env, com);
2419 up_write(&com->lc_sem);
/*
 * Common (master+slave) preparation before a layout scan starts: pick the
 * state and the OI-iterator start position.
 *
 *  - previous run COMPLETED/PARTIAL: reset counters and start over;
 *  - LF_SCANNED_ONCE set: go directly to phase 2 unless a dry-run's
 *    recorded first-inconsistency position should be replayed in phase 1;
 *  - otherwise: phase 1 from the last checkpoint (+1) or from the first
 *    recorded inconsistency when replaying a dry run.
 *
 * NOTE(review): several lines (rc/i declarations, some else branches,
 * the final return) are not visible in this chunk.
 */
2424 static int lfsck_layout_prep(const struct lu_env *env,
2425 struct lfsck_component *com)
2427 struct lfsck_instance *lfsck = com->lc_lfsck;
2428 struct lfsck_layout *lo = com->lc_file_ram;
2429 struct lfsck_position *pos = &com->lc_pos_start;
2431 fid_zero(&pos->lp_dir_parent);
2432 pos->lp_dir_cookie = 0;
2433 if (lo->ll_status == LS_COMPLETED ||
2434 lo->ll_status == LS_PARTIAL) {
2437 rc = lfsck_layout_reset(env, com, false);
2442 down_write(&com->lc_sem);
2444 lo->ll_time_latest_start = cfs_time_current_sec();
2446 spin_lock(&lfsck->li_lock);
2447 if (lo->ll_flags & LF_SCANNED_ONCE) {
2448 if (!lfsck->li_drop_dryrun ||
2449 lo->ll_pos_first_inconsistent == 0) {
/* Already scanned once and nothing to replay: jump to phase 2 and
 * move the component to the double-scan list. */
2450 lo->ll_status = LS_SCANNING_PHASE2;
2451 list_del_init(&com->lc_link);
2452 list_add_tail(&com->lc_link,
2453 &lfsck->li_list_double_scan);
2454 pos->lp_oit_cookie = 0;
/* Replaying a dropped dry run: restart phase 1 with zeroed stats
 * from the first recorded inconsistency. */
2458 lo->ll_status = LS_SCANNING_PHASE1;
2459 lo->ll_run_time_phase1 = 0;
2460 lo->ll_run_time_phase2 = 0;
2461 lo->ll_objs_checked_phase1 = 0;
2462 lo->ll_objs_checked_phase2 = 0;
2463 lo->ll_objs_failed_phase1 = 0;
2464 lo->ll_objs_failed_phase2 = 0;
2465 for (i = 0; i < LLIT_MAX; i++)
2466 lo->ll_objs_repaired[i] = 0;
2468 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2471 lo->ll_status = LS_SCANNING_PHASE1;
2472 if (!lfsck->li_drop_dryrun ||
2473 lo->ll_pos_first_inconsistent == 0)
2474 pos->lp_oit_cookie = lo->ll_pos_last_checkpoint + 1;
2476 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2478 spin_unlock(&lfsck->li_lock);
2480 up_write(&com->lc_sem);
/*
 * Slave-side prepare: run the common prep, then — only when starting
 * phase 1 with a valid requesting-MDT index — register that master on
 * the slave's tracking list.
 *
 * NOTE(review): the rc declaration and the final return are on lines
 * not visible in this chunk.
 */
2485 static int lfsck_layout_slave_prep(const struct lu_env *env,
2486 struct lfsck_component *com,
2487 struct lfsck_start_param *lsp)
2489 struct lfsck_layout *lo = com->lc_file_ram;
2490 struct lfsck_layout_slave_data *llsd = com->lc_data;
2493 /* XXX: For a new scanning, generate OST-objects
2494 * bitmap for orphan detection. */
2496 rc = lfsck_layout_prep(env, com);
2497 if (rc != 0 || lo->ll_status != LS_SCANNING_PHASE1 ||
2498 !lsp->lsp_index_valid)
2501 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
/*
 * Master-side prepare: run the common prep, reset assistant control
 * state, then start the lfsck_layout_assistant kthread and wait until it
 * reports RUNNING (or has already STOPPED, in which case its status is
 * returned instead).
 *
 * NOTE(review): rc/lta declarations, the IS_ERR(lta) check and the final
 * return are on lines not visible in this chunk.
 */
2506 static int lfsck_layout_master_prep(const struct lu_env *env,
2507 struct lfsck_component *com,
2508 struct lfsck_start_param *lsp)
2510 struct lfsck_instance *lfsck = com->lc_lfsck;
2511 struct lfsck_layout_master_data *llmd = com->lc_data;
2512 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2513 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2514 struct lfsck_thread_args *lta;
2518 rc = lfsck_layout_prep(env, com);
/* Clear all assistant hand-off state before (re)starting the thread. */
2522 llmd->llmd_assistant_status = 0;
2523 llmd->llmd_post_result = 0;
2524 llmd->llmd_to_post = 0;
2525 llmd->llmd_to_double_scan = 0;
2526 llmd->llmd_in_double_scan = 0;
2527 llmd->llmd_exit = 0;
2528 thread_set_flags(athread, 0);
2530 lta = lfsck_thread_args_init(lfsck, com, lsp);
2532 RETURN(PTR_ERR(lta));
2534 rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"));
2535 if (IS_ERR_VALUE(rc)) {
2536 CERROR("%s: Cannot start LFSCK layout assistant thread: "
2537 "rc = %ld\n", lfsck_lfsck2name(lfsck), rc);
2538 lfsck_thread_args_fini(lta);
2540 struct l_wait_info lwi = { 0 };
/* Block until the assistant signals RUNNING or STOPPED; a stopped
 * (never-ran) assistant propagates its status as our result. */
2542 l_wait_event(mthread->t_ctl_waitq,
2543 thread_is_running(athread) ||
2544 thread_is_stopped(athread),
2546 if (unlikely(!thread_is_running(athread)))
2547 rc = llmd->llmd_assistant_status;
2555 /* Pre-fetch the attribute for each stripe in the given layout EA. */
/*
 * Producer side of the master pipeline: for each stripe of \a parent's
 * layout EA \a lmm, locate the OST target, find the OST-object, declare
 * the attribute/pfid fetches, and queue an lfsck_layout_req on
 * llmd_req_list for the assistant thread. Throttled by the async window
 * (lb_async_windows) against llmd_prefetched.
 *
 * NOTE(review): several declarations (buf, count, gen, magic, i, rc,
 * index) and some error-path lines are not visible in this chunk.
 */
2556 static int lfsck_layout_scan_stripes(const struct lu_env *env,
2557 struct lfsck_component *com,
2558 struct dt_object *parent,
2559 struct lov_mds_md_v1 *lmm)
2561 struct lfsck_thread_info *info = lfsck_env_info(env);
2562 struct lfsck_instance *lfsck = com->lc_lfsck;
2563 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2564 struct lfsck_layout *lo = com->lc_file_ram;
2565 struct lfsck_layout_master_data *llmd = com->lc_data;
2566 struct lfsck_layout_object *llo = NULL;
2567 struct lov_ost_data_v1 *objs;
2568 struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs;
2569 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2570 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2571 struct l_wait_info lwi = { 0 };
2580 buf = lfsck_buf_get(env, &info->lti_old_pfid,
2581 sizeof(struct filter_fid_old));
2582 count = le16_to_cpu(lmm->lmm_stripe_count);
2583 gen = le16_to_cpu(lmm->lmm_layout_gen);
2584 /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
2585 * been verified in lfsck_layout_verify_header() already. If some
2586 * new magic introduced in the future, then layout LFSCK needs to
2587 * be updated also. */
2588 magic = le32_to_cpu(lmm->lmm_magic);
2589 if (magic == LOV_MAGIC_V1) {
2590 objs = &(lmm->lmm_objects[0]);
2592 LASSERT(magic == LOV_MAGIC_V3);
2593 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
2596 for (i = 0; i < count; i++, objs++) {
2597 struct lu_fid *fid = &info->lti_fid;
2598 struct ost_id *oi = &info->lti_oi;
2599 struct lfsck_layout_req *llr;
2600 struct lfsck_tgt_desc *tgt = NULL;
2601 struct dt_object *cobj = NULL;
2603 le32_to_cpu(objs->l_ost_idx);
2604 bool wakeup = false;
/* Throttle: wait until the assistant drains below the async window
 * (or the window is disabled), or either thread is going down. */
2606 l_wait_event(mthread->t_ctl_waitq,
2607 bk->lb_async_windows == 0 ||
2608 llmd->llmd_prefetched < bk->lb_async_windows ||
2609 !thread_is_running(mthread) ||
2610 thread_is_stopped(athread),
2613 if (unlikely(!thread_is_running(mthread)) ||
2614 thread_is_stopped(athread))
/* Map the stripe's ostid to a FID on the stripe's OST index. */
2617 ostid_le_to_cpu(&objs->l_ost_oi, oi);
2618 ostid_to_fid(fid, oi, index);
2619 tgt = lfsck_tgt_get(ltds, index);
2620 if (unlikely(tgt == NULL)) {
/* Unknown OST: the scan cannot cover this stripe. */
2621 lo->ll_flags |= LF_INCOMPLETE;
2625 cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid);
/* Declare the remote fetches the assistant will perform later. */
2631 rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA);
2635 rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID,
/* The parent wrapper (llo) is created lazily on the first stripe and
 * shared (refcounted) by all of this file's requests. */
2641 llo = lfsck_layout_object_init(env, parent, gen);
2648 llr = lfsck_layout_req_init(llo, cobj, index, i);
2655 spin_lock(&llmd->llmd_lock);
2656 if (llmd->llmd_assistant_status < 0) {
/* Assistant already failed: drop this request and bail out. */
2657 spin_unlock(&llmd->llmd_lock);
2658 lfsck_layout_req_fini(env, llr);
2660 RETURN(llmd->llmd_assistant_status);
2663 list_add_tail(&llr->llr_list, &llmd->llmd_req_list);
/* Going from empty to non-empty: the assistant may be asleep. */
2664 if (llmd->llmd_prefetched == 0)
2667 llmd->llmd_prefetched++;
2668 spin_unlock(&llmd->llmd_lock);
2670 wake_up_all(&athread->t_ctl_waitq);
/* Per-stripe error accounting (reached via the error path). */
2673 down_write(&com->lc_sem);
2674 com->lc_new_checked++;
2676 lo->ll_objs_failed_phase1++;
2677 up_write(&com->lc_sem);
2679 if (cobj != NULL && !IS_ERR(cobj))
2680 lu_object_put(env, &cobj->do_lu);
2682 if (likely(tgt != NULL))
2685 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
2692 if (llo != NULL && !IS_ERR(llo))
2693 lfsck_layout_object_put(env, llo);
2698 /* For the given object, read its layout EA locally. For each stripe, pre-fetch
2699 * the OST-object's attribute and generate an structure lfsck_layout_req on the
2700 * list ::llmd_req_list.
2702 * For each request on above list, the lfsck_layout_assistant thread compares
2703 * the OST side attribute with local attribute, if inconsistent, then repair it.
2705 * All above processing is async mode with pipeline. */
2706 static int lfsck_layout_master_exec_oit(const struct lu_env *env,
2707 struct lfsck_component *com,
2708 struct dt_object *obj)
2710 struct lfsck_thread_info *info = lfsck_env_info(env);
2711 struct ost_id *oi = &info->lti_oi;
2712 struct lfsck_layout *lo = com->lc_file_ram;
2713 struct lfsck_layout_master_data *llmd = com->lc_data;
2714 struct lfsck_instance *lfsck = com->lc_lfsck;
2715 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2716 struct thandle *handle = NULL;
2717 struct lu_buf *buf = &info->lti_big_buf;
2718 struct lov_mds_md_v1 *lmm = NULL;
2719 struct dt_device *dev = lfsck->li_bottom;
2720 struct lustre_handle lh = { 0 };
2721 ssize_t buflen = buf->lb_len;
2723 bool locked = false;
2724 bool stripe = false;
2727 if (!S_ISREG(lfsck_object_type(obj)))
2730 if (llmd->llmd_assistant_status < 0)
2731 GOTO(out, rc = -ESRCH);
2733 fid_to_lmm_oi(lfsck_dto2fid(obj), oi);
2734 lmm_oi_cpu_to_le(oi, oi);
2735 dt_read_lock(env, obj, 0);
2739 rc = lfsck_layout_get_lovea(env, obj, buf, &buflen);
2745 rc = lfsck_layout_verify_header(lmm);
2749 if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) == 0)
2750 GOTO(out, stripe = true);
2752 /* Inconsistent lmm_oi, should be repaired. */
2753 CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n",
2754 PFID(lfsck_dto2fid(obj)));
2756 if (bk->lb_param & LPF_DRYRUN) {
2757 down_write(&com->lc_sem);
2758 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
2759 up_write(&com->lc_sem);
2761 GOTO(out, stripe = true);
2764 if (!lustre_handle_is_used(&lh)) {
2765 dt_read_unlock(env, obj);
2767 buf->lb_len = buflen;
2768 rc = lfsck_layout_lock(env, com, obj, &lh,
2769 MDS_INODELOCK_LAYOUT |
2770 MDS_INODELOCK_XATTR);
2774 handle = dt_trans_create(env, dev);
2776 GOTO(out, rc = PTR_ERR(handle));
2778 rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
2779 LU_XATTR_REPLACE, handle);
2783 rc = dt_trans_start_local(env, dev, handle);
2787 dt_write_lock(env, obj, 0);
2794 rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV,
2795 LU_XATTR_REPLACE, handle, BYPASS_CAPA);
2799 down_write(&com->lc_sem);
2800 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
2801 up_write(&com->lc_sem);
2803 GOTO(out, stripe = true);
2807 if (lustre_handle_is_used(&lh))
2808 dt_write_unlock(env, obj);
2810 dt_read_unlock(env, obj);
2813 if (handle != NULL && !IS_ERR(handle))
2814 dt_trans_stop(env, dev, handle);
2816 lfsck_layout_unlock(&lh);
2818 rc = lfsck_layout_scan_stripes(env, com, obj, lmm);
2820 down_write(&com->lc_sem);
2821 com->lc_new_checked++;
2823 lo->ll_objs_failed_phase1++;
2824 up_write(&com->lc_sem);
2826 buf->lb_len = buflen;
2831 static int lfsck_layout_slave_exec_oit(const struct lu_env *env,
2832 struct lfsck_component *com,
2833 struct dt_object *obj)
2835 struct lfsck_instance *lfsck = com->lc_lfsck;
2836 struct lfsck_layout *lo = com->lc_file_ram;
2837 const struct lu_fid *fid = lfsck_dto2fid(obj);
2838 struct lfsck_layout_slave_data *llsd = com->lc_data;
2839 struct lfsck_layout_seq *lls;
2845 /* XXX: Update OST-objects bitmap for orphan detection. */
2847 LASSERT(llsd != NULL);
2849 down_write(&com->lc_sem);
2850 if (fid_is_idif(fid))
2852 else if (!fid_is_norm(fid) ||
2853 !fid_is_for_ostobj(env, lfsck->li_next, obj, fid))
2854 GOTO(unlock, rc = 0);
2857 com->lc_new_checked++;
2859 lls = lfsck_layout_seq_lookup(llsd, seq);
2862 if (unlikely(lls == NULL))
2863 GOTO(unlock, rc = -ENOMEM);
2865 INIT_LIST_HEAD(&lls->lls_list);
2867 rc = lfsck_layout_lastid_load(env, com, lls);
2869 lo->ll_objs_failed_phase1++;
2874 lfsck_layout_seq_insert(llsd, lls);
2877 if (unlikely(fid_is_last_id(fid)))
2878 GOTO(unlock, rc = 0);
2881 if (oid > lls->lls_lastid_known)
2882 lls->lls_lastid_known = oid;
2884 if (oid > lls->lls_lastid) {
2885 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
2886 /* OFD may create new objects during LFSCK scanning. */
2887 rc = lfsck_layout_lastid_reload(env, com, lls);
2888 if (unlikely(rc != 0))
2889 CWARN("%s: failed to reload LAST_ID for "LPX64
2891 lfsck_lfsck2name(com->lc_lfsck),
2893 if (oid <= lls->lls_lastid)
2894 GOTO(unlock, rc = 0);
2896 LASSERT(lfsck->li_out_notify != NULL);
2898 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
2899 LE_LASTID_REBUILDING);
2900 lo->ll_flags |= LF_CRASHED_LASTID;
2903 lls->lls_lastid = oid;
2907 GOTO(unlock, rc = 0);
2910 up_write(&com->lc_sem);
2915 static int lfsck_layout_exec_dir(const struct lu_env *env,
2916 struct lfsck_component *com,
2917 struct dt_object *obj,
2918 struct lu_dirent *ent)
2923 static int lfsck_layout_master_post(const struct lu_env *env,
2924 struct lfsck_component *com,
2925 int result, bool init)
2927 struct lfsck_instance *lfsck = com->lc_lfsck;
2928 struct lfsck_layout *lo = com->lc_file_ram;
2929 struct lfsck_layout_master_data *llmd = com->lc_data;
2930 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2931 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2932 struct l_wait_info lwi = { 0 };
2937 llmd->llmd_post_result = result;
2938 llmd->llmd_to_post = 1;
2939 if (llmd->llmd_post_result <= 0)
2940 llmd->llmd_exit = 1;
2942 wake_up_all(&athread->t_ctl_waitq);
2943 l_wait_event(mthread->t_ctl_waitq,
2944 (result > 0 && list_empty(&llmd->llmd_req_list)) ||
2945 thread_is_stopped(athread),
2948 if (llmd->llmd_assistant_status < 0)
2949 result = llmd->llmd_assistant_status;
2951 down_write(&com->lc_sem);
2952 spin_lock(&lfsck->li_lock);
2953 /* When LFSCK failed, there may be some prefetched objects those are
2954 * not been processed yet, we do not know the exactly position, then
2955 * just restart from last check-point next time. */
2956 if (!init && !llmd->llmd_exit)
2957 lo->ll_pos_last_checkpoint =
2958 lfsck->li_pos_current.lp_oit_cookie;
2961 lo->ll_status = LS_SCANNING_PHASE2;
2962 lo->ll_flags |= LF_SCANNED_ONCE;
2963 lo->ll_flags &= ~LF_UPGRADE;
2964 list_del_init(&com->lc_link);
2965 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
2966 } else if (result == 0) {
2967 lo->ll_status = lfsck->li_status;
2968 if (lo->ll_status == 0)
2969 lo->ll_status = LS_STOPPED;
2970 if (lo->ll_status != LS_PAUSED) {
2971 list_del_init(&com->lc_link);
2972 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
2975 lo->ll_status = LS_FAILED;
2976 list_del_init(&com->lc_link);
2977 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
2979 spin_unlock(&lfsck->li_lock);
2982 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2983 HALF_SEC - lfsck->li_time_last_checkpoint);
2984 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2985 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2986 com->lc_new_checked = 0;
2989 rc = lfsck_layout_store(env, com);
2990 up_write(&com->lc_sem);
2995 static int lfsck_layout_slave_post(const struct lu_env *env,
2996 struct lfsck_component *com,
2997 int result, bool init)
2999 struct lfsck_instance *lfsck = com->lc_lfsck;
3000 struct lfsck_layout *lo = com->lc_file_ram;
3004 rc = lfsck_layout_lastid_store(env, com);
3008 LASSERT(lfsck->li_out_notify != NULL);
3010 down_write(&com->lc_sem);
3012 spin_lock(&lfsck->li_lock);
3014 lo->ll_pos_last_checkpoint =
3015 lfsck->li_pos_current.lp_oit_cookie;
3017 lo->ll_status = LS_SCANNING_PHASE2;
3018 lo->ll_flags |= LF_SCANNED_ONCE;
3019 if (lo->ll_flags & LF_CRASHED_LASTID) {
3021 lo->ll_flags &= ~LF_CRASHED_LASTID;
3023 lo->ll_flags &= ~LF_UPGRADE;
3024 list_del_init(&com->lc_link);
3025 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
3026 } else if (result == 0) {
3027 lo->ll_status = lfsck->li_status;
3028 if (lo->ll_status == 0)
3029 lo->ll_status = LS_STOPPED;
3030 if (lo->ll_status != LS_PAUSED) {
3031 list_del_init(&com->lc_link);
3032 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3035 lo->ll_status = LS_FAILED;
3036 list_del_init(&com->lc_link);
3037 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3039 spin_unlock(&lfsck->li_lock);
3042 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3046 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
3047 HALF_SEC - lfsck->li_time_last_checkpoint);
3048 lo->ll_time_last_checkpoint = cfs_time_current_sec();
3049 lo->ll_objs_checked_phase1 += com->lc_new_checked;
3050 com->lc_new_checked = 0;
3053 rc = lfsck_layout_store(env, com);
3055 up_write(&com->lc_sem);
3057 lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result);
3062 static int lfsck_layout_dump(const struct lu_env *env,
3063 struct lfsck_component *com, char *buf, int len)
3065 struct lfsck_instance *lfsck = com->lc_lfsck;
3066 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
3067 struct lfsck_layout *lo = com->lc_file_ram;
3072 down_read(&com->lc_sem);
3073 rc = snprintf(buf, len,
3074 "name: lfsck_layout\n"
3080 lfsck_status2names(lo->ll_status));
3086 rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names,
3091 rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
3096 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete,
3097 "time_since_last_completed");
3101 rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start,
3102 "time_since_latest_start");
3106 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint,
3107 "time_since_last_checkpoint");
3111 rc = snprintf(buf, len,
3112 "latest_start_position: "LPU64"\n"
3113 "last_checkpoint_position: "LPU64"\n"
3114 "first_failure_position: "LPU64"\n",
3115 lo->ll_pos_latest_start,
3116 lo->ll_pos_last_checkpoint,
3117 lo->ll_pos_first_inconsistent);
3124 rc = snprintf(buf, len,
3125 "success_count: %u\n"
3126 "repaired_dangling: "LPU64"\n"
3127 "repaired_unmatched_pair: "LPU64"\n"
3128 "repaired_multiple_referenced: "LPU64"\n"
3129 "repaired_orphan: "LPU64"\n"
3130 "repaired_inconsistent_owner: "LPU64"\n"
3131 "repaired_others: "LPU64"\n"
3132 "skipped: "LPU64"\n"
3133 "failed_phase1: "LPU64"\n"
3134 "failed_phase2: "LPU64"\n",
3135 lo->ll_success_count,
3136 lo->ll_objs_repaired[LLIT_DANGLING - 1],
3137 lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1],
3138 lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1],
3139 lo->ll_objs_repaired[LLIT_ORPHAN - 1],
3140 lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1],
3141 lo->ll_objs_repaired[LLIT_OTHERS - 1],
3142 lo->ll_objs_skipped,
3143 lo->ll_objs_failed_phase1,
3144 lo->ll_objs_failed_phase2);
3151 if (lo->ll_status == LS_SCANNING_PHASE1) {
3153 const struct dt_it_ops *iops;
3154 cfs_duration_t duration = cfs_time_current() -
3155 lfsck->li_time_last_checkpoint;
3156 __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
3157 __u64 speed = checked;
3158 __u64 new_checked = com->lc_new_checked * HZ;
3159 __u32 rtime = lo->ll_run_time_phase1 +
3160 cfs_duration_sec(duration + HALF_SEC);
3163 do_div(new_checked, duration);
3165 do_div(speed, rtime);
3166 rc = snprintf(buf, len,
3167 "checked_phase1: "LPU64"\n"
3168 "checked_phase2: "LPU64"\n"
3169 "run_time_phase1: %u seconds\n"
3170 "run_time_phase2: %u seconds\n"
3171 "average_speed_phase1: "LPU64" items/sec\n"
3172 "average_speed_phase2: N/A\n"
3173 "real-time_speed_phase1: "LPU64" items/sec\n"
3174 "real-time_speed_phase2: N/A\n",
3176 lo->ll_objs_checked_phase2,
3178 lo->ll_run_time_phase2,
3187 LASSERT(lfsck->li_di_oit != NULL);
3189 iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
3191 /* The low layer otable-based iteration position may NOT
3192 * exactly match the layout-based directory traversal
3193 * cookie. Generally, it is not a serious issue. But the
3194 * caller should NOT make assumption on that. */
3195 pos = iops->store(env, lfsck->li_di_oit);
3196 if (!lfsck->li_current_oit_processed)
3198 rc = snprintf(buf, len, "current_position: "LPU64"\n", pos);
3205 /* XXX: LS_SCANNING_PHASE2 will be handled in the future. */
3206 __u64 speed1 = lo->ll_objs_checked_phase1;
3207 __u64 speed2 = lo->ll_objs_checked_phase2;
3209 if (lo->ll_run_time_phase1 != 0)
3210 do_div(speed1, lo->ll_run_time_phase1);
3211 if (lo->ll_run_time_phase2 != 0)
3212 do_div(speed2, lo->ll_run_time_phase2);
3213 rc = snprintf(buf, len,
3214 "checked_phase1: "LPU64"\n"
3215 "checked_phase2: "LPU64"\n"
3216 "run_time_phase1: %u seconds\n"
3217 "run_time_phase2: %u seconds\n"
3218 "average_speed_phase1: "LPU64" items/sec\n"
3219 "average_speed_phase2: "LPU64" objs/sec\n"
3220 "real-time_speed_phase1: N/A\n"
3221 "real-time_speed_phase2: N/A\n"
3222 "current_position: N/A\n",
3223 lo->ll_objs_checked_phase1,
3224 lo->ll_objs_checked_phase2,
3225 lo->ll_run_time_phase1,
3226 lo->ll_run_time_phase2,
3238 up_read(&com->lc_sem);
3243 static int lfsck_layout_master_double_scan(const struct lu_env *env,
3244 struct lfsck_component *com)
3246 struct lfsck_layout_master_data *llmd = com->lc_data;
3247 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3248 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3249 struct lfsck_layout *lo = com->lc_file_ram;
3250 struct l_wait_info lwi = { 0 };
3252 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3255 llmd->llmd_to_double_scan = 1;
3256 wake_up_all(&athread->t_ctl_waitq);
3257 l_wait_event(mthread->t_ctl_waitq,
3258 llmd->llmd_in_double_scan ||
3259 thread_is_stopped(athread),
3261 if (llmd->llmd_assistant_status < 0)
3262 return llmd->llmd_assistant_status;
3267 static int lfsck_layout_slave_double_scan(const struct lu_env *env,
3268 struct lfsck_component *com)
3270 struct lfsck_instance *lfsck = com->lc_lfsck;
3271 struct lfsck_layout_slave_data *llsd = com->lc_data;
3272 struct lfsck_layout *lo = com->lc_file_ram;
3273 struct ptlrpc_thread *thread = &lfsck->li_thread;
3277 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3280 atomic_inc(&lfsck->li_double_scan_count);
3282 com->lc_new_checked = 0;
3283 com->lc_new_scanned = 0;
3284 com->lc_time_last_checkpoint = cfs_time_current();
3285 com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
3286 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
3289 struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30),
3292 rc = lfsck_layout_slave_query_master(env, com);
3293 if (list_empty(&llsd->llsd_master_list)) {
3294 if (unlikely(!thread_is_running(thread)))
3305 rc = l_wait_event(thread->t_ctl_waitq,
3306 !thread_is_running(thread) ||
3307 list_empty(&llsd->llsd_master_list),
3309 if (unlikely(!thread_is_running(thread)))
3312 if (rc == -ETIMEDOUT)
3315 GOTO(done, rc = (rc < 0 ? rc : 1));
3319 rc = lfsck_layout_double_scan_result(env, com, rc);
3321 if (atomic_dec_and_test(&lfsck->li_double_scan_count))
3322 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
3327 static void lfsck_layout_master_data_release(const struct lu_env *env,
3328 struct lfsck_component *com)
3330 struct lfsck_layout_master_data *llmd = com->lc_data;
3331 struct lfsck_instance *lfsck = com->lc_lfsck;
3332 struct lfsck_tgt_descs *ltds;
3333 struct lfsck_tgt_desc *ltd;
3334 struct lfsck_tgt_desc *next;
3336 LASSERT(llmd != NULL);
3337 LASSERT(thread_is_init(&llmd->llmd_thread) ||
3338 thread_is_stopped(&llmd->llmd_thread));
3339 LASSERT(list_empty(&llmd->llmd_req_list));
3341 com->lc_data = NULL;
3343 ltds = &lfsck->li_ost_descs;
3344 spin_lock(<ds->ltd_lock);
3345 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase1_list,
3346 ltd_layout_phase_list) {
3347 list_del_init(<d->ltd_layout_phase_list);
3349 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase2_list,
3350 ltd_layout_phase_list) {
3351 list_del_init(<d->ltd_layout_phase_list);
3353 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_list,
3355 list_del_init(<d->ltd_layout_list);
3357 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list,
3358 ltd_layout_phase_list) {
3359 list_del_init(<d->ltd_layout_phase_list);
3361 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase2_list,
3362 ltd_layout_phase_list) {
3363 list_del_init(<d->ltd_layout_phase_list);
3365 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_list,
3367 list_del_init(<d->ltd_layout_list);
3369 spin_unlock(<ds->ltd_lock);
3374 static void lfsck_layout_slave_data_release(const struct lu_env *env,
3375 struct lfsck_component *com)
3377 struct lfsck_layout_slave_data *llsd = com->lc_data;
3378 struct lfsck_layout_seq *lls;
3379 struct lfsck_layout_seq *next;
3380 struct lfsck_layout_slave_target *llst;
3381 struct lfsck_layout_slave_target *tmp;
3383 LASSERT(llsd != NULL);
3385 com->lc_data = NULL;
3387 list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
3389 list_del_init(&lls->lls_list);
3390 lfsck_object_put(env, lls->lls_lastid_obj);
3394 list_for_each_entry_safe(llst, tmp, &llsd->llsd_master_list,
3396 list_del_init(&llst->llst_list);
3403 static void lfsck_layout_master_quit(const struct lu_env *env,
3404 struct lfsck_component *com)
3406 struct lfsck_layout_master_data *llmd = com->lc_data;
3407 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3408 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3409 struct l_wait_info lwi = { 0 };
3411 llmd->llmd_exit = 1;
3412 wake_up_all(&athread->t_ctl_waitq);
3413 l_wait_event(mthread->t_ctl_waitq,
3414 thread_is_init(athread) ||
3415 thread_is_stopped(athread),
3419 static int lfsck_layout_master_in_notify(const struct lu_env *env,
3420 struct lfsck_component *com,
3421 struct lfsck_request *lr)
3423 struct lfsck_instance *lfsck = com->lc_lfsck;
3424 struct lfsck_layout *lo = com->lc_file_ram;
3425 struct lfsck_layout_master_data *llmd = com->lc_data;
3426 struct lfsck_tgt_descs *ltds;
3427 struct lfsck_tgt_desc *ltd;
3430 if (lr->lr_event != LE_PHASE1_DONE &&
3431 lr->lr_event != LE_PHASE2_DONE &&
3432 lr->lr_event != LE_STOP)
3435 if (lr->lr_flags & LEF_FROM_OST)
3436 ltds = &lfsck->li_ost_descs;
3438 ltds = &lfsck->li_mdt_descs;
3439 spin_lock(<ds->ltd_lock);
3440 ltd = LTD_TGT(ltds, lr->lr_index);
3442 spin_unlock(<ds->ltd_lock);
3447 list_del_init(<d->ltd_layout_phase_list);
3448 switch (lr->lr_event) {
3449 case LE_PHASE1_DONE:
3450 if (lr->lr_status <= 0) {
3451 ltd->ltd_layout_done = 1;
3452 list_del_init(<d->ltd_layout_list);
3453 lo->ll_flags |= LF_INCOMPLETE;
3457 if (lr->lr_flags & LEF_FROM_OST) {
3458 if (list_empty(<d->ltd_layout_list))
3459 list_add_tail(<d->ltd_layout_list,
3460 &llmd->llmd_ost_list);
3461 list_add_tail(<d->ltd_layout_phase_list,
3462 &llmd->llmd_ost_phase2_list);
3464 if (list_empty(<d->ltd_layout_list))
3465 list_add_tail(<d->ltd_layout_list,
3466 &llmd->llmd_mdt_list);
3467 list_add_tail(<d->ltd_layout_phase_list,
3468 &llmd->llmd_mdt_phase2_list);
3471 case LE_PHASE2_DONE:
3472 ltd->ltd_layout_done = 1;
3473 list_del_init(<d->ltd_layout_list);
3476 ltd->ltd_layout_done = 1;
3477 list_del_init(<d->ltd_layout_list);
3478 if (!(lr->lr_flags & LEF_FORCE_STOP))
3479 lo->ll_flags |= LF_INCOMPLETE;
3484 spin_unlock(<ds->ltd_lock);
3486 if (lr->lr_flags & LEF_FORCE_STOP) {
3487 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3489 memset(stop, 0, sizeof(*stop));
3490 stop->ls_status = lr->lr_status;
3491 stop->ls_flags = lr->lr_param;
3492 lfsck_stop(env, lfsck->li_bottom, stop);
3493 } else if (lfsck_layout_master_to_orphan(llmd)) {
3494 wake_up_all(&llmd->llmd_thread.t_ctl_waitq);
3500 static int lfsck_layout_slave_in_notify(const struct lu_env *env,
3501 struct lfsck_component *com,
3502 struct lfsck_request *lr)
3504 struct lfsck_instance *lfsck = com->lc_lfsck;
3505 struct lfsck_layout_slave_data *llsd = com->lc_data;
3506 struct lfsck_layout_slave_target *llst;
3509 if (lr->lr_event != LE_PHASE2_DONE &&
3510 lr->lr_event != LE_STOP)
3513 llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index);
3517 lfsck_layout_llst_put(llst);
3518 if (list_empty(&llsd->llsd_master_list)) {
3519 switch (lr->lr_event) {
3520 case LE_PHASE2_DONE:
3521 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
3524 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3526 memset(stop, 0, sizeof(*stop));
3527 stop->ls_status = lr->lr_status;
3528 stop->ls_flags = lr->lr_param;
3529 lfsck_stop(env, lfsck->li_bottom, stop);
3540 static int lfsck_layout_query(const struct lu_env *env,
3541 struct lfsck_component *com)
3543 struct lfsck_layout *lo = com->lc_file_ram;
3545 return lo->ll_status;
3548 static int lfsck_layout_master_stop_notify(const struct lu_env *env,
3549 struct lfsck_component *com,
3550 struct lfsck_tgt_descs *ltds,
3551 struct lfsck_tgt_desc *ltd,
3552 struct ptlrpc_request_set *set)
3554 struct lfsck_thread_info *info = lfsck_env_info(env);
3555 struct lfsck_async_interpret_args *laia = &info->lti_laia;
3556 struct lfsck_request *lr = &info->lti_lr;
3557 struct lfsck_instance *lfsck = com->lc_lfsck;
3560 LASSERT(list_empty(<d->ltd_layout_list));
3561 LASSERT(list_empty(<d->ltd_layout_phase_list));
3563 memset(lr, 0, sizeof(*lr));
3564 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
3565 lr->lr_event = LE_STOP;
3566 lr->lr_active = LT_LAYOUT;
3567 if (ltds == &lfsck->li_ost_descs) {
3568 lr->lr_flags = LEF_TO_OST;
3570 if (ltd->ltd_index == lfsck_dev_idx(lfsck->li_bottom))
3575 lr->lr_status = LS_CO_STOPPED;
3577 laia->laia_com = com;
3578 laia->laia_ltds = ltds;
3579 laia->laia_ltd = ltd;
3582 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
3583 lfsck_layout_master_async_interpret,
3584 laia, LFSCK_NOTIFY);
3586 CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n",
3587 lfsck_lfsck2name(lfsck),
3588 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
3589 ltd->ltd_index, rc);
3594 /* with lfsck::li_lock held */
3595 static int lfsck_layout_slave_join(const struct lu_env *env,
3596 struct lfsck_component *com,
3597 struct lfsck_start_param *lsp)
3599 struct lfsck_instance *lfsck = com->lc_lfsck;
3600 struct lfsck_layout_slave_data *llsd = com->lc_data;
3601 struct lfsck_layout_slave_target *llst;
3602 struct lfsck_start *start = lsp->lsp_start;
3606 if (!lsp->lsp_index_valid || start == NULL ||
3607 !(start->ls_flags & LPF_ALL_MDT))
3610 spin_unlock(&lfsck->li_lock);
3611 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
3612 spin_lock(&lfsck->li_lock);
3613 if (rc == 0 && !thread_is_running(&lfsck->li_thread)) {
3614 spin_unlock(&lfsck->li_lock);
3615 llst = lfsck_layout_llst_find_and_del(llsd, lsp->lsp_index);
3617 lfsck_layout_llst_put(llst);
3618 spin_lock(&lfsck->li_lock);
3625 static struct lfsck_operations lfsck_layout_master_ops = {
3626 .lfsck_reset = lfsck_layout_reset,
3627 .lfsck_fail = lfsck_layout_fail,
3628 .lfsck_checkpoint = lfsck_layout_master_checkpoint,
3629 .lfsck_prep = lfsck_layout_master_prep,
3630 .lfsck_exec_oit = lfsck_layout_master_exec_oit,
3631 .lfsck_exec_dir = lfsck_layout_exec_dir,
3632 .lfsck_post = lfsck_layout_master_post,
3633 .lfsck_dump = lfsck_layout_dump,
3634 .lfsck_double_scan = lfsck_layout_master_double_scan,
3635 .lfsck_data_release = lfsck_layout_master_data_release,
3636 .lfsck_quit = lfsck_layout_master_quit,
3637 .lfsck_in_notify = lfsck_layout_master_in_notify,
3638 .lfsck_query = lfsck_layout_query,
3639 .lfsck_stop_notify = lfsck_layout_master_stop_notify,
3642 static struct lfsck_operations lfsck_layout_slave_ops = {
3643 .lfsck_reset = lfsck_layout_reset,
3644 .lfsck_fail = lfsck_layout_fail,
3645 .lfsck_checkpoint = lfsck_layout_slave_checkpoint,
3646 .lfsck_prep = lfsck_layout_slave_prep,
3647 .lfsck_exec_oit = lfsck_layout_slave_exec_oit,
3648 .lfsck_exec_dir = lfsck_layout_exec_dir,
3649 .lfsck_post = lfsck_layout_slave_post,
3650 .lfsck_dump = lfsck_layout_dump,
3651 .lfsck_double_scan = lfsck_layout_slave_double_scan,
3652 .lfsck_data_release = lfsck_layout_slave_data_release,
3653 .lfsck_in_notify = lfsck_layout_slave_in_notify,
3654 .lfsck_query = lfsck_layout_query,
3655 .lfsck_join = lfsck_layout_slave_join,
3658 int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
3660 struct lfsck_component *com;
3661 struct lfsck_layout *lo;
3662 struct dt_object *root = NULL;
3663 struct dt_object *obj;
3671 INIT_LIST_HEAD(&com->lc_link);
3672 INIT_LIST_HEAD(&com->lc_link_dir);
3673 init_rwsem(&com->lc_sem);
3674 atomic_set(&com->lc_ref, 1);
3675 com->lc_lfsck = lfsck;
3676 com->lc_type = LT_LAYOUT;
3677 if (lfsck->li_master) {
3678 struct lfsck_layout_master_data *llmd;
3680 com->lc_ops = &lfsck_layout_master_ops;
3681 OBD_ALLOC_PTR(llmd);
3683 GOTO(out, rc = -ENOMEM);
3685 INIT_LIST_HEAD(&llmd->llmd_req_list);
3686 spin_lock_init(&llmd->llmd_lock);
3687 INIT_LIST_HEAD(&llmd->llmd_ost_list);
3688 INIT_LIST_HEAD(&llmd->llmd_ost_phase1_list);
3689 INIT_LIST_HEAD(&llmd->llmd_ost_phase2_list);
3690 INIT_LIST_HEAD(&llmd->llmd_mdt_list);
3691 INIT_LIST_HEAD(&llmd->llmd_mdt_phase1_list);
3692 INIT_LIST_HEAD(&llmd->llmd_mdt_phase2_list);
3693 init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq);
3694 com->lc_data = llmd;
3696 struct lfsck_layout_slave_data *llsd;
3698 com->lc_ops = &lfsck_layout_slave_ops;
3699 OBD_ALLOC_PTR(llsd);
3701 GOTO(out, rc = -ENOMEM);
3703 INIT_LIST_HEAD(&llsd->llsd_seq_list);
3704 INIT_LIST_HEAD(&llsd->llsd_master_list);
3705 spin_lock_init(&llsd->llsd_lock);
3706 com->lc_data = llsd;
3708 com->lc_file_size = sizeof(*lo);
3709 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
3710 if (com->lc_file_ram == NULL)
3711 GOTO(out, rc = -ENOMEM);
3713 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
3714 if (com->lc_file_disk == NULL)
3715 GOTO(out, rc = -ENOMEM);
3717 root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
3719 GOTO(out, rc = PTR_ERR(root));
3721 if (unlikely(!dt_try_as_dir(env, root)))
3722 GOTO(out, rc = -ENOTDIR);
3724 obj = local_file_find_or_create(env, lfsck->li_los, root,
3726 S_IFREG | S_IRUGO | S_IWUSR);
3728 GOTO(out, rc = PTR_ERR(obj));
3731 rc = lfsck_layout_load(env, com);
3733 rc = lfsck_layout_reset(env, com, true);
3734 else if (rc == -ENOENT)
3735 rc = lfsck_layout_init(env, com);
3740 lo = com->lc_file_ram;
3741 switch (lo->ll_status) {
3747 spin_lock(&lfsck->li_lock);
3748 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3749 spin_unlock(&lfsck->li_lock);
3752 CERROR("%s: unknown lfsck_layout status: rc = %u\n",
3753 lfsck_lfsck2name(lfsck), lo->ll_status);
3755 case LS_SCANNING_PHASE1:
3756 case LS_SCANNING_PHASE2:
3757 /* No need to store the status to disk right now.
3758 * If the system crashed before the status stored,
3759 * it will be loaded back when next time. */
3760 lo->ll_status = LS_CRASHED;
3761 lo->ll_flags |= LF_INCOMPLETE;
3768 spin_lock(&lfsck->li_lock);
3769 list_add_tail(&com->lc_link, &lfsck->li_list_scan);
3770 spin_unlock(&lfsck->li_lock);
3774 if (lo->ll_flags & LF_CRASHED_LASTID) {
3775 LASSERT(lfsck->li_out_notify != NULL);
3777 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3778 LE_LASTID_REBUILDING);
3784 if (root != NULL && !IS_ERR(root))
3785 lu_object_put(env, &root->do_lu);
3788 lfsck_component_cleanup(env, com);