4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/lfsck/lfsck_layout.c
28 * Author: Fan, Yong <fan.yong@intel.com>
32 # define EXPORT_SYMTAB
34 #define DEBUG_SUBSYSTEM S_LFSCK
36 #include <linux/bitops.h>
38 #include <lustre/lustre_idl.h>
39 #include <lu_object.h>
40 #include <dt_object.h>
41 #include <lustre_linkea.h>
42 #include <lustre_fid.h>
43 #include <lustre_lib.h>
44 #include <lustre_net.h>
45 #include <lustre/lustre_user.h>
46 #include <md_object.h>
47 #include <obd_class.h>
49 #include "lfsck_internal.h"
/* Magic stamped into the on-disk layout trace file; validated in
 * lfsck_layout_load() to detect a crashed/foreign file. */
51 #define LFSCK_LAYOUT_MAGIC 0xB173AE14
/* Registered name of the layout LFSCK component. */
53 static const char lfsck_layout_name[] = "lfsck_layout";
/* Per-sequence state tracked by the layout LFSCK slave; entries are
 * linked (sorted by sequence) into lfsck_layout_slave_data::llsd_seq_list. */
55 struct lfsck_layout_seq {
56 struct list_head lls_list;
/* Highest object ID the scan has observed for this sequence; compared
 * against the on-disk LAST_ID in lfsck_layout_lastid_reload(). */
59 __u64 lls_lastid_known;
/* The LAST_ID file object for this sequence, located by
 * lfsck_layout_lastid_load(). */
60 struct dt_object *lls_lastid_obj;
/* Set when the in-memory lastid must be synced back to disk. */
61 unsigned int lls_dirty:1;
/* One master target (identified by llst_index) that this slave is
 * cooperating with for layout verification; refcounted via llst_ref. */
64 struct lfsck_layout_slave_target {
65 /* link into lfsck_layout_slave_data::llsd_master_list. */
66 struct list_head llst_list;
/* Per-instance private data for the layout LFSCK slave component. */
72 struct lfsck_layout_slave_data {
73 /* list for lfsck_layout_seq */
74 struct list_head llsd_seq_list;
76 /* list for the masters involved in layout verification. */
77 struct list_head llsd_master_list;
/* A pre-fetched parent (MDT-side) object plus its attributes, shared
 * via refcount (llo_ref) by the verification requests that reference it. */
82 struct lfsck_layout_object {
83 struct dt_object *llo_obj;
/* Attributes captured at init time via dt_attr_get(). */
84 struct lu_attr llo_attr;
/* One layout verification request: pairs a shared parent object with a
 * single stripe child; queued on llmd_req_list for the assistant. */
89 struct lfsck_layout_req {
90 struct list_head llr_list;
91 struct lfsck_layout_object *llr_parent;
92 struct dt_object *llr_child;
94 __u32 llr_lov_idx; /* offset in LOV EA */
/* Per-instance private data for the layout LFSCK master component. */
97 struct lfsck_layout_master_data {
/* Queue of pending lfsck_layout_req items for the assistant thread. */
99 struct list_head llmd_req_list;
101 /* list for the ost targets involved in layout verification. */
102 struct list_head llmd_ost_list;
104 /* list for the ost targets in phase1 scanning. */
105 struct list_head llmd_ost_phase1_list;
107 /* list for the ost targets in phase2 scanning. */
108 struct list_head llmd_ost_phase2_list;
110 /* list for the mdt targets involved in layout verification. */
111 struct list_head llmd_mdt_list;
113 /* list for the mdt targets in phase1 scanning. */
114 struct list_head llmd_mdt_phase1_list;
116 /* list for the mdt targets in phase2 scanning. */
117 struct list_head llmd_mdt_phase2_list;
119 struct ptlrpc_thread llmd_thread;
/* Generation counter bumped per pass so each target is visited at most
 * once while walking the phase lists (see ..._master_query_others()). */
120 __u32 llmd_touch_gen;
122 int llmd_assistant_status;
123 int llmd_post_result;
124 unsigned int llmd_to_post:1,
125 llmd_to_double_scan:1,
126 llmd_in_double_scan:1,
/* Argument bundle for the slave's asynchronous RPC callbacks. */
130 struct lfsck_layout_slave_async_args {
131 struct obd_export *llsaa_exp;
132 struct lfsck_component *llsaa_com;
133 struct lfsck_layout_slave_target *llsaa_llst;
/* Allocate an lfsck_layout_object wrapper for @obj: fetch its current
 * attributes, take a reference on the underlying lu_object, and start
 * the wrapper refcount at one.  Returns ERR_PTR(-ENOMEM) on allocation
 * failure. */
136 static struct lfsck_layout_object *
137 lfsck_layout_object_init(const struct lu_env *env, struct dt_object *obj,
140 struct lfsck_layout_object *llo;
145 return ERR_PTR(-ENOMEM);
147 rc = dt_attr_get(env, obj, &llo->llo_attr, BYPASS_CAPA);
154 lu_object_get(&obj->do_lu);
156 /* The gen can be used to check whether some others have changed the
157 * file layout after LFSCK pre-fetching but before real verification. */
159 atomic_set(&llo->llo_ref, 1);
/* Drop a reference on @llst; on the final put the entry must already be
 * unlinked from llsd_master_list (asserted) before it is released. */
165 lfsck_layout_llst_put(struct lfsck_layout_slave_target *llst)
167 if (atomic_dec_and_test(&llst->llst_ref)) {
168 LASSERT(list_empty(&llst->llst_list));
/* Register master @index with this slave: allocate a slave-target entry
 * (refcount one) and append it to llsd_master_list under llsd_lock,
 * unless an entry with the same index is already present. */
175 lfsck_layout_llst_add(struct lfsck_layout_slave_data *llsd, __u32 index)
177 struct lfsck_layout_slave_target *llst;
178 struct lfsck_layout_slave_target *tmp;
185 INIT_LIST_HEAD(&llst->llst_list);
187 llst->llst_index = index;
188 atomic_set(&llst->llst_ref, 1);
190 spin_lock(&llsd->llsd_lock);
191 list_for_each_entry(tmp, &llsd->llsd_master_list, llst_list) {
192 if (tmp->llst_index == index) {
198 list_add_tail(&llst->llst_list, &llsd->llsd_master_list);
199 spin_unlock(&llsd->llsd_lock);
/* Unlink @llst from llsd_master_list (if still linked) under llsd_lock,
 * then drop the list's reference on it. */
208 lfsck_layout_llst_del(struct lfsck_layout_slave_data *llsd,
209 struct lfsck_layout_slave_target *llst)
213 spin_lock(&llsd->llsd_lock);
214 if (!list_empty(&llst->llst_list)) {
215 list_del_init(&llst->llst_list);
218 spin_unlock(&llsd->llsd_lock);
221 lfsck_layout_llst_put(llst);
/* Find the slave-target entry with @index, unlink it from the master
 * list under llsd_lock, and return it (the caller inherits the list's
 * reference). */
224 static inline struct lfsck_layout_slave_target *
225 lfsck_layout_llst_find_and_del(struct lfsck_layout_slave_data *llsd,
228 struct lfsck_layout_slave_target *llst;
230 spin_lock(&llsd->llsd_lock);
231 list_for_each_entry(llst, &llsd->llsd_master_list, llst_list) {
232 if (llst->llst_index == index) {
233 list_del_init(&llst->llst_list);
234 spin_unlock(&llsd->llsd_lock);
239 spin_unlock(&llsd->llsd_lock);
/* Drop a reference on @llo; on the last put release the wrapped object. */
244 static inline void lfsck_layout_object_put(const struct lu_env *env,
245 struct lfsck_layout_object *llo)
247 if (atomic_dec_and_test(&llo->llo_ref)) {
248 lfsck_object_put(env, llo->llo_obj);
/* Build a verification request tying @parent (whose refcount is bumped
 * here) to stripe @child at OST index @ost_idx / LOV slot @lov_idx.
 * Returns ERR_PTR(-ENOMEM) if the request cannot be allocated. */
253 static struct lfsck_layout_req *
254 lfsck_layout_req_init(struct lfsck_layout_object *parent,
255 struct dt_object *child, __u32 ost_idx, __u32 lov_idx)
257 struct lfsck_layout_req *llr;
261 return ERR_PTR(-ENOMEM);
263 INIT_LIST_HEAD(&llr->llr_list);
264 atomic_inc(&parent->llo_ref);
265 llr->llr_parent = parent;
266 llr->llr_child = child;
267 llr->llr_ost_idx = ost_idx;
268 llr->llr_lov_idx = lov_idx;
/* Release the references taken by lfsck_layout_req_init(): the child
 * object and the shared parent wrapper. */
273 static inline void lfsck_layout_req_fini(const struct lu_env *env,
274 struct lfsck_layout_req *llr)
276 lu_object_put(env, &llr->llr_child->do_lu);
277 lfsck_layout_object_put(env, llr->llr_parent);
/* Test under llmd_lock whether the assistant's request queue is empty. */
281 static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd)
285 spin_lock(&llmd->llmd_lock);
286 if (list_empty(&llmd->llmd_req_list))
288 spin_unlock(&llmd->llmd_lock);
/* Fetch the LOV EA of @obj into @buf.  If the first read does not fit,
 * probe the required size with a LU_BUF_NULL query and grow (realloc)
 * or freshly allocate the buffer, keeping *buflen in sync with the
 * buffer capacity. */
293 static int lfsck_layout_get_lovea(const struct lu_env *env,
294 struct dt_object *obj,
295 struct lu_buf *buf, ssize_t *buflen)
300 rc = dt_xattr_get(env, obj, buf, XATTR_NAME_LOV, BYPASS_CAPA);
302 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
307 lu_buf_realloc(buf, rc);
309 *buflen = buf->lb_len;
311 if (buf->lb_buf == NULL)
323 if (unlikely(buf->lb_buf == NULL)) {
324 lu_buf_alloc(buf, rc);
326 *buflen = buf->lb_len;
328 if (buf->lb_buf == NULL)
/* Sanity-check a LOV EA header: only the V1/V3 magics and (currently)
 * the RAID0 pattern are accepted.
 * NOTE(review): local variable "patten" is a typo for "pattern". */
337 static int lfsck_layout_verify_header(struct lov_mds_md_v1 *lmm)
342 magic = le32_to_cpu(lmm->lmm_magic);
343 /* If magic crashed, keep it there. Sometime later, during OST-object
344 * orphan handling, if some OST-object(s) back-point to it, it can be
345 * verified and repaired. */
346 if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
349 patten = le32_to_cpu(lmm->lmm_pattern);
350 /* XXX: currently, we only support LOV_PATTERN_RAID0. */
351 if (patten != LOV_PATTERN_RAID0)
/* Convert an on-disk (little-endian) lfsck_layout into CPU byte order,
 * field by field. */
357 static void lfsck_layout_le_to_cpu(struct lfsck_layout *des,
358 const struct lfsck_layout *src)
362 des->ll_magic = le32_to_cpu(src->ll_magic);
363 des->ll_status = le32_to_cpu(src->ll_status);
364 des->ll_flags = le32_to_cpu(src->ll_flags);
365 des->ll_success_count = le32_to_cpu(src->ll_success_count);
366 des->ll_run_time_phase1 = le32_to_cpu(src->ll_run_time_phase1);
367 des->ll_run_time_phase2 = le32_to_cpu(src->ll_run_time_phase2);
368 des->ll_time_last_complete = le64_to_cpu(src->ll_time_last_complete);
369 des->ll_time_latest_start = le64_to_cpu(src->ll_time_latest_start);
370 des->ll_time_last_checkpoint =
371 le64_to_cpu(src->ll_time_last_checkpoint);
372 des->ll_pos_latest_start = le64_to_cpu(src->ll_pos_latest_start);
373 des->ll_pos_last_checkpoint = le64_to_cpu(src->ll_pos_last_checkpoint);
374 des->ll_pos_first_inconsistent =
375 le64_to_cpu(src->ll_pos_first_inconsistent);
376 des->ll_objs_checked_phase1 = le64_to_cpu(src->ll_objs_checked_phase1);
377 des->ll_objs_failed_phase1 = le64_to_cpu(src->ll_objs_failed_phase1);
378 des->ll_objs_checked_phase2 = le64_to_cpu(src->ll_objs_checked_phase2);
379 des->ll_objs_failed_phase2 = le64_to_cpu(src->ll_objs_failed_phase2);
380 for (i = 0; i < LLIT_MAX; i++)
381 des->ll_objs_repaired[i] =
382 le64_to_cpu(src->ll_objs_repaired[i]);
383 des->ll_objs_skipped = le64_to_cpu(src->ll_objs_skipped);
/* Convert an in-memory lfsck_layout to on-disk little-endian byte
 * order; mirror of lfsck_layout_le_to_cpu(). */
386 static void lfsck_layout_cpu_to_le(struct lfsck_layout *des,
387 const struct lfsck_layout *src)
391 des->ll_magic = cpu_to_le32(src->ll_magic);
392 des->ll_status = cpu_to_le32(src->ll_status);
393 des->ll_flags = cpu_to_le32(src->ll_flags);
394 des->ll_success_count = cpu_to_le32(src->ll_success_count);
395 des->ll_run_time_phase1 = cpu_to_le32(src->ll_run_time_phase1);
396 des->ll_run_time_phase2 = cpu_to_le32(src->ll_run_time_phase2);
397 des->ll_time_last_complete = cpu_to_le64(src->ll_time_last_complete);
398 des->ll_time_latest_start = cpu_to_le64(src->ll_time_latest_start);
399 des->ll_time_last_checkpoint =
400 cpu_to_le64(src->ll_time_last_checkpoint);
401 des->ll_pos_latest_start = cpu_to_le64(src->ll_pos_latest_start);
402 des->ll_pos_last_checkpoint = cpu_to_le64(src->ll_pos_last_checkpoint);
403 des->ll_pos_first_inconsistent =
404 cpu_to_le64(src->ll_pos_first_inconsistent);
405 des->ll_objs_checked_phase1 = cpu_to_le64(src->ll_objs_checked_phase1);
406 des->ll_objs_failed_phase1 = cpu_to_le64(src->ll_objs_failed_phase1);
407 des->ll_objs_checked_phase2 = cpu_to_le64(src->ll_objs_checked_phase2);
408 des->ll_objs_failed_phase2 = cpu_to_le64(src->ll_objs_failed_phase2);
409 for (i = 0; i < LLIT_MAX; i++)
410 des->ll_objs_repaired[i] =
411 cpu_to_le64(src->ll_objs_repaired[i]);
412 des->ll_objs_skipped = cpu_to_le64(src->ll_objs_skipped);
/* Read the layout trace file from disk into com->lc_file_disk, convert
 * it to CPU byte order in com->lc_file_ram, and validate the magic.
 *
416 * \retval +ve: the lfsck_layout is broken, the caller should reset it.
417 * \retval 0: succeed.
418 * \retval -ve: failed cases.
420 static int lfsck_layout_load(const struct lu_env *env,
421 struct lfsck_component *com)
423 struct lfsck_layout *lo = com->lc_file_ram;
424 const struct dt_body_operations *dbo = com->lc_obj->do_body_ops;
425 ssize_t size = com->lc_file_size;
429 rc = dbo->dbo_read(env, com->lc_obj,
430 lfsck_buf_get(env, com->lc_file_disk, size), &pos,
435 CWARN("%s: failed to load lfsck_layout: rc = %d\n",
436 lfsck_lfsck2name(com->lc_lfsck), rc);
/* A short read means the trace file is truncated/crashed. */
438 } else if (rc != size) {
439 CWARN("%s: crashed lfsck_layout, to be reset: rc = %d\n",
440 lfsck_lfsck2name(com->lc_lfsck), rc);
444 lfsck_layout_le_to_cpu(lo, com->lc_file_disk);
445 if (lo->ll_magic != LFSCK_LAYOUT_MAGIC) {
446 CWARN("%s: invalid lfsck_layout magic %#x != %#x, "
447 "to be reset\n", lfsck_lfsck2name(com->lc_lfsck),
448 lo->ll_magic, LFSCK_LAYOUT_MAGIC);
/* Persist the in-memory lfsck_layout: convert it to little-endian and
 * write it to the trace file inside a local transaction on the bottom
 * device. */
455 static int lfsck_layout_store(const struct lu_env *env,
456 struct lfsck_component *com)
458 struct dt_object *obj = com->lc_obj;
459 struct lfsck_instance *lfsck = com->lc_lfsck;
460 struct lfsck_layout *lo = com->lc_file_disk;
461 struct thandle *handle;
462 ssize_t size = com->lc_file_size;
467 lfsck_layout_cpu_to_le(lo, com->lc_file_ram);
468 handle = dt_trans_create(env, lfsck->li_bottom);
469 if (IS_ERR(handle)) {
470 rc = PTR_ERR(handle);
471 CERROR("%s: fail to create trans for storing lfsck_layout: "
472 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
476 rc = dt_declare_record_write(env, obj, size, pos, handle);
478 CERROR("%s: fail to declare trans for storing lfsck_layout(1): "
479 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
483 rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
485 CERROR("%s: fail to start trans for storing lfsck_layout: "
486 "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
490 rc = dt_record_write(env, obj, lfsck_buf_get(env, lo, size), &pos,
493 CERROR("%s: fail to store lfsck_layout(1): size = %d, "
494 "rc = %d\n", lfsck_lfsck2name(lfsck), (int)size, rc);
499 dt_trans_stop(env, lfsck->li_bottom, handle);
/* Reset the layout trace file to a pristine state: zero the in-memory
 * copy, stamp the magic and LS_INIT status, and persist it while
 * holding the component semaphore for write. */
504 static int lfsck_layout_init(const struct lu_env *env,
505 struct lfsck_component *com)
507 struct lfsck_layout *lo = com->lc_file_ram;
510 memset(lo, 0, com->lc_file_size);
511 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
512 lo->ll_status = LS_INIT;
513 down_write(&com->lc_sem);
514 rc = lfsck_layout_store(env, com);
515 up_write(&com->lc_sem);
/* Decide whether @fid names an OST-object.  First consult the FLD
 * server for the sequence's range type; if that says OST, done.
 * Otherwise inspect the object's LMA xattr for the LMAC_FID_ON_OST
 * compat flag, and as a last resort probe for the filter_fid
 * ("fid") xattr that only OST-objects carry. */
520 static int fid_is_for_ostobj(const struct lu_env *env, struct dt_device *dt,
521 struct dt_object *obj, const struct lu_fid *fid)
523 struct seq_server_site *ss = lu_site2seq(dt->dd_lu_dev.ld_site);
524 struct lu_seq_range range = { 0 };
525 struct lustre_mdt_attrs *lma;
528 fld_range_set_any(&range);
529 rc = fld_server_lookup(env, ss->ss_server_fld, fid_seq(fid), &range);
531 if (fld_range_is_ost(&range))
537 lma = &lfsck_env_info(env)->lti_lma;
538 rc = dt_xattr_get(env, obj, lfsck_buf_get(env, lma, sizeof(*lma)),
539 XATTR_NAME_LMA, BYPASS_CAPA);
540 if (rc == sizeof(*lma)) {
541 lustre_lma_swab(lma);
543 return lma->lma_compat & LMAC_FID_ON_OST ? 1 : 0;
546 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_FID, BYPASS_CAPA);
/* Look up the lfsck_layout_seq entry for @seq.  The list is kept in
 * ascending sequence order, so the scan can stop early once a larger
 * sequence is encountered. */
551 static struct lfsck_layout_seq *
552 lfsck_layout_seq_lookup(struct lfsck_layout_slave_data *llsd, __u64 seq)
554 struct lfsck_layout_seq *lls;
556 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
557 if (lls->lls_seq == seq)
560 if (lls->lls_seq > seq)
568 lfsck_layout_seq_insert(struct lfsck_layout_slave_data *llsd,
569 struct lfsck_layout_seq *lls)
571 struct lfsck_layout_seq *tmp;
572 struct list_head *pos = &llsd->llsd_seq_list;
574 list_for_each_entry(tmp, &llsd->llsd_seq_list, lls_list) {
575 if (lls->lls_seq < tmp->lls_seq) {
576 pos = &tmp->lls_list;
580 list_add_tail(&lls->lls_list, pos);
/* Create the LAST_ID file for the sequence of @obj (a regular file) and
 * write its initial lastid value, all within one local transaction.
 * Skipped entirely in dry-run mode. */
584 lfsck_layout_lastid_create(const struct lu_env *env,
585 struct lfsck_instance *lfsck,
586 struct dt_object *obj)
588 struct lfsck_thread_info *info = lfsck_env_info(env);
589 struct lu_attr *la = &info->lti_la;
590 struct dt_object_format *dof = &info->lti_dof;
591 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
592 struct dt_device *dt = lfsck->li_bottom;
599 CDEBUG(D_LFSCK, "To create LAST_ID for <seq> "LPX64"\n",
600 fid_seq(lfsck_dto2fid(obj)));
602 if (bk->lb_param & LPF_DRYRUN)
605 memset(la, 0, sizeof(*la));
606 la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
607 la->la_valid = LA_MODE | LA_UID | LA_GID;
608 dof->dof_type = dt_mode_to_dft(S_IFREG);
610 th = dt_trans_create(env, dt);
612 RETURN(rc = PTR_ERR(th));
614 rc = dt_declare_create(env, obj, la, NULL, dof, th);
618 rc = dt_declare_record_write(env, obj, sizeof(lastid), pos, th);
622 rc = dt_trans_start_local(env, dt, th);
/* Re-check existence under the write lock: someone may have created
 * the LAST_ID concurrently. */
626 dt_write_lock(env, obj, 0);
627 if (likely(!dt_object_exists(obj))) {
628 rc = dt_create(env, obj, la, NULL, dof, th);
630 rc = dt_record_write(env, obj,
631 lfsck_buf_get(env, &lastid, sizeof(lastid)),
634 dt_write_unlock(env, obj);
639 dt_trans_stop(env, dt, th);
/* Re-read the on-disk LAST_ID for @lls.  If it is lower than the
 * highest object ID the scan has already seen, the LAST_ID file is
 * considered crashed: keep the known value, raise LF_CRASHED_LASTID
 * and notify the out-of-LFSCK layer to start rebuilding. */
645 lfsck_layout_lastid_reload(const struct lu_env *env,
646 struct lfsck_component *com,
647 struct lfsck_layout_seq *lls)
653 dt_read_lock(env, lls->lls_lastid_obj, 0);
654 rc = dt_record_read(env, lls->lls_lastid_obj,
655 lfsck_buf_get(env, &lastid, sizeof(lastid)), &pos);
656 dt_read_unlock(env, lls->lls_lastid_obj);
657 if (unlikely(rc != 0))
660 lastid = le64_to_cpu(lastid);
661 if (lastid < lls->lls_lastid_known) {
662 struct lfsck_instance *lfsck = com->lc_lfsck;
663 struct lfsck_layout *lo = com->lc_file_ram;
665 lls->lls_lastid = lls->lls_lastid_known;
667 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
668 LASSERT(lfsck->li_out_notify != NULL);
670 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
671 LE_LASTID_REBUILDING);
672 lo->ll_flags |= LF_CRASHED_LASTID;
674 } else if (lastid >= lls->lls_lastid) {
675 lls->lls_lastid = lastid;
/* Walk all tracked sequences and flush each dirty in-memory lastid to
 * its LAST_ID file.  Clean entries are re-checked via a reload first
 * (see the comment below); actual writes are skipped in dry-run mode. */
683 lfsck_layout_lastid_store(const struct lu_env *env,
684 struct lfsck_component *com)
686 struct lfsck_instance *lfsck = com->lc_lfsck;
687 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
688 struct dt_device *dt = lfsck->li_bottom;
689 struct lfsck_layout_slave_data *llsd = com->lc_data;
690 struct lfsck_layout_seq *lls;
696 list_for_each_entry(lls, &llsd->llsd_seq_list, lls_list) {
699 /* XXX: Add the code back if we really found related
700 * inconsistent cases in the future. */
702 if (!lls->lls_dirty) {
703 /* In OFD, before the pre-creation, the LAST_ID
704 * file will be updated firstly, which may hide
705 * some potential crashed cases. For example:
707 * The old obj1's ID is higher than old LAST_ID
708 * but lower than the new LAST_ID, but the LFSCK
709 * have not touch the obj1 until the OFD updated
710 * the LAST_ID. So the LFSCK does not regard it
711 * as crashed case. But when OFD does not create
712 * successfully, it will set the LAST_ID as the
713 * real created objects' ID, then LFSCK needs to
714 * found related inconsistency. */
715 rc = lfsck_layout_lastid_reload(env, com, lls);
716 if (likely(!lls->lls_dirty))
721 CDEBUG(D_LFSCK, "To sync the LAST_ID for <seq> "LPX64
722 " as <oid> "LPU64"\n", lls->lls_seq, lls->lls_lastid);
724 if (bk->lb_param & LPF_DRYRUN) {
729 th = dt_trans_create(env, dt);
732 CERROR("%s: (1) failed to store "LPX64": rc = %d\n",
733 lfsck_lfsck2name(com->lc_lfsck),
738 rc = dt_declare_record_write(env, lls->lls_lastid_obj,
739 sizeof(lastid), pos, th);
743 rc = dt_trans_start_local(env, dt, th);
747 lastid = cpu_to_le64(lls->lls_lastid);
748 dt_write_lock(env, lls->lls_lastid_obj, 0);
749 rc = dt_record_write(env, lls->lls_lastid_obj,
750 lfsck_buf_get(env, &lastid,
751 sizeof(lastid)), &pos, th);
752 dt_write_unlock(env, lls->lls_lastid_obj);
757 dt_trans_stop(env, dt, th);
760 CERROR("%s: (2) failed to store "LPX64": rc = %d\n",
761 lfsck_lfsck2name(com->lc_lfsck),
/* Locate and read the LAST_ID file for @lls->lls_seq.  A missing file
 * (or a short read of the value) marks LF_CRASHED_LASTID, notifies the
 * rebuild layer, and triggers creation of a fresh LAST_ID object; an
 * existing file's value is read and converted to CPU byte order. */
770 lfsck_layout_lastid_load(const struct lu_env *env,
771 struct lfsck_component *com,
772 struct lfsck_layout_seq *lls)
774 struct lfsck_instance *lfsck = com->lc_lfsck;
775 struct lfsck_layout *lo = com->lc_file_ram;
776 struct lu_fid *fid = &lfsck_env_info(env)->lti_fid;
777 struct dt_object *obj;
782 lu_last_id_fid(fid, lls->lls_seq, lfsck_dev_idx(lfsck->li_bottom));
783 obj = dt_locate(env, lfsck->li_bottom, fid);
785 RETURN(PTR_ERR(obj));
787 /* LAST_ID crashed, to be rebuilt */
788 if (!dt_object_exists(obj)) {
789 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
790 LASSERT(lfsck->li_out_notify != NULL);
792 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
793 LE_LASTID_REBUILDING);
794 lo->ll_flags |= LF_CRASHED_LASTID;
/* Fault-injection hook: optionally pause here (dropping the
 * component semaphore) until the LFSCK thread stops or the
 * timeout expires. */
796 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY4) &&
798 struct l_wait_info lwi = LWI_TIMEOUT(
799 cfs_time_seconds(cfs_fail_val),
802 up_write(&com->lc_sem);
803 l_wait_event(lfsck->li_thread.t_ctl_waitq,
804 !thread_is_running(&lfsck->li_thread),
806 down_write(&com->lc_sem);
810 rc = lfsck_layout_lastid_create(env, lfsck, obj);
812 dt_read_lock(env, obj, 0);
813 rc = dt_read(env, obj,
814 lfsck_buf_get(env, &lls->lls_lastid, sizeof(__u64)),
816 dt_read_unlock(env, obj);
817 if (rc != 0 && rc != sizeof(__u64))
818 GOTO(out, rc = (rc > 0 ? -EFAULT : rc));
/* Zero-length read: the file exists but is empty — also a
 * crashed-LAST_ID case. */
820 if (rc == 0 && !(lo->ll_flags & LF_CRASHED_LASTID)) {
821 LASSERT(lfsck->li_out_notify != NULL);
823 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
824 LE_LASTID_REBUILDING);
825 lo->ll_flags |= LF_CRASHED_LASTID;
828 lls->lls_lastid = le64_to_cpu(lls->lls_lastid);
836 lfsck_object_put(env, obj);
838 lls->lls_lastid_obj = obj;
/* Interpret callback for the master's async notify/query RPCs.
 * Depending on the event carried in laia->laia_lr, it re-links the
 * replying target onto the proper OST/MDT phase lists (under
 * ltds->ltd_lock), marks the trace file LF_INCOMPLETE on notify
 * failure, or drops the target from tracking on a bad query reply.
 * Drops the reference on the component before returning.
 * NOTE(review): the original text had "&ltd"/"&ltds" corrupted to
 * "<d"/"<ds" by HTML-entity mangling; restored here. */
843 static int lfsck_layout_master_async_interpret(const struct lu_env *env,
844 struct ptlrpc_request *req,
847 struct lfsck_async_interpret_args *laia = args;
848 struct lfsck_component *com = laia->laia_com;
849 struct lfsck_layout_master_data *llmd = com->lc_data;
850 struct lfsck_tgt_descs *ltds = laia->laia_ltds;
851 struct lfsck_tgt_desc *ltd = laia->laia_ltd;
852 struct lfsck_request *lr = laia->laia_lr;
854 switch (lr->lr_event) {
857 struct lfsck_layout *lo = com->lc_file_ram;
/* The target could not be notified: the scan result will be partial. */
859 lo->ll_flags |= LF_INCOMPLETE;
864 spin_lock(&ltds->ltd_lock);
865 if (ltd->ltd_dead || ltd->ltd_layout_done) {
866 spin_unlock(&ltds->ltd_lock);
/* Link the target onto the tracking + phase1 lists for its type. */
871 if (lr->lr_flags & LEF_TO_OST) {
872 if (list_empty(&ltd->ltd_layout_list))
873 list_add_tail(&ltd->ltd_layout_list,
874 &llmd->llmd_ost_list);
875 if (list_empty(&ltd->ltd_layout_phase_list))
876 list_add_tail(&ltd->ltd_layout_phase_list,
877 &llmd->llmd_ost_phase1_list);
879 if (list_empty(&ltd->ltd_layout_list))
880 list_add_tail(&ltd->ltd_layout_list,
881 &llmd->llmd_mdt_list);
882 if (list_empty(&ltd->ltd_layout_phase_list))
883 list_add_tail(&ltd->ltd_layout_phase_list,
884 &llmd->llmd_mdt_phase1_list);
886 spin_unlock(&ltds->ltd_lock);
893 CERROR("%s: fail to notify %s %x for layout: "
894 "event = %d, rc = %d\n",
895 lfsck_lfsck2name(com->lc_lfsck),
896 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
897 ltd->ltd_index, lr->lr_event, rc);
900 struct lfsck_reply *reply;
/* Query failed: stop tracking this target. */
903 spin_lock(&ltds->ltd_lock);
904 list_del_init(&ltd->ltd_layout_phase_list);
905 list_del_init(&ltd->ltd_layout_list);
906 spin_unlock(&ltds->ltd_lock);
911 reply = req_capsule_server_get(&req->rq_pill,
915 CERROR("%s: invalid return value: rc = %d\n",
916 lfsck_lfsck2name(com->lc_lfsck), rc);
917 spin_lock(&ltds->ltd_lock);
918 list_del_init(&ltd->ltd_layout_phase_list);
919 list_del_init(&ltd->ltd_layout_list);
920 spin_unlock(&ltds->ltd_lock);
925 switch (reply->lr_status) {
926 case LS_SCANNING_PHASE1:
928 case LS_SCANNING_PHASE2:
/* Target advanced to phase2: move it to the phase2 list. */
929 spin_lock(&ltds->ltd_lock);
930 list_del_init(&ltd->ltd_layout_phase_list);
931 if (ltd->ltd_dead || ltd->ltd_layout_done) {
932 spin_unlock(&ltds->ltd_lock);
936 if (lr->lr_flags & LEF_TO_OST)
937 list_add_tail(&ltd->ltd_layout_phase_list,
938 &llmd->llmd_ost_phase2_list);
940 list_add_tail(&ltd->ltd_layout_phase_list,
941 &llmd->llmd_mdt_phase2_list);
942 spin_unlock(&ltds->ltd_lock);
/* Any other status: the target is finished or failed — drop it. */
945 spin_lock(&ltds->ltd_lock);
946 list_del_init(&ltd->ltd_layout_phase_list);
947 list_del_init(&ltd->ltd_layout_list);
948 spin_unlock(&ltds->ltd_lock);
955 CERROR("%s: unexpected event: rc = %d\n",
956 lfsck_lfsck2name(com->lc_lfsck), lr->lr_event);
960 lfsck_component_put(env, com);
/* Broadcast an LE_QUERY to every target still in phase1 — other MDTs
 * first, then the OSTs — using llmd_touch_gen so each target is visited
 * exactly once per pass (touched entries are rotated to the list tail).
 * Replies are processed by lfsck_layout_master_async_interpret().
 * NOTE(review): the original text had "&ltd"/"&ltds" corrupted to
 * "<d"/"<ds" by HTML-entity mangling; restored here. */
965 static int lfsck_layout_master_query_others(const struct lu_env *env,
966 struct lfsck_component *com)
968 struct lfsck_thread_info *info = lfsck_env_info(env);
969 struct lfsck_request *lr = &info->lti_lr;
970 struct lfsck_async_interpret_args *laia = &info->lti_laia;
971 struct lfsck_instance *lfsck = com->lc_lfsck;
972 struct lfsck_layout_master_data *llmd = com->lc_data;
973 struct ptlrpc_request_set *set;
974 struct lfsck_tgt_descs *ltds;
975 struct lfsck_tgt_desc *ltd;
976 struct list_head *head;
982 set = ptlrpc_prep_set();
986 llmd->llmd_touch_gen++;
987 memset(lr, 0, sizeof(*lr));
988 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
989 lr->lr_event = LE_QUERY;
990 lr->lr_active = LT_LAYOUT;
991 laia->laia_com = com;
/* Query the remaining phase1 MDTs first, then fall back to the OSTs. */
994 if (!list_empty(&llmd->llmd_mdt_phase1_list)) {
995 ltds = &lfsck->li_mdt_descs;
997 head = &llmd->llmd_mdt_phase1_list;
1001 ltds = &lfsck->li_ost_descs;
1002 lr->lr_flags = LEF_TO_OST;
1003 head = &llmd->llmd_ost_phase1_list;
1006 laia->laia_ltds = ltds;
1007 spin_lock(&ltds->ltd_lock);
1008 while (!list_empty(head)) {
1009 ltd = list_entry(head->next,
1010 struct lfsck_tgt_desc,
1011 ltd_layout_phase_list);
/* Already touched this pass: the whole list has been visited. */
1012 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1015 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1016 list_del(&ltd->ltd_layout_phase_list);
1017 list_add_tail(&ltd->ltd_layout_phase_list, head);
1018 atomic_inc(&ltd->ltd_ref);
1019 laia->laia_ltd = ltd;
1020 spin_unlock(&ltds->ltd_lock);
1021 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1022 lfsck_layout_master_async_interpret,
1025 CERROR("%s: fail to query %s %x for layout: rc = %d\n",
1026 lfsck_lfsck2name(lfsck),
1027 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
1028 ltd->ltd_index, rc);
1034 spin_lock(&ltds->ltd_lock);
1036 spin_unlock(&ltds->ltd_lock);
1039 rc = ptlrpc_set_wait(set);
1041 ptlrpc_set_destroy(set);
/* If the MDT pass just drained the phase1 list, loop to query OSTs. */
1047 if (!(lr->lr_flags & LEF_TO_OST) &&
1048 list_empty(&llmd->llmd_mdt_phase1_list))
1051 ptlrpc_set_destroy(set);
1053 RETURN(rc1 != 0 ? rc1 : rc);
/* True when the master may proceed to orphan handling: all MDTs have
 * finished phase1, and the OSTs have either entered phase2 or all left
 * phase1. */
1057 lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd)
1059 return list_empty(&llmd->llmd_mdt_phase1_list) &&
1060 (!list_empty(&llmd->llmd_ost_phase2_list) ||
1061 list_empty(&llmd->llmd_ost_phase1_list));
/* Broadcast the LFSCK event in @lr to the relevant targets:
 * - LE_START: notify every OST (and, for the sponsor, broadcast to the
 *   other MDTs; non-sponsors just link the MDTs locally);
 * - stop / LE_PHASE2_DONE: notify the tracked MDTs and then the OSTs,
 *   unlinking each as it is contacted;
 * - LE_PHASE1_DONE: notify the MDTs still tracked in phase1.
 * Replies are processed by lfsck_layout_master_async_interpret().
 * NOTE(review): the original text had "&ltd"/"&ltds" corrupted to
 * "<d"/"<ds" by HTML-entity mangling; restored here. */
1064 static int lfsck_layout_master_notify_others(const struct lu_env *env,
1065 struct lfsck_component *com,
1066 struct lfsck_request *lr,
1069 struct lfsck_thread_info *info = lfsck_env_info(env);
1070 struct lfsck_async_interpret_args *laia = &info->lti_laia;
1071 struct lfsck_instance *lfsck = com->lc_lfsck;
1072 struct lfsck_layout_master_data *llmd = com->lc_data;
1073 struct lfsck_layout *lo = com->lc_file_ram;
1074 struct ptlrpc_request_set *set;
1075 struct lfsck_tgt_descs *ltds;
1076 struct lfsck_tgt_desc *ltd;
1077 struct lfsck_tgt_desc *next;
1078 struct list_head *head;
1084 set = ptlrpc_prep_set();
1088 lr->lr_active = LT_LAYOUT;
1089 laia->laia_com = com;
1092 switch (lr->lr_event) {
1094 /* Notify OSTs firstly, then other MDTs if needed. */
1095 lr->lr_flags |= LEF_TO_OST;
1096 ltds = &lfsck->li_ost_descs;
1099 laia->laia_ltds = ltds;
1100 down_read(&ltds->ltd_rw_sem);
1101 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1102 ltd = lfsck_tgt_get(ltds, idx);
1103 LASSERT(ltd != NULL);
1105 laia->laia_ltd = ltd;
1106 ltd->ltd_layout_done = 0;
1107 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1108 lfsck_layout_master_async_interpret,
1109 laia, LFSCK_NOTIFY);
1111 CERROR("%s: fail to notify %s %x for layout "
1113 lfsck_lfsck2name(lfsck),
1114 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1117 lo->ll_flags |= LF_INCOMPLETE;
1122 up_read(&ltds->ltd_rw_sem);
1126 rc = ptlrpc_set_wait(set);
1128 ptlrpc_set_destroy(set);
1134 if (!(flags & LPF_ALL_MDT))
1137 ltds = &lfsck->li_mdt_descs;
1138 /* The sponsor broadcasts the request to other MDTs. */
1139 if (flags & LPF_BROADCAST) {
1140 flags &= ~LPF_ALL_MDT;
1141 lr->lr_flags &= ~LEF_TO_OST;
1145 /* non-sponsors link other MDT targets locally. */
1146 spin_lock(&ltds->ltd_lock);
1147 cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1148 ltd = LTD_TGT(ltds, idx);
1149 LASSERT(ltd != NULL);
1151 if (!list_empty(&ltd->ltd_layout_list))
1154 list_add_tail(&ltd->ltd_layout_list,
1155 &llmd->llmd_mdt_list);
1156 list_add_tail(&ltd->ltd_layout_phase_list,
1157 &llmd->llmd_mdt_phase1_list);
1159 spin_unlock(&ltds->ltd_lock);
1163 if (flags & LPF_BROADCAST)
1164 lr->lr_flags |= LEF_FORCE_STOP;
/* fallthrough */
1165 case LE_PHASE2_DONE:
1166 /* Notify other MDTs if needed, then the OSTs. */
1167 if (flags & LPF_ALL_MDT) {
1168 /* The sponsor broadcasts the request to other MDTs. */
1169 if (flags & LPF_BROADCAST) {
1170 lr->lr_flags &= ~LEF_TO_OST;
1171 head = &llmd->llmd_mdt_list;
1172 ltds = &lfsck->li_mdt_descs;
1176 /* non-sponsors unlink other MDT targets locally. */
1177 ltds = &lfsck->li_mdt_descs;
1178 spin_lock(&ltds->ltd_lock);
1179 list_for_each_entry_safe(ltd, next,
1180 &llmd->llmd_mdt_list,
1182 list_del_init(&ltd->ltd_layout_phase_list);
1183 list_del_init(&ltd->ltd_layout_list);
1185 spin_unlock(&ltds->ltd_lock);
1189 lr->lr_flags |= LEF_TO_OST;
1190 head = &llmd->llmd_ost_list;
1191 ltds = &lfsck->li_ost_descs;
1194 laia->laia_ltds = ltds;
1195 spin_lock(&ltds->ltd_lock);
1196 while (!list_empty(head)) {
1197 ltd = list_entry(head->next, struct lfsck_tgt_desc,
1199 if (!list_empty(&ltd->ltd_layout_phase_list))
1200 list_del_init(&ltd->ltd_layout_phase_list);
1201 list_del_init(&ltd->ltd_layout_list);
1202 laia->laia_ltd = ltd;
1203 spin_unlock(&ltds->ltd_lock);
1204 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1205 lfsck_layout_master_async_interpret,
1206 laia, LFSCK_NOTIFY);
1208 CERROR("%s: fail to notify %s %x for layout "
1209 "stop/phase2: rc = %d\n",
1210 lfsck_lfsck2name(lfsck),
1211 (lr->lr_flags & LEF_TO_OST) ? "OST" :
1212 "MDT", ltd->ltd_index, rc);
1215 spin_lock(&ltds->ltd_lock);
1217 spin_unlock(&ltds->ltd_lock);
1219 if (!(flags & LPF_BROADCAST))
1224 rc = ptlrpc_set_wait(set);
1226 ptlrpc_set_destroy(set);
1232 flags &= ~LPF_BROADCAST;
1234 case LE_PHASE1_DONE:
1235 llmd->llmd_touch_gen++;
1236 lr->lr_flags &= ~LEF_TO_OST;
1237 ltds = &lfsck->li_mdt_descs;
1238 laia->laia_ltds = ltds;
1239 spin_lock(&ltds->ltd_lock);
1240 while (!list_empty(&llmd->llmd_mdt_phase1_list)) {
1241 ltd = list_entry(llmd->llmd_mdt_phase1_list.next,
1242 struct lfsck_tgt_desc,
1243 ltd_layout_phase_list);
/* Already touched this pass: the whole list has been visited. */
1244 if (ltd->ltd_layout_gen == llmd->llmd_touch_gen)
1247 ltd->ltd_layout_gen = llmd->llmd_touch_gen;
1248 list_del_init(&ltd->ltd_layout_phase_list);
1249 list_add_tail(&ltd->ltd_layout_phase_list,
1250 &llmd->llmd_mdt_phase1_list);
1251 laia->laia_ltd = ltd;
1252 spin_unlock(&ltds->ltd_lock);
1253 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1254 lfsck_layout_master_async_interpret,
1255 laia, LFSCK_NOTIFY);
1257 CERROR("%s: fail to notify MDT %x for layout "
1258 "phase1 done: rc = %d\n",
1259 lfsck_lfsck2name(lfsck),
1260 ltd->ltd_index, rc);
1263 spin_lock(&ltds->ltd_lock);
1265 spin_unlock(&ltds->ltd_lock);
1268 CERROR("%s: unexpected LFSCK event: rc = %d\n",
1269 lfsck_lfsck2name(lfsck), lr->lr_event);
1275 rc = ptlrpc_set_wait(set);
1276 ptlrpc_set_destroy(set);
/* An LE_START that reached no OST at all is reported specially. */
1278 if (rc == 0 && lr->lr_event == LE_START &&
1279 list_empty(&llmd->llmd_ost_list))
/* Fold the phase2 ("double scan") outcome into the trace file: account
 * run time and checked counts, derive the final status from @rc (done,
 * stopped, or failed), move the component onto the idle list unless it
 * is merely paused, and persist everything under the component
 * semaphore. */
1285 static int lfsck_layout_double_scan_result(const struct lu_env *env,
1286 struct lfsck_component *com,
1289 struct lfsck_instance *lfsck = com->lc_lfsck;
1290 struct lfsck_layout *lo = com->lc_file_ram;
1291 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1293 down_write(&com->lc_sem);
1295 lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1296 HALF_SEC - lfsck->li_time_last_checkpoint);
1297 lo->ll_time_last_checkpoint = cfs_time_current_sec();
1298 lo->ll_objs_checked_phase2 += com->lc_new_checked;
1301 com->lc_journal = 0;
1302 if (lo->ll_flags & LF_INCOMPLETE)
1303 lo->ll_status = LS_PARTIAL;
1305 lo->ll_status = LS_COMPLETED;
1306 if (!(bk->lb_param & LPF_DRYRUN))
1307 lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
1308 lo->ll_time_last_complete = lo->ll_time_last_checkpoint;
1309 lo->ll_success_count++;
1310 } else if (rc == 0) {
1311 lo->ll_status = lfsck->li_status;
1312 if (lo->ll_status == 0)
1313 lo->ll_status = LS_STOPPED;
1315 lo->ll_status = LS_FAILED;
1318 if (lo->ll_status != LS_PAUSED) {
1319 spin_lock(&lfsck->li_lock);
1320 list_del_init(&com->lc_link);
1321 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1322 spin_unlock(&lfsck->li_lock);
1325 rc = lfsck_layout_store(env, com);
1327 up_write(&com->lc_sem);
/* Take a server-local EX inodebits DLM lock with the given @bits on
 * @obj, storing the handle in @lh for lfsck_layout_unlock().  On
 * enqueue failure the handle is zeroed so unlock remains safe. */
1332 static int lfsck_layout_lock(const struct lu_env *env,
1333 struct lfsck_component *com,
1334 struct dt_object *obj,
1335 struct lustre_handle *lh, __u64 bits)
1337 struct lfsck_thread_info *info = lfsck_env_info(env);
1338 ldlm_policy_data_t *policy = &info->lti_policy;
1339 struct ldlm_res_id *resid = &info->lti_resid;
1340 struct lfsck_instance *lfsck = com->lc_lfsck;
1341 __u64 flags = LDLM_FL_ATOMIC_CB;
1344 LASSERT(lfsck->li_namespace != NULL);
1346 memset(policy, 0, sizeof(*policy));
1347 policy->l_inodebits.bits = bits;
1348 fid_build_reg_res_name(lfsck_dto2fid(obj), resid);
1349 rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_IBITS,
1350 policy, LCK_EX, &flags, ldlm_blocking_ast,
1351 ldlm_completion_ast, NULL, NULL, 0,
1352 LVB_T_NONE, NULL, lh);
1353 if (rc == ELDLM_OK) {
1356 memset(lh, 0, sizeof(*lh));
/* Release the EX lock taken by lfsck_layout_lock() if the handle is in
 * use, and invalidate the handle afterwards. */
1363 static void lfsck_layout_unlock(struct lustre_handle *lh)
1365 if (lustre_handle_is_used(lh)) {
1366 ldlm_lock_decref(lh, LCK_EX);
1367 memset(lh, 0, sizeof(*lh));
/* Record @result on the transaction handle (so the commit callbacks can
 * see the operation's outcome) before stopping it. */
1371 static int lfsck_layout_trans_stop(const struct lu_env *env,
1372 struct dt_device *dev,
1373 struct thandle *handle, int result)
1377 handle->th_result = result;
1378 rc = dt_trans_stop(env, dev, handle);
/* Placeholder for orphan OST-object scanning against target @ltd. */
1387 static int lfsck_layout_scan_orphan(const struct lu_env *env,
1388 struct lfsck_component *com,
1389 struct lfsck_tgt_desc *ltd)
1391 /* XXX: To be extended in other patch. */
1396 /* For the MDT-object with dangling reference, we need to re-create
1397 * the missed OST-object with the known FID/owner information. */
1398 static int lfsck_layout_recreate_ostobj(const struct lu_env *env,
1399 struct lfsck_component *com,
1400 struct lfsck_layout_req *llr,
1403 struct lfsck_thread_info *info = lfsck_env_info(env);
1404 struct filter_fid *pfid = &info->lti_new_pfid;
1405 struct dt_allocation_hint *hint = &info->lti_hint;
1406 struct dt_object *parent = llr->llr_parent->llo_obj;
1407 struct dt_object *child = llr->llr_child;
1408 struct dt_device *dev = lfsck_obj2dt_dev(child);
1409 const struct lu_fid *tfid = lu_object_fid(&parent->do_lu);
1410 struct thandle *handle;
1412 struct lustre_handle lh = { 0 };
1416 CDEBUG(D_LFSCK, "Repair dangling reference for: parent "DFID
1417 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1418 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1419 llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid);
/* Freeze the parent's layout/xattrs while the child is re-created. */
1421 rc = lfsck_layout_lock(env, com, parent, &lh,
1422 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1426 handle = dt_trans_create(env, dev);
1428 GOTO(unlock1, rc = PTR_ERR(handle));
1430 hint->dah_parent = NULL;
/* Stash the parent FID in the child's filter_fid backpointer; the
 * f_ver slot carries the stripe (LOV) index. */
1432 pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
1433 pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
1434 pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
1435 buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
1437 rc = dt_declare_create(env, child, la, hint, NULL, handle);
1441 rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID,
1442 LU_XATTR_CREATE, handle);
1446 rc = dt_trans_start(env, dev, handle);
1450 dt_read_lock(env, parent, 0);
/* The parent went away while we prepared: nothing to repair. */
1451 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1452 GOTO(unlock2, rc = 1);
1454 rc = dt_create(env, child, la, hint, NULL, handle);
1458 rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, LU_XATTR_CREATE,
1459 handle, BYPASS_CAPA);
1464 dt_read_unlock(env, parent);
1467 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1470 lfsck_layout_unlock(&lh);
1475 /* If the OST-object does not recognize the MDT-object as its parent, and
1476 * there is no other MDT-object claims as its parent, then just trust the
1477 * given MDT-object as its parent. So update the OST-object filter_fid. */
/*
 * Repair LLIT_UNMATCHED_PAIR: rewrite the child's XATTR_NAME_FID
 * back-pointer to name \a parent, and sync the child's uid/gid to the
 * parent's.  The owner is re-read from the parent under its write lock
 * (rather than trusting \a pla) to pick up any chown/chgrp that raced
 * with the scan.  Returns 1 when skipped because the parent is dying.
 */
1478 static int lfsck_layout_repair_unmatched_pair(const struct lu_env *env,
1479 struct lfsck_component *com,
1480 struct lfsck_layout_req *llr,
1481 const struct lu_attr *pla)
1483 struct lfsck_thread_info *info = lfsck_env_info(env);
1484 struct filter_fid *pfid = &info->lti_new_pfid;
1485 struct lu_attr *tla = &info->lti_la3;
1486 struct dt_object *parent = llr->llr_parent->llo_obj;
1487 struct dt_object *child = llr->llr_child;
1488 struct dt_device *dev = lfsck_obj2dt_dev(child);
1489 const struct lu_fid *tfid = lu_object_fid(&parent->do_lu);
1490 struct thandle *handle;
1492 struct lustre_handle lh = { 0 };
1496 CDEBUG(D_LFSCK, "Repair unmatched MDT-OST pair for: parent "DFID
1497 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1498 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1499 llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid);
1501 rc = lfsck_layout_lock(env, com, parent, &lh,
1502 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1506 handle = dt_trans_create(env, dev);
1508 GOTO(unlock1, rc = PTR_ERR(handle));
1510 pfid->ff_parent.f_seq = cpu_to_le64(tfid->f_seq);
1511 pfid->ff_parent.f_oid = cpu_to_le32(tfid->f_oid);
1512 /* The ff_parent->f_ver is not the real parent fid->f_ver. Instead,
1513 * it is the OST-object index in the parent MDT-object layout. */
1514 pfid->ff_parent.f_ver = cpu_to_le32(llr->llr_lov_idx);
1515 buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
1517 rc = dt_declare_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle);
1521 tla->la_valid = LA_UID | LA_GID;
1522 tla->la_uid = pla->la_uid;
1523 tla->la_gid = pla->la_gid;
1524 rc = dt_declare_attr_set(env, child, tla, handle);
1528 rc = dt_trans_start(env, dev, handle);
/* Write-lock the parent: blocks concurrent destroy/attr_set while
 * we rewrite the child's back-pointer and owner. */
1532 dt_write_lock(env, parent, 0);
1533 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1534 GOTO(unlock2, rc = 1);
1536 rc = dt_xattr_set(env, child, buf, XATTR_NAME_FID, 0, handle,
1541 /* Get the latest parent's owner. */
1542 rc = dt_attr_get(env, parent, tla, BYPASS_CAPA);
1546 tla->la_valid = LA_UID | LA_GID;
1547 rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA);
1552 dt_write_unlock(env, parent);
1555 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1558 lfsck_layout_unlock(&lh);
1563 /* If there are more than one MDT-objects claim as the OST-object's parent,
1564 * and the OST-object only recognizes one of them, then we need to generate
1565 * new OST-object(s) with new fid(s) for the non-recognized MDT-object(s). */
/*
 * Repair LLIT_MULTIPLE_REFERENCED: allocate a brand-new (anonymous)
 * OST-object on the child's device, then rewrite the parent's LOV EA
 * slot at llr_lov_idx to reference the new object and bump
 * lmm_layout_gen.  The transaction is created on the MDT's "next"
 * device (pdev) since the layout EA of the parent is what is modified.
 * Returns 1 on successful repair, 0 when the repair became unnecessary
 * (layout changed or parent dying), negative errno on failure.
 */
1566 static int lfsck_layout_repair_multiple_references(const struct lu_env *env,
1567 struct lfsck_component *com,
1568 struct lfsck_layout_req *llr,
1572 struct lfsck_thread_info *info = lfsck_env_info(env);
1573 struct dt_allocation_hint *hint = &info->lti_hint;
1574 struct dt_object_format *dof = &info->lti_dof;
1575 struct dt_device *pdev = com->lc_lfsck->li_next;
1576 struct ost_id *oi = &info->lti_oi;
1577 struct dt_object *parent = llr->llr_parent->llo_obj;
1578 struct dt_device *cdev = lfsck_obj2dt_dev(llr->llr_child);
1579 struct dt_object *child = NULL;
1580 struct lu_device *d = &cdev->dd_lu_dev;
1581 struct lu_object *o = NULL;
1582 struct thandle *handle;
1583 struct lov_mds_md_v1 *lmm;
1584 struct lov_ost_data_v1 *objs;
1585 struct lustre_handle lh = { 0 };
1590 CDEBUG(D_LFSCK, "Repair multiple references for: parent "DFID
1591 ", OST-index %u, stripe-index %u, owner %u:%u\n",
1592 PFID(lfsck_dto2fid(parent)), llr->llr_ost_idx,
1593 llr->llr_lov_idx, la->la_uid, la->la_gid);
1595 rc = lfsck_layout_lock(env, com, parent, &lh,
1596 MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
1600 handle = dt_trans_create(env, pdev);
1602 GOTO(unlock1, rc = PTR_ERR(handle));
/* Allocate an anonymous object (new FID) on the child device, then
 * locate the layer of that object belonging to the device type. */
1604 o = lu_object_anon(env, d, NULL);
1606 GOTO(stop, rc = PTR_ERR(o));
1608 child = container_of(o, struct dt_object, do_lu);
1609 o = lu_object_locate(o->lo_header, d->ld_type);
1610 if (unlikely(o == NULL))
1611 GOTO(stop, rc = -EINVAL);
1613 child = container_of(o, struct dt_object, do_lu);
1614 la->la_valid = LA_UID | LA_GID;
1615 hint->dah_parent = NULL;
1617 dof->dof_type = DFT_REGULAR;
1618 rc = dt_declare_create(env, child, la, NULL, NULL, handle);
1622 rc = dt_declare_xattr_set(env, parent, buf, XATTR_NAME_LOV,
1623 LU_XATTR_REPLACE, handle);
1627 rc = dt_trans_start(env, pdev, handle);
1631 dt_write_lock(env, parent, 0);
1632 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1633 GOTO(unlock2, rc = 0);
/* Re-read the parent's LOV EA under lock; a zero/-ENODATA/-ERANGE
 * result means the layout vanished or changed shape -- skip. */
1635 rc = dt_xattr_get(env, parent, buf, XATTR_NAME_LOV, BYPASS_CAPA);
1636 if (unlikely(rc == 0 || rc == -ENODATA || rc == -ERANGE))
1637 GOTO(unlock2, rc = 0);
1640 rc = lfsck_layout_verify_header(lmm);
1644 /* Someone change layout during the LFSCK, no need to repair then. */
1645 if (le16_to_cpu(lmm->lmm_layout_gen) != llr->llr_parent->llo_gen)
1646 GOTO(unlock2, rc = 0);
1648 rc = dt_create(env, child, la, hint, dof, handle);
1652 /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
1653 * been verified in lfsck_layout_verify_header() already. If some
1654 * new magic introduced in the future, then layout LFSCK needs to
1655 * be updated also. */
1656 magic = le32_to_cpu(lmm->lmm_magic);
1657 if (magic == LOV_MAGIC_V1) {
1658 objs = &(lmm->lmm_objects[0]);
1660 LASSERT(magic == LOV_MAGIC_V3);
1661 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
/* Point the stripe slot at the new object and bump the layout
 * generation so clients refresh their cached layout. */
1664 lmm->lmm_layout_gen = cpu_to_le16(llr->llr_parent->llo_gen + 1);
1665 fid_to_ostid(lu_object_fid(&child->do_lu), oi);
1666 ostid_cpu_to_le(oi, &objs[llr->llr_lov_idx].l_ost_oi);
1667 objs[llr->llr_lov_idx].l_ost_gen = cpu_to_le32(0);
1668 objs[llr->llr_lov_idx].l_ost_idx = cpu_to_le32(llr->llr_ost_idx);
1669 rc = dt_xattr_set(env, parent, buf, XATTR_NAME_LOV,
1670 LU_XATTR_REPLACE, handle, BYPASS_CAPA);
/* Map "xattr_set succeeded" to rc = 1: one object was repaired. */
1672 GOTO(unlock2, rc = (rc == 0 ? 1 : rc));
1675 dt_write_unlock(env, parent);
1679 lu_object_put(env, &child->do_lu);
1681 dt_trans_stop(env, pdev, handle);
1684 lfsck_layout_unlock(&lh);
1689 /* If the MDT-object and the OST-object have different owner information,
1690 * then trust the MDT-object, because the normal chown/chgrp handle order
1691 * is from MDT to OST, and it is possible that some chown/chgrp operation
1692 * is partly done. */
/*
 * Repair LLIT_INCONSISTENT_OWNER: copy uid/gid from the parent
 * MDT-object onto the child OST-object.  The parent's owner is
 * re-fetched under the parent's read lock and compared against the
 * snapshot in \a pla; if they differ, a concurrent chown/chgrp is in
 * flight and the repair is skipped with rc = 1.
 */
1693 static int lfsck_layout_repair_owner(const struct lu_env *env,
1694 struct lfsck_component *com,
1695 struct lfsck_layout_req *llr,
1696 struct lu_attr *pla)
1698 struct lfsck_thread_info *info = lfsck_env_info(env);
1699 struct lu_attr *tla = &info->lti_la3;
1700 struct dt_object *parent = llr->llr_parent->llo_obj;
1701 struct dt_object *child = llr->llr_child;
1702 struct dt_device *dev = lfsck_obj2dt_dev(child);
1703 struct thandle *handle;
1707 CDEBUG(D_LFSCK, "Repair inconsistent file owner for: parent "DFID
1708 ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
1709 PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
1710 llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid);
1712 handle = dt_trans_create(env, dev);
1714 RETURN(PTR_ERR(handle));
1716 tla->la_uid = pla->la_uid;
1717 tla->la_gid = pla->la_gid;
1718 tla->la_valid = LA_UID | LA_GID;
1719 rc = dt_declare_attr_set(env, child, tla, handle);
1723 rc = dt_trans_start(env, dev, handle);
1727 /* Use the dt_object lock to serialize with destroy and attr_set. */
1728 dt_read_lock(env, parent, 0);
1729 if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
1730 GOTO(unlock, rc = 1);
1732 /* Get the latest parent's owner. */
1733 rc = dt_attr_get(env, parent, tla, BYPASS_CAPA);
1735 CWARN("%s: fail to get the latest parent's ("DFID") owner, "
1736 "not sure whether some others chown/chgrp during the "
1737 "LFSCK: rc = %d\n", lfsck_lfsck2name(com->lc_lfsck),
1738 PFID(lfsck_dto2fid(parent)), rc);
1743 /* Some others chown/chgrp during the LFSCK, needs to do nothing. */
1744 if (unlikely(tla->la_uid != pla->la_uid ||
1745 tla->la_gid != pla->la_gid))
1746 GOTO(unlock, rc = 1);
1748 tla->la_valid = LA_UID | LA_GID;
1749 rc = dt_attr_set(env, child, tla, handle, BYPASS_CAPA);
1754 dt_read_unlock(env, parent);
1757 rc = lfsck_layout_trans_stop(env, dev, handle, rc);
1762 /* Check whether the OST-object correctly back points to the
1763 * MDT-object (@parent) via the XATTR_NAME_FID xattr (@pfid). */
/*
 * Classify the parent/child relationship.  Return value convention:
 * 0 = consistent; > 0 = an LLIT_* inconsistency type for the caller to
 * repair; < 0 = error.  The three main cases:
 *  - @pfid is zero: never written; tolerate only if the child is empty
 *    and the owners match, otherwise LLIT_UNMATCHED_PAIR / owner fix;
 *  - @pfid names @parent: consistent iff the stripe index also matches;
 *  - @pfid names some other object: load that object's LOV EA and see
 *    whether it really contains the child -- if so the child is
 *    multiply referenced, otherwise the pair is unmatched.
 */
1764 static int lfsck_layout_check_parent(const struct lu_env *env,
1765 struct lfsck_component *com,
1766 struct dt_object *parent,
1767 const struct lu_fid *pfid,
1768 const struct lu_fid *cfid,
1769 const struct lu_attr *pla,
1770 const struct lu_attr *cla,
1771 struct lfsck_layout_req *llr,
1772 struct lu_buf *lov_ea, __u32 idx)
1774 struct lfsck_thread_info *info = lfsck_env_info(env);
1775 struct lu_buf *buf = &info->lti_big_buf;
1776 struct dt_object *tobj;
1777 struct lov_mds_md_v1 *lmm;
1778 struct lov_ost_data_v1 *objs;
1785 if (fid_is_zero(pfid)) {
1786 /* client never wrote. */
1787 if (cla->la_size == 0 && cla->la_blocks == 0) {
1788 if (unlikely(cla->la_uid != pla->la_uid ||
1789 cla->la_gid != pla->la_gid))
1790 RETURN (LLIT_INCONSISTENT_OWNER);
1795 RETURN(LLIT_UNMATCHED_PAIR);
1798 if (unlikely(!fid_is_sane(pfid)))
1799 RETURN(LLIT_UNMATCHED_PAIR);
1801 if (lu_fid_eq(pfid, lu_object_fid(&parent->do_lu))) {
1802 if (llr->llr_lov_idx == idx)
1805 RETURN(LLIT_UNMATCHED_PAIR);
/* The back-pointer names a third object: look it up to decide
 * between "multiple referenced" and "unmatched pair". */
1808 tobj = lfsck_object_find(env, com->lc_lfsck, pfid);
1810 RETURN(LLIT_UNMATCHED_PAIR);
1813 RETURN(PTR_ERR(tobj));
1815 if (!dt_object_exists(tobj))
1816 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1818 /* Load the tobj's layout EA, in spite of it is a local MDT-object or
1819 * remote one on another MDT. Then check whether the given OST-object
1820 * is in such layout. If yes, it is multiple referenced, otherwise it
1821 * is unmatched referenced case. */
1822 rc = lfsck_layout_get_lovea(env, tobj, buf, NULL);
1824 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1830 rc = lfsck_layout_verify_header(lmm);
1834 /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
1835 * been verified in lfsck_layout_verify_header() already. If some
1836 * new magic introduced in the future, then layout LFSCK needs to
1837 * be updated also. */
1838 magic = le32_to_cpu(lmm->lmm_magic);
1839 if (magic == LOV_MAGIC_V1) {
1840 objs = &(lmm->lmm_objects[0]);
1842 LASSERT(magic == LOV_MAGIC_V3);
1843 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
/* Scan every stripe of the other object's layout for the child FID. */
1846 count = le16_to_cpu(lmm->lmm_stripe_count);
1847 for (i = 0; i < count; i++, objs++) {
1848 struct lu_fid *tfid = &info->lti_fid2;
1849 struct ost_id *oi = &info->lti_oi;
1851 ostid_le_to_cpu(&objs->l_ost_oi, oi);
1852 ostid_to_fid(tfid, oi, le32_to_cpu(objs->l_ost_idx));
1853 if (lu_fid_eq(cfid, tfid)) {
1856 GOTO(out, rc = LLIT_MULTIPLE_REFERENCED);
1860 GOTO(out, rc = LLIT_UNMATCHED_PAIR);
1863 lfsck_object_put(env, tobj);
/*
 * Process one queued layout request: diagnose the parent/child pair
 * and dispatch to the matching repair routine, then account the
 * outcome in the in-RAM trace file under lc_sem.
 *
 * Diagnosis: missing child attrs => LLIT_DANGLING; malformed or absent
 * filter_fid => LLIT_UNMATCHED_PAIR (or deeper classification via
 * lfsck_layout_check_parent()); uid/gid mismatch =>
 * LLIT_INCONSISTENT_OWNER.  Under LPF_DRYRUN only the statistics are
 * updated.  Connectivity errors mark the whole run LF_INCOMPLETE
 * instead of counting as a per-object failure.
 */
1868 static int lfsck_layout_assistant_handle_one(const struct lu_env *env,
1869 struct lfsck_component *com,
1870 struct lfsck_layout_req *llr)
1872 struct lfsck_layout *lo = com->lc_file_ram;
1873 struct lfsck_thread_info *info = lfsck_env_info(env);
1874 struct filter_fid_old *pea = &info->lti_old_pfid;
1875 struct lu_fid *pfid = &info->lti_fid;
1876 struct lu_buf *buf = NULL;
1877 struct dt_object *parent = llr->llr_parent->llo_obj;
1878 struct dt_object *child = llr->llr_child;
1879 struct lu_attr *pla = &info->lti_la;
1880 struct lu_attr *cla = &info->lti_la2;
1881 struct lfsck_instance *lfsck = com->lc_lfsck;
1882 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
1883 enum lfsck_layout_inconsistency_type type = LLIT_NONE;
1888 rc = dt_attr_get(env, parent, pla, BYPASS_CAPA);
1890 if (lu_object_is_dying(parent->do_lu.lo_header))
/* Child attrs missing while the parent still references it:
 * the OST-object is a dangling reference. */
1896 rc = dt_attr_get(env, child, cla, BYPASS_CAPA);
1897 if (rc == -ENOENT) {
1898 if (lu_object_is_dying(parent->do_lu.lo_header))
1901 type = LLIT_DANGLING;
/* Read the child's back-pointer; accept either the old (larger)
 * or the current filter_fid on-disk formats. */
1908 buf = lfsck_buf_get(env, pea, sizeof(struct filter_fid_old));
1909 rc= dt_xattr_get(env, child, buf, XATTR_NAME_FID, BYPASS_CAPA);
1910 if (unlikely(rc >= 0 && rc != sizeof(struct filter_fid_old) &&
1911 rc != sizeof(struct filter_fid))) {
1912 type = LLIT_UNMATCHED_PAIR;
1916 if (rc < 0 && rc != -ENODATA)
1919 if (rc == -ENODATA) {
1922 fid_le_to_cpu(pfid, &pea->ff_parent);
1923 /* OST-object does not save parent FID::f_ver, instead,
1924 * the OST-object index in the parent MDT-object layout
1925 * EA reuses the pfid->f_ver. */
1930 rc = lfsck_layout_check_parent(env, com, parent, pfid,
1931 lu_object_fid(&child->do_lu),
1932 pla, cla, llr, buf, idx);
1941 if (unlikely(cla->la_uid != pla->la_uid ||
1942 cla->la_gid != pla->la_gid)) {
1943 type = LLIT_INCONSISTENT_OWNER;
1948 if (bk->lb_param & LPF_DRYRUN) {
1949 if (type != LLIT_NONE)
/* Dispatch the repair according to the diagnosed type. */
1957 memset(cla, 0, sizeof(*cla));
1958 cla->la_uid = pla->la_uid;
1959 cla->la_gid = pla->la_gid;
1960 cla->la_mode = S_IFREG | 0666;
1961 cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
1962 LA_ATIME | LA_MTIME | LA_CTIME;
1963 rc = lfsck_layout_recreate_ostobj(env, com, llr, cla);
1965 case LLIT_UNMATCHED_PAIR:
1966 rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
1968 case LLIT_MULTIPLE_REFERENCED:
1969 rc = lfsck_layout_repair_multiple_references(env, com, llr,
1972 case LLIT_INCONSISTENT_OWNER:
1973 rc = lfsck_layout_repair_owner(env, com, llr, pla);
/* Record the outcome in the trace file under the component lock. */
1983 down_write(&com->lc_sem);
1985 /* If cannot touch the target server,
1986 * mark the LFSCK as INCOMPLETE. */
1987 if (rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -ETIMEDOUT ||
1988 rc == -EHOSTDOWN || rc == -EHOSTUNREACH) {
1989 lo->ll_flags |= LF_INCOMPLETE;
1990 lo->ll_objs_skipped++;
1993 lo->ll_objs_failed_phase1++;
1995 } else if (rc > 0) {
1996 LASSERTF(type > LLIT_NONE && type <= LLIT_MAX,
1997 "unknown type = %d\n", type);
1999 lo->ll_objs_repaired[type - 1]++;
2001 up_write(&com->lc_sem);
/*
 * Main body of the layout LFSCK master assistant thread (started by
 * lfsck_layout_master_prep() via kthread_run).  Lifecycle:
 *
 *  1. broadcast LE_START to all involved targets;
 *  2. drain llmd_req_list, handling each request with
 *     lfsck_layout_assistant_handle_one(); wake the master engine when
 *     the prefetch window drops below lb_async_windows;
 *  3. on llmd_to_post, broadcast LE_PHASE1_DONE;
 *  4. on llmd_to_double_scan, poll the OSTs (every 30s, or when
 *     notified) and run lfsck_layout_scan_orphan() on each target that
 *     reaches llmd_ost_phase2_list, until phase-1 list is empty;
 *  5. cleanup: free unhandled requests, broadcast the final event
 *     (LE_PHASE2_DONE / LE_STOP with a status reflecting pause, stop,
 *     broadcast-stop, or failure), store the double-scan result (unless
 *     exiting forcibly, so unverified objects are re-handled next run),
 *     mark the thread SVC_STOPPED and wake the master engine.
 */
2006 static int lfsck_layout_assistant(void *args)
2008 struct lfsck_thread_args *lta = args;
2009 struct lu_env *env = &lta->lta_env;
2010 struct lfsck_component *com = lta->lta_com;
2011 struct lfsck_instance *lfsck = lta->lta_lfsck;
2012 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2013 struct lfsck_position *pos = &com->lc_pos_start;
2014 struct lfsck_thread_info *info = lfsck_env_info(env);
2015 struct lfsck_request *lr = &info->lti_lr;
2016 struct lfsck_layout_master_data *llmd = com->lc_data;
2017 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2018 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2019 struct lfsck_layout_req *llr;
2020 struct l_wait_info lwi = { 0 };
2026 if (lta->lta_lsp->lsp_start != NULL)
2027 flags = lta->lta_lsp->lsp_start->ls_flags;
2029 flags = bk->lb_param;
/* Announce the LFSCK start to all other involved servers. */
2030 memset(lr, 0, sizeof(*lr));
2031 lr->lr_event = LE_START;
2032 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2033 lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
2035 lr->lr_speed = bk->lb_speed_limit;
2036 lr->lr_version = bk->lb_version;
2037 lr->lr_param = bk->lb_param;
2038 lr->lr_async_windows = bk->lb_async_windows;
2039 if (pos->lp_oit_cookie <= 1)
2040 lr->lr_param |= LPF_RESET;
2042 rc = lfsck_layout_master_notify_others(env, com, lr, flags);
2044 CERROR("%s: fail to notify others for layout start: rc = %d\n",
2045 lfsck_lfsck2name(lfsck), rc);
2049 spin_lock(&llmd->llmd_lock);
2050 thread_set_flags(athread, SVC_RUNNING);
2051 spin_unlock(&llmd->llmd_lock);
2052 wake_up_all(&mthread->t_ctl_waitq);
/* Phase 1: consume queued requests produced by the master engine. */
2055 while (!list_empty(&llmd->llmd_req_list)) {
2056 bool wakeup = false;
2058 if (unlikely(llmd->llmd_exit))
2059 GOTO(cleanup1, rc = llmd->llmd_post_result);
2061 llr = list_entry(llmd->llmd_req_list.next,
2062 struct lfsck_layout_req,
2064 /* Only the lfsck_layout_assistant thread itself can
2065 * remove the "llr" from the head of the list, LFSCK
2066 * engine thread only inserts other new "lld" at the
2067 * end of the list. So it is safe to handle current
2068 * "llr" without the spin_lock. */
2069 rc = lfsck_layout_assistant_handle_one(env, com, llr);
2070 spin_lock(&llmd->llmd_lock);
2071 list_del_init(&llr->llr_list);
2072 if (bk->lb_async_windows != 0 &&
2073 llmd->llmd_prefetched >= bk->lb_async_windows)
2076 llmd->llmd_prefetched--;
2077 spin_unlock(&llmd->llmd_lock);
2079 wake_up_all(&mthread->t_ctl_waitq);
2081 lfsck_layout_req_fini(env, llr);
2082 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
2086 /* Wakeup the master engine if it is waiting in checkpoint. */
2087 wake_up_all(&mthread->t_ctl_waitq);
2089 l_wait_event(athread->t_ctl_waitq,
2090 !lfsck_layout_req_empty(llmd) ||
2092 llmd->llmd_to_post ||
2093 llmd->llmd_to_double_scan,
2096 if (unlikely(llmd->llmd_exit))
2097 GOTO(cleanup1, rc = llmd->llmd_post_result);
2099 if (!list_empty(&llmd->llmd_req_list))
/* Phase-1 finished on this MDT: tell the other targets. */
2102 if (llmd->llmd_to_post) {
2103 llmd->llmd_to_post = 0;
2104 LASSERT(llmd->llmd_post_result > 0);
2106 memset(lr, 0, sizeof(*lr));
2107 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2108 lr->lr_event = LE_PHASE1_DONE;
2109 lr->lr_status = llmd->llmd_post_result;
2110 rc = lfsck_layout_master_notify_others(env, com, lr, 0);
2112 CERROR("%s: failed to notify others "
2113 "for layout post: rc = %d\n",
2114 lfsck_lfsck2name(lfsck), rc);
2116 /* Wakeup the master engine to go ahead. */
2117 wake_up_all(&mthread->t_ctl_waitq);
/* Phase 2 (double scan): poll OSTs and handle orphan scanning. */
2120 if (llmd->llmd_to_double_scan) {
2121 llmd->llmd_to_double_scan = 0;
2122 atomic_inc(&lfsck->li_double_scan_count);
2123 llmd->llmd_in_double_scan = 1;
2124 wake_up_all(&mthread->t_ctl_waitq);
2126 while (llmd->llmd_in_double_scan) {
2127 struct lfsck_tgt_descs *ltds =
2128 &lfsck->li_ost_descs;
2129 struct lfsck_tgt_desc *ltd;
2131 rc = lfsck_layout_master_query_others(env, com);
2132 if (lfsck_layout_master_to_orphan(llmd))
2138 /* Pull LFSCK status on related targets once
2139 * per 30 seconds if we are not notified. */
2140 lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30),
2141 cfs_time_seconds(1),
2143 rc = l_wait_event(athread->t_ctl_waitq,
2144 lfsck_layout_master_to_orphan(llmd) ||
2146 !thread_is_running(mthread),
2149 if (unlikely(llmd->llmd_exit ||
2150 !thread_is_running(mthread)))
2151 GOTO(cleanup2, rc = 0);
2153 if (rc == -ETIMEDOUT)
/* Take targets off the phase-2 list one at a time and scan
 * each for orphan OST-objects. */
2160 spin_lock(&ltds->ltd_lock);
2162 &llmd->llmd_ost_phase2_list)) {
2164 llmd->llmd_ost_phase2_list.next,
2165 struct lfsck_tgt_desc,
2166 ltd_layout_phase_list);
2168 &ltd->ltd_layout_phase_list);
2169 spin_unlock(&ltds->ltd_lock);
2171 rc = lfsck_layout_scan_orphan(env, com,
2174 bk->lb_param & LPF_FAILOUT)
2177 if (unlikely(llmd->llmd_exit ||
2178 !thread_is_running(mthread)))
2179 GOTO(cleanup2, rc = 0);
2181 spin_lock(&ltds->ltd_lock);
2184 if (list_empty(&llmd->llmd_ost_phase1_list)) {
2185 spin_unlock(&ltds->ltd_lock);
2186 GOTO(cleanup2, rc = 1);
2188 spin_unlock(&ltds->ltd_lock);
2194 /* Cleanup the unfinished requests. */
2195 spin_lock(&llmd->llmd_lock);
2197 llmd->llmd_assistant_status = rc;
2199 while (!list_empty(&llmd->llmd_req_list)) {
2200 llr = list_entry(llmd->llmd_req_list.next,
2201 struct lfsck_layout_req,
2203 list_del_init(&llr->llr_list);
2204 llmd->llmd_prefetched--;
/* Drop the lock around req_fini; reacquire to continue draining. */
2205 spin_unlock(&llmd->llmd_lock);
2206 lfsck_layout_req_fini(env, llr);
2207 spin_lock(&llmd->llmd_lock);
2209 spin_unlock(&llmd->llmd_lock);
2211 LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n",
2212 llmd->llmd_prefetched);
/* Broadcast the final event/status to all involved targets. */
2215 memset(lr, 0, sizeof(*lr));
2216 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2218 lr->lr_event = LE_PHASE2_DONE;
2221 } else if (rc == 0) {
2222 lr->lr_event = LE_STOP;
2223 if (lfsck->li_status == LS_PAUSED ||
2224 lfsck->li_status == LS_CO_PAUSED) {
2226 lr->lr_status = LS_CO_PAUSED;
2227 } else if (lfsck->li_status == LS_STOPPED ||
2228 lfsck->li_status == LS_CO_STOPPED) {
2229 flags = lfsck->li_flags;
2230 if (flags & LPF_BROADCAST)
2231 lr->lr_status = LS_STOPPED;
2233 lr->lr_status = LS_CO_STOPPED;
2238 lr->lr_event = LE_STOP;
2240 lr->lr_status = LS_CO_FAILED;
2243 rc1 = lfsck_layout_master_notify_others(env, com, lr, flags);
2245 CERROR("%s: failed to notify others for layout quit: rc = %d\n",
2246 lfsck_lfsck2name(lfsck), rc1);
2250 /* Under force exit case, some requests may be just freed without
2251 * verification, those objects should be re-handled when next run.
2252 * So not update the on-disk tracing file under such case. */
2253 if (!llmd->llmd_exit)
2254 rc1 = lfsck_layout_double_scan_result(env, com, rc);
2257 if (llmd->llmd_in_double_scan)
2258 atomic_dec(&lfsck->li_double_scan_count);
2260 spin_lock(&llmd->llmd_lock);
2261 llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc);
2262 thread_set_flags(athread, SVC_STOPPED);
2263 wake_up_all(&mthread->t_ctl_waitq);
2264 spin_unlock(&llmd->llmd_lock);
2265 lfsck_thread_args_fini(lta);
/*
 * Interpret callback for the async LFSCK_QUERY RPC sent by
 * lfsck_layout_async_query().  If the queried master replied that it is
 * no longer scanning (or the RPC failed -- assume the target finished),
 * remove its llst entry from the slave's master list.  Always drops the
 * references taken when the request was packed (llst, component,
 * export).
 */
2271 lfsck_layout_slave_async_interpret(const struct lu_env *env,
2272 struct ptlrpc_request *req,
2275 struct lfsck_layout_slave_async_args *llsaa = args;
2276 struct obd_export *exp = llsaa->llsaa_exp;
2277 struct lfsck_component *com = llsaa->llsaa_com;
2278 struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst;
2279 struct lfsck_layout_slave_data *llsd = com->lc_data;
2283 /* It is quite probably caused by target crash,
2284 * to make the LFSCK can go ahead, assume that
2285 * the target finished the LFSCK prcoessing. */
2288 struct lfsck_reply *lr;
2290 lr = req_capsule_server_get(&req->rq_pill, &RMF_LFSCK_REPLY);
2291 if (lr->lr_status != LS_SCANNING_PHASE1 &&
2292 lr->lr_status != LS_SCANNING_PHASE2)
2296 lfsck_layout_llst_del(llsd, llst);
2297 lfsck_layout_llst_put(llst);
2298 lfsck_component_put(env, com);
2299 class_export_put(exp);
/*
 * Pack an asynchronous LFSCK_QUERY RPC for the given master export and
 * add it to \a set.  The interpret callback
 * (lfsck_layout_slave_async_interpret) owns the references carried in
 * llsaa: the export, a new reference on \a com, and \a llst.
 */
2304 static int lfsck_layout_async_query(const struct lu_env *env,
2305 struct lfsck_component *com,
2306 struct obd_export *exp,
2307 struct lfsck_layout_slave_target *llst,
2308 struct lfsck_request *lr,
2309 struct ptlrpc_request_set *set)
2311 struct lfsck_layout_slave_async_args *llsaa;
2312 struct ptlrpc_request *req;
2313 struct lfsck_request *tmp;
2317 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_QUERY);
2321 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_QUERY);
2323 ptlrpc_request_free(req);
2327 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
2329 ptlrpc_request_set_replen(req);
/* Stash callback state in the request's async-args area. */
2331 llsaa = ptlrpc_req_async_args(req);
2332 llsaa->llsaa_exp = exp;
2333 llsaa->llsaa_com = lfsck_component_get(com);
2334 llsaa->llsaa_llst = llst;
2335 req->rq_interpret_reply = lfsck_layout_slave_async_interpret;
2336 ptlrpc_set_add_req(set, req);
/*
 * Pack an asynchronous LFSCK_NOTIFY RPC carrying \a lr to the given
 * export and add it to \a set.  Fire-and-forget: no interpret callback
 * is installed (unlike lfsck_layout_async_query()).
 */
2341 static int lfsck_layout_async_notify(const struct lu_env *env,
2342 struct obd_export *exp,
2343 struct lfsck_request *lr,
2344 struct ptlrpc_request_set *set)
2346 struct ptlrpc_request *req;
2347 struct lfsck_request *tmp;
2351 req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_LFSCK_NOTIFY);
2355 rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, LFSCK_NOTIFY);
2357 ptlrpc_request_free(req);
2361 tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
2363 ptlrpc_request_set_replen(req);
2364 ptlrpc_set_add_req(set, req);
/*
 * Slave (OST) side: send an async LE_QUERY to every master MDT in
 * llsd_master_list, then wait for all replies.  A per-walk generation
 * counter (llsd_touch_gen) plus move-to-tail lets the list be iterated
 * safely while llsd_lock is dropped around the RPC preparation: once
 * an entry's generation matches the current walk it has been visited.
 * Masters whose LWP export cannot be found are dropped from the list.
 */
2370 lfsck_layout_slave_query_master(const struct lu_env *env,
2371 struct lfsck_component *com)
2373 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
2374 struct lfsck_instance *lfsck = com->lc_lfsck;
2375 struct lfsck_layout_slave_data *llsd = com->lc_data;
2376 struct lfsck_layout_slave_target *llst;
2377 struct obd_export *exp;
2378 struct ptlrpc_request_set *set;
2384 set = ptlrpc_prep_set();
2388 memset(lr, 0, sizeof(*lr));
2389 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2390 lr->lr_event = LE_QUERY;
2391 lr->lr_active = LT_LAYOUT;
2393 llsd->llsd_touch_gen++;
2394 spin_lock(&llsd->llsd_lock);
2395 while (!list_empty(&llsd->llsd_master_list)) {
2396 llst = list_entry(llsd->llsd_master_list.next,
2397 struct lfsck_layout_slave_target,
/* Generation already matches: every entry has been visited. */
2399 if (llst->llst_gen == llsd->llsd_touch_gen)
2402 llst->llst_gen = llsd->llsd_touch_gen;
2403 list_del(&llst->llst_list);
2404 list_add_tail(&llst->llst_list,
2405 &llsd->llsd_master_list);
2406 atomic_inc(&llst->llst_ref);
2407 spin_unlock(&llsd->llsd_lock);
2409 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
/* No LWP connection to that master: forget it. */
2412 lfsck_layout_llst_del(llsd, llst);
2413 lfsck_layout_llst_put(llst);
2414 spin_lock(&llsd->llsd_lock);
2418 rc = lfsck_layout_async_query(env, com, exp, llst, lr, set);
2420 CERROR("%s: slave fail to query %s for layout: "
2421 "rc = %d\n", lfsck_lfsck2name(lfsck),
2422 exp->exp_obd->obd_name, rc);
2424 lfsck_layout_llst_put(llst);
2425 class_export_put(exp);
2429 spin_lock(&llsd->llsd_lock);
2431 spin_unlock(&llsd->llsd_lock);
/* Wait for all queued queries to complete, then tear down the set. */
2434 rc = ptlrpc_set_wait(set);
2435 ptlrpc_set_destroy(set);
2437 RETURN(rc1 != 0 ? rc1 : rc);
/*
 * Slave (OST) side: broadcast \a event (with \a result as status and
 * LEF_FROM_OST set) to every master MDT in llsd_master_list, then wait
 * for completion.  Uses the same touch-generation walk as
 * lfsck_layout_slave_query_master() to iterate the list safely while
 * dropping llsd_lock around RPC preparation.
 */
2441 lfsck_layout_slave_notify_master(const struct lu_env *env,
2442 struct lfsck_component *com,
2443 enum lfsck_events event, int result)
2445 struct lfsck_instance *lfsck = com->lc_lfsck;
2446 struct lfsck_layout_slave_data *llsd = com->lc_data;
2447 struct lfsck_request *lr = &lfsck_env_info(env)->lti_lr;
2448 struct lfsck_layout_slave_target *llst;
2449 struct obd_export *exp;
2450 struct ptlrpc_request_set *set;
2455 set = ptlrpc_prep_set();
2459 memset(lr, 0, sizeof(*lr));
2460 lr->lr_event = event;
2461 lr->lr_flags = LEF_FROM_OST;
2462 lr->lr_status = result;
2463 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
2464 lr->lr_active = LT_LAYOUT;
2465 llsd->llsd_touch_gen++;
2466 spin_lock(&llsd->llsd_lock);
2467 while (!list_empty(&llsd->llsd_master_list)) {
2468 llst = list_entry(llsd->llsd_master_list.next,
2469 struct lfsck_layout_slave_target,
2471 if (llst->llst_gen == llsd->llsd_touch_gen)
2474 llst->llst_gen = llsd->llsd_touch_gen;
2475 list_del(&llst->llst_list);
2476 list_add_tail(&llst->llst_list,
2477 &llsd->llsd_master_list);
2478 atomic_inc(&llst->llst_ref);
2479 spin_unlock(&llsd->llsd_lock);
2481 exp = lustre_find_lwp_by_index(lfsck->li_obd->obd_name,
2484 lfsck_layout_llst_del(llsd, llst);
2485 lfsck_layout_llst_put(llst);
2486 spin_lock(&llsd->llsd_lock);
2490 rc = lfsck_layout_async_notify(env, exp, lr, set);
2492 CERROR("%s: slave fail to notify %s for layout: "
2493 "rc = %d\n", lfsck_lfsck2name(lfsck),
2494 exp->exp_obd->obd_name, rc);
2497 lfsck_layout_llst_put(llst);
2498 class_export_put(exp);
2499 spin_lock(&llsd->llsd_lock);
2501 spin_unlock(&llsd->llsd_lock);
2504 rc = ptlrpc_set_wait(set);
2506 ptlrpc_set_destroy(set);
/*
 * Reset the layout LFSCK trace file to a pristine LS_INIT state and
 * persist it.  When \a init is false (re-run rather than first
 * initialization) the historical success count and last-complete time
 * survive the reset.
 */
2513 static int lfsck_layout_reset(const struct lu_env *env,
2514 struct lfsck_component *com, bool init)
2516 struct lfsck_layout *lo = com->lc_file_ram;
2519 down_write(&com->lc_sem);
2521 memset(lo, 0, com->lc_file_size);
/* Non-init reset: preserve cumulative history across the wipe. */
2523 __u32 count = lo->ll_success_count;
2524 __u64 last_time = lo->ll_time_last_complete;
2526 memset(lo, 0, com->lc_file_size);
2527 lo->ll_success_count = count;
2528 lo->ll_time_last_complete = last_time;
2531 lo->ll_magic = LFSCK_LAYOUT_MAGIC;
2532 lo->ll_status = LS_INIT;
2534 rc = lfsck_layout_store(env, com);
2535 up_write(&com->lc_sem);
/*
 * Account one phase-1 failure in the in-RAM trace file.  If this is
 * the first inconsistency seen, remember the current OI iterator
 * position so a later (dryrun-drop) run can restart from it.
 */
2540 static void lfsck_layout_fail(const struct lu_env *env,
2541 struct lfsck_component *com, bool new_checked)
2543 struct lfsck_layout *lo = com->lc_file_ram;
2545 down_write(&com->lc_sem);
2547 com->lc_new_checked++;
2548 lo->ll_objs_failed_phase1++;
2549 if (lo->ll_pos_first_inconsistent == 0) {
2550 struct lfsck_instance *lfsck = com->lc_lfsck;
2552 lo->ll_pos_first_inconsistent =
2553 lfsck->li_obj_oit->do_index_ops->dio_it.store(env,
2556 up_write(&com->lc_sem);
/*
 * Master-side checkpoint: wait until the assistant thread has drained
 * llmd_req_list (or stopped), then record the current scan position,
 * accumulated run time and per-phase counters, and store the trace
 * file.  Skipped entirely when nothing new was checked and this is not
 * the initial checkpoint.
 */
2559 static int lfsck_layout_master_checkpoint(const struct lu_env *env,
2560 struct lfsck_component *com, bool init)
2562 struct lfsck_instance *lfsck = com->lc_lfsck;
2563 struct lfsck_layout *lo = com->lc_file_ram;
2564 struct lfsck_layout_master_data *llmd = com->lc_data;
2565 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2566 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2567 struct l_wait_info lwi = { 0 };
2570 if (com->lc_new_checked == 0 && !init)
/* Let the assistant catch up so the checkpoint is consistent. */
2573 l_wait_event(mthread->t_ctl_waitq,
2574 list_empty(&llmd->llmd_req_list) ||
2575 !thread_is_running(mthread) ||
2576 thread_is_stopped(athread),
2579 if (!thread_is_running(mthread) || thread_is_stopped(athread))
2582 down_write(&com->lc_sem);
2584 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2586 lo->ll_pos_last_checkpoint =
2587 lfsck->li_pos_current.lp_oit_cookie;
/* HALF_SEC rounds the elapsed time to the nearest second. */
2588 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2589 HALF_SEC - lfsck->li_time_last_checkpoint);
2590 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2591 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2592 com->lc_new_checked = 0;
2595 rc = lfsck_layout_store(env, com);
2596 up_write(&com->lc_sem);
/*
 * Slave-side checkpoint: same bookkeeping as the master variant
 * (position, run time, phase-1 counters, persist) but with no
 * assistant thread to wait for.  Skipped when nothing new was checked
 * and this is not the initial checkpoint.
 */
2601 static int lfsck_layout_slave_checkpoint(const struct lu_env *env,
2602 struct lfsck_component *com, bool init)
2604 struct lfsck_instance *lfsck = com->lc_lfsck;
2605 struct lfsck_layout *lo = com->lc_file_ram;
2608 if (com->lc_new_checked == 0 && !init)
2611 down_write(&com->lc_sem);
2614 lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie;
2616 lo->ll_pos_last_checkpoint =
2617 lfsck->li_pos_current.lp_oit_cookie;
2618 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
2619 HALF_SEC - lfsck->li_time_last_checkpoint);
2620 lo->ll_time_last_checkpoint = cfs_time_current_sec();
2621 lo->ll_objs_checked_phase1 += com->lc_new_checked;
2622 com->lc_new_checked = 0;
2625 rc = lfsck_layout_store(env, com);
2627 up_write(&com->lc_sem);
/*
 * Common preparation for a layout LFSCK run: decide the starting
 * status and OI iterator position from the previous run's state.
 *  - COMPLETED/PARTIAL: reset the trace file first;
 *  - LF_SCANNED_ONCE set: go straight to phase 2 (move onto the
 *    double-scan list) unless a dryrun result is being replayed, in
 *    which case phase 1 restarts from the first inconsistency with all
 *    counters cleared;
 *  - otherwise: phase 1 resumes from the last checkpoint + 1, or from
 *    the first inconsistency when replaying a dryrun.
 */
2632 static int lfsck_layout_prep(const struct lu_env *env,
2633 struct lfsck_component *com)
2635 struct lfsck_instance *lfsck = com->lc_lfsck;
2636 struct lfsck_layout *lo = com->lc_file_ram;
2637 struct lfsck_position *pos = &com->lc_pos_start;
2639 fid_zero(&pos->lp_dir_parent);
2640 pos->lp_dir_cookie = 0;
2641 if (lo->ll_status == LS_COMPLETED ||
2642 lo->ll_status == LS_PARTIAL) {
2645 rc = lfsck_layout_reset(env, com, false);
2650 down_write(&com->lc_sem);
2652 lo->ll_time_latest_start = cfs_time_current_sec();
2654 spin_lock(&lfsck->li_lock);
2655 if (lo->ll_flags & LF_SCANNED_ONCE) {
2656 if (!lfsck->li_drop_dryrun ||
2657 lo->ll_pos_first_inconsistent == 0) {
2658 lo->ll_status = LS_SCANNING_PHASE2;
2659 list_del_init(&com->lc_link);
2660 list_add_tail(&com->lc_link,
2661 &lfsck->li_list_double_scan);
2662 pos->lp_oit_cookie = 0;
/* Replaying a dryrun: redo phase 1 from scratch. */
2666 lo->ll_status = LS_SCANNING_PHASE1;
2667 lo->ll_run_time_phase1 = 0;
2668 lo->ll_run_time_phase2 = 0;
2669 lo->ll_objs_checked_phase1 = 0;
2670 lo->ll_objs_checked_phase2 = 0;
2671 lo->ll_objs_failed_phase1 = 0;
2672 lo->ll_objs_failed_phase2 = 0;
2673 for (i = 0; i < LLIT_MAX; i++)
2674 lo->ll_objs_repaired[i] = 0;
2676 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2679 lo->ll_status = LS_SCANNING_PHASE1;
2680 if (!lfsck->li_drop_dryrun ||
2681 lo->ll_pos_first_inconsistent == 0)
2682 pos->lp_oit_cookie = lo->ll_pos_last_checkpoint + 1;
2684 pos->lp_oit_cookie = lo->ll_pos_first_inconsistent;
2686 spin_unlock(&lfsck->li_lock);
2688 up_write(&com->lc_sem);
/*
 * Slave-side (OST) preparation: run the common prep, then, if phase-1
 * scanning will start and the requesting MDT index is valid, register
 * that MDT on the slave's master list so phase-2 completion can be
 * coordinated with it.
 */
2693 static int lfsck_layout_slave_prep(const struct lu_env *env,
2694 struct lfsck_component *com,
2695 struct lfsck_start_param *lsp)
2697 struct lfsck_layout *lo = com->lc_file_ram;
2698 struct lfsck_layout_slave_data *llsd = com->lc_data;
2701 /* XXX: For a new scanning, generate OST-objects
2702 * bitmap for orphan detection. */
2704 rc = lfsck_layout_prep(env, com);
/* Only track the master when phase 1 will actually run and the
 * caller supplied a valid MDT index. */
2705 if (rc != 0 || lo->ll_status != LS_SCANNING_PHASE1 ||
2706 !lsp->lsp_index_valid)
2709 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
/*
 * Master-side (MDT) preparation: run the common prep, reset the
 * assistant bookkeeping flags, then spawn the "lfsck_layout" assistant
 * kthread and wait until it is running (or has stopped with an error).
 * NOTE(review): interior lines are elided in this excerpt.
 */
2714 static int lfsck_layout_master_prep(const struct lu_env *env,
2715 struct lfsck_component *com,
2716 struct lfsck_start_param *lsp)
2718 struct lfsck_instance *lfsck = com->lc_lfsck;
2719 struct lfsck_layout_master_data *llmd = com->lc_data;
2720 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2721 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2722 struct lfsck_thread_args *lta;
2726 rc = lfsck_layout_prep(env, com);
/* Fresh assistant state for this run. */
2730 llmd->llmd_assistant_status = 0;
2731 llmd->llmd_post_result = 0;
2732 llmd->llmd_to_post = 0;
2733 llmd->llmd_to_double_scan = 0;
2734 llmd->llmd_in_double_scan = 0;
2735 llmd->llmd_exit = 0;
2736 thread_set_flags(athread, 0);
2738 lta = lfsck_thread_args_init(lfsck, com, lsp);
2740 RETURN(PTR_ERR(lta));
/* Launch the assistant; kthread_run() returns ERR_PTR on failure. */
2742 rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout"));
2743 if (IS_ERR_VALUE(rc)) {
2744 CERROR("%s: Cannot start LFSCK layout assistant thread: "
2745 "rc = %ld\n", lfsck_lfsck2name(lfsck), rc);
2746 lfsck_thread_args_fini(lta);
2748 struct l_wait_info lwi = { 0 };
/* Block until the assistant reports running or stopped. */
2750 l_wait_event(mthread->t_ctl_waitq,
2751 thread_is_running(athread) ||
2752 thread_is_stopped(athread),
2754 if (unlikely(!thread_is_running(athread)))
2755 rc = llmd->llmd_assistant_status;
2763 /* Pre-fetch the attribute for each stripe in the given layout EA. */
/*
 * For every stripe listed in @lmm (LOV_MAGIC_V1/V3 only, verified by
 * the caller), locate the OST target and OST-object, declare the
 * attribute/xattr fetches, and queue an lfsck_layout_req on
 * llmd_req_list for the assistant thread.  Throttled by the bookmark's
 * async window.  NOTE(review): interior lines are elided.
 */
2764 static int lfsck_layout_scan_stripes(const struct lu_env *env,
2765 struct lfsck_component *com,
2766 struct dt_object *parent,
2767 struct lov_mds_md_v1 *lmm)
2769 struct lfsck_thread_info *info = lfsck_env_info(env);
2770 struct lfsck_instance *lfsck = com->lc_lfsck;
2771 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2772 struct lfsck_layout *lo = com->lc_file_ram;
2773 struct lfsck_layout_master_data *llmd = com->lc_data;
2774 struct lfsck_layout_object *llo = NULL;
2775 struct lov_ost_data_v1 *objs;
2776 struct lfsck_tgt_descs *ltds = &lfsck->li_ost_descs;
2777 struct ptlrpc_thread *mthread = &lfsck->li_thread;
2778 struct ptlrpc_thread *athread = &llmd->llmd_thread;
2779 struct l_wait_info lwi = { 0 };
2788 buf = lfsck_buf_get(env, &info->lti_old_pfid,
2789 sizeof(struct filter_fid_old));
/* Layout EA fields are little-endian on disk. */
2790 count = le16_to_cpu(lmm->lmm_stripe_count);
2791 gen = le16_to_cpu(lmm->lmm_layout_gen);
2792 /* Currently, we only support LOV_MAGIC_V1/LOV_MAGIC_V3 which has
2793 * been verified in lfsck_layout_verify_header() already. If some
2794 * new magic introduced in the future, then layout LFSCK needs to
2795 * be updated also. */
2796 magic = le32_to_cpu(lmm->lmm_magic);
2797 if (magic == LOV_MAGIC_V1) {
2798 objs = &(lmm->lmm_objects[0]);
2800 LASSERT(magic == LOV_MAGIC_V3);
/* V3 carries a pool name before the object array. */
2801 objs = &((struct lov_mds_md_v3 *)lmm)->lmm_objects[0];
2804 for (i = 0; i < count; i++, objs++) {
2805 struct lu_fid *fid = &info->lti_fid;
2806 struct ost_id *oi = &info->lti_oi;
2807 struct lfsck_layout_req *llr;
2808 struct lfsck_tgt_desc *tgt = NULL;
2809 struct dt_object *cobj = NULL;
2811 le32_to_cpu(objs->l_ost_idx);
2812 bool wakeup = false;
/* Flow control: wait while the prefetch pipeline is full
 * (unless the async window is unlimited, i.e. zero). */
2814 l_wait_event(mthread->t_ctl_waitq,
2815 bk->lb_async_windows == 0 ||
2816 llmd->llmd_prefetched < bk->lb_async_windows ||
2817 !thread_is_running(mthread) ||
2818 thread_is_stopped(athread),
2821 if (unlikely(!thread_is_running(mthread)) ||
2822 thread_is_stopped(athread))
/* Map the on-disk OST id to an in-memory FID. */
2825 ostid_le_to_cpu(&objs->l_ost_oi, oi);
2826 ostid_to_fid(fid, oi, index);
2827 tgt = lfsck_tgt_get(ltds, index);
2828 if (unlikely(tgt == NULL)) {
/* Unknown OST index: the scan cannot cover this stripe. */
2829 lo->ll_flags |= LF_INCOMPLETE;
2833 cobj = lfsck_object_find_by_dev(env, tgt->ltd_tgt, fid);
/* Pre-declare the fetches the assistant will perform. */
2839 rc = dt_declare_attr_get(env, cobj, BYPASS_CAPA);
2843 rc = dt_declare_xattr_get(env, cobj, buf, XATTR_NAME_FID,
2849 llo = lfsck_layout_object_init(env, parent, gen);
2856 llr = lfsck_layout_req_init(llo, cobj, index, i);
2863 spin_lock(&llmd->llmd_lock);
2864 if (llmd->llmd_assistant_status < 0) {
/* Assistant already failed: drop this request and bail. */
2865 spin_unlock(&llmd->llmd_lock);
2866 lfsck_layout_req_fini(env, llr);
2868 RETURN(llmd->llmd_assistant_status);
2871 list_add_tail(&llr->llr_list, &llmd->llmd_req_list);
2872 if (llmd->llmd_prefetched == 0)
2875 llmd->llmd_prefetched++;
2876 spin_unlock(&llmd->llmd_lock);
2878 wake_up_all(&athread->t_ctl_waitq);
2881 down_write(&com->lc_sem);
2882 com->lc_new_checked++;
2884 lo->ll_objs_failed_phase1++;
2885 up_write(&com->lc_sem);
2887 if (cobj != NULL && !IS_ERR(cobj))
2888 lu_object_put(env, &cobj->do_lu);
2890 if (likely(tgt != NULL))
2893 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
2900 if (llo != NULL && !IS_ERR(llo))
2901 lfsck_layout_object_put(env, llo);
2906 /* For the given object, read its layout EA locally. For each stripe, pre-fetch
2907 * the OST-object's attribute and generate an structure lfsck_layout_req on the
2908 * list ::llmd_req_list.
2910 * For each request on above list, the lfsck_layout_assistant thread compares
2911 * the OST side attribute with local attribute, if inconsistent, then repair it.
2913 * All above processing is async mode with pipeline. */
/*
 * Additionally, if the lmm_oi stored in the layout EA does not match
 * the object's own FID, it is repaired in place (under DLM layout/xattr
 * lock, in a local transaction) unless dryrun is set.
 * NOTE(review): interior lines are elided in this excerpt.
 */
2914 static int lfsck_layout_master_exec_oit(const struct lu_env *env,
2915 struct lfsck_component *com,
2916 struct dt_object *obj)
2918 struct lfsck_thread_info *info = lfsck_env_info(env);
2919 struct ost_id *oi = &info->lti_oi;
2920 struct lfsck_layout *lo = com->lc_file_ram;
2921 struct lfsck_layout_master_data *llmd = com->lc_data;
2922 struct lfsck_instance *lfsck = com->lc_lfsck;
2923 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
2924 struct thandle *handle = NULL;
2925 struct lu_buf *buf = &info->lti_big_buf;
2926 struct lov_mds_md_v1 *lmm = NULL;
2927 struct dt_device *dev = lfsck->li_bottom;
2928 struct lustre_handle lh = { 0 };
2929 ssize_t buflen = buf->lb_len;
2931 bool locked = false;
2932 bool stripe = false;
/* Only regular files carry striping. */
2935 if (!S_ISREG(lfsck_object_type(obj)))
2938 if (llmd->llmd_assistant_status < 0)
2939 GOTO(out, rc = -ESRCH);
/* Expected on-disk lmm_oi, derived from the object's FID. */
2941 fid_to_lmm_oi(lfsck_dto2fid(obj), oi);
2942 lmm_oi_cpu_to_le(oi, oi);
2943 dt_read_lock(env, obj, 0);
2947 rc = lfsck_layout_get_lovea(env, obj, buf, &buflen);
2953 rc = lfsck_layout_verify_header(lmm);
/* lmm_oi matches: nothing to repair, go scan the stripes. */
2957 if (memcmp(oi, &lmm->lmm_oi, sizeof(*oi)) == 0)
2958 GOTO(out, stripe = true);
2960 /* Inconsistent lmm_oi, should be repaired. */
2961 CDEBUG(D_LFSCK, "Repair bad lmm_oi for "DFID"\n",
2962 PFID(lfsck_dto2fid(obj)));
2964 if (bk->lb_param & LPF_DRYRUN) {
/* Dryrun: count the would-be repair but do not modify. */
2965 down_write(&com->lc_sem);
2966 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
2967 up_write(&com->lc_sem);
2969 GOTO(out, stripe = true);
2972 if (!lustre_handle_is_used(&lh)) {
/* Drop the read lock; retake as DLM layout+xattr lock. */
2973 dt_read_unlock(env, obj);
2975 buf->lb_len = buflen;
2976 rc = lfsck_layout_lock(env, com, obj, &lh,
2977 MDS_INODELOCK_LAYOUT |
2978 MDS_INODELOCK_XATTR);
2982 handle = dt_trans_create(env, dev);
2984 GOTO(out, rc = PTR_ERR(handle));
2986 rc = dt_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
2987 LU_XATTR_REPLACE, handle);
2991 rc = dt_trans_start_local(env, dev, handle);
2995 dt_write_lock(env, obj, 0);
/* Rewrite the layout EA with the corrected lmm_oi. */
3002 rc = dt_xattr_set(env, obj, buf, XATTR_NAME_LOV,
3003 LU_XATTR_REPLACE, handle, BYPASS_CAPA);
3007 down_write(&com->lc_sem);
3008 lo->ll_objs_repaired[LLIT_OTHERS - 1]++;
3009 up_write(&com->lc_sem);
3011 GOTO(out, stripe = true);
/* Cleanup: DLM lock state decides which dt lock is held. */
3015 if (lustre_handle_is_used(&lh))
3016 dt_write_unlock(env, obj);
3018 dt_read_unlock(env, obj);
3021 if (handle != NULL && !IS_ERR(handle))
3022 dt_trans_stop(env, dev, handle);
3024 lfsck_layout_unlock(&lh);
3026 rc = lfsck_layout_scan_stripes(env, com, obj, lmm);
3028 down_write(&com->lc_sem);
3029 com->lc_new_checked++;
3031 lo->ll_objs_failed_phase1++;
3032 up_write(&com->lc_sem);
/* Restore the shared big buffer's original length. */
3034 buf->lb_len = buflen;
/*
 * Slave-side per-object handler: track the highest object id seen per
 * sequence and compare it with the on-disk LAST_ID.  If an object id
 * exceeds LAST_ID, the LAST_ID file has crashed: notify the layer above
 * (LE_LASTID_REBUILDING) and mark LF_CRASHED_LASTID.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3039 static int lfsck_layout_slave_exec_oit(const struct lu_env *env,
3040 struct lfsck_component *com,
3041 struct dt_object *obj)
3043 struct lfsck_instance *lfsck = com->lc_lfsck;
3044 struct lfsck_layout *lo = com->lc_file_ram;
3045 const struct lu_fid *fid = lfsck_dto2fid(obj);
3046 struct lfsck_layout_slave_data *llsd = com->lc_data;
3047 struct lfsck_layout_seq *lls;
3053 /* XXX: Update OST-objects bitmap for orphan detection. */
3055 LASSERT(llsd != NULL);
3057 down_write(&com->lc_sem);
/* Only IDIF and normal OST-object FIDs are of interest here. */
3058 if (fid_is_idif(fid))
3060 else if (!fid_is_norm(fid) ||
3061 !fid_is_for_ostobj(env, lfsck->li_next, obj, fid))
3062 GOTO(unlock, rc = 0);
3065 com->lc_new_checked++;
3067 lls = lfsck_layout_seq_lookup(llsd, seq);
3070 if (unlikely(lls == NULL))
3071 GOTO(unlock, rc = -ENOMEM);
/* First object of this sequence: load its LAST_ID and track it. */
3073 INIT_LIST_HEAD(&lls->lls_list);
3075 rc = lfsck_layout_lastid_load(env, com, lls);
3077 lo->ll_objs_failed_phase1++;
3082 lfsck_layout_seq_insert(llsd, lls);
/* The LAST_ID object itself carries no object id to compare. */
3085 if (unlikely(fid_is_last_id(fid)))
3086 GOTO(unlock, rc = 0);
3089 if (oid > lls->lls_lastid_known)
3090 lls->lls_lastid_known = oid;
3092 if (oid > lls->lls_lastid) {
3093 if (!(lo->ll_flags & LF_CRASHED_LASTID)) {
3094 /* OFD may create new objects during LFSCK scanning. */
3095 rc = lfsck_layout_lastid_reload(env, com, lls);
3096 if (unlikely(rc != 0))
3097 CWARN("%s: failed to reload LAST_ID for "LPX64
3099 lfsck_lfsck2name(com->lc_lfsck),
3101 if (oid <= lls->lls_lastid)
3102 GOTO(unlock, rc = 0);
/* oid still beyond LAST_ID: the LAST_ID file is corrupt. */
3104 LASSERT(lfsck->li_out_notify != NULL);
3106 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3107 LE_LASTID_REBUILDING);
3108 lo->ll_flags |= LF_CRASHED_LASTID;
3111 lls->lls_lastid = oid;
3115 GOTO(unlock, rc = 0);
3118 up_write(&com->lc_sem);
/*
 * Directory-entry hook for the layout component.  Layout LFSCK is
 * OI-table driven, so per-dirent processing is a no-op (body elided in
 * this excerpt; presumably just returns 0 -- TODO confirm).
 */
3123 static int lfsck_layout_exec_dir(const struct lu_env *env,
3124 struct lfsck_component *com,
3125 struct dt_object *obj,
3126 struct lu_dirent *ent)
/*
 * Master-side post-processing after phase-1 scanning ends.
 *
 * Hands @result to the assistant thread, waits for it to drain the
 * request list (or stop), then updates the component status:
 * success -> SCANNING_PHASE2 (double-scan list); 0 -> co-status from
 * the instance (STOPPED/PAUSED, idle list); failure -> FAILED (idle
 * list).  Finally folds statistics and persists the trace file.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3131 static int lfsck_layout_master_post(const struct lu_env *env,
3132 struct lfsck_component *com,
3133 int result, bool init)
3135 struct lfsck_instance *lfsck = com->lc_lfsck;
3136 struct lfsck_layout *lo = com->lc_file_ram;
3137 struct lfsck_layout_master_data *llmd = com->lc_data;
3138 struct ptlrpc_thread *mthread = &lfsck->li_thread;
3139 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3140 struct l_wait_info lwi = { 0 };
/* Tell the assistant the phase-1 outcome; ask it to exit on failure. */
3145 llmd->llmd_post_result = result;
3146 llmd->llmd_to_post = 1;
3147 if (llmd->llmd_post_result <= 0)
3148 llmd->llmd_exit = 1;
3150 wake_up_all(&athread->t_ctl_waitq);
3151 l_wait_event(mthread->t_ctl_waitq,
3152 (result > 0 && list_empty(&llmd->llmd_req_list)) ||
3153 thread_is_stopped(athread),
3156 if (llmd->llmd_assistant_status < 0)
3157 result = llmd->llmd_assistant_status;
3159 down_write(&com->lc_sem);
3160 spin_lock(&lfsck->li_lock);
3161 /* When LFSCK failed, there may be some prefetched objects those are
3162 * not been processed yet, we do not know the exactly position, then
3163 * just restart from last check-point next time. */
3164 if (!init && !llmd->llmd_exit)
3165 lo->ll_pos_last_checkpoint =
3166 lfsck->li_pos_current.lp_oit_cookie;
3169 lo->ll_status = LS_SCANNING_PHASE2;
3170 lo->ll_flags |= LF_SCANNED_ONCE;
3171 lo->ll_flags &= ~LF_UPGRADE;
3172 list_del_init(&com->lc_link);
3173 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
3174 } else if (result == 0) {
/* Co-status from the instance; default to STOPPED. */
3175 lo->ll_status = lfsck->li_status;
3176 if (lo->ll_status == 0)
3177 lo->ll_status = LS_STOPPED;
3178 if (lo->ll_status != LS_PAUSED) {
3179 list_del_init(&com->lc_link);
3180 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3183 lo->ll_status = LS_FAILED;
3184 list_del_init(&com->lc_link);
3185 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3187 spin_unlock(&lfsck->li_lock);
/* Fold run time and checked counts into the trace file. */
3190 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
3191 HALF_SEC - lfsck->li_time_last_checkpoint);
3192 lo->ll_time_last_checkpoint = cfs_time_current_sec();
3193 lo->ll_objs_checked_phase1 += com->lc_new_checked;
3194 com->lc_new_checked = 0;
3197 rc = lfsck_layout_store(env, com);
3198 up_write(&com->lc_sem);
/*
 * Slave-side post-processing after phase-1 scanning ends.
 *
 * Persists LAST_ID files, updates the component status the same way as
 * the master variant, clears LF_CRASHED_LASTID on success (notifying
 * the layer above), stores the trace file, and finally reports
 * LE_PHASE1_DONE to the master(s).
 * NOTE(review): interior lines are elided in this excerpt.
 */
3203 static int lfsck_layout_slave_post(const struct lu_env *env,
3204 struct lfsck_component *com,
3205 int result, bool init)
3207 struct lfsck_instance *lfsck = com->lc_lfsck;
3208 struct lfsck_layout *lo = com->lc_file_ram;
/* Flush any dirty LAST_ID values discovered during the scan. */
3212 rc = lfsck_layout_lastid_store(env, com);
3216 LASSERT(lfsck->li_out_notify != NULL);
3218 down_write(&com->lc_sem);
3220 spin_lock(&lfsck->li_lock);
3222 lo->ll_pos_last_checkpoint =
3223 lfsck->li_pos_current.lp_oit_cookie;
3225 lo->ll_status = LS_SCANNING_PHASE2;
3226 lo->ll_flags |= LF_SCANNED_ONCE;
/* Phase 1 succeeded: the LAST_ID rebuild (if any) is done. */
3227 if (lo->ll_flags & LF_CRASHED_LASTID) {
3229 lo->ll_flags &= ~LF_CRASHED_LASTID;
3231 lo->ll_flags &= ~LF_UPGRADE;
3232 list_del_init(&com->lc_link);
3233 list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
3234 } else if (result == 0) {
3235 lo->ll_status = lfsck->li_status;
3236 if (lo->ll_status == 0)
3237 lo->ll_status = LS_STOPPED;
3238 if (lo->ll_status != LS_PAUSED) {
3239 list_del_init(&com->lc_link);
3240 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3243 lo->ll_status = LS_FAILED;
3244 list_del_init(&com->lc_link);
3245 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3247 spin_unlock(&lfsck->li_lock);
3250 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
/* Fold run time and checked counts into the trace file. */
3254 lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
3255 HALF_SEC - lfsck->li_time_last_checkpoint);
3256 lo->ll_time_last_checkpoint = cfs_time_current_sec();
3257 lo->ll_objs_checked_phase1 += com->lc_new_checked;
3258 com->lc_new_checked = 0;
3261 rc = lfsck_layout_store(env, com);
3263 up_write(&com->lc_sem);
/* Tell the MDT-side master that this OST finished phase 1. */
3265 lfsck_layout_slave_notify_master(env, com, LE_PHASE1_DONE, result);
/*
 * Render the layout LFSCK status into @buf (at most @len bytes) for
 * procfs consumption: name, status, flags, parameters, timestamps,
 * positions, repair statistics, and - while phase 1 runs - live speed
 * figures plus the current OI-table position.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3270 static int lfsck_layout_dump(const struct lu_env *env,
3271 struct lfsck_component *com, char *buf, int len)
3273 struct lfsck_instance *lfsck = com->lc_lfsck;
3274 struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
3275 struct lfsck_layout *lo = com->lc_file_ram;
3280 down_read(&com->lc_sem);
3281 rc = snprintf(buf, len,
3282 "name: lfsck_layout\n"
3288 lfsck_status2names(lo->ll_status));
3294 rc = lfsck_bits_dump(&buf, &len, lo->ll_flags, lfsck_flags_names,
3299 rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
3304 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_complete,
3305 "time_since_last_completed");
3309 rc = lfsck_time_dump(&buf, &len, lo->ll_time_latest_start,
3310 "time_since_latest_start");
3314 rc = lfsck_time_dump(&buf, &len, lo->ll_time_last_checkpoint,
3315 "time_since_last_checkpoint");
3319 rc = snprintf(buf, len,
3320 "latest_start_position: "LPU64"\n"
3321 "last_checkpoint_position: "LPU64"\n"
3322 "first_failure_position: "LPU64"\n",
3323 lo->ll_pos_latest_start,
3324 lo->ll_pos_last_checkpoint,
3325 lo->ll_pos_first_inconsistent);
3332 rc = snprintf(buf, len,
3333 "success_count: %u\n"
3334 "repaired_dangling: "LPU64"\n"
3335 "repaired_unmatched_pair: "LPU64"\n"
3336 "repaired_multiple_referenced: "LPU64"\n"
3337 "repaired_orphan: "LPU64"\n"
3338 "repaired_inconsistent_owner: "LPU64"\n"
3339 "repaired_others: "LPU64"\n"
3340 "skipped: "LPU64"\n"
3341 "failed_phase1: "LPU64"\n"
3342 "failed_phase2: "LPU64"\n",
3343 lo->ll_success_count,
3344 lo->ll_objs_repaired[LLIT_DANGLING - 1],
3345 lo->ll_objs_repaired[LLIT_UNMATCHED_PAIR - 1],
3346 lo->ll_objs_repaired[LLIT_MULTIPLE_REFERENCED - 1],
3347 lo->ll_objs_repaired[LLIT_ORPHAN - 1],
3348 lo->ll_objs_repaired[LLIT_INCONSISTENT_OWNER - 1],
3349 lo->ll_objs_repaired[LLIT_OTHERS - 1],
3350 lo->ll_objs_skipped,
3351 lo->ll_objs_failed_phase1,
3352 lo->ll_objs_failed_phase2);
3359 if (lo->ll_status == LS_SCANNING_PHASE1) {
3361 const struct dt_it_ops *iops;
/* Live statistics: include the not-yet-checkpointed counts. */
3362 cfs_duration_t duration = cfs_time_current() -
3363 lfsck->li_time_last_checkpoint;
3364 __u64 checked = lo->ll_objs_checked_phase1 + com->lc_new_checked;
3365 __u64 speed = checked;
3366 __u64 new_checked = com->lc_new_checked * HZ;
3367 __u32 rtime = lo->ll_run_time_phase1 +
3368 cfs_duration_sec(duration + HALF_SEC);
/* do_div() is required for 64-bit division on 32-bit kernels. */
3371 do_div(new_checked, duration);
3373 do_div(speed, rtime);
3374 rc = snprintf(buf, len,
3375 "checked_phase1: "LPU64"\n"
3376 "checked_phase2: "LPU64"\n"
3377 "run_time_phase1: %u seconds\n"
3378 "run_time_phase2: %u seconds\n"
3379 "average_speed_phase1: "LPU64" items/sec\n"
3380 "average_speed_phase2: N/A\n"
3381 "real-time_speed_phase1: "LPU64" items/sec\n"
3382 "real-time_speed_phase2: N/A\n",
3384 lo->ll_objs_checked_phase2,
3386 lo->ll_run_time_phase2,
3395 LASSERT(lfsck->li_di_oit != NULL);
3397 iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
3399 /* The low layer otable-based iteration position may NOT
3400 * exactly match the layout-based directory traversal
3401 * cookie. Generally, it is not a serious issue. But the
3402 * caller should NOT make assumption on that. */
3403 pos = iops->store(env, lfsck->li_di_oit);
3404 if (!lfsck->li_current_oit_processed)
3406 rc = snprintf(buf, len, "current_position: "LPU64"\n", pos);
3413 /* XXX: LS_SCANNING_PHASE2 will be handled in the future. */
3414 __u64 speed1 = lo->ll_objs_checked_phase1;
3415 __u64 speed2 = lo->ll_objs_checked_phase2;
3417 if (lo->ll_run_time_phase1 != 0)
3418 do_div(speed1, lo->ll_run_time_phase1);
3419 if (lo->ll_run_time_phase2 != 0)
3420 do_div(speed2, lo->ll_run_time_phase2);
3421 rc = snprintf(buf, len,
3422 "checked_phase1: "LPU64"\n"
3423 "checked_phase2: "LPU64"\n"
3424 "run_time_phase1: %u seconds\n"
3425 "run_time_phase2: %u seconds\n"
3426 "average_speed_phase1: "LPU64" items/sec\n"
3427 "average_speed_phase2: "LPU64" objs/sec\n"
3428 "real-time_speed_phase1: N/A\n"
3429 "real-time_speed_phase2: N/A\n"
3430 "current_position: N/A\n",
3431 lo->ll_objs_checked_phase1,
3432 lo->ll_objs_checked_phase2,
3433 lo->ll_run_time_phase1,
3434 lo->ll_run_time_phase2,
3446 up_read(&com->lc_sem);
/*
 * Master-side phase-2 entry: ask the assistant thread to begin the
 * double scan and wait until it acknowledges (or stops with an error).
 */
3451 static int lfsck_layout_master_double_scan(const struct lu_env *env,
3452 struct lfsck_component *com)
3454 struct lfsck_layout_master_data *llmd = com->lc_data;
3455 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3456 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3457 struct lfsck_layout *lo = com->lc_file_ram;
3458 struct l_wait_info lwi = { 0 };
/* Nothing to do unless phase 1 handed over to phase 2. */
3460 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3463 llmd->llmd_to_double_scan = 1;
3464 wake_up_all(&athread->t_ctl_waitq);
3465 l_wait_event(mthread->t_ctl_waitq,
3466 llmd->llmd_in_double_scan ||
3467 thread_is_stopped(athread),
3469 if (llmd->llmd_assistant_status < 0)
3470 return llmd->llmd_assistant_status;
/*
 * Slave-side phase 2: periodically (30 s timeout) query the master(s)
 * until every registered master has finished, the local thread stops,
 * or an error occurs; then record the double-scan result and wake any
 * waiter on the instance-wide double-scan count.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3475 static int lfsck_layout_slave_double_scan(const struct lu_env *env,
3476 struct lfsck_component *com)
3478 struct lfsck_instance *lfsck = com->lc_lfsck;
3479 struct lfsck_layout_slave_data *llsd = com->lc_data;
3480 struct lfsck_layout *lo = com->lc_file_ram;
3481 struct ptlrpc_thread *thread = &lfsck->li_thread;
3485 if (unlikely(lo->ll_status != LS_SCANNING_PHASE2))
3488 atomic_inc(&lfsck->li_double_scan_count);
/* Fresh phase-2 statistics and checkpoint schedule. */
3490 com->lc_new_checked = 0;
3491 com->lc_new_scanned = 0;
3492 com->lc_time_last_checkpoint = cfs_time_current();
3493 com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
3494 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
3497 struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(30),
3500 rc = lfsck_layout_slave_query_master(env, com);
3501 if (list_empty(&llsd->llsd_master_list)) {
3502 if (unlikely(!thread_is_running(thread)))
/* Wait for master completion notifications or local stop;
 * -ETIMEDOUT just triggers another query round. */
3513 rc = l_wait_event(thread->t_ctl_waitq,
3514 !thread_is_running(thread) ||
3515 list_empty(&llsd->llsd_master_list),
3517 if (unlikely(!thread_is_running(thread)))
3520 if (rc == -ETIMEDOUT)
3523 GOTO(done, rc = (rc < 0 ? rc : 1));
3527 rc = lfsck_layout_double_scan_result(env, com, rc);
3529 if (atomic_dec_and_test(&lfsck->li_double_scan_count))
3530 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
/*
 * Release the master-side private data: detach it from the component,
 * then, under the target-descriptor lock, unlink every OST/MDT target
 * still queued on any of the six phase/target lists.  The assistant
 * thread must already be init or stopped and the request list empty.
 * NOTE(review): interior lines (including the final free) are elided.
 */
3535 static void lfsck_layout_master_data_release(const struct lu_env *env,
3536 struct lfsck_component *com)
3538 struct lfsck_layout_master_data *llmd = com->lc_data;
3539 struct lfsck_instance *lfsck = com->lc_lfsck;
3540 struct lfsck_tgt_descs *ltds;
3541 struct lfsck_tgt_desc *ltd;
3542 struct lfsck_tgt_desc *next;
3544 LASSERT(llmd != NULL);
3545 LASSERT(thread_is_init(&llmd->llmd_thread) ||
3546 thread_is_stopped(&llmd->llmd_thread));
3547 LASSERT(list_empty(&llmd->llmd_req_list));
3549 com->lc_data = NULL;
/* Drain OST target lists first... */
3551 ltds = &lfsck->li_ost_descs;
3552 spin_lock(&ltds->ltd_lock);
3553 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase1_list,
3554 ltd_layout_phase_list) {
3555 list_del_init(&ltd->ltd_layout_phase_list);
3557 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_phase2_list,
3558 ltd_layout_phase_list) {
3559 list_del_init(&ltd->ltd_layout_phase_list);
3561 list_for_each_entry_safe(ltd, next, &llmd->llmd_ost_list,
3563 list_del_init(&ltd->ltd_layout_list);
/* ...then the MDT target lists. */
3565 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase1_list,
3566 ltd_layout_phase_list) {
3567 list_del_init(&ltd->ltd_layout_phase_list);
3569 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_phase2_list,
3570 ltd_layout_phase_list) {
3571 list_del_init(&ltd->ltd_layout_phase_list);
3573 list_for_each_entry_safe(ltd, next, &llmd->llmd_mdt_list,
3575 list_del_init(&ltd->ltd_layout_list);
3577 spin_unlock(&ltds->ltd_lock);
/*
 * Release the slave-side private data: detach it from the component,
 * drop every tracked sequence (putting its LAST_ID object), and empty
 * the registered-master list.  NOTE(review): the per-entry frees and
 * the final free of @llsd are elided in this excerpt.
 */
3582 static void lfsck_layout_slave_data_release(const struct lu_env *env,
3583 struct lfsck_component *com)
3585 struct lfsck_layout_slave_data *llsd = com->lc_data;
3586 struct lfsck_layout_seq *lls;
3587 struct lfsck_layout_seq *next;
3588 struct lfsck_layout_slave_target *llst;
3589 struct lfsck_layout_slave_target *tmp;
3591 LASSERT(llsd != NULL);
3593 com->lc_data = NULL;
3595 list_for_each_entry_safe(lls, next, &llsd->llsd_seq_list,
3597 list_del_init(&lls->lls_list);
/* Release the reference on the sequence's LAST_ID object. */
3598 lfsck_object_put(env, lls->lls_lastid_obj);
3602 list_for_each_entry_safe(llst, tmp, &llsd->llsd_master_list,
3604 list_del_init(&llst->llst_list);
/*
 * Request the assistant thread to exit and wait until it has either
 * never started (init) or fully stopped.
 */
3611 static void lfsck_layout_master_quit(const struct lu_env *env,
3612 struct lfsck_component *com)
3614 struct lfsck_layout_master_data *llmd = com->lc_data;
3615 struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread;
3616 struct ptlrpc_thread *athread = &llmd->llmd_thread;
3617 struct l_wait_info lwi = { 0 };
3619 llmd->llmd_exit = 1;
3620 wake_up_all(&athread->t_ctl_waitq);
3621 l_wait_event(mthread->t_ctl_waitq,
3622 thread_is_init(athread) ||
3623 thread_is_stopped(athread),
/*
 * Master-side handler for incoming LFSCK events from OSTs/MDTs.
 *
 * Accepts LE_PHASE1_DONE / LE_PHASE2_DONE / LE_STOP and moves the
 * sending target between the phase lists accordingly; a failed target
 * marks the overall scan LF_INCOMPLETE.  LEF_FORCE_STOP events trigger
 * a local lfsck_stop(); otherwise the assistant is woken if it can now
 * proceed to orphan handling.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3627 static int lfsck_layout_master_in_notify(const struct lu_env *env,
3628 struct lfsck_component *com,
3629 struct lfsck_request *lr)
3631 struct lfsck_instance *lfsck = com->lc_lfsck;
3632 struct lfsck_layout *lo = com->lc_file_ram;
3633 struct lfsck_layout_master_data *llmd = com->lc_data;
3634 struct lfsck_tgt_descs *ltds;
3635 struct lfsck_tgt_desc *ltd;
/* Only these three event types are meaningful here. */
3638 if (lr->lr_event != LE_PHASE1_DONE &&
3639 lr->lr_event != LE_PHASE2_DONE &&
3640 lr->lr_event != LE_STOP)
3643 if (lr->lr_flags & LEF_FROM_OST)
3644 ltds = &lfsck->li_ost_descs;
3646 ltds = &lfsck->li_mdt_descs;
3647 spin_lock(&ltds->ltd_lock);
3648 ltd = LTD_TGT(ltds, lr->lr_index);
3650 spin_unlock(&ltds->ltd_lock);
3655 list_del_init(&ltd->ltd_layout_phase_list);
3656 switch (lr->lr_event) {
3657 case LE_PHASE1_DONE:
3658 if (lr->lr_status <= 0) {
/* Target failed phase 1: it is done, and the scan
 * cannot be complete without it. */
3659 ltd->ltd_layout_done = 1;
3660 list_del_init(&ltd->ltd_layout_list);
3661 lo->ll_flags |= LF_INCOMPLETE;
/* Success: promote the target to the phase-2 list. */
3665 if (lr->lr_flags & LEF_FROM_OST) {
3666 if (list_empty(&ltd->ltd_layout_list))
3667 list_add_tail(&ltd->ltd_layout_list,
3668 &llmd->llmd_ost_list);
3669 list_add_tail(&ltd->ltd_layout_phase_list,
3670 &llmd->llmd_ost_phase2_list);
3672 if (list_empty(&ltd->ltd_layout_list))
3673 list_add_tail(&ltd->ltd_layout_list,
3674 &llmd->llmd_mdt_list);
3675 list_add_tail(&ltd->ltd_layout_phase_list,
3676 &llmd->llmd_mdt_phase2_list);
3679 case LE_PHASE2_DONE:
3680 ltd->ltd_layout_done = 1;
3681 list_del_init(&ltd->ltd_layout_list);
/* LE_STOP (presumably the default/final case here). */
3684 ltd->ltd_layout_done = 1;
3685 list_del_init(&ltd->ltd_layout_list);
3686 if (!(lr->lr_flags & LEF_FORCE_STOP))
3687 lo->ll_flags |= LF_INCOMPLETE;
3692 spin_unlock(&ltds->ltd_lock);
3694 if (lr->lr_flags & LEF_FORCE_STOP) {
3695 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
/* Propagate the remote stop request to the local instance. */
3697 memset(stop, 0, sizeof(*stop));
3698 stop->ls_status = lr->lr_status;
3699 stop->ls_flags = lr->lr_param;
3700 lfsck_stop(env, lfsck->li_bottom, stop);
3701 } else if (lfsck_layout_master_to_orphan(llmd)) {
3702 wake_up_all(&llmd->llmd_thread.t_ctl_waitq);
/*
 * Slave-side handler for incoming events from an MDT master: drop the
 * sender from the registered-master list; once that list is empty,
 * either wake the local thread (LE_PHASE2_DONE) or stop the local
 * LFSCK entirely (LE_STOP).
 * NOTE(review): interior lines are elided in this excerpt.
 */
3708 static int lfsck_layout_slave_in_notify(const struct lu_env *env,
3709 struct lfsck_component *com,
3710 struct lfsck_request *lr)
3712 struct lfsck_instance *lfsck = com->lc_lfsck;
3713 struct lfsck_layout_slave_data *llsd = com->lc_data;
3714 struct lfsck_layout_slave_target *llst;
3717 if (lr->lr_event != LE_PHASE2_DONE &&
3718 lr->lr_event != LE_STOP)
3721 llst = lfsck_layout_llst_find_and_del(llsd, lr->lr_index);
3725 lfsck_layout_llst_put(llst);
3726 if (list_empty(&llsd->llsd_master_list)) {
3727 switch (lr->lr_event) {
3728 case LE_PHASE2_DONE:
/* All masters finished phase 2: let the slave proceed. */
3729 wake_up_all(&lfsck->li_thread.t_ctl_waitq);
3732 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
3734 memset(stop, 0, sizeof(*stop));
3735 stop->ls_status = lr->lr_status;
3736 stop->ls_flags = lr->lr_param;
3737 lfsck_stop(env, lfsck->li_bottom, stop);
/* Report the component's current status from the in-RAM trace file. */
3748 static int lfsck_layout_query(const struct lu_env *env,
3749 struct lfsck_component *com)
3751 struct lfsck_layout *lo = com->lc_file_ram;
3753 return lo->ll_status;
/*
 * Send an async LE_STOP (status LS_CO_STOPPED) notification to one
 * target via @set, skipping the local device itself when iterating the
 * OST descriptors.  The target must already be off both layout lists.
 * NOTE(review): interior lines are elided in this excerpt.
 */
3756 static int lfsck_layout_master_stop_notify(const struct lu_env *env,
3757 struct lfsck_component *com,
3758 struct lfsck_tgt_descs *ltds,
3759 struct lfsck_tgt_desc *ltd,
3760 struct ptlrpc_request_set *set)
3762 struct lfsck_thread_info *info = lfsck_env_info(env);
3763 struct lfsck_async_interpret_args *laia = &info->lti_laia;
3764 struct lfsck_request *lr = &info->lti_lr;
3765 struct lfsck_instance *lfsck = com->lc_lfsck;
3768 LASSERT(list_empty(&ltd->ltd_layout_list));
3769 LASSERT(list_empty(&ltd->ltd_layout_phase_list));
3771 memset(lr, 0, sizeof(*lr));
3772 lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
3773 lr->lr_event = LE_STOP;
3774 lr->lr_active = LT_LAYOUT;
3775 if (ltds == &lfsck->li_ost_descs) {
3776 lr->lr_flags = LEF_TO_OST;
/* Do not notify ourselves. */
3778 if (ltd->ltd_index == lfsck_dev_idx(lfsck->li_bottom))
3783 lr->lr_status = LS_CO_STOPPED;
3785 laia->laia_com = com;
3786 laia->laia_ltds = ltds;
3787 laia->laia_ltd = ltd;
3790 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
3791 lfsck_layout_master_async_interpret,
3792 laia, LFSCK_NOTIFY);
3794 CERROR("%s: Fail to notify %s %x for co-stop: rc = %d\n",
3795 lfsck_lfsck2name(lfsck),
3796 (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
3797 ltd->ltd_index, rc);
3802 /* with lfsck::li_lock held */
/*
 * Let an additional MDT join an already-running slave-side scan
 * (LPF_ALL_MDT): register it on the master list, dropping li_lock
 * around the allocation; if the local thread meanwhile stopped,
 * undo the registration.
 */
3803 static int lfsck_layout_slave_join(const struct lu_env *env,
3804 struct lfsck_component *com,
3805 struct lfsck_start_param *lsp)
3807 struct lfsck_instance *lfsck = com->lc_lfsck;
3808 struct lfsck_layout_slave_data *llsd = com->lc_data;
3809 struct lfsck_layout_slave_target *llst;
3810 struct lfsck_start *start = lsp->lsp_start;
3814 if (!lsp->lsp_index_valid || start == NULL ||
3815 !(start->ls_flags & LPF_ALL_MDT))
/* llst_add may allocate; cannot hold the spinlock across it. */
3818 spin_unlock(&lfsck->li_lock);
3819 rc = lfsck_layout_llst_add(llsd, lsp->lsp_index);
3820 spin_lock(&lfsck->li_lock);
3821 if (rc == 0 && !thread_is_running(&lfsck->li_thread)) {
/* Raced with thread stop: roll the registration back. */
3822 spin_unlock(&lfsck->li_lock);
3823 llst = lfsck_layout_llst_find_and_del(llsd, lsp->lsp_index);
3825 lfsck_layout_llst_put(llst);
3826 spin_lock(&lfsck->li_lock);
/* Operation table for the MDT-side (master) layout LFSCK component. */
3833 static struct lfsck_operations lfsck_layout_master_ops = {
3834 .lfsck_reset = lfsck_layout_reset,
3835 .lfsck_fail = lfsck_layout_fail,
3836 .lfsck_checkpoint = lfsck_layout_master_checkpoint,
3837 .lfsck_prep = lfsck_layout_master_prep,
3838 .lfsck_exec_oit = lfsck_layout_master_exec_oit,
3839 .lfsck_exec_dir = lfsck_layout_exec_dir,
3840 .lfsck_post = lfsck_layout_master_post,
3841 .lfsck_dump = lfsck_layout_dump,
3842 .lfsck_double_scan = lfsck_layout_master_double_scan,
3843 .lfsck_data_release = lfsck_layout_master_data_release,
3844 .lfsck_quit = lfsck_layout_master_quit,
3845 .lfsck_in_notify = lfsck_layout_master_in_notify,
3846 .lfsck_query = lfsck_layout_query,
3847 .lfsck_stop_notify = lfsck_layout_master_stop_notify,
/* Operation table for the OST-side (slave) layout LFSCK component. */
3850 static struct lfsck_operations lfsck_layout_slave_ops = {
3851 .lfsck_reset = lfsck_layout_reset,
3852 .lfsck_fail = lfsck_layout_fail,
3853 .lfsck_checkpoint = lfsck_layout_slave_checkpoint,
3854 .lfsck_prep = lfsck_layout_slave_prep,
3855 .lfsck_exec_oit = lfsck_layout_slave_exec_oit,
3856 .lfsck_exec_dir = lfsck_layout_exec_dir,
3857 .lfsck_post = lfsck_layout_slave_post,
3858 .lfsck_dump = lfsck_layout_dump,
3859 .lfsck_double_scan = lfsck_layout_slave_double_scan,
3860 .lfsck_data_release = lfsck_layout_slave_data_release,
3861 .lfsck_in_notify = lfsck_layout_slave_in_notify,
3862 .lfsck_query = lfsck_layout_query,
3863 .lfsck_join = lfsck_layout_slave_join,
3866 int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
3868 struct lfsck_component *com;
3869 struct lfsck_layout *lo;
3870 struct dt_object *root = NULL;
3871 struct dt_object *obj;
3879 INIT_LIST_HEAD(&com->lc_link);
3880 INIT_LIST_HEAD(&com->lc_link_dir);
3881 init_rwsem(&com->lc_sem);
3882 atomic_set(&com->lc_ref, 1);
3883 com->lc_lfsck = lfsck;
3884 com->lc_type = LT_LAYOUT;
3885 if (lfsck->li_master) {
3886 struct lfsck_layout_master_data *llmd;
3888 com->lc_ops = &lfsck_layout_master_ops;
3889 OBD_ALLOC_PTR(llmd);
3891 GOTO(out, rc = -ENOMEM);
3893 INIT_LIST_HEAD(&llmd->llmd_req_list);
3894 spin_lock_init(&llmd->llmd_lock);
3895 INIT_LIST_HEAD(&llmd->llmd_ost_list);
3896 INIT_LIST_HEAD(&llmd->llmd_ost_phase1_list);
3897 INIT_LIST_HEAD(&llmd->llmd_ost_phase2_list);
3898 INIT_LIST_HEAD(&llmd->llmd_mdt_list);
3899 INIT_LIST_HEAD(&llmd->llmd_mdt_phase1_list);
3900 INIT_LIST_HEAD(&llmd->llmd_mdt_phase2_list);
3901 init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq);
3902 com->lc_data = llmd;
3904 struct lfsck_layout_slave_data *llsd;
3906 com->lc_ops = &lfsck_layout_slave_ops;
3907 OBD_ALLOC_PTR(llsd);
3909 GOTO(out, rc = -ENOMEM);
3911 INIT_LIST_HEAD(&llsd->llsd_seq_list);
3912 INIT_LIST_HEAD(&llsd->llsd_master_list);
3913 spin_lock_init(&llsd->llsd_lock);
3914 com->lc_data = llsd;
3916 com->lc_file_size = sizeof(*lo);
3917 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
3918 if (com->lc_file_ram == NULL)
3919 GOTO(out, rc = -ENOMEM);
3921 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
3922 if (com->lc_file_disk == NULL)
3923 GOTO(out, rc = -ENOMEM);
3925 root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
3927 GOTO(out, rc = PTR_ERR(root));
3929 if (unlikely(!dt_try_as_dir(env, root)))
3930 GOTO(out, rc = -ENOTDIR);
3932 obj = local_file_find_or_create(env, lfsck->li_los, root,
3934 S_IFREG | S_IRUGO | S_IWUSR);
3936 GOTO(out, rc = PTR_ERR(obj));
3939 rc = lfsck_layout_load(env, com);
3941 rc = lfsck_layout_reset(env, com, true);
3942 else if (rc == -ENOENT)
3943 rc = lfsck_layout_init(env, com);
3948 lo = com->lc_file_ram;
3949 switch (lo->ll_status) {
3955 spin_lock(&lfsck->li_lock);
3956 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
3957 spin_unlock(&lfsck->li_lock);
3960 CERROR("%s: unknown lfsck_layout status: rc = %u\n",
3961 lfsck_lfsck2name(lfsck), lo->ll_status);
3963 case LS_SCANNING_PHASE1:
3964 case LS_SCANNING_PHASE2:
3965 /* No need to store the status to disk right now.
3966 * If the system crashed before the status stored,
3967 * it will be loaded back when next time. */
3968 lo->ll_status = LS_CRASHED;
3969 lo->ll_flags |= LF_INCOMPLETE;
3976 spin_lock(&lfsck->li_lock);
3977 list_add_tail(&com->lc_link, &lfsck->li_list_scan);
3978 spin_unlock(&lfsck->li_lock);
3982 if (lo->ll_flags & LF_CRASHED_LASTID) {
3983 LASSERT(lfsck->li_out_notify != NULL);
3985 lfsck->li_out_notify(env, lfsck->li_out_notify_data,
3986 LE_LASTID_REBUILDING);
3992 if (root != NULL && !IS_ERR(root))
3993 lu_object_put(env, &root->do_lu);
3996 lfsck_component_cleanup(env, com);