4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012, Intel Corporation.
26 * lustre/mdd/mdd_lfsck.c
28 * Top-level entry points into mdd module
30 * LFSCK controller, which scans the whole device through low layer
31 * iteration APIs, drives all lfsck components, controls the speed.
33 * Author: Fan Yong <yong.fan@whamcloud.com>
37 # define EXPORT_SYMTAB
39 #define DEBUG_SUBSYSTEM S_MDS
41 #include <lustre/lustre_idl.h>
42 #include <lustre_fid.h>
43 #include <obd_support.h>
45 #include "mdd_internal.h"
46 #include "mdd_lfsck.h"
48 #define HALF_SEC (CFS_HZ >> 1)
49 #define LFSCK_CHECKPOINT_INTERVAL 60
50 #define MDS_DIR_DUMMY_START 0xffffffffffffffffULL
52 #define LFSCK_NAMEENTRY_DEAD 1 /* The object has been unlinked. */
53 #define LFSCK_NAMEENTRY_REMOVED 2 /* The entry has been removed. */
54 #define LFSCK_NAMEENTRY_RECREATED 3 /* The entry has been recreated. */
/* On-disk object name of the persistent LFSCK bookmark file. */
const char lfsck_bookmark_name[] = "lfsck_bookmark";
/* On-disk object name of the namespace LFSCK tracing file. */
const char lfsck_namespace_name[] = "lfsck_namespace";
59 static const char *lfsck_status_names[] = {
71 static const char *lfsck_flags_names[] = {
78 static const char *lfsck_param_names[] = {
86 static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck)
88 return container_of0(lfsck, struct mdd_device, mdd_lfsck);
91 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
93 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
95 return mdd2obd_dev(mdd)->obd_name;
98 static inline void mdd_lfsck_component_get(struct lfsck_component *com)
100 atomic_inc(&com->lc_ref);
103 static inline void mdd_lfsck_component_put(const struct lu_env *env,
104 struct lfsck_component *com)
106 if (atomic_dec_and_test(&com->lc_ref)) {
107 if (com->lc_obj != NULL)
108 lu_object_put(env, &com->lc_obj->do_lu);
109 if (com->lc_file_ram != NULL)
110 OBD_FREE(com->lc_file_ram, com->lc_file_size);
111 if (com->lc_file_disk != NULL)
112 OBD_FREE(com->lc_file_disk, com->lc_file_size);
117 static inline struct lfsck_component *
118 __mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list)
120 struct lfsck_component *com;
122 cfs_list_for_each_entry(com, list, lc_link) {
123 if (com->lc_type == type)
/*
 * Find the component with the given @type by searching the scan,
 * double-scan and idle lists in order, and return it with a reference
 * held (taken under ml_lock before the lock is dropped).
 * NOTE(review): the early-exit/goto-label lines of this function are
 * not visible in this extract; visible code is kept verbatim.
 */
static struct lfsck_component *
mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type)
	struct lfsck_component *com;
	spin_lock(&lfsck->ml_lock);
	com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan);
	/* not on the scan list: try the double-scan list next */
	com = __mdd_lfsck_component_find(lfsck, type,
					 &lfsck->ml_list_double_scan);
	/* finally the idle list */
	com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_idle);
	/* pin the component before releasing ml_lock */
	mdd_lfsck_component_get(com);
	spin_unlock(&lfsck->ml_lock);
153 static void mdd_lfsck_component_cleanup(const struct lu_env *env,
154 struct lfsck_component *com)
156 if (!cfs_list_empty(&com->lc_link))
157 cfs_list_del_init(&com->lc_link);
158 if (!cfs_list_empty(&com->lc_link_dir))
159 cfs_list_del_init(&com->lc_link_dir);
161 mdd_lfsck_component_put(env, com);
/*
 * Dump the set @bits as "@prefix: name1,name2\n" into *buf, using
 * @names[i] for bit i.  Presumably advances *buf/*len and returns the
 * bytes consumed or a -ve error -- TODO confirm; the tail of the
 * function is not visible in this extract.
 */
static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
	rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
	/* NOTE(review): "flag = 1 << i" is undefined behavior once i
	 * reaches 31 for a 32-bit int; harmless while no flag uses the
	 * top bit -- verify against the flag definitions. */
	for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
		rc = snprintf(*buf, *len, "%s%c", names[i],
			      bits != 0 ? ',' : '\n');
/*
 * Dump "@prefix: <age> seconds" (age relative to now) into *buf, or
 * "@prefix: N/A" when @time is unset.  The branch structure selecting
 * between the two snprintf calls is not visible in this extract.
 */
static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
	rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
		      cfs_time_current_sec() - time);
	rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
/*
 * Dump a scan position as "<oit cookie>, <dir parent FID>, <dir cookie>",
 * with "N/A" for whichever parts are unset: no dir parent means only the
 * OIT cookie (if any) is meaningful.
 */
static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
	if (fid_is_zero(&pos->lp_dir_parent)) {
		if (pos->lp_oit_cookie == 0)
			/* nothing scanned yet */
			rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
		/* OIT iteration only, no directory in progress */
		rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
			      prefix, pos->lp_oit_cookie);
	/* inside a directory: all three fields are valid */
	rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
		      prefix, pos->lp_oit_cookie,
		      PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
/*
 * Snapshot the current scan position: the OIT iterator cookie plus,
 * when a directory traversal is in progress, the directory FID and the
 * directory iterator cookie.  Zeroed when iteration has not started.
 */
static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
			       struct lfsck_position *pos, bool oit_processed,
	const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;
	spin_lock(&lfsck->ml_lock);
	if (unlikely(lfsck->ml_di_oit == NULL)) {
		/* OIT iteration not running: report the zero position */
		spin_unlock(&lfsck->ml_lock);
		memset(pos, 0, sizeof(*pos));
	pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);
	LASSERT(pos->lp_oit_cookie > 0);
	/* step back one so the not-yet-processed object is rescanned */
	pos->lp_oit_cookie--;
	if (lfsck->ml_di_dir != NULL) {
		struct dt_object *dto = lfsck->ml_obj_dir;
		pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
		pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
		LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START);
		if (pos->lp_dir_cookie == MDS_DIR_END_OFF)
			LASSERT(dir_processed);
		/* For the dir which just to be processed,
		 * lp_dir_cookie will become MDS_DIR_DUMMY_START,
		 * which can be correctly handled by mdd_lfsck_prep. */
		pos->lp_dir_cookie--;
	/* no directory in progress: clear the dir part of the position */
	fid_zero(&pos->lp_dir_parent);
	pos->lp_dir_cookie = 0;
	spin_unlock(&lfsck->ml_lock);
279 static inline void mdd_lfsck_pos_set_zero(struct lfsck_position *pos)
281 memset(pos, 0, sizeof(*pos));
284 static inline int mdd_lfsck_pos_is_zero(const struct lfsck_position *pos)
286 return pos->lp_oit_cookie == 0 && fid_is_zero(&pos->lp_dir_parent);
289 static inline int mdd_lfsck_pos_is_eq(const struct lfsck_position *pos1,
290 const struct lfsck_position *pos2)
292 if (pos1->lp_oit_cookie < pos2->lp_oit_cookie)
295 if (pos1->lp_oit_cookie > pos2->lp_oit_cookie)
298 if (fid_is_zero(&pos1->lp_dir_parent) &&
299 !fid_is_zero(&pos2->lp_dir_parent))
302 if (!fid_is_zero(&pos1->lp_dir_parent) &&
303 fid_is_zero(&pos2->lp_dir_parent))
306 if (fid_is_zero(&pos1->lp_dir_parent) &&
307 fid_is_zero(&pos2->lp_dir_parent))
310 LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
312 if (pos1->lp_dir_cookie < pos2->lp_dir_cookie)
315 if (pos1->lp_dir_cookie > pos2->lp_dir_cookie)
321 static void mdd_lfsck_close_dir(const struct lu_env *env,
322 struct md_lfsck *lfsck)
324 struct dt_object *dir_obj = lfsck->ml_obj_dir;
325 const struct dt_it_ops *dir_iops = &dir_obj->do_index_ops->dio_it;
326 struct dt_it *dir_di = lfsck->ml_di_dir;
328 spin_lock(&lfsck->ml_lock);
329 lfsck->ml_di_dir = NULL;
330 spin_unlock(&lfsck->ml_lock);
332 dir_iops->put(env, dir_di);
333 dir_iops->fini(env, dir_di);
334 lfsck->ml_obj_dir = NULL;
335 lu_object_put(env, &dir_obj->do_lu);
338 static void __mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
340 lfsck->ml_bookmark_ram.lb_speed_limit = limit;
341 if (limit != LFSCK_SPEED_NO_LIMIT) {
342 if (limit > CFS_HZ) {
343 lfsck->ml_sleep_rate = limit / CFS_HZ;
344 lfsck->ml_sleep_jif = 1;
346 lfsck->ml_sleep_rate = 1;
347 lfsck->ml_sleep_jif = CFS_HZ / limit;
350 lfsck->ml_sleep_jif = 0;
351 lfsck->ml_sleep_rate = 0;
355 static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
357 struct ptlrpc_thread *thread = &lfsck->ml_thread;
358 struct l_wait_info lwi;
360 if (lfsck->ml_sleep_jif > 0 &&
361 lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
362 spin_lock(&lfsck->ml_lock);
363 if (likely(lfsck->ml_sleep_jif > 0 &&
364 lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
365 lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
366 LWI_ON_SIGNAL_NOOP, NULL);
367 spin_unlock(&lfsck->ml_lock);
369 l_wait_event(thread->t_ctl_waitq,
370 !thread_is_running(thread),
372 lfsck->ml_new_scanned = 0;
374 spin_unlock(&lfsck->ml_lock);
379 /* lfsck_bookmark file ops */
381 static void inline mdd_lfsck_bookmark_to_cpu(struct lfsck_bookmark *des,
382 struct lfsck_bookmark *src)
384 des->lb_magic = le32_to_cpu(src->lb_magic);
385 des->lb_version = le16_to_cpu(src->lb_version);
386 des->lb_param = le16_to_cpu(src->lb_param);
387 des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
390 static void inline mdd_lfsck_bookmark_to_le(struct lfsck_bookmark *des,
391 struct lfsck_bookmark *src)
393 des->lb_magic = cpu_to_le32(src->lb_magic);
394 des->lb_version = cpu_to_le16(src->lb_version);
395 des->lb_param = cpu_to_le16(src->lb_param);
396 des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
/*
 * Load the persistent bookmark into ml_bookmark_disk and convert it to
 * the CPU-order copy in ml_bookmark_ram.  An empty file is reported as
 * -ENODATA so the caller can initialize a fresh bookmark.
 * NOTE(review): the rc-dispatch branches are not visible in this
 * extract; visible code is kept verbatim.
 */
static int mdd_lfsck_bookmark_load(const struct lu_env *env,
				   struct md_lfsck *lfsck)
	int len = sizeof(struct lfsck_bookmark);
	rc = dt_record_read(env, lfsck->ml_bookmark_obj,
			    mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
		struct lfsck_bookmark *bm = &lfsck->ml_bookmark_ram;
		mdd_lfsck_bookmark_to_cpu(bm, &lfsck->ml_bookmark_disk);
		if (bm->lb_magic != LFSCK_BOOKMARK_MAGIC) {
			CWARN("%.16s: invalid lfsck_bookmark magic "
			      "0x%x != 0x%x\n", mdd_lfsck2name(lfsck),
			      bm->lb_magic, LFSCK_BOOKMARK_MAGIC);
			/* Process it as new lfsck_bookmark. */
	if (rc == -EFAULT && pos == 0)
		/* return -ENODATA for empty lfsck_bookmark. */
	CERROR("%.16s: fail to load lfsck_bookmark, "
	       "expected = %d, rc = %d\n",
	       mdd_lfsck2name(lfsck), len, rc);
432 static int mdd_lfsck_bookmark_store(const struct lu_env *env,
433 struct md_lfsck *lfsck)
435 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
436 struct thandle *handle;
437 struct dt_object *obj = lfsck->ml_bookmark_obj;
439 int len = sizeof(struct lfsck_bookmark);
443 mdd_lfsck_bookmark_to_le(&lfsck->ml_bookmark_disk,
444 &lfsck->ml_bookmark_ram);
445 handle = dt_trans_create(env, mdd->mdd_bottom);
446 if (IS_ERR(handle)) {
447 rc = PTR_ERR(handle);
448 CERROR("%.16s: fail to create trans for storing "
449 "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
453 rc = dt_declare_record_write(env, obj, len, 0, handle);
455 CERROR("%.16s: fail to declare trans for storing "
456 "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
460 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
462 CERROR("%.16s: fail to start trans for storing "
463 "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
467 rc = dt_record_write(env, obj,
468 mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
471 CERROR("%.16s: fail to store lfsck_bookmark, expected = %d, "
472 "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
477 dt_trans_stop(env, mdd->mdd_bottom, handle);
481 static int mdd_lfsck_bookmark_init(const struct lu_env *env,
482 struct md_lfsck *lfsck)
484 struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram;
487 memset(mb, 0, sizeof(mb));
488 mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
489 mb->lb_version = LFSCK_VERSION_V2;
490 mutex_lock(&lfsck->ml_mutex);
491 rc = mdd_lfsck_bookmark_store(env, lfsck);
492 mutex_unlock(&lfsck->ml_mutex);
496 /* lfsck_namespace file ops */
498 static void inline mdd_lfsck_position_to_cpu(struct lfsck_position *des,
499 struct lfsck_position *src)
501 des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
502 fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
503 des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
506 static void inline mdd_lfsck_position_to_le(struct lfsck_position *des,
507 struct lfsck_position *src)
509 des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
510 fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
511 des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
514 static void inline mdd_lfsck_namespace_to_cpu(struct lfsck_namespace *des,
515 struct lfsck_namespace *src)
517 des->ln_magic = le32_to_cpu(src->ln_magic);
518 des->ln_status = le32_to_cpu(src->ln_status);
519 des->ln_flags = le32_to_cpu(src->ln_flags);
520 des->ln_success_count = le32_to_cpu(src->ln_success_count);
521 des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
522 des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
523 des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
524 des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
525 des->ln_time_last_checkpoint =
526 le64_to_cpu(src->ln_time_last_checkpoint);
527 mdd_lfsck_position_to_cpu(&des->ln_pos_latest_start,
528 &src->ln_pos_latest_start);
529 mdd_lfsck_position_to_cpu(&des->ln_pos_last_checkpoint,
530 &src->ln_pos_last_checkpoint);
531 mdd_lfsck_position_to_cpu(&des->ln_pos_first_inconsistent,
532 &src->ln_pos_first_inconsistent);
533 des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
534 des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
535 des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
536 des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
537 des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
538 des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
539 des->ln_objs_repaired_phase2 =
540 le64_to_cpu(src->ln_objs_repaired_phase2);
541 des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
542 des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
543 des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
544 fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
545 &src->ln_fid_latest_scanned_phase2);
548 static void inline mdd_lfsck_namespace_to_le(struct lfsck_namespace *des,
549 struct lfsck_namespace *src)
551 des->ln_magic = cpu_to_le32(src->ln_magic);
552 des->ln_status = cpu_to_le32(src->ln_status);
553 des->ln_flags = cpu_to_le32(src->ln_flags);
554 des->ln_success_count = cpu_to_le32(src->ln_success_count);
555 des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
556 des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
557 des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
558 des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
559 des->ln_time_last_checkpoint =
560 cpu_to_le64(src->ln_time_last_checkpoint);
561 mdd_lfsck_position_to_le(&des->ln_pos_latest_start,
562 &src->ln_pos_latest_start);
563 mdd_lfsck_position_to_le(&des->ln_pos_last_checkpoint,
564 &src->ln_pos_last_checkpoint);
565 mdd_lfsck_position_to_le(&des->ln_pos_first_inconsistent,
566 &src->ln_pos_first_inconsistent);
567 des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
568 des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
569 des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
570 des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
571 des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
572 des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
573 des->ln_objs_repaired_phase2 =
574 cpu_to_le64(src->ln_objs_repaired_phase2);
575 des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
576 des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
577 des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
578 fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
579 &src->ln_fid_latest_scanned_phase2);
583 * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
584 * \retval 0: succeed.
585 * \retval -ve: failed cases.
/*
 * Load the lfsck_namespace state from the trace object's xattr into
 * lc_file_disk and convert it into lc_file_ram.
 * NOTE(review): the rc-dispatch branches (comparing the xattr size and
 * mapping return codes) are not visible in this extract.
 */
static int mdd_lfsck_namespace_load(const struct lu_env *env,
				    struct lfsck_component *com)
	int len = com->lc_file_size;
	rc = dt_xattr_get(env, com->lc_obj,
			  mdd_buf_get(env, com->lc_file_disk, len),
			  XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
		struct lfsck_namespace *ns = com->lc_file_ram;
		mdd_lfsck_namespace_to_cpu(ns,
				(struct lfsck_namespace *)com->lc_file_disk);
		if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
			/* broken magic: caller resets the trace file */
			CWARN("%.16s: invalid lfsck_namespace magic "
			      mdd_lfsck2name(com->lc_lfsck),
			      ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
	} else if (rc != -ENODATA) {
		/* -ENODATA (no state yet) is passed through silently */
		CERROR("%.16s: fail to load lfsck_namespace, expected = %d, "
		       "rc = %d\n", mdd_lfsck2name(com->lc_lfsck), len, rc);
619 static int mdd_lfsck_namespace_store(const struct lu_env *env,
620 struct lfsck_component *com, bool init)
622 struct dt_object *obj = com->lc_obj;
623 struct md_lfsck *lfsck = com->lc_lfsck;
624 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
625 struct thandle *handle;
626 int len = com->lc_file_size;
630 mdd_lfsck_namespace_to_le((struct lfsck_namespace *)com->lc_file_disk,
631 (struct lfsck_namespace *)com->lc_file_ram);
632 handle = dt_trans_create(env, mdd->mdd_bottom);
633 if (IS_ERR(handle)) {
634 rc = PTR_ERR(handle);
635 CERROR("%.16s: fail to create trans for storing "
636 "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
640 rc = dt_declare_xattr_set(env, obj,
641 mdd_buf_get(env, com->lc_file_disk, len),
642 XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
644 CERROR("%.16s: fail to declare trans for storing "
645 "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
649 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
651 CERROR("%.16s: fail to start trans for storing "
652 "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
656 rc = dt_xattr_set(env, obj,
657 mdd_buf_get(env, com->lc_file_disk, len),
658 XATTR_NAME_LFSCK_NAMESPACE,
659 init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
660 handle, BYPASS_CAPA);
662 CERROR("%.16s: fail to store lfsck_namespace, len = %d, "
663 "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
668 dt_trans_stop(env, mdd->mdd_bottom, handle);
672 static int mdd_lfsck_namespace_init(const struct lu_env *env,
673 struct lfsck_component *com)
675 struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
678 memset(ns, 0, sizeof(*ns));
679 ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
680 ns->ln_status = LS_INIT;
681 down_write(&com->lc_sem);
682 rc = mdd_lfsck_namespace_store(env, com, true);
683 up_write(&com->lc_sem);
/*
 * Declare the three steps needed to unlink and destroy the old
 * lfsck_namespace object: delete the name entry from the parent,
 * drop the child's reference, destroy the child.
 * NOTE(review): the parameter lines naming the parent/child/name
 * arguments and the rc checks between the declares are not visible in
 * this extract; visible code is kept verbatim.
 */
static int mdd_declare_lfsck_namespace_unlink(const struct lu_env *env,
					      struct mdd_device *mdd,
					      struct thandle *handle)
	rc = dt_declare_delete(env, p, (const struct dt_key *)name, handle);
	rc = dt_declare_ref_del(env, c, handle);
	rc = dt_declare_destroy(env, c, handle);
/*
 * Remove the on-disk lfsck_namespace trace object: delete its name
 * entry from the root, drop its reference and destroy it, all in one
 * local transaction.  On a partial failure the name entry is inserted
 * back so the namespace stays consistent.
 * NOTE(review): the error-check lines between the visible calls are
 * missing from this extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_unlink(const struct lu_env *env,
				      struct mdd_device *mdd,
				      struct lfsck_component *com)
	struct mdd_thread_info *info = mdd_env_info(env);
	struct lu_fid *fid = &info->mti_fid;
	struct dt_object *child = com->lc_obj;
	struct dt_object *parent;
	struct thandle *handle;
	/* resolve the root ("") directory on the bottom device */
	parent = dt_store_resolve(env, mdd->mdd_bottom, "", fid);
		RETURN(rc = PTR_ERR(parent));
	if (!dt_try_as_dir(env, parent))
		GOTO(out, rc = -ENOTDIR);
	handle = dt_trans_create(env, mdd->mdd_bottom);
		GOTO(out, rc = PTR_ERR(handle));
	rc = mdd_declare_lfsck_namespace_unlink(env, mdd, parent, child,
						lfsck_namespace_name, handle);
	rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
	dt_write_lock(env, child, MOR_TGT_CHILD);
	rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name,
		       handle, BYPASS_CAPA);
	rc = child->do_ops->do_ref_del(env, child, handle);
		/* ref_del failed: restore the just-deleted name entry */
		lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
		rc = dt_insert(env, parent,
			       (const struct dt_rec*)fid,
			       (const struct dt_key *)lfsck_namespace_name,
			       handle, BYPASS_CAPA, 1);
	rc = dt_destroy(env, child, handle);
	dt_write_unlock(env, child);
	lu_object_put(env, &child->do_lu);
	dt_trans_stop(env, mdd->mdd_bottom, handle);
	lu_object_put(env, &parent->do_lu);
780 static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
781 struct lfsck_component *com,
782 const struct lu_fid *fid,
785 struct lu_fid *key = &mdd_env_info(env)->mti_fid;
788 fid_cpu_to_be(key, fid);
789 rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
790 (const struct dt_key *)key, BYPASS_CAPA);
/*
 * Remove the tracing entry for @fid from the lfsck_namespace index in
 * one local transaction.  The index key is the big-endian FID.
 * NOTE(review): the rc checks between the visible calls are missing
 * from this extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_delete(const struct lu_env *env,
				      struct lfsck_component *com,
				      const struct lu_fid *fid)
	struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
	struct lu_fid *key = &mdd_env_info(env)->mti_fid;
	struct thandle *handle;
	struct dt_object *obj = com->lc_obj;
	handle = dt_trans_create(env, mdd->mdd_bottom);
		RETURN(PTR_ERR(handle));
	/* NOTE(review): declares with @fid but deletes with the swapped
	 * @key -- harmless if the declaration only sizes credits, but
	 * worth confirming. */
	rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
	rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
	fid_cpu_to_be(key, fid);
	rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
	dt_trans_stop(env, mdd->mdd_bottom, handle);
/*
 * Record (or re-record) tracing @flags for @fid in the lfsck_namespace
 * index: look up the existing entry; when present and @force, delete it
 * first; then insert the new flags keyed by the big-endian FID.
 * NOTE(review): the branch structure (and the reason two
 * dt_trans_create calls appear) is missing from this extract; visible
 * code is kept verbatim.
 */
static int mdd_lfsck_namespace_update(const struct lu_env *env,
				      struct lfsck_component *com,
				      const struct lu_fid *fid,
				      __u8 flags, bool force)
	struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
	struct lu_fid *key = &mdd_env_info(env)->mti_fid;
	struct thandle *handle;
	struct dt_object *obj = com->lc_obj;
	rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
	if (rc != 0 && rc != -ENOENT)
	/* entry already exists: only rewrite when forced and different */
	if (!force || flags == tf)
	handle = dt_trans_create(env, mdd->mdd_bottom);
		RETURN(PTR_ERR(handle));
	rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
	handle = dt_trans_create(env, mdd->mdd_bottom);
		RETURN(PTR_ERR(handle));
	rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
			       (const struct dt_key *)fid, handle);
	rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
	fid_cpu_to_be(key, fid);
	/* replace path: drop the stale entry before re-inserting */
	rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
	CERROR("%s: fail to insert "DFID", rc = %d\n",
	       mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
	rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
		       (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
	dt_trans_stop(env, mdd->mdd_bottom, handle);
896 * \retval +ve repaired
897 * \retval 0 no need to repair
898 * \retval -ve error cases
/*
 * Phase-2 repair for a single object: walk its linkEA entries, verify
 * each against the parent directory's name entry, drop dangling or
 * unmatched linkEA records, and rewrite the linkEA if changed.
 * NOTE(review): many control-flow lines (GOTO labels, loop closers,
 * rc checks) are missing from this extract; visible code is kept
 * verbatim.
 */
static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
					       struct lfsck_component *com,
					       struct mdd_object *child,
	struct mdd_thread_info *info = mdd_env_info(env);
	struct lu_attr *la = &info->mti_la;
	struct lu_name *cname = &info->mti_name;
	struct lu_fid *pfid = &info->mti_fid;
	struct lu_fid *cfid = &info->mti_fid2;
	struct md_lfsck *lfsck = com->lc_lfsck;
	struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
	struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
	struct lfsck_namespace *ns =
		(struct lfsck_namespace *)com->lc_file_ram;
	struct mdd_link_data ldata = { 0 };
	struct thandle *handle = NULL;
	if (com->lc_journal) {
		/* journal mode: open a transaction before taking locks */
		handle = mdd_trans_create(env, mdd);
			RETURN(rc = PTR_ERR(handle));
		/* NOTE(review): stray double semicolon below. */
		rc = mdd_declare_links_add(env, child, handle);;
		rc = mdd_trans_start(env, mdd, handle);
		mdd_write_lock(env, child, MOR_TGT_CHILD);
	if (unlikely(mdd_is_dead_obj(child)))
	rc = mdd_links_read(env, child, &ldata);
	if ((bk->lb_param & LPF_DRYRUN) &&
	    (rc == -EINVAL || rc == -ENODATA))
	rc = mdd_la_get(env, child, la, BYPASS_CAPA);
	ldata.ml_lee = (struct link_ea_entry *)(ldata.ml_leh + 1);
	count = ldata.ml_leh->leh_reccount;
	while (count-- > 0) {
		struct mdd_object *parent = NULL;
		struct dt_object *dir;
		mdd_lee_unpack(ldata.ml_lee, &ldata.ml_reclen, cname, pfid);
		if (!fid_is_sane(pfid))
		parent = mdd_object_find(env, mdd, pfid);
		else if (IS_ERR(parent))
			GOTO(stop, rc = PTR_ERR(parent));
		if (!mdd_object_exists(parent))
		/* XXX: need more processing for remote object in the future. */
		if (mdd_object_remote(parent)) {
			mdd_object_put(env, parent);
			ldata.ml_lee = (struct link_ea_entry *)
				       ((char *)ldata.ml_lee + ldata.ml_reclen);
		dir = mdd_object_child(parent);
		if (unlikely(!dt_try_as_dir(env, dir)))
		/* To guarantee the 'name' is terminated with '0'. */
		memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
		info->mti_key[cname->ln_namelen] = 0;
		cname->ln_name = info->mti_key;
		rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
			       (const struct dt_key *)cname->ln_name,
		if (rc != 0 && rc != -ENOENT) {
			mdd_object_put(env, parent);
		if (lu_fid_eq(cfid, mdo2fid(child))) {
			/* name entry matches: this linkEA entry is valid */
			mdd_object_put(env, parent);
			ldata.ml_lee = (struct link_ea_entry *)
				       ((char *)ldata.ml_lee + ldata.ml_reclen);
	if (ldata.ml_leh->leh_reccount > la->la_nlink)
	/* XXX: For the case of there is linkea entry, but without name
	 * entry pointing to the object, and the object link count
	 * isn't less than the count of name entries, then add the
	 * name entry back to namespace.
	 *
	 * It is out of LFSCK 1.5 scope, will implement it in the
	 * future. Keep the linkEA entry. */
	mdd_object_put(env, parent);
	ldata.ml_lee = (struct link_ea_entry *)
		       ((char *)ldata.ml_lee + ldata.ml_reclen);
	mdd_object_put(env, parent);
	if (bk->lb_param & LPF_DRYRUN)
	CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
	       PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
	mdd_links_del_buf(env, &ldata, cname);
	if (!com->lc_journal) {
		/* first repair found: restart in journal mode */
		com->lc_journal = 1;
	rc = mdd_links_write(env, child, &ldata, handle);
	mdd_write_unlock(env, child);
	mdd_trans_stop(env, mdd, rc, handle);
	if (rc == 0 && update) {
		ns->ln_objs_nlink_repaired++;
1068 /* namespace APIs */
/*
 * Reset the namespace LFSCK state: clear the in-RAM statistics
 * (preserving the success count and last-complete time unless @init),
 * re-create the on-disk trace index object and store the fresh state.
 * NOTE(review): the branch/error-check lines between the visible calls
 * are missing from this extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_reset(const struct lu_env *env,
				     struct lfsck_component *com, bool init)
	struct mdd_thread_info *info = mdd_env_info(env);
	struct lu_fid *fid = &info->mti_fid;
	struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
	struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
	struct md_object *mdo;
	struct dt_object *dto;
	down_write(&com->lc_sem);
	memset(ns, 0, sizeof(*ns));
		/* non-init reset: carry over the historical counters */
		__u32 count = ns->ln_success_count;
		__u64 last_time = ns->ln_time_last_complete;
		memset(ns, 0, sizeof(*ns));
		ns->ln_success_count = count;
		ns->ln_time_last_complete = last_time;
	ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
	ns->ln_status = LS_INIT;
	/* drop the old trace object, then re-create it from scratch */
	rc = mdd_lfsck_namespace_unlink(env, mdd, com);
	lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
	mdo = llo_store_create_index(env, &mdd->mdd_md_dev, mdd->mdd_bottom, "",
				     lfsck_namespace_name, fid,
				     &dt_lfsck_features);
		GOTO(out, rc = PTR_ERR(mdo));
	lu_object_put(env, &mdo->mo_lu);
	dto = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name, fid);
		GOTO(out, rc = PTR_ERR(dto));
	rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
	rc = mdd_lfsck_namespace_store(env, com, true);
	up_write(&com->lc_sem);
/*
 * Account one failed item and, if this is the first inconsistency seen,
 * record the current scan position so a later run can restart from it.
 * NOTE(review): the return-type line of this definition is missing from
 * this extract; visible code is kept verbatim.
 */
mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
			 bool oit, bool new_checked)
	struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
	down_write(&com->lc_sem);
	com->lc_new_checked++;
	ns->ln_items_failed++;
	/* only the FIRST inconsistent position is remembered */
	if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
		mdd_lfsck_pos_fill(env, com->lc_lfsck,
				   &ns->ln_pos_first_inconsistent, oit, !oit);
	up_write(&com->lc_sem);
/*
 * Persist a checkpoint of the phase-1 scan: update the last-checkpoint
 * position/time, fold the newly checked item count and elapsed run
 * time into the stats, and store the state.  With @init, records the
 * scan start instead.
 * NOTE(review): the init/non-init branch lines are missing from this
 * extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
					  struct lfsck_component *com,
	struct md_lfsck *lfsck = com->lc_lfsck;
	struct lfsck_namespace *ns =
		(struct lfsck_namespace *)com->lc_file_ram;
	/* nothing new since the last checkpoint: skip the write */
	if (com->lc_new_checked == 0 && !init)
	down_write(&com->lc_sem);
	ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
	ns->ln_time_last_checkpoint = ns->ln_time_latest_start;
	ns->ln_pos_latest_start = lfsck->ml_pos_current;
	/* HALF_SEC rounds the jiffy difference to the nearest second */
	ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
				  HALF_SEC - lfsck->ml_time_last_checkpoint);
	ns->ln_time_last_checkpoint = cfs_time_current_sec();
	ns->ln_items_checked += com->lc_new_checked;
	com->lc_new_checked = 0;
	rc = mdd_lfsck_namespace_store(env, com, false);
	up_write(&com->lc_sem);
/*
 * Prepare the namespace component for a (re)start: reset completed
 * state, choose phase 1 or phase 2 based on LF_SCANNED_ONCE, link the
 * component on the proper scan list, and compute the start position
 * (resume from last checkpoint, or from the first inconsistency when
 * re-running after a dry run).
 * NOTE(review): several closing/branch lines are missing from this
 * extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_prep(const struct lu_env *env,
				    struct lfsck_component *com)
	struct md_lfsck *lfsck = com->lc_lfsck;
	struct lfsck_namespace *ns =
		(struct lfsck_namespace *)com->lc_file_ram;
	struct lfsck_position *pos = &com->lc_pos_start;
	if (ns->ln_status == LS_COMPLETED) {
		/* previous run finished: start from clean state */
		rc = mdd_lfsck_namespace_reset(env, com, false);
	down_write(&com->lc_sem);
	ns->ln_time_latest_start = cfs_time_current_sec();
	spin_lock(&lfsck->ml_lock);
	if (ns->ln_flags & LF_SCANNED_ONCE) {
		if (!lfsck->ml_drop_dryrun ||
		    mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
			/* first pass done: go straight to phase 2 */
			ns->ln_status = LS_SCANNING_PHASE2;
			cfs_list_del_init(&com->lc_link);
			cfs_list_add_tail(&com->lc_link,
					  &lfsck->ml_list_double_scan);
			if (!cfs_list_empty(&com->lc_link_dir))
				cfs_list_del_init(&com->lc_link_dir);
			mdd_lfsck_pos_set_zero(pos);
			/* re-run phase 1 from scratch, clearing all stats */
			ns->ln_status = LS_SCANNING_PHASE1;
			ns->ln_run_time_phase1 = 0;
			ns->ln_run_time_phase2 = 0;
			ns->ln_items_checked = 0;
			ns->ln_items_repaired = 0;
			ns->ln_items_failed = 0;
			ns->ln_dirs_checked = 0;
			ns->ln_mlinked_checked = 0;
			ns->ln_objs_checked_phase2 = 0;
			ns->ln_objs_repaired_phase2 = 0;
			ns->ln_objs_failed_phase2 = 0;
			ns->ln_objs_nlink_repaired = 0;
			ns->ln_objs_lost_found = 0;
			fid_zero(&ns->ln_fid_latest_scanned_phase2);
			if (cfs_list_empty(&com->lc_link_dir))
				cfs_list_add_tail(&com->lc_link_dir,
						  &lfsck->ml_list_dir);
			*pos = ns->ln_pos_first_inconsistent;
		/* never completed a full pass: continue phase 1 */
		ns->ln_status = LS_SCANNING_PHASE1;
		if (cfs_list_empty(&com->lc_link_dir))
			cfs_list_add_tail(&com->lc_link_dir,
					  &lfsck->ml_list_dir);
		if (!lfsck->ml_drop_dryrun ||
		    mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
			/* resume just past the last checkpoint */
			*pos = ns->ln_pos_last_checkpoint;
			pos->lp_oit_cookie++;
			if (!fid_is_zero(&pos->lp_dir_parent)) {
				if (pos->lp_dir_cookie == MDS_DIR_END_OFF) {
					fid_zero(&pos->lp_dir_parent);
				pos->lp_dir_cookie++;
		*pos = ns->ln_pos_first_inconsistent;
	spin_unlock(&lfsck->ml_lock);
	up_write(&com->lc_sem);
1251 static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
1252 struct lfsck_component *com,
1253 struct mdd_object *obj)
1255 down_write(&com->lc_sem);
1256 com->lc_new_checked++;
1257 if (S_ISDIR(mdd_object_type(obj)))
1258 ((struct lfsck_namespace *)com->lc_file_ram)->ln_dirs_checked++;
1259 up_write(&com->lc_sem);
1263 static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
1264 struct mdd_object *obj,
1265 struct thandle *handle)
1269 /* For destroying all invalid linkEA entries. */
1270 rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
1274 /* For insert new linkEA entry. */
1275 rc = mdd_declare_links_add(env, obj, handle);
/*
 * Re-check that the directory entry being processed still matches
 * @obj: the object may have been unlinked, the entry removed, or the
 * name re-created pointing at a different FID while LFSCK was running.
 * Returns one of the LFSCK_NAMEENTRY_* codes, or 0 when still valid.
 * NOTE(review): the @name parameter line and intermediate rc checks
 * are missing from this extract; visible code is kept verbatim.
 */
static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
					   struct md_lfsck *lfsck,
					   struct mdd_object *obj,
	struct dt_object *dir = lfsck->ml_obj_dir;
	struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
	if (unlikely(mdd_is_dead_obj(obj)))
		RETURN(LFSCK_NAMEENTRY_DEAD);
	rc = dt_lookup(env, dir, (struct dt_rec *)fid,
		       (const struct dt_key *)name, BYPASS_CAPA);
		RETURN(LFSCK_NAMEENTRY_REMOVED);
	/* name now resolves to a different object */
	if (!lu_fid_eq(fid, mdo2fid(obj)))
		RETURN(LFSCK_NAMEENTRY_RECREATED);
/*
 * Per-name-entry callback of the namespace component during directory
 * traversal: cross-check the child object's linkEA against the observed
 * (parent FID, name) pair and repair inconsistencies inside a transaction,
 * unless LPF_DRYRUN is set. Also cross-checks nlink vs. linkEA record
 * count for multi-linked objects and records suspects in the trace file.
 * NOTE(review): this excerpt is heavily truncated (embedded original line
 * numbers jump), so several branches, labels and error paths are not
 * visible; comments below only describe what the visible lines establish.
 */
1306 static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
1307 struct lfsck_component *com,
1308 struct mdd_object *obj,
1309 struct lu_dirent *ent)
1311 struct mdd_thread_info *info = mdd_env_info(env);
1312 struct lu_attr *la = &info->mti_la;
1313 struct md_lfsck *lfsck = com->lc_lfsck;
1314 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1315 struct lfsck_namespace *ns =
1316 (struct lfsck_namespace *)com->lc_file_ram;
1317 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1318 struct mdd_link_data ldata = { 0 };
1319 const struct lu_fid *pfid =
1320 lu_object_fid(&lfsck->ml_obj_dir->do_lu);
1321 const struct lu_fid *cfid = mdo2fid(obj);
1322 const struct lu_name *cname;
1323 struct thandle *handle = NULL;
1324 bool repaired = false;
1325 bool locked = false;
1330 cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
1331 down_write(&com->lc_sem);
1332 com->lc_new_checked++;
 /* The low-layer iterator flagged this entry as needing upgrade or
  * repair; record that in the namespace flags. */
1334 if (ent->lde_attrs & LUDA_UPGRADE) {
1335 ns->ln_flags |= LF_UPGRADE;
1337 } else if (ent->lde_attrs & LUDA_REPAIR) {
1338 ns->ln_flags |= LF_INCONSISTENT;
 /* Skip "." and ".." entries. */
1342 if (ent->lde_name[0] == '.' &&
1343 (ent->lde_namelen == 1 ||
1344 (ent->lde_namelen == 2 && ent->lde_name[1] == '.')))
 /* Not dry-run and a repair is (or may be) needed: open a
  * transaction with the declarations from
  * mdd_declare_lfsck_namespace_exec_dir() and lock the child. */
1347 if (!(bk->lb_param & LPF_DRYRUN) &&
1348 (com->lc_journal || repaired)) {
1353 com->lc_journal = 1;
1354 handle = mdd_trans_create(env, mdd);
1356 GOTO(out, rc = PTR_ERR(handle));
1358 rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
1362 rc = mdd_trans_start(env, mdd, handle);
1366 mdd_write_lock(env, obj, MOR_TGT_CHILD);
 /* Re-verify the entry still exists and still points at obj. */
1370 rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
 /* Compare on-disk linkEA with the observed (pfid, cname). */
1374 rc = mdd_links_read(env, obj, &ldata);
1376 count = ldata.ml_leh->leh_reccount;
1377 rc = mdd_links_find(env, obj, &ldata, cname, pfid);
1379 /* For dir, if there are more than one linkea entries,
1380 * then remove all the other redundant linkea entries.*/
1381 if (unlikely(count > 1 &&
1382 S_ISDIR(mdd_object_type(obj))))
1389 ns->ln_flags |= LF_INCONSISTENT;
1390 if (bk->lb_param & LPF_DRYRUN) {
1395 /*For dir, remove the unmatched linkea entry directly.*/
1396 if (S_ISDIR(mdd_object_type(obj))) {
1397 if (!com->lc_journal)
1400 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
1401 handle, BYPASS_CAPA);
 /* linkEA header magic is corrupt: treat the whole xattr as
  * untrustworthy. */
1410 } else if (unlikely(rc == -EINVAL)) {
1411 ns->ln_flags |= LF_INCONSISTENT;
1412 if (bk->lb_param & LPF_DRYRUN) {
1418 if (!com->lc_journal)
1421 /* The magic crashed, we are not sure whether there are more
1422 * corrupt data in the linkea, so remove all linkea entries. */
1423 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
 /* No linkEA at all: old-format object, create one. */
1429 } else if (rc == -ENODATA) {
1430 ns->ln_flags |= LF_UPGRADE;
1431 if (bk->lb_param & LPF_DRYRUN) {
1438 rc = mdd_links_new(env, &ldata);
1443 if (!com->lc_journal)
1446 rc = mdd_links_add_buf(env, &ldata, cname, pfid);
1450 rc = mdd_links_write(env, obj, &ldata, handle);
1454 count = ldata.ml_leh->leh_reccount;
 /* Cross-check nlink against linkEA record count; single-linked
  * objects and directories need no phase-2 tracking. */
1463 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
1468 (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
1469 /* Usually, it is for single linked object or dir, do nothing.*/
1472 /* Following modification will be in another transaction. */
1473 if (handle != NULL) {
1474 LASSERT(mdd_write_locked(env, obj));
1476 mdd_write_unlock(env, obj);
1479 mdd_trans_stop(env, mdd, 0, handle);
 /* Record the multi-linked suspect in the trace file for the
  * second-phase scan. */
1483 ns->ln_mlinked_checked++;
1484 rc = mdd_lfsck_namespace_update(env, com, cfid,
1485 count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
 /* Cleanup/accounting paths (labels truncated in this excerpt). */
1491 mdd_write_unlock(env, obj);
1494 mdd_trans_stop(env, mdd, rc, handle);
1498 ns->ln_items_failed++;
 /* Remember the first inconsistent position so a restart can
  * resume there. */
1499 if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1500 mdd_lfsck_pos_fill(env, lfsck,
1501 &ns->ln_pos_first_inconsistent,
1503 if (!(bk->lb_param & LPF_FAILOUT))
1507 ns->ln_items_repaired++;
1509 com->lc_journal = 0;
1512 up_write(&com->lc_sem);
/*
 * First-phase completion hook for the namespace component: based on
 * "result" move the component to the proper list and status --
 *   result > 0 (visible branch body): phase-2 double scan,
 *   result == 0: paused or stopped,
 *   otherwise:  failed -- then fold the accumulated run time and counters
 * into the trace file and store it.
 * NOTE(review): truncated excerpt — the "result" parameter line and the
 * branch condition for the first case are not visible.
 */
1516 static int mdd_lfsck_namespace_post(const struct lu_env *env,
1517 struct lfsck_component *com,
1520 struct md_lfsck *lfsck = com->lc_lfsck;
1521 struct lfsck_namespace *ns =
1522 (struct lfsck_namespace *)com->lc_file_ram;
1525 down_write(&com->lc_sem);
1527 spin_lock(&lfsck->ml_lock);
 /* Phase 1 done successfully: queue for the double (phase-2) scan. */
1529 ns->ln_status = LS_SCANNING_PHASE2;
1530 ns->ln_flags |= LF_SCANNED_ONCE;
1531 ns->ln_flags &= ~LF_UPGRADE;
1532 cfs_list_del_init(&com->lc_link);
1533 cfs_list_del_init(&com->lc_link_dir);
1534 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_double_scan);
1535 } else if (result == 0) {
1536 if (lfsck->ml_paused) {
1537 ns->ln_status = LS_PAUSED;
1539 ns->ln_status = LS_STOPPED;
1540 cfs_list_del_init(&com->lc_link);
1541 cfs_list_del_init(&com->lc_link_dir);
1542 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1545 ns->ln_status = LS_FAILED;
1546 cfs_list_del_init(&com->lc_link);
1547 cfs_list_del_init(&com->lc_link_dir);
1548 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1550 spin_unlock(&lfsck->ml_lock);
 /* HALF_SEC rounds the duration to the nearest second. */
1552 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1553 HALF_SEC - lfsck->ml_time_last_checkpoint);
1554 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1555 ns->ln_items_checked += com->lc_new_checked;
1556 com->lc_new_checked = 0;
1558 rc = mdd_lfsck_namespace_store(env, com, false);
1560 up_write(&com->lc_sem);
/*
 * Render the namespace component's status into "buf" (procfs-style text):
 * name, status, flags, params, timestamps, positions, then counters and
 * speed statistics whose exact set depends on the current status
 * (SCANNING_PHASE1 / SCANNING_PHASE2 / anything else). All reads are
 * under lc_sem taken for read.
 * NOTE(review): truncated excerpt — the buf/len parameter lines and many
 * GOTO/error lines between sections are not visible.
 */
1565 mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
1568 struct md_lfsck *lfsck = com->lc_lfsck;
1569 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1570 struct lfsck_namespace *ns =
1571 (struct lfsck_namespace *)com->lc_file_ram;
1576 down_read(&com->lc_sem);
 /* Common header: name + status, then bitmask and time/position dumps. */
1577 rc = snprintf(buf, len,
1578 "name: lfsck_namespace\n"
1584 lfsck_status_names[ns->ln_status]);
1590 rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
1595 rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
1600 rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
1601 "time_since_last_completed");
1605 rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
1606 "time_since_latest_start");
1610 rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
1611 "time_since_last_checkpoint");
1615 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
1616 "latest_start_position");
1620 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
1621 "last_checkpoint_position");
1625 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
1626 "first_failure_position");
 /* Phase 1 in progress: phase-1 average + real-time speeds. */
1630 if (ns->ln_status == LS_SCANNING_PHASE1) {
1631 struct lfsck_position pos;
1632 cfs_duration_t duration = cfs_time_current() -
1633 lfsck->ml_time_last_checkpoint;
1634 __u64 checked = ns->ln_items_checked + com->lc_new_checked;
1635 __u64 speed = checked;
1636 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1637 __u32 rtime = ns->ln_run_time_phase1 +
1638 cfs_duration_sec(duration + HALF_SEC);
 /* do_div() guards presumably depend on non-zero duration/rtime
  * checks on the truncated lines above -- TODO confirm. */
1641 do_div(new_checked, duration);
1643 do_div(speed, rtime);
1644 rc = snprintf(buf, len,
1645 "checked_phase1: "LPU64"\n"
1646 "checked_phase2: "LPU64"\n"
1647 "updated_phase1: "LPU64"\n"
1648 "updated_phase2: "LPU64"\n"
1649 "failed_phase1: "LPU64"\n"
1650 "failed_phase2: "LPU64"\n"
1652 "M-linked: "LPU64"\n"
1653 "nlinks_repaired: "LPU64"\n"
1654 "lost_found: "LPU64"\n"
1655 "success_count: %u\n"
1656 "run_time_phase1: %u seconds\n"
1657 "run_time_phase2: %u seconds\n"
1658 "average_speed_phase1: "LPU64" items/sec\n"
1659 "average_speed_phase2: N/A\n"
1660 "real-time_speed_phase1: "LPU64" items/sec\n"
1661 "real-time_speed_phase2: N/A\n",
1663 ns->ln_objs_checked_phase2,
1664 ns->ln_items_repaired,
1665 ns->ln_objs_repaired_phase2,
1666 ns->ln_items_failed,
1667 ns->ln_objs_failed_phase2,
1668 ns->ln_dirs_checked,
1669 ns->ln_mlinked_checked,
1670 ns->ln_objs_nlink_repaired,
1671 ns->ln_objs_lost_found,
1672 ns->ln_success_count,
1674 ns->ln_run_time_phase2,
 /* Current position is sampled live from the running iterators. */
1682 mdd_lfsck_pos_fill(env, lfsck, &pos, true, true);
1683 rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
 /* Phase 2 in progress: phase-1 average + phase-2 speeds, current
  * position is the last scanned FID. */
1686 } else if (ns->ln_status == LS_SCANNING_PHASE2) {
1687 cfs_duration_t duration = cfs_time_current() -
1688 lfsck->ml_time_last_checkpoint;
1689 __u64 checked = ns->ln_objs_checked_phase2 +
1690 com->lc_new_checked;
1691 __u64 speed1 = ns->ln_items_checked;
1692 __u64 speed2 = checked;
1693 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1694 __u32 rtime = ns->ln_run_time_phase2 +
1695 cfs_duration_sec(duration + HALF_SEC);
1698 do_div(new_checked, duration);
1699 if (ns->ln_run_time_phase1 != 0)
1700 do_div(speed1, ns->ln_run_time_phase1);
1702 do_div(speed2, rtime);
1703 rc = snprintf(buf, len,
1704 "checked_phase1: "LPU64"\n"
1705 "checked_phase2: "LPU64"\n"
1706 "updated_phase1: "LPU64"\n"
1707 "updated_phase2: "LPU64"\n"
1708 "failed_phase1: "LPU64"\n"
1709 "failed_phase2: "LPU64"\n"
1711 "M-linked: "LPU64"\n"
1712 "nlinks_repaired: "LPU64"\n"
1713 "lost_found: "LPU64"\n"
1714 "success_count: %u\n"
1715 "run_time_phase1: %u seconds\n"
1716 "run_time_phase2: %u seconds\n"
1717 "average_speed_phase1: "LPU64" items/sec\n"
1718 "average_speed_phase2: "LPU64" objs/sec\n"
1719 "real-time_speed_phase1: N/A\n"
1720 "real-time_speed_phase2: "LPU64" objs/sec\n"
1721 "current_position: "DFID"\n",
1722 ns->ln_items_checked,
1724 ns->ln_items_repaired,
1725 ns->ln_objs_repaired_phase2,
1726 ns->ln_items_failed,
1727 ns->ln_objs_failed_phase2,
1728 ns->ln_dirs_checked,
1729 ns->ln_mlinked_checked,
1730 ns->ln_objs_nlink_repaired,
1731 ns->ln_objs_lost_found,
1732 ns->ln_success_count,
1733 ns->ln_run_time_phase1,
1738 PFID(&ns->ln_fid_latest_scanned_phase2));
 /* Idle/finished: both averages over total run times; no live data. */
1745 __u64 speed1 = ns->ln_items_checked;
1746 __u64 speed2 = ns->ln_objs_checked_phase2;
1748 if (ns->ln_run_time_phase1 != 0)
1749 do_div(speed1, ns->ln_run_time_phase1);
1750 if (ns->ln_run_time_phase2 != 0)
1751 do_div(speed2, ns->ln_run_time_phase2);
1752 rc = snprintf(buf, len,
1753 "checked_phase1: "LPU64"\n"
1754 "checked_phase2: "LPU64"\n"
1755 "updated_phase1: "LPU64"\n"
1756 "updated_phase2: "LPU64"\n"
1757 "failed_phase1: "LPU64"\n"
1758 "failed_phase2: "LPU64"\n"
1760 "M-linked: "LPU64"\n"
1761 "nlinks_repaired: "LPU64"\n"
1762 "lost_found: "LPU64"\n"
1763 "success_count: %u\n"
1764 "run_time_phase1: %u seconds\n"
1765 "run_time_phase2: %u seconds\n"
1766 "average_speed_phase1: "LPU64" items/sec\n"
1767 "average_speed_phase2: "LPU64" objs/sec\n"
1768 "real-time_speed_phase1: N/A\n"
1769 "real-time_speed_phase2: N/A\n"
1770 "current_position: N/A\n",
1771 ns->ln_items_checked,
1772 ns->ln_objs_checked_phase2,
1773 ns->ln_items_repaired,
1774 ns->ln_objs_repaired_phase2,
1775 ns->ln_items_failed,
1776 ns->ln_objs_failed_phase2,
1777 ns->ln_dirs_checked,
1778 ns->ln_mlinked_checked,
1779 ns->ln_objs_nlink_repaired,
1780 ns->ln_objs_lost_found,
1781 ns->ln_success_count,
1782 ns->ln_run_time_phase1,
1783 ns->ln_run_time_phase2,
1795 up_read(&com->lc_sem);
/*
 * Phase-2 ("double") scan of the namespace component: iterate the trace
 * index (com->lc_obj) of suspect FIDs recorded during phase 1, starting
 * just after ln_fid_latest_scanned_phase2, re-verify/repair each object,
 * checkpoint periodically, and finally set the terminal status
 * (COMPLETED / PAUSED / STOPPED / FAILED) and store the trace file.
 * NOTE(review): truncated excerpt — local declarations (di, fid, key,
 * flags, rc), several GOTO targets and loop braces are not visible.
 */
1799 static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
1800 struct lfsck_component *com)
1802 struct md_lfsck *lfsck = com->lc_lfsck;
1803 struct ptlrpc_thread *thread = &lfsck->ml_thread;
1804 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1805 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1806 struct lfsck_namespace *ns =
1807 (struct lfsck_namespace *)com->lc_file_ram;
1808 struct dt_object *obj = com->lc_obj;
1809 const struct dt_it_ops *iops = &obj->do_index_ops->dio_it;
1810 struct mdd_object *target;
1818 lfsck->ml_new_scanned = 0;
1819 lfsck->ml_time_last_checkpoint = cfs_time_current();
1820 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1821 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
 /* Position the trace-index iterator at the last scanned FID
  * (keys are stored big-endian). */
1823 di = iops->init(env, obj, 0, BYPASS_CAPA);
1825 RETURN(PTR_ERR(di));
1827 fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
1828 rc = iops->get(env, di, (const struct dt_key *)&fid);
1832 /* Skip the start one, which either has been processed or non-exist. */
1833 rc = iops->next(env, di);
 /* Fault-injection hooks for testing. */
1837 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
1841 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
1843 struct l_wait_info lwi;
1845 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
1847 l_wait_event(thread->t_ctl_waitq,
1848 !thread_is_running(thread),
 /* Main loop body: decode the FID key, find the object and run the
  * per-object phase-2 check/repair. */
1852 key = iops->key(env, di);
1853 fid_be_to_cpu(&fid, (const struct lu_fid *)key);
1854 target = mdd_object_find(env, mdd, &fid);
1855 down_write(&com->lc_sem);
1856 if (target == NULL) {
1859 } else if (IS_ERR(target)) {
1860 rc = PTR_ERR(target);
1864 /* XXX: need more processing for remote object in the future. */
1865 if (!mdd_object_exists(target) || mdd_object_remote(target))
1868 rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
1870 rc = mdd_lfsck_namespace_double_scan_one(env, com,
1874 mdd_object_put(env, target);
 /* Per-object accounting under lc_sem. */
1877 lfsck->ml_new_scanned++;
1878 com->lc_new_checked++;
1879 ns->ln_fid_latest_scanned_phase2 = fid;
1881 ns->ln_objs_repaired_phase2++;
1883 ns->ln_objs_failed_phase2++;
1884 up_write(&com->lc_sem);
 /* Handled (or dry-run success): drop the trace record; on failure
  * mark the record so it is not retried identically. */
1886 if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
1887 mdd_lfsck_namespace_delete(env, com, &fid);
1888 } else if (rc < 0) {
1889 flags |= LLF_REPAIR_FAILED;
1890 mdd_lfsck_namespace_update(env, com, &fid, flags, true);
1893 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
 /* Periodic checkpoint of phase-2 progress. */
1896 if (likely(cfs_time_beforeq(cfs_time_current(),
1897 lfsck->ml_time_next_checkpoint)) ||
1898 com->lc_new_checked == 0)
1901 down_write(&com->lc_sem);
1902 ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1903 HALF_SEC - lfsck->ml_time_last_checkpoint);
1904 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1905 ns->ln_objs_checked_phase2 += com->lc_new_checked;
1906 com->lc_new_checked = 0;
1907 rc = mdd_lfsck_namespace_store(env, com, false);
1908 up_write(&com->lc_sem);
1912 lfsck->ml_time_last_checkpoint = cfs_time_current();
1913 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1914 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1917 mdd_lfsck_control_speed(lfsck);
1918 if (unlikely(!thread_is_running(thread)))
1921 rc = iops->next(env, di);
 /* Loop done: fold remaining counters in and pick terminal status. */
1930 iops->fini(env, di);
1931 down_write(&com->lc_sem);
1933 ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1934 HALF_SEC - lfsck->ml_time_last_checkpoint);
1935 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1936 ns->ln_objs_checked_phase2 += com->lc_new_checked;
1937 com->lc_new_checked = 0;
1940 com->lc_journal = 0;
1941 ns->ln_status = LS_COMPLETED;
1942 if (!(bk->lb_param & LPF_DRYRUN))
1944 ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
1945 ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
1946 ns->ln_success_count++;
1947 } else if (rc == 0) {
1948 if (lfsck->ml_paused)
1949 ns->ln_status = LS_PAUSED;
1951 ns->ln_status = LS_STOPPED;
1953 ns->ln_status = LS_FAILED;
 /* Paused components stay on their list so they can resume. */
1956 if (ns->ln_status != LS_PAUSED) {
1957 spin_lock(&lfsck->ml_lock);
1958 cfs_list_del_init(&com->lc_link);
1959 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1960 spin_unlock(&lfsck->ml_lock);
1963 rc = mdd_lfsck_namespace_store(env, com, false);
1965 up_write(&com->lc_sem);
/*
 * Operation vector wiring the namespace component's callbacks into the
 * generic LFSCK framework (reset/fail/checkpoint/prep/exec/post/dump/
 * double_scan).
 */
1969 static struct lfsck_operations mdd_lfsck_namespace_ops = {
1970 .lfsck_reset = mdd_lfsck_namespace_reset,
1971 .lfsck_fail = mdd_lfsck_namespace_fail,
1972 .lfsck_checkpoint = mdd_lfsck_namespace_checkpoint,
1973 .lfsck_prep = mdd_lfsck_namespace_prep,
1974 .lfsck_exec_oit = mdd_lfsck_namespace_exec_oit,
1975 .lfsck_exec_dir = mdd_lfsck_namespace_exec_dir,
1976 .lfsck_post = mdd_lfsck_namespace_post,
1977 .lfsck_dump = mdd_lfsck_namespace_dump,
1978 .lfsck_double_scan = mdd_lfsck_namespace_double_scan,
1981 /* LFSCK component setup/cleanup functions */
/*
 * Allocate and initialize the namespace LFSCK component: lists, rwsem,
 * refcount, ops vector, RAM/disk copies of the trace file, open the
 * "lfsck_namespace" index object, load (or reset/init) the on-disk
 * state, and queue the component on the list matching its saved status.
 * On any failure the partially-built component is torn down via
 * mdd_lfsck_component_cleanup().
 * NOTE(review): truncated excerpt — the component allocation, several
 * error checks and some switch cases are not visible.
 */
1983 static int mdd_lfsck_namespace_setup(const struct lu_env *env,
1984 struct md_lfsck *lfsck)
1986 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1987 struct lfsck_component *com;
1988 struct lfsck_namespace *ns;
1989 struct dt_object *obj;
1997 CFS_INIT_LIST_HEAD(&com->lc_link);
1998 CFS_INIT_LIST_HEAD(&com->lc_link_dir);
1999 init_rwsem(&com->lc_sem);
2000 atomic_set(&com->lc_ref, 1);
2001 com->lc_lfsck = lfsck;
2002 com->lc_type = LT_NAMESPACE;
2003 com->lc_ops = &mdd_lfsck_namespace_ops;
2004 com->lc_file_size = sizeof(struct lfsck_namespace);
2005 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
2006 if (com->lc_file_ram == NULL)
2007 GOTO(out, rc = -ENOMEM);
2009 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
2010 if (com->lc_file_disk == NULL)
2011 GOTO(out, rc = -ENOMEM);
 /* Open the trace-file index object on the bottom device. */
2013 obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name,
2014 &mdd_env_info(env)->mti_fid);
2016 GOTO(out, rc = PTR_ERR(obj));
2019 rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
 /* Load saved state; a bad copy is reset, a missing one initialized. */
2023 rc = mdd_lfsck_namespace_load(env, com);
2025 rc = mdd_lfsck_namespace_reset(env, com, true);
2026 else if (rc == -ENODATA)
2027 rc = mdd_lfsck_namespace_init(env, com);
 /* Re-queue the component according to the status that was saved. */
2031 ns = (struct lfsck_namespace *)com->lc_file_ram;
2032 switch (ns->ln_status) {
2037 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
2040 CERROR("%s: unknown status: %u\n",
2041 mdd_lfsck2name(lfsck), ns->ln_status);
2043 case LS_SCANNING_PHASE1:
2044 case LS_SCANNING_PHASE2:
2045 /* No need to store the status to disk right now.
2046 * If the system crashed before the status stored,
2047 * it will be loaded back when next time. */
2048 ns->ln_status = LS_CRASHED;
2052 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_scan);
2053 cfs_list_add_tail(&com->lc_link_dir, &lfsck->ml_list_dir);
2061 mdd_lfsck_component_cleanup(env, com);
2065 /* helper functions for framework */
/*
 * Decide whether a directory object is reachable from the client-visible
 * namespace by walking up the parent chain: the filesystem root is
 * visible; a dead ancestor is not; an ancestor with a linkEA is treated
 * per the (truncated) branch around line 2095. The walk stops at the
 * local root FID.
 * NOTE(review): truncated excerpt — loop structure, return values and the
 * reference-counting convention for the incoming "obj" are not fully
 * visible; obj appears to be put on every exit path shown, so the
 * function presumably consumes a reference — TODO confirm against caller.
 */
2067 static int object_is_client_visible(const struct lu_env *env,
2068 struct mdd_device *mdd,
2069 struct mdd_object *obj)
2071 struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
2075 LASSERT(S_ISDIR(mdd_object_type(obj)));
 /* The root directory is trivially visible. */
2078 if (mdd_is_root(mdd, mdo2fid(obj))) {
2080 mdd_object_put(env, obj);
2084 mdd_read_lock(env, obj, MOR_TGT_CHILD);
2085 if (unlikely(mdd_is_dead_obj(obj))) {
2086 mdd_read_unlock(env, obj);
2088 mdd_object_put(env, obj);
 /* Probe for a linkEA (zero-length buffer: size query only). */
2092 rc = dt_xattr_get(env, mdd_object_child(obj),
2093 mdd_buf_get(env, NULL, 0), XATTR_NAME_LINK,
2095 mdd_read_unlock(env, obj);
2098 mdd_object_put(env, obj);
2102 if (rc < 0 && rc != -ENODATA) {
2104 mdd_object_put(env, obj);
 /* Step up to the parent and continue the walk. */
2108 rc = mdd_parent_fid(env, obj, fid);
2110 mdd_object_put(env, obj);
2114 if (unlikely(lu_fid_eq(fid, &mdd->mdd_local_root_fid)))
2117 obj = mdd_object_find(env, mdd, fid);
2120 else if (IS_ERR(obj))
2121 return PTR_ERR(obj);
2123 /* XXX: need more processing for remote object in the future. */
2124 if (!mdd_object_exists(obj) || mdd_object_remote(obj)) {
2125 mdd_object_put(env, obj);
/*
 * Convert an on-disk (little-endian) directory entry to CPU byte order
 * in place and guarantee the name is NUL-terminated.
 */
2134 static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
2136 fid_le_to_cpu(&ent->lde_fid, &ent->lde_fid);
2137 ent->lde_hash = le64_to_cpu(ent->lde_hash);
2138 ent->lde_reclen = le16_to_cpu(ent->lde_reclen);
2139 ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
2140 ent->lde_attrs = le32_to_cpu(ent->lde_attrs);
2142 /* Make sure the name is terminated with '0'.
2143 * The data (type) after ent::lde_name maybe
2144 * broken, but we do not care. */
2145 ent->lde_name[ent->lde_namelen] = 0;
2148 /* LFSCK wrap functions */
/*
 * Propagate a scan failure to every active component on the scan list.
 * "oit" tells the component whether the failure happened during the
 * otable-based iteration (vs. directory traversal); "new_checked"
 * whether the failed item should still count as checked.
 */
2150 static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
2151 bool oit, bool new_checked)
2153 struct lfsck_component *com;
2155 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2156 com->lc_ops->lfsck_fail(env, com, oit, new_checked);
/*
 * Framework-level periodic checkpoint: no-op until the next checkpoint
 * deadline passes, then snapshot the current scan position and ask every
 * active component to checkpoint, and re-arm the deadline.
 * NOTE(review): truncated excerpt — the early-return and the handling of
 * a non-zero rc from lfsck_checkpoint() are not visible.
 */
2160 static int mdd_lfsck_checkpoint(const struct lu_env *env,
2161 struct md_lfsck *lfsck, bool oit)
2163 struct lfsck_component *com;
2166 if (likely(cfs_time_beforeq(cfs_time_current(),
2167 lfsck->ml_time_next_checkpoint)))
2170 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, oit, !oit);
2171 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2172 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
2177 lfsck->ml_time_last_checkpoint = cfs_time_current();
2178 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2179 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Prepare a scan run: let each active component prep and pick the
 * smallest (earliest) requested start position, load the otable-based
 * iterator at that position, and — if the position names a directory —
 * reopen that directory and load the namespace-based iterator so the
 * interrupted directory traversal can resume. Finishes with an initial
 * checkpoint and arming of the checkpoint timer.
 * NOTE(review): truncated excerpt — several labels (out/checkpoint-ish),
 * error checks and the "di" declaration are not visible.
 */
2183 static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
2185 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2186 struct mdd_object *obj = NULL;
2187 struct dt_object *dt_obj;
2188 struct lfsck_component *com;
2189 struct lfsck_component *next;
2190 struct lfsck_position *pos = NULL;
2191 const struct dt_it_ops *iops =
2192 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2197 LASSERT(lfsck->ml_obj_dir == NULL);
2198 LASSERT(lfsck->ml_di_dir == NULL);
 /* Prep every component; track the minimum non-zero start position. */
2200 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2201 com->lc_new_checked = 0;
2202 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2203 com->lc_journal = 0;
2205 rc = com->lc_ops->lfsck_prep(env, com);
2209 if ((pos == NULL) ||
2210 (!mdd_lfsck_pos_is_zero(&com->lc_pos_start) &&
2211 mdd_lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
2212 pos = &com->lc_pos_start;
2215 /* Init otable-based iterator. */
2217 rc = iops->load(env, lfsck->ml_di_oit, 0);
2218 GOTO(out, rc = (rc >= 0 ? 0 : rc));
2221 rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
2225 if (fid_is_zero(&pos->lp_dir_parent))
2228 /* Find the directory for namespace-based traverse. */
2229 obj = mdd_object_find(env, mdd, &pos->lp_dir_parent);
2232 else if (IS_ERR(obj))
2233 RETURN(PTR_ERR(obj));
2235 /* XXX: need more processing for remote object in the future. */
2236 if (!mdd_object_exists(obj) || mdd_object_remote(obj) ||
2237 unlikely(!S_ISDIR(mdd_object_type(obj))))
2240 if (unlikely(mdd_is_dead_obj(obj)))
2243 dt_obj = mdd_object_child(obj);
2244 if (unlikely(!dt_try_as_dir(env, dt_obj)))
2245 GOTO(out, rc = -ENOTDIR);
2247 /* Init the namespace-based directory traverse. */
2248 iops = &dt_obj->do_index_ops->dio_it;
2249 di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2251 GOTO(out, rc = PTR_ERR(di));
2253 rc = iops->load(env, di, pos->lp_dir_cookie);
2255 rc = iops->next(env, di);
2261 iops->fini(env, di);
 /* Publish the resumed directory iterator; ml_di_dir is read under
  * ml_lock elsewhere. */
2265 lfsck->ml_obj_dir = dt_obj;
2266 spin_lock(&lfsck->ml_lock);
2267 lfsck->ml_di_dir = di;
2268 spin_unlock(&lfsck->ml_lock);
2275 mdd_object_put(env, obj);
2278 return (rc > 0 ? 0 : rc);
 /* Initial checkpoint and timer arming. */
2280 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false, false);
2281 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2282 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
2287 lfsck->ml_time_last_checkpoint = cfs_time_current();
2288 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2289 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Dispatch one object from the otable-based iteration to every active
 * component; if the object is a client-visible directory and some
 * component wants directory traversal (ml_list_dir non-empty), open a
 * directory iterator on it and publish it as the current dir scan.
 * On error, mdd_lfsck_fail() notifies the components.
 * NOTE(review): truncated excerpt — the "di" declaration, some GOTO
 * targets and the out label are not visible.
 */
2293 static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
2294 struct mdd_object *obj)
2296 struct lfsck_component *com;
2297 struct dt_object *dt_obj;
2298 const struct dt_it_ops *iops;
2303 LASSERT(lfsck->ml_obj_dir == NULL);
2305 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2306 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
 /* Only client-visible directories spawn a nested dir traversal. */
2311 if (!S_ISDIR(mdd_object_type(obj)) ||
2312 cfs_list_empty(&lfsck->ml_list_dir))
2315 rc = object_is_client_visible(env, mdd_lfsck2mdd(lfsck), obj);
2319 if (unlikely(mdd_is_dead_obj(obj)))
2322 dt_obj = mdd_object_child(obj);
2323 if (unlikely(!dt_try_as_dir(env, dt_obj)))
2324 GOTO(out, rc = -ENOTDIR);
2326 iops = &dt_obj->do_index_ops->dio_it;
2327 di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2329 GOTO(out, rc = PTR_ERR(di));
2331 rc = iops->load(env, di, 0);
2333 rc = iops->next(env, di);
2339 iops->fini(env, di);
 /* Hold a reference on the directory while it is being traversed. */
2343 mdd_object_get(obj);
2344 lfsck->ml_obj_dir = dt_obj;
2345 spin_lock(&lfsck->ml_lock);
2346 lfsck->ml_di_dir = di;
2347 spin_unlock(&lfsck->ml_lock);
2353 mdd_lfsck_fail(env, lfsck, false, false);
2354 return (rc > 0 ? 0 : rc);
/*
 * Dispatch one directory entry to every active component's
 * lfsck_exec_dir callback (namespace component does the real work).
 */
2357 static int mdd_lfsck_exec_dir(const struct lu_env *env, struct md_lfsck *lfsck,
2358 struct mdd_object *obj, struct lu_dirent *ent)
2360 struct lfsck_component *com;
2363 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2364 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
/*
 * End-of-phase-1 hook: snapshot the final scan position, run every
 * active component's lfsck_post with the overall "result", and re-arm
 * the checkpoint deadline (for components that proceed to phase 2).
 */
2371 static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
2374 struct lfsck_component *com;
2375 struct lfsck_component *next;
2378 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true, true);
2379 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2380 rc = com->lc_ops->lfsck_post(env, com, result);
2385 lfsck->ml_time_last_checkpoint = cfs_time_current();
2386 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2387 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Run the second-phase ("double") scan for every component queued on
 * ml_list_double_scan; dry-run mode disables journaling per component.
 */
2391 static int mdd_lfsck_double_scan(const struct lu_env *env,
2392 struct md_lfsck *lfsck)
2394 struct lfsck_component *com;
2395 struct lfsck_component *next;
2398 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_double_scan,
2400 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2401 com->lc_journal = 0;
2403 rc = com->lc_ops->lfsck_double_scan(env, com);
/*
 * Directory traversal engine: iterate entries of the currently-open
 * directory (lfsck->ml_di_dir), unpack each entry, resolve the child
 * object and dispatch to mdd_lfsck_exec_dir(); honor speed control,
 * periodic checkpoints, thread stop requests and fault injection.
 * When the directory is exhausted it is closed (unless the OIT is over).
 * NOTE(review): truncated excerpt — the loop construct, rc declaration
 * and several continue/GOTO lines are not visible.
 */
2412 static int mdd_lfsck_dir_engine(const struct lu_env *env,
2413 struct md_lfsck *lfsck)
2415 struct mdd_thread_info *info = mdd_env_info(env);
2416 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2417 const struct dt_it_ops *iops =
2418 &lfsck->ml_obj_dir->do_index_ops->dio_it;
2419 struct dt_it *di = lfsck->ml_di_dir;
2420 struct lu_dirent *ent = &info->mti_ent;
2421 struct lu_fid *fid = &info->mti_fid;
2422 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2423 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2428 struct mdd_object *child;
 /* Fault injection: optionally delay inside the dir scan. */
2430 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) &&
2432 struct l_wait_info lwi;
2434 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2436 l_wait_event(thread->t_ctl_waitq,
2437 !thread_is_running(thread),
2441 lfsck->ml_new_scanned++;
2442 rc = iops->rec(env, di, (struct dt_rec *)ent,
2443 lfsck->ml_args_dir);
2445 mdd_lfsck_fail(env, lfsck, false, true);
2446 if (bk->lb_param & LPF_FAILOUT)
 /* Fix endianness / termination before using the entry. */
2452 mdd_lfsck_unpack_ent(ent);
2453 if (ent->lde_attrs & LUDA_IGNORE)
2456 *fid = ent->lde_fid;
2457 child = mdd_object_find(env, mdd, fid);
2458 if (child == NULL) {
2460 } else if (IS_ERR(child)) {
2461 mdd_lfsck_fail(env, lfsck, false, true);
2462 if (bk->lb_param & LPF_FAILOUT)
2463 RETURN(PTR_ERR(child));
2468 /* XXX: need more processing for remote object in the future. */
2469 if (mdd_object_exists(child) && !mdd_object_remote(child))
2470 rc = mdd_lfsck_exec_dir(env, lfsck, child, ent);
2471 mdd_object_put(env, child);
2472 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2476 rc = mdd_lfsck_checkpoint(env, lfsck, false);
2477 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2481 mdd_lfsck_control_speed(lfsck);
2482 if (unlikely(!thread_is_running(thread)))
 /* Fault injection: simulate a fatal service failure. */
2485 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) {
2486 spin_lock(&lfsck->ml_lock);
2487 thread_set_flags(thread, SVC_STOPPING);
2488 spin_unlock(&lfsck->ml_lock);
2492 rc = iops->next(env, di);
 /* Directory exhausted (iterator returned > 0): close it so the
  * OIT engine can continue, unless the OIT itself already ended. */
2495 if (rc > 0 && !lfsck->ml_oit_over)
2496 mdd_lfsck_close_dir(env, lfsck);
/*
 * Main otable-based iteration engine: for each object FID produced by
 * the low-layer iterator, resolve the object and dispatch it via
 * mdd_lfsck_exec_oit(); whenever a directory scan is open (ml_di_dir)
 * run the directory engine first. Handles speed control, checkpoints,
 * fault injection, and thread stop. The loop continues while the
 * iterator yields entries or a directory scan is still pending.
 * NOTE(review): truncated excerpt — the do { } opening, rc declaration
 * and a few GOTO targets are not visible.
 */
2501 static int mdd_lfsck_oit_engine(const struct lu_env *env,
2502 struct md_lfsck *lfsck)
2504 struct mdd_thread_info *info = mdd_env_info(env);
2505 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2506 const struct dt_it_ops *iops =
2507 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2508 struct dt_it *di = lfsck->ml_di_oit;
2509 struct lu_fid *fid = &info->mti_fid;
2510 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2511 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2516 struct mdd_object *target;
 /* A directory scan opened by a previous OIT object takes
  * priority: drain it before advancing the OIT. */
2518 if (lfsck->ml_di_dir != NULL) {
2519 rc = mdd_lfsck_dir_engine(env, lfsck);
2524 if (unlikely(lfsck->ml_oit_over))
 /* Fault injection hooks (delay / crash). */
2527 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) &&
2529 struct l_wait_info lwi;
2531 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2533 l_wait_event(thread->t_ctl_waitq,
2534 !thread_is_running(thread),
2538 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2541 lfsck->ml_new_scanned++;
2542 rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
2544 mdd_lfsck_fail(env, lfsck, true, true);
2545 if (bk->lb_param & LPF_FAILOUT)
2551 target = mdd_object_find(env, mdd, fid);
2552 if (target == NULL) {
2554 } else if (IS_ERR(target)) {
2555 mdd_lfsck_fail(env, lfsck, true, true);
2556 if (bk->lb_param & LPF_FAILOUT)
2557 RETURN(PTR_ERR(target));
2562 /* XXX: In fact, low layer otable-based iteration should not
2563 * return agent object. But before LU-2646 resolved, we
2564 * need more processing for agent object. */
2565 if (mdd_object_exists(target) && !mdd_object_remote(target))
2566 rc = mdd_lfsck_exec_oit(env, lfsck, target);
2567 mdd_object_put(env, target);
2568 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2572 rc = mdd_lfsck_checkpoint(env, lfsck, true);
2573 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2577 mdd_lfsck_control_speed(lfsck);
2579 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) {
2580 spin_lock(&lfsck->ml_lock);
2581 thread_set_flags(thread, SVC_STOPPING);
2582 spin_unlock(&lfsck->ml_lock);
2586 rc = iops->next(env, di);
 /* Iterator exhausted: remember it so dir scans can finish up. */
2588 lfsck->ml_oit_over = 1;
2590 if (unlikely(!thread_is_running(thread)))
2592 } while (rc == 0 || lfsck->ml_di_dir != NULL);
/*
 * LFSCK kernel-thread body: daemonize, set up the lu_env, create the
 * otable-based iterator, prep all components, mark the thread RUNNING,
 * drive the OIT engine, then on exit run post-processing and (if any
 * component queued itself) the phase-2 double scan; finally tear down
 * the iterator and signal SVC_STOPPED to waiters.
 * NOTE(review): truncated excerpt — rc declaration, several GOTO/label
 * lines, and some cleanup between sections are not visible.
 */
2597 static int mdd_lfsck_main(void *args)
2600 struct md_lfsck *lfsck = (struct md_lfsck *)args;
2601 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2602 struct dt_object *oit_obj = lfsck->ml_obj_oit;
2603 const struct dt_it_ops *oit_iops = &oit_obj->do_index_ops->dio_it;
2604 struct dt_it *oit_di;
2608 cfs_daemonize("lfsck");
2609 rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
2611 CERROR("%s: LFSCK, fail to init env, rc = %d\n",
2612 mdd_lfsck2name(lfsck), rc);
2616 oit_di = oit_iops->init(&env, oit_obj, lfsck->ml_args_oit, BYPASS_CAPA);
2617 if (IS_ERR(oit_di)) {
2618 rc = PTR_ERR(oit_di);
2619 CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
2620 mdd_lfsck2name(lfsck), rc);
 /* Publish the OIT iterator under ml_lock, then prep components. */
2624 spin_lock(&lfsck->ml_lock);
2625 lfsck->ml_di_oit = oit_di;
2626 spin_unlock(&lfsck->ml_lock);
2627 rc = mdd_lfsck_prep(&env, lfsck);
2631 CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = 0x%x, dir_flags = 0x%x, "
2632 "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2633 ", pid = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2634 lfsck->ml_pos_current.lp_oit_cookie,
2635 lfsck->ml_pos_current.lp_dir_cookie,
2636 PFID(&lfsck->ml_pos_current.lp_dir_parent),
 /* Announce RUNNING to whoever started the thread. */
2639 spin_lock(&lfsck->ml_lock);
2640 thread_set_flags(thread, SVC_RUNNING);
2641 spin_unlock(&lfsck->ml_lock);
2642 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2644 if (!cfs_list_empty(&lfsck->ml_list_scan) ||
2645 cfs_list_empty(&lfsck->ml_list_double_scan))
2646 rc = mdd_lfsck_oit_engine(&env, lfsck);
2650 CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = 0x%x, dir_flags = 0x%x, "
2651 "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2652 ", pid = %d, rc = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2653 lfsck->ml_pos_current.lp_oit_cookie,
2654 lfsck->ml_pos_current.lp_dir_cookie,
2655 PFID(&lfsck->ml_pos_current.lp_dir_parent),
2656 cfs_curproc_pid(), rc);
 /* On pause, release (put) the iterator so it can resume later. */
2658 if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
2659 oit_iops->put(&env, oit_di);
2661 if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2662 rc = mdd_lfsck_post(&env, lfsck, rc);
2663 if (lfsck->ml_di_dir != NULL)
2664 mdd_lfsck_close_dir(&env, lfsck);
2667 spin_lock(&lfsck->ml_lock);
2668 lfsck->ml_di_oit = NULL;
2669 spin_unlock(&lfsck->ml_lock);
2671 oit_iops->fini(&env, oit_di);
2673 if (!cfs_list_empty(&lfsck->ml_list_double_scan))
2674 rc = mdd_lfsck_double_scan(&env, lfsck);
2679 /* XXX: Purge the pinned objects in the future. */
 /* Final state: STOPPED, wake anyone waiting on the control queue. */
2685 spin_lock(&lfsck->ml_lock);
2686 thread_set_flags(thread, SVC_STOPPED);
2687 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2688 spin_unlock(&lfsck->ml_lock);
2692 /* external interfaces */
/*
 * External interface: set the scan speed limit and persist it in the
 * bookmark file, serialized by ml_mutex.
 * NOTE(review): truncated excerpt — the limit parameter line and the
 * final return are not visible.
 */
2694 int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck,
2699 mutex_lock(&lfsck->ml_mutex);
2700 __mdd_lfsck_set_speed(lfsck, limit);
2701 rc = mdd_lfsck_bookmark_store(env, lfsck);
2702 mutex_unlock(&lfsck->ml_mutex);
/*
 * External interface: dump the status of the component of the given
 * "type" into "buf" (delegates to the component's lfsck_dump op).
 * Requires the LFSCK framework to be initialized; the component
 * reference taken by the lookup is dropped before returning.
 */
2706 int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck,
2707 __u16 type, char *buf, int len)
2709 struct lfsck_component *com;
2712 if (!lfsck->ml_initialized)
2715 com = mdd_lfsck_component_find(lfsck, type);
2719 rc = com->lc_ops->lfsck_dump(env, com, buf, len);
2720 mdd_lfsck_component_put(env, com);
/*
 * Start (or resume) LFSCK scanning on this device.
 *
 * Validates/applies the caller's lfsck_start options (speed limit, error
 * handling, dryrun, reset, active component mask), persists them in the
 * bookmark, moves the requested components from the idle list to the scan
 * list, and finally spawns the LFSCK main engine thread.
 *
 * \param[in] env	thread environment
 * \param[in] lfsck	LFSCK instance to start
 * \param[in] start	start options from user space; NULL means an
 *			automatic re-trigger of a previously paused LFSCK
 *
 * \retval 0		on success (negative thread-start errors are
 *			returned as-is; positive rc is squashed to 0)
 */
2724 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
2725 struct lfsck_start *start)
2727 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2728 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2729 struct lfsck_component *com;
2730 struct l_wait_info lwi = { 0 };
	/* No object-table iterator object means the low layer (OSD) does
	 * not support LFSCK scanning on this device. */
2737 if (lfsck->ml_obj_oit == NULL)
2740 /* start == NULL means auto trigger paused LFSCK. */
2741 if ((start == NULL) &&
2742 (cfs_list_empty(&lfsck->ml_list_scan) ||
2743 OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
	/* Lock order throughout this file: ml_mutex, then ml_lock. */
2746 mutex_lock(&lfsck->ml_mutex);
2747 spin_lock(&lfsck->ml_lock);
	/* A thread state that is neither "init" nor "stopped" means an
	 * LFSCK instance is already running; refuse to start another. */
2748 if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
2749 spin_unlock(&lfsck->ml_lock);
2750 mutex_unlock(&lfsck->ml_mutex);
2754 spin_unlock(&lfsck->ml_lock);
	/* Reset per-run state before the engine thread starts. */
2756 lfsck->ml_paused = 0;
2757 lfsck->ml_oit_over = 0;
2758 lfsck->ml_drop_dryrun = 0;
2759 lfsck->ml_new_scanned = 0;
2761 /* For auto trigger. */
	/* Stamp the caller's request with the current bookmark version. */
2765 start->ls_version = bk->lb_version;
2766 if (start->ls_valid & LSV_SPEED_LIMIT) {
2767 __mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
	/* Translate the user-visible LPF_FAILOUT flag into the dt-layer
	 * DOIF_FAILOUT iterator flag, and sync it into the bookmark. */
2771 if (start->ls_valid & LSV_ERROR_HANDLE) {
2772 valid |= DOIV_ERROR_HANDLE;
2773 if (start->ls_flags & LPF_FAILOUT)
2774 flags |= DOIF_FAILOUT;
2776 if ((start->ls_flags & LPF_FAILOUT) &&
2777 !(bk->lb_param & LPF_FAILOUT)) {
2778 bk->lb_param |= LPF_FAILOUT;
2780 } else if (!(start->ls_flags & LPF_FAILOUT) &&
2781 (bk->lb_param & LPF_FAILOUT)) {
2782 bk->lb_param &= ~LPF_FAILOUT;
	/* Same dance for dryrun mode; leaving dryrun also sets
	 * ml_drop_dryrun so previously-deferred repairs get applied. */
2787 if (start->ls_valid & LSV_DRYRUN) {
2788 if ((start->ls_flags & LPF_DRYRUN) &&
2789 !(bk->lb_param & LPF_DRYRUN)) {
2790 bk->lb_param |= LPF_DRYRUN;
2792 } else if (!(start->ls_flags & LPF_DRYRUN) &&
2793 (bk->lb_param & LPF_DRYRUN)) {
2794 bk->lb_param &= ~LPF_DRYRUN;
2795 lfsck->ml_drop_dryrun = 1;
	/* Persist any bookmark changes made above. */
2801 rc = mdd_lfsck_bookmark_store(env, lfsck);
2806 if (start->ls_flags & LPF_RESET)
2807 flags |= DOIF_RESET;
	/* Select which components will take part in this run. */
2809 if (start->ls_active != 0) {
2810 struct lfsck_component *next;
2813 if (start->ls_active == LFSCK_TYPES_ALL)
2814 start->ls_active = LFSCK_TYPES_SUPPORTED;
	/* Reject any requested type this build does not support; the
	 * unsupported bits are left in ls_active for the caller. */
2816 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
2817 start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
2818 GOTO(out, rc = -ENOTSUPP);
	/* Post-process (deactivate) components currently on the scan
	 * list that the caller did not ask for this time. */
2821 cfs_list_for_each_entry_safe(com, next,
2822 &lfsck->ml_list_scan, lc_link) {
2823 if (!(com->lc_type & start->ls_active)) {
2824 rc = com->lc_ops->lfsck_post(env, com, 0);
	/* Move each requested component from the idle list onto the
	 * scan list, one type bit at a time. */
2830 while (start->ls_active != 0) {
2831 if (type & start->ls_active) {
2832 com = __mdd_lfsck_component_find(lfsck, type,
2833 &lfsck->ml_list_idle);
2835 /* The component status will be updated
2836 * when its prep() is called later by
2837 * the LFSCK main engine. */
2838 cfs_list_del_init(&com->lc_link);
2839 cfs_list_add_tail(&com->lc_link,
2840 &lfsck->ml_list_scan);
2842 start->ls_active &= ~type;
	/* Report back the effective active set, and reset components
	 * when a full re-scan was requested. */
2848 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2849 start->ls_active |= com->lc_type;
2850 if (flags & DOIF_RESET) {
2851 rc = com->lc_ops->lfsck_reset(env, com, false);
	/* Directory traversal args: 64-bit hashes plus entry verification;
	 * dryrun additionally suppresses actual repairs. */
2858 lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
2859 if (bk->lb_param & LPF_DRYRUN)
2860 lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
2862 if (bk->lb_param & LPF_FAILOUT) {
2863 valid |= DOIV_ERROR_HANDLE;
2864 flags |= DOIF_FAILOUT;
2867 if (!cfs_list_empty(&lfsck->ml_list_scan))
2868 flags |= DOIF_OUTUSED;
	/* Pack the iterator flags/valid bits for the otable iterator,
	 * then spawn the LFSCK main engine thread. */
2870 lfsck->ml_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
2871 thread_set_flags(thread, 0);
2872 rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
2874 CERROR("%s: cannot start LFSCK thread, rc = %d\n",
2875 mdd_lfsck2name(lfsck), rc);
	/* Wait until the new thread reports running (or has already
	 * stopped, e.g. after an early failure in its prep phase). */
2877 l_wait_event(thread->t_ctl_waitq,
2878 thread_is_running(thread) ||
2879 thread_is_stopped(thread),
2885 mutex_unlock(&lfsck->ml_mutex);
2886 return (rc < 0 ? rc : 0);
/*
 * Stop a running LFSCK instance and wait for its engine thread to exit.
 *
 * Marks the thread SVC_STOPPING, kicks it out of any low-layer iterator
 * wait, and blocks until the thread reports SVC_STOPPED.
 *
 * \param[in] env	thread environment
 * \param[in] lfsck	LFSCK instance to stop
 */
2889 int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
2892 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2893 struct l_wait_info lwi = { 0 };
	/* Lock order: ml_mutex, then ml_lock (same as mdd_lfsck_start). */
2896 mutex_lock(&lfsck->ml_mutex);
2897 spin_lock(&lfsck->ml_lock);
	/* Not running ("init" or already "stopped"): nothing to stop. */
2898 if (thread_is_init(thread) || thread_is_stopped(thread)) {
2899 spin_unlock(&lfsck->ml_lock);
2900 mutex_unlock(&lfsck->ml_mutex);
	/* ml_paused distinguishes an explicit stop/umount from a normal
	 * completion when the engine thread winds down. */
2905 lfsck->ml_paused = 1;
2906 thread_set_flags(thread, SVC_STOPPING);
2907 /* The LFSCK thread may be sleeping on low layer wait queue,
	 * so call the otable iterator's put() to wake it up; done under
	 * ml_lock so ml_di_oit cannot be cleared underneath us. */
2909 if (likely(lfsck->ml_di_oit != NULL))
2910 lfsck->ml_obj_oit->do_index_ops->dio_it.put(env,
2912 spin_unlock(&lfsck->ml_lock);
	/* Wake the engine thread and wait for it to reach SVC_STOPPED. */
2914 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2915 l_wait_event(thread->t_ctl_waitq,
2916 thread_is_stopped(thread),
2918 mutex_unlock(&lfsck->ml_mutex);
/* Well-known local FID of the OSD object-table iterator ("otable-it")
 * object, located on the bottom device during mdd_lfsck_setup(). */
2923 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
2924 .f_oid = OTABLE_IT_OID,
/*
 * One-time LFSCK initialization for an MDD device.
 *
 * Initializes locks, component lists and the engine-thread wait queue,
 * locates the OSD object-table iterator object, opens (or creates) the
 * persistent bookmark file, loads or initializes the bookmark, and sets
 * up the namespace component.
 *
 * \param[in] env	thread environment
 * \param[in] mdd	MDD device being set up
 *
 * \retval		negative errno on failure
 */
2927 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
2929 struct md_lfsck *lfsck = &mdd->mdd_lfsck;
2930 struct dt_object *obj;
	/* Must not be called twice for the same device. */
2934 LASSERT(!lfsck->ml_initialized);
2936 lfsck->ml_initialized = 1;
2937 mutex_init(&lfsck->ml_mutex);
2938 spin_lock_init(&lfsck->ml_lock);
	/* Component lists: actively scanning, directory-scan queue,
	 * second-pass (double scan) queue, and idle components. */
2939 CFS_INIT_LIST_HEAD(&lfsck->ml_list_scan);
2940 CFS_INIT_LIST_HEAD(&lfsck->ml_list_dir);
2941 CFS_INIT_LIST_HEAD(&lfsck->ml_list_double_scan);
2942 CFS_INIT_LIST_HEAD(&lfsck->ml_list_idle);
2943 cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
	/* Locate the otable iterator object on the bottom (OSD) device. */
2945 obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid);
2947 RETURN(PTR_ERR(obj));
2949 lfsck->ml_obj_oit = obj;
	/* Probe for otable-it support; -ENOTSUPP is handled below
	 * (presumably as "no LFSCK on this OSD" — elided here). */
2950 rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
2952 if (rc == -ENOTSUPP)
	/* Open the persistent bookmark file in the local root. */
2958 obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_bookmark_name,
2959 &mdd_env_info(env)->mti_fid);
2961 RETURN(PTR_ERR(obj));
2963 lfsck->ml_bookmark_obj = obj;
	/* Load the existing bookmark; fall back to creating a fresh one
	 * (e.g. first LFSCK run on this device). */
2964 rc = mdd_lfsck_bookmark_load(env, lfsck);
2966 rc = mdd_lfsck_bookmark_init(env, lfsck);
2970 rc = mdd_lfsck_namespace_setup(env, lfsck);
2971 /* XXX: LFSCK components initialization to be added here. */
2976 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
2978 struct md_lfsck *lfsck = &mdd->mdd_lfsck;
2979 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2980 struct lfsck_component *com;
2982 if (!lfsck->ml_initialized)
2985 LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
2987 if (lfsck->ml_obj_oit != NULL) {
2988 lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2989 lfsck->ml_obj_oit = NULL;
2992 LASSERT(lfsck->ml_obj_dir == NULL);
2994 if (lfsck->ml_bookmark_obj != NULL) {
2995 lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
2996 lfsck->ml_bookmark_obj = NULL;
2999 while (!cfs_list_empty(&lfsck->ml_list_scan)) {
3000 com = cfs_list_entry(lfsck->ml_list_scan.next,
3001 struct lfsck_component,
3003 mdd_lfsck_component_cleanup(env, com);
3006 LASSERT(cfs_list_empty(&lfsck->ml_list_dir));
3008 while (!cfs_list_empty(&lfsck->ml_list_double_scan)) {
3009 com = cfs_list_entry(lfsck->ml_list_double_scan.next,
3010 struct lfsck_component,
3012 mdd_lfsck_component_cleanup(env, com);
3015 while (!cfs_list_empty(&lfsck->ml_list_idle)) {
3016 com = cfs_list_entry(lfsck->ml_list_idle.next,
3017 struct lfsck_component,
3019 mdd_lfsck_component_cleanup(env, com);