4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012, 2013, Intel Corporation.
26 * lustre/mdd/mdd_lfsck.c
28 * Top-level entry points into mdd module
30 * LFSCK controller, which scans the whole device through low layer
31 * iteration APIs, drives all lfsck components, controls the speed.
33 * Author: Fan Yong <yong.fan@whamcloud.com>
37 # define EXPORT_SYMTAB
39 #define DEBUG_SUBSYSTEM S_MDS
41 #include <lustre/lustre_idl.h>
42 #include <lustre_fid.h>
43 #include <obd_support.h>
45 #include "mdd_internal.h"
46 #include "mdd_lfsck.h"
48 #define HALF_SEC (CFS_HZ >> 1)
49 #define LFSCK_CHECKPOINT_INTERVAL 60
51 #define LFSCK_NAMEENTRY_DEAD 1 /* The object has been unlinked. */
52 #define LFSCK_NAMEENTRY_REMOVED 2 /* The entry has been removed. */
53 #define LFSCK_NAMEENTRY_RECREATED 3 /* The entry has been recreated. */
/* On-disk file names used to persist LFSCK bookmark and namespace state. */
55 const char lfsck_bookmark_name[] = "lfsck_bookmark";
56 const char lfsck_namespace_name[] = "lfsck_namespace";
/* Human-readable names indexed by status/flag/param bit position; consumed
 * by lfsck_bits_dump() when formatting proc output. (Initializer contents
 * elided in this view.) */
58 static const char *lfsck_status_names[] = {
70 static const char *lfsck_flags_names[] = {
77 static const char *lfsck_param_names[] = {
/* Map an embedded md_lfsck back to its containing mdd_device. */
85 static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck)
87 return container_of0(lfsck, struct mdd_device, mdd_lfsck);
/* Return the OBD device name of the MDD owning this lfsck instance;
 * used as the "%.16s" prefix in log messages throughout this file. */
90 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
92 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
94 return mdd2obd_dev(mdd)->obd_name;
/* Take a reference on an lfsck component. */
97 static inline void mdd_lfsck_component_get(struct lfsck_component *com)
99 atomic_inc(&com->lc_ref);
/* Drop a component reference; on the final put, release the backing
 * dt_object (if any) and free the RAM/disk state buffers. */
102 static inline void mdd_lfsck_component_put(const struct lu_env *env,
103 struct lfsck_component *com)
105 if (atomic_dec_and_test(&com->lc_ref)) {
106 if (com->lc_obj != NULL)
107 lu_object_put(env, &com->lc_obj->do_lu);
108 if (com->lc_file_ram != NULL)
109 OBD_FREE(com->lc_file_ram, com->lc_file_size);
110 if (com->lc_file_disk != NULL)
111 OBD_FREE(com->lc_file_disk, com->lc_file_size);
/* Scan @list for a component of the given @type.
 * Caller must hold lfsck->ml_lock. */
116 static inline struct lfsck_component *
117 __mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list)
119 struct lfsck_component *com;
121 cfs_list_for_each_entry(com, list, lc_link) {
122 if (com->lc_type == type)
/* Look up a component by type across the scan, double-scan and idle lists
 * under ml_lock; on success an extra reference is taken before returning. */
128 static struct lfsck_component *
129 mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type)
131 struct lfsck_component *com;
133 spin_lock(&lfsck->ml_lock);
134 com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan);
138 com = __mdd_lfsck_component_find(lfsck, type,
139 &lfsck->ml_list_double_scan);
143 com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_idle);
147 mdd_lfsck_component_get(com);
148 spin_unlock(&lfsck->ml_lock);
/* Detach the component from both the main and the dir-scan lists and
 * drop the reference the lists held on it. */
152 static void mdd_lfsck_component_cleanup(const struct lu_env *env,
153 struct lfsck_component *com)
155 if (!cfs_list_empty(&com->lc_link))
156 cfs_list_del_init(&com->lc_link);
157 if (!cfs_list_empty(&com->lc_link_dir))
158 cfs_list_del_init(&com->lc_link_dir);
160 mdd_lfsck_component_put(env, com);
/* Format the set bits of @bits into *buf as "prefix: name1,name2\n",
 * taking names from the names[] table; *buf/*len track remaining space. */
163 static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
171 rc = snprintf(*buf, *len, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
/* Walk bit positions low to high until all set bits have been printed. */
177 for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
180 rc = snprintf(*buf, *len, "%s%c", names[i],
181 bits != 0 ? ',' : '\n');
/* Print the number of seconds elapsed since @time, or "N/A" when the
 * timestamp is unset. */
192 static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
197 rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
198 cfs_time_current_sec() - time);
200 rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
/* Dump an lfsck position as "prefix: oit_cookie, dir_parent_fid,
 * dir_cookie\n", substituting "N/A" for whichever parts are unset. */
209 static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
214 if (fid_is_zero(&pos->lp_dir_parent)) {
215 if (pos->lp_oit_cookie == 0)
216 rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
219 rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
220 prefix, pos->lp_oit_cookie);
222 rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
223 prefix, pos->lp_oit_cookie,
224 PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
/* Capture the current scan position (OI-table cookie plus, when a directory
 * is being traversed, its parent FID and directory cookie) under ml_lock.
 * When no OI iterator is active the position is zeroed. */
234 static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
235 struct lfsck_position *pos, bool init)
237 const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;
239 spin_lock(&lfsck->ml_lock);
240 if (unlikely(lfsck->ml_di_oit == NULL)) {
241 spin_unlock(&lfsck->ml_lock);
242 memset(pos, 0, sizeof(*pos));
246 pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);
/* If the current OIT object has not been fully processed (and this is not
 * the initial fill), step back so that object is re-scanned on resume. */
247 if (!lfsck->ml_current_oit_processed && !init)
248 pos->lp_oit_cookie--;
250 LASSERT(pos->lp_oit_cookie > 0);
252 if (lfsck->ml_di_dir != NULL) {
253 struct dt_object *dto = lfsck->ml_obj_dir;
255 pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
/* A cookie at/after MDS_DIR_END_OFF means the dir scan finished. */
258 if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
259 fid_zero(&pos->lp_dir_parent);
260 pos->lp_dir_cookie = 0;
262 pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
265 fid_zero(&pos->lp_dir_parent);
266 pos->lp_dir_cookie = 0;
268 spin_unlock(&lfsck->ml_lock);
/* Reset a position to the all-zero (unset) state. */
271 static inline void mdd_lfsck_pos_set_zero(struct lfsck_position *pos)
273 memset(pos, 0, sizeof(*pos));
/* True when the position has never been set (zero cookie and zero FID). */
276 static inline int mdd_lfsck_pos_is_zero(const struct lfsck_position *pos)
278 return pos->lp_oit_cookie == 0 && fid_is_zero(&pos->lp_dir_parent);
/* Three-way comparison of two scan positions: first by OIT cookie, then by
 * whether a directory position is present, finally by directory cookie. */
281 static inline int mdd_lfsck_pos_is_eq(const struct lfsck_position *pos1,
282 const struct lfsck_position *pos2)
284 if (pos1->lp_oit_cookie < pos2->lp_oit_cookie)
287 if (pos1->lp_oit_cookie > pos2->lp_oit_cookie)
290 if (fid_is_zero(&pos1->lp_dir_parent) &&
291 !fid_is_zero(&pos2->lp_dir_parent))
294 if (!fid_is_zero(&pos1->lp_dir_parent) &&
295 fid_is_zero(&pos2->lp_dir_parent))
298 if (fid_is_zero(&pos1->lp_dir_parent) &&
299 fid_is_zero(&pos2->lp_dir_parent))
/* Same OIT cookie and both have dir positions: parents must match. */
302 LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
304 if (pos1->lp_dir_cookie < pos2->lp_dir_cookie)
307 if (pos1->lp_dir_cookie > pos2->lp_dir_cookie)
/* Tear down the active directory iterator: clear ml_di_dir under the lock
 * so concurrent readers stop using it, then release the iterator and the
 * directory object reference. */
313 static void mdd_lfsck_close_dir(const struct lu_env *env,
314 struct md_lfsck *lfsck)
316 struct dt_object *dir_obj = lfsck->ml_obj_dir;
317 const struct dt_it_ops *dir_iops = &dir_obj->do_index_ops->dio_it;
318 struct dt_it *dir_di = lfsck->ml_di_dir;
320 spin_lock(&lfsck->ml_lock);
321 lfsck->ml_di_dir = NULL;
322 spin_unlock(&lfsck->ml_lock);
324 dir_iops->put(env, dir_di);
325 dir_iops->fini(env, dir_di);
326 lfsck->ml_obj_dir = NULL;
327 lu_object_put(env, &dir_obj->do_lu);
/* Record the new speed limit (objects/sec) and derive the sleep schedule:
 * either N objects per jiffy (limit > CFS_HZ) or 1 object every
 * CFS_HZ/limit jiffies; zero rate/jiffies means unthrottled. */
330 static void __mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
332 lfsck->ml_bookmark_ram.lb_speed_limit = limit;
333 if (limit != LFSCK_SPEED_NO_LIMIT) {
334 if (limit > CFS_HZ) {
335 lfsck->ml_sleep_rate = limit / CFS_HZ;
336 lfsck->ml_sleep_jif = 1;
338 lfsck->ml_sleep_rate = 1;
339 lfsck->ml_sleep_jif = CFS_HZ / limit;
342 lfsck->ml_sleep_jif = 0;
343 lfsck->ml_sleep_rate = 0;
/* Throttle the scanning thread: once enough new objects have been scanned
 * in the current window, sleep ml_sleep_jif jiffies (interruptible by the
 * thread being stopped) and reset the per-window counter. The condition is
 * re-checked under ml_lock to avoid racing with a speed change. */
349 static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
349 struct ptlrpc_thread *thread = &lfsck->ml_thread;
350 struct l_wait_info lwi;
352 if (lfsck->ml_sleep_jif > 0 &&
353 lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
354 spin_lock(&lfsck->ml_lock);
355 if (likely(lfsck->ml_sleep_jif > 0 &&
356 lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
357 lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
358 LWI_ON_SIGNAL_NOOP, NULL);
359 spin_unlock(&lfsck->ml_lock);
361 l_wait_event(thread->t_ctl_waitq,
362 !thread_is_running(thread),
364 lfsck->ml_new_scanned = 0;
366 spin_unlock(&lfsck->ml_lock);
371 /* lfsck_bookmark file ops */
/* Convert an on-disk (little-endian) bookmark to CPU byte order.
 * NOTE(review): "static void inline" is legal but unconventional specifier
 * order; "static inline void" is the usual form. */
373 static void inline mdd_lfsck_bookmark_to_cpu(struct lfsck_bookmark *des,
374 struct lfsck_bookmark *src)
376 des->lb_magic = le32_to_cpu(src->lb_magic);
377 des->lb_version = le16_to_cpu(src->lb_version);
378 des->lb_param = le16_to_cpu(src->lb_param);
379 des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
/* Convert an in-memory bookmark to the on-disk little-endian layout. */
382 static void inline mdd_lfsck_bookmark_to_le(struct lfsck_bookmark *des,
383 struct lfsck_bookmark *src)
385 des->lb_magic = cpu_to_le32(src->lb_magic);
386 des->lb_version = cpu_to_le16(src->lb_version);
387 des->lb_param = cpu_to_le16(src->lb_param);
388 des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
/* Read the lfsck_bookmark file into ml_bookmark_disk and unpack it into
 * ml_bookmark_ram. An invalid magic is only warned about and the bookmark
 * is treated as new; an empty file is reported as -ENODATA. */
391 static int mdd_lfsck_bookmark_load(const struct lu_env *env,
392 struct md_lfsck *lfsck)
395 int len = sizeof(struct lfsck_bookmark);
398 rc = dt_record_read(env, lfsck->ml_bookmark_obj,
399 mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
402 struct lfsck_bookmark *bm = &lfsck->ml_bookmark_ram;
404 mdd_lfsck_bookmark_to_cpu(bm, &lfsck->ml_bookmark_disk);
405 if (bm->lb_magic != LFSCK_BOOKMARK_MAGIC) {
406 CWARN("%.16s: invalid lfsck_bookmark magic "
407 "0x%x != 0x%x\n", mdd_lfsck2name(lfsck),
408 bm->lb_magic, LFSCK_BOOKMARK_MAGIC);
409 /* Process it as new lfsck_bookmark. */
/* -EFAULT at offset 0 indicates the file has no data yet. */
413 if (rc == -EFAULT && pos == 0)
414 /* return -ENODATA for empty lfsck_bookmark. */
417 CERROR("%.16s: fail to load lfsck_bookmark, "
418 "expected = %d, rc = %d\n",
419 mdd_lfsck2name(lfsck), len, rc);
/* Pack the in-RAM bookmark into little-endian form and persist it to the
 * lfsck_bookmark file within a single local transaction on the bottom
 * device. Returns 0 on success or a negative errno.
 * Fix: the CERROR format strings ended in "%d\n," — the stray comma was
 * inside the string literal and was emitted after the newline. */
424 static int mdd_lfsck_bookmark_store(const struct lu_env *env,
425 struct md_lfsck *lfsck)
427 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
428 struct thandle *handle;
429 struct dt_object *obj = lfsck->ml_bookmark_obj;
431 int len = sizeof(struct lfsck_bookmark);
435 mdd_lfsck_bookmark_to_le(&lfsck->ml_bookmark_disk,
436 &lfsck->ml_bookmark_ram);
437 handle = dt_trans_create(env, mdd->mdd_bottom);
438 if (IS_ERR(handle)) {
439 rc = PTR_ERR(handle);
440 CERROR("%.16s: fail to create trans for storing "
441 "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
445 rc = dt_declare_record_write(env, obj, len, 0, handle);
447 CERROR("%.16s: fail to declare trans for storing "
448 "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
452 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
454 CERROR("%.16s: fail to start trans for storing "
455 "lfsck_bookmark: %d\n", mdd_lfsck2name(lfsck), rc);
459 rc = dt_record_write(env, obj,
460 mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
463 CERROR("%.16s: fail to store lfsck_bookmark, expected = %d, "
464 "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
469 dt_trans_stop(env, mdd->mdd_bottom, handle);
/* Initialize a fresh bookmark (magic + current version) in RAM and write
 * it to disk under ml_mutex. */
473 static int mdd_lfsck_bookmark_init(const struct lu_env *env,
474 struct md_lfsck *lfsck)
476 struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram;
479 memset(mb, 0, sizeof(*mb));
480 mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
481 mb->lb_version = LFSCK_VERSION_V2;
482 mutex_lock(&lfsck->ml_mutex);
483 rc = mdd_lfsck_bookmark_store(env, lfsck);
484 mutex_unlock(&lfsck->ml_mutex);
488 /* lfsck_namespace file ops */
/* Convert an on-disk (little-endian) position to CPU byte order. */
490 static void inline mdd_lfsck_position_to_cpu(struct lfsck_position *des,
491 struct lfsck_position *src)
493 des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
494 fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
495 des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
/* Convert an in-memory position to the on-disk little-endian layout. */
498 static void inline mdd_lfsck_position_to_le(struct lfsck_position *des,
499 struct lfsck_position *src)
501 des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
502 fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
503 des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
/* Field-by-field conversion of an on-disk (little-endian) lfsck_namespace
 * record to CPU byte order, including the three embedded positions. */
506 static void inline mdd_lfsck_namespace_to_cpu(struct lfsck_namespace *des,
507 struct lfsck_namespace *src)
509 des->ln_magic = le32_to_cpu(src->ln_magic);
510 des->ln_status = le32_to_cpu(src->ln_status);
511 des->ln_flags = le32_to_cpu(src->ln_flags);
512 des->ln_success_count = le32_to_cpu(src->ln_success_count);
513 des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
514 des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
515 des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
516 des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
517 des->ln_time_last_checkpoint =
518 le64_to_cpu(src->ln_time_last_checkpoint);
519 mdd_lfsck_position_to_cpu(&des->ln_pos_latest_start,
520 &src->ln_pos_latest_start);
521 mdd_lfsck_position_to_cpu(&des->ln_pos_last_checkpoint,
522 &src->ln_pos_last_checkpoint);
523 mdd_lfsck_position_to_cpu(&des->ln_pos_first_inconsistent,
524 &src->ln_pos_first_inconsistent);
525 des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
526 des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
527 des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
528 des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
529 des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
530 des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
531 des->ln_objs_repaired_phase2 =
532 le64_to_cpu(src->ln_objs_repaired_phase2);
533 des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
534 des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
535 des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
536 fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
537 &src->ln_fid_latest_scanned_phase2);
/* Inverse of mdd_lfsck_namespace_to_cpu(): pack an in-memory
 * lfsck_namespace record into the on-disk little-endian layout. */
540 static void inline mdd_lfsck_namespace_to_le(struct lfsck_namespace *des,
541 struct lfsck_namespace *src)
543 des->ln_magic = cpu_to_le32(src->ln_magic);
544 des->ln_status = cpu_to_le32(src->ln_status);
545 des->ln_flags = cpu_to_le32(src->ln_flags);
546 des->ln_success_count = cpu_to_le32(src->ln_success_count);
547 des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
548 des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
549 des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
550 des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
551 des->ln_time_last_checkpoint =
552 cpu_to_le64(src->ln_time_last_checkpoint);
553 mdd_lfsck_position_to_le(&des->ln_pos_latest_start,
554 &src->ln_pos_latest_start);
555 mdd_lfsck_position_to_le(&des->ln_pos_last_checkpoint,
556 &src->ln_pos_last_checkpoint);
557 mdd_lfsck_position_to_le(&des->ln_pos_first_inconsistent,
558 &src->ln_pos_first_inconsistent);
559 des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
560 des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
561 des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
562 des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
563 des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
564 des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
565 des->ln_objs_repaired_phase2 =
566 cpu_to_le64(src->ln_objs_repaired_phase2);
567 des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
568 des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
569 des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
570 fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
571 &src->ln_fid_latest_scanned_phase2);
575 * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
576 * \retval 0: succeed.
577 * \retval -ve: failed cases.
/* Load the lfsck_namespace state from the XATTR_NAME_LFSCK_NAMESPACE
 * xattr on the component object and unpack it into lc_file_ram.
 * See the \retval comment above: bad magic means "broken, reset it". */
579 static int mdd_lfsck_namespace_load(const struct lu_env *env,
580 struct lfsck_component *com)
582 int len = com->lc_file_size;
585 rc = dt_xattr_get(env, com->lc_obj,
586 mdd_buf_get(env, com->lc_file_disk, len),
587 XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
589 struct lfsck_namespace *ns = com->lc_file_ram;
591 mdd_lfsck_namespace_to_cpu(ns,
592 (struct lfsck_namespace *)com->lc_file_disk);
593 if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
594 CWARN("%.16s: invalid lfsck_namespace magic "
596 mdd_lfsck2name(com->lc_lfsck),
597 ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
/* -ENODATA (no xattr yet) is not an error worth logging. */
602 } else if (rc != -ENODATA) {
603 CERROR("%.16s: fail to load lfsck_namespace, expected = %d, "
604 "rc = %d\n", mdd_lfsck2name(com->lc_lfsck), len, rc);
/* Pack lc_file_ram into little-endian form in lc_file_disk and store it
 * as the XATTR_NAME_LFSCK_NAMESPACE xattr within one local transaction;
 * @init selects xattr create vs. replace. Returns 0 or negative errno.
 * Fix: the CERROR format strings ended in "%d\n," — the stray comma was
 * inside the string literal and was emitted after the newline. */
611 static int mdd_lfsck_namespace_store(const struct lu_env *env,
612 struct lfsck_component *com, bool init)
614 struct dt_object *obj = com->lc_obj;
615 struct md_lfsck *lfsck = com->lc_lfsck;
616 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
617 struct thandle *handle;
618 int len = com->lc_file_size;
622 mdd_lfsck_namespace_to_le((struct lfsck_namespace *)com->lc_file_disk,
623 (struct lfsck_namespace *)com->lc_file_ram);
624 handle = dt_trans_create(env, mdd->mdd_bottom);
625 if (IS_ERR(handle)) {
626 rc = PTR_ERR(handle);
627 CERROR("%.16s: fail to create trans for storing "
628 "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
632 rc = dt_declare_xattr_set(env, obj,
633 mdd_buf_get(env, com->lc_file_disk, len),
634 XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
636 CERROR("%.16s: fail to declare trans for storing "
637 "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
641 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
643 CERROR("%.16s: fail to start trans for storing "
644 "lfsck_namespace: %d\n", mdd_lfsck2name(lfsck), rc);
648 rc = dt_xattr_set(env, obj,
649 mdd_buf_get(env, com->lc_file_disk, len),
650 XATTR_NAME_LFSCK_NAMESPACE,
651 init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
652 handle, BYPASS_CAPA);
654 CERROR("%.16s: fail to store lfsck_namespace, len = %d, "
655 "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
660 dt_trans_stop(env, mdd->mdd_bottom, handle);
/* Initialize a fresh lfsck_namespace record (magic + LS_INIT status) and
 * persist it (create mode) under the component semaphore. */
664 static int mdd_lfsck_namespace_init(const struct lu_env *env,
665 struct lfsck_component *com)
667 struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
670 memset(ns, 0, sizeof(*ns));
671 ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
672 ns->ln_status = LS_INIT;
673 down_write(&com->lc_sem);
674 rc = mdd_lfsck_namespace_store(env, com, true);
675 up_write(&com->lc_sem);
/* Look up the tracing-file record for @fid; keys are stored big-endian,
 * so the FID is byte-swapped before the index lookup. */
679 static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
680 struct lfsck_component *com,
681 const struct lu_fid *fid,
684 struct lu_fid *key = &mdd_env_info(env)->mti_fid;
687 fid_cpu_to_be(key, fid);
688 rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
689 (const struct dt_key *)key, BYPASS_CAPA);
/* Delete the tracing-file record keyed by the big-endian form of @fid
 * inside a single local transaction. */
693 static int mdd_lfsck_namespace_delete(const struct lu_env *env,
694 struct lfsck_component *com,
695 const struct lu_fid *fid)
697 struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
698 struct lu_fid *key = &mdd_env_info(env)->mti_fid;
699 struct thandle *handle;
700 struct dt_object *obj = com->lc_obj;
704 handle = dt_trans_create(env, mdd->mdd_bottom);
706 RETURN(PTR_ERR(handle));
708 rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
712 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
/* The actual delete uses the big-endian key. */
716 fid_cpu_to_be(key, fid);
717 rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
723 dt_trans_stop(env, mdd->mdd_bottom, handle);
/* Insert or update the tracing-file record for @fid with @flags. Looks up
 * the existing record first; unless @force, an existing record with the
 * same flags is left untouched. The delete (for replace) and the insert
 * run in one local transaction, keyed by the big-endian FID. */
727 static int mdd_lfsck_namespace_update(const struct lu_env *env,
728 struct lfsck_component *com,
729 const struct lu_fid *fid,
730 __u8 flags, bool force)
732 struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
733 struct lu_fid *key = &mdd_env_info(env)->mti_fid;
734 struct thandle *handle;
735 struct dt_object *obj = com->lc_obj;
741 rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
742 if (rc != 0 && rc != -ENOENT)
/* Record already present: only rewrite when forced with new flags. */
746 if (!force || flags == tf)
750 handle = dt_trans_create(env, mdd->mdd_bottom);
752 RETURN(PTR_ERR(handle));
754 rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
759 handle = dt_trans_create(env, mdd->mdd_bottom);
761 RETURN(PTR_ERR(handle));
764 rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
765 (const struct dt_key *)fid, handle);
769 rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
773 fid_cpu_to_be(key, fid);
775 rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
778 CERROR("%s: fail to insert "DFID", rc = %d\n",
779 mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
784 rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
785 (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
790 dt_trans_stop(env, mdd->mdd_bottom, handle);
795 * \retval +ve repaired
796 * \retval 0 no need to repair
797 * \retval -ve error cases
/* Phase-2 repair for a single multi-linked object: walk the object's
 * linkEA entries, verify each against the parent directory's name entry,
 * and (unless dry-run) remove stale linkEA entries, rewriting the linkEA
 * in a transaction when anything changed. Return values documented above
 * (+ve repaired / 0 nothing to do / -ve error). */
799 static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
800 struct lfsck_component *com,
801 struct mdd_object *child,
804 struct mdd_thread_info *info = mdd_env_info(env);
805 struct lu_attr *la = &info->mti_la;
806 struct lu_name *cname = &info->mti_name;
807 struct lu_fid *pfid = &info->mti_fid;
808 struct lu_fid *cfid = &info->mti_fid2;
809 struct md_lfsck *lfsck = com->lc_lfsck;
810 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
811 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
812 struct lfsck_namespace *ns =
813 (struct lfsck_namespace *)com->lc_file_ram;
814 struct linkea_data ldata = { 0 };
815 struct thandle *handle = NULL;
/* Journal mode: open the transaction up-front, before taking the lock. */
822 if (com->lc_journal) {
828 handle = mdd_trans_create(env, mdd);
830 RETURN(rc = PTR_ERR(handle));
832 rc = mdd_declare_links_add(env, child, handle, NULL);
836 rc = mdd_trans_start(env, mdd, handle);
840 mdd_write_lock(env, child, MOR_TGT_CHILD);
844 if (unlikely(mdd_is_dead_obj(child)))
847 rc = mdd_links_read(env, child, &ldata);
849 if ((bk->lb_param & LPF_DRYRUN) &&
850 (rc == -EINVAL || rc == -ENODATA))
856 rc = mdd_la_get(env, child, la, BYPASS_CAPA);
/* Iterate every linkEA entry recorded for this object. */
860 ldata.ld_lee = LINKEA_FIRST_ENTRY(ldata);
861 count = ldata.ld_leh->leh_reccount;
862 while (count-- > 0) {
863 struct mdd_object *parent = NULL;
864 struct dt_object *dir;
866 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname,
868 if (!fid_is_sane(pfid))
871 parent = mdd_object_find(env, mdd, pfid);
874 else if (IS_ERR(parent))
875 GOTO(stop, rc = PTR_ERR(parent));
877 if (!mdd_object_exists(parent))
880 /* XXX: Currently, skip remote object, the consistency for
881 * remote object will be processed in LFSCK phase III. */
882 if (mdd_object_remote(parent)) {
883 mdd_object_put(env, parent);
884 ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
888 dir = mdd_object_child(parent);
889 if (unlikely(!dt_try_as_dir(env, dir)))
892 /* To guarantee the 'name' is terminated with '0'. */
893 memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
894 info->mti_key[cname->ln_namelen] = 0;
895 cname->ln_name = info->mti_key;
896 rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
897 (const struct dt_key *)cname->ln_name,
899 if (rc != 0 && rc != -ENOENT) {
900 mdd_object_put(env, parent);
/* Name entry resolves back to this child: linkEA entry is valid. */
905 if (lu_fid_eq(cfid, mdo2fid(child))) {
906 mdd_object_put(env, parent);
907 ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
914 if (ldata.ld_leh->leh_reccount > la->la_nlink)
917 /* XXX: For the case of there is linkea entry, but without name
918 * entry pointing to the object, and the object link count
919 * isn't less than the count of name entries, then add the
920 * name entry back to namespace.
922 * It is out of LFSCK 1.5 scope, will implement it in the
923 * future. Keep the linkEA entry. */
924 mdd_object_put(env, parent);
925 ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
930 mdd_object_put(env, parent);
931 if (bk->lb_param & LPF_DRYRUN)
934 CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
935 PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
937 linkea_del_buf(&ldata, cname);
/* Not yet journalling: restart this object with a transaction open. */
942 if (!com->lc_journal) {
947 rc = mdd_links_write(env, child, &ldata, handle);
954 mdd_write_unlock(env, child);
957 mdd_trans_stop(env, mdd, rc, handle);
959 if (rc == 0 && update) {
960 ns->ln_objs_nlink_repaired++;
/* Reset the namespace component: clear the statistics (preserving the
 * success count and last-complete time on a non-init reset), unlink and
 * recreate the lfsck_namespace index file under the local root, and store
 * the fresh LS_INIT record. */
968 static int mdd_lfsck_namespace_reset(const struct lu_env *env,
969 struct lfsck_component *com, bool init)
971 struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
972 struct mdd_device *mdd = mdd_lfsck2mdd(com->lc_lfsck);
973 struct dt_object *dto, *root;
977 down_write(&com->lc_sem);
979 memset(ns, 0, sizeof(*ns));
/* Non-init path: carry the historical counters across the reset. */
981 __u32 count = ns->ln_success_count;
982 __u64 last_time = ns->ln_time_last_complete;
984 memset(ns, 0, sizeof(*ns));
985 ns->ln_success_count = count;
986 ns->ln_time_last_complete = last_time;
988 ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
989 ns->ln_status = LS_INIT;
991 root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
992 if (unlikely(IS_ERR(root)))
993 GOTO(out, rc = PTR_ERR(root));
995 rc = local_object_unlink(env, mdd->mdd_bottom, root,
996 lfsck_namespace_name);
1000 lu_object_put(env, &com->lc_obj->do_lu);
1002 dto = local_index_find_or_create(env, mdd->mdd_los, root,
1003 lfsck_namespace_name,
1004 S_IFREG | S_IRUGO | S_IWUSR,
1005 &dt_lfsck_features);
1007 GOTO(out, rc = PTR_ERR(dto));
1009 rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
1014 rc = mdd_lfsck_namespace_store(env, com, true);
1018 lu_object_put(env, &root->do_lu);
1019 up_write(&com->lc_sem);
/* Account a failed item; record the current position as the first
 * inconsistency if none has been recorded yet. */
1024 mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
1027 struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
1029 down_write(&com->lc_sem);
1031 com->lc_new_checked++;
1032 ns->ln_items_failed++;
1033 if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1034 mdd_lfsck_pos_fill(env, com->lc_lfsck,
1035 &ns->ln_pos_first_inconsistent, false);
1036 up_write(&com->lc_sem);
/* Write a checkpoint: fold the newly checked items and the phase-1 run
 * time into the namespace record, stamp the current position, and persist
 * it. Skipped when nothing new was checked (unless @init). */
1039 static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
1040 struct lfsck_component *com,
1043 struct md_lfsck *lfsck = com->lc_lfsck;
1044 struct lfsck_namespace *ns =
1045 (struct lfsck_namespace *)com->lc_file_ram;
1048 if (com->lc_new_checked == 0 && !init)
1051 down_write(&com->lc_sem);
1054 ns->ln_pos_latest_start = lfsck->ml_pos_current;
1056 ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
/* HALF_SEC rounds the jiffies delta to the nearest second. */
1057 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1058 HALF_SEC - lfsck->ml_time_last_checkpoint);
1059 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1060 ns->ln_items_checked += com->lc_new_checked;
1061 com->lc_new_checked = 0;
1064 rc = mdd_lfsck_namespace_store(env, com, false);
1066 up_write(&com->lc_sem);
/* Prepare the namespace component for a scan run: reset after a completed
 * run; otherwise choose between resuming phase 2 (already scanned once)
 * and (re)starting phase 1, placing the component on the matching list
 * and computing the start position (checkpoint + 1, or the first recorded
 * inconsistency when a dry run is being replayed). */
1070 static int mdd_lfsck_namespace_prep(const struct lu_env *env,
1071 struct lfsck_component *com)
1073 struct md_lfsck *lfsck = com->lc_lfsck;
1074 struct lfsck_namespace *ns =
1075 (struct lfsck_namespace *)com->lc_file_ram;
1076 struct lfsck_position *pos = &com->lc_pos_start;
1078 if (ns->ln_status == LS_COMPLETED) {
1081 rc = mdd_lfsck_namespace_reset(env, com, false);
1086 down_write(&com->lc_sem);
1088 ns->ln_time_latest_start = cfs_time_current_sec();
1090 spin_lock(&lfsck->ml_lock);
1091 if (ns->ln_flags & LF_SCANNED_ONCE) {
1092 if (!lfsck->ml_drop_dryrun ||
1093 mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
/* Phase 1 already done: go straight to the double scan. */
1094 ns->ln_status = LS_SCANNING_PHASE2;
1095 cfs_list_del_init(&com->lc_link);
1096 cfs_list_add_tail(&com->lc_link,
1097 &lfsck->ml_list_double_scan);
1098 if (!cfs_list_empty(&com->lc_link_dir))
1099 cfs_list_del_init(&com->lc_link_dir);
1100 mdd_lfsck_pos_set_zero(pos);
/* Dry-run results dropped: redo phase 1 from scratch, clearing
 * all statistics and starting from the first inconsistency. */
1102 ns->ln_status = LS_SCANNING_PHASE1;
1103 ns->ln_run_time_phase1 = 0;
1104 ns->ln_run_time_phase2 = 0;
1105 ns->ln_items_checked = 0;
1106 ns->ln_items_repaired = 0;
1107 ns->ln_items_failed = 0;
1108 ns->ln_dirs_checked = 0;
1109 ns->ln_mlinked_checked = 0;
1110 ns->ln_objs_checked_phase2 = 0;
1111 ns->ln_objs_repaired_phase2 = 0;
1112 ns->ln_objs_failed_phase2 = 0;
1113 ns->ln_objs_nlink_repaired = 0;
1114 ns->ln_objs_lost_found = 0;
1115 fid_zero(&ns->ln_fid_latest_scanned_phase2);
1116 if (cfs_list_empty(&com->lc_link_dir))
1117 cfs_list_add_tail(&com->lc_link_dir,
1118 &lfsck->ml_list_dir);
1119 *pos = ns->ln_pos_first_inconsistent;
/* Never scanned: begin phase 1 from the last checkpoint. */
1122 ns->ln_status = LS_SCANNING_PHASE1;
1123 if (cfs_list_empty(&com->lc_link_dir))
1124 cfs_list_add_tail(&com->lc_link_dir,
1125 &lfsck->ml_list_dir);
1126 if (!lfsck->ml_drop_dryrun ||
1127 mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
1128 *pos = ns->ln_pos_last_checkpoint;
1129 pos->lp_oit_cookie++;
1131 *pos = ns->ln_pos_first_inconsistent;
1134 spin_unlock(&lfsck->ml_lock);
1136 up_write(&com->lc_sem);
/* Per-object callback for the phase-1 OI-table scan: count the object,
 * and count it as a directory when applicable. */
1140 static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
1141 struct lfsck_component *com,
1142 struct mdd_object *obj)
1144 down_write(&com->lc_sem);
1145 com->lc_new_checked++;
1146 if (S_ISDIR(mdd_object_type(obj)))
1147 ((struct lfsck_namespace *)com->lc_file_ram)->ln_dirs_checked++;
1148 up_write(&com->lc_sem);
/* Declare the transaction credits needed by the exec_dir repairs:
 * a linkEA removal plus a linkEA (re)insertion. */
1152 static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
1153 struct mdd_object *obj,
1154 struct thandle *handle)
1158 /* For destroying all invalid linkEA entries. */
1159 rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
1163 /* For insert new linkEA entry. */
1164 rc = mdd_declare_links_add(env, obj, handle, NULL);
/* Re-verify that the name entry still exists in the current directory and
 * still points at @obj; classify the result as one of the
 * LFSCK_NAMEENTRY_{DEAD,REMOVED,RECREATED} states. */
1168 static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
1169 struct md_lfsck *lfsck,
1170 struct mdd_object *obj,
1173 struct dt_object *dir = lfsck->ml_obj_dir;
1174 struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
1178 if (unlikely(mdd_is_dead_obj(obj)))
1179 RETURN(LFSCK_NAMEENTRY_DEAD);
1181 rc = dt_lookup(env, dir, (struct dt_rec *)fid,
1182 (const struct dt_key *)name, BYPASS_CAPA);
1184 RETURN(LFSCK_NAMEENTRY_REMOVED);
/* Entry exists but maps to a different FID: it was recreated. */
1189 if (!lu_fid_eq(fid, mdo2fid(obj)))
1190 RETURN(LFSCK_NAMEENTRY_RECREATED);
/* Per-name-entry callback for the phase-1 directory scan: verify that the
 * object's linkEA contains an entry matching this (parent FID, name) pair
 * and repair the linkEA when it does not (missing, unmatched, or corrupt),
 * honouring dry-run mode. Multi-linked objects are additionally recorded
 * in the tracing file for phase-2 nlink verification. */
1195 static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
1196 struct lfsck_component *com,
1197 struct mdd_object *obj,
1198 struct lu_dirent *ent)
1200 struct mdd_thread_info *info = mdd_env_info(env);
1201 struct lu_attr *la = &info->mti_la;
1202 struct md_lfsck *lfsck = com->lc_lfsck;
1203 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1204 struct lfsck_namespace *ns =
1205 (struct lfsck_namespace *)com->lc_file_ram;
1206 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1207 struct linkea_data ldata = { 0 };
1208 const struct lu_fid *pfid =
1209 lu_object_fid(&lfsck->ml_obj_dir->do_lu);
1210 const struct lu_fid *cfid = mdo2fid(obj);
1211 const struct lu_name *cname;
1212 struct thandle *handle = NULL;
1213 bool repaired = false;
1214 bool locked = false;
1219 cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
1220 down_write(&com->lc_sem);
1221 com->lc_new_checked++;
/* Flags reported by the low-layer iterator about this entry. */
1223 if (ent->lde_attrs & LUDA_UPGRADE) {
1224 ns->ln_flags |= LF_UPGRADE;
1226 } else if (ent->lde_attrs & LUDA_REPAIR) {
1227 ns->ln_flags |= LF_INCONSISTENT;
/* Skip ".", "..", and the .lustre special directory. */
1231 if (ent->lde_name[0] == '.' &&
1232 (ent->lde_namelen == 1 ||
1233 (ent->lde_namelen == 2 && ent->lde_name[1] == '.') ||
1234 fid_is_dot_lustre(&ent->lde_fid)))
1237 if (!(bk->lb_param & LPF_DRYRUN) &&
1238 (com->lc_journal || repaired)) {
1243 com->lc_journal = 1;
1244 handle = mdd_trans_create(env, mdd);
1246 GOTO(out, rc = PTR_ERR(handle));
1248 rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
1252 rc = mdd_trans_start(env, mdd, handle);
1256 mdd_write_lock(env, obj, MOR_TGT_CHILD);
1260 rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
1264 rc = mdd_links_read(env, obj, &ldata);
1266 count = ldata.ld_leh->leh_reccount;
1267 rc = linkea_links_find(&ldata, cname, pfid);
1269 /* For dir, if there are more than one linkea entries,
1270 * then remove all the other redundant linkea entries.*/
1271 if (unlikely(count > 1 &&
1272 S_ISDIR(mdd_object_type(obj))))
/* linkEA exists but has no entry for this (parent, name). */
1279 ns->ln_flags |= LF_INCONSISTENT;
1280 if (bk->lb_param & LPF_DRYRUN) {
1285 /*For dir, remove the unmatched linkea entry directly.*/
1286 if (S_ISDIR(mdd_object_type(obj))) {
1287 if (!com->lc_journal)
1290 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
1291 handle, BYPASS_CAPA);
1300 } else if (unlikely(rc == -EINVAL)) {
1301 ns->ln_flags |= LF_INCONSISTENT;
1302 if (bk->lb_param & LPF_DRYRUN) {
1308 if (!com->lc_journal)
1311 /* The magic crashed, we are not sure whether there are more
1312 * corrupt data in the linkea, so remove all linkea entries. */
1313 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
1319 } else if (rc == -ENODATA) {
1320 ns->ln_flags |= LF_UPGRADE;
1321 if (bk->lb_param & LPF_DRYRUN) {
/* No linkEA at all: build a fresh one for this entry. */
1328 rc = linkea_data_new(&ldata, &mdd_env_info(env)->mti_link_buf);
1333 if (!com->lc_journal)
1336 rc = linkea_add_buf(&ldata, cname, pfid);
1340 rc = mdd_links_write(env, obj, &ldata, handle);
1344 count = ldata.ld_leh->leh_reccount;
1353 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
1358 (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
1359 /* Usually, it is for single linked object or dir, do nothing.*/
1362 /* Following modification will be in another transaction. */
1363 if (handle != NULL) {
1364 LASSERT(mdd_write_locked(env, obj));
1366 mdd_write_unlock(env, obj);
1369 mdd_trans_stop(env, mdd, 0, handle);
/* Multi-linked file: record it for phase-2 nlink verification. */
1373 ns->ln_mlinked_checked++;
1374 rc = mdd_lfsck_namespace_update(env, com, cfid,
1375 count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
1381 mdd_write_unlock(env, obj);
1384 mdd_trans_stop(env, mdd, rc, handle);
1388 ns->ln_items_failed++;
1389 if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1390 mdd_lfsck_pos_fill(env, lfsck,
1391 &ns->ln_pos_first_inconsistent,
1393 if (!(bk->lb_param & LPF_FAILOUT))
1397 ns->ln_items_repaired++;
1399 com->lc_journal = 0;
1402 up_write(&com->lc_sem);
/*
 * Post-scan handler for the namespace component.
 *
 * Records the final checkpoint position, sets the component status
 * according to @result, and moves the component to the matching list
 * (double-scan or idle), then folds the phase-1 statistics into the
 * in-RAM state and persists it via mdd_lfsck_namespace_store().
 *
 * NOTE(review): this listing has gaps in the embedded line numbering,
 * so some branch conditions (e.g. the one guarding the transition to
 * LS_SCANNING_PHASE2) are not visible here — verify against the
 * complete source file.
 */
1406 static int mdd_lfsck_namespace_post(const struct lu_env *env,
1407 struct lfsck_component *com,
1408 int result, bool init)
1410 struct md_lfsck *lfsck = com->lc_lfsck;
1411 struct lfsck_namespace *ns =
1412 (struct lfsck_namespace *)com->lc_file_ram;
1415 down_write(&com->lc_sem);
1417 spin_lock(&lfsck->ml_lock);
1419 ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
/* (condition missing from this listing) first-stage scan succeeded:
 * enter phase 2 and queue the component for the double scan. */
1421 ns->ln_status = LS_SCANNING_PHASE2;
1422 ns->ln_flags |= LF_SCANNED_ONCE;
1423 ns->ln_flags &= ~LF_UPGRADE;
1424 cfs_list_del_init(&com->lc_link);
1425 cfs_list_del_init(&com->lc_link_dir);
1426 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_double_scan);
1427 } else if (result == 0) {
/* Scan neither completed nor failed: either paused (e.g. by umount)
 * or explicitly stopped; a stopped component goes back to idle. */
1428 if (lfsck->ml_paused) {
1429 ns->ln_status = LS_PAUSED;
1431 ns->ln_status = LS_STOPPED;
1432 cfs_list_del_init(&com->lc_link);
1433 cfs_list_del_init(&com->lc_link_dir);
1434 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
/* presumably result < 0: mark failed and park on the idle list. */
1437 ns->ln_status = LS_FAILED;
1438 cfs_list_del_init(&com->lc_link);
1439 cfs_list_del_init(&com->lc_link_dir);
1440 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1442 spin_unlock(&lfsck->ml_lock);
/* HALF_SEC rounds the elapsed jiffies to the nearest second. */
1445 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1446 HALF_SEC - lfsck->ml_time_last_checkpoint);
1447 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1448 ns->ln_items_checked += com->lc_new_checked;
1449 com->lc_new_checked = 0;
1452 rc = mdd_lfsck_namespace_store(env, com, false);
1454 up_write(&com->lc_sem);
/*
 * Dump the namespace component state into @buf (procfs-style text).
 *
 * Under com->lc_sem (read side) it prints the generic header fields
 * (status, flags, params, timestamps, positions) and then one of three
 * statistics sections depending on ns->ln_status:
 *   - LS_SCANNING_PHASE1: live phase-1 speed plus current OIT position;
 *   - LS_SCANNING_PHASE2: live phase-2 speed plus latest scanned FID;
 *   - otherwise: historical averages only, no current position.
 *
 * NOTE(review): the listing is missing lines (return-type line, buffer
 * advancement and error checks between the dump helpers) — the overall
 * structure below should be read together with the complete source.
 */
1459 mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
1462 struct md_lfsck *lfsck = com->lc_lfsck;
1463 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1464 struct lfsck_namespace *ns =
1465 (struct lfsck_namespace *)com->lc_file_ram;
1470 down_read(&com->lc_sem);
1471 rc = snprintf(buf, len,
1472 "name: lfsck_namespace\n"
1478 lfsck_status_names[ns->ln_status]);
1484 rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
1489 rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
1494 rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
1495 "time_since_last_completed");
1499 rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
1500 "time_since_latest_start");
1504 rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
1505 "time_since_last_checkpoint");
1509 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
1510 "latest_start_position");
1514 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
1515 "last_checkpoint_position");
1519 rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
1520 "first_failure_position");
/* Phase 1 in progress: compute real-time and average phase-1 speed. */
1524 if (ns->ln_status == LS_SCANNING_PHASE1) {
1525 struct lfsck_position pos;
1526 cfs_duration_t duration = cfs_time_current() -
1527 lfsck->ml_time_last_checkpoint;
1528 __u64 checked = ns->ln_items_checked + com->lc_new_checked;
1529 __u64 speed = checked;
/* Scale by CFS_HZ so the division by jiffies yields items/sec. */
1530 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1531 __u32 rtime = ns->ln_run_time_phase1 +
1532 cfs_duration_sec(duration + HALF_SEC);
1535 do_div(new_checked, duration);
1537 do_div(speed, rtime);
1538 rc = snprintf(buf, len,
1539 "checked_phase1: "LPU64"\n"
1540 "checked_phase2: "LPU64"\n"
1541 "updated_phase1: "LPU64"\n"
1542 "updated_phase2: "LPU64"\n"
1543 "failed_phase1: "LPU64"\n"
1544 "failed_phase2: "LPU64"\n"
1546 "M-linked: "LPU64"\n"
1547 "nlinks_repaired: "LPU64"\n"
1548 "lost_found: "LPU64"\n"
1549 "success_count: %u\n"
1550 "run_time_phase1: %u seconds\n"
1551 "run_time_phase2: %u seconds\n"
1552 "average_speed_phase1: "LPU64" items/sec\n"
1553 "average_speed_phase2: N/A\n"
1554 "real-time_speed_phase1: "LPU64" items/sec\n"
1555 "real-time_speed_phase2: N/A\n",
1557 ns->ln_objs_checked_phase2,
1558 ns->ln_items_repaired,
1559 ns->ln_objs_repaired_phase2,
1560 ns->ln_items_failed,
1561 ns->ln_objs_failed_phase2,
1562 ns->ln_dirs_checked,
1563 ns->ln_mlinked_checked,
1564 ns->ln_objs_nlink_repaired,
1565 ns->ln_objs_lost_found,
1566 ns->ln_success_count,
1568 ns->ln_run_time_phase2,
/* Sample the live OIT position for "current_position". */
1576 mdd_lfsck_pos_fill(env, lfsck, &pos, false);
1577 rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
/* Phase 2 in progress: phase-1 average plus live phase-2 speed. */
1580 } else if (ns->ln_status == LS_SCANNING_PHASE2) {
1581 cfs_duration_t duration = cfs_time_current() -
1582 lfsck->ml_time_last_checkpoint;
1583 __u64 checked = ns->ln_objs_checked_phase2 +
1584 com->lc_new_checked;
1585 __u64 speed1 = ns->ln_items_checked;
1586 __u64 speed2 = checked;
1587 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1588 __u32 rtime = ns->ln_run_time_phase2 +
1589 cfs_duration_sec(duration + HALF_SEC);
1592 do_div(new_checked, duration);
/* Guard against division by zero for an instantly-finished phase 1. */
1593 if (ns->ln_run_time_phase1 != 0)
1594 do_div(speed1, ns->ln_run_time_phase1);
1596 do_div(speed2, rtime);
1597 rc = snprintf(buf, len,
1598 "checked_phase1: "LPU64"\n"
1599 "checked_phase2: "LPU64"\n"
1600 "updated_phase1: "LPU64"\n"
1601 "updated_phase2: "LPU64"\n"
1602 "failed_phase1: "LPU64"\n"
1603 "failed_phase2: "LPU64"\n"
1605 "M-linked: "LPU64"\n"
1606 "nlinks_repaired: "LPU64"\n"
1607 "lost_found: "LPU64"\n"
1608 "success_count: %u\n"
1609 "run_time_phase1: %u seconds\n"
1610 "run_time_phase2: %u seconds\n"
1611 "average_speed_phase1: "LPU64" items/sec\n"
1612 "average_speed_phase2: "LPU64" objs/sec\n"
1613 "real-time_speed_phase1: N/A\n"
1614 "real-time_speed_phase2: "LPU64" objs/sec\n"
1615 "current_position: "DFID"\n",
1616 ns->ln_items_checked,
1618 ns->ln_items_repaired,
1619 ns->ln_objs_repaired_phase2,
1620 ns->ln_items_failed,
1621 ns->ln_objs_failed_phase2,
1622 ns->ln_dirs_checked,
1623 ns->ln_mlinked_checked,
1624 ns->ln_objs_nlink_repaired,
1625 ns->ln_objs_lost_found,
1626 ns->ln_success_count,
1627 ns->ln_run_time_phase1,
1632 PFID(&ns->ln_fid_latest_scanned_phase2));
/* Not scanning: report historical averages only. */
1639 __u64 speed1 = ns->ln_items_checked;
1640 __u64 speed2 = ns->ln_objs_checked_phase2;
1642 if (ns->ln_run_time_phase1 != 0)
1643 do_div(speed1, ns->ln_run_time_phase1);
1644 if (ns->ln_run_time_phase2 != 0)
1645 do_div(speed2, ns->ln_run_time_phase2);
1646 rc = snprintf(buf, len,
1647 "checked_phase1: "LPU64"\n"
1648 "checked_phase2: "LPU64"\n"
1649 "updated_phase1: "LPU64"\n"
1650 "updated_phase2: "LPU64"\n"
1651 "failed_phase1: "LPU64"\n"
1652 "failed_phase2: "LPU64"\n"
1654 "M-linked: "LPU64"\n"
1655 "nlinks_repaired: "LPU64"\n"
1656 "lost_found: "LPU64"\n"
1657 "success_count: %u\n"
1658 "run_time_phase1: %u seconds\n"
1659 "run_time_phase2: %u seconds\n"
1660 "average_speed_phase1: "LPU64" items/sec\n"
1661 "average_speed_phase2: "LPU64" objs/sec\n"
1662 "real-time_speed_phase1: N/A\n"
1663 "real-time_speed_phase2: N/A\n"
1664 "current_position: N/A\n",
1665 ns->ln_items_checked,
1666 ns->ln_objs_checked_phase2,
1667 ns->ln_items_repaired,
1668 ns->ln_objs_repaired_phase2,
1669 ns->ln_items_failed,
1670 ns->ln_objs_failed_phase2,
1671 ns->ln_dirs_checked,
1672 ns->ln_mlinked_checked,
1673 ns->ln_objs_nlink_repaired,
1674 ns->ln_objs_lost_found,
1675 ns->ln_success_count,
1676 ns->ln_run_time_phase1,
1677 ns->ln_run_time_phase2,
1689 up_read(&com->lc_sem);
/*
 * LFSCK namespace phase-2 ("double scan") engine.
 *
 * Iterates the namespace tracing index (com->lc_obj), resuming after
 * ns->ln_fid_latest_scanned_phase2.  For each recorded FID it looks up
 * the object and calls mdd_lfsck_namespace_double_scan_one() to verify
 * or repair it, updating the phase-2 counters, deleting the tracing
 * entry on success, or flagging LLF_REPAIR_FAILED on error.  It also
 * checkpoints state every LFSCK_CHECKPOINT_INTERVAL seconds and honors
 * the speed-limit and stop controls.  On exit the final status
 * (LS_COMPLETED / LS_PAUSED / LS_STOPPED / LS_FAILED) is stored.
 *
 * NOTE(review): gaps in the embedded numbering hide several conditions
 * and GOTO targets; read together with the full source.
 */
1693 static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
1694 struct lfsck_component *com)
1696 struct md_lfsck *lfsck = com->lc_lfsck;
1697 struct ptlrpc_thread *thread = &lfsck->ml_thread;
1698 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1699 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
1700 struct lfsck_namespace *ns =
1701 (struct lfsck_namespace *)com->lc_file_ram;
1702 struct dt_object *obj = com->lc_obj;
1703 const struct dt_it_ops *iops = &obj->do_index_ops->dio_it;
1704 struct mdd_object *target;
1712 lfsck->ml_new_scanned = 0;
1713 lfsck->ml_time_last_checkpoint = cfs_time_current();
1714 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1715 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1717 di = iops->init(env, obj, 0, BYPASS_CAPA);
1719 RETURN(PTR_ERR(di));
/* The index keys are big-endian FIDs; position after the last one
 * scanned in an earlier (interrupted) phase-2 run. */
1721 fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
1722 rc = iops->get(env, di, (const struct dt_key *)&fid);
1726 /* Skip the start one, which either has been processed or non-exist. */
1727 rc = iops->next(env, di);
1731 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
/* Fault-injection hook: optionally delay while still interruptible. */
1735 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
1737 struct l_wait_info lwi;
1739 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
1741 l_wait_event(thread->t_ctl_waitq,
1742 !thread_is_running(thread),
1746 key = iops->key(env, di);
1747 fid_be_to_cpu(&fid, (const struct lu_fid *)key);
1748 target = mdd_object_find(env, mdd, &fid);
1749 down_write(&com->lc_sem);
1750 if (target == NULL) {
1753 } else if (IS_ERR(target)) {
1754 rc = PTR_ERR(target);
1758 /* XXX: Currently, skip remote object, the consistency for
1759 * remote object will be processed in LFSCK phase III. */
1760 if (!mdd_object_exists(target) || mdd_object_remote(target))
1763 rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
1765 rc = mdd_lfsck_namespace_double_scan_one(env, com,
1769 mdd_object_put(env, target);
1772 lfsck->ml_new_scanned++;
1773 com->lc_new_checked++;
1774 ns->ln_fid_latest_scanned_phase2 = fid;
1776 ns->ln_objs_repaired_phase2++;
1778 ns->ln_objs_failed_phase2++;
1779 up_write(&com->lc_sem);
/* Entry resolved (rc == 0) or repaired for real (rc > 0, not a dry
 * run): drop it from the tracing index; on failure keep it flagged. */
1781 if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
1782 mdd_lfsck_namespace_delete(env, com, &fid);
1783 } else if (rc < 0) {
1784 flags |= LLF_REPAIR_FAILED;
1785 mdd_lfsck_namespace_update(env, com, &fid, flags, true);
1788 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
/* Periodic checkpoint: skip while inside the interval or when
 * nothing new was checked since the last store. */
1791 if (likely(cfs_time_beforeq(cfs_time_current(),
1792 lfsck->ml_time_next_checkpoint)) ||
1793 com->lc_new_checked == 0)
1796 down_write(&com->lc_sem);
1797 ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1798 HALF_SEC - lfsck->ml_time_last_checkpoint);
1799 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1800 ns->ln_objs_checked_phase2 += com->lc_new_checked;
1801 com->lc_new_checked = 0;
1802 rc = mdd_lfsck_namespace_store(env, com, false);
1803 up_write(&com->lc_sem);
1807 lfsck->ml_time_last_checkpoint = cfs_time_current();
1808 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1809 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1812 mdd_lfsck_control_speed(lfsck);
1813 if (unlikely(!thread_is_running(thread)))
1816 rc = iops->next(env, di);
/* Iteration finished (or aborted): fold in the final statistics and
 * record the terminal status before storing. */
1825 iops->fini(env, di);
1826 down_write(&com->lc_sem);
1828 ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1829 HALF_SEC - lfsck->ml_time_last_checkpoint);
1830 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1831 ns->ln_objs_checked_phase2 += com->lc_new_checked;
1832 com->lc_new_checked = 0;
1835 com->lc_journal = 0;
1836 ns->ln_status = LS_COMPLETED;
1837 if (!(bk->lb_param & LPF_DRYRUN))
1839 ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
1840 ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
1841 ns->ln_success_count++;
1842 } else if (rc == 0) {
1843 if (lfsck->ml_paused)
1844 ns->ln_status = LS_PAUSED;
1846 ns->ln_status = LS_STOPPED;
1848 ns->ln_status = LS_FAILED;
/* A paused component stays on the double-scan list so it can resume;
 * anything else goes back to idle. */
1851 if (ns->ln_status != LS_PAUSED) {
1852 spin_lock(&lfsck->ml_lock);
1853 cfs_list_del_init(&com->lc_link);
1854 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1855 spin_unlock(&lfsck->ml_lock);
1858 rc = mdd_lfsck_namespace_store(env, com, false);
1860 up_write(&com->lc_sem);
/*
 * Callback table binding the namespace component into the generic
 * LFSCK framework: the main engine drives components only through
 * these hooks (reset, failure accounting, checkpointing, scan prep,
 * per-object and per-dirent execution, post-processing, procfs dump,
 * and the phase-2 double scan).
 */
1864 static struct lfsck_operations mdd_lfsck_namespace_ops = {
1865 .lfsck_reset = mdd_lfsck_namespace_reset,
1866 .lfsck_fail = mdd_lfsck_namespace_fail,
1867 .lfsck_checkpoint = mdd_lfsck_namespace_checkpoint,
1868 .lfsck_prep = mdd_lfsck_namespace_prep,
1869 .lfsck_exec_oit = mdd_lfsck_namespace_exec_oit,
1870 .lfsck_exec_dir = mdd_lfsck_namespace_exec_dir,
1871 .lfsck_post = mdd_lfsck_namespace_post,
1872 .lfsck_dump = mdd_lfsck_namespace_dump,
1873 .lfsck_double_scan = mdd_lfsck_namespace_double_scan,
1876 /* LFSCK component setup/cleanup functions */
/*
 * Allocate and initialize the namespace LFSCK component.
 *
 * Allocates the component and its RAM/disk state buffers, locates (or
 * creates) the "lfsck_namespace" index file under the local root,
 * loads the stored state — resetting it on load failure, or
 * initializing it fresh on -ENODATA — and finally links the component
 * onto the list matching the loaded status.  A previous crash while
 * scanning is recorded as LS_CRASHED and the component is queued for
 * scanning again.  On any error, mdd_lfsck_component_cleanup() tears
 * the partially-built component down.
 *
 * NOTE(review): missing listing lines hide the OBD_ALLOC of @com, some
 * error checks, and the switch-case labels before line 1939.
 */
1878 static int mdd_lfsck_namespace_setup(const struct lu_env *env,
1879 struct md_lfsck *lfsck)
1881 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
1882 struct lfsck_component *com;
1883 struct lfsck_namespace *ns;
1884 struct dt_object *obj, *root;
1892 CFS_INIT_LIST_HEAD(&com->lc_link);
1893 CFS_INIT_LIST_HEAD(&com->lc_link_dir);
1894 init_rwsem(&com->lc_sem);
1895 atomic_set(&com->lc_ref, 1);
1896 com->lc_lfsck = lfsck;
1897 com->lc_type = LT_NAMESPACE;
1898 com->lc_ops = &mdd_lfsck_namespace_ops;
1899 com->lc_file_size = sizeof(struct lfsck_namespace);
1900 OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
1901 if (com->lc_file_ram == NULL)
1902 GOTO(out, rc = -ENOMEM);
1904 OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
1905 if (com->lc_file_disk == NULL)
1906 GOTO(out, rc = -ENOMEM);
1908 root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
1909 if (unlikely(IS_ERR(root)))
1910 GOTO(out, rc = PTR_ERR(root));
/* The tracing file is an index keyed by FID (dt_lfsck_features). */
1912 obj = local_index_find_or_create(env, mdd->mdd_los, root,
1913 lfsck_namespace_name,
1914 S_IFREG | S_IRUGO | S_IWUSR,
1915 &dt_lfsck_features);
1916 lu_object_put(env, &root->do_lu);
1918 GOTO(out, rc = PTR_ERR(obj));
1921 rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
1925 rc = mdd_lfsck_namespace_load(env, com);
1927 rc = mdd_lfsck_namespace_reset(env, com, true);
1928 else if (rc == -ENODATA)
1929 rc = mdd_lfsck_namespace_init(env, com);
1933 ns = (struct lfsck_namespace *)com->lc_file_ram;
1934 switch (ns->ln_status) {
1939 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1942 CERROR("%s: unknown status: %u\n",
1943 mdd_lfsck2name(lfsck), ns->ln_status);
/* Fall through to the scanning cases: treat unknown as crashed. */
1945 case LS_SCANNING_PHASE1:
1946 case LS_SCANNING_PHASE2:
1947 /* No need to store the status to disk right now.
1948 * If the system crashed before the status stored,
1949 * it will be loaded back when next time. */
1950 ns->ln_status = LS_CRASHED;
1954 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_scan);
1955 cfs_list_add_tail(&com->lc_link_dir, &lfsck->ml_list_dir);
1963 mdd_lfsck_component_cleanup(env, com);
1967 /* helper functions for framework */
/*
 * Decide whether directory @obj should be traversed by LFSCK.
 *
 * Walks up the parentage: the root and ".lustre" (which holds no real
 * user objects) are excluded, dead directories are skipped, and a
 * probing dt_xattr_get() of the link xattr plus mdd_parent_fid() move
 * the check to the parent until the local root is reached.  The loop
 * also stops at non-existent or remote parents.
 *
 * NOTE(review): @obj is put on every exit path shown; the lines
 * establishing the loop construct and return values are partly missing
 * from this listing — confirm against the full source.
 */
1969 static int object_needs_lfsck(const struct lu_env *env, struct mdd_device *mdd,
1970 struct mdd_object *obj)
1972 struct lu_fid *fid = &mdd_env_info(env)->mti_fid;
1976 LASSERT(S_ISDIR(mdd_object_type(obj)));
1979 if (mdd_is_root(mdd, mdo2fid(obj))) {
1981 mdd_object_put(env, obj);
1985 /* .lustre doesn't contain "real" user objects, no need lfsck */
1986 if (fid_is_dot_lustre(mdo2fid(obj))) {
1988 mdd_object_put(env, obj);
1992 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1993 if (unlikely(mdd_is_dead_obj(obj))) {
1994 mdd_read_unlock(env, obj);
1996 mdd_object_put(env, obj);
/* Probe (zero-length buffer) whether the object has a link xattr. */
2000 rc = dt_xattr_get(env, mdd_object_child(obj),
2001 mdd_buf_get(env, NULL, 0), XATTR_NAME_LINK,
2003 mdd_read_unlock(env, obj);
2006 mdd_object_put(env, obj);
2010 if (rc < 0 && rc != -ENODATA) {
2012 mdd_object_put(env, obj);
2016 rc = mdd_parent_fid(env, obj, fid);
2018 mdd_object_put(env, obj);
2022 if (unlikely(lu_fid_eq(fid, &mdd->mdd_local_root_fid)))
/* Continue the check with the parent directory. */
2025 obj = mdd_object_find(env, mdd, fid);
2028 else if (IS_ERR(obj))
2029 return PTR_ERR(obj);
2031 if (!mdd_object_exists(obj)) {
2032 mdd_object_put(env, obj);
2036 /* Currently, only client visible directory can be remote. */
2037 if (mdd_object_remote(obj)) {
2038 mdd_object_put(env, obj);
/*
 * Convert an on-wire (little-endian) directory entry to CPU byte
 * order in place and guarantee NUL-termination of the name.
 */
2047 static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
2049 fid_le_to_cpu(&ent->lde_fid, &ent->lde_fid);
2050 ent->lde_hash = le64_to_cpu(ent->lde_hash);
2051 ent->lde_reclen = le16_to_cpu(ent->lde_reclen);
2052 ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
2053 ent->lde_attrs = le32_to_cpu(ent->lde_attrs);
2055 /* Make sure the name is terminated with '0'.
2056 * The data (type) after ent::lde_name maybe
2057 * broken, but we do not care. */
2058 ent->lde_name[ent->lde_namelen] = 0;
2061 /* LFSCK wrap functions */
/*
 * Propagate a scan failure to every component currently on the scan
 * list via its lfsck_fail() hook.
 */
2063 static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
2066 struct lfsck_component *com;
2068 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2069 com->lc_ops->lfsck_fail(env, com, new_checked);
/*
 * Time-gated checkpoint: if the checkpoint interval has elapsed,
 * refresh the current scan position and ask every scanning component
 * to checkpoint its state, then schedule the next checkpoint time.
 */
2073 static int mdd_lfsck_checkpoint(const struct lu_env *env,
2074 struct md_lfsck *lfsck)
2076 struct lfsck_component *com;
/* Still within the interval: nothing to do yet. */
2079 if (likely(cfs_time_beforeq(cfs_time_current(),
2080 lfsck->ml_time_next_checkpoint)))
2083 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
2084 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2085 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
2090 lfsck->ml_time_last_checkpoint = cfs_time_current();
2091 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2092 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Prepare the LFSCK main engine for a (re)started scan.
 *
 * Calls every scanning component's prep() hook and picks the smallest
 * starting position among them, loads the otable-based iterator at
 * that position, and — if the position names a parent directory —
 * re-opens that directory and positions its namespace iterator for
 * the directory traversal.  Finishes with an initial checkpoint and
 * arming of the checkpoint timer.
 *
 * NOTE(review): several listing lines are absent (declarations of
 * @di, error checks, GOTO labels), so the exact error paths must be
 * confirmed against the full source.
 */
2098 static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
2099 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2100 struct mdd_object *obj = NULL;
2101 struct dt_object *dt_obj;
2102 struct lfsck_component *com;
2103 struct lfsck_component *next;
2104 struct lfsck_position *pos = NULL;
2105 const struct dt_it_ops *iops =
2106 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2110 LASSERT(lfsck->ml_obj_dir == NULL);
2111 LASSERT(lfsck->ml_di_dir == NULL);
2113 lfsck->ml_current_oit_processed = 0;
2114 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2115 com->lc_new_checked = 0;
/* A dry run must not journal any modification. */
2116 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2117 com->lc_journal = 0;
2119 rc = com->lc_ops->lfsck_prep(env, com);
/* Track the minimum non-zero start position over all components. */
2123 if ((pos == NULL) ||
2124 (!mdd_lfsck_pos_is_zero(&com->lc_pos_start) &&
2125 mdd_lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
2126 pos = &com->lc_pos_start;
2129 /* Init otable-based iterator. */
2131 rc = iops->load(env, lfsck->ml_di_oit, 0);
2133 lfsck->ml_oit_over = 1;
2140 rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
2144 lfsck->ml_oit_over = 1;
2146 if (fid_is_zero(&pos->lp_dir_parent))
2149 /* Find the directory for namespace-based traverse. */
2150 obj = mdd_object_find(env, mdd, &pos->lp_dir_parent);
2153 else if (IS_ERR(obj))
2154 RETURN(PTR_ERR(obj));
2156 /* XXX: Currently, skip remote object, the consistency for
2157 * remote object will be processed in LFSCK phase III. */
2158 if (!mdd_object_exists(obj) || mdd_object_remote(obj) ||
2159 unlikely(!S_ISDIR(mdd_object_type(obj))))
2162 if (unlikely(mdd_is_dead_obj(obj)))
2165 dt_obj = mdd_object_child(obj);
2166 if (unlikely(!dt_try_as_dir(env, dt_obj)))
2167 GOTO(out, rc = -ENOTDIR);
2169 /* Init the namespace-based directory traverse. */
2170 iops = &dt_obj->do_index_ops->dio_it;
2171 di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2173 GOTO(out, rc = PTR_ERR(di));
2175 LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
2177 rc = iops->load(env, di, pos->lp_dir_cookie);
/* The saved cookie entry was already processed; step past it. */
2178 if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
2179 rc = iops->next(env, di);
2185 iops->fini(env, di);
2189 lfsck->ml_obj_dir = dt_obj;
2190 spin_lock(&lfsck->ml_lock);
2191 lfsck->ml_di_dir = di;
2192 spin_unlock(&lfsck->ml_lock);
2199 mdd_object_put(env, obj);
/* Error path: tell every component the scan will not run. */
2202 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan,
2204 com->lc_ops->lfsck_post(env, com, rc, true);
/* Initial checkpoint (init == true) and checkpoint-timer arming. */
2210 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true);
2211 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2212 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
2217 lfsck->ml_time_last_checkpoint = cfs_time_current();
2218 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2219 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Process one object delivered by the otable-based iterator.
 *
 * Runs every scanning component's exec_oit() hook on @obj; if @obj is
 * a directory that some component wants traversed (object_needs_lfsck()
 * approves and the dir list is non-empty), opens a namespace iterator
 * on it and publishes it as the current directory traversal
 * (ml_obj_dir / ml_di_dir).  On failure the components are notified
 * through mdd_lfsck_fail().
 */
2223 static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
2224 struct mdd_object *obj)
2226 struct lfsck_component *com;
2227 struct dt_object *dt_obj;
2228 const struct dt_it_ops *iops;
2233 LASSERT(lfsck->ml_obj_dir == NULL);
2235 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2236 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
/* Only directories need the namespace traversal, and only when some
 * component registered on the dir list. */
2241 if (!S_ISDIR(mdd_object_type(obj)) ||
2242 cfs_list_empty(&lfsck->ml_list_dir))
2245 rc = object_needs_lfsck(env, mdd_lfsck2mdd(lfsck), obj);
2249 if (unlikely(mdd_is_dead_obj(obj)))
2252 dt_obj = mdd_object_child(obj);
2253 if (unlikely(!dt_try_as_dir(env, dt_obj)))
2254 GOTO(out, rc = -ENOTDIR);
2256 iops = &dt_obj->do_index_ops->dio_it;
2257 di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2259 GOTO(out, rc = PTR_ERR(di));
2261 rc = iops->load(env, di, 0);
2263 rc = iops->next(env, di);
2269 iops->fini(env, di);
/* Hold a reference on @obj while it is the current traversal dir. */
2273 mdd_object_get(obj);
2274 lfsck->ml_obj_dir = dt_obj;
2275 spin_lock(&lfsck->ml_lock);
2276 lfsck->ml_di_dir = di;
2277 spin_unlock(&lfsck->ml_lock);
2283 mdd_lfsck_fail(env, lfsck, false);
2284 return (rc > 0 ? 0 : rc);
/*
 * Dispatch one directory entry (@ent of directory @obj) to every
 * scanning component's exec_dir() hook.
 */
2287 static int mdd_lfsck_exec_dir(const struct lu_env *env, struct md_lfsck *lfsck,
2288 struct mdd_object *obj, struct lu_dirent *ent)
2290 struct lfsck_component *com;
2293 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2294 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
/*
 * End-of-scan post-processing: capture the final position, run every
 * scanning component's post() hook with @result, and reset the
 * checkpoint timer.  The _safe iteration is needed because post() may
 * move components off the scan list.
 */
2301 static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
2304 struct lfsck_component *com;
2305 struct lfsck_component *next;
2308 mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false);
2309 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2310 rc = com->lc_ops->lfsck_post(env, com, result, false);
2315 lfsck->ml_time_last_checkpoint = cfs_time_current();
2316 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2317 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
/*
 * Run the second-stage (double) scan of every component queued on the
 * double-scan list, clearing the journal flag first for dry runs.
 * _safe iteration: double_scan() may remove components from the list.
 */
2321 static int mdd_lfsck_double_scan(const struct lu_env *env,
2322 struct md_lfsck *lfsck)
2324 struct lfsck_component *com;
2325 struct lfsck_component *next;
2328 cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_double_scan,
2330 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2331 com->lc_journal = 0;
2333 rc = com->lc_ops->lfsck_double_scan(env, com);
/*
 * Namespace-based traversal engine for the current directory.
 *
 * Walks ml_di_dir entry by entry: unpacks each dirent, resolves the
 * child object and runs mdd_lfsck_exec_dir() on it, doing periodic
 * checkpoints and speed control, and honoring FAILOUT, fault
 * injection (DELAY2 / FATAL2) and stop requests.  When the directory
 * is exhausted (and the OIT is not yet over) the traversal is closed
 * so the main engine can continue with the otable iterator.
 *
 * NOTE(review): the enclosing loop construct and some error checks
 * are on lines missing from this listing.
 */
2342 static int mdd_lfsck_dir_engine(const struct lu_env *env,
2343 struct md_lfsck *lfsck)
2345 struct mdd_thread_info *info = mdd_env_info(env);
2346 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2347 const struct dt_it_ops *iops =
2348 &lfsck->ml_obj_dir->do_index_ops->dio_it;
2349 struct dt_it *di = lfsck->ml_di_dir;
2350 struct lu_dirent *ent = &info->mti_ent;
2351 struct lu_fid *fid = &info->mti_fid;
2352 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2353 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2358 struct mdd_object *child;
/* Fault-injection hook: interruptible delay while running. */
2360 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) &&
2362 struct l_wait_info lwi;
2364 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2366 l_wait_event(thread->t_ctl_waitq,
2367 !thread_is_running(thread),
2371 lfsck->ml_new_scanned++;
2372 rc = iops->rec(env, di, (struct dt_rec *)ent,
2373 lfsck->ml_args_dir);
2375 mdd_lfsck_fail(env, lfsck, true);
2376 if (bk->lb_param & LPF_FAILOUT)
2382 mdd_lfsck_unpack_ent(ent);
2383 if (ent->lde_attrs & LUDA_IGNORE)
2386 *fid = ent->lde_fid;
2387 child = mdd_object_find(env, mdd, fid);
2388 if (child == NULL) {
2390 } else if (IS_ERR(child)) {
2391 mdd_lfsck_fail(env, lfsck, true);
2392 if (bk->lb_param & LPF_FAILOUT)
2393 RETURN(PTR_ERR(child));
2398 /* XXX: Currently, skip remote object, the consistency for
2399 * remote object will be processed in LFSCK phase III. */
2400 if (mdd_object_exists(child) && !mdd_object_remote(child))
2401 rc = mdd_lfsck_exec_dir(env, lfsck, child, ent);
2402 mdd_object_put(env, child);
2403 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2407 rc = mdd_lfsck_checkpoint(env, lfsck);
2408 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2412 mdd_lfsck_control_speed(lfsck);
2413 if (unlikely(!thread_is_running(thread)))
/* Fault injection: simulate a fatal engine failure. */
2416 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) {
2417 spin_lock(&lfsck->ml_lock);
2418 thread_set_flags(thread, SVC_STOPPING);
2419 spin_unlock(&lfsck->ml_lock);
2423 rc = iops->next(env, di);
/* Directory exhausted while the OIT scan continues: close it. */
2426 if (rc > 0 && !lfsck->ml_oit_over)
2427 mdd_lfsck_close_dir(env, lfsck);
/*
 * Top-level scan engine driving the otable-based iterator.
 *
 * Loop: first drain any in-progress directory traversal via
 * mdd_lfsck_dir_engine(); then, unless the OIT is over, fetch the
 * next FID from the otable iterator, resolve the object and run
 * mdd_lfsck_exec_oit() on it (which may itself open a new directory
 * traversal).  Checkpointing, speed control, FAILOUT handling and
 * fault injection (DELAY1 / CRASH / FATAL1) are applied per object.
 * The loop continues while the OIT yields objects or a directory
 * traversal is pending.
 */
2432 static int mdd_lfsck_oit_engine(const struct lu_env *env,
2433 struct md_lfsck *lfsck)
2435 struct mdd_thread_info *info = mdd_env_info(env);
2436 struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
2437 const struct dt_it_ops *iops =
2438 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2439 struct dt_it *di = lfsck->ml_di_oit;
2440 struct lu_fid *fid = &info->mti_fid;
2441 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2442 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2447 struct mdd_object *target;
/* A directory traversal is pending: finish it first. */
2449 if (lfsck->ml_di_dir != NULL) {
2450 rc = mdd_lfsck_dir_engine(env, lfsck);
2455 if (unlikely(lfsck->ml_oit_over))
2458 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) &&
2460 struct l_wait_info lwi;
2462 lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2464 l_wait_event(thread->t_ctl_waitq,
2465 !thread_is_running(thread),
2469 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2472 lfsck->ml_current_oit_processed = 1;
2473 lfsck->ml_new_scanned++;
2474 rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
2476 mdd_lfsck_fail(env, lfsck, true);
2477 if (bk->lb_param & LPF_FAILOUT)
2483 target = mdd_object_find(env, mdd, fid);
2484 if (target == NULL) {
2486 } else if (IS_ERR(target)) {
2487 mdd_lfsck_fail(env, lfsck, true);
2488 if (bk->lb_param & LPF_FAILOUT)
2489 RETURN(PTR_ERR(target));
2494 /* XXX: Currently, skip remote object, the consistency for
2495 * remote object will be processed in LFSCK phase III. */
2496 if (mdd_object_exists(target) && !mdd_object_remote(target))
2497 rc = mdd_lfsck_exec_oit(env, lfsck, target);
2498 mdd_object_put(env, target);
2499 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2503 rc = mdd_lfsck_checkpoint(env, lfsck);
2504 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2508 mdd_lfsck_control_speed(lfsck);
2510 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) {
2511 spin_lock(&lfsck->ml_lock);
2512 thread_set_flags(thread, SVC_STOPPING);
2513 spin_unlock(&lfsck->ml_lock);
2517 rc = iops->next(env, di);
2518 if (unlikely(rc > 0))
2519 lfsck->ml_oit_over = 1;
2520 else if (likely(rc == 0))
2521 lfsck->ml_current_oit_processed = 0;
2523 if (unlikely(!thread_is_running(thread)))
2525 } while (rc == 0 || lfsck->ml_di_dir != NULL);
/*
 * LFSCK kernel thread entry point.
 *
 * Initializes its lu_env and the otable-based iterator, runs the prep
 * stage, then the OIT engine, followed by post-processing and — if
 * components remain queued — the phase-2 double scan.  On a pause
 * (e.g. umount) the iterator cursor is released via put() so the scan
 * can resume later.  Thread state transitions (SVC_RUNNING /
 * SVC_STOPPED) are published under ml_lock and broadcast on the
 * control wait queue.
 *
 * NOTE(review): the cleanup labels and some declarations are on lines
 * missing from this listing.
 */
2530 static int mdd_lfsck_main(void *args)
2533 struct md_lfsck *lfsck = (struct md_lfsck *)args;
2534 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2535 struct dt_object *oit_obj = lfsck->ml_obj_oit;
2536 const struct dt_it_ops *oit_iops = &oit_obj->do_index_ops->dio_it;
2537 struct dt_it *oit_di;
2541 cfs_daemonize("lfsck");
2542 rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
2544 CERROR("%s: LFSCK, fail to init env, rc = %d\n",
2545 mdd_lfsck2name(lfsck), rc);
2549 oit_di = oit_iops->init(&env, oit_obj, lfsck->ml_args_oit, BYPASS_CAPA);
2550 if (IS_ERR(oit_di)) {
2551 rc = PTR_ERR(oit_di);
2552 CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
2553 mdd_lfsck2name(lfsck), rc);
2557 spin_lock(&lfsck->ml_lock);
2558 lfsck->ml_di_oit = oit_di;
2559 spin_unlock(&lfsck->ml_lock);
2560 rc = mdd_lfsck_prep(&env, lfsck);
2564 CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = 0x%x, dir_flags = 0x%x, "
2565 "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2566 ", pid = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2567 lfsck->ml_pos_current.lp_oit_cookie,
2568 lfsck->ml_pos_current.lp_dir_cookie,
2569 PFID(&lfsck->ml_pos_current.lp_dir_parent),
/* Announce the thread is up before starting the heavy scan. */
2572 spin_lock(&lfsck->ml_lock);
2573 thread_set_flags(thread, SVC_RUNNING);
2574 spin_unlock(&lfsck->ml_lock);
2575 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2577 if (!cfs_list_empty(&lfsck->ml_list_scan) ||
2578 cfs_list_empty(&lfsck->ml_list_double_scan))
2579 rc = mdd_lfsck_oit_engine(&env, lfsck);
2583 CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = 0x%x, dir_flags = 0x%x, "
2584 "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2585 ", pid = %d, rc = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2586 lfsck->ml_pos_current.lp_oit_cookie,
2587 lfsck->ml_pos_current.lp_dir_cookie,
2588 PFID(&lfsck->ml_pos_current.lp_dir_parent),
2589 cfs_curproc_pid(), rc);
/* Paused with nothing left to scan: release the iterator cursor so
 * the scan can resume from the stored position later. */
2591 if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
2592 oit_iops->put(&env, oit_di);
2594 if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2595 rc = mdd_lfsck_post(&env, lfsck, rc);
2596 if (lfsck->ml_di_dir != NULL)
2597 mdd_lfsck_close_dir(&env, lfsck);
2600 spin_lock(&lfsck->ml_lock);
2601 lfsck->ml_di_oit = NULL;
2602 spin_unlock(&lfsck->ml_lock);
2604 oit_iops->fini(&env, oit_di);
2606 if (!cfs_list_empty(&lfsck->ml_list_double_scan))
2607 rc = mdd_lfsck_double_scan(&env, lfsck);
2612 /* XXX: Purge the pinned objects in the future. */
/* Publish the stopped state and wake any waiter (e.g. lfsck_stop). */
2618 spin_lock(&lfsck->ml_lock);
2619 thread_set_flags(thread, SVC_STOPPED);
2620 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2621 spin_unlock(&lfsck->ml_lock);
2625 /* external interfaces */
/*
 * External interface: set the scan speed limit and persist it in the
 * on-disk bookmark, serialized by ml_mutex.
 */
2627 int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck,
2632 mutex_lock(&lfsck->ml_mutex);
2633 __mdd_lfsck_set_speed(lfsck, limit);
2634 rc = mdd_lfsck_bookmark_store(env, lfsck);
2635 mutex_unlock(&lfsck->ml_mutex);
/*
 * External interface: dump the state of the component of @type into
 * @buf.  Looks the component up (taking a reference), delegates to its
 * lfsck_dump() hook, then drops the reference.
 */
2639 int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck,
2640 __u16 type, char *buf, int len)
2642 struct lfsck_component *com;
2645 if (!lfsck->ml_initialized)
2648 com = mdd_lfsck_component_find(lfsck, type);
2652 rc = com->lc_ops->lfsck_dump(env, com, buf, len);
2653 mdd_lfsck_component_put(env, com);
/*
 * External interface: start (or auto-resume) the LFSCK.
 *
 * With @start == NULL this is an auto trigger that resumes a paused
 * scan.  Otherwise it validates and applies the requested parameters
 * (speed limit, error handling, dry run, reset, active component
 * types) to the bookmark and component lists under ml_mutex, builds
 * the iterator flags (valid/flags words packed into ml_args_oit),
 * spawns the mdd_lfsck_main thread, and waits until the thread is
 * running or already stopped.  Returns 0 on success or a negative
 * errno (e.g. -EALREADY presumably when a scan is in progress,
 * -ENOTSUPP for unsupported component types).
 *
 * NOTE(review): missing listing lines hide several declarations
 * (valid, flags, type) and a few GOTO targets.
 */
2657 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
2658 struct lfsck_start *start)
2660 struct lfsck_bookmark *bk = &lfsck->ml_bookmark_ram;
2661 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2662 struct lfsck_component *com;
2663 struct l_wait_info lwi = { 0 };
2670 if (lfsck->ml_obj_oit == NULL)
2673 /* start == NULL means auto trigger paused LFSCK. */
2674 if ((start == NULL) &&
2675 (cfs_list_empty(&lfsck->ml_list_scan) ||
2676 OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
2679 mutex_lock(&lfsck->ml_mutex);
2680 spin_lock(&lfsck->ml_lock);
/* Thread already started (neither fresh nor stopped): bail out. */
2681 if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
2682 spin_unlock(&lfsck->ml_lock);
2683 mutex_unlock(&lfsck->ml_mutex);
2687 spin_unlock(&lfsck->ml_lock);
2689 lfsck->ml_paused = 0;
2690 lfsck->ml_oit_over = 0;
2691 lfsck->ml_drop_dryrun = 0;
2692 lfsck->ml_new_scanned = 0;
2694 /* For auto trigger. */
2698 start->ls_version = bk->lb_version;
2699 if (start->ls_valid & LSV_SPEED_LIMIT) {
2700 __mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
/* Translate the FAILOUT request into iterator flags and keep the
 * persistent bookmark parameter in sync. */
2704 if (start->ls_valid & LSV_ERROR_HANDLE) {
2705 valid |= DOIV_ERROR_HANDLE;
2706 if (start->ls_flags & LPF_FAILOUT)
2707 flags |= DOIF_FAILOUT;
2709 if ((start->ls_flags & LPF_FAILOUT) &&
2710 !(bk->lb_param & LPF_FAILOUT)) {
2711 bk->lb_param |= LPF_FAILOUT;
2713 } else if (!(start->ls_flags & LPF_FAILOUT) &&
2714 (bk->lb_param & LPF_FAILOUT)) {
2715 bk->lb_param &= ~LPF_FAILOUT;
/* Same dance for DRYRUN; leaving dry-run mode must drop the stale
 * dry-run bookkeeping (ml_drop_dryrun). */
2720 if (start->ls_valid & LSV_DRYRUN) {
2721 valid |= DOIV_DRYRUN;
2722 if (start->ls_flags & LPF_DRYRUN)
2723 flags |= DOIF_DRYRUN;
2725 if ((start->ls_flags & LPF_DRYRUN) &&
2726 !(bk->lb_param & LPF_DRYRUN)) {
2727 bk->lb_param |= LPF_DRYRUN;
2729 } else if (!(start->ls_flags & LPF_DRYRUN) &&
2730 (bk->lb_param & LPF_DRYRUN)) {
2731 bk->lb_param &= ~LPF_DRYRUN;
2732 lfsck->ml_drop_dryrun = 1;
2738 rc = mdd_lfsck_bookmark_store(env, lfsck);
2743 if (start->ls_flags & LPF_RESET)
2744 flags |= DOIF_RESET;
2746 if (start->ls_active != 0) {
2747 struct lfsck_component *next;
2750 if (start->ls_active == LFSCK_TYPES_ALL)
2751 start->ls_active = LFSCK_TYPES_SUPPORTED;
2753 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
2754 start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
2755 GOTO(out, rc = -ENOTSUPP);
/* De-activate scanning components the caller did not request. */
2758 cfs_list_for_each_entry_safe(com, next,
2759 &lfsck->ml_list_scan, lc_link) {
2760 if (!(com->lc_type & start->ls_active)) {
2761 rc = com->lc_ops->lfsck_post(env, com, 0,
/* Move each requested idle component onto the scan list. */
2768 while (start->ls_active != 0) {
2769 if (type & start->ls_active) {
2770 com = __mdd_lfsck_component_find(lfsck, type,
2771 &lfsck->ml_list_idle);
2773 /* The component status will be updated
2774 * when its prep() is called later by
2775 * the LFSCK main engine. */
2776 cfs_list_del_init(&com->lc_link);
2777 cfs_list_add_tail(&com->lc_link,
2778 &lfsck->ml_list_scan);
2780 start->ls_active &= ~type;
2786 cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2787 start->ls_active |= com->lc_type;
2788 if (flags & DOIF_RESET) {
2789 rc = com->lc_ops->lfsck_reset(env, com, false);
2796 lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
2797 if (bk->lb_param & LPF_DRYRUN) {
2798 lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
2799 valid |= DOIV_DRYRUN;
2800 flags |= DOIF_DRYRUN;
2803 if (bk->lb_param & LPF_FAILOUT) {
2804 valid |= DOIV_ERROR_HANDLE;
2805 flags |= DOIF_FAILOUT;
2808 if (!cfs_list_empty(&lfsck->ml_list_scan))
2809 flags |= DOIF_OUTUSED;
2811 lfsck->ml_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
2812 thread_set_flags(thread, 0);
2813 rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
2815 CERROR("%s: cannot start LFSCK thread, rc = %d\n",
2816 mdd_lfsck2name(lfsck), rc);
/* Wait until the new thread reports running (or already stopped). */
2818 l_wait_event(thread->t_ctl_waitq,
2819 thread_is_running(thread) ||
2820 thread_is_stopped(thread),
2826 mutex_unlock(&lfsck->ml_mutex);
2827 return (rc < 0 ? rc : 0);
/*
 * mdd_lfsck_stop - ask the LFSCK main engine thread to stop and wait for it.
 *
 * Takes ml_mutex to serialize against concurrent start/stop, then checks the
 * thread state under ml_lock: if the engine never started (init) or already
 * stopped, there is nothing to do. Otherwise it marks the scan paused, flags
 * the thread SVC_STOPPING, kicks it off any low-layer iterator wait (via the
 * OIT object's dio_it.put), broadcasts on the control waitq, and blocks until
 * the thread reports stopped.
 *
 * NOTE(review): this chunk is an elided extraction. The integer at the start
 * of each line is the original file's line number fused into the text, and
 * gaps in that numbering (e.g. 2834->2837, 2844->2849, 2854->2856) mean
 * source lines were dropped -- including the early-return statements, the
 * dio_it.put() second argument, and this function's closing lines. Code is
 * kept byte-identical below; only comments were added. Restore from the
 * upstream file before building.
 */
2830 int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
2833 struct ptlrpc_thread *thread = &lfsck->ml_thread;
/* Zeroed l_wait_info: wait with no timeout for the stopped condition. */
2834 struct l_wait_info lwi = { 0 };
/* Setup never ran; nothing to stop. (Elided: the return statement.) */
2837 if (!lfsck->ml_initialized)
2840 mutex_lock(&lfsck->ml_mutex);
2841 spin_lock(&lfsck->ml_lock);
/* Engine not running (never started or already stopped): bail out. */
2842 if (thread_is_init(thread) || thread_is_stopped(thread)) {
2843 spin_unlock(&lfsck->ml_lock);
2844 mutex_unlock(&lfsck->ml_mutex);
/* Record that the scan is paused, not completed, so it can auto-resume. */
2849 lfsck->ml_paused = 1;
2850 thread_set_flags(thread, SVC_STOPPING);
2851 /* The LFSCK thread may be sleeping on low layer wait queue,
/* Release the in-progress OIT iterator so the engine wakes promptly.
 * (Elided: the iterator argument on the following call.) */
2853 if (likely(lfsck->ml_di_oit != NULL))
2854 lfsck->ml_obj_oit->do_index_ops->dio_it.put(env,
2856 spin_unlock(&lfsck->ml_lock);
/* Wake the engine, then wait (holding ml_mutex) until it has stopped. */
2858 cfs_waitq_broadcast(&thread->t_ctl_waitq);
2859 l_wait_event(thread->t_ctl_waitq,
2860 thread_is_stopped(thread),
2862 mutex_unlock(&lfsck->ml_mutex);
/* Well-known local FID of the OSD object-table iterator ("otable-it")
 * object; mdd_lfsck_setup() dt_locate()s it on the bottom device to drive
 * the whole-device scan. NOTE(review): the closing of this initializer was
 * dropped by the extraction (original lines 2869-2870 are missing). */
2867 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
2868 .f_oid = OTABLE_IT_OID,
/*
 * mdd_lfsck_setup - one-time initialization of the LFSCK instance for @mdd.
 *
 * Initializes the md_lfsck locks, component lists and control waitq, then:
 *   1. locates the OSD object-table-iterator object (lfsck_it_fid) on the
 *      bottom device and binds the otable index feature to it;
 *   2. creates/locates the persistent "lfsck_bookmark" file under the local
 *      root and loads it (falling back to mdd_lfsck_bookmark_init() when the
 *      load indicates no existing bookmark);
 *   3. sets up the namespace LFSCK component.
 * On failure the OIT object reference is dropped on the way out.
 *
 * NOTE(review): elided extraction -- the leading integer on each line is the
 * original file line number, and gaps in the numbering (2874->2880,
 * 2891->2893, 2896->2898, ...) mean lines were dropped, including the `rc`
 * and `fid` declarations, IS_ERR() checks, `if (rc ...)` guards, labels and
 * RETURNs. Code is byte-identical below; only comments were added.
 */
2871 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
2873 struct md_lfsck *lfsck = &mdd->mdd_lfsck;
2874 struct dt_object *obj;
/* Setup must run exactly once per device. */
2880 LASSERT(!lfsck->ml_initialized);
2882 lfsck->ml_initialized = 1;
2883 mutex_init(&lfsck->ml_mutex);
2884 spin_lock_init(&lfsck->ml_lock);
/* Component lists: actively scanning, dir-scan, double-scan phase, idle. */
2885 CFS_INIT_LIST_HEAD(&lfsck->ml_list_scan);
2886 CFS_INIT_LIST_HEAD(&lfsck->ml_list_dir);
2887 CFS_INIT_LIST_HEAD(&lfsck->ml_list_double_scan);
2888 CFS_INIT_LIST_HEAD(&lfsck->ml_list_idle);
2889 cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
/* Object-table iterator object on the bottom (OSD) device; drives the
 * whole-device first-stage scan. (Elided: the IS_ERR(obj) guard.) */
2891 obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid);
2893 RETURN(PTR_ERR(obj));
2895 lfsck->ml_obj_oit = obj;
2896 rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
/* Presumably the backend lacks otable-it support here -- the elided lines
 * likely remap/handle -ENOTSUPP specially; confirm against upstream. */
2898 if (rc == -ENOTSUPP)
2903 /* LFSCK bookmark */
/* Create-or-find the persistent bookmark file that records LFSCK state
 * across restarts (loaded below). */
2905 rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid,
2906 lfsck_bookmark_name,
2907 S_IFREG | S_IRUGO | S_IWUSR, &fid);
2911 obj = dt_locate(env, mdd->mdd_bottom, &fid);
2913 GOTO(out, rc = PTR_ERR(obj));
2915 LASSERT(lu_object_exists(&obj->do_lu));
2916 lfsck->ml_bookmark_obj = obj;
/* Load the saved bookmark; on a fresh file (elided rc check, presumably
 * -ENODATA) initialize a new one instead. */
2918 rc = mdd_lfsck_bookmark_load(env, lfsck);
2920 rc = mdd_lfsck_bookmark_init(env, lfsck);
2924 rc = mdd_lfsck_namespace_setup(env, lfsck);
2927 /* XXX: LFSCK components initialization to be added here. */
/* Error path (elided `out:` label): drop the OIT object reference. */
2930 lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2931 lfsck->ml_obj_oit = NULL;
2935 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
2937 struct md_lfsck *lfsck = &mdd->mdd_lfsck;
2938 struct ptlrpc_thread *thread = &lfsck->ml_thread;
2939 struct lfsck_component *com;
2941 if (!lfsck->ml_initialized)
2944 LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
2946 if (lfsck->ml_obj_oit != NULL) {
2947 lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2948 lfsck->ml_obj_oit = NULL;
2951 LASSERT(lfsck->ml_obj_dir == NULL);
2953 if (lfsck->ml_bookmark_obj != NULL) {
2954 lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
2955 lfsck->ml_bookmark_obj = NULL;
2958 while (!cfs_list_empty(&lfsck->ml_list_scan)) {
2959 com = cfs_list_entry(lfsck->ml_list_scan.next,
2960 struct lfsck_component,
2962 mdd_lfsck_component_cleanup(env, com);
2965 LASSERT(cfs_list_empty(&lfsck->ml_list_dir));
2967 while (!cfs_list_empty(&lfsck->ml_list_double_scan)) {
2968 com = cfs_list_entry(lfsck->ml_list_double_scan.next,
2969 struct lfsck_component,
2971 mdd_lfsck_component_cleanup(env, com);
2974 while (!cfs_list_empty(&lfsck->ml_list_idle)) {
2975 com = cfs_list_entry(lfsck->ml_list_idle.next,
2976 struct lfsck_component,
2978 mdd_lfsck_component_cleanup(env, com);