4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
26 * lustre/lfsck/lfsck_internal.h
28 * Shared definitions and declarations for the LFSCK.
30 * Author: Fan, Yong <fan.yong@intel.com>
33 #ifndef _LFSCK_INTERNAL_H
34 # define _LFSCK_INTERNAL_H
36 #include <lustre/lustre_lfsck_user.h>
37 #include <lustre/lustre_user.h>
38 #include <lustre/lustre_idl.h>
39 #include <lustre_lfsck.h>
41 #include <lu_object.h>
42 #include <dt_object.h>
43 #include <md_object.h>
44 #include <lustre_net.h>
45 #include <lustre_dlm.h>
46 #include <lustre_fid.h>
47 #include <md_object.h>
/* Half of HZ (HZ shifted right by one bit). */
49 #define HALF_SEC (HZ >> 1)
/* NOTE(review): the unit is not stated here; presumably seconds -- confirm. */
50 #define LFSCK_CHECKPOINT_INTERVAL 60
/* States of a name entry. */
52 #define LFSCK_NAMEENTRY_DEAD 1 /* The object has been unlinked. */
53 #define LFSCK_NAMEENTRY_REMOVED 2 /* The entry has been removed. */
54 #define LFSCK_NAMEENTRY_RECREATED 3 /* The entry has been recreated. */
/* Flag values; each one occupies a distinct bit.  The struct comments in
 * this file refer to these as 'enum lfsck_flags' (the enum header line is
 * not visible in this excerpt). */
57 /* The first scanning cycle has finished. */
58 LF_SCANNED_ONCE = 0x00000001ULL,
60 /* Some namespace inconsistency has been found. */
61 LF_INCONSISTENT = 0x00000002ULL,
63 /* The device was upgraded from the 1.8 format. */
64 LF_UPGRADE = 0x00000004ULL,
66 /* The server restarted at some point during the LFSCK, so the check
67 * or repair of some objects may have been missed. */
68 LF_INCOMPLETE = 0x00000008ULL,
70 /* The LAST_ID (file) crashed. */
71 LF_CRASHED_LASTID = 0x00000010ULL,
/*
 * A scanning position.  Helpers later in this file read lp_oit_cookie,
 * lp_dir_parent and lp_dir_cookie from it; only the lp_dir_parent
 * declaration is visible in this excerpt.
 */
74 struct lfsck_position {
75 /* Position within the low-layer, object-table-based iteration. */
78 /* Parent FID for directory traversal. */
79 struct lu_fid lp_dir_parent;
81 /* Position within the namespace-based directory traversal. */
/*
 * LFSCK bookmark record.  struct lfsck_instance embeds two copies of it
 * (li_bookmark_ram and li_bookmark_disk).  Several field declarations are
 * not visible in this excerpt; only their comments are.
 */
85 struct lfsck_bookmark {
86 /* Magic number to detect that this struct contains valid data. */
89 /* For compatibility with old versions. */
92 /* See 'enum lfsck_param_flags' */
95 /* How many items can be scanned at most per second. */
98 /* The window size for the async request pipeline. */
99 __u16 lb_async_windows;
101 /* Padding for 64-bit alignment. */
104 /* Reserved for future use. */
105 __u64 lb_reserved[6];
/*
 * State record of the namespace LFSCK: status and flags, run times,
 * timestamps, scan positions and per-phase counters.  The declarations
 * for the magic, status and flags fields are not visible in this excerpt.
 */
108 struct lfsck_namespace {
109 /* Magic number to detect that this struct contains valid data. */
112 /* See 'enum lfsck_status'. */
115 /* See 'enum lfsck_flags'. */
118 /* How many completed LFSCK runs on the device. */
119 __u32 ln_success_count;
121 /* How long the LFSCK phase1 has run in seconds. */
122 __u32 ln_run_time_phase1;
124 /* How long the LFSCK phase2 has run in seconds. */
125 __u32 ln_run_time_phase2;
127 /* Time for the last LFSCK completed in seconds since epoch. */
128 __u64 ln_time_last_complete;
130 /* Time for the latest LFSCK ran in seconds since epoch. */
131 __u64 ln_time_latest_start;
133 /* Time for the last LFSCK checkpoint in seconds since epoch. */
134 __u64 ln_time_last_checkpoint;
136 /* Position for the latest LFSCK started from. */
137 struct lfsck_position ln_pos_latest_start;
139 /* Position for the last LFSCK checkpoint. */
140 struct lfsck_position ln_pos_last_checkpoint;
142 /* Position of the first object that should be updated. */
143 struct lfsck_position ln_pos_first_inconsistent;
145 /* How many items (including dir) have been checked. */
146 __u64 ln_items_checked;
148 /* How many items have been repaired. */
149 __u64 ln_items_repaired;
151 /* How many items failed to be processed. */
152 __u64 ln_items_failed;
154 /* How many directories have been traversed. */
155 __u64 ln_dirs_checked;
157 /* How many multiple-linked objects have been checked. */
158 __u64 ln_mlinked_checked;
160 /* How many objects have been double scanned. */
161 __u64 ln_objs_checked_phase2;
163 /* How many objects have been repaired during double scan. */
164 __u64 ln_objs_repaired_phase2;
166 /* How many objects failed to be processed during double scan. */
167 __u64 ln_objs_failed_phase2;
169 /* How many objects with nlink fixed. */
170 __u64 ln_objs_nlink_repaired;
172 /* How many objects were lost before, but found back now. */
173 __u64 ln_objs_lost_found;
175 /* The latest object has been processed (failed) during double scan. */
176 struct lu_fid ln_fid_latest_scanned_phase2;
178 /* Reserved for future use. 256-bytes aligned now. */
179 __u64 ln_reserved[2];
/*
 * Kinds of layout inconsistency.  struct lfsck_layout::ll_objs_repaired is
 * sized by LLIT_MAX and, per its comment, indexed by (type - 1).
 * The enumerators other than the values 2, 3 and 5 (including LLIT_OTHERS,
 * which LLIT_MAX aliases) are not visible in this excerpt.
 */
182 enum lfsck_layout_inconsistency_type {
185 LLIT_UNMATCHED_PAIR = 2,
186 LLIT_MULTIPLE_REFERENCED = 3,
188 LLIT_INCONSISTENT_OWNER = 5,
/* Upper bound of the type values; same value as LLIT_OTHERS. */
190 LLIT_MAX = LLIT_OTHERS
/*
 * State record of the layout LFSCK: status and flags, run times,
 * timestamps, scan positions (plain 64-bit values here) and per-phase
 * counters.  The declarations for the magic, status and flags fields are
 * not visible in this excerpt.
 */
193 struct lfsck_layout {
194 /* Magic number to detect that this struct contains valid data. */
197 /* See 'enum lfsck_status'. */
200 /* See 'enum lfsck_flags'. */
203 /* How many completed LFSCK runs on the device. */
204 __u32 ll_success_count;
206 /* How long the LFSCK phase1 has run in seconds. */
207 __u32 ll_run_time_phase1;
209 /* How long the LFSCK phase2 has run in seconds. */
210 __u32 ll_run_time_phase2;
212 /* Time for the last LFSCK completed in seconds since epoch. */
213 __u64 ll_time_last_complete;
215 /* Time for the latest LFSCK ran in seconds since epoch. */
216 __u64 ll_time_latest_start;
218 /* Time for the last LFSCK checkpoint in seconds since epoch. */
219 __u64 ll_time_last_checkpoint;
221 /* Position for the latest LFSCK started from. */
222 __u64 ll_pos_latest_start;
224 /* Position for the last LFSCK checkpoint. */
225 __u64 ll_pos_last_checkpoint;
227 /* Position of the first object that should be updated. */
228 __u64 ll_pos_first_inconsistent;
230 /* How many objects have been checked. */
231 __u64 ll_objs_checked_phase1;
233 /* How many objects failed to be processed. */
234 __u64 ll_objs_failed_phase1;
236 /* How many objects have been double scanned. */
237 __u64 ll_objs_checked_phase2;
239 /* How many objects failed to be processed during double scan. */
240 __u64 ll_objs_failed_phase2;
242 /* Repair counters, one per kind of inconsistency.
243 * ll_objs_repaired[type - 1] is the count for the given @type. */
244 __u64 ll_objs_repaired[LLIT_MAX];
246 /* How many objects have been skipped because the related
247 * MDT(s)/OST(s) do not participate in the LFSCK */
248 __u64 ll_objs_skipped;
250 /* Reserved for future use. 256-bytes aligned now. */
251 __u64 ll_reserved[12];
254 struct lfsck_component;
255 struct lfsck_tgt_descs;
256 struct lfsck_tgt_desc;
/*
 * Per-component callback table; struct lfsck_component points to one
 * through lc_ops.  Every callback takes the lu_env and the component.
 * Several parameter lists are truncated in this excerpt (their trailing
 * lines are not visible), so the full signatures of lfsck_reset,
 * lfsck_fail, lfsck_checkpoint, lfsck_post and lfsck_dump cannot be read
 * from here.
 */
258 struct lfsck_operations {
259 int (*lfsck_reset)(const struct lu_env *env,
260 struct lfsck_component *com,
263 void (*lfsck_fail)(const struct lu_env *env,
264 struct lfsck_component *com,
267 int (*lfsck_checkpoint)(const struct lu_env *env,
268 struct lfsck_component *com,
271 int (*lfsck_prep)(const struct lu_env *env,
272 struct lfsck_component *com);
/* Handle one object during the object-table-based iteration. */
274 int (*lfsck_exec_oit)(const struct lu_env *env,
275 struct lfsck_component *com,
276 struct dt_object *obj);
/* Handle one directory entry @ent of @obj during directory traversal. */
278 int (*lfsck_exec_dir)(const struct lu_env *env,
279 struct lfsck_component *com,
280 struct dt_object *obj,
281 struct lu_dirent *ent);
283 int (*lfsck_post)(const struct lu_env *env,
284 struct lfsck_component *com,
288 int (*lfsck_dump)(const struct lu_env *env,
289 struct lfsck_component *com,
293 int (*lfsck_double_scan)(const struct lu_env *env,
294 struct lfsck_component *com);
/* Called by lfsck_component_put() on the last reference when lc_data
 * is set; that caller asserts this pointer is non-NULL. */
296 void (*lfsck_data_release)(const struct lu_env *env,
297 struct lfsck_component *com);
299 void (*lfsck_quit)(const struct lu_env *env,
300 struct lfsck_component *com);
302 int (*lfsck_in_notify)(const struct lu_env *env,
303 struct lfsck_component *com,
304 struct lfsck_request *lr);
306 int (*lfsck_query)(const struct lu_env *env,
307 struct lfsck_component *com);
309 int (*lfsck_stop_notify)(const struct lu_env *env,
310 struct lfsck_component *com,
311 struct lfsck_tgt_descs *ltds,
312 struct lfsck_tgt_desc *ltd,
313 struct ptlrpc_request_set *set);
/* Dimensions of the two-level target table: struct lfsck_tgt_descs holds
 * TGT_PTRS block pointers, and each block (struct lfsck_tgt_desc_idx) holds
 * TGT_PTRS_PER_BLOCK descriptor pointers. */
316 #define TGT_PTRS 256 /* number of pointers at 1st level */
317 #define TGT_PTRS_PER_BLOCK 256 /* number of pointers at 2nd level */
/*
 * Descriptor of a single target.  Pointers to it are stored in the slots
 * of struct lfsck_tgt_desc_idx.  lfsck_tgt_get()/lfsck_tgt_put() operate on
 * an ltd_ref counter whose declaration is not visible in this excerpt.
 */
319 struct lfsck_tgt_desc {
320 struct list_head ltd_orphan_list;
321 struct dt_device *ltd_tgt;
322 struct dt_device *ltd_key;
323 struct obd_export *ltd_exp;
324 struct list_head ltd_layout_list;
325 struct list_head ltd_layout_phase_list;
328 __u32 ltd_layout_gen;
/* NOTE(review): presumably set once the target is no longer usable --
 * confirm against the users of this flag. */
329 unsigned int ltd_dead:1;
/* One second-level block of the target table: TGT_PTRS_PER_BLOCK
 * descriptor pointers, selected by (index % TGT_PTRS_PER_BLOCK) in
 * LTD_TGT(). */
332 struct lfsck_tgt_desc_idx {
333 struct lfsck_tgt_desc *ldi_tgts[TGT_PTRS_PER_BLOCK];
/*
 * Table of targets.  struct lfsck_instance embeds one for the OSTs
 * (li_ost_descs) and one for the MDTs (li_mdt_descs).  The declarations
 * that follow the "ltd_xxx_list" and "number of registered TGTs" comments
 * are not visible in this excerpt.
 */
336 struct lfsck_tgt_descs {
337 /* First-level array of block pointers; see LTD_TGT(). */
338 struct lfsck_tgt_desc_idx *ltd_tgts_idx[TGT_PTRS];
340 /* bitmap of TGTs available */
341 cfs_bitmap_t *ltd_tgts_bitmap;
343 /* for lfsck_tgt_desc::ltd_xxx_list */
346 /* for tgts table accessing and changes */
347 struct rw_semaphore ltd_rw_sem;
349 /* Temporary list for orphan targets. */
350 struct list_head ltd_orphan;
352 /* number of registered TGTs */
/*
 * LTD_TGT - slot for target @index in the two-level table @ltd
 *
 * @ltd is a pointer to a struct lfsck_tgt_descs.  The first level is
 * selected by (index / TGT_PTRS_PER_BLOCK), the slot inside that block by
 * (index % TGT_PTRS_PER_BLOCK).  The first-level block pointer is
 * dereferenced without a NULL check, and @index is evaluated twice, so it
 * must not have side effects.
 *
 * OST_TGT/MDT_TGT apply the same lookup to the OST and MDT tables embedded
 * in an LFSCK instance.  @lfsck is parenthesized so that an expression
 * argument (e.g. "p + 1") binds as intended.
 */
#define LTD_TGT(ltd, index)	\
	((ltd)->ltd_tgts_idx[(index) / TGT_PTRS_PER_BLOCK]->\
	 ldi_tgts[(index) % TGT_PTRS_PER_BLOCK])

#define OST_TGT(lfsck, index)	LTD_TGT(&(lfsck)->li_ost_descs, index)
#define MDT_TGT(lfsck, index)	LTD_TGT(&(lfsck)->li_mdt_descs, index)
/*
 * One LFSCK component attached to an lfsck_instance (lc_lfsck), driven
 * through the callbacks in lc_ops.  It is reference-counted via lc_ref by
 * lfsck_component_get()/lfsck_component_put(); the put path also uses
 * lc_file_ram, lc_file_disk, lc_file_size and lc_data.  The declarations
 * of those fields are not visible in this excerpt.
 */
363 struct lfsck_component {
364 /* into lfsck_instance::li_list_{scan,double_scan,idle} */
367 /* into lfsck_instance::li_list_dir */
368 cfs_list_t lc_link_dir;
370 struct rw_semaphore lc_sem;
373 struct lfsck_position lc_pos_start;
374 struct lfsck_instance *lc_lfsck;
/* Released with lu_object_put_nocache() on the last reference. */
375 struct dt_object *lc_obj;
376 struct lfsck_operations *lc_ops;
381 /* The time for last checkpoint, jiffies */
382 cfs_time_t lc_time_last_checkpoint;
384 /* The time for next checkpoint, jiffies */
385 cfs_time_t lc_time_next_checkpoint;
389 /* How many objects have been checked since last checkpoint. */
390 __u32 lc_new_checked;
392 /* How many objects have been scanned since last sleep. */
393 __u32 lc_new_scanned;
395 unsigned int lc_journal:1;
/*
 * One LFSCK instance: the component lists, the engine thread, the bookmark
 * copies, the current iteration objects/iterators and the OST/MDT target
 * tables.  It is reference-counted via li_ref by lfsck_instance_get()/
 * lfsck_instance_put().  Several field declarations (including li_ref) are
 * not visible in this excerpt; only their comments are.
 */
399 struct lfsck_instance {
400 struct mutex li_mutex;
403 /* Link into the lfsck_instance_list. */
406 /* For the components in (first) scanning via otable-based iteration. */
407 cfs_list_t li_list_scan;
409 /* For the components in scanning via directory traversal. Because
410 * directory traversal cannot guarantee that all objects are scanned,
411 * a component in li_list_dir must also be in li_list_scan. */
412 cfs_list_t li_list_dir;
414 /* For the components in double scanning. */
415 cfs_list_t li_list_double_scan;
417 /* For the components that are not scanning now. */
418 cfs_list_t li_list_idle;
421 atomic_t li_double_scan_count;
422 struct ptlrpc_thread li_thread;
424 /* The time for last checkpoint, jiffies */
425 cfs_time_t li_time_last_checkpoint;
427 /* The time for next checkpoint, jiffies */
428 cfs_time_t li_time_next_checkpoint;
430 lfsck_out_notify li_out_notify;
431 void *li_out_notify_data;
/* Device used by lfsck_object_find() for object lookups. */
432 struct dt_device *li_next;
/* Device whose OBD name lfsck_lfsck2name() reports. */
433 struct dt_device *li_bottom;
434 struct ldlm_namespace *li_namespace;
435 struct local_oid_storage *li_los;
436 struct lu_fid li_local_root_fid; /* backend root "/" */
437 struct lu_fid li_global_root_fid; /* /ROOT */
438 struct dt_object *li_bookmark_obj;
439 struct lfsck_bookmark li_bookmark_ram;
440 struct lfsck_bookmark li_bookmark_disk;
441 struct lfsck_position li_pos_current;
443 /* Obj for otable-based iteration */
444 struct dt_object *li_obj_oit;
446 /* Obj for directory traversal */
447 struct dt_object *li_obj_dir;
449 /* It for otable-based iteration */
450 struct dt_it *li_di_oit;
452 /* It for directory traversal */
453 struct dt_it *li_di_dir;
455 /* Description of OST */
456 struct lfsck_tgt_descs li_ost_descs;
458 /* Description of MDT */
459 struct lfsck_tgt_descs li_mdt_descs;
461 /* namespace-based directory traversal position. */
464 /* Arguments for low layer otable-based iteration. */
467 /* Arguments for namespace-based directory traversal. */
470 /* Schedule for every N objects. */
473 /* Sleep N jiffies for each schedule. */
476 /* How many objects have been scanned since last sleep. */
477 __u32 li_new_scanned;
479 /* The status when the LFSCK stopped or paused. */
482 unsigned int li_oit_over:1, /* oit is finished. */
483 li_drop_dryrun:1, /* Ever dryrun, not now. */
484 li_master:1, /* Master instance or not. */
485 li_current_oit_processed:1;
/* Flag bits describing the linkEA state of an object. */
488 enum lfsck_linkea_flags {
489 /* The linkea entries do not match the object nlinks. */
490 LLF_UNMATCH_NLINKS = 0x01,
492 /* Failed to repair the multiple-linked objects during the double scan. */
493 LLF_REPAIR_FAILED = 0x02,
/* Argument bundle for an async request: the component, the target table
 * and target involved, and the request.  struct lfsck_thread_info embeds
 * one as lti_laia. */
496 struct lfsck_async_interpret_args {
497 struct lfsck_component *laia_com;
498 struct lfsck_tgt_descs *laia_ltds;
499 struct lfsck_tgt_desc *laia_ltd;
500 struct lfsck_request *laia_lr;
/* Arguments bundle built by lfsck_thread_args_init() and destroyed by
 * lfsck_thread_args_fini(): an lu_env plus the instance and component. */
503 struct lfsck_thread_args {
504 struct lu_env lta_env;
505 struct lfsck_instance *lta_lfsck;
506 struct lfsck_component *lta_com;
/*
 * Scratch data attached to an lu_env context under lfsck_thread_key and
 * fetched with lfsck_env_info().  The lfsck_name_get_const() and
 * lfsck_buf_get*() helpers hand out pointers to lti_name and lti_buf, so
 * each of those is a single shared slot per context.
 */
509 struct lfsck_thread_info {
510 struct lu_name lti_name;
511 struct lu_buf lti_buf;
512 struct lu_buf lti_linkea_buf;
513 struct lu_fid lti_fid;
514 struct lu_fid lti_fid2;
515 struct lu_attr lti_la;
516 struct ost_id lti_oi;
518 struct lustre_mdt_attrs lti_lma;
519 /* old LMA for compatibility */
520 char lti_lma_old[LMA_OLD_SIZE];
522 struct dt_object_format lti_dof;
523 /* lti_ent and lti_key must be adjacent, in this order,
524 * so that lti_ent::lde_name is backed by lti_key. */
525 struct lu_dirent lti_ent;
526 char lti_key[NAME_MAX + 16];
527 struct lfsck_request lti_lr;
528 struct lfsck_async_interpret_args lti_laia;
/*
 * Prototypes of functions shared between the LFSCK source files.
 * Some prototypes below (lfsck_bits_dump, lfsck_pos_dump, lfsck_reset,
 * lfsck_fail, lfsck_post) are truncated in this excerpt: the line carrying
 * their remaining parameters is not visible.
 */
532 const char *lfsck_status2names(enum lfsck_status status);
533 void lfsck_component_cleanup(const struct lu_env *env,
534 struct lfsck_component *com);
/* Called by lfsck_instance_put() when the last reference is dropped. */
535 void lfsck_instance_cleanup(const struct lu_env *env,
536 struct lfsck_instance *lfsck);
537 int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
539 int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix);
540 int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
542 void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
543 struct lfsck_position *pos, bool init);
544 void lfsck_control_speed(struct lfsck_instance *lfsck);
545 void lfsck_control_speed_by_self(struct lfsck_component *com);
546 int lfsck_reset(const struct lu_env *env, struct lfsck_instance *lfsck,
548 struct lfsck_thread_args *lfsck_thread_args_init(struct lfsck_instance *lfsck,
549 struct lfsck_component *com);
550 void lfsck_thread_args_fini(struct lfsck_thread_args *lta);
/* Instance-level entry points; their names mirror the callbacks in
 * struct lfsck_operations. */
551 void lfsck_fail(const struct lu_env *env, struct lfsck_instance *lfsck,
553 int lfsck_checkpoint(const struct lu_env *env, struct lfsck_instance *lfsck);
554 int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck);
555 int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck,
556 struct dt_object *obj);
557 int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
558 struct dt_object *obj, struct lu_dirent *ent);
559 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
561 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck);
562 void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck);
563 int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
564 struct lfsck_request *lr,
565 struct ptlrpc_request_set *set,
566 ptlrpc_interpterer_t interpterer,
567 void *args, int request);
570 int lfsck_master_engine(void *args);
572 /* lfsck_bookmark.c */
573 int lfsck_bookmark_store(const struct lu_env *env,
574 struct lfsck_instance *lfsck);
575 int lfsck_bookmark_setup(const struct lu_env *env,
576 struct lfsck_instance *lfsck);
578 /* lfsck_namespace.c */
579 int lfsck_namespace_setup(const struct lu_env *env,
580 struct lfsck_instance *lfsck);
/* NOTE(review): presumably implemented in lfsck_layout.c -- the section
 * comment that preceded this prototype is not visible here. */
583 int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck);
/* Name tables and the context key, defined in a source file. */
585 extern const char *lfsck_flags_names[];
586 extern const char *lfsck_param_names[];
587 extern struct lu_context_key lfsck_thread_key;
/*
 * Look up the LFSCK thread-info area stored in @env's context under
 * lfsck_thread_key, and assert that the lookup succeeded.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the looked-up pointer is returned -- confirm.
 */
589 static inline struct lfsck_thread_info *
590 lfsck_env_info(const struct lu_env *env)
592 struct lfsck_thread_info *info;
594 info = lu_context_key_get(&env->le_ctx, &lfsck_thread_key);
595 LASSERT(info != NULL);
/*
 * Fill the context's lti_name slot with @area/@len.  The name is not
 * copied: ln_name points at @area.  The slot is shared, so a later call
 * with the same @env overwrites it.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the slot is returned -- confirm.
 */
599 static inline const struct lu_name *
600 lfsck_name_get_const(const struct lu_env *env, const void *area, ssize_t len)
602 struct lu_name *lname;
604 lname = &lfsck_env_info(env)->lti_name;
605 lname->ln_name = area;
606 lname->ln_namelen = len;
/*
 * Take the context's lti_buf slot.
 * NOTE(review): only the slot lookup is visible in this excerpt; the
 * declaration of buf, the use of @area/@len and the return statement are
 * missing.  Presumably this mirrors lfsck_buf_get_const() -- confirm.
 */
610 static inline struct lu_buf *
611 lfsck_buf_get(const struct lu_env *env, void *area, ssize_t len)
615 buf = &lfsck_env_info(env)->lti_buf;
/*
 * Take the context's lti_buf slot and point its lb_buf at @area (the
 * const qualifier is cast away for the assignment; nothing is copied).
 * NOTE(review): the declaration of buf, the handling of @len and the
 * return statement are not visible in this excerpt.
 */
621 static inline const struct lu_buf *
622 lfsck_buf_get_const(const struct lu_env *env, const void *area, ssize_t len)
626 buf = &lfsck_env_info(env)->lti_buf;
627 buf->lb_buf = (void *)area;
632 static inline char *lfsck_lfsck2name(struct lfsck_instance *lfsck)
634 return lfsck->li_bottom->dd_lu_dev.ld_obd->obd_name;
637 static inline const struct lu_fid *lfsck_dto2fid(const struct dt_object *obj)
639 return lu_object_fid(&obj->do_lu);
642 static inline void lfsck_pos_set_zero(struct lfsck_position *pos)
644 memset(pos, 0, sizeof(*pos));
647 static inline int lfsck_pos_is_zero(const struct lfsck_position *pos)
649 return pos->lp_oit_cookie == 0 && fid_is_zero(&pos->lp_dir_parent);
/*
 * Compare two positions.  The visible tests run in this order:
 *  1. lp_oit_cookie of @pos1 less than, then greater than, that of @pos2;
 *  2. exactly one of the two lp_dir_parent FIDs is zero (both ways);
 *  3. both lp_dir_parent FIDs are zero;
 *  4. otherwise the parent FIDs are asserted equal and the lp_dir_cookie
 *     values are compared less-than, then greater-than.
 * NOTE(review): the statement that follows each test is not visible in
 * this excerpt.  The less/greater structure suggests a three-way result
 * rather than a boolean, despite the "is_eq" name -- confirm the actual
 * return values before relying on them.
 */
652 static inline int lfsck_pos_is_eq(const struct lfsck_position *pos1,
653 const struct lfsck_position *pos2)
655 if (pos1->lp_oit_cookie < pos2->lp_oit_cookie)
658 if (pos1->lp_oit_cookie > pos2->lp_oit_cookie)
661 if (fid_is_zero(&pos1->lp_dir_parent) &&
662 !fid_is_zero(&pos2->lp_dir_parent))
665 if (!fid_is_zero(&pos1->lp_dir_parent) &&
666 fid_is_zero(&pos2->lp_dir_parent))
669 if (fid_is_zero(&pos1->lp_dir_parent) &&
670 fid_is_zero(&pos2->lp_dir_parent))
/* Past this point both positions are inside a directory; it must be the
 * same directory for the cookies to be comparable. */
673 LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
675 if (pos1->lp_dir_cookie < pos2->lp_dir_cookie)
678 if (pos1->lp_dir_cookie > pos2->lp_dir_cookie)
684 static void inline lfsck_position_le_to_cpu(struct lfsck_position *des,
685 struct lfsck_position *src)
687 des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
688 fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
689 des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
692 static void inline lfsck_position_cpu_to_le(struct lfsck_position *des,
693 struct lfsck_position *src)
695 des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
696 fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
697 des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
700 static inline umode_t lfsck_object_type(const struct dt_object *obj)
702 return lu_object_attr(&obj->do_lu);
705 static inline int lfsck_is_dead_obj(const struct dt_object *obj)
707 struct lu_object_header *loh = obj->do_lu.lo_header;
709 return !!test_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
/*
 * Look up an object through lu_object_find_slice() on the lu_device of
 * lfsck->li_next, converting the result with lu2dt().
 * NOTE(review): the remaining arguments of the lookup call (presumably
 * including @fid) are on a line that is not visible in this excerpt.
 */
712 static inline struct dt_object *lfsck_object_find(const struct lu_env *env,
713 struct lfsck_instance *lfsck,
714 const struct lu_fid *fid)
716 return lu2dt(lu_object_find_slice(env, dt2lu_dev(lfsck->li_next),
/*
 * Take an additional reference on @obj via lu_object_get().
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably @obj itself is returned -- confirm.
 */
720 static inline struct dt_object *lfsck_object_get(struct dt_object *obj)
722 lu_object_get(&obj->do_lu);
726 static inline void lfsck_object_put(const struct lu_env *env,
727 struct dt_object *obj)
729 lu_object_put(env, &obj->do_lu);
732 static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds,
735 struct lfsck_tgt_desc *ltd;
737 ltd = LTD_TGT(ltds, index);
739 atomic_inc(<d->ltd_ref);
744 static inline void lfsck_tgt_put(struct lfsck_tgt_desc *ltd)
746 if (atomic_dec_and_test(<d->ltd_ref))
/*
 * Take an additional reference on @com by incrementing lc_ref.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably @com itself is returned -- confirm.
 */
750 static inline struct lfsck_component *
751 lfsck_component_get(struct lfsck_component *com)
753 atomic_inc(&com->lc_ref);
/*
 * Drop one reference on @com.  When the count reaches zero the resources
 * hanging off the component are released, in this order:
 *  - lc_obj, via lu_object_put_nocache();
 *  - lc_file_ram and lc_file_disk, each freed as lc_file_size bytes;
 *  - lc_data, through the component's lfsck_data_release callback, which
 *    is asserted to be non-NULL.
 * NOTE(review): the lines after the data-release call are not visible in
 * this excerpt, so whether @com itself is freed here cannot be read from
 * this view.
 */
758 static inline void lfsck_component_put(const struct lu_env *env,
759 struct lfsck_component *com)
761 if (atomic_dec_and_test(&com->lc_ref)) {
762 if (com->lc_obj != NULL)
763 lu_object_put_nocache(env, &com->lc_obj->do_lu);
764 if (com->lc_file_ram != NULL)
765 OBD_FREE(com->lc_file_ram, com->lc_file_size);
766 if (com->lc_file_disk != NULL)
767 OBD_FREE(com->lc_file_disk, com->lc_file_size);
768 if (com->lc_data != NULL) {
769 LASSERT(com->lc_ops->lfsck_data_release != NULL);
771 com->lc_ops->lfsck_data_release(env, com);
/*
 * Take an additional reference on @lfsck by incrementing li_ref.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably @lfsck itself is returned -- confirm.
 */
778 static inline struct lfsck_instance *
779 lfsck_instance_get(struct lfsck_instance *lfsck)
781 atomic_inc(&lfsck->li_ref);
786 static inline void lfsck_instance_put(const struct lu_env *env,
787 struct lfsck_instance *lfsck)
789 if (atomic_dec_and_test(&lfsck->li_ref))
790 lfsck_instance_cleanup(env, lfsck);
793 static inline mdsno_t lfsck_dev_idx(struct dt_device *dev)
795 return dev->dd_lu_dev.ld_site->ld_seq_site->ss_node_id;
798 #endif /* _LFSCK_INTERNAL_H */