Whamcloud - gitweb
9d7b54c8c1838fa6dccf4e85fcca21ac6d5853f3
[fs/lustre-release.git] / lustre / mdd / mdd_lfsck.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, Intel Corporation.
24  */
25 /*
26  * lustre/mdd/mdd_lfsck.c
27  *
28  * Top-level entry points into mdd module
29  *
30  * LFSCK controller, which scans the whole device through low layer
31  * iteration APIs, drives all lfsck components, controls the speed.
32  *
33  * Author: Fan Yong <yong.fan@whamcloud.com>
34  */
35
36 #ifndef EXPORT_SYMTAB
37 # define EXPORT_SYMTAB
38 #endif
39 #define DEBUG_SUBSYSTEM S_MDS
40
41 #include <lustre/lustre_idl.h>
42 #include <lustre_fid.h>
43 #include <obd_support.h>
44
45 #include "mdd_internal.h"
46 #include "mdd_lfsck.h"
47
/* Half of CFS_HZ. */
#define HALF_SEC                        (CFS_HZ >> 1)
/* Checkpoint interval; presumably in seconds -- TODO confirm at the users. */
#define LFSCK_CHECKPOINT_INTERVAL       60
/* All-ones directory cookie: the value a zero cookie wraps to when
 * mdd_lfsck_pos_fill() steps a directory cookie back by one. */
#define MDS_DIR_DUMMY_START             0xffffffffffffffffULL

/* Per-name-entry states. */
#define LFSCK_NAMEENTRY_DEAD            1 /* The object has been unlinked. */
#define LFSCK_NAMEENTRY_REMOVED         2 /* The entry has been removed. */
#define LFSCK_NAMEENTRY_RECREATED       3 /* The entry has been recreated. */
55
/* Names of the LFSCK bookmark and namespace objects (external linkage). */
const char lfsck_bookmark_name[]  = "lfsck_bookmark";
const char lfsck_namespace_name[] = "lfsck_namespace";
58
/*
 * Printable LFSCK status names, NULL-terminated.
 * NOTE(review): presumably indexed by the ln_status value -- confirm
 * against the status enum before reordering.
 */
static const char *lfsck_status_names[] = {
        "init",                 /* 0 */
        "scanning-phase1",      /* 1 */
        "scanning-phase2",      /* 2 */
        "completed",            /* 3 */
        "failed",               /* 4 */
        "stopped",              /* 5 */
        "paused",               /* 6 */
        "crashed",              /* 7 */
        NULL
};
70
/*
 * Printable LFSCK flag names, NULL-terminated.
 * NOTE(review): presumably entry i names flag bit i, as consumed by
 * lfsck_bits_dump() -- confirm at the call sites.
 */
static const char *lfsck_flags_names[] = {
        "scanned-once",         /* bit 0 */
        "inconsistent",         /* bit 1 */
        "upgrade",              /* bit 2 */
        NULL
};
77
/*
 * Printable LFSCK parameter names, NULL-terminated.
 * NOTE(review): presumably entry i names parameter bit i, as consumed by
 * lfsck_bits_dump() -- confirm at the call sites.
 */
static const char *lfsck_param_names[] = {
        "failout",              /* bit 0 */
        "dryrun",               /* bit 1 */
        NULL
};
83
84 /* misc functions */
85
86 static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck)
87 {
88         return container_of0(lfsck, struct mdd_device, mdd_lfsck);
89 }
90
91 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
92 {
93         struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
94
95         return mdd2obd_dev(mdd)->obd_name;
96 }
97
98 static inline void mdd_lfsck_component_get(struct lfsck_component *com)
99 {
100         atomic_inc(&com->lc_ref);
101 }
102
103 static inline void mdd_lfsck_component_put(const struct lu_env *env,
104                                            struct lfsck_component *com)
105 {
106         if (atomic_dec_and_test(&com->lc_ref)) {
107                 if (com->lc_obj != NULL)
108                         lu_object_put(env, &com->lc_obj->do_lu);
109                 if (com->lc_file_ram != NULL)
110                         OBD_FREE(com->lc_file_ram, com->lc_file_size);
111                 if (com->lc_file_disk != NULL)
112                         OBD_FREE(com->lc_file_disk, com->lc_file_size);
113                 OBD_FREE_PTR(com);
114         }
115 }
116
117 static inline struct lfsck_component *
118 __mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list)
119 {
120         struct lfsck_component *com;
121
122         cfs_list_for_each_entry(com, list, lc_link) {
123                 if (com->lc_type == type)
124                         return com;
125         }
126         return NULL;
127 }
128
129 static struct lfsck_component *
130 mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type)
131 {
132         struct lfsck_component *com;
133
134         spin_lock(&lfsck->ml_lock);
135         com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan);
136         if (com != NULL)
137                 goto unlock;
138
139         com = __mdd_lfsck_component_find(lfsck, type,
140                                          &lfsck->ml_list_double_scan);
141         if (com != NULL)
142                 goto unlock;
143
144         com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_idle);
145
146 unlock:
147         if (com != NULL)
148                 mdd_lfsck_component_get(com);
149         spin_unlock(&lfsck->ml_lock);
150         return com;
151 }
152
153 static void mdd_lfsck_component_cleanup(const struct lu_env *env,
154                                         struct lfsck_component *com)
155 {
156         if (!cfs_list_empty(&com->lc_link))
157                 cfs_list_del_init(&com->lc_link);
158         if (!cfs_list_empty(&com->lc_link_dir))
159                 cfs_list_del_init(&com->lc_link_dir);
160
161         mdd_lfsck_component_put(env, com);
162 }
163
/**
 * Print "<prefix>: name,name,...\n" for every bit set in \a bits, or
 * "<prefix>:\n" when no bit is set.
 *
 * Bit i is printed as names[i], so \a names must have an entry for every
 * bit that may be set in \a bits.
 *
 * \param[in,out] buf    output cursor; advanced past the text written
 * \param[in,out] len    space left at *buf; reduced by the text written
 * \param[in]     bits   bitmask to print
 * \param[in]     names  names indexed by bit position
 * \param[in]     prefix label printed before the colon
 *
 * \retval number of characters written on success
 * \retval -ENOSPC if the text does not fit completely into the buffer;
 *         *buf and *len then still account only for the pieces that were
 *         written completely
 */
static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                           const char *prefix)
{
        /* Work on an unsigned copy so that testing bit 31 is well defined. */
        unsigned int left = (unsigned int)bits;
        int          save = *len;
        int          rc;
        int          i;

        if (*len <= 0)
                return -ENOSPC;

        rc = snprintf(*buf, *len, "%s:%c", prefix, left != 0 ? ' ' : '\n');
        /* snprintf() reports the length it wanted to write: rc >= *len
         * means truncation, and advancing by rc would run past the end. */
        if (rc <= 0 || rc >= *len)
                return -ENOSPC;

        *buf += rc;
        *len -= rc;
        for (i = 0; left != 0; i++) {
                unsigned int flag = 1U << i;

                if (!(left & flag))
                        continue;

                left &= ~flag;
                /* The last name is terminated by '\n', the others by ','. */
                rc = snprintf(*buf, *len, "%s%c", names[i],
                              left != 0 ? ',' : '\n');
                if (rc <= 0 || rc >= *len)
                        return -ENOSPC;

                *buf += rc;
                *len -= rc;
        }
        return save - *len;
}
192
193 static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
194 {
195         int rc;
196
197         if (time != 0)
198                 rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
199                               cfs_time_current_sec() - time);
200         else
201                 rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
202         if (rc <= 0)
203                 return -ENOSPC;
204
205         *buf += rc;
206         *len -= rc;
207         return rc;
208 }
209
210 static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
211                           const char *prefix)
212 {
213         int rc;
214
215         if (fid_is_zero(&pos->lp_dir_parent)) {
216                 if (pos->lp_oit_cookie == 0)
217                         rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
218                                       prefix);
219                 else
220                         rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
221                                       prefix, pos->lp_oit_cookie);
222         } else {
223                 rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
224                               prefix, pos->lp_oit_cookie,
225                               PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
226         }
227         if (rc <= 0)
228                 return -ENOSPC;
229
230         *buf += rc;
231         *len -= rc;
232         return rc;
233 }
234
235 static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
236                                struct lfsck_position *pos, bool oit_processed,
237                                bool dir_processed)
238 {
239         const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;
240
241         spin_lock(&lfsck->ml_lock);
242         if (unlikely(lfsck->ml_di_oit == NULL)) {
243                 spin_unlock(&lfsck->ml_lock);
244                 memset(pos, 0, sizeof(*pos));
245                 return;
246         }
247
248         pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);
249
250         LASSERT(pos->lp_oit_cookie > 0);
251
252         if (!oit_processed)
253                 pos->lp_oit_cookie--;
254
255         if (lfsck->ml_di_dir != NULL) {
256                 struct dt_object *dto = lfsck->ml_obj_dir;
257
258                 pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
259                 pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
260                                                         lfsck->ml_di_dir);
261
262                 LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START);
263
264                 if (pos->lp_dir_cookie >= MDS_DIR_END_OFF)
265                         pos->lp_dir_cookie = MDS_DIR_END_OFF;
266                 else if (!dir_processed)
267                         /* For the dir which just to be processed,
268                          * lp_dir_cookie will become MDS_DIR_DUMMY_START,
269                          * which can be correctly handled by mdd_lfsck_prep. */
270                         pos->lp_dir_cookie--;
271         } else {
272                 fid_zero(&pos->lp_dir_parent);
273                 pos->lp_dir_cookie = 0;
274         }
275         spin_unlock(&lfsck->ml_lock);
276 }
277
278 static inline void mdd_lfsck_pos_set_zero(struct lfsck_position *pos)
279 {
280         memset(pos, 0, sizeof(*pos));
281 }
282
283 static inline int mdd_lfsck_pos_is_zero(const struct lfsck_position *pos)
284 {
285         return pos->lp_oit_cookie == 0 && fid_is_zero(&pos->lp_dir_parent);
286 }
287
288 static inline int mdd_lfsck_pos_is_eq(const struct lfsck_position *pos1,
289                                       const struct lfsck_position *pos2)
290 {
291         if (pos1->lp_oit_cookie < pos2->lp_oit_cookie)
292                 return -1;
293
294         if (pos1->lp_oit_cookie > pos2->lp_oit_cookie)
295                 return 1;
296
297         if (fid_is_zero(&pos1->lp_dir_parent) &&
298             !fid_is_zero(&pos2->lp_dir_parent))
299                 return -1;
300
301         if (!fid_is_zero(&pos1->lp_dir_parent) &&
302             fid_is_zero(&pos2->lp_dir_parent))
303                 return 1;
304
305         if (fid_is_zero(&pos1->lp_dir_parent) &&
306             fid_is_zero(&pos2->lp_dir_parent))
307                 return 0;
308
309         LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
310
311         if (pos1->lp_dir_cookie < pos2->lp_dir_cookie)
312                 return -1;
313
314         if (pos1->lp_dir_cookie > pos2->lp_dir_cookie)
315                 return 1;
316
317         return 0;
318 }
319
320 static void mdd_lfsck_close_dir(const struct lu_env *env,
321                                 struct md_lfsck *lfsck)
322 {
323         struct dt_object        *dir_obj  = lfsck->ml_obj_dir;
324         const struct dt_it_ops  *dir_iops = &dir_obj->do_index_ops->dio_it;
325         struct dt_it            *dir_di   = lfsck->ml_di_dir;
326
327         spin_lock(&lfsck->ml_lock);
328         lfsck->ml_di_dir = NULL;
329         spin_unlock(&lfsck->ml_lock);
330
331         dir_iops->put(env, dir_di);
332         dir_iops->fini(env, dir_di);
333         lfsck->ml_obj_dir = NULL;
334         lu_object_put(env, &dir_obj->do_lu);
335 }
336
337 static void __mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
338 {
339         lfsck->ml_bookmark_ram.lb_speed_limit = limit;
340         if (limit != LFSCK_SPEED_NO_LIMIT) {
341                 if (limit > CFS_HZ) {
342                         lfsck->ml_sleep_rate = limit / CFS_HZ;
343                         lfsck->ml_sleep_jif = 1;
344                 } else {
345                         lfsck->ml_sleep_rate = 1;
346                         lfsck->ml_sleep_jif = CFS_HZ / limit;
347                 }
348         } else {
349                 lfsck->ml_sleep_jif = 0;
350                 lfsck->ml_sleep_rate = 0;
351         }
352 }
353
354 static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
355 {
356         struct ptlrpc_thread *thread = &lfsck->ml_thread;
357         struct l_wait_info    lwi;
358
359         if (lfsck->ml_sleep_jif > 0 &&
360             lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
361                 spin_lock(&lfsck->ml_lock);
362                 if (likely(lfsck->ml_sleep_jif > 0 &&
363                            lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
364                         lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
365                                                LWI_ON_SIGNAL_NOOP, NULL);
366                         spin_unlock(&lfsck->ml_lock);
367
368                         l_wait_event(thread->t_ctl_waitq,
369                                      !thread_is_running(thread),
370                                      &lwi);
371                         lfsck->ml_new_scanned = 0;
372                 } else {
373                         spin_unlock(&lfsck->ml_lock);
374                 }
375         }
376 }
377
378 /* lfsck_bookmark file ops */
379
380 static void inline mdd_lfsck_bookmark_to_cpu(struct lfsck_bookmark *des,
381                                              struct lfsck_bookmark *src)
382 {
383         des->lb_magic = le32_to_cpu(src->lb_magic);
384         des->lb_version = le16_to_cpu(src->lb_version);
385         des->lb_param = le16_to_cpu(src->lb_param);
386         des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
387 }
388
389 static void inline mdd_lfsck_bookmark_to_le(struct lfsck_bookmark *des,
390                                             struct lfsck_bookmark *src)
391 {
392         des->lb_magic = cpu_to_le32(src->lb_magic);
393         des->lb_version = cpu_to_le16(src->lb_version);
394         des->lb_param = cpu_to_le16(src->lb_param);
395         des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
396 }
397
398 static int mdd_lfsck_bookmark_load(const struct lu_env *env,
399                                    struct md_lfsck *lfsck)
400 {
401         loff_t pos = 0;
402         int    len = sizeof(struct lfsck_bookmark);
403         int    rc;
404
405         rc = dt_record_read(env, lfsck->ml_bookmark_obj,
406                             mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
407                             &pos);
408         if (rc == 0) {
409                 struct lfsck_bookmark *bm = &lfsck->ml_bookmark_ram;
410
411                 mdd_lfsck_bookmark_to_cpu(bm, &lfsck->ml_bookmark_disk);
412                 if (bm->lb_magic != LFSCK_BOOKMARK_MAGIC) {
413                         CWARN("%.16s: invalid lfsck_bookmark magic "
414                               "0x%x != 0x%x\n", mdd_lfsck2name(lfsck),
415                               bm->lb_magic, LFSCK_BOOKMARK_MAGIC);
416                         /* Process it as new lfsck_bookmark. */
417                         rc = -ENODATA;
418                 }
419         } else {
420                 if (rc == -EFAULT && pos == 0)
421                         /* return -ENODATA for empty lfsck_bookmark. */
422                         rc = -ENODATA;
423                 else
424                         CERROR("%.16s: fail to load lfsck_bookmark, "
425                                "expected = %d, rc = %d\n",
426                                mdd_lfsck2name(lfsck), len, rc);
427         }
428         return rc;
429 }
430
431 static int mdd_lfsck_bookmark_store(const struct lu_env *env,
432                                     struct md_lfsck *lfsck)
433 {
434         struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
435         struct thandle    *handle;
436         struct dt_object  *obj    = lfsck->ml_bookmark_obj;
437         loff_t             pos    = 0;
438         int                len    = sizeof(struct lfsck_bookmark);
439         int                rc;
440         ENTRY;
441
442         mdd_lfsck_bookmark_to_le(&lfsck->ml_bookmark_disk,
443                                  &lfsck->ml_bookmark_ram);
444         handle = dt_trans_create(env, mdd->mdd_bottom);
445         if (IS_ERR(handle)) {
446                 rc = PTR_ERR(handle);
447                 CERROR("%.16s: fail to create trans for storing "
448                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
449                 RETURN(rc);
450         }
451
452         rc = dt_declare_record_write(env, obj, len, 0, handle);
453         if (rc != 0) {
454                 CERROR("%.16s: fail to declare trans for storing "
455                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
456                 GOTO(out, rc);
457         }
458
459         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
460         if (rc != 0) {
461                 CERROR("%.16s: fail to start trans for storing "
462                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
463                 GOTO(out, rc);
464         }
465
466         rc = dt_record_write(env, obj,
467                              mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
468                              &pos, handle);
469         if (rc != 0)
470                 CERROR("%.16s: fail to store lfsck_bookmark, expected = %d, "
471                        "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
472
473         GOTO(out, rc);
474
475 out:
476         dt_trans_stop(env, mdd->mdd_bottom, handle);
477         return rc;
478 }
479
480 static int mdd_lfsck_bookmark_init(const struct lu_env *env,
481                                    struct md_lfsck *lfsck)
482 {
483         struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram;
484         int rc;
485
486         memset(mb, 0, sizeof(*mb));
487         mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
488         mb->lb_version = LFSCK_VERSION_V2;
489         mutex_lock(&lfsck->ml_mutex);
490         rc = mdd_lfsck_bookmark_store(env, lfsck);
491         mutex_unlock(&lfsck->ml_mutex);
492         return rc;
493 }
494
495 /* lfsck_namespace file ops */
496
497 static void inline mdd_lfsck_position_to_cpu(struct lfsck_position *des,
498                                              struct lfsck_position *src)
499 {
500         des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
501         fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
502         des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
503 }
504
505 static void inline mdd_lfsck_position_to_le(struct lfsck_position *des,
506                                              struct lfsck_position *src)
507 {
508         des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
509         fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
510         des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
511 }
512
513 static void inline mdd_lfsck_namespace_to_cpu(struct lfsck_namespace *des,
514                                               struct lfsck_namespace *src)
515 {
516         des->ln_magic = le32_to_cpu(src->ln_magic);
517         des->ln_status = le32_to_cpu(src->ln_status);
518         des->ln_flags = le32_to_cpu(src->ln_flags);
519         des->ln_success_count = le32_to_cpu(src->ln_success_count);
520         des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
521         des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
522         des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
523         des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
524         des->ln_time_last_checkpoint =
525                                 le64_to_cpu(src->ln_time_last_checkpoint);
526         mdd_lfsck_position_to_cpu(&des->ln_pos_latest_start,
527                                   &src->ln_pos_latest_start);
528         mdd_lfsck_position_to_cpu(&des->ln_pos_last_checkpoint,
529                                   &src->ln_pos_last_checkpoint);
530         mdd_lfsck_position_to_cpu(&des->ln_pos_first_inconsistent,
531                                   &src->ln_pos_first_inconsistent);
532         des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
533         des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
534         des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
535         des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
536         des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
537         des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
538         des->ln_objs_repaired_phase2 =
539                                 le64_to_cpu(src->ln_objs_repaired_phase2);
540         des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
541         des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
542         des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
543         fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
544                       &src->ln_fid_latest_scanned_phase2);
545 }
546
547 static void inline mdd_lfsck_namespace_to_le(struct lfsck_namespace *des,
548                                              struct lfsck_namespace *src)
549 {
550         des->ln_magic = cpu_to_le32(src->ln_magic);
551         des->ln_status = cpu_to_le32(src->ln_status);
552         des->ln_flags = cpu_to_le32(src->ln_flags);
553         des->ln_success_count = cpu_to_le32(src->ln_success_count);
554         des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
555         des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
556         des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
557         des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
558         des->ln_time_last_checkpoint =
559                                 cpu_to_le64(src->ln_time_last_checkpoint);
560         mdd_lfsck_position_to_le(&des->ln_pos_latest_start,
561                                  &src->ln_pos_latest_start);
562         mdd_lfsck_position_to_le(&des->ln_pos_last_checkpoint,
563                                  &src->ln_pos_last_checkpoint);
564         mdd_lfsck_position_to_le(&des->ln_pos_first_inconsistent,
565                                  &src->ln_pos_first_inconsistent);
566         des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
567         des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
568         des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
569         des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
570         des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
571         des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
572         des->ln_objs_repaired_phase2 =
573                                 cpu_to_le64(src->ln_objs_repaired_phase2);
574         des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
575         des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
576         des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
577         fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
578                       &src->ln_fid_latest_scanned_phase2);
579 }
580
581 /**
582  * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
583  * \retval 0: succeed.
584  * \retval -ve: failed cases.
585  */
586 static int mdd_lfsck_namespace_load(const struct lu_env *env,
587                                     struct lfsck_component *com)
588 {
589         int len = com->lc_file_size;
590         int rc;
591
592         rc = dt_xattr_get(env, com->lc_obj,
593                           mdd_buf_get(env, com->lc_file_disk, len),
594                           XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
595         if (rc == len) {
596                 struct lfsck_namespace *ns = com->lc_file_ram;
597
598                 mdd_lfsck_namespace_to_cpu(ns,
599                                 (struct lfsck_namespace *)com->lc_file_disk);
600                 if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
601                         CWARN("%.16s: invalid lfsck_namespace magic "
602                               "0x%x != 0x%x\n",
603                               mdd_lfsck2name(com->lc_lfsck),
604                               ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
605                         rc = 1;
606                 } else {
607                         rc = 0;
608                 }
609         } else if (rc != -ENODATA) {
610                 CERROR("%.16s: fail to load lfsck_namespace, expected = %d, "
611                        "rc = %d\n", mdd_lfsck2name(com->lc_lfsck), len, rc);
612                 if (rc >= 0)
613                         rc = 1;
614         }
615         return rc;
616 }
617
618 static int mdd_lfsck_namespace_store(const struct lu_env *env,
619                                      struct lfsck_component *com, bool init)
620 {
621         struct dt_object  *obj    = com->lc_obj;
622         struct md_lfsck   *lfsck  = com->lc_lfsck;
623         struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
624         struct thandle    *handle;
625         int                len    = com->lc_file_size;
626         int                rc;
627         ENTRY;
628
629         mdd_lfsck_namespace_to_le((struct lfsck_namespace *)com->lc_file_disk,
630                                   (struct lfsck_namespace *)com->lc_file_ram);
631         handle = dt_trans_create(env, mdd->mdd_bottom);
632         if (IS_ERR(handle)) {
633                 rc = PTR_ERR(handle);
634                 CERROR("%.16s: fail to create trans for storing "
635                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
636                 RETURN(rc);
637         }
638
639         rc = dt_declare_xattr_set(env, obj,
640                                   mdd_buf_get(env, com->lc_file_disk, len),
641                                   XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
642         if (rc != 0) {
643                 CERROR("%.16s: fail to declare trans for storing "
644                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
645                 GOTO(out, rc);
646         }
647
648         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
649         if (rc != 0) {
650                 CERROR("%.16s: fail to start trans for storing "
651                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
652                 GOTO(out, rc);
653         }
654
655         rc = dt_xattr_set(env, obj,
656                           mdd_buf_get(env, com->lc_file_disk, len),
657                           XATTR_NAME_LFSCK_NAMESPACE,
658                           init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
659                           handle, BYPASS_CAPA);
660         if (rc != 0)
661                 CERROR("%.16s: fail to store lfsck_namespace, len = %d, "
662                        "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
663
664         GOTO(out, rc);
665
666 out:
667         dt_trans_stop(env, mdd->mdd_bottom, handle);
668         return rc;
669 }
670
671 static int mdd_lfsck_namespace_init(const struct lu_env *env,
672                                     struct lfsck_component *com)
673 {
674         struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
675         int rc;
676
677         memset(ns, 0, sizeof(*ns));
678         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
679         ns->ln_status = LS_INIT;
680         down_write(&com->lc_sem);
681         rc = mdd_lfsck_namespace_store(env, com, true);
682         up_write(&com->lc_sem);
683         return rc;
684 }
685
686 static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
687                                       struct lfsck_component *com,
688                                       const struct lu_fid *fid,
689                                       __u8 *flags)
690 {
691         struct lu_fid *key = &mdd_env_info(env)->mti_fid;
692         int            rc;
693
694         fid_cpu_to_be(key, fid);
695         rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
696                        (const struct dt_key *)key, BYPASS_CAPA);
697         return rc;
698 }
699
700 static int mdd_lfsck_namespace_delete(const struct lu_env *env,
701                                       struct lfsck_component *com,
702                                       const struct lu_fid *fid)
703 {
704         struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
705         struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
706         struct thandle    *handle;
707         struct dt_object *obj     = com->lc_obj;
708         int               rc;
709         ENTRY;
710
711         handle = dt_trans_create(env, mdd->mdd_bottom);
712         if (IS_ERR(handle))
713                 RETURN(PTR_ERR(handle));
714
715         rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
716         if (rc != 0)
717                 GOTO(out, rc);
718
719         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
720         if (rc != 0)
721                 GOTO(out, rc);
722
723         fid_cpu_to_be(key, fid);
724         rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
725                        BYPASS_CAPA);
726
727         GOTO(out, rc);
728
729 out:
730         dt_trans_stop(env, mdd->mdd_bottom, handle);
731         return rc;
732 }
733
734 static int mdd_lfsck_namespace_update(const struct lu_env *env,
735                                       struct lfsck_component *com,
736                                       const struct lu_fid *fid,
737                                       __u8 flags, bool force)
738 {
739         struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
740         struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
741         struct thandle    *handle;
742         struct dt_object *obj     = com->lc_obj;
743         int               rc;
744         bool              exist   = false;
745         __u8              tf;
746         ENTRY;
747
748         rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
749         if (rc != 0 && rc != -ENOENT)
750                 RETURN(rc);
751
752         if (rc == 0) {
753                 if (!force || flags == tf)
754                         RETURN(0);
755
756                 exist = true;
757                 handle = dt_trans_create(env, mdd->mdd_bottom);
758                 if (IS_ERR(handle))
759                         RETURN(PTR_ERR(handle));
760
761                 rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
762                                        handle);
763                 if (rc != 0)
764                         GOTO(out, rc);
765         } else {
766                 handle = dt_trans_create(env, mdd->mdd_bottom);
767                 if (IS_ERR(handle))
768                         RETURN(PTR_ERR(handle));
769         }
770
771         rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
772                                (const struct dt_key *)fid, handle);
773         if (rc != 0)
774                 GOTO(out, rc);
775
776         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
777         if (rc != 0)
778                 GOTO(out, rc);
779
780         fid_cpu_to_be(key, fid);
781         if (exist) {
782                 rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
783                                BYPASS_CAPA);
784                 if (rc != 0) {
785                         CERROR("%s: fail to insert "DFID", rc = %d\n",
786                                mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
787                         GOTO(out, rc);
788                 }
789         }
790
791         rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
792                        (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
793
794         GOTO(out, rc);
795
796 out:
797         dt_trans_stop(env, mdd->mdd_bottom, handle);
798         return rc;
799 }
800
801 /**
802  * \retval +ve  repaired
803  * \retval 0    no need to repair
804  * \retval -ve  error cases
805  */
806 static int mdd_lfsck_namespace_double_scan_one(const struct lu_env *env,
807                                                struct lfsck_component *com,
808                                                struct mdd_object *child,
809                                                __u8 flags)
810 {
811         struct mdd_thread_info  *info     = mdd_env_info(env);
812         struct lu_attr          *la       = &info->mti_la;
813         struct lu_name          *cname    = &info->mti_name;
814         struct lu_fid           *pfid     = &info->mti_fid;
815         struct lu_fid           *cfid     = &info->mti_fid2;
816         struct md_lfsck         *lfsck    = com->lc_lfsck;
817         struct mdd_device       *mdd      = mdd_lfsck2mdd(lfsck);
818         struct lfsck_bookmark   *bk       = &lfsck->ml_bookmark_ram;
819         struct lfsck_namespace  *ns       =
820                                 (struct lfsck_namespace *)com->lc_file_ram;
821         struct linkea_data       ldata    = { 0 };
822         struct thandle          *handle   = NULL;
823         bool                     locked   = false;
824         bool                     update   = false;
825         int                      count;
826         int                      rc;
827         ENTRY;
828
829         if (com->lc_journal) {
830
831 again:
832                 LASSERT(!locked);
833
834                 com->lc_journal = 1;
835                 handle = mdd_trans_create(env, mdd);
836                 if (IS_ERR(handle))
837                         RETURN(rc = PTR_ERR(handle));
838
839                 rc = mdd_declare_links_add(env, child, handle, NULL);
840                 if (rc != 0)
841                         GOTO(stop, rc);
842
843                 rc = mdd_trans_start(env, mdd, handle);
844                 if (rc != 0)
845                         GOTO(stop, rc);
846
847                 mdd_write_lock(env, child, MOR_TGT_CHILD);
848                 locked = true;
849         }
850
851         if (unlikely(mdd_is_dead_obj(child)))
852                 GOTO(stop, rc = 0);
853
854         rc = mdd_links_read(env, child, &ldata);
855         if (rc != 0) {
856                 if ((bk->lb_param & LPF_DRYRUN) &&
857                     (rc == -EINVAL || rc == -ENODATA))
858                         rc = 1;
859
860                 GOTO(stop, rc);
861         }
862
863         rc = mdd_la_get(env, child, la, BYPASS_CAPA);
864         if (rc != 0)
865                 GOTO(stop, rc);
866
867         ldata.ld_lee = LINKEA_FIRST_ENTRY(ldata);
868         count = ldata.ld_leh->leh_reccount;
869         while (count-- > 0) {
870                 struct mdd_object *parent = NULL;
871                 struct dt_object *dir;
872
873                 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname,
874                                     pfid);
875                 if (!fid_is_sane(pfid))
876                         goto shrink;
877
878                 parent = mdd_object_find(env, mdd, pfid);
879                 if (parent == NULL)
880                         goto shrink;
881                 else if (IS_ERR(parent))
882                         GOTO(stop, rc = PTR_ERR(parent));
883
884                 if (!mdd_object_exists(parent))
885                         goto shrink;
886
887                 /* XXX: need more processing for remote object in the future. */
888                 if (mdd_object_remote(parent)) {
889                         mdd_object_put(env, parent);
890                         ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
891                         continue;
892                 }
893
894                 dir = mdd_object_child(parent);
895                 if (unlikely(!dt_try_as_dir(env, dir)))
896                         goto shrink;
897
898                 /* To guarantee the 'name' is terminated with '0'. */
899                 memcpy(info->mti_key, cname->ln_name, cname->ln_namelen);
900                 info->mti_key[cname->ln_namelen] = 0;
901                 cname->ln_name = info->mti_key;
902                 rc = dt_lookup(env, dir, (struct dt_rec *)cfid,
903                                (const struct dt_key *)cname->ln_name,
904                                BYPASS_CAPA);
905                 if (rc != 0 && rc != -ENOENT) {
906                         mdd_object_put(env, parent);
907                         GOTO(stop, rc);
908                 }
909
910                 if (rc == 0) {
911                         if (lu_fid_eq(cfid, mdo2fid(child))) {
912                                 mdd_object_put(env, parent);
913                                 ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
914                                 continue;
915                         }
916
917                         goto shrink;
918                 }
919
920                 if (ldata.ld_leh->leh_reccount > la->la_nlink)
921                         goto shrink;
922
923                 /* XXX: For the case of there is linkea entry, but without name
924                  *      entry pointing to the object, and the object link count
925                  *      isn't less than the count of name entries, then add the
926                  *      name entry back to namespace.
927                  *
928                  *      It is out of LFSCK 1.5 scope, will implement it in the
929                  *      future. Keep the linkEA entry. */
930                 mdd_object_put(env, parent);
931                 ldata.ld_lee = LINKEA_NEXT_ENTRY(ldata);
932                 continue;
933
934 shrink:
935                 if (parent != NULL)
936                         mdd_object_put(env, parent);
937                 if (bk->lb_param & LPF_DRYRUN)
938                         RETURN(1);
939
940                 CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
941                        PFID(mdo2fid(child)), cname->ln_namelen, cname->ln_name,
942                        PFID(pfid));
943                 linkea_del_buf(&ldata, cname);
944                 update = true;
945         }
946
947         if (update) {
948                 if (!com->lc_journal) {
949                         com->lc_journal = 1;
950                         goto again;
951                 }
952
953                 rc = mdd_links_write(env, child, &ldata, handle);
954         }
955
956         GOTO(stop, rc);
957
958 stop:
959         if (locked)
960                 mdd_write_unlock(env, child);
961
962         if (handle != NULL)
963                 mdd_trans_stop(env, mdd, rc, handle);
964
965         if (rc == 0 && update) {
966                 ns->ln_objs_nlink_repaired++;
967                 rc = 1;
968         }
969         return rc;
970 }
971
972 /* namespace APIs */
973
974 static int mdd_lfsck_namespace_reset(const struct lu_env *env,
975                                      struct lfsck_component *com, bool init)
976 {
977         struct lfsck_namespace  *ns   = (struct lfsck_namespace *)com->lc_file_ram;
978         struct mdd_device       *mdd  = mdd_lfsck2mdd(com->lc_lfsck);
979         struct dt_object        *dto, *root;
980         int                      rc;
981         ENTRY;
982
983         down_write(&com->lc_sem);
984         if (init) {
985                 memset(ns, 0, sizeof(*ns));
986         } else {
987                 __u32 count = ns->ln_success_count;
988                 __u64 last_time = ns->ln_time_last_complete;
989
990                 memset(ns, 0, sizeof(*ns));
991                 ns->ln_success_count = count;
992                 ns->ln_time_last_complete = last_time;
993         }
994         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
995         ns->ln_status = LS_INIT;
996
997         root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
998         if (unlikely(IS_ERR(root)))
999                 GOTO(out, rc = PTR_ERR(root));
1000
1001         rc = local_object_unlink(env, mdd->mdd_bottom, root,
1002                                  lfsck_namespace_name);
1003         if (rc != 0)
1004                 GOTO(out, rc);
1005
1006         dto = local_index_find_or_create(env, mdd->mdd_los, root,
1007                                          lfsck_namespace_name,
1008                                          S_IFREG | S_IRUGO | S_IWUSR,
1009                                          &dt_lfsck_features);
1010         if (IS_ERR(dto))
1011                 GOTO(out, rc = PTR_ERR(dto));
1012
1013         rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
1014         if (rc != 0)
1015                 GOTO(out, rc);
1016         com->lc_obj = dto;
1017
1018         rc = mdd_lfsck_namespace_store(env, com, true);
1019
1020         GOTO(out, rc);
1021 out:
1022         lu_object_put(env, &root->do_lu);
1023         up_write(&com->lc_sem);
1024         return rc;
1025 }
1026
1027 static void
1028 mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
1029                          bool oit, bool new_checked)
1030 {
1031         struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
1032
1033         down_write(&com->lc_sem);
1034         if (new_checked)
1035                 com->lc_new_checked++;
1036         ns->ln_items_failed++;
1037         if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1038                 mdd_lfsck_pos_fill(env, com->lc_lfsck,
1039                                    &ns->ln_pos_first_inconsistent, oit, !oit);
1040         up_write(&com->lc_sem);
1041 }
1042
1043 static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
1044                                           struct lfsck_component *com,
1045                                           bool init)
1046 {
1047         struct md_lfsck         *lfsck = com->lc_lfsck;
1048         struct lfsck_namespace  *ns    =
1049                                 (struct lfsck_namespace *)com->lc_file_ram;
1050         int                      rc;
1051
1052         if (com->lc_new_checked == 0 && !init)
1053                 return 0;
1054
1055         down_write(&com->lc_sem);
1056
1057         ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
1058         if (init) {
1059                 ns->ln_time_last_checkpoint = ns->ln_time_latest_start;
1060                 ns->ln_pos_latest_start = lfsck->ml_pos_current;
1061         } else {
1062                 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1063                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
1064                 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1065                 ns->ln_items_checked += com->lc_new_checked;
1066                 com->lc_new_checked = 0;
1067         }
1068
1069         rc = mdd_lfsck_namespace_store(env, com, false);
1070
1071         up_write(&com->lc_sem);
1072         return rc;
1073 }
1074
1075 static int mdd_lfsck_namespace_prep(const struct lu_env *env,
1076                                     struct lfsck_component *com)
1077 {
1078         struct md_lfsck         *lfsck  = com->lc_lfsck;
1079         struct lfsck_namespace  *ns     =
1080                                 (struct lfsck_namespace *)com->lc_file_ram;
1081         struct lfsck_position   *pos    = &com->lc_pos_start;
1082
1083         if (ns->ln_status == LS_COMPLETED) {
1084                 int rc;
1085
1086                 rc = mdd_lfsck_namespace_reset(env, com, false);
1087                 if (rc != 0)
1088                         return rc;
1089         }
1090
1091         down_write(&com->lc_sem);
1092
1093         ns->ln_time_latest_start = cfs_time_current_sec();
1094
1095         spin_lock(&lfsck->ml_lock);
1096         if (ns->ln_flags & LF_SCANNED_ONCE) {
1097                 if (!lfsck->ml_drop_dryrun ||
1098                     mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
1099                         ns->ln_status = LS_SCANNING_PHASE2;
1100                         cfs_list_del_init(&com->lc_link);
1101                         cfs_list_add_tail(&com->lc_link,
1102                                           &lfsck->ml_list_double_scan);
1103                         if (!cfs_list_empty(&com->lc_link_dir))
1104                                 cfs_list_del_init(&com->lc_link_dir);
1105                         mdd_lfsck_pos_set_zero(pos);
1106                 } else {
1107                         ns->ln_status = LS_SCANNING_PHASE1;
1108                         ns->ln_run_time_phase1 = 0;
1109                         ns->ln_run_time_phase2 = 0;
1110                         ns->ln_items_checked = 0;
1111                         ns->ln_items_repaired = 0;
1112                         ns->ln_items_failed = 0;
1113                         ns->ln_dirs_checked = 0;
1114                         ns->ln_mlinked_checked = 0;
1115                         ns->ln_objs_checked_phase2 = 0;
1116                         ns->ln_objs_repaired_phase2 = 0;
1117                         ns->ln_objs_failed_phase2 = 0;
1118                         ns->ln_objs_nlink_repaired = 0;
1119                         ns->ln_objs_lost_found = 0;
1120                         fid_zero(&ns->ln_fid_latest_scanned_phase2);
1121                         if (cfs_list_empty(&com->lc_link_dir))
1122                                 cfs_list_add_tail(&com->lc_link_dir,
1123                                                   &lfsck->ml_list_dir);
1124                         *pos = ns->ln_pos_first_inconsistent;
1125                 }
1126         } else {
1127                 ns->ln_status = LS_SCANNING_PHASE1;
1128                 if (cfs_list_empty(&com->lc_link_dir))
1129                         cfs_list_add_tail(&com->lc_link_dir,
1130                                           &lfsck->ml_list_dir);
1131                 if (!lfsck->ml_drop_dryrun ||
1132                     mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
1133                         *pos = ns->ln_pos_last_checkpoint;
1134                         pos->lp_oit_cookie++;
1135                         if (!fid_is_zero(&pos->lp_dir_parent)) {
1136                                 if (pos->lp_dir_cookie == MDS_DIR_END_OFF) {
1137                                         fid_zero(&pos->lp_dir_parent);
1138                                 } else {
1139                                         pos->lp_dir_cookie++;
1140                                 }
1141                         }
1142                 } else {
1143                         *pos = ns->ln_pos_first_inconsistent;
1144                 }
1145         }
1146         spin_unlock(&lfsck->ml_lock);
1147
1148         up_write(&com->lc_sem);
1149         return 0;
1150 }
1151
1152 static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
1153                                         struct lfsck_component *com,
1154                                         struct mdd_object *obj)
1155 {
1156         down_write(&com->lc_sem);
1157         com->lc_new_checked++;
1158         if (S_ISDIR(mdd_object_type(obj)))
1159                 ((struct lfsck_namespace *)com->lc_file_ram)->ln_dirs_checked++;
1160         up_write(&com->lc_sem);
1161         return 0;
1162 }
1163
1164 static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
1165                                                 struct mdd_object *obj,
1166                                                 struct thandle *handle)
1167 {
1168         int rc;
1169
1170         /* For destroying all invalid linkEA entries. */
1171         rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
1172         if (rc != 0)
1173                 return rc;
1174
1175         /* For insert new linkEA entry. */
1176         rc = mdd_declare_links_add(env, obj, handle, NULL);
1177         return rc;
1178 }
1179
1180 static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
1181                                            struct md_lfsck *lfsck,
1182                                            struct mdd_object *obj,
1183                                            const char *name)
1184 {
1185         struct dt_object *dir = lfsck->ml_obj_dir;
1186         struct lu_fid    *fid = &mdd_env_info(env)->mti_fid;
1187         int               rc;
1188         ENTRY;
1189
1190         if (unlikely(mdd_is_dead_obj(obj)))
1191                 RETURN(LFSCK_NAMEENTRY_DEAD);
1192
1193         rc = dt_lookup(env, dir, (struct dt_rec *)fid,
1194                        (const struct dt_key *)name, BYPASS_CAPA);
1195         if (rc == -ENOENT)
1196                 RETURN(LFSCK_NAMEENTRY_REMOVED);
1197
1198         if (rc < 0)
1199                 RETURN(rc);
1200
1201         if (!lu_fid_eq(fid, mdo2fid(obj)))
1202                 RETURN(LFSCK_NAMEENTRY_RECREATED);
1203
1204         RETURN(0);
1205 }
1206
/**
 * Directory-traversal callback of the namespace component: check the
 * directory entry \a ent, which names the object \a obj, against the
 * linkEA stored on \a obj.
 *
 * The parent is the directory currently being scanned (lfsck->ml_obj_dir).
 * The entry is counted as newly checked, "." and ".." are skipped, and
 * mdd_lfsck_namespace_check_exist() confirms the name still refers to
 * \a obj. The linkEA is then read:
 *  - it contains <name, parent FID>: nothing to fix (for a directory with
 *    more than one record, the linkEA is treated as unmatched);
 *  - it does not contain the pair: for a directory the whole linkEA is
 *    deleted and rebuilt with the single pair, otherwise the pair is added;
 *  - -EINVAL: the linkEA is deleted and rebuilt with the pair;
 *  - -ENODATA: a new linkEA holding the pair is written.
 * With LPF_DRYRUN set nothing is written; the entry is only counted as
 * repaired. Any repair needs a started transaction and the write lock on
 * \a obj; when a repair turns out to be needed without them, the check is
 * restarted from the "again" label.
 *
 * Finally, unless the linkEA has one record and the object is a directory
 * or has nlink == 1, the object is passed to mdd_lfsck_namespace_update(),
 * flagged LLF_UNMATCH_NLINKS when the record count differs from nlink.
 *
 * \retval 0    in all cases when LPF_FAILOUT is not set
 * \retval -ve  the error, reported only when LPF_FAILOUT is set
 */
1207 static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
1208                                         struct lfsck_component *com,
1209                                         struct mdd_object *obj,
1210                                         struct lu_dirent *ent)
1211 {
1212         struct mdd_thread_info     *info     = mdd_env_info(env);
1213         struct lu_attr             *la       = &info->mti_la;
1214         struct md_lfsck            *lfsck    = com->lc_lfsck;
1215         struct lfsck_bookmark      *bk       = &lfsck->ml_bookmark_ram;
1216         struct lfsck_namespace     *ns       =
1217                                 (struct lfsck_namespace *)com->lc_file_ram;
1218         struct mdd_device          *mdd      = mdd_lfsck2mdd(lfsck);
1219         struct linkea_data          ldata    = { 0 };
1220         const struct lu_fid        *pfid     =
1221                                 lu_object_fid(&lfsck->ml_obj_dir->do_lu);
1222         const struct lu_fid        *cfid     = mdo2fid(obj);
1223         const struct lu_name       *cname;
1224         struct thandle             *handle   = NULL;
1225         bool                        repaired = false;
1226         bool                        locked   = false;
1227         int                         count    = 0;
1228         int                         rc;
1229         ENTRY;
1230
1231         cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
             /* com->lc_sem is held for write until the "out" label. */
1232         down_write(&com->lc_sem);
1233         com->lc_new_checked++;
1234
             /* An entry carrying LUDA_UPGRADE or LUDA_REPAIR is counted as
              * repaired and flags the whole scan accordingly. */
1235         if (ent->lde_attrs & LUDA_UPGRADE) {
1236                 ns->ln_flags |= LF_UPGRADE;
1237                 repaired = true;
1238         } else if (ent->lde_attrs & LUDA_REPAIR) {
1239                 ns->ln_flags |= LF_INCONSISTENT;
1240                 repaired = true;
1241         }
1242
             /* Skip the "." and ".." entries. */
1243         if (ent->lde_name[0] == '.' &&
1244             (ent->lde_namelen == 1 ||
1245              (ent->lde_namelen == 2 && ent->lde_name[1] == '.')))
1246                 GOTO(out, rc = 0);
1247
             /* Outside dry-run mode, start the transaction and take the write
              * lock up front when lc_journal is set or the entry was already
              * marked repaired above. lc_journal is cleared at "out" whenever
              * an entry needed no repair. */
1248         if (!(bk->lb_param & LPF_DRYRUN) &&
1249             (com->lc_journal || repaired)) {
1250
             /* Also entered by "goto again" from below, when a repair is
              * needed but no transaction has been started yet. */
1251 again:
1252                 LASSERT(!locked);
1253
1254                 com->lc_journal = 1;
1255                 handle = mdd_trans_create(env, mdd);
1256                 if (IS_ERR(handle))
1257                         GOTO(out, rc = PTR_ERR(handle));
1258
1259                 rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
1260                 if (rc != 0)
1261                         GOTO(stop, rc);
1262
1263                 rc = mdd_trans_start(env, mdd, handle);
1264                 if (rc != 0)
1265                         GOTO(stop, rc);
1266
1267                 mdd_write_lock(env, obj, MOR_TGT_CHILD);
1268                 locked = true;
1269         }
1270
             /* A positive result (LFSCK_NAMEENTRY_*) means the entry no longer
              * refers to this object; it is not an error and ends the check. */
1271         rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
1272         if (rc != 0)
1273                 GOTO(stop, rc);
1274
1275         rc = mdd_links_read(env, obj, &ldata);
1276         if (rc == 0) {
1277                 count = ldata.ld_leh->leh_reccount;
1278                 rc = linkea_links_find(&ldata, cname, pfid);
1279                 if (rc == 0) {
1280                         /* For dir, if there are more than one linkea entries,
1281                          * then remove all the other redundant linkea entries.*/
1282                         if (unlikely(count > 1 &&
1283                                      S_ISDIR(mdd_object_type(obj))))
1284                                 goto unmatch;
1285
1286                         goto record;
1287                 } else {
1288
1289 unmatch:
1290                         ns->ln_flags |= LF_INCONSISTENT;
1291                         if (bk->lb_param & LPF_DRYRUN) {
1292                                 repaired = true;
1293                                 goto record;
1294                         }
1295
1296                         /*For dir, remove the unmatched linkea entry directly.*/
1297                         if (S_ISDIR(mdd_object_type(obj))) {
1298                                 if (!com->lc_journal)
1299                                         goto again;
1300
1301                                 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
1302                                                    handle, BYPASS_CAPA);
1303                                 if (rc != 0)
1304                                         GOTO(stop, rc);
1305
1306                                 goto nodata;
1307                         } else {
1308                                 goto add;
1309                         }
1310                 }
1311         } else if (unlikely(rc == -EINVAL)) {
1312                 ns->ln_flags |= LF_INCONSISTENT;
1313                 if (bk->lb_param & LPF_DRYRUN) {
1314                         count = 1;
1315                         repaired = true;
1316                         goto record;
1317                 }
1318
1319                 if (!com->lc_journal)
1320                         goto again;
1321
1322                 /* The magic crashed, we are not sure whether there are more
1323                  * corrupt data in the linkea, so remove all linkea entries. */
1324                 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
1325                                    BYPASS_CAPA);
1326                 if (rc != 0)
1327                         GOTO(stop, rc);
1328
1329                 goto nodata;
1330         } else if (rc == -ENODATA) {
1331                 ns->ln_flags |= LF_UPGRADE;
1332                 if (bk->lb_param & LPF_DRYRUN) {
1333                         count = 1;
1334                         repaired = true;
1335                         goto record;
1336                 }
1337
             /* Start from an empty linkEA built in the per-thread buffer. */
1338 nodata:
1339                 rc = linkea_data_new(&ldata, &mdd_env_info(env)->mti_link_buf);
1340                 if (rc != 0)
1341                         GOTO(stop, rc);
1342
             /* Add <name, parent FID> to the linkEA and write it back. */
1343 add:
1344                 if (!com->lc_journal)
1345                         goto again;
1346
1347                 rc = linkea_add_buf(&ldata, cname, pfid);
1348                 if (rc != 0)
1349                         GOTO(stop, rc);
1350
1351                 rc = mdd_links_write(env, obj, &ldata, handle);
1352                 if (rc != 0)
1353                         GOTO(stop, rc);
1354
1355                 count = ldata.ld_leh->leh_reccount;
1356                 repaired = true;
1357         } else {
1358                 GOTO(stop, rc);
1359         }
1360
             /* Here count is the linkEA record count, or 1 in the dry-run
              * cases where the linkEA was unreadable or absent. */
1361 record:
1362         LASSERT(count > 0);
1363
1364         rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
1365         if (rc != 0)
1366                 GOTO(stop, rc);
1367
1368         if ((count == 1) &&
1369             (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
1370                 /* Usually, it is for single linked object or dir, do nothing.*/
1371                 GOTO(stop, rc);
1372
1373         /* Following modification will be in another transaction.  */
1374         if (handle != NULL) {
1375                 LASSERT(mdd_write_locked(env, obj));
1376
1377                 mdd_write_unlock(env, obj);
1378                 locked = false;
1379
1380                 mdd_trans_stop(env, mdd, 0, handle);
1381                 handle = NULL;
1382         }
1383
             /* Hand the object to mdd_lfsck_namespace_update(), flagged when
              * the linkEA record count and the link count disagree. */
1384         ns->ln_mlinked_checked++;
1385         rc = mdd_lfsck_namespace_update(env, com, cfid,
1386                         count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
1387
1388         GOTO(out, rc);
1389
1390 stop:
1391         if (locked)
1392                 mdd_write_unlock(env, obj);
1393
1394         if (handle != NULL)
1395                 mdd_trans_stop(env, mdd, rc, handle);
1396
1397 out:
             /* Errors are counted and remembered as the first inconsistent
              * position (if none is recorded yet), but only reported to the
              * caller when LPF_FAILOUT is set. Non-negative results,
              * including the positive LFSCK_NAMEENTRY_* codes, become 0. */
1398         if (rc < 0) {
1399                 ns->ln_items_failed++;
1400                 if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1401                         mdd_lfsck_pos_fill(env, lfsck,
1402                                            &ns->ln_pos_first_inconsistent,
1403                                            true, false);
1404                 if (!(bk->lb_param & LPF_FAILOUT))
1405                         rc = 0;
1406         } else {
1407                 if (repaired)
1408                         ns->ln_items_repaired++;
1409                 else
1410                         com->lc_journal = 0;
1411                 rc = 0;
1412         }
1413         up_write(&com->lc_sem);
1414         return rc;
1415 }
1416
1417 static int mdd_lfsck_namespace_post(const struct lu_env *env,
1418                                     struct lfsck_component *com,
1419                                     int result)
1420 {
1421         struct md_lfsck         *lfsck = com->lc_lfsck;
1422         struct lfsck_namespace  *ns    =
1423                                 (struct lfsck_namespace *)com->lc_file_ram;
1424         int                      rc;
1425
1426         down_write(&com->lc_sem);
1427
1428         spin_lock(&lfsck->ml_lock);
1429         if (result > 0) {
1430                 ns->ln_status = LS_SCANNING_PHASE2;
1431                 ns->ln_flags |= LF_SCANNED_ONCE;
1432                 ns->ln_flags &= ~LF_UPGRADE;
1433                 cfs_list_del_init(&com->lc_link);
1434                 cfs_list_del_init(&com->lc_link_dir);
1435                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_double_scan);
1436         } else if (result == 0) {
1437                 if (lfsck->ml_paused) {
1438                         ns->ln_status = LS_PAUSED;
1439                 } else {
1440                         ns->ln_status = LS_STOPPED;
1441                         cfs_list_del_init(&com->lc_link);
1442                         cfs_list_del_init(&com->lc_link_dir);
1443                         cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1444                 }
1445         } else {
1446                 ns->ln_status = LS_FAILED;
1447                 cfs_list_del_init(&com->lc_link);
1448                 cfs_list_del_init(&com->lc_link_dir);
1449                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1450         }
1451         spin_unlock(&lfsck->ml_lock);
1452
1453         ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1454                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
1455         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1456         ns->ln_items_checked += com->lc_new_checked;
1457         com->lc_new_checked = 0;
1458
1459         rc = mdd_lfsck_namespace_store(env, com, false);
1460
1461         up_write(&com->lc_sem);
1462         return rc;
1463 }
1464
1465 static int
1466 mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
1467                          char *buf, int len)
1468 {
1469         struct md_lfsck         *lfsck = com->lc_lfsck;
1470         struct lfsck_bookmark   *bk    = &lfsck->ml_bookmark_ram;
1471         struct lfsck_namespace  *ns    =
1472                                 (struct lfsck_namespace *)com->lc_file_ram;
1473         int                      save  = len;
1474         int                      ret   = -ENOSPC;
1475         int                      rc;
1476
1477         down_read(&com->lc_sem);
1478         rc = snprintf(buf, len,
1479                       "name: lfsck_namespace\n"
1480                       "magic: 0x%x\n"
1481                       "version: %d\n"
1482                       "status: %s\n",
1483                       ns->ln_magic,
1484                       bk->lb_version,
1485                       lfsck_status_names[ns->ln_status]);
1486         if (rc <= 0)
1487                 goto out;
1488
1489         buf += rc;
1490         len -= rc;
1491         rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
1492                              "flags");
1493         if (rc < 0)
1494                 goto out;
1495
1496         rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
1497                              "param");
1498         if (rc < 0)
1499                 goto out;
1500
1501         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
1502                              "time_since_last_completed");
1503         if (rc < 0)
1504                 goto out;
1505
1506         rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
1507                              "time_since_latest_start");
1508         if (rc < 0)
1509                 goto out;
1510
1511         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
1512                              "time_since_last_checkpoint");
1513         if (rc < 0)
1514                 goto out;
1515
1516         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
1517                             "latest_start_position");
1518         if (rc < 0)
1519                 goto out;
1520
1521         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
1522                             "last_checkpoint_position");
1523         if (rc < 0)
1524                 goto out;
1525
1526         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
1527                             "first_failure_position");
1528         if (rc < 0)
1529                 goto out;
1530
1531         if (ns->ln_status == LS_SCANNING_PHASE1) {
1532                 struct lfsck_position pos;
1533                 cfs_duration_t duration = cfs_time_current() -
1534                                           lfsck->ml_time_last_checkpoint;
1535                 __u64 checked = ns->ln_items_checked + com->lc_new_checked;
1536                 __u64 speed = checked;
1537                 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1538                 __u32 rtime = ns->ln_run_time_phase1 +
1539                               cfs_duration_sec(duration + HALF_SEC);
1540
1541                 if (duration != 0)
1542                         do_div(new_checked, duration);
1543                 if (rtime != 0)
1544                         do_div(speed, rtime);
1545                 rc = snprintf(buf, len,
1546                               "checked_phase1: "LPU64"\n"
1547                               "checked_phase2: "LPU64"\n"
1548                               "updated_phase1: "LPU64"\n"
1549                               "updated_phase2: "LPU64"\n"
1550                               "failed_phase1: "LPU64"\n"
1551                               "failed_phase2: "LPU64"\n"
1552                               "dirs: "LPU64"\n"
1553                               "M-linked: "LPU64"\n"
1554                               "nlinks_repaired: "LPU64"\n"
1555                               "lost_found: "LPU64"\n"
1556                               "success_count: %u\n"
1557                               "run_time_phase1: %u seconds\n"
1558                               "run_time_phase2: %u seconds\n"
1559                               "average_speed_phase1: "LPU64" items/sec\n"
1560                               "average_speed_phase2: N/A\n"
1561                               "real-time_speed_phase1: "LPU64" items/sec\n"
1562                               "real-time_speed_phase2: N/A\n",
1563                               checked,
1564                               ns->ln_objs_checked_phase2,
1565                               ns->ln_items_repaired,
1566                               ns->ln_objs_repaired_phase2,
1567                               ns->ln_items_failed,
1568                               ns->ln_objs_failed_phase2,
1569                               ns->ln_dirs_checked,
1570                               ns->ln_mlinked_checked,
1571                               ns->ln_objs_nlink_repaired,
1572                               ns->ln_objs_lost_found,
1573                               ns->ln_success_count,
1574                               rtime,
1575                               ns->ln_run_time_phase2,
1576                               speed,
1577                               new_checked);
1578                 if (rc <= 0)
1579                         goto out;
1580
1581                 buf += rc;
1582                 len -= rc;
1583                 mdd_lfsck_pos_fill(env, lfsck, &pos, true, true);
1584                 rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
1585                 if (rc <= 0)
1586                         goto out;
1587         } else if (ns->ln_status == LS_SCANNING_PHASE2) {
1588                 cfs_duration_t duration = cfs_time_current() -
1589                                           lfsck->ml_time_last_checkpoint;
1590                 __u64 checked = ns->ln_objs_checked_phase2 +
1591                                 com->lc_new_checked;
1592                 __u64 speed1 = ns->ln_items_checked;
1593                 __u64 speed2 = checked;
1594                 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1595                 __u32 rtime = ns->ln_run_time_phase2 +
1596                               cfs_duration_sec(duration + HALF_SEC);
1597
1598                 if (duration != 0)
1599                         do_div(new_checked, duration);
1600                 if (ns->ln_run_time_phase1 != 0)
1601                         do_div(speed1, ns->ln_run_time_phase1);
1602                 if (rtime != 0)
1603                         do_div(speed2, rtime);
1604                 rc = snprintf(buf, len,
1605                               "checked_phase1: "LPU64"\n"
1606                               "checked_phase2: "LPU64"\n"
1607                               "updated_phase1: "LPU64"\n"
1608                               "updated_phase2: "LPU64"\n"
1609                               "failed_phase1: "LPU64"\n"
1610                               "failed_phase2: "LPU64"\n"
1611                               "dirs: "LPU64"\n"
1612                               "M-linked: "LPU64"\n"
1613                               "nlinks_repaired: "LPU64"\n"
1614                               "lost_found: "LPU64"\n"
1615                               "success_count: %u\n"
1616                               "run_time_phase1: %u seconds\n"
1617                               "run_time_phase2: %u seconds\n"
1618                               "average_speed_phase1: "LPU64" items/sec\n"
1619                               "average_speed_phase2: "LPU64" objs/sec\n"
1620                               "real-time_speed_phase1: N/A\n"
1621                               "real-time_speed_phase2: "LPU64" objs/sec\n"
1622                               "current_position: "DFID"\n",
1623                               ns->ln_items_checked,
1624                               checked,
1625                               ns->ln_items_repaired,
1626                               ns->ln_objs_repaired_phase2,
1627                               ns->ln_items_failed,
1628                               ns->ln_objs_failed_phase2,
1629                               ns->ln_dirs_checked,
1630                               ns->ln_mlinked_checked,
1631                               ns->ln_objs_nlink_repaired,
1632                               ns->ln_objs_lost_found,
1633                               ns->ln_success_count,
1634                               ns->ln_run_time_phase1,
1635                               rtime,
1636                               speed1,
1637                               speed2,
1638                               new_checked,
1639                               PFID(&ns->ln_fid_latest_scanned_phase2));
1640                 if (rc <= 0)
1641                         goto out;
1642
1643                 buf += rc;
1644                 len -= rc;
1645         } else {
1646                 __u64 speed1 = ns->ln_items_checked;
1647                 __u64 speed2 = ns->ln_objs_checked_phase2;
1648
1649                 if (ns->ln_run_time_phase1 != 0)
1650                         do_div(speed1, ns->ln_run_time_phase1);
1651                 if (ns->ln_run_time_phase2 != 0)
1652                         do_div(speed2, ns->ln_run_time_phase2);
1653                 rc = snprintf(buf, len,
1654                               "checked_phase1: "LPU64"\n"
1655                               "checked_phase2: "LPU64"\n"
1656                               "updated_phase1: "LPU64"\n"
1657                               "updated_phase2: "LPU64"\n"
1658                               "failed_phase1: "LPU64"\n"
1659                               "failed_phase2: "LPU64"\n"
1660                               "dirs: "LPU64"\n"
1661                               "M-linked: "LPU64"\n"
1662                               "nlinks_repaired: "LPU64"\n"
1663                               "lost_found: "LPU64"\n"
1664                               "success_count: %u\n"
1665                               "run_time_phase1: %u seconds\n"
1666                               "run_time_phase2: %u seconds\n"
1667                               "average_speed_phase1: "LPU64" items/sec\n"
1668                               "average_speed_phase2: "LPU64" objs/sec\n"
1669                               "real-time_speed_phase1: N/A\n"
1670                               "real-time_speed_phase2: N/A\n"
1671                               "current_position: N/A\n",
1672                               ns->ln_items_checked,
1673                               ns->ln_objs_checked_phase2,
1674                               ns->ln_items_repaired,
1675                               ns->ln_objs_repaired_phase2,
1676                               ns->ln_items_failed,
1677                               ns->ln_objs_failed_phase2,
1678                               ns->ln_dirs_checked,
1679                               ns->ln_mlinked_checked,
1680                               ns->ln_objs_nlink_repaired,
1681                               ns->ln_objs_lost_found,
1682                               ns->ln_success_count,
1683                               ns->ln_run_time_phase1,
1684                               ns->ln_run_time_phase2,
1685                               speed1,
1686                               speed2);
1687                 if (rc <= 0)
1688                         goto out;
1689
1690                 buf += rc;
1691                 len -= rc;
1692         }
1693         ret = save - len;
1694
1695 out:
1696         up_read(&com->lc_sem);
1697         return ret;
1698 }
1699
1700 static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
1701                                            struct lfsck_component *com)
1702 {
1703         struct md_lfsck         *lfsck  = com->lc_lfsck;
1704         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
1705         struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
1706         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
1707         struct lfsck_namespace  *ns     =
1708                                 (struct lfsck_namespace *)com->lc_file_ram;
1709         struct dt_object        *obj    = com->lc_obj;
1710         const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
1711         struct mdd_object       *target;
1712         struct dt_it            *di;
1713         struct dt_key           *key;
1714         struct lu_fid            fid;
1715         int                      rc;
1716         __u8                     flags;
1717         ENTRY;
1718
1719         lfsck->ml_new_scanned = 0;
1720         lfsck->ml_time_last_checkpoint = cfs_time_current();
1721         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1722                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1723
1724         di = iops->init(env, obj, 0, BYPASS_CAPA);
1725         if (IS_ERR(di))
1726                 RETURN(PTR_ERR(di));
1727
1728         fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
1729         rc = iops->get(env, di, (const struct dt_key *)&fid);
1730         if (rc < 0)
1731                 GOTO(fini, rc);
1732
1733         /* Skip the start one, which either has been processed or non-exist. */
1734         rc = iops->next(env, di);
1735         if (rc != 0)
1736                 GOTO(put, rc);
1737
1738         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
1739                 GOTO(put, rc = 0);
1740
1741         do {
1742                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
1743                     cfs_fail_val > 0) {
1744                         struct l_wait_info lwi;
1745
1746                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
1747                                           NULL, NULL);
1748                         l_wait_event(thread->t_ctl_waitq,
1749                                      !thread_is_running(thread),
1750                                      &lwi);
1751                 }
1752
1753                 key = iops->key(env, di);
1754                 fid_be_to_cpu(&fid, (const struct lu_fid *)key);
1755                 target = mdd_object_find(env, mdd, &fid);
1756                 down_write(&com->lc_sem);
1757                 if (target == NULL) {
1758                         rc = 0;
1759                         goto checkpoint;
1760                 } else if (IS_ERR(target)) {
1761                         rc = PTR_ERR(target);
1762                         goto checkpoint;
1763                 }
1764
1765                 /* XXX: need more processing for remote object in the future. */
1766                 if (!mdd_object_exists(target) || mdd_object_remote(target))
1767                         goto obj_put;
1768
1769                 rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
1770                 if (rc == 0)
1771                         rc = mdd_lfsck_namespace_double_scan_one(env, com,
1772                                                                  target, flags);
1773
1774 obj_put:
1775                 mdd_object_put(env, target);
1776
1777 checkpoint:
1778                 lfsck->ml_new_scanned++;
1779                 com->lc_new_checked++;
1780                 ns->ln_fid_latest_scanned_phase2 = fid;
1781                 if (rc > 0)
1782                         ns->ln_objs_repaired_phase2++;
1783                 else if (rc < 0)
1784                         ns->ln_objs_failed_phase2++;
1785                 up_write(&com->lc_sem);
1786
1787                 if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
1788                         mdd_lfsck_namespace_delete(env, com, &fid);
1789                 } else if (rc < 0) {
1790                         flags |= LLF_REPAIR_FAILED;
1791                         mdd_lfsck_namespace_update(env, com, &fid, flags, true);
1792                 }
1793
1794                 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
1795                         GOTO(put, rc);
1796
1797                 if (likely(cfs_time_beforeq(cfs_time_current(),
1798                                             lfsck->ml_time_next_checkpoint)) ||
1799                     com->lc_new_checked == 0)
1800                         goto speed;
1801
1802                 down_write(&com->lc_sem);
1803                 ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1804                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
1805                 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1806                 ns->ln_objs_checked_phase2 += com->lc_new_checked;
1807                 com->lc_new_checked = 0;
1808                 rc = mdd_lfsck_namespace_store(env, com, false);
1809                 up_write(&com->lc_sem);
1810                 if (rc != 0)
1811                         GOTO(put, rc);
1812
1813                 lfsck->ml_time_last_checkpoint = cfs_time_current();
1814                 lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1815                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1816
1817 speed:
1818                 mdd_lfsck_control_speed(lfsck);
1819                 if (unlikely(!thread_is_running(thread)))
1820                         GOTO(put, rc = 0);
1821
1822                 rc = iops->next(env, di);
1823         } while (rc == 0);
1824
1825         GOTO(put, rc);
1826
1827 put:
1828         iops->put(env, di);
1829
1830 fini:
1831         iops->fini(env, di);
1832         down_write(&com->lc_sem);
1833
1834         ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1835                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
1836         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1837         ns->ln_objs_checked_phase2 += com->lc_new_checked;
1838         com->lc_new_checked = 0;
1839
1840         if (rc > 0) {
1841                 com->lc_journal = 0;
1842                 ns->ln_status = LS_COMPLETED;
1843                 if (!(bk->lb_param & LPF_DRYRUN))
1844                         ns->ln_flags &=
1845                         ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
1846                 ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
1847                 ns->ln_success_count++;
1848         } else if (rc == 0) {
1849                 if (lfsck->ml_paused)
1850                         ns->ln_status = LS_PAUSED;
1851                 else
1852                         ns->ln_status = LS_STOPPED;
1853         } else {
1854                 ns->ln_status = LS_FAILED;
1855         }
1856
1857         if (ns->ln_status != LS_PAUSED) {
1858                 spin_lock(&lfsck->ml_lock);
1859                 cfs_list_del_init(&com->lc_link);
1860                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1861                 spin_unlock(&lfsck->ml_lock);
1862         }
1863
1864         rc = mdd_lfsck_namespace_store(env, com, false);
1865
1866         up_write(&com->lc_sem);
1867         return rc;
1868 }
1869
1870 static struct lfsck_operations mdd_lfsck_namespace_ops = {
1871         .lfsck_reset            = mdd_lfsck_namespace_reset,
1872         .lfsck_fail             = mdd_lfsck_namespace_fail,
1873         .lfsck_checkpoint       = mdd_lfsck_namespace_checkpoint,
1874         .lfsck_prep             = mdd_lfsck_namespace_prep,
1875         .lfsck_exec_oit         = mdd_lfsck_namespace_exec_oit,
1876         .lfsck_exec_dir         = mdd_lfsck_namespace_exec_dir,
1877         .lfsck_post             = mdd_lfsck_namespace_post,
1878         .lfsck_dump             = mdd_lfsck_namespace_dump,
1879         .lfsck_double_scan      = mdd_lfsck_namespace_double_scan,
1880 };
1881
1882 /* LFSCK component setup/cleanup functions */
1883
1884 static int mdd_lfsck_namespace_setup(const struct lu_env *env,
1885                                      struct md_lfsck *lfsck)
1886 {
1887         struct mdd_device       *mdd = mdd_lfsck2mdd(lfsck);
1888         struct lfsck_component  *com;
1889         struct lfsck_namespace  *ns;
1890         struct dt_object        *obj, *root;
1891         int                      rc;
1892         ENTRY;
1893
1894         OBD_ALLOC_PTR(com);
1895         if (com == NULL)
1896                 RETURN(-ENOMEM);
1897
1898         CFS_INIT_LIST_HEAD(&com->lc_link);
1899         CFS_INIT_LIST_HEAD(&com->lc_link_dir);
1900         init_rwsem(&com->lc_sem);
1901         atomic_set(&com->lc_ref, 1);
1902         com->lc_lfsck = lfsck;
1903         com->lc_type = LT_NAMESPACE;
1904         com->lc_ops = &mdd_lfsck_namespace_ops;
1905         com->lc_file_size = sizeof(struct lfsck_namespace);
1906         OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
1907         if (com->lc_file_ram == NULL)
1908                 GOTO(out, rc = -ENOMEM);
1909
1910         OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
1911         if (com->lc_file_disk == NULL)
1912                 GOTO(out, rc = -ENOMEM);
1913
1914         root = dt_locate(env, mdd->mdd_bottom, &mdd->mdd_local_root_fid);
1915         if (unlikely(IS_ERR(root)))
1916                 GOTO(out, rc = PTR_ERR(root));
1917
1918         obj = local_index_find_or_create(env, mdd->mdd_los, root,
1919                                          lfsck_namespace_name,
1920                                          S_IFREG | S_IRUGO | S_IWUSR,
1921                                          &dt_lfsck_features);
1922         lu_object_put(env, &root->do_lu);
1923         if (IS_ERR(obj))
1924                 GOTO(out, rc = PTR_ERR(obj));
1925
1926         com->lc_obj = obj;
1927         rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
1928         if (rc != 0)
1929                 GOTO(out, rc);
1930
1931         rc = mdd_lfsck_namespace_load(env, com);
1932         if (rc > 0)
1933                 rc = mdd_lfsck_namespace_reset(env, com, true);
1934         else if (rc == -ENODATA)
1935                 rc = mdd_lfsck_namespace_init(env, com);
1936         if (rc != 0)
1937                 GOTO(out, rc);
1938
1939         ns = (struct lfsck_namespace *)com->lc_file_ram;
1940         switch (ns->ln_status) {
1941         case LS_INIT:
1942         case LS_COMPLETED:
1943         case LS_FAILED:
1944         case LS_STOPPED:
1945                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1946                 break;
1947         default:
1948                 CERROR("%s: unknown status: %u\n",
1949                        mdd_lfsck2name(lfsck), ns->ln_status);
1950                 /* fall through */
1951         case LS_SCANNING_PHASE1:
1952         case LS_SCANNING_PHASE2:
1953                 /* No need to store the status to disk right now.
1954                  * If the system crashed before the status stored,
1955                  * it will be loaded back when next time. */
1956                 ns->ln_status = LS_CRASHED;
1957                 /* fall through */
1958         case LS_PAUSED:
1959         case LS_CRASHED:
1960                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_scan);
1961                 cfs_list_add_tail(&com->lc_link_dir, &lfsck->ml_list_dir);
1962                 break;
1963         }
1964
1965         GOTO(out, rc = 0);
1966
1967 out:
1968         if (rc != 0)
1969                 mdd_lfsck_component_cleanup(env, com);
1970         return rc;
1971 }
1972
1973 /* helper functions for framework */
1974
1975 static int object_is_client_visible(const struct lu_env *env,
1976                                     struct mdd_device *mdd,
1977                                     struct mdd_object *obj)
1978 {
1979         struct lu_fid *fid   = &mdd_env_info(env)->mti_fid;
1980         int            depth = 0;
1981         int            rc;
1982
1983         LASSERT(S_ISDIR(mdd_object_type(obj)));
1984
1985         while (1) {
1986                 if (mdd_is_root(mdd, mdo2fid(obj))) {
1987                         if (depth > 0)
1988                                 mdd_object_put(env, obj);
1989                         return 1;
1990                 }
1991
1992                 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1993                 if (unlikely(mdd_is_dead_obj(obj))) {
1994                         mdd_read_unlock(env, obj);
1995                         if (depth > 0)
1996                                 mdd_object_put(env, obj);
1997                         return 0;
1998                 }
1999
2000                 rc = dt_xattr_get(env, mdd_object_child(obj),
2001                                   mdd_buf_get(env, NULL, 0), XATTR_NAME_LINK,
2002                                   BYPASS_CAPA);
2003                 mdd_read_unlock(env, obj);
2004                 if (rc >= 0) {
2005                         if (depth > 0)
2006                                 mdd_object_put(env, obj);
2007                         return 1;
2008                 }
2009
2010                 if (rc < 0 && rc != -ENODATA) {
2011                         if (depth > 0)
2012                                 mdd_object_put(env, obj);
2013                         return rc;
2014                 }
2015
2016                 rc = mdd_parent_fid(env, obj, fid);
2017                 if (depth > 0)
2018                         mdd_object_put(env, obj);
2019                 if (rc != 0)
2020                         return rc;
2021
2022                 if (unlikely(lu_fid_eq(fid, &mdd->mdd_local_root_fid)))
2023                         return 0;
2024
2025                 obj = mdd_object_find(env, mdd, fid);
2026                 if (obj == NULL)
2027                         return 0;
2028                 else if (IS_ERR(obj))
2029                         return PTR_ERR(obj);
2030
2031                 if (!mdd_object_exists(obj)) {
2032                         mdd_object_put(env, obj);
2033                         return 0;
2034                 }
2035
2036                 /* Currently, only client visible directory can be remote. */
2037                 if (mdd_object_remote(obj)) {
2038                         mdd_object_put(env, obj);
2039                         return 1;
2040                 }
2041
2042                 depth++;
2043         }
2044         return 0;
2045 }
2046
2047 static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
2048 {
2049         fid_le_to_cpu(&ent->lde_fid, &ent->lde_fid);
2050         ent->lde_hash = le64_to_cpu(ent->lde_hash);
2051         ent->lde_reclen = le16_to_cpu(ent->lde_reclen);
2052         ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
2053         ent->lde_attrs = le32_to_cpu(ent->lde_attrs);
2054
2055         /* Make sure the name is terminated with '0'.
2056          * The data (type) after ent::lde_name maybe
2057          * broken, but we do not care. */
2058         ent->lde_name[ent->lde_namelen] = 0;
2059 }
2060
2061 /* LFSCK wrap functions */
2062
2063 static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
2064                            bool oit, bool new_checked)
2065 {
2066         struct lfsck_component *com;
2067
2068         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2069                 com->lc_ops->lfsck_fail(env, com, oit, new_checked);
2070         }
2071 }
2072
2073 static int mdd_lfsck_checkpoint(const struct lu_env *env,
2074                                 struct md_lfsck *lfsck, bool oit)
2075 {
2076         struct lfsck_component *com;
2077         int                     rc;
2078
2079         if (likely(cfs_time_beforeq(cfs_time_current(),
2080                                     lfsck->ml_time_next_checkpoint)))
2081                 return 0;
2082
2083         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, oit, !oit);
2084         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2085                 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
2086                 if (rc != 0)
2087                         return rc;;
2088         }
2089
2090         lfsck->ml_time_last_checkpoint = cfs_time_current();
2091         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2092                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
2093         return 0;
2094 }
2095
2096 static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
2097 {
2098         struct mdd_device      *mdd     = mdd_lfsck2mdd(lfsck);
2099         struct mdd_object      *obj     = NULL;
2100         struct dt_object       *dt_obj;
2101         struct lfsck_component *com;
2102         struct lfsck_component *next;
2103         struct lfsck_position  *pos     = NULL;
2104         const struct dt_it_ops *iops    =
2105                                 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2106         struct dt_it           *di;
2107         int                     rc;
2108         ENTRY;
2109
2110         LASSERT(lfsck->ml_obj_dir == NULL);
2111         LASSERT(lfsck->ml_di_dir == NULL);
2112
2113         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2114                 com->lc_new_checked = 0;
2115                 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2116                         com->lc_journal = 0;
2117
2118                 rc = com->lc_ops->lfsck_prep(env, com);
2119                 if (rc != 0)
2120                         RETURN(rc);
2121
2122                 if ((pos == NULL) ||
2123                     (!mdd_lfsck_pos_is_zero(&com->lc_pos_start) &&
2124                      mdd_lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
2125                         pos = &com->lc_pos_start;
2126         }
2127
2128         /* Init otable-based iterator. */
2129         if (pos == NULL) {
2130                 rc = iops->load(env, lfsck->ml_di_oit, 0);
2131                 if (rc > 0) {
2132                         lfsck->ml_oit_over = 1;
2133                         rc = 0;
2134                 }
2135
2136                 GOTO(out, rc);
2137         }
2138
2139         rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
2140         if (rc < 0)
2141                 GOTO(out, rc);
2142         else if (rc > 0)
2143                 lfsck->ml_oit_over = 1;
2144
2145         if (fid_is_zero(&pos->lp_dir_parent))
2146                 GOTO(out, rc = 0);
2147
2148         /* Find the directory for namespace-based traverse. */
2149         obj = mdd_object_find(env, mdd, &pos->lp_dir_parent);
2150         if (obj == NULL)
2151                 GOTO(out, rc = 0);
2152         else if (IS_ERR(obj))
2153                 RETURN(PTR_ERR(obj));
2154
2155         /* XXX: need more processing for remote object in the future. */
2156         if (!mdd_object_exists(obj) || mdd_object_remote(obj) ||
2157             unlikely(!S_ISDIR(mdd_object_type(obj))))
2158                 GOTO(out, rc = 0);
2159
2160         if (unlikely(mdd_is_dead_obj(obj)))
2161                 GOTO(out, rc = 0);
2162
2163         dt_obj = mdd_object_child(obj);
2164         if (unlikely(!dt_try_as_dir(env, dt_obj)))
2165                 GOTO(out, rc = -ENOTDIR);
2166
2167         /* Init the namespace-based directory traverse. */
2168         iops = &dt_obj->do_index_ops->dio_it;
2169         di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2170         if (IS_ERR(di))
2171                 GOTO(out, rc = PTR_ERR(di));
2172
2173         rc = iops->load(env, di, pos->lp_dir_cookie);
2174         if (rc == 0)
2175                 rc = iops->next(env, di);
2176         else if (rc > 0)
2177                 rc = 0;
2178
2179         if (rc != 0) {
2180                 iops->put(env, di);
2181                 iops->fini(env, di);
2182                 GOTO(out, rc);
2183         }
2184
2185         lfsck->ml_obj_dir = dt_obj;
2186         spin_lock(&lfsck->ml_lock);
2187         lfsck->ml_di_dir = di;
2188         spin_unlock(&lfsck->ml_lock);
2189         obj = NULL;
2190
2191         GOTO(out, rc = 0);
2192
2193 out:
2194         if (obj != NULL)
2195                 mdd_object_put(env, obj);
2196
2197         if (rc != 0)
2198                 return (rc > 0 ? 0 : rc);
2199
2200         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false, false);
2201         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2202                 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
2203                 if (rc != 0)
2204                         break;
2205         }
2206
2207         lfsck->ml_time_last_checkpoint = cfs_time_current();
2208         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2209                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
2210         return rc;
2211 }
2212
2213 static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
2214                               struct mdd_object *obj)
2215 {
2216         struct lfsck_component *com;
2217         struct dt_object       *dt_obj;
2218         const struct dt_it_ops *iops;
2219         struct dt_it           *di;
2220         int                     rc;
2221         ENTRY;
2222
2223         LASSERT(lfsck->ml_obj_dir == NULL);
2224
2225         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2226                 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
2227                 if (rc != 0)
2228                         RETURN(rc);
2229         }
2230
2231         if (!S_ISDIR(mdd_object_type(obj)) ||
2232             cfs_list_empty(&lfsck->ml_list_dir))
2233                RETURN(0);
2234
2235         rc = object_is_client_visible(env, mdd_lfsck2mdd(lfsck), obj);
2236         if (rc <= 0)
2237                 GOTO(out, rc);
2238
2239         if (unlikely(mdd_is_dead_obj(obj)))
2240                 GOTO(out, rc = 0);
2241
2242         dt_obj = mdd_object_child(obj);
2243         if (unlikely(!dt_try_as_dir(env, dt_obj)))
2244                 GOTO(out, rc = -ENOTDIR);
2245
2246         iops = &dt_obj->do_index_ops->dio_it;
2247         di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
2248         if (IS_ERR(di))
2249                 GOTO(out, rc = PTR_ERR(di));
2250
2251         rc = iops->load(env, di, 0);
2252         if (rc == 0)
2253                 rc = iops->next(env, di);
2254         else if (rc > 0)
2255                 rc = 0;
2256
2257         if (rc != 0) {
2258                 iops->put(env, di);
2259                 iops->fini(env, di);
2260                 GOTO(out, rc);
2261         }
2262
2263         mdd_object_get(obj);
2264         lfsck->ml_obj_dir = dt_obj;
2265         spin_lock(&lfsck->ml_lock);
2266         lfsck->ml_di_dir = di;
2267         spin_unlock(&lfsck->ml_lock);
2268
2269         GOTO(out, rc = 0);
2270
2271 out:
2272         if (rc < 0)
2273                 mdd_lfsck_fail(env, lfsck, false, false);
2274         return (rc > 0 ? 0 : rc);
2275 }
2276
2277 static int mdd_lfsck_exec_dir(const struct lu_env *env, struct md_lfsck *lfsck,
2278                               struct mdd_object *obj, struct lu_dirent *ent)
2279 {
2280         struct lfsck_component *com;
2281         int                     rc;
2282
2283         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2284                 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
2285                 if (rc != 0)
2286                         return rc;
2287         }
2288         return 0;
2289 }
2290
2291 static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
2292                           int result)
2293 {
2294         struct lfsck_component *com;
2295         struct lfsck_component *next;
2296         int                     rc;
2297
2298         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true, true);
2299         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2300                 rc = com->lc_ops->lfsck_post(env, com, result);
2301                 if (rc != 0)
2302                         return rc;
2303         }
2304
2305         lfsck->ml_time_last_checkpoint = cfs_time_current();
2306         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2307                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
2308         return result;
2309 }
2310
2311 static int mdd_lfsck_double_scan(const struct lu_env *env,
2312                                  struct md_lfsck *lfsck)
2313 {
2314         struct lfsck_component *com;
2315         struct lfsck_component *next;
2316         int                     rc;
2317
2318         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_double_scan,
2319                                      lc_link) {
2320                 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2321                         com->lc_journal = 0;
2322
2323                 rc = com->lc_ops->lfsck_double_scan(env, com);
2324                 if (rc != 0)
2325                         return rc;
2326         }
2327         return 0;
2328 }
2329
2330 /* LFSCK engines */
2331
2332 static int mdd_lfsck_dir_engine(const struct lu_env *env,
2333                                 struct md_lfsck *lfsck)
2334 {
2335         struct mdd_thread_info  *info   = mdd_env_info(env);
2336         struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
2337         const struct dt_it_ops  *iops   =
2338                         &lfsck->ml_obj_dir->do_index_ops->dio_it;
2339         struct dt_it            *di     = lfsck->ml_di_dir;
2340         struct lu_dirent        *ent    = &info->mti_ent;
2341         struct lu_fid           *fid    = &info->mti_fid;
2342         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
2343         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2344         int                      rc;
2345         ENTRY;
2346
2347         do {
2348                 struct mdd_object *child;
2349
2350                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) &&
2351                     cfs_fail_val > 0) {
2352                         struct l_wait_info lwi;
2353
2354                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2355                                           NULL, NULL);
2356                         l_wait_event(thread->t_ctl_waitq,
2357                                      !thread_is_running(thread),
2358                                      &lwi);
2359                 }
2360
2361                 lfsck->ml_new_scanned++;
2362                 rc = iops->rec(env, di, (struct dt_rec *)ent,
2363                                lfsck->ml_args_dir);
2364                 if (rc != 0) {
2365                         mdd_lfsck_fail(env, lfsck, false, true);
2366                         if (bk->lb_param & LPF_FAILOUT)
2367                                 RETURN(rc);
2368                         else
2369                                 goto checkpoint;
2370                 }
2371
2372                 mdd_lfsck_unpack_ent(ent);
2373                 if (ent->lde_attrs & LUDA_IGNORE)
2374                         goto checkpoint;
2375
2376                 *fid = ent->lde_fid;
2377                 child = mdd_object_find(env, mdd, fid);
2378                 if (child == NULL) {
2379                         goto checkpoint;
2380                 } else if (IS_ERR(child)) {
2381                         mdd_lfsck_fail(env, lfsck, false, true);
2382                         if (bk->lb_param & LPF_FAILOUT)
2383                                 RETURN(PTR_ERR(child));
2384                         else
2385                                 goto checkpoint;
2386                 }
2387
2388                 /* XXX: need more processing for remote object in the future. */
2389                 if (mdd_object_exists(child) && !mdd_object_remote(child))
2390                         rc = mdd_lfsck_exec_dir(env, lfsck, child, ent);
2391                 mdd_object_put(env, child);
2392                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2393                         RETURN(rc);
2394
2395 checkpoint:
2396                 rc = mdd_lfsck_checkpoint(env, lfsck, false);
2397                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2398                         RETURN(rc);
2399
2400                 /* Rate control. */
2401                 mdd_lfsck_control_speed(lfsck);
2402                 if (unlikely(!thread_is_running(thread)))
2403                         RETURN(0);
2404
2405                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) {
2406                         spin_lock(&lfsck->ml_lock);
2407                         thread_set_flags(thread, SVC_STOPPING);
2408                         spin_unlock(&lfsck->ml_lock);
2409                         RETURN(-EINVAL);
2410                 }
2411
2412                 rc = iops->next(env, di);
2413         } while (rc == 0);
2414
2415         if (rc > 0 && !lfsck->ml_oit_over)
2416                 mdd_lfsck_close_dir(env, lfsck);
2417
2418         RETURN(rc);
2419 }
2420
2421 static int mdd_lfsck_oit_engine(const struct lu_env *env,
2422                                 struct md_lfsck *lfsck)
2423 {
2424         struct mdd_thread_info  *info   = mdd_env_info(env);
2425         struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
2426         const struct dt_it_ops  *iops   =
2427                                 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2428         struct dt_it            *di     = lfsck->ml_di_oit;
2429         struct lu_fid           *fid    = &info->mti_fid;
2430         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
2431         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2432         int                      rc;
2433         ENTRY;
2434
2435         do {
2436                 struct mdd_object *target;
2437
2438                 if (lfsck->ml_di_dir != NULL) {
2439                         rc = mdd_lfsck_dir_engine(env, lfsck);
2440                         if (rc <= 0)
2441                                 RETURN(rc);
2442                 }
2443
2444                 if (unlikely(lfsck->ml_oit_over))
2445                         RETURN(1);
2446
2447                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) &&
2448                     cfs_fail_val > 0) {
2449                         struct l_wait_info lwi;
2450
2451                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2452                                           NULL, NULL);
2453                         l_wait_event(thread->t_ctl_waitq,
2454                                      !thread_is_running(thread),
2455                                      &lwi);
2456                 }
2457
2458                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2459                         RETURN(0);
2460
2461                 lfsck->ml_new_scanned++;
2462                 rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
2463                 if (rc != 0) {
2464                         mdd_lfsck_fail(env, lfsck, true, true);
2465                         if (bk->lb_param & LPF_FAILOUT)
2466                                 RETURN(rc);
2467                         else
2468                                 goto checkpoint;
2469                 }
2470
2471                 target = mdd_object_find(env, mdd, fid);
2472                 if (target == NULL) {
2473                         goto checkpoint;
2474                 } else if (IS_ERR(target)) {
2475                         mdd_lfsck_fail(env, lfsck, true, true);
2476                         if (bk->lb_param & LPF_FAILOUT)
2477                                 RETURN(PTR_ERR(target));
2478                         else
2479                                 goto checkpoint;
2480                 }
2481
2482                 /* XXX: In fact, low layer otable-based iteration should not
2483                  *      return agent object. But before LU-2646 resolved, we
2484                  *      need more processing for agent object. */
2485                 if (mdd_object_exists(target) && !mdd_object_remote(target))
2486                         rc = mdd_lfsck_exec_oit(env, lfsck, target);
2487                 mdd_object_put(env, target);
2488                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2489                         RETURN(rc);
2490
2491 checkpoint:
2492                 rc = mdd_lfsck_checkpoint(env, lfsck, true);
2493                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2494                         RETURN(rc);
2495
2496                 /* Rate control. */
2497                 mdd_lfsck_control_speed(lfsck);
2498
2499                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) {
2500                         spin_lock(&lfsck->ml_lock);
2501                         thread_set_flags(thread, SVC_STOPPING);
2502                         spin_unlock(&lfsck->ml_lock);
2503                         RETURN(-EINVAL);
2504                 }
2505
2506                 rc = iops->next(env, di);
2507                 if (rc > 0)
2508                         lfsck->ml_oit_over = 1;
2509
2510                 if (unlikely(!thread_is_running(thread)))
2511                         RETURN(0);
2512         } while (rc == 0 || lfsck->ml_di_dir != NULL);
2513
2514         RETURN(rc);
2515 }
2516
2517 static int mdd_lfsck_main(void *args)
2518 {
2519         struct lu_env            env;
2520         struct md_lfsck         *lfsck    = (struct md_lfsck *)args;
2521         struct ptlrpc_thread    *thread   = &lfsck->ml_thread;
2522         struct dt_object        *oit_obj  = lfsck->ml_obj_oit;
2523         const struct dt_it_ops  *oit_iops = &oit_obj->do_index_ops->dio_it;
2524         struct dt_it            *oit_di;
2525         int                      rc;
2526         ENTRY;
2527
2528         cfs_daemonize("lfsck");
2529         rc = lu_env_init(&env, LCT_MD_THREAD);
2530         if (rc != 0) {
2531                 CERROR("%s: LFSCK, fail to init env, rc = %d\n",
2532                        mdd_lfsck2name(lfsck), rc);
2533                 GOTO(noenv, rc);
2534         }
2535
2536         oit_di = oit_iops->init(&env, oit_obj, lfsck->ml_args_oit, BYPASS_CAPA);
2537         if (IS_ERR(oit_di)) {
2538                 rc = PTR_ERR(oit_di);
2539                 CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
2540                        mdd_lfsck2name(lfsck), rc);
2541                 GOTO(fini_env, rc);
2542         }
2543
2544         spin_lock(&lfsck->ml_lock);
2545         lfsck->ml_di_oit = oit_di;
2546         spin_unlock(&lfsck->ml_lock);
2547         rc = mdd_lfsck_prep(&env, lfsck);
2548         if (rc != 0)
2549                 GOTO(fini_oit, rc);
2550
2551         CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = 0x%x, dir_flags = 0x%x, "
2552                "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2553                ", pid = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2554                lfsck->ml_pos_current.lp_oit_cookie,
2555                lfsck->ml_pos_current.lp_dir_cookie,
2556                PFID(&lfsck->ml_pos_current.lp_dir_parent),
2557                cfs_curproc_pid());
2558
2559         spin_lock(&lfsck->ml_lock);
2560         thread_set_flags(thread, SVC_RUNNING);
2561         spin_unlock(&lfsck->ml_lock);
2562         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2563
2564         if (!cfs_list_empty(&lfsck->ml_list_scan) ||
2565             cfs_list_empty(&lfsck->ml_list_double_scan))
2566                 rc = mdd_lfsck_oit_engine(&env, lfsck);
2567         else
2568                 rc = 1;
2569
2570         CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = 0x%x, dir_flags = 0x%x, "
2571                "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2572                ", pid = %d, rc = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2573                lfsck->ml_pos_current.lp_oit_cookie,
2574                lfsck->ml_pos_current.lp_dir_cookie,
2575                PFID(&lfsck->ml_pos_current.lp_dir_parent),
2576                cfs_curproc_pid(), rc);
2577
2578         if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
2579                 oit_iops->put(&env, oit_di);
2580
2581         if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_CRASH))
2582                 rc = mdd_lfsck_post(&env, lfsck, rc);
2583         if (lfsck->ml_di_dir != NULL)
2584                 mdd_lfsck_close_dir(&env, lfsck);
2585
2586 fini_oit:
2587         spin_lock(&lfsck->ml_lock);
2588         lfsck->ml_di_oit = NULL;
2589         spin_unlock(&lfsck->ml_lock);
2590
2591         oit_iops->fini(&env, oit_di);
2592         if (rc == 1) {
2593                 if (!cfs_list_empty(&lfsck->ml_list_double_scan))
2594                         rc = mdd_lfsck_double_scan(&env, lfsck);
2595                 else
2596                         rc = 0;
2597         }
2598
2599         /* XXX: Purge the pinned objects in the future. */
2600
2601 fini_env:
2602         lu_env_fini(&env);
2603
2604 noenv:
2605         spin_lock(&lfsck->ml_lock);
2606         thread_set_flags(thread, SVC_STOPPED);
2607         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2608         spin_unlock(&lfsck->ml_lock);
2609         return rc;
2610 }
2611
2612 /* external interfaces */
2613
2614 int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck,
2615                         __u32 limit)
2616 {
2617         int rc;
2618
2619         mutex_lock(&lfsck->ml_mutex);
2620         __mdd_lfsck_set_speed(lfsck, limit);
2621         rc = mdd_lfsck_bookmark_store(env, lfsck);
2622         mutex_unlock(&lfsck->ml_mutex);
2623         return rc;
2624 }
2625
2626 int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck,
2627                    __u16 type, char *buf, int len)
2628 {
2629         struct lfsck_component *com;
2630         int                     rc;
2631
2632         if (!lfsck->ml_initialized)
2633                 return -ENODEV;
2634
2635         com = mdd_lfsck_component_find(lfsck, type);
2636         if (com == NULL)
2637                 return -ENOTSUPP;
2638
2639         rc = com->lc_ops->lfsck_dump(env, com, buf, len);
2640         mdd_lfsck_component_put(env, com);
2641         return rc;
2642 }
2643
2644 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
2645                     struct lfsck_start *start)
2646 {
2647         struct lfsck_bookmark  *bk     = &lfsck->ml_bookmark_ram;
2648         struct ptlrpc_thread   *thread = &lfsck->ml_thread;
2649         struct lfsck_component *com;
2650         struct l_wait_info      lwi    = { 0 };
2651         bool                    dirty  = false;
2652         int                     rc     = 0;
2653         __u16                   valid  = 0;
2654         __u16                   flags  = 0;
2655         ENTRY;
2656
2657         if (lfsck->ml_obj_oit == NULL)
2658                 RETURN(-ENOTSUPP);
2659
2660         /* start == NULL means auto trigger paused LFSCK. */
2661         if ((start == NULL) &&
2662             (cfs_list_empty(&lfsck->ml_list_scan) ||
2663              OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
2664                 RETURN(0);
2665
2666         mutex_lock(&lfsck->ml_mutex);
2667         spin_lock(&lfsck->ml_lock);
2668         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
2669                 spin_unlock(&lfsck->ml_lock);
2670                 mutex_unlock(&lfsck->ml_mutex);
2671                 RETURN(-EALREADY);
2672         }
2673
2674         spin_unlock(&lfsck->ml_lock);
2675
2676         lfsck->ml_paused = 0;
2677         lfsck->ml_oit_over = 0;
2678         lfsck->ml_drop_dryrun = 0;
2679         lfsck->ml_new_scanned = 0;
2680
2681         /* For auto trigger. */
2682         if (start == NULL)
2683                 goto trigger;
2684
2685         start->ls_version = bk->lb_version;
2686         if (start->ls_valid & LSV_SPEED_LIMIT) {
2687                 __mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
2688                 dirty = true;
2689         }
2690
2691         if (start->ls_valid & LSV_ERROR_HANDLE) {
2692                 valid |= DOIV_ERROR_HANDLE;
2693                 if (start->ls_flags & LPF_FAILOUT)
2694                         flags |= DOIF_FAILOUT;
2695
2696                 if ((start->ls_flags & LPF_FAILOUT) &&
2697                     !(bk->lb_param & LPF_FAILOUT)) {
2698                         bk->lb_param |= LPF_FAILOUT;
2699                         dirty = true;
2700                 } else if (!(start->ls_flags & LPF_FAILOUT) &&
2701                            (bk->lb_param & LPF_FAILOUT)) {
2702                         bk->lb_param &= ~LPF_FAILOUT;
2703                         dirty = true;
2704                 }
2705         }
2706
2707         if (start->ls_valid & LSV_DRYRUN) {
2708                 if ((start->ls_flags & LPF_DRYRUN) &&
2709                     !(bk->lb_param & LPF_DRYRUN)) {
2710                         bk->lb_param |= LPF_DRYRUN;
2711                         dirty = true;
2712                 } else if (!(start->ls_flags & LPF_DRYRUN) &&
2713                            (bk->lb_param & LPF_DRYRUN)) {
2714                         bk->lb_param &= ~LPF_DRYRUN;
2715                         lfsck->ml_drop_dryrun = 1;
2716                         dirty = true;
2717                 }
2718         }
2719
2720         if (dirty) {
2721                 rc = mdd_lfsck_bookmark_store(env, lfsck);
2722                 if (rc != 0)
2723                         GOTO(out, rc);
2724         }
2725
2726         if (start->ls_flags & LPF_RESET)
2727                 flags |= DOIF_RESET;
2728
2729         if (start->ls_active != 0) {
2730                 struct lfsck_component *next;
2731                 __u16 type = 1;
2732
2733                 if (start->ls_active == LFSCK_TYPES_ALL)
2734                         start->ls_active = LFSCK_TYPES_SUPPORTED;
2735
2736                 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
2737                         start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
2738                         GOTO(out, rc = -ENOTSUPP);
2739                 }
2740
2741                 cfs_list_for_each_entry_safe(com, next,
2742                                              &lfsck->ml_list_scan, lc_link) {
2743                         if (!(com->lc_type & start->ls_active)) {
2744                                 rc = com->lc_ops->lfsck_post(env, com, 0);
2745                                 if (rc != 0)
2746                                         GOTO(out, rc);
2747                         }
2748                 }
2749
2750                 while (start->ls_active != 0) {
2751                         if (type & start->ls_active) {
2752                                 com = __mdd_lfsck_component_find(lfsck, type,
2753                                                         &lfsck->ml_list_idle);
2754                                 if (com != NULL) {
2755                                         /* The component status will be updated
2756                                          * when its prep() is called later by
2757                                          * the LFSCK main engine. */
2758                                         cfs_list_del_init(&com->lc_link);
2759                                         cfs_list_add_tail(&com->lc_link,
2760                                                           &lfsck->ml_list_scan);
2761                                 }
2762                                 start->ls_active &= ~type;
2763                         }
2764                         type <<= 1;
2765                 }
2766         }
2767
2768         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2769                 start->ls_active |= com->lc_type;
2770                 if (flags & DOIF_RESET) {
2771                         rc = com->lc_ops->lfsck_reset(env, com, false);
2772                         if (rc != 0)
2773                                 GOTO(out, rc);
2774                 }
2775         }
2776
2777 trigger:
2778         lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
2779         if (bk->lb_param & LPF_DRYRUN)
2780                 lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
2781
2782         if (bk->lb_param & LPF_FAILOUT) {
2783                 valid |= DOIV_ERROR_HANDLE;
2784                 flags |= DOIF_FAILOUT;
2785         }
2786
2787         if (!cfs_list_empty(&lfsck->ml_list_scan))
2788                 flags |= DOIF_OUTUSED;
2789
2790         lfsck->ml_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
2791         thread_set_flags(thread, 0);
2792         rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
2793         if (rc < 0)
2794                 CERROR("%s: cannot start LFSCK thread, rc = %d\n",
2795                        mdd_lfsck2name(lfsck), rc);
2796         else
2797                 l_wait_event(thread->t_ctl_waitq,
2798                              thread_is_running(thread) ||
2799                              thread_is_stopped(thread),
2800                              &lwi);
2801
2802         GOTO(out, rc = 0);
2803
2804 out:
2805         mutex_unlock(&lfsck->ml_mutex);
2806         return (rc < 0 ? rc : 0);
2807 }
2808
2809 int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
2810                    bool pause)
2811 {
2812         struct ptlrpc_thread *thread = &lfsck->ml_thread;
2813         struct l_wait_info    lwi    = { 0 };
2814         ENTRY;
2815
2816         if (!lfsck->ml_initialized)
2817                 RETURN(0);
2818
2819         mutex_lock(&lfsck->ml_mutex);
2820         spin_lock(&lfsck->ml_lock);
2821         if (thread_is_init(thread) || thread_is_stopped(thread)) {
2822                 spin_unlock(&lfsck->ml_lock);
2823                 mutex_unlock(&lfsck->ml_mutex);
2824                 RETURN(-EALREADY);
2825         }
2826
2827         if (pause)
2828                 lfsck->ml_paused = 1;
2829         thread_set_flags(thread, SVC_STOPPING);
2830         /* The LFSCK thread may be sleeping on low layer wait queue,
2831          * wake it up. */
2832         if (likely(lfsck->ml_di_oit != NULL))
2833                 lfsck->ml_obj_oit->do_index_ops->dio_it.put(env,
2834                                                             lfsck->ml_di_oit);
2835         spin_unlock(&lfsck->ml_lock);
2836
2837         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2838         l_wait_event(thread->t_ctl_waitq,
2839                      thread_is_stopped(thread),
2840                      &lwi);
2841         mutex_unlock(&lfsck->ml_mutex);
2842
2843         RETURN(0);
2844 }
2845
2846 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
2847                                             .f_oid = OTABLE_IT_OID,
2848                                             .f_ver = 0 };
2849
2850 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
2851 {
2852         struct md_lfsck         *lfsck = &mdd->mdd_lfsck;
2853         struct dt_object        *obj;
2854         struct lu_fid            fid;
2855         int                      rc;
2856
2857         ENTRY;
2858
2859         LASSERT(!lfsck->ml_initialized);
2860
2861         lfsck->ml_initialized = 1;
2862         mutex_init(&lfsck->ml_mutex);
2863         spin_lock_init(&lfsck->ml_lock);
2864         CFS_INIT_LIST_HEAD(&lfsck->ml_list_scan);
2865         CFS_INIT_LIST_HEAD(&lfsck->ml_list_dir);
2866         CFS_INIT_LIST_HEAD(&lfsck->ml_list_double_scan);
2867         CFS_INIT_LIST_HEAD(&lfsck->ml_list_idle);
2868         cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
2869
2870         obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid);
2871         if (IS_ERR(obj))
2872                 RETURN(PTR_ERR(obj));
2873
2874         lfsck->ml_obj_oit = obj;
2875         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
2876         if (rc != 0) {
2877                 if (rc == -ENOTSUPP)
2878                         RETURN(0);
2879                 GOTO(out, rc);
2880         }
2881
2882         /* LFSCK bookmark */
2883         fid_zero(&fid);
2884         rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid,
2885                                    lfsck_bookmark_name,
2886                                    S_IFREG | S_IRUGO | S_IWUSR, &fid);
2887         if (rc < 0)
2888                 GOTO(out, rc);
2889
2890         obj = dt_locate(env, mdd->mdd_bottom, &fid);
2891         if (IS_ERR(obj))
2892                 GOTO(out, rc = PTR_ERR(obj));
2893
2894         LASSERT(lu_object_exists(&obj->do_lu));
2895         lfsck->ml_bookmark_obj = obj;
2896
2897         rc = mdd_lfsck_bookmark_load(env, lfsck);
2898         if (rc == -ENODATA)
2899                 rc = mdd_lfsck_bookmark_init(env, lfsck);
2900         if (rc != 0)
2901                 GOTO(out, rc);
2902
2903         rc = mdd_lfsck_namespace_setup(env, lfsck);
2904         if (rc < 0)
2905                 GOTO(out, rc);
2906         /* XXX: LFSCK components initialization to be added here. */
2907         RETURN(0);
2908 out:
2909         lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2910         lfsck->ml_obj_oit = NULL;
2911         return 0;
2912 }
2913
2914 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
2915 {
2916         struct md_lfsck         *lfsck  = &mdd->mdd_lfsck;
2917         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2918         struct lfsck_component  *com;
2919
2920         if (!lfsck->ml_initialized)
2921                 return;
2922
2923         LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
2924
2925         if (lfsck->ml_obj_oit != NULL) {
2926                 lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2927                 lfsck->ml_obj_oit = NULL;
2928         }
2929
2930         LASSERT(lfsck->ml_obj_dir == NULL);
2931
2932         if (lfsck->ml_bookmark_obj != NULL) {
2933                 lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
2934                 lfsck->ml_bookmark_obj = NULL;
2935         }
2936
2937         while (!cfs_list_empty(&lfsck->ml_list_scan)) {
2938                 com = cfs_list_entry(lfsck->ml_list_scan.next,
2939                                      struct lfsck_component,
2940                                      lc_link);
2941                 mdd_lfsck_component_cleanup(env, com);
2942         }
2943
2944         LASSERT(cfs_list_empty(&lfsck->ml_list_dir));
2945
2946         while (!cfs_list_empty(&lfsck->ml_list_double_scan)) {
2947                 com = cfs_list_entry(lfsck->ml_list_double_scan.next,
2948                                      struct lfsck_component,
2949                                      lc_link);
2950                 mdd_lfsck_component_cleanup(env, com);
2951         }
2952
2953         while (!cfs_list_empty(&lfsck->ml_list_idle)) {
2954                 com = cfs_list_entry(lfsck->ml_list_idle.next,
2955                                      struct lfsck_component,
2956                                      lc_link);
2957                 mdd_lfsck_component_cleanup(env, com);
2958         }
2959 }