Whamcloud - gitweb
78228ab35d701759d53b85ec360630da3ce76c52
[fs/lustre-release.git] / lustre / mdd / mdd_lfsck.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, Intel Corporation.
24  */
25 /*
26  * lustre/mdd/mdd_lfsck.c
27  *
28  * Top-level entry points into mdd module
29  *
30  * LFSCK controller, which scans the whole device through low layer
31  * iteration APIs, drives all lfsck components, controls the speed.
32  *
33  * Author: Fan Yong <yong.fan@whamcloud.com>
34  */
35
36 #ifndef EXPORT_SYMTAB
37 # define EXPORT_SYMTAB
38 #endif
39 #define DEBUG_SUBSYSTEM S_MDS
40
41 #include <lustre/lustre_idl.h>
42 #include <lustre_fid.h>
43 #include <obd_support.h>
44
45 #include "mdd_internal.h"
46 #include "mdd_lfsck.h"
47
/* Half of CFS_HZ, i.e. half a second assuming CFS_HZ is the number of
 * ticks per second -- TODO confirm. */
#define HALF_SEC                        (CFS_HZ >> 1)
/* Interval between LFSCK checkpoints; presumably in seconds -- TODO confirm
 * against the code that consumes it. */
#define LFSCK_CHECKPOINT_INTERVAL       60
/* All-ones 64-bit directory cookie: the value a zero lp_dir_cookie turns into
 * when mdd_lfsck_pos_fill() decrements it for a not-yet-processed directory. */
#define MDS_DIR_DUMMY_START             0xffffffffffffffffULL

/* State codes for a name entry. */
#define LFSCK_NAMEENTRY_DEAD            1 /* The object has been unlinked. */
#define LFSCK_NAMEENTRY_REMOVED         2 /* The entry has been removed. */
#define LFSCK_NAMEENTRY_RECREATED       3 /* The entry has been recreated. */
55
/* Names of the LFSCK bookmark and namespace objects.  Both have external
 * linkage.  lfsck_namespace_name is the directory-entry key that
 * mdd_lfsck_namespace_unlink() deletes from its parent. */
const char lfsck_bookmark_name[] = "lfsck_bookmark";
const char lfsck_namespace_name[] = "lfsck_namespace";
58
/* NULL-terminated table of human-readable LFSCK status names.
 * NOTE(review): presumably indexed by the ln_status value (LS_INIT first);
 * confirm against the status enum before reordering. */
static const char *lfsck_status_names[] = {
        "init",
        "scanning-phase1",
        "scanning-phase2",
        "completed",
        "failed",
        "stopped",
        "paused",
        "crashed",
        NULL
};
70
/* NULL-terminated table of flag names.  It has the layout lfsck_bits_dump()
 * expects: entry i names bit (1 << i).
 * NOTE(review): presumably describes the ln_flags bits -- confirm. */
static const char *lfsck_flags_names[] = {
        "scanned-once",
        "inconsistent",
        "upgrade",
        NULL
};
77
/* NULL-terminated table of parameter names.  It has the layout
 * lfsck_bits_dump() expects: entry i names bit (1 << i).
 * NOTE(review): presumably describes the lb_param bits -- confirm. */
static const char *lfsck_param_names[] = {
        "failout",
        "dryrun",
        NULL
};
83
84 /* misc functions */
85
86 static inline struct mdd_device *mdd_lfsck2mdd(struct md_lfsck *lfsck)
87 {
88         return container_of0(lfsck, struct mdd_device, mdd_lfsck);
89 }
90
91 static inline char *mdd_lfsck2name(struct md_lfsck *lfsck)
92 {
93         struct mdd_device *mdd = mdd_lfsck2mdd(lfsck);
94
95         return mdd2obd_dev(mdd)->obd_name;
96 }
97
98 static inline void mdd_lfsck_component_get(struct lfsck_component *com)
99 {
100         atomic_inc(&com->lc_ref);
101 }
102
103 static inline void mdd_lfsck_component_put(const struct lu_env *env,
104                                            struct lfsck_component *com)
105 {
106         if (atomic_dec_and_test(&com->lc_ref)) {
107                 if (com->lc_obj != NULL)
108                         lu_object_put(env, &com->lc_obj->do_lu);
109                 if (com->lc_file_ram != NULL)
110                         OBD_FREE(com->lc_file_ram, com->lc_file_size);
111                 if (com->lc_file_disk != NULL)
112                         OBD_FREE(com->lc_file_disk, com->lc_file_size);
113                 OBD_FREE_PTR(com);
114         }
115 }
116
117 static inline struct lfsck_component *
118 __mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type, cfs_list_t *list)
119 {
120         struct lfsck_component *com;
121
122         cfs_list_for_each_entry(com, list, lc_link) {
123                 if (com->lc_type == type)
124                         return com;
125         }
126         return NULL;
127 }
128
129 static struct lfsck_component *
130 mdd_lfsck_component_find(struct md_lfsck *lfsck, __u16 type)
131 {
132         struct lfsck_component *com;
133
134         spin_lock(&lfsck->ml_lock);
135         com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_scan);
136         if (com != NULL)
137                 goto unlock;
138
139         com = __mdd_lfsck_component_find(lfsck, type,
140                                          &lfsck->ml_list_double_scan);
141         if (com != NULL)
142                 goto unlock;
143
144         com = __mdd_lfsck_component_find(lfsck, type, &lfsck->ml_list_idle);
145
146 unlock:
147         if (com != NULL)
148                 mdd_lfsck_component_get(com);
149         spin_unlock(&lfsck->ml_lock);
150         return com;
151 }
152
153 static void mdd_lfsck_component_cleanup(const struct lu_env *env,
154                                         struct lfsck_component *com)
155 {
156         if (!cfs_list_empty(&com->lc_link))
157                 cfs_list_del_init(&com->lc_link);
158         if (!cfs_list_empty(&com->lc_link_dir))
159                 cfs_list_del_init(&com->lc_link_dir);
160
161         mdd_lfsck_component_put(env, com);
162 }
163
/**
 * Format the names of the bits set in \a bits as "<prefix>: n1,n2\n"
 * (or "<prefix>:\n" when nothing is set) at the current output position.
 *
 * \param[in,out] buf     output cursor; advanced past the text written
 * \param[in,out] len     space left at *buf; reduced by the text written
 * \param[in]     bits    bit mask to describe
 * \param[in]     names   NULL-terminated table, names[i] describes bit (1 << i)
 * \param[in]     prefix  label printed before the names
 *
 * Bits that have no entry in \a names are ignored, so the table is never
 * indexed past its NULL terminator.
 *
 * \retval number of characters written on success
 * \retval -ENOSPC if the text does not fit; the cursor is not advanced past
 *         the piece that failed
 */
static int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                           const char *prefix)
{
        unsigned int known = 0;
        unsigned int left;
        int          save = *len;
        int          rc;
        int          i;

        if (*len <= 0)
                return -ENOSPC;

        /* Build the mask of bits that actually have a name. */
        for (i = 0; i < (int)(sizeof(known) * 8) && names[i] != NULL; i++)
                known |= 1U << i;
        left = (unsigned int)bits & known;

        rc = snprintf(*buf, *len, "%s:%c", prefix, left != 0 ? ' ' : '\n');
        /* snprintf() returns the untruncated length, so rc >= *len means
         * the output was cut short. */
        if (rc <= 0 || rc >= *len)
                return -ENOSPC;

        *buf += rc;
        *len -= rc;
        for (i = 0; left != 0; i++) {
                unsigned int flag = 1U << i;

                if (!(left & flag))
                        continue;

                left &= ~flag;
                /* ',' between names, '\n' after the last one. */
                rc = snprintf(*buf, *len, "%s%c", names[i],
                              left != 0 ? ',' : '\n');
                if (rc <= 0 || rc >= *len)
                        return -ENOSPC;

                *buf += rc;
                *len -= rc;
        }
        return save - *len;
}
192
193 static int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
194 {
195         int rc;
196
197         if (time != 0)
198                 rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
199                               cfs_time_current_sec() - time);
200         else
201                 rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
202         if (rc <= 0)
203                 return -ENOSPC;
204
205         *buf += rc;
206         *len -= rc;
207         return rc;
208 }
209
210 static int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
211                           const char *prefix)
212 {
213         int rc;
214
215         if (fid_is_zero(&pos->lp_dir_parent)) {
216                 if (pos->lp_oit_cookie == 0)
217                         rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
218                                       prefix);
219                 else
220                         rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
221                                       prefix, pos->lp_oit_cookie);
222         } else {
223                 rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
224                               prefix, pos->lp_oit_cookie,
225                               PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
226         }
227         if (rc <= 0)
228                 return -ENOSPC;
229
230         *buf += rc;
231         *len -= rc;
232         return rc;
233 }
234
/**
 * Record the current scanning position of \a lfsck into \a pos.
 *
 * \param[in]  env            execution environment handed to the iterators
 * \param[in]  lfsck          LFSCK instance whose iterators are sampled
 * \param[out] pos            receives the object-table cookie and, when a
 *                            directory iterator is open, the directory FID
 *                            and directory cookie
 * \param[in]  oit_processed  false if the item under the object-table
 *                            iterator has not been handled yet; the stored
 *                            cookie is then moved back by one
 * \param[in]  dir_processed  same as \a oit_processed, for the directory
 *                            iterator
 *
 * When no object-table iterator is attached (ml_di_oit == NULL), \a pos is
 * zeroed.  The iterator state is sampled while holding ml_lock.
 */
static void mdd_lfsck_pos_fill(const struct lu_env *env, struct md_lfsck *lfsck,
                               struct lfsck_position *pos, bool oit_processed,
                               bool dir_processed)
{
        const struct dt_it_ops *iops = &lfsck->ml_obj_oit->do_index_ops->dio_it;

        spin_lock(&lfsck->ml_lock);
        /* No object-table iteration in progress: report an all-zero position. */
        if (unlikely(lfsck->ml_di_oit == NULL)) {
                spin_unlock(&lfsck->ml_lock);
                memset(pos, 0, sizeof(*pos));
                return;
        }

        pos->lp_oit_cookie = iops->store(env, lfsck->ml_di_oit);

        LASSERT(pos->lp_oit_cookie > 0);

        /* Step back so that the unprocessed item is covered by the position. */
        if (!oit_processed)
                pos->lp_oit_cookie--;

        if (lfsck->ml_di_dir != NULL) {
                struct dt_object *dto = lfsck->ml_obj_dir;

                pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
                pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
                                                        lfsck->ml_di_dir);

                LASSERT(pos->lp_dir_cookie != MDS_DIR_DUMMY_START);

                /* An iterator sitting at the end of the directory must have
                 * had its last entry processed already. */
                if (pos->lp_dir_cookie == MDS_DIR_END_OFF)
                        LASSERT(dir_processed);

                /* For the dir which just to be processed,
                 * lp_dir_cookie will become MDS_DIR_DUMMY_START,
                 * which can be correctly handled by mdd_lfsck_prep. */
                if (!dir_processed)
                        pos->lp_dir_cookie--;
        } else {
                /* No directory is being traversed. */
                fid_zero(&pos->lp_dir_parent);
                pos->lp_dir_cookie = 0;
        }
        spin_unlock(&lfsck->ml_lock);
}
278
279 static inline void mdd_lfsck_pos_set_zero(struct lfsck_position *pos)
280 {
281         memset(pos, 0, sizeof(*pos));
282 }
283
284 static inline int mdd_lfsck_pos_is_zero(const struct lfsck_position *pos)
285 {
286         return pos->lp_oit_cookie == 0 && fid_is_zero(&pos->lp_dir_parent);
287 }
288
289 static inline int mdd_lfsck_pos_is_eq(const struct lfsck_position *pos1,
290                                       const struct lfsck_position *pos2)
291 {
292         if (pos1->lp_oit_cookie < pos2->lp_oit_cookie)
293                 return -1;
294
295         if (pos1->lp_oit_cookie > pos2->lp_oit_cookie)
296                 return 1;
297
298         if (fid_is_zero(&pos1->lp_dir_parent) &&
299             !fid_is_zero(&pos2->lp_dir_parent))
300                 return -1;
301
302         if (!fid_is_zero(&pos1->lp_dir_parent) &&
303             fid_is_zero(&pos2->lp_dir_parent))
304                 return 1;
305
306         if (fid_is_zero(&pos1->lp_dir_parent) &&
307             fid_is_zero(&pos2->lp_dir_parent))
308                 return 0;
309
310         LASSERT(lu_fid_eq(&pos1->lp_dir_parent, &pos2->lp_dir_parent));
311
312         if (pos1->lp_dir_cookie < pos2->lp_dir_cookie)
313                 return -1;
314
315         if (pos1->lp_dir_cookie > pos2->lp_dir_cookie)
316                 return 1;
317
318         return 0;
319 }
320
321 static void mdd_lfsck_close_dir(const struct lu_env *env,
322                                 struct md_lfsck *lfsck)
323 {
324         struct dt_object        *dir_obj  = lfsck->ml_obj_dir;
325         const struct dt_it_ops  *dir_iops = &dir_obj->do_index_ops->dio_it;
326         struct dt_it            *dir_di   = lfsck->ml_di_dir;
327
328         spin_lock(&lfsck->ml_lock);
329         lfsck->ml_di_dir = NULL;
330         spin_unlock(&lfsck->ml_lock);
331
332         dir_iops->put(env, dir_di);
333         dir_iops->fini(env, dir_di);
334         lfsck->ml_obj_dir = NULL;
335         lu_object_put(env, &dir_obj->do_lu);
336 }
337
338 static void __mdd_lfsck_set_speed(struct md_lfsck *lfsck, __u32 limit)
339 {
340         lfsck->ml_bookmark_ram.lb_speed_limit = limit;
341         if (limit != LFSCK_SPEED_NO_LIMIT) {
342                 if (limit > CFS_HZ) {
343                         lfsck->ml_sleep_rate = limit / CFS_HZ;
344                         lfsck->ml_sleep_jif = 1;
345                 } else {
346                         lfsck->ml_sleep_rate = 1;
347                         lfsck->ml_sleep_jif = CFS_HZ / limit;
348                 }
349         } else {
350                 lfsck->ml_sleep_jif = 0;
351                 lfsck->ml_sleep_rate = 0;
352         }
353 }
354
355 static void mdd_lfsck_control_speed(struct md_lfsck *lfsck)
356 {
357         struct ptlrpc_thread *thread = &lfsck->ml_thread;
358         struct l_wait_info    lwi;
359
360         if (lfsck->ml_sleep_jif > 0 &&
361             lfsck->ml_new_scanned >= lfsck->ml_sleep_rate) {
362                 spin_lock(&lfsck->ml_lock);
363                 if (likely(lfsck->ml_sleep_jif > 0 &&
364                            lfsck->ml_new_scanned >= lfsck->ml_sleep_rate)) {
365                         lwi = LWI_TIMEOUT_INTR(lfsck->ml_sleep_jif, NULL,
366                                                LWI_ON_SIGNAL_NOOP, NULL);
367                         spin_unlock(&lfsck->ml_lock);
368
369                         l_wait_event(thread->t_ctl_waitq,
370                                      !thread_is_running(thread),
371                                      &lwi);
372                         lfsck->ml_new_scanned = 0;
373                 } else {
374                         spin_unlock(&lfsck->ml_lock);
375                 }
376         }
377 }
378
379 /* lfsck_bookmark file ops */
380
381 static void inline mdd_lfsck_bookmark_to_cpu(struct lfsck_bookmark *des,
382                                              struct lfsck_bookmark *src)
383 {
384         des->lb_magic = le32_to_cpu(src->lb_magic);
385         des->lb_version = le16_to_cpu(src->lb_version);
386         des->lb_param = le16_to_cpu(src->lb_param);
387         des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit);
388 }
389
390 static void inline mdd_lfsck_bookmark_to_le(struct lfsck_bookmark *des,
391                                             struct lfsck_bookmark *src)
392 {
393         des->lb_magic = cpu_to_le32(src->lb_magic);
394         des->lb_version = cpu_to_le16(src->lb_version);
395         des->lb_param = cpu_to_le16(src->lb_param);
396         des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit);
397 }
398
399 static int mdd_lfsck_bookmark_load(const struct lu_env *env,
400                                    struct md_lfsck *lfsck)
401 {
402         loff_t pos = 0;
403         int    len = sizeof(struct lfsck_bookmark);
404         int    rc;
405
406         rc = dt_record_read(env, lfsck->ml_bookmark_obj,
407                             mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
408                             &pos);
409         if (rc == 0) {
410                 struct lfsck_bookmark *bm = &lfsck->ml_bookmark_ram;
411
412                 mdd_lfsck_bookmark_to_cpu(bm, &lfsck->ml_bookmark_disk);
413                 if (bm->lb_magic != LFSCK_BOOKMARK_MAGIC) {
414                         CWARN("%.16s: invalid lfsck_bookmark magic "
415                               "0x%x != 0x%x\n", mdd_lfsck2name(lfsck),
416                               bm->lb_magic, LFSCK_BOOKMARK_MAGIC);
417                         /* Process it as new lfsck_bookmark. */
418                         rc = -ENODATA;
419                 }
420         } else {
421                 if (rc == -EFAULT && pos == 0)
422                         /* return -ENODATA for empty lfsck_bookmark. */
423                         rc = -ENODATA;
424                 else
425                         CERROR("%.16s: fail to load lfsck_bookmark, "
426                                "expected = %d, rc = %d\n",
427                                mdd_lfsck2name(lfsck), len, rc);
428         }
429         return rc;
430 }
431
432 static int mdd_lfsck_bookmark_store(const struct lu_env *env,
433                                     struct md_lfsck *lfsck)
434 {
435         struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
436         struct thandle    *handle;
437         struct dt_object  *obj    = lfsck->ml_bookmark_obj;
438         loff_t             pos    = 0;
439         int                len    = sizeof(struct lfsck_bookmark);
440         int                rc;
441         ENTRY;
442
443         mdd_lfsck_bookmark_to_le(&lfsck->ml_bookmark_disk,
444                                  &lfsck->ml_bookmark_ram);
445         handle = dt_trans_create(env, mdd->mdd_bottom);
446         if (IS_ERR(handle)) {
447                 rc = PTR_ERR(handle);
448                 CERROR("%.16s: fail to create trans for storing "
449                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
450                 RETURN(rc);
451         }
452
453         rc = dt_declare_record_write(env, obj, len, 0, handle);
454         if (rc != 0) {
455                 CERROR("%.16s: fail to declare trans for storing "
456                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
457                 GOTO(out, rc);
458         }
459
460         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
461         if (rc != 0) {
462                 CERROR("%.16s: fail to start trans for storing "
463                        "lfsck_bookmark: %d\n,", mdd_lfsck2name(lfsck), rc);
464                 GOTO(out, rc);
465         }
466
467         rc = dt_record_write(env, obj,
468                              mdd_buf_get(env, &lfsck->ml_bookmark_disk, len),
469                              &pos, handle);
470         if (rc != 0)
471                 CERROR("%.16s: fail to store lfsck_bookmark, expected = %d, "
472                        "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
473
474         GOTO(out, rc);
475
476 out:
477         dt_trans_stop(env, mdd->mdd_bottom, handle);
478         return rc;
479 }
480
481 static int mdd_lfsck_bookmark_init(const struct lu_env *env,
482                                    struct md_lfsck *lfsck)
483 {
484         struct lfsck_bookmark *mb = &lfsck->ml_bookmark_ram;
485         int rc;
486
487         memset(mb, 0, sizeof(mb));
488         mb->lb_magic = LFSCK_BOOKMARK_MAGIC;
489         mb->lb_version = LFSCK_VERSION_V2;
490         mutex_lock(&lfsck->ml_mutex);
491         rc = mdd_lfsck_bookmark_store(env, lfsck);
492         mutex_unlock(&lfsck->ml_mutex);
493         return rc;
494 }
495
496 /* lfsck_namespace file ops */
497
498 static void inline mdd_lfsck_position_to_cpu(struct lfsck_position *des,
499                                              struct lfsck_position *src)
500 {
501         des->lp_oit_cookie = le64_to_cpu(src->lp_oit_cookie);
502         fid_le_to_cpu(&des->lp_dir_parent, &src->lp_dir_parent);
503         des->lp_dir_cookie = le64_to_cpu(src->lp_dir_cookie);
504 }
505
506 static void inline mdd_lfsck_position_to_le(struct lfsck_position *des,
507                                              struct lfsck_position *src)
508 {
509         des->lp_oit_cookie = cpu_to_le64(src->lp_oit_cookie);
510         fid_cpu_to_le(&des->lp_dir_parent, &src->lp_dir_parent);
511         des->lp_dir_cookie = cpu_to_le64(src->lp_dir_cookie);
512 }
513
514 static void inline mdd_lfsck_namespace_to_cpu(struct lfsck_namespace *des,
515                                               struct lfsck_namespace *src)
516 {
517         des->ln_magic = le32_to_cpu(src->ln_magic);
518         des->ln_status = le32_to_cpu(src->ln_status);
519         des->ln_flags = le32_to_cpu(src->ln_flags);
520         des->ln_success_count = le32_to_cpu(src->ln_success_count);
521         des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
522         des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
523         des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
524         des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
525         des->ln_time_last_checkpoint =
526                                 le64_to_cpu(src->ln_time_last_checkpoint);
527         mdd_lfsck_position_to_cpu(&des->ln_pos_latest_start,
528                                   &src->ln_pos_latest_start);
529         mdd_lfsck_position_to_cpu(&des->ln_pos_last_checkpoint,
530                                   &src->ln_pos_last_checkpoint);
531         mdd_lfsck_position_to_cpu(&des->ln_pos_first_inconsistent,
532                                   &src->ln_pos_first_inconsistent);
533         des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
534         des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
535         des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
536         des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
537         des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
538         des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
539         des->ln_objs_repaired_phase2 =
540                                 le64_to_cpu(src->ln_objs_repaired_phase2);
541         des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
542         des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
543         des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
544         fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
545                       &src->ln_fid_latest_scanned_phase2);
546 }
547
548 static void inline mdd_lfsck_namespace_to_le(struct lfsck_namespace *des,
549                                              struct lfsck_namespace *src)
550 {
551         des->ln_magic = cpu_to_le32(src->ln_magic);
552         des->ln_status = cpu_to_le32(src->ln_status);
553         des->ln_flags = cpu_to_le32(src->ln_flags);
554         des->ln_success_count = cpu_to_le32(src->ln_success_count);
555         des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
556         des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
557         des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
558         des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
559         des->ln_time_last_checkpoint =
560                                 cpu_to_le64(src->ln_time_last_checkpoint);
561         mdd_lfsck_position_to_le(&des->ln_pos_latest_start,
562                                  &src->ln_pos_latest_start);
563         mdd_lfsck_position_to_le(&des->ln_pos_last_checkpoint,
564                                  &src->ln_pos_last_checkpoint);
565         mdd_lfsck_position_to_le(&des->ln_pos_first_inconsistent,
566                                  &src->ln_pos_first_inconsistent);
567         des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
568         des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
569         des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
570         des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
571         des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
572         des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
573         des->ln_objs_repaired_phase2 =
574                                 cpu_to_le64(src->ln_objs_repaired_phase2);
575         des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
576         des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
577         des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
578         fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
579                       &src->ln_fid_latest_scanned_phase2);
580 }
581
582 /**
583  * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
584  * \retval 0: succeed.
585  * \retval -ve: failed cases.
586  */
587 static int mdd_lfsck_namespace_load(const struct lu_env *env,
588                                     struct lfsck_component *com)
589 {
590         int len = com->lc_file_size;
591         int rc;
592
593         rc = dt_xattr_get(env, com->lc_obj,
594                           mdd_buf_get(env, com->lc_file_disk, len),
595                           XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
596         if (rc == len) {
597                 struct lfsck_namespace *ns = com->lc_file_ram;
598
599                 mdd_lfsck_namespace_to_cpu(ns,
600                                 (struct lfsck_namespace *)com->lc_file_disk);
601                 if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
602                         CWARN("%.16s: invalid lfsck_namespace magic "
603                               "0x%x != 0x%x\n",
604                               mdd_lfsck2name(com->lc_lfsck),
605                               ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
606                         rc = 1;
607                 } else {
608                         rc = 0;
609                 }
610         } else if (rc != -ENODATA) {
611                 CERROR("%.16s: fail to load lfsck_namespace, expected = %d, "
612                        "rc = %d\n", mdd_lfsck2name(com->lc_lfsck), len, rc);
613                 if (rc >= 0)
614                         rc = 1;
615         }
616         return rc;
617 }
618
619 static int mdd_lfsck_namespace_store(const struct lu_env *env,
620                                      struct lfsck_component *com, bool init)
621 {
622         struct dt_object  *obj    = com->lc_obj;
623         struct md_lfsck   *lfsck  = com->lc_lfsck;
624         struct mdd_device *mdd    = mdd_lfsck2mdd(lfsck);
625         struct thandle    *handle;
626         int                len    = com->lc_file_size;
627         int                rc;
628         ENTRY;
629
630         mdd_lfsck_namespace_to_le((struct lfsck_namespace *)com->lc_file_disk,
631                                   (struct lfsck_namespace *)com->lc_file_ram);
632         handle = dt_trans_create(env, mdd->mdd_bottom);
633         if (IS_ERR(handle)) {
634                 rc = PTR_ERR(handle);
635                 CERROR("%.16s: fail to create trans for storing "
636                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
637                 RETURN(rc);
638         }
639
640         rc = dt_declare_xattr_set(env, obj,
641                                   mdd_buf_get(env, com->lc_file_disk, len),
642                                   XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
643         if (rc != 0) {
644                 CERROR("%.16s: fail to declare trans for storing "
645                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
646                 GOTO(out, rc);
647         }
648
649         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
650         if (rc != 0) {
651                 CERROR("%.16s: fail to start trans for storing "
652                        "lfsck_namespace: %d\n,", mdd_lfsck2name(lfsck), rc);
653                 GOTO(out, rc);
654         }
655
656         rc = dt_xattr_set(env, obj,
657                           mdd_buf_get(env, com->lc_file_disk, len),
658                           XATTR_NAME_LFSCK_NAMESPACE,
659                           init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
660                           handle, BYPASS_CAPA);
661         if (rc != 0)
662                 CERROR("%.16s: fail to store lfsck_namespace, len = %d, "
663                        "rc = %d\n", mdd_lfsck2name(lfsck), len, rc);
664
665         GOTO(out, rc);
666
667 out:
668         dt_trans_stop(env, mdd->mdd_bottom, handle);
669         return rc;
670 }
671
672 static int mdd_lfsck_namespace_init(const struct lu_env *env,
673                                     struct lfsck_component *com)
674 {
675         struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
676         int rc;
677
678         memset(ns, 0, sizeof(*ns));
679         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
680         ns->ln_status = LS_INIT;
681         down_write(&com->lc_sem);
682         rc = mdd_lfsck_namespace_store(env, com, true);
683         up_write(&com->lc_sem);
684         return rc;
685 }
686
/**
 * Declare, on \a handle, the operations needed to unlink \a c from \a p:
 * deleting entry \a name from the parent, dropping a reference on the child
 * and destroying the child.  Stops at the first declaration that fails.
 * \a mdd is not used here.
 *
 * \retval 0   all three declarations succeeded
 * \retval -ve error from the first failing declaration
 */
static int mdd_declare_lfsck_namespace_unlink(const struct lu_env *env,
                                              struct mdd_device *mdd,
                                              struct dt_object *p,
                                              struct dt_object *c,
                                              const char *name,
                                              struct thandle *handle)
{
        int rc;

        rc = dt_declare_delete(env, p, (const struct dt_key *)name, handle);
        if (rc == 0)
                rc = dt_declare_ref_del(env, c, handle);
        if (rc == 0)
                rc = dt_declare_destroy(env, c, handle);

        return rc;
}
707
708 static int mdd_lfsck_namespace_unlink(const struct lu_env *env,
709                                       struct mdd_device *mdd,
710                                       struct lfsck_component *com)
711 {
712         struct mdd_thread_info  *info   = mdd_env_info(env);
713         struct lu_fid           *fid    = &info->mti_fid;
714         struct dt_object        *child  = com->lc_obj;
715         struct dt_object        *parent;
716         struct thandle          *handle;
717         int                      rc;
718         ENTRY;
719
720         parent = dt_store_resolve(env, mdd->mdd_bottom, "", fid);
721         if (IS_ERR(parent))
722                 RETURN(rc = PTR_ERR(parent));
723
724         if (dt_try_as_dir(env, parent))
725                 GOTO(out, rc = -ENOTDIR);
726
727         handle = dt_trans_create(env, mdd->mdd_bottom);
728         if (IS_ERR(handle))
729                 GOTO(out, rc = PTR_ERR(handle));
730
731         rc = mdd_declare_lfsck_namespace_unlink(env, mdd, parent, child,
732                                                 lfsck_namespace_name, handle);
733         if (rc != 0)
734                 GOTO(stop, rc);
735
736         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
737         if (rc != 0)
738                 GOTO(stop, rc);
739
740         rc = dt_delete(env, parent, (struct dt_key *)lfsck_namespace_name,
741                        handle, BYPASS_CAPA);
742         if (rc != 0)
743                 GOTO(stop, rc);
744
745         rc = child->do_ops->do_ref_del(env, child, handle);
746         if (rc != 0) {
747                 lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
748                 rc = dt_insert(env, parent,
749                                (const struct dt_rec*)fid,
750                                (const struct dt_key *)lfsck_namespace_name,
751                                handle, BYPASS_CAPA, 1);
752
753                 GOTO(stop, rc);
754         }
755
756
757         rc = dt_destroy(env, child, handle);
758         if (rc == 0) {
759                 lu_object_put(env, &child->do_lu);
760                 com->lc_obj = NULL;
761         }
762
763         GOTO(stop, rc);
764
765 stop:
766         dt_trans_stop(env, mdd->mdd_bottom, handle);
767
768 out:
769         lu_object_put(env, &parent->do_lu);
770         return rc;
771 }
772
773 static int mdd_lfsck_namespace_lookup(const struct lu_env *env,
774                                       struct lfsck_component *com,
775                                       const struct lu_fid *fid,
776                                       __u8 *flags)
777 {
778         struct lu_fid *key = &mdd_env_info(env)->mti_fid;
779         int            rc;
780
781         fid_cpu_to_be(key, fid);
782         rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
783                        (const struct dt_key *)key, BYPASS_CAPA);
784         return rc;
785 }
786
787 static int mdd_lfsck_namespace_update(const struct lu_env *env,
788                                       struct lfsck_component *com,
789                                       const struct lu_fid *fid,
790                                       __u8 flags, bool force)
791 {
792         struct mdd_device *mdd    = mdd_lfsck2mdd(com->lc_lfsck);
793         struct lu_fid     *key    = &mdd_env_info(env)->mti_fid;
794         struct thandle    *handle;
795         struct dt_object *obj     = com->lc_obj;
796         int               rc;
797         bool              exist   = false;
798         __u8              tf;
799         ENTRY;
800
801         rc = mdd_lfsck_namespace_lookup(env, com, fid, &tf);
802         if (rc != 0 && rc != -ENOENT)
803                 RETURN(rc);
804
805         if (rc == 0) {
806                 if (!force || flags == tf)
807                         RETURN(0);
808
809                 exist = true;
810                 handle = dt_trans_create(env, mdd->mdd_bottom);
811                 if (IS_ERR(handle))
812                         RETURN(PTR_ERR(handle));
813
814                 rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
815                                        handle);
816                 if (rc != 0)
817                         GOTO(out, rc);
818         } else {
819                 handle = dt_trans_create(env, mdd->mdd_bottom);
820                 if (IS_ERR(handle))
821                         RETURN(PTR_ERR(handle));
822         }
823
824         rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
825                                (const struct dt_key *)fid, handle);
826         if (rc != 0)
827                 GOTO(out, rc);
828
829         rc = dt_trans_start_local(env, mdd->mdd_bottom, handle);
830         if (rc != 0)
831                 GOTO(out, rc);
832
833         fid_cpu_to_be(key, fid);
834         if (exist) {
835                 rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
836                                BYPASS_CAPA);
837                 if (rc != 0) {
838                         CERROR("%s: fail to insert "DFID", rc = %d\n",
839                                mdd_lfsck2name(com->lc_lfsck), PFID(fid), rc);
840                         GOTO(out, rc);
841                 }
842         }
843
844         rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
845                        (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
846
847         GOTO(out, rc);
848
849 out:
850         dt_trans_stop(env, mdd->mdd_bottom, handle);
851         return rc;
852 }
853
854 /* namespace APIs */
855
856 static int mdd_lfsck_namespace_reset(const struct lu_env *env,
857                                      struct lfsck_component *com, bool init)
858 {
859         struct mdd_thread_info  *info = mdd_env_info(env);
860         struct lu_fid           *fid  = &info->mti_fid;
861         struct lfsck_namespace  *ns   = (struct lfsck_namespace *)com->lc_file_ram;
862         struct mdd_device       *mdd  = mdd_lfsck2mdd(com->lc_lfsck);
863         struct md_object        *mdo;
864         struct dt_object        *dto;
865         int                      rc;
866         ENTRY;
867
868         down_write(&com->lc_sem);
869         if (init) {
870                 memset(ns, 0, sizeof(*ns));
871         } else {
872                 __u32 count = ns->ln_success_count;
873                 __u64 last_time = ns->ln_time_last_complete;
874
875                 memset(ns, 0, sizeof(*ns));
876                 ns->ln_success_count = count;
877                 ns->ln_time_last_complete = last_time;
878         }
879         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
880         ns->ln_status = LS_INIT;
881
882         rc = mdd_lfsck_namespace_unlink(env, mdd, com);
883         if (rc != 0)
884                 GOTO(out, rc);
885
886         lu_local_obj_fid(fid, LFSCK_NAMESPACE_OID);
887         mdo = llo_store_create_index(env, &mdd->mdd_md_dev, mdd->mdd_bottom, "",
888                                      lfsck_namespace_name, fid,
889                                      &dt_lfsck_features);
890         if (IS_ERR(mdo))
891                 GOTO(out, rc = PTR_ERR(mdo));
892
893         lu_object_put(env, &mdo->mo_lu);
894         dto = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name, fid);
895         if (IS_ERR(dto))
896                 GOTO(out, rc = PTR_ERR(dto));
897
898         com->lc_obj = dto;
899         rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
900         if (rc != 0)
901                 GOTO(out, rc);
902
903         rc = mdd_lfsck_namespace_store(env, com, true);
904
905         GOTO(out, rc);
906
907 out:
908         up_write(&com->lc_sem);
909         return rc;
910 }
911
912 static void
913 mdd_lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
914                          bool oit, bool new_checked)
915 {
916         struct lfsck_namespace *ns = (struct lfsck_namespace *)com->lc_file_ram;
917
918         down_write(&com->lc_sem);
919         if (new_checked)
920                 com->lc_new_checked++;
921         ns->ln_items_failed++;
922         if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
923                 mdd_lfsck_pos_fill(env, com->lc_lfsck,
924                                    &ns->ln_pos_first_inconsistent, oit, !oit);
925         up_write(&com->lc_sem);
926 }
927
928 static int mdd_lfsck_namespace_checkpoint(const struct lu_env *env,
929                                           struct lfsck_component *com,
930                                           bool init)
931 {
932         struct md_lfsck         *lfsck = com->lc_lfsck;
933         struct lfsck_namespace  *ns    =
934                                 (struct lfsck_namespace *)com->lc_file_ram;
935         int                      rc;
936
937         if (com->lc_new_checked == 0 && !init)
938                 return 0;
939
940         down_write(&com->lc_sem);
941
942         ns->ln_pos_last_checkpoint = lfsck->ml_pos_current;
943         if (init) {
944                 ns->ln_time_last_checkpoint = ns->ln_time_latest_start;
945                 ns->ln_pos_latest_start = lfsck->ml_pos_current;
946         } else {
947                 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
948                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
949                 ns->ln_time_last_checkpoint = cfs_time_current_sec();
950                 ns->ln_items_checked += com->lc_new_checked;
951                 com->lc_new_checked = 0;
952         }
953
954         rc = mdd_lfsck_namespace_store(env, com, false);
955
956         up_write(&com->lc_sem);
957         return rc;
958 }
959
960 static int mdd_lfsck_namespace_prep(const struct lu_env *env,
961                                     struct lfsck_component *com)
962 {
963         struct md_lfsck         *lfsck  = com->lc_lfsck;
964         struct lfsck_namespace  *ns     =
965                                 (struct lfsck_namespace *)com->lc_file_ram;
966         struct lfsck_position   *pos    = &com->lc_pos_start;
967
968         if (ns->ln_status == LS_COMPLETED) {
969                 int rc;
970
971                 rc = mdd_lfsck_namespace_reset(env, com, false);
972                 if (rc != 0)
973                         return rc;
974         }
975
976         down_write(&com->lc_sem);
977
978         ns->ln_time_latest_start = cfs_time_current_sec();
979
980         spin_lock(&lfsck->ml_lock);
981         if (ns->ln_flags & LF_SCANNED_ONCE) {
982                 if (!lfsck->ml_drop_dryrun ||
983                     mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
984                         ns->ln_status = LS_SCANNING_PHASE2;
985                         cfs_list_del_init(&com->lc_link);
986                         cfs_list_add_tail(&com->lc_link,
987                                           &lfsck->ml_list_double_scan);
988                         if (!cfs_list_empty(&com->lc_link_dir))
989                                 cfs_list_del_init(&com->lc_link_dir);
990                         mdd_lfsck_pos_set_zero(pos);
991                 } else {
992                         ns->ln_status = LS_SCANNING_PHASE1;
993                         ns->ln_run_time_phase1 = 0;
994                         ns->ln_run_time_phase2 = 0;
995                         ns->ln_items_checked = 0;
996                         ns->ln_items_repaired = 0;
997                         ns->ln_items_failed = 0;
998                         ns->ln_dirs_checked = 0;
999                         ns->ln_mlinked_checked = 0;
1000                         ns->ln_objs_checked_phase2 = 0;
1001                         ns->ln_objs_repaired_phase2 = 0;
1002                         ns->ln_objs_failed_phase2 = 0;
1003                         ns->ln_objs_nlink_repaired = 0;
1004                         ns->ln_objs_lost_found = 0;
1005                         fid_zero(&ns->ln_fid_latest_scanned_phase2);
1006                         if (cfs_list_empty(&com->lc_link_dir))
1007                                 cfs_list_add_tail(&com->lc_link_dir,
1008                                                   &lfsck->ml_list_dir);
1009                         *pos = ns->ln_pos_first_inconsistent;
1010                 }
1011         } else {
1012                 ns->ln_status = LS_SCANNING_PHASE1;
1013                 if (cfs_list_empty(&com->lc_link_dir))
1014                         cfs_list_add_tail(&com->lc_link_dir,
1015                                           &lfsck->ml_list_dir);
1016                 if (!lfsck->ml_drop_dryrun ||
1017                     mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
1018                         *pos = ns->ln_pos_last_checkpoint;
1019                         pos->lp_oit_cookie++;
1020                         if (!fid_is_zero(&pos->lp_dir_parent)) {
1021                                 if (pos->lp_dir_cookie == MDS_DIR_END_OFF) {
1022                                         fid_zero(&pos->lp_dir_parent);
1023                                 } else {
1024                                         pos->lp_dir_cookie++;
1025                                 }
1026                         }
1027                 } else {
1028                         *pos = ns->ln_pos_first_inconsistent;
1029                 }
1030         }
1031         spin_unlock(&lfsck->ml_lock);
1032
1033         up_write(&com->lc_sem);
1034         return 0;
1035 }
1036
1037 static int mdd_lfsck_namespace_exec_oit(const struct lu_env *env,
1038                                         struct lfsck_component *com,
1039                                         struct mdd_object *obj)
1040 {
1041         down_write(&com->lc_sem);
1042         com->lc_new_checked++;
1043         if (S_ISDIR(mdd_object_type(obj)))
1044                 ((struct lfsck_namespace *)com->lc_file_ram)->ln_dirs_checked++;
1045         up_write(&com->lc_sem);
1046         return 0;
1047 }
1048
1049 static int mdd_declare_lfsck_namespace_exec_dir(const struct lu_env *env,
1050                                                 struct mdd_object *obj,
1051                                                 struct thandle *handle)
1052 {
1053         int rc;
1054
1055         /* For destroying all invalid linkEA entries. */
1056         rc = mdo_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
1057         if (rc != 0)
1058                 return rc;
1059
1060         /* For insert new linkEA entry. */
1061         rc = mdd_declare_links_add(env, obj, handle);
1062         return rc;
1063 }
1064
1065 static int mdd_lfsck_namespace_check_exist(const struct lu_env *env,
1066                                            struct md_lfsck *lfsck,
1067                                            struct mdd_object *obj,
1068                                            const char *name)
1069 {
1070         struct dt_object *dir = lfsck->ml_obj_dir;
1071         struct lu_fid    *fid = &mdd_env_info(env)->mti_fid;
1072         int               rc;
1073         ENTRY;
1074
1075         if (unlikely(mdd_is_dead_obj(obj)))
1076                 RETURN(LFSCK_NAMEENTRY_DEAD);
1077
1078         rc = dt_lookup(env, dir, (struct dt_rec *)fid,
1079                        (const struct dt_key *)name, BYPASS_CAPA);
1080         if (rc == -ENOENT)
1081                 RETURN(LFSCK_NAMEENTRY_REMOVED);
1082
1083         if (rc < 0)
1084                 RETURN(rc);
1085
1086         if (!lu_fid_eq(fid, mdo2fid(obj)))
1087                 RETURN(LFSCK_NAMEENTRY_RECREATED);
1088
1089         RETURN(0);
1090 }
1091
/*
 * Namespace LFSCK handler for one directory entry \a ent, which refers to
 * \a obj, met while scanning the directory lfsck->ml_obj_dir.
 *
 * Verifies that the linkEA (XATTR_NAME_LINK) of \a obj holds a record for
 * <FID of the scanned directory, entry name>.  Unless LPF_DRYRUN is set,
 * a missing (-ENODATA), unreadable (-EINVAL) or unmatched linkEA is
 * rewritten inside a transaction with \a obj write-locked.  Afterwards the
 * object is recorded through mdd_lfsck_namespace_update(), unless it has
 * exactly one linkEA record and is either a directory or has nlink == 1;
 * the record carries LLF_UNMATCH_NLINKS when the linkEA record count
 * differs from la_nlink.
 *
 * The whole function runs with com->lc_sem held for write.
 *
 * Returns 0, or a negative errno only when LPF_FAILOUT is set; without
 * LPF_FAILOUT a failure is just counted in ln_items_failed.
 */
1092 static int mdd_lfsck_namespace_exec_dir(const struct lu_env *env,
1093                                         struct lfsck_component *com,
1094                                         struct mdd_object *obj,
1095                                         struct lu_dirent *ent)
1096 {
1097         struct mdd_thread_info     *info     = mdd_env_info(env);
1098         struct lu_attr             *la       = &info->mti_la;
1099         struct md_lfsck            *lfsck    = com->lc_lfsck;
1100         struct lfsck_bookmark      *bk       = &lfsck->ml_bookmark_ram;
1101         struct lfsck_namespace     *ns       =
1102                                 (struct lfsck_namespace *)com->lc_file_ram;
1103         struct mdd_device          *mdd      = mdd_lfsck2mdd(lfsck);
1104         struct mdd_link_data        ldata    = { 0 };
1105         const struct lu_fid        *pfid     =
1106                                 lu_object_fid(&lfsck->ml_obj_dir->do_lu);
1107         const struct lu_fid        *cfid     = mdo2fid(obj);
1108         const struct lu_name       *cname;
1109         struct thandle             *handle   = NULL;
1110         bool                        repaired = false;
1111         bool                        locked   = false;
1112         int                         count    = 0;
1113         int                         rc;
1114         ENTRY;
1115
1116         cname = mdd_name_get_const(env, ent->lde_name, ent->lde_namelen);
1117         down_write(&com->lc_sem);
1118         com->lc_new_checked++;
1119
             /* The entry itself is flagged LUDA_UPGRADE or LUDA_REPAIR
              * (presumably by the lower-layer iterator -- confirm): reflect
              * that in ln_flags and count the item as repaired. */
1120         if (ent->lde_attrs & LUDA_UPGRADE) {
1121                 ns->ln_flags |= LF_UPGRADE;
1122                 repaired = true;
1123         } else if (ent->lde_attrs & LUDA_REPAIR) {
1124                 ns->ln_flags |= LF_INCONSISTENT;
1125                 repaired = true;
1126         }
1127
             /* Nothing to verify for the "." and ".." entries. */
1128         if (ent->lde_name[0] == '.' &&
1129             (ent->lde_namelen == 1 ||
1130              (ent->lde_namelen == 2 && ent->lde_name[1] == '.')))
1131                 GOTO(out, rc = 0);
1132
             /* Open the transaction up front when not in dryrun mode and
              * either journal mode is already on or the entry was flagged
              * above.  Otherwise the checks below run without a transaction
              * and jump back to "again" once a repair turns out to be
              * needed; lc_journal is set here, so that jump is taken at
              * most once per call. */
1133         if (!(bk->lb_param & LPF_DRYRUN) &&
1134             (com->lc_journal || repaired)) {
1135
1136 again:
1137                 LASSERT(!locked);
1138
1139                 com->lc_journal = 1;
1140                 handle = mdd_trans_create(env, mdd);
1141                 if (IS_ERR(handle))
1142                         GOTO(out, rc = PTR_ERR(handle));
1143
1144                 rc = mdd_declare_lfsck_namespace_exec_dir(env, obj, handle);
1145                 if (rc != 0)
1146                         GOTO(stop, rc);
1147
1148                 rc = mdd_trans_start(env, mdd, handle);
1149                 if (rc != 0)
1150                         GOTO(stop, rc);
1151
1152                 mdd_write_lock(env, obj, MOR_TGT_CHILD);
1153                 locked = true;
1154         }
1155
             /* Non-zero means either an error (negative) or that the object
              * is dead / the entry was removed or recreated (positive
              * LFSCK_NAMEENTRY_* value); in both cases skip the linkEA
              * verification. */
1156         rc = mdd_lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
1157         if (rc != 0)
1158                 GOTO(stop, rc);
1159
1160         rc = mdd_links_read(env, obj, &ldata);
1161         if (rc == 0) {
1162                 count = ldata.ml_leh->leh_reccount;
1163                 rc = mdd_links_find(env, obj, &ldata, cname, pfid);
1164                 if (rc == 0) {
1165                         /* For dir, if there are more than one linkea entries,
1166                          * then remove all the other redundant linkea entries.*/
1167                         if (unlikely(count > 1 &&
1168                                      S_ISDIR(mdd_object_type(obj))))
1169                                 goto unmatch;
1170
1171                         goto record;
1172                 } else {
1173
1174 unmatch:
1175                         ns->ln_flags |= LF_INCONSISTENT;
1176                         if (bk->lb_param & LPF_DRYRUN) {
1177                                 repaired = true;
1178                                 goto record;
1179                         }
1180
1181                         /*For dir, remove the unmatched linkea entry directly.*/
1182                         if (S_ISDIR(mdd_object_type(obj))) {
1183                                 if (!com->lc_journal)
1184                                         goto again;
1185
1186                                 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK,
1187                                                    handle, BYPASS_CAPA);
1188                                 if (rc != 0)
1189                                         GOTO(stop, rc);
1190
1191                                 goto nodata;
1192                         } else {
1193                                 goto add;
1194                         }
1195                 }
1196         } else if (unlikely(rc == -EINVAL)) {
1197                 ns->ln_flags |= LF_INCONSISTENT;
1198                 if (bk->lb_param & LPF_DRYRUN) {
1199                         count = 1;
1200                         repaired = true;
1201                         goto record;
1202                 }
1203
1204                 if (!com->lc_journal)
1205                         goto again;
1206
1207                 /* The magic crashed, we are not sure whether there are more
1208                  * corrupt data in the linkea, so remove all linkea entries. */
1209                 rc = mdo_xattr_del(env, obj, XATTR_NAME_LINK, handle,
1210                                    BYPASS_CAPA);
1211                 if (rc != 0)
1212                         GOTO(stop, rc);
1213
1214                 goto nodata;
1215         } else if (rc == -ENODATA) {
                     /* No linkEA at all: flagged as an upgrade case rather
                      * than an inconsistency. */
1216                 ns->ln_flags |= LF_UPGRADE;
1217                 if (bk->lb_param & LPF_DRYRUN) {
1218                         count = 1;
1219                         repaired = true;
1220                         goto record;
1221                 }
1222
                     /* Start from an empty linkEA buffer; also the target of
                      * the paths above that removed the old xattr. */
1223 nodata:
1224                 rc = mdd_links_new(env, &ldata);
1225                 if (rc != 0)
1226                         GOTO(stop, rc);
1227
                     /* Append the <pfid, cname> record to the buffer and write
                      * the linkEA back inside the transaction. */
1228 add:
1229                 if (!com->lc_journal)
1230                         goto again;
1231
1232                 rc = mdd_links_add_buf(env, &ldata, cname, pfid);
1233                 if (rc != 0)
1234                         GOTO(stop, rc);
1235
1236                 rc = mdd_links_write(env, obj, &ldata, handle);
1237                 if (rc != 0)
1238                         GOTO(stop, rc);
1239
1240                 count = ldata.ml_leh->leh_reccount;
1241                 repaired = true;
1242         } else {
1243                 GOTO(stop, rc);
1244         }
1245
             /* "count" is the number of linkEA records (set to 1 on the
              * dryrun paths where no linkEA could be read). */
1246 record:
1247         LASSERT(count > 0);
1248
1249         rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
1250         if (rc != 0)
1251                 GOTO(stop, rc);
1252
1253         if ((count == 1) &&
1254             (la->la_nlink == 1 || S_ISDIR(mdd_object_type(obj))))
1255                 /* Usually, it is for single linked object or dir, do nothing.*/
1256                 GOTO(stop, rc);
1257
1258         /* Following modification will be in another transaction.  */
1259         if (handle != NULL) {
1260                 LASSERT(mdd_write_locked(env, obj));
1261
1262                 mdd_write_unlock(env, obj);
1263                 locked = false;
1264
1265                 mdd_trans_stop(env, mdd, 0, handle);
1266                 handle = NULL;
1267         }
1268
             /* Record the object in the namespace LFSCK index; force is
              * false, so an already existing record is left unchanged. */
1269         ns->ln_mlinked_checked++;
1270         rc = mdd_lfsck_namespace_update(env, com, cfid,
1271                         count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
1272
1273         GOTO(out, rc);
1274
1275 stop:
1276         if (locked)
1277                 mdd_write_unlock(env, obj);
1278
1279         if (handle != NULL)
1280                 mdd_trans_stop(env, mdd, rc, handle);
1281
             /* Only negative rc counts as a failure; a positive value (from
              * mdd_lfsck_namespace_check_exist()) is treated as success.
              * With nothing repaired, journal mode is switched off again. */
1282 out:
1283         if (rc < 0) {
1284                 ns->ln_items_failed++;
1285                 if (mdd_lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1286                         mdd_lfsck_pos_fill(env, lfsck,
1287                                            &ns->ln_pos_first_inconsistent,
1288                                            true, false);
1289                 if (!(bk->lb_param & LPF_FAILOUT))
1290                         rc = 0;
1291         } else {
1292                 if (repaired)
1293                         ns->ln_items_repaired++;
1294                 else
1295                         com->lc_journal = 0;
1296                 rc = 0;
1297         }
1298         up_write(&com->lc_sem);
1299         return rc;
1300 }
1301
1302 static int mdd_lfsck_namespace_post(const struct lu_env *env,
1303                                     struct lfsck_component *com,
1304                                     int result)
1305 {
1306         struct md_lfsck         *lfsck = com->lc_lfsck;
1307         struct lfsck_namespace  *ns    =
1308                                 (struct lfsck_namespace *)com->lc_file_ram;
1309         int                      rc;
1310
1311         down_write(&com->lc_sem);
1312
1313         spin_lock(&lfsck->ml_lock);
1314         if (result > 0) {
1315                 ns->ln_status = LS_SCANNING_PHASE2;
1316                 ns->ln_flags |= LF_SCANNED_ONCE;
1317                 ns->ln_flags &= ~LF_UPGRADE;
1318                 cfs_list_del_init(&com->lc_link);
1319                 cfs_list_del_init(&com->lc_link_dir);
1320                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_double_scan);
1321         } else if (result == 0) {
1322                 if (lfsck->ml_paused) {
1323                         ns->ln_status = LS_PAUSED;
1324                 } else {
1325                         ns->ln_status = LS_STOPPED;
1326                         cfs_list_del_init(&com->lc_link);
1327                         cfs_list_del_init(&com->lc_link_dir);
1328                         cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1329                 }
1330         } else {
1331                 ns->ln_status = LS_FAILED;
1332                 cfs_list_del_init(&com->lc_link);
1333                 cfs_list_del_init(&com->lc_link_dir);
1334                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1335         }
1336         spin_unlock(&lfsck->ml_lock);
1337
1338         ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1339                                 HALF_SEC - lfsck->ml_time_last_checkpoint);
1340         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1341         ns->ln_items_checked += com->lc_new_checked;
1342         com->lc_new_checked = 0;
1343
1344         rc = mdd_lfsck_namespace_store(env, com, false);
1345
1346         up_write(&com->lc_sem);
1347         return rc;
1348 }
1349
1350 static int
1351 mdd_lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
1352                          char *buf, int len)
1353 {
1354         struct md_lfsck         *lfsck = com->lc_lfsck;
1355         struct lfsck_bookmark   *bk    = &lfsck->ml_bookmark_ram;
1356         struct lfsck_namespace  *ns    =
1357                                 (struct lfsck_namespace *)com->lc_file_ram;
1358         int                      save  = len;
1359         int                      ret   = -ENOSPC;
1360         int                      rc;
1361
1362         down_read(&com->lc_sem);
1363         rc = snprintf(buf, len,
1364                       "name: lfsck_namespace\n"
1365                       "magic: 0x%x\n"
1366                       "version: %d\n"
1367                       "status: %s\n",
1368                       ns->ln_magic,
1369                       bk->lb_version,
1370                       lfsck_status_names[ns->ln_status]);
1371         if (rc <= 0)
1372                 goto out;
1373
1374         buf += rc;
1375         len -= rc;
1376         rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
1377                              "flags");
1378         if (rc < 0)
1379                 goto out;
1380
1381         rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
1382                              "param");
1383         if (rc < 0)
1384                 goto out;
1385
1386         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
1387                              "time_since_last_completed");
1388         if (rc < 0)
1389                 goto out;
1390
1391         rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
1392                              "time_since_latest_start");
1393         if (rc < 0)
1394                 goto out;
1395
1396         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
1397                              "time_since_last_checkpoint");
1398         if (rc < 0)
1399                 goto out;
1400
1401         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
1402                             "latest_start_position");
1403         if (rc < 0)
1404                 goto out;
1405
1406         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
1407                             "last_checkpoint_position");
1408         if (rc < 0)
1409                 goto out;
1410
1411         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
1412                             "first_failure_position");
1413         if (rc < 0)
1414                 goto out;
1415
1416         if (ns->ln_status == LS_SCANNING_PHASE1) {
1417                 struct lfsck_position pos;
1418                 cfs_duration_t duration = cfs_time_current() -
1419                                           lfsck->ml_time_last_checkpoint;
1420                 __u64 checked = ns->ln_items_checked + com->lc_new_checked;
1421                 __u64 speed = checked;
1422                 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1423                 __u32 rtime = ns->ln_run_time_phase1 +
1424                               cfs_duration_sec(duration + HALF_SEC);
1425
1426                 if (duration != 0)
1427                         do_div(new_checked, duration);
1428                 if (rtime != 0)
1429                         do_div(speed, rtime);
1430                 rc = snprintf(buf, len,
1431                               "checked_phase1: "LPU64"\n"
1432                               "checked_phase2: "LPU64"\n"
1433                               "updated_phase1: "LPU64"\n"
1434                               "updated_phase2: "LPU64"\n"
1435                               "failed_phase1: "LPU64"\n"
1436                               "failed_phase2: "LPU64"\n"
1437                               "dirs: "LPU64"\n"
1438                               "M-linked: "LPU64"\n"
1439                               "nlinks_repaired: "LPU64"\n"
1440                               "lost_found: "LPU64"\n"
1441                               "success_count: %u\n"
1442                               "run_time_phase1: %u seconds\n"
1443                               "run_time_phase2: %u seconds\n"
1444                               "average_speed_phase1: "LPU64" items/sec\n"
1445                               "average_speed_phase2: N/A\n"
1446                               "real-time_speed_phase1: "LPU64" items/sec\n"
1447                               "real-time_speed_phase2: N/A\n",
1448                               checked,
1449                               ns->ln_objs_checked_phase2,
1450                               ns->ln_items_repaired,
1451                               ns->ln_objs_repaired_phase2,
1452                               ns->ln_items_failed,
1453                               ns->ln_objs_failed_phase2,
1454                               ns->ln_dirs_checked,
1455                               ns->ln_mlinked_checked,
1456                               ns->ln_objs_nlink_repaired,
1457                               ns->ln_objs_lost_found,
1458                               ns->ln_success_count,
1459                               rtime,
1460                               ns->ln_run_time_phase2,
1461                               speed,
1462                               new_checked);
1463                 if (rc <= 0)
1464                         goto out;
1465
1466                 buf += rc;
1467                 len -= rc;
1468                 mdd_lfsck_pos_fill(env, lfsck, &pos, true, true);
1469                 rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
1470                 if (rc <= 0)
1471                         goto out;
1472         } else if (ns->ln_status == LS_SCANNING_PHASE2) {
1473                 cfs_duration_t duration = cfs_time_current() -
1474                                           lfsck->ml_time_last_checkpoint;
1475                 __u64 checked = ns->ln_objs_checked_phase2 +
1476                                 com->lc_new_checked;
1477                 __u64 speed1 = ns->ln_items_checked;
1478                 __u64 speed2 = checked;
1479                 __u64 new_checked = com->lc_new_checked * CFS_HZ;
1480                 __u32 rtime = ns->ln_run_time_phase2 +
1481                               cfs_duration_sec(duration + HALF_SEC);
1482
1483                 if (duration != 0)
1484                         do_div(new_checked, duration);
1485                 if (ns->ln_run_time_phase1 != 0)
1486                         do_div(speed1, ns->ln_run_time_phase1);
1487                 if (rtime != 0)
1488                         do_div(speed2, rtime);
1489                 rc = snprintf(buf, len,
1490                               "checked_phase1: "LPU64"\n"
1491                               "checked_phase2: "LPU64"\n"
1492                               "updated_phase1: "LPU64"\n"
1493                               "updated_phase2: "LPU64"\n"
1494                               "failed_phase1: "LPU64"\n"
1495                               "failed_phase2: "LPU64"\n"
1496                               "dirs: "LPU64"\n"
1497                               "M-linked: "LPU64"\n"
1498                               "nlinks_repaired: "LPU64"\n"
1499                               "lost_found: "LPU64"\n"
1500                               "success_count: %u\n"
1501                               "run_time_phase1: %u seconds\n"
1502                               "run_time_phase2: %u seconds\n"
1503                               "average_speed_phase1: "LPU64" items/sec\n"
1504                               "average_speed_phase2: "LPU64" objs/sec\n"
1505                               "real-time_speed_phase1: N/A\n"
1506                               "real-time_speed_phase2: "LPU64" objs/sec\n"
1507                               "current_position: "DFID"\n",
1508                               ns->ln_items_checked,
1509                               checked,
1510                               ns->ln_items_repaired,
1511                               ns->ln_objs_repaired_phase2,
1512                               ns->ln_items_failed,
1513                               ns->ln_objs_failed_phase2,
1514                               ns->ln_dirs_checked,
1515                               ns->ln_mlinked_checked,
1516                               ns->ln_objs_nlink_repaired,
1517                               ns->ln_objs_lost_found,
1518                               ns->ln_success_count,
1519                               ns->ln_run_time_phase1,
1520                               rtime,
1521                               speed1,
1522                               speed2,
1523                               new_checked,
1524                               PFID(&ns->ln_fid_latest_scanned_phase2));
1525                 if (rc <= 0)
1526                         goto out;
1527
1528                 buf += rc;
1529                 len -= rc;
1530         } else {
1531                 __u64 speed1 = ns->ln_items_checked;
1532                 __u64 speed2 = ns->ln_objs_checked_phase2;
1533
1534                 if (ns->ln_run_time_phase1 != 0)
1535                         do_div(speed1, ns->ln_run_time_phase1);
1536                 if (ns->ln_run_time_phase2 != 0)
1537                         do_div(speed2, ns->ln_run_time_phase2);
1538                 rc = snprintf(buf, len,
1539                               "checked_phase1: "LPU64"\n"
1540                               "checked_phase2: "LPU64"\n"
1541                               "updated_phase1: "LPU64"\n"
1542                               "updated_phase2: "LPU64"\n"
1543                               "failed_phase1: "LPU64"\n"
1544                               "failed_phase2: "LPU64"\n"
1545                               "dirs: "LPU64"\n"
1546                               "M-linked: "LPU64"\n"
1547                               "nlinks_repaired: "LPU64"\n"
1548                               "lost_found: "LPU64"\n"
1549                               "success_count: %u\n"
1550                               "run_time_phase1: %u seconds\n"
1551                               "run_time_phase2: %u seconds\n"
1552                               "average_speed_phase1: "LPU64" items/sec\n"
1553                               "average_speed_phase2: "LPU64" objs/sec\n"
1554                               "real-time_speed_phase1: N/A\n"
1555                               "real-time_speed_phase2: N/A\n"
1556                               "current_position: N/A\n",
1557                               ns->ln_items_checked,
1558                               ns->ln_objs_checked_phase2,
1559                               ns->ln_items_repaired,
1560                               ns->ln_objs_repaired_phase2,
1561                               ns->ln_items_failed,
1562                               ns->ln_objs_failed_phase2,
1563                               ns->ln_dirs_checked,
1564                               ns->ln_mlinked_checked,
1565                               ns->ln_objs_nlink_repaired,
1566                               ns->ln_objs_lost_found,
1567                               ns->ln_success_count,
1568                               ns->ln_run_time_phase1,
1569                               ns->ln_run_time_phase2,
1570                               speed1,
1571                               speed2);
1572                 if (rc <= 0)
1573                         goto out;
1574
1575                 buf += rc;
1576                 len -= rc;
1577         }
1578         ret = save - len;
1579
1580 out:
1581         up_read(&com->lc_sem);
1582         return ret;
1583 }
1584
1585 /* XXX: to be implemented in other patch.  */
1586 static int mdd_lfsck_namespace_double_scan(const struct lu_env *env,
1587                                            struct lfsck_component *com)
1588 {
1589         struct md_lfsck         *lfsck  = com->lc_lfsck;
1590         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
1591         struct lfsck_namespace  *ns     =
1592                                 (struct lfsck_namespace *)com->lc_file_ram;
1593         int                      rc;
1594
1595         down_write(&com->lc_sem);
1596
1597         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1598         com->lc_new_checked = 0;
1599         com->lc_journal = 0;
1600
1601         ns->ln_status = LS_COMPLETED;
1602         if (!(bk->lb_param & LPF_DRYRUN))
1603                 ns->ln_flags &=
1604                 ~(LF_SCANNED_ONCE | LF_INCONSISTENT | LF_UPGRADE);
1605         ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
1606         ns->ln_success_count++;
1607
1608         spin_lock(&lfsck->ml_lock);
1609         cfs_list_del_init(&com->lc_link);
1610         cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1611         spin_unlock(&lfsck->ml_lock);
1612
1613         rc = mdd_lfsck_namespace_store(env, com, false);
1614
1615         up_write(&com->lc_sem);
1616         return rc;
1617 }
1618
1619 static struct lfsck_operations mdd_lfsck_namespace_ops = {
1620         .lfsck_reset            = mdd_lfsck_namespace_reset,
1621         .lfsck_fail             = mdd_lfsck_namespace_fail,
1622         .lfsck_checkpoint       = mdd_lfsck_namespace_checkpoint,
1623         .lfsck_prep             = mdd_lfsck_namespace_prep,
1624         .lfsck_exec_oit         = mdd_lfsck_namespace_exec_oit,
1625         .lfsck_exec_dir         = mdd_lfsck_namespace_exec_dir,
1626         .lfsck_post             = mdd_lfsck_namespace_post,
1627         .lfsck_dump             = mdd_lfsck_namespace_dump,
1628         .lfsck_double_scan      = mdd_lfsck_namespace_double_scan,
1629 };
1630
1631 /* LFSCK component setup/cleanup functions */
1632
1633 static int mdd_lfsck_namespace_setup(const struct lu_env *env,
1634                                      struct md_lfsck *lfsck)
1635 {
1636         struct mdd_device      *mdd = mdd_lfsck2mdd(lfsck);
1637         struct lfsck_component *com;
1638         struct lfsck_namespace *ns;
1639         struct dt_object       *obj;
1640         int                     rc;
1641         ENTRY;
1642
1643         OBD_ALLOC_PTR(com);
1644         if (com == NULL)
1645                 RETURN(-ENOMEM);
1646
1647         CFS_INIT_LIST_HEAD(&com->lc_link);
1648         CFS_INIT_LIST_HEAD(&com->lc_link_dir);
1649         init_rwsem(&com->lc_sem);
1650         atomic_set(&com->lc_ref, 1);
1651         com->lc_lfsck = lfsck;
1652         com->lc_type = LT_NAMESPACE;
1653         com->lc_ops = &mdd_lfsck_namespace_ops;
1654         com->lc_file_size = sizeof(struct lfsck_namespace);
1655         OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
1656         if (com->lc_file_ram == NULL)
1657                 GOTO(out, rc = -ENOMEM);
1658
1659         OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
1660         if (com->lc_file_disk == NULL)
1661                 GOTO(out, rc = -ENOMEM);
1662
1663         obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_namespace_name,
1664                             &mdd_env_info(env)->mti_fid);
1665         if (IS_ERR(obj))
1666                 GOTO(out, rc = PTR_ERR(obj));
1667
1668         com->lc_obj = obj;
1669         rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
1670         if (rc != 0)
1671                 GOTO(out, rc);
1672
1673         rc = mdd_lfsck_namespace_load(env, com);
1674         if (rc > 0)
1675                 rc = mdd_lfsck_namespace_reset(env, com, true);
1676         else if (rc == -ENODATA)
1677                 rc = mdd_lfsck_namespace_init(env, com);
1678         if (rc != 0)
1679                 GOTO(out, rc);
1680
1681         ns = (struct lfsck_namespace *)com->lc_file_ram;
1682         switch (ns->ln_status) {
1683         case LS_INIT:
1684         case LS_COMPLETED:
1685         case LS_FAILED:
1686         case LS_STOPPED:
1687                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_idle);
1688                 break;
1689         default:
1690                 CERROR("%s: unknown status: %u\n",
1691                        mdd_lfsck2name(lfsck), ns->ln_status);
1692                 /* fall through */
1693         case LS_SCANNING_PHASE1:
1694         case LS_SCANNING_PHASE2:
1695                 /* No need to store the status to disk right now.
1696                  * If the system crashed before the status stored,
1697                  * it will be loaded back when next time. */
1698                 ns->ln_status = LS_CRASHED;
1699                 /* fall through */
1700         case LS_PAUSED:
1701         case LS_CRASHED:
1702                 cfs_list_add_tail(&com->lc_link, &lfsck->ml_list_scan);
1703                 cfs_list_add_tail(&com->lc_link_dir, &lfsck->ml_list_dir);
1704                 break;
1705         }
1706
1707         GOTO(out, rc = 0);
1708
1709 out:
1710         if (rc != 0)
1711                 mdd_lfsck_component_cleanup(env, com);
1712         return rc;
1713 }
1714
1715 /* helper functions for framework */
1716
1717 static int object_is_client_visible(const struct lu_env *env,
1718                                     struct mdd_device *mdd,
1719                                     struct mdd_object *obj)
1720 {
1721         struct lu_fid *fid   = &mdd_env_info(env)->mti_fid;
1722         int            depth = 0;
1723         int            rc;
1724
1725         LASSERT(S_ISDIR(mdd_object_type(obj)));
1726
1727         while (1) {
1728                 if (mdd_is_root(mdd, mdo2fid(obj))) {
1729                         if (depth > 0)
1730                                 mdd_object_put(env, obj);
1731                         return 1;
1732                 }
1733
1734                 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1735                 if (unlikely(mdd_is_dead_obj(obj))) {
1736                         mdd_read_unlock(env, obj);
1737                         if (depth > 0)
1738                                 mdd_object_put(env, obj);
1739                         return 0;
1740                 }
1741
1742                 rc = dt_xattr_get(env, mdd_object_child(obj),
1743                                   mdd_buf_get(env, NULL, 0), XATTR_NAME_LINK,
1744                                   BYPASS_CAPA);
1745                 mdd_read_unlock(env, obj);
1746                 if (rc >= 0) {
1747                         if (depth > 0)
1748                                 mdd_object_put(env, obj);
1749                         return 1;
1750                 }
1751
1752                 if (rc < 0 && rc != -ENODATA) {
1753                         if (depth > 0)
1754                                 mdd_object_put(env, obj);
1755                         return rc;
1756                 }
1757
1758                 rc = mdd_parent_fid(env, obj, fid);
1759                 if (depth > 0)
1760                         mdd_object_put(env, obj);
1761                 if (rc != 0)
1762                         return rc;
1763
1764                 if (unlikely(lu_fid_eq(fid, &mdd->mdd_local_root_fid)))
1765                         return 0;
1766
1767                 obj = mdd_object_find(env, mdd, fid);
1768                 if (obj == NULL)
1769                         return 0;
1770                 else if (IS_ERR(obj))
1771                         return PTR_ERR(obj);
1772
1773                 /* XXX: need more processing for remote object in the future. */
1774                 if (!mdd_object_exists(obj) || mdd_object_remote(obj)) {
1775                         mdd_object_put(env, obj);
1776                         return 0;
1777                 }
1778
1779                 depth++;
1780         }
1781         return 0;
1782 }
1783
1784 static void mdd_lfsck_unpack_ent(struct lu_dirent *ent)
1785 {
1786         fid_le_to_cpu(&ent->lde_fid, &ent->lde_fid);
1787         ent->lde_hash = le64_to_cpu(ent->lde_hash);
1788         ent->lde_reclen = le16_to_cpu(ent->lde_reclen);
1789         ent->lde_namelen = le16_to_cpu(ent->lde_namelen);
1790         ent->lde_attrs = le32_to_cpu(ent->lde_attrs);
1791
1792         /* Make sure the name is terminated with '0'.
1793          * The data (type) after ent::lde_name maybe
1794          * broken, but we do not care. */
1795         ent->lde_name[ent->lde_namelen] = 0;
1796 }
1797
1798 /* LFSCK wrap functions */
1799
1800 static void mdd_lfsck_fail(const struct lu_env *env, struct md_lfsck *lfsck,
1801                            bool oit, bool new_checked)
1802 {
1803         struct lfsck_component *com;
1804
1805         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
1806                 com->lc_ops->lfsck_fail(env, com, oit, new_checked);
1807         }
1808 }
1809
1810 static int mdd_lfsck_checkpoint(const struct lu_env *env,
1811                                 struct md_lfsck *lfsck, bool oit)
1812 {
1813         struct lfsck_component *com;
1814         int                     rc;
1815
1816         if (likely(cfs_time_beforeq(cfs_time_current(),
1817                                     lfsck->ml_time_next_checkpoint)))
1818                 return 0;
1819
1820         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, oit, !oit);
1821         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
1822                 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
1823                 if (rc != 0)
1824                         return rc;;
1825         }
1826
1827         lfsck->ml_time_last_checkpoint = cfs_time_current();
1828         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1829                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1830         return 0;
1831 }
1832
/*
 * Prepare a scan run.
 *
 * Every component on ml_list_scan is prepared through its lfsck_prep()
 * method and a start position is chosen among the components'
 * lc_pos_start.  The otable-based iterator (ml_di_oit) is loaded at that
 * position and, when the position names a parent directory, a directory
 * iterator is opened on it and recorded in ml_obj_dir / ml_di_dir.
 * Finally the current position is filled in and each component is
 * checkpointed.
 *
 * Returns 0 on success, otherwise the result of the step that failed
 * (positive results reaching the "out" label are mapped to 0).
 *
 * NOTE(review): if a component checkpoint at the end fails after
 * ml_obj_dir / ml_di_dir were set, they are left in place; the caller
 * mdd_lfsck_main() then jumps to fini_oit, which does not call
 * mdd_lfsck_close_dir() -- confirm they are released elsewhere.
 */
1833 static int mdd_lfsck_prep(struct lu_env *env, struct md_lfsck *lfsck)
1834 {
1835         struct mdd_device      *mdd     = mdd_lfsck2mdd(lfsck);
1836         struct mdd_object      *obj     = NULL;
1837         struct dt_object       *dt_obj;
1838         struct lfsck_component *com;
1839         struct lfsck_component *next;
1840         struct lfsck_position  *pos     = NULL;
1841         const struct dt_it_ops *iops    =
1842                                 &lfsck->ml_obj_oit->do_index_ops->dio_it;
1843         struct dt_it           *di;
1844         int                     rc;
1845         ENTRY;
1846
             /* No directory traverse may be left over from a previous run. */
1847         LASSERT(lfsck->ml_obj_dir == NULL);
1848         LASSERT(lfsck->ml_di_dir == NULL);
1849
1850         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
1851                 com->lc_new_checked = 0;
1852                 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
1853                         com->lc_journal = 0;
1854
1855                 rc = com->lc_ops->lfsck_prep(env, com);
1856                 if (rc != 0)
1857                         RETURN(rc);
1858
                     /* Take the first component's start position, then
                      * replace it by a later component's non-zero start
                      * position whenever the comparison below is positive
                      * (presumably keeping the earliest one -- the helper
                      * is not visible here). */
1859                 if ((pos == NULL) ||
1860                     (!mdd_lfsck_pos_is_zero(&com->lc_pos_start) &&
1861                      mdd_lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
1862                         pos = &com->lc_pos_start;
1863         }
1864
1865         /* Init otable-based iterator. */
             /* pos is still NULL only if the scan list was empty. */
1866         if (pos == NULL) {
1867                 rc = iops->load(env, lfsck->ml_di_oit, 0);
1868                 GOTO(out, rc = (rc >= 0 ? 0 : rc));
1869         }
1870
1871         rc = iops->load(env, lfsck->ml_di_oit, pos->lp_oit_cookie);
1872         if (rc < 0)
1873                 GOTO(out, rc);
1874
             /* No parent directory recorded: nothing more to restore. */
1875         if (fid_is_zero(&pos->lp_dir_parent))
1876                 GOTO(out, rc = 0);
1877
1878         /* Find the directory for namespace-based traverse. */
1879         obj = mdd_object_find(env, mdd, &pos->lp_dir_parent);
1880         if (obj == NULL)
1881                 GOTO(out, rc = 0);
1882         else if (IS_ERR(obj))
1883                 RETURN(PTR_ERR(obj));
1884
1885         /* XXX: need more processing for remote object in the future. */
1886         if (!mdd_object_exists(obj) || mdd_object_remote(obj) ||
1887             unlikely(!S_ISDIR(mdd_object_type(obj))))
1888                 GOTO(out, rc = 0);
1889
1890         if (unlikely(mdd_is_dead_obj(obj)))
1891                 GOTO(out, rc = 0);
1892
1893         dt_obj = mdd_object_child(obj);
1894         if (unlikely(!dt_try_as_dir(env, dt_obj)))
1895                 GOTO(out, rc = -ENOTDIR);
1896
1897         /* Init the namespace-based directory traverse. */
             /* From here on iops refers to the directory's iterator ops. */
1898         iops = &dt_obj->do_index_ops->dio_it;
1899         di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
1900         if (IS_ERR(di))
1901                 GOTO(out, rc = PTR_ERR(di));
1902
             /* A zero result from load() is followed by a next() call; a
              * positive result is treated as success without it. */
1903         rc = iops->load(env, di, pos->lp_dir_cookie);
1904         if (rc == 0)
1905                 rc = iops->next(env, di);
1906         else if (rc > 0)
1907                 rc = 0;
1908
1909         if (rc != 0) {
1910                 iops->put(env, di);
1911                 iops->fini(env, di);
1912                 GOTO(out, rc);
1913         }
1914
1915         lfsck->ml_obj_dir = dt_obj;
1916         spin_lock(&lfsck->ml_lock);
1917         lfsck->ml_di_dir = di;
1918         spin_unlock(&lfsck->ml_lock);
             /* Skip the put at "out": the reference taken by
              * mdd_object_find() is kept while the directory stays open. */
1919         obj = NULL;
1920
1921         GOTO(out, rc = 0);
1922
1923 out:
1924         if (obj != NULL)
1925                 mdd_object_put(env, obj);
1926
             /* Errors are returned as is; a positive rc becomes 0.  In both
              * cases the initial checkpoint below is skipped. */
1927         if (rc != 0)
1928                 return (rc > 0 ? 0 : rc);
1929
1930         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, false, false);
1931         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
1932                 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
1933                 if (rc != 0)
1934                         break;
1935         }
1936
             /* The checkpoint times are refreshed even when a component's
              * checkpoint failed above. */
1937         lfsck->ml_time_last_checkpoint = cfs_time_current();
1938         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
1939                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1940         return rc;
1941 }
1942
1943 static int mdd_lfsck_exec_oit(const struct lu_env *env, struct md_lfsck *lfsck,
1944                               struct mdd_object *obj)
1945 {
1946         struct lfsck_component *com;
1947         struct dt_object       *dt_obj;
1948         const struct dt_it_ops *iops;
1949         struct dt_it           *di;
1950         int                     rc;
1951         ENTRY;
1952
1953         LASSERT(lfsck->ml_obj_dir == NULL);
1954
1955         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
1956                 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
1957                 if (rc != 0)
1958                         RETURN(rc);
1959         }
1960
1961         if (!S_ISDIR(mdd_object_type(obj)) ||
1962             cfs_list_empty(&lfsck->ml_list_dir))
1963                RETURN(0);
1964
1965         rc = object_is_client_visible(env, mdd_lfsck2mdd(lfsck), obj);
1966         if (rc <= 0)
1967                 GOTO(out, rc);
1968
1969         if (unlikely(mdd_is_dead_obj(obj)))
1970                 GOTO(out, rc = 0);
1971
1972         dt_obj = mdd_object_child(obj);
1973         if (unlikely(!dt_try_as_dir(env, dt_obj)))
1974                 GOTO(out, rc = -ENOTDIR);
1975
1976         iops = &dt_obj->do_index_ops->dio_it;
1977         di = iops->init(env, dt_obj, lfsck->ml_args_dir, BYPASS_CAPA);
1978         if (IS_ERR(di))
1979                 GOTO(out, rc = PTR_ERR(di));
1980
1981         rc = iops->load(env, di, 0);
1982         if (rc == 0)
1983                 rc = iops->next(env, di);
1984         else if (rc > 0)
1985                 rc = 0;
1986
1987         if (rc != 0) {
1988                 iops->put(env, di);
1989                 iops->fini(env, di);
1990                 GOTO(out, rc);
1991         }
1992
1993         mdd_object_get(obj);
1994         lfsck->ml_obj_dir = dt_obj;
1995         spin_lock(&lfsck->ml_lock);
1996         lfsck->ml_di_dir = di;
1997         spin_unlock(&lfsck->ml_lock);
1998
1999         GOTO(out, rc = 0);
2000
2001 out:
2002         if (rc < 0)
2003                 mdd_lfsck_fail(env, lfsck, false, false);
2004         return (rc > 0 ? 0 : rc);
2005 }
2006
2007 static int mdd_lfsck_exec_dir(const struct lu_env *env, struct md_lfsck *lfsck,
2008                               struct mdd_object *obj, struct lu_dirent *ent)
2009 {
2010         struct lfsck_component *com;
2011         int                     rc;
2012
2013         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2014                 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
2015                 if (rc != 0)
2016                         return rc;
2017         }
2018         return 0;
2019 }
2020
2021 static int mdd_lfsck_post(const struct lu_env *env, struct md_lfsck *lfsck,
2022                           int result)
2023 {
2024         struct lfsck_component *com;
2025         struct lfsck_component *next;
2026         int                     rc;
2027
2028         mdd_lfsck_pos_fill(env, lfsck, &lfsck->ml_pos_current, true, true);
2029         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_scan, lc_link) {
2030                 rc = com->lc_ops->lfsck_post(env, com, result);
2031                 if (rc != 0)
2032                         return rc;
2033         }
2034
2035         lfsck->ml_time_last_checkpoint = cfs_time_current();
2036         lfsck->ml_time_next_checkpoint = lfsck->ml_time_last_checkpoint +
2037                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
2038         return result;
2039 }
2040
2041 static int mdd_lfsck_double_scan(const struct lu_env *env,
2042                                  struct md_lfsck *lfsck)
2043 {
2044         struct lfsck_component *com;
2045         struct lfsck_component *next;
2046         int                     rc;
2047
2048         cfs_list_for_each_entry_safe(com, next, &lfsck->ml_list_double_scan,
2049                                      lc_link) {
2050                 if (lfsck->ml_bookmark_ram.lb_param & LPF_DRYRUN)
2051                         com->lc_journal = 0;
2052
2053                 rc = com->lc_ops->lfsck_double_scan(env, com);
2054                 if (rc != 0)
2055                         return rc;
2056         }
2057         return 0;
2058 }
2059
2060 /* LFSCK engines */
2061
/*
 * Namespace-based engine: iterate the entries of the currently opened
 * directory (ml_obj_dir / ml_di_dir) and pass each existing, non-remote
 * child to the components through mdd_lfsck_exec_dir().
 *
 * Per-entry failures are reported through mdd_lfsck_fail(); they end the
 * scan only when LPF_FAILOUT is set in the bookmark parameters.
 *
 * Returns the final result of iops->next() when the loop ends (a positive
 * value presumably means the directory was exhausted), 0 when the thread
 * is no longer running, or a negative errno.
 */
2062 static int mdd_lfsck_dir_engine(const struct lu_env *env,
2063                                 struct md_lfsck *lfsck)
2064 {
2065         struct mdd_thread_info  *info   = mdd_env_info(env);
2066         struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
2067         const struct dt_it_ops  *iops   =
2068                         &lfsck->ml_obj_dir->do_index_ops->dio_it;
2069         struct dt_it            *di     = lfsck->ml_di_dir;
2070         struct lu_dirent        *ent    = &info->mti_ent;
2071         struct lu_fid           *fid    = &info->mti_fid;
2072         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
2073         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2074         int                      rc;
2075         ENTRY;
2076
2077         do {
2078                 struct mdd_object *child;
2079
                     /* Fault injection: wait until the thread stops running
                      * or the cfs_fail_val seconds timeout expires. */
2080                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY2) &&
2081                     cfs_fail_val > 0) {
2082                         struct l_wait_info lwi;
2083
2084                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2085                                           NULL, NULL);
2086                         l_wait_event(thread->t_ctl_waitq,
2087                                      !thread_is_running(thread),
2088                                      &lwi);
2089                 }
2090
2091                 lfsck->ml_new_scanned++;
                     /* Fetch the current entry into the per-thread buffer. */
2092                 rc = iops->rec(env, di, (struct dt_rec *)ent,
2093                                lfsck->ml_args_dir);
2094                 if (rc != 0) {
2095                         mdd_lfsck_fail(env, lfsck, false, true);
2096                         if (bk->lb_param & LPF_FAILOUT)
2097                                 RETURN(rc);
2098                         else
2099                                 goto checkpoint;
2100                 }
2101
2102                 mdd_lfsck_unpack_ent(ent);
2103                 if (ent->lde_attrs & LUDA_IGNORE)
2104                         goto checkpoint;
2105
2106                 *fid = ent->lde_fid;
2107                 child = mdd_object_find(env, mdd, fid);
2108                 if (child == NULL) {
2109                         goto checkpoint;
2110                 } else if (IS_ERR(child)) {
2111                         mdd_lfsck_fail(env, lfsck, false, true);
2112                         if (bk->lb_param & LPF_FAILOUT)
2113                                 RETURN(PTR_ERR(child));
2114                         else
2115                                 goto checkpoint;
2116                 }
2117
2118                 /* XXX: need more processing for remote object in the future. */
                     /* rc is 0 at this point, so a skipped child leaves it 0. */
2119                 if (mdd_object_exists(child) && !mdd_object_remote(child))
2120                         rc = mdd_lfsck_exec_dir(env, lfsck, child, ent);
2121                 mdd_object_put(env, child);
2122                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2123                         RETURN(rc);
2124
2125 checkpoint:
                     /* A checkpoint failure is fatal only with LPF_FAILOUT. */
2126                 rc = mdd_lfsck_checkpoint(env, lfsck, false);
2127                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2128                         RETURN(rc);
2129
2130                 /* Rate control. */
2131                 mdd_lfsck_control_speed(lfsck);
2132                 if (unlikely(!thread_is_running(thread)))
2133                         RETURN(0);
2134
                     /* Fault injection: flag the thread as stopping and
                      * fail the scan. */
2135                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL2)) {
2136                         spin_lock(&lfsck->ml_lock);
2137                         thread_set_flags(thread, SVC_STOPPING);
2138                         spin_unlock(&lfsck->ml_lock);
2139                         RETURN(-EINVAL);
2140                 }
2141
2142                 rc = iops->next(env, di);
2143         } while (rc == 0);
2144
             /* On a positive result the directory is closed here, unless the
              * otable-based iteration is already over. */
2145         if (rc > 0 && !lfsck->ml_oit_over)
2146                 mdd_lfsck_close_dir(env, lfsck);
2147
2148         RETURN(rc);
2149 }
2150
/*
 * Otable-based engine: iterate the objects returned by the otable-based
 * iterator (ml_di_oit) and pass each existing, non-remote object to
 * mdd_lfsck_exec_oit().  Whenever a directory iterator is open
 * (ml_di_dir != NULL), the directory is scanned first through
 * mdd_lfsck_dir_engine().
 *
 * Per-object failures are reported through mdd_lfsck_fail(); they end the
 * scan only when LPF_FAILOUT is set in the bookmark parameters.
 *
 * Returns 1 once ml_oit_over is set, 0 when the thread is no longer
 * running, a non-positive result propagated from the directory engine,
 * a negative errno, or the final iops->next() result.
 */
2151 static int mdd_lfsck_oit_engine(const struct lu_env *env,
2152                                 struct md_lfsck *lfsck)
2153 {
2154         struct mdd_thread_info  *info   = mdd_env_info(env);
2155         struct mdd_device       *mdd    = mdd_lfsck2mdd(lfsck);
2156         const struct dt_it_ops  *iops   =
2157                                 &lfsck->ml_obj_oit->do_index_ops->dio_it;
2158         struct dt_it            *di     = lfsck->ml_di_oit;
2159         struct lu_fid           *fid    = &info->mti_fid;
2160         struct lfsck_bookmark   *bk     = &lfsck->ml_bookmark_ram;
2161         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2162         int                      rc;
2163         ENTRY;
2164
2165         do {
2166                 struct mdd_object *target;
2167
                     /* Scan the pending directory first; only a positive
                      * result lets the object-table iteration continue. */
2168                 if (lfsck->ml_di_dir != NULL) {
2169                         rc = mdd_lfsck_dir_engine(env, lfsck);
2170                         if (rc <= 0)
2171                                 RETURN(rc);
2172                 }
2173
2174                 if (unlikely(lfsck->ml_oit_over))
2175                         RETURN(1);
2176
                     /* Fault injection: wait until the thread stops running
                      * or the cfs_fail_val seconds timeout expires. */
2177                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY1) &&
2178                     cfs_fail_val > 0) {
2179                         struct l_wait_info lwi;
2180
2181                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
2182                                           NULL, NULL);
2183                         l_wait_event(thread->t_ctl_waitq,
2184                                      !thread_is_running(thread),
2185                                      &lwi);
2186                 }
2187
2188                 lfsck->ml_new_scanned++;
                     /* The record of the otable-based iterator is a FID. */
2189                 rc = iops->rec(env, di, (struct dt_rec *)fid, 0);
2190                 if (rc != 0) {
2191                         mdd_lfsck_fail(env, lfsck, true, true);
2192                         if (bk->lb_param & LPF_FAILOUT)
2193                                 RETURN(rc);
2194                         else
2195                                 goto checkpoint;
2196                 }
2197
2198                 target = mdd_object_find(env, mdd, fid);
2199                 if (target == NULL) {
2200                         goto checkpoint;
2201                 } else if (IS_ERR(target)) {
2202                         mdd_lfsck_fail(env, lfsck, true, true);
2203                         if (bk->lb_param & LPF_FAILOUT)
2204                                 RETURN(PTR_ERR(target));
2205                         else
2206                                 goto checkpoint;
2207                 }
2208
2209                 /* XXX: In fact, low layer otable-based iteration should not
2210                  *      return agent object. But before LU-2646 resolved, we
2211                  *      need more processing for agent object. */
                     /* rc is 0 at this point, so a skipped target leaves it 0. */
2212                 if (mdd_object_exists(target) && !mdd_object_remote(target))
2213                         rc = mdd_lfsck_exec_oit(env, lfsck, target);
2214                 mdd_object_put(env, target);
2215                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2216                         RETURN(rc);
2217
2218 checkpoint:
                     /* A checkpoint failure is fatal only with LPF_FAILOUT. */
2219                 rc = mdd_lfsck_checkpoint(env, lfsck, true);
2220                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
2221                         RETURN(rc);
2222
2223                 /* Rate control. */
2224                 mdd_lfsck_control_speed(lfsck);
2225
                     /* Fault injection: flag the thread as stopping and
                      * fail the scan. */
2226                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_FATAL1)) {
2227                         spin_lock(&lfsck->ml_lock);
2228                         thread_set_flags(thread, SVC_STOPPING);
2229                         spin_unlock(&lfsck->ml_lock);
2230                         RETURN(-EINVAL);
2231                 }
2232
                     /* A positive result marks the iteration as over; the
                      * loop still continues while a directory is open. */
2233                 rc = iops->next(env, di);
2234                 if (rc > 0)
2235                         lfsck->ml_oit_over = 1;
2236
2237                 if (unlikely(!thread_is_running(thread)))
2238                         RETURN(0);
2239         } while (rc == 0 || lfsck->ml_di_dir != NULL);
2240
2241         RETURN(rc);
2242 }
2243
/*
 * Body of the "lfsck" thread; @args is the struct md_lfsck to work on.
 *
 * Sets up an environment and the otable-based iterator, prepares the scan,
 * marks the thread SVC_RUNNING, runs the first-phase engine, posts the
 * result to the components and, when that result is 1, runs the second
 * phase (double scan) if any component is queued for it.  The thread is
 * marked SVC_STOPPED and waiters are woken on every exit path.
 *
 * Returns the final result of the scan, or the error of the set-up step
 * that failed.
 */
2244 static int mdd_lfsck_main(void *args)
2245 {
2246         struct lu_env            env;
2247         struct md_lfsck         *lfsck    = (struct md_lfsck *)args;
2248         struct ptlrpc_thread    *thread   = &lfsck->ml_thread;
2249         struct dt_object        *oit_obj  = lfsck->ml_obj_oit;
2250         const struct dt_it_ops  *oit_iops = &oit_obj->do_index_ops->dio_it;
2251         struct dt_it            *oit_di;
2252         int                      rc;
2253         ENTRY;
2254
2255         cfs_daemonize("lfsck");
2256         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
2257         if (rc != 0) {
2258                 CERROR("%s: LFSCK, fail to init env, rc = %d\n",
2259                        mdd_lfsck2name(lfsck), rc);
2260                 GOTO(noenv, rc);
2261         }
2262
2263         oit_di = oit_iops->init(&env, oit_obj, lfsck->ml_args_oit, BYPASS_CAPA);
2264         if (IS_ERR(oit_di)) {
2265                 rc = PTR_ERR(oit_di);
2266                 CERROR("%s: LFSCK, fail to init iteration, rc = %d\n",
2267                        mdd_lfsck2name(lfsck), rc);
2268                 GOTO(fini_env, rc);
2269         }
2270
             /* Publish the iterator under ml_lock. */
2271         spin_lock(&lfsck->ml_lock);
2272         lfsck->ml_di_oit = oit_di;
2273         spin_unlock(&lfsck->ml_lock);
2274         rc = mdd_lfsck_prep(&env, lfsck);
2275         if (rc != 0)
2276                 GOTO(fini_oit, rc);
2277
2278         CDEBUG(D_LFSCK, "LFSCK entry: oit_flags = 0x%x, dir_flags = 0x%x, "
2279                "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2280                ", pid = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2281                lfsck->ml_pos_current.lp_oit_cookie,
2282                lfsck->ml_pos_current.lp_dir_cookie,
2283                PFID(&lfsck->ml_pos_current.lp_dir_parent),
2284                cfs_curproc_pid());
2285
             /* Mark the thread as running and wake whoever waits on the
              * control wait queue. */
2286         spin_lock(&lfsck->ml_lock);
2287         thread_set_flags(thread, SVC_RUNNING);
2288         spin_unlock(&lfsck->ml_lock);
2289         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2290
             /* The first-phase engine is skipped (rc = 1) only when nothing
              * is on the scan list while something awaits the double scan. */
2291         if (!cfs_list_empty(&lfsck->ml_list_scan) ||
2292             cfs_list_empty(&lfsck->ml_list_double_scan))
2293                 rc = mdd_lfsck_oit_engine(&env, lfsck);
2294         else
2295                 rc = 1;
2296
2297         CDEBUG(D_LFSCK, "LFSCK exit: oit_flags = 0x%x, dir_flags = 0x%x, "
2298                "oit_cookie = "LPU64", dir_cookie = "LPU64", parent = "DFID
2299                ", pid = %d, rc = %d\n", lfsck->ml_args_oit, lfsck->ml_args_dir,
2300                lfsck->ml_pos_current.lp_oit_cookie,
2301                lfsck->ml_pos_current.lp_dir_cookie,
2302                PFID(&lfsck->ml_pos_current.lp_dir_parent),
2303                cfs_curproc_pid(), rc);
2304
2305         if (lfsck->ml_paused && cfs_list_empty(&lfsck->ml_list_scan))
2306                 oit_iops->put(&env, oit_di);
2307
2308         rc = mdd_lfsck_post(&env, lfsck, rc);
2309         if (lfsck->ml_di_dir != NULL)
2310                 mdd_lfsck_close_dir(&env, lfsck);
2311
             /* NOTE(review): a failed mdd_lfsck_prep() jumps here and so
              * skips the mdd_lfsck_close_dir() call above. */
2312 fini_oit:
2313         spin_lock(&lfsck->ml_lock);
2314         lfsck->ml_di_oit = NULL;
2315         spin_unlock(&lfsck->ml_lock);
2316
2317         oit_iops->fini(&env, oit_di);
             /* rc == 1 selects the second phase, or plain success when no
              * component is queued for it. */
2318         if (rc == 1) {
2319                 if (!cfs_list_empty(&lfsck->ml_list_double_scan))
2320                         rc = mdd_lfsck_double_scan(&env, lfsck);
2321                 else
2322                         rc = 0;
2323         }
2324
2325         /* XXX: Purge the pinned objects in the future. */
2326
2327 fini_env:
2328         lu_env_fini(&env);
2329
2330 noenv:
             /* Always report SVC_STOPPED and wake the waiters. */
2331         spin_lock(&lfsck->ml_lock);
2332         thread_set_flags(thread, SVC_STOPPED);
2333         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2334         spin_unlock(&lfsck->ml_lock);
2335         return rc;
2336 }
2337
2338 /* external interfaces */
2339
2340 int mdd_lfsck_set_speed(const struct lu_env *env, struct md_lfsck *lfsck,
2341                         __u32 limit)
2342 {
2343         int rc;
2344
2345         mutex_lock(&lfsck->ml_mutex);
2346         __mdd_lfsck_set_speed(lfsck, limit);
2347         rc = mdd_lfsck_bookmark_store(env, lfsck);
2348         mutex_unlock(&lfsck->ml_mutex);
2349         return rc;
2350 }
2351
2352 int mdd_lfsck_dump(const struct lu_env *env, struct md_lfsck *lfsck,
2353                    __u16 type, char *buf, int len)
2354 {
2355         struct lfsck_component *com;
2356         int                     rc;
2357
2358         if (!lfsck->ml_initialized)
2359                 return -ENODEV;
2360
2361         com = mdd_lfsck_component_find(lfsck, type);
2362         if (com == NULL)
2363                 return -ENOTSUPP;
2364
2365         rc = com->lc_ops->lfsck_dump(env, com, buf, len);
2366         mdd_lfsck_component_put(env, com);
2367         return rc;
2368 }
2369
2370 int mdd_lfsck_start(const struct lu_env *env, struct md_lfsck *lfsck,
2371                     struct lfsck_start *start)
2372 {
2373         struct lfsck_bookmark  *bk     = &lfsck->ml_bookmark_ram;
2374         struct ptlrpc_thread   *thread = &lfsck->ml_thread;
2375         struct lfsck_component *com;
2376         struct l_wait_info      lwi    = { 0 };
2377         bool                    dirty  = false;
2378         int                     rc     = 0;
2379         __u16                   valid  = 0;
2380         __u16                   flags  = 0;
2381         ENTRY;
2382
2383         if (lfsck->ml_obj_oit == NULL)
2384                 RETURN(-ENOTSUPP);
2385
2386         /* start == NULL means auto trigger paused LFSCK. */
2387         if (start == NULL && cfs_list_empty(&lfsck->ml_list_scan))
2388                 RETURN(0);
2389
2390         mutex_lock(&lfsck->ml_mutex);
2391         spin_lock(&lfsck->ml_lock);
2392         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
2393                 spin_unlock(&lfsck->ml_lock);
2394                 mutex_unlock(&lfsck->ml_mutex);
2395                 RETURN(-EALREADY);
2396         }
2397
2398         spin_unlock(&lfsck->ml_lock);
2399
2400         lfsck->ml_paused = 0;
2401         lfsck->ml_oit_over = 0;
2402         lfsck->ml_drop_dryrun = 0;
2403         lfsck->ml_new_scanned = 0;
2404
2405         /* For auto trigger. */
2406         if (start == NULL)
2407                 goto trigger;
2408
2409         start->ls_version = bk->lb_version;
2410         if (start->ls_valid & LSV_SPEED_LIMIT) {
2411                 __mdd_lfsck_set_speed(lfsck, start->ls_speed_limit);
2412                 dirty = true;
2413         }
2414
2415         if (start->ls_valid & LSV_ERROR_HANDLE) {
2416                 valid |= DOIV_ERROR_HANDLE;
2417                 if (start->ls_flags & LPF_FAILOUT)
2418                         flags |= DOIF_FAILOUT;
2419
2420                 if ((start->ls_flags & LPF_FAILOUT) &&
2421                     !(bk->lb_param & LPF_FAILOUT)) {
2422                         bk->lb_param |= LPF_FAILOUT;
2423                         dirty = true;
2424                 } else if (!(start->ls_flags & LPF_FAILOUT) &&
2425                            (bk->lb_param & LPF_FAILOUT)) {
2426                         bk->lb_param &= ~LPF_FAILOUT;
2427                         dirty = true;
2428                 }
2429         }
2430
2431         if (start->ls_valid & LSV_DRYRUN) {
2432                 if ((start->ls_flags & LPF_DRYRUN) &&
2433                     !(bk->lb_param & LPF_DRYRUN)) {
2434                         bk->lb_param |= LPF_DRYRUN;
2435                         dirty = true;
2436                 } else if (!(start->ls_flags & LPF_DRYRUN) &&
2437                            (bk->lb_param & LPF_DRYRUN)) {
2438                         bk->lb_param &= ~LPF_DRYRUN;
2439                         lfsck->ml_drop_dryrun = 1;
2440                         dirty = true;
2441                 }
2442         }
2443
2444         if (dirty) {
2445                 rc = mdd_lfsck_bookmark_store(env, lfsck);
2446                 if (rc != 0)
2447                         GOTO(out, rc);
2448         }
2449
2450         if (start->ls_flags & LPF_RESET)
2451                 flags |= DOIF_RESET;
2452
2453         if (start->ls_active != 0) {
2454                 struct lfsck_component *next;
2455                 __u16 type = 1;
2456
2457                 if (start->ls_active == LFSCK_TYPES_ALL)
2458                         start->ls_active = LFSCK_TYPES_SUPPORTED;
2459
2460                 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
2461                         start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
2462                         GOTO(out, rc = -ENOTSUPP);
2463                 }
2464
2465                 cfs_list_for_each_entry_safe(com, next,
2466                                              &lfsck->ml_list_scan, lc_link) {
2467                         if (!(com->lc_type & start->ls_active)) {
2468                                 rc = com->lc_ops->lfsck_post(env, com, 0);
2469                                 if (rc != 0)
2470                                         GOTO(out, rc);
2471                         }
2472                 }
2473
2474                 while (start->ls_active != 0) {
2475                         if (type & start->ls_active) {
2476                                 com = __mdd_lfsck_component_find(lfsck, type,
2477                                                         &lfsck->ml_list_idle);
2478                                 if (com != NULL) {
2479                                         /* The component status will be updated
2480                                          * when its prep() is called later by
2481                                          * the LFSCK main engine. */
2482                                         cfs_list_del_init(&com->lc_link);
2483                                         cfs_list_add_tail(&com->lc_link,
2484                                                           &lfsck->ml_list_scan);
2485                                 }
2486                                 start->ls_active &= ~type;
2487                         }
2488                         type <<= 1;
2489                 }
2490         }
2491
2492         cfs_list_for_each_entry(com, &lfsck->ml_list_scan, lc_link) {
2493                 start->ls_active |= com->lc_type;
2494                 if (flags & DOIF_RESET) {
2495                         rc = com->lc_ops->lfsck_reset(env, com, false);
2496                         if (rc != 0)
2497                                 GOTO(out, rc);
2498                 }
2499         }
2500
2501 trigger:
2502         lfsck->ml_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
2503         if (bk->lb_param & LPF_DRYRUN)
2504                 lfsck->ml_args_dir |= LUDA_VERIFY_DRYRUN;
2505
2506         if (bk->lb_param & LPF_FAILOUT) {
2507                 valid |= DOIV_ERROR_HANDLE;
2508                 flags |= DOIF_FAILOUT;
2509         }
2510
2511         if (!cfs_list_empty(&lfsck->ml_list_scan))
2512                 flags |= DOIF_OUTUSED;
2513
2514         lfsck->ml_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
2515         thread_set_flags(thread, 0);
2516         rc = cfs_create_thread(mdd_lfsck_main, lfsck, 0);
2517         if (rc < 0)
2518                 CERROR("%s: cannot start LFSCK thread, rc = %d\n",
2519                        mdd_lfsck2name(lfsck), rc);
2520         else
2521                 l_wait_event(thread->t_ctl_waitq,
2522                              thread_is_running(thread) ||
2523                              thread_is_stopped(thread),
2524                              &lwi);
2525
2526         GOTO(out, rc = 0);
2527
2528 out:
2529         mutex_unlock(&lfsck->ml_mutex);
2530         return (rc < 0 ? rc : 0);
2531 }
2532
2533 int mdd_lfsck_stop(const struct lu_env *env, struct md_lfsck *lfsck,
2534                    bool pause)
2535 {
2536         struct ptlrpc_thread *thread = &lfsck->ml_thread;
2537         struct l_wait_info    lwi    = { 0 };
2538         ENTRY;
2539
2540         mutex_lock(&lfsck->ml_mutex);
2541         spin_lock(&lfsck->ml_lock);
2542         if (thread_is_init(thread) || thread_is_stopped(thread)) {
2543                 spin_unlock(&lfsck->ml_lock);
2544                 mutex_unlock(&lfsck->ml_mutex);
2545                 RETURN(-EALREADY);
2546         }
2547
2548         if (pause)
2549                 lfsck->ml_paused = 1;
2550         thread_set_flags(thread, SVC_STOPPING);
2551         /* The LFSCK thread may be sleeping on low layer wait queue,
2552          * wake it up. */
2553         if (likely(lfsck->ml_di_oit != NULL))
2554                 lfsck->ml_obj_oit->do_index_ops->dio_it.put(env,
2555                                                             lfsck->ml_di_oit);
2556         spin_unlock(&lfsck->ml_lock);
2557
2558         cfs_waitq_broadcast(&thread->t_ctl_waitq);
2559         l_wait_event(thread->t_ctl_waitq,
2560                      thread_is_stopped(thread),
2561                      &lwi);
2562         mutex_unlock(&lfsck->ml_mutex);
2563
2564         RETURN(0);
2565 }
2566
2567 static const struct lu_fid lfsck_it_fid = { .f_seq = FID_SEQ_LOCAL_FILE,
2568                                             .f_oid = OTABLE_IT_OID,
2569                                             .f_ver = 0 };
2570
2571 int mdd_lfsck_setup(const struct lu_env *env, struct mdd_device *mdd)
2572 {
2573         struct md_lfsck  *lfsck = &mdd->mdd_lfsck;
2574         struct dt_object *obj;
2575         int               rc;
2576         ENTRY;
2577
2578         LASSERT(!lfsck->ml_initialized);
2579
2580         lfsck->ml_initialized = 1;
2581         mutex_init(&lfsck->ml_mutex);
2582         spin_lock_init(&lfsck->ml_lock);
2583         CFS_INIT_LIST_HEAD(&lfsck->ml_list_scan);
2584         CFS_INIT_LIST_HEAD(&lfsck->ml_list_dir);
2585         CFS_INIT_LIST_HEAD(&lfsck->ml_list_double_scan);
2586         CFS_INIT_LIST_HEAD(&lfsck->ml_list_idle);
2587         cfs_waitq_init(&lfsck->ml_thread.t_ctl_waitq);
2588
2589         obj = dt_locate(env, mdd->mdd_bottom, &lfsck_it_fid);
2590         if (IS_ERR(obj))
2591                 RETURN(PTR_ERR(obj));
2592
2593         lfsck->ml_obj_oit = obj;
2594         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
2595         if (rc != 0) {
2596                 if (rc == -ENOTSUPP)
2597                         rc = 0;
2598
2599                 RETURN(rc);
2600         }
2601
2602         obj = dt_store_open(env, mdd->mdd_bottom, "", lfsck_bookmark_name,
2603                             &mdd_env_info(env)->mti_fid);
2604         if (IS_ERR(obj))
2605                 RETURN(PTR_ERR(obj));
2606
2607         lfsck->ml_bookmark_obj = obj;
2608         rc = mdd_lfsck_bookmark_load(env, lfsck);
2609         if (rc == -ENODATA)
2610                 rc = mdd_lfsck_bookmark_init(env, lfsck);
2611         if (rc != 0)
2612                 RETURN(rc);
2613
2614         rc = mdd_lfsck_namespace_setup(env, lfsck);
2615         /* XXX: LFSCK components initialization to be added here. */
2616
2617         RETURN(rc);
2618 }
2619
2620 void mdd_lfsck_cleanup(const struct lu_env *env, struct mdd_device *mdd)
2621 {
2622         struct md_lfsck         *lfsck  = &mdd->mdd_lfsck;
2623         struct ptlrpc_thread    *thread = &lfsck->ml_thread;
2624         struct lfsck_component  *com;
2625
2626         if (!lfsck->ml_initialized)
2627                 return;
2628
2629         LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
2630
2631         if (lfsck->ml_obj_oit != NULL) {
2632                 lu_object_put(env, &lfsck->ml_obj_oit->do_lu);
2633                 lfsck->ml_obj_oit = NULL;
2634         }
2635
2636         LASSERT(lfsck->ml_obj_dir == NULL);
2637
2638         if (lfsck->ml_bookmark_obj != NULL) {
2639                 lu_object_put(env, &lfsck->ml_bookmark_obj->do_lu);
2640                 lfsck->ml_bookmark_obj = NULL;
2641         }
2642
2643         while (!cfs_list_empty(&lfsck->ml_list_scan)) {
2644                 com = cfs_list_entry(lfsck->ml_list_scan.next,
2645                                      struct lfsck_component,
2646                                      lc_link);
2647                 mdd_lfsck_component_cleanup(env, com);
2648         }
2649
2650         LASSERT(cfs_list_empty(&lfsck->ml_list_dir));
2651
2652         while (!cfs_list_empty(&lfsck->ml_list_double_scan)) {
2653                 com = cfs_list_entry(lfsck->ml_list_double_scan.next,
2654                                      struct lfsck_component,
2655                                      lc_link);
2656                 mdd_lfsck_component_cleanup(env, com);
2657         }
2658
2659         while (!cfs_list_empty(&lfsck->ml_list_idle)) {
2660                 com = cfs_list_entry(lfsck->ml_list_idle.next,
2661                                      struct lfsck_component,
2662                                      lc_link);
2663                 mdd_lfsck_component_cleanup(env, com);
2664         }
2665 }