Whamcloud - gitweb
430522b0b0d4742814135b0154ad4205cba04441
[fs/lustre-release.git] / lustre / lfsck / lfsck_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2013, Intel Corporation.
24  */
25 /*
26  * lustre/lfsck/lfsck_lib.c
27  *
28  * Author: Fan, Yong <fan.yong@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LFSCK
32
33 #include <libcfs/list.h>
34 #include <lu_object.h>
35 #include <dt_object.h>
36 #include <md_object.h>
37 #include <lustre_fld.h>
38 #include <lustre_lib.h>
39 #include <lustre_net.h>
40 #include <lustre_lfsck.h>
41 #include <lustre/lustre_lfsck_user.h>
42
43 #include "lfsck_internal.h"
44
45 /* define lfsck thread key */
46 LU_KEY_INIT(lfsck, struct lfsck_thread_info);
47
48 static void lfsck_key_fini(const struct lu_context *ctx,
49                            struct lu_context_key *key, void *data)
50 {
51         struct lfsck_thread_info *info = data;
52
53         lu_buf_free(&info->lti_linkea_buf);
54         lu_buf_free(&info->lti_big_buf);
55         OBD_FREE_PTR(info);
56 }
57
58 LU_CONTEXT_KEY_DEFINE(lfsck, LCT_MD_THREAD | LCT_DT_THREAD);
59 LU_KEY_INIT_GENERIC(lfsck);
60
61 static CFS_LIST_HEAD(lfsck_instance_list);
62 static struct list_head lfsck_ost_orphan_list;
63 static struct list_head lfsck_mdt_orphan_list;
64 static DEFINE_SPINLOCK(lfsck_instance_lock);
65
66 static const char *lfsck_status_names[] = {
67         [LS_INIT]               = "init",
68         [LS_SCANNING_PHASE1]    = "scanning-phase1",
69         [LS_SCANNING_PHASE2]    = "scanning-phase2",
70         [LS_COMPLETED]          = "completed",
71         [LS_FAILED]             = "failed",
72         [LS_STOPPED]            = "stopped",
73         [LS_PAUSED]             = "paused",
74         [LS_CRASHED]            = "crashed",
75         [LS_PARTIAL]            = "partial",
76         [LS_CO_FAILED]          = "co-failed",
77         [LS_CO_STOPPED]         = "co-stopped",
78         [LS_CO_PAUSED]          = "co-paused"
79 };
80
81 const char *lfsck_flags_names[] = {
82         "scanned-once",
83         "inconsistent",
84         "upgrade",
85         "incomplete",
86         "crashed_lastid",
87         NULL
88 };
89
90 const char *lfsck_param_names[] = {
91         NULL,
92         "failout",
93         "dryrun",
94         "all_targets",
95         "broadcast",
96         "orphan",
97         "create_ostobj",
98         NULL
99 };
100
101 const char *lfsck_status2names(enum lfsck_status status)
102 {
103         if (unlikely(status < 0 || status >= LS_MAX))
104                 return "unknown";
105
106         return lfsck_status_names[status];
107 }
108
109 static int lfsck_tgt_descs_init(struct lfsck_tgt_descs *ltds)
110 {
111         spin_lock_init(&ltds->ltd_lock);
112         init_rwsem(&ltds->ltd_rw_sem);
113         INIT_LIST_HEAD(&ltds->ltd_orphan);
114         ltds->ltd_tgts_bitmap = CFS_ALLOCATE_BITMAP(BITS_PER_LONG);
115         if (ltds->ltd_tgts_bitmap == NULL)
116                 return -ENOMEM;
117
118         return 0;
119 }
120
121 static void lfsck_tgt_descs_fini(struct lfsck_tgt_descs *ltds)
122 {
123         struct lfsck_tgt_desc   *ltd;
124         struct lfsck_tgt_desc   *next;
125         int                      idx;
126
127         down_write(&ltds->ltd_rw_sem);
128
129         list_for_each_entry_safe(ltd, next, &ltds->ltd_orphan,
130                                  ltd_orphan_list) {
131                 list_del_init(&ltd->ltd_orphan_list);
132                 lfsck_tgt_put(ltd);
133         }
134
135         if (unlikely(ltds->ltd_tgts_bitmap == NULL)) {
136                 up_write(&ltds->ltd_rw_sem);
137
138                 return;
139         }
140
141         cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
142                 ltd = LTD_TGT(ltds, idx);
143                 if (likely(ltd != NULL)) {
144                         LASSERT(list_empty(&ltd->ltd_layout_list));
145                         LASSERT(list_empty(&ltd->ltd_layout_phase_list));
146
147                         ltds->ltd_tgtnr--;
148                         cfs_bitmap_clear(ltds->ltd_tgts_bitmap, idx);
149                         LTD_TGT(ltds, idx) = NULL;
150                         lfsck_tgt_put(ltd);
151                 }
152         }
153
154         LASSERTF(ltds->ltd_tgtnr == 0, "tgt count unmatched: %d\n",
155                  ltds->ltd_tgtnr);
156
157         for (idx = 0; idx < TGT_PTRS; idx++) {
158                 if (ltds->ltd_tgts_idx[idx] != NULL) {
159                         OBD_FREE_PTR(ltds->ltd_tgts_idx[idx]);
160                         ltds->ltd_tgts_idx[idx] = NULL;
161                 }
162         }
163
164         CFS_FREE_BITMAP(ltds->ltd_tgts_bitmap);
165         ltds->ltd_tgts_bitmap = NULL;
166         up_write(&ltds->ltd_rw_sem);
167 }
168
169 static int __lfsck_add_target(const struct lu_env *env,
170                               struct lfsck_instance *lfsck,
171                               struct lfsck_tgt_desc *ltd,
172                               bool for_ost, bool locked)
173 {
174         struct lfsck_tgt_descs *ltds;
175         __u32                   index = ltd->ltd_index;
176         int                     rc    = 0;
177         ENTRY;
178
179         if (for_ost)
180                 ltds = &lfsck->li_ost_descs;
181         else
182                 ltds = &lfsck->li_mdt_descs;
183
184         if (!locked)
185                 down_write(&ltds->ltd_rw_sem);
186
187         LASSERT(ltds->ltd_tgts_bitmap != NULL);
188
189         if (index >= ltds->ltd_tgts_bitmap->size) {
190                 __u32 newsize = max((__u32)ltds->ltd_tgts_bitmap->size,
191                                     (__u32)BITS_PER_LONG);
192                 cfs_bitmap_t *old_bitmap = ltds->ltd_tgts_bitmap;
193                 cfs_bitmap_t *new_bitmap;
194
195                 while (newsize < index + 1)
196                         newsize <<= 1;
197
198                 new_bitmap = CFS_ALLOCATE_BITMAP(newsize);
199                 if (new_bitmap == NULL)
200                         GOTO(unlock, rc = -ENOMEM);
201
202                 if (ltds->ltd_tgtnr > 0)
203                         cfs_bitmap_copy(new_bitmap, old_bitmap);
204                 ltds->ltd_tgts_bitmap = new_bitmap;
205                 CFS_FREE_BITMAP(old_bitmap);
206         }
207
208         if (cfs_bitmap_check(ltds->ltd_tgts_bitmap, index)) {
209                 CERROR("%s: the device %s (%u) is registered already\n",
210                        lfsck_lfsck2name(lfsck),
211                        ltd->ltd_tgt->dd_lu_dev.ld_obd->obd_name, index);
212                 GOTO(unlock, rc = -EEXIST);
213         }
214
215         if (ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK] == NULL) {
216                 OBD_ALLOC_PTR(ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK]);
217                 if (ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK] == NULL)
218                         GOTO(unlock, rc = -ENOMEM);
219         }
220
221         LTD_TGT(ltds, index) = ltd;
222         cfs_bitmap_set(ltds->ltd_tgts_bitmap, index);
223         ltds->ltd_tgtnr++;
224
225         GOTO(unlock, rc = 0);
226
227 unlock:
228         if (!locked)
229                 up_write(&ltds->ltd_rw_sem);
230
231         return rc;
232 }
233
234 static int lfsck_add_target_from_orphan(const struct lu_env *env,
235                                         struct lfsck_instance *lfsck)
236 {
237         struct lfsck_tgt_descs  *ltds    = &lfsck->li_ost_descs;
238         struct lfsck_tgt_desc   *ltd;
239         struct lfsck_tgt_desc   *next;
240         struct list_head        *head    = &lfsck_ost_orphan_list;
241         int                      rc;
242         bool                     for_ost = true;
243
244 again:
245         spin_lock(&lfsck_instance_lock);
246         list_for_each_entry_safe(ltd, next, head, ltd_orphan_list) {
247                 if (ltd->ltd_key == lfsck->li_bottom) {
248                         list_del_init(&ltd->ltd_orphan_list);
249                         list_add_tail(&ltd->ltd_orphan_list,
250                                       &ltds->ltd_orphan);
251                 }
252         }
253         spin_unlock(&lfsck_instance_lock);
254
255         down_write(&ltds->ltd_rw_sem);
256         while (!list_empty(&ltds->ltd_orphan)) {
257                 ltd = list_entry(ltds->ltd_orphan.next,
258                                  struct lfsck_tgt_desc,
259                                  ltd_orphan_list);
260                 list_del_init(&ltd->ltd_orphan_list);
261                 rc = __lfsck_add_target(env, lfsck, ltd, for_ost, true);
262                 /* Do not hold the semaphore for too long time. */
263                 up_write(&ltds->ltd_rw_sem);
264                 if (rc != 0)
265                         return rc;
266
267                 down_write(&ltds->ltd_rw_sem);
268         }
269         up_write(&ltds->ltd_rw_sem);
270
271         if (for_ost) {
272                 ltds = &lfsck->li_mdt_descs;
273                 head = &lfsck_mdt_orphan_list;
274                 for_ost = false;
275                 goto again;
276         }
277
278         return 0;
279 }
280
281 static inline struct lfsck_component *
282 __lfsck_component_find(struct lfsck_instance *lfsck, __u16 type, cfs_list_t *list)
283 {
284         struct lfsck_component *com;
285
286         cfs_list_for_each_entry(com, list, lc_link) {
287                 if (com->lc_type == type)
288                         return com;
289         }
290         return NULL;
291 }
292
293 struct lfsck_component *
294 lfsck_component_find(struct lfsck_instance *lfsck, __u16 type)
295 {
296         struct lfsck_component *com;
297
298         spin_lock(&lfsck->li_lock);
299         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan);
300         if (com != NULL)
301                 goto unlock;
302
303         com = __lfsck_component_find(lfsck, type,
304                                      &lfsck->li_list_double_scan);
305         if (com != NULL)
306                 goto unlock;
307
308         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_idle);
309
310 unlock:
311         if (com != NULL)
312                 lfsck_component_get(com);
313         spin_unlock(&lfsck->li_lock);
314         return com;
315 }
316
317 void lfsck_component_cleanup(const struct lu_env *env,
318                              struct lfsck_component *com)
319 {
320         if (!cfs_list_empty(&com->lc_link))
321                 cfs_list_del_init(&com->lc_link);
322         if (!cfs_list_empty(&com->lc_link_dir))
323                 cfs_list_del_init(&com->lc_link_dir);
324
325         lfsck_component_put(env, com);
326 }
327
328 int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck,
329                     struct lu_fid *fid, bool locked)
330 {
331         struct lfsck_bookmark   *bk = &lfsck->li_bookmark_ram;
332         int                      rc = 0;
333         ENTRY;
334
335         if (!locked)
336                 mutex_lock(&lfsck->li_mutex);
337
338         rc = seq_client_alloc_fid(env, lfsck->li_seq, fid);
339         if (rc >= 0) {
340                 bk->lb_last_fid = *fid;
341                 /* We do not care about whether the subsequent sub-operations
342                  * failed or not. The worst case is that one FID is lost that
343                  * is not a big issue for the LFSCK since it is relative rare
344                  * for LFSCK create. */
345                 rc = lfsck_bookmark_store(env, lfsck);
346         }
347
348         if (!locked)
349                 mutex_unlock(&lfsck->li_mutex);
350
351         RETURN(rc);
352 }
353
/* Directory entry names used by the lost+found creation code in this file. */
static const char dot[]       = ".";
static const char dotdot[]    = "..";
static const char dotlustre[] = ".lustre";
static const char lostfound[] = "lost+found";
358
359 static int lfsck_create_lpf_local(const struct lu_env *env,
360                                   struct lfsck_instance *lfsck,
361                                   struct dt_object *parent,
362                                   struct dt_object *child,
363                                   struct lu_attr *la,
364                                   struct dt_object_format *dof,
365                                   const char *name)
366 {
367         struct dt_insert_rec    *rec    = &lfsck_env_info(env)->lti_dt_rec;
368         struct dt_device        *dev    = lfsck->li_bottom;
369         struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
370         struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
371         const struct lu_fid     *cfid   = lfsck_dto2fid(child);
372         struct thandle          *th     = NULL;
373         struct linkea_data       ldata  = { 0 };
374         struct lu_buf            linkea_buf;
375         const struct lu_name    *cname;
376         loff_t                   pos    = 0;
377         int                      len    = sizeof(struct lfsck_bookmark);
378         int                      rc;
379         ENTRY;
380
381         rc = linkea_data_new(&ldata,
382                              &lfsck_env_info(env)->lti_linkea_buf);
383         if (rc != 0)
384                 RETURN(rc);
385
386         cname = lfsck_name_get_const(env, name, strlen(name));
387         rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent));
388         if (rc != 0)
389                 RETURN(rc);
390
391         th = dt_trans_create(env, dev);
392         if (IS_ERR(th))
393                 RETURN(PTR_ERR(th));
394
395         /* 1a. create child */
396         rc = dt_declare_create(env, child, la, NULL, dof, th);
397         if (rc != 0)
398                 GOTO(stop, rc);
399
400         /* 2a. increase child nlink */
401         rc = dt_declare_ref_add(env, child, th);
402         if (rc != 0)
403                 GOTO(stop, rc);
404
405         /* 3a. insert linkEA for child */
406         linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
407         linkea_buf.lb_len = ldata.ld_leh->leh_len;
408         rc = dt_declare_xattr_set(env, child, &linkea_buf,
409                                   XATTR_NAME_LINK, 0, th);
410         if (rc != 0)
411                 GOTO(stop, rc);
412
413         /* 4a. insert name into parent dir */
414         rec->rec_type = S_IFDIR;
415         rec->rec_fid = cfid;
416         rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
417                                (const struct dt_key *)name, th);
418         if (rc != 0)
419                 GOTO(stop, rc);
420
421         /* 5a. increase parent nlink */
422         rc = dt_declare_ref_add(env, parent, th);
423         if (rc != 0)
424                 GOTO(stop, rc);
425
426         /* 6a. update bookmark */
427         rc = dt_declare_record_write(env, bk_obj,
428                                      lfsck_buf_get(env, bk, len), 0, th);
429         if (rc != 0)
430                 GOTO(stop, rc);
431
432         rc = dt_trans_start_local(env, dev, th);
433         if (rc != 0)
434                 GOTO(stop, rc);
435
436         dt_write_lock(env, child, 0);
437         /* 1b.1. create child */
438         rc = dt_create(env, child, la, NULL, dof, th);
439         if (rc != 0)
440                 GOTO(unlock, rc);
441
442         if (unlikely(!dt_try_as_dir(env, child)))
443                 GOTO(unlock, rc = -ENOTDIR);
444
445         /* 1b.2. insert dot into child dir */
446         rec->rec_fid = cfid;
447         rc = dt_insert(env, child, (const struct dt_rec *)rec,
448                        (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
449         if (rc != 0)
450                 GOTO(unlock, rc);
451
452         /* 1b.3. insert dotdot into child dir */
453         rec->rec_fid = &LU_LPF_FID;
454         rc = dt_insert(env, child, (const struct dt_rec *)rec,
455                        (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
456         if (rc != 0)
457                 GOTO(unlock, rc);
458
459         /* 2b. increase child nlink */
460         rc = dt_ref_add(env, child, th);
461         if (rc != 0)
462                 GOTO(unlock, rc);
463
464         /* 3b. insert linkEA for child. */
465         rc = dt_xattr_set(env, child, &linkea_buf,
466                           XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
467         dt_write_unlock(env, child);
468         if (rc != 0)
469                 GOTO(stop, rc);
470
471         /* 4b. insert name into parent dir */
472         rec->rec_fid = cfid;
473         rc = dt_insert(env, parent, (const struct dt_rec *)rec,
474                        (const struct dt_key *)name, th, BYPASS_CAPA, 1);
475         if (rc != 0)
476                 GOTO(stop, rc);
477
478         dt_write_lock(env, parent, 0);
479         /* 5b. increase parent nlink */
480         rc = dt_ref_add(env, parent, th);
481         dt_write_unlock(env, parent);
482         if (rc != 0)
483                 GOTO(stop, rc);
484
485         bk->lb_lpf_fid = *cfid;
486         lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
487
488         /* 6b. update bookmark */
489         rc = dt_record_write(env, bk_obj,
490                              lfsck_buf_get(env, bk, len), &pos, th);
491
492         GOTO(stop, rc);
493
494 unlock:
495         dt_write_unlock(env, child);
496
497 stop:
498         dt_trans_stop(env, dev, th);
499
500         return rc;
501 }
502
503 static int lfsck_create_lpf_remote(const struct lu_env *env,
504                                    struct lfsck_instance *lfsck,
505                                    struct dt_object *parent,
506                                    struct dt_object *child,
507                                    struct lu_attr *la,
508                                    struct dt_object_format *dof,
509                                    const char *name)
510 {
511         struct dt_insert_rec    *rec    = &lfsck_env_info(env)->lti_dt_rec;
512         struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
513         struct dt_object        *bk_obj = lfsck->li_bookmark_obj;
514         const struct lu_fid     *cfid   = lfsck_dto2fid(child);
515         struct thandle          *th     = NULL;
516         struct linkea_data       ldata  = { 0 };
517         struct lu_buf            linkea_buf;
518         const struct lu_name    *cname;
519         struct dt_device        *dev;
520         loff_t                   pos    = 0;
521         int                      len    = sizeof(struct lfsck_bookmark);
522         int                      rc;
523         ENTRY;
524
525         rc = linkea_data_new(&ldata,
526                              &lfsck_env_info(env)->lti_linkea_buf);
527         if (rc != 0)
528                 RETURN(rc);
529
530         cname = lfsck_name_get_const(env, name, strlen(name));
531         rc = linkea_add_buf(&ldata, cname, lfsck_dto2fid(parent));
532         if (rc != 0)
533                 RETURN(rc);
534
535         /* Create .lustre/lost+found/MDTxxxx. */
536
537         /* XXX: Currently, cross-MDT create operation needs to create the child
538          *      object firstly, then insert name into the parent directory. For
539          *      this case, the child object resides on current MDT (local), but
540          *      the parent ".lustre/lost+found" may be on remote MDT. It is not
541          *      easy to contain all the sub-modifications orderly within single
542          *      transaction.
543          *
544          *      To avoid more inconsistency, we split the create operation into
545          *      two transactions:
546          *
547          *      1) create the child and update the lfsck_bookmark::lb_lpf_fid
548          *         locally.
549          *      2) insert the name "MDTXXXX" in the parent ".lustre/lost+found"
550          *         remotely.
551          *
552          *      If 1) done, but 2) failed, then go ahead, the LFSCK will try to
553          *      repair such inconsistency when LFSCK run next time. */
554
555         /* Transaction I: locally */
556
557         dev = lfsck->li_bottom;
558         th = dt_trans_create(env, dev);
559         if (IS_ERR(th))
560                 RETURN(PTR_ERR(th));
561
562         /* 1a. create child */
563         rc = dt_declare_create(env, child, la, NULL, dof, th);
564         if (rc != 0)
565                 GOTO(stop, rc);
566
567         /* 2a. increase child nlink */
568         rc = dt_declare_ref_add(env, child, th);
569         if (rc != 0)
570                 GOTO(stop, rc);
571
572         /* 3a. insert linkEA for child */
573         linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
574         linkea_buf.lb_len = ldata.ld_leh->leh_len;
575         rc = dt_declare_xattr_set(env, child, &linkea_buf,
576                                   XATTR_NAME_LINK, 0, th);
577         if (rc != 0)
578                 GOTO(stop, rc);
579
580         /* 4a. update bookmark */
581         rc = dt_declare_record_write(env, bk_obj,
582                                      lfsck_buf_get(env, bk, len), 0, th);
583         if (rc != 0)
584                 GOTO(stop, rc);
585
586         rc = dt_trans_start_local(env, dev, th);
587         if (rc != 0)
588                 GOTO(stop, rc);
589
590         dt_write_lock(env, child, 0);
591         /* 1b.1. create child */
592         rc = dt_create(env, child, la, NULL, dof, th);
593         if (rc != 0)
594                 GOTO(unlock, rc);
595
596         if (unlikely(!dt_try_as_dir(env, child)))
597                 GOTO(unlock, rc = -ENOTDIR);
598
599         /* 1b.2. insert dot into child dir */
600         rec->rec_type = S_IFDIR;
601         rec->rec_fid = cfid;
602         rc = dt_insert(env, child, (const struct dt_rec *)rec,
603                        (const struct dt_key *)dot, th, BYPASS_CAPA, 1);
604         if (rc != 0)
605                 GOTO(unlock, rc);
606
607         /* 1b.3. insert dotdot into child dir */
608         rec->rec_fid = &LU_LPF_FID;
609         rc = dt_insert(env, child, (const struct dt_rec *)rec,
610                        (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1);
611         if (rc != 0)
612                 GOTO(unlock, rc);
613
614         /* 2b. increase child nlink */
615         rc = dt_ref_add(env, child, th);
616         if (rc != 0)
617                 GOTO(unlock, rc);
618
619         /* 3b. insert linkEA for child */
620         rc = dt_xattr_set(env, child, &linkea_buf,
621                           XATTR_NAME_LINK, 0, th, BYPASS_CAPA);
622         if (rc != 0)
623                 GOTO(unlock, rc);
624
625         bk->lb_lpf_fid = *cfid;
626         lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk);
627
628         /* 4b. update bookmark */
629         rc = dt_record_write(env, bk_obj,
630                              lfsck_buf_get(env, bk, len), &pos, th);
631
632         dt_write_unlock(env, child);
633         dt_trans_stop(env, dev, th);
634         if (rc != 0)
635                 RETURN(rc);
636
637         /* Transaction II: remotely */
638
639         dev = lfsck->li_next;
640         th = dt_trans_create(env, dev);
641         if (IS_ERR(th))
642                 RETURN(PTR_ERR(th));
643
644         /* 5a. insert name into parent dir */
645         rec->rec_fid = cfid;
646         rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
647                                (const struct dt_key *)name, th);
648         if (rc != 0)
649                 GOTO(stop, rc);
650
651         /* 6a. increase parent nlink */
652         rc = dt_declare_ref_add(env, parent, th);
653         if (rc != 0)
654                 GOTO(stop, rc);
655
656         rc = dt_trans_start(env, dev, th);
657         if (rc != 0)
658                 GOTO(stop, rc);
659
660         /* 5b. insert name into parent dir */
661         rc = dt_insert(env, parent, (const struct dt_rec *)rec,
662                        (const struct dt_key *)name, th, BYPASS_CAPA, 1);
663         if (rc != 0)
664                 GOTO(stop, rc);
665
666         dt_write_lock(env, parent, 0);
667         /* 6b. increase parent nlink */
668         rc = dt_ref_add(env, parent, th);
669         dt_write_unlock(env, parent);
670
671         GOTO(stop, rc);
672
673 unlock:
674         dt_write_unlock(env, child);
675 stop:
676         dt_trans_stop(env, dev, th);
677
678         if (rc != 0 && dev == lfsck->li_next)
679                 CDEBUG(D_LFSCK, "%s: partially created the object "DFID
680                        "for orphans, but failed to insert the name %s "
681                        "to the .lustre/lost+found/. Such inconsistency "
682                        "will be repaired when LFSCK run next time: rc = %d\n",
683                        lfsck_lfsck2name(lfsck), PFID(cfid), name, rc);
684
685         return rc;
686 }
687
688 /* Do NOT create .lustre/lost+found/MDTxxxx when register the lfsck instance,
689  * because the MDT0 maybe not reaady for sequence allocation yet. We do that
690  * only when it is required, such as orphan OST-objects repairing. */
691 int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck)
692 {
693         struct lfsck_bookmark    *bk    = &lfsck->li_bookmark_ram;
694         struct lfsck_thread_info *info  = lfsck_env_info(env);
695         struct lu_fid            *cfid  = &info->lti_fid2;
696         struct lu_attr           *la    = &info->lti_la;
697         struct dt_object_format  *dof   = &info->lti_dof;
698         struct dt_object         *parent = NULL;
699         struct dt_object         *child = NULL;
700         char                      name[8];
701         int                       node  = lfsck_dev_idx(lfsck->li_bottom);
702         int                       rc    = 0;
703         ENTRY;
704
705         LASSERT(lfsck->li_master);
706
707         sprintf(name, "MDT%04x", node);
708         if (node == 0) {
709                 parent = lfsck_object_find_by_dev(env, lfsck->li_bottom,
710                                                   &LU_LPF_FID);
711         } else {
712                 struct lfsck_tgt_desc *ltd;
713
714                 ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0);
715                 if (unlikely(ltd == NULL))
716                         RETURN(-ENXIO);
717
718                 parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt,
719                                                   &LU_LPF_FID);
720                 lfsck_tgt_put(ltd);
721         }
722         if (IS_ERR(parent))
723                 RETURN(PTR_ERR(parent));
724
725         if (unlikely(!dt_try_as_dir(env, parent)))
726                 GOTO(out, rc = -ENOTDIR);
727
728         mutex_lock(&lfsck->li_mutex);
729         if (lfsck->li_lpf_obj != NULL)
730                 GOTO(unlock, rc = 0);
731
732         if (fid_is_zero(&bk->lb_lpf_fid)) {
733                 /* There is corner case that: in former LFSCK scanning we have
734                  * created the .lustre/lost+found/MDTxxxx but failed to update
735                  * the lfsck_bookmark::lb_lpf_fid successfully. So need lookup
736                  * it from MDT0 firstly. */
737                 rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
738                                (const struct dt_key *)name, BYPASS_CAPA);
739                 if (rc != 0 && rc != -ENOENT)
740                         GOTO(unlock, rc);
741
742                 if (rc == 0) {
743                         bk->lb_lpf_fid = *cfid;
744                         rc = lfsck_bookmark_store(env, lfsck);
745                 } else {
746                         rc = lfsck_fid_alloc(env, lfsck, cfid, true);
747                 }
748                 if (rc != 0)
749                         GOTO(unlock, rc);
750         } else {
751                 *cfid = bk->lb_lpf_fid;
752         }
753
754         child = lfsck_object_find_by_dev(env, lfsck->li_bottom, cfid);
755         if (IS_ERR(child))
756                 GOTO(unlock, rc = PTR_ERR(child));
757
758         if (dt_object_exists(child) != 0) {
759                 if (unlikely(!dt_try_as_dir(env, child)))
760                         rc = -ENOTDIR;
761                 else
762                         lfsck->li_lpf_obj = child;
763
764                 GOTO(unlock, rc);
765         }
766
767         memset(la, 0, sizeof(*la));
768         la->la_atime = la->la_mtime = la->la_ctime = cfs_time_current_sec();
769         la->la_mode = S_IFDIR | S_IRWXU;
770         la->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE |
771                        LA_UID | LA_GID;
772         memset(dof, 0, sizeof(*dof));
773         dof->dof_type = dt_mode_to_dft(S_IFDIR);
774
775         if (node == 0)
776                 rc = lfsck_create_lpf_local(env, lfsck, parent, child, la,
777                                             dof, name);
778         else
779                 rc = lfsck_create_lpf_remote(env, lfsck, parent, child, la,
780                                              dof, name);
781         if (rc == 0)
782                 lfsck->li_lpf_obj = child;
783
784         GOTO(unlock, rc);
785
786 unlock:
787         mutex_unlock(&lfsck->li_mutex);
788         if (rc != 0 && child != NULL && !IS_ERR(child))
789                 lu_object_put(env, &child->do_lu);
790 out:
791         if (parent != NULL && !IS_ERR(parent))
792                 lu_object_put(env, &parent->do_lu);
793
794         return rc;
795 }
796
797 static int lfsck_fid_init(struct lfsck_instance *lfsck)
798 {
799         struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
800         struct seq_server_site  *ss;
801         char                    *prefix;
802         int                      rc     = 0;
803         ENTRY;
804
805         ss = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site);
806         if (unlikely(ss == NULL))
807                 RETURN(-ENXIO);
808
809         OBD_ALLOC_PTR(lfsck->li_seq);
810         if (lfsck->li_seq == NULL)
811                 RETURN(-ENOMEM);
812
813         OBD_ALLOC(prefix, MAX_OBD_NAME + 7);
814         if (prefix == NULL)
815                 GOTO(out, rc = -ENOMEM);
816
817         snprintf(prefix, MAX_OBD_NAME + 7, "lfsck-%s", lfsck_lfsck2name(lfsck));
818         rc = seq_client_init(lfsck->li_seq, NULL, LUSTRE_SEQ_METADATA, prefix,
819                              ss->ss_server_seq);
820         OBD_FREE(prefix, MAX_OBD_NAME + 7);
821         if (rc != 0)
822                 GOTO(out, rc);
823
824         if (fid_is_sane(&bk->lb_last_fid))
825                 lfsck->li_seq->lcs_fid = bk->lb_last_fid;
826
827         RETURN(0);
828
829 out:
830         OBD_FREE_PTR(lfsck->li_seq);
831         lfsck->li_seq = NULL;
832
833         return rc;
834 }
835
836 static void lfsck_fid_fini(struct lfsck_instance *lfsck)
837 {
838         if (lfsck->li_seq != NULL) {
839                 seq_client_fini(lfsck->li_seq);
840                 OBD_FREE_PTR(lfsck->li_seq);
841                 lfsck->li_seq = NULL;
842         }
843 }
844
845 void lfsck_instance_cleanup(const struct lu_env *env,
846                             struct lfsck_instance *lfsck)
847 {
848         struct ptlrpc_thread    *thread = &lfsck->li_thread;
849         struct lfsck_component  *com;
850         ENTRY;
851
852         LASSERT(list_empty(&lfsck->li_link));
853         LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
854
855         if (lfsck->li_obj_oit != NULL) {
856                 lu_object_put_nocache(env, &lfsck->li_obj_oit->do_lu);
857                 lfsck->li_obj_oit = NULL;
858         }
859
860         LASSERT(lfsck->li_obj_dir == NULL);
861
862         while (!cfs_list_empty(&lfsck->li_list_scan)) {
863                 com = cfs_list_entry(lfsck->li_list_scan.next,
864                                      struct lfsck_component,
865                                      lc_link);
866                 lfsck_component_cleanup(env, com);
867         }
868
869         LASSERT(cfs_list_empty(&lfsck->li_list_dir));
870
871         while (!cfs_list_empty(&lfsck->li_list_double_scan)) {
872                 com = cfs_list_entry(lfsck->li_list_double_scan.next,
873                                      struct lfsck_component,
874                                      lc_link);
875                 lfsck_component_cleanup(env, com);
876         }
877
878         while (!cfs_list_empty(&lfsck->li_list_idle)) {
879                 com = cfs_list_entry(lfsck->li_list_idle.next,
880                                      struct lfsck_component,
881                                      lc_link);
882                 lfsck_component_cleanup(env, com);
883         }
884
885         lfsck_tgt_descs_fini(&lfsck->li_ost_descs);
886         lfsck_tgt_descs_fini(&lfsck->li_mdt_descs);
887
888         if (lfsck->li_bookmark_obj != NULL) {
889                 lu_object_put_nocache(env, &lfsck->li_bookmark_obj->do_lu);
890                 lfsck->li_bookmark_obj = NULL;
891         }
892
893         if (lfsck->li_lpf_obj != NULL) {
894                 lu_object_put(env, &lfsck->li_lpf_obj->do_lu);
895                 lfsck->li_lpf_obj = NULL;
896         }
897
898         if (lfsck->li_los != NULL) {
899                 local_oid_storage_fini(env, lfsck->li_los);
900                 lfsck->li_los = NULL;
901         }
902
903         lfsck_fid_fini(lfsck);
904
905         OBD_FREE_PTR(lfsck);
906 }
907
908 static inline struct lfsck_instance *
909 __lfsck_instance_find(struct dt_device *key, bool ref, bool unlink)
910 {
911         struct lfsck_instance *lfsck;
912
913         cfs_list_for_each_entry(lfsck, &lfsck_instance_list, li_link) {
914                 if (lfsck->li_bottom == key) {
915                         if (ref)
916                                 lfsck_instance_get(lfsck);
917                         if (unlink)
918                                 list_del_init(&lfsck->li_link);
919
920                         return lfsck;
921                 }
922         }
923
924         return NULL;
925 }
926
927 struct lfsck_instance *lfsck_instance_find(struct dt_device *key, bool ref,
928                                            bool unlink)
929 {
930         struct lfsck_instance *lfsck;
931
932         spin_lock(&lfsck_instance_lock);
933         lfsck = __lfsck_instance_find(key, ref, unlink);
934         spin_unlock(&lfsck_instance_lock);
935
936         return lfsck;
937 }
938
939 static inline int lfsck_instance_add(struct lfsck_instance *lfsck)
940 {
941         struct lfsck_instance *tmp;
942
943         spin_lock(&lfsck_instance_lock);
944         cfs_list_for_each_entry(tmp, &lfsck_instance_list, li_link) {
945                 if (lfsck->li_bottom == tmp->li_bottom) {
946                         spin_unlock(&lfsck_instance_lock);
947                         return -EEXIST;
948                 }
949         }
950
951         cfs_list_add_tail(&lfsck->li_link, &lfsck_instance_list);
952         spin_unlock(&lfsck_instance_lock);
953         return 0;
954 }
955
956 int lfsck_bits_dump(struct seq_file *m, int bits, const char *names[],
957                     const char *prefix)
958 {
959         int flag;
960         int i;
961         bool newline = (bits != 0 ? false : true);
962
963         seq_printf(m, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
964
965         for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
966                 if (flag & bits) {
967                         bits &= ~flag;
968                         if (names[i] != NULL) {
969                                 if (bits == 0)
970                                         newline = true;
971
972                                 seq_printf(m, "%s%c", names[i],
973                                            newline ? '\n' : ',');
974                         }
975                 }
976         }
977
978         if (!newline)
979                 seq_printf(m, "\n");
980         return 0;
981 }
982
983 int lfsck_time_dump(struct seq_file *m, __u64 time, const char *prefix)
984 {
985         if (time != 0)
986                 seq_printf(m, "%s: "LPU64" seconds\n", prefix,
987                           cfs_time_current_sec() - time);
988         else
989                 seq_printf(m, "%s: N/A\n", prefix);
990         return 0;
991 }
992
993 int lfsck_pos_dump(struct seq_file *m, struct lfsck_position *pos,
994                    const char *prefix)
995 {
996         if (fid_is_zero(&pos->lp_dir_parent)) {
997                 if (pos->lp_oit_cookie == 0)
998                         seq_printf(m, "%s: N/A, N/A, N/A\n",
999                                    prefix);
1000                 else
1001                         seq_printf(m, "%s: "LPU64", N/A, N/A\n",
1002                                    prefix, pos->lp_oit_cookie);
1003         } else {
1004                 seq_printf(m, "%s: "LPU64", "DFID", "LPX64"\n",
1005                            prefix, pos->lp_oit_cookie,
1006                            PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
1007         }
1008         return 0;
1009 }
1010
1011 void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
1012                     struct lfsck_position *pos, bool init)
1013 {
1014         const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
1015
1016         if (unlikely(lfsck->li_di_oit == NULL)) {
1017                 memset(pos, 0, sizeof(*pos));
1018                 return;
1019         }
1020
1021         pos->lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
1022         if (!lfsck->li_current_oit_processed && !init)
1023                 pos->lp_oit_cookie--;
1024
1025         LASSERT(pos->lp_oit_cookie > 0);
1026
1027         if (lfsck->li_di_dir != NULL) {
1028                 struct dt_object *dto = lfsck->li_obj_dir;
1029
1030                 pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
1031                                                         lfsck->li_di_dir);
1032
1033                 if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
1034                         fid_zero(&pos->lp_dir_parent);
1035                         pos->lp_dir_cookie = 0;
1036                 } else {
1037                         pos->lp_dir_parent = *lfsck_dto2fid(dto);
1038                 }
1039         } else {
1040                 fid_zero(&pos->lp_dir_parent);
1041                 pos->lp_dir_cookie = 0;
1042         }
1043 }
1044
1045 bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
1046 {
1047         bool dirty = false;
1048
1049         if (limit != LFSCK_SPEED_NO_LIMIT) {
1050                 if (limit > HZ) {
1051                         lfsck->li_sleep_rate = limit / HZ;
1052                         lfsck->li_sleep_jif = 1;
1053                 } else {
1054                         lfsck->li_sleep_rate = 1;
1055                         lfsck->li_sleep_jif = HZ / limit;
1056                 }
1057         } else {
1058                 lfsck->li_sleep_jif = 0;
1059                 lfsck->li_sleep_rate = 0;
1060         }
1061
1062         if (lfsck->li_bookmark_ram.lb_speed_limit != limit) {
1063                 lfsck->li_bookmark_ram.lb_speed_limit = limit;
1064                 dirty = true;
1065         }
1066
1067         return dirty;
1068 }
1069
1070 void lfsck_control_speed(struct lfsck_instance *lfsck)
1071 {
1072         struct ptlrpc_thread *thread = &lfsck->li_thread;
1073         struct l_wait_info    lwi;
1074
1075         if (lfsck->li_sleep_jif > 0 &&
1076             lfsck->li_new_scanned >= lfsck->li_sleep_rate) {
1077                 lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL,
1078                                        LWI_ON_SIGNAL_NOOP, NULL);
1079
1080                 l_wait_event(thread->t_ctl_waitq,
1081                              !thread_is_running(thread),
1082                              &lwi);
1083                 lfsck->li_new_scanned = 0;
1084         }
1085 }
1086
1087 void lfsck_control_speed_by_self(struct lfsck_component *com)
1088 {
1089         struct lfsck_instance   *lfsck  = com->lc_lfsck;
1090         struct ptlrpc_thread    *thread = &lfsck->li_thread;
1091         struct l_wait_info       lwi;
1092
1093         if (lfsck->li_sleep_jif > 0 &&
1094             com->lc_new_scanned >= lfsck->li_sleep_rate) {
1095                 lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL,
1096                                        LWI_ON_SIGNAL_NOOP, NULL);
1097
1098                 l_wait_event(thread->t_ctl_waitq,
1099                              !thread_is_running(thread),
1100                              &lwi);
1101                 com->lc_new_scanned = 0;
1102         }
1103 }
1104
1105 static int lfsck_parent_fid(const struct lu_env *env, struct dt_object *obj,
1106                             struct lu_fid *fid)
1107 {
1108         if (unlikely(!S_ISDIR(lfsck_object_type(obj)) ||
1109                      !dt_try_as_dir(env, obj)))
1110                 return -ENOTDIR;
1111
1112         return dt_lookup(env, obj, (struct dt_rec *)fid,
1113                          (const struct dt_key *)"..", BYPASS_CAPA);
1114 }
1115
1116 static int lfsck_needs_scan_dir(const struct lu_env *env,
1117                                 struct lfsck_instance *lfsck,
1118                                 struct dt_object *obj)
1119 {
1120         struct lu_fid *fid   = &lfsck_env_info(env)->lti_fid;
1121         int            depth = 0;
1122         int            rc;
1123
1124         if (!lfsck->li_master || !S_ISDIR(lfsck_object_type(obj)) ||
1125             cfs_list_empty(&lfsck->li_list_dir))
1126                RETURN(0);
1127
1128         while (1) {
1129                 /* XXX: Currently, we do not scan the "/REMOTE_PARENT_DIR",
1130                  *      which is the agent directory to manage the objects
1131                  *      which name entries reside on remote MDTs. Related
1132                  *      consistency verification will be processed in LFSCK
1133                  *      phase III. */
1134                 if (lu_fid_eq(lfsck_dto2fid(obj), &lfsck->li_global_root_fid)) {
1135                         if (depth > 0)
1136                                 lfsck_object_put(env, obj);
1137                         return 1;
1138                 }
1139
1140                 /* No need to check .lustre and its children. */
1141                 if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj)))) {
1142                         if (depth > 0)
1143                                 lfsck_object_put(env, obj);
1144                         return 0;
1145                 }
1146
1147                 dt_read_lock(env, obj, MOR_TGT_CHILD);
1148                 if (unlikely(lfsck_is_dead_obj(obj))) {
1149                         dt_read_unlock(env, obj);
1150                         if (depth > 0)
1151                                 lfsck_object_put(env, obj);
1152                         return 0;
1153                 }
1154
1155                 rc = dt_xattr_get(env, obj,
1156                                   lfsck_buf_get(env, NULL, 0), XATTR_NAME_LINK,
1157                                   BYPASS_CAPA);
1158                 dt_read_unlock(env, obj);
1159                 if (rc >= 0) {
1160                         if (depth > 0)
1161                                 lfsck_object_put(env, obj);
1162                         return 1;
1163                 }
1164
1165                 if (rc < 0 && rc != -ENODATA) {
1166                         if (depth > 0)
1167                                 lfsck_object_put(env, obj);
1168                         return rc;
1169                 }
1170
1171                 rc = lfsck_parent_fid(env, obj, fid);
1172                 if (depth > 0)
1173                         lfsck_object_put(env, obj);
1174                 if (rc != 0)
1175                         return rc;
1176
1177                 if (unlikely(lu_fid_eq(fid, &lfsck->li_local_root_fid)))
1178                         return 0;
1179
1180                 obj = lfsck_object_find(env, lfsck, fid);
1181                 if (obj == NULL)
1182                         return 0;
1183                 else if (IS_ERR(obj))
1184                         return PTR_ERR(obj);
1185
1186                 if (!dt_object_exists(obj)) {
1187                         lfsck_object_put(env, obj);
1188                         return 0;
1189                 }
1190
1191                 if (dt_object_remote(obj)) {
1192                         /* .lustre/lost+found/MDTxxx can be remote directory. */
1193                         if (fid_seq_is_dot_lustre(fid_seq(lfsck_dto2fid(obj))))
1194                                 rc = 0;
1195                         else
1196                                 /* Other remote directory should be client
1197                                  * visible and need to be checked. */
1198                                 rc = 1;
1199                         lfsck_object_put(env, obj);
1200                         return rc;
1201                 }
1202
1203                 depth++;
1204         }
1205         return 0;
1206 }
1207
1208 struct lfsck_thread_args *lfsck_thread_args_init(struct lfsck_instance *lfsck,
1209                                                  struct lfsck_component *com,
1210                                                  struct lfsck_start_param *lsp)
1211 {
1212         struct lfsck_thread_args *lta;
1213         int                       rc;
1214
1215         OBD_ALLOC_PTR(lta);
1216         if (lta == NULL)
1217                 return ERR_PTR(-ENOMEM);
1218
1219         rc = lu_env_init(&lta->lta_env, LCT_MD_THREAD | LCT_DT_THREAD);
1220         if (rc != 0) {
1221                 OBD_FREE_PTR(lta);
1222                 return ERR_PTR(rc);
1223         }
1224
1225         lta->lta_lfsck = lfsck_instance_get(lfsck);
1226         if (com != NULL)
1227                 lta->lta_com = lfsck_component_get(com);
1228
1229         lta->lta_lsp = lsp;
1230
1231         return lta;
1232 }
1233
1234 void lfsck_thread_args_fini(struct lfsck_thread_args *lta)
1235 {
1236         if (lta->lta_com != NULL)
1237                 lfsck_component_put(&lta->lta_env, lta->lta_com);
1238         lfsck_instance_put(&lta->lta_env, lta->lta_lfsck);
1239         lu_env_fini(&lta->lta_env);
1240         OBD_FREE_PTR(lta);
1241 }
1242
1243 /* LFSCK wrap functions */
1244
1245 void lfsck_fail(const struct lu_env *env, struct lfsck_instance *lfsck,
1246                 bool new_checked)
1247 {
1248         struct lfsck_component *com;
1249
1250         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1251                 com->lc_ops->lfsck_fail(env, com, new_checked);
1252         }
1253 }
1254
1255 int lfsck_checkpoint(const struct lu_env *env, struct lfsck_instance *lfsck)
1256 {
1257         struct lfsck_component *com;
1258         int                     rc  = 0;
1259         int                     rc1 = 0;
1260
1261         if (likely(cfs_time_beforeq(cfs_time_current(),
1262                                     lfsck->li_time_next_checkpoint)))
1263                 return 0;
1264
1265         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
1266         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1267                 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
1268                 if (rc != 0)
1269                         rc1 = rc;
1270         }
1271
1272         lfsck->li_time_last_checkpoint = cfs_time_current();
1273         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
1274                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1275         return rc1 != 0 ? rc1 : rc;
1276 }
1277
1278 int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck,
1279                struct lfsck_start_param *lsp)
1280 {
1281         struct dt_object       *obj     = NULL;
1282         struct lfsck_component *com;
1283         struct lfsck_component *next;
1284         struct lfsck_position  *pos     = NULL;
1285         const struct dt_it_ops *iops    =
1286                                 &lfsck->li_obj_oit->do_index_ops->dio_it;
1287         struct dt_it           *di;
1288         int                     rc;
1289         ENTRY;
1290
1291         LASSERT(lfsck->li_obj_dir == NULL);
1292         LASSERT(lfsck->li_di_dir == NULL);
1293
1294         lfsck->li_current_oit_processed = 0;
1295         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
1296                 com->lc_new_checked = 0;
1297                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1298                         com->lc_journal = 0;
1299
1300                 rc = com->lc_ops->lfsck_prep(env, com, lsp);
1301                 if (rc != 0)
1302                         GOTO(out, rc);
1303
1304                 if ((pos == NULL) ||
1305                     (!lfsck_pos_is_zero(&com->lc_pos_start) &&
1306                      lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
1307                         pos = &com->lc_pos_start;
1308         }
1309
1310         /* Init otable-based iterator. */
1311         if (pos == NULL) {
1312                 rc = iops->load(env, lfsck->li_di_oit, 0);
1313                 if (rc > 0) {
1314                         lfsck->li_oit_over = 1;
1315                         rc = 0;
1316                 }
1317
1318                 GOTO(out, rc);
1319         }
1320
1321         rc = iops->load(env, lfsck->li_di_oit, pos->lp_oit_cookie);
1322         if (rc < 0)
1323                 GOTO(out, rc);
1324         else if (rc > 0)
1325                 lfsck->li_oit_over = 1;
1326
1327         if (!lfsck->li_master || fid_is_zero(&pos->lp_dir_parent))
1328                 GOTO(out, rc = 0);
1329
1330         /* Find the directory for namespace-based traverse. */
1331         obj = lfsck_object_find(env, lfsck, &pos->lp_dir_parent);
1332         if (obj == NULL)
1333                 GOTO(out, rc = 0);
1334         else if (IS_ERR(obj))
1335                 RETURN(PTR_ERR(obj));
1336
1337         /* XXX: Currently, skip remote object, the consistency for
1338          *      remote object will be processed in LFSCK phase III. */
1339         if (!dt_object_exists(obj) || dt_object_remote(obj) ||
1340             unlikely(!S_ISDIR(lfsck_object_type(obj))))
1341                 GOTO(out, rc = 0);
1342
1343         if (unlikely(!dt_try_as_dir(env, obj)))
1344                 GOTO(out, rc = -ENOTDIR);
1345
1346         /* Init the namespace-based directory traverse. */
1347         iops = &obj->do_index_ops->dio_it;
1348         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
1349         if (IS_ERR(di))
1350                 GOTO(out, rc = PTR_ERR(di));
1351
1352         LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
1353
1354         rc = iops->load(env, di, pos->lp_dir_cookie);
1355         if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
1356                 rc = iops->next(env, di);
1357         else if (rc > 0)
1358                 rc = 0;
1359
1360         if (rc != 0) {
1361                 iops->put(env, di);
1362                 iops->fini(env, di);
1363                 GOTO(out, rc);
1364         }
1365
1366         lfsck->li_obj_dir = lfsck_object_get(obj);
1367         lfsck->li_cookie_dir = iops->store(env, di);
1368         spin_lock(&lfsck->li_lock);
1369         lfsck->li_di_dir = di;
1370         spin_unlock(&lfsck->li_lock);
1371
1372         GOTO(out, rc = 0);
1373
1374 out:
1375         if (obj != NULL)
1376                 lfsck_object_put(env, obj);
1377
1378         if (rc < 0) {
1379                 cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
1380                                              lc_link)
1381                         com->lc_ops->lfsck_post(env, com, rc, true);
1382
1383                 return rc;
1384         }
1385
1386         rc = 0;
1387         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, true);
1388         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1389                 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
1390                 if (rc != 0)
1391                         break;
1392         }
1393
1394         lfsck->li_time_last_checkpoint = cfs_time_current();
1395         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
1396                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1397         return rc;
1398 }
1399
1400 int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck,
1401                    struct dt_object *obj)
1402 {
1403         struct lfsck_component *com;
1404         const struct dt_it_ops *iops;
1405         struct dt_it           *di;
1406         int                     rc;
1407         ENTRY;
1408
1409         LASSERT(lfsck->li_obj_dir == NULL);
1410
1411         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1412                 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
1413                 if (rc != 0)
1414                         RETURN(rc);
1415         }
1416
1417         rc = lfsck_needs_scan_dir(env, lfsck, obj);
1418         if (rc <= 0)
1419                 GOTO(out, rc);
1420
1421         if (unlikely(!dt_try_as_dir(env, obj)))
1422                 GOTO(out, rc = -ENOTDIR);
1423
1424         iops = &obj->do_index_ops->dio_it;
1425         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
1426         if (IS_ERR(di))
1427                 GOTO(out, rc = PTR_ERR(di));
1428
1429         rc = iops->load(env, di, 0);
1430         if (rc == 0)
1431                 rc = iops->next(env, di);
1432         else if (rc > 0)
1433                 rc = 0;
1434
1435         if (rc != 0) {
1436                 iops->put(env, di);
1437                 iops->fini(env, di);
1438                 GOTO(out, rc);
1439         }
1440
1441         lfsck->li_obj_dir = lfsck_object_get(obj);
1442         lfsck->li_cookie_dir = iops->store(env, di);
1443         spin_lock(&lfsck->li_lock);
1444         lfsck->li_di_dir = di;
1445         spin_unlock(&lfsck->li_lock);
1446
1447         GOTO(out, rc = 0);
1448
1449 out:
1450         if (rc < 0)
1451                 lfsck_fail(env, lfsck, false);
1452         return (rc > 0 ? 0 : rc);
1453 }
1454
1455 int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
1456                    struct dt_object *obj, struct lu_dirent *ent)
1457 {
1458         struct lfsck_component *com;
1459         int                     rc;
1460
1461         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1462                 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
1463                 if (rc != 0)
1464                         return rc;
1465         }
1466         return 0;
1467 }
1468
1469 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
1470                int result)
1471 {
1472         struct lfsck_component *com;
1473         struct lfsck_component *next;
1474         int                     rc  = 0;
1475         int                     rc1 = 0;
1476
1477         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
1478         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
1479                 rc = com->lc_ops->lfsck_post(env, com, result, false);
1480                 if (rc != 0)
1481                         rc1 = rc;
1482         }
1483
1484         lfsck->li_time_last_checkpoint = cfs_time_current();
1485         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
1486                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1487
1488         /* Ignore some component post failure to make other can go ahead. */
1489         return result;
1490 }
1491
1492 static void lfsck_interpret(const struct lu_env *env,
1493                             struct lfsck_instance *lfsck,
1494                             struct ptlrpc_request *req, void *args, int result)
1495 {
1496         struct lfsck_async_interpret_args *laia = args;
1497         struct lfsck_component            *com;
1498
1499         LASSERT(laia->laia_com == NULL);
1500         LASSERT(laia->laia_shared);
1501
1502         spin_lock(&lfsck->li_lock);
1503         list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1504                 if (com->lc_ops->lfsck_interpret != NULL) {
1505                         laia->laia_com = com;
1506                         com->lc_ops->lfsck_interpret(env, req, laia, result);
1507                 }
1508         }
1509
1510         list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
1511                 if (com->lc_ops->lfsck_interpret != NULL) {
1512                         laia->laia_com = com;
1513                         com->lc_ops->lfsck_interpret(env, req, laia, result);
1514                 }
1515         }
1516         spin_unlock(&lfsck->li_lock);
1517 }
1518
1519 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
1520 {
1521         struct lfsck_component *com;
1522         struct lfsck_component *next;
1523         struct l_wait_info      lwi = { 0 };
1524         int                     rc  = 0;
1525         int                     rc1 = 0;
1526
1527         list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
1528                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1529                         com->lc_journal = 0;
1530
1531                 rc = com->lc_ops->lfsck_double_scan(env, com);
1532                 if (rc != 0)
1533                         rc1 = rc;
1534         }
1535
1536         l_wait_event(lfsck->li_thread.t_ctl_waitq,
1537                      atomic_read(&lfsck->li_double_scan_count) == 0,
1538                      &lwi);
1539
1540         if (lfsck->li_status != LS_PAUSED &&
1541             lfsck->li_status != LS_CO_PAUSED) {
1542                 list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
1543                                          lc_link) {
1544                         spin_lock(&lfsck->li_lock);
1545                         list_del_init(&com->lc_link);
1546                         list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1547                         spin_unlock(&lfsck->li_lock);
1548                 }
1549         }
1550
1551         return rc1 != 0 ? rc1 : rc;
1552 }
1553
1554 static int lfsck_stop_notify(const struct lu_env *env,
1555                              struct lfsck_instance *lfsck,
1556                              struct lfsck_tgt_descs *ltds,
1557                              struct lfsck_tgt_desc *ltd, __u16 type)
1558 {
1559         struct ptlrpc_request_set *set;
1560         struct lfsck_component    *com;
1561         int                        rc  = 0;
1562         ENTRY;
1563
1564         spin_lock(&lfsck->li_lock);
1565         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan);
1566         if (com == NULL)
1567                 com = __lfsck_component_find(lfsck, type,
1568                                              &lfsck->li_list_double_scan);
1569         if (com != NULL)
1570                 lfsck_component_get(com);
1571         spin_unlock(&lfsck->li_lock);
1572
1573         if (com != NULL) {
1574                 if (com->lc_ops->lfsck_stop_notify != NULL) {
1575                         set = ptlrpc_prep_set();
1576                         if (set == NULL) {
1577                                 lfsck_component_put(env, com);
1578
1579                                 RETURN(-ENOMEM);
1580                         }
1581
1582                         rc = com->lc_ops->lfsck_stop_notify(env, com, ltds,
1583                                                             ltd, set);
1584                         if (rc == 0)
1585                                 rc = ptlrpc_set_wait(set);
1586
1587                         ptlrpc_set_destroy(set);
1588                 }
1589
1590                 lfsck_component_put(env, com);
1591         }
1592
1593         RETURN(rc);
1594 }
1595
1596 void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
1597 {
1598         struct lfsck_component *com;
1599         struct lfsck_component *next;
1600
1601         list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
1602                                  lc_link) {
1603                 if (com->lc_ops->lfsck_quit != NULL)
1604                         com->lc_ops->lfsck_quit(env, com);
1605
1606                 spin_lock(&lfsck->li_lock);
1607                 list_del_init(&com->lc_link);
1608                 list_del_init(&com->lc_link_dir);
1609                 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1610                 spin_unlock(&lfsck->li_lock);
1611         }
1612
1613         list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
1614                                  lc_link) {
1615                 if (com->lc_ops->lfsck_quit != NULL)
1616                         com->lc_ops->lfsck_quit(env, com);
1617
1618                 spin_lock(&lfsck->li_lock);
1619                 list_del_init(&com->lc_link);
1620                 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1621                 spin_unlock(&lfsck->li_lock);
1622         }
1623 }
1624
1625 static int lfsck_async_interpret(const struct lu_env *env,
1626                                  struct ptlrpc_request *req,
1627                                  void *args, int rc)
1628 {
1629         struct lfsck_async_interpret_args *laia = args;
1630         struct lfsck_instance             *lfsck;
1631
1632         lfsck = container_of0(laia->laia_ltds, struct lfsck_instance,
1633                               li_mdt_descs);
1634         lfsck_interpret(env, lfsck, req, laia, rc);
1635         lfsck_tgt_put(laia->laia_ltd);
1636         if (rc != 0 && laia->laia_result != -EALREADY)
1637                 laia->laia_result = rc;
1638
1639         return 0;
1640 }
1641
1642 int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
1643                         struct lfsck_request *lr,
1644                         struct ptlrpc_request_set *set,
1645                         ptlrpc_interpterer_t interpreter,
1646                         void *args, int request)
1647 {
1648         struct lfsck_async_interpret_args *laia;
1649         struct ptlrpc_request             *req;
1650         struct lfsck_request              *tmp;
1651         struct req_format                 *format;
1652         int                                rc;
1653
1654         switch (request) {
1655         case LFSCK_NOTIFY:
1656                 format = &RQF_LFSCK_NOTIFY;
1657                 break;
1658         case LFSCK_QUERY:
1659                 format = &RQF_LFSCK_QUERY;
1660                 break;
1661         default:
1662                 CDEBUG(D_LFSCK, "%s: unknown async request %d: rc = %d\n",
1663                        exp->exp_obd->obd_name, request, -EINVAL);
1664                 return -EINVAL;
1665         }
1666
1667         req = ptlrpc_request_alloc(class_exp2cliimp(exp), format);
1668         if (req == NULL)
1669                 return -ENOMEM;
1670
1671         rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, request);
1672         if (rc != 0) {
1673                 ptlrpc_request_free(req);
1674
1675                 return rc;
1676         }
1677
1678         tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
1679         *tmp = *lr;
1680         ptlrpc_request_set_replen(req);
1681
1682         laia = ptlrpc_req_async_args(req);
1683         *laia = *(struct lfsck_async_interpret_args *)args;
1684         if (laia->laia_com != NULL)
1685                 lfsck_component_get(laia->laia_com);
1686         req->rq_interpret_reply = interpreter;
1687         ptlrpc_set_add_req(set, req);
1688
1689         return 0;
1690 }
1691
1692 /* external interfaces */
1693
1694 int lfsck_get_speed(struct seq_file *m, struct dt_device *key)
1695 {
1696         struct lu_env           env;
1697         struct lfsck_instance  *lfsck;
1698         int                     rc;
1699         ENTRY;
1700
1701         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1702         if (rc != 0)
1703                 RETURN(rc);
1704
1705         lfsck = lfsck_instance_find(key, true, false);
1706         if (likely(lfsck != NULL)) {
1707                 seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_speed_limit);
1708                 lfsck_instance_put(&env, lfsck);
1709         } else {
1710                 rc = -ENXIO;
1711         }
1712
1713         lu_env_fini(&env);
1714
1715         RETURN(rc);
1716 }
1717 EXPORT_SYMBOL(lfsck_get_speed);
1718
1719 int lfsck_set_speed(struct dt_device *key, int val)
1720 {
1721         struct lu_env           env;
1722         struct lfsck_instance  *lfsck;
1723         int                     rc;
1724         ENTRY;
1725
1726         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1727         if (rc != 0)
1728                 RETURN(rc);
1729
1730         lfsck = lfsck_instance_find(key, true, false);
1731         if (likely(lfsck != NULL)) {
1732                 mutex_lock(&lfsck->li_mutex);
1733                 if (__lfsck_set_speed(lfsck, val))
1734                         rc = lfsck_bookmark_store(&env, lfsck);
1735                 mutex_unlock(&lfsck->li_mutex);
1736                 lfsck_instance_put(&env, lfsck);
1737         } else {
1738                 rc = -ENXIO;
1739         }
1740
1741         lu_env_fini(&env);
1742
1743         RETURN(rc);
1744 }
1745 EXPORT_SYMBOL(lfsck_set_speed);
1746
1747 int lfsck_get_windows(struct seq_file *m, struct dt_device *key)
1748 {
1749         struct lu_env           env;
1750         struct lfsck_instance  *lfsck;
1751         int                     rc;
1752         ENTRY;
1753
1754         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1755         if (rc != 0)
1756                 RETURN(rc);
1757
1758         lfsck = lfsck_instance_find(key, true, false);
1759         if (likely(lfsck != NULL)) {
1760                 seq_printf(m, "%u\n", lfsck->li_bookmark_ram.lb_async_windows);
1761                 lfsck_instance_put(&env, lfsck);
1762         } else {
1763                 rc = -ENXIO;
1764         }
1765
1766         lu_env_fini(&env);
1767
1768         RETURN(rc);
1769 }
1770 EXPORT_SYMBOL(lfsck_get_windows);
1771
1772 int lfsck_set_windows(struct dt_device *key, int val)
1773 {
1774         struct lu_env           env;
1775         struct lfsck_instance  *lfsck;
1776         int                     rc;
1777         ENTRY;
1778
1779         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1780         if (rc != 0)
1781                 RETURN(rc);
1782
1783         lfsck = lfsck_instance_find(key, true, false);
1784         if (likely(lfsck != NULL)) {
1785                 if (val > LFSCK_ASYNC_WIN_MAX) {
1786                         CWARN("%s: Too large async window size, which "
1787                               "may cause memory issues. The valid range "
1788                               "is [0 - %u]. If you do not want to restrict "
1789                               "the window size for async requests pipeline, "
1790                               "just set it as 0.\n",
1791                               lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
1792                         rc = -EINVAL;
1793                 } else if (lfsck->li_bookmark_ram.lb_async_windows != val) {
1794                         mutex_lock(&lfsck->li_mutex);
1795                         lfsck->li_bookmark_ram.lb_async_windows = val;
1796                         rc = lfsck_bookmark_store(&env, lfsck);
1797                         mutex_unlock(&lfsck->li_mutex);
1798                 }
1799                 lfsck_instance_put(&env, lfsck);
1800         } else {
1801                 rc = -ENXIO;
1802         }
1803
1804         lu_env_fini(&env);
1805
1806         RETURN(rc);
1807 }
1808 EXPORT_SYMBOL(lfsck_set_windows);
1809
1810 int lfsck_dump(struct seq_file *m, struct dt_device *key, enum lfsck_type type)
1811 {
1812         struct lu_env           env;
1813         struct lfsck_instance  *lfsck;
1814         struct lfsck_component *com;
1815         int                     rc;
1816         ENTRY;
1817
1818         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1819         if (rc != 0)
1820                 RETURN(rc);
1821
1822         lfsck = lfsck_instance_find(key, true, false);
1823         if (likely(lfsck != NULL)) {
1824                 com = lfsck_component_find(lfsck, type);
1825                 if (likely(com != NULL)) {
1826                         rc = com->lc_ops->lfsck_dump(&env, com, m);
1827                         lfsck_component_put(&env, com);
1828                 } else {
1829                         rc = -ENOTSUPP;
1830                 }
1831
1832                 lfsck_instance_put(&env, lfsck);
1833         } else {
1834                 rc = -ENXIO;
1835         }
1836
1837         lu_env_fini(&env);
1838
1839         RETURN(rc);
1840 }
1841 EXPORT_SYMBOL(lfsck_dump);
1842
1843 static int lfsck_stop_all(const struct lu_env *env,
1844                           struct lfsck_instance *lfsck,
1845                           struct lfsck_stop *stop)
1846 {
1847         struct lfsck_thread_info          *info   = lfsck_env_info(env);
1848         struct lfsck_request              *lr     = &info->lti_lr;
1849         struct lfsck_async_interpret_args *laia   = &info->lti_laia;
1850         struct ptlrpc_request_set         *set;
1851         struct lfsck_tgt_descs            *ltds   = &lfsck->li_mdt_descs;
1852         struct lfsck_tgt_desc             *ltd;
1853         struct lfsck_bookmark             *bk     = &lfsck->li_bookmark_ram;
1854         __u32                              idx;
1855         int                                rc     = 0;
1856         int                                rc1    = 0;
1857         ENTRY;
1858
1859         LASSERT(stop->ls_flags & LPF_BROADCAST);
1860
1861         set = ptlrpc_prep_set();
1862         if (unlikely(set == NULL))
1863                 RETURN(-ENOMEM);
1864
1865         memset(lr, 0, sizeof(*lr));
1866         lr->lr_event = LE_STOP;
1867         lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1868         lr->lr_status = stop->ls_status;
1869         lr->lr_version = bk->lb_version;
1870         lr->lr_active = LFSCK_TYPES_ALL;
1871         lr->lr_param = stop->ls_flags;
1872
1873         laia->laia_com = NULL;
1874         laia->laia_ltds = ltds;
1875         laia->laia_lr = lr;
1876         laia->laia_result = 0;
1877         laia->laia_shared = 1;
1878
1879         down_read(&ltds->ltd_rw_sem);
1880         cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1881                 ltd = lfsck_tgt_get(ltds, idx);
1882                 LASSERT(ltd != NULL);
1883
1884                 laia->laia_ltd = ltd;
1885                 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1886                                          lfsck_async_interpret, laia,
1887                                          LFSCK_NOTIFY);
1888                 if (rc != 0) {
1889                         lfsck_interpret(env, lfsck, NULL, laia, rc);
1890                         lfsck_tgt_put(ltd);
1891                         CERROR("%s: cannot notify MDT %x for LFSCK stop: "
1892                                "rc = %d\n", lfsck_lfsck2name(lfsck), idx, rc);
1893                         rc1 = rc;
1894                 }
1895         }
1896         up_read(&ltds->ltd_rw_sem);
1897
1898         rc = ptlrpc_set_wait(set);
1899         ptlrpc_set_destroy(set);
1900
1901         if (rc == 0)
1902                 rc = laia->laia_result;
1903
1904         if (rc == -EALREADY)
1905                 rc = 0;
1906
1907         if (rc != 0)
1908                 CERROR("%s: fail to stop LFSCK on some MDTs: rc = %d\n",
1909                        lfsck_lfsck2name(lfsck), rc);
1910
1911         RETURN(rc != 0 ? rc : rc1);
1912 }
1913
1914 static int lfsck_start_all(const struct lu_env *env,
1915                            struct lfsck_instance *lfsck,
1916                            struct lfsck_start *start)
1917 {
1918         struct lfsck_thread_info          *info   = lfsck_env_info(env);
1919         struct lfsck_request              *lr     = &info->lti_lr;
1920         struct lfsck_async_interpret_args *laia   = &info->lti_laia;
1921         struct ptlrpc_request_set         *set;
1922         struct lfsck_tgt_descs            *ltds   = &lfsck->li_mdt_descs;
1923         struct lfsck_tgt_desc             *ltd;
1924         struct lfsck_bookmark             *bk     = &lfsck->li_bookmark_ram;
1925         __u32                              idx;
1926         int                                rc     = 0;
1927         ENTRY;
1928
1929         LASSERT(start->ls_flags & LPF_BROADCAST);
1930
1931         set = ptlrpc_prep_set();
1932         if (unlikely(set == NULL))
1933                 RETURN(-ENOMEM);
1934
1935         memset(lr, 0, sizeof(*lr));
1936         lr->lr_event = LE_START;
1937         lr->lr_index = lfsck_dev_idx(lfsck->li_bottom);
1938         lr->lr_speed = bk->lb_speed_limit;
1939         lr->lr_version = bk->lb_version;
1940         lr->lr_active = start->ls_active;
1941         lr->lr_param = start->ls_flags;
1942         lr->lr_async_windows = bk->lb_async_windows;
1943         lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
1944                        LSV_ASYNC_WINDOWS;
1945
1946         laia->laia_com = NULL;
1947         laia->laia_ltds = ltds;
1948         laia->laia_lr = lr;
1949         laia->laia_result = 0;
1950         laia->laia_shared = 1;
1951
1952         down_read(&ltds->ltd_rw_sem);
1953         cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
1954                 ltd = lfsck_tgt_get(ltds, idx);
1955                 LASSERT(ltd != NULL);
1956
1957                 laia->laia_ltd = ltd;
1958                 ltd->ltd_layout_done = 0;
1959                 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
1960                                          lfsck_async_interpret, laia,
1961                                          LFSCK_NOTIFY);
1962                 if (rc != 0) {
1963                         lfsck_interpret(env, lfsck, NULL, laia, rc);
1964                         lfsck_tgt_put(ltd);
1965                         CERROR("%s: cannot notify MDT %x for LFSCK "
1966                                "start, failout: rc = %d\n",
1967                                lfsck_lfsck2name(lfsck), idx, rc);
1968                         break;
1969                 }
1970         }
1971         up_read(&ltds->ltd_rw_sem);
1972
1973         if (rc != 0) {
1974                 ptlrpc_set_destroy(set);
1975
1976                 RETURN(rc);
1977         }
1978
1979         rc = ptlrpc_set_wait(set);
1980         ptlrpc_set_destroy(set);
1981
1982         if (rc == 0)
1983                 rc = laia->laia_result;
1984
1985         if (rc != 0) {
1986                 struct lfsck_stop *stop = &info->lti_stop;
1987
1988                 CERROR("%s: cannot start LFSCK on some MDTs, "
1989                        "stop all: rc = %d\n",
1990                        lfsck_lfsck2name(lfsck), rc);
1991                 if (rc != -EALREADY) {
1992                         stop->ls_status = LS_FAILED;
1993                         stop->ls_flags = LPF_ALL_TGT | LPF_BROADCAST;
1994                         lfsck_stop_all(env, lfsck, stop);
1995                 }
1996         }
1997
1998         RETURN(rc);
1999 }
2000
/**
 * Start the LFSCK engine of the instance found for \a key, or join an
 * engine that is already running.
 *
 * A NULL lsp->lsp_start means an automatic trigger of a paused LFSCK. In
 * that case nothing is started when the scan list is empty or when the
 * OBD_FAIL_LFSCK_NO_AUTO fail check fires.
 *
 * When the engine thread is neither in the init nor in the stopped state,
 * no second thread is started: the result defaults to -EALREADY and, for an
 * explicit start, the lfsck_join() hook of each requested component is
 * called if the component provides one.
 *
 * Otherwise the requested components are moved onto the scan list, the
 * parameters are applied, and the lfsck_master_engine kthread is launched.
 * With LPF_BROADCAST set, the other MDTs are notified via lfsck_start_all()
 * after lfsck::li_mutex has been dropped.
 *
 * \param[in] env       execution environment
 * \param[in] key       device used to look up the LFSCK instance
 * \param[in] lsp       start parameters; lsp->lsp_start may be NULL
 *
 * \retval 0            success; positive internal results are also
 *                      reported as 0
 * \retval -ENXIO       no LFSCK instance was found for \a key
 * \retval -EAGAIN      the namespace has not been registered yet
 * \retval -EALREADY    the engine is already running
 * \retval -EPERM       LPF_BROADCAST was requested on a non-master instance
 * \retval -ENOTSUPP    unsupported types were requested; on return
 *                      start->ls_active holds only the unsupported bits
 * \retval other        negative error from the helpers called here
 */
2001 int lfsck_start(const struct lu_env *env, struct dt_device *key,
2002                 struct lfsck_start_param *lsp)
2003 {
2004         struct lfsck_start              *start  = lsp->lsp_start;
2005         struct lfsck_instance           *lfsck;
2006         struct lfsck_bookmark           *bk;
2007         struct ptlrpc_thread            *thread;
2008         struct lfsck_component          *com;
2009         struct l_wait_info               lwi    = { 0 };
2010         struct lfsck_thread_args        *lta;
2011         struct task_struct              *task;
2012         int                              rc     = 0;
2013         __u16                            valid  = 0;
2014         __u16                            flags  = 0;
2015         __u16                            type   = 1;
2016         ENTRY;
2017
2018         lfsck = lfsck_instance_find(key, true, false);
2019         if (unlikely(lfsck == NULL))
2020                 RETURN(-ENXIO);
2021
2022         /* System is not ready, try again later. */
2023         if (unlikely(lfsck->li_namespace == NULL))
2024                 GOTO(put, rc = -EAGAIN);
2025
2026         /* start == NULL means auto trigger paused LFSCK. */
2027         if ((start == NULL) &&
2028             (cfs_list_empty(&lfsck->li_list_scan) ||
2029              OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
2030                 GOTO(put, rc = 0);
2031
2032         bk = &lfsck->li_bookmark_ram;
2033         thread = &lfsck->li_thread;
2034         mutex_lock(&lfsck->li_mutex);
2035         spin_lock(&lfsck->li_lock);
             /* The engine thread is active: join it instead of starting a
              * second one. */
2036         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
2037                 rc = -EALREADY;
2038                 if (unlikely(start == NULL)) {
2039                         spin_unlock(&lfsck->li_lock);
2040                         GOTO(out, rc);
2041                 }
2042
                     /* Walk the requested type bits one at a time; "type"
                      * is the single bit currently being examined. */
2043                 while (start->ls_active != 0) {
2044                         if (!(type & start->ls_active)) {
2045                                 type <<= 1;
2046                                 continue;
2047                         }
2048
                             /* A joinable component is on either the scan
                              * list or the double scan list. */
2049                         com = __lfsck_component_find(lfsck, type,
2050                                                      &lfsck->li_list_scan);
2051                         if (com == NULL)
2052                                 com = __lfsck_component_find(lfsck, type,
2053                                                 &lfsck->li_list_double_scan);
2054                         if (com == NULL) {
2055                                 rc = -EOPNOTSUPP;
2056                                 break;
2057                         }
2058
2059                         if (com->lc_ops->lfsck_join != NULL) {
2060                                 rc = com->lc_ops->lfsck_join( env, com, lsp);
2061                                 if (rc != 0 && rc != -EALREADY)
2062                                         break;
2063                         }
2064                         start->ls_active &= ~type;
2065                         type <<= 1;
2066                 }
2067                 spin_unlock(&lfsck->li_lock);
2068                 GOTO(out, rc);
2069         }
2070         spin_unlock(&lfsck->li_lock);
2071
             /* Reset the per-run state before a fresh start. */
2072         lfsck->li_status = 0;
2073         lfsck->li_oit_over = 0;
2074         lfsck->li_start_unplug = 0;
2075         lfsck->li_drop_dryrun = 0;
2076         lfsck->li_new_scanned = 0;
2077
2078         /* For auto trigger. */
2079         if (start == NULL)
2080                 goto trigger;
2081
2082         if (start->ls_flags & LPF_BROADCAST && !lfsck->li_master) {
2083                 CERROR("%s: only allow to specify '-A | -o' via MDS\n",
2084                        lfsck_lfsck2name(lfsck));
2085
2086                 GOTO(out, rc = -EPERM);
2087         }
2088
2089         start->ls_version = bk->lb_version;
2090
2091         if (start->ls_active != 0) {
2092                 struct lfsck_component *next;
2093
2094                 if (start->ls_active == LFSCK_TYPES_ALL)
2095                         start->ls_active = LFSCK_TYPES_SUPPORTED;
2096
                     /* Report back only the unsupported bits to the caller. */
2097                 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
2098                         start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
2099                         GOTO(out, rc = -ENOTSUPP);
2100                 }
2101
                     /* Components on the scan list that were not requested
                      * this time get their lfsck_post() operation called.
                      * NOTE(review): the _safe iterator suggests
                      * lfsck_post() may unlink com - confirm. */
2102                 list_for_each_entry_safe(com, next,
2103                                          &lfsck->li_list_scan, lc_link) {
2104                         if (!(com->lc_type & start->ls_active)) {
2105                                 rc = com->lc_ops->lfsck_post(env, com, 0,
2106                                                              false);
2107                                 if (rc != 0)
2108                                         GOTO(out, rc);
2109                         }
2110                 }
2111
                     /* Move each requested component that is currently on
                      * the idle list over to the scan list. */
2112                 while (start->ls_active != 0) {
2113                         if (type & start->ls_active) {
2114                                 com = __lfsck_component_find(lfsck, type,
2115                                                         &lfsck->li_list_idle);
2116                                 if (com != NULL) {
2117                                         /* The component status will be updated
2118                                          * when its prep() is called later by
2119                                          * the LFSCK main engine. */
2120                                         list_del_init(&com->lc_link);
2121                                         list_add_tail(&com->lc_link,
2122                                                       &lfsck->li_list_scan);
2123                                 }
2124                                 start->ls_active &= ~type;
2125                         }
2126                         type <<= 1;
2127                 }
2128         }
2129
2130         if (list_empty(&lfsck->li_list_scan)) {
2131                 /* The speed limit will be used to control both the LFSCK and
2132                  * low layer scrub (if applied), need to be handled firstly. */
2133                 if (start->ls_valid & LSV_SPEED_LIMIT) {
2134                         if (__lfsck_set_speed(lfsck, start->ls_speed_limit)) {
2135                                 rc = lfsck_bookmark_store(env, lfsck);
2136                                 if (rc != 0)
2137                                         GOTO(out, rc);
2138                         }
2139                 }
2140
2141                 goto trigger;
2142         }
2143
2144         if (start->ls_flags & LPF_RESET)
2145                 flags |= DOIF_RESET;
2146
2147         rc = lfsck_set_param(env, lfsck, start, !!(flags & DOIF_RESET));
2148         if (rc != 0)
2149                 GOTO(out, rc);
2150
             /* Rebuild ls_active from the components that will be scanned,
              * resetting each of them first when LPF_RESET was given. */
2151         list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
2152                 start->ls_active |= com->lc_type;
2153                 if (flags & DOIF_RESET) {
2154                         rc = com->lc_ops->lfsck_reset(env, com, false);
2155                         if (rc != 0)
2156                                 GOTO(out, rc);
2157                 }
2158         }
2159
2160 trigger:
             /* Assemble the directory and object table iteration arguments
              * from the bookmark and the start request. */
2161         lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
2162         if (bk->lb_param & LPF_DRYRUN)
2163                 lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN;
2164
2165         if (start != NULL && start->ls_valid & LSV_ERROR_HANDLE) {
2166                 valid |= DOIV_ERROR_HANDLE;
2167                 if (start->ls_flags & LPF_FAILOUT)
2168                         flags |= DOIF_FAILOUT;
2169         }
2170
2171         if (start != NULL && start->ls_valid & LSV_DRYRUN) {
2172                 valid |= DOIV_DRYRUN;
2173                 if (start->ls_flags & LPF_DRYRUN)
2174                         flags |= DOIF_DRYRUN;
2175         }
2176
2177         if (!list_empty(&lfsck->li_list_scan))
2178                 flags |= DOIF_OUTUSED;
2179
2180         lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
2181         thread_set_flags(thread, 0);
2182         lta = lfsck_thread_args_init(lfsck, NULL, lsp);
2183         if (IS_ERR(lta))
2184                 GOTO(out, rc = PTR_ERR(lta));
2185
2186         __lfsck_set_speed(lfsck, bk->lb_speed_limit);
2187         task = kthread_run(lfsck_master_engine, lta, "lfsck");
2188         if (IS_ERR(task)) {
2189                 rc = PTR_ERR(task);
2190                 CERROR("%s: cannot start LFSCK thread: rc = %d\n",
2191                        lfsck_lfsck2name(lfsck), rc);
                     /* The thread never ran, so release its args here. */
2192                 lfsck_thread_args_fini(lta);
2193
2194                 GOTO(out, rc);
2195         }
2196
             /* Wait until the engine thread reports running or stopped. */
2197         l_wait_event(thread->t_ctl_waitq,
2198                      thread_is_running(thread) ||
2199                      thread_is_stopped(thread),
2200                      &lwi);
             /* Local-only start: set li_start_unplug, wake the waiters on
              * the control waitqueue and return. */
2201         if (start == NULL || !(start->ls_flags & LPF_BROADCAST)) {
2202                 lfsck->li_start_unplug = 1;
2203                 wake_up_all(&thread->t_ctl_waitq);
2204
2205                 GOTO(out, rc = 0);
2206         }
2207
2208         /* release lfsck::li_mutex to avoid deadlock. */
2209         mutex_unlock(&lfsck->li_mutex);
2210         rc = lfsck_start_all(env, lfsck, start);
2211         if (rc != 0) {
                     /* The broadcast failed: unless the local engine has
                      * already stopped, mark it failed, ask it to stop and
                      * wait for it. */
2212                 spin_lock(&lfsck->li_lock);
2213                 if (thread_is_stopped(thread)) {
2214                         spin_unlock(&lfsck->li_lock);
2215                 } else {
2216                         lfsck->li_status = LS_FAILED;
2217                         lfsck->li_flags = 0;
2218                         thread_set_flags(thread, SVC_STOPPING);
2219                         spin_unlock(&lfsck->li_lock);
2220
2221                         lfsck->li_start_unplug = 1;
2222                         wake_up_all(&thread->t_ctl_waitq);
2223                         l_wait_event(thread->t_ctl_waitq,
2224                                      thread_is_stopped(thread),
2225                                      &lwi);
2226                 }
2227         } else {
2228                 lfsck->li_start_unplug = 1;
2229                 wake_up_all(&thread->t_ctl_waitq);
2230         }
2231
             /* li_mutex was already released above, so skip the unlock
              * done at "out". */
2232         GOTO(put, rc);
2233
2234 out:
2235         mutex_unlock(&lfsck->li_mutex);
2236
2237 put:
2238         lfsck_instance_put(env, lfsck);
2239
             /* Only negative values are reported as errors. */
2240         return rc < 0 ? rc : 0;
2241 }
2242 EXPORT_SYMBOL(lfsck_start);
2243
2244 int lfsck_stop(const struct lu_env *env, struct dt_device *key,
2245                struct lfsck_stop *stop)
2246 {
2247         struct lfsck_instance   *lfsck;
2248         struct ptlrpc_thread    *thread;
2249         struct l_wait_info       lwi    = { 0 };
2250         int                      rc     = 0;
2251         int                      rc1    = 0;
2252         ENTRY;
2253
2254         lfsck = lfsck_instance_find(key, true, false);
2255         if (unlikely(lfsck == NULL))
2256                 RETURN(-ENXIO);
2257
2258         thread = &lfsck->li_thread;
2259         /* release lfsck::li_mutex to avoid deadlock. */
2260         if (stop != NULL && stop->ls_flags & LPF_BROADCAST) {
2261                 if (!lfsck->li_master) {
2262                         CERROR("%s: only allow to specify '-A' via MDS\n",
2263                                lfsck_lfsck2name(lfsck));
2264
2265                         GOTO(out, rc = -EPERM);
2266                 }
2267
2268                 rc1 = lfsck_stop_all(env, lfsck, stop);
2269         }
2270
2271         mutex_lock(&lfsck->li_mutex);
2272         spin_lock(&lfsck->li_lock);
2273         /* no error if LFSCK is already stopped, or was never started */
2274         if (thread_is_init(thread) || thread_is_stopped(thread)) {
2275                 spin_unlock(&lfsck->li_lock);
2276                 GOTO(out, rc = 0);
2277         }
2278
2279         if (stop != NULL) {
2280                 lfsck->li_status = stop->ls_status;
2281                 lfsck->li_flags = stop->ls_flags;
2282         } else {
2283                 lfsck->li_status = LS_STOPPED;
2284                 lfsck->li_flags = 0;
2285         }
2286
2287         thread_set_flags(thread, SVC_STOPPING);
2288         spin_unlock(&lfsck->li_lock);
2289
2290         wake_up_all(&thread->t_ctl_waitq);
2291         l_wait_event(thread->t_ctl_waitq,
2292                      thread_is_stopped(thread),
2293                      &lwi);
2294
2295         GOTO(out, rc = 0);
2296
2297 out:
2298         mutex_unlock(&lfsck->li_mutex);
2299         lfsck_instance_put(env, lfsck);
2300
2301         return rc != 0 ? rc : rc1;
2302 }
2303 EXPORT_SYMBOL(lfsck_stop);
2304
2305 int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
2306                     struct lfsck_request *lr)
2307 {
2308         int rc = -EOPNOTSUPP;
2309         ENTRY;
2310
2311         switch (lr->lr_event) {
2312         case LE_START: {
2313                 struct lfsck_start       *start = &lfsck_env_info(env)->lti_start;
2314                 struct lfsck_start_param  lsp;
2315
2316                 memset(start, 0, sizeof(*start));
2317                 start->ls_valid = lr->lr_valid;
2318                 start->ls_speed_limit = lr->lr_speed;
2319                 start->ls_version = lr->lr_version;
2320                 start->ls_active = lr->lr_active;
2321                 start->ls_flags = lr->lr_param & ~LPF_BROADCAST;
2322                 start->ls_async_windows = lr->lr_async_windows;
2323
2324                 lsp.lsp_start = start;
2325                 lsp.lsp_index = lr->lr_index;
2326                 lsp.lsp_index_valid = 1;
2327                 rc = lfsck_start(env, key, &lsp);
2328                 break;
2329         }
2330         case LE_STOP: {
2331                 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
2332
2333                 memset(stop, 0, sizeof(*stop));
2334                 stop->ls_status = lr->lr_status;
2335                 stop->ls_flags = lr->lr_param & ~LPF_BROADCAST;
2336                 rc = lfsck_stop(env, key, stop);
2337                 break;
2338         }
2339         case LE_PHASE1_DONE:
2340         case LE_PHASE2_DONE:
2341         case LE_FID_ACCESSED:
2342         case LE_PEER_EXIT:
2343         case LE_CONDITIONAL_DESTROY:
2344         case LE_PAIRS_VERIFY: {
2345                 struct lfsck_instance  *lfsck;
2346                 struct lfsck_component *com;
2347
2348                 lfsck = lfsck_instance_find(key, true, false);
2349                 if (unlikely(lfsck == NULL))
2350                         RETURN(-ENXIO);
2351
2352                 com = lfsck_component_find(lfsck, lr->lr_active);
2353                 if (likely(com != NULL)) {
2354                         rc = com->lc_ops->lfsck_in_notify(env, com, lr);
2355                         lfsck_component_put(env, com);
2356                 }
2357
2358                 lfsck_instance_put(env, lfsck);
2359                 break;
2360         }
2361         default:
2362                 break;
2363         }
2364
2365         RETURN(rc);
2366 }
2367 EXPORT_SYMBOL(lfsck_in_notify);
2368
2369 int lfsck_query(const struct lu_env *env, struct dt_device *key,
2370                 struct lfsck_request *lr)
2371 {
2372         struct lfsck_instance  *lfsck;
2373         struct lfsck_component *com;
2374         int                     rc;
2375         ENTRY;
2376
2377         lfsck = lfsck_instance_find(key, true, false);
2378         if (unlikely(lfsck == NULL))
2379                 RETURN(-ENXIO);
2380
2381         com = lfsck_component_find(lfsck, lr->lr_active);
2382         if (likely(com != NULL)) {
2383                 rc = com->lc_ops->lfsck_query(env, com);
2384                 lfsck_component_put(env, com);
2385         } else {
2386                 rc = -ENOTSUPP;
2387         }
2388
2389         lfsck_instance_put(env, lfsck);
2390
2391         RETURN(rc);
2392 }
2393 EXPORT_SYMBOL(lfsck_query);
2394
2395 int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
2396                              struct ldlm_namespace *ns)
2397 {
2398         struct lfsck_instance  *lfsck;
2399         int                     rc      = -ENXIO;
2400
2401         lfsck = lfsck_instance_find(key, true, false);
2402         if (likely(lfsck != NULL)) {
2403                 lfsck->li_namespace = ns;
2404                 lfsck_instance_put(env, lfsck);
2405                 rc = 0;
2406         }
2407
2408         return rc;
2409 }
2410 EXPORT_SYMBOL(lfsck_register_namespace);
2411
2412 int lfsck_register(const struct lu_env *env, struct dt_device *key,
2413                    struct dt_device *next, struct obd_device *obd,
2414                    lfsck_out_notify notify, void *notify_data, bool master)
2415 {
2416         struct lfsck_instance   *lfsck;
2417         struct dt_object        *root  = NULL;
2418         struct dt_object        *obj   = NULL;
2419         struct lu_fid           *fid   = &lfsck_env_info(env)->lti_fid;
2420         int                      rc;
2421         ENTRY;
2422
2423         lfsck = lfsck_instance_find(key, false, false);
2424         if (unlikely(lfsck != NULL))
2425                 RETURN(-EEXIST);
2426
2427         OBD_ALLOC_PTR(lfsck);
2428         if (lfsck == NULL)
2429                 RETURN(-ENOMEM);
2430
2431         mutex_init(&lfsck->li_mutex);
2432         spin_lock_init(&lfsck->li_lock);
2433         CFS_INIT_LIST_HEAD(&lfsck->li_link);
2434         CFS_INIT_LIST_HEAD(&lfsck->li_list_scan);
2435         CFS_INIT_LIST_HEAD(&lfsck->li_list_dir);
2436         CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
2437         CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
2438         atomic_set(&lfsck->li_ref, 1);
2439         atomic_set(&lfsck->li_double_scan_count, 0);
2440         init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
2441         lfsck->li_out_notify = notify;
2442         lfsck->li_out_notify_data = notify_data;
2443         lfsck->li_next = next;
2444         lfsck->li_bottom = key;
2445         lfsck->li_obd = obd;
2446
2447         rc = lfsck_tgt_descs_init(&lfsck->li_ost_descs);
2448         if (rc != 0)
2449                 GOTO(out, rc);
2450
2451         rc = lfsck_tgt_descs_init(&lfsck->li_mdt_descs);
2452         if (rc != 0)
2453                 GOTO(out, rc);
2454
2455         fid->f_seq = FID_SEQ_LOCAL_NAME;
2456         fid->f_oid = 1;
2457         fid->f_ver = 0;
2458         rc = local_oid_storage_init(env, key, fid, &lfsck->li_los);
2459         if (rc != 0)
2460                 GOTO(out, rc);
2461
2462         rc = dt_root_get(env, key, fid);
2463         if (rc != 0)
2464                 GOTO(out, rc);
2465
2466         root = dt_locate(env, key, fid);
2467         if (IS_ERR(root))
2468                 GOTO(out, rc = PTR_ERR(root));
2469
2470         if (unlikely(!dt_try_as_dir(env, root)))
2471                 GOTO(out, rc = -ENOTDIR);
2472
2473         lfsck->li_local_root_fid = *fid;
2474         if (master) {
2475                 lfsck->li_master = 1;
2476                 if (lfsck_dev_idx(key) == 0) {
2477                         struct lu_fid *pfid = &lfsck_env_info(env)->lti_fid2;
2478                         const struct lu_name *cname;
2479
2480                         rc = dt_lookup(env, root,
2481                                 (struct dt_rec *)(&lfsck->li_global_root_fid),
2482                                 (const struct dt_key *)"ROOT", BYPASS_CAPA);
2483                         if (rc != 0)
2484                                 GOTO(out, rc);
2485
2486                         obj = dt_locate(env, key, &lfsck->li_global_root_fid);
2487                         if (IS_ERR(obj))
2488                                 GOTO(out, rc = PTR_ERR(obj));
2489
2490                         rc = dt_lookup(env, obj, (struct dt_rec *)fid,
2491                                 (const struct dt_key *)dotlustre, BYPASS_CAPA);
2492                         if (rc != 0)
2493                                 GOTO(out, rc);
2494
2495                         lu_object_put(env, &obj->do_lu);
2496                         obj = dt_locate(env, key, fid);
2497                         if (IS_ERR(obj))
2498                                 GOTO(out, rc = PTR_ERR(obj));
2499
2500                         cname = lfsck_name_get_const(env, dotlustre,
2501                                                      strlen(dotlustre));
2502                         rc = lfsck_verify_linkea(env, key, obj, cname,
2503                                                  &lfsck->li_global_root_fid);
2504                         if (rc != 0)
2505                                 GOTO(out, rc);
2506
2507                         *pfid = *fid;
2508                         rc = dt_lookup(env, obj, (struct dt_rec *)fid,
2509                                        (const struct dt_key *)lostfound,
2510                                        BYPASS_CAPA);
2511                         if (rc != 0)
2512                                 GOTO(out, rc);
2513
2514                         lu_object_put(env, &obj->do_lu);
2515                         obj = dt_locate(env, key, fid);
2516                         if (IS_ERR(obj))
2517                                 GOTO(out, rc = PTR_ERR(obj));
2518
2519                         cname = lfsck_name_get_const(env, lostfound,
2520                                                      strlen(lostfound));
2521                         rc = lfsck_verify_linkea(env, key, obj, cname, pfid);
2522                         if (rc != 0)
2523                                 GOTO(out, rc);
2524
2525                         lu_object_put(env, &obj->do_lu);
2526                         obj = NULL;
2527                 }
2528         }
2529
2530         fid->f_seq = FID_SEQ_LOCAL_FILE;
2531         fid->f_oid = OTABLE_IT_OID;
2532         fid->f_ver = 0;
2533         obj = dt_locate(env, key, fid);
2534         if (IS_ERR(obj))
2535                 GOTO(out, rc = PTR_ERR(obj));
2536
2537         lu_object_get(&obj->do_lu);
2538         lfsck->li_obj_oit = obj;
2539         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
2540         if (rc != 0)
2541                 GOTO(out, rc);
2542
2543         rc = lfsck_bookmark_setup(env, lfsck);
2544         if (rc != 0)
2545                 GOTO(out, rc);
2546
2547         if (master) {
2548                 rc = lfsck_fid_init(lfsck);
2549                 if (rc < 0)
2550                         GOTO(out, rc);
2551
2552                 rc = lfsck_namespace_setup(env, lfsck);
2553                 if (rc < 0)
2554                         GOTO(out, rc);
2555         }
2556
2557         rc = lfsck_layout_setup(env, lfsck);
2558         if (rc < 0)
2559                 GOTO(out, rc);
2560
2561         /* XXX: more LFSCK components initialization to be added here. */
2562
2563         rc = lfsck_instance_add(lfsck);
2564         if (rc == 0)
2565                 rc = lfsck_add_target_from_orphan(env, lfsck);
2566 out:
2567         if (obj != NULL && !IS_ERR(obj))
2568                 lu_object_put(env, &obj->do_lu);
2569         if (root != NULL && !IS_ERR(root))
2570                 lu_object_put(env, &root->do_lu);
2571         if (rc != 0)
2572                 lfsck_instance_cleanup(env, lfsck);
2573         return rc;
2574 }
2575 EXPORT_SYMBOL(lfsck_register);
2576
2577 void lfsck_degister(const struct lu_env *env, struct dt_device *key)
2578 {
2579         struct lfsck_instance *lfsck;
2580
2581         lfsck = lfsck_instance_find(key, false, true);
2582         if (lfsck != NULL)
2583                 lfsck_instance_put(env, lfsck);
2584 }
2585 EXPORT_SYMBOL(lfsck_degister);
2586
2587 int lfsck_add_target(const struct lu_env *env, struct dt_device *key,
2588                      struct dt_device *tgt, struct obd_export *exp,
2589                      __u32 index, bool for_ost)
2590 {
2591         struct lfsck_instance   *lfsck;
2592         struct lfsck_tgt_desc   *ltd;
2593         int                      rc;
2594         ENTRY;
2595
2596         OBD_ALLOC_PTR(ltd);
2597         if (ltd == NULL)
2598                 RETURN(-ENOMEM);
2599
2600         ltd->ltd_tgt = tgt;
2601         ltd->ltd_key = key;
2602         ltd->ltd_exp = exp;
2603         INIT_LIST_HEAD(&ltd->ltd_orphan_list);
2604         INIT_LIST_HEAD(&ltd->ltd_layout_list);
2605         INIT_LIST_HEAD(&ltd->ltd_layout_phase_list);
2606         atomic_set(&ltd->ltd_ref, 1);
2607         ltd->ltd_index = index;
2608
2609         spin_lock(&lfsck_instance_lock);
2610         lfsck = __lfsck_instance_find(key, true, false);
2611         if (lfsck == NULL) {
2612                 if (for_ost)
2613                         list_add_tail(&ltd->ltd_orphan_list,
2614                                       &lfsck_ost_orphan_list);
2615                 else
2616                         list_add_tail(&ltd->ltd_orphan_list,
2617                                       &lfsck_mdt_orphan_list);
2618                 spin_unlock(&lfsck_instance_lock);
2619
2620                 RETURN(0);
2621         }
2622         spin_unlock(&lfsck_instance_lock);
2623
2624         rc = __lfsck_add_target(env, lfsck, ltd, for_ost, false);
2625         if (rc != 0)
2626                 lfsck_tgt_put(ltd);
2627
2628         lfsck_instance_put(env, lfsck);
2629
2630         RETURN(rc);
2631 }
2632 EXPORT_SYMBOL(lfsck_add_target);
2633
2634 void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
2635                       struct dt_device *tgt, __u32 index, bool for_ost)
2636 {
2637         struct lfsck_instance   *lfsck;
2638         struct lfsck_tgt_descs  *ltds;
2639         struct lfsck_tgt_desc   *ltd;
2640         struct list_head        *head;
2641
2642         if (for_ost)
2643                 head = &lfsck_ost_orphan_list;
2644         else
2645                 head = &lfsck_mdt_orphan_list;
2646
2647         spin_lock(&lfsck_instance_lock);
2648         list_for_each_entry(ltd, head, ltd_orphan_list) {
2649                 if (ltd->ltd_tgt == tgt) {
2650                         list_del_init(&ltd->ltd_orphan_list);
2651                         spin_unlock(&lfsck_instance_lock);
2652                         lfsck_tgt_put(ltd);
2653
2654                         return;
2655                 }
2656         }
2657
2658         ltd = NULL;
2659         lfsck = __lfsck_instance_find(key, true, false);
2660         spin_unlock(&lfsck_instance_lock);
2661         if (unlikely(lfsck == NULL))
2662                 return;
2663
2664         if (for_ost)
2665                 ltds = &lfsck->li_ost_descs;
2666         else
2667                 ltds = &lfsck->li_mdt_descs;
2668
2669         down_write(&ltds->ltd_rw_sem);
2670         LASSERT(ltds->ltd_tgts_bitmap != NULL);
2671
2672         if (unlikely(index >= ltds->ltd_tgts_bitmap->size))
2673                 goto unlock;
2674
2675         ltd = LTD_TGT(ltds, index);
2676         if (unlikely(ltd == NULL))
2677                 goto unlock;
2678
2679         LASSERT(ltds->ltd_tgtnr > 0);
2680
2681         ltds->ltd_tgtnr--;
2682         cfs_bitmap_clear(ltds->ltd_tgts_bitmap, index);
2683         LTD_TGT(ltds, index) = NULL;
2684
2685 unlock:
2686         if (ltd == NULL) {
2687                 if (for_ost)
2688                         head = &lfsck->li_ost_descs.ltd_orphan;
2689                 else
2690                         head = &lfsck->li_ost_descs.ltd_orphan;
2691
2692                 list_for_each_entry(ltd, head, ltd_orphan_list) {
2693                         if (ltd->ltd_tgt == tgt) {
2694                                 list_del_init(&ltd->ltd_orphan_list);
2695                                 break;
2696                         }
2697                 }
2698         }
2699
2700         up_write(&ltds->ltd_rw_sem);
2701         if (ltd != NULL) {
2702                 spin_lock(&ltds->ltd_lock);
2703                 ltd->ltd_dead = 1;
2704                 spin_unlock(&ltds->ltd_lock);
2705                 lfsck_stop_notify(env, lfsck, ltds, ltd, LFSCK_TYPE_LAYOUT);
2706                 lfsck_tgt_put(ltd);
2707         }
2708
2709         lfsck_instance_put(env, lfsck);
2710 }
2711 EXPORT_SYMBOL(lfsck_del_target);
2712
2713 static int __init lfsck_init(void)
2714 {
2715         int rc;
2716
2717         INIT_LIST_HEAD(&lfsck_ost_orphan_list);
2718         INIT_LIST_HEAD(&lfsck_mdt_orphan_list);
2719         lfsck_key_init_generic(&lfsck_thread_key, NULL);
2720         rc = lu_context_key_register(&lfsck_thread_key);
2721         if (rc == 0) {
2722                 tgt_register_lfsck_in_notify(lfsck_in_notify);
2723                 tgt_register_lfsck_query(lfsck_query);
2724         }
2725
2726         return rc;
2727 }
2728
2729 static void __exit lfsck_exit(void)
2730 {
2731         struct lfsck_tgt_desc *ltd;
2732         struct lfsck_tgt_desc *next;
2733
2734         LASSERT(cfs_list_empty(&lfsck_instance_list));
2735
2736         list_for_each_entry_safe(ltd, next, &lfsck_ost_orphan_list,
2737                                  ltd_orphan_list) {
2738                 list_del_init(&ltd->ltd_orphan_list);
2739                 lfsck_tgt_put(ltd);
2740         }
2741
2742         list_for_each_entry_safe(ltd, next, &lfsck_mdt_orphan_list,
2743                                  ltd_orphan_list) {
2744                 list_del_init(&ltd->ltd_orphan_list);
2745                 lfsck_tgt_put(ltd);
2746         }
2747
2748         lu_context_key_degister(&lfsck_thread_key);
2749 }
2750
2751 MODULE_AUTHOR("Intel Corporation <http://www.intel.com/>");
2752 MODULE_DESCRIPTION("LFSCK");
2753 MODULE_LICENSE("GPL");
2754
2755 cfs_module(lfsck, LUSTRE_VERSION_STRING, lfsck_init, lfsck_exit);