Whamcloud - gitweb
LU-1267 lfsck: enhance RPCs (2) for MDT-OST consistency
[fs/lustre-release.git] / lustre / lfsck / lfsck_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2013, Intel Corporation.
24  */
25 /*
26  * lustre/lfsck/lfsck_lib.c
27  *
28  * Author: Fan, Yong <fan.yong@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LFSCK
32
33 #include <libcfs/list.h>
34 #include <lu_object.h>
35 #include <dt_object.h>
36 #include <md_object.h>
37 #include <lustre_fld.h>
38 #include <lustre_lib.h>
39 #include <lustre_net.h>
40 #include <lustre_lfsck.h>
41 #include <lustre/lustre_lfsck_user.h>
42
43 #include "lfsck_internal.h"
44
45 /* define lfsck thread key */
46 LU_KEY_INIT(lfsck, struct lfsck_thread_info);
47
48 static void lfsck_key_fini(const struct lu_context *ctx,
49                            struct lu_context_key *key, void *data)
50 {
51         struct lfsck_thread_info *info = data;
52
53         lu_buf_free(&info->lti_linkea_buf);
54         OBD_FREE_PTR(info);
55 }
56
57 LU_CONTEXT_KEY_DEFINE(lfsck, LCT_MD_THREAD | LCT_DT_THREAD);
58 LU_KEY_INIT_GENERIC(lfsck);
59
60 static CFS_LIST_HEAD(lfsck_instance_list);
61 static struct list_head lfsck_ost_orphan_list;
62 static struct list_head lfsck_mdt_orphan_list;
63 static DEFINE_SPINLOCK(lfsck_instance_lock);
64
65 static const char *lfsck_status_names[] = {
66         [LS_INIT]               = "init",
67         [LS_SCANNING_PHASE1]    = "scanning-phase1",
68         [LS_SCANNING_PHASE2]    = "scanning-phase2",
69         [LS_COMPLETED]          = "completed",
70         [LS_FAILED]             = "failed",
71         [LS_STOPPED]            = "stopped",
72         [LS_PAUSED]             = "paused",
73         [LS_CRASHED]            = "crashed",
74         [LS_PARTIAL]            = "partial",
75         [LS_CO_FAILED]          = "co-failed",
76         [LS_CO_STOPPED]         = "co-stopped",
77         [LS_CO_PAUSED]          = "co-paused"
78 };
79
/*
 * Names of the LFSCK flag bits; entry N names bit N.
 * NOTE(review): presumably handed to lfsck_bits_dump() as its names[]
 * table -- the callers are not visible here.
 */
const char *lfsck_flags_names[] = {
        [0] = "scanned-once",
        [1] = "inconsistent",
        [2] = "upgrade",
        [3] = "incomplete",
        [4] = "crashed_lastid",
        [5] = NULL,
};
88
/*
 * Names of the LFSCK parameter bits; entry N names bit N. Bit 0 has no
 * printable name, so its slot is NULL.
 * NOTE(review): presumably handed to lfsck_bits_dump() as its names[]
 * table -- the callers are not visible here.
 */
const char *lfsck_param_names[] = {
        [0] = NULL,
        [1] = "failout",
        [2] = "dryrun",
        [3] = NULL,
};
95
96 const char *lfsck_status2names(enum lfsck_status status)
97 {
98         if (unlikely(status < 0 || status >= LS_MAX))
99                 return "unknown";
100
101         return lfsck_status_names[status];
102 }
103
104 static int lfsck_tgt_descs_init(struct lfsck_tgt_descs *ltds)
105 {
106         spin_lock_init(&ltds->ltd_lock);
107         init_rwsem(&ltds->ltd_rw_sem);
108         INIT_LIST_HEAD(&ltds->ltd_orphan);
109         ltds->ltd_tgts_bitmap = CFS_ALLOCATE_BITMAP(BITS_PER_LONG);
110         if (ltds->ltd_tgts_bitmap == NULL)
111                 return -ENOMEM;
112
113         return 0;
114 }
115
116 static void lfsck_tgt_descs_fini(struct lfsck_tgt_descs *ltds)
117 {
118         struct lfsck_tgt_desc   *ltd;
119         struct lfsck_tgt_desc   *next;
120         int                      idx;
121
122         down_write(&ltds->ltd_rw_sem);
123
124         list_for_each_entry_safe(ltd, next, &ltds->ltd_orphan,
125                                  ltd_orphan_list) {
126                 list_del_init(&ltd->ltd_orphan_list);
127                 lfsck_tgt_put(ltd);
128         }
129
130         if (unlikely(ltds->ltd_tgts_bitmap == NULL)) {
131                 up_write(&ltds->ltd_rw_sem);
132
133                 return;
134         }
135
136         cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
137                 ltd = LTD_TGT(ltds, idx);
138                 if (likely(ltd != NULL)) {
139                         LASSERT(list_empty(&ltd->ltd_layout_list));
140                         LASSERT(list_empty(&ltd->ltd_layout_phase_list));
141
142                         ltds->ltd_tgtnr--;
143                         cfs_bitmap_clear(ltds->ltd_tgts_bitmap, idx);
144                         LTD_TGT(ltds, idx) = NULL;
145                         lfsck_tgt_put(ltd);
146                 }
147         }
148
149         LASSERTF(ltds->ltd_tgtnr == 0, "tgt count unmatched: %d\n",
150                  ltds->ltd_tgtnr);
151
152         for (idx = 0; idx < TGT_PTRS; idx++) {
153                 if (ltds->ltd_tgts_idx[idx] != NULL) {
154                         OBD_FREE_PTR(ltds->ltd_tgts_idx[idx]);
155                         ltds->ltd_tgts_idx[idx] = NULL;
156                 }
157         }
158
159         CFS_FREE_BITMAP(ltds->ltd_tgts_bitmap);
160         ltds->ltd_tgts_bitmap = NULL;
161         up_write(&ltds->ltd_rw_sem);
162 }
163
164 static int __lfsck_add_target(const struct lu_env *env,
165                               struct lfsck_instance *lfsck,
166                               struct lfsck_tgt_desc *ltd,
167                               bool for_ost, bool locked)
168 {
169         struct lfsck_tgt_descs *ltds;
170         __u32                   index = ltd->ltd_index;
171         int                     rc    = 0;
172         ENTRY;
173
174         if (for_ost)
175                 ltds = &lfsck->li_ost_descs;
176         else
177                 ltds = &lfsck->li_mdt_descs;
178
179         if (!locked)
180                 down_write(&ltds->ltd_rw_sem);
181
182         LASSERT(ltds->ltd_tgts_bitmap != NULL);
183
184         if (index >= ltds->ltd_tgts_bitmap->size) {
185                 __u32 newsize = max((__u32)ltds->ltd_tgts_bitmap->size,
186                                     (__u32)BITS_PER_LONG);
187                 cfs_bitmap_t *old_bitmap = ltds->ltd_tgts_bitmap;
188                 cfs_bitmap_t *new_bitmap;
189
190                 while (newsize < index + 1)
191                         newsize <<= 1;
192
193                 new_bitmap = CFS_ALLOCATE_BITMAP(newsize);
194                 if (new_bitmap == NULL)
195                         GOTO(unlock, rc = -ENOMEM);
196
197                 if (ltds->ltd_tgtnr > 0)
198                         cfs_bitmap_copy(new_bitmap, old_bitmap);
199                 ltds->ltd_tgts_bitmap = new_bitmap;
200                 CFS_FREE_BITMAP(old_bitmap);
201         }
202
203         if (cfs_bitmap_check(ltds->ltd_tgts_bitmap, index)) {
204                 CERROR("%s: the device %s (%u) is registered already\n",
205                        lfsck_lfsck2name(lfsck),
206                        ltd->ltd_tgt->dd_lu_dev.ld_obd->obd_name, index);
207                 GOTO(unlock, rc = -EEXIST);
208         }
209
210         if (ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK] == NULL) {
211                 OBD_ALLOC_PTR(ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK]);
212                 if (ltds->ltd_tgts_idx[index / TGT_PTRS_PER_BLOCK] == NULL)
213                         GOTO(unlock, rc = -ENOMEM);
214         }
215
216         LTD_TGT(ltds, index) = ltd;
217         cfs_bitmap_set(ltds->ltd_tgts_bitmap, index);
218         ltds->ltd_tgtnr++;
219
220         GOTO(unlock, rc = 0);
221
222 unlock:
223         if (!locked)
224                 up_write(&ltds->ltd_rw_sem);
225
226         return rc;
227 }
228
229 static int lfsck_add_target_from_orphan(const struct lu_env *env,
230                                         struct lfsck_instance *lfsck)
231 {
232         struct lfsck_tgt_descs  *ltds    = &lfsck->li_ost_descs;
233         struct lfsck_tgt_desc   *ltd;
234         struct lfsck_tgt_desc   *next;
235         struct list_head        *head    = &lfsck_ost_orphan_list;
236         int                      rc;
237         bool                     for_ost = true;
238
239 again:
240         spin_lock(&lfsck_instance_lock);
241         list_for_each_entry_safe(ltd, next, head, ltd_orphan_list) {
242                 if (ltd->ltd_key == lfsck->li_bottom) {
243                         list_del_init(&ltd->ltd_orphan_list);
244                         list_add_tail(&ltd->ltd_orphan_list,
245                                       &ltds->ltd_orphan);
246                 }
247         }
248         spin_unlock(&lfsck_instance_lock);
249
250         down_write(&ltds->ltd_rw_sem);
251         while (!list_empty(&ltds->ltd_orphan)) {
252                 ltd = list_entry(ltds->ltd_orphan.next,
253                                  struct lfsck_tgt_desc,
254                                  ltd_orphan_list);
255                 list_del_init(&ltd->ltd_orphan_list);
256                 rc = __lfsck_add_target(env, lfsck, ltd, for_ost, true);
257                 /* Do not hold the semaphore for too long time. */
258                 up_write(&ltds->ltd_rw_sem);
259                 if (rc != 0)
260                         return rc;
261
262                 down_write(&ltds->ltd_rw_sem);
263         }
264         up_write(&ltds->ltd_rw_sem);
265
266         if (for_ost) {
267                 ltds = &lfsck->li_mdt_descs;
268                 head = &lfsck_mdt_orphan_list;
269                 for_ost = false;
270                 goto again;
271         }
272
273         return 0;
274 }
275
276 static inline struct lfsck_component *
277 __lfsck_component_find(struct lfsck_instance *lfsck, __u16 type, cfs_list_t *list)
278 {
279         struct lfsck_component *com;
280
281         cfs_list_for_each_entry(com, list, lc_link) {
282                 if (com->lc_type == type)
283                         return com;
284         }
285         return NULL;
286 }
287
288 static struct lfsck_component *
289 lfsck_component_find(struct lfsck_instance *lfsck, __u16 type)
290 {
291         struct lfsck_component *com;
292
293         spin_lock(&lfsck->li_lock);
294         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan);
295         if (com != NULL)
296                 goto unlock;
297
298         com = __lfsck_component_find(lfsck, type,
299                                      &lfsck->li_list_double_scan);
300         if (com != NULL)
301                 goto unlock;
302
303         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_idle);
304
305 unlock:
306         if (com != NULL)
307                 lfsck_component_get(com);
308         spin_unlock(&lfsck->li_lock);
309         return com;
310 }
311
312 void lfsck_component_cleanup(const struct lu_env *env,
313                              struct lfsck_component *com)
314 {
315         if (!cfs_list_empty(&com->lc_link))
316                 cfs_list_del_init(&com->lc_link);
317         if (!cfs_list_empty(&com->lc_link_dir))
318                 cfs_list_del_init(&com->lc_link_dir);
319
320         lfsck_component_put(env, com);
321 }
322
323 void lfsck_instance_cleanup(const struct lu_env *env,
324                             struct lfsck_instance *lfsck)
325 {
326         struct ptlrpc_thread    *thread = &lfsck->li_thread;
327         struct lfsck_component  *com;
328         ENTRY;
329
330         LASSERT(list_empty(&lfsck->li_link));
331         LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
332
333         lfsck_tgt_descs_fini(&lfsck->li_ost_descs);
334         lfsck_tgt_descs_fini(&lfsck->li_mdt_descs);
335
336         if (lfsck->li_obj_oit != NULL) {
337                 lu_object_put_nocache(env, &lfsck->li_obj_oit->do_lu);
338                 lfsck->li_obj_oit = NULL;
339         }
340
341         LASSERT(lfsck->li_obj_dir == NULL);
342
343         while (!cfs_list_empty(&lfsck->li_list_scan)) {
344                 com = cfs_list_entry(lfsck->li_list_scan.next,
345                                      struct lfsck_component,
346                                      lc_link);
347                 lfsck_component_cleanup(env, com);
348         }
349
350         LASSERT(cfs_list_empty(&lfsck->li_list_dir));
351
352         while (!cfs_list_empty(&lfsck->li_list_double_scan)) {
353                 com = cfs_list_entry(lfsck->li_list_double_scan.next,
354                                      struct lfsck_component,
355                                      lc_link);
356                 lfsck_component_cleanup(env, com);
357         }
358
359         while (!cfs_list_empty(&lfsck->li_list_idle)) {
360                 com = cfs_list_entry(lfsck->li_list_idle.next,
361                                      struct lfsck_component,
362                                      lc_link);
363                 lfsck_component_cleanup(env, com);
364         }
365
366         if (lfsck->li_bookmark_obj != NULL) {
367                 lu_object_put_nocache(env, &lfsck->li_bookmark_obj->do_lu);
368                 lfsck->li_bookmark_obj = NULL;
369         }
370
371         if (lfsck->li_los != NULL) {
372                 local_oid_storage_fini(env, lfsck->li_los);
373                 lfsck->li_los = NULL;
374         }
375
376         OBD_FREE_PTR(lfsck);
377 }
378
379 static inline struct lfsck_instance *
380 __lfsck_instance_find(struct dt_device *key, bool ref, bool unlink)
381 {
382         struct lfsck_instance *lfsck;
383
384         cfs_list_for_each_entry(lfsck, &lfsck_instance_list, li_link) {
385                 if (lfsck->li_bottom == key) {
386                         if (ref)
387                                 lfsck_instance_get(lfsck);
388                         if (unlink)
389                                 list_del_init(&lfsck->li_link);
390
391                         return lfsck;
392                 }
393         }
394
395         return NULL;
396 }
397
398 static inline struct lfsck_instance *lfsck_instance_find(struct dt_device *key,
399                                                          bool ref, bool unlink)
400 {
401         struct lfsck_instance *lfsck;
402
403         spin_lock(&lfsck_instance_lock);
404         lfsck = __lfsck_instance_find(key, ref, unlink);
405         spin_unlock(&lfsck_instance_lock);
406
407         return lfsck;
408 }
409
410 static inline int lfsck_instance_add(struct lfsck_instance *lfsck)
411 {
412         struct lfsck_instance *tmp;
413
414         spin_lock(&lfsck_instance_lock);
415         cfs_list_for_each_entry(tmp, &lfsck_instance_list, li_link) {
416                 if (lfsck->li_bottom == tmp->li_bottom) {
417                         spin_unlock(&lfsck_instance_lock);
418                         return -EEXIST;
419                 }
420         }
421
422         cfs_list_add_tail(&lfsck->li_link, &lfsck_instance_list);
423         spin_unlock(&lfsck_instance_lock);
424         return 0;
425 }
426
/*
 * Print "<prefix>: name1,name2\n" for the bits set in \a bits.
 *
 * Bit N is printed as names[N]; set bits whose names[] slot is NULL are
 * consumed silently. When \a bits is zero only "<prefix>:\n" is printed.
 * On success *buf is advanced past the text and *len is reduced by the
 * same amount.
 *
 * snprintf() returns the length the text would have had, so a result that
 * does not leave room for the terminating NUL means the output was
 * truncated; this is reported as -ENOSPC instead of moving *buf beyond the
 * end of the buffer. The bit mask is walked as an unsigned value so that
 * bit 31 can be handled without shifting into the sign bit.
 *
 * \param[in,out] buf   current write position in the output buffer
 * \param[in,out] len   space left at *buf, in bytes
 * \param[in] bits      the bit mask to describe
 * \param[in] names     name table indexed by bit number; the caller must
 *                      provide a slot for every bit that can be set
 * \param[in] prefix    label printed in front of the names
 *
 * \retval              number of bytes written on success
 * \retval              -ENOSPC if the text does not fit; *buf and *len
 *                      then cover only the pieces that were fully written
 */
int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                    const char *prefix)
{
        unsigned int    rest = (unsigned int)bits;
        int             save = *len;
        int             rc;
        int             i;

        if (*len <= 0)
                return -ENOSPC;

        rc = snprintf(*buf, *len, "%s:%c", prefix, rest != 0 ? ' ' : '\n');
        if (rc <= 0 || rc >= *len)
                return -ENOSPC;

        *buf += rc;
        *len -= rc;

        /* rest becomes zero at the latest after bit 31, so the shift count
         * never reaches the width of the type. */
        for (i = 0; rest != 0; i++) {
                unsigned int flag = 1U << i;

                if ((rest & flag) == 0)
                        continue;

                rest &= ~flag;
                if (names[i] == NULL)
                        continue;

                /* ',' between names, '\n' after the last set bit */
                rc = snprintf(*buf, *len, "%s%c", names[i],
                              rest != 0 ? ',' : '\n');
                if (rc <= 0 || rc >= *len)
                        return -ENOSPC;

                *buf += rc;
                *len -= rc;
        }

        return save - *len;
}
457
458 int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
459 {
460         int rc;
461
462         if (time != 0)
463                 rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
464                               cfs_time_current_sec() - time);
465         else
466                 rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
467         if (rc <= 0)
468                 return -ENOSPC;
469
470         *buf += rc;
471         *len -= rc;
472         return rc;
473 }
474
475 int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
476                    const char *prefix)
477 {
478         int rc;
479
480         if (fid_is_zero(&pos->lp_dir_parent)) {
481                 if (pos->lp_oit_cookie == 0)
482                         rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
483                                       prefix);
484                 else
485                         rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
486                                       prefix, pos->lp_oit_cookie);
487         } else {
488                 rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
489                               prefix, pos->lp_oit_cookie,
490                               PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
491         }
492         if (rc <= 0)
493                 return -ENOSPC;
494
495         *buf += rc;
496         *len -= rc;
497         return rc;
498 }
499
500 void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
501                     struct lfsck_position *pos, bool init)
502 {
503         const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
504
505         if (unlikely(lfsck->li_di_oit == NULL)) {
506                 memset(pos, 0, sizeof(*pos));
507                 return;
508         }
509
510         pos->lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
511         if (!lfsck->li_current_oit_processed && !init)
512                 pos->lp_oit_cookie--;
513
514         LASSERT(pos->lp_oit_cookie > 0);
515
516         if (lfsck->li_di_dir != NULL) {
517                 struct dt_object *dto = lfsck->li_obj_dir;
518
519                 pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
520                                                         lfsck->li_di_dir);
521
522                 if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
523                         fid_zero(&pos->lp_dir_parent);
524                         pos->lp_dir_cookie = 0;
525                 } else {
526                         pos->lp_dir_parent = *lfsck_dto2fid(dto);
527                 }
528         } else {
529                 fid_zero(&pos->lp_dir_parent);
530                 pos->lp_dir_cookie = 0;
531         }
532 }
533
534 static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
535 {
536         lfsck->li_bookmark_ram.lb_speed_limit = limit;
537         if (limit != LFSCK_SPEED_NO_LIMIT) {
538                 if (limit > HZ) {
539                         lfsck->li_sleep_rate = limit / HZ;
540                         lfsck->li_sleep_jif = 1;
541                 } else {
542                         lfsck->li_sleep_rate = 1;
543                         lfsck->li_sleep_jif = HZ / limit;
544                 }
545         } else {
546                 lfsck->li_sleep_jif = 0;
547                 lfsck->li_sleep_rate = 0;
548         }
549 }
550
551 void lfsck_control_speed(struct lfsck_instance *lfsck)
552 {
553         struct ptlrpc_thread *thread = &lfsck->li_thread;
554         struct l_wait_info    lwi;
555
556         if (lfsck->li_sleep_jif > 0 &&
557             lfsck->li_new_scanned >= lfsck->li_sleep_rate) {
558                 lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL,
559                                        LWI_ON_SIGNAL_NOOP, NULL);
560
561                 l_wait_event(thread->t_ctl_waitq,
562                              !thread_is_running(thread),
563                              &lwi);
564                 lfsck->li_new_scanned = 0;
565         }
566 }
567
568 void lfsck_control_speed_by_self(struct lfsck_component *com)
569 {
570         struct lfsck_instance   *lfsck  = com->lc_lfsck;
571         struct ptlrpc_thread    *thread = &lfsck->li_thread;
572         struct l_wait_info       lwi;
573
574         if (lfsck->li_sleep_jif > 0 &&
575             com->lc_new_scanned >= lfsck->li_sleep_rate) {
576                 lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL,
577                                        LWI_ON_SIGNAL_NOOP, NULL);
578
579                 l_wait_event(thread->t_ctl_waitq,
580                              !thread_is_running(thread),
581                              &lwi);
582                 com->lc_new_scanned = 0;
583         }
584 }
585
586 static int lfsck_parent_fid(const struct lu_env *env, struct dt_object *obj,
587                             struct lu_fid *fid)
588 {
589         if (unlikely(!S_ISDIR(lfsck_object_type(obj)) ||
590                      !dt_try_as_dir(env, obj)))
591                 return -ENOTDIR;
592
593         return dt_lookup(env, obj, (struct dt_rec *)fid,
594                          (const struct dt_key *)"..", BYPASS_CAPA);
595 }
596
597 static int lfsck_needs_scan_dir(const struct lu_env *env,
598                                 struct lfsck_instance *lfsck,
599                                 struct dt_object *obj)
600 {
601         struct lu_fid *fid   = &lfsck_env_info(env)->lti_fid;
602         int            depth = 0;
603         int            rc;
604
605         if (!lfsck->li_master || !S_ISDIR(lfsck_object_type(obj)) ||
606             cfs_list_empty(&lfsck->li_list_dir))
607                RETURN(0);
608
609         while (1) {
610                 /* XXX: Currently, we do not scan the "/REMOTE_PARENT_DIR",
611                  *      which is the agent directory to manage the objects
612                  *      which name entries reside on remote MDTs. Related
613                  *      consistency verification will be processed in LFSCK
614                  *      phase III. */
615                 if (lu_fid_eq(lfsck_dto2fid(obj), &lfsck->li_global_root_fid)) {
616                         if (depth > 0)
617                                 lfsck_object_put(env, obj);
618                         return 1;
619                 }
620
621                 /* .lustre doesn't contain "real" user objects, no need lfsck */
622                 if (fid_is_dot_lustre(lfsck_dto2fid(obj))) {
623                         if (depth > 0)
624                                 lfsck_object_put(env, obj);
625                         return 0;
626                 }
627
628                 dt_read_lock(env, obj, MOR_TGT_CHILD);
629                 if (unlikely(lfsck_is_dead_obj(obj))) {
630                         dt_read_unlock(env, obj);
631                         if (depth > 0)
632                                 lfsck_object_put(env, obj);
633                         return 0;
634                 }
635
636                 rc = dt_xattr_get(env, obj,
637                                   lfsck_buf_get(env, NULL, 0), XATTR_NAME_LINK,
638                                   BYPASS_CAPA);
639                 dt_read_unlock(env, obj);
640                 if (rc >= 0) {
641                         if (depth > 0)
642                                 lfsck_object_put(env, obj);
643                         return 1;
644                 }
645
646                 if (rc < 0 && rc != -ENODATA) {
647                         if (depth > 0)
648                                 lfsck_object_put(env, obj);
649                         return rc;
650                 }
651
652                 rc = lfsck_parent_fid(env, obj, fid);
653                 if (depth > 0)
654                         lfsck_object_put(env, obj);
655                 if (rc != 0)
656                         return rc;
657
658                 if (unlikely(lu_fid_eq(fid, &lfsck->li_local_root_fid)))
659                         return 0;
660
661                 obj = lfsck_object_find(env, lfsck, fid);
662                 if (obj == NULL)
663                         return 0;
664                 else if (IS_ERR(obj))
665                         return PTR_ERR(obj);
666
667                 if (!dt_object_exists(obj)) {
668                         lfsck_object_put(env, obj);
669                         return 0;
670                 }
671
672                 /* Currently, only client visible directory can be remote. */
673                 if (dt_object_remote(obj)) {
674                         lfsck_object_put(env, obj);
675                         return 1;
676                 }
677
678                 depth++;
679         }
680         return 0;
681 }
682
683 struct lfsck_thread_args *lfsck_thread_args_init(struct lfsck_instance *lfsck,
684                                                  struct lfsck_component *com)
685 {
686         struct lfsck_thread_args *lta;
687         int                       rc;
688
689         OBD_ALLOC_PTR(lta);
690         if (lta == NULL)
691                 return ERR_PTR(-ENOMEM);
692
693         rc = lu_env_init(&lta->lta_env, LCT_MD_THREAD | LCT_DT_THREAD);
694         if (rc != 0) {
695                 OBD_FREE_PTR(lta);
696                 return ERR_PTR(rc);
697         }
698
699         lta->lta_lfsck = lfsck_instance_get(lfsck);
700         if (com != NULL)
701                 lta->lta_com = lfsck_component_get(com);
702
703         return lta;
704 }
705
706 void lfsck_thread_args_fini(struct lfsck_thread_args *lta)
707 {
708         if (lta->lta_com != NULL)
709                 lfsck_component_put(&lta->lta_env, lta->lta_com);
710         lfsck_instance_put(&lta->lta_env, lta->lta_lfsck);
711         lu_env_fini(&lta->lta_env);
712         OBD_FREE_PTR(lta);
713 }
714
715 /* LFSCK wrap functions */
716
717 void lfsck_fail(const struct lu_env *env, struct lfsck_instance *lfsck,
718                 bool new_checked)
719 {
720         struct lfsck_component *com;
721
722         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
723                 com->lc_ops->lfsck_fail(env, com, new_checked);
724         }
725 }
726
727 int lfsck_checkpoint(const struct lu_env *env, struct lfsck_instance *lfsck)
728 {
729         struct lfsck_component *com;
730         int                     rc  = 0;
731         int                     rc1 = 0;
732
733         if (likely(cfs_time_beforeq(cfs_time_current(),
734                                     lfsck->li_time_next_checkpoint)))
735                 return 0;
736
737         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
738         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
739                 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
740                 if (rc != 0)
741                         rc1 = rc;
742         }
743
744         lfsck->li_time_last_checkpoint = cfs_time_current();
745         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
746                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
747         return rc1 != 0 ? rc1 : rc;
748 }
749
750 int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck)
751 {
752         struct dt_object       *obj     = NULL;
753         struct lfsck_component *com;
754         struct lfsck_component *next;
755         struct lfsck_position  *pos     = NULL;
756         const struct dt_it_ops *iops    =
757                                 &lfsck->li_obj_oit->do_index_ops->dio_it;
758         struct dt_it           *di;
759         int                     rc;
760         ENTRY;
761
762         LASSERT(lfsck->li_obj_dir == NULL);
763         LASSERT(lfsck->li_di_dir == NULL);
764
765         lfsck->li_current_oit_processed = 0;
766         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
767                 com->lc_new_checked = 0;
768                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
769                         com->lc_journal = 0;
770
771                 rc = com->lc_ops->lfsck_prep(env, com);
772                 if (rc != 0)
773                         GOTO(out, rc);
774
775                 if ((pos == NULL) ||
776                     (!lfsck_pos_is_zero(&com->lc_pos_start) &&
777                      lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
778                         pos = &com->lc_pos_start;
779         }
780
781         /* Init otable-based iterator. */
782         if (pos == NULL) {
783                 rc = iops->load(env, lfsck->li_di_oit, 0);
784                 if (rc > 0) {
785                         lfsck->li_oit_over = 1;
786                         rc = 0;
787                 }
788
789                 GOTO(out, rc);
790         }
791
792         rc = iops->load(env, lfsck->li_di_oit, pos->lp_oit_cookie);
793         if (rc < 0)
794                 GOTO(out, rc);
795         else if (rc > 0)
796                 lfsck->li_oit_over = 1;
797
798         if (!lfsck->li_master || fid_is_zero(&pos->lp_dir_parent))
799                 GOTO(out, rc = 0);
800
801         /* Find the directory for namespace-based traverse. */
802         obj = lfsck_object_find(env, lfsck, &pos->lp_dir_parent);
803         if (obj == NULL)
804                 GOTO(out, rc = 0);
805         else if (IS_ERR(obj))
806                 RETURN(PTR_ERR(obj));
807
808         /* XXX: Currently, skip remote object, the consistency for
809          *      remote object will be processed in LFSCK phase III. */
810         if (!dt_object_exists(obj) || dt_object_remote(obj) ||
811             unlikely(!S_ISDIR(lfsck_object_type(obj))))
812                 GOTO(out, rc = 0);
813
814         if (unlikely(!dt_try_as_dir(env, obj)))
815                 GOTO(out, rc = -ENOTDIR);
816
817         /* Init the namespace-based directory traverse. */
818         iops = &obj->do_index_ops->dio_it;
819         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
820         if (IS_ERR(di))
821                 GOTO(out, rc = PTR_ERR(di));
822
823         LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
824
825         rc = iops->load(env, di, pos->lp_dir_cookie);
826         if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
827                 rc = iops->next(env, di);
828         else if (rc > 0)
829                 rc = 0;
830
831         if (rc != 0) {
832                 iops->put(env, di);
833                 iops->fini(env, di);
834                 GOTO(out, rc);
835         }
836
837         lfsck->li_obj_dir = lfsck_object_get(obj);
838         lfsck->li_cookie_dir = iops->store(env, di);
839         spin_lock(&lfsck->li_lock);
840         lfsck->li_di_dir = di;
841         spin_unlock(&lfsck->li_lock);
842
843         GOTO(out, rc = 0);
844
845 out:
846         if (obj != NULL)
847                 lfsck_object_put(env, obj);
848
849         if (rc < 0) {
850                 cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
851                                              lc_link)
852                         com->lc_ops->lfsck_post(env, com, rc, true);
853
854                 return rc;
855         }
856
857         rc = 0;
858         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, true);
859         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
860                 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
861                 if (rc != 0)
862                         break;
863         }
864
865         lfsck->li_time_last_checkpoint = cfs_time_current();
866         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
867                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
868         return rc;
869 }
870
871 int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck,
872                    struct dt_object *obj)
873 {
874         struct lfsck_component *com;
875         const struct dt_it_ops *iops;
876         struct dt_it           *di;
877         int                     rc;
878         ENTRY;
879
880         LASSERT(lfsck->li_obj_dir == NULL);
881
882         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
883                 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
884                 if (rc != 0)
885                         RETURN(rc);
886         }
887
888         rc = lfsck_needs_scan_dir(env, lfsck, obj);
889         if (rc <= 0)
890                 GOTO(out, rc);
891
892         if (unlikely(!dt_try_as_dir(env, obj)))
893                 GOTO(out, rc = -ENOTDIR);
894
895         iops = &obj->do_index_ops->dio_it;
896         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
897         if (IS_ERR(di))
898                 GOTO(out, rc = PTR_ERR(di));
899
900         rc = iops->load(env, di, 0);
901         if (rc == 0)
902                 rc = iops->next(env, di);
903         else if (rc > 0)
904                 rc = 0;
905
906         if (rc != 0) {
907                 iops->put(env, di);
908                 iops->fini(env, di);
909                 GOTO(out, rc);
910         }
911
912         lfsck->li_obj_dir = lfsck_object_get(obj);
913         lfsck->li_cookie_dir = iops->store(env, di);
914         spin_lock(&lfsck->li_lock);
915         lfsck->li_di_dir = di;
916         spin_unlock(&lfsck->li_lock);
917
918         GOTO(out, rc = 0);
919
920 out:
921         if (rc < 0)
922                 lfsck_fail(env, lfsck, false);
923         return (rc > 0 ? 0 : rc);
924 }
925
926 int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
927                    struct dt_object *obj, struct lu_dirent *ent)
928 {
929         struct lfsck_component *com;
930         int                     rc;
931
932         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
933                 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
934                 if (rc != 0)
935                         return rc;
936         }
937         return 0;
938 }
939
940 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
941                int result)
942 {
943         struct lfsck_component *com;
944         struct lfsck_component *next;
945         int                     rc  = 0;
946         int                     rc1 = 0;
947
948         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
949         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
950                 rc = com->lc_ops->lfsck_post(env, com, result, false);
951                 if (rc != 0)
952                         rc1 = rc;
953         }
954
955         lfsck->li_time_last_checkpoint = cfs_time_current();
956         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
957                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
958
959         /* Ignore some component post failure to make other can go ahead. */
960         return result;
961 }
962
963 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
964 {
965         struct lfsck_component *com;
966         struct lfsck_component *next;
967         struct l_wait_info      lwi = { 0 };
968         int                     rc  = 0;
969         int                     rc1 = 0;
970
971         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
972                                      lc_link) {
973                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
974                         com->lc_journal = 0;
975
976                 rc = com->lc_ops->lfsck_double_scan(env, com);
977                 if (rc != 0)
978                         rc1 = rc;
979         }
980
981         l_wait_event(lfsck->li_thread.t_ctl_waitq,
982                      atomic_read(&lfsck->li_double_scan_count) == 0,
983                      &lwi);
984
985         return rc1 != 0 ? rc1 : rc;
986 }
987
988 int lfsck_stop_notify(const struct lu_env *env, struct lfsck_instance *lfsck,
989                       struct lfsck_tgt_descs *ltds, struct lfsck_tgt_desc *ltd)
990 {
991         struct ptlrpc_request_set *set;
992         struct lfsck_component    *com;
993         int                        cnt = 0;
994         int                        rc  = 0;
995         int                        rc1 = 0;
996
997         set = ptlrpc_prep_set();
998         if (set == NULL)
999                 return -ENOMEM;
1000
1001         list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1002                 if (com->lc_ops->lfsck_stop_notify != NULL) {
1003                         rc = com->lc_ops->lfsck_stop_notify(env, com, ltds,
1004                                                             ltd, set);
1005                         if (rc != 0)
1006                                 rc1 = rc;
1007                         else
1008                                 cnt++;
1009                 }
1010         }
1011
1012         list_for_each_entry(com, &lfsck->li_list_double_scan, lc_link) {
1013                 if (com->lc_ops->lfsck_stop_notify != NULL) {
1014                         rc = com->lc_ops->lfsck_stop_notify(env, com, ltds,
1015                                                             ltd, set);
1016                         if (rc != 0)
1017                                 rc1 = rc;
1018                         else
1019                                 cnt++;
1020                 }
1021         }
1022
1023         if (cnt > 0)
1024                 rc = ptlrpc_set_wait(set);
1025         ptlrpc_set_destroy(set);
1026
1027         return rc1 != 0 ? rc1 : rc;
1028 }
1029
1030 void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck)
1031 {
1032         struct lfsck_component *com;
1033         struct lfsck_component *next;
1034
1035         list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
1036                                  lc_link) {
1037                 if (com->lc_ops->lfsck_quit != NULL)
1038                         com->lc_ops->lfsck_quit(env, com);
1039         }
1040
1041         list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
1042                                  lc_link) {
1043                 if (com->lc_ops->lfsck_quit != NULL)
1044                         com->lc_ops->lfsck_quit(env, com);
1045         }
1046 }
1047
1048 int lfsck_async_request(const struct lu_env *env, struct obd_export *exp,
1049                         struct lfsck_request *lr,
1050                         struct ptlrpc_request_set *set,
1051                         ptlrpc_interpterer_t interpreter,
1052                         void *args, int request)
1053 {
1054         struct lfsck_async_interpret_args *laia;
1055         struct ptlrpc_request             *req;
1056         struct lfsck_request              *tmp;
1057         struct req_format                 *format;
1058         int                                rc;
1059
1060         if (!(exp_connect_flags(exp) & OBD_CONNECT_LFSCK))
1061                 return -EOPNOTSUPP;
1062
1063         switch (request) {
1064         case LFSCK_NOTIFY:
1065                 format = &RQF_LFSCK_NOTIFY;
1066                 break;
1067         case LFSCK_QUERY:
1068                 format = &RQF_LFSCK_QUERY;
1069                 break;
1070         default:
1071                 CERROR("%s: unknown async request: opc = %d\n",
1072                        exp->exp_obd->obd_name, request);
1073                 return -EINVAL;
1074         }
1075
1076         req = ptlrpc_request_alloc(class_exp2cliimp(exp), format);
1077         if (req == NULL)
1078                 return -ENOMEM;
1079
1080         rc = ptlrpc_request_pack(req, LUSTRE_OBD_VERSION, request);
1081         if (rc != 0) {
1082                 ptlrpc_request_free(req);
1083
1084                 return rc;
1085         }
1086
1087         tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST);
1088         *tmp = *lr;
1089         ptlrpc_request_set_replen(req);
1090
1091         laia = ptlrpc_req_async_args(req);
1092         *laia = *(struct lfsck_async_interpret_args *)args;
1093         req->rq_interpret_reply = interpreter;
1094         ptlrpc_set_add_req(set, req);
1095
1096         return 0;
1097 }
1098
1099 /* external interfaces */
1100
1101 int lfsck_get_speed(struct dt_device *key, void *buf, int len)
1102 {
1103         struct lu_env           env;
1104         struct lfsck_instance  *lfsck;
1105         int                     rc;
1106         ENTRY;
1107
1108         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1109         if (rc != 0)
1110                 RETURN(rc);
1111
1112         lfsck = lfsck_instance_find(key, true, false);
1113         if (likely(lfsck != NULL)) {
1114                 rc = snprintf(buf, len, "%u\n",
1115                               lfsck->li_bookmark_ram.lb_speed_limit);
1116                 lfsck_instance_put(&env, lfsck);
1117         } else {
1118                 rc = -ENODEV;
1119         }
1120
1121         lu_env_fini(&env);
1122
1123         RETURN(rc);
1124 }
1125 EXPORT_SYMBOL(lfsck_get_speed);
1126
1127 int lfsck_set_speed(struct dt_device *key, int val)
1128 {
1129         struct lu_env           env;
1130         struct lfsck_instance  *lfsck;
1131         int                     rc;
1132         ENTRY;
1133
1134         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1135         if (rc != 0)
1136                 RETURN(rc);
1137
1138         lfsck = lfsck_instance_find(key, true, false);
1139         if (likely(lfsck != NULL)) {
1140                 mutex_lock(&lfsck->li_mutex);
1141                 __lfsck_set_speed(lfsck, val);
1142                 rc = lfsck_bookmark_store(&env, lfsck);
1143                 mutex_unlock(&lfsck->li_mutex);
1144                 lfsck_instance_put(&env, lfsck);
1145         } else {
1146                 rc = -ENODEV;
1147         }
1148
1149         lu_env_fini(&env);
1150
1151         RETURN(rc);
1152 }
1153 EXPORT_SYMBOL(lfsck_set_speed);
1154
1155 int lfsck_get_windows(struct dt_device *key, void *buf, int len)
1156 {
1157         struct lu_env           env;
1158         struct lfsck_instance  *lfsck;
1159         int                     rc;
1160         ENTRY;
1161
1162         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1163         if (rc != 0)
1164                 RETURN(rc);
1165
1166         lfsck = lfsck_instance_find(key, true, false);
1167         if (likely(lfsck != NULL)) {
1168                 rc = snprintf(buf, len, "%u\n",
1169                               lfsck->li_bookmark_ram.lb_async_windows);
1170                 lfsck_instance_put(&env, lfsck);
1171         } else {
1172                 rc = -ENODEV;
1173         }
1174
1175         lu_env_fini(&env);
1176
1177         RETURN(rc);
1178 }
1179 EXPORT_SYMBOL(lfsck_get_windows);
1180
1181 int lfsck_set_windows(struct dt_device *key, int val)
1182 {
1183         struct lu_env           env;
1184         struct lfsck_instance  *lfsck;
1185         int                     rc;
1186         ENTRY;
1187
1188         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1189         if (rc != 0)
1190                 RETURN(rc);
1191
1192         lfsck = lfsck_instance_find(key, true, false);
1193         if (likely(lfsck != NULL)) {
1194                 if (val > LFSCK_ASYNC_WIN_MAX) {
1195                         CERROR("%s: Too large async windows size, which "
1196                                "may cause memory issues. The valid range "
1197                                "is [0 - %u]. If you do not want to restrict "
1198                                "the windows size for async requests pipeline, "
1199                                "just set it as 0.\n",
1200                                lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX);
1201                         rc = -EINVAL;
1202                 } else if (lfsck->li_bookmark_ram.lb_async_windows != val) {
1203                         mutex_lock(&lfsck->li_mutex);
1204                         lfsck->li_bookmark_ram.lb_async_windows = val;
1205                         rc = lfsck_bookmark_store(&env, lfsck);
1206                         mutex_unlock(&lfsck->li_mutex);
1207                 }
1208                 lfsck_instance_put(&env, lfsck);
1209         } else {
1210                 rc = -ENODEV;
1211         }
1212
1213         lu_env_fini(&env);
1214
1215         RETURN(rc);
1216 }
1217 EXPORT_SYMBOL(lfsck_set_windows);
1218
1219 int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type)
1220 {
1221         struct lu_env           env;
1222         struct lfsck_instance  *lfsck;
1223         struct lfsck_component *com;
1224         int                     rc;
1225         ENTRY;
1226
1227         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
1228         if (rc != 0)
1229                 RETURN(rc);
1230
1231         lfsck = lfsck_instance_find(key, true, false);
1232         if (likely(lfsck != NULL)) {
1233                 com = lfsck_component_find(lfsck, type);
1234                 if (likely(com != NULL)) {
1235                         rc = com->lc_ops->lfsck_dump(&env, com, buf, len);
1236                         lfsck_component_put(&env, com);
1237                 } else {
1238                         rc = -ENOTSUPP;
1239                 }
1240
1241                 lfsck_instance_put(&env, lfsck);
1242         } else {
1243                 rc = -ENODEV;
1244         }
1245
1246         lu_env_fini(&env);
1247
1248         RETURN(rc);
1249 }
1250 EXPORT_SYMBOL(lfsck_dump);
1251
1252 int lfsck_start(const struct lu_env *env, struct dt_device *key,
1253                 struct lfsck_start_param *lsp)
1254 {
1255         struct lfsck_start              *start  = lsp->lsp_start;
1256         struct lfsck_instance           *lfsck;
1257         struct lfsck_bookmark           *bk;
1258         struct ptlrpc_thread            *thread;
1259         struct lfsck_component          *com;
1260         struct l_wait_info               lwi    = { 0 };
1261         struct lfsck_thread_args        *lta;
1262         bool                             dirty  = false;
1263         long                             rc     = 0;
1264         __u16                            valid  = 0;
1265         __u16                            flags  = 0;
1266         __u16                            type   = 1;
1267         ENTRY;
1268
1269         lfsck = lfsck_instance_find(key, true, false);
1270         if (unlikely(lfsck == NULL))
1271                 RETURN(-ENODEV);
1272
1273         /* start == NULL means auto trigger paused LFSCK. */
1274         if ((start == NULL) &&
1275             (cfs_list_empty(&lfsck->li_list_scan) ||
1276              OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
1277                 GOTO(put, rc = 0);
1278
1279         bk = &lfsck->li_bookmark_ram;
1280         thread = &lfsck->li_thread;
1281         mutex_lock(&lfsck->li_mutex);
1282         spin_lock(&lfsck->li_lock);
1283         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
1284                 rc = -EALREADY;
1285                 while (start->ls_active != 0) {
1286                         if (type & start->ls_active) {
1287                                 com = __lfsck_component_find(lfsck, type,
1288                                                         &lfsck->li_list_scan);
1289                                 if (com == NULL)
1290                                         com = __lfsck_component_find(lfsck,
1291                                                 type,
1292                                                 &lfsck->li_list_double_scan);
1293                                 if (com == NULL) {
1294                                         rc = -EBUSY;
1295                                         break;
1296                                 } else {
1297                                         start->ls_active &= ~type;
1298                                 }
1299                         }
1300                         type <<= 1;
1301                 }
1302                 spin_unlock(&lfsck->li_lock);
1303                 GOTO(out, rc);
1304         }
1305         spin_unlock(&lfsck->li_lock);
1306
1307         lfsck->li_namespace = lsp->lsp_namespace;
1308         lfsck->li_status = 0;
1309         lfsck->li_oit_over = 0;
1310         lfsck->li_drop_dryrun = 0;
1311         lfsck->li_new_scanned = 0;
1312
1313         /* For auto trigger. */
1314         if (start == NULL)
1315                 goto trigger;
1316
1317         start->ls_version = bk->lb_version;
1318         if (start->ls_valid & LSV_SPEED_LIMIT) {
1319                 __lfsck_set_speed(lfsck, start->ls_speed_limit);
1320                 dirty = true;
1321         }
1322
1323         if (start->ls_valid & LSV_ASYNC_WINDOWS &&
1324             bk->lb_async_windows != start->ls_async_windows) {
1325                 bk->lb_async_windows = start->ls_async_windows;
1326                 dirty = true;
1327         }
1328
1329         if (start->ls_valid & LSV_ERROR_HANDLE) {
1330                 valid |= DOIV_ERROR_HANDLE;
1331                 if (start->ls_flags & LPF_FAILOUT)
1332                         flags |= DOIF_FAILOUT;
1333
1334                 if ((start->ls_flags & LPF_FAILOUT) &&
1335                     !(bk->lb_param & LPF_FAILOUT)) {
1336                         bk->lb_param |= LPF_FAILOUT;
1337                         dirty = true;
1338                 } else if (!(start->ls_flags & LPF_FAILOUT) &&
1339                            (bk->lb_param & LPF_FAILOUT)) {
1340                         bk->lb_param &= ~LPF_FAILOUT;
1341                         dirty = true;
1342                 }
1343         }
1344
1345         if (start->ls_valid & LSV_DRYRUN) {
1346                 valid |= DOIV_DRYRUN;
1347                 if (start->ls_flags & LPF_DRYRUN)
1348                         flags |= DOIF_DRYRUN;
1349
1350                 if ((start->ls_flags & LPF_DRYRUN) &&
1351                     !(bk->lb_param & LPF_DRYRUN)) {
1352                         bk->lb_param |= LPF_DRYRUN;
1353                         dirty = true;
1354                 } else if (!(start->ls_flags & LPF_DRYRUN) &&
1355                            (bk->lb_param & LPF_DRYRUN)) {
1356                         bk->lb_param &= ~LPF_DRYRUN;
1357                         lfsck->li_drop_dryrun = 1;
1358                         dirty = true;
1359                 }
1360         }
1361
1362         if (dirty) {
1363                 rc = lfsck_bookmark_store(env, lfsck);
1364                 if (rc != 0)
1365                         GOTO(out, rc);
1366         }
1367
1368         if (start->ls_flags & LPF_RESET)
1369                 flags |= DOIF_RESET;
1370
1371         if (start->ls_active != 0) {
1372                 struct lfsck_component *next;
1373
1374                 if (start->ls_active == LFSCK_TYPES_ALL)
1375                         start->ls_active = LFSCK_TYPES_SUPPORTED;
1376
1377                 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
1378                         start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
1379                         GOTO(out, rc = -ENOTSUPP);
1380                 }
1381
1382                 cfs_list_for_each_entry_safe(com, next,
1383                                              &lfsck->li_list_scan, lc_link) {
1384                         if (!(com->lc_type & start->ls_active)) {
1385                                 rc = com->lc_ops->lfsck_post(env, com, 0,
1386                                                              false);
1387                                 if (rc != 0)
1388                                         GOTO(out, rc);
1389                         }
1390                 }
1391
1392                 while (start->ls_active != 0) {
1393                         if (type & start->ls_active) {
1394                                 com = __lfsck_component_find(lfsck, type,
1395                                                         &lfsck->li_list_idle);
1396                                 if (com != NULL) {
1397                                         /* The component status will be updated
1398                                          * when its prep() is called later by
1399                                          * the LFSCK main engine. */
1400                                         cfs_list_del_init(&com->lc_link);
1401                                         cfs_list_add_tail(&com->lc_link,
1402                                                           &lfsck->li_list_scan);
1403                                 }
1404                                 start->ls_active &= ~type;
1405                         }
1406                         type <<= 1;
1407                 }
1408         }
1409
1410         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
1411                 start->ls_active |= com->lc_type;
1412                 if (flags & DOIF_RESET) {
1413                         rc = com->lc_ops->lfsck_reset(env, com, false);
1414                         if (rc != 0)
1415                                 GOTO(out, rc);
1416                 }
1417         }
1418
1419 trigger:
1420         lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
1421         if (bk->lb_param & LPF_DRYRUN) {
1422                 lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN;
1423                 valid |= DOIV_DRYRUN;
1424                 flags |= DOIF_DRYRUN;
1425         }
1426
1427         if (bk->lb_param & LPF_FAILOUT) {
1428                 valid |= DOIV_ERROR_HANDLE;
1429                 flags |= DOIF_FAILOUT;
1430         }
1431
1432         if (!cfs_list_empty(&lfsck->li_list_scan))
1433                 flags |= DOIF_OUTUSED;
1434
1435         lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
1436         thread_set_flags(thread, 0);
1437         lta = lfsck_thread_args_init(lfsck, NULL);
1438         if (IS_ERR(lta))
1439                 GOTO(out, rc = PTR_ERR(lta));
1440
1441         rc = PTR_ERR(kthread_run(lfsck_master_engine, lta, "lfsck"));
1442         if (IS_ERR_VALUE(rc)) {
1443                 CERROR("%s: cannot start LFSCK thread: rc = %ld\n",
1444                        lfsck_lfsck2name(lfsck), rc);
1445                 lfsck_thread_args_fini(lta);
1446         } else {
1447                 rc = 0;
1448                 l_wait_event(thread->t_ctl_waitq,
1449                              thread_is_running(thread) ||
1450                              thread_is_stopped(thread),
1451                              &lwi);
1452         }
1453
1454         GOTO(out, rc);
1455
1456 out:
1457         mutex_unlock(&lfsck->li_mutex);
1458 put:
1459         lfsck_instance_put(env, lfsck);
1460         return (rc < 0 ? rc : 0);
1461 }
1462 EXPORT_SYMBOL(lfsck_start);
1463
1464 int lfsck_stop(const struct lu_env *env, struct dt_device *key,
1465                struct lfsck_stop *stop)
1466 {
1467         struct lfsck_instance   *lfsck;
1468         struct ptlrpc_thread    *thread;
1469         struct l_wait_info       lwi    = { 0 };
1470         int                      rc     = 0;
1471         ENTRY;
1472
1473         lfsck = lfsck_instance_find(key, true, false);
1474         if (unlikely(lfsck == NULL))
1475                 RETURN(-ENODEV);
1476
1477         thread = &lfsck->li_thread;
1478         mutex_lock(&lfsck->li_mutex);
1479         spin_lock(&lfsck->li_lock);
1480         if (thread_is_init(thread) || thread_is_stopped(thread)) {
1481                 spin_unlock(&lfsck->li_lock);
1482                 GOTO(out, rc = -EALREADY);
1483         }
1484
1485         if (stop != NULL)
1486                 lfsck->li_status = stop->ls_status;
1487         else
1488                 lfsck->li_status = LS_STOPPED;
1489
1490         thread_set_flags(thread, SVC_STOPPING);
1491         spin_unlock(&lfsck->li_lock);
1492
1493         wake_up_all(&thread->t_ctl_waitq);
1494         l_wait_event(thread->t_ctl_waitq,
1495                      thread_is_stopped(thread),
1496                      &lwi);
1497
1498         GOTO(out, rc = 0);
1499
1500 out:
1501         mutex_unlock(&lfsck->li_mutex);
1502         lfsck_instance_put(env, lfsck);
1503
1504         return rc;
1505 }
1506 EXPORT_SYMBOL(lfsck_stop);
1507
1508 int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
1509                     struct lfsck_request *lr)
1510 {
1511         struct lfsck_instance  *lfsck;
1512         struct lfsck_component *com;
1513         int                     rc;
1514         ENTRY;
1515
1516         switch (lr->lr_event) {
1517         case LE_STOP:
1518         case LE_PHASE1_DONE:
1519         case LE_PHASE2_DONE:
1520                 break;
1521         default:
1522                 RETURN(-EOPNOTSUPP);
1523         }
1524
1525         lfsck = lfsck_instance_find(key, true, false);
1526         if (unlikely(lfsck == NULL))
1527                 RETURN(-ENODEV);
1528
1529         com = lfsck_component_find(lfsck, lr->lr_active);
1530         if (likely(com != NULL)) {
1531                 rc = com->lc_ops->lfsck_in_notify(env, com, lr);
1532                 lfsck_component_put(env, com);
1533         } else {
1534                 rc = -ENOTSUPP;
1535         }
1536
1537         lfsck_instance_put(env, lfsck);
1538
1539         RETURN(rc);
1540 }
1541 EXPORT_SYMBOL(lfsck_in_notify);
1542
1543 int lfsck_query(const struct lu_env *env, struct dt_device *key,
1544                 struct lfsck_request *lr)
1545 {
1546         struct lfsck_instance  *lfsck;
1547         struct lfsck_component *com;
1548         int                     rc;
1549         ENTRY;
1550
1551         lfsck = lfsck_instance_find(key, true, false);
1552         if (unlikely(lfsck == NULL))
1553                 RETURN(-ENODEV);
1554
1555         com = lfsck_component_find(lfsck, lr->lr_active);
1556         if (likely(com != NULL)) {
1557                 rc = com->lc_ops->lfsck_query(env, com);
1558                 lfsck_component_put(env, com);
1559         } else {
1560                 rc = -ENOTSUPP;
1561         }
1562
1563         lfsck_instance_put(env, lfsck);
1564
1565         RETURN(rc);
1566 }
1567 EXPORT_SYMBOL(lfsck_query);
1568
1569 int lfsck_register(const struct lu_env *env, struct dt_device *key,
1570                    struct dt_device *next, lfsck_out_notify notify,
1571                    void *notify_data, bool master)
1572 {
1573         struct lfsck_instance   *lfsck;
1574         struct dt_object        *root  = NULL;
1575         struct dt_object        *obj;
1576         struct lu_fid           *fid   = &lfsck_env_info(env)->lti_fid;
1577         int                      rc;
1578         ENTRY;
1579
1580         lfsck = lfsck_instance_find(key, false, false);
1581         if (unlikely(lfsck != NULL))
1582                 RETURN(-EEXIST);
1583
1584         OBD_ALLOC_PTR(lfsck);
1585         if (lfsck == NULL)
1586                 RETURN(-ENOMEM);
1587
1588         mutex_init(&lfsck->li_mutex);
1589         spin_lock_init(&lfsck->li_lock);
1590         CFS_INIT_LIST_HEAD(&lfsck->li_link);
1591         CFS_INIT_LIST_HEAD(&lfsck->li_list_scan);
1592         CFS_INIT_LIST_HEAD(&lfsck->li_list_dir);
1593         CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
1594         CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
1595         atomic_set(&lfsck->li_ref, 1);
1596         atomic_set(&lfsck->li_double_scan_count, 0);
1597         init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
1598         lfsck->li_out_notify = notify;
1599         lfsck->li_out_notify_data = notify_data;
1600         lfsck->li_next = next;
1601         lfsck->li_bottom = key;
1602
1603         rc = lfsck_tgt_descs_init(&lfsck->li_ost_descs);
1604         if (rc != 0)
1605                 GOTO(out, rc);
1606
1607         rc = lfsck_tgt_descs_init(&lfsck->li_mdt_descs);
1608         if (rc != 0)
1609                 GOTO(out, rc);
1610
1611         fid->f_seq = FID_SEQ_LOCAL_NAME;
1612         fid->f_oid = 1;
1613         fid->f_ver = 0;
1614         rc = local_oid_storage_init(env, lfsck->li_bottom, fid, &lfsck->li_los);
1615         if (rc != 0)
1616                 GOTO(out, rc);
1617
1618         rc = dt_root_get(env, key, fid);
1619         if (rc != 0)
1620                 GOTO(out, rc);
1621
1622         root = dt_locate(env, lfsck->li_bottom, fid);
1623         if (IS_ERR(root))
1624                 GOTO(out, rc = PTR_ERR(root));
1625
1626         if (unlikely(!dt_try_as_dir(env, root)))
1627                 GOTO(out, rc = -ENOTDIR);
1628
1629         lfsck->li_local_root_fid = *fid;
1630         if (master) {
1631                 lfsck->li_master = 1;
1632                 if (lfsck_dev_idx(lfsck->li_bottom) == 0) {
1633                         rc = dt_lookup(env, root,
1634                                 (struct dt_rec *)(&lfsck->li_global_root_fid),
1635                                 (const struct dt_key *)"ROOT", BYPASS_CAPA);
1636                         if (rc != 0)
1637                                 GOTO(out, rc);
1638                 }
1639         }
1640
1641         fid->f_seq = FID_SEQ_LOCAL_FILE;
1642         fid->f_oid = OTABLE_IT_OID;
1643         fid->f_ver = 0;
1644         obj = dt_locate(env, lfsck->li_bottom, fid);
1645         if (IS_ERR(obj))
1646                 GOTO(out, rc = PTR_ERR(obj));
1647
1648         lfsck->li_obj_oit = obj;
1649         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
1650         if (rc != 0) {
1651                 if (rc == -ENOTSUPP)
1652                         GOTO(add, rc = 0);
1653
1654                 GOTO(out, rc);
1655         }
1656
1657         rc = lfsck_bookmark_setup(env, lfsck);
1658         if (rc != 0)
1659                 GOTO(out, rc);
1660
1661         if (master) {
1662                 rc = lfsck_namespace_setup(env, lfsck);
1663                 if (rc < 0)
1664                         GOTO(out, rc);
1665         }
1666
1667         rc = lfsck_layout_setup(env, lfsck);
1668         if (rc < 0)
1669                 GOTO(out, rc);
1670
1671         /* XXX: more LFSCK components initialization to be added here. */
1672
1673 add:
1674         rc = lfsck_instance_add(lfsck);
1675         if (rc == 0)
1676                 rc = lfsck_add_target_from_orphan(env, lfsck);
1677 out:
1678         if (root != NULL && !IS_ERR(root))
1679                 lu_object_put(env, &root->do_lu);
1680         if (rc != 0)
1681                 lfsck_instance_cleanup(env, lfsck);
1682         return rc;
1683 }
1684 EXPORT_SYMBOL(lfsck_register);
1685
1686 void lfsck_degister(const struct lu_env *env, struct dt_device *key)
1687 {
1688         struct lfsck_instance *lfsck;
1689
1690         lfsck = lfsck_instance_find(key, false, true);
1691         if (lfsck != NULL)
1692                 lfsck_instance_put(env, lfsck);
1693 }
1694 EXPORT_SYMBOL(lfsck_degister);
1695
1696 int lfsck_add_target(const struct lu_env *env, struct dt_device *key,
1697                      struct dt_device *tgt, struct obd_export *exp,
1698                      __u32 index, bool for_ost)
1699 {
1700         struct lfsck_instance   *lfsck;
1701         struct lfsck_tgt_desc   *ltd;
1702         int                      rc;
1703         ENTRY;
1704
1705         OBD_ALLOC_PTR(ltd);
1706         if (ltd == NULL)
1707                 RETURN(-ENOMEM);
1708
1709         ltd->ltd_tgt = tgt;
1710         ltd->ltd_key = key;
1711         ltd->ltd_exp = exp;
1712         INIT_LIST_HEAD(&ltd->ltd_orphan_list);
1713         INIT_LIST_HEAD(&ltd->ltd_layout_list);
1714         INIT_LIST_HEAD(&ltd->ltd_layout_phase_list);
1715         atomic_set(&ltd->ltd_ref, 1);
1716         ltd->ltd_index = index;
1717
1718         spin_lock(&lfsck_instance_lock);
1719         lfsck = __lfsck_instance_find(key, true, false);
1720         if (lfsck == NULL) {
1721                 if (for_ost)
1722                         list_add_tail(&ltd->ltd_orphan_list,
1723                                       &lfsck_ost_orphan_list);
1724                 else
1725                         list_add_tail(&ltd->ltd_orphan_list,
1726                                       &lfsck_mdt_orphan_list);
1727                 spin_unlock(&lfsck_instance_lock);
1728
1729                 RETURN(0);
1730         }
1731         spin_unlock(&lfsck_instance_lock);
1732
1733         rc = __lfsck_add_target(env, lfsck, ltd, for_ost, false);
1734         if (rc != 0)
1735                 lfsck_tgt_put(ltd);
1736
1737         lfsck_instance_put(env, lfsck);
1738
1739         RETURN(rc);
1740 }
1741 EXPORT_SYMBOL(lfsck_add_target);
1742
1743 void lfsck_del_target(const struct lu_env *env, struct dt_device *key,
1744                       struct dt_device *tgt, __u32 index, bool for_ost)
1745 {
1746         struct lfsck_instance   *lfsck;
1747         struct lfsck_tgt_descs  *ltds;
1748         struct lfsck_tgt_desc   *ltd;
1749         struct list_head        *head;
1750         bool                     found = false;
1751         bool                     stop  = false;
1752
1753         if (for_ost)
1754                 head = &lfsck_ost_orphan_list;
1755         else
1756                 head = &lfsck_mdt_orphan_list;
1757
1758         spin_lock(&lfsck_instance_lock);
1759         list_for_each_entry(ltd, head, ltd_orphan_list) {
1760                 if (ltd->ltd_tgt == tgt) {
1761                         list_del_init(&ltd->ltd_orphan_list);
1762                         spin_unlock(&lfsck_instance_lock);
1763                         lfsck_tgt_put(ltd);
1764
1765                         return;
1766                 }
1767         }
1768
1769         lfsck = __lfsck_instance_find(key, true, false);
1770         spin_unlock(&lfsck_instance_lock);
1771         if (unlikely(lfsck == NULL))
1772                 return;
1773
1774         if (for_ost)
1775                 ltds = &lfsck->li_ost_descs;
1776         else
1777                 ltds = &lfsck->li_mdt_descs;
1778
1779         down_write(&ltds->ltd_rw_sem);
1780
1781         LASSERT(ltds->ltd_tgts_bitmap != NULL);
1782
1783         if (unlikely(index >= ltds->ltd_tgts_bitmap->size))
1784                 goto unlock;
1785
1786         ltd = LTD_TGT(ltds, index);
1787         if (unlikely(ltd == NULL))
1788                 goto unlock;
1789
1790         found = true;
1791         spin_lock(&ltds->ltd_lock);
1792         ltd->ltd_dead = 1;
1793         if (!list_empty(&ltd->ltd_layout_list)) {
1794                 list_del_init(&ltd->ltd_layout_list);
1795                 stop = true;
1796         } else {
1797                 LASSERT(list_empty(&ltd->ltd_layout_phase_list));
1798         }
1799         spin_unlock(&ltds->ltd_lock);
1800
1801         if (stop && lfsck->li_master)
1802                 lfsck_stop_notify(env, lfsck, ltds, ltd);
1803
1804         LASSERT(ltds->ltd_tgtnr > 0);
1805
1806         ltds->ltd_tgtnr--;
1807         cfs_bitmap_clear(ltds->ltd_tgts_bitmap, index);
1808         LTD_TGT(ltds, index) = NULL;
1809         lfsck_tgt_put(ltd);
1810
1811 unlock:
1812         if (!found) {
1813                 if (for_ost)
1814                         head = &lfsck->li_ost_descs.ltd_orphan;
1815                 else
1816                         head = &lfsck->li_ost_descs.ltd_orphan;
1817
1818                 list_for_each_entry(ltd, head, ltd_orphan_list) {
1819                         if (ltd->ltd_tgt == tgt) {
1820                                 list_del_init(&ltd->ltd_orphan_list);
1821                                 lfsck_tgt_put(ltd);
1822                                 break;
1823                         }
1824                 }
1825         }
1826
1827         up_write(&ltds->ltd_rw_sem);
1828         lfsck_instance_put(env, lfsck);
1829 }
1830 EXPORT_SYMBOL(lfsck_del_target);
1831
1832 static int __init lfsck_init(void)
1833 {
1834         int rc;
1835
1836         INIT_LIST_HEAD(&lfsck_ost_orphan_list);
1837         INIT_LIST_HEAD(&lfsck_mdt_orphan_list);
1838         lfsck_key_init_generic(&lfsck_thread_key, NULL);
1839         rc = lu_context_key_register(&lfsck_thread_key);
1840         if (rc == 0) {
1841                 tgt_register_lfsck_start(lfsck_start);
1842                 tgt_register_lfsck_in_notify(lfsck_in_notify);
1843                 tgt_register_lfsck_query(lfsck_query);
1844         }
1845
1846         return rc;
1847 }
1848
1849 static void __exit lfsck_exit(void)
1850 {
1851         struct lfsck_tgt_desc *ltd;
1852         struct lfsck_tgt_desc *next;
1853
1854         LASSERT(cfs_list_empty(&lfsck_instance_list));
1855
1856         list_for_each_entry_safe(ltd, next, &lfsck_ost_orphan_list,
1857                                  ltd_orphan_list) {
1858                 list_del_init(&ltd->ltd_orphan_list);
1859                 lfsck_tgt_put(ltd);
1860         }
1861
1862         list_for_each_entry_safe(ltd, next, &lfsck_mdt_orphan_list,
1863                                  ltd_orphan_list) {
1864                 list_del_init(&ltd->ltd_orphan_list);
1865                 lfsck_tgt_put(ltd);
1866         }
1867
1868         lu_context_key_degister(&lfsck_thread_key);
1869 }
1870
/* Kernel module metadata for the LFSCK module. */
MODULE_AUTHOR("Intel Corporation <http://www.intel.com/>");
MODULE_DESCRIPTION("LFSCK");
MODULE_LICENSE("GPL");

/* NOTE(review): cfs_module() presumably wires lfsck_init()/lfsck_exit() up
 * as the module load/unload hooks -- confirm against the libcfs macro. */
cfs_module(lfsck, LUSTRE_VERSION_STRING, lfsck_init, lfsck_exit);