Whamcloud - gitweb
LU-3884 lfsck: LFSCK should NOT hold root object
[fs/lustre-release.git] / lustre / lfsck / lfsck_lib.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2013, Intel Corporation.
24  */
25 /*
26  * lustre/lfsck/lfsck_lib.c
27  *
28  * Author: Fan, Yong <fan.yong@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LFSCK
32
33 #include <libcfs/list.h>
34 #include <lu_object.h>
35 #include <dt_object.h>
36 #include <md_object.h>
37 #include <lustre_fld.h>
38 #include <lustre_lib.h>
39 #include <lustre_net.h>
40 #include <lustre_lfsck.h>
41 #include <lustre/lustre_lfsck_user.h>
42
43 #include "lfsck_internal.h"
44
45 /* define lfsck thread key */
46 LU_KEY_INIT(lfsck, struct lfsck_thread_info);
47
48 static void lfsck_key_fini(const struct lu_context *ctx,
49                            struct lu_context_key *key, void *data)
50 {
51         struct lfsck_thread_info *info = data;
52
53         lu_buf_free(&info->lti_linkea_buf);
54         OBD_FREE_PTR(info);
55 }
56
57 LU_CONTEXT_KEY_DEFINE(lfsck, LCT_MD_THREAD | LCT_DT_THREAD);
58 LU_KEY_INIT_GENERIC(lfsck);
59
60 static CFS_LIST_HEAD(lfsck_instance_list);
61 static DEFINE_SPINLOCK(lfsck_instance_lock);
62
/*
 * Printable LFSCK status names, NULL-terminated.
 * NOTE(review): presumably indexed by the numeric LFSCK status value --
 * confirm against the status enum before reordering.
 */
const char *lfsck_status_names[] = {
        [0] = "init",
        [1] = "scanning-phase1",
        [2] = "scanning-phase2",
        [3] = "completed",
        [4] = "failed",
        [5] = "stopped",
        [6] = "paused",
        [7] = "crashed",
        [8] = NULL
};

/*
 * Printable LFSCK flag names, NULL-terminated.  Laid out so that entry i
 * can name bit i of a flags word (the lookup lfsck_bits_dump() performs on
 * its names[] argument).
 */
const char *lfsck_flags_names[] = {
        [0] = "scanned-once",
        [1] = "inconsistent",
        [2] = "upgrade",
        [3] = NULL
};

/*
 * Printable LFSCK parameter names, NULL-terminated; same bit-position
 * layout as lfsck_flags_names[].
 */
const char *lfsck_param_names[] = {
        [0] = "failout",
        [1] = "dryrun",
        [2] = NULL
};
87
88 static inline mdsno_t lfsck_dev_idx(struct dt_device *dev)
89 {
90         return dev->dd_lu_dev.ld_site->ld_seq_site->ss_node_id;
91 }
92
93 static inline void lfsck_component_get(struct lfsck_component *com)
94 {
95         atomic_inc(&com->lc_ref);
96 }
97
98 static inline void lfsck_component_put(const struct lu_env *env,
99                                        struct lfsck_component *com)
100 {
101         if (atomic_dec_and_test(&com->lc_ref)) {
102                 if (com->lc_obj != NULL)
103                         lu_object_put_nocache(env, &com->lc_obj->do_lu);
104                 if (com->lc_file_ram != NULL)
105                         OBD_FREE(com->lc_file_ram, com->lc_file_size);
106                 if (com->lc_file_disk != NULL)
107                         OBD_FREE(com->lc_file_disk, com->lc_file_size);
108                 OBD_FREE_PTR(com);
109         }
110 }
111
112 static inline struct lfsck_component *
113 __lfsck_component_find(struct lfsck_instance *lfsck, __u16 type, cfs_list_t *list)
114 {
115         struct lfsck_component *com;
116
117         cfs_list_for_each_entry(com, list, lc_link) {
118                 if (com->lc_type == type)
119                         return com;
120         }
121         return NULL;
122 }
123
124 static struct lfsck_component *
125 lfsck_component_find(struct lfsck_instance *lfsck, __u16 type)
126 {
127         struct lfsck_component *com;
128
129         spin_lock(&lfsck->li_lock);
130         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_scan);
131         if (com != NULL)
132                 goto unlock;
133
134         com = __lfsck_component_find(lfsck, type,
135                                      &lfsck->li_list_double_scan);
136         if (com != NULL)
137                 goto unlock;
138
139         com = __lfsck_component_find(lfsck, type, &lfsck->li_list_idle);
140
141 unlock:
142         if (com != NULL)
143                 lfsck_component_get(com);
144         spin_unlock(&lfsck->li_lock);
145         return com;
146 }
147
148 void lfsck_component_cleanup(const struct lu_env *env,
149                              struct lfsck_component *com)
150 {
151         if (!cfs_list_empty(&com->lc_link))
152                 cfs_list_del_init(&com->lc_link);
153         if (!cfs_list_empty(&com->lc_link_dir))
154                 cfs_list_del_init(&com->lc_link_dir);
155
156         lfsck_component_put(env, com);
157 }
158
159 static void lfsck_instance_cleanup(const struct lu_env *env,
160                                    struct lfsck_instance *lfsck)
161 {
162         struct ptlrpc_thread    *thread = &lfsck->li_thread;
163         struct lfsck_component  *com;
164         ENTRY;
165
166         LASSERT(list_empty(&lfsck->li_link));
167         LASSERT(thread_is_init(thread) || thread_is_stopped(thread));
168
169         if (lfsck->li_obj_oit != NULL) {
170                 lu_object_put_nocache(env, &lfsck->li_obj_oit->do_lu);
171                 lfsck->li_obj_oit = NULL;
172         }
173
174         LASSERT(lfsck->li_obj_dir == NULL);
175
176         while (!cfs_list_empty(&lfsck->li_list_scan)) {
177                 com = cfs_list_entry(lfsck->li_list_scan.next,
178                                      struct lfsck_component,
179                                      lc_link);
180                 lfsck_component_cleanup(env, com);
181         }
182
183         LASSERT(cfs_list_empty(&lfsck->li_list_dir));
184
185         while (!cfs_list_empty(&lfsck->li_list_double_scan)) {
186                 com = cfs_list_entry(lfsck->li_list_double_scan.next,
187                                      struct lfsck_component,
188                                      lc_link);
189                 lfsck_component_cleanup(env, com);
190         }
191
192         while (!cfs_list_empty(&lfsck->li_list_idle)) {
193                 com = cfs_list_entry(lfsck->li_list_idle.next,
194                                      struct lfsck_component,
195                                      lc_link);
196                 lfsck_component_cleanup(env, com);
197         }
198
199         if (lfsck->li_bookmark_obj != NULL) {
200                 lu_object_put_nocache(env, &lfsck->li_bookmark_obj->do_lu);
201                 lfsck->li_bookmark_obj = NULL;
202         }
203
204         if (lfsck->li_los != NULL) {
205                 local_oid_storage_fini(env, lfsck->li_los);
206                 lfsck->li_los = NULL;
207         }
208
209         OBD_FREE_PTR(lfsck);
210 }
211
212 static inline void lfsck_instance_get(struct lfsck_instance *lfsck)
213 {
214         atomic_inc(&lfsck->li_ref);
215 }
216
217 static inline void lfsck_instance_put(const struct lu_env *env,
218                                       struct lfsck_instance *lfsck)
219 {
220         if (atomic_dec_and_test(&lfsck->li_ref))
221                 lfsck_instance_cleanup(env, lfsck);
222 }
223
224 static inline struct lfsck_instance *lfsck_instance_find(struct dt_device *key,
225                                                          bool ref, bool unlink)
226 {
227         struct lfsck_instance *lfsck;
228
229         spin_lock(&lfsck_instance_lock);
230         cfs_list_for_each_entry(lfsck, &lfsck_instance_list, li_link) {
231                 if (lfsck->li_bottom == key) {
232                         if (ref)
233                                 lfsck_instance_get(lfsck);
234                         if (unlink)
235                                 list_del_init(&lfsck->li_link);
236                         spin_unlock(&lfsck_instance_lock);
237                         return lfsck;
238                 }
239         }
240         spin_unlock(&lfsck_instance_lock);
241         return NULL;
242 }
243
244 static inline int lfsck_instance_add(struct lfsck_instance *lfsck)
245 {
246         struct lfsck_instance *tmp;
247
248         spin_lock(&lfsck_instance_lock);
249         cfs_list_for_each_entry(tmp, &lfsck_instance_list, li_link) {
250                 if (lfsck->li_bottom == tmp->li_bottom) {
251                         spin_unlock(&lfsck_instance_lock);
252                         return -EEXIST;
253                 }
254         }
255
256         cfs_list_add_tail(&lfsck->li_link, &lfsck_instance_list);
257         spin_unlock(&lfsck_instance_lock);
258         return 0;
259 }
260
/*
 * Format "<prefix>: name,name,...\n" for the bits set in \a bits, or
 * "<prefix>:\n" when no bit is set, and advance the output cursor.
 *
 * \param buf     in/out: write position; advanced past the emitted text
 * \param len     in/out: space left at *buf; reduced by the emitted length
 * \param bits    bitmask to describe; bit i is printed as names[i]
 * \param names   name table indexed by bit position
 * \param prefix  label printed before the names
 *
 * \retval number of characters emitted on success
 * \retval -ENOSPC if the output does not fit.  snprintf() returns the
 *         length the text would have had, so a result >= the remaining
 *         space means truncation; that must be treated as failure before
 *         the cursor is moved, otherwise *buf would point past the buffer
 *         and *len would go negative.  On failure the cursor keeps
 *         whatever progress the pieces that did fit had made.
 */
int lfsck_bits_dump(char **buf, int *len, int bits, const char *names[],
                    const char *prefix)
{
        /* Work on an unsigned copy: shifting a signed 1 into bit 31 is
         * undefined behaviour. */
        unsigned int left = (unsigned int)bits;
        unsigned int flag;
        int          save = *len;
        int          rc;
        int          i;

        if (*len <= 0)
                return -ENOSPC;

        rc = snprintf(*buf, *len, "%s:%c", prefix, left != 0 ? ' ' : '\n');
        if (rc <= 0 || rc >= *len)
                return -ENOSPC;

        *buf += rc;
        *len -= rc;
        for (i = 0; left != 0; i++) {
                flag = 1U << i;
                if (!(flag & left))
                        continue;

                left &= ~flag;
                /* ',' between names, '\n' after the last one. */
                rc = snprintf(*buf, *len, "%s%c", names[i],
                              left != 0 ? ',' : '\n');
                if (rc <= 0 || rc >= *len)
                        return -ENOSPC;

                *buf += rc;
                *len -= rc;
        }
        return save - *len;
}
289
290 int lfsck_time_dump(char **buf, int *len, __u64 time, const char *prefix)
291 {
292         int rc;
293
294         if (time != 0)
295                 rc = snprintf(*buf, *len, "%s: "LPU64" seconds\n", prefix,
296                               cfs_time_current_sec() - time);
297         else
298                 rc = snprintf(*buf, *len, "%s: N/A\n", prefix);
299         if (rc <= 0)
300                 return -ENOSPC;
301
302         *buf += rc;
303         *len -= rc;
304         return rc;
305 }
306
307 int lfsck_pos_dump(char **buf, int *len, struct lfsck_position *pos,
308                    const char *prefix)
309 {
310         int rc;
311
312         if (fid_is_zero(&pos->lp_dir_parent)) {
313                 if (pos->lp_oit_cookie == 0)
314                         rc = snprintf(*buf, *len, "%s: N/A, N/A, N/A\n",
315                                       prefix);
316                 else
317                         rc = snprintf(*buf, *len, "%s: "LPU64", N/A, N/A\n",
318                                       prefix, pos->lp_oit_cookie);
319         } else {
320                 rc = snprintf(*buf, *len, "%s: "LPU64", "DFID", "LPU64"\n",
321                               prefix, pos->lp_oit_cookie,
322                               PFID(&pos->lp_dir_parent), pos->lp_dir_cookie);
323         }
324         if (rc <= 0)
325                 return -ENOSPC;
326
327         *buf += rc;
328         *len -= rc;
329         return rc;
330 }
331
332 void lfsck_pos_fill(const struct lu_env *env, struct lfsck_instance *lfsck,
333                     struct lfsck_position *pos, bool init)
334 {
335         const struct dt_it_ops *iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
336
337         if (unlikely(lfsck->li_di_oit == NULL)) {
338                 memset(pos, 0, sizeof(*pos));
339                 return;
340         }
341
342         pos->lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
343         if (!lfsck->li_current_oit_processed && !init)
344                 pos->lp_oit_cookie--;
345
346         LASSERT(pos->lp_oit_cookie > 0);
347
348         if (lfsck->li_di_dir != NULL) {
349                 struct dt_object *dto = lfsck->li_obj_dir;
350
351                 pos->lp_dir_cookie = dto->do_index_ops->dio_it.store(env,
352                                                         lfsck->li_di_dir);
353
354                 if (pos->lp_dir_cookie >= MDS_DIR_END_OFF) {
355                         fid_zero(&pos->lp_dir_parent);
356                         pos->lp_dir_cookie = 0;
357                 } else {
358                         pos->lp_dir_parent = *lu_object_fid(&dto->do_lu);
359                 }
360         } else {
361                 fid_zero(&pos->lp_dir_parent);
362                 pos->lp_dir_cookie = 0;
363         }
364 }
365
366 static void __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
367 {
368         lfsck->li_bookmark_ram.lb_speed_limit = limit;
369         if (limit != LFSCK_SPEED_NO_LIMIT) {
370                 if (limit > HZ) {
371                         lfsck->li_sleep_rate = limit / HZ;
372                         lfsck->li_sleep_jif = 1;
373                 } else {
374                         lfsck->li_sleep_rate = 1;
375                         lfsck->li_sleep_jif = HZ / limit;
376                 }
377         } else {
378                 lfsck->li_sleep_jif = 0;
379                 lfsck->li_sleep_rate = 0;
380         }
381 }
382
383 void lfsck_control_speed(struct lfsck_instance *lfsck)
384 {
385         struct ptlrpc_thread *thread = &lfsck->li_thread;
386         struct l_wait_info    lwi;
387
388         if (lfsck->li_sleep_jif > 0 &&
389             lfsck->li_new_scanned >= lfsck->li_sleep_rate) {
390                 spin_lock(&lfsck->li_lock);
391                 if (likely(lfsck->li_sleep_jif > 0 &&
392                            lfsck->li_new_scanned >= lfsck->li_sleep_rate)) {
393                         lwi = LWI_TIMEOUT_INTR(lfsck->li_sleep_jif, NULL,
394                                                LWI_ON_SIGNAL_NOOP, NULL);
395                         spin_unlock(&lfsck->li_lock);
396
397                         l_wait_event(thread->t_ctl_waitq,
398                                      !thread_is_running(thread),
399                                      &lwi);
400                         lfsck->li_new_scanned = 0;
401                 } else {
402                         spin_unlock(&lfsck->li_lock);
403                 }
404         }
405 }
406
407 static int lfsck_parent_fid(const struct lu_env *env, struct dt_object *obj,
408                             struct lu_fid *fid)
409 {
410         if (unlikely(!S_ISDIR(lfsck_object_type(obj)) ||
411                      !dt_try_as_dir(env, obj)))
412                 return -ENOTDIR;
413
414         return dt_lookup(env, obj, (struct dt_rec *)fid,
415                          (const struct dt_key *)"..", BYPASS_CAPA);
416 }
417
418 static int lfsck_needs_scan_dir(const struct lu_env *env,
419                                 struct lfsck_instance *lfsck,
420                                 struct dt_object *obj)
421 {
422         struct lu_fid *fid   = &lfsck_env_info(env)->lti_fid;
423         int            depth = 0;
424         int            rc;
425
426         if (!lfsck->li_master || !S_ISDIR(lfsck_object_type(obj)) ||
427             cfs_list_empty(&lfsck->li_list_dir))
428                RETURN(0);
429
430         while (1) {
431                 /* XXX: Currently, we do not scan the "/REMOTE_PARENT_DIR",
432                  *      which is the agent directory to manage the objects
433                  *      which name entries reside on remote MDTs. Related
434                  *      consistency verification will be processed in LFSCK
435                  *      phase III. */
436                 if (lu_fid_eq(lfsck_dto2fid(obj), &lfsck->li_global_root_fid)) {
437                         if (depth > 0)
438                                 lfsck_object_put(env, obj);
439                         return 1;
440                 }
441
442                 /* .lustre doesn't contain "real" user objects, no need lfsck */
443                 if (fid_is_dot_lustre(lfsck_dto2fid(obj))) {
444                         if (depth > 0)
445                                 lfsck_object_put(env, obj);
446                         return 0;
447                 }
448
449                 dt_read_lock(env, obj, MOR_TGT_CHILD);
450                 if (unlikely(lfsck_is_dead_obj(obj))) {
451                         dt_read_unlock(env, obj);
452                         if (depth > 0)
453                                 lfsck_object_put(env, obj);
454                         return 0;
455                 }
456
457                 rc = dt_xattr_get(env, obj,
458                                   lfsck_buf_get(env, NULL, 0), XATTR_NAME_LINK,
459                                   BYPASS_CAPA);
460                 dt_read_unlock(env, obj);
461                 if (rc >= 0) {
462                         if (depth > 0)
463                                 lfsck_object_put(env, obj);
464                         return 1;
465                 }
466
467                 if (rc < 0 && rc != -ENODATA) {
468                         if (depth > 0)
469                                 lfsck_object_put(env, obj);
470                         return rc;
471                 }
472
473                 rc = lfsck_parent_fid(env, obj, fid);
474                 if (depth > 0)
475                         lfsck_object_put(env, obj);
476                 if (rc != 0)
477                         return rc;
478
479                 if (unlikely(lu_fid_eq(fid, &lfsck->li_local_root_fid)))
480                         return 0;
481
482                 obj = lfsck_object_find(env, lfsck, fid);
483                 if (obj == NULL)
484                         return 0;
485                 else if (IS_ERR(obj))
486                         return PTR_ERR(obj);
487
488                 if (!dt_object_exists(obj)) {
489                         lfsck_object_put(env, obj);
490                         return 0;
491                 }
492
493                 /* Currently, only client visible directory can be remote. */
494                 if (dt_object_remote(obj)) {
495                         lfsck_object_put(env, obj);
496                         return 1;
497                 }
498
499                 depth++;
500         }
501         return 0;
502 }
503
504 /* LFSCK wrap functions */
505
506 void lfsck_fail(const struct lu_env *env, struct lfsck_instance *lfsck,
507                 bool new_checked)
508 {
509         struct lfsck_component *com;
510
511         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
512                 com->lc_ops->lfsck_fail(env, com, new_checked);
513         }
514 }
515
516 int lfsck_checkpoint(const struct lu_env *env, struct lfsck_instance *lfsck)
517 {
518         struct lfsck_component *com;
519         int                     rc;
520
521         if (likely(cfs_time_beforeq(cfs_time_current(),
522                                     lfsck->li_time_next_checkpoint)))
523                 return 0;
524
525         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
526         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
527                 rc = com->lc_ops->lfsck_checkpoint(env, com, false);
528                 if (rc != 0)
529                         return rc;;
530         }
531
532         lfsck->li_time_last_checkpoint = cfs_time_current();
533         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
534                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
535         return 0;
536 }
537
538 int lfsck_prep(const struct lu_env *env, struct lfsck_instance *lfsck)
539 {
540         struct dt_object       *obj     = NULL;
541         struct lfsck_component *com;
542         struct lfsck_component *next;
543         struct lfsck_position  *pos     = NULL;
544         const struct dt_it_ops *iops    =
545                                 &lfsck->li_obj_oit->do_index_ops->dio_it;
546         struct dt_it           *di;
547         int                     rc;
548         ENTRY;
549
550         LASSERT(lfsck->li_obj_dir == NULL);
551         LASSERT(lfsck->li_di_dir == NULL);
552
553         lfsck->li_current_oit_processed = 0;
554         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
555                 com->lc_new_checked = 0;
556                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
557                         com->lc_journal = 0;
558
559                 rc = com->lc_ops->lfsck_prep(env, com);
560                 if (rc != 0)
561                         RETURN(rc);
562
563                 if ((pos == NULL) ||
564                     (!lfsck_pos_is_zero(&com->lc_pos_start) &&
565                      lfsck_pos_is_eq(pos, &com->lc_pos_start) > 0))
566                         pos = &com->lc_pos_start;
567         }
568
569         /* Init otable-based iterator. */
570         if (pos == NULL) {
571                 rc = iops->load(env, lfsck->li_di_oit, 0);
572                 if (rc > 0) {
573                         lfsck->li_oit_over = 1;
574                         rc = 0;
575                 }
576
577                 GOTO(out, rc);
578         }
579
580         rc = iops->load(env, lfsck->li_di_oit, pos->lp_oit_cookie);
581         if (rc < 0)
582                 GOTO(out, rc);
583         else if (rc > 0)
584                 lfsck->li_oit_over = 1;
585
586         if (!lfsck->li_master || fid_is_zero(&pos->lp_dir_parent))
587                 GOTO(out, rc = 0);
588
589         /* Find the directory for namespace-based traverse. */
590         obj = lfsck_object_find(env, lfsck, &pos->lp_dir_parent);
591         if (obj == NULL)
592                 GOTO(out, rc = 0);
593         else if (IS_ERR(obj))
594                 RETURN(PTR_ERR(obj));
595
596         /* XXX: Currently, skip remote object, the consistency for
597          *      remote object will be processed in LFSCK phase III. */
598         if (!dt_object_exists(obj) || dt_object_remote(obj) ||
599             unlikely(!S_ISDIR(lfsck_object_type(obj))))
600                 GOTO(out, rc = 0);
601
602         if (unlikely(!dt_try_as_dir(env, obj)))
603                 GOTO(out, rc = -ENOTDIR);
604
605         /* Init the namespace-based directory traverse. */
606         iops = &obj->do_index_ops->dio_it;
607         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
608         if (IS_ERR(di))
609                 GOTO(out, rc = PTR_ERR(di));
610
611         LASSERT(pos->lp_dir_cookie < MDS_DIR_END_OFF);
612
613         rc = iops->load(env, di, pos->lp_dir_cookie);
614         if ((rc == 0) || (rc > 0 && pos->lp_dir_cookie > 0))
615                 rc = iops->next(env, di);
616         else if (rc > 0)
617                 rc = 0;
618
619         if (rc != 0) {
620                 iops->put(env, di);
621                 iops->fini(env, di);
622                 GOTO(out, rc);
623         }
624
625         lfsck->li_obj_dir = lfsck_object_get(obj);
626         lfsck->li_cookie_dir = iops->store(env, di);
627         spin_lock(&lfsck->li_lock);
628         lfsck->li_di_dir = di;
629         spin_unlock(&lfsck->li_lock);
630
631         GOTO(out, rc = 0);
632
633 out:
634         if (obj != NULL)
635                 lfsck_object_put(env, obj);
636
637         if (rc < 0) {
638                 cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan,
639                                              lc_link)
640                         com->lc_ops->lfsck_post(env, com, rc, true);
641
642                 return rc;
643         }
644
645         rc = 0;
646         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, true);
647         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
648                 rc = com->lc_ops->lfsck_checkpoint(env, com, true);
649                 if (rc != 0)
650                         break;
651         }
652
653         lfsck->li_time_last_checkpoint = cfs_time_current();
654         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
655                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
656         return rc;
657 }
658
659 int lfsck_exec_oit(const struct lu_env *env, struct lfsck_instance *lfsck,
660                    struct dt_object *obj)
661 {
662         struct lfsck_component *com;
663         const struct dt_it_ops *iops;
664         struct dt_it           *di;
665         int                     rc;
666         ENTRY;
667
668         LASSERT(lfsck->li_obj_dir == NULL);
669
670         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
671                 rc = com->lc_ops->lfsck_exec_oit(env, com, obj);
672                 if (rc != 0)
673                         RETURN(rc);
674         }
675
676         rc = lfsck_needs_scan_dir(env, lfsck, obj);
677         if (rc <= 0)
678                 GOTO(out, rc);
679
680         if (unlikely(!dt_try_as_dir(env, obj)))
681                 GOTO(out, rc = -ENOTDIR);
682
683         iops = &obj->do_index_ops->dio_it;
684         di = iops->init(env, obj, lfsck->li_args_dir, BYPASS_CAPA);
685         if (IS_ERR(di))
686                 GOTO(out, rc = PTR_ERR(di));
687
688         rc = iops->load(env, di, 0);
689         if (rc == 0)
690                 rc = iops->next(env, di);
691         else if (rc > 0)
692                 rc = 0;
693
694         if (rc != 0) {
695                 iops->put(env, di);
696                 iops->fini(env, di);
697                 GOTO(out, rc);
698         }
699
700         lfsck->li_obj_dir = lfsck_object_get(obj);
701         lfsck->li_cookie_dir = iops->store(env, di);
702         spin_lock(&lfsck->li_lock);
703         lfsck->li_di_dir = di;
704         spin_unlock(&lfsck->li_lock);
705
706         GOTO(out, rc = 0);
707
708 out:
709         if (rc < 0)
710                 lfsck_fail(env, lfsck, false);
711         return (rc > 0 ? 0 : rc);
712 }
713
714 int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck,
715                    struct dt_object *obj, struct lu_dirent *ent)
716 {
717         struct lfsck_component *com;
718         int                     rc;
719
720         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
721                 rc = com->lc_ops->lfsck_exec_dir(env, com, obj, ent);
722                 if (rc != 0)
723                         return rc;
724         }
725         return 0;
726 }
727
728 int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck,
729                int result)
730 {
731         struct lfsck_component *com;
732         struct lfsck_component *next;
733         int                     rc;
734
735         lfsck_pos_fill(env, lfsck, &lfsck->li_pos_current, false);
736         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_scan, lc_link) {
737                 rc = com->lc_ops->lfsck_post(env, com, result, false);
738                 if (rc != 0)
739                         return rc;
740         }
741
742         lfsck->li_time_last_checkpoint = cfs_time_current();
743         lfsck->li_time_next_checkpoint = lfsck->li_time_last_checkpoint +
744                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
745         return result;
746 }
747
748 int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck)
749 {
750         struct lfsck_component *com;
751         struct lfsck_component *next;
752         int                     rc;
753
754         cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan,
755                                      lc_link) {
756                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
757                         com->lc_journal = 0;
758
759                 rc = com->lc_ops->lfsck_double_scan(env, com);
760                 if (rc != 0)
761                         return rc;
762         }
763         return 0;
764 }
765
766 /* external interfaces */
767
768 int lfsck_get_speed(struct dt_device *key, void *buf, int len)
769 {
770         struct lu_env           env;
771         struct lfsck_instance  *lfsck;
772         int                     rc;
773         ENTRY;
774
775         lfsck = lfsck_instance_find(key, true, false);
776         if (unlikely(lfsck == NULL))
777                 RETURN(-ENODEV);
778
779         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
780         if (rc != 0)
781                 GOTO(out, rc);
782
783         rc = snprintf(buf, len, "%u\n", lfsck->li_bookmark_ram.lb_speed_limit);
784         lu_env_fini(&env);
785
786         GOTO(out, rc);
787
788 out:
789         lfsck_instance_put(&env, lfsck);
790         return rc;
791 }
792 EXPORT_SYMBOL(lfsck_get_speed);
793
794 int lfsck_set_speed(struct dt_device *key, int val)
795 {
796         struct lu_env           env;
797         struct lfsck_instance  *lfsck;
798         int                     rc;
799         ENTRY;
800
801         lfsck = lfsck_instance_find(key, true, false);
802         if (unlikely(lfsck == NULL))
803                 RETURN(-ENODEV);
804
805         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
806         if (rc != 0)
807                 GOTO(out, rc);
808
809         mutex_lock(&lfsck->li_mutex);
810         __lfsck_set_speed(lfsck, val);
811         rc = lfsck_bookmark_store(&env, lfsck);
812         mutex_unlock(&lfsck->li_mutex);
813         lu_env_fini(&env);
814
815         GOTO(out, rc);
816
817 out:
818         lfsck_instance_put(&env, lfsck);
819         return rc;
820 }
821 EXPORT_SYMBOL(lfsck_set_speed);
822
823 int lfsck_dump(struct dt_device *key, void *buf, int len, __u16 type)
824 {
825         struct lu_env           env;
826         struct lfsck_instance  *lfsck;
827         struct lfsck_component *com   = NULL;
828         int                     rc;
829         ENTRY;
830
831         lfsck = lfsck_instance_find(key, true, false);
832         if (unlikely(lfsck == NULL))
833                 RETURN(-ENODEV);
834
835         com = lfsck_component_find(lfsck, type);
836         if (com == NULL)
837                 GOTO(out, rc = -ENOTSUPP);
838
839         rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD);
840         if (rc != 0)
841                 GOTO(out, rc);
842
843         rc = com->lc_ops->lfsck_dump(&env, com, buf, len);
844         lu_env_fini(&env);
845
846         GOTO(out, rc);
847
848 out:
849         if (com != NULL)
850                 lfsck_component_put(&env, com);
851         lfsck_instance_put(&env, lfsck);
852         return rc;
853 }
854 EXPORT_SYMBOL(lfsck_dump);
855
856 int lfsck_start(const struct lu_env *env, struct dt_device *key,
857                 struct lfsck_start_param *lsp)
858 {
859         struct lfsck_start     *start  = lsp->lsp_start;
860         struct lfsck_instance  *lfsck;
861         struct lfsck_bookmark  *bk;
862         struct ptlrpc_thread   *thread;
863         struct lfsck_component *com;
864         struct l_wait_info      lwi    = { 0 };
865         bool                    dirty  = false;
866         long                    rc     = 0;
867         __u16                   valid  = 0;
868         __u16                   flags  = 0;
869         ENTRY;
870
871         lfsck = lfsck_instance_find(key, true, false);
872         if (unlikely(lfsck == NULL))
873                 RETURN(-ENODEV);
874
875         /* start == NULL means auto trigger paused LFSCK. */
876         if ((start == NULL) &&
877             (cfs_list_empty(&lfsck->li_list_scan) ||
878              OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_AUTO)))
879                 GOTO(put, rc = 0);
880
881         bk = &lfsck->li_bookmark_ram;
882         thread = &lfsck->li_thread;
883         mutex_lock(&lfsck->li_mutex);
884         spin_lock(&lfsck->li_lock);
885         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
886                 spin_unlock(&lfsck->li_lock);
887                 GOTO(out, rc = -EALREADY);
888         }
889
890         spin_unlock(&lfsck->li_lock);
891
892         lfsck->li_namespace = lsp->lsp_namespace;
893         lfsck->li_paused = 0;
894         lfsck->li_oit_over = 0;
895         lfsck->li_drop_dryrun = 0;
896         lfsck->li_new_scanned = 0;
897
898         /* For auto trigger. */
899         if (start == NULL)
900                 goto trigger;
901
902         start->ls_version = bk->lb_version;
903         if (start->ls_valid & LSV_SPEED_LIMIT) {
904                 __lfsck_set_speed(lfsck, start->ls_speed_limit);
905                 dirty = true;
906         }
907
908         if (start->ls_valid & LSV_ERROR_HANDLE) {
909                 valid |= DOIV_ERROR_HANDLE;
910                 if (start->ls_flags & LPF_FAILOUT)
911                         flags |= DOIF_FAILOUT;
912
913                 if ((start->ls_flags & LPF_FAILOUT) &&
914                     !(bk->lb_param & LPF_FAILOUT)) {
915                         bk->lb_param |= LPF_FAILOUT;
916                         dirty = true;
917                 } else if (!(start->ls_flags & LPF_FAILOUT) &&
918                            (bk->lb_param & LPF_FAILOUT)) {
919                         bk->lb_param &= ~LPF_FAILOUT;
920                         dirty = true;
921                 }
922         }
923
924         if (start->ls_valid & LSV_DRYRUN) {
925                 if ((start->ls_flags & LPF_DRYRUN) &&
926                     !(bk->lb_param & LPF_DRYRUN)) {
927                         bk->lb_param |= LPF_DRYRUN;
928                         dirty = true;
929                 } else if (!(start->ls_flags & LPF_DRYRUN) &&
930                            (bk->lb_param & LPF_DRYRUN)) {
931                         bk->lb_param &= ~LPF_DRYRUN;
932                         lfsck->li_drop_dryrun = 1;
933                         dirty = true;
934                 }
935         }
936
937         if (dirty) {
938                 rc = lfsck_bookmark_store(env, lfsck);
939                 if (rc != 0)
940                         GOTO(out, rc);
941         }
942
943         if (start->ls_flags & LPF_RESET)
944                 flags |= DOIF_RESET;
945
946         if (start->ls_active != 0) {
947                 struct lfsck_component *next;
948                 __u16 type = 1;
949
950                 if (start->ls_active == LFSCK_TYPES_ALL)
951                         start->ls_active = LFSCK_TYPES_SUPPORTED;
952
953                 if (start->ls_active & ~LFSCK_TYPES_SUPPORTED) {
954                         start->ls_active &= ~LFSCK_TYPES_SUPPORTED;
955                         GOTO(out, rc = -ENOTSUPP);
956                 }
957
958                 cfs_list_for_each_entry_safe(com, next,
959                                              &lfsck->li_list_scan, lc_link) {
960                         if (!(com->lc_type & start->ls_active)) {
961                                 rc = com->lc_ops->lfsck_post(env, com, 0,
962                                                              false);
963                                 if (rc != 0)
964                                         GOTO(out, rc);
965                         }
966                 }
967
968                 while (start->ls_active != 0) {
969                         if (type & start->ls_active) {
970                                 com = __lfsck_component_find(lfsck, type,
971                                                         &lfsck->li_list_idle);
972                                 if (com != NULL) {
973                                         /* The component status will be updated
974                                          * when its prep() is called later by
975                                          * the LFSCK main engine. */
976                                         cfs_list_del_init(&com->lc_link);
977                                         cfs_list_add_tail(&com->lc_link,
978                                                           &lfsck->li_list_scan);
979                                 }
980                                 start->ls_active &= ~type;
981                         }
982                         type <<= 1;
983                 }
984         }
985
986         cfs_list_for_each_entry(com, &lfsck->li_list_scan, lc_link) {
987                 start->ls_active |= com->lc_type;
988                 if (flags & DOIF_RESET) {
989                         rc = com->lc_ops->lfsck_reset(env, com, false);
990                         if (rc != 0)
991                                 GOTO(out, rc);
992                 }
993         }
994
995 trigger:
996         lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY;
997         if (bk->lb_param & LPF_DRYRUN)
998                 lfsck->li_args_dir |= LUDA_VERIFY_DRYRUN;
999
1000         if (bk->lb_param & LPF_FAILOUT) {
1001                 valid |= DOIV_ERROR_HANDLE;
1002                 flags |= DOIF_FAILOUT;
1003         }
1004
1005         if (!cfs_list_empty(&lfsck->li_list_scan))
1006                 flags |= DOIF_OUTUSED;
1007
1008         lfsck->li_args_oit = (flags << DT_OTABLE_IT_FLAGS_SHIFT) | valid;
1009         thread_set_flags(thread, 0);
1010         rc = PTR_ERR(kthread_run(lfsck_master_engine, lfsck, "lfsck"));
1011         if (IS_ERR_VALUE(rc)) {
1012                 CERROR("%s: cannot start LFSCK thread, rc = %ld\n",
1013                        lfsck_lfsck2name(lfsck), rc);
1014         } else {
1015                 rc = 0;
1016                 l_wait_event(thread->t_ctl_waitq,
1017                              thread_is_running(thread) ||
1018                              thread_is_stopped(thread),
1019                              &lwi);
1020         }
1021
1022         GOTO(out, rc);
1023
1024 out:
1025         mutex_unlock(&lfsck->li_mutex);
1026 put:
1027         lfsck_instance_put(env, lfsck);
1028         return (rc < 0 ? rc : 0);
1029 }
1030 EXPORT_SYMBOL(lfsck_start);
1031
1032 int lfsck_stop(const struct lu_env *env, struct dt_device *key, bool pause)
1033 {
1034         struct lfsck_instance   *lfsck;
1035         struct ptlrpc_thread    *thread;
1036         struct l_wait_info       lwi    = { 0 };
1037         ENTRY;
1038
1039         lfsck = lfsck_instance_find(key, true, false);
1040         if (unlikely(lfsck == NULL))
1041                 RETURN(-ENODEV);
1042
1043         thread = &lfsck->li_thread;
1044         mutex_lock(&lfsck->li_mutex);
1045         spin_lock(&lfsck->li_lock);
1046         if (thread_is_init(thread) || thread_is_stopped(thread)) {
1047                 spin_unlock(&lfsck->li_lock);
1048                 mutex_unlock(&lfsck->li_mutex);
1049                 lfsck_instance_put(env, lfsck);
1050                 RETURN(-EALREADY);
1051         }
1052
1053         if (pause)
1054                 lfsck->li_paused = 1;
1055         thread_set_flags(thread, SVC_STOPPING);
1056         spin_unlock(&lfsck->li_lock);
1057
1058         wake_up_all(&thread->t_ctl_waitq);
1059         l_wait_event(thread->t_ctl_waitq,
1060                      thread_is_stopped(thread),
1061                      &lwi);
1062         mutex_unlock(&lfsck->li_mutex);
1063         lfsck_instance_put(env, lfsck);
1064
1065         RETURN(0);
1066 }
1067 EXPORT_SYMBOL(lfsck_stop);
1068
1069 int lfsck_register(const struct lu_env *env, struct dt_device *key,
1070                    struct dt_device *next, bool master)
1071 {
1072         struct lfsck_instance   *lfsck;
1073         struct dt_object        *root  = NULL;
1074         struct dt_object        *obj;
1075         struct lu_fid           *fid   = &lfsck_env_info(env)->lti_fid;
1076         int                      rc;
1077         ENTRY;
1078
1079         lfsck = lfsck_instance_find(key, false, false);
1080         if (unlikely(lfsck != NULL))
1081                 RETURN(-EEXIST);
1082
1083         OBD_ALLOC_PTR(lfsck);
1084         if (lfsck == NULL)
1085                 RETURN(-ENOMEM);
1086
1087         mutex_init(&lfsck->li_mutex);
1088         spin_lock_init(&lfsck->li_lock);
1089         CFS_INIT_LIST_HEAD(&lfsck->li_link);
1090         CFS_INIT_LIST_HEAD(&lfsck->li_list_scan);
1091         CFS_INIT_LIST_HEAD(&lfsck->li_list_dir);
1092         CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan);
1093         CFS_INIT_LIST_HEAD(&lfsck->li_list_idle);
1094         atomic_set(&lfsck->li_ref, 1);
1095         init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq);
1096         lfsck->li_next = next;
1097         lfsck->li_bottom = key;
1098
1099         fid->f_seq = FID_SEQ_LOCAL_NAME;
1100         fid->f_oid = 1;
1101         fid->f_ver = 0;
1102         rc = local_oid_storage_init(env, lfsck->li_bottom, fid, &lfsck->li_los);
1103         if (rc != 0)
1104                 GOTO(out, rc);
1105
1106         rc = dt_root_get(env, key, fid);
1107         if (rc != 0)
1108                 GOTO(out, rc);
1109
1110         root = dt_locate(env, lfsck->li_bottom, fid);
1111         if (IS_ERR(root))
1112                 GOTO(out, rc = PTR_ERR(root));
1113
1114         lfsck->li_local_root_fid = *fid;
1115         dt_try_as_dir(env, root);
1116         if (master) {
1117                 lfsck->li_master = 1;
1118                 if (lfsck_dev_idx(lfsck->li_bottom) == 0) {
1119                         rc = dt_lookup(env, root,
1120                                 (struct dt_rec *)(&lfsck->li_global_root_fid),
1121                                 (const struct dt_key *)"ROOT", BYPASS_CAPA);
1122                         if (rc != 0)
1123                                 GOTO(out, rc);
1124                 }
1125         }
1126
1127         fid->f_seq = FID_SEQ_LOCAL_FILE;
1128         fid->f_oid = OTABLE_IT_OID;
1129         fid->f_ver = 0;
1130         obj = dt_locate(env, lfsck->li_bottom, fid);
1131         if (IS_ERR(obj))
1132                 GOTO(out, rc = PTR_ERR(obj));
1133
1134         lfsck->li_obj_oit = obj;
1135         rc = obj->do_ops->do_index_try(env, obj, &dt_otable_features);
1136         if (rc != 0) {
1137                 if (rc == -ENOTSUPP)
1138                         GOTO(add, rc = 0);
1139
1140                 GOTO(out, rc);
1141         }
1142
1143         rc = lfsck_bookmark_setup(env, lfsck);
1144         if (rc != 0)
1145                 GOTO(out, rc);
1146
1147         if (master) {
1148                 rc = lfsck_namespace_setup(env, lfsck);
1149                 if (rc < 0)
1150                         GOTO(out, rc);
1151         }
1152
1153         /* XXX: more LFSCK components initialization to be added here. */
1154
1155 add:
1156         rc = lfsck_instance_add(lfsck);
1157 out:
1158         if (root != NULL && !IS_ERR(root))
1159                 lu_object_put(env, &root->do_lu);
1160         if (rc != 0)
1161                 lfsck_instance_cleanup(env, lfsck);
1162         return rc;
1163 }
1164 EXPORT_SYMBOL(lfsck_register);
1165
1166 void lfsck_degister(const struct lu_env *env, struct dt_device *key)
1167 {
1168         struct lfsck_instance *lfsck;
1169
1170         lfsck = lfsck_instance_find(key, false, true);
1171         if (lfsck != NULL)
1172                 lfsck_instance_put(env, lfsck);
1173 }
1174 EXPORT_SYMBOL(lfsck_degister);
1175
1176 static int __init lfsck_init(void)
1177 {
1178         int rc;
1179
1180         lfsck_key_init_generic(&lfsck_thread_key, NULL);
1181         rc = lu_context_key_register(&lfsck_thread_key);
1182         return rc;
1183 }
1184
1185 static void __exit lfsck_exit(void)
1186 {
1187         LASSERT(cfs_list_empty(&lfsck_instance_list));
1188
1189         lu_context_key_degister(&lfsck_thread_key);
1190 }
1191
1192 MODULE_AUTHOR("Intel Corporation <http://www.intel.com/>");
1193 MODULE_DESCRIPTION("LFSCK");
1194 MODULE_LICENSE("GPL");
1195
1196 cfs_module(lfsck, LUSTRE_VERSION_STRING, lfsck_init, lfsck_exit);