Whamcloud - gitweb
LU-1267 lfsck: enhance RPCs (3) for MDT-OST consistency
[fs/lustre-release.git] / lustre / lfsck / lfsck_namespace.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2013, Intel Corporation.
24  */
25 /*
26  * lustre/lfsck/lfsck_namespace.c
27  *
28  * Author: Fan, Yong <fan.yong@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LFSCK
32
33 #include <lustre/lustre_idl.h>
34 #include <lu_object.h>
35 #include <dt_object.h>
36 #include <md_object.h>
37 #include <lustre_linkea.h>
38 #include <lustre_fid.h>
39 #include <lustre_lib.h>
40 #include <lustre_net.h>
41 #include <lustre/lustre_user.h>
42
43 #include "lfsck_internal.h"
44
/* Magic value written to ln_magic when the trace header is (re)initialised
 * and compared against ln_magic after the header is loaded. */
45 #define LFSCK_NAMESPACE_MAGIC   0xA0629D03
46
/* Name under the local root directory of the object that is unlinked and
 * re-created when the namespace component is reset. */
47 static const char lfsck_namespace_name[] = "lfsck_namespace";
48
49 static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *des,
50                                       struct lfsck_namespace *src)
51 {
52         des->ln_magic = le32_to_cpu(src->ln_magic);
53         des->ln_status = le32_to_cpu(src->ln_status);
54         des->ln_flags = le32_to_cpu(src->ln_flags);
55         des->ln_success_count = le32_to_cpu(src->ln_success_count);
56         des->ln_run_time_phase1 = le32_to_cpu(src->ln_run_time_phase1);
57         des->ln_run_time_phase2 = le32_to_cpu(src->ln_run_time_phase2);
58         des->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
59         des->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
60         des->ln_time_last_checkpoint =
61                                 le64_to_cpu(src->ln_time_last_checkpoint);
62         lfsck_position_le_to_cpu(&des->ln_pos_latest_start,
63                                  &src->ln_pos_latest_start);
64         lfsck_position_le_to_cpu(&des->ln_pos_last_checkpoint,
65                                  &src->ln_pos_last_checkpoint);
66         lfsck_position_le_to_cpu(&des->ln_pos_first_inconsistent,
67                                  &src->ln_pos_first_inconsistent);
68         des->ln_items_checked = le64_to_cpu(src->ln_items_checked);
69         des->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
70         des->ln_items_failed = le64_to_cpu(src->ln_items_failed);
71         des->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
72         des->ln_mlinked_checked = le64_to_cpu(src->ln_mlinked_checked);
73         des->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
74         des->ln_objs_repaired_phase2 =
75                                 le64_to_cpu(src->ln_objs_repaired_phase2);
76         des->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
77         des->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
78         des->ln_objs_lost_found = le64_to_cpu(src->ln_objs_lost_found);
79         fid_le_to_cpu(&des->ln_fid_latest_scanned_phase2,
80                       &src->ln_fid_latest_scanned_phase2);
81 }
82
83 static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *des,
84                                       struct lfsck_namespace *src)
85 {
86         des->ln_magic = cpu_to_le32(src->ln_magic);
87         des->ln_status = cpu_to_le32(src->ln_status);
88         des->ln_flags = cpu_to_le32(src->ln_flags);
89         des->ln_success_count = cpu_to_le32(src->ln_success_count);
90         des->ln_run_time_phase1 = cpu_to_le32(src->ln_run_time_phase1);
91         des->ln_run_time_phase2 = cpu_to_le32(src->ln_run_time_phase2);
92         des->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
93         des->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
94         des->ln_time_last_checkpoint =
95                                 cpu_to_le64(src->ln_time_last_checkpoint);
96         lfsck_position_cpu_to_le(&des->ln_pos_latest_start,
97                                  &src->ln_pos_latest_start);
98         lfsck_position_cpu_to_le(&des->ln_pos_last_checkpoint,
99                                  &src->ln_pos_last_checkpoint);
100         lfsck_position_cpu_to_le(&des->ln_pos_first_inconsistent,
101                                  &src->ln_pos_first_inconsistent);
102         des->ln_items_checked = cpu_to_le64(src->ln_items_checked);
103         des->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
104         des->ln_items_failed = cpu_to_le64(src->ln_items_failed);
105         des->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
106         des->ln_mlinked_checked = cpu_to_le64(src->ln_mlinked_checked);
107         des->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
108         des->ln_objs_repaired_phase2 =
109                                 cpu_to_le64(src->ln_objs_repaired_phase2);
110         des->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
111         des->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
112         des->ln_objs_lost_found = cpu_to_le64(src->ln_objs_lost_found);
113         fid_cpu_to_le(&des->ln_fid_latest_scanned_phase2,
114                       &src->ln_fid_latest_scanned_phase2);
115 }
116
117 /**
118  * \retval +ve: the lfsck_namespace is broken, the caller should reset it.
119  * \retval 0: succeed.
120  * \retval -ve: failed cases.
121  */
122 static int lfsck_namespace_load(const struct lu_env *env,
123                                 struct lfsck_component *com)
124 {
125         int len = com->lc_file_size;
126         int rc;
127
128         rc = dt_xattr_get(env, com->lc_obj,
129                           lfsck_buf_get(env, com->lc_file_disk, len),
130                           XATTR_NAME_LFSCK_NAMESPACE, BYPASS_CAPA);
131         if (rc == len) {
132                 struct lfsck_namespace *ns = com->lc_file_ram;
133
134                 lfsck_namespace_le_to_cpu(ns,
135                                 (struct lfsck_namespace *)com->lc_file_disk);
136                 if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
137                         CWARN("%s: invalid lfsck_namespace magic %#x != %#x\n",
138                               lfsck_lfsck2name(com->lc_lfsck), ns->ln_magic,
139                               LFSCK_NAMESPACE_MAGIC);
140                         rc = 1;
141                 } else {
142                         rc = 0;
143                 }
144         } else if (rc != -ENODATA) {
145                 CERROR("%s: fail to load lfsck_namespace: expected = %d, "
146                        "rc = %d\n", lfsck_lfsck2name(com->lc_lfsck), len, rc);
147                 if (rc >= 0)
148                         rc = 1;
149         }
150         return rc;
151 }
152
153 static int lfsck_namespace_store(const struct lu_env *env,
154                                  struct lfsck_component *com, bool init)
155 {
156         struct dt_object        *obj    = com->lc_obj;
157         struct lfsck_instance   *lfsck  = com->lc_lfsck;
158         struct thandle          *handle;
159         int                      len    = com->lc_file_size;
160         int                      rc;
161         ENTRY;
162
163         lfsck_namespace_cpu_to_le((struct lfsck_namespace *)com->lc_file_disk,
164                                   (struct lfsck_namespace *)com->lc_file_ram);
165         handle = dt_trans_create(env, lfsck->li_bottom);
166         if (IS_ERR(handle)) {
167                 rc = PTR_ERR(handle);
168                 CERROR("%s: fail to create trans for storing lfsck_namespace: "
169                        "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
170                 RETURN(rc);
171         }
172
173         rc = dt_declare_xattr_set(env, obj,
174                                   lfsck_buf_get(env, com->lc_file_disk, len),
175                                   XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
176         if (rc != 0) {
177                 CERROR("%s: fail to declare trans for storing lfsck_namespace: "
178                        "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
179                 GOTO(out, rc);
180         }
181
182         rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
183         if (rc != 0) {
184                 CERROR("%s: fail to start trans for storing lfsck_namespace: "
185                        "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
186                 GOTO(out, rc);
187         }
188
189         rc = dt_xattr_set(env, obj,
190                           lfsck_buf_get(env, com->lc_file_disk, len),
191                           XATTR_NAME_LFSCK_NAMESPACE,
192                           init ? LU_XATTR_CREATE : LU_XATTR_REPLACE,
193                           handle, BYPASS_CAPA);
194         if (rc != 0)
195                 CERROR("%s: fail to store lfsck_namespace: len = %d, "
196                        "rc = %d\n", lfsck_lfsck2name(lfsck), len, rc);
197
198         GOTO(out, rc);
199
200 out:
201         dt_trans_stop(env, lfsck->li_bottom, handle);
202         return rc;
203 }
204
205 static int lfsck_namespace_init(const struct lu_env *env,
206                                 struct lfsck_component *com)
207 {
208         struct lfsck_namespace *ns = com->lc_file_ram;
209         int rc;
210
211         memset(ns, 0, sizeof(*ns));
212         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
213         ns->ln_status = LS_INIT;
214         down_write(&com->lc_sem);
215         rc = lfsck_namespace_store(env, com, true);
216         up_write(&com->lc_sem);
217         return rc;
218 }
219
220 static int lfsck_namespace_lookup(const struct lu_env *env,
221                                   struct lfsck_component *com,
222                                   const struct lu_fid *fid, __u8 *flags)
223 {
224         struct lu_fid *key = &lfsck_env_info(env)->lti_fid;
225         int            rc;
226
227         fid_cpu_to_be(key, fid);
228         rc = dt_lookup(env, com->lc_obj, (struct dt_rec *)flags,
229                        (const struct dt_key *)key, BYPASS_CAPA);
230         return rc;
231 }
232
233 static int lfsck_namespace_delete(const struct lu_env *env,
234                                   struct lfsck_component *com,
235                                   const struct lu_fid *fid)
236 {
237         struct lfsck_instance   *lfsck  = com->lc_lfsck;
238         struct lu_fid           *key    = &lfsck_env_info(env)->lti_fid;
239         struct thandle          *handle;
240         struct dt_object        *obj    = com->lc_obj;
241         int                      rc;
242         ENTRY;
243
244         handle = dt_trans_create(env, lfsck->li_bottom);
245         if (IS_ERR(handle))
246                 RETURN(PTR_ERR(handle));
247
248         rc = dt_declare_delete(env, obj, (const struct dt_key *)fid, handle);
249         if (rc != 0)
250                 GOTO(out, rc);
251
252         rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
253         if (rc != 0)
254                 GOTO(out, rc);
255
256         fid_cpu_to_be(key, fid);
257         rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
258                        BYPASS_CAPA);
259
260         GOTO(out, rc);
261
262 out:
263         dt_trans_stop(env, lfsck->li_bottom, handle);
264         return rc;
265 }
266
267 static int lfsck_namespace_update(const struct lu_env *env,
268                                   struct lfsck_component *com,
269                                   const struct lu_fid *fid,
270                                   __u8 flags, bool force)
271 {
272         struct lfsck_instance   *lfsck  = com->lc_lfsck;
273         struct lu_fid           *key    = &lfsck_env_info(env)->lti_fid;
274         struct thandle          *handle;
275         struct dt_object        *obj    = com->lc_obj;
276         int                      rc;
277         bool                     exist  = false;
278         __u8                     tf;
279         ENTRY;
280
281         rc = lfsck_namespace_lookup(env, com, fid, &tf);
282         if (rc != 0 && rc != -ENOENT)
283                 RETURN(rc);
284
285         if (rc == 0) {
286                 if (!force || flags == tf)
287                         RETURN(0);
288
289                 exist = true;
290                 handle = dt_trans_create(env, lfsck->li_bottom);
291                 if (IS_ERR(handle))
292                         RETURN(PTR_ERR(handle));
293
294                 rc = dt_declare_delete(env, obj, (const struct dt_key *)fid,
295                                        handle);
296                 if (rc != 0)
297                         GOTO(out, rc);
298         } else {
299                 handle = dt_trans_create(env, lfsck->li_bottom);
300                 if (IS_ERR(handle))
301                         RETURN(PTR_ERR(handle));
302         }
303
304         rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
305                                (const struct dt_key *)fid, handle);
306         if (rc != 0)
307                 GOTO(out, rc);
308
309         rc = dt_trans_start_local(env, lfsck->li_bottom, handle);
310         if (rc != 0)
311                 GOTO(out, rc);
312
313         fid_cpu_to_be(key, fid);
314         if (exist) {
315                 rc = dt_delete(env, obj, (const struct dt_key *)key, handle,
316                                BYPASS_CAPA);
317                 if (rc != 0) {
318                         CERROR("%s: fail to insert "DFID": rc = %d\n",
319                                lfsck_lfsck2name(com->lc_lfsck), PFID(fid), rc);
320                         GOTO(out, rc);
321                 }
322         }
323
324         rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
325                        (const struct dt_key *)key, handle, BYPASS_CAPA, 1);
326
327         GOTO(out, rc);
328
329 out:
330         dt_trans_stop(env, lfsck->li_bottom, handle);
331         return rc;
332 }
333
334 static int lfsck_namespace_check_exist(const struct lu_env *env,
335                                        struct lfsck_instance *lfsck,
336                                        struct dt_object *obj, const char *name)
337 {
338         struct dt_object *dir = lfsck->li_obj_dir;
339         struct lu_fid    *fid = &lfsck_env_info(env)->lti_fid;
340         int               rc;
341         ENTRY;
342
343         if (unlikely(lfsck_is_dead_obj(obj)))
344                 RETURN(LFSCK_NAMEENTRY_DEAD);
345
346         rc = dt_lookup(env, dir, (struct dt_rec *)fid,
347                        (const struct dt_key *)name, BYPASS_CAPA);
348         if (rc == -ENOENT)
349                 RETURN(LFSCK_NAMEENTRY_REMOVED);
350
351         if (rc < 0)
352                 RETURN(rc);
353
354         if (!lu_fid_eq(fid, lfsck_dto2fid(obj)))
355                 RETURN(LFSCK_NAMEENTRY_RECREATED);
356
357         RETURN(0);
358 }
359
360 static int lfsck_declare_namespace_exec_dir(const struct lu_env *env,
361                                             struct dt_object *obj,
362                                             struct thandle *handle)
363 {
364         int rc;
365
366         /* For destroying all invalid linkEA entries. */
367         rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
368         if (rc != 0)
369                 return rc;
370
371         /* For insert new linkEA entry. */
372         rc = dt_declare_xattr_set(env, obj,
373                         lfsck_buf_get_const(env, NULL, DEFAULT_LINKEA_SIZE),
374                         XATTR_NAME_LINK, 0, handle);
375         return rc;
376 }
377
378 static int lfsck_links_read(const struct lu_env *env, struct dt_object *obj,
379                             struct linkea_data *ldata)
380 {
381         int rc;
382
383         ldata->ld_buf =
384                 lu_buf_check_and_alloc(&lfsck_env_info(env)->lti_linkea_buf,
385                                        PAGE_CACHE_SIZE);
386         if (ldata->ld_buf->lb_buf == NULL)
387                 return -ENOMEM;
388
389         if (!dt_object_exists(obj))
390                 return -ENODATA;
391
392         rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK, BYPASS_CAPA);
393         if (rc == -ERANGE) {
394                 /* Buf was too small, figure out what we need. */
395                 lu_buf_free(ldata->ld_buf);
396                 rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK,
397                                   BYPASS_CAPA);
398                 if (rc < 0)
399                         return rc;
400
401                 ldata->ld_buf = lu_buf_check_and_alloc(ldata->ld_buf, rc);
402                 if (ldata->ld_buf->lb_buf == NULL)
403                         return -ENOMEM;
404
405                 rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK,
406                                   BYPASS_CAPA);
407         }
408         if (rc < 0)
409                 return rc;
410
411         linkea_init(ldata);
412
413         return 0;
414 }
415
416 static int lfsck_links_write(const struct lu_env *env, struct dt_object *obj,
417                              struct linkea_data *ldata, struct thandle *handle)
418 {
419         const struct lu_buf *buf = lfsck_buf_get_const(env,
420                                                        ldata->ld_buf->lb_buf,
421                                                        ldata->ld_leh->leh_len);
422
423         return dt_xattr_set(env, obj, buf, XATTR_NAME_LINK, 0, handle,
424                             BYPASS_CAPA);
425 }
426
427 /**
428  * \retval ve: removed entries
429  */
430 static int lfsck_linkea_entry_unpack(struct lfsck_instance *lfsck,
431                                      struct linkea_data *ldata,
432                                      struct lu_name *cname,
433                                      struct lu_fid *pfid)
434 {
435         struct link_ea_entry    *oldlee;
436         int                      oldlen;
437         int                      removed = 0;
438
439         linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, cname, pfid);
440         oldlee = ldata->ld_lee;
441         oldlen = ldata->ld_reclen;
442         linkea_next_entry(ldata);
443         while (ldata->ld_lee != NULL) {
444                 ldata->ld_reclen = (ldata->ld_lee->lee_reclen[0] << 8) |
445                                    ldata->ld_lee->lee_reclen[1];
446                 if (unlikely(ldata->ld_reclen == oldlen &&
447                              memcmp(ldata->ld_lee, oldlee, oldlen) == 0)) {
448                         linkea_del_buf(ldata, cname);
449                         removed++;
450                 } else {
451                         linkea_next_entry(ldata);
452                 }
453         }
454         ldata->ld_lee = oldlee;
455         ldata->ld_reclen = oldlen;
456         return removed;
457 }
458
459 /**
460  * \retval +ve  repaired
461  * \retval 0    no need to repair
462  * \retval -ve  error cases
463  */
464 static int lfsck_namespace_double_scan_one(const struct lu_env *env,
465                                            struct lfsck_component *com,
466                                            struct dt_object *child, __u8 flags)
467 {
468         struct lfsck_thread_info *info    = lfsck_env_info(env);
469         struct lu_attr           *la      = &info->lti_la;
470         struct lu_name           *cname   = &info->lti_name;
471         struct lu_fid            *pfid    = &info->lti_fid;
472         struct lu_fid            *cfid    = &info->lti_fid2;
473         struct lfsck_instance   *lfsck    = com->lc_lfsck;
474         struct lfsck_bookmark   *bk       = &lfsck->li_bookmark_ram;
475         struct lfsck_namespace  *ns       = com->lc_file_ram;
476         struct linkea_data       ldata    = { 0 };
477         struct thandle          *handle   = NULL;
478         bool                     locked   = false;
479         bool                     update   = false;
480         int                      rc;
481         ENTRY;
482
483         if (com->lc_journal) {
484
485 again:
486                 LASSERT(!locked);
487
488                 update = false;
489                 com->lc_journal = 1;
490                 handle = dt_trans_create(env, lfsck->li_next);
491                 if (IS_ERR(handle))
492                         RETURN(rc = PTR_ERR(handle));
493
494                 rc = dt_declare_xattr_set(env, child,
495                         lfsck_buf_get_const(env, NULL, DEFAULT_LINKEA_SIZE),
496                         XATTR_NAME_LINK, 0, handle);
497                 if (rc != 0)
498                         GOTO(stop, rc);
499
500                 rc = dt_trans_start(env, lfsck->li_next, handle);
501                 if (rc != 0)
502                         GOTO(stop, rc);
503
504                 dt_write_lock(env, child, MOR_TGT_CHILD);
505                 locked = true;
506         }
507
508         if (unlikely(lfsck_is_dead_obj(child)))
509                 GOTO(stop, rc = 0);
510
511         rc = dt_attr_get(env, child, la, BYPASS_CAPA);
512         if (rc == 0)
513                 rc = lfsck_links_read(env, child, &ldata);
514         if (rc != 0) {
515                 if ((bk->lb_param & LPF_DRYRUN) &&
516                     (rc == -EINVAL || rc == -ENODATA))
517                         rc = 1;
518
519                 GOTO(stop, rc);
520         }
521
522         linkea_first_entry(&ldata);
523         while (ldata.ld_lee != NULL) {
524                 struct dt_object *parent = NULL;
525
526                 rc = lfsck_linkea_entry_unpack(lfsck, &ldata, cname, pfid);
527                 if (rc > 0)
528                         update = true;
529
530                 if (!fid_is_sane(pfid))
531                         goto shrink;
532
533                 parent = lfsck_object_find(env, lfsck, pfid);
534                 if (parent == NULL)
535                         goto shrink;
536                 else if (IS_ERR(parent))
537                         GOTO(stop, rc = PTR_ERR(parent));
538
539                 if (!dt_object_exists(parent))
540                         goto shrink;
541
542                 /* XXX: Currently, skip remote object, the consistency for
543                  *      remote object will be processed in LFSCK phase III. */
544                 if (dt_object_remote(parent)) {
545                         lfsck_object_put(env, parent);
546                         linkea_next_entry(&ldata);
547                         continue;
548                 }
549
550                 if (unlikely(!dt_try_as_dir(env, parent)))
551                         goto shrink;
552
553                 /* To guarantee the 'name' is terminated with '0'. */
554                 memcpy(info->lti_key, cname->ln_name, cname->ln_namelen);
555                 info->lti_key[cname->ln_namelen] = 0;
556                 cname->ln_name = info->lti_key;
557                 rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
558                                (const struct dt_key *)cname->ln_name,
559                                BYPASS_CAPA);
560                 if (rc != 0 && rc != -ENOENT) {
561                         lfsck_object_put(env, parent);
562                         GOTO(stop, rc);
563                 }
564
565                 if (rc == 0) {
566                         if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
567                                 lfsck_object_put(env, parent);
568                                 linkea_next_entry(&ldata);
569                                 continue;
570                         }
571
572                         goto shrink;
573                 }
574
575                 /* If there is no name entry in the parent dir and the object
576                  * link count is less than the linkea entries count, then the
577                  * linkea entry should be removed. */
578                 if (ldata.ld_leh->leh_reccount > la->la_nlink)
579                         goto shrink;
580
581                 /* XXX: For the case of there is a linkea entry, but without
582                  *      name entry pointing to the object and its hard links
583                  *      count is not less than the object name entries count,
584                  *      then seems we should add the 'missed' name entry back
585                  *      to namespace, but before LFSCK phase III finished, we
586                  *      do not know whether the object has some inconsistency
587                  *      on other MDTs. So now, do NOT add the name entry back
588                  *      to the namespace, but keep the linkEA entry. LU-2914 */
589                 lfsck_object_put(env, parent);
590                 linkea_next_entry(&ldata);
591                 continue;
592
593 shrink:
594                 if (parent != NULL)
595                         lfsck_object_put(env, parent);
596                 if (bk->lb_param & LPF_DRYRUN)
597                         RETURN(1);
598
599                 CDEBUG(D_LFSCK, "Remove linkEA: "DFID"[%.*s], "DFID"\n",
600                        PFID(lfsck_dto2fid(child)), cname->ln_namelen, cname->ln_name,
601                        PFID(pfid));
602                 linkea_del_buf(&ldata, cname);
603                 update = true;
604         }
605
606         if (update) {
607                 if (!com->lc_journal) {
608                         com->lc_journal = 1;
609                         goto again;
610                 }
611
612                 rc = lfsck_links_write(env, child, &ldata, handle);
613         }
614
615         GOTO(stop, rc);
616
617 stop:
618         if (locked) {
619         /* XXX: For the case linkea entries count does not match the object hard
620          *      links count, we cannot update the later one simply. Before LFSCK
621          *      phase III finished, we cannot know whether there are some remote
622          *      name entries to be repaired or not. LU-2914 */
623                 if (rc == 0 && !lfsck_is_dead_obj(child) &&
624                     ldata.ld_leh != NULL &&
625                     ldata.ld_leh->leh_reccount != la->la_nlink)
626                         CWARN("%s: the object "DFID" linkEA entry count %u "
627                               "may not match its hardlink count %u\n",
628                               lfsck_lfsck2name(lfsck), PFID(cfid),
629                               ldata.ld_leh->leh_reccount, la->la_nlink);
630
631                 dt_write_unlock(env, child);
632         }
633
634         if (handle != NULL)
635                 dt_trans_stop(env, lfsck->li_next, handle);
636
637         if (rc == 0 && update) {
638                 ns->ln_objs_nlink_repaired++;
639                 rc = 1;
640         }
641
642         return rc;
643 }
644
645 /* namespace APIs */
646
647 static int lfsck_namespace_reset(const struct lu_env *env,
648                                  struct lfsck_component *com, bool init)
649 {
650         struct lfsck_instance   *lfsck = com->lc_lfsck;
651         struct lfsck_namespace  *ns    = com->lc_file_ram;
652         struct dt_object        *root;
653         struct dt_object        *dto;
654         int                      rc;
655         ENTRY;
656
657         root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
658         if (IS_ERR(root))
659                 RETURN(PTR_ERR(root));
660
661         if (unlikely(!dt_try_as_dir(env, root))) {
662                 lu_object_put(env, &root->do_lu);
663                 RETURN(-ENOTDIR);
664         }
665
666         down_write(&com->lc_sem);
667         if (init) {
668                 memset(ns, 0, sizeof(*ns));
669         } else {
670                 __u32 count = ns->ln_success_count;
671                 __u64 last_time = ns->ln_time_last_complete;
672
673                 memset(ns, 0, sizeof(*ns));
674                 ns->ln_success_count = count;
675                 ns->ln_time_last_complete = last_time;
676         }
677         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
678         ns->ln_status = LS_INIT;
679
680         rc = local_object_unlink(env, lfsck->li_bottom, root,
681                                  lfsck_namespace_name);
682         if (rc != 0)
683                 GOTO(out, rc);
684
685         lfsck_object_put(env, com->lc_obj);
686         com->lc_obj = NULL;
687         dto = local_index_find_or_create(env, lfsck->li_los, root,
688                                          lfsck_namespace_name,
689                                          S_IFREG | S_IRUGO | S_IWUSR,
690                                          &dt_lfsck_features);
691         if (IS_ERR(dto))
692                 GOTO(out, rc = PTR_ERR(dto));
693
694         com->lc_obj = dto;
695         rc = dto->do_ops->do_index_try(env, dto, &dt_lfsck_features);
696         if (rc != 0)
697                 GOTO(out, rc);
698
699         rc = lfsck_namespace_store(env, com, true);
700
701         GOTO(out, rc);
702
703 out:
704         up_write(&com->lc_sem);
705         lu_object_put(env, &root->do_lu);
706         return rc;
707 }
708
709 static void
710 lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
711                      bool new_checked)
712 {
713         struct lfsck_namespace *ns = com->lc_file_ram;
714
715         down_write(&com->lc_sem);
716         if (new_checked)
717                 com->lc_new_checked++;
718         ns->ln_items_failed++;
719         if (lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
720                 lfsck_pos_fill(env, com->lc_lfsck,
721                                &ns->ln_pos_first_inconsistent, false);
722         up_write(&com->lc_sem);
723 }
724
725 static int lfsck_namespace_checkpoint(const struct lu_env *env,
726                                       struct lfsck_component *com, bool init)
727 {
728         struct lfsck_instance   *lfsck = com->lc_lfsck;
729         struct lfsck_namespace  *ns    = com->lc_file_ram;
730         int                      rc;
731
732         if (com->lc_new_checked == 0 && !init)
733                 return 0;
734
735         down_write(&com->lc_sem);
736
737         if (init) {
738                 ns->ln_pos_latest_start = lfsck->li_pos_current;
739         } else {
740                 ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
741                 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
742                                 HALF_SEC - lfsck->li_time_last_checkpoint);
743                 ns->ln_time_last_checkpoint = cfs_time_current_sec();
744                 ns->ln_items_checked += com->lc_new_checked;
745                 com->lc_new_checked = 0;
746         }
747
748         rc = lfsck_namespace_store(env, com, false);
749
750         up_write(&com->lc_sem);
751         return rc;
752 }
753
754 static int lfsck_namespace_prep(const struct lu_env *env,
755                                 struct lfsck_component *com,
756                                 struct lfsck_start_param *lsp)
757 {
758         struct lfsck_instance   *lfsck  = com->lc_lfsck;
759         struct lfsck_namespace  *ns     = com->lc_file_ram;
760         struct lfsck_position   *pos    = &com->lc_pos_start;
761
762         if (ns->ln_status == LS_COMPLETED) {
763                 int rc;
764
765                 rc = lfsck_namespace_reset(env, com, false);
766                 if (rc != 0)
767                         return rc;
768         }
769
770         down_write(&com->lc_sem);
771
772         ns->ln_time_latest_start = cfs_time_current_sec();
773
774         spin_lock(&lfsck->li_lock);
775         if (ns->ln_flags & LF_SCANNED_ONCE) {
776                 if (!lfsck->li_drop_dryrun ||
777                     lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
778                         ns->ln_status = LS_SCANNING_PHASE2;
779                         cfs_list_del_init(&com->lc_link);
780                         cfs_list_add_tail(&com->lc_link,
781                                           &lfsck->li_list_double_scan);
782                         if (!cfs_list_empty(&com->lc_link_dir))
783                                 cfs_list_del_init(&com->lc_link_dir);
784                         lfsck_pos_set_zero(pos);
785                 } else {
786                         ns->ln_status = LS_SCANNING_PHASE1;
787                         ns->ln_run_time_phase1 = 0;
788                         ns->ln_run_time_phase2 = 0;
789                         ns->ln_items_checked = 0;
790                         ns->ln_items_repaired = 0;
791                         ns->ln_items_failed = 0;
792                         ns->ln_dirs_checked = 0;
793                         ns->ln_mlinked_checked = 0;
794                         ns->ln_objs_checked_phase2 = 0;
795                         ns->ln_objs_repaired_phase2 = 0;
796                         ns->ln_objs_failed_phase2 = 0;
797                         ns->ln_objs_nlink_repaired = 0;
798                         ns->ln_objs_lost_found = 0;
799                         fid_zero(&ns->ln_fid_latest_scanned_phase2);
800                         if (cfs_list_empty(&com->lc_link_dir))
801                                 cfs_list_add_tail(&com->lc_link_dir,
802                                                   &lfsck->li_list_dir);
803                         *pos = ns->ln_pos_first_inconsistent;
804                 }
805         } else {
806                 ns->ln_status = LS_SCANNING_PHASE1;
807                 if (cfs_list_empty(&com->lc_link_dir))
808                         cfs_list_add_tail(&com->lc_link_dir,
809                                           &lfsck->li_list_dir);
810                 if (!lfsck->li_drop_dryrun ||
811                     lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
812                         *pos = ns->ln_pos_last_checkpoint;
813                         pos->lp_oit_cookie++;
814                 } else {
815                         *pos = ns->ln_pos_first_inconsistent;
816                 }
817         }
818         spin_unlock(&lfsck->li_lock);
819
820         up_write(&com->lc_sem);
821         return 0;
822 }
823
824 static int lfsck_namespace_exec_oit(const struct lu_env *env,
825                                     struct lfsck_component *com,
826                                     struct dt_object *obj)
827 {
828         down_write(&com->lc_sem);
829         com->lc_new_checked++;
830         if (S_ISDIR(lfsck_object_type(obj)))
831                 ((struct lfsck_namespace *)com->lc_file_ram)->ln_dirs_checked++;
832         up_write(&com->lc_sem);
833         return 0;
834 }
835
836 static int lfsck_namespace_exec_dir(const struct lu_env *env,
837                                     struct lfsck_component *com,
838                                     struct dt_object *obj,
839                                     struct lu_dirent *ent)
840 {
841         struct lfsck_thread_info   *info     = lfsck_env_info(env);
842         struct lu_attr             *la       = &info->lti_la;
843         struct lfsck_instance      *lfsck    = com->lc_lfsck;
844         struct lfsck_bookmark      *bk       = &lfsck->li_bookmark_ram;
845         struct lfsck_namespace     *ns       = com->lc_file_ram;
846         struct linkea_data          ldata    = { 0 };
847         const struct lu_fid        *pfid     = lfsck_dto2fid(lfsck->li_obj_dir);
848         const struct lu_fid        *cfid     = lfsck_dto2fid(obj);
849         const struct lu_name       *cname;
850         struct thandle             *handle   = NULL;
851         bool                        repaired = false;
852         bool                        locked   = false;
853         bool                        remove;
854         bool                        newdata;
855         int                         count    = 0;
856         int                         rc;
857         ENTRY;
858
859         cname = lfsck_name_get_const(env, ent->lde_name, ent->lde_namelen);
860         down_write(&com->lc_sem);
861         com->lc_new_checked++;
862
863         if (ent->lde_attrs & LUDA_UPGRADE) {
864                 ns->ln_flags |= LF_UPGRADE;
865                 repaired = true;
866         } else if (ent->lde_attrs & LUDA_REPAIR) {
867                 ns->ln_flags |= LF_INCONSISTENT;
868                 repaired = true;
869         }
870
871         if (ent->lde_name[0] == '.' &&
872             (ent->lde_namelen == 1 ||
873              (ent->lde_namelen == 2 && ent->lde_name[1] == '.') ||
874              fid_is_dot_lustre(&ent->lde_fid)))
875                 GOTO(out, rc = 0);
876
877         if (!(bk->lb_param & LPF_DRYRUN) &&
878             (com->lc_journal || repaired)) {
879
880 again:
881                 LASSERT(!locked);
882
883                 com->lc_journal = 1;
884                 handle = dt_trans_create(env, lfsck->li_next);
885                 if (IS_ERR(handle))
886                         GOTO(out, rc = PTR_ERR(handle));
887
888                 rc = lfsck_declare_namespace_exec_dir(env, obj, handle);
889                 if (rc != 0)
890                         GOTO(stop, rc);
891
892                 rc = dt_trans_start(env, lfsck->li_next, handle);
893                 if (rc != 0)
894                         GOTO(stop, rc);
895
896                 dt_write_lock(env, obj, MOR_TGT_CHILD);
897                 locked = true;
898         }
899
900         rc = lfsck_namespace_check_exist(env, lfsck, obj, ent->lde_name);
901         if (rc != 0)
902                 GOTO(stop, rc);
903
904         rc = lfsck_links_read(env, obj, &ldata);
905         if (rc == 0) {
906                 count = ldata.ld_leh->leh_reccount;
907                 rc = linkea_links_find(&ldata, cname, pfid);
908                 if ((rc == 0) &&
909                     (count == 1 || !S_ISDIR(lfsck_object_type(obj))))
910                         goto record;
911
912                 ns->ln_flags |= LF_INCONSISTENT;
913                 /* For dir, if there are more than one linkea entries, or the
914                  * linkea entry does not match the name entry, then remove all
915                  * and add the correct one. */
916                 if (S_ISDIR(lfsck_object_type(obj))) {
917                         remove = true;
918                         newdata = true;
919                 } else {
920                         remove = false;
921                         newdata = false;
922                 }
923                 goto nodata;
924         } else if (unlikely(rc == -EINVAL)) {
925                 count = 1;
926                 ns->ln_flags |= LF_INCONSISTENT;
927                 /* The magic crashed, we are not sure whether there are more
928                  * corrupt data in the linkea, so remove all linkea entries. */
929                 remove = true;
930                 newdata = true;
931                 goto nodata;
932         } else if (rc == -ENODATA) {
933                 count = 1;
934                 ns->ln_flags |= LF_UPGRADE;
935                 remove = false;
936                 newdata = true;
937
938 nodata:
939                 if (bk->lb_param & LPF_DRYRUN) {
940                         repaired = true;
941                         goto record;
942                 }
943
944                 if (!com->lc_journal)
945                         goto again;
946
947                 if (remove) {
948                         LASSERT(newdata);
949
950                         rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, handle,
951                                           BYPASS_CAPA);
952                         if (rc != 0)
953                                 GOTO(stop, rc);
954                 }
955
956                 if (newdata) {
957                         rc = linkea_data_new(&ldata,
958                                         &lfsck_env_info(env)->lti_linkea_buf);
959                         if (rc != 0)
960                                 GOTO(stop, rc);
961                 }
962
963                 rc = linkea_add_buf(&ldata, cname, pfid);
964                 if (rc != 0)
965                         GOTO(stop, rc);
966
967                 rc = lfsck_links_write(env, obj, &ldata, handle);
968                 if (rc != 0)
969                         GOTO(stop, rc);
970
971                 count = ldata.ld_leh->leh_reccount;
972                 repaired = true;
973         } else {
974                 GOTO(stop, rc);
975         }
976
977 record:
978         LASSERT(count > 0);
979
980         rc = dt_attr_get(env, obj, la, BYPASS_CAPA);
981         if (rc != 0)
982                 GOTO(stop, rc);
983
984         if ((count == 1) &&
985             (la->la_nlink == 1 || S_ISDIR(lfsck_object_type(obj))))
986                 /* Usually, it is for single linked object or dir, do nothing.*/
987                 GOTO(stop, rc);
988
989         /* Following modification will be in another transaction.  */
990         if (handle != NULL) {
991                 LASSERT(dt_write_locked(env, obj));
992
993                 dt_write_unlock(env, obj);
994                 locked = false;
995
996                 dt_trans_stop(env, lfsck->li_next, handle);
997                 handle = NULL;
998         }
999
1000         ns->ln_mlinked_checked++;
1001         rc = lfsck_namespace_update(env, com, cfid,
1002                         count != la->la_nlink ? LLF_UNMATCH_NLINKS : 0, false);
1003
1004         GOTO(out, rc);
1005
1006 stop:
1007         if (locked)
1008                 dt_write_unlock(env, obj);
1009
1010         if (handle != NULL)
1011                 dt_trans_stop(env, lfsck->li_next, handle);
1012
1013 out:
1014         if (rc < 0) {
1015                 ns->ln_items_failed++;
1016                 if (lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1017                         lfsck_pos_fill(env, lfsck,
1018                                        &ns->ln_pos_first_inconsistent, false);
1019                 if (!(bk->lb_param & LPF_FAILOUT))
1020                         rc = 0;
1021         } else {
1022                 if (repaired) {
1023                         ns->ln_items_repaired++;
1024                         if (bk->lb_param & LPF_DRYRUN &&
1025                             lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
1026                                 lfsck_pos_fill(env, lfsck,
1027                                                &ns->ln_pos_first_inconsistent,
1028                                                false);
1029                 } else {
1030                         com->lc_journal = 0;
1031                 }
1032                 rc = 0;
1033         }
1034         up_write(&com->lc_sem);
1035         return rc;
1036 }
1037
1038 static int lfsck_namespace_post(const struct lu_env *env,
1039                                 struct lfsck_component *com,
1040                                 int result, bool init)
1041 {
1042         struct lfsck_instance   *lfsck = com->lc_lfsck;
1043         struct lfsck_namespace  *ns    = com->lc_file_ram;
1044         int                      rc;
1045
1046         down_write(&com->lc_sem);
1047
1048         spin_lock(&lfsck->li_lock);
1049         if (!init)
1050                 ns->ln_pos_last_checkpoint = lfsck->li_pos_current;
1051         if (result > 0) {
1052                 ns->ln_status = LS_SCANNING_PHASE2;
1053                 ns->ln_flags |= LF_SCANNED_ONCE;
1054                 ns->ln_flags &= ~LF_UPGRADE;
1055                 cfs_list_del_init(&com->lc_link);
1056                 cfs_list_del_init(&com->lc_link_dir);
1057                 cfs_list_add_tail(&com->lc_link, &lfsck->li_list_double_scan);
1058         } else if (result == 0) {
1059                 ns->ln_status = lfsck->li_status;
1060                 if (ns->ln_status == 0)
1061                         ns->ln_status = LS_STOPPED;
1062                 if (ns->ln_status != LS_PAUSED) {
1063                         cfs_list_del_init(&com->lc_link);
1064                         cfs_list_del_init(&com->lc_link_dir);
1065                         cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1066                 }
1067         } else {
1068                 ns->ln_status = LS_FAILED;
1069                 cfs_list_del_init(&com->lc_link);
1070                 cfs_list_del_init(&com->lc_link_dir);
1071                 cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1072         }
1073         spin_unlock(&lfsck->li_lock);
1074
1075         if (!init) {
1076                 ns->ln_run_time_phase1 += cfs_duration_sec(cfs_time_current() +
1077                                 HALF_SEC - lfsck->li_time_last_checkpoint);
1078                 ns->ln_time_last_checkpoint = cfs_time_current_sec();
1079                 ns->ln_items_checked += com->lc_new_checked;
1080                 com->lc_new_checked = 0;
1081         }
1082
1083         rc = lfsck_namespace_store(env, com, false);
1084
1085         up_write(&com->lc_sem);
1086         return rc;
1087 }
1088
1089 static int
1090 lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
1091                      char *buf, int len)
1092 {
1093         struct lfsck_instance   *lfsck = com->lc_lfsck;
1094         struct lfsck_bookmark   *bk    = &lfsck->li_bookmark_ram;
1095         struct lfsck_namespace  *ns    = com->lc_file_ram;
1096         int                      save  = len;
1097         int                      ret   = -ENOSPC;
1098         int                      rc;
1099
1100         down_read(&com->lc_sem);
1101         rc = snprintf(buf, len,
1102                       "name: lfsck_namespace\n"
1103                       "magic: %#x\n"
1104                       "version: %d\n"
1105                       "status: %s\n",
1106                       ns->ln_magic,
1107                       bk->lb_version,
1108                       lfsck_status2names(ns->ln_status));
1109         if (rc <= 0)
1110                 goto out;
1111
1112         buf += rc;
1113         len -= rc;
1114         rc = lfsck_bits_dump(&buf, &len, ns->ln_flags, lfsck_flags_names,
1115                              "flags");
1116         if (rc < 0)
1117                 goto out;
1118
1119         rc = lfsck_bits_dump(&buf, &len, bk->lb_param, lfsck_param_names,
1120                              "param");
1121         if (rc < 0)
1122                 goto out;
1123
1124         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_complete,
1125                              "time_since_last_completed");
1126         if (rc < 0)
1127                 goto out;
1128
1129         rc = lfsck_time_dump(&buf, &len, ns->ln_time_latest_start,
1130                              "time_since_latest_start");
1131         if (rc < 0)
1132                 goto out;
1133
1134         rc = lfsck_time_dump(&buf, &len, ns->ln_time_last_checkpoint,
1135                              "time_since_last_checkpoint");
1136         if (rc < 0)
1137                 goto out;
1138
1139         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_latest_start,
1140                             "latest_start_position");
1141         if (rc < 0)
1142                 goto out;
1143
1144         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_last_checkpoint,
1145                             "last_checkpoint_position");
1146         if (rc < 0)
1147                 goto out;
1148
1149         rc = lfsck_pos_dump(&buf, &len, &ns->ln_pos_first_inconsistent,
1150                             "first_failure_position");
1151         if (rc < 0)
1152                 goto out;
1153
1154         if (ns->ln_status == LS_SCANNING_PHASE1) {
1155                 struct lfsck_position pos;
1156                 const struct dt_it_ops *iops;
1157                 cfs_duration_t duration = cfs_time_current() -
1158                                           lfsck->li_time_last_checkpoint;
1159                 __u64 checked = ns->ln_items_checked + com->lc_new_checked;
1160                 __u64 speed = checked;
1161                 __u64 new_checked = com->lc_new_checked * HZ;
1162                 __u32 rtime = ns->ln_run_time_phase1 +
1163                               cfs_duration_sec(duration + HALF_SEC);
1164
1165                 if (duration != 0)
1166                         do_div(new_checked, duration);
1167                 if (rtime != 0)
1168                         do_div(speed, rtime);
1169                 rc = snprintf(buf, len,
1170                               "checked_phase1: "LPU64"\n"
1171                               "checked_phase2: "LPU64"\n"
1172                               "updated_phase1: "LPU64"\n"
1173                               "updated_phase2: "LPU64"\n"
1174                               "failed_phase1: "LPU64"\n"
1175                               "failed_phase2: "LPU64"\n"
1176                               "dirs: "LPU64"\n"
1177                               "M-linked: "LPU64"\n"
1178                               "nlinks_repaired: "LPU64"\n"
1179                               "lost_found: "LPU64"\n"
1180                               "success_count: %u\n"
1181                               "run_time_phase1: %u seconds\n"
1182                               "run_time_phase2: %u seconds\n"
1183                               "average_speed_phase1: "LPU64" items/sec\n"
1184                               "average_speed_phase2: N/A\n"
1185                               "real-time_speed_phase1: "LPU64" items/sec\n"
1186                               "real-time_speed_phase2: N/A\n",
1187                               checked,
1188                               ns->ln_objs_checked_phase2,
1189                               ns->ln_items_repaired,
1190                               ns->ln_objs_repaired_phase2,
1191                               ns->ln_items_failed,
1192                               ns->ln_objs_failed_phase2,
1193                               ns->ln_dirs_checked,
1194                               ns->ln_mlinked_checked,
1195                               ns->ln_objs_nlink_repaired,
1196                               ns->ln_objs_lost_found,
1197                               ns->ln_success_count,
1198                               rtime,
1199                               ns->ln_run_time_phase2,
1200                               speed,
1201                               new_checked);
1202                 if (rc <= 0)
1203                         goto out;
1204
1205                 buf += rc;
1206                 len -= rc;
1207
1208                 LASSERT(lfsck->li_di_oit != NULL);
1209
1210                 iops = &lfsck->li_obj_oit->do_index_ops->dio_it;
1211
1212                 /* The low layer otable-based iteration position may NOT
1213                  * exactly match the namespace-based directory traversal
1214                  * cookie. Generally, it is not a serious issue. But the
1215                  * caller should NOT make assumption on that. */
1216                 pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
1217                 if (!lfsck->li_current_oit_processed)
1218                         pos.lp_oit_cookie--;
1219
1220                 spin_lock(&lfsck->li_lock);
1221                 if (lfsck->li_di_dir != NULL) {
1222                         pos.lp_dir_cookie = lfsck->li_cookie_dir;
1223                         if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) {
1224                                 fid_zero(&pos.lp_dir_parent);
1225                                 pos.lp_dir_cookie = 0;
1226                         } else {
1227                                 pos.lp_dir_parent =
1228                                         *lfsck_dto2fid(lfsck->li_obj_dir);
1229                         }
1230                 } else {
1231                         fid_zero(&pos.lp_dir_parent);
1232                         pos.lp_dir_cookie = 0;
1233                 }
1234                 spin_unlock(&lfsck->li_lock);
1235                 rc = lfsck_pos_dump(&buf, &len, &pos, "current_position");
1236                 if (rc <= 0)
1237                         goto out;
1238         } else if (ns->ln_status == LS_SCANNING_PHASE2) {
1239                 cfs_duration_t duration = cfs_time_current() -
1240                                           lfsck->li_time_last_checkpoint;
1241                 __u64 checked = ns->ln_objs_checked_phase2 +
1242                                 com->lc_new_checked;
1243                 __u64 speed1 = ns->ln_items_checked;
1244                 __u64 speed2 = checked;
1245                 __u64 new_checked = com->lc_new_checked * HZ;
1246                 __u32 rtime = ns->ln_run_time_phase2 +
1247                               cfs_duration_sec(duration + HALF_SEC);
1248
1249                 if (duration != 0)
1250                         do_div(new_checked, duration);
1251                 if (ns->ln_run_time_phase1 != 0)
1252                         do_div(speed1, ns->ln_run_time_phase1);
1253                 if (rtime != 0)
1254                         do_div(speed2, rtime);
1255                 rc = snprintf(buf, len,
1256                               "checked_phase1: "LPU64"\n"
1257                               "checked_phase2: "LPU64"\n"
1258                               "updated_phase1: "LPU64"\n"
1259                               "updated_phase2: "LPU64"\n"
1260                               "failed_phase1: "LPU64"\n"
1261                               "failed_phase2: "LPU64"\n"
1262                               "dirs: "LPU64"\n"
1263                               "M-linked: "LPU64"\n"
1264                               "nlinks_repaired: "LPU64"\n"
1265                               "lost_found: "LPU64"\n"
1266                               "success_count: %u\n"
1267                               "run_time_phase1: %u seconds\n"
1268                               "run_time_phase2: %u seconds\n"
1269                               "average_speed_phase1: "LPU64" items/sec\n"
1270                               "average_speed_phase2: "LPU64" objs/sec\n"
1271                               "real-time_speed_phase1: N/A\n"
1272                               "real-time_speed_phase2: "LPU64" objs/sec\n"
1273                               "current_position: "DFID"\n",
1274                               ns->ln_items_checked,
1275                               checked,
1276                               ns->ln_items_repaired,
1277                               ns->ln_objs_repaired_phase2,
1278                               ns->ln_items_failed,
1279                               ns->ln_objs_failed_phase2,
1280                               ns->ln_dirs_checked,
1281                               ns->ln_mlinked_checked,
1282                               ns->ln_objs_nlink_repaired,
1283                               ns->ln_objs_lost_found,
1284                               ns->ln_success_count,
1285                               ns->ln_run_time_phase1,
1286                               rtime,
1287                               speed1,
1288                               speed2,
1289                               new_checked,
1290                               PFID(&ns->ln_fid_latest_scanned_phase2));
1291                 if (rc <= 0)
1292                         goto out;
1293
1294                 buf += rc;
1295                 len -= rc;
1296         } else {
1297                 __u64 speed1 = ns->ln_items_checked;
1298                 __u64 speed2 = ns->ln_objs_checked_phase2;
1299
1300                 if (ns->ln_run_time_phase1 != 0)
1301                         do_div(speed1, ns->ln_run_time_phase1);
1302                 if (ns->ln_run_time_phase2 != 0)
1303                         do_div(speed2, ns->ln_run_time_phase2);
1304                 rc = snprintf(buf, len,
1305                               "checked_phase1: "LPU64"\n"
1306                               "checked_phase2: "LPU64"\n"
1307                               "updated_phase1: "LPU64"\n"
1308                               "updated_phase2: "LPU64"\n"
1309                               "failed_phase1: "LPU64"\n"
1310                               "failed_phase2: "LPU64"\n"
1311                               "dirs: "LPU64"\n"
1312                               "M-linked: "LPU64"\n"
1313                               "nlinks_repaired: "LPU64"\n"
1314                               "lost_found: "LPU64"\n"
1315                               "success_count: %u\n"
1316                               "run_time_phase1: %u seconds\n"
1317                               "run_time_phase2: %u seconds\n"
1318                               "average_speed_phase1: "LPU64" items/sec\n"
1319                               "average_speed_phase2: "LPU64" objs/sec\n"
1320                               "real-time_speed_phase1: N/A\n"
1321                               "real-time_speed_phase2: N/A\n"
1322                               "current_position: N/A\n",
1323                               ns->ln_items_checked,
1324                               ns->ln_objs_checked_phase2,
1325                               ns->ln_items_repaired,
1326                               ns->ln_objs_repaired_phase2,
1327                               ns->ln_items_failed,
1328                               ns->ln_objs_failed_phase2,
1329                               ns->ln_dirs_checked,
1330                               ns->ln_mlinked_checked,
1331                               ns->ln_objs_nlink_repaired,
1332                               ns->ln_objs_lost_found,
1333                               ns->ln_success_count,
1334                               ns->ln_run_time_phase1,
1335                               ns->ln_run_time_phase2,
1336                               speed1,
1337                               speed2);
1338                 if (rc <= 0)
1339                         goto out;
1340
1341                 buf += rc;
1342                 len -= rc;
1343         }
1344         ret = save - len;
1345
1346 out:
1347         up_read(&com->lc_sem);
1348         return ret;
1349 }
1350
1351 static int lfsck_namespace_double_scan_main(void *args)
1352 {
1353         struct lfsck_thread_args *lta   = args;
1354         const struct lu_env     *env    = &lta->lta_env;
1355         struct lfsck_component  *com    = lta->lta_com;
1356         struct lfsck_instance   *lfsck  = com->lc_lfsck;
1357         struct ptlrpc_thread    *thread = &lfsck->li_thread;
1358         struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
1359         struct lfsck_namespace  *ns     = com->lc_file_ram;
1360         struct dt_object        *obj    = com->lc_obj;
1361         const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
1362         struct dt_object        *target;
1363         struct dt_it            *di;
1364         struct dt_key           *key;
1365         struct lu_fid            fid;
1366         int                      rc;
1367         __u8                     flags = 0;
1368         ENTRY;
1369
1370         com->lc_new_checked = 0;
1371         com->lc_new_scanned = 0;
1372         com->lc_time_last_checkpoint = cfs_time_current();
1373         com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
1374                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1375
1376         di = iops->init(env, obj, 0, BYPASS_CAPA);
1377         if (IS_ERR(di))
1378                 GOTO(out, rc = PTR_ERR(di));
1379
1380         fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
1381         rc = iops->get(env, di, (const struct dt_key *)&fid);
1382         if (rc < 0)
1383                 GOTO(fini, rc);
1384
1385         /* Skip the start one, which either has been processed or non-exist. */
1386         rc = iops->next(env, di);
1387         if (rc != 0)
1388                 GOTO(put, rc);
1389
1390         if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_NO_DOUBLESCAN))
1391                 GOTO(put, rc = 0);
1392
1393         do {
1394                 if (OBD_FAIL_CHECK(OBD_FAIL_LFSCK_DELAY3) &&
1395                     cfs_fail_val > 0) {
1396                         struct l_wait_info lwi;
1397
1398                         lwi = LWI_TIMEOUT(cfs_time_seconds(cfs_fail_val),
1399                                           NULL, NULL);
1400                         l_wait_event(thread->t_ctl_waitq,
1401                                      !thread_is_running(thread),
1402                                      &lwi);
1403                 }
1404
1405                 key = iops->key(env, di);
1406                 fid_be_to_cpu(&fid, (const struct lu_fid *)key);
1407                 target = lfsck_object_find(env, lfsck, &fid);
1408                 down_write(&com->lc_sem);
1409                 if (target == NULL) {
1410                         rc = 0;
1411                         goto checkpoint;
1412                 } else if (IS_ERR(target)) {
1413                         rc = PTR_ERR(target);
1414                         goto checkpoint;
1415                 }
1416
1417                 /* XXX: Currently, skip remote object, the consistency for
1418                  *      remote object will be processed in LFSCK phase III. */
1419                 if (dt_object_exists(target) && !dt_object_remote(target)) {
1420                         rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
1421                         if (rc == 0)
1422                                 rc = lfsck_namespace_double_scan_one(env, com,
1423                                                                 target, flags);
1424                 }
1425
1426                 lfsck_object_put(env, target);
1427
1428 checkpoint:
1429                 com->lc_new_checked++;
1430                 com->lc_new_scanned++;
1431                 ns->ln_fid_latest_scanned_phase2 = fid;
1432                 if (rc > 0)
1433                         ns->ln_objs_repaired_phase2++;
1434                 else if (rc < 0)
1435                         ns->ln_objs_failed_phase2++;
1436                 up_write(&com->lc_sem);
1437
1438                 if ((rc == 0) || ((rc > 0) && !(bk->lb_param & LPF_DRYRUN))) {
1439                         lfsck_namespace_delete(env, com, &fid);
1440                 } else if (rc < 0) {
1441                         flags |= LLF_REPAIR_FAILED;
1442                         lfsck_namespace_update(env, com, &fid, flags, true);
1443                 }
1444
1445                 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
1446                         GOTO(put, rc);
1447
1448                 if (unlikely(cfs_time_beforeq(com->lc_time_next_checkpoint,
1449                                               cfs_time_current())) &&
1450                     com->lc_new_checked != 0) {
1451                         down_write(&com->lc_sem);
1452                         ns->ln_run_time_phase2 +=
1453                                 cfs_duration_sec(cfs_time_current() +
1454                                 HALF_SEC - com->lc_time_last_checkpoint);
1455                         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1456                         ns->ln_objs_checked_phase2 += com->lc_new_checked;
1457                         com->lc_new_checked = 0;
1458                         rc = lfsck_namespace_store(env, com, false);
1459                         up_write(&com->lc_sem);
1460                         if (rc != 0)
1461                                 GOTO(put, rc);
1462
1463                         com->lc_time_last_checkpoint = cfs_time_current();
1464                         com->lc_time_next_checkpoint =
1465                                 com->lc_time_last_checkpoint +
1466                                 cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL);
1467                 }
1468
1469                 lfsck_control_speed_by_self(com);
1470                 if (unlikely(!thread_is_running(thread)))
1471                         GOTO(put, rc = 0);
1472
1473                 rc = iops->next(env, di);
1474         } while (rc == 0);
1475
1476         GOTO(put, rc);
1477
1478 put:
1479         iops->put(env, di);
1480
1481 fini:
1482         iops->fini(env, di);
1483
1484 out:
1485         down_write(&com->lc_sem);
1486
1487         ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() +
1488                                 HALF_SEC - lfsck->li_time_last_checkpoint);
1489         ns->ln_time_last_checkpoint = cfs_time_current_sec();
1490         ns->ln_objs_checked_phase2 += com->lc_new_checked;
1491         com->lc_new_checked = 0;
1492
1493         if (rc > 0) {
1494                 com->lc_journal = 0;
1495                 ns->ln_status = LS_COMPLETED;
1496                 if (!(bk->lb_param & LPF_DRYRUN))
1497                         ns->ln_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT);
1498                 ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
1499                 ns->ln_success_count++;
1500         } else if (rc == 0) {
1501                 ns->ln_status = lfsck->li_status;
1502                 if (ns->ln_status == 0)
1503                         ns->ln_status = LS_STOPPED;
1504         } else {
1505                 ns->ln_status = LS_FAILED;
1506         }
1507
1508         if (ns->ln_status != LS_PAUSED) {
1509                 spin_lock(&lfsck->li_lock);
1510                 cfs_list_del_init(&com->lc_link);
1511                 cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1512                 spin_unlock(&lfsck->li_lock);
1513         }
1514
1515         rc = lfsck_namespace_store(env, com, false);
1516
1517         up_write(&com->lc_sem);
1518         if (atomic_dec_and_test(&lfsck->li_double_scan_count))
1519                 wake_up_all(&thread->t_ctl_waitq);
1520
1521         lfsck_thread_args_fini(lta);
1522
1523         return rc;
1524 }
1525
1526 static int lfsck_namespace_double_scan(const struct lu_env *env,
1527                                        struct lfsck_component *com)
1528 {
1529         struct lfsck_instance           *lfsck = com->lc_lfsck;
1530         struct lfsck_namespace          *ns    = com->lc_file_ram;
1531         struct lfsck_thread_args        *lta;
1532         long                             rc;
1533         ENTRY;
1534
1535         if (unlikely(ns->ln_status != LS_SCANNING_PHASE2))
1536                 RETURN(0);
1537
1538         lta = lfsck_thread_args_init(lfsck, com, NULL);
1539         if (IS_ERR(lta))
1540                 RETURN(PTR_ERR(lta));
1541
1542         atomic_inc(&lfsck->li_double_scan_count);
1543         rc = PTR_ERR(kthread_run(lfsck_namespace_double_scan_main, lta,
1544                                  "lfsck_namespace"));
1545         if (IS_ERR_VALUE(rc)) {
1546                 CERROR("%s: cannot start LFSCK namespace thread: rc = %ld\n",
1547                        lfsck_lfsck2name(lfsck), rc);
1548                 atomic_dec(&lfsck->li_double_scan_count);
1549                 lfsck_thread_args_fini(lta);
1550         } else {
1551                 rc = 0;
1552         }
1553
1554         RETURN(rc);
1555 }
1556
/*
 * Notification handler of the namespace component.
 *
 * The namespace component takes no action on incoming requests: all
 * arguments are ignored and 0 is always returned.
 */
static int lfsck_namespace_in_notify(const struct lu_env *env,
                                     struct lfsck_component *com,
                                     struct lfsck_request *lr)
{
        /* Nothing to process. */
        return 0;
}
1563
1564 static int lfsck_namespace_query(const struct lu_env *env,
1565                                  struct lfsck_component *com)
1566 {
1567         struct lfsck_namespace *ns = com->lc_file_ram;
1568
1569         return ns->ln_status;
1570 }
1571
1572 static struct lfsck_operations lfsck_namespace_ops = {
1573         .lfsck_reset            = lfsck_namespace_reset,
1574         .lfsck_fail             = lfsck_namespace_fail,
1575         .lfsck_checkpoint       = lfsck_namespace_checkpoint,
1576         .lfsck_prep             = lfsck_namespace_prep,
1577         .lfsck_exec_oit         = lfsck_namespace_exec_oit,
1578         .lfsck_exec_dir         = lfsck_namespace_exec_dir,
1579         .lfsck_post             = lfsck_namespace_post,
1580         .lfsck_dump             = lfsck_namespace_dump,
1581         .lfsck_double_scan      = lfsck_namespace_double_scan,
1582         .lfsck_in_notify        = lfsck_namespace_in_notify,
1583         .lfsck_query            = lfsck_namespace_query,
1584 };
1585
1586 int lfsck_namespace_setup(const struct lu_env *env,
1587                           struct lfsck_instance *lfsck)
1588 {
1589         struct lfsck_component  *com;
1590         struct lfsck_namespace  *ns;
1591         struct dt_object        *root = NULL;
1592         struct dt_object        *obj;
1593         int                      rc;
1594         ENTRY;
1595
1596         LASSERT(lfsck->li_master);
1597
1598         OBD_ALLOC_PTR(com);
1599         if (com == NULL)
1600                 RETURN(-ENOMEM);
1601
1602         CFS_INIT_LIST_HEAD(&com->lc_link);
1603         CFS_INIT_LIST_HEAD(&com->lc_link_dir);
1604         init_rwsem(&com->lc_sem);
1605         atomic_set(&com->lc_ref, 1);
1606         com->lc_lfsck = lfsck;
1607         com->lc_type = LT_NAMESPACE;
1608         com->lc_ops = &lfsck_namespace_ops;
1609         com->lc_file_size = sizeof(struct lfsck_namespace);
1610         OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
1611         if (com->lc_file_ram == NULL)
1612                 GOTO(out, rc = -ENOMEM);
1613
1614         OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
1615         if (com->lc_file_disk == NULL)
1616                 GOTO(out, rc = -ENOMEM);
1617
1618         root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
1619         if (IS_ERR(root))
1620                 GOTO(out, rc = PTR_ERR(root));
1621
1622         if (unlikely(!dt_try_as_dir(env, root)))
1623                 GOTO(out, rc = -ENOTDIR);
1624
1625         obj = local_index_find_or_create(env, lfsck->li_los, root,
1626                                          lfsck_namespace_name,
1627                                          S_IFREG | S_IRUGO | S_IWUSR,
1628                                          &dt_lfsck_features);
1629         if (IS_ERR(obj))
1630                 GOTO(out, rc = PTR_ERR(obj));
1631
1632         com->lc_obj = obj;
1633         rc = obj->do_ops->do_index_try(env, obj, &dt_lfsck_features);
1634         if (rc != 0)
1635                 GOTO(out, rc);
1636
1637         rc = lfsck_namespace_load(env, com);
1638         if (rc > 0)
1639                 rc = lfsck_namespace_reset(env, com, true);
1640         else if (rc == -ENODATA)
1641                 rc = lfsck_namespace_init(env, com);
1642         if (rc != 0)
1643                 GOTO(out, rc);
1644
1645         ns = com->lc_file_ram;
1646         switch (ns->ln_status) {
1647         case LS_INIT:
1648         case LS_COMPLETED:
1649         case LS_FAILED:
1650         case LS_STOPPED:
1651                 spin_lock(&lfsck->li_lock);
1652                 cfs_list_add_tail(&com->lc_link, &lfsck->li_list_idle);
1653                 spin_unlock(&lfsck->li_lock);
1654                 break;
1655         default:
1656                 CERROR("%s: unknown lfsck_namespace status: rc = %u\n",
1657                        lfsck_lfsck2name(lfsck), ns->ln_status);
1658                 /* fall through */
1659         case LS_SCANNING_PHASE1:
1660         case LS_SCANNING_PHASE2:
1661                 /* No need to store the status to disk right now.
1662                  * If the system crashed before the status stored,
1663                  * it will be loaded back when next time. */
1664                 ns->ln_status = LS_CRASHED;
1665                 /* fall through */
1666         case LS_PAUSED:
1667         case LS_CRASHED:
1668                 spin_lock(&lfsck->li_lock);
1669                 cfs_list_add_tail(&com->lc_link, &lfsck->li_list_scan);
1670                 cfs_list_add_tail(&com->lc_link_dir, &lfsck->li_list_dir);
1671                 spin_unlock(&lfsck->li_lock);
1672                 break;
1673         }
1674
1675         GOTO(out, rc = 0);
1676
1677 out:
1678         if (root != NULL && !IS_ERR(root))
1679                 lu_object_put(env, &root->do_lu);
1680         if (rc != 0)
1681                 lfsck_component_cleanup(env, com);
1682         return rc;
1683 }