Whamcloud - gitweb
LU-17010 lfsck: don't create trans in dryrun mode
[fs/lustre-release.git] / lustre / lfsck / lfsck_namespace.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2013, 2017, Intel Corporation.
24  */
25 /*
26  * lustre/lfsck/lfsck_namespace.c
27  *
28  * Author: Fan, Yong <fan.yong@intel.com>
29  */
30
31 #define DEBUG_SUBSYSTEM S_LFSCK
32
33 #include <lu_object.h>
34 #include <dt_object.h>
35 #include <md_object.h>
36 #include <lustre_fid.h>
37 #include <lustre_lib.h>
38 #include <lustre_net.h>
39 #include <lu_target.h>
40
41 #include "lfsck_internal.h"
42
43 #define LFSCK_NAMESPACE_MAGIC_V1        0xA0629D03
44 #define LFSCK_NAMESPACE_MAGIC_V2        0xA0621A0B
45 #define LFSCK_NAMESPACE_MAGIC_V3        0xA06249FF
46
/* For Lustre-2.x (x <= 6), the namespace LFSCK used LFSCK_NAMESPACE_MAGIC_V1
 * as the trace file magic. When downgrading to such an old release, the old
 * LFSCK will not recognize the new LFSCK_NAMESPACE_MAGIC_V2 in the new trace
 * file, so it will reset the whole LFSCK rather than fail to start. A
 * similar case will happen when upgrading from such an old release. */
52 #define LFSCK_NAMESPACE_MAGIC           LFSCK_NAMESPACE_MAGIC_V3
53
/* Positive result codes returned by lfsck_namespace_check_exist(). */
enum lfsck_nameentry_check {
	/* The object has been unlinked. */
	LFSCK_NAMEENTRY_DEAD		= 1,
	/* The entry has been removed. */
	LFSCK_NAMEENTRY_REMOVED		= 2,
	/* The entry has been recreated. */
	LFSCK_NAMEENTRY_RECREATED	= 3,
};
59
60 static struct lfsck_namespace_req *
61 lfsck_namespace_assistant_req_init(struct lfsck_instance *lfsck,
62                                    struct lfsck_assistant_object *lso,
63                                    struct lu_dirent *ent, __u16 type)
64 {
65         struct lfsck_namespace_req *lnr;
66         int                         size;
67
68         size = sizeof(*lnr) + (ent->lde_namelen & ~3) + 4;
69         OBD_ALLOC(lnr, size);
70         if (lnr == NULL)
71                 return ERR_PTR(-ENOMEM);
72
73         INIT_LIST_HEAD(&lnr->lnr_lar.lar_list);
74         lnr->lnr_lar.lar_parent = lfsck_assistant_object_get(lso);
75         lnr->lnr_lmv = lfsck_lmv_get(lfsck->li_lmv);
76         lnr->lnr_fid = ent->lde_fid;
77         lnr->lnr_dir_cookie = ent->lde_hash;
78         lnr->lnr_attr = ent->lde_attrs;
79         lnr->lnr_size = size;
80         lnr->lnr_type = type;
81         lnr->lnr_namelen = ent->lde_namelen;
82         memcpy(lnr->lnr_name, ent->lde_name, ent->lde_namelen);
83
84         return lnr;
85 }
86
87 static void lfsck_namespace_assistant_req_fini(const struct lu_env *env,
88                                                struct lfsck_assistant_req *lar)
89 {
90         struct lfsck_namespace_req *lnr =
91                 container_of(lar, struct lfsck_namespace_req, lnr_lar);
92
93         if (lnr->lnr_lmv != NULL)
94                 lfsck_lmv_put(env, lnr->lnr_lmv);
95
96         lfsck_assistant_object_put(env, lar->lar_parent);
97         OBD_FREE(lnr, lnr->lnr_size);
98 }
99
100 static void lfsck_namespace_le_to_cpu(struct lfsck_namespace *dst,
101                                       struct lfsck_namespace *src)
102 {
103         dst->ln_magic = le32_to_cpu(src->ln_magic);
104         dst->ln_status = le32_to_cpu(src->ln_status);
105         dst->ln_flags = le32_to_cpu(src->ln_flags);
106         dst->ln_success_count = le32_to_cpu(src->ln_success_count);
107         dst->ln_run_time_phase1 = le64_to_cpu(src->ln_run_time_phase1);
108         dst->ln_run_time_phase2 = le64_to_cpu(src->ln_run_time_phase2);
109         dst->ln_time_last_complete = le64_to_cpu(src->ln_time_last_complete);
110         dst->ln_time_latest_start = le64_to_cpu(src->ln_time_latest_start);
111         dst->ln_time_last_checkpoint =
112                                 le64_to_cpu(src->ln_time_last_checkpoint);
113         lfsck_position_le_to_cpu(&dst->ln_pos_latest_start,
114                                  &src->ln_pos_latest_start);
115         lfsck_position_le_to_cpu(&dst->ln_pos_last_checkpoint,
116                                  &src->ln_pos_last_checkpoint);
117         lfsck_position_le_to_cpu(&dst->ln_pos_first_inconsistent,
118                                  &src->ln_pos_first_inconsistent);
119         dst->ln_items_checked = le64_to_cpu(src->ln_items_checked);
120         dst->ln_items_repaired = le64_to_cpu(src->ln_items_repaired);
121         dst->ln_items_failed = le64_to_cpu(src->ln_items_failed);
122         dst->ln_dirs_checked = le64_to_cpu(src->ln_dirs_checked);
123         dst->ln_objs_checked_phase2 = le64_to_cpu(src->ln_objs_checked_phase2);
124         dst->ln_objs_repaired_phase2 =
125                                 le64_to_cpu(src->ln_objs_repaired_phase2);
126         dst->ln_objs_failed_phase2 = le64_to_cpu(src->ln_objs_failed_phase2);
127         dst->ln_objs_nlink_repaired = le64_to_cpu(src->ln_objs_nlink_repaired);
128         fid_le_to_cpu(&dst->ln_fid_latest_scanned_phase2,
129                       &src->ln_fid_latest_scanned_phase2);
130         dst->ln_dirent_repaired = le64_to_cpu(src->ln_dirent_repaired);
131         dst->ln_linkea_repaired = le64_to_cpu(src->ln_linkea_repaired);
132         dst->ln_mul_linked_checked = le64_to_cpu(src->ln_mul_linked_checked);
133         dst->ln_mul_linked_repaired = le64_to_cpu(src->ln_mul_linked_repaired);
134         dst->ln_unknown_inconsistency =
135                                 le64_to_cpu(src->ln_unknown_inconsistency);
136         dst->ln_unmatched_pairs_repaired =
137                                 le64_to_cpu(src->ln_unmatched_pairs_repaired);
138         dst->ln_dangling_repaired = le64_to_cpu(src->ln_dangling_repaired);
139         dst->ln_mul_ref_repaired = le64_to_cpu(src->ln_mul_ref_repaired);
140         dst->ln_bad_type_repaired = le64_to_cpu(src->ln_bad_type_repaired);
141         dst->ln_lost_dirent_repaired =
142                                 le64_to_cpu(src->ln_lost_dirent_repaired);
143         dst->ln_striped_dirs_scanned =
144                                 le64_to_cpu(src->ln_striped_dirs_scanned);
145         dst->ln_striped_dirs_repaired =
146                                 le64_to_cpu(src->ln_striped_dirs_repaired);
147         dst->ln_striped_dirs_failed =
148                                 le64_to_cpu(src->ln_striped_dirs_failed);
149         dst->ln_striped_dirs_disabled =
150                                 le64_to_cpu(src->ln_striped_dirs_disabled);
151         dst->ln_striped_dirs_skipped =
152                                 le64_to_cpu(src->ln_striped_dirs_skipped);
153         dst->ln_striped_shards_scanned =
154                                 le64_to_cpu(src->ln_striped_shards_scanned);
155         dst->ln_striped_shards_repaired =
156                                 le64_to_cpu(src->ln_striped_shards_repaired);
157         dst->ln_striped_shards_failed =
158                                 le64_to_cpu(src->ln_striped_shards_failed);
159         dst->ln_striped_shards_skipped =
160                                 le64_to_cpu(src->ln_striped_shards_skipped);
161         dst->ln_name_hash_repaired = le64_to_cpu(src->ln_name_hash_repaired);
162         dst->ln_local_lpf_scanned = le64_to_cpu(src->ln_local_lpf_scanned);
163         dst->ln_local_lpf_moved = le64_to_cpu(src->ln_local_lpf_moved);
164         dst->ln_local_lpf_skipped = le64_to_cpu(src->ln_local_lpf_skipped);
165         dst->ln_local_lpf_failed = le64_to_cpu(src->ln_local_lpf_failed);
166         dst->ln_bitmap_size = le32_to_cpu(src->ln_bitmap_size);
167         dst->ln_time_latest_reset = le64_to_cpu(src->ln_time_latest_reset);
168         dst->ln_linkea_overflow_cleared =
169                                 le64_to_cpu(src->ln_linkea_overflow_cleared);
170         dst->ln_agent_entries_repaired =
171                                 le64_to_cpu(src->ln_agent_entries_repaired);
172 }
173
174 static void lfsck_namespace_cpu_to_le(struct lfsck_namespace *dst,
175                                       struct lfsck_namespace *src)
176 {
177         dst->ln_magic = cpu_to_le32(src->ln_magic);
178         dst->ln_status = cpu_to_le32(src->ln_status);
179         dst->ln_flags = cpu_to_le32(src->ln_flags);
180         dst->ln_success_count = cpu_to_le32(src->ln_success_count);
181         dst->ln_run_time_phase1 = cpu_to_le64(src->ln_run_time_phase1);
182         dst->ln_run_time_phase2 = cpu_to_le64(src->ln_run_time_phase2);
183         dst->ln_time_last_complete = cpu_to_le64(src->ln_time_last_complete);
184         dst->ln_time_latest_start = cpu_to_le64(src->ln_time_latest_start);
185         dst->ln_time_last_checkpoint =
186                                 cpu_to_le64(src->ln_time_last_checkpoint);
187         lfsck_position_cpu_to_le(&dst->ln_pos_latest_start,
188                                  &src->ln_pos_latest_start);
189         lfsck_position_cpu_to_le(&dst->ln_pos_last_checkpoint,
190                                  &src->ln_pos_last_checkpoint);
191         lfsck_position_cpu_to_le(&dst->ln_pos_first_inconsistent,
192                                  &src->ln_pos_first_inconsistent);
193         dst->ln_items_checked = cpu_to_le64(src->ln_items_checked);
194         dst->ln_items_repaired = cpu_to_le64(src->ln_items_repaired);
195         dst->ln_items_failed = cpu_to_le64(src->ln_items_failed);
196         dst->ln_dirs_checked = cpu_to_le64(src->ln_dirs_checked);
197         dst->ln_objs_checked_phase2 = cpu_to_le64(src->ln_objs_checked_phase2);
198         dst->ln_objs_repaired_phase2 =
199                                 cpu_to_le64(src->ln_objs_repaired_phase2);
200         dst->ln_objs_failed_phase2 = cpu_to_le64(src->ln_objs_failed_phase2);
201         dst->ln_objs_nlink_repaired = cpu_to_le64(src->ln_objs_nlink_repaired);
202         fid_cpu_to_le(&dst->ln_fid_latest_scanned_phase2,
203                       &src->ln_fid_latest_scanned_phase2);
204         dst->ln_dirent_repaired = cpu_to_le64(src->ln_dirent_repaired);
205         dst->ln_linkea_repaired = cpu_to_le64(src->ln_linkea_repaired);
206         dst->ln_mul_linked_checked = cpu_to_le64(src->ln_mul_linked_checked);
207         dst->ln_mul_linked_repaired = cpu_to_le64(src->ln_mul_linked_repaired);
208         dst->ln_unknown_inconsistency =
209                                 cpu_to_le64(src->ln_unknown_inconsistency);
210         dst->ln_unmatched_pairs_repaired =
211                                 cpu_to_le64(src->ln_unmatched_pairs_repaired);
212         dst->ln_dangling_repaired = cpu_to_le64(src->ln_dangling_repaired);
213         dst->ln_mul_ref_repaired = cpu_to_le64(src->ln_mul_ref_repaired);
214         dst->ln_bad_type_repaired = cpu_to_le64(src->ln_bad_type_repaired);
215         dst->ln_lost_dirent_repaired =
216                                 cpu_to_le64(src->ln_lost_dirent_repaired);
217         dst->ln_striped_dirs_scanned =
218                                 cpu_to_le64(src->ln_striped_dirs_scanned);
219         dst->ln_striped_dirs_repaired =
220                                 cpu_to_le64(src->ln_striped_dirs_repaired);
221         dst->ln_striped_dirs_failed =
222                                 cpu_to_le64(src->ln_striped_dirs_failed);
223         dst->ln_striped_dirs_disabled =
224                                 cpu_to_le64(src->ln_striped_dirs_disabled);
225         dst->ln_striped_dirs_skipped =
226                                 cpu_to_le64(src->ln_striped_dirs_skipped);
227         dst->ln_striped_shards_scanned =
228                                 cpu_to_le64(src->ln_striped_shards_scanned);
229         dst->ln_striped_shards_repaired =
230                                 cpu_to_le64(src->ln_striped_shards_repaired);
231         dst->ln_striped_shards_failed =
232                                 cpu_to_le64(src->ln_striped_shards_failed);
233         dst->ln_striped_shards_skipped =
234                                 cpu_to_le64(src->ln_striped_shards_skipped);
235         dst->ln_name_hash_repaired = cpu_to_le64(src->ln_name_hash_repaired);
236         dst->ln_local_lpf_scanned = cpu_to_le64(src->ln_local_lpf_scanned);
237         dst->ln_local_lpf_moved = cpu_to_le64(src->ln_local_lpf_moved);
238         dst->ln_local_lpf_skipped = cpu_to_le64(src->ln_local_lpf_skipped);
239         dst->ln_local_lpf_failed = cpu_to_le64(src->ln_local_lpf_failed);
240         dst->ln_bitmap_size = cpu_to_le32(src->ln_bitmap_size);
241         dst->ln_time_latest_reset = cpu_to_le64(src->ln_time_latest_reset);
242         dst->ln_linkea_overflow_cleared =
243                                 cpu_to_le64(src->ln_linkea_overflow_cleared);
244         dst->ln_agent_entries_repaired =
245                                 cpu_to_le64(src->ln_agent_entries_repaired);
246 }
247
248 static void lfsck_namespace_record_failure(const struct lu_env *env,
249                                            struct lfsck_instance *lfsck,
250                                            struct lfsck_namespace *ns)
251 {
252         struct lfsck_position pos;
253
254         ns->ln_items_failed++;
255         lfsck_pos_fill(env, lfsck, &pos, false);
256         if (lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent) ||
257             lfsck_pos_is_eq(&pos, &ns->ln_pos_first_inconsistent) < 0) {
258                 ns->ln_pos_first_inconsistent = pos;
259
260                 CDEBUG(D_LFSCK, "%s: namespace LFSCK hit first non-repaired "
261                        "inconsistency at the pos [%llu, "DFID", %#llx]\n",
262                        lfsck_lfsck2name(lfsck),
263                        ns->ln_pos_first_inconsistent.lp_oit_cookie,
264                        PFID(&ns->ln_pos_first_inconsistent.lp_dir_parent),
265                        ns->ln_pos_first_inconsistent.lp_dir_cookie);
266         }
267 }
268
269 /**
270  * Load the MDT bitmap from the lfsck_namespace trace file.
271  *
272  * \param[in] env       pointer to the thread context
273  * \param[in] com       pointer to the lfsck component
274  *
275  * \retval              0 for success
276  * \retval              negative error number on failure or data corruption
277  */
278 static int lfsck_namespace_load_bitmap(const struct lu_env *env,
279                                        struct lfsck_component *com)
280 {
281         struct dt_object *obj = com->lc_obj;
282         struct lfsck_assistant_data *lad = com->lc_data;
283         struct lfsck_namespace *ns = com->lc_file_ram;
284         unsigned long *bitmap = lad->lad_bitmap;
285         ssize_t size;
286         __u32 nbits;
287         int rc;
288
289         ENTRY;
290         if (com->lc_lfsck->li_mdt_descs.ltd_tgts_mask_len >
291             ns->ln_bitmap_size)
292                 nbits = com->lc_lfsck->li_mdt_descs.ltd_tgts_mask_len;
293         else
294                 nbits = ns->ln_bitmap_size;
295
296         if (unlikely(nbits < BITS_PER_LONG))
297                 nbits = BITS_PER_LONG;
298
299         if (nbits > lad->lad_bitmap_count) {
300                 u32 new_bits = lad->lad_bitmap_count;
301                 unsigned long *new_bitmap;
302
303                 while (new_bits < nbits)
304                         new_bits <<= 1;
305
306                 new_bitmap = bitmap_zalloc(new_bits, GFP_KERNEL);
307                 if (new_bitmap == NULL)
308                         RETURN(-ENOMEM);
309
310                 lad->lad_bitmap = new_bitmap;
311                 lad->lad_bitmap_count = new_bits;
312                 bitmap_free(bitmap);
313                 bitmap = new_bitmap;
314         }
315
316         if (ns->ln_bitmap_size == 0) {
317                 clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
318                 bitmap_zero(bitmap, lad->lad_bitmap_count);
319                 RETURN(0);
320         }
321
322         size = (ns->ln_bitmap_size + 7) >> 3;
323         rc = dt_xattr_get(env, obj,
324                           lfsck_buf_get(env, bitmap, size),
325                           XATTR_NAME_LFSCK_BITMAP);
326         if (rc != size)
327                 RETURN(rc >= 0 ? -EINVAL : rc);
328
329         if (bitmap_empty(bitmap, lad->lad_bitmap_count))
330                 clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
331         else
332                 set_bit(LAD_INCOMPLETE, &lad->lad_flags);
333
334         RETURN(0);
335 }
336
337 /**
338  * Load namespace LFSCK statistics information from the trace file.
339  *
340  * \param[in] env       pointer to the thread context
341  * \param[in] com       pointer to the lfsck component
342  *
343  * \retval              0 for success
344  * \retval              negative error number on failure
345  */
346 static int lfsck_namespace_load(const struct lu_env *env,
347                                 struct lfsck_component *com)
348 {
349         int len = com->lc_file_size;
350         int rc;
351
352         rc = dt_xattr_get(env, com->lc_obj,
353                           lfsck_buf_get(env, com->lc_file_disk, len),
354                           XATTR_NAME_LFSCK_NAMESPACE);
355         if (rc == len) {
356                 struct lfsck_namespace *ns = com->lc_file_ram;
357
358                 lfsck_namespace_le_to_cpu(ns,
359                                 (struct lfsck_namespace *)com->lc_file_disk);
360                 if (ns->ln_magic != LFSCK_NAMESPACE_MAGIC) {
361                         CDEBUG(D_LFSCK, "%s: invalid lfsck_namespace magic "
362                                "%#x != %#x\n", lfsck_lfsck2name(com->lc_lfsck),
363                                ns->ln_magic, LFSCK_NAMESPACE_MAGIC);
364                         rc = -ESTALE;
365                 } else {
366                         rc = 0;
367                 }
368         } else if (rc != -ENODATA) {
369                 CDEBUG(D_LFSCK, "%s: fail to load lfsck_namespace, "
370                        "expected = %d: rc = %d\n",
371                        lfsck_lfsck2name(com->lc_lfsck), len, rc);
372                 if (rc >= 0)
373                         rc = -ESTALE;
374         }
375
376         return rc;
377 }
378
379 static int lfsck_namespace_store(const struct lu_env *env,
380                                  struct lfsck_component *com)
381 {
382         struct dt_object *obj = com->lc_obj;
383         struct lfsck_instance *lfsck = com->lc_lfsck;
384         struct lfsck_namespace *ns = com->lc_file_ram;
385         struct lfsck_assistant_data *lad = com->lc_data;
386         struct dt_device *dev = lfsck_obj2dev(obj);
387         unsigned long *bitmap = NULL;
388         struct thandle *handle;
389         __u32 nbits = 0;
390         int len = com->lc_file_size;
391         int rc;
392
393         ENTRY;
394         if (lad != NULL) {
395                 bitmap = lad->lad_bitmap;
396                 nbits = lad->lad_bitmap_count;
397
398                 LASSERT(nbits > 0);
399                 LASSERTF((nbits & 7) == 0, "Invalid nbits %u\n", nbits);
400         }
401
402         ns->ln_bitmap_size = nbits;
403         lfsck_namespace_cpu_to_le((struct lfsck_namespace *)com->lc_file_disk,
404                                   ns);
405         handle = dt_trans_create(env, dev);
406         if (IS_ERR(handle))
407                 GOTO(log, rc = PTR_ERR(handle));
408
409         rc = dt_declare_xattr_set(env, obj,
410                                   lfsck_buf_get(env, com->lc_file_disk, len),
411                                   XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
412         if (rc != 0)
413                 GOTO(out, rc);
414
415         if (bitmap != NULL) {
416                 rc = dt_declare_xattr_set(env, obj,
417                                 lfsck_buf_get(env, bitmap, nbits >> 3),
418                                 XATTR_NAME_LFSCK_BITMAP, 0, handle);
419                 if (rc != 0)
420                         GOTO(out, rc);
421         }
422
423         rc = dt_trans_start_local(env, dev, handle);
424         if (rc != 0)
425                 GOTO(out, rc);
426
427         rc = dt_xattr_set(env, obj,
428                           lfsck_buf_get(env, com->lc_file_disk, len),
429                           XATTR_NAME_LFSCK_NAMESPACE, 0, handle);
430         if (rc == 0 && bitmap != NULL)
431                 rc = dt_xattr_set(env, obj,
432                                   lfsck_buf_get(env, bitmap, nbits >> 3),
433                                   XATTR_NAME_LFSCK_BITMAP, 0, handle);
434
435         GOTO(out, rc);
436
437 out:
438         dt_trans_stop(env, dev, handle);
439
440 log:
441         if (rc != 0)
442                 CDEBUG(D_LFSCK, "%s: fail to store lfsck_namespace: rc = %d\n",
443                        lfsck_lfsck2name(lfsck), rc);
444         return rc;
445 }
446
447 static int lfsck_namespace_init(const struct lu_env *env,
448                                 struct lfsck_component *com)
449 {
450         struct lfsck_namespace *ns = com->lc_file_ram;
451         int rc;
452
453         memset(ns, 0, sizeof(*ns));
454         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
455         ns->ln_status = LS_INIT;
456         ns->ln_time_latest_reset = ktime_get_real_seconds();
457         down_write(&com->lc_sem);
458         rc = lfsck_namespace_store(env, com);
459         if (rc == 0)
460                 rc = lfsck_load_sub_trace_files(env, com,
461                         &dt_lfsck_namespace_features, LFSCK_NAMESPACE, true);
462         up_write(&com->lc_sem);
463
464         return rc;
465 }
466
467 /**
468  * Update the namespace LFSCK trace file for the given @fid
469  *
470  * \param[in] env       pointer to the thread context
471  * \param[in] com       pointer to the lfsck component
472  * \param[in] fid       the fid which flags to be updated in the lfsck
473  *                      trace file
474  * \param[in] add       true if add new flags, otherwise remove flags
475  *
476  * \retval              0 for success or nothing to be done
477  * \retval              negative error number on failure
478  */
479 int lfsck_namespace_trace_update(const struct lu_env *env,
480                                  struct lfsck_component *com,
481                                  const struct lu_fid *fid,
482                                  const __u8 flags, bool add)
483 {
484         struct lfsck_instance   *lfsck  = com->lc_lfsck;
485         struct dt_object        *obj;
486         struct lu_fid           *key    = &lfsck_env_info(env)->lti_fid3;
487         struct dt_device        *dev;
488         struct thandle          *th     = NULL;
489         int                      idx;
490         int                      rc     = 0;
491         __u8                     old    = 0;
492         __u8                     new    = 0;
493         ENTRY;
494
495         LASSERT(flags != 0);
496
497         if (unlikely(!fid_is_sane(fid)))
498                 RETURN(0);
499
500         idx = lfsck_sub_trace_file_fid2idx(fid);
501         mutex_lock(&com->lc_sub_trace_objs[idx].lsto_mutex);
502         obj = com->lc_sub_trace_objs[idx].lsto_obj;
503         if (unlikely(obj == NULL)) {
504                 mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex);
505                 RETURN(0);
506         }
507
508         lfsck_object_get(obj);
509         dev = lfsck_obj2dev(obj);
510         fid_cpu_to_be(key, fid);
511         rc = dt_lookup(env, obj, (struct dt_rec *)&old,
512                        (const struct dt_key *)key);
513         if (rc == -ENOENT) {
514                 if (!add)
515                         GOTO(unlock, rc = 0);
516
517                 old = 0;
518                 new = flags;
519         } else if (rc == 0) {
520                 if (add) {
521                         if ((old & flags) == flags)
522                                 GOTO(unlock, rc = 0);
523
524                         new = old | flags;
525                 } else {
526                         if ((old & flags) == 0)
527                                 GOTO(unlock, rc = 0);
528
529                         new = old & ~flags;
530                 }
531         } else {
532                 GOTO(log, rc);
533         }
534
535         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
536                 GOTO(log, rc = 0);
537
538         th = lfsck_trans_create(env, dev, lfsck);
539         if (IS_ERR(th))
540                 GOTO(log, rc = PTR_ERR(th));
541
542         if (old != 0) {
543                 rc = dt_declare_delete(env, obj,
544                                        (const struct dt_key *)key, th);
545                 if (rc != 0)
546                         GOTO(log, rc);
547         }
548
549         if (new != 0) {
550                 rc = dt_declare_insert(env, obj,
551                                        (const struct dt_rec *)&new,
552                                        (const struct dt_key *)key, th);
553                 if (rc != 0)
554                         GOTO(log, rc);
555         }
556
557         rc = dt_trans_start_local(env, dev, th);
558         if (rc != 0)
559                 GOTO(log, rc);
560
561         if (old != 0) {
562                 rc = dt_delete(env, obj, (const struct dt_key *)key, th);
563                 if (rc != 0)
564                         GOTO(log, rc);
565         }
566
567         if (new != 0) {
568                 rc = dt_insert(env, obj, (const struct dt_rec *)&new,
569                                (const struct dt_key *)key, th);
570                 if (rc != 0)
571                         GOTO(log, rc);
572         }
573
574         GOTO(log, rc);
575
576 log:
577         if (th != NULL && !IS_ERR(th))
578                 dt_trans_stop(env, dev, th);
579
580         CDEBUG(D_LFSCK, "%s: namespace LFSCK %s flags for "DFID" in the "
581                "trace file, flags %x, old %x, new %x: rc = %d\n",
582                lfsck_lfsck2name(lfsck), add ? "add" : "del", PFID(fid),
583                (__u32)flags, (__u32)old, (__u32)new, rc);
584
585 unlock:
586         mutex_unlock(&com->lc_sub_trace_objs[idx].lsto_mutex);
587         lfsck_object_put(env, obj);
588
589         return rc;
590 }
591
592 int lfsck_namespace_check_exist(const struct lu_env *env,
593                                 struct dt_object *dir,
594                                 struct dt_object *obj, const char *name)
595 {
596         struct lu_fid    *fid = &lfsck_env_info(env)->lti_fid;
597         int               rc;
598         ENTRY;
599
600         if (unlikely(lfsck_is_dead_obj(obj)))
601                 RETURN(LFSCK_NAMEENTRY_DEAD);
602
603         rc = dt_lookup_dir(env, dir, name, fid);
604         if (rc == -ENOENT)
605                 RETURN(LFSCK_NAMEENTRY_REMOVED);
606
607         if (rc < 0)
608                 RETURN(rc);
609
610         if (!lu_fid_eq(fid, lfsck_dto2fid(obj)))
611                 RETURN(LFSCK_NAMEENTRY_RECREATED);
612
613         RETURN(0);
614 }
615
616 static int lfsck_declare_namespace_exec_dir(const struct lu_env *env,
617                                             struct dt_object *obj,
618                                             struct thandle *handle)
619 {
620         int rc;
621
622         /* For remote updating LINKEA, there may be further LFSCK action
623          * on remote MDT after the updating, so update the LINKEA ASAP. */
624         if (dt_object_remote(obj))
625                 handle->th_sync = 1;
626
627         /* For destroying all invalid linkEA entries. */
628         rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, handle);
629         if (rc == 0)
630                 /* For insert new linkEA entry. */
631                 rc = dt_declare_xattr_set(env, obj,
632                         lfsck_buf_get_const(env, NULL, MAX_LINKEA_SIZE),
633                         XATTR_NAME_LINK, 0, handle);
634         return rc;
635 }
636
637 int __lfsck_links_read(const struct lu_env *env, struct dt_object *obj,
638                        struct linkea_data *ldata, bool with_rec)
639 {
640         int rc;
641
642         if (ldata->ld_buf->lb_buf == NULL)
643                 return -ENOMEM;
644
645         if (!dt_object_exists(obj))
646                 return -ENOENT;
647
648         rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK);
649         if (rc == -ERANGE) {
650                 /* Buf was too small, figure out what we need. */
651                 rc = dt_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LINK);
652                 if (unlikely(rc == 0))
653                         return -ENODATA;
654
655                 if (rc < 0)
656                         return rc;
657
658                 lu_buf_realloc(ldata->ld_buf, rc);
659                 if (ldata->ld_buf->lb_buf == NULL)
660                         return -ENOMEM;
661
662                 rc = dt_xattr_get(env, obj, ldata->ld_buf, XATTR_NAME_LINK);
663         }
664
665         if (unlikely(rc == 0))
666                 return -ENODATA;
667
668         if (rc > 0) {
669                 if (with_rec)
670                         rc = linkea_init_with_rec(ldata);
671                 else
672                         rc = linkea_init(ldata);
673         }
674
675         return rc;
676 }
677
678 /**
679  * Remove linkEA for the given object.
680  *
681  * The caller should take the ldlm lock before the calling.
682  *
683  * \param[in] env       pointer to the thread context
684  * \param[in] com       pointer to the lfsck component
685  * \param[in] obj       pointer to the dt_object to be handled
686  *
687  * \retval              0 for repaired cases
688  * \retval              negative error number on failure
689  */
690 static int lfsck_namespace_links_remove(const struct lu_env *env,
691                                         struct lfsck_component *com,
692                                         struct dt_object *obj)
693 {
694         struct lfsck_instance           *lfsck  = com->lc_lfsck;
695         struct dt_device                *dev    = lfsck_obj2dev(obj);
696         struct thandle                  *th     = NULL;
697         int                              rc     = 0;
698         ENTRY;
699
700         LASSERT(dt_object_remote(obj) == 0);
701
702         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
703                 GOTO(unlock, rc = 0);
704
705         th = lfsck_trans_create(env, dev, lfsck);
706         if (IS_ERR(th))
707                 GOTO(log, rc = PTR_ERR(th));
708
709         rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, th);
710         if (rc != 0)
711                 GOTO(stop, rc);
712
713         rc = dt_trans_start_local(env, dev, th);
714         if (rc != 0)
715                 GOTO(stop, rc);
716
717         dt_write_lock(env, obj, 0);
718         if (unlikely(lfsck_is_dead_obj(obj)))
719                 GOTO(unlock, rc = -ENOENT);
720
721         rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, th);
722
723         GOTO(unlock, rc);
724
725 unlock:
726         dt_write_unlock(env, obj);
727
728 stop:
729         dt_trans_stop(env, dev, th);
730
731 log:
732         CDEBUG(D_LFSCK, "%s: namespace LFSCK remove invalid linkEA "
733                "for the object "DFID": rc = %d\n",
734                lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc);
735
736         if (rc == 0) {
737                 struct lfsck_namespace *ns = com->lc_file_ram;
738
739                 ns->ln_flags |= LF_INCONSISTENT;
740         }
741
742         return rc;
743 }
744
745 static int lfsck_links_write(const struct lu_env *env, struct dt_object *obj,
746                              struct linkea_data *ldata, struct thandle *handle)
747 {
748         struct lu_buf buf;
749         int rc;
750
751         lfsck_buf_init(&buf, ldata->ld_buf->lb_buf, ldata->ld_leh->leh_len);
752
753 again:
754         rc = dt_xattr_set(env, obj, &buf, XATTR_NAME_LINK, 0, handle);
755         if (unlikely(rc == -ENOSPC)) {
756                 rc = linkea_overflow_shrink(ldata);
757                 if (likely(rc > 0)) {
758                         buf.lb_len = rc;
759                         goto again;
760                 }
761         }
762
763         return rc;
764 }
765
766 static inline bool linkea_reclen_is_valid(const struct linkea_data *ldata)
767 {
768         if (ldata->ld_reclen <= 0)
769                 return false;
770
771         if ((char *)ldata->ld_lee + ldata->ld_reclen >
772             (char *)ldata->ld_leh + ldata->ld_leh->leh_len)
773                 return false;
774
775         return true;
776 }
777
778 static inline bool linkea_entry_is_valid(const struct linkea_data *ldata,
779                                          const struct lu_name *cname,
780                                          const struct lu_fid *pfid)
781 {
782         if (!linkea_reclen_is_valid(ldata))
783                 return false;
784
785         if (cname->ln_namelen <= 0 || cname->ln_namelen > NAME_MAX)
786                 return false;
787
788         if (!fid_is_sane(pfid))
789                 return false;
790
791         return true;
792 }
793
794 static int lfsck_namespace_unpack_linkea_entry(struct linkea_data *ldata,
795                                                struct lu_name *cname,
796                                                struct lu_fid *pfid,
797                                                char *buf, const int buflen)
798 {
799         linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, cname, pfid);
800         if (unlikely(!linkea_entry_is_valid(ldata, cname, pfid)))
801                 return -EINVAL;
802
803         /* To guarantee the 'name' is terminated with '0'. */
804         memcpy(buf, cname->ln_name, cname->ln_namelen);
805         buf[cname->ln_namelen] = 0;
806         cname->ln_name = buf;
807
808         return 0;
809 }
810
811 static void lfsck_linkea_del_buf(struct linkea_data *ldata,
812                                  const struct lu_name *lname)
813 {
814         LASSERT(ldata->ld_leh != NULL && ldata->ld_lee != NULL);
815
816         /* If current record is corrupted, all the subsequent
817          * records will be dropped. */
818         if (unlikely(!linkea_reclen_is_valid(ldata))) {
819                 void *ptr = ldata->ld_lee;
820
821                 ldata->ld_leh->leh_len = sizeof(struct link_ea_header);
822                 ldata->ld_leh->leh_reccount = 0;
823                 linkea_first_entry(ldata);
824                 while (ldata->ld_lee != NULL &&
825                        (char *)ldata->ld_lee < (char *)ptr) {
826                         int reclen = (ldata->ld_lee->lee_reclen[0] << 8) |
827                                      ldata->ld_lee->lee_reclen[1];
828
829                         ldata->ld_leh->leh_len += reclen;
830                         ldata->ld_leh->leh_reccount++;
831                         ldata->ld_lee = (struct link_ea_entry *)
832                                         ((char *)ldata->ld_lee + reclen);
833                 }
834
835                 ldata->ld_lee = NULL;
836         } else {
837                 linkea_del_buf(ldata, lname, false);
838         }
839 }
840
841 static int lfsck_namespace_filter_linkea_entry(struct linkea_data *ldata,
842                                                struct lu_name *cname,
843                                                struct lu_fid *pfid,
844                                                bool remove)
845 {
846         struct link_ea_entry    *oldlee;
847         int                      oldlen;
848         int                      repeated = 0;
849
850         oldlee = ldata->ld_lee;
851         oldlen = ldata->ld_reclen;
852         linkea_next_entry(ldata);
853         while (ldata->ld_lee != NULL) {
854                 ldata->ld_reclen = (ldata->ld_lee->lee_reclen[0] << 8) |
855                                    ldata->ld_lee->lee_reclen[1];
856                 if (unlikely(!linkea_reclen_is_valid(ldata))) {
857                         lfsck_linkea_del_buf(ldata, NULL);
858                         LASSERT(ldata->ld_lee == NULL);
859                 } else if (unlikely(ldata->ld_reclen == oldlen &&
860                              memcmp(ldata->ld_lee, oldlee, oldlen) == 0)) {
861                         repeated++;
862                         if (!remove)
863                                 break;
864
865                         lfsck_linkea_del_buf(ldata, cname);
866                 } else {
867                         linkea_next_entry(ldata);
868                 }
869         }
870         ldata->ld_lee = oldlee;
871         ldata->ld_reclen = oldlen;
872
873         return repeated;
874 }
875
876 /**
877  * Insert orphan into .lustre/lost+found/MDTxxxx/ locally.
878  *
879  * Add the specified orphan MDT-object to the .lustre/lost+found/MDTxxxx/
880  * with the given type to generate the name, the detailed rules for name
881  * have been described as following.
882  *
883  * The function also generates the linkEA corresponding to the name entry
884  * under the .lustre/lost+found/MDTxxxx/ for the orphan MDT-object.
885  *
886  * \param[in] env       pointer to the thread context
887  * \param[in] com       pointer to the lfsck component
888  * \param[in] orphan    pointer to the orphan MDT-object
889  * \param[in] infix     additional information for the orphan name, such as
890  *                      the FID for original
891  * \param[in] type      the type for describing why the orphan MDT-object is
892  *                      created. The rules are as following:
893  *
894  *  type "D":           The MDT-object is a directory, it may knows its parent
895  *                      but because there is no valid linkEA, the LFSCK cannot
896  *                      know where to put it back to the namespace.
897  *  type "O":           The MDT-object has no linkEA, and there is no name
898  *                      entry that references the MDT-object.
899  *
900  *  type "S":           The orphan MDT-object is a shard of a striped directory
901  *
902  * \see lfsck_layout_recreate_parent() for more types.
903  *
904  * The orphan name will be like:
905  * ${FID}-${infix}-${type}-${conflict_version}
906  *
907  * \param[out] count    if some others inserted some linkEA entries by race,
908  *                      then return the linkEA entries count.
909  *
910  * \retval              positive number for repaired cases
911  * \retval              0 if needs to repair nothing
912  * \retval              negative error number on failure
913  */
914 static int lfsck_namespace_insert_orphan(const struct lu_env *env,
915                                          struct lfsck_component *com,
916                                          struct dt_object *orphan,
917                                          const char *infix, const char *type,
918                                          int *count)
919 {
920         struct lfsck_thread_info        *info   = lfsck_env_info(env);
921         struct lu_name                  *cname  = &info->lti_name;
922         struct dt_insert_rec            *rec    = &info->lti_dt_rec;
923         struct lu_attr                  *la     = &info->lti_la2;
924         const struct lu_fid             *cfid   = lfsck_dto2fid(orphan);
925         const struct lu_fid             *pfid;
926         struct lu_fid                    tfid;
927         struct lfsck_instance           *lfsck  = com->lc_lfsck;
928         struct dt_device                *dev    = lfsck_obj2dev(orphan);
929         struct dt_object                *parent;
930         struct thandle                  *th     = NULL;
931         struct lfsck_lock_handle        *pllh   = &info->lti_llh;
932         struct lustre_handle             clh    = { 0 };
933         struct linkea_data               ldata2 = { NULL };
934         struct lu_buf                    linkea_buf;
935         int                              namelen;
936         int                              idx    = 0;
937         int                              rc     = 0;
938         bool                             exist  = false;
939         ENTRY;
940
941         cname->ln_name = NULL;
942         if (unlikely(lfsck->li_lpf_obj == NULL))
943                 GOTO(log, rc = -ENXIO);
944
945         parent = lfsck->li_lpf_obj;
946         pfid = lfsck_dto2fid(parent);
947
948 again:
949         do {
950                 namelen = snprintf(info->lti_key, NAME_MAX, DFID"%s-%s-%d",
951                                    PFID(cfid), infix, type, idx++);
952                 rc = dt_lookup_dir(env, parent, info->lti_key, &tfid);
953                 if (rc != 0 && rc != -ENOENT)
954                         GOTO(log, rc);
955
956                 if (unlikely(rc == 0 && lu_fid_eq(cfid, &tfid)))
957                         exist = true;
958         } while (rc == 0 && !exist);
959
960         rc = lfsck_lock(env, lfsck, parent, info->lti_key, pllh,
961                         MDS_INODELOCK_UPDATE, LCK_PW);
962         if (rc != 0)
963                 GOTO(log, rc);
964
965         /* Re-check whether the name conflict with othrs after taken
966          * the ldlm lock. */
967         rc = dt_lookup_dir(env, parent, info->lti_key, &tfid);
968         if (rc == 0) {
969                 if (!lu_fid_eq(cfid, &tfid)) {
970                         exist = false;
971                         lfsck_unlock(pllh);
972                         goto again;
973                 }
974
975                 exist = true;
976         } else if (rc != -ENOENT) {
977                 GOTO(log, rc);
978         } else {
979                 exist = false;
980         }
981
982         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
983                 GOTO(log, rc = 0);
984
985         cname->ln_name = info->lti_key;
986         cname->ln_namelen = namelen;
987         rc = linkea_links_new(&ldata2, &info->lti_linkea_buf2,
988                               cname, pfid);
989         if (rc != 0)
990                 GOTO(log, rc);
991
992         rc = lfsck_ibits_lock(env, lfsck, orphan, &clh,
993                               MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP |
994                               MDS_INODELOCK_XATTR, LCK_EX);
995         if (rc != 0)
996                 GOTO(log, rc);
997
998         lfsck_buf_init(&linkea_buf, ldata2.ld_buf->lb_buf,
999                        ldata2.ld_leh->leh_len);
1000         th = lfsck_trans_create(env, dev, lfsck);
1001         if (IS_ERR(th))
1002                 GOTO(log, rc = PTR_ERR(th));
1003
1004         if (S_ISDIR(lfsck_object_type(orphan))) {
1005                 rc = dt_declare_delete(env, orphan,
1006                                        (const struct dt_key *)dotdot, th);
1007                 if (rc != 0)
1008                         GOTO(stop, rc);
1009
1010                 rec->rec_type = S_IFDIR;
1011                 rec->rec_fid = pfid;
1012                 rc = dt_declare_insert(env, orphan, (const struct dt_rec *)rec,
1013                                        (const struct dt_key *)dotdot, th);
1014                 if (rc != 0)
1015                         GOTO(stop, rc);
1016         }
1017
1018         rc = dt_declare_xattr_set(env, orphan, &linkea_buf,
1019                                   XATTR_NAME_LINK, 0, th);
1020         if (rc != 0)
1021                 GOTO(stop, rc);
1022
1023         if (!exist) {
1024                 rec->rec_type = lfsck_object_type(orphan) & S_IFMT;
1025                 rec->rec_fid = cfid;
1026                 rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
1027                                        (const struct dt_key *)cname->ln_name,
1028                                        th);
1029                 if (rc != 0)
1030                         GOTO(stop, rc);
1031
1032                 if (S_ISDIR(rec->rec_type)) {
1033                         rc = dt_declare_ref_add(env, parent, th);
1034                         if (rc != 0)
1035                                 GOTO(stop, rc);
1036                 }
1037         }
1038
1039         memset(la, 0, sizeof(*la));
1040         la->la_ctime = ktime_get_real_seconds();
1041         la->la_valid = LA_CTIME;
1042         rc = dt_declare_attr_set(env, orphan, la, th);
1043         if (rc != 0)
1044                 GOTO(stop, rc);
1045
1046         rc = dt_trans_start_local(env, dev, th);
1047         if (rc != 0)
1048                 GOTO(stop, rc);
1049
1050         dt_write_lock(env, orphan, 0);
1051         rc = lfsck_links_read2_with_rec(env, orphan, &ldata2);
1052         if (likely(rc == -ENODATA || rc == -EINVAL)) {
1053                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1054                         GOTO(unlock, rc = 1);
1055
1056                 if (S_ISDIR(lfsck_object_type(orphan))) {
1057                         rc = dt_delete(env, orphan,
1058                                        (const struct dt_key *)dotdot, th);
1059                         if (rc != 0)
1060                                 GOTO(unlock, rc);
1061
1062                         rec->rec_type = S_IFDIR;
1063                         rec->rec_fid = pfid;
1064                         rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
1065                                        (const struct dt_key *)dotdot, th);
1066                         if (rc != 0)
1067                                 GOTO(unlock, rc);
1068                 }
1069
1070                 rc = dt_xattr_set(env, orphan, &linkea_buf, XATTR_NAME_LINK, 0,
1071                                   th);
1072         } else {
1073                 if (rc == 0 && count != NULL)
1074                         *count = ldata2.ld_leh->leh_reccount;
1075
1076                 GOTO(unlock, rc);
1077         }
1078         dt_write_unlock(env, orphan);
1079
1080         if (rc == 0 && !exist) {
1081                 rec->rec_type = lfsck_object_type(orphan) & S_IFMT;
1082                 rec->rec_fid = cfid;
1083                 rc = dt_insert(env, parent, (const struct dt_rec *)rec,
1084                                (const struct dt_key *)cname->ln_name, th);
1085                 if (rc == 0 && S_ISDIR(rec->rec_type)) {
1086                         dt_write_lock(env, parent, 0);
1087                         rc = dt_ref_add(env, parent, th);
1088                         dt_write_unlock(env, parent);
1089                 }
1090         }
1091
1092         if (rc == 0)
1093                 rc = dt_attr_set(env, orphan, la, th);
1094
1095         GOTO(stop, rc = (rc == 0 ? 1 : rc));
1096
1097 unlock:
1098         dt_write_unlock(env, orphan);
1099
1100 stop:
1101         dt_trans_stop(env, dev, th);
1102
1103 log:
1104         lfsck_ibits_unlock(&clh, LCK_EX);
1105         lfsck_unlock(pllh);
1106         CDEBUG(D_LFSCK, "%s: namespace LFSCK insert orphan for the "
1107                "object "DFID", name = %s: rc = %d\n",
1108                lfsck_lfsck2name(lfsck), PFID(cfid),
1109                cname->ln_name != NULL ? cname->ln_name : "<NULL>", rc);
1110
1111         if (rc != 0) {
1112                 struct lfsck_namespace *ns = com->lc_file_ram;
1113
1114                 ns->ln_flags |= LF_INCONSISTENT;
1115         }
1116
1117         return rc;
1118 }
1119
1120 static int lfsck_lmv_set(const struct lu_env *env,
1121                          struct lfsck_instance *lfsck,
1122                          struct dt_object *obj,
1123                          struct lmv_mds_md_v1 *lmv)
1124 {
1125         struct dt_device *dev = lfsck->li_next;
1126         struct thandle *th = NULL;
1127         struct lu_buf buf = { lmv, sizeof(*lmv) };
1128         int rc;
1129
1130         ENTRY;
1131
1132         th = lfsck_trans_create(env, dev, lfsck);
1133         if (IS_ERR(th))
1134                 RETURN(PTR_ERR(th));
1135
1136         rc = dt_declare_xattr_set(env, obj, &buf, XATTR_NAME_LMV, 0, th);
1137         if (rc)
1138                 GOTO(stop, rc);
1139
1140         rc = dt_trans_start_local(env, dev, th);
1141         if (rc != 0)
1142                 GOTO(stop, rc);
1143
1144         rc = dt_xattr_set(env, obj, &buf, XATTR_NAME_LMV, 0, th);
1145         if (rc)
1146                 GOTO(stop, rc);
1147
1148         EXIT;
1149 stop:
1150         dt_trans_stop(env, dev, th);
1151
1152         return rc;
1153 }
1154
1155 static int lfsck_lmv_delete(const struct lu_env *env,
1156                             struct lfsck_instance *lfsck,
1157                             struct dt_object *obj)
1158 {
1159         struct dt_device *dev = lfsck->li_next;
1160         struct thandle *th = NULL;
1161         int rc;
1162
1163         ENTRY;
1164
1165         th = lfsck_trans_create(env, dev, lfsck);
1166         if (IS_ERR(th))
1167                 RETURN(PTR_ERR(th));
1168
1169         rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LMV, th);
1170         if (rc)
1171                 GOTO(stop, rc);
1172
1173         rc = dt_trans_start_local(env, dev, th);
1174         if (rc != 0)
1175                 GOTO(stop, rc);
1176
1177         rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, th);
1178         if (rc)
1179                 GOTO(stop, rc);
1180
1181         EXIT;
1182 stop:
1183         dt_trans_stop(env, dev, th);
1184
1185         return rc;
1186 }
1187
1188 static inline int lfsck_object_is_shard(const struct lu_env *env,
1189                                         struct lfsck_instance *lfsck,
1190                                         struct dt_object *obj,
1191                                         const struct lu_name *lname)
1192 {
1193         struct lfsck_thread_info *info = lfsck_env_info(env);
1194         struct lmv_mds_md_v1 *lmv = &info->lti_lmv;
1195         int rc;
1196
1197         rc = lfsck_shard_name_to_index(env, lname->ln_name, lname->ln_namelen,
1198                                        lfsck_object_type(obj),
1199                                        lfsck_dto2fid(obj));
1200         if (rc < 0)
1201                 return 0;
1202
1203         rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv);
1204         if (rc == -ENODATA)
1205                 return 0;
1206
1207         if (!rc && lmv->lmv_magic == LMV_MAGIC_STRIPE)
1208                 return 1;
1209
1210         return rc;
1211 }
1212
1213 /**
1214  * Add the specified name entry back to namespace.
1215  *
1216  * If there is a linkEA entry that back references a name entry under
1217  * some parent directory, but such parent directory does not have the
1218  * claimed name entry. On the other hand, the linkEA entries count is
1219  * not larger than the MDT-object's hard link count. Under such case,
1220  * it is quite possible that the name entry is lost. Then the LFSCK
1221  * should add the name entry back to the namespace.
1222  *
1223  * If \a child is shard, which means \a parent is a striped directory,
1224  * if \a parent has LMV, we need to delete it before insertion because
1225  * now parent's striping is broken and can't be parsed correctly.
1226  *
1227  * \param[in] env       pointer to the thread context
1228  * \param[in] com       pointer to the lfsck component
1229  * \param[in] parent    pointer to the directory under which the name entry
1230  *                      will be inserted into
1231  * \param[in] child     pointer to the object referenced by the name entry
1232  *                      that to be inserted into the parent
1233  * \param[in] lname     the name for the child in the parent directory
1234  *
1235  * \retval              positive number for repaired cases
1236  * \retval              0 if nothing to be repaired
1237  * \retval              negative error number on failure
1238  */
1239 static int lfsck_namespace_insert_normal(const struct lu_env *env,
1240                                          struct lfsck_component *com,
1241                                          struct dt_object *parent,
1242                                          struct dt_object *child,
1243                                          const struct lu_name *lname)
1244 {
1245         struct lfsck_thread_info *info = lfsck_env_info(env);
1246         struct lu_attr *la = &info->lti_la;
1247         struct dt_insert_rec *rec = &info->lti_dt_rec;
1248         struct lfsck_instance *lfsck = com->lc_lfsck;
1249         /* The child and its name may be on different MDTs. */
1250         const struct lu_fid *pfid = lfsck_dto2fid(parent);
1251         const struct lu_fid *cfid = lfsck_dto2fid(child);
1252         struct dt_device *dev = lfsck->li_next;
1253         struct thandle *th = NULL;
1254         struct lfsck_lock_handle *llh = &info->lti_llh;
1255         struct lmv_mds_md_v1 *lmv = &info->lti_lmv;
1256         struct lu_buf buf = { lmv, sizeof(*lmv) };
1257         /* whether parent's LMV is deleted before insertion */
1258         bool parent_lmv_deleted = false;
1259         /* whether parent's LMV is missing */
1260         bool parent_lmv_lost = false;
1261         int rc = 0;
1262
1263         ENTRY;
1264
1265         /* @parent/@child may be based on lfsck->li_bottom,
1266          * but here we need the object based on the lfsck->li_next. */
1267
1268         parent = lfsck_object_locate(dev, parent);
1269         if (IS_ERR(parent))
1270                 GOTO(log, rc = PTR_ERR(parent));
1271
1272         child = lfsck_object_locate(dev, child);
1273         if (IS_ERR(child))
1274                 GOTO(log, rc = PTR_ERR(child));
1275
1276         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1277                 GOTO(log, rc = 1);
1278
1279         rc = lfsck_lock(env, lfsck, parent, lname->ln_name, llh,
1280                         MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
1281                         MDS_INODELOCK_XATTR, LCK_EX);
1282         if (rc)
1283                 GOTO(log, rc);
1284
1285         rc = lfsck_object_is_shard(env, lfsck, child, lname);
1286         if (rc < 0)
1287                 GOTO(unlock, rc);
1288
1289         if (rc == 1) {
1290                 rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv);
1291                 if (!rc) {
1292                         /*
1293                          * To add a shard, we need to convert parent to a
1294                          * plain directory by deleting its LMV, and after
1295                          * insertion set it back.
1296                          */
1297                         rc = lfsck_lmv_delete(env, lfsck, parent);
1298                         if (rc)
1299                                 GOTO(unlock, rc);
1300                         parent_lmv_deleted = true;
1301                         lmv->lmv_layout_version++;
1302                         lfsck_lmv_header_cpu_to_le(lmv, lmv);
1303                 } else if (rc == -ENODATA) {
1304                         struct lu_seq_range *range = &info->lti_range;
1305                         struct seq_server_site *ss = lfsck_dev_site(lfsck);
1306
1307                         rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
1308                         if (rc)
1309                                 GOTO(unlock, rc);
1310
1311                         fld_range_set_mdt(range);
1312                         rc = fld_server_lookup(env, ss->ss_server_fld,
1313                                        fid_seq(lfsck_dto2fid(parent)), range);
1314                         if (rc)
1315                                 GOTO(unlock, rc);
1316
1317                         parent_lmv_lost = true;
1318                         lmv->lmv_magic = LMV_MAGIC;
1319                         lmv->lmv_master_mdt_index = range->lsr_index;
1320                         lmv->lmv_layout_version++;
1321                         lfsck_lmv_header_cpu_to_le(lmv, lmv);
1322                 } else {
1323                         GOTO(unlock, rc);
1324                 }
1325         }
1326
1327         if (unlikely(!dt_try_as_dir(env, parent, true)))
1328                 GOTO(unlock, rc = -ENOTDIR);
1329
1330         th = lfsck_trans_create(env, dev, lfsck);
1331         if (IS_ERR(th))
1332                 GOTO(unlock, rc = PTR_ERR(th));
1333
1334         rec->rec_type = lfsck_object_type(child) & S_IFMT;
1335         rec->rec_fid = cfid;
1336         rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
1337                                (const struct dt_key *)lname->ln_name, th);
1338         if (rc != 0)
1339                 GOTO(stop, rc);
1340
1341         if (S_ISDIR(rec->rec_type)) {
1342                 rc = dt_declare_ref_add(env, parent, th);
1343                 if (rc != 0)
1344                         GOTO(stop, rc);
1345         }
1346
1347         if (parent_lmv_lost) {
1348                 rc = dt_declare_xattr_set(env, parent, &buf, XATTR_NAME_LMV,
1349                                           0, th);
1350                 if (rc)
1351                         GOTO(stop, rc);
1352         }
1353
1354         la->la_ctime = ktime_get_real_seconds();
1355         la->la_valid = LA_CTIME;
1356         rc = dt_declare_attr_set(env, parent, la, th);
1357         if (rc != 0)
1358                 GOTO(stop, rc);
1359
1360         rc = dt_declare_attr_set(env, child, la, th);
1361         if (rc != 0)
1362                 GOTO(stop, rc);
1363
1364         rc = dt_trans_start_local(env, dev, th);
1365         if (rc != 0)
1366                 GOTO(stop, rc);
1367
1368         rc = dt_insert(env, parent, (const struct dt_rec *)rec,
1369                        (const struct dt_key *)lname->ln_name, th);
1370         if (rc != 0)
1371                 GOTO(stop, rc);
1372
1373         if (S_ISDIR(rec->rec_type)) {
1374                 dt_write_lock(env, parent, 0);
1375                 rc = dt_ref_add(env, parent, th);
1376                 dt_write_unlock(env, parent);
1377                 if (rc != 0)
1378                         GOTO(stop, rc);
1379         }
1380
1381         if (parent_lmv_lost) {
1382                 rc = dt_xattr_set(env, parent, &buf, XATTR_NAME_LMV, 0, th);
1383                 if (rc)
1384                         GOTO(stop, rc);
1385         }
1386
1387         rc = dt_attr_set(env, parent, la, th);
1388         if (rc != 0)
1389                 GOTO(stop, rc);
1390
1391         rc = dt_attr_set(env, child, la, th);
1392
1393         GOTO(stop, rc = (rc == 0 ? 1 : rc));
1394
1395 stop:
1396         dt_trans_stop(env, dev, th);
1397
1398 unlock:
1399         if (parent_lmv_deleted)
1400                 lfsck_lmv_set(env, lfsck, parent, lmv);
1401
1402         lfsck_unlock(llh);
1403
1404 log:
1405         CDEBUG(D_LFSCK, "%s: namespace LFSCK insert object "DFID" with "
1406                "the name %s and type %o to the parent "DFID": rc = %d\n",
1407                lfsck_lfsck2name(lfsck), PFID(cfid), lname->ln_name,
1408                lfsck_object_type(child) & S_IFMT, PFID(pfid), rc);
1409
1410         if (rc != 0) {
1411                 struct lfsck_namespace *ns = com->lc_file_ram;
1412
1413                 ns->ln_flags |= LF_INCONSISTENT;
1414                 if (rc > 0)
1415                         ns->ln_lost_dirent_repaired++;
1416         }
1417
1418         return rc;
1419 }
1420
1421 /**
1422  * Create the specified orphan directory.
1423  *
1424  * For the case that the parent MDT-object stored in some MDT-object's
1425  * linkEA entry is lost, the LFSCK will re-create the parent object as
1426  * an orphan and insert it into .lustre/lost+found/MDTxxxx/ directory
1427  * with the name ${FID}-P-${conflict_version}.
1428  *
1429  * \param[in] env       pointer to the thread context
1430  * \param[in] com       pointer to the lfsck component
1431  * \param[in] orphan    pointer to the orphan MDT-object to be created
1432  * \param[in] lmv       pointer to master LMV EA that will be set to the orphan
1433  *
1434  * \retval              positive number for repaired cases
1435  * \retval              negative error number on failure
1436  */
1437 static int lfsck_namespace_create_orphan_dir(const struct lu_env *env,
1438                                              struct lfsck_component *com,
1439                                              struct dt_object *orphan,
1440                                              struct lmv_mds_md_v1 *lmv)
1441 {
1442         struct lfsck_thread_info        *info   = lfsck_env_info(env);
1443         struct lu_attr                  *la     = &info->lti_la;
1444         struct dt_allocation_hint       *hint   = &info->lti_hint;
1445         struct dt_object_format         *dof    = &info->lti_dof;
1446         struct lu_name                  *cname  = &info->lti_name2;
1447         struct dt_insert_rec            *rec    = &info->lti_dt_rec;
1448         struct lmv_mds_md_v1            *lmv2   = &info->lti_lmv2;
1449         const struct lu_fid             *cfid   = lfsck_dto2fid(orphan);
1450         struct lu_fid                    tfid;
1451         struct lfsck_instance           *lfsck  = com->lc_lfsck;
1452         struct lfsck_namespace          *ns     = com->lc_file_ram;
1453         struct dt_device                *dev    = lfsck_obj2dev(orphan);
1454         struct dt_object                *parent = NULL;
1455         struct thandle                  *th     = NULL;
1456         struct lfsck_lock_handle        *llh    = &info->lti_llh;
1457         struct linkea_data               ldata  = { NULL };
1458         struct lu_buf                    linkea_buf;
1459         struct lu_buf                    lmv_buf;
1460         char                             name[32];
1461         int                              namelen;
1462         int                              idx    = 0;
1463         int                              rc     = 0;
1464         int                              rc1    = 0;
1465         ENTRY;
1466
1467         LASSERT(!dt_object_exists(orphan));
1468
1469         cname->ln_name = NULL;
1470         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1471                 GOTO(log, rc = 1);
1472
1473         if (dt_object_remote(orphan)) {
1474                 if (lfsck->li_lpf_root_obj == NULL)
1475                         GOTO(log, rc = -EBADF);
1476
1477                 idx = lfsck_find_mdt_idx_by_fid(env, lfsck, cfid);
1478                 if (idx < 0)
1479                         GOTO(log, rc = idx);
1480
1481                 snprintf(name, 8, "MDT%04x", idx);
1482                 rc = dt_lookup_dir(env, lfsck->li_lpf_root_obj, name, &tfid);
1483                 if (rc != 0)
1484                         GOTO(log, rc = (rc == -ENOENT ? -ENXIO : rc));
1485
1486                 parent = lfsck_object_find_bottom(env, lfsck, &tfid);
1487                 if (IS_ERR(parent))
1488                         GOTO(log, rc = PTR_ERR(parent));
1489
1490                 if (unlikely(!dt_try_as_dir(env, parent, true)))
1491                         GOTO(log, rc = -ENOTDIR);
1492         } else {
1493                 if (unlikely(lfsck->li_lpf_obj == NULL))
1494                         GOTO(log, rc = -ENXIO);
1495
1496                 parent = lfsck->li_lpf_obj;
1497         }
1498
1499         dev = lfsck_find_dev_by_fid(env, lfsck, cfid);
1500         if (IS_ERR(dev))
1501                 GOTO(log, rc = PTR_ERR(dev));
1502
1503         idx = 0;
1504
1505 again:
1506         do {
1507                 namelen = snprintf(name, 31, DFID"-P-%d",
1508                                    PFID(cfid), idx++);
1509                 rc = dt_lookup_dir(env, parent, name, &tfid);
1510                 if (rc != 0 && rc != -ENOENT)
1511                         GOTO(log, rc);
1512         } while (rc == 0);
1513
1514         rc = lfsck_lock(env, lfsck, parent, name, llh,
1515                         MDS_INODELOCK_UPDATE, LCK_PW);
1516         if (rc != 0)
1517                 GOTO(log, rc);
1518
1519         /* Re-check whether the name conflict with othrs after taken
1520          * the ldlm lock. */
1521         rc = dt_lookup_dir(env, parent, name, &tfid);
1522         if (unlikely(rc == 0)) {
1523                 lfsck_unlock(llh);
1524                 goto again;
1525         }
1526
1527         if (rc != -ENOENT)
1528                 GOTO(unlock1, rc);
1529
1530         cname->ln_name = name;
1531         cname->ln_namelen = namelen;
1532
1533         memset(la, 0, sizeof(*la));
1534         la->la_mode = S_IFDIR | 0700;
1535         la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
1536                        LA_ATIME | LA_MTIME | LA_CTIME;
1537
1538         orphan->do_ops->do_ah_init(env, hint, parent, orphan,
1539                                    la->la_mode & S_IFMT);
1540
1541         memset(dof, 0, sizeof(*dof));
1542         dof->dof_type = dt_mode_to_dft(S_IFDIR);
1543
1544         rc = linkea_links_new(&ldata, &info->lti_linkea_buf2,
1545                               cname, lfsck_dto2fid(parent));
1546         if (rc != 0)
1547                 GOTO(unlock1, rc);
1548
1549         th = lfsck_trans_create(env, dev, lfsck);
1550         if (IS_ERR(th))
1551                 GOTO(unlock1, rc = PTR_ERR(th));
1552
1553         /* Sync the remote transaction to guarantee that the subsequent
1554          * lock against the @orphan can find the @orphan in time. */
1555         if (dt_object_remote(orphan))
1556                 th->th_sync = 1;
1557
1558         rc = dt_declare_create(env, orphan, la, hint, dof, th);
1559         if (rc != 0)
1560                 GOTO(stop, rc);
1561
1562         if (unlikely(!dt_try_as_dir(env, orphan, false)))
1563                 GOTO(stop, rc = -ENOTDIR);
1564
1565         rc = dt_declare_ref_add(env, orphan, th);
1566         if (rc != 0)
1567                 GOTO(stop, rc);
1568
1569         rec->rec_type = S_IFDIR;
1570         rec->rec_fid = cfid;
1571         rc = dt_declare_insert(env, orphan, (const struct dt_rec *)rec,
1572                                (const struct dt_key *)dot, th);
1573         if (rc != 0)
1574                 GOTO(stop, rc);
1575
1576         rec->rec_fid = lfsck_dto2fid(parent);
1577         rc = dt_declare_insert(env, orphan, (const struct dt_rec *)rec,
1578                                (const struct dt_key *)dotdot, th);
1579         if (rc != 0)
1580                 GOTO(stop, rc);
1581
1582         if (lmv != NULL) {
1583                 lmv->lmv_magic = LMV_MAGIC;
1584                 lmv->lmv_master_mdt_index = lfsck_dev_idx(lfsck);
1585                 lfsck_lmv_header_cpu_to_le(lmv2, lmv);
1586                 lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2));
1587                 rc = dt_declare_xattr_set(env, orphan, &lmv_buf, XATTR_NAME_LMV,
1588                                           0, th);
1589                 if (rc != 0)
1590                         GOTO(stop, rc);
1591         }
1592
1593         lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
1594                        ldata.ld_leh->leh_len);
1595         rc = dt_declare_xattr_set(env, orphan, &linkea_buf,
1596                                   XATTR_NAME_LINK, 0, th);
1597         if (rc != 0)
1598                 GOTO(stop, rc);
1599
1600         rec->rec_fid = cfid;
1601         rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
1602                                (const struct dt_key *)name, th);
1603         if (rc == 0)
1604                 rc = dt_declare_ref_add(env, parent, th);
1605
1606         if (rc != 0)
1607                 GOTO(stop, rc);
1608
1609         rc = dt_trans_start_local(env, dev, th);
1610         if (rc != 0)
1611                 GOTO(stop, rc);
1612
1613         dt_write_lock(env, orphan, 0);
1614         rc = dt_create(env, orphan, la, hint, dof, th);
1615         if (rc != 0)
1616                 GOTO(unlock2, rc);
1617
1618         rc = dt_ref_add(env, orphan, th);
1619         if (rc != 0)
1620                 GOTO(unlock2, rc);
1621
1622         rec->rec_fid = cfid;
1623         rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
1624                        (const struct dt_key *)dot, th);
1625         if (rc != 0)
1626                 GOTO(unlock2, rc);
1627
1628         rec->rec_fid = lfsck_dto2fid(parent);
1629         rc = dt_insert(env, orphan, (const struct dt_rec *)rec,
1630                        (const struct dt_key *)dotdot, th);
1631         if (rc != 0)
1632                 GOTO(unlock2, rc);
1633
1634         if (lmv != NULL) {
1635                 rc = dt_xattr_set(env, orphan, &lmv_buf, XATTR_NAME_LMV, 0, th);
1636                 if (rc != 0)
1637                         GOTO(unlock2, rc);
1638         }
1639
1640         rc = dt_xattr_set(env, orphan, &linkea_buf,
1641                           XATTR_NAME_LINK, 0, th);
1642         dt_write_unlock(env, orphan);
1643         if (rc != 0)
1644                 GOTO(stop, rc);
1645
1646         rec->rec_fid = cfid;
1647         rc = dt_insert(env, parent, (const struct dt_rec *)rec,
1648                        (const struct dt_key *)name, th);
1649         if (rc == 0) {
1650                 dt_write_lock(env, parent, 0);
1651                 rc = dt_ref_add(env, parent, th);
1652                 dt_write_unlock(env, parent);
1653         }
1654
1655         GOTO(stop, rc = (rc == 0 ? 1 : rc));
1656
1657 unlock2:
1658         dt_write_unlock(env, orphan);
1659
1660 stop:
1661         rc1 = dt_trans_stop(env, dev, th);
1662         if (rc1 != 0 && rc > 0)
1663                 rc = rc1;
1664
1665 unlock1:
1666         lfsck_unlock(llh);
1667
1668 log:
1669         CDEBUG(D_LFSCK, "%s: namespace LFSCK create orphan dir for "
1670                "the object "DFID", name = %s: rc = %d\n",
1671                lfsck_lfsck2name(lfsck), PFID(cfid),
1672                cname->ln_name != NULL ? cname->ln_name : "<NULL>", rc);
1673
1674         if (parent != NULL && !IS_ERR(parent) && parent != lfsck->li_lpf_obj)
1675                 lfsck_object_put(env, parent);
1676
1677         if (rc != 0)
1678                 ns->ln_flags |= LF_INCONSISTENT;
1679
1680         return rc;
1681 }
1682
1683 /**
1684  * Remove the specified entry from the linkEA.
1685  *
1686  * Locate the linkEA entry with the given @cname and @pfid, then
1687  * remove this entry or the other entries those are repeated with
1688  * this entry.
1689  *
1690  * \param[in] env       pointer to the thread context
1691  * \param[in] com       pointer to the lfsck component
1692  * \param[in] obj       pointer to the dt_object to be handled
1693  * \param[in,out]ldata  pointer to the buffer that holds the linkEA
1694  * \param[in] cname     the name for the child in the parent directory
1695  * \param[in] pfid      the parent directory's FID for the linkEA
1696  * \param[in] next      if true, then remove the first found linkEA
1697  *                      entry, and move the ldata->ld_lee to next entry
1698  *
1699  * \retval              positive number for repaired cases
1700  * \retval              0 if nothing to be repaired
1701  * \retval              negative error number on failure
1702  */
1703 static int lfsck_namespace_shrink_linkea(const struct lu_env *env,
1704                                          struct lfsck_component *com,
1705                                          struct dt_object *obj,
1706                                          struct linkea_data *ldata,
1707                                          struct lu_name *cname,
1708                                          struct lu_fid *pfid,
1709                                          bool next)
1710 {
1711         struct lfsck_instance           *lfsck     = com->lc_lfsck;
1712         struct dt_device                *dev       = lfsck_obj2dev(obj);
1713         struct lfsck_bookmark           *bk        = &lfsck->li_bookmark_ram;
1714         struct thandle                  *th        = NULL;
1715         struct lustre_handle             lh        = { 0 };
1716         struct linkea_data               ldata_new = { NULL };
1717         struct lu_buf                    linkea_buf;
1718         int                              buflen    = 0;
1719         int                              rc        = 0;
1720         ENTRY;
1721
1722         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
1723                 GOTO(log, rc = 0);
1724
1725         rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
1726                               MDS_INODELOCK_UPDATE | MDS_INODELOCK_XATTR,
1727                               LCK_EX);
1728         if (rc != 0)
1729                 GOTO(log, rc);
1730
1731         if (next)
1732                 lfsck_linkea_del_buf(ldata, cname);
1733         else
1734                 lfsck_namespace_filter_linkea_entry(ldata, cname, pfid,
1735                                                     true);
1736         if (ldata->ld_leh->leh_reccount > 0 ||
1737             unlikely(ldata->ld_leh->leh_overflow_time)) {
1738                 lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf,
1739                                ldata->ld_leh->leh_len);
1740                 buflen = linkea_buf.lb_len;
1741         }
1742
1743 again:
1744         th = lfsck_trans_create(env, dev, lfsck);
1745         if (IS_ERR(th))
1746                 GOTO(unlock1, rc = PTR_ERR(th));
1747
1748         if (buflen != 0)
1749                 rc = dt_declare_xattr_set(env, obj, &linkea_buf,
1750                                           XATTR_NAME_LINK, 0, th);
1751         else
1752                 rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LINK, th);
1753         if (rc != 0)
1754                 GOTO(stop, rc);
1755
1756         rc = dt_trans_start_local(env, dev, th);
1757         if (rc != 0)
1758                 GOTO(stop, rc);
1759
1760         dt_write_lock(env, obj, 0);
1761         if (unlikely(lfsck_is_dead_obj(obj)))
1762                 GOTO(unlock2, rc = -ENOENT);
1763
1764         rc = lfsck_links_read2_with_rec(env, obj, &ldata_new);
1765         if (rc)
1766                 GOTO(unlock2, rc = (rc == -ENODATA ? 0 : rc));
1767
1768         /* The specified linkEA entry has been removed by race. */
1769         rc = linkea_links_find(&ldata_new, cname, pfid);
1770         if (rc != 0)
1771                 GOTO(unlock2, rc = 0);
1772
1773         if (bk->lb_param & LPF_DRYRUN)
1774                 GOTO(unlock2, rc = 1);
1775
1776         if (next)
1777                 lfsck_linkea_del_buf(&ldata_new, cname);
1778         else
1779                 lfsck_namespace_filter_linkea_entry(&ldata_new, cname, pfid,
1780                                                     true);
1781
1782         /*
1783          * linkea may change because it doesn't take lock in the first read, if
1784          * it becomes larger, restart from beginning.
1785          */
1786         if ((ldata_new.ld_leh->leh_reccount > 0 ||
1787              unlikely(ldata_new.ld_leh->leh_overflow_time)) &&
1788             buflen < ldata_new.ld_leh->leh_len) {
1789                 dt_write_unlock(env, obj);
1790                 dt_trans_stop(env, dev, th);
1791                 lfsck_buf_init(&linkea_buf, ldata_new.ld_buf->lb_buf,
1792                                ldata_new.ld_leh->leh_len);
1793                 buflen = linkea_buf.lb_len;
1794                 goto again;
1795         }
1796
1797         if (buflen)
1798                 rc = lfsck_links_write(env, obj, &ldata_new, th);
1799         else
1800                 rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, th);
1801
1802         GOTO(unlock2, rc = (rc == 0 ? 1 : rc));
1803
1804 unlock2:
1805         dt_write_unlock(env, obj);
1806
1807 stop:
1808         dt_trans_stop(env, dev, th);
1809
1810 unlock1:
1811         lfsck_ibits_unlock(&lh, LCK_EX);
1812
1813 log:
1814         CDEBUG(D_LFSCK, "%s: namespace LFSCK remove %s linkEA entry "
1815                "for the object: "DFID", parent "DFID", name %.*s\n",
1816                lfsck_lfsck2name(lfsck), next ? "invalid" : "redundant",
1817                PFID(lfsck_dto2fid(obj)), PFID(pfid), cname->ln_namelen,
1818                cname->ln_name);
1819
1820         if (rc != 0) {
1821                 struct lfsck_namespace *ns = com->lc_file_ram;
1822
1823                 ns->ln_flags |= LF_INCONSISTENT;
1824         }
1825
1826         return rc;
1827 }
1828
1829 /**
1830  * Conditionally remove the specified entry from the linkEA.
1831  *
1832  * Take the parent lock firstly, then check whether the specified
1833  * name entry exists or not: if yes, do nothing; otherwise, call
1834  * lfsck_namespace_shrink_linkea() to remove the linkea entry.
1835  *
1836  * \param[in] env       pointer to the thread context
1837  * \param[in] com       pointer to the lfsck component
1838  * \param[in] parent    pointer to the parent directory
1839  * \param[in] child     pointer to the child object that holds the linkEA
1840  * \param[in,out]ldata  pointer to the buffer that holds the linkEA
1841  * \param[in] cname     the name for the child in the parent directory
1842  * \param[in] pfid      the parent directory's FID for the linkEA
1843  *
1844  * \retval              positive number for repaired cases
1845  * \retval              0 if nothing to be repaired
1846  * \retval              negative error number on failure
1847  */
1848 static int lfsck_namespace_shrink_linkea_cond(const struct lu_env *env,
1849                                               struct lfsck_component *com,
1850                                               struct dt_object *parent,
1851                                               struct dt_object *child,
1852                                               struct linkea_data *ldata,
1853                                               struct lu_name *cname,
1854                                               struct lu_fid *pfid)
1855 {
1856         struct lfsck_thread_info *info  = lfsck_env_info(env);
1857         struct lu_fid            *cfid  = &info->lti_fid3;
1858         struct lfsck_lock_handle *llh   = &info->lti_llh;
1859         int                       rc;
1860         ENTRY;
1861
1862         rc = lfsck_lock(env, com->lc_lfsck, parent, cname->ln_name, llh,
1863                         MDS_INODELOCK_UPDATE, LCK_PR);
1864         if (rc != 0)
1865                 RETURN(rc);
1866
1867         dt_read_lock(env, parent, 0);
1868         if (unlikely(lfsck_is_dead_obj(parent))) {
1869                 dt_read_unlock(env, parent);
1870                 lfsck_unlock(llh);
1871                 rc = lfsck_namespace_shrink_linkea(env, com, child, ldata,
1872                                                    cname, pfid, true);
1873
1874                 RETURN(rc);
1875         }
1876
1877         rc = dt_lookup(env, parent, (struct dt_rec *)cfid,
1878                        (const struct dt_key *)cname->ln_name);
1879         dt_read_unlock(env, parent);
1880
1881         /* It is safe to release the ldlm lock, because when the logic come
1882          * here, we have got all the needed information above whether the
1883          * linkEA entry is valid or not. It is not important that others
1884          * may add new linkEA entry after the ldlm lock released. If other
1885          * has removed the specified linkEA entry by race, then it is OK,
1886          * because the subsequent lfsck_namespace_shrink_linkea() can handle
1887          * such case. */
1888         lfsck_unlock(llh);
1889         if (rc == -ENOENT) {
1890                 rc = lfsck_namespace_shrink_linkea(env, com, child, ldata,
1891                                                    cname, pfid, true);
1892
1893                 RETURN(rc);
1894         }
1895
1896         if (rc != 0)
1897                 RETURN(rc);
1898
1899         /* The LFSCK just found some internal status of cross-MDTs
1900          * create operation. That is normal. */
1901         if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
1902                 linkea_next_entry(ldata);
1903
1904                 RETURN(0);
1905         }
1906
1907         rc = lfsck_namespace_shrink_linkea(env, com, child, ldata, cname,
1908                                            pfid, true);
1909
1910         RETURN(rc);
1911 }
1912
1913 /**
1914  * Conditionally replace name entry in the parent.
1915  *
1916  * As required, the LFSCK may re-create the lost MDT-object for dangling
1917  * name entry, but such repairing may be wrong because of bad FID in the
1918  * name entry. As the LFSCK processing, the real MDT-object may be found,
1919  * then the LFSCK should check whether the former re-created MDT-object
1920  * has been modified or not, if not, then destroy it and update the name
1921  * entry in the parent to reference the real MDT-object.
1922  *
1923  * \param[in] env       pointer to the thread context
1924  * \param[in] com       pointer to the lfsck component
1925  * \param[in] parent    pointer to the parent directory
1926  * \param[in] child     pointer to the MDT-object that may be the real
1927  *                      MDT-object corresponding to the name entry in parent
1928  * \param[in] cfid      the current FID in the name entry
1929  * \param[in] cname     contains the name of the child in the parent directory
1930  *
1931  * \retval              positive number for repaired cases
1932  * \retval              0 if nothing to be repaired
1933  * \retval              negative error number on failure
1934  */
1935 static int lfsck_namespace_replace_cond(const struct lu_env *env,
1936                                         struct lfsck_component *com,
1937                                         struct dt_object *parent,
1938                                         struct dt_object *child,
1939                                         const struct lu_fid *cfid,
1940                                         const struct lu_name *cname)
1941 {
1942         struct lfsck_thread_info        *info   = lfsck_env_info(env);
1943         struct lu_attr                  *la     = &info->lti_la;
1944         struct dt_insert_rec            *rec    = &info->lti_dt_rec;
1945         struct lu_fid                    tfid;
1946         struct lfsck_instance           *lfsck  = com->lc_lfsck;
1947         /* The child and its name may be on different MDTs. */
1948         struct dt_device                *dev    = lfsck->li_next;
1949         const char                      *name   = cname->ln_name;
1950         const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
1951         struct dt_object                *cobj   = NULL;
1952         struct lfsck_lock_handle        *pllh   = &info->lti_llh;
1953         struct lustre_handle             clh    = { 0 };
1954         struct linkea_data               ldata  = { NULL };
1955         struct thandle                  *th     = NULL;
1956         bool                             exist  = true;
1957         int                              rc     = 0;
1958         ENTRY;
1959
1960         /* @parent/@child may be based on lfsck->li_bottom,
1961          * but here we need the object based on the lfsck->li_next. */
1962
1963         parent = lfsck_object_locate(dev, parent);
1964         if (IS_ERR(parent))
1965                 GOTO(log, rc = PTR_ERR(parent));
1966
1967         if (unlikely(!dt_try_as_dir(env, parent, true)))
1968                 GOTO(log, rc = -ENOTDIR);
1969
1970         rc = lfsck_lock(env, lfsck, parent, name, pllh,
1971                         MDS_INODELOCK_UPDATE, LCK_PW);
1972         if (rc != 0)
1973                 GOTO(log, rc);
1974
1975         if (!fid_is_sane(cfid)) {
1976                 exist = false;
1977                 goto replace;
1978         }
1979
1980         cobj = lfsck_object_find_by_dev(env, dev, cfid);
1981         if (IS_ERR(cobj)) {
1982                 rc = PTR_ERR(cobj);
1983                 if (rc == -ENOENT) {
1984                         exist = false;
1985                         goto replace;
1986                 }
1987
1988                 GOTO(log, rc);
1989         }
1990
1991         if (!dt_object_exists(cobj)) {
1992                 exist = false;
1993                 goto replace;
1994         }
1995
1996         rc = dt_lookup_dir(env, parent, name, &tfid);
1997         if (rc == -ENOENT) {
1998                 exist = false;
1999                 goto replace;
2000         }
2001
2002         if (rc != 0)
2003                 GOTO(log, rc);
2004
2005         /* Someone changed the name entry, cannot replace it. */
2006         if (!lu_fid_eq(cfid, &tfid))
2007                 GOTO(log, rc = 0);
2008
2009         /* lock the object to be destroyed. */
2010         rc = lfsck_ibits_lock(env, lfsck, cobj, &clh,
2011                               MDS_INODELOCK_UPDATE |
2012                               MDS_INODELOCK_UPDATE | MDS_INODELOCK_XATTR,
2013                               LCK_EX);
2014         if (rc != 0)
2015                 GOTO(log, rc);
2016
2017         if (unlikely(lfsck_is_dead_obj(cobj))) {
2018                 exist = false;
2019                 goto replace;
2020         }
2021
2022         rc = dt_attr_get(env, cobj, la);
2023         if (rc != 0)
2024                 GOTO(log, rc);
2025
2026         /* The object has been modified by other(s), or it is not created by
2027          * LFSCK, the two cases are indistinguishable. So cannot replace it. */
2028         if (la->la_ctime != 0)
2029                 GOTO(log, rc);
2030
2031         if (S_ISREG(la->la_mode)) {
2032                 rc = dt_xattr_get(env, cobj, &LU_BUF_NULL, XATTR_NAME_LOV);
2033                 /* If someone has created related OST-object(s),
2034                  * then keep it. */
2035                 if ((rc > 0) || (rc < 0 && rc != -ENODATA))
2036                         GOTO(log, rc = (rc > 0 ? 0 : rc));
2037         }
2038
2039 replace:
2040         dt_read_lock(env, child, 0);
2041         rc = lfsck_links_read2_with_rec(env, child, &ldata);
2042         dt_read_unlock(env, child);
2043
2044         /* Someone changed the child, no need to replace. */
2045         if (rc == -ENODATA)
2046                 GOTO(log, rc = 0);
2047
2048         if (rc != 0)
2049                 GOTO(log, rc);
2050
2051         rc = linkea_links_find(&ldata, cname, pfid);
2052         /* Someone moved the child, no need to replace. */
2053         if (rc != 0)
2054                 GOTO(log, rc = 0);
2055
2056         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
2057                 GOTO(log, rc = 1);
2058
2059         th = lfsck_trans_create(env, dev, lfsck);
2060         if (IS_ERR(th))
2061                 GOTO(log, rc = PTR_ERR(th));
2062
2063         if (exist) {
2064                 rc = dt_declare_destroy(env, cobj, th);
2065                 if (rc != 0)
2066                         GOTO(stop, rc);
2067         }
2068
2069         rc = dt_declare_delete(env, parent, (const struct dt_key *)name, th);
2070         if (rc != 0)
2071                 GOTO(stop, rc);
2072
2073         rec->rec_type = S_IFDIR;
2074         rec->rec_fid = lfsck_dto2fid(child);
2075         rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
2076                                (const struct dt_key *)name, th);
2077         if (rc != 0)
2078                 GOTO(stop, rc);
2079
2080         rc = dt_trans_start_local(env, dev, th);
2081         if (rc != 0)
2082                 GOTO(stop, rc);
2083
2084         if (exist) {
2085                 rc = dt_destroy(env, cobj, th);
2086                 if (rc != 0)
2087                         GOTO(stop, rc);
2088         }
2089
2090         /* The old name entry maybe not exist. */
2091         rc = dt_delete(env, parent, (const struct dt_key *)name, th);
2092         if (rc != 0 && rc != -ENOENT)
2093                 GOTO(stop, rc);
2094
2095         rc = dt_insert(env, parent, (const struct dt_rec *)rec,
2096                        (const struct dt_key *)name, th);
2097
2098         GOTO(stop, rc = (rc == 0 ? 1 : rc));
2099
2100 stop:
2101         dt_trans_stop(env, dev, th);
2102
2103 log:
2104         lfsck_ibits_unlock(&clh, LCK_EX);
2105         lfsck_unlock(pllh);
2106
2107         if (cobj != NULL && !IS_ERR(cobj))
2108                 lfsck_object_put(env, cobj);
2109
2110         CDEBUG(D_LFSCK, "%s: namespace LFSCK conditionally destroy the "
2111                "object "DFID" because of conflict with the object "DFID
2112                " under the parent "DFID" with name %s: rc = %d\n",
2113                lfsck_lfsck2name(lfsck), PFID(cfid),
2114                PFID(lfsck_dto2fid(child)), PFID(pfid), name, rc);
2115
2116         return rc;
2117 }
2118
2119 /**
2120  * Overwrite the linkEA for the object with the given ldata.
2121  *
2122  * The caller should take the ldlm lock before the calling.
2123  *
2124  * \param[in] env       pointer to the thread context
2125  * \param[in] com       pointer to the lfsck component
2126  * \param[in] obj       pointer to the dt_object to be handled
2127  * \param[in] ldata     pointer to the new linkEA data
2128  *
2129  * \retval              positive number for repaired cases
2130  * \retval              0 if nothing to be repaired
2131  * \retval              negative error number on failure
2132  */
2133 int lfsck_namespace_rebuild_linkea(const struct lu_env *env,
2134                                    struct lfsck_component *com,
2135                                    struct dt_object *obj,
2136                                    struct linkea_data *ldata)
2137 {
2138         struct lfsck_instance           *lfsck  = com->lc_lfsck;
2139         struct dt_device                *dev    = lfsck_obj2dev(obj);
2140         struct thandle                  *th     = NULL;
2141         struct lu_buf                    linkea_buf;
2142         int                              rc     = 0;
2143         ENTRY;
2144
2145         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
2146                 GOTO(log, rc = 1);
2147
2148         th = lfsck_trans_create(env, dev, lfsck);
2149         if (IS_ERR(th))
2150                 GOTO(log, rc = PTR_ERR(th));
2151
2152         lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf,
2153                        ldata->ld_leh->leh_len);
2154         rc = dt_declare_xattr_set(env, obj, &linkea_buf,
2155                                   XATTR_NAME_LINK, 0, th);
2156         if (rc != 0)
2157                 GOTO(stop, rc);
2158
2159         rc = dt_trans_start_local(env, dev, th);
2160         if (rc != 0)
2161                 GOTO(stop, rc);
2162
2163         dt_write_lock(env, obj, 0);
2164         if (unlikely(lfsck_is_dead_obj(obj)))
2165                 GOTO(unlock, rc = 0);
2166
2167         rc = dt_xattr_set(env, obj, &linkea_buf,
2168                           XATTR_NAME_LINK, 0, th);
2169
2170         GOTO(unlock, rc = (rc == 0 ? 1 : rc));
2171
2172 unlock:
2173         dt_write_unlock(env, obj);
2174
2175 stop:
2176         dt_trans_stop(env, dev, th);
2177
2178 log:
2179         CDEBUG(D_LFSCK, "%s: namespace LFSCK rebuild linkEA for the "
2180                "object "DFID": rc = %d\n",
2181                lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc);
2182
2183         if (rc != 0) {
2184                 struct lfsck_namespace *ns = com->lc_file_ram;
2185
2186                 ns->ln_flags |= LF_INCONSISTENT;
2187         }
2188
2189         return rc;
2190 }
2191
2192 /**
2193  * Repair invalid name entry.
2194  *
2195  * If the name entry contains invalid information, such as bad file type
2196  * or (and) corrupted object FID, then either remove the name entry or
2197  * update the name entry with the given (right) information.
2198  *
2199  * \param[in] env       pointer to the thread context
2200  * \param[in] com       pointer to the lfsck component
2201  * \param[in] parent    pointer to the parent directory
2202  * \param[in] child     pointer to the object referenced by the name entry
2203  * \param[in] name      the old name of the child under the parent directory
2204  * \param[in] name2     the new name of the child under the parent directory
2205  * \param[in] type      the type claimed by the name entry
2206  * \param[in] update    update the name entry if true; otherwise, remove it
2207  * \param[in] dec       decrease the parent nlink count if true
2208  *
2209  * \retval              positive number for repaired successfully
2210  * \retval              0 if nothing to be repaired
2211  * \retval              negative error number on failure
2212  */
2213 int lfsck_namespace_repair_dirent(const struct lu_env *env,
2214                                   struct lfsck_component *com,
2215                                   struct dt_object *parent,
2216                                   struct dt_object *child,
2217                                   const char *name, const char *name2,
2218                                   __u16 type, bool update, bool dec)
2219 {
2220         struct lfsck_thread_info        *info   = lfsck_env_info(env);
2221         struct dt_insert_rec            *rec    = &info->lti_dt_rec;
2222         const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
2223         struct lu_fid                   cfid    = {0};
2224         struct lu_fid                    tfid;
2225         struct lfsck_instance           *lfsck  = com->lc_lfsck;
2226         struct dt_device                *dev    = lfsck->li_next;
2227         struct thandle                  *th     = NULL;
2228         struct lfsck_lock_handle        *llh    = &info->lti_llh;
2229         struct lustre_handle             lh     = { 0 };
2230         int                              rc     = 0;
2231         ENTRY;
2232
2233         if (child)
2234                 cfid = *lfsck_dto2fid(child);
2235         parent = lfsck_object_locate(dev, parent);
2236         if (IS_ERR(parent))
2237                 GOTO(log, rc = PTR_ERR(parent));
2238
2239         if (unlikely(!dt_try_as_dir(env, parent, true)))
2240                 GOTO(log, rc = -ENOTDIR);
2241
2242         if (!update || strcmp(name, name2) == 0)
2243                 rc = lfsck_lock(env, lfsck, parent, name, llh,
2244                                 MDS_INODELOCK_UPDATE, LCK_PW);
2245         else
2246                 rc = lfsck_ibits_lock(env, lfsck, parent, &lh,
2247                                       MDS_INODELOCK_UPDATE, LCK_PW);
2248         if (rc != 0)
2249                 GOTO(log, rc);
2250
2251         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
2252                 GOTO(unlock1, rc = 1);
2253
2254         th = lfsck_trans_create(env, dev, lfsck);
2255         if (IS_ERR(th))
2256                 GOTO(unlock1, rc = PTR_ERR(th));
2257
2258         rc = dt_declare_delete(env, parent, (const struct dt_key *)name, th);
2259         if (rc != 0)
2260                 GOTO(stop, rc);
2261
2262         if (update) {
2263                 rec->rec_type = lfsck_object_type(child) & S_IFMT;
2264                 LASSERT(!fid_is_zero(&cfid));
2265                 rec->rec_fid = &cfid;
2266                 rc = dt_declare_insert(env, parent,
2267                                        (const struct dt_rec *)rec,
2268                                        (const struct dt_key *)name2, th);
2269                 if (rc != 0)
2270                         GOTO(stop, rc);
2271         }
2272
2273         if (dec && S_ISDIR(type)) {
2274                 rc = dt_declare_ref_del(env, parent, th);
2275                 if (rc != 0)
2276                         GOTO(stop, rc);
2277         }
2278
2279         rc = dt_trans_start_local(env, dev, th);
2280         if (rc != 0)
2281                 GOTO(stop, rc);
2282
2283
2284         dt_write_lock(env, parent, 0);
2285         rc = dt_lookup_dir(env, dt_object_child(parent), name, &tfid);
2286         /* Someone has removed the bad name entry by race. */
2287         if (rc == -ENOENT)
2288                 GOTO(unlock2, rc = 0);
2289
2290         if (rc != 0)
2291                 GOTO(unlock2, rc);
2292
2293         /* Someone has removed the bad name entry and reused it for other
2294          * object by race. */
2295         if (!lu_fid_eq(&tfid, &cfid))
2296                 GOTO(unlock2, rc = 0);
2297
2298         rc = dt_delete(env, parent, (const struct dt_key *)name, th);
2299         if (rc != 0)
2300                 GOTO(unlock2, rc);
2301
2302         if (update) {
2303                 rc = dt_insert(env, parent,
2304                                (const struct dt_rec *)rec,
2305                                (const struct dt_key *)name2, th);
2306                 if (rc != 0)
2307                         GOTO(unlock2, rc);
2308         }
2309
2310         if (dec && S_ISDIR(type)) {
2311                 rc = dt_ref_del(env, parent, th);
2312                 if (rc != 0)
2313                         GOTO(unlock2, rc);
2314         }
2315
2316         GOTO(unlock2, rc = (rc == 0 ? 1 : rc));
2317
2318 unlock2:
2319         dt_write_unlock(env, parent);
2320
2321 stop:
2322         dt_trans_stop(env, dev, th);
2323
2324         /* We are not sure whether the child will become orphan or not.
2325          * Record it in the LFSCK trace file for further checking in
2326          * the second-stage scanning. */
2327         if (!update && !dec && child && rc == 0)
2328                 lfsck_namespace_trace_update(env, com, &cfid,
2329                                              LNTF_CHECK_LINKEA, true);
2330
2331 unlock1:
2332         /* It is harmless even if unlock the unused lock_handle */
2333         lfsck_ibits_unlock(&lh, LCK_PW);
2334         lfsck_unlock(llh);
2335
2336 log:
2337         CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found bad name "
2338                "entry for: parent "DFID", child "DFID", name %s, type "
2339                "in name entry %o, type claimed by child %o. repair it "
2340                "by %s with new name2 %s: rc = %d\n",
2341                lfsck_lfsck2name(lfsck), PFID(pfid), PFID(&cfid),
2342                name, type, update ? lfsck_object_type(child) : 0,
2343                update ? "updating" : "removing", name2, rc);
2344
2345         if (rc != 0) {
2346                 struct lfsck_namespace *ns = com->lc_file_ram;
2347
2348                 ns->ln_flags |= LF_INCONSISTENT;
2349         }
2350
2351         return rc;
2352 }
2353
2354 /**
2355  * Update the ".." name entry for the given object.
2356  *
2357  * The object's ".." is corrupted. This function replaces the ".." name
2358  * entry with one that references @pfid, and rewrites the linkEA with the
      * data that linkea_links_new() builds from @cname and @pfid. Both updates
      * are made in the same local transaction, under the write lock of @obj.
      *
      * In dryrun mode (LPF_DRYRUN) nothing is modified and no transaction is
      * created; the case is only reported as repaired.
      *
      * Any non-zero return value (repaired or failed) also sets LF_INCONSISTENT
      * in the namespace LFSCK flags (com->lc_file_ram).
2359  *
2360  * The caller should take the ldlm lock before calling this function.
2361  *
2362  * \param[in] env       pointer to the thread context
2363  * \param[in] com       pointer to the lfsck component
2364  * \param[in] obj       pointer to the dt_object to be handled; asserted to
      *                      be a local (non-remote) directory
2365  * \param[in] pfid      the new fid for the object's ".." name entry
2366  * \param[in] cname     the name for the @obj in the parent directory
2367  *
2368  * \retval              positive number for repaired cases (also returned
      *                      in dryrun mode)
2369  * \retval              0 if nothing to be repaired (@obj is found dead)
2370  * \retval              negative error number on failure
2371  */
2372 static int lfsck_namespace_repair_unmatched_pairs(const struct lu_env *env,
2373                                                   struct lfsck_component *com,
2374                                                   struct dt_object *obj,
2375                                                   const struct lu_fid *pfid,
2376                                                   struct lu_name *cname)
2377 {
2378         struct lfsck_thread_info        *info   = lfsck_env_info(env);
2379         struct dt_insert_rec            *rec    = &info->lti_dt_rec;
2380         struct lfsck_instance           *lfsck  = com->lc_lfsck;
2381         struct dt_device                *dev    = lfsck_obj2dev(obj);
2382         struct thandle                  *th     = NULL;
2383         struct linkea_data               ldata  = { NULL };
2384         struct lu_buf                    linkea_buf;
2385         int                              rc     = 0;
2386         ENTRY;
2387
2388         LASSERT(!dt_object_remote(obj));
2389         LASSERT(S_ISDIR(lfsck_object_type(obj)));
2390
             /* Dryrun: report the case as repaired before the new linkEA is
              * built and before any transaction is created. */
2391         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
2392                 GOTO(log, rc = 1);
2393
             /* Build the replacement linkEA (from @cname and @pfid) in
              * info->lti_big_buf. */
2394         rc = linkea_links_new(&ldata, &info->lti_big_buf, cname, pfid);
2395         if (rc != 0)
2396                 GOTO(log, rc);
2397
             /* linkea_buf describes the new linkEA; it is only used to declare
              * the xattr update below, the write itself goes through ldata. */
2398         lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
2399                        ldata.ld_leh->leh_len);
2400
2401         th = lfsck_trans_create(env, dev, lfsck);
2402         if (IS_ERR(th))
2403                 GOTO(log, rc = PTR_ERR(th));
2404
             /* Declare every update the transaction may make: delete the old
              * "..", insert the new "..", and set the linkEA xattr. */
2405         rc = dt_declare_delete(env, obj, (const struct dt_key *)dotdot, th);
2406         if (rc != 0)
2407                 GOTO(stop, rc);
2408
2409         rec->rec_type = S_IFDIR;
2410         rec->rec_fid = pfid;
2411         rc = dt_declare_insert(env, obj, (const struct dt_rec *)rec,
2412                                (const struct dt_key *)dotdot, th);
2413         if (rc != 0)
2414                 GOTO(stop, rc);
2415
2416         rc = dt_declare_xattr_set(env, obj, &linkea_buf,
2417                                   XATTR_NAME_LINK, 0, th);
2418         if (rc != 0)
2419                 GOTO(stop, rc);
2420
2421         rc = dt_trans_start_local(env, dev, th);
2422         if (rc != 0)
2423                 GOTO(stop, rc);
2424
2425         dt_write_lock(env, obj, 0);
             /* A dead object is left untouched: nothing to repair. */
2426         if (unlikely(lfsck_is_dead_obj(obj)))
2427                 GOTO(unlock, rc = 0);
2428
2429         /* The old ".." name entry may not exist, so the result of this
              * delete is not checked. */
2430         dt_delete(env, obj, (const struct dt_key *)dotdot, th);
2431
2432         rc = dt_insert(env, obj, (const struct dt_rec *)rec,
2433                        (const struct dt_key *)dotdot, th);
2434         if (rc != 0)
2435                 GOTO(unlock, rc);
2436
2437         rc = lfsck_links_write(env, obj, &ldata, th);
2438
             /* Success is reported as 1 (repaired); an error is passed through
              * unchanged. */
2439         GOTO(unlock, rc = (rc == 0 ? 1 : rc));
2440
2441 unlock:
2442         dt_write_unlock(env, obj);
2443
2444 stop:
2445         dt_trans_stop(env, dev, th);
2446
2447 log:
2448         CDEBUG(D_LFSCK, "%s: namespace LFSCK rebuild dotdot name entry for "
2449                "the object "DFID", new parent "DFID": rc = %d\n",
2450                lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)),
2451                PFID(pfid), rc);
2452
             /* Both a repair (including the dryrun case) and a failure mark the
              * namespace LFSCK as having seen an inconsistency. */
2453         if (rc != 0) {
2454                 struct lfsck_namespace *ns = com->lc_file_ram;
2455
2456                 ns->ln_flags |= LF_INCONSISTENT;
2457         }
2458
2459         return rc;
2460 }
2461
2462 /**
2463  * Handle orphan @obj during Double Scan Directory.
2464  *
2465  * Remove the @obj's current (invalid) linkEA entries, and insert
2466  * it in the directory .lustre/lost+found/MDTxxxx/ with the name:
2467  * ${FID}-${PFID}-D-${conflict_version}
      *
      * lfsck_ibits_unlock() is always called on @lh by this function, right
      * after the linkEA removal and before the removal result is checked.
      * A -ENODATA result from the removal is not treated as a failure.
      *
      * If the namespace LFSCK is flagged LF_INCOMPLETE, the object is not
      * inserted into lost+found and 0 is returned.
2468  *
2469  * The caller should take the ldlm lock before calling this function.
2470  *
2471  * \param[in] env       pointer to the thread context
2472  * \param[in] com       pointer to the lfsck component
2473  * \param[in] obj       pointer to the orphan object to be handled
2474  * \param[in] pfid      the FID used to build the ${PFID} part of the name
2475  * \param[in,out] lh    ldlm lock handler for the given @obj
2476  * \param[out] type     to tell the caller what the inconsistency is; set to
      *                      LNIT_MUL_REF unless the linkEA removal fails
2477  *
2478  * \retval              positive number for repaired cases
2479  * \retval              0 if nothing to be repaired
2480  * \retval              negative error number on failure
2481  */
2482 static int
2483 lfsck_namespace_dsd_orphan(const struct lu_env *env,
2484                            struct lfsck_component *com,
2485                            struct dt_object *obj,
2486                            const struct lu_fid *pfid,
2487                            struct lustre_handle *lh,
2488                            enum lfsck_namespace_inconsistency_type *type)
2489 {
2490         struct lfsck_thread_info *info = lfsck_env_info(env);
2491         struct lfsck_namespace   *ns   = com->lc_file_ram;
2492         int                       rc;
2493         ENTRY;
2494
2495         /* Remove the unrecognized linkEA. The lock is dropped before the
              * result is examined, so it is released on the error path too. */
2496         rc = lfsck_namespace_links_remove(env, com, obj);
2497         lfsck_ibits_unlock(lh, LCK_EX);
2498         if (rc < 0 && rc != -ENODATA)
2499                 RETURN(rc);
2500
2501         *type = LNIT_MUL_REF;
2502
2503         /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT has
2504          * ever tried to verify some remote MDT-object that resides on this
2505          * MDT, but this MDT failed to respond such request. So means there
2506          * may be some remote name entry on other MDT that references this
2507          * object with another name, so we cannot know whether this linkEA
2508          * is valid or not. So keep it there and maybe resolved when next
2509          * LFSCK run.
              *
              * NOTE(review): lfsck_namespace_links_remove() has already been
              * called above at this point; "keep it there" presumably means
              * the object is not moved to lost+found - confirm. */
2510         if (ns->ln_flags & LF_INCOMPLETE)
2511                 RETURN(0);
2512
2513         /* The unique linkEA is invalid, even if the ".." name entry may be
2514          * valid, we still cannot know via which name entry this directory
2515          * will be referenced. Then handle it as pure orphan. */
             /* "-<pfid>" is handed to the orphan insertion together with the
              * type string "D". */
2516         snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf),
2517                  "-"DFID, PFID(pfid));
2518         rc = lfsck_namespace_insert_orphan(env, com, obj,
2519                                            info->lti_tmpbuf, "D", NULL);
2520
2521         RETURN(rc);
2522 }
2523
2524 /**
2525  * Double Scan Directory object for single linkEA entry case.
2526  *
2527  * The given @child has unique linkEA entry. If the linkEA entry is valid,
2528  * then check whether the name is in the namespace or not, if not, add the
2529  * missing name entry back to namespace. If the linkEA entry is invalid,
2530  * then remove it and insert the @child in the .lustre/lost+found/MDTxxxx/
2531  * as an orphan.
      *
      * Whenever a repair is needed but the caller holds no ldlm lock on the
      * @child (@lh is not in use) and @retry is not NULL, nothing is changed:
      * *@retry is set and the function returns, so that the caller can take
      * the lock and call again. Otherwise lfsck_ibits_unlock() is called on
      * @lh (here or in lfsck_namespace_dsd_orphan()) before the repair is
      * made, except for the unmatched ".." case at the end of the function.
2532  *
2533  * \param[in] env       pointer to the thread context
2534  * \param[in] com       pointer to the lfsck component
2535  * \param[in] child     pointer to the directory to be double scanned
2536  * \param[in] pfid      the FID corresponding to the ".." entry
2537  * \param[in] ldata     pointer to the linkEA data for the given @child
2538  * \param[in,out] lh    ldlm lock handler for the given @child
2539  * \param[out] type     to tell the caller what the inconsistency is
2540  * \param[out] retry    if found inconsistency, but the caller does not hold
2541  *                      ldlm lock on the @child, then set @retry as true;
      *                      may be NULL
2542  * \param[out] unknown  set if does not know how to repair the inconsistency;
      *                      may be NULL
2543  *
2544  * \retval              positive number for repaired cases
2545  * \retval              0 if nothing to be repaired
2546  * \retval              negative error number on failure
2547  */
2548 static int
2549 lfsck_namespace_dsd_single(const struct lu_env *env,
2550                            struct lfsck_component *com,
2551                            struct dt_object *child,
2552                            const struct lu_fid *pfid,
2553                            struct linkea_data *ldata,
2554                            struct lustre_handle *lh,
2555                            enum lfsck_namespace_inconsistency_type *type,
2556                            bool *retry, bool *unknown)
2557 {
2558         struct lfsck_thread_info *info          = lfsck_env_info(env);
2559         struct lu_name           *cname         = &info->lti_name;
2560         const struct lu_fid      *cfid          = lfsck_dto2fid(child);
2561         struct lu_fid             tfid;
2562         struct lfsck_namespace   *ns            = com->lc_file_ram;
2563         struct lfsck_instance    *lfsck         = com->lc_lfsck;
2564         struct dt_object         *parent        = NULL;
2565         struct lmv_mds_md_v1     *lmv;
2566         int                       rc            = 0;
2567         ENTRY;
2568
             /* Unpack the linkEA entry: the name goes to cname and the parent
              * FID to tfid. */
2569         rc = lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid,
2570                                                  info->lti_key,
2571                                                  sizeof(info->lti_key));
2572         /* The unique linkEA entry with bad parent will be handled as orphan. */
2573         if (rc != 0) {
                     /* NOTE(review): on this retry path rc still holds the
                      * non-zero unpack result, while the other retry paths
                      * below return 0 - confirm the caller checks *retry
                      * before rc. */
2574                 if (!lustre_handle_is_used(lh) && retry != NULL)
2575                         *retry = true;
2576                 else
2577                         rc = lfsck_namespace_dsd_orphan(env, com, child,
2578                                                         pfid, lh, type);
2579
2580                 GOTO(out, rc);
2581         }
2582
2583         parent = lfsck_object_find_bottom(env, lfsck, &tfid);
2584         if (IS_ERR(parent))
2585                 GOTO(out, rc = PTR_ERR(parent));
2586
2587         /* We trust the unique linkEA entry in spite of whether it matches the
2588          * ".." name entry or not. Because even if the linkEA entry is wrong
2589          * and the ".." name entry is right, we still cannot know via which
2590          * name entry the child will be referenced, since all known entries
2591          * have been verified during the first-stage scanning. */
2592         if (!dt_object_exists(parent)) {
2593                 /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT
2594                  * has ever tried to verify some remote MDT-object that resides
2595                  * on this MDT, but this MDT failed to respond such request. So
2596                  * means there may be some remote name entry on other MDT that
2597                  * references this object with another name, so we cannot know
2598                  * whether this linkEA is valid or not. So keep it there and
2599                  * maybe resolved when next LFSCK run. */
2600                 if (ns->ln_flags & LF_INCOMPLETE)
2601                         GOTO(out, rc = 0);
2602
2603                 if (!lustre_handle_is_used(lh) && retry != NULL) {
2604                         *retry = true;
2605
2606                         GOTO(out, rc = 0);
2607                 }
2608
2609                 lfsck_ibits_unlock(lh, LCK_EX);
2610
                     /* This label is also entered by goto from the -ENOENT
                      * lookup branch further below (when the name check
                      * returns -ENOENT, or the name insertion returns -ESTALE);
                      * on those paths lh has already been unlocked there. */
2611 lost_parent:
2612                 lmv = &info->lti_lmv;
2613                 rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
2614                 if (rc != 0 && rc != -ENODATA)
2615                         GOTO(out, rc);
2616
                     /* Without a stripe LMV on the child, the lost parent is
                      * created with no LMV information (lmv == NULL). */
2617                 if (rc == -ENODATA || lmv->lmv_magic != LMV_MAGIC_STRIPE) {
2618                         lmv = NULL;
2619                 } else if (lfsck_shard_name_to_index(env,
2620                                         cname->ln_name, cname->ln_namelen,
2621                                         S_IFDIR, cfid) < 0) {
2622                         /* It is an invalid name entry, we
2623                          * cannot trust the parent also. */
2624                         rc = lfsck_namespace_shrink_linkea(env, com, child,
2625                                                 ldata, cname, &tfid, true);
2626                         if (rc < 0)
2627                                 GOTO(out, rc);
2628
2629                         snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf),
2630                                  "-"DFID, PFID(pfid));
2631                         rc = lfsck_namespace_insert_orphan(env, com, child,
2632                                                 info->lti_tmpbuf, "S", NULL);
2633
2634                         GOTO(out, rc);
2635                 }
2636
2637                 /* Create the lost parent as an orphan. */
2638                 rc = lfsck_namespace_create_orphan_dir(env, com, parent, lmv);
2639                 if (rc >= 0) {
2640                         /* Add the missing name entry to the parent. */
2641                         rc = lfsck_namespace_insert_normal(env, com, parent,
2642                                                            child, cname);
2643                         if (unlikely(rc == -EEXIST)) {
2644                                 /* Unfortunately, someone reused the name
2645                                  * under the parent by race. So we have
2646                                  * to remove the linkEA entry from
2647                                  * current child object. It means that the
2648                                  * LFSCK cannot recover the system
2649                                  * totally back to its original status,
2650                                  * but it is necessary to make the
2651                                  * current system to be consistent. */
2652                                 rc = lfsck_namespace_shrink_linkea(env,
2653                                                 com, child, ldata,
2654                                                 cname, &tfid, true);
2655                                 if (rc >= 0) {
2656                                         snprintf(info->lti_tmpbuf,
2657                                                  sizeof(info->lti_tmpbuf),
2658                                                  "-"DFID, PFID(pfid));
2659                                         rc = lfsck_namespace_insert_orphan(env,
2660                                                 com, child, info->lti_tmpbuf,
2661                                                 "D", NULL);
2662                                 }
2663                         }
2664                 }
2665
2666                 GOTO(out, rc);
2667         } /* !dt_object_exists(parent) */
2668
2669         /* The unique linkEA entry with bad parent will be handled as orphan. */
2670         if (unlikely(!dt_try_as_dir(env, parent, true))) {
2671                 if (!lustre_handle_is_used(lh) && retry != NULL)
2672                         *retry = true;
2673                 else
2674                         rc = lfsck_namespace_dsd_orphan(env, com, child,
2675                                                         pfid, lh, type);
2676
2677                 GOTO(out, rc);
2678         }
2679
             /* Look the name up under the parent. tfid is reused as the lookup
              * result and is compared with the child's FID further below.
              * NOTE(review): the -ENOENT branch still passes tfid on as the
              * linkEA parent FID; presumably dt_lookup_dir() leaves it
              * untouched on failure - confirm. */
2680         rc = dt_lookup_dir(env, parent, cname->ln_name, &tfid);
2681         if (rc == -ENOENT) {
2682                 /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT
2683                  * has ever tried to verify some remote MDT-object that resides
2684                  * on this MDT, but this MDT failed to respond such request. So
2685                  * means there may be some remote name entry on other MDT that
2686                  * references this object with another name, so we cannot know
2687                  * whether this linkEA is valid or not. So keep it there and
2688                  * maybe resolved when next LFSCK run. */
2689                 if (ns->ln_flags & LF_INCOMPLETE)
2690                         GOTO(out, rc = 0);
2691
2692                 if (!lustre_handle_is_used(lh) && retry != NULL) {
2693                         *retry = true;
2694
2695                         GOTO(out, rc = 0);
2696                 }
2697
2698                 lfsck_ibits_unlock(lh, LCK_EX);
2699                 rc = lfsck_namespace_check_name(env, lfsck, parent, child,
2700                                                 cname);
2701                 if (rc == -ENOENT)
2702                         goto lost_parent;
2703
2704                 if (rc < 0)
2705                         GOTO(out, rc);
2706
2707                 /* It is an invalid name entry, drop it. */
2708                 if (unlikely(rc > 0)) {
2709                         rc = lfsck_namespace_shrink_linkea(env, com, child,
2710                                                 ldata, cname, &tfid, true);
2711                         if (rc >= 0) {
2712                                 snprintf(info->lti_tmpbuf,
2713                                          sizeof(info->lti_tmpbuf),
2714                                          "-"DFID, PFID(pfid));
2715                                 rc = lfsck_namespace_insert_orphan(env, com,
2716                                         child, info->lti_tmpbuf, "D", NULL);
2717                         }
2718
2719                         GOTO(out, rc);
2720                 }
2721
2722                 /* Add the missing name entry back to the namespace. */
2723                 rc = lfsck_namespace_insert_normal(env, com, parent, child,
2724                                                    cname);
2725                 if (unlikely(rc == -ESTALE))
2726                         /* It may happen when the remote object has been
2727                          * removed, but the local MDT is not aware of that. */
2728                         goto lost_parent;
2729
2730                 if (unlikely(rc == -EEXIST)) {
2731                         /* Unfortunately, someone reused the name under the
2732                          * parent by race. So we have to remove the linkEA
2733                          * entry from current child object. It means that the
2734                          * LFSCK cannot recover the system totally back to
2735                          * its original status, but it is necessary to make
2736                          * the current system to be consistent.
2737                          *
2738                          * It also may be because of the LFSCK found some
2739                          * internal status of create operation. Under such
2740                          * case, nothing to be done. */
2741                         rc = lfsck_namespace_shrink_linkea_cond(env, com,
2742                                         parent, child, ldata, cname, &tfid);
2743                         if (rc >= 0) {
2744                                 snprintf(info->lti_tmpbuf,
2745                                          sizeof(info->lti_tmpbuf),
2746                                          "-"DFID, PFID(pfid));
2747                                 rc = lfsck_namespace_insert_orphan(env, com,
2748                                         child, info->lti_tmpbuf, "D", NULL);
2749                         }
2750                 }
2751
2752                 GOTO(out, rc);
2753         } /* rc == -ENOENT */
2754
2755         if (rc != 0)
2756                 GOTO(out, rc);
2757
             /* The name exists under the parent but references another FID. */
2758         if (!lu_fid_eq(&tfid, cfid)) {
2759                 if (!lustre_handle_is_used(lh) && retry != NULL) {
2760                         *retry = true;
2761
2762                         GOTO(out, rc = 0);
2763                 }
2764
2765                 lfsck_ibits_unlock(lh, LCK_EX);
2766                 /* The name entry references another MDT-object that
2767                  * may be created by the LFSCK for repairing dangling
2768                  * name entry. Try to replace it. */
2769                 rc = lfsck_namespace_replace_cond(env, com, parent, child,
2770                                                   &tfid, cname);
                     /* A zero result from the conditional replace is followed
                      * by handling the child as an orphan. */
2771                 if (rc == 0)
2772                         rc = lfsck_namespace_dsd_orphan(env, com, child,
2773                                                         pfid, lh, type);
2774
2775                 GOTO(out, rc);
2776         }
2777
2778         /* Zero FID may be because the remote directory object has invalid
2779          * or lost linkEA. Under such case, the LFSCK on this MDT does not know
2780          * how to repair the inconsistency, but the namespace LFSCK on the MDT
2781          * where its name entry resides may have more information (name, FID)
2782          * to repair such inconsistency. So here, keep the inconsistency to
2783          * avoid some improper repairing. */
2784         if (fid_is_zero(pfid)) {
2785                 if (unknown)
2786                         *unknown = true;
2787
2788                 GOTO(out, rc = 0);
2789         }
2790
2791         /* The ".." name entry is wrong, update it. */
2792         if (!lu_fid_eq(pfid, lfsck_dto2fid(parent))) {
2793                 if (!lustre_handle_is_used(lh) && retry != NULL) {
2794                         *retry = true;
2795
2796                         GOTO(out, rc = 0);
2797                 }
2798
                     /* Unlike the repairs above, lh is not unlocked before
                      * this one. */
2799                 *type = LNIT_UNMATCHED_PAIRS;
2800                 rc = lfsck_namespace_repair_unmatched_pairs(env, com, child,
2801                                                 lfsck_dto2fid(parent), cname);
2802         }
2803
2804         GOTO(out, rc);
2805
2806 out:
             /* parent is NULL if the linkEA entry could not be unpacked, and
              * an error pointer if the parent lookup failed. */
2807         if (parent != NULL && !IS_ERR(parent))
2808                 lfsck_object_put(env, parent);
2809
2810         return rc;
2811 }
2812
2813 /**
2814  * Double Scan Directory object for multiple linkEA entries case.
2815  *
2816  * The given @child has multiple linkEA entries. At most one linkEA entry
2817  * will be valid; all the others will be removed. Firstly, the function
2818  * will try to find the linkEA entry for which the name entry exists under
2819  * the given parent (@pfid). If no linkEA entry matches the given ".." name
2820  * entry, it then tries to find the first linkEA entry for which both the
2821  * parent and the name entry exist, to rebuild a new ".." name entry.
2822  *
2823  * \param[in] env       pointer to the thread context
2824  * \param[in] com       pointer to the lfsck component
2825  * \param[in] child     pointer to the directory to be double scanned
2826  * \param[in] pfid      the FID corresponding to the ".." entry
2827  * \param[in] ldata     pointer to the linkEA data for the given @child
2828  * \param[in,out] lh    ldlm lock handler for the given @child
2829  * \param[out] type     to tell the caller what the inconsistency is
2830  * \param[in] lpf       true if the ".." entry is under lost+found/MDTxxxx/
2831  * \param[in] unknown   set if does not know how to repair the inconsistency
2832  *
2833  * \retval              positive number for repaired cases
2834  * \retval              0 if nothing to be repaired
2835  * \retval              negative error number on failure
2836  */
2837 static int
2838 lfsck_namespace_dsd_multiple(const struct lu_env *env,
2839                              struct lfsck_component *com,
2840                              struct dt_object *child,
2841                              const struct lu_fid *pfid,
2842                              struct linkea_data *ldata,
2843                              struct lustre_handle *lh,
2844                              enum lfsck_namespace_inconsistency_type *type,
2845                              bool lpf, bool *unknown)
2846 {
2847         struct lfsck_thread_info *info          = lfsck_env_info(env);
2848         struct lu_name           *cname         = &info->lti_name;
2849         const struct lu_fid      *cfid          = lfsck_dto2fid(child);
2850         struct lu_fid            *pfid2         = &info->lti_fid3;
2851         struct lu_fid             tfid;
2852         struct lfsck_namespace   *ns            = com->lc_file_ram;
2853         struct lfsck_instance    *lfsck         = com->lc_lfsck;
2854         struct lfsck_bookmark    *bk            = &lfsck->li_bookmark_ram;
2855         struct dt_object         *parent        = NULL;
2856         struct linkea_data        ldata_new     = { NULL };
2857         int                       dirent_count  = 0;
2858         int                       rc            = 0;
2859         bool                      once          = true;
2860         ENTRY;
2861
2862 again:
2863         while (ldata->ld_lee != NULL) {
2864                 rc = lfsck_namespace_unpack_linkea_entry(ldata, cname, &tfid,
2865                                                          info->lti_key,
2866                                                          sizeof(info->lti_key));
2867                 /* Drop invalid linkEA entry. */
2868                 if (rc != 0) {
2869                         lfsck_linkea_del_buf(ldata, cname);
2870                         continue;
2871                 }
2872
2873                 /* Drop repeated linkEA entries. */
2874                 lfsck_namespace_filter_linkea_entry(ldata, cname, &tfid, true);
2875
2876                 /* If current dotdot is the .lustre/lost+found/MDTxxxx/,
2877                  * then it is possible that: the directory object has ever
2878                  * been lost, but its name entry was there. In the former
2879                  * LFSCK run, during the first-stage scanning, the LFSCK
2880                  * found the dangling name entry, but it did not recreate
2881                  * the lost object, and when moved to the second-stage
2882                  * scanning, some children objects of the lost directory
2883                  * object were found, then the LFSCK recreated such lost
2884                  * directory object as an orphan.
2885                  *
2886                  * When the LFSCK runs again, if the dangling name is still
2887                  * there, the LFSCK should move the orphan directory object
2888                  * back to the normal namespace. */
2889                 if (!lpf && !fid_is_zero(pfid) &&
2890                     !lu_fid_eq(pfid, &tfid) && once) {
2891                         linkea_next_entry(ldata);
2892                         continue;
2893                 }
2894
2895                 parent = lfsck_object_find_bottom(env, lfsck, &tfid);
2896                 if (IS_ERR(parent)) {
2897                         rc = PTR_ERR(parent);
2898                         /* if @pfid doesn't have a valid OI mapping, it will
2899                          * trigger OI scrub, and -ENOENT is returned if it's
2900                          * remote, -EINPROGRESS if local.
2901                          */
2902                         if ((rc == -ENOENT || rc == -EINPROGRESS) &&
2903                             ldata->ld_leh->leh_reccount > 1) {
2904                                 lfsck_linkea_del_buf(ldata, cname);
2905                                 continue;
2906                         }
2907
2908                         RETURN(rc);
2909                 }
2910
2911                 if (!dt_object_exists(parent)) {
2912                         lfsck_object_put(env, parent);
2913                         if (ldata->ld_leh->leh_reccount > 1) {
2914                                 /* If it is NOT the last linkEA entry, then
2915                                  * there is still other chance to make the
2916                                  * child to be visible via other parent, then
2917                                  * remove this linkEA entry. */
2918                                 lfsck_linkea_del_buf(ldata, cname);
2919                                 continue;
2920                         }
2921
2922                         break;
2923                 }
2924
2925                 /* The linkEA entry with bad parent will be removed. */
2926                 if (unlikely(!dt_try_as_dir(env, parent, true))) {
2927                         lfsck_object_put(env, parent);
2928                         lfsck_linkea_del_buf(ldata, cname);
2929                         continue;
2930                 }
2931
2932                 rc = dt_lookup_dir(env, parent, cname->ln_name, &tfid);
2933                 *pfid2 = *lfsck_dto2fid(parent);
2934                 if (rc == -ENOENT) {
2935                         lfsck_object_put(env, parent);
2936                         linkea_next_entry(ldata);
2937                         continue;
2938                 }
2939
2940                 if (rc != 0) {
2941                         lfsck_object_put(env, parent);
2942
2943                         RETURN(rc);
2944                 }
2945
2946                 if (lu_fid_eq(&tfid, cfid)) {
2947                         lfsck_object_put(env, parent);
2948                         /* If the parent (that is declared via linkEA entry)
2949                          * directory contains the specified child, but such
2950                          * parent does not match the dotdot name entry, then
2951                          * trust the linkEA. */
2952                         if (!fid_is_zero(pfid) && !lu_fid_eq(pfid, pfid2)) {
2953                                 *type = LNIT_UNMATCHED_PAIRS;
2954                                 rc = lfsck_namespace_repair_unmatched_pairs(env,
2955                                                 com, child, pfid2, cname);
2956
2957                                 RETURN(rc);
2958                         }
2959
2960 rebuild:
2961                         /* It is the most common case that we find the
2962                          * name entry corresponding to the linkEA entry
2963                          * that matches the ".." name entry. */
2964                         rc = linkea_links_new(&ldata_new, &info->lti_big_buf,
2965                                               cname, pfid2);
2966                         if (rc != 0)
2967                                 RETURN(rc);
2968
2969                         rc = lfsck_namespace_rebuild_linkea(env, com, child,
2970                                                             &ldata_new);
2971                         if (rc < 0)
2972                                 RETURN(rc);
2973
2974                         lfsck_linkea_del_buf(ldata, cname);
2975                         linkea_first_entry(ldata);
2976                         /* There may be some invalid dangling name entries under
2977                          * other parent directories, remove all of them. */
2978                         while (ldata->ld_lee != NULL) {
2979                                 rc = lfsck_namespace_unpack_linkea_entry(ldata,
2980                                                 cname, &tfid, info->lti_key,
2981                                                 sizeof(info->lti_key));
2982                                 if (rc != 0)
2983                                         goto next;
2984
2985                                 parent = lfsck_object_find_bottom(env, lfsck,
2986                                                                   &tfid);
2987                                 if (IS_ERR(parent)) {
2988                                         rc = PTR_ERR(parent);
2989                                         if (rc != -ENOENT &&
2990                                             bk->lb_param & LPF_FAILOUT)
2991                                                 RETURN(rc);
2992
2993                                         goto next;
2994                                 }
2995
2996                                 if (!dt_object_exists(parent)) {
2997                                         lfsck_object_put(env, parent);
2998                                         goto next;
2999                                 }
3000
3001                                 rc = lfsck_namespace_repair_dirent(env, com,
3002                                         parent, child, cname->ln_name,
3003                                         cname->ln_name, S_IFDIR, false, true);
3004                                 lfsck_object_put(env, parent);
3005                                 if (rc < 0) {
3006                                         if (bk->lb_param & LPF_FAILOUT)
3007                                                 RETURN(rc);
3008
3009                                         goto next;
3010                                 }
3011
3012                                 dirent_count += rc;
3013
3014 next:
3015                                 lfsck_linkea_del_buf(ldata, cname);
3016                         }
3017
3018                         ns->ln_dirent_repaired += dirent_count;
3019
3020                         RETURN(rc);
3021                 } /* lu_fid_eq(&tfid, lfsck_dto2fid(child)) */
3022
3023                 lfsck_ibits_unlock(lh, LCK_EX);
3024                 /* The name entry references another MDT-object that may be
3025                  * created by the LFSCK for repairing dangling name entry.
3026                  * Try to replace it. */
3027                 rc = lfsck_namespace_replace_cond(env, com, parent, child,
3028                                                   &tfid, cname);
3029                 lfsck_object_put(env, parent);
3030                 if (rc < 0)
3031                         RETURN(rc);
3032
3033                 if (rc > 0)
3034                         goto rebuild;
3035
3036                 lfsck_linkea_del_buf(ldata, cname);
3037         } /* while (ldata->ld_lee != NULL) */
3038
3039         /* If there is still linkEA overflow, return. */
3040         if (unlikely(ldata->ld_leh->leh_overflow_time))
3041                 RETURN(0);
3042
3043         linkea_first_entry(ldata);
3044         if (ldata->ld_leh->leh_reccount == 1) {
3045                 rc = lfsck_namespace_dsd_single(env, com, child, pfid, ldata,
3046                                                 lh, type, NULL, unknown);
3047
3048                 RETURN(rc);
3049         }
3050
3051         /* All linkEA entries are invalid and removed, then handle the @child
3052          * as an orphan.*/
3053         if (ldata->ld_leh->leh_reccount == 0) {
3054                 rc = lfsck_namespace_dsd_orphan(env, com, child, pfid, lh,
3055                                                 type);
3056
3057                 RETURN(rc);
3058         }
3059
3060         /* If the dangling name entry for the orphan directory object has
3061          * been removed, then just check whether the directory object is
3062          * still under the .lustre/lost+found/MDTxxxx/ or not. */
3063         if (lpf) {
3064                 lpf = false;
3065                 goto again;
3066         }
3067
3068         /* There is no linkEA entry that matches the ".." name entry. Find
3069          * the first linkEA entry that both parent and name entry exist to
3070          * rebuild a new ".." name entry. */
3071         if (once) {
3072                 once = false;
3073                 goto again;
3074         }
3075
3076         RETURN(rc);
3077 }
3078
3079 /**
3080  * Repair the object's nlink attribute.
3081  *
3082  * If all the known name entries have been verified, then the object's hard
3083  * link attribute should match the object's linkEA entries count unless the
3084  * object's has too many hard link to be recorded in the linkEA. Such cases
3085  * should have been marked in the LFSCK trace file. Otherwise, trust the
3086  * linkEA to update the object's nlink attribute.
3087  *
3088  * \param[in] env       pointer to the thread context
3089  * \param[in] com       pointer to the lfsck component
3090  * \param[in] obj       pointer to the dt_object to be handled
3091  * \param[in,out] la    pointer to buffer to object's attribute before
3092  *                      and after the repairing
3093  *
3094  * \retval              positive number for repaired cases
3095  * \retval              0 if nothing to be repaired
3096  * \retval              negative error number on failure
3097  */
3098 static int lfsck_namespace_repair_nlink(const struct lu_env *env,
3099                                         struct lfsck_component *com,
3100                                         struct dt_object *obj,
3101                                         struct lu_attr *la)
3102 {
3103         struct lfsck_namespace          *ns     = com->lc_file_ram;
3104         struct lfsck_instance           *lfsck  = com->lc_lfsck;
3105         struct dt_device                *dev    = lfsck_obj2dev(obj);
3106         const struct lu_fid             *cfid   = lfsck_dto2fid(obj);
3107         struct thandle                  *th     = NULL;
3108         struct linkea_data               ldata  = { NULL };
3109         struct lustre_handle             lh     = { 0 };
3110         __u32                            old    = la->la_nlink;
3111         int                              rc     = 0;
3112         ENTRY;
3113
3114         LASSERT(!dt_object_remote(obj));
3115
3116         if (ns->ln_flags & LF_INCOMPLETE)
3117                 GOTO(log, rc = 0);
3118
3119         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
3120                 GOTO(log, rc = 1);
3121
3122         rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
3123                               MDS_INODELOCK_UPDATE, LCK_PW);
3124         if (rc != 0)
3125                 GOTO(log, rc);
3126
3127         th = lfsck_trans_create(env, dev, lfsck);
3128         if (IS_ERR(th))
3129                 GOTO(log, rc = PTR_ERR(th));
3130
3131         la->la_valid = LA_NLINK;
3132         rc = dt_declare_attr_set(env, obj, la, th);
3133         if (rc != 0)
3134                 GOTO(stop, rc);
3135
3136         rc = dt_trans_start_local(env, dev, th);
3137         if (rc != 0)
3138                 GOTO(stop, rc);
3139
3140         dt_write_lock(env, obj, 0);
3141         /* If the LFSCK is marked as LF_INCOMPLETE, then means some MDT has
3142          * ever tried to verify some remote MDT-object that resides on this
3143          * MDT, but this MDT failed to respond such request. So means there
3144          * may be some remote name entry on other MDT that references this
3145          * object with another name, so we cannot know whether this linkEA
3146          * is valid or not. So keep it there and maybe resolved when next
3147          * LFSCK run. */
3148         rc = dt_attr_get(env, obj, la);
3149         if (rc != 0)
3150                 GOTO(unlock, rc = (rc == -ENOENT ? 0 : rc));
3151
3152         rc = lfsck_links_read2_with_rec(env, obj, &ldata);
3153         if (rc)
3154                 GOTO(unlock, rc = (rc == -ENODATA ? 0 : rc));
3155
3156         /* XXX: Currently, we only update the nlink attribute if the known
3157          *      linkEA entries is larger than the nlink attribute. That is
3158          *      safe action. */
3159         if (la->la_nlink >= ldata.ld_leh->leh_reccount ||
3160             unlikely(la->la_nlink == 0 ||
3161                      ldata.ld_leh->leh_overflow_time))
3162                 GOTO(unlock, rc = 0);
3163
3164         la->la_nlink = ldata.ld_leh->leh_reccount;
3165
3166         rc = dt_attr_set(env, obj, la, th);
3167
3168         GOTO(unlock, rc = (rc == 0 ? 1 : rc));
3169
3170 unlock:
3171         dt_write_unlock(env, obj);
3172
3173 stop:
3174         dt_trans_stop(env, dev, th);
3175
3176 log:
3177         lfsck_ibits_unlock(&lh, LCK_PW);
3178         CDEBUG(D_LFSCK, "%s: namespace LFSCK repaired the object "DFID"'s "
3179                "nlink count from %u to %u: rc = %d\n",
3180                lfsck_lfsck2name(lfsck), PFID(cfid), old, la->la_nlink, rc);
3181
3182         if (rc != 0)
3183                 ns->ln_flags |= LF_INCONSISTENT;
3184
3185         return rc;
3186 }
3187
3188 /**
3189  * Double scan the directory object for namespace LFSCK.
3190  *
3191  * This function will verify the <parent, child> pairs in the namespace tree:
3192  * the parent references the child via some name entry that should be in the
3193  * child's linkEA entry, the child should back references the parent via its
3194  * ".." name entry.
3195  *
3196  * The LFSCK will scan every linkEA entry in turn until find out the first
3197  * matched pairs. If found, then all other linkEA entries will be dropped.
3198  * If all the linkEA entries cannot match the ".." name entry, then there
3199  * are serveral possible cases:
3200  *
3201  * 1) If there is only one linkEA entry, then trust it as long as the PFID
3202  *    in the linkEA entry is valid.
3203  *
3204  * 2) If there are multiple linkEA entries, then try to find the linkEA
3205  *    that matches the ".." name entry. If found, then all other entries
3206  *    are invalid; otherwise, it is quite possible that the ".." name entry
3207  *    is corrupted. Under such case, the LFSCK will rebuild the ".." name
3208  *    entry according to the first valid linkEA entry (both the parent and
3209  *    the name entry should exist).
3210  *
3211  * 3) If the directory object has no (valid) linkEA entry, then the
3212  *    directory object will be handled as pure orphan and inserted
3213  *    in the .lustre/lost+found/MDTxxxx/ with the name:
3214  *    ${self_FID}-${PFID}-D-${conflict_version}
3215  *
3216  * \param[in] env       pointer to the thread context
3217  * \param[in] com       pointer to the lfsck component
3218  * \param[in] child     pointer to the directory object to be handled
3219  * \param[in] flags     to indicate the specical checking on the @child
3220  *
3221  * \retval              positive number for repaired cases
3222  * \retval              0 if nothing to be repaired
3223  * \retval              negative error number on failure
3224  */
3225 static int lfsck_namespace_double_scan_dir(const struct lu_env *env,
3226                                            struct lfsck_component *com,
3227                                            struct dt_object *child, __u8 flags)
3228 {
3229         struct lfsck_thread_info *info          = lfsck_env_info(env);
3230         const struct lu_fid      *cfid          = lfsck_dto2fid(child);
3231         struct lu_fid            *pfid          = &info->lti_fid2;
3232         struct lfsck_namespace   *ns            = com->lc_file_ram;
3233         struct lfsck_instance    *lfsck         = com->lc_lfsck;
3234         struct lustre_handle      lh            = { 0 };
3235         struct linkea_data        ldata         = { NULL };
3236         bool                      unknown       = false;
3237         bool                      lpf           = false;
3238         bool                      retry         = false;
3239         enum lfsck_namespace_inconsistency_type type = LNIT_BAD_LINKEA;
3240         int                       rc            = 0;
3241         ENTRY;
3242
3243         LASSERT(!dt_object_remote(child));
3244
3245         if (flags & LNTF_UNCERTAIN_LMV) {
3246                 if (flags & LNTF_RECHECK_NAME_HASH) {
3247                         rc = lfsck_namespace_scan_shard(env, com, child);
3248                         if (rc < 0)
3249                                 RETURN(rc);
3250
3251                         ns->ln_striped_shards_scanned++;
3252                 } else {
3253                         ns->ln_striped_shards_skipped++;
3254                 }
3255         }
3256
3257         flags &= ~(LNTF_RECHECK_NAME_HASH | LNTF_UNCERTAIN_LMV);
3258         if (flags == 0)
3259                 RETURN(0);
3260
3261         if (flags & (LNTF_CHECK_LINKEA | LNTF_CHECK_PARENT) &&
3262             !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) {
3263                 CDEBUG(D_LFSCK,
3264                        "%s: some MDT(s) maybe NOT take part in the the namespace LFSCK, then the LFSCK cannot guarantee all the name entries have been verified in first-stage scanning. So have to skip orphan related handling for the directory object "DFID" with remote name entry\n",
3265                        lfsck_lfsck2name(lfsck), PFID(cfid));
3266
3267                 RETURN(0);
3268         }
3269
3270         if (unlikely(!dt_try_as_dir(env, child, true)))
3271                 GOTO(out, rc = -ENOTDIR);
3272
3273         /* We only take ldlm lock on the @child when required. When the
3274          * logic comes here for the first time, it is always false. */
3275         if (0) {
3276
3277 lock:
3278                 rc = lfsck_ibits_lock(env, lfsck, child, &lh,
3279                                       MDS_INODELOCK_UPDATE |
3280                                       MDS_INODELOCK_XATTR, LCK_EX);
3281                 if (rc != 0)
3282                         GOTO(out, rc);
3283         }
3284
3285         dt_read_lock(env, child, 0);
3286         if (unlikely(lfsck_is_dead_obj(child))) {
3287                 dt_read_unlock(env, child);
3288
3289                 GOTO(out, rc = 0);
3290         }
3291
3292         rc = dt_lookup_dir(env, child, dotdot, pfid);
3293         if (rc != 0) {
3294                 if (rc != -ENOENT && rc != -ENODATA && rc != -EINVAL) {
3295                         dt_read_unlock(env, child);
3296
3297                         GOTO(out, rc);
3298                 }
3299
3300                 if (!lustre_handle_is_used(&lh)) {
3301                         dt_read_unlock(env, child);
3302                         goto lock;
3303                 }
3304
3305                 fid_zero(pfid);
3306         } else if (lfsck->li_lpf_obj != NULL &&
3307                    lu_fid_eq(pfid, lfsck_dto2fid(lfsck->li_lpf_obj))) {
3308                 lpf = true;
3309         } else if (unlikely(!fid_is_sane(pfid))) {
3310                 fid_zero(pfid);
3311         }
3312
3313         rc = lfsck_links_read(env, child, &ldata);
3314         dt_read_unlock(env, child);
3315         if (rc != 0) {
3316                 if (rc != -ENODATA && rc != -EINVAL)
3317                         GOTO(out, rc);
3318
3319                 if (!lustre_handle_is_used(&lh))
3320                         goto lock;
3321
3322                 if (rc == -EINVAL && !fid_is_zero(pfid)) {
3323                         /* Remove the corrupted linkEA. */
3324                         rc = lfsck_namespace_links_remove(env, com, child);
3325                         if (rc == 0)
3326                                 /* Here, because of the crashed linkEA, we
3327                                  * cannot know whether there is some parent
3328                                  * that references the child directory via
3329                                  * some name entry or not. So keep it there,
3330                                  * when the LFSCK run next time, if there is
3331                                  * some parent that references this object,
3332                                  * then the LFSCK can rebuild the linkEA;
3333                                  * otherwise, this object will be handled
3334                                  * as orphan as above. */
3335                                 unknown = true;
3336                 } else {
3337                         /* 1. If we have neither ".." nor linkEA,
3338                          *    then it is an orphan.
3339                          *
3340                          * 2. If we only have the ".." name entry,
3341                          *    but no parent references this child
3342                          *    directory, then handle it as orphan. */
3343                         lfsck_ibits_unlock(&lh, LCK_EX);
3344                         type = LNIT_MUL_REF;
3345
3346                         /* If the LFSCK is marked as LF_INCOMPLETE,
3347                          * then means some MDT has ever tried to
3348                          * verify some remote MDT-object that resides
3349                          * on this MDT, but this MDT failed to respond
3350                          * such request. So means there may be some
3351                          * remote name entry on other MDT that
3352                          * references this object with another name,
3353                          * so we cannot know whether this linkEA is
3354                          * valid or not. So keep it there and maybe
3355                          * resolved when next LFSCK run. */
3356                         if (ns->ln_flags & LF_INCOMPLETE)
3357                                 GOTO(out, rc = 0);
3358
3359                         snprintf(info->lti_tmpbuf, sizeof(info->lti_tmpbuf),
3360                                  "-"DFID, PFID(pfid));
3361                         rc = lfsck_namespace_insert_orphan(env, com, child,
3362                                                 info->lti_tmpbuf, "D", NULL);
3363                 }
3364
3365                 GOTO(out, rc);
3366         } /* rc != 0 */
3367
3368         linkea_first_entry(&ldata);
3369         /* This is the most common case: the object has unique linkEA entry. */
3370         if (ldata.ld_leh->leh_reccount == 1) {
3371                 rc = lfsck_namespace_dsd_single(env, com, child, pfid, &ldata,
3372                                                 &lh, &type, &retry, &unknown);
3373                 if (retry) {
3374                         LASSERT(!lustre_handle_is_used(&lh));
3375
3376                         retry = false;
3377                         goto lock;
3378                 }
3379
3380                 GOTO(out, rc);
3381         }
3382
3383         if (!lustre_handle_is_used(&lh))
3384                 goto lock;
3385
3386         if (unlikely(ldata.ld_leh->leh_reccount == 0)) {
3387                 rc = lfsck_namespace_dsd_orphan(env, com, child, pfid, &lh,
3388                                                 &type);
3389
3390                 GOTO(out, rc);
3391         }
3392
3393         /* When we come here, the cases usually like that:
3394          * 1) The directory object has a corrupted linkEA entry. During the
3395          *    first-stage scanning, the LFSCK cannot know such corruption,
3396          *    then it appends the right linkEA entry according to the found
3397          *    name entry after the bad one.
3398          *
3399          * 2) The directory object has a right linkEA entry. During the
3400          *    first-stage scanning, the LFSCK finds some bad name entry,
3401          *    but the LFSCK cannot aware that at that time, then it adds
3402          *    the bad linkEA entry for further processing. */
3403         rc = lfsck_namespace_dsd_multiple(env, com, child, pfid, &ldata,
3404                                           &lh, &type, lpf, &unknown);
3405
3406         GOTO(out, rc);
3407
3408 out:
3409         lfsck_ibits_unlock(&lh, LCK_EX);
3410         if (rc > 0) {
3411                 switch (type) {
3412                 case LNIT_BAD_LINKEA:
3413                         ns->ln_linkea_repaired++;
3414                         break;
3415                 case LNIT_UNMATCHED_PAIRS:
3416                         ns->ln_unmatched_pairs_repaired++;
3417                         break;
3418                 case LNIT_MUL_REF:
3419                         ns->ln_mul_ref_repaired++;
3420                         break;
3421                 default:
3422                         break;
3423                 }
3424         }
3425
3426         if (unknown)
3427                 ns->ln_unknown_inconsistency++;
3428
3429         return rc;
3430 }
3431
3432 static inline bool
3433 lfsck_namespace_linkea_stale_overflow(struct linkea_data *ldata,
3434                                       struct lfsck_namespace *ns)
3435 {
3436         /* Both the leh_overflow_time and ln_time_latest_reset are
3437          * local time based, so need NOT to care about clock drift
3438          * among the servers. */
3439         return ldata->ld_leh->leh_overflow_time &&
3440                ldata->ld_leh->leh_overflow_time < ns->ln_time_latest_reset;
3441 }
3442
3443 /**
3444  * Clear the object's linkEA overflow timestamp.
3445  *
3446  * If the MDT-object has too many hard links as to the linkEA cannot hold
3447  * all of them, then overflow timestamp will be set in the linkEA header.
3448  * If some hard links are removed after that, then it is possible to hold
3449  * other missed linkEA entries. If the namespace LFSCK have added all the
3450  * related linkEA entries, then it will remove the overflow timestamp.
3451  *
3452  * \param[in] env       pointer to the thread context
3453  * \param[in] com       pointer to the lfsck component
3454  * \param[in] ldata     pointer to the linkEA data for the given @obj
3455  * \param[in] obj       pointer to the dt_object to be handled
3456  *
3457  * \retval              positive number for repaired cases
3458  * \retval              0 if nothing to be repaired
3459  * \retval              negative error number on failure
3460  */
3461 static int lfsck_namespace_linkea_clear_overflow(const struct lu_env *env,
3462                                                  struct lfsck_component *com,
3463                                                  struct linkea_data *ldata,
3464                                                  struct dt_object *obj)
3465 {
3466         struct lfsck_namespace *ns = com->lc_file_ram;
3467         struct lfsck_instance *lfsck = com->lc_lfsck;
3468         struct dt_device *dev = lfsck_obj2dev(obj);
3469         struct thandle *th = NULL;
3470         struct lustre_handle lh = { 0 };
3471         struct lu_buf linkea_buf;
3472         int rc = 0;
3473         ENTRY;
3474
3475         LASSERT(!dt_object_remote(obj));
3476
3477         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
3478                 GOTO(log, rc = 1);
3479
3480         rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
3481                               MDS_INODELOCK_UPDATE, LCK_PW);
3482         if (rc != 0)
3483                 GOTO(log, rc);
3484
3485         th = lfsck_trans_create(env, dev, lfsck);
3486         if (IS_ERR(th))
3487                 GOTO(log, rc = PTR_ERR(th));
3488
3489         rc = dt_declare_xattr_set(env, obj,
3490                         lfsck_buf_get_const(env, NULL, MAX_LINKEA_SIZE),
3491                         XATTR_NAME_LINK, 0, th);
3492         if (rc != 0)
3493                 GOTO(stop, rc);
3494
3495         rc = dt_trans_start_local(env, dev, th);
3496         if (rc != 0)
3497                 GOTO(stop, rc);
3498
3499         dt_write_lock(env, obj, 0);
3500         rc = lfsck_links_read(env, obj, ldata);
3501         if (rc != 0)
3502                 GOTO(unlock, rc);
3503
3504         if (unlikely(!lfsck_namespace_linkea_stale_overflow(ldata, ns)))
3505                 GOTO(unlock, rc = 0);
3506
3507         ldata->ld_leh->leh_overflow_time = 0;
3508         lfsck_buf_init(&linkea_buf, ldata->ld_buf->lb_buf,
3509                        ldata->ld_leh->leh_len);
3510         rc = dt_xattr_set(env, obj, &linkea_buf, XATTR_NAME_LINK, 0, th);
3511         if (unlikely(rc == -ENOSPC))
3512                 rc = 0;
3513         else if (!rc)
3514                 rc = 1;
3515
3516         GOTO(unlock, rc);
3517
3518 unlock:
3519         dt_write_unlock(env, obj);
3520
3521 stop:
3522         dt_trans_stop(env, dev, th);
3523
3524 log:
3525         lfsck_ibits_unlock(&lh, LCK_PW);
3526         CDEBUG(D_LFSCK, "%s: clear linkea overflow timestamp for the object "
3527                DFID": rc = %d\n",
3528                lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc);
3529
3530         return rc;
3531 }
3532
3533 /**
3534  * Verify the object's agent entry.
3535  *
3536  * If the object claims to have agent entry but the linkEA does not contain
3537  * remote parent, then remove the agent entry. Otherwise, if the object has
3538  * no agent entry but its linkEA contains remote parent, then will generate
3539  * agent entry for it.
3540  *
3541  * \param[in] env       pointer to the thread context
3542  * \param[in] com       pointer to the lfsck component
3543  * \param[in] obj       pointer to the dt_object to be handled
3544  *
3545  * \retval              positive number for repaired cases
3546  * \retval              0 if nothing to be repaired
3547  * \retval              negative error number on failure
3548  */
3549 static int lfsck_namespace_check_agent_entry(const struct lu_env *env,
3550                                              struct lfsck_component *com,
3551                                              struct dt_object *obj)
3552 {
3553         struct linkea_data ldata = { NULL };
3554         struct lfsck_thread_info *info = lfsck_env_info(env);
3555         struct lfsck_namespace *ns = com->lc_file_ram;
3556         struct lfsck_instance *lfsck = com->lc_lfsck;
3557         struct lu_fid *pfid = &info->lti_fid2;
3558         struct lu_name *cname = &info->lti_name;
3559         struct lu_seq_range *range = &info->lti_range;
3560         struct seq_server_site *ss = lfsck_dev_site(lfsck);
3561         __u32 idx = lfsck_dev_idx(lfsck);
3562         int rc;
3563         bool remote = false;
3564         ENTRY;
3565
3566         if (!(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT))
3567                 RETURN(0);
3568
3569         rc = lfsck_links_read_with_rec(env, obj, &ldata);
3570         if (rc == -ENOENT || rc == -ENODATA)
3571                 RETURN(0);
3572
3573         if (rc && rc != -EINVAL)
3574                 GOTO(out, rc);
3575
3576         /* We check the agent entry again after verifying the linkEA
3577          * successfully. So invalid linkEA should be dryrun mode. */
3578         if (rc == -EINVAL || unlikely(!ldata.ld_leh->leh_reccount))
3579                 RETURN(0);
3580
3581         linkea_first_entry(&ldata);
3582         while (ldata.ld_lee != NULL && !remote) {
3583                 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
3584                                     cname, pfid);
3585                 if (!linkea_entry_is_valid(&ldata, cname, pfid))
3586                         GOTO(out, rc = 0);
3587
3588                 fld_range_set_mdt(range);
3589                 rc = fld_server_lookup(env, ss->ss_server_fld,
3590                                        fid_seq(pfid), range);
3591                 if (rc)
3592                         GOTO(out, rc = (rc == -ENOENT ? 0 : rc));
3593
3594                 if (range->lsr_index != idx)
3595                         remote = true;
3596                 else
3597                         linkea_next_entry(&ldata);
3598         }
3599
3600         if ((lu_object_has_agent_entry(&obj->do_lu) && !remote) ||
3601             (!lu_object_has_agent_entry(&obj->do_lu) && remote)) {
3602                 struct dt_device *dev = lfsck_obj2dev(obj);
3603                 struct linkea_data ldata2 = { NULL };
3604                 struct lustre_handle lh = { 0 };
3605                 struct lu_buf linkea_buf;
3606                 struct thandle *handle;
3607
3608                 if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
3609                         GOTO(out, rc = 1);
3610
3611                 rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
3612                                       MDS_INODELOCK_UPDATE |
3613                                       MDS_INODELOCK_XATTR, LCK_EX);
3614                 if (rc)
3615                         GOTO(out, rc);
3616
3617                 handle = lfsck_trans_create(env, dev, lfsck);
3618                 if (IS_ERR(handle))
3619                         GOTO(unlock, rc = PTR_ERR(handle));
3620
3621                 lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
3622                                ldata.ld_leh->leh_len);
3623                 rc = dt_declare_xattr_set(env, obj, &linkea_buf,
3624                                 XATTR_NAME_LINK, LU_XATTR_REPLACE, handle);
3625                 if (rc)
3626                         GOTO(stop, rc);
3627
3628                 rc = dt_trans_start_local(env, dev, handle);
3629                 if (rc)
3630                         GOTO(stop, rc);
3631
3632                 dt_write_lock(env, obj, 0);
3633                 rc = lfsck_links_read2_with_rec(env, obj, &ldata2);
3634                 if (rc) {
3635                         if (rc == -ENOENT || rc == -ENODATA)
3636                                 rc = 0;
3637                         GOTO(unlock2, rc);
3638                 }
3639
3640                 /* If someone changed linkEA by race, then the agent
3641                  * entry will be updated by lower layer automatically. */
3642                 if (ldata.ld_leh->leh_len != ldata2.ld_leh->leh_len ||
3643                     memcmp(ldata.ld_buf->lb_buf, ldata2.ld_buf->lb_buf,
3644                            ldata.ld_leh->leh_len) != 0)
3645                         GOTO(unlock2, rc = 0);
3646
3647                 rc = dt_xattr_set(env, obj, &linkea_buf, XATTR_NAME_LINK,
3648                                   LU_XATTR_REPLACE, handle);
3649                 if (!rc)
3650                         rc = 1;
3651
3652                 GOTO(unlock2, rc);
3653
3654 unlock2:
3655                 dt_write_unlock(env, obj);
3656 stop:
3657                 dt_trans_stop(env, dev, handle);
3658 unlock:
3659                 lfsck_ibits_unlock(&lh, LCK_EX);
3660         }
3661
3662         GOTO(out, rc);
3663
3664 out:
3665         if (rc > 0)
3666                 ns->ln_agent_entries_repaired++;
3667         if (rc)
3668                 CDEBUG(D_LFSCK, "%s: repair agent entry for "DFID": rc = %d\n",
3669                        lfsck_lfsck2name(lfsck), PFID(lfsck_dto2fid(obj)), rc);
3670         return rc;
3671 }
3672
3673 /**
3674  * Double scan the MDT-object for namespace LFSCK.
3675  *
3676  * If the MDT-object contains invalid or repeated linkEA entries, then drop
3677  * those entries from the linkEA; if the linkEA becomes empty or the object
3678  * has no linkEA, then it is an orphan and will be added into the directory
3679  * .lustre/lost+found/MDTxxxx/; if the remote parent is lost, then recreate
3680  * the remote parent; if the name entry corresponding to some linkEA entry
3681  * is lost, then add the name entry back to the namespace.
      *
      * A dead object is skipped (0 is returned). A directory is not handled
      * here but handed over to lfsck_namespace_double_scan_dir(). For both
      * directories and non-directories, if the scan result is 0 and @flags
      * contains LNTF_CHECK_AGENT_ENTRY, then the return value is the one of
      * lfsck_namespace_check_agent_entry().
3682  *
3683  * \param[in] env       pointer to the thread context
3684  * \param[in] com       pointer to the lfsck component
3685  * \param[in] child     pointer to the dt_object to be handled
3686  * \param[in] flags     some hints to indicate how the @child should be handled
3687  *
3688  * \retval              positive number for repaired cases
3689  * \retval              0 if nothing to be repaired
3690  * \retval              negative error number on failure
3691  */
3692 static int lfsck_namespace_double_scan_one(const struct lu_env *env,
3693                                            struct lfsck_component *com,
3694                                            struct dt_object *child, __u8 flags)
3695 {
3696         struct lfsck_thread_info *info     = lfsck_env_info(env);
3697         struct lu_attr           *la       = &info->lti_la;
3698         struct lu_name           *cname    = &info->lti_name;
3699         struct lu_fid            *pfid     = &info->lti_fid;
3700         struct lu_fid            *cfid     = &info->lti_fid2;
3701         struct lfsck_instance    *lfsck    = com->lc_lfsck;
3702         struct lfsck_namespace   *ns       = com->lc_file_ram;
3703         struct dt_object         *parent   = NULL;
3704         struct linkea_data        ldata    = { NULL };
3705         bool                      repaired = false;
3706         int                       count    = 0;
3707         int                       rc;
3708         ENTRY;
3709
             /* The read lock only covers the dead/type checks and the linkEA
              * read; it is dropped on every path before any repair starts. */
3710         dt_read_lock(env, child, 0);
3711         if (unlikely(lfsck_is_dead_obj(child))) {
3712                 dt_read_unlock(env, child);
3713
3714                 RETURN(0);
3715         }
3716
3717         if (S_ISDIR(lfsck_object_type(child))) {
3718                 dt_read_unlock(env, child);
3719                 rc = lfsck_namespace_double_scan_dir(env, com, child, flags);
3720                 if (!rc && flags & LNTF_CHECK_AGENT_ENTRY)
3721                         rc = lfsck_namespace_check_agent_entry(env, com, child);
3722
3723                 RETURN(rc);
3724         }
3725
3726         rc = lfsck_links_read(env, child, &ldata);
3727         dt_read_unlock(env, child);
3728
             /* The linkEA was rejected with -EINVAL: remove it under an EX
              * ibits lock, then continue with the common handling at "out". */
3729         if (rc == -EINVAL) {
3730                 struct lustre_handle lh = { 0 };
3731
3732                 rc = lfsck_ibits_lock(env, com->lc_lfsck, child, &lh,
3733                                       MDS_INODELOCK_UPDATE |
3734                                       MDS_INODELOCK_XATTR, LCK_EX);
3735                 if (rc == 0) {
3736                         rc = lfsck_namespace_links_remove(env, com, child);
3737                         lfsck_ibits_unlock(&lh, LCK_EX);
3738                 }
3739
3740                 GOTO(out, rc);
3741         }
3742
3743         if (rc != 0)
3744                 GOTO(out, rc);
3745
3746         if (!(ns->ln_flags & LF_INCOMPLETE) &&
3747             unlikely(lfsck_namespace_linkea_stale_overflow(&ldata, ns))) {
3748                 rc = lfsck_namespace_linkea_clear_overflow(env, com, &ldata,
3749                                                            child);
3750                 if (rc < 0)
3751                         GOTO(out, rc);
3752
3753                 if (rc > 0)
3754                         ns->ln_linkea_overflow_cleared++;
3755         }
3756
             /* Walk the linkEA entries one by one. Every branch below either
              * calls linkea_next_entry(), or calls one of the shrink helpers
              * and then "continue"s without advancing (presumably the shrink
              * leaves @ldata at the following entry - TODO confirm), or leaves
              * the loop via GOTO(out). */
3757         linkea_first_entry(&ldata);
3758         while (ldata.ld_lee != NULL) {
3759                 rc = lfsck_namespace_unpack_linkea_entry(&ldata, cname, pfid,
3760                                                          info->lti_key,
3761                                                          sizeof(info->lti_key));
3762                 /* Invalid PFID in the linkEA entry. */
3763                 if (rc != 0) {
3764                         rc = lfsck_namespace_shrink_linkea(env, com, child,
3765                                                 &ldata, cname, pfid, true);
3766                         if (rc < 0)
3767                                 GOTO(out, rc);
3768
3769                         if (rc > 0)
3770                                 repaired = true;
3771
3772                         continue;
3773                 }
3774
3775                 rc = lfsck_namespace_filter_linkea_entry(&ldata, cname, pfid,
3776                                                          false);
3777                 /* Found repeated linkEA entries */
3778                 if (rc > 0) {
3779                         rc = lfsck_namespace_shrink_linkea(env, com, child,
3780                                                 &ldata, cname, pfid, false);
3781                         if (rc < 0)
3782                                 GOTO(out, rc);
3783
3784                         if (rc == 0)
3785                                 continue;
3786
3787                         repaired = true;
3788
3789                         /* fallthrough */
3790                 }
3791
3792                 parent = lfsck_object_find_bottom(env, lfsck, pfid);
3793                 if (IS_ERR(parent)) {
3794                         rc = PTR_ERR(parent);
3795                         /* if @pfid doesn't have a valid OI mapping, it will
3796                          * trigger OI scrub, and -ENOENT is returned if it's
3797                          * remote, -EINPROGRESS if local.
                              *
                              * NOTE(review): a positive return value from the
                              * shrink below reaches "out" with rc > 0, so
                              * @count is left as 0 there and the "count == 0"
                              * branch is entered although the linkEA had more
                              * than one record; @repaired is not set either.
                              * Confirm that this is intended.
3798                          */
3799                         if ((rc == -ENOENT || rc == -EINPROGRESS) &&
3800                             ldata.ld_leh->leh_reccount > 1)
3801                                 rc = lfsck_namespace_shrink_linkea(env, com,
3802                                         child, &ldata, cname, pfid, true);
3803                         GOTO(out, rc);
3804                 }
3805
3806                 if (!dt_object_exists(parent)) {
3807
                     /* Also the jump target for the lfsck_namespace_check_name()
                      * -ENOENT and lfsck_namespace_insert_normal() -ESTALE cases
                      * below; @parent still holds a reference at that point. */
3808 lost_parent:
3809                         if (ldata.ld_leh->leh_reccount > 1) {
3810                                 /* If it is NOT the last linkEA entry, then
3811                                  * there is still other chance to make the
3812                                  * child to be visible via other parent, then
3813                                  * remove this linkEA entry. */
3814                                 rc = lfsck_namespace_shrink_linkea(env, com,
3815                                         child, &ldata, cname, pfid, true);
3816                         } else {
3817                                 /* If the LFSCK is marked as LF_INCOMPLETE,
3818                                  * then means some MDT has ever tried to
3819                                  * verify some remote MDT-object that resides
3820                                  * on this MDT, but this MDT failed to respond
3821                                  * such request. So means there may be some
3822                                  * remote name entry on other MDT that
3823                                  * references this object with another name,
3824                                  * so we cannot know whether this linkEA is
3825                                  * valid or not. So keep it there and maybe
3826                                  * resolved when next LFSCK run. */
3827                                 if (ns->ln_flags & LF_INCOMPLETE) {
3828                                         lfsck_object_put(env, parent);
3829
3830                                         GOTO(out, rc = 0);
3831                                 }
3832
3833                                 /* Create the lost parent as an orphan. */
3834                                 rc = lfsck_namespace_create_orphan_dir(env, com,
3835                                                                 parent, NULL);
3836                                 if (rc < 0) {
3837                                         lfsck_object_put(env, parent);
3838
3839                                         GOTO(out, rc);
3840                                 }
3841
3842                                 if (rc > 0)
3843                                         repaired = true;
3844
3845                                 /* Add the missing name entry to the parent. */
3846                                 rc = lfsck_namespace_insert_normal(env, com,
3847                                                         parent, child, cname);
3848                                 if (unlikely(rc == -EEXIST))
3849                                         /* Unfortunately, someone reused the
3850                                          * name under the parent by race. So we
3851                                          * have to remove the linkEA entry from
3852                                          * current child object. It means that
3853                                          * the LFSCK cannot recover the system
3854                                          * totally back to its original status,
3855                                          * but it is necessary to make the
3856                                          * current system to be consistent. */
3857                                         rc = lfsck_namespace_shrink_linkea(env,
3858                                                         com, child, &ldata,
3859                                                         cname, pfid, true);
3860                                 else
3861                                         linkea_next_entry(&ldata);
3862                         }
3863
3864                         lfsck_object_put(env, parent);
3865                         if (rc < 0)
3866                                 GOTO(out, rc);
3867
3868                         if (rc > 0)
3869                                 repaired = true;
3870
3871                         continue;
3872                 } /* !dt_object_exists(parent) */
3873
3874                 /* The linkEA entry with bad parent will be removed. */
3875                 if (unlikely(!dt_try_as_dir(env, parent, true))) {
3876                         lfsck_object_put(env, parent);
3877                         rc = lfsck_namespace_shrink_linkea(env, com, child,
3878                                                 &ldata, cname, pfid, true);
3879                         if (rc < 0)
3880                                 GOTO(out, rc);
3881
3882                         if (rc > 0)
3883                                 repaired = true;
3884
3885                         continue;
3886                 }
3887
                     /* Look the name up in the parent; only 0 (found, FID is
                      * stored in @cfid) and -ENOENT are handled below, any other
                      * result aborts the scan of this object. */
3888                 rc = dt_lookup_dir(env, parent, cname->ln_name, cfid);
3889                 if (rc != 0 && rc != -ENOENT) {
3890                         lfsck_object_put(env, parent);
3891
3892                         GOTO(out, rc);
3893                 }
3894
3895                 if (rc == 0) {
3896                         if (lu_fid_eq(cfid, lfsck_dto2fid(child))) {
3897                                 /* It is the most common case that we
3898                                  * find the name entry corresponding
3899                                  * to the linkEA entry. */
3900                                 lfsck_object_put(env, parent);
3901                                 linkea_next_entry(&ldata);
3902                         } else {
3903                                 /* The name entry references another
3904                                  * MDT-object that may be created by
3905                                  * the LFSCK for repairing dangling
3906                                  * name entry. Try to replace it. */
3907                                 rc = lfsck_namespace_replace_cond(env, com,
3908                                                 parent, child, cfid, cname);
3909                                 lfsck_object_put(env, parent);
3910                                 if (rc < 0)
3911                                         GOTO(out, rc);
3912
3913                                 if (rc > 0) {
3914                                         repaired = true;
3915                                         linkea_next_entry(&ldata);
3916                                 } else {
3917                                         rc = lfsck_namespace_shrink_linkea(env,
3918                                                         com, child, &ldata,
3919                                                         cname, pfid, true);
3920                                         if (rc < 0)
3921                                                 GOTO(out, rc);
3922
3923                                         if (rc > 0)
3924                                                 repaired = true;
3925                                 }
3926                         }
3927
3928                         continue;
3929                 }
3930
3931                 /* The following handles -ENOENT case */
3932
3933                 rc = dt_attr_get(env, child, la);
3934                 if (rc != 0)
3935                         GOTO(out, rc);
3936
3937                 /* If there is no name entry in the parent dir and the object
3938                  * link count is fewer than the linkea entries count, then the
3939                  * linkea entry should be removed. */
3940                 if (ldata.ld_leh->leh_reccount > la->la_nlink) {
3941                         rc = lfsck_namespace_shrink_linkea_cond(env, com,
3942                                         parent, child, &ldata, cname, pfid);
3943                         lfsck_object_put(env, parent);
3944                         if (rc < 0)
3945                                 GOTO(out, rc);
3946
3947                         if (rc > 0)
3948                                 repaired = true;
3949
3950                         continue;
3951                 }
3952
3953                 /* If the LFSCK is marked as LF_INCOMPLETE, then means some
3954                  * MDT has ever tried to verify some remote MDT-object that
3955                  * resides on this MDT, but this MDT failed to respond such
3956                  * request. So means there may be some remote name entry on
3957                  * other MDT that references this object with another name,
3958                  * so we cannot know whether this linkEA is valid or not.
3959                  * So keep it there and maybe resolved when next LFSCK run. */
3960                 if (ns->ln_flags & LF_INCOMPLETE) {
3961                         lfsck_object_put(env, parent);
3962
3963                         GOTO(out, rc = 0);
3964                 }
3965
3966                 rc = lfsck_namespace_check_name(env, lfsck, parent, child,
3967                                                 cname);
3968                 if (rc == -ENOENT)
3969                         goto lost_parent;
3970
3971                 if (rc < 0) {
3972                         lfsck_object_put(env, parent);
3973
3974                         GOTO(out, rc);
3975                 }
3976
3977                 /* It is an invalid name entry, drop it. */
3978                 if (unlikely(rc > 0)) {
3979                         lfsck_object_put(env, parent);
3980                         rc = lfsck_namespace_shrink_linkea(env, com, child,
3981                                                 &ldata, cname, pfid, true);
3982                         if (rc < 0)
3983                                 GOTO(out, rc);
3984
3985                         if (rc > 0)
3986                                 repaired = true;
3987
3988                         continue;
3989                 }
3990
3991                 /* Add the missing name entry back to the namespace. */
3992                 rc = lfsck_namespace_insert_normal(env, com, parent, child,
3993                                                    cname);
3994                 if (unlikely(rc == -ESTALE))
3995                         /* It may happen when the remote object has been
3996                          * removed, but the local MDT is not aware of that. */
3997                         goto lost_parent;
3998
3999                 if (unlikely(rc == -EEXIST))
4000                         /* Unfortunately, someone reused the name under the
4001                          * parent by race. So we have to remove the linkEA
4002                          * entry from current child object. It means that the
4003                          * LFSCK cannot recover the system totally back to
4004                          * its original status, but it is necessary to make
4005                          * the current system to be consistent.
4006                          *
4007                          * It also may be because of the LFSCK found some
4008                          * internal status of create operation. Under such
4009                          * case, nothing to be done. */
4010                         rc = lfsck_namespace_shrink_linkea_cond(env, com,
4011                                         parent, child, &ldata, cname, pfid);
4012                 else
4013                         linkea_next_entry(&ldata);
4014
4015                 lfsck_object_put(env, parent);
4016                 if (rc < 0)
4017                         GOTO(out, rc);
4018
4019                 if (rc > 0)
4020                         repaired = true;
4021         }
4022
4023         GOTO(out, rc = 0);
4024
4025 out:
             /* Any failure except -ENODATA ends the scan of this object.
              * -ENODATA is let through with @count == 0 (presumably it means
              * that the object has no linkEA - confirm against
              * lfsck_links_read()). */
4026         if (rc < 0 && rc != -ENODATA)
4027                 return rc;
4028
             /* The linkEA record count is only used when the scan ended with
              * rc == 0; otherwise @count keeps its initial value 0. */
4029         if (rc == 0 && ldata.ld_leh != NULL)
4030                 count = ldata.ld_leh->leh_reccount;
4031
4032         if (count == 0) {
4033                 /* If the LFSCK is marked as LF_INCOMPLETE, then means some
4034                  * MDT has ever tried to verify some remote MDT-object that
4035                  * resides on this MDT, but this MDT failed to respond such
4036                  * request. So means there may be some remote name entry on
4037                  * other MDT that references this object with another name,
4038                  * so we cannot know whether this linkEA is valid or not.
4039                  * So keep it there and maybe resolved when next LFSCK run. */
4040                 if (!(ns->ln_flags & LF_INCOMPLETE) &&
4041                     (ldata.ld_leh == NULL ||
4042                      !ldata.ld_leh->leh_overflow_time)) {
4043                         /* If the child becomes orphan, then insert it into
4044                          * the global .lustre/lost+found/MDTxxxx directory. */
4045                         rc = lfsck_namespace_insert_orphan(env, com, child,
4046                                                            "", "O", &count);
4047                         if (rc < 0)
4048                                 return rc;
4049
4050                         if (rc > 0) {
4051                                 ns->ln_mul_ref_repaired++;
4052                                 repaired = true;
4053                         }
4054                 }
4055         } else {
                     /* The object still has linkEA records: compare the record
                      * count with the nlink attribute. */
4056                 rc = dt_attr_get(env, child, la);
4057                 if (rc != 0)
4058                         return rc;
4059
4060                 if (la->la_nlink != 0 && la->la_nlink != count) {
4061                         if (unlikely(!S_ISREG(lfsck_object_type(child)) &&
4062                                      !S_ISLNK(lfsck_object_type(child)))) {
4063                                 CDEBUG(D_LFSCK, "%s: namespace LFSCK finds "
4064                                        "the object "DFID"'s nlink count %d "
4065                                        "does not match linkEA count %d, "
4066                                        "type %o, skip it.\n",
4067                                        lfsck_lfsck2name(lfsck),
4068                                        PFID(lfsck_dto2fid(child)),
4069                                        la->la_nlink, count,
4070                                        lfsck_object_type(child));
4071                         } else if (la->la_nlink < count &&
4072                                    likely(!ldata.ld_leh->leh_overflow_time)) {
4073                                 rc = lfsck_namespace_repair_nlink(env, com,
4074                                                                   child, la);
4075                                 if (rc > 0) {
4076                                         ns->ln_objs_nlink_repaired++;
4077                                         rc = 0;
4078                                 }
4079                         }
4080                 }
4081         }
4082
             /* NOTE(review): on the "count == 0" path above @la is not
              * refreshed by this function after "out", so the la_nlink test
              * below may read whatever an earlier dt_attr_get() left in the
              * per-thread buffer - confirm this is acceptable. */
4083         if (repaired) {
4084                 if (la->la_nlink > 1)
4085                         ns->ln_mul_linked_repaired++;
4086
4087                 if (rc == 0)
4088                         rc = 1;
4089         }
4090
4091         if (!rc && flags & LNTF_CHECK_AGENT_ENTRY)
4092                 rc = lfsck_namespace_check_agent_entry(env, com, child);
4093
4094         return rc;
4095 }
4096
4097 static void lfsck_namespace_dump_statistics(struct seq_file *m,
4098                                             struct lfsck_namespace *ns,
4099                                             __u64 checked_phase1,
4100                                             __u64 checked_phase2,
4101                                             time64_t time_phase1,
4102                                             time64_t time_phase2, bool dryrun)
4103 {
4104         const char *postfix = dryrun ? "inconsistent" : "repaired";
4105
4106         seq_printf(m, "checked_phase1: %llu\n"
4107                    "checked_phase2: %llu\n"
4108                    "%s_phase1: %llu\n"
4109                    "%s_phase2: %llu\n"
4110                    "failed_phase1: %llu\n"
4111                    "failed_phase2: %llu\n"
4112                    "directories: %llu\n"
4113                    "dirent_%s: %llu\n"
4114                    "linkea_%s: %llu\n"
4115                    "nlinks_%s: %llu\n"
4116                    "multiple_linked_checked: %llu\n"
4117                    "multiple_linked_%s: %llu\n"
4118                    "unknown_inconsistency: %llu\n"
4119                    "unmatched_pairs_%s: %llu\n"
4120                    "dangling_%s: %llu\n"
4121                    "multiple_referenced_%s: %llu\n"
4122                    "bad_file_type_%s: %llu\n"
4123                    "lost_dirent_%s: %llu\n"
4124                    "local_lost_found_scanned: %llu\n"
4125                    "local_lost_found_moved: %llu\n"
4126                    "local_lost_found_skipped: %llu\n"
4127                    "local_lost_found_failed: %llu\n"
4128                    "striped_dirs_scanned: %llu\n"
4129                    "striped_dirs_%s: %llu\n"
4130                    "striped_dirs_failed: %llu\n"
4131                    "striped_dirs_disabled: %llu\n"
4132                    "striped_dirs_skipped: %llu\n"
4133                    "striped_shards_scanned: %llu\n"
4134                    "striped_shards_%s: %llu\n"
4135                    "striped_shards_failed: %llu\n"
4136                    "striped_shards_skipped: %llu\n"
4137                    "name_hash_%s: %llu\n"
4138                    "linkea_overflow_%s: %llu\n"
4139                    "agent_entries_%s: %llu\n"
4140                    "success_count: %u\n"
4141                    "run_time_phase1: %lld seconds\n"
4142                    "run_time_phase2: %lld seconds\n",
4143                    checked_phase1,
4144                    checked_phase2,
4145                    dryrun ? "inconsistent" : "updated",
4146                    ns->ln_items_repaired,
4147                    dryrun ? "inconsistent" : "updated",
4148                    ns->ln_objs_repaired_phase2,
4149                    ns->ln_items_failed,
4150                    ns->ln_objs_failed_phase2,
4151                    ns->ln_dirs_checked,
4152                    postfix, ns->ln_dirent_repaired,
4153                    postfix, ns->ln_linkea_repaired,
4154                    postfix, ns->ln_objs_nlink_repaired,
4155                    ns->ln_mul_linked_checked,
4156                    postfix, ns->ln_mul_linked_repaired,
4157                    ns->ln_unknown_inconsistency,
4158                    postfix, ns->ln_unmatched_pairs_repaired,
4159                    postfix, ns->ln_dangling_repaired,
4160                    postfix, ns->ln_mul_ref_repaired,
4161                    postfix, ns->ln_bad_type_repaired,
4162                    postfix, ns->ln_lost_dirent_repaired,
4163                    ns->ln_local_lpf_scanned,
4164                    ns->ln_local_lpf_moved,
4165                    ns->ln_local_lpf_skipped,
4166                    ns->ln_local_lpf_failed,
4167                    ns->ln_striped_dirs_scanned,
4168                    postfix, ns->ln_striped_dirs_repaired,
4169                    ns->ln_striped_dirs_failed,
4170                    ns->ln_striped_dirs_disabled,
4171                    ns->ln_striped_dirs_skipped,
4172                    ns->ln_striped_shards_scanned,
4173                    postfix, ns->ln_striped_shards_repaired,
4174                    ns->ln_striped_shards_failed,
4175                    ns->ln_striped_shards_skipped,
4176                    postfix, ns->ln_name_hash_repaired,
4177                    dryrun ? "inconsistent" : "cleared",
4178                    ns->ln_linkea_overflow_cleared,
4179                    postfix, ns->ln_agent_entries_repaired,
4180                    ns->ln_success_count,
4181                    time_phase1,
4182                    time_phase2);
4183 }
4184
4185 static void lfsck_namespace_release_lmv(const struct lu_env *env,
4186                                         struct lfsck_component *com)
4187 {
4188         struct lfsck_instance           *lfsck  = com->lc_lfsck;
4189         struct lfsck_namespace          *ns     = com->lc_file_ram;
4190
4191         while (!list_empty(&lfsck->li_list_lmv)) {
4192                 struct lfsck_lmv_unit   *llu;
4193                 struct lfsck_lmv        *llmv;
4194
4195                 llu = list_first_entry(&lfsck->li_list_lmv,
4196                                        struct lfsck_lmv_unit, llu_link);
4197                 llmv = &llu->llu_lmv;
4198
4199                 LASSERTF(atomic_read(&llmv->ll_ref) == 1,
4200                          "still in using: %u\n",
4201                          atomic_read(&llmv->ll_ref));
4202
4203                 ns->ln_striped_dirs_skipped++;
4204                 lfsck_lmv_put(env, llmv);
4205         }
4206 }
4207
4208 static int lfsck_namespace_check_for_double_scan(const struct lu_env *env,
4209                                                  struct lfsck_component *com,
4210                                                  struct dt_object *obj)
4211 {
4212         struct lu_attr *la = &lfsck_env_info(env)->lti_la;
4213         int             rc;
4214
4215         rc = dt_attr_get(env, obj, la);
4216         if (rc != 0)
4217                 return rc;
4218
4219         /* zero-linkEA object may be orphan, but it also maybe because
4220          * of upgrading. Currently, we cannot record it for double scan.
4221          * Because it may cause the LFSCK trace file to be too large. */
4222
4223         /* "la_ctime" == 1 means that it has ever been removed from
4224          * backend /lost+found directory but not been added back to
4225          * the normal namespace yet. */
4226
4227         if ((S_ISREG(lfsck_object_type(obj)) && la->la_nlink > 1) ||
4228             unlikely(la->la_ctime == 1))
4229                 rc = lfsck_namespace_trace_update(env, com, lfsck_dto2fid(obj),
4230                                                   LNTF_CHECK_LINKEA, true);
4231
4232         return rc;
4233 }
4234
4235 /* namespace APIs */
4236
4237 static int lfsck_namespace_reset(const struct lu_env *env,
4238                                  struct lfsck_component *com, bool init)
4239 {
4240         struct lfsck_instance           *lfsck  = com->lc_lfsck;
4241         struct lfsck_namespace          *ns     = com->lc_file_ram;
4242         struct lfsck_assistant_data     *lad    = com->lc_data;
4243         struct dt_object                *root;
4244         int                              rc;
4245         ENTRY;
4246
4247         root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
4248         if (IS_ERR(root))
4249                 GOTO(log, rc = PTR_ERR(root));
4250
4251         if (unlikely(!dt_try_as_dir(env, root, true)))
4252                 GOTO(put, rc = -ENOTDIR);
4253
4254         down_write(&com->lc_sem);
4255         if (init) {
4256                 memset(ns, 0, sizeof(*ns));
4257         } else {
4258                 __u32 count = ns->ln_success_count;
4259                 time64_t last_time = ns->ln_time_last_complete;
4260
4261                 memset(ns, 0, sizeof(*ns));
4262                 ns->ln_success_count = count;
4263                 ns->ln_time_last_complete = last_time;
4264         }
4265         ns->ln_magic = LFSCK_NAMESPACE_MAGIC;
4266         ns->ln_status = LS_INIT;
4267         ns->ln_time_latest_reset = ktime_get_real_seconds();
4268
4269         rc = lfsck_load_one_trace_file(env, com, root, &com->lc_obj,
4270                                        &dt_lfsck_namespace_features,
4271                                        LFSCK_NAMESPACE, true);
4272         if (rc)
4273                 GOTO(out, rc);
4274
4275         rc = lfsck_load_sub_trace_files(env, com, &dt_lfsck_namespace_features,
4276                                         LFSCK_NAMESPACE, true);
4277         if (rc != 0)
4278                 GOTO(out, rc);
4279
4280         clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
4281         bitmap_zero(lad->lad_bitmap, lad->lad_bitmap_count);
4282
4283         rc = lfsck_namespace_store(env, com);
4284
4285         GOTO(out, rc);
4286
4287 out:
4288         up_write(&com->lc_sem);
4289
4290 put:
4291         lfsck_object_put(env, root);
4292 log:
4293         CDEBUG(D_LFSCK, "%s: namespace LFSCK reset: rc = %d\n",
4294                lfsck_lfsck2name(lfsck), rc);
4295         return rc;
4296 }
4297
4298 static void
4299 lfsck_namespace_fail(const struct lu_env *env, struct lfsck_component *com,
4300                      bool new_checked)
4301 {
4302         struct lfsck_namespace *ns = com->lc_file_ram;
4303
4304         down_write(&com->lc_sem);
4305         if (new_checked)
4306                 com->lc_new_checked++;
4307         lfsck_namespace_record_failure(env, com->lc_lfsck, ns);
4308         up_write(&com->lc_sem);
4309 }
4310
4311 static void lfsck_namespace_close_dir(const struct lu_env *env,
4312                                       struct lfsck_component *com)
4313 {
4314         struct lfsck_namespace          *ns     = com->lc_file_ram;
4315         struct lfsck_assistant_data     *lad    = com->lc_data;
4316         struct lfsck_assistant_object   *lso    = NULL;
4317         struct lfsck_instance           *lfsck  = com->lc_lfsck;
4318         struct lfsck_lmv                *llmv   = lfsck->li_lmv;
4319         struct lfsck_namespace_req      *lnr;
4320         struct lu_attr *la = &lfsck_env_info(env)->lti_la2;
4321         __u32 size = sizeof(*lnr) + LFSCK_TMPBUF_LEN;
4322         int rc;
4323         bool wakeup = false;
4324         ENTRY;
4325
4326         if (llmv == NULL)
4327                 RETURN_EXIT;
4328
4329         rc = dt_attr_get(env, lfsck->li_obj_dir, la);
4330         if (rc)
4331                 RETURN_EXIT;
4332
4333         OBD_ALLOC(lnr, size);
4334         if (lnr == NULL) {
4335                 ns->ln_striped_dirs_skipped++;
4336
4337                 RETURN_EXIT;
4338         }
4339
4340         lso = lfsck_assistant_object_init(env, lfsck_dto2fid(lfsck->li_obj_dir),
4341                         la, lfsck->li_pos_current.lp_oit_cookie, true);
4342         if (IS_ERR(lso)) {
4343                 OBD_FREE(lnr, size);
4344                 ns->ln_striped_dirs_skipped++;
4345
4346                 RETURN_EXIT;
4347         }
4348
4349         /* Generate a dummy request to indicate that all shards' name entry
4350          * in this striped directory has been scanned for the first time. */
4351         INIT_LIST_HEAD(&lnr->lnr_lar.lar_list);
4352         lnr->lnr_lar.lar_parent = lso;
4353         lnr->lnr_lmv = lfsck_lmv_get(llmv);
4354         lnr->lnr_fid = *lfsck_dto2fid(lfsck->li_obj_dir);
4355         lnr->lnr_dir_cookie = MDS_DIR_END_OFF;
4356         lnr->lnr_size = size;
4357         lnr->lnr_type = lso->lso_attr.la_mode;
4358
4359         spin_lock(&lad->lad_lock);
4360         if (lad->lad_assistant_status < 0 ||
4361             unlikely(!thread_is_running(&lfsck->li_thread) ||
4362                      !thread_is_running(&lad->lad_thread))) {
4363                 spin_unlock(&lad->lad_lock);
4364                 lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar);
4365                 ns->ln_striped_dirs_skipped++;
4366
4367                 RETURN_EXIT;
4368         }
4369
4370         list_add_tail(&lnr->lnr_lar.lar_list, &lad->lad_req_list);
4371         if (lad->lad_prefetched == 0)
4372                 wakeup = true;
4373
4374         lad->lad_prefetched++;
4375         spin_unlock(&lad->lad_lock);
4376         if (wakeup)
4377                 wake_up(&lad->lad_thread.t_ctl_waitq);
4378
4379         EXIT;
4380 }
4381
4382 static int lfsck_namespace_open_dir(const struct lu_env *env,
4383                                     struct lfsck_component *com)
4384 {
4385         struct lfsck_instance   *lfsck  = com->lc_lfsck;
4386         struct lfsck_namespace  *ns     = com->lc_file_ram;
4387         struct lfsck_lmv        *llmv   = lfsck->li_lmv;
4388         int                      rc     = 0;
4389         ENTRY;
4390
4391         if (llmv == NULL)
4392                 RETURN(0);
4393
4394         if (llmv->ll_lmv_master) {
4395                 struct lmv_mds_md_v1 *lmv = &llmv->ll_lmv;
4396
4397                 if (lmv->lmv_master_mdt_index != lfsck_dev_idx(lfsck)) {
4398                         lmv->lmv_master_mdt_index =
4399                                 lfsck_dev_idx(lfsck);
4400                         ns->ln_flags |= LF_INCONSISTENT;
4401                         llmv->ll_lmv_updated = 1;
4402                 }
4403         } else {
4404                 rc = lfsck_namespace_verify_stripe_slave(env, com,
4405                                         lfsck->li_obj_dir, llmv);
4406         }
4407
4408         RETURN(rc > 0 ? 0 : rc);
4409 }
4410
4411 static int lfsck_namespace_checkpoint(const struct lu_env *env,
4412                                       struct lfsck_component *com, bool init)
4413 {
4414         struct lfsck_instance   *lfsck = com->lc_lfsck;
4415         struct lfsck_namespace  *ns    = com->lc_file_ram;
4416         int                      rc;
4417
4418         if (!init) {
4419                 rc = lfsck_checkpoint_generic(env, com);
4420                 if (rc != 0)
4421                         goto log;
4422         }
4423
4424         down_write(&com->lc_sem);
4425         if (init) {
4426                 ns->ln_pos_latest_start = lfsck->li_pos_checkpoint;
4427         } else {
4428                 ns->ln_pos_last_checkpoint = lfsck->li_pos_checkpoint;
4429                 ns->ln_run_time_phase1 += ktime_get_seconds() -
4430                                           lfsck->li_time_last_checkpoint;
4431                 ns->ln_time_last_checkpoint = ktime_get_real_seconds();
4432                 ns->ln_items_checked += com->lc_new_checked;
4433                 com->lc_new_checked = 0;
4434         }
4435
4436         rc = lfsck_namespace_store(env, com);
4437         up_write(&com->lc_sem);
4438
4439 log:
4440         CDEBUG(D_LFSCK, "%s: namespace LFSCK checkpoint at the pos [%llu"
4441                ", "DFID", %#llx], status = %d: rc = %d\n",
4442                lfsck_lfsck2name(lfsck), lfsck->li_pos_current.lp_oit_cookie,
4443                PFID(&lfsck->li_pos_current.lp_dir_parent),
4444                lfsck->li_pos_current.lp_dir_cookie, ns->ln_status, rc);
4445
4446         return rc > 0 ? 0 : rc;
4447 }
4448
4449 static int lfsck_namespace_prep(const struct lu_env *env,
4450                                 struct lfsck_component *com,
4451                                 struct lfsck_start_param *lsp)
4452 {
4453         struct lfsck_instance   *lfsck  = com->lc_lfsck;
4454         struct lfsck_namespace  *ns     = com->lc_file_ram;
4455         struct lfsck_position   *pos    = &com->lc_pos_start;
4456         int                      rc;
4457
4458         rc = lfsck_namespace_load_bitmap(env, com);
4459         if (rc != 0 || ns->ln_status == LS_COMPLETED) {
4460                 rc = lfsck_namespace_reset(env, com, false);
4461                 if (rc == 0)
4462                         rc = lfsck_set_param(env, lfsck, lsp->lsp_start, true);
4463
4464                 if (rc != 0) {
4465                         CDEBUG(D_LFSCK, "%s: namespace LFSCK prep failed: "
4466                                "rc = %d\n", lfsck_lfsck2name(lfsck), rc);
4467
4468                         return rc;
4469                 }
4470         }
4471
4472         down_write(&com->lc_sem);
4473         ns->ln_time_latest_start = ktime_get_real_seconds();
4474         spin_lock(&lfsck->li_lock);
4475
4476         if (ns->ln_flags & LF_SCANNED_ONCE) {
4477                 if (!lfsck->li_drop_dryrun ||
4478                     lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
4479                         ns->ln_status = LS_SCANNING_PHASE2;
4480                         list_move_tail(&com->lc_link,
4481                                        &lfsck->li_list_double_scan);
4482                         if (!list_empty(&com->lc_link_dir))
4483                                 list_del_init(&com->lc_link_dir);
4484                         lfsck_pos_set_zero(pos);
4485                 } else {
4486                         ns->ln_status = LS_SCANNING_PHASE1;
4487                         ns->ln_run_time_phase1 = 0;
4488                         ns->ln_run_time_phase2 = 0;
4489                         ns->ln_items_checked = 0;
4490                         ns->ln_items_repaired = 0;
4491                         ns->ln_items_failed = 0;
4492                         ns->ln_dirs_checked = 0;
4493                         ns->ln_objs_checked_phase2 = 0;
4494                         ns->ln_objs_repaired_phase2 = 0;
4495                         ns->ln_objs_failed_phase2 = 0;
4496                         ns->ln_objs_nlink_repaired = 0;
4497                         ns->ln_dirent_repaired = 0;
4498                         ns->ln_linkea_repaired = 0;
4499                         ns->ln_mul_linked_checked = 0;
4500                         ns->ln_mul_linked_repaired = 0;
4501                         ns->ln_unknown_inconsistency = 0;
4502                         ns->ln_unmatched_pairs_repaired = 0;
4503                         ns->ln_dangling_repaired = 0;
4504                         ns->ln_mul_ref_repaired = 0;
4505                         ns->ln_bad_type_repaired = 0;
4506                         ns->ln_lost_dirent_repaired = 0;
4507                         ns->ln_striped_dirs_scanned = 0;
4508                         ns->ln_striped_dirs_repaired = 0;
4509                         ns->ln_striped_dirs_failed = 0;
4510                         ns->ln_striped_dirs_disabled = 0;
4511                         ns->ln_striped_dirs_skipped = 0;
4512                         ns->ln_striped_shards_scanned = 0;
4513                         ns->ln_striped_shards_repaired = 0;
4514                         ns->ln_striped_shards_failed = 0;
4515                         ns->ln_striped_shards_skipped = 0;
4516                         ns->ln_name_hash_repaired = 0;
4517                         fid_zero(&ns->ln_fid_latest_scanned_phase2);
4518                         if (list_empty(&com->lc_link_dir))
4519                                 list_add_tail(&com->lc_link_dir,
4520                                               &lfsck->li_list_dir);
4521                         *pos = ns->ln_pos_first_inconsistent;
4522                 }
4523         } else {
4524                 ns->ln_status = LS_SCANNING_PHASE1;
4525                 if (list_empty(&com->lc_link_dir))
4526                         list_add_tail(&com->lc_link_dir,
4527                                       &lfsck->li_list_dir);
4528                 if (!lfsck->li_drop_dryrun ||
4529                     lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent)) {
4530                         *pos = ns->ln_pos_last_checkpoint;
4531                         pos->lp_oit_cookie++;
4532                 } else {
4533                         *pos = ns->ln_pos_first_inconsistent;
4534                 }
4535         }
4536
4537         spin_unlock(&lfsck->li_lock);
4538         up_write(&com->lc_sem);
4539
4540         rc = lfsck_start_assistant(env, com, lsp);
4541
4542         CDEBUG(D_LFSCK, "%s: namespace LFSCK prep done, start pos [%llu, "
4543                DFID", %#llx]: rc = %d\n",
4544                lfsck_lfsck2name(lfsck), pos->lp_oit_cookie,
4545                PFID(&pos->lp_dir_parent), pos->lp_dir_cookie, rc);
4546
4547         return rc;
4548 }
4549
4550 static int lfsck_namespace_exec_oit(const struct lu_env *env,
4551                                     struct lfsck_component *com,
4552                                     struct dt_object *obj)
4553 {
4554         struct lfsck_thread_info *info = lfsck_env_info(env);
4555         struct lfsck_namespace *ns = com->lc_file_ram;
4556         struct lfsck_instance *lfsck = com->lc_lfsck;
4557         const struct lu_fid *fid = lfsck_dto2fid(obj);
4558         struct lu_fid *pfid = &info->lti_fid2;
4559         struct lu_name *cname = &info->lti_name;
4560         struct lu_seq_range *range = &info->lti_range;
4561         struct seq_server_site *ss = lfsck_dev_site(lfsck);
4562         struct linkea_data ldata = { NULL };
4563         __u32 idx = lfsck_dev_idx(lfsck);
4564         struct lu_attr la = { .la_valid = 0 };
4565         bool remote = false;
4566         int rc;
4567         ENTRY;
4568
4569         rc = dt_attr_get(env, obj, &la);
4570         if (unlikely(rc || (la.la_valid & LA_FLAGS &&
4571                             la.la_flags & LUSTRE_ORPHAN_FL))) {
4572                 CDEBUG(D_INFO,
4573                        "%s: skip orphan "DFID", %llx/%x: rc = %d\n",
4574                        lfsck_lfsck2name(lfsck), PFID(fid),
4575                        la.la_valid, la.la_flags, rc);
4576
4577                 return rc;
4578         }
4579
4580         rc = lfsck_links_read(env, obj, &ldata);
4581         if (rc == -ENOENT)
4582                 GOTO(out, rc = 0);
4583
4584         /* -EINVAL means crashed linkEA, should be verified. */
4585         if (rc == -EINVAL) {
4586                 rc = lfsck_namespace_trace_update(env, com, fid,
4587                                                   LNTF_CHECK_LINKEA, true);
4588                 if (rc == 0) {
4589                         struct lustre_handle lh = { 0 };
4590
4591                         rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
4592                                               MDS_INODELOCK_UPDATE |
4593                                               MDS_INODELOCK_XATTR, LCK_EX);
4594                         if (rc == 0) {
4595                                 rc = lfsck_namespace_links_remove(env, com,
4596                                                                   obj);
4597                                 lfsck_ibits_unlock(&lh, LCK_EX);
4598                         }
4599                 }
4600
4601                 GOTO(out, rc = (rc == -ENOENT ? 0 : rc));
4602         }
4603
4604         if (rc && rc != -ENODATA)
4605                 GOTO(out, rc);
4606
4607         if (rc == -ENODATA || unlikely(!ldata.ld_leh->leh_reccount)) {
4608                 rc = lfsck_namespace_check_for_double_scan(env, com, obj);
4609
4610                 GOTO(out, rc);
4611         }
4612
4613         linkea_first_entry(&ldata);
4614         while (ldata.ld_lee != NULL) {
4615                 linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen,
4616                                     cname, pfid);
4617                 if (!fid_is_sane(pfid)) {
4618                         rc = lfsck_namespace_trace_update(env, com, fid,
4619                                                   LNTF_CHECK_PARENT, true);
4620                 } else if (!linkea_entry_is_valid(&ldata, cname, pfid)) {
4621                         GOTO(out, rc);
4622                 } else {
4623                         fld_range_set_mdt(range);
4624                         rc = fld_server_lookup(env, ss->ss_server_fld,
4625                                                fid_seq(pfid), range);
4626                         if ((rc == -ENOENT) ||
4627                             (!rc && range->lsr_index != idx)) {
4628                                 remote = true;
4629                                 break;
4630                         }
4631                 }
4632                 if (rc)
4633                         GOTO(out, rc);
4634
4635                 linkea_next_entry(&ldata);
4636         }
4637
4638         if ((lu_object_has_agent_entry(&obj->do_lu) && !remote) ||
4639             (!lu_object_has_agent_entry(&obj->do_lu) && remote)) {
4640                 rc = lfsck_namespace_trace_update(env, com, fid,
4641                                                   LNTF_CHECK_AGENT_ENTRY, true);
4642                 if (rc)
4643                         GOTO(out, rc);
4644         }
4645
4646         /* Record multiple-linked object. */
4647         if (ldata.ld_leh->leh_reccount > 1) {
4648                 rc = lfsck_namespace_trace_update(env, com, fid,
4649                                                   LNTF_CHECK_LINKEA, true);
4650
4651                 GOTO(out, rc);
4652         }
4653
4654         if (remote)
4655                 rc = lfsck_namespace_trace_update(env, com, fid,
4656                                                   LNTF_CHECK_LINKEA, true);
4657         else
4658                 rc = lfsck_namespace_check_for_double_scan(env, com, obj);
4659
4660         GOTO(out, rc);
4661
4662 out:
4663         down_write(&com->lc_sem);
4664         if (S_ISDIR(lfsck_object_type(obj)))
4665                 ns->ln_dirs_checked++;
4666         if (rc != 0)
4667                 lfsck_namespace_record_failure(env, com->lc_lfsck, ns);
4668         up_write(&com->lc_sem);
4669
4670         return rc;
4671 }
4672
4673 static int lfsck_namespace_exec_dir(const struct lu_env *env,
4674                                     struct lfsck_component *com,
4675                                     struct lfsck_assistant_object *lso,
4676                                     struct lu_dirent *ent, __u16 type)
4677 {
4678         struct lfsck_assistant_data     *lad     = com->lc_data;
4679         struct lfsck_instance           *lfsck   = com->lc_lfsck;
4680         struct lfsck_namespace_req      *lnr;
4681         struct lfsck_bookmark           *bk      = &lfsck->li_bookmark_ram;
4682         struct ptlrpc_thread            *mthread = &lfsck->li_thread;
4683         struct ptlrpc_thread            *athread = &lad->lad_thread;
4684         bool                             wakeup  = false;
4685
4686         wait_event_idle(mthread->t_ctl_waitq,
4687                         lad->lad_prefetched < bk->lb_async_windows ||
4688                         !thread_is_running(mthread) ||
4689                         !thread_is_running(athread));
4690
4691         if (unlikely(!thread_is_running(mthread) ||
4692                      !thread_is_running(athread)))
4693                 return 0;
4694
4695         if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir)))
4696                 return 0;
4697
4698         lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, lso, ent, type);
4699         if (IS_ERR(lnr)) {
4700                 struct lfsck_namespace *ns = com->lc_file_ram;
4701
4702                 lfsck_namespace_record_failure(env, com->lc_lfsck, ns);
4703                 return PTR_ERR(lnr);
4704         }
4705
4706         spin_lock(&lad->lad_lock);
4707         if (lad->lad_assistant_status < 0 ||
4708             unlikely(!thread_is_running(mthread) ||
4709                      !thread_is_running(athread))) {
4710                 spin_unlock(&lad->lad_lock);
4711                 lfsck_namespace_assistant_req_fini(env, &lnr->lnr_lar);
4712                 return lad->lad_assistant_status;
4713         }
4714
4715         list_add_tail(&lnr->lnr_lar.lar_list, &lad->lad_req_list);
4716         if (lad->lad_prefetched == 0)
4717                 wakeup = true;
4718
4719         lad->lad_prefetched++;
4720         spin_unlock(&lad->lad_lock);
4721         if (wakeup)
4722                 wake_up(&lad->lad_thread.t_ctl_waitq);
4723
4724         down_write(&com->lc_sem);
4725         com->lc_new_checked++;
4726         up_write(&com->lc_sem);
4727
4728         return 0;
4729 }
4730
4731 static int lfsck_namespace_post(const struct lu_env *env,
4732                                 struct lfsck_component *com,
4733                                 int result, bool init)
4734 {
4735         struct lfsck_instance   *lfsck = com->lc_lfsck;
4736         struct lfsck_namespace  *ns    = com->lc_file_ram;
4737         int                      rc;
4738         ENTRY;
4739
4740         lfsck_post_generic(env, com, &result);
4741
4742         down_write(&com->lc_sem);
4743         lfsck_namespace_release_lmv(env, com);
4744
4745         spin_lock(&lfsck->li_lock);
4746         if (!init)
4747                 ns->ln_pos_last_checkpoint = lfsck->li_pos_checkpoint;
4748         if (result > 0) {
4749                 ns->ln_status = LS_SCANNING_PHASE2;
4750                 ns->ln_flags |= LF_SCANNED_ONCE;
4751                 ns->ln_flags &= ~LF_UPGRADE;
4752                 list_del_init(&com->lc_link_dir);
4753                 list_move_tail(&com->lc_link, &lfsck->li_list_double_scan);
4754         } else if (result == 0) {
4755                 if (lfsck->li_status != 0)
4756                         ns->ln_status = lfsck->li_status;
4757                 else
4758                         ns->ln_status = LS_STOPPED;
4759                 if (ns->ln_status != LS_PAUSED) {
4760                         list_del_init(&com->lc_link_dir);
4761                         list_move_tail(&com->lc_link, &lfsck->li_list_idle);
4762                 }
4763         } else {
4764                 ns->ln_status = LS_FAILED;
4765                 list_del_init(&com->lc_link_dir);
4766                 list_move_tail(&com->lc_link, &lfsck->li_list_idle);
4767         }
4768         spin_unlock(&lfsck->li_lock);
4769
4770         if (!init) {
4771                 ns->ln_run_time_phase1 += ktime_get_seconds() -
4772                                           lfsck->li_time_last_checkpoint;
4773                 ns->ln_time_last_checkpoint = ktime_get_real_seconds();
4774                 ns->ln_items_checked += com->lc_new_checked;
4775                 com->lc_new_checked = 0;
4776         }
4777
4778         rc = lfsck_namespace_store(env, com);
4779         up_write(&com->lc_sem);
4780
4781         CDEBUG(D_LFSCK, "%s: namespace LFSCK post done: rc = %d\n",
4782                lfsck_lfsck2name(lfsck), rc);
4783
4784         RETURN(rc);
4785 }
4786
4787 static void
4788 lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
4789                      struct seq_file *m)
4790 {
4791         struct lfsck_instance   *lfsck = com->lc_lfsck;
4792         struct lfsck_bookmark   *bk    = &lfsck->li_bookmark_ram;
4793         struct lfsck_namespace  *ns    = com->lc_file_ram;
4794
4795         down_read(&com->lc_sem);
4796         seq_printf(m, "name: lfsck_namespace\n"
4797                    "magic: %#x\n"
4798                    "version: %d\n"
4799                    "status: %s\n",
4800                    ns->ln_magic,
4801                    bk->lb_version,
4802                    lfsck_status2name(ns->ln_status));
4803
4804         lfsck_bits_dump(m, ns->ln_flags, lfsck_flags_names, "flags");
4805
4806         lfsck_bits_dump(m, bk->lb_param, lfsck_param_names, "param");
4807
4808         lfsck_time_dump(m, ns->ln_time_last_complete, "last_completed");
4809
4810         lfsck_time_dump(m, ns->ln_time_latest_start, "latest_start");
4811
4812         lfsck_time_dump(m, ns->ln_time_last_checkpoint, "last_checkpoint");
4813
4814         lfsck_pos_dump(m, &ns->ln_pos_latest_start, "latest_start_position");
4815
4816         lfsck_pos_dump(m, &ns->ln_pos_last_checkpoint,
4817                        "last_checkpoint_position");
4818
4819         lfsck_pos_dump(m, &ns->ln_pos_first_inconsistent,
4820                        "first_failure_position");
4821
4822         if (ns->ln_status == LS_SCANNING_PHASE1) {
4823                 struct lfsck_position pos;
4824                 time64_t duration = ktime_get_seconds() -
4825                                     lfsck->li_time_last_checkpoint;
4826                 u64 checked = ns->ln_items_checked + com->lc_new_checked;
4827                 u64 speed = checked;
4828                 u64 new_checked = com->lc_new_checked;
4829                 time64_t rtime = ns->ln_run_time_phase1 + duration;
4830
4831                 if (duration != 0)
4832                         new_checked = div64_s64(new_checked, duration);
4833
4834                 if (rtime != 0)
4835                         speed = div64_s64(speed, rtime);
4836
4837                 lfsck_namespace_dump_statistics(m, ns, checked, 0, rtime, 0,
4838                                                 bk->lb_param & LPF_DRYRUN);
4839                 seq_printf(m, "average_speed_phase1: %llu items/sec\n"
4840                            "average_speed_phase2: N/A\n"
4841                            "average_speed_total: %llu items/sec\n"
4842                            "real_time_speed_phase1: %llu items/sec\n"
4843                            "real_time_speed_phase2: N/A\n",
4844                            speed,
4845                            speed,
4846                            new_checked);
4847
4848                 if (likely(lfsck->li_di_oit)) {
4849                         const struct dt_it_ops *iops =
4850                                 &lfsck->li_obj_oit->do_index_ops->dio_it;
4851
4852                         /* The low layer otable-based iteration position may NOT
4853                          * exactly match the namespace-based directory traversal
4854                          * cookie. Generally, it is not a serious issue. But the
4855                          * caller should NOT make assumption on that. */
4856                         pos.lp_oit_cookie = iops->store(env, lfsck->li_di_oit);
4857                         if (!lfsck->li_current_oit_processed)
4858                                 pos.lp_oit_cookie--;
4859
4860                         spin_lock(&lfsck->li_lock);
4861                         if (lfsck->li_di_dir) {
4862                                 pos.lp_dir_cookie = lfsck->li_cookie_dir;
4863                                 if (pos.lp_dir_cookie >= MDS_DIR_END_OFF) {
4864                                         fid_zero(&pos.lp_dir_parent);
4865                                         pos.lp_dir_cookie = 0;
4866                                 } else {
4867                                         pos.lp_dir_parent =
4868                                         *lfsck_dto2fid(lfsck->li_obj_dir);
4869                                 }
4870                         } else {
4871                                 fid_zero(&pos.lp_dir_parent);
4872                                 pos.lp_dir_cookie = 0;
4873                         }
4874                         spin_unlock(&lfsck->li_lock);
4875                 } else {
4876                         pos = ns->ln_pos_last_checkpoint;
4877                 }
4878
4879                 lfsck_pos_dump(m, &pos, "current_position");
4880         } else if (ns->ln_status == LS_SCANNING_PHASE2) {
4881                 time64_t duration = ktime_get_seconds() -
4882                                     com->lc_time_last_checkpoint;
4883                 __u64 checked = ns->ln_objs_checked_phase2 +
4884                                 com->lc_new_checked;
4885                 __u64 speed1 = ns->ln_items_checked;
4886                 __u64 speed2 = checked;
4887                 __u64 speed0 = speed1 + speed2;
4888                 __u64 new_checked = com->lc_new_checked;
4889                 time64_t rtime = ns->ln_run_time_phase2 + duration;
4890                 time64_t time0 = ns->ln_run_time_phase1 + rtime;
4891
4892                 if (duration != 0)
4893                         new_checked = div64_s64(new_checked, duration);
4894
4895                 if (ns->ln_run_time_phase1 != 0)
4896                         speed1 = div64_s64(speed1, ns->ln_run_time_phase1);
4897                 else if (ns->ln_items_checked != 0)
4898                         time0++;
4899
4900                 if (rtime != 0)
4901                         speed2 = div64_s64(speed2, rtime);
4902                 else if (checked != 0)
4903                         time0++;
4904
4905                 if (time0 != 0)
4906                         speed0 = div64_s64(speed0, time0);
4907
4908                 lfsck_namespace_dump_statistics(m, ns, ns->ln_items_checked,
4909                                                 checked,
4910                                                 ns->ln_run_time_phase1, rtime,
4911                                                 bk->lb_param & LPF_DRYRUN);
4912                 seq_printf(m, "average_speed_phase1: %llu items/sec\n"
4913                            "average_speed_phase2: %llu objs/sec\n"
4914                            "average_speed_total: %llu items/sec\n"
4915                            "real_time_speed_phase1: N/A\n"
4916                            "real_time_speed_phase2: %llu objs/sec\n"
4917                            "current_position: "DFID"\n",
4918                            speed1,
4919                            speed2,
4920                            speed0,
4921                            new_checked,
4922                            PFID(&ns->ln_fid_latest_scanned_phase2));
4923         } else {
4924                 __u64 speed1 = ns->ln_items_checked;
4925                 __u64 speed2 = ns->ln_objs_checked_phase2;
4926                 __u64 speed0 = speed1 + speed2;
4927                 time64_t time0 = ns->ln_run_time_phase1 + ns->ln_run_time_phase2;
4928
4929                 if (ns->ln_run_time_phase1 != 0)
4930                         speed1 = div64_s64(speed1, ns->ln_run_time_phase1);
4931                 else if (ns->ln_items_checked != 0)
4932                         time0++;
4933
4934                 if (ns->ln_run_time_phase2 != 0)
4935                         speed2 = div64_s64(speed2, ns->ln_run_time_phase2);
4936                 else if (ns->ln_objs_checked_phase2 != 0)
4937                         time0++;
4938
4939                 if (time0 != 0)
4940                         speed0 = div64_s64(speed0, time0);
4941
4942                 lfsck_namespace_dump_statistics(m, ns, ns->ln_items_checked,
4943                                                 ns->ln_objs_checked_phase2,
4944                                                 ns->ln_run_time_phase1,
4945                                                 ns->ln_run_time_phase2,
4946                                                 bk->lb_param & LPF_DRYRUN);
4947                 seq_printf(m, "average_speed_phase1: %llu items/sec\n"
4948                            "average_speed_phase2: %llu objs/sec\n"
4949                            "average_speed_total: %llu items/sec\n"
4950                            "real_time_speed_phase1: N/A\n"
4951                            "real_time_speed_phase2: N/A\n"
4952                            "current_position: N/A\n",
4953                            speed1,
4954                            speed2,
4955                            speed0);
4956         }
4957
4958         up_read(&com->lc_sem);
4959 }
4960
4961 static int lfsck_namespace_double_scan(const struct lu_env *env,
4962                                        struct lfsck_component *com)
4963 {
4964         struct lfsck_namespace          *ns     = com->lc_file_ram;
4965         struct lfsck_assistant_data     *lad    = com->lc_data;
4966         struct lfsck_tgt_descs          *ltds   = &com->lc_lfsck->li_mdt_descs;
4967         struct lfsck_tgt_desc           *ltd;
4968         struct lfsck_tgt_desc           *next;
4969         int                              rc;
4970
4971         rc = lfsck_double_scan_generic(env, com, ns->ln_status);
4972         if (thread_is_stopped(&lad->lad_thread)) {
4973                 LASSERT(list_empty(&lad->lad_req_list));
4974                 LASSERT(list_empty(&lad->lad_mdt_phase1_list));
4975
4976                 spin_lock(&ltds->ltd_lock);
4977                 list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase2_list,
4978                                          ltd_namespace_phase_list) {
4979                         list_del_init(&ltd->ltd_namespace_phase_list);
4980                 }
4981                 spin_unlock(&ltds->ltd_lock);
4982         }
4983
4984         return rc;
4985 }
4986
4987 static void lfsck_namespace_data_release(const struct lu_env *env,
4988                                          struct lfsck_component *com)
4989 {
4990         struct lfsck_assistant_data     *lad    = com->lc_data;
4991         struct lfsck_tgt_descs          *ltds   = &com->lc_lfsck->li_mdt_descs;
4992         struct lfsck_tgt_desc           *ltd;
4993         struct lfsck_tgt_desc           *next;
4994
4995         LASSERT(lad != NULL);
4996         LASSERT(thread_is_init(&lad->lad_thread) ||
4997                 thread_is_stopped(&lad->lad_thread));
4998         LASSERT(list_empty(&lad->lad_req_list));
4999
5000         com->lc_data = NULL;
5001         lfsck_namespace_release_lmv(env, com);
5002
5003         spin_lock(&ltds->ltd_lock);
5004         list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list,
5005                                  ltd_namespace_phase_list) {
5006                 list_del_init(&ltd->ltd_namespace_phase_list);
5007         }
5008         list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase2_list,
5009                                  ltd_namespace_phase_list) {
5010                 list_del_init(&ltd->ltd_namespace_phase_list);
5011         }
5012         list_for_each_entry_safe(ltd, next, &lad->lad_mdt_list,
5013                                  ltd_namespace_list) {
5014                 list_del_init(&ltd->ltd_namespace_list);
5015         }
5016         spin_unlock(&ltds->ltd_lock);
5017
5018         if (likely(lad->lad_bitmap != NULL))
5019                 bitmap_free(lad->lad_bitmap);
5020
5021         OBD_FREE_PTR(lad);
5022 }
5023
5024 static void lfsck_namespace_quit(const struct lu_env *env,
5025                                  struct lfsck_component *com)
5026 {
5027         struct lfsck_assistant_data     *lad    = com->lc_data;
5028         struct lfsck_tgt_descs          *ltds   = &com->lc_lfsck->li_mdt_descs;
5029         struct lfsck_tgt_desc           *ltd;
5030         struct lfsck_tgt_desc           *next;
5031
5032         LASSERT(lad != NULL);
5033
5034         lfsck_quit_generic(env, com);
5035
5036         LASSERT(thread_is_init(&lad->lad_thread) ||
5037                 thread_is_stopped(&lad->lad_thread));
5038         LASSERT(list_empty(&lad->lad_req_list));
5039
5040         lfsck_namespace_release_lmv(env, com);
5041
5042         spin_lock(&ltds->ltd_lock);
5043         list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase1_list,
5044                                  ltd_namespace_phase_list) {
5045                 list_del_init(&ltd->ltd_namespace_phase_list);
5046         }
5047         list_for_each_entry_safe(ltd, next, &lad->lad_mdt_phase2_list,
5048                                  ltd_namespace_phase_list) {
5049                 list_del_init(&ltd->ltd_namespace_phase_list);
5050         }
5051         spin_unlock(&ltds->ltd_lock);
5052 }
5053
5054 static int lfsck_namespace_in_notify(const struct lu_env *env,
5055                                      struct lfsck_component *com,
5056                                      struct lfsck_request *lr)
5057 {
5058         struct lfsck_instance *lfsck = com->lc_lfsck;
5059         struct lfsck_namespace *ns = com->lc_file_ram;
5060         struct lfsck_assistant_data *lad = com->lc_data;
5061         struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs;
5062         struct lfsck_tgt_desc *ltd;
5063         int rc = 0;
5064         bool fail = false;
5065         ENTRY;
5066
5067         switch (lr->lr_event) {
5068         case LE_SET_LMV_MASTER: {
5069                 struct dt_object        *obj;
5070
5071                 obj = lfsck_object_find_bottom(env, lfsck, &lr->lr_fid);
5072                 if (IS_ERR(obj))
5073                         RETURN(PTR_ERR(obj));
5074
5075                 if (likely(dt_object_exists(obj)))
5076                         rc = lfsck_namespace_notify_lmv_master_local(env, com,
5077                                                                      obj);
5078
5079                 lfsck_object_put(env, obj);
5080
5081                 RETURN(rc > 0 ? 0 : rc);
5082         }
5083         case LE_SET_LMV_SLAVE: {
5084                 if (!(lr->lr_flags & LEF_RECHECK_NAME_HASH))
5085                         ns->ln_striped_shards_repaired++;
5086
5087                 rc = lfsck_namespace_trace_update(env, com, &lr->lr_fid,
5088                                                   LNTF_RECHECK_NAME_HASH, true);
5089
5090                 RETURN(rc > 0 ? 0 : rc);
5091         }
5092         case LE_PHASE1_DONE:
5093         case LE_PHASE2_DONE:
5094         case LE_PEER_EXIT:
5095                 break;
5096         default:
5097                 RETURN(-EINVAL);
5098         }
5099
5100         CDEBUG(D_LFSCK, "%s: namespace LFSCK handles notify %u from MDT %x, "
5101                "status %d, flags %x\n", lfsck_lfsck2name(lfsck), lr->lr_event,
5102                lr->lr_index, lr->lr_status, lr->lr_flags2);
5103
5104         spin_lock(&ltds->ltd_lock);
5105         ltd = lfsck_ltd2tgt(ltds, lr->lr_index);
5106         if (ltd == NULL) {
5107                 spin_unlock(&ltds->ltd_lock);
5108
5109                 RETURN(-ENXIO);
5110         }
5111
5112         list_del_init(&ltd->ltd_namespace_phase_list);
5113         switch (lr->lr_event) {
5114         case LE_PHASE1_DONE:
5115                 if (lr->lr_status <= 0) {
5116                         ltd->ltd_namespace_done = 1;
5117                         list_del_init(&ltd->ltd_namespace_list);
5118                         CDEBUG(D_LFSCK, "%s: MDT %x failed/stopped at "
5119                                "phase1 for namespace LFSCK: rc = %d.\n",
5120                                lfsck_lfsck2name(lfsck),
5121                                ltd->ltd_index, lr->lr_status);
5122                         ns->ln_flags |= LF_INCOMPLETE;
5123                         fail = true;
5124                         break;
5125                 }
5126
5127                 if (lr->lr_flags2 & LF_INCOMPLETE)
5128                         ns->ln_flags |= LF_INCOMPLETE;
5129
5130                 if (list_empty(&ltd->ltd_namespace_list))
5131                         list_add_tail(&ltd->ltd_namespace_list,
5132                                       &lad->lad_mdt_list);
5133                 list_add_tail(&ltd->ltd_namespace_phase_list,
5134                               &lad->lad_mdt_phase2_list);
5135                 break;
5136         case LE_PHASE2_DONE:
5137                 ltd->ltd_namespace_done = 1;
5138                 list_del_init(&ltd->ltd_namespace_list);
5139                 break;
5140         case LE_PEER_EXIT:
5141                 fail = true;
5142                 ltd->ltd_namespace_done = 1;
5143                 list_del_init(&ltd->ltd_namespace_list);
5144                 if (!(lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT)) {
5145                         CDEBUG(D_LFSCK,
5146                                "%s: the peer MDT %x exit namespace LFSCK\n",
5147                                lfsck_lfsck2name(lfsck), ltd->ltd_index);
5148                         ns->ln_flags |= LF_INCOMPLETE;
5149                 }
5150                 break;
5151         default:
5152                 break;
5153         }
5154         spin_unlock(&ltds->ltd_lock);
5155
5156         if (fail && lfsck->li_bookmark_ram.lb_param & LPF_FAILOUT) {
5157                 struct lfsck_stop *stop = &lfsck_env_info(env)->lti_stop;
5158
5159                 memset(stop, 0, sizeof(*stop));
5160                 stop->ls_status = lr->lr_status;
5161                 stop->ls_flags = lr->lr_param & ~LPF_BROADCAST;
5162                 lfsck_stop(env, lfsck->li_bottom, stop);
5163         } else if (lfsck_phase2_next_ready(lad)) {
5164                 wake_up(&lad->lad_thread.t_ctl_waitq);
5165         }
5166
5167         RETURN(0);
5168 }
5169
5170 static void lfsck_namespace_repaired(struct lfsck_namespace *ns, __u64 *count)
5171 {
5172         *count += ns->ln_objs_nlink_repaired;
5173         *count += ns->ln_dirent_repaired;
5174         *count += ns->ln_linkea_repaired;
5175         *count += ns->ln_mul_linked_repaired;
5176         *count += ns->ln_unmatched_pairs_repaired;
5177         *count += ns->ln_dangling_repaired;
5178         *count += ns->ln_mul_ref_repaired;
5179         *count += ns->ln_bad_type_repaired;
5180         *count += ns->ln_lost_dirent_repaired;
5181         *count += ns->ln_striped_dirs_disabled;
5182         *count += ns->ln_striped_dirs_repaired;
5183         *count += ns->ln_striped_shards_repaired;
5184         *count += ns->ln_name_hash_repaired;
5185         *count += ns->ln_local_lpf_moved;
5186 }
5187
5188 static int lfsck_namespace_query_all(const struct lu_env *env,
5189                                      struct lfsck_component *com,
5190                                      __u32 *mdts_count, __u64 *repaired)
5191 {
5192         struct lfsck_namespace *ns = com->lc_file_ram;
5193         struct lfsck_tgt_descs *ltds = &com->lc_lfsck->li_mdt_descs;
5194         struct lfsck_tgt_desc *ltd;
5195         int idx;
5196         int rc;
5197         ENTRY;
5198
5199         rc = lfsck_query_all(env, com);
5200         if (rc != 0)
5201                 RETURN(rc);
5202
5203         down_read(&ltds->ltd_rw_sem);
5204         for_each_set_bit(idx, ltds->ltd_tgts_bitmap, ltds->ltd_tgts_mask_len) {
5205                 ltd = lfsck_ltd2tgt(ltds, idx);
5206                 LASSERT(ltd != NULL);
5207
5208                 mdts_count[ltd->ltd_namespace_status]++;
5209                 *repaired += ltd->ltd_namespace_repaired;
5210         }
5211         up_read(&ltds->ltd_rw_sem);
5212
5213         down_read(&com->lc_sem);
5214         mdts_count[ns->ln_status]++;
5215         lfsck_namespace_repaired(ns, repaired);
5216         up_read(&com->lc_sem);
5217
5218         RETURN(0);
5219 }
5220
5221 static int lfsck_namespace_query(const struct lu_env *env,
5222                                  struct lfsck_component *com,
5223                                  struct lfsck_request *req,
5224                                  struct lfsck_reply *rep,
5225                                  struct lfsck_query *que, int idx)
5226 {
5227         struct lfsck_namespace *ns = com->lc_file_ram;
5228         int rc = 0;
5229
5230         if (que != NULL) {
5231                 LASSERT(com->lc_lfsck->li_master);
5232
5233                 rc = lfsck_namespace_query_all(env, com,
5234                                                que->lu_mdts_count[idx],
5235                                                &que->lu_repaired[idx]);
5236         } else {
5237                 down_read(&com->lc_sem);
5238                 rep->lr_status = ns->ln_status;
5239                 if (req->lr_flags & LEF_QUERY_ALL)
5240                         lfsck_namespace_repaired(ns, &rep->lr_repaired);
5241                 up_read(&com->lc_sem);
5242         }
5243
5244         return rc;
5245 }
5246
5247 static const struct lfsck_operations lfsck_namespace_ops = {
5248         .lfsck_reset            = lfsck_namespace_reset,
5249         .lfsck_fail             = lfsck_namespace_fail,
5250         .lfsck_close_dir        = lfsck_namespace_close_dir,
5251         .lfsck_open_dir         = lfsck_namespace_open_dir,
5252         .lfsck_checkpoint       = lfsck_namespace_checkpoint,
5253         .lfsck_prep             = lfsck_namespace_prep,
5254         .lfsck_exec_oit         = lfsck_namespace_exec_oit,
5255         .lfsck_exec_dir         = lfsck_namespace_exec_dir,
5256         .lfsck_post             = lfsck_namespace_post,
5257         .lfsck_dump             = lfsck_namespace_dump,
5258         .lfsck_double_scan      = lfsck_namespace_double_scan,
5259         .lfsck_data_release     = lfsck_namespace_data_release,
5260         .lfsck_quit             = lfsck_namespace_quit,
5261         .lfsck_in_notify        = lfsck_namespace_in_notify,
5262         .lfsck_query            = lfsck_namespace_query,
5263 };
5264
5265 /**
5266  * Repair dangling name entry.
5267  *
5268  * For the name entry with dangling reference, we need to repare the
5269  * inconsistency according to the LFSCK sponsor's requirement:
5270  *
5271  * 1) Keep the inconsistency there and report the inconsistency case,
5272  *    then give the chance to the application to find related issues,
5273  *    and the users can make the decision about how to handle it with
5274  *    more human knownledge. (by default)
5275  *
5276  * 2) Re-create the missing MDT-object with the FID information.
5277  *
5278  * \param[in] env       pointer to the thread context
5279  * \param[in] com       pointer to the lfsck component
5280  * \param[in] parent    pointer to the dir object that contains the dangling
5281  *                      name entry
5282  * \param[in] child     pointer to the object corresponding to the dangling
5283  *                      name entry
5284  * \param[in] lnr       pointer to the namespace request that contains the
5285  *                      name's name, parent object, parent's LMV, and ect.
5286  *
5287  * \retval              positive number if no need to repair
5288  * \retval              zero for repaired successfully
5289  * \retval              negative error number on failure
5290  */
5291 int lfsck_namespace_repair_dangling(const struct lu_env *env,
5292                                     struct lfsck_component *com,
5293                                     struct dt_object *parent,
5294                                     struct dt_object *child,
5295                                     struct lfsck_namespace_req *lnr)
5296 {
5297         struct lfsck_thread_info *info = lfsck_env_info(env);
5298         struct lu_attr *la = &info->lti_la;
5299         struct dt_allocation_hint *hint = &info->lti_hint;
5300         struct dt_object_format *dof = &info->lti_dof;
5301         struct dt_insert_rec *rec = &info->lti_dt_rec;
5302         struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2;
5303         const struct lu_name *cname;
5304         const struct lu_fid *pfid = lfsck_dto2fid(parent);
5305         const struct lu_fid *cfid = lfsck_dto2fid(child);
5306         struct linkea_data ldata = { NULL };
5307         struct lfsck_lock_handle *llh = &info->lti_llh;
5308         struct lustre_handle rlh = { 0 };
5309         struct lustre_handle clh = { 0 };
5310         struct lu_buf linkea_buf;
5311         struct lu_buf lmv_buf;
5312         struct lfsck_instance *lfsck = com->lc_lfsck;
5313         struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
5314         struct dt_device *dev = lfsck->li_next;
5315         struct thandle *th = NULL;
5316         int rc = 0;
5317         __u16 type = lnr->lnr_type;
5318         bool create;
5319         ENTRY;
5320
5321         cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
5322         if (bk->lb_param & LPF_CREATE_MDTOBJ)
5323                 create = true;
5324         else
5325                 create = false;
5326
5327         if (!create || bk->lb_param & LPF_DRYRUN)
5328                 GOTO(log, rc = 0);
5329
5330         /* We may need to create the sub-objects of the @child via LOD,
5331          * so make the modification based on lfsck->li_next. */
5332
5333         parent = lfsck_object_locate(dev, parent);
5334         if (IS_ERR(parent))
5335                 GOTO(log, rc = PTR_ERR(parent));
5336
5337         if (unlikely(!dt_try_as_dir(env, parent, true)))
5338                 GOTO(log, rc = -ENOTDIR);
5339
5340         child = lfsck_object_locate(dev, child);
5341         if (IS_ERR(child))
5342                 GOTO(log, rc = PTR_ERR(child));
5343
5344         rc = linkea_links_new(&ldata, &info->lti_linkea_buf2,
5345                               cname, pfid);
5346         if (rc != 0)
5347                 GOTO(log, rc);
5348
5349         rc = lfsck_lock(env, lfsck, parent, lnr->lnr_name, llh,
5350                         MDS_INODELOCK_UPDATE, LCK_PW);
5351         if (rc != 0)
5352                 GOTO(log, rc);
5353
5354         rc = lfsck_namespace_check_exist(env, parent, child, lnr->lnr_name);
5355         if (rc != 0)
5356                 GOTO(log, rc);
5357
5358         if (dt_object_remote(child)) {
5359                 rc = lfsck_remote_lookup_lock(env, lfsck, parent, child, &rlh,
5360                                               LCK_EX);
5361                 if (rc != 0)
5362                         GOTO(log, rc);
5363         }
5364
5365         rc = lfsck_ibits_lock(env, lfsck, child, &clh,
5366                               MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP |
5367                               MDS_INODELOCK_XATTR, LCK_EX);
5368         if (rc != 0)
5369                 GOTO(unlock_remote_lookup, rc);
5370
5371         /* Set the ctime as zero, then others can know it is created for
5372          * repairing dangling name entry by LFSCK. And if the LFSCK made
5373          * wrong decision and the real MDT-object has been found later,
5374          * then the LFSCK has chance to fix the incosistency properly. */
5375         memset(la, 0, sizeof(*la));
5376         if (S_ISDIR(type))
5377                 la->la_mode = (type & S_IFMT) | 0700;
5378         else
5379                 la->la_mode = (type & S_IFMT) | 0600;
5380         la->la_valid = LA_TYPE | LA_MODE | LA_CTIME;
5381
5382         /*
5383          * if it's directory, skip do_ah_init() to create a plain directory
5384          * because it may have shards already, which will be inserted back
5385          * later, besides, it may be remote, and creating stripe directory
5386          * remotely is not supported.
5387          */
5388         if (S_ISREG(type))
5389                 child->do_ops->do_ah_init(env, hint,  parent, child, type);
5390         else if (S_ISDIR(type))
5391                 child->do_ops->do_ah_init(env, hint,  NULL, child, type);
5392
5393         memset(dof, 0, sizeof(*dof));
5394         dof->dof_type = dt_mode_to_dft(type);
5395         /* If the target is a regular file, then the LFSCK will only create
5396          * the MDT-object without stripes (dof->dof_reg.striped = 0). related
5397          * OST-objects will be created when write open. */
5398
5399         th = lfsck_trans_create(env, dev, lfsck);
5400         if (IS_ERR(th))
5401                 GOTO(unlock_child, rc = PTR_ERR(th));
5402
5403         /* 1a. create child. */
5404         rc = dt_declare_create(env, child, la, hint, dof, th);
5405         if (rc != 0)
5406                 GOTO(stop, rc);
5407
5408         if (S_ISDIR(type)) {
5409                 if (unlikely(!dt_try_as_dir(env, child, false)))
5410                         GOTO(stop, rc = -ENOTDIR);
5411
5412                 /* 2a. increase child nlink */
5413                 rc = dt_declare_ref_add(env, child, th);
5414                 if (rc != 0)
5415                         GOTO(stop, rc);
5416
5417                 /* 3a. insert dot into child dir */
5418                 rec->rec_type = S_IFDIR;
5419                 rec->rec_fid = cfid;
5420                 rc = dt_declare_insert(env, child,
5421                                        (const struct dt_rec *)rec,
5422                                        (const struct dt_key *)dot, th);
5423                 if (rc != 0)
5424                         GOTO(stop, rc);
5425
5426                 /* 4a. insert dotdot into child dir */
5427                 rec->rec_fid = pfid;
5428                 rc = dt_declare_insert(env, child,
5429                                        (const struct dt_rec *)rec,
5430                                        (const struct dt_key *)dotdot, th);
5431                 if (rc != 0)
5432                         GOTO(stop, rc);
5433
5434                 /* 5a. generate slave LMV EA. */
5435                 if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) {
5436                         int idx;
5437
5438                         idx = lfsck_shard_name_to_index(env,
5439                                         lnr->lnr_name, lnr->lnr_namelen,
5440                                         type, cfid);
5441                         if (unlikely(idx < 0))
5442                                 GOTO(stop, rc = idx);
5443
5444                         *lmv2 = lnr->lnr_lmv->ll_lmv;
5445                         lmv2->lmv_magic = LMV_MAGIC_STRIPE;
5446                         lmv2->lmv_master_mdt_index = idx;
5447
5448                         lfsck_lmv_header_cpu_to_le(lmv2, lmv2);
5449                         lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2));
5450                         rc = dt_declare_xattr_set(env, child, &lmv_buf,
5451                                                   XATTR_NAME_LMV, 0, th);
5452                         if (rc != 0)
5453                                 GOTO(stop, rc);
5454                 }
5455         }
5456
5457         /* 6a. insert linkEA for child */
5458         lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
5459                        ldata.ld_leh->leh_len);
5460         rc = dt_declare_xattr_set(env, child, &linkea_buf,
5461                                   XATTR_NAME_LINK, 0, th);
5462         if (rc != 0)
5463                 GOTO(stop, rc);
5464
5465         /* 7a. if child is remote, delete and insert to generate local agent */
5466         if (dt_object_remote(child)) {
5467                 rc = dt_declare_delete(env, parent,
5468                                        (const struct dt_key *)lnr->lnr_name,
5469                                        th);
5470                 if (rc)
5471                         GOTO(stop, rc);
5472
5473                 rec->rec_fid = cfid;
5474                 rec->rec_type = type;
5475                 rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
5476                                        (const struct dt_key *)lnr->lnr_name,
5477                                        th);
5478                 if (rc)
5479                         GOTO(stop, rc);
5480         }
5481
5482         rc = dt_trans_start_local(env, dev, th);
5483         if (rc != 0)
5484                 GOTO(stop, rc = (rc == -EEXIST ? 1 : rc));
5485
5486         dt_write_lock(env, child, 0);
5487         /* 1b. create child */
5488         rc = dt_create(env, child, la, hint, dof, th);
5489         if (rc != 0)
5490                 GOTO(unlock, rc = (rc == -EEXIST ? 1 : rc));
5491
5492         if (S_ISDIR(type)) {
5493                 /* 2b. increase child nlink */
5494                 rc = dt_ref_add(env, child, th);
5495                 if (rc != 0)
5496                         GOTO(unlock, rc);
5497
5498                 /* 3b. insert dot into child dir */
5499                 rec->rec_type = S_IFDIR;
5500                 rec->rec_fid = cfid;
5501                 rc = dt_insert(env, child, (const struct dt_rec *)rec,
5502                                (const struct dt_key *)dot, th);
5503                 if (rc != 0)
5504                         GOTO(unlock, rc);
5505
5506                 /* 4b. insert dotdot into child dir */
5507                 rec->rec_fid = pfid;
5508                 rc = dt_insert(env, child, (const struct dt_rec *)rec,
5509                                (const struct dt_key *)dotdot, th);
5510                 if (rc != 0)
5511                         GOTO(unlock, rc);
5512
5513                 /* 5b. generate slave LMV EA. */
5514                 if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) {
5515                         rc = dt_xattr_set(env, child, &lmv_buf, XATTR_NAME_LMV,
5516                                           0, th);
5517                         if (rc != 0)
5518                                 GOTO(unlock, rc);
5519                 }
5520         }
5521
5522         /* 6b. insert linkEA for child. */
5523         rc = dt_xattr_set(env, child, &linkea_buf,
5524                           XATTR_NAME_LINK, 0, th);
5525         if (rc)
5526                 GOTO(unlock, rc);
5527
5528         /* 7b. if child is remote, delete and insert to generate local agent */
5529         if (dt_object_remote(child)) {
5530                 rc = dt_delete(env, parent,
5531                                (const struct dt_key *)lnr->lnr_name, th);
5532                 if (rc)
5533                         GOTO(unlock, rc);
5534
5535                 rec->rec_type = type;
5536                 rec->rec_fid = cfid;
5537                 rc = dt_insert(env, parent, (const struct dt_rec *)rec,
5538                                (const struct dt_key *)lnr->lnr_name, th);
5539                 if (rc)
5540                         GOTO(unlock, rc);
5541         }
5542
5543         GOTO(unlock, rc);
5544
5545 unlock:
5546         dt_write_unlock(env, child);
5547
5548 stop:
5549         dt_trans_stop(env, dev, th);
5550
5551 unlock_child:
5552         lfsck_ibits_unlock(&clh, LCK_EX);
5553 unlock_remote_lookup:
5554         if (dt_object_remote(child))
5555                 lfsck_ibits_unlock(&rlh, LCK_EX);
5556 log:
5557         lfsck_unlock(llh);
5558         CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found dangling "
5559                "reference for: parent "DFID", child "DFID", type %u, "
5560                "name %s. %s: rc = %d\n", lfsck_lfsck2name(lfsck),
5561                PFID(pfid), PFID(cfid), type, cname->ln_name,
5562                create ? "Create the lost MDT-object as required" :
5563                         "Keep the MDT-object there by default", rc);
5564
5565         if (rc <= 0) {
5566                 struct lfsck_namespace *ns = com->lc_file_ram;
5567
5568                 ns->ln_flags |= LF_INCONSISTENT;
5569         }
5570
5571         return rc;
5572 }
5573
5574 static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
5575                                                 struct lfsck_component *com,
5576                                                 struct lfsck_assistant_req *lar)
5577 {
5578         struct lfsck_thread_info   *info     = lfsck_env_info(env);
5579         struct lu_attr             *la       = &info->lti_la;
5580         struct lfsck_instance      *lfsck    = com->lc_lfsck;
5581         struct lfsck_bookmark      *bk       = &lfsck->li_bookmark_ram;
5582         struct lfsck_namespace     *ns       = com->lc_file_ram;
5583         struct lfsck_assistant_data *lad     = com->lc_data;
5584         struct linkea_data          ldata    = { NULL };
5585         const struct lu_name       *cname;
5586         struct thandle             *handle   = NULL;
5587         struct lfsck_namespace_req *lnr      =
5588                 container_of(lar, struct lfsck_namespace_req, lnr_lar);
5589         struct dt_object           *dir      = NULL;
5590         struct dt_object           *obj      = NULL;
5591         struct lfsck_assistant_object *lso   = lar->lar_parent;
5592         const struct lu_fid        *pfid     = &lso->lso_fid;
5593         struct dt_device           *dev      = NULL;
5594         struct lustre_handle        lh       = { 0 };
5595         bool                        repaired = false;
5596         bool                        dtlocked = false;
5597         bool                        remove = false;
5598         bool                        newdata = false;
5599         bool                        log      = false;
5600         bool                        bad_hash = false;
5601         bool                        bad_linkea = false;
5602         int                         idx      = 0;
5603         int                         count    = 0;
5604         int                         rc       = 0;
5605         enum lfsck_namespace_inconsistency_type type = LNIT_NONE;
5606         ENTRY;
5607
5608         if (lso->lso_dead)
5609                 RETURN(0);
5610
5611         la->la_nlink = 0;
5612         if (lnr->lnr_attr & LUDA_UPGRADE) {
5613                 ns->ln_flags |= LF_UPGRADE;
5614                 ns->ln_dirent_repaired++;
5615                 repaired = true;
5616         } else if (lnr->lnr_attr & LUDA_REPAIR) {
5617                 ns->ln_flags |= LF_INCONSISTENT;
5618                 ns->ln_dirent_repaired++;
5619                 repaired = true;
5620         }
5621
5622         if (unlikely(fid_is_zero(&lnr->lnr_fid) &&
5623                      strcmp(lnr->lnr_name, dotdot) == 0)) {
5624                 rc = lfsck_namespace_trace_update(env, com, pfid,
5625                                                 LNTF_CHECK_PARENT, true);
5626
5627                 GOTO(out, rc);
5628         }
5629
5630         if (unlikely(!fid_is_sane(&lnr->lnr_fid))) {
5631                 CDEBUG(D_LFSCK, "%s: dir scan find invalid FID "DFID
5632                        " for the name entry %.*s under "DFID"\n",
5633                        lfsck_lfsck2name(lfsck), PFID(&lnr->lnr_fid),
5634                        lnr->lnr_namelen, lnr->lnr_name, PFID(pfid));
5635
5636                 if (strcmp(lnr->lnr_name, dotdot) != 0)
5637                         /* invalid FID means bad name entry, remove it. */
5638                         type = LNIT_BAD_DIRENT;
5639                 else
5640                         /* If the parent FID is invalid, we cannot remove
5641                          * the ".." entry directly. */
5642                         rc = lfsck_namespace_trace_update(env, com, pfid,
5643                                                 LNTF_CHECK_PARENT, true);
5644
5645                 GOTO(out, rc);
5646         }
5647
5648         if (unlikely(lnr->lnr_dir_cookie == MDS_DIR_END_OFF)) {
5649                 rc = lfsck_namespace_striped_dir_rescan(env, com, lnr);
5650
5651                 RETURN(rc);
5652         }
5653
5654         if (fid_seq_is_dot(fid_seq(&lnr->lnr_fid)))
5655                 GOTO(out, rc = 0);
5656
5657         if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) {
5658                 rc = lfsck_namespace_handle_striped_master(env, com, lnr);
5659
5660                 RETURN(rc);
5661         }
5662
5663         idx = lfsck_find_mdt_idx_by_fid(env, lfsck, &lnr->lnr_fid);
5664         if (idx < 0)
5665                 GOTO(out, rc = idx);
5666
5667         if (idx == lfsck_dev_idx(lfsck)) {
5668                 if (unlikely(strcmp(lnr->lnr_name, dotdot) == 0))
5669                         GOTO(out, rc = 0);
5670
5671                 dev = lfsck->li_bottom;
5672         } else {
5673                 struct lfsck_tgt_desc *ltd;
5674
5675                 /* Usually, some local filesystem consistency verification
5676                  * tools can guarantee the local namespace tree consistenct.
5677                  * So the LFSCK will only verify the remote directory. */
5678                 if (unlikely(strcmp(lnr->lnr_name, dotdot) == 0)) {
5679                         rc = lfsck_namespace_trace_update(env, com, pfid,
5680                                                 LNTF_CHECK_PARENT, true);
5681
5682                         GOTO(out, rc);
5683                 }
5684
5685                 ltd = lfsck_ltd2tgt(&lfsck->li_mdt_descs, idx);
5686                 if (unlikely(ltd == NULL)) {
5687                         CDEBUG(D_LFSCK, "%s: cannot talk with MDT %x which "
5688                                "did not join the namespace LFSCK\n",
5689                                lfsck_lfsck2name(lfsck), idx);
5690                         lfsck_lad_set_bitmap(env, com, idx);
5691
5692                         GOTO(out, rc = -ENODEV);
5693                 }
5694
5695                 dev = ltd->ltd_tgt;
5696         }
5697
5698         obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid);
5699         if (IS_ERR(obj))
5700                 GOTO(out, rc = PTR_ERR(obj));
5701
5702         cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
5703         if (dt_object_exists(obj) == 0) {
5704
5705 dangling:
5706                 if (dir == NULL) {
5707                         dir = lfsck_assistant_object_load(env, lfsck, lso);
5708                         if (IS_ERR(dir)) {
5709                                 rc = PTR_ERR(dir);
5710
5711                                 GOTO(trace, rc == -ENOENT ? 0 : rc);
5712                         }
5713                 }
5714
5715                 rc = lfsck_namespace_check_exist(env, dir, obj, lnr->lnr_name);
5716                 if (rc == 0) {
5717                         if (!lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv,
5718                                         lnr->lnr_name, lnr->lnr_namelen)) {
5719                                 type = LNIT_BAD_DIRENT;
5720
5721                                 GOTO(out, rc);
5722                         }
5723
5724                         type = LNIT_DANGLING;
5725                         rc = lfsck_namespace_repair_dangling(env, com, dir,
5726                                                              obj, lnr);
5727                         if (rc == 0)
5728                                 repaired = true;
5729                 }
5730
5731                 GOTO(out, rc);
5732         }
5733
5734         if (!(bk->lb_param & LPF_DRYRUN) && lad->lad_advance_lock) {
5735
5736 again:
5737                 rc = lfsck_ibits_lock(env, lfsck, obj, &lh,
5738                                       MDS_INODELOCK_UPDATE |
5739                                       MDS_INODELOCK_XATTR, LCK_EX);
5740                 if (rc != 0)
5741                         GOTO(out, rc);
5742
5743                 handle = lfsck_trans_create(env, dev, lfsck);
5744                 if (IS_ERR(handle))
5745                         GOTO(out, rc = PTR_ERR(handle));
5746
5747                 rc = lfsck_declare_namespace_exec_dir(env, obj, handle);
5748                 if (rc != 0)
5749                         GOTO(stop, rc);
5750
5751                 rc = dt_trans_start_local(env, dev, handle);
5752                 if (rc != 0)
5753                         GOTO(stop, rc);
5754
5755                 dt_write_lock(env, obj, 0);
5756                 dtlocked = true;
5757         }
5758
5759         rc = lfsck_links_read(env, obj, &ldata);
5760         if (unlikely(rc == -ENOENT)) {
5761                 if (handle != NULL) {
5762                         dt_write_unlock(env, obj);
5763                         dtlocked = false;
5764
5765                         dt_trans_stop(env, dev, handle);
5766                         handle = NULL;
5767
5768                         lfsck_ibits_unlock(&lh, LCK_EX);
5769                 }
5770
5771                 /* It may happen when the remote object has been removed,
5772                  * but the local MDT is not aware of that. */
5773                 goto dangling;
5774         } else if (rc == 0) {
5775                 count = ldata.ld_leh->leh_reccount;
5776                 rc = linkea_links_find(&ldata, cname, pfid);
5777                 if ((rc == 0) &&
5778                     (count == 1 || !S_ISDIR(lfsck_object_type(obj)))) {
5779                         if ((lfsck_object_type(obj) & S_IFMT) !=
5780                             lnr->lnr_type) {
5781                                 ns->ln_flags |= LF_INCONSISTENT;
5782                                 type = LNIT_BAD_TYPE;
5783                         }
5784
5785                         goto stop;
5786                 }
5787
5788                 /* If the name entry hash does not match the slave striped
5789                  * directory, and the name entry does not match also, then
5790                  * it is quite possible that name entry is corrupted. */
5791                 if (!lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv,
5792                                         lnr->lnr_name, lnr->lnr_namelen)) {
5793                         ns->ln_flags |= LF_INCONSISTENT;
5794                         type = LNIT_BAD_DIRENT;
5795
5796                         GOTO(stop, rc = 0);
5797                 }
5798
5799                 /* If the file type stored in the name entry does not match
5800                  * the file type claimed by the object, and the object does
5801                  * not recognize the name entry, then it is quite possible
5802                  * that the name entry is corrupted. */
5803                 if ((lfsck_object_type(obj) & S_IFMT) != lnr->lnr_type) {
5804                         ns->ln_flags |= LF_INCONSISTENT;
5805                         type = LNIT_BAD_DIRENT;
5806
5807                         GOTO(stop, rc = 0);
5808                 }
5809
5810                 /* For sub-dir object, we cannot make sure whether the sub-dir
5811                  * back references the parent via ".." name entry correctly or
5812                  * not in the LFSCK first-stage scanning. It may be that the
5813                  * (remote) sub-dir ".." name entry has no parent FID after
5814                  * file-level backup/restore and its linkEA may be wrong.
5815                  * So under such case, we should replace the linkEA according
5816                  * to current name entry. But this needs to be done during the
5817                  * LFSCK second-stage scanning. The LFSCK will record the name
5818                  * entry for further possible using. */
5819                 remove = false;
5820                 newdata = false;
5821                 goto nodata;
5822         } else if (unlikely(rc == -EINVAL)) {
5823                 if ((lfsck_object_type(obj) & S_IFMT) != lnr->lnr_type)
5824                         type = LNIT_BAD_TYPE;
5825
5826                 count = 1;
5827                 /* The magic crashed, we are not sure whether there are more
5828                  * corrupt data in the linkea, so remove all linkea entries. */
5829                 remove = true;
5830                 newdata = true;
5831                 goto nodata;
5832         } else if (rc == -ENODATA) {
5833                 if ((lfsck_object_type(obj) & S_IFMT) != lnr->lnr_type)
5834                         type = LNIT_BAD_TYPE;
5835
5836                 count = 1;
5837                 remove = false;
5838                 newdata = true;
5839
5840 nodata:
5841                 if (rc == -ENOENT &&
5842                     linkea_will_overflow(&ldata, cname)) {
5843                         CDEBUG(D_INODE, "No enough space to hold linkea entry '"
5844                                DFID": %.*s' at %u\n", PFID(pfid),
5845                                cname->ln_namelen, cname->ln_name,
5846                                ldata.ld_leh->leh_overflow_time);
5847                         log = true;
5848                         rc = 0;
5849                         goto stop;
5850                 }
5851
5852                 if (bk->lb_param & LPF_DRYRUN) {
5853                         if (rc == -ENODATA)
5854                                 ns->ln_flags |= LF_UPGRADE;
5855                         else
5856                                 ns->ln_flags |= LF_INCONSISTENT;
5857                         ns->ln_linkea_repaired++;
5858                         repaired = true;
5859                         log = true;
5860                         goto stop;
5861                 }
5862
5863                 if (!lustre_handle_is_used(&lh)) {
5864                         remove = false;
5865                         newdata = false;
5866                         type = LNIT_NONE;
5867
5868                         goto again;
5869                 }
5870
5871                 LASSERT(handle != NULL);
5872
5873                 if (dir == NULL) {
5874                         dir = lfsck_assistant_object_load(env, lfsck, lso);
5875                         if (IS_ERR(dir)) {
5876                                 rc = PTR_ERR(dir);
5877
5878                                 GOTO(stop, rc == -ENOENT ? 0 : rc);
5879                         }
5880                 }
5881
5882                 rc = lfsck_namespace_check_exist(env, dir, obj, lnr->lnr_name);
5883                 if (rc != 0)
5884                         GOTO(stop, rc);
5885
5886                 bad_linkea = true;
5887                 if (!remove && newdata)
5888                         ns->ln_flags |= LF_UPGRADE;
5889                 else if (remove || !(ns->ln_flags & LF_UPGRADE))
5890                         ns->ln_flags |= LF_INCONSISTENT;
5891
5892                 if (remove) {
5893                         LASSERT(newdata);
5894
5895                         rc = dt_xattr_del(env, obj, XATTR_NAME_LINK, handle);
5896                         if (rc != 0 && rc != -ENOENT && rc != -ENODATA)
5897                                 GOTO(stop, rc);
5898                 }
5899
5900                 if (newdata) {
5901                         rc = linkea_data_new(&ldata,
5902                                         &lfsck_env_info(env)->lti_linkea_buf);
5903                         if (rc != 0)
5904                                 GOTO(stop, rc);
5905                 }
5906
5907                 rc = linkea_add_buf(&ldata, cname, pfid, false);
5908                 if (rc == 0)
5909                         rc = lfsck_links_write(env, obj, &ldata, handle);
5910                 if (rc != 0)
5911                         GOTO(stop, rc);
5912
5913                 count = ldata.ld_leh->leh_reccount;
5914                 if (!S_ISDIR(lfsck_object_type(obj)) ||
5915                     !dt_object_remote(obj)) {
5916                         ns->ln_linkea_repaired++;
5917                         repaired = true;
5918                         log = true;
5919                 }
5920         } else {
5921                 GOTO(stop, rc);
5922         }
5923
5924 stop:
5925         if (dtlocked)
5926                 dt_write_unlock(env, obj);
5927
5928         if (handle != NULL && !IS_ERR(handle))
5929                 dt_trans_stop(env, dev, handle);
5930
5931 out:
5932         lfsck_ibits_unlock(&lh, LCK_EX);
5933
5934         if (!name_is_dot_or_dotdot(lnr->lnr_name, lnr->lnr_namelen) &&
5935             !lfsck_is_valid_slave_name_entry(env, lnr->lnr_lmv,
5936                                              lnr->lnr_name, lnr->lnr_namelen) &&
5937             type != LNIT_BAD_DIRENT) {
5938                 ns->ln_flags |= LF_INCONSISTENT;
5939
5940                 log = false;
5941                 if (dir == NULL) {
5942                         dir = lfsck_assistant_object_load(env, lfsck, lso);
5943                         if (IS_ERR(dir)) {
5944                                 rc = PTR_ERR(dir);
5945
5946                                 GOTO(trace, rc == -ENOENT ? 0 : rc);
5947                         }
5948                 }
5949
5950                 rc = lfsck_namespace_repair_bad_name_hash(env, com, dir,
5951                                                 lnr->lnr_lmv, lnr->lnr_name);
5952                 if (rc == 0)
5953                         bad_hash = true;
5954         }
5955
5956         if (rc >= 0) {
5957                 if (type != LNIT_NONE && dir == NULL) {
5958                         dir = lfsck_assistant_object_load(env, lfsck, lso);
5959                         if (IS_ERR(dir)) {
5960                                 rc = PTR_ERR(dir);
5961
5962                                 GOTO(trace, rc == -ENOENT ? 0 : rc);
5963                         }
5964                 }
5965
5966                 switch (type) {
5967                 case LNIT_BAD_TYPE:
5968                         log = false;
5969                         rc = lfsck_namespace_repair_dirent(env, com, dir,
5970                                         obj, lnr->lnr_name, lnr->lnr_name,
5971                                         lnr->lnr_type, true, false);
5972                         if (rc > 0)
5973                                 repaired = true;
5974                         break;
5975                 case LNIT_BAD_DIRENT:
5976                         log = false;
5977                         /* XXX: This is a bad dirent, we do not know whether
5978                          *      the original name entry reference a regular
5979                          *      file or a directory, then keep the parent's
5980                          *      nlink count unchanged here. */
5981                         rc = lfsck_namespace_repair_dirent(env, com, dir,
5982                                         obj, lnr->lnr_name, lnr->lnr_name,
5983                                         lnr->lnr_type, false, false);
5984                         if (rc > 0)
5985                                 repaired = true;
5986                         break;
5987                 default:
5988                         break;
5989                 }
5990
5991                 if (obj != NULL && count == 1 &&
5992                     S_ISREG(lfsck_object_type(obj)))
5993                         dt_attr_get(env, obj, la);
5994
5995                 /* if new linkea entry is added, the old entry may be stale,
5996                  * check it in phase 2. Sigh, linkea check can only be done
5997                  * locally.
5998                  */
5999                 if (bad_linkea && !remove && !newdata &&
6000                     !dt_object_remote(obj) && count > 1)
6001                         rc = lfsck_namespace_trace_update(env, com,
6002                                                           &lnr->lnr_fid,
6003                                                           LNTF_CHECK_LINKEA,
6004                                                           true);
6005         }
6006
6007 trace:
6008         down_write(&com->lc_sem);
6009         if (rc < 0) {
6010                 CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail to handle "
6011                        "the entry: "DFID", parent "DFID", name %.*s: rc = %d\n",
6012                        lfsck_lfsck2name(lfsck), PFID(&lnr->lnr_fid), PFID(pfid),
6013                        lnr->lnr_namelen, lnr->lnr_name, rc);
6014
6015                 lfsck_namespace_record_failure(env, lfsck, ns);
6016                 if ((rc == -ENOTCONN || rc == -ESHUTDOWN || rc == -EREMCHG ||
6017                      rc == -ETIMEDOUT || rc == -EHOSTDOWN ||
6018                      rc == -EHOSTUNREACH || rc == -EINPROGRESS) &&
6019                     dev != NULL && dev != lfsck->li_bottom)
6020                         lfsck_lad_set_bitmap(env, com, idx);
6021
6022                 if (!(bk->lb_param & LPF_FAILOUT))
6023                         rc = 0;
6024         } else {
6025                 if (repaired) {
6026                         ns->ln_items_repaired++;
6027                         if (log)
6028                                 CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant "
6029                                        "repaired the entry: "DFID", parent "DFID
6030                                        ", name %.*s, type %d\n",
6031                                        lfsck_lfsck2name(lfsck),
6032                                        PFID(&lnr->lnr_fid), PFID(pfid),
6033                                        lnr->lnr_namelen, lnr->lnr_name, type);
6034
6035                         switch (type) {
6036                         case LNIT_DANGLING:
6037                                 ns->ln_dangling_repaired++;
6038                                 break;
6039                         case LNIT_BAD_TYPE:
6040                                 ns->ln_bad_type_repaired++;
6041                                 break;
6042                         case LNIT_BAD_DIRENT:
6043                                 ns->ln_dirent_repaired++;
6044                                 break;
6045                         default:
6046                                 break;
6047                         }
6048
6049                         if (bk->lb_param & LPF_DRYRUN &&
6050                             lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
6051                                 lfsck_pos_fill(env, lfsck,
6052                                                &ns->ln_pos_first_inconsistent,
6053                                                false);
6054                 }
6055
6056                 if (bad_hash) {
6057                         ns->ln_name_hash_repaired++;
6058
6059                         /* Not count repeatedly. */
6060                         if (!repaired) {
6061                                 ns->ln_items_repaired++;
6062                                 if (log)
6063                                         CDEBUG(D_LFSCK, "%s: namespace LFSCK "
6064                                                "assistant repaired the entry: "
6065                                                DFID", parent "DFID
6066                                                ", name %.*s\n",
6067                                                lfsck_lfsck2name(lfsck),
6068                                                PFID(&lnr->lnr_fid), PFID(pfid),
6069                                                lnr->lnr_namelen, lnr->lnr_name);
6070                         }
6071
6072                         if (bk->lb_param & LPF_DRYRUN &&
6073                             lfsck_pos_is_zero(&ns->ln_pos_first_inconsistent))
6074                                 lfsck_pos_fill(env, lfsck,
6075                                                &ns->ln_pos_first_inconsistent,
6076                                                false);
6077                 }
6078
6079                 rc = 0;
6080         }
6081
6082         if (count > 1 || la->la_nlink > 1)
6083                 ns->ln_mul_linked_checked++;
6084
6085         up_write(&com->lc_sem);
6086
6087         if (obj != NULL && !IS_ERR(obj))
6088                 lfsck_object_put(env, obj);
6089
6090         if (dir != NULL && !IS_ERR(dir))
6091                 lfsck_object_put(env, dir);
6092
6093         lad->lad_advance_lock = bad_linkea;
6094
6095         return rc;
6096 }
6097
6098 /**
6099  * Handle one orphan under the backend /lost+found directory
6100  *
6101  * Insert the orphan FID into the namespace LFSCK trace file for further
6102  * processing (via the subsequent namespace LFSCK second-stage scanning).
6103  * At the same time, remove the orphan name entry from backend /lost+found
6104  * directory. There is an interval between the orphan name entry removed
6105  * from the backend /lost+found directory and the orphan FID in the LFSCK
6106  * trace file handled. In such interval, the LFSCK can be reset, then
6107  * all the FIDs recorded in the namespace LFSCK trace file will be dropped.
6108  * To guarantee that the orphans can be found when LFSCK run next time
6109  * without e2fsck again, when remove the orphan name entry, the LFSCK
6110  * will set the orphan's ctime attribute as 1. Since normal applications
6111  * cannot change the object's ctime attribute as 1. Then when LFSCK run
6112  * next time, it can record the object (that ctime is 1) in the namespace
6113  * LFSCK trace file during the first-stage scanning.
6114  *
6115  * \param[in] env       pointer to the thread context
6116  * \param[in] com       pointer to the lfsck component
6117  * \param[in] parent    pointer to the object for the backend /lost+found
6118  * \param[in] ent       pointer to the name entry for the target under the
6119  *                      backend /lost+found
6120  *
6121  * \retval              positive for repaired
6122  * \retval              0 if needs to repair nothing
6123  * \retval              negative error number on failure
6124  */
6125 static int lfsck_namespace_scan_local_lpf_one(const struct lu_env *env,
6126                                               struct lfsck_component *com,
6127                                               struct dt_object *parent,
6128                                               struct lu_dirent *ent)
6129 {
6130         struct lfsck_thread_info        *info   = lfsck_env_info(env);
6131         struct lu_fid                   *key    = &info->lti_fid;
6132         struct lu_attr                  *la     = &info->lti_la;
6133         struct lfsck_instance           *lfsck  = com->lc_lfsck;
6134         struct dt_object                *obj;
6135         struct dt_device                *dev    = lfsck->li_bottom;
6136         struct dt_object                *child  = NULL;
6137         struct thandle                  *th     = NULL;
6138         int                              idx;
6139         int                              rc     = 0;
6140         __u8                             flags  = 0;
6141         bool                             exist  = false;
6142
6143         ENTRY;
6144
6145         child = lfsck_object_find_by_dev(env, dev, &ent->lde_fid);
6146         if (IS_ERR(child))
6147                 RETURN(PTR_ERR(child));
6148
6149         if (!dt_object_exists(child)) {
6150                 CDEBUG(D_LFSCK, "%s: lost+found/%s doesn't exist\n",
6151                        lfsck_lfsck2name(lfsck), ent->lde_name);
6152                 GOTO(out, rc = -ENOENT);
6153         }
6154
6155         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
6156                 GOTO(out, rc = 1);
6157
6158         LASSERT(!dt_object_remote(child));
6159
6160         idx = lfsck_sub_trace_file_fid2idx(&ent->lde_fid);
6161         obj = com->lc_sub_trace_objs[idx].lsto_obj;
6162         fid_cpu_to_be(key, &ent->lde_fid);
6163         rc = dt_lookup(env, obj, (struct dt_rec *)&flags,
6164                        (const struct dt_key *)key);
6165         if (rc == 0) {
6166                 exist = true;
6167                 flags |= LNTF_CHECK_ORPHAN;
6168         } else if (rc == -ENOENT) {
6169                 flags = LNTF_CHECK_ORPHAN;
6170         } else {
6171                 GOTO(out, rc);
6172         }
6173
6174         th = lfsck_trans_create(env, dev, lfsck);
6175         if (IS_ERR(th))
6176                 GOTO(out, rc = PTR_ERR(th));
6177
6178         /* a1. remove name entry from backend /lost+found */
6179         rc = dt_declare_delete(env, parent,
6180                                (const struct dt_key *)ent->lde_name, th);
6181         if (rc != 0)
6182                 GOTO(stop, rc);
6183
6184         if (S_ISDIR(lfsck_object_type(child))) {
6185                 /* a2. decrease parent's nlink */
6186                 rc = dt_declare_ref_del(env, parent, th);
6187                 if (rc != 0)
6188                         GOTO(stop, rc);
6189         }
6190
6191         if (exist) {
6192                 /* a3. remove child's FID from the LFSCK trace file. */
6193                 rc = dt_declare_delete(env, obj,
6194                                        (const struct dt_key *)key, th);
6195                 if (rc != 0)
6196                         GOTO(stop, rc);
6197         } else {
6198                 /* a4. set child's ctime as 1 */
6199                 memset(la, 0, sizeof(*la));
6200                 la->la_ctime = 1;
6201                 la->la_valid = LA_CTIME;
6202                 rc = dt_declare_attr_set(env, child, la, th);
6203                 if (rc != 0)
6204                         GOTO(stop, rc);
6205         }
6206
6207         /* a5. insert child's FID into the LFSCK trace file. */
6208         rc = dt_declare_insert(env, obj, (const struct dt_rec *)&flags,
6209                                (const struct dt_key *)key, th);
6210         if (rc != 0)
6211                 GOTO(stop, rc);
6212
6213         rc = dt_trans_start_local(env, dev, th);
6214         if (rc != 0)
6215                 GOTO(stop, rc);
6216
6217         /* b1. remove name entry from backend /lost+found */
6218         rc = dt_delete(env, parent, (const struct dt_key *)ent->lde_name, th);
6219         if (rc != 0)
6220                 GOTO(stop, rc);
6221
6222         if (S_ISDIR(lfsck_object_type(child))) {
6223                 /* b2. decrease parent's nlink */
6224                 dt_write_lock(env, parent, 0);
6225                 rc = dt_ref_del(env, parent, th);
6226                 dt_write_unlock(env, parent);
6227                 if (rc != 0)
6228                         GOTO(stop, rc);
6229         }
6230
6231         if (exist) {
6232                 /* a3. remove child's FID from the LFSCK trace file. */
6233                 rc = dt_delete(env, obj, (const struct dt_key *)key, th);
6234                 if (rc != 0)
6235                         GOTO(stop, rc);
6236         } else {
6237                 /* b4. set child's ctime as 1 */
6238                 rc = dt_attr_set(env, child, la, th);
6239                 if (rc != 0)
6240                         GOTO(stop, rc);
6241         }
6242
6243         /* b5. insert child's FID into the LFSCK trace file. */
6244         rc = dt_insert(env, obj, (const struct dt_rec *)&flags,
6245                        (const struct dt_key *)key, th);
6246
6247         GOTO(stop, rc = (rc == 0 ? 1 : rc));
6248
6249 stop:
6250         dt_trans_stop(env, dev, th);
6251
6252 out:
6253         lfsck_object_put(env, child);
6254
6255         return rc;
6256 }
6257
6258 /**
6259  * Handle orphans under the backend /lost+found directory
6260  *
6261  * Some backend checker, such as e2fsck for ldiskfs may find some orphans
6262  * and put them under the backend /lost+found directory that is invisible
6263  * to client. The LFSCK will scan such directory, for the original client
6264  * visible orphans, add their fids into the namespace LFSCK trace file,
6265  * then the subsenquent namespace LFSCK second-stage scanning can handle
6266  * them as other objects to be double scanned: either move back to normal
6267  * namespace, or to the global visible orphan directory:
6268  * /ROOT/.lustre/lost+found/MDTxxxx/
6269  *
6270  * \param[in] env       pointer to the thread context
6271  * \param[in] com       pointer to the lfsck component
6272  */
6273 static void lfsck_namespace_scan_local_lpf(const struct lu_env *env,
6274                                            struct lfsck_component *com)
6275 {
6276         struct lfsck_thread_info        *info   = lfsck_env_info(env);
6277         struct lu_dirent                *ent    =
6278                                         (struct lu_dirent *)info->lti_key;
6279         struct lu_seq_range             *range  = &info->lti_range;
6280         struct lfsck_instance           *lfsck  = com->lc_lfsck;
6281         struct ptlrpc_thread            *thread = &lfsck->li_thread;
6282         struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
6283         struct lfsck_namespace          *ns     = com->lc_file_ram;
6284         struct dt_object                *parent;
6285         const struct dt_it_ops          *iops;
6286         struct dt_it                    *di;
6287         struct seq_server_site          *ss     = lfsck_dev_site(lfsck);
6288         __u64                            cookie;
6289         __u32                            idx    = lfsck_dev_idx(lfsck);
6290         int                              rc     = 0;
6291         __u16                            type;
6292         ENTRY;
6293
6294         parent = lfsck_object_find_by_dev(env, lfsck->li_bottom,
6295                                           &LU_BACKEND_LPF_FID);
6296         if (IS_ERR(parent)) {
6297                 CERROR("%s: fail to find backend /lost+found: rc = %ld\n",
6298                        lfsck_lfsck2name(lfsck), PTR_ERR(parent));
6299                 RETURN_EXIT;
6300         }
6301
6302         /* It is normal that the /lost+found does not exist for ZFS backend. */
6303         if (!dt_object_exists(parent))
6304                 GOTO(out, rc = 0);
6305
6306         if (unlikely(!dt_try_as_dir(env, parent, true)))
6307                 GOTO(out, rc = -ENOTDIR);
6308
6309         CDEBUG(D_LFSCK, "%s: start to scan backend /lost+found\n",
6310                lfsck_lfsck2name(lfsck));
6311
6312         com->lc_new_scanned = 0;
6313         iops = &parent->do_index_ops->dio_it;
6314         di = iops->init(env, parent, LUDA_64BITHASH | LUDA_TYPE);
6315         if (IS_ERR(di))
6316                 GOTO(out, rc = PTR_ERR(di));
6317
6318         rc = iops->load(env, di, 0);
6319         if (rc == 0)
6320                 rc = iops->next(env, di);
6321         else if (rc > 0)
6322                 rc = 0;
6323
6324         while (rc == 0) {
6325                 if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) &&
6326                     unlikely(!thread_is_running(thread)))
6327                         break;
6328
6329                 rc = iops->rec(env, di, (struct dt_rec *)ent,
6330                                LUDA_64BITHASH | LUDA_TYPE);
6331                 if (rc == 0)
6332                         rc = lfsck_unpack_ent(ent, &cookie, &type);
6333
6334                 if (unlikely(rc != 0)) {
6335                         CDEBUG(D_LFSCK, "%s: fail to iterate backend "
6336                                "/lost+found: rc = %d\n",
6337                                lfsck_lfsck2name(lfsck), rc);
6338
6339                         goto skip;
6340                 }
6341
6342                 /* skip dot and dotdot entries */
6343                 if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen))
6344                         goto next;
6345
6346                 if (!fid_seq_in_fldb(fid_seq(&ent->lde_fid)))
6347                         goto skip;
6348
6349                 if (fid_is_norm(&ent->lde_fid)) {
6350                         fld_range_set_mdt(range);
6351                         rc = fld_local_lookup(env, ss->ss_server_fld,
6352                                               fid_seq(&ent->lde_fid), range);
6353                         if (rc != 0)
6354                                 goto skip;
6355                 } else if (idx != 0) {
6356                         /* If the returned FID is IGIF, then there are three
6357                          * possible cases:
6358                          *
6359                          * 1) The object is upgraded from old Lustre-1.8 with
6360                          *    IGIF assigned to such object.
6361                          * 2) The object is a backend local object and is
6362                          *    invisible to client.
6363                          * 3) The object lost its LMV EA, and since there is
6364                          *    no FID-in-dirent for the orphan in the backend
6365                          *    /lost+found directory, then the low layer will
6366                          *    return IGIF for such object.
6367                          *
6368                          * For MDTx (x != 0), it is either case 2) or case 3),
6369                          * but from the LFSCK view, they are indistinguishable.
6370                          * To be safe, the LFSCK will keep it there and report
6371                          * some message, then the adminstrator can handle that
6372                          * furtherly.
6373                          *
6374                          * For MDT0, it is more possible the case 1). The LFSCK
6375                          * will handle the orphan as an upgraded object. */
6376                         CDEBUG(D_LFSCK, "%s: the orphan %.*s with IGIF "DFID
6377                                "in the backend /lost+found on the MDT %04x, "
6378                                "to be safe, skip it.\n",
6379                                lfsck_lfsck2name(lfsck), ent->lde_namelen,
6380                                ent->lde_name, PFID(&ent->lde_fid), idx);
6381                         goto skip;
6382                 }
6383
6384                 rc = lfsck_namespace_scan_local_lpf_one(env, com, parent, ent);
6385
6386 skip:
6387                 down_write(&com->lc_sem);
6388                 com->lc_new_scanned++;
6389                 ns->ln_local_lpf_scanned++;
6390                 if (rc > 0)
6391                         ns->ln_local_lpf_moved++;
6392                 else if (rc == 0)
6393                         ns->ln_local_lpf_skipped++;
6394                 else
6395                         ns->ln_local_lpf_failed++;
6396                 up_write(&com->lc_sem);
6397
6398                 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
6399                         break;
6400
6401 next:
6402                 lfsck_control_speed_by_self(com);
6403                 if (unlikely(!thread_is_running(thread))) {
6404                         rc = 0;
6405                         break;
6406                 }
6407
6408                 rc = iops->next(env, di);
6409         }
6410
6411         iops->put(env, di);
6412         iops->fini(env, di);
6413
6414         EXIT;
6415
6416 out:
6417         CDEBUG(D_LFSCK, "%s: stop to scan backend /lost+found: rc = %d\n",
6418                lfsck_lfsck2name(lfsck), rc);
6419
6420         lfsck_object_put(env, parent);
6421 }
6422
6423 /**
6424  * Rescan the striped directory after the master LMV EA reset.
6425  *
6426  * Sometimes, the master LMV EA of the striped directory may be lost, so when
6427  * the namespace LFSCK engine scans the striped directory for the first time,
6428  * it will be regarded as a normal directory. As the LFSCK proceeds, some
6429  * other LFSCK instance on other MDT will find the shard of this striped dir,
6430  * and find that the master MDT-object of the striped directory lost its LMV
6431  * EA, then such remote LFSCK instance will regenerate the master LMV EA and
6432  * notify the LFSCK instance on this MDT to rescan the striped directory.
6433  *
6434  * \param[in] env       pointer to the thread context
6435  * \param[in] com       pointer to the lfsck component
6436  * \param[in] llu       the lfsck_lmv_unit that contains the striped directory
6437  *                      to be rescanned.
6438  *
6439  * \retval              positive number for success
6440  * \retval              0 for LFSCK stopped/paused
6441  * \retval              negative error number on failure
6442  */
6443 static int lfsck_namespace_rescan_striped_dir(const struct lu_env *env,
6444                                               struct lfsck_component *com,
6445                                               struct lfsck_lmv_unit *llu)
6446 {
6447         struct lfsck_thread_info        *info   = lfsck_env_info(env);
6448         struct lfsck_instance           *lfsck  = com->lc_lfsck;
6449         struct lfsck_assistant_data     *lad    = com->lc_data;
6450         struct dt_object                *dir;
6451         const struct dt_it_ops          *iops;
6452         struct dt_it                    *di;
6453         struct lu_dirent                *ent    =
6454                         (struct lu_dirent *)info->lti_key;
6455         struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
6456         struct ptlrpc_thread            *thread = &lfsck->li_thread;
6457         struct lfsck_assistant_object   *lso    = NULL;
6458         struct lfsck_namespace_req      *lnr;
6459         struct lfsck_assistant_req      *lar;
6460         int                              rc;
6461         __u16                            type;
6462         ENTRY;
6463
6464         LASSERT(list_empty(&lad->lad_req_list));
6465
6466         lfsck->li_lmv = &llu->llu_lmv;
6467         lfsck->li_obj_dir = lfsck_object_get(llu->llu_obj);
6468         rc = lfsck_open_dir(env, lfsck, 0);
6469         if (rc != 0)
6470                 RETURN(rc);
6471
6472         dir = lfsck->li_obj_dir;
6473         di = lfsck->li_di_dir;
6474         iops = &dir->do_index_ops->dio_it;
6475         do {
6476                 rc = iops->rec(env, di, (struct dt_rec *)ent,
6477                                lfsck->li_args_dir);
6478                 if (rc == 0)
6479                         rc = lfsck_unpack_ent(ent, &lfsck->li_cookie_dir,
6480                                               &type);
6481
6482                 if (rc != 0) {
6483                         if (bk->lb_param & LPF_FAILOUT)
6484                                 GOTO(out, rc);
6485
6486                         goto next;
6487                 }
6488
6489                 if (name_is_dot_or_dotdot(ent->lde_name, ent->lde_namelen))
6490                         goto next;
6491
6492                 if (lso == NULL) {
6493                         lso = lfsck_assistant_object_init(env,
6494                                 lfsck_dto2fid(dir), NULL,
6495                                 lfsck->li_pos_current.lp_oit_cookie, true);
6496                         if (IS_ERR(lso)) {
6497                                 if (bk->lb_param & LPF_FAILOUT)
6498                                         GOTO(out, rc = PTR_ERR(lso));
6499
6500                                 lso = NULL;
6501                                 goto next;
6502                         }
6503                 }
6504
6505                 lnr = lfsck_namespace_assistant_req_init(lfsck, lso, ent, type);
6506                 if (IS_ERR(lnr)) {
6507                         if (bk->lb_param & LPF_FAILOUT)
6508                                 GOTO(out, rc = PTR_ERR(lnr));
6509
6510                         goto next;
6511                 }
6512
6513                 lar = &lnr->lnr_lar;
6514                 rc = lfsck_namespace_assistant_handler_p1(env, com, lar);
6515                 lfsck_namespace_assistant_req_fini(env, lar);
6516                 if (rc != 0 && bk->lb_param & LPF_FAILOUT)
6517                         GOTO(out, rc);
6518
6519                 if (unlikely(!thread_is_running(thread)))
6520                         GOTO(out, rc = 0);
6521
6522 next:
6523                 rc = iops->next(env, di);
6524         } while (rc == 0);
6525
6526 out:
6527         if (lso != NULL && !IS_ERR(lso))
6528                 lfsck_assistant_object_put(env, lso);
6529
6530         lfsck_close_dir(env, lfsck, rc);
6531         if (rc <= 0)
6532                 RETURN(rc);
6533
6534         /* The close_dir() may insert a dummy lnr in the lad->lad_req_list. */
6535         if (list_empty(&lad->lad_req_list))
6536                 RETURN(1);
6537
6538         spin_lock(&lad->lad_lock);
6539         lar = list_first_entry(&lad->lad_req_list, struct lfsck_assistant_req,
6540                                lar_list);
6541         list_del_init(&lar->lar_list);
6542         spin_unlock(&lad->lad_lock);
6543
6544         rc = lfsck_namespace_assistant_handler_p1(env, com, lar);
6545         lfsck_namespace_assistant_req_fini(env, lar);
6546
6547         RETURN(rc == 0 ? 1 : rc);
6548 }
6549
6550 static int
6551 lfsck_namespace_double_scan_one_trace_file(const struct lu_env *env,
6552                                            struct lfsck_component *com,
6553                                            struct dt_object *obj, bool first)
6554 {
6555         struct lfsck_instance   *lfsck  = com->lc_lfsck;
6556         struct ptlrpc_thread    *thread = &lfsck->li_thread;
6557         struct lfsck_bookmark   *bk     = &lfsck->li_bookmark_ram;
6558         struct lfsck_namespace  *ns     = com->lc_file_ram;
6559         const struct dt_it_ops  *iops   = &obj->do_index_ops->dio_it;
6560         struct dt_object        *target;
6561         struct dt_it            *di;
6562         struct dt_key           *key;
6563         struct lu_fid            fid;
6564         int                      rc;
6565         __u8                     flags  = 0;
6566         ENTRY;
6567
6568         di = iops->init(env, obj, 0);
6569         if (IS_ERR(di))
6570                 RETURN(PTR_ERR(di));
6571
6572         if (first)
6573                 fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2);
6574         else
6575                 fid_zero(&fid);
6576         rc = iops->get(env, di, (const struct dt_key *)&fid);
6577         if (rc < 0)
6578                 GOTO(fini, rc);
6579
6580         if (first) {
6581                 /* The start one either has been processed or does not exist,
6582                  * skip it. */
6583                 rc = iops->next(env, di);
6584                 if (rc != 0)
6585                         GOTO(put, rc);
6586         }
6587
6588         do {
6589                 if (CFS_FAIL_TIMEOUT(OBD_FAIL_LFSCK_DELAY3, cfs_fail_val) &&
6590                     unlikely(!thread_is_running(thread)))
6591                         GOTO(put, rc = 0);
6592
6593                 key = iops->key(env, di);
6594                 if (IS_ERR(key)) {
6595                         rc = PTR_ERR(key);
6596                         if (rc == -ENOENT)
6597                                 GOTO(put, rc = 1);
6598
6599                         goto checkpoint;
6600                 }
6601
6602                 fid_be_to_cpu(&fid, (const struct lu_fid *)key);
6603                 if (!fid_is_sane(&fid)) {
6604                         rc = 0;
6605                         goto checkpoint;
6606                 }
6607
6608                 target = lfsck_object_find_bottom(env, lfsck, &fid);
6609                 if (IS_ERR(target)) {
6610                         rc = PTR_ERR(target);
6611                         goto checkpoint;
6612                 }
6613
6614                 if (dt_object_exists(target)) {
6615                         rc = iops->rec(env, di, (struct dt_rec *)&flags, 0);
6616                         if (rc == 0) {
6617                                 rc = lfsck_namespace_double_scan_one(env, com,
6618                                                                 target, flags);
6619                                 if (rc == -ENOENT)
6620                                         rc = 0;
6621                         }
6622                 }
6623
6624                 lfsck_object_put(env, target);
6625
6626 checkpoint:
6627                 down_write(&com->lc_sem);
6628                 com->lc_new_checked++;
6629                 com->lc_new_scanned++;
6630                 if (rc >= 0)
6631                         ns->ln_fid_latest_scanned_phase2 = fid;
6632
6633                 if (rc > 0)
6634                         ns->ln_objs_repaired_phase2++;
6635                 else if (rc < 0)
6636                         ns->ln_objs_failed_phase2++;
6637                 up_write(&com->lc_sem);
6638
6639                 if (rc < 0 && bk->lb_param & LPF_FAILOUT)
6640                         GOTO(put, rc);
6641
6642                 if (unlikely(com->lc_time_next_checkpoint <=
6643                              ktime_get_seconds()) &&
6644                     com->lc_new_checked != 0) {
6645                         down_write(&com->lc_sem);
6646                         ns->ln_run_time_phase2 += ktime_get_seconds() -
6647                                                   com->lc_time_last_checkpoint;
6648                         ns->ln_time_last_checkpoint = ktime_get_real_seconds();
6649                         ns->ln_objs_checked_phase2 += com->lc_new_checked;
6650                         com->lc_new_checked = 0;
6651                         lfsck_namespace_store(env, com);
6652                         up_write(&com->lc_sem);
6653
6654                         com->lc_time_last_checkpoint = ktime_get_seconds();
6655                         com->lc_time_next_checkpoint =
6656                                 com->lc_time_last_checkpoint +
6657                                 LFSCK_CHECKPOINT_INTERVAL;
6658                 }
6659
6660                 lfsck_control_speed_by_self(com);
6661                 if (unlikely(!thread_is_running(thread)))
6662                         GOTO(put, rc = 0);
6663
6664                 rc = iops->next(env, di);
6665         } while (rc == 0);
6666
6667         GOTO(put, rc);
6668
6669 put:
6670         iops->put(env, di);
6671
6672 fini:
6673         iops->fini(env, di);
6674
6675         return rc;
6676 }
6677
6678 static int lfsck_namespace_assistant_handler_p2(const struct lu_env *env,
6679                                                 struct lfsck_component *com)
6680 {
6681         struct lfsck_instance   *lfsck  = com->lc_lfsck;
6682         struct lfsck_namespace  *ns     = com->lc_file_ram;
6683         int                      rc;
6684         int                      i;
6685         ENTRY;
6686
6687         while (!list_empty(&lfsck->li_list_lmv)) {
6688                 struct lfsck_lmv_unit *llu;
6689
6690                 spin_lock(&lfsck->li_lock);
6691                 llu = list_first_entry(&lfsck->li_list_lmv,
6692                                        struct lfsck_lmv_unit, llu_link);
6693                 list_del_init(&llu->llu_link);
6694                 spin_unlock(&lfsck->li_lock);
6695
6696                 rc = lfsck_namespace_rescan_striped_dir(env, com, llu);
6697                 if (rc <= 0)
6698                         RETURN(rc);
6699         }
6700
6701         CDEBUG(D_LFSCK, "%s: namespace LFSCK phase2 scan start\n",
6702                lfsck_lfsck2name(lfsck));
6703
6704         lfsck_namespace_scan_local_lpf(env, com);
6705
6706         com->lc_new_checked = 0;
6707         com->lc_new_scanned = 0;
6708         com->lc_time_last_checkpoint = ktime_get_seconds();
6709         com->lc_time_next_checkpoint = com->lc_time_last_checkpoint +
6710                                        LFSCK_CHECKPOINT_INTERVAL;
6711
6712         i = lfsck_sub_trace_file_fid2idx(&ns->ln_fid_latest_scanned_phase2);
6713         rc = lfsck_namespace_double_scan_one_trace_file(env, com,
6714                                 com->lc_sub_trace_objs[i].lsto_obj, true);
6715         while (rc > 0 && ++i < LFSCK_STF_COUNT)
6716                 rc = lfsck_namespace_double_scan_one_trace_file(env, com,
6717                                 com->lc_sub_trace_objs[i].lsto_obj, false);
6718
6719         CDEBUG(D_LFSCK, "%s: namespace LFSCK phase2 scan stop at the No. %d "
6720                "trace file: rc = %d\n", lfsck_lfsck2name(lfsck), i, rc);
6721
6722         RETURN(rc);
6723 }
6724
6725 static void lfsck_namespace_assistant_fill_pos(const struct lu_env *env,
6726                                                struct lfsck_component *com,
6727                                                struct lfsck_position *pos)
6728 {
6729         struct lfsck_assistant_data     *lad = com->lc_data;
6730         struct lfsck_namespace_req      *lnr;
6731
6732         if (((struct lfsck_namespace *)(com->lc_file_ram))->ln_status !=
6733             LS_SCANNING_PHASE1)
6734                 return;
6735
6736         if (list_empty(&lad->lad_req_list))
6737                 return;
6738
6739         lnr = list_first_entry(&lad->lad_req_list,
6740                                struct lfsck_namespace_req,
6741                                lnr_lar.lar_list);
6742         pos->lp_oit_cookie = lnr->lnr_lar.lar_parent->lso_oit_cookie;
6743         pos->lp_dir_cookie = lnr->lnr_dir_cookie - 1;
6744         pos->lp_dir_parent = lnr->lnr_lar.lar_parent->lso_fid;
6745 }
6746
6747 static int lfsck_namespace_double_scan_result(const struct lu_env *env,
6748                                               struct lfsck_component *com,
6749                                               int rc)
6750 {
6751         struct lfsck_instance   *lfsck  = com->lc_lfsck;
6752         struct lfsck_namespace  *ns     = com->lc_file_ram;
6753
6754         down_write(&com->lc_sem);
6755         ns->ln_run_time_phase2 += ktime_get_seconds() -
6756                                   com->lc_time_last_checkpoint;
6757         ns->ln_time_last_checkpoint = ktime_get_real_seconds();
6758         ns->ln_objs_checked_phase2 += com->lc_new_checked;
6759         com->lc_new_checked = 0;
6760
6761         if (rc > 0) {
6762                 if (ns->ln_flags & LF_INCOMPLETE)
6763                         ns->ln_status = LS_PARTIAL;
6764                 else
6765                         ns->ln_status = LS_COMPLETED;
6766                 ns->ln_flags &= ~LF_SCANNED_ONCE;
6767                 if (!(lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN))
6768                         ns->ln_flags &= ~LF_INCONSISTENT;
6769                 ns->ln_time_last_complete = ns->ln_time_last_checkpoint;
6770                 ns->ln_success_count++;
6771         } else if (rc == 0) {
6772                 if (lfsck->li_status != 0)
6773                         ns->ln_status = lfsck->li_status;
6774                 else
6775                         ns->ln_status = LS_STOPPED;
6776         } else {
6777                 ns->ln_status = LS_FAILED;
6778         }
6779
6780         rc = lfsck_namespace_store(env, com);
6781         up_write(&com->lc_sem);
6782
6783         return rc;
6784 }
6785
6786 static int
6787 lfsck_namespace_assistant_sync_failures_interpret(const struct lu_env *env,
6788                                                   struct ptlrpc_request *req,
6789                                                   void *args, int rc)
6790 {
6791         if (rc == 0) {
6792                 struct lfsck_async_interpret_args *laia = args;
6793                 struct lfsck_tgt_desc             *ltd  = laia->laia_ltd;
6794
6795                 ltd->ltd_synced_failures = 1;
6796         }
6797
6798         return 0;
6799 }
6800
6801 /**
6802  * Notify remote LFSCK instances about former failures.
6803  *
6804  * The local LFSCK instance has recorded which MDTs have ever failed to respond
6805  * some LFSCK verification requests (maybe because of network issues or the MDT
6806  * itself trouble). During the respond gap the MDT may missed some name entries
6807  * verification, then the MDT cannot know whether related MDT-objects have been
6808  * referenced by related name entries or not, then in the second-stage scanning,
6809  * these MDT-objects will be regarded as orphan, if the MDT-object contains bad
6810  * linkEA for back reference, then it will misguide the LFSCK to generate wrong
6811  * name entry for repairing the orphan.
6812  *
6813  * To avoid above trouble, when layout LFSCK finishes the first-stage scanning,
6814  * it will scan the bitmap for the ever failed MDTs, and notify them that they
6815  * have ever missed some name entries verification and should skip the handling
6816  * for orphan MDT-objects.
6817  *
6818  * \param[in] env       pointer to the thread context
6819  * \param[in] com       pointer to the lfsck component
6820  * \param[in] lr        pointer to the lfsck request
6821  */
6822 static void lfsck_namespace_assistant_sync_failures(const struct lu_env *env,
6823                                                     struct lfsck_component *com,
6824                                                     struct lfsck_request *lr)
6825 {
6826         struct lfsck_async_interpret_args *laia  =
6827                                 &lfsck_env_info(env)->lti_laia2;
6828         struct lfsck_assistant_data *lad = com->lc_data;
6829         struct lfsck_namespace *ns = com->lc_file_ram;
6830         struct lfsck_instance *lfsck = com->lc_lfsck;
6831         struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs;
6832         struct lfsck_tgt_desc *ltd;
6833         struct ptlrpc_request_set *set;
6834         int idx;
6835         int rc = 0;
6836
6837         ENTRY;
6838         if (!test_bit(LAD_INCOMPLETE, &lad->lad_flags))
6839                 RETURN_EXIT;
6840
6841         set = ptlrpc_prep_set();
6842         if (set == NULL)
6843                 GOTO(out, rc = -ENOMEM);
6844
6845         lr->lr_flags2 = ns->ln_flags | LF_INCOMPLETE;
6846         memset(laia, 0, sizeof(*laia));
6847         lad->lad_touch_gen++;
6848
6849         down_read(&ltds->ltd_rw_sem);
6850         for_each_set_bit(idx, lad->lad_bitmap, lad->lad_bitmap_count) {
6851                 ltd = lfsck_ltd2tgt(ltds, idx);
6852                 if (unlikely(!ltd))
6853                         continue;
6854
6855                 laia->laia_ltd = ltd;
6856                 rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
6857                         lfsck_namespace_assistant_sync_failures_interpret,
6858                         laia, LFSCK_NOTIFY);
6859                 if (rc != 0)
6860                         CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail "
6861                                "to sync failure with MDT %x: rc = %d\n",
6862                                lfsck_lfsck2name(lfsck), ltd->ltd_index, rc);
6863         }
6864         up_read(&ltds->ltd_rw_sem);
6865
6866         rc = ptlrpc_set_wait(env, set);
6867         ptlrpc_set_destroy(set);
6868
6869         GOTO(out, rc);
6870
6871 out:
6872         if (rc != 0)
6873                 CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant fail "
6874                        "to sync failure with MDTs, and related MDTs "
6875                        "may handle orphan improperly: rc = %d\n",
6876                        lfsck_lfsck2name(lfsck), rc);
6877
6878         EXIT;
6879 }
6880
6881 const struct lfsck_assistant_operations lfsck_namespace_assistant_ops = {
6882         .la_handler_p1          = lfsck_namespace_assistant_handler_p1,
6883         .la_handler_p2          = lfsck_namespace_assistant_handler_p2,
6884         .la_fill_pos            = lfsck_namespace_assistant_fill_pos,
6885         .la_double_scan_result  = lfsck_namespace_double_scan_result,
6886         .la_req_fini            = lfsck_namespace_assistant_req_fini,
6887         .la_sync_failures       = lfsck_namespace_assistant_sync_failures,
6888 };
6889
6890 /**
6891  * Verify the specified linkEA entry for the given directory object.
6892  * If the object has no such linkEA entry or it has more other linkEA
6893  * entries, then re-generate the linkEA with the given information.
6894  *
6895  * \param[in] env       pointer to the thread context
6896  * \param[in] obj       pointer to the dt_object to be handled
6897  * \param[in] cname     the name for the child in the parent directory
6898  * \param[in] pfid      the parent directory's FID for the linkEA
6899  *
6900  * \retval              0 for success
6901  * \retval              negative error number on failure
6902  */
6903 int lfsck_verify_linkea(const struct lu_env *env, struct lfsck_instance *lfsck,
6904                         struct dt_object *obj, const struct lu_name *cname,
6905                         const struct lu_fid *pfid)
6906 {
6907         struct dt_device        *dev    = lfsck_obj2dev(obj);
6908         struct linkea_data       ldata  = { NULL };
6909         struct lu_buf            linkea_buf;
6910         struct thandle          *th;
6911         int                      rc;
6912         int                      fl     = LU_XATTR_CREATE;
6913         bool                     dirty  = false;
6914
6915         ENTRY;
6916
6917         if (!dt_object_exists(obj))
6918                 RETURN(-ENOENT);
6919
6920         if (!S_ISDIR(lfsck_object_type(obj)))
6921                 RETURN(-ENOTDIR);
6922
6923         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
6924                 RETURN(0);
6925
6926         rc = lfsck_links_read_with_rec(env, obj, &ldata);
6927         if (rc == -ENODATA) {
6928                 dirty = true;
6929         } else if (rc == 0) {
6930                 fl = LU_XATTR_REPLACE;
6931                 if (ldata.ld_leh->leh_reccount != 1) {
6932                         dirty = true;
6933                 } else {
6934                         rc = linkea_links_find(&ldata, cname, pfid);
6935                         if (rc != 0)
6936                                 dirty = true;
6937                 }
6938         }
6939
6940         if (!dirty)
6941                 RETURN(rc);
6942
6943         rc = linkea_links_new(&ldata, &lfsck_env_info(env)->lti_linkea_buf,
6944                               cname, pfid);
6945         if (rc != 0)
6946                 RETURN(rc);
6947
6948         lfsck_buf_init(&linkea_buf, ldata.ld_buf->lb_buf,
6949                        ldata.ld_leh->leh_len);
6950         th = lfsck_trans_create(env, dev, lfsck);
6951         if (IS_ERR(th))
6952                 RETURN(PTR_ERR(th));
6953
6954         rc = dt_declare_xattr_set(env, obj, &linkea_buf,
6955                                   XATTR_NAME_LINK, fl, th);
6956         if (rc != 0)
6957                 GOTO(stop, rc);
6958
6959         rc = dt_trans_start_local(env, dev, th);
6960         if (rc != 0)
6961                 GOTO(stop, rc);
6962
6963         dt_write_lock(env, obj, 0);
6964         rc = dt_xattr_set(env, obj, &linkea_buf,
6965                           XATTR_NAME_LINK, fl, th);
6966         dt_write_unlock(env, obj);
6967
6968         GOTO(stop, rc);
6969
6970 stop:
6971         dt_trans_stop(env, dev, th);
6972         return rc;
6973 }
6974
6975 /**
6976  * Get the name and parent directory's FID from the first linkEA entry.
6977  *
6978  * \param[in] env       pointer to the thread context
6979  * \param[in] obj       pointer to the object which get linkEA from
6980  * \param[out] name     pointer to the buffer to hold the name
6981  *                      in the first linkEA entry
6982  * \param[out] pfid     pointer to the buffer to hold the parent
6983  *                      directory's FID in the first linkEA entry
6984  *
6985  * \retval              0 for success
6986  * \retval              negative error number on failure
6987  */
6988 int lfsck_links_get_first(const struct lu_env *env, struct dt_object *obj,
6989                           char *name, struct lu_fid *pfid)
6990 {
6991         struct lu_name           *cname = &lfsck_env_info(env)->lti_name;
6992         struct linkea_data        ldata = { NULL };
6993         int                       rc;
6994
6995         rc = lfsck_links_read_with_rec(env, obj, &ldata);
6996         if (rc)
6997                 return rc;
6998
6999         linkea_first_entry(&ldata);
7000         linkea_entry_unpack(ldata.ld_lee, &ldata.ld_reclen, cname, pfid);
7001         if (!linkea_entry_is_valid(&ldata, cname, pfid))
7002                 return -EINVAL;
7003
7004         /* To guarantee the 'name' is terminated with '0'. */
7005         memcpy(name, cname->ln_name, cname->ln_namelen);
7006         name[cname->ln_namelen] = 0;
7007
7008         return 0;
7009 }
7010
7011 /**
7012  * Update the object's name entry with the given FID.
7013  *
7014  * \param[in] env       pointer to the thread context
7015  * \param[in] lfsck     pointer to the lfsck instance
7016  * \param[in] dir       pointer to the directory that holds
7017  *                      the name entry
7018  * \param[in] name      the name for the entry to be updated
7019  * \param[in] fid       the new FID for the name entry referenced
7020  * \param[in] type      the type for the name entry to be updated
7021  *
7022  * \retval              0 for success
7023  * \retval              negative error number on failure
7024  */
7025 int lfsck_update_name_entry(const struct lu_env *env,
7026                             struct lfsck_instance *lfsck,
7027                             struct dt_object *dir, const char *name,
7028                             const struct lu_fid *fid, __u32 type)
7029 {
7030         struct lfsck_thread_info *info   = lfsck_env_info(env);
7031         struct dt_insert_rec     *rec    = &info->lti_dt_rec;
7032         struct lfsck_lock_handle *llh    = &info->lti_llh;
7033         struct dt_device         *dev    = lfsck_obj2dev(dir);
7034         struct thandle           *th;
7035         int                       rc;
7036         bool                      exists = true;
7037         ENTRY;
7038
7039         if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
7040                 RETURN(0);
7041
7042         rc = lfsck_lock(env, lfsck, dir, name, llh,
7043                         MDS_INODELOCK_UPDATE, LCK_PW);
7044         if (rc != 0)
7045                 RETURN(rc);
7046
7047         th = lfsck_trans_create(env, dev, lfsck);
7048         if (IS_ERR(th))
7049                 GOTO(unlock, rc = PTR_ERR(th));
7050
7051         rc = dt_declare_delete(env, dir, (const struct dt_key *)name, th);
7052         if (rc != 0)
7053                 GOTO(stop, rc);
7054
7055         rec->rec_type = type;
7056         rec->rec_fid = fid;
7057         rc = dt_declare_insert(env, dir, (const struct dt_rec *)rec,
7058                                (const struct dt_key *)name, th);
7059         if (rc != 0)
7060                 GOTO(stop, rc);
7061
7062         rc = dt_declare_ref_add(env, dir, th);
7063         if (rc != 0)
7064                 GOTO(stop, rc);
7065
7066         rc = dt_trans_start_local(env, dev, th);
7067         if (rc != 0)
7068                 GOTO(stop, rc);
7069
7070         rc = dt_delete(env, dir, (const struct dt_key *)name, th);
7071         if (rc == -ENOENT) {
7072                 exists = false;
7073                 rc = 0;
7074         }
7075
7076         if (rc != 0)
7077                 GOTO(stop, rc);
7078
7079         rc = dt_insert(env, dir, (const struct dt_rec *)rec,
7080                        (const struct dt_key *)name, th);
7081         if (rc == 0 && S_ISDIR(type) && !exists) {
7082                 dt_write_lock(env, dir, 0);
7083                 rc = dt_ref_add(env, dir, th);
7084                 dt_write_unlock(env, dir);
7085         }
7086
7087         GOTO(stop, rc);
7088
7089 stop:
7090         dt_trans_stop(env, dev, th);
7091
7092 unlock:
7093         lfsck_unlock(llh);
7094         CDEBUG(D_LFSCK, "%s: update name entry "DFID"/%s with the FID "DFID
7095                " and the type %o: rc = %d\n", lfsck_lfsck2name(lfsck),
7096                PFID(lfsck_dto2fid(dir)), name, PFID(fid), type, rc);
7097
7098         return rc;
7099 }
7100
7101 int lfsck_namespace_setup(const struct lu_env *env,
7102                           struct lfsck_instance *lfsck)
7103 {
7104         struct lfsck_component  *com;
7105         struct lfsck_namespace  *ns;
7106         struct dt_object        *root = NULL;
7107         struct dt_object        *obj;
7108         int                      i;
7109         int                      rc;
7110         ENTRY;
7111
7112         LASSERT(lfsck->li_master);
7113
7114         OBD_ALLOC_PTR(com);
7115         if (com == NULL)
7116                 RETURN(-ENOMEM);
7117
7118         INIT_LIST_HEAD(&com->lc_link);
7119         INIT_LIST_HEAD(&com->lc_link_dir);
7120         init_rwsem(&com->lc_sem);
7121         atomic_set(&com->lc_ref, 1);
7122         com->lc_lfsck = lfsck;
7123         com->lc_type = LFSCK_TYPE_NAMESPACE;
7124         com->lc_ops = &lfsck_namespace_ops;
7125         com->lc_data = lfsck_assistant_data_init(
7126                         &lfsck_namespace_assistant_ops,
7127                         LFSCK_NAMESPACE);
7128         if (com->lc_data == NULL)
7129                 GOTO(out, rc = -ENOMEM);
7130
7131         com->lc_file_size = sizeof(struct lfsck_namespace);
7132         OBD_ALLOC(com->lc_file_ram, com->lc_file_size);
7133         if (com->lc_file_ram == NULL)
7134                 GOTO(out, rc = -ENOMEM);
7135
7136         OBD_ALLOC(com->lc_file_disk, com->lc_file_size);
7137         if (com->lc_file_disk == NULL)
7138                 GOTO(out, rc = -ENOMEM);
7139
7140         for (i = 0; i < LFSCK_STF_COUNT; i++)
7141                 mutex_init(&com->lc_sub_trace_objs[i].lsto_mutex);
7142
7143         root = dt_locate(env, lfsck->li_bottom, &lfsck->li_local_root_fid);
7144         if (IS_ERR(root))
7145                 GOTO(out, rc = PTR_ERR(root));
7146
7147         if (unlikely(!dt_try_as_dir(env, root, true)))
7148                 GOTO(out, rc = -ENOTDIR);
7149
7150         obj = local_index_find_or_create(env, lfsck->li_los, root,
7151                                          LFSCK_NAMESPACE,
7152                                          S_IFREG | S_IRUGO | S_IWUSR,
7153                                          &dt_lfsck_namespace_features);
7154         if (IS_ERR(obj))
7155                 GOTO(out, rc = PTR_ERR(obj));
7156
7157         com->lc_obj = obj;
7158         rc = lfsck_namespace_load(env, com);
7159         if (rc == -ENODATA) {
7160                 rc = lfsck_namespace_init(env, com);
7161         } else if (rc < 0) {
7162                 rc = lfsck_namespace_reset(env, com, true);
7163         } else {
7164                 rc = lfsck_load_sub_trace_files(env, com,
7165                         &dt_lfsck_namespace_features, LFSCK_NAMESPACE, false);
7166                 if (rc)
7167                         rc = lfsck_namespace_reset(env, com, true);
7168         }
7169         if (rc != 0)
7170                 GOTO(out, rc);
7171
7172         ns = com->lc_file_ram;
7173         switch (ns->ln_status) {
7174         case LS_INIT:
7175         case LS_COMPLETED:
7176         case LS_FAILED:
7177         case LS_STOPPED:
7178                 spin_lock(&lfsck->li_lock);
7179                 list_add_tail(&com->lc_link, &lfsck->li_list_idle);
7180                 spin_unlock(&lfsck->li_lock);
7181                 break;
7182         default:
7183                 CERROR("%s: unknown lfsck_namespace status %d\n",
7184                        lfsck_lfsck2name(lfsck), ns->ln_status);
7185                 fallthrough;
7186         case LS_SCANNING_PHASE1:
7187         case LS_SCANNING_PHASE2:
7188                 /* No need to store the status to disk right now.
7189                  * If the system crashed before the status stored,
7190                  * it will be loaded back when next time. */
7191                 ns->ln_status = LS_CRASHED;
7192                 fallthrough;
7193         case LS_PAUSED:
7194         case LS_CRASHED:
7195                 spin_lock(&lfsck->li_lock);
7196                 list_add_tail(&com->lc_link, &lfsck->li_list_scan);
7197                 list_add_tail(&com->lc_link_dir, &lfsck->li_list_dir);
7198                 spin_unlock(&lfsck->li_lock);
7199                 break;
7200         }
7201
7202         GOTO(out, rc = 0);
7203
7204 out:
7205         if (root != NULL && !IS_ERR(root))
7206                 lfsck_object_put(env, root);
7207         if (rc != 0) {
7208                 lfsck_component_cleanup(env, com);
7209                 CERROR("%s: fail to init namespace LFSCK component: rc = %d\n",
7210                        lfsck_lfsck2name(lfsck), rc);
7211         }
7212         return rc;
7213 }