1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2012, 2017, Intel Corporation.
8 * This file is part of Lustre, http://www.lustre.org/
10 * Top-level entry points into osd module
12 * The OI scrub is used for rebuilding Object Index files when restoring an MDT from
15 * The otable-based iterator scans the ldiskfs inode table to feed the upper-layer LFSCK.
17 * Author: Fan Yong <yong.fan@whamcloud.com>
20 #define DEBUG_SUBSYSTEM S_LFSCK
22 #include <linux/kthread.h>
23 #include <uapi/linux/lustre/lustre_idl.h>
24 #include <lustre_disk.h>
25 #include <dt_object.h>
26 #include <linux/xattr.h>
27 #include <lustre_scrub.h>
28 #include <lustre_nodemap.h>
30 #include "osd_internal.h"
32 #include "osd_scrub.h"
/*
 * 0xffffffff held in an unsigned long long constant.  No user is visible in
 * this part of the file; presumably the largest otable iterator hash value --
 * confirm against its users.
 */
#define OSD_OTABLE_MAX_HASH		0x00000000ffffffffULL

/* high priority inconsistent items list APIs */

/*
 * Decay step for os_bad_oimap_count in osd_scrub_oi_insert().  It is added to
 * os_bad_oimap_time and compared against ktime_get_real_seconds(), so the
 * unit is seconds.
 */
#define SCRUB_BAD_OIMAP_DECAY_INTERVAL	60
40 * Add mapping into scrub.os_inconsistent_item list, and the OI scrub thread
41 * will fix them in priority.
43 int osd_scrub_oi_insert(struct osd_device *dev, const struct lu_fid *fid,
44 struct osd_inode_id *id, int insert)
46 struct osd_inconsistent_item *oii;
47 struct osd_scrub *oscrub = &dev->od_scrub;
48 struct lustre_scrub *lscrub = &oscrub->os_scrub;
54 if (unlikely(oii == NULL))
57 INIT_LIST_HEAD(&oii->oii_list);
58 oii->oii_cache.oic_fid = *fid;
59 oii->oii_cache.oic_lid = *id;
60 oii->oii_cache.oic_dev = dev;
61 oii->oii_insert = insert;
63 spin_lock(&lscrub->os_lock);
64 if (lscrub->os_partial_scan) {
65 __u64 now = ktime_get_real_seconds();
67 /* If there haven't been errors in a long time,
68 * decay old count until either the errors are
69 * gone or we reach the current interval.
71 while (unlikely(oscrub->os_bad_oimap_count > 0 &&
72 oscrub->os_bad_oimap_time +
73 SCRUB_BAD_OIMAP_DECAY_INTERVAL < now)) {
74 oscrub->os_bad_oimap_count >>= 1;
75 oscrub->os_bad_oimap_time +=
76 SCRUB_BAD_OIMAP_DECAY_INTERVAL;
79 oscrub->os_bad_oimap_time = now;
80 if (++oscrub->os_bad_oimap_count >
81 dev->od_full_scrub_threshold_rate)
82 lscrub->os_full_scrub = 1;
85 if (list_empty(&lscrub->os_inconsistent_items)) {
88 struct osd_inconsistent_item *tmp;
90 list_for_each_entry(tmp, &lscrub->os_inconsistent_items,
92 if (lu_fid_eq(fid, &tmp->oii_cache.oic_fid)) {
93 spin_unlock(&lscrub->os_lock);
100 list_add_tail(&oii->oii_list, &lscrub->os_inconsistent_items);
101 spin_unlock(&lscrub->os_lock);
109 /* if item could not be repaired, add it to the os_stale_items list to avoid
110 * triggering scrub repeatedly.
112 static inline void osd_scrub_oi_mark_stale(struct lustre_scrub *scrub,
113 struct osd_inconsistent_item *oii)
115 spin_lock(&scrub->os_lock);
116 list_move_tail(&oii->oii_list, &scrub->os_stale_items);
117 spin_unlock(&scrub->os_lock);
120 /* OI of \a fid may be marked stale, and if its mapping is scrubbed, remove it
121 * from os_stale_items list.
123 bool osd_scrub_oi_resurrect(struct lustre_scrub *scrub,
124 const struct lu_fid *fid)
126 struct osd_inconsistent_item *oii;
127 bool resurrected = false;
129 if (list_empty(&scrub->os_stale_items))
132 spin_lock(&scrub->os_lock);
133 list_for_each_entry(oii, &scrub->os_stale_items, oii_list) {
134 if (lu_fid_eq(fid, &oii->oii_cache.oic_fid)) {
135 list_del(&oii->oii_list);
141 spin_unlock(&scrub->os_lock);
146 static void osd_scrub_ois_fini(struct lustre_scrub *scrub,
147 struct list_head *list)
149 struct osd_inconsistent_item *oii;
150 struct osd_inconsistent_item *tmp;
152 spin_lock(&scrub->os_lock);
153 list_for_each_entry_safe(oii, tmp, list, oii_list) {
154 list_del(&oii->oii_list);
157 spin_unlock(&scrub->os_lock);
160 static inline int osd_scrub_has_window(struct lustre_scrub *scrub,
161 struct osd_otable_cache *ooc)
163 return scrub->os_pos_current < ooc->ooc_pos_preload + SCRUB_WINDOW_SIZE;
167 * update/insert/delete the specified OI mapping (@fid @id) according to the ops
169 * \retval 1, changed nothing
170 * \retval 0, changed successfully
171 * \retval -ve, on error
173 int osd_scrub_refresh_mapping(struct osd_thread_info *info,
174 struct osd_device *dev,
175 const struct lu_fid *fid,
176 const struct osd_inode_id *id,
178 enum oi_check_flags flags, bool *exist)
184 if (dev->od_scrub.os_scrub.os_file.sf_param & SP_DRYRUN && !force)
187 /* DTO_INDEX_INSERT is enough for other two ops:
188 * delete/update, but save stack. */
189 th = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
190 osd_dto_credits_noquota[DTO_INDEX_INSERT]);
193 CWARN("%s: fail to start trans for scrub op %d "
194 DFID" => %u/%u: rc = %d\n", osd_name(dev), ops,
195 PFID(fid), id ? id->oii_ino : -1, id ? id->oii_gen : -1,
201 case DTO_INDEX_UPDATE:
202 rc = osd_oi_update(info, dev, fid, id, th, flags);
203 if (unlikely(rc == -ENOENT)) {
204 /* Some unlink thread may have removed the OI mapping. */
208 case DTO_INDEX_INSERT:
209 rc = osd_oi_insert(info, dev, fid, id, th, flags, exist);
210 if (unlikely(rc == -EEXIST)) {
212 /* XXX: There are troublesome cases when adding OI
213 * mapping for IGIF object, which may cause
214 * multiple objects to be mapped to the same
215 * IGIF formatted FID. Consider the following
218 * 1) The MDT is upgrading from 1.8 device.
219 * The OI scrub generates IGIF FID1 for the
220 * OBJ1 and adds the OI mapping.
222 * 2) For some reason, the OI scrub does not
223 * process all the IGIF objects completely.
225 * 3) The MDT is backed up and restored against
228 * 4) When the MDT mounts up, the OI scrub will
229 * try to rebuild the OI files. For some IGIF
230 * object, OBJ2, which was not processed by the
231 * OI scrub before the backup/restore, and the
232 * new generated IGIF formatted FID may be just
233 * the FID1, the same as OBJ1.
235 * Under such case, the OI scrub cannot know how
236 * to generate new FID for the OBJ2.
238 * Currently, we do nothing for that. One possible
239 * solution is to generate new normal FID for the
242 * Anyway, it is rare, only exists in theory. */
245 case DTO_INDEX_DELETE:
246 rc = osd_oi_delete(info, dev, fid, th, flags);
248 /* It is normal that the unlink thread has removed the
249 * OI mapping already. */
254 LASSERTF(0, "Unexpected ops %d\n", ops);
258 ldiskfs_journal_stop(th);
260 CDEBUG(D_LFSCK, "%s: fail to refresh OI map for scrub op %d "
261 DFID" => %u/%u: rc = %d\n", osd_name(dev), ops,
262 PFID(fid), id ? id->oii_ino : -1, id ? id->oii_gen : -1,
269 osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
270 struct osd_idmap_cache *oic, int val)
272 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
273 struct scrub_file *sf = &scrub->os_file;
274 struct lu_fid *fid = &oic->oic_fid;
275 struct osd_inode_id *lid = &oic->oic_lid;
276 struct osd_inode_id *lid2 = &info->oti_id;
277 struct osd_inconsistent_item *oii = NULL;
278 struct inode *inode = NULL;
279 int ops = DTO_INDEX_UPDATE;
281 bool bad_inode = false;
286 down_write(&scrub->os_rwsem);
287 /* remove IDIF support to simplify logic */
288 if (val == SCRUB_NEXT_OSTOBJ_OLD)
289 GOTO(out, rc = -EOPNOTSUPP);
291 if (val == SCRUB_NEXT_OSTOBJ)
292 flags = OI_KNOWN_ON_OST;
294 scrub->os_new_checked++;
298 if (scrub->os_in_prior) {
299 oii = list_entry(oic, struct osd_inconsistent_item,
301 if (CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE))
302 GOTO(out, rc = -ESTALE);
305 if (lid->oii_ino < sf->sf_pos_latest_start && !oii)
307 if (lid->oii_ino < LDISKFS_FIRST_INO(osd_sb(dev)))
308 GOTO(out, rc = -ENOENT);
310 if (fid_is_igif(fid))
314 inode = osd_iget(info, dev, lid, 0);
317 /* someone removed the inode. */
318 if (rc == -ENOENT || rc == -ESTALE)
322 } else if (val == SCRUB_NEXT_NOLMA) {
323 if (!scrub->os_convert_igif ||
324 CFS_FAIL_CHECK(OBD_FAIL_FID_NOLMA))
327 /* set LMA if missing */
328 sf->sf_flags |= SF_UPGRADE;
329 if (!(sf->sf_param & SP_DRYRUN)) {
330 rc = osd_ea_fid_set(info, inode, fid, 0, 0);
336 /* checking existing mapping */
337 rc = osd_oi_lookup(info, dev, fid, lid2, flags);
339 /* insert if mapping doesn't exist */
341 ops = DTO_INDEX_INSERT;
342 else if (rc != -ESTALE)
348 if (val == SCRUB_NEXT_OSTOBJ)
349 sf->sf_flags |= SF_INCONSISTENT;
350 } else if (osd_id_eq(lid, lid2)) {
351 /* mapping matches */
353 /* delete mapping if it's stale */
354 rc = osd_scrub_refresh_mapping(info, dev, fid, lid,
355 DTO_INDEX_DELETE, false, flags, NULL);
357 "%s: delete stale OI "DFID" -> %u/%u: rc = %d\n",
358 osd_dev2name(dev), PFID(fid), lid->oii_ino,
363 struct inode *inode2;
366 /* mapping mismatch */
367 if (!scrub->os_partial_scan) {
368 spin_lock(&scrub->os_lock);
369 scrub->os_full_speed = 1;
370 spin_unlock(&scrub->os_lock);
372 sf->sf_flags |= SF_INCONSISTENT;
374 /* if new inode is bad, keep existing mapping */
378 /* verify existing mapping */
379 inode2 = osd_iget(info, dev, lid2, 0);
380 if (IS_ERR(inode2)) {
381 rc = PTR_ERR(inode2);
382 if (rc == -ENOENT || rc == -ESTALE)
387 rc = osd_get_lma(info, inode2, &info->oti_obj_dentry,
388 &info->oti_ost_attrs);
396 /* if inode2 looks better, keep existing mapping */
397 fid2 = &info->oti_ost_attrs.loa_lma.lma_self_fid;
398 if ((rc == 0 && lu_fid_eq(fid, fid2)) &&
399 ((inode->i_size == 0 && inode2->i_size > 0 &&
400 inode_get_mtime_sec(inode) == inode_get_mtime_sec(inode2)) ||
401 inode_get_mtime_sec(inode) < inode_get_mtime_sec(inode2))) {
407 /* otherwise delete existing mapping */
408 CDEBUG(D_LFSCK, "%s: delete stale OI "DFID" -> %u/%u\n",
409 osd_dev2name(dev), PFID(fid), lid2->oii_ino,
411 rc = osd_scrub_refresh_mapping(info, dev, fid, lid2,
412 DTO_INDEX_DELETE, false, flags, NULL);
415 /* and then insert new one */
416 ops = DTO_INDEX_INSERT;
418 LASSERT(ops == DTO_INDEX_INSERT || ops == DTO_INDEX_UPDATE);
419 CDEBUG(D_LFSCK, "%s: %s OI "DFID" -> %u/%u\n",
420 osd_dev2name(dev), ops == DTO_INDEX_INSERT ? "insert" : "update",
421 PFID(fid), lid->oii_ino, lid->oii_gen);
422 rc = osd_scrub_refresh_mapping(info, dev, fid, lid, ops, false, flags,
425 if (scrub->os_in_prior)
426 sf->sf_items_updated_prior++;
428 sf->sf_items_updated++;
430 if (ops == DTO_INDEX_INSERT && val == 0 && !exist) {
431 int idx = osd_oi_fid2idx(dev, fid);
433 sf->sf_flags |= SF_RECREATED;
434 if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap)))
435 ldiskfs_set_bit(idx, sf->sf_oi_bitmap);
441 sf->sf_items_failed++;
442 if (lid->oii_ino >= LDISKFS_FIRST_INO(osd_sb(dev)) &&
443 (sf->sf_pos_first_inconsistent == 0 ||
444 sf->sf_pos_first_inconsistent > lid->oii_ino))
445 sf->sf_pos_first_inconsistent = lid->oii_ino;
447 if (!oii && !CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_STALE)) {
448 if (osd_scrub_oi_resurrect(scrub, fid))
450 "%s: resurrect OI "DFID" -> %u/%u\n",
451 osd_dev2name(dev), PFID(fid),
452 lid->oii_ino, lid->oii_gen);
454 /* release fixed inconsistent item */
456 "%s: inconsistent OI "DFID" -> %u/%u %s\n",
457 osd_dev2name(dev), PFID(fid), lid->oii_ino,
458 lid->oii_gen, bad_inode ? "deleted" : "fixed");
459 spin_lock(&scrub->os_lock);
460 list_del_init(&oii->oii_list);
461 spin_unlock(&scrub->os_lock);
470 /* something strange with item, moving to stale */
471 osd_scrub_oi_mark_stale(scrub, oii);
473 "%s: fix inconsistent OI "DFID" -> %u/%u failed: %d\n",
474 osd_dev2name(dev), PFID(fid), lid->oii_ino,
477 up_write(&scrub->os_rwsem);
479 if (!IS_ERR_OR_NULL(inode))
482 RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
485 /* iteration engine */
487 typedef int (*osd_iit_next_policy)(struct osd_thread_info *info,
488 struct osd_device *dev,
489 struct osd_iit_param *param,
490 struct osd_idmap_cache **oic,
/*
 * Signature of the "exec" step of the inode iteration.  osd_scrub_exec() and
 * osd_preload_exec() have this signature; osd_inode_iteration() stores one of
 * them in a variable of this type and calls it with the value returned by the
 * matching "next" step passed as \a rc.
 *
 * \param[in]     info		per-thread OSD context
 * \param[in]     dev		OSD device being iterated
 * \param[in]     param		iteration cursor
 * \param[in]     oic		FID/inode-id pair produced by the "next" step
 * \param[in,out] noslot	flag passed by address from the iteration loop
 * \param[in]     rc		result of the preceding "next" step
 */
typedef int (*osd_iit_exec_policy)(struct osd_thread_info *info,
				   struct osd_device *dev,
				   struct osd_iit_param *param,
				   struct osd_idmap_cache *oic,
				   bool *noslot, int rc);
499 static int osd_iit_next(struct osd_iit_param *param, __u64 *pos)
504 param->offset = ldiskfs_find_next_bit(param->bitmap->b_data,
505 LDISKFS_INODES_PER_GROUP(param->sb), param->offset);
506 if (param->offset >= LDISKFS_INODES_PER_GROUP(param->sb)) {
507 *pos = 1 + (param->bg+1) * LDISKFS_INODES_PER_GROUP(param->sb);
508 return SCRUB_NEXT_BREAK;
511 offset = param->offset++;
512 if (unlikely(*pos == param->gbase + offset && *pos != param->start)) {
513 /* We should NOT find the same object more than once. */
514 CERROR("%s: scan the same object multiple times at the pos: "
515 "group = %u, base = %u, offset = %u, start = %u\n",
516 osd_sb2name(param->sb), (__u32)param->bg, param->gbase,
517 offset, param->start);
521 *pos = param->gbase + offset;
526 * \retval SCRUB_NEXT_OSTOBJ_OLD: FID-on-OST
527 * \retval 0: FID-on-MDT
529 static int osd_scrub_check_local_fldb(struct osd_thread_info *info,
530 struct osd_device *dev,
533 /* XXX: The initial OI scrub will scan the top level /O to generate
534 * a small local FLDB according to the <seq>. If the given FID
535 * is in the local FLDB, then it is FID-on-OST; otherwise it's
536 * quite possible for FID-on-MDT. */
538 return SCRUB_NEXT_OSTOBJ_OLD;
543 static int osd_scrub_get_fid(struct osd_thread_info *info,
544 struct osd_device *dev, struct inode *inode,
545 struct lu_fid *fid, bool scrub)
547 struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
548 bool has_lma = false;
551 rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
552 &info->oti_ost_attrs);
555 if (lma->lma_compat & LMAC_NOT_IN_OI ||
556 lma->lma_incompat & LMAI_AGENT)
557 return SCRUB_NEXT_CONTINUE;
559 *fid = lma->lma_self_fid;
563 if (lma->lma_compat & LMAC_FID_ON_OST)
564 return SCRUB_NEXT_OSTOBJ;
566 if (fid_is_idif(fid))
567 return SCRUB_NEXT_OSTOBJ_OLD;
569 /* For local object. */
570 if (fid_is_internal(fid))
573 /* For external visible MDT-object with non-normal FID. */
574 if (fid_is_namespace_visible(fid) && !fid_is_norm(fid))
577 /* For the object with normal FID, it may be MDT-object,
578 * or may be 2.4 OST-object, need further distinguish.
579 * Fall through to next section. */
582 if (rc == -ENODATA || rc == 0) {
583 rc = osd_get_idif(info, inode, &info->oti_obj_dentry, fid);
586 /* It is 2.3 or older OST-object. */
587 rc = SCRUB_NEXT_OSTOBJ_OLD;
593 /* It is FID-on-OST, but we do not know how
594 * to generate its FID, ignore it directly. */
595 rc = SCRUB_NEXT_CONTINUE;
597 /* It is 2.4 or newer OST-object. */
598 rc = SCRUB_NEXT_OSTOBJ_OLD;
606 if (dev->od_scrub.os_scrub.os_convert_igif) {
607 lu_igif_build(fid, inode->i_ino,
608 inode->i_generation);
610 rc = SCRUB_NEXT_NOLMA;
614 /* It may be FID-on-OST, or may be FID for
615 * non-MDT0, anyway, we do not know how to
616 * generate its FID, ignore it directly. */
617 rc = SCRUB_NEXT_CONTINUE;
622 /* For OI scrub case only: the object has LMA but has no ff
623 * (or ff crashed). It may be MDT-object, may be OST-object
624 * with crashed ff. The last check is local FLDB. */
625 rc = osd_scrub_check_local_fldb(info, dev, fid);
631 static int osd_iit_iget(struct osd_thread_info *info, struct osd_device *dev,
632 struct lu_fid *fid, struct osd_inode_id *lid, __u32 pos,
633 struct super_block *sb, bool is_scrub)
635 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
642 /* Do not handle the backend root object or the agent parent object.
643 * They are neither visible to namespace nor have OI mappings. */
644 if (unlikely(pos == osd_sb(dev)->s_root->d_inode->i_ino ||
645 is_remote_parent_ino(dev, pos)))
646 RETURN(SCRUB_NEXT_CONTINUE);
648 /* Skip project quota inode since it is greater than s_first_ino. */
649 #ifdef HAVE_PROJECT_QUOTA
650 if (ldiskfs_has_feature_project(sb) &&
651 pos == le32_to_cpu(LDISKFS_SB(sb)->s_es->s_prj_quota_inum))
652 RETURN(SCRUB_NEXT_CONTINUE);
655 osd_id_gen(lid, pos, OSD_OII_NOGEN);
656 inode = osd_iget(info, dev, lid, LDISKFS_IGET_NO_CHECKS);
659 /* The inode may be removed after bitmap searching, or the
660 * file is newly created without its inode initialized yet.
661 * LU-15754: After "new primitive: discard_new_inode()" change
662 * in the kernel find_inode_fast() returns -ESTALE, but
663 * iget_locked replaces it to the NULL and finally
664 * ldiskfs_inode_attach_jinode() returns -ENOMEM
665 * Let's skip an inode if -ENOMEM returned.
667 if (rc == -ENOENT || rc == -ESTALE || rc == -ENOMEM)
668 RETURN(SCRUB_NEXT_CONTINUE);
670 CDEBUG(D_LFSCK, "%s: fail to read inode, ino# = %u: "
671 "rc = %d\n", osd_dev2name(dev), pos, rc);
675 if (dev->od_is_ost && S_ISREG(inode->i_mode) && inode->i_nlink > 1)
676 dev->od_scrub.os_scrub.os_has_ml_file = 1;
679 ldiskfs_test_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB)) {
680 /* Only skip it the first time the OI scrub accesses it. */
681 ldiskfs_clear_inode_state(inode, LDISKFS_STATE_LUSTRE_NOSCRUB);
682 GOTO(put, rc = SCRUB_NEXT_NOSCRUB);
685 rc = osd_scrub_get_fid(info, dev, inode, fid, is_scrub);
686 if (rc >= 0 && scrub->os_ls_count > 0 && fid_is_local_storage(fid)) {
688 for (index = 0; index < scrub->os_ls_count; index++)
689 if (scrub->os_ls_fids[index].f_seq == fid->f_seq)
692 if (index < scrub->os_ls_count &&
693 scrub->os_ls_fids[index].f_oid < fid->f_oid)
694 scrub->os_ls_fids[index].f_oid = fid->f_oid;
703 static int osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev,
704 struct osd_iit_param *param,
705 struct osd_idmap_cache **oic, const bool noslot)
707 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
709 struct osd_inode_id *lid;
712 if (CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) && cfs_fail_val > 0)
713 wait_var_event_timeout(
715 !list_empty(&scrub->os_inconsistent_items) ||
716 kthread_should_stop(),
717 cfs_time_seconds(cfs_fail_val));
719 if (CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_CRASH)) {
720 spin_lock(&scrub->os_lock);
721 scrub->os_running = 0;
722 spin_unlock(&scrub->os_lock);
723 return SCRUB_NEXT_CRASH;
726 if (CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_FATAL))
727 return SCRUB_NEXT_FATAL;
729 if (kthread_should_stop())
730 return SCRUB_NEXT_EXIT;
732 if (!list_empty(&scrub->os_inconsistent_items)) {
733 spin_lock(&scrub->os_lock);
734 if (likely(!list_empty(&scrub->os_inconsistent_items))) {
735 struct osd_inconsistent_item *oii;
737 oii = list_first_entry(&scrub->os_inconsistent_items,
738 struct osd_inconsistent_item,
741 *oic = &oii->oii_cache;
742 scrub->os_in_prior = 1;
743 spin_unlock(&scrub->os_lock);
747 spin_unlock(&scrub->os_lock);
751 return SCRUB_NEXT_WAIT;
753 rc = osd_iit_next(param, &scrub->os_pos_current);
757 *oic = &dev->od_scrub.os_oic;
758 fid = &(*oic)->oic_fid;
759 lid = &(*oic)->oic_lid;
760 rc = osd_iit_iget(info, dev, fid, lid,
761 scrub->os_pos_current, param->sb, true);
765 static int osd_preload_next(struct osd_thread_info *info,
766 struct osd_device *dev, struct osd_iit_param *param,
767 struct osd_idmap_cache **oic, const bool noslot)
769 struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache;
770 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
773 if (scrub->os_running &&
774 ooc->ooc_pos_preload >= scrub->os_pos_current)
775 return SCRUB_NEXT_EXIT;
777 rc = osd_iit_next(param, &ooc->ooc_pos_preload);
781 rc = osd_iit_iget(info, dev,
782 &ooc->ooc_cache[ooc->ooc_producer_idx].oic_fid,
783 &ooc->ooc_cache[ooc->ooc_producer_idx].oic_lid,
784 ooc->ooc_pos_preload, param->sb, false);
789 osd_scrub_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
791 spin_lock(&scrub->os_lock);
792 if (osd_scrub_has_window(scrub, &it->ooi_cache) ||
793 !list_empty(&scrub->os_inconsistent_items) ||
794 it->ooi_waiting || kthread_should_stop())
795 scrub->os_waiting = 0;
797 scrub->os_waiting = 1;
798 spin_unlock(&scrub->os_lock);
800 return !scrub->os_waiting;
803 static int osd_scrub_exec(struct osd_thread_info *info, struct osd_device *dev,
804 struct osd_iit_param *param,
805 struct osd_idmap_cache *oic, bool *noslot, int rc)
807 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
808 struct scrub_file *sf = &scrub->os_file;
809 struct osd_otable_it *it = dev->od_otable_it;
810 struct osd_otable_cache *ooc = it ? &it->ooi_cache : NULL;
813 case SCRUB_NEXT_NOSCRUB:
814 down_write(&scrub->os_rwsem);
815 scrub->os_new_checked++;
816 sf->sf_items_noscrub++;
817 up_write(&scrub->os_rwsem);
818 case SCRUB_NEXT_CONTINUE:
819 case SCRUB_NEXT_WAIT:
823 rc = osd_scrub_check_update(info, dev, oic, rc);
825 spin_lock(&scrub->os_lock);
826 scrub->os_in_prior = 0;
827 spin_unlock(&scrub->os_lock);
831 rc = scrub_checkpoint(info->oti_env, scrub);
833 CDEBUG(D_LFSCK, "%s: fail to checkpoint, pos = %llu: "
834 "rc = %d\n", osd_scrub2name(scrub),
835 scrub->os_pos_current, rc);
836 /* Continue, as long as the scrub itself can go ahead. */
839 if (scrub->os_in_prior) {
840 spin_lock(&scrub->os_lock);
841 scrub->os_in_prior = 0;
842 spin_unlock(&scrub->os_lock);
847 if (it != NULL && it->ooi_waiting && ooc != NULL &&
848 ooc->ooc_pos_preload < scrub->os_pos_current) {
849 spin_lock(&scrub->os_lock);
852 spin_unlock(&scrub->os_lock);
855 if (rc == SCRUB_NEXT_CONTINUE)
858 if (scrub->os_full_speed || !ooc || osd_scrub_has_window(scrub, ooc)) {
864 wait_var_event(scrub, osd_scrub_wakeup(scrub, it));
866 if (!ooc || osd_scrub_has_window(scrub, ooc))
873 static int osd_preload_exec(struct osd_thread_info *info,
874 struct osd_device *dev, struct osd_iit_param *param,
875 struct osd_idmap_cache *oic, bool *noslot, int rc)
877 struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache;
880 ooc->ooc_cached_items++;
881 ooc->ooc_producer_idx = (ooc->ooc_producer_idx + 1) &
882 ~OSD_OTABLE_IT_CACHE_MASK;
884 return rc > 0 ? 0 : rc;
/*
 * Positive return codes of osd_inode_iteration():
 *  - SCRUB_IT_ALL:   returned at the end of the iteration; on this value
 *		      osd_otable_it_preload() sets ooi_all_cached.
 *  - SCRUB_IT_CRASH: returned when the "next" step reports
 *		      SCRUB_NEXT_CRASH; osd_scrub_main() turns it into -EINVAL.
 */
#define SCRUB_IT_ALL	1
#define SCRUB_IT_CRASH	2
890 static void osd_scrub_join(const struct lu_env *env, struct osd_device *dev,
891 __u32 flags, bool inconsistent)
893 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
894 struct scrub_file *sf = &scrub->os_file;
898 LASSERT(!(flags & SS_AUTO_PARTIAL));
900 down_write(&scrub->os_rwsem);
901 spin_lock(&scrub->os_lock);
902 scrub->os_in_join = 1;
903 if (flags & SS_SET_FAILOUT)
904 sf->sf_param |= SP_FAILOUT;
905 else if (flags & SS_CLEAR_FAILOUT)
906 sf->sf_param &= ~SP_FAILOUT;
908 if (flags & SS_SET_DRYRUN)
909 sf->sf_param |= SP_DRYRUN;
910 else if (flags & SS_CLEAR_DRYRUN)
911 sf->sf_param &= ~SP_DRYRUN;
913 if (flags & SS_RESET) {
914 scrub_file_reset(scrub, dev->od_uuid,
915 inconsistent ? SF_INCONSISTENT : 0);
916 sf->sf_status = SS_SCANNING;
919 if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_UPGRADE))
920 scrub->os_full_speed = 1;
922 scrub->os_full_speed = 0;
924 if (flags & SS_AUTO_FULL) {
925 sf->sf_flags |= SF_AUTO;
926 scrub->os_full_speed = 1;
928 spin_unlock(&scrub->os_lock);
930 scrub->os_new_checked = 0;
931 if (sf->sf_pos_last_checkpoint != 0)
932 sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
934 sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev)) + 1;
936 scrub->os_pos_current = sf->sf_pos_latest_start;
937 sf->sf_time_latest_start = ktime_get_real_seconds();
938 sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
939 sf->sf_pos_last_checkpoint = sf->sf_pos_latest_start - 1;
940 rc = scrub_file_store(env, scrub);
942 spin_lock(&scrub->os_lock);
943 scrub->os_waiting = 0;
944 scrub->os_paused = 0;
945 scrub->os_partial_scan = 0;
946 scrub->os_in_join = 0;
947 scrub->os_full_scrub = 0;
948 spin_unlock(&scrub->os_lock);
950 up_write(&scrub->os_rwsem);
952 CDEBUG(D_LFSCK, "%s: joined in the OI scrub with flag %u: rc = %d\n",
953 osd_scrub2name(scrub), flags, rc);
958 static int osd_inode_iteration(struct osd_thread_info *info,
959 struct osd_device *dev, __u32 max, bool preload)
961 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
962 struct scrub_file *sf = &scrub->os_file;
963 osd_iit_next_policy next;
964 osd_iit_exec_policy exec;
967 struct osd_iit_param *param;
976 param = &dev->od_scrub.os_iit_param;
977 memset(param, 0, sizeof(*param));
978 param->sb = osd_sb(dev);
980 while (scrub->os_partial_scan && !scrub->os_in_join) {
981 struct osd_idmap_cache *oic = NULL;
983 rc = osd_scrub_next(info, dev, param, &oic, noslot);
985 case SCRUB_NEXT_EXIT:
987 case SCRUB_NEXT_CRASH:
988 RETURN(SCRUB_IT_CRASH);
989 case SCRUB_NEXT_FATAL:
991 case SCRUB_NEXT_WAIT: {
992 struct kstatfs *ksfs = &info->oti_ksfs;
995 if (dev->od_full_scrub_ratio == OFSR_NEVER ||
996 unlikely(sf->sf_items_updated_prior == 0))
999 if (dev->od_full_scrub_ratio == OFSR_DIRECTLY ||
1000 scrub->os_full_scrub) {
1001 osd_scrub_join(info->oti_env, dev,
1002 SS_AUTO_FULL | SS_RESET, true);
1006 rc = param->sb->s_op->statfs(param->sb->s_root, ksfs);
1008 __u64 used = ksfs->f_files - ksfs->f_ffree;
1010 used = div64_u64(used, sf->sf_items_updated_prior);
1011 /* If we hit too much inconsistent OI
1012 * mappings during the partial scan,
1013 * then scan the device completely. */
1014 if (used < dev->od_full_scrub_ratio) {
1015 osd_scrub_join(info->oti_env, dev,
1016 SS_AUTO_FULL | SS_RESET, true);
1022 if (CFS_FAIL_CHECK(OBD_FAIL_OSD_SCRUB_DELAY) &&
1026 saved_flags = sf->sf_flags;
1027 sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT |
1028 SF_UPGRADE | SF_AUTO);
1029 sf->sf_status = SS_COMPLETED;
1032 kthread_should_stop() ||
1033 !scrub->os_partial_scan ||
1034 scrub->os_in_join ||
1035 !list_empty(&scrub->os_inconsistent_items));
1036 sf->sf_flags = saved_flags;
1037 sf->sf_status = SS_SCANNING;
1039 if (kthread_should_stop())
1042 if (!scrub->os_partial_scan || scrub->os_in_join)
1048 LASSERTF(rc == 0, "rc = %d\n", rc);
1050 osd_scrub_exec(info, dev, param, oic, &noslot, rc);
1057 wait_var_event(scrub,
1058 kthread_should_stop() ||
1059 !scrub->os_in_join);
1061 if (kthread_should_stop())
1067 next = osd_scrub_next;
1068 exec = osd_scrub_exec;
1069 pos = &scrub->os_pos_current;
1070 count = &scrub->os_new_checked;
1071 param->start = *pos;
1072 param->bg = (*pos - 1) / LDISKFS_INODES_PER_GROUP(param->sb);
1074 (*pos - 1) % LDISKFS_INODES_PER_GROUP(param->sb);
1076 1 + param->bg * LDISKFS_INODES_PER_GROUP(param->sb);
1078 struct osd_otable_cache *ooc = &dev->od_otable_it->ooi_cache;
1080 next = osd_preload_next;
1081 exec = osd_preload_exec;
1082 pos = &ooc->ooc_pos_preload;
1083 count = &ooc->ooc_cached_items;
1084 param = &dev->od_otable_it->ooi_iit_param;
1088 limit = le32_to_cpu(LDISKFS_SB(osd_sb(dev))->s_es->s_inodes_count);
1089 while (*pos <= limit && *count < max) {
1090 struct ldiskfs_group_desc *desc;
1091 bool next_group = false;
1093 desc = ldiskfs_get_group_desc(param->sb, param->bg, NULL);
1097 if (desc->bg_flags & cpu_to_le16(LDISKFS_BG_INODE_UNINIT)) {
1102 param->bitmap = ldiskfs_read_inode_bitmap(param->sb, param->bg);
1103 if (IS_ERR_OR_NULL(param->bitmap)) {
1104 if (param->bitmap) {
1105 rc = PTR_ERR(param->bitmap);
1106 param->bitmap = NULL;
1110 CERROR("%s: fail to read bitmap for %u, scrub will stop, urgent mode: rc = %d\n",
1111 osd_scrub2name(scrub), (__u32)param->bg, rc);
1116 struct osd_idmap_cache *oic = NULL;
1119 ldiskfs_itable_unused_count(param->sb, desc) >=
1120 LDISKFS_INODES_PER_GROUP(param->sb)) {
1125 rc = next(info, dev, param, &oic, noslot);
1127 case SCRUB_NEXT_BREAK:
1130 case SCRUB_NEXT_EXIT:
1131 brelse(param->bitmap);
1133 case SCRUB_NEXT_CRASH:
1134 brelse(param->bitmap);
1135 RETURN(SCRUB_IT_CRASH);
1136 case SCRUB_NEXT_FATAL:
1137 brelse(param->bitmap);
1141 rc = exec(info, dev, param, oic, &noslot, rc);
1142 } while (!rc && *pos <= limit && *count < max);
1145 if (param->bitmap) {
1146 brelse(param->bitmap);
1147 param->bitmap = NULL;
1157 param->bg * LDISKFS_INODES_PER_GROUP(param->sb);
1158 *pos = param->gbase;
1159 param->start = *pos;
1164 RETURN(SCRUB_IT_ALL);
1170 static int osd_otable_it_preload(const struct lu_env *env,
1171 struct osd_otable_it *it)
1173 struct osd_device *dev = it->ooi_dev;
1174 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
1175 struct osd_otable_cache *ooc = &it->ooi_cache;
1179 rc = osd_inode_iteration(osd_oti_get(env), dev,
1180 OSD_OTABLE_IT_CACHE_SIZE, true);
1181 if (rc == SCRUB_IT_ALL)
1182 it->ooi_all_cached = 1;
1184 if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) {
1185 spin_lock(&scrub->os_lock);
1186 scrub->os_waiting = 0;
1188 spin_unlock(&scrub->os_lock);
1191 RETURN(rc < 0 ? rc : ooc->ooc_cached_items);
/*
 * Forward declarations of the extra scan passes called from osd_scrub_main();
 * they are defined elsewhere in this file.
 */
static int osd_scan_ml_file_main(const struct lu_env *env,
				 struct osd_device *dev);

static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev);

static int osd_scan_last_id_main(const struct lu_env *env,
				 struct osd_device *dev);
1202 static int osd_scrub_main(void *args)
1205 struct osd_device *dev = (struct osd_device *)args;
1206 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
1210 rc = lu_env_init(&env, LCT_LOCAL | LCT_DT_THREAD);
1212 CDEBUG(D_LFSCK, "%s: OI scrub fail to init env: rc = %d\n",
1213 osd_scrub2name(scrub), rc);
1217 rc = scrub_thread_prep(&env, scrub, dev->od_uuid,
1218 LDISKFS_FIRST_INO(osd_sb(dev)) + 1);
1220 CDEBUG(D_LFSCK, "%s: OI scrub fail to scrub prep: rc = %d\n",
1221 osd_scrub2name(scrub), rc);
1225 if (!scrub->os_full_speed && !scrub->os_partial_scan) {
1226 struct osd_otable_it *it = dev->od_otable_it;
1227 struct osd_otable_cache *ooc = &it->ooi_cache;
1229 wait_var_event(scrub,
1230 it->ooi_user_ready || kthread_should_stop());
1231 if (kthread_should_stop())
1234 scrub->os_pos_current = ooc->ooc_pos_preload;
1237 CDEBUG(D_LFSCK, "%s: OI scrub start, flags = 0x%x, pos = %llu%s\n",
1238 osd_scrub2name(scrub), scrub->os_start_flags,
1239 scrub->os_pos_current,
1240 scrub->os_file.sf_param & SP_DRYRUN ? " dryrun mode" : "");
1242 scrub->os_ls_count = 0;
1243 scrub->os_ls_size = 4;
1244 OBD_ALLOC(scrub->os_ls_fids, scrub->os_ls_size * sizeof(struct lu_fid));
1245 if (scrub->os_ls_fids == NULL)
1246 GOTO(out, rc = -ENOMEM);
1248 rc = osd_scan_O_main(&env, dev);
1252 rc = osd_inode_iteration(osd_oti_get(&env), dev, ~0U, false);
1253 if (unlikely(rc == SCRUB_IT_CRASH)) {
1254 spin_lock(&scrub->os_lock);
1255 scrub->os_running = 0;
1256 spin_unlock(&scrub->os_lock);
1257 GOTO(out, rc = -EINVAL);
1260 if (scrub->os_has_ml_file) {
1261 ret = osd_scan_ml_file_main(&env, dev);
1263 GOTO(out, rc = ret);
1266 ret = osd_scan_last_id_main(&env, dev);
1274 dev->od_igif_inoi = 1;
1275 dev->od_check_ff = 0;
1277 rc = scrub_thread_post(&env, &dev->od_scrub.os_scrub, rc);
1278 CDEBUG(D_LFSCK, "%s: OI scrub: stop, pos = %llu: rc = %d%s\n",
1279 osd_scrub2name(scrub), scrub->os_pos_current, rc,
1280 scrub->os_file.sf_param & SP_DRYRUN ? " dryrun mode" : "");
1284 if (scrub->os_ls_fids) {
1285 OBD_FREE(scrub->os_ls_fids,
1286 scrub->os_ls_size * sizeof(struct lu_fid));
1288 scrub->os_ls_size = 0;
1289 scrub->os_ls_count = 0;
1290 scrub->os_ls_fids = NULL;
1293 osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
1297 spin_lock(&scrub->os_lock);
1298 scrub->os_running = 0;
1299 spin_unlock(&scrub->os_lock);
1300 if (xchg(&scrub->os_task, NULL) == NULL)
1301 /* scrub_stop() is waiting, we need to synchronize */
1302 wait_var_event(scrub, kthread_should_stop());
1307 /* initial OI scrub */
/*
 * Forward declarations used by the initial OI scrub: the scandir_t scanner
 * signature, the directory-entry fill callbacks (osd_ios_*_fill) and the
 * directory scanners (osd_ios_*_scan).  They are declared here because the
 * osd_lf_maps[] table references them before their definitions.
 */
1309 typedef int (*scandir_t)(struct osd_thread_info *, struct osd_device *,
1310 struct dentry *, filldir_t filldir);
/* Fill callbacks in the form that receives a struct dir_context pointer. */
1312 #ifdef HAVE_FILLDIR_USE_CTX
1314 osd_ios_varfid_fill(struct dir_context *buf, const char *name, int namelen,
1315 loff_t offset, __u64 ino, unsigned int d_type);
1318 osd_ios_lf_fill(struct dir_context *buf, const char *name, int namelen,
1319 loff_t offset, __u64 ino, unsigned int d_type);
1322 osd_ios_dl_fill(struct dir_context *buf, const char *name, int namelen,
1323 loff_t offset, __u64 ino, unsigned int d_type);
1326 osd_ios_uld_fill(struct dir_context *buf, const char *name, int namelen,
1327 loff_t offset, __u64 ino, unsigned int d_type);
/* The same fill callbacks in the form that receives an opaque void pointer. */
1329 static int osd_ios_varfid_fill(void *buf, const char *name, int namelen,
1330 loff_t offset, __u64 ino, unsigned int d_type);
1331 static int osd_ios_lf_fill(void *buf, const char *name, int namelen,
1332 loff_t offset, __u64 ino, unsigned int d_type);
1333 static int osd_ios_dl_fill(void *buf, const char *name, int namelen,
1334 loff_t offset, __u64 ino, unsigned int d_type);
1335 static int osd_ios_uld_fill(void *buf, const char *name, int namelen,
1336 loff_t offset, __u64 ino, unsigned int d_type);
/* Directory scanners; each takes the scandir_t parameter list. */
1340 osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
1341 struct dentry *dentry, filldir_t filldir);
1343 osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
1344 struct dentry *dentry, filldir_t filldir);
1347 osd_ios_OBJECTS_scan(struct osd_thread_info *info, struct osd_device *dev,
1348 struct dentry *dentry, filldir_t filldir);
/*
 * Fields of one osd_lf_maps[] / osd_dl_maps[] entry.
 * olm_fid is the FID handed to osd_ios_scan_one() for the matching name.
 */
1352 struct lu_fid olm_fid;
/*
 * Scanner and fill callback that osd_ios_root_fill() passes to
 * osd_ios_new_item() when the entry has OLF_SCAN_SUBITEMS set.
 */
1355 scandir_t olm_scandir;
1356 filldir_t olm_filldir;
1359 /* Add newly introduced local files to this list in the future. */
/*
 * Table of well-known local files and directories under the backend root.
 *
 * Users visible in this file:
 *  - osd_ios_root_fill() matches a directory entry against
 *    olm_name/olm_namelen and scans the match with olm_fid/olm_flags;
 *  - osd_initial_OI_scrub() walks the table to drop OI mappings of entries
 *    that have a non-zero olm_fid but no longer exist on disk;
 *  - osd_lf_fid2name() maps a FID back to a name.
 * All of them stop at the first entry whose olm_name is NULL.
 *
 * Flags as consumed by the code in this file:
 *  - OLF_SHOW_NAME: osd_lf_fid2name() returns the name only if set;
 *  - OLF_SCAN_SUBITEMS: osd_ios_root_fill() queues the entry for a
 *    directory scan with olm_scandir/olm_filldir;
 *  - OLF_HIDE_FID: osd_ios_scan_one() builds an IGIF instead of using
 *    olm_fid when the inode has no LMA;
 *  - OLF_IDX_IN_FID: osd_ios_scan_one() stores dev->od_index in f_oid;
 *  - OLF_NOT_BACKUP: osd_ios_scan_one() skips osd_ios_index_register();
 *  - OLF_NO_OI: osd_ios_root_fill() skips osd_ios_scan_one().
 */
1360 static const struct osd_lf_map osd_lf_maps[] = {
1363 .olm_name = CATLIST,
1365 .f_seq = FID_SEQ_LOCAL_FILE,
1366 .f_oid = LLOG_CATALOGS_OID,
1368 .olm_flags = OLF_SHOW_NAME,
1369 .olm_namelen = sizeof(CATLIST) - 1,
1374 .olm_name = MOUNT_CONFIGS_DIR,
1376 .f_seq = FID_SEQ_LOCAL_FILE,
1377 .f_oid = MGS_CONFIGS_OID,
1379 .olm_flags = OLF_SCAN_SUBITEMS,
1380 .olm_namelen = sizeof(MOUNT_CONFIGS_DIR) - 1,
1381 .olm_scandir = osd_ios_general_scan,
1382 .olm_filldir = osd_ios_varfid_fill,
1385 /* NIDTBL_VERSIONS */
1387 .olm_name = MGS_NIDTBL_DIR,
1388 .olm_flags = OLF_SCAN_SUBITEMS,
1389 .olm_namelen = sizeof(MGS_NIDTBL_DIR) - 1,
1390 .olm_scandir = osd_ios_general_scan,
1391 .olm_filldir = osd_ios_varfid_fill,
1396 .olm_name = MDT_ORPHAN_DIR,
1397 .olm_namelen = sizeof(MDT_ORPHAN_DIR) - 1,
/* "ROOT" (per olm_namelen): FID hidden, scanned by osd_ios_ROOT_scan(). */
1404 .f_seq = FID_SEQ_ROOT,
1405 .f_oid = FID_OID_ROOT,
1407 .olm_flags = OLF_SCAN_SUBITEMS | OLF_HIDE_FID,
1408 .olm_namelen = sizeof("ROOT") - 1,
1409 .olm_scandir = osd_ios_ROOT_scan,
1412 /* changelog_catalog */
1414 .olm_name = CHANGELOG_CATALOG,
1415 .olm_namelen = sizeof(CHANGELOG_CATALOG) - 1,
1418 /* changelog_users */
1420 .olm_name = CHANGELOG_USERS,
1421 .olm_namelen = sizeof(CHANGELOG_USERS) - 1,
/* "fld" (per olm_namelen). */
1428 .f_seq = FID_SEQ_LOCAL_FILE,
1429 .f_oid = FLD_INDEX_OID,
1431 .olm_flags = OLF_SHOW_NAME,
1432 .olm_namelen = sizeof("fld") - 1,
1437 .olm_name = LAST_RCVD,
1439 .f_seq = FID_SEQ_LOCAL_FILE,
1440 .f_oid = LAST_RECV_OID,
1442 .olm_flags = OLF_SHOW_NAME,
1443 .olm_namelen = sizeof(LAST_RCVD) - 1,
1448 .olm_name = REPLY_DATA,
1450 .f_seq = FID_SEQ_LOCAL_FILE,
1451 .f_oid = REPLY_DATA_OID,
1453 .olm_flags = OLF_SHOW_NAME,
1454 .olm_namelen = sizeof(REPLY_DATA) - 1,
1459 .olm_name = LOV_OBJID,
1461 .f_seq = FID_SEQ_LOCAL_FILE,
1462 .f_oid = MDD_LOV_OBJ_OID,
1464 .olm_flags = OLF_SHOW_NAME,
1465 .olm_namelen = sizeof(LOV_OBJID) - 1,
1470 .olm_name = LOV_OBJSEQ,
1472 .f_seq = FID_SEQ_LOCAL_FILE,
1473 .f_oid = MDD_LOV_OBJ_OSEQ,
1475 .olm_flags = OLF_SHOW_NAME,
1476 .olm_namelen = sizeof(LOV_OBJSEQ) - 1,
1481 .olm_name = QMT_DIR,
1482 .olm_flags = OLF_SCAN_SUBITEMS,
1483 .olm_namelen = sizeof(QMT_DIR) - 1,
1484 .olm_scandir = osd_ios_general_scan,
1485 .olm_filldir = osd_ios_varfid_fill,
1490 .olm_name = QSD_DIR,
1491 .olm_flags = OLF_SCAN_SUBITEMS,
1492 .olm_namelen = sizeof(QSD_DIR) - 1,
1493 .olm_scandir = osd_ios_general_scan,
1494 .olm_filldir = osd_ios_varfid_fill,
1499 .olm_name = "seq_ctl",
1501 .f_seq = FID_SEQ_LOCAL_FILE,
1502 .f_oid = FID_SEQ_CTL_OID,
1504 .olm_flags = OLF_SHOW_NAME,
1505 .olm_namelen = sizeof("seq_ctl") - 1,
1510 .olm_name = "seq_srv",
1512 .f_seq = FID_SEQ_LOCAL_FILE,
1513 .f_oid = FID_SEQ_SRV_OID,
1515 .olm_flags = OLF_SHOW_NAME,
1516 .olm_namelen = sizeof("seq_srv") - 1,
1521 .olm_name = HEALTH_CHECK,
1523 .f_seq = FID_SEQ_LOCAL_FILE,
1524 .f_oid = OFD_HEALTH_CHECK_OID,
1526 .olm_flags = OLF_SHOW_NAME,
1527 .olm_namelen = sizeof(HEALTH_CHECK) - 1,
1532 .olm_name = LFSCK_DIR,
1533 .olm_flags = OLF_SCAN_SUBITEMS,
1534 .olm_namelen = sizeof(LFSCK_DIR) - 1,
1535 .olm_scandir = osd_ios_general_scan,
1536 .olm_filldir = osd_ios_varfid_fill,
1539 /* lfsck_bookmark */
1541 .olm_name = LFSCK_BOOKMARK,
1542 .olm_namelen = sizeof(LFSCK_BOOKMARK) - 1,
1547 .olm_name = LFSCK_LAYOUT,
1548 .olm_namelen = sizeof(LFSCK_LAYOUT) - 1,
1551 /* lfsck_namespace */
1553 .olm_name = LFSCK_NAMESPACE,
1554 .olm_namelen = sizeof(LFSCK_NAMESPACE) - 1,
1557 /* OBJECTS, upgrade from old device */
1559 .olm_name = OBJECTS,
1560 .olm_flags = OLF_SCAN_SUBITEMS,
1561 .olm_namelen = sizeof(OBJECTS) - 1,
1562 .olm_scandir = osd_ios_OBJECTS_scan,
1565 /* lquota_v2.user, upgrade from old device */
1567 .olm_name = "lquota_v2.user",
1568 .olm_namelen = sizeof("lquota_v2.user") - 1,
1571 /* lquota_v2.group, upgrade from old device */
1573 .olm_name = "lquota_v2.group",
1574 .olm_namelen = sizeof("lquota_v2.group") - 1,
1577 /* LAST_GROUP, upgrade from old device */
1579 .olm_name = "LAST_GROUP",
1581 .f_seq = FID_SEQ_LOCAL_FILE,
1582 .f_oid = OFD_LAST_GROUP_OID,
1584 .olm_flags = OLF_SHOW_NAME,
1585 .olm_namelen = sizeof("LAST_GROUP") - 1,
1588 /* committed batchid for cross-MDT operation */
1590 .olm_name = "BATCHID",
1592 .f_seq = FID_SEQ_LOCAL_FILE,
1593 .f_oid = BATCHID_COMMITTED_OID,
1595 .olm_flags = OLF_SHOW_NAME,
1596 .olm_namelen = sizeof("BATCHID") - 1,
1599 /* OSP update logs update_log{_dir} use f_seq = FID_SEQ_UPDATE_LOG{_DIR}
1600 * and f_oid = index for their log files. See lu_update_log{_dir}_fid()
1601 * for more details. */
1605 .olm_name = "update_log",
1607 .f_seq = FID_SEQ_UPDATE_LOG,
1609 .olm_flags = OLF_SHOW_NAME | OLF_IDX_IN_FID,
1610 .olm_namelen = sizeof("update_log") - 1,
1613 /* update_log_dir */
1615 .olm_name = "update_log_dir",
1617 .f_seq = FID_SEQ_UPDATE_LOG_DIR,
1619 .olm_flags = OLF_SHOW_NAME | OLF_SCAN_SUBITEMS |
1621 .olm_namelen = sizeof("update_log_dir") - 1,
1622 .olm_scandir = osd_ios_general_scan,
1623 .olm_filldir = osd_ios_uld_fill,
1628 .olm_name = "lost+found",
1630 .f_seq = FID_SEQ_LOCAL_FILE,
1631 .f_oid = OSD_LPF_OID,
1633 .olm_flags = OLF_SCAN_SUBITEMS,
1634 .olm_namelen = sizeof("lost+found") - 1,
1635 .olm_scandir = osd_ios_general_scan,
1636 .olm_filldir = osd_ios_lf_fill,
1641 .olm_name = HSM_ACTIONS,
1646 .olm_name = LUSTRE_NODEMAP_NAME,
1651 .olm_name = INDEX_BACKUP_DIR,
1653 .f_seq = FID_SEQ_LOCAL_FILE,
1654 .f_oid = INDEX_BACKUP_OID,
1656 .olm_flags = OLF_SCAN_SUBITEMS | OLF_NOT_BACKUP,
1657 .olm_namelen = sizeof(INDEX_BACKUP_DIR) - 1,
1658 .olm_scandir = osd_ios_general_scan,
1659 .olm_filldir = osd_ios_varfid_fill,
1667 /* Add newly introduced files under .lustre/ to this list in the future. */
/*
 * Table of well-known entries under ".lustre".  osd_ios_dl_fill() matches
 * directory entries against olm_name/olm_namelen and passes the matching
 * olm_fid and olm_flags to osd_ios_scan_one().  The lookup loop stops at the
 * first entry whose olm_name is NULL.
 */
1668 static const struct osd_lf_map osd_dl_maps[] = {
/* "fid" (per olm_namelen). */
1673 .f_seq = FID_SEQ_DOT_LUSTRE,
1674 .f_oid = FID_OID_DOT_LUSTRE_OBF,
1676 .olm_namelen = sizeof("fid") - 1,
1679 /* .lustre/lost+found */
1681 .olm_name = "lost+found",
1683 .f_seq = FID_SEQ_DOT_LUSTRE,
1684 .f_oid = FID_OID_DOT_LUSTRE_LPF,
1686 .olm_namelen = sizeof("lost+found") - 1,
/*
 * One pending directory for the initial OI scrub.  Items are created by
 * osd_ios_new_item() and consumed by osd_initial_OI_scrub().
 */
1694 struct osd_ios_item {
/* Linkage on osd_device::od_ios_list. */
1695 struct list_head oii_list;
/* Directory to scan; holds a reference taken with dget(). */
1696 struct dentry *oii_dentry;
/* Scanner to run on oii_dentry, and the fill callback handed to it. */
1697 scandir_t oii_scandir;
1698 filldir_t oii_filldir;
/*
 * Context handed to the osd_ios_*_fill() callbacks.  The callbacks receive
 * a pointer to the embedded dir_context (or an opaque pointer) and cast it
 * straight back to this structure, which is why ctx has to stay first.
 */
1701 struct osd_ios_filldir_buf {
1702 /* please keep it as first member */
1703 struct dir_context ctx;
/* Thread info and device of the scan in progress. */
1704 struct osd_thread_info *oifb_info;
1705 struct osd_device *oifb_dev;
/* Directory being iterated; used as the parent for child lookups. */
1706 struct dentry *oifb_dentry;
/*
 * Queue a directory for a later pass of the initial OI scrub.
 *
 * Allocates an osd_ios_item, takes a reference on \a dentry, records the
 * \a scandir / \a filldir pair and appends the item to the tail of
 * dev->od_ios_list.  osd_initial_OI_scrub() later pops the item, runs the
 * scanner and drops the dentry reference.
 *
 * Callers store the result in an int rc and treat non-zero as failure.
 */
1711 osd_ios_new_item(struct osd_device *dev, struct dentry *dentry,
1712 scandir_t scandir, filldir_t filldir)
1714 struct osd_ios_item *item;
1717 OBD_ALLOC_PTR(item);
1721 INIT_LIST_HEAD(&item->oii_list);
/* The reference is released by osd_initial_OI_scrub() via dput(). */
1722 item->oii_dentry = dget(dentry);
1723 item->oii_scandir = scandir;
1724 item->oii_filldir = filldir;
1725 list_add_tail(&item->oii_list, &dev->od_ios_list);
/*
 * Probe whether \a inode can be opened as an IAM index container.
 *
 * Uses the per-thread oti_iam scratch directory: initializes the container
 * on \a inode, tries iam_container_setup() and always finalizes the
 * container afterwards.
 *
 * NOTE(review): the mapping from rc to the bool result is on lines not
 * visible here; presumably a failed init/setup means "needs recreate" -
 * confirm against the full function body.
 */
1730 static bool osd_index_need_recreate(const struct lu_env *env,
1731 struct osd_device *dev, struct inode *inode)
1733 struct osd_directory *iam = &osd_oti_get(env)->oti_iam;
1734 struct iam_container *bag = &iam->od_container;
1738 rc = iam_container_init(bag, &iam->od_descr, inode);
1742 rc = iam_container_setup(bag);
1743 iam_container_fini(bag);
/*
 * Register \a inode as an index object for \a fid if it looks like one.
 *
 * The inode is checked in order: it must be a regular file, its size must
 * be at least one block and block aligned, it must set up as an IAM
 * container, and the container descriptor must report a 4-byte pointer
 * size.  When all checks pass, the key and record sizes from the descriptor
 * are passed to osd_index_register().  Nothing is returned to the caller.
 */
1750 static void osd_ios_index_register(const struct lu_env *env,
1751 struct osd_device *osd,
1752 const struct lu_fid *fid,
1753 struct inode *inode)
1755 struct osd_directory *iam = &osd_oti_get(env)->oti_iam;
1756 struct iam_container *bag = &iam->od_container;
1757 struct super_block *sb = osd_sb(osd);
1758 struct iam_descr *descr;
1764 /* Index must be a regular file. */
1765 if (!S_ISREG(inode->i_mode))
1768 /* Index's size must be block aligned. */
1769 if (inode->i_size < sb->s_blocksize ||
1770 (inode->i_size & (sb->s_blocksize - 1)) != 0)
1773 iam_container_init(bag, &iam->od_descr, inode);
1774 rc = iam_container_setup(bag);
1778 descr = bag->ic_descr;
1779 /* May be regular file with IAM_LFIX_ROOT_MAGIC matched
1780 * coincidentally, or corrupted index object, skip it. */
1781 if (descr->id_ptr_size != 4)
1784 keysize = descr->id_key_size;
1785 recsize = descr->id_rec_size;
1786 rc = osd_index_register(osd, fid, keysize, recsize);
1791 iam_container_fini(bag);
1793 CDEBUG(D_LFSCK, "%s: index object "DFID" (%u/%u) registered\n",
1794 osd_name(osd), PFID(fid), keysize, recsize);
/*
 * Restore one index object from its backup copy.
 *
 * \a liru describes the index to rebuild (liru_cfid is the target FID,
 * liru_pfid the parent FID, liru_name the entry name and liru_clid the
 * inode number used for the cached OI mapping).  \a buf / \a bufsize is
 * scratch space supplied by the caller.
 *
 * Steps visible here: build the backup file name from the target FID, look
 * it up under dev->od_index_backup_inode, load the backup inode together
 * with its FID, cache the target FID mapping in RAM, then call
 * lustre_index_restore().  The outcome is only logged; nothing is returned.
 */
1797 static void osd_index_restore(const struct lu_env *env, struct osd_device *dev,
1798 struct lustre_index_restore_unit *liru,
1799 void *buf, int bufsize)
1801 struct osd_thread_info *info = osd_oti_get(env);
1802 struct osd_inode_id *id = &info->oti_id;
1803 struct lu_fid *tgt_fid = &liru->liru_cfid;
1804 struct inode *bak_inode = NULL;
1805 struct ldiskfs_dir_entry_2 *de = NULL;
1806 struct buffer_head *bh = NULL;
1807 struct dentry *dentry;
1809 struct lu_fid bak_fid;
/* Name of the backup file for the target FID. */
1813 lustre_fid2lbx(name, tgt_fid, bufsize);
1814 dentry = osd_child_dentry_by_inode(env, dev->od_index_backup_inode,
1815 name, strlen(name));
1816 bh = osd_ldiskfs_find_entry(dev->od_index_backup_inode,
1817 &dentry->d_name, &de, NULL, NULL);
1819 GOTO(log, rc = PTR_ERR(bh));
/* Load the backup inode named by the directory entry, with its FID. */
1821 osd_id_gen(id, le32_to_cpu(de->inode), OSD_OII_NOGEN);
1823 bak_inode = osd_iget_fid(info, dev, id, &bak_fid, 0);
1824 if (IS_ERR(bak_inode))
1825 GOTO(log, rc = PTR_ERR(bak_inode));
1828 /* The OI mapping for index may be invalid, since it will be
1829 * re-created, not update the OI mapping, just cache it in RAM. */
1830 osd_id_gen(id, liru->liru_clid, OSD_OII_NOGEN);
1831 osd_add_oi_cache(info, dev, id, tgt_fid);
1832 rc = lustre_index_restore(env, &dev->od_dt_dev, &liru->liru_pfid,
1833 tgt_fid, &bak_fid, liru->liru_name,
1834 &dev->od_index_backup_list, &dev->od_lock,
1839 CDEBUG(D_WARNING, "%s: restore index '%s' with "DFID": rc = %d\n",
1840 osd_name(dev), liru->liru_name, PFID(tgt_fid), rc);
1844 * osd_ios_scan_one() - check/fix LMA FID and OI entry for one inode
1846 * The passed \a inode's \a fid is verified against the LMA FID. If the \a fid
1847 * is NULL or is empty the IGIF FID is used. The FID is verified in the OI to
1848 * reference the inode, or fixed if it is missing or references another inode.
/*
 * Outline of the steps visible in this function:
 *  1. Read the LMA of \a inode.  Errors other than -ENODATA are logged.
 *  2. No LMA (-ENODATA): choose the FID to store.  An IGIF is built when
 *     \a fid is NULL or zero, or when OLF_HIDE_FID is set; with
 *     OLF_IDX_IN_FID the f_oid is replaced by dev->od_index.  The FID is
 *     then written with osd_ea_fid_set().
 *  3. LMA present: LMAC_NOT_IN_OI objects are special-cased, otherwise the
 *     LMA self FID is used.  An index flagged LMAC_IDX_BACKUP that needs to
 *     be recreated is queued on dev->od_index_restore_list; unless
 *     OLF_NOT_BACKUP is set the inode is offered to
 *     osd_ios_index_register().
 *  4. Look the FID up in the OI, then insert a mapping, accept a strictly
 *     equal one, or flag the scrub file SF_INCONSISTENT and update the
 *     mapping.
 *
 * NOTE(review): several branch conditions and return statements are on
 * lines not visible here; confirm the exact conditions selecting between
 * the insert and update paths against the full function body.
 */
1851 osd_ios_scan_one(struct osd_thread_info *info, struct osd_device *dev,
1852 struct inode *parent, struct inode *inode,
1853 const struct lu_fid *fid, const char *name,
1854 int namelen, int flags)
1856 struct lustre_mdt_attrs *lma = &info->oti_ost_attrs.loa_lma;
1857 struct osd_inode_id *id = &info->oti_id;
1858 struct osd_inode_id *id2 = &info->oti_id2;
1859 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
1860 struct scrub_file *sf = &scrub->os_file;
1866 CDEBUG(D_INODE, "%s: child '%.*s' lacks inode: rc = -2\n",
1867 osd_name(dev), namelen, name);
1871 rc = osd_get_lma(info, inode, &info->oti_obj_dentry,
1872 &info->oti_ost_attrs);
1873 if (rc != 0 && rc != -ENODATA) {
1874 CDEBUG(D_LFSCK, "%s: fail to get lma for init OI scrub: "
1875 "rc = %d\n", osd_name(dev), rc);
/* id identifies the inode being scanned (ino + generation). */
1880 osd_id_gen(id, inode->i_ino, inode->i_generation);
1881 if (rc == -ENODATA) {
1882 if (fid == NULL || fid_is_zero(fid) || flags & OLF_HIDE_FID) {
1883 lu_igif_build(&tfid, inode->i_ino, inode->i_generation);
1886 if (flags & OLF_IDX_IN_FID) {
1887 LASSERT(dev->od_index >= 0);
1889 tfid.f_oid = dev->od_index;
1892 rc = osd_ea_fid_set(info, inode, &tfid, 0, 0);
1894 CDEBUG(D_LFSCK, "%s: fail to set LMA for init OI "
1895 "scrub: rc = %d\n", osd_name(dev), rc);
1900 if (lma->lma_compat & LMAC_NOT_IN_OI)
1903 tfid = lma->lma_self_fid;
1904 if (lma->lma_compat & LMAC_IDX_BACKUP &&
1905 osd_index_need_recreate(info->oti_env, dev, inode)) {
1906 struct lu_fid *pfid = &info->oti_fid3;
1908 if (is_root_inode(parent)) {
1909 lu_local_obj_fid(pfid, OSD_FS_ROOT_OID);
1911 rc = osd_scrub_get_fid(info, dev, parent, pfid,
/* Queue a restore unit; osd_initial_OI_scrub() replays this list. */
1917 rc = lustre_liru_new(&dev->od_index_restore_list, pfid,
1918 &tfid, inode->i_ino, name, namelen);
1923 if (!(flags & OLF_NOT_BACKUP))
1924 osd_ios_index_register(info->oti_env, dev, &tfid,
1928 /* Since this called from iterate_dir() the inode lock will be taken */
1929 rc = osd_oi_lookup(info, dev, &tfid, id2, OI_LOCKED);
1934 rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
1935 DTO_INDEX_INSERT, true,
1943 if (osd_id_eq_strict(id, id2))
1946 if (!(sf->sf_flags & SF_INCONSISTENT)) {
1947 scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT);
1948 rc = scrub_file_store(info->oti_env, scrub);
1953 rc = osd_scrub_refresh_mapping(info, dev, &tfid, id,
1954 DTO_INDEX_UPDATE, true,
1963 * It scans the /lost+found, and for the OST-object (with filter_fid
1964 * or filter_fid_18_23), move them back to its proper /O/<seq>/d<x>.
/*
 * Fill callback for the "lost+found" directory.
 *
 * For each entry: counts it in oifb_items, looks the child up under the
 * directory being iterated, queues sub-directories for a later scan via
 * osd_ios_new_item(), ignores anything that is not a regular file, and for
 * regular files asks osd_scrub_get_fid() whether the inode is an
 * OST-object.  OST-objects are handed to osd_obj_map_recover(); successes
 * and failures are counted in os_lf_repaired / os_lf_failed.
 *
 * Failures are deliberately not propagated, so that the directory
 * iteration continues with the next entry.
 */
1966 #ifdef HAVE_FILLDIR_USE_CTX
1967 static FILLDIR_TYPE do_osd_ios_lf_fill(struct dir_context *buf,
1969 static int osd_ios_lf_fill(void *buf,
1971 const char *name, int namelen,
1972 loff_t offset, __u64 ino, unsigned int d_type)
1974 struct osd_ios_filldir_buf *fill_buf =
1975 (struct osd_ios_filldir_buf *)buf;
1976 struct osd_thread_info *info = fill_buf->oifb_info;
1977 struct osd_device *dev = fill_buf->oifb_dev;
1978 struct lu_fid *fid = &info->oti_fid;
1979 struct osd_scrub *scrub = &dev->od_scrub;
1980 struct dentry *parent = fill_buf->oifb_dentry;
1981 struct dentry *child;
1982 struct inode *dir = parent->d_inode;
1983 struct inode *inode;
/* osd_ios_general_scan() tests this counter in its loop condition. */
1987 fill_buf->oifb_items++;
1989 /* skip any '.' started names */
1993 scrub->os_lf_scanned++;
1994 child = osd_lookup_one_len(dev, name, parent, namelen);
1995 if (IS_ERR(child)) {
1996 rc = PTR_ERR(child);
1997 CDEBUG(D_LFSCK, "%s: cannot lookup child '%.*s': rc = %d\n",
1998 osd_name(dev), namelen, name, rc);
2000 } else if (!child->d_inode) {
2002 CDEBUG(D_INODE, "%s: child '%.*s' lacks inode\n",
2003 osd_name(dev), namelen, name);
2007 inode = child->d_inode;
2008 if (S_ISDIR(inode->i_mode)) {
2009 rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
2012 CDEBUG(D_LFSCK, "%s: cannot add child '%.*s': "
2013 "rc = %d\n", osd_name(dev), namelen, name, rc);
2017 if (!S_ISREG(inode->i_mode))
2020 rc = osd_scrub_get_fid(info, dev, inode, fid, true);
2021 if (rc == SCRUB_NEXT_OSTOBJ || rc == SCRUB_NEXT_OSTOBJ_OLD) {
2022 rc = osd_obj_map_recover(info, dev, dir, child, fid);
2024 CDEBUG(D_LFSCK, "recovered '%.*s' ["DFID"] from "
2025 "/lost+found.\n", namelen, name, PFID(fid));
2026 scrub->os_lf_repaired++;
2028 CDEBUG(D_LFSCK, "%s: cannot rename for '%.*s' "
2030 osd_name(dev), namelen, name, PFID(fid), rc);
2034 /* XXX: For MDT-objects, we can move them from /lost+found to namespace
2035 * visible place, such as the /ROOT/.lustre/lost+found, then LFSCK
2036 * can process them further. */
2042 scrub->os_lf_failed++;
2044 /* skip the failure to make the scanning to continue. */
2047 WRAP_FILLDIR_FN(do_, osd_ios_lf_fill)
/*
 * Fill callback for directories whose children have no table-assigned FID.
 *
 * Each entry is counted in oifb_items, looked up under the directory being
 * iterated and passed to osd_ios_scan_one() with a NULL FID and no flags.
 * A child directory that scans cleanly is queued for a recursive scan with
 * this same callback.
 */
2049 #ifdef HAVE_FILLDIR_USE_CTX
2050 static FILLDIR_TYPE do_osd_ios_varfid_fill(struct dir_context *buf,
2052 static int osd_ios_varfid_fill(void *buf,
2054 const char *name, int namelen,
2055 loff_t offset, __u64 ino, unsigned int d_type)
2057 struct osd_ios_filldir_buf *fill_buf =
2058 (struct osd_ios_filldir_buf *)buf;
2059 struct osd_device *dev = fill_buf->oifb_dev;
2060 struct dentry *child;
2064 fill_buf->oifb_items++;
2066 /* skip any '.' started names */
2070 child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
2072 RETURN(PTR_ERR(child));
/* NULL FID: osd_ios_scan_one() falls back to an IGIF if there is no LMA. */
2074 rc = osd_ios_scan_one(fill_buf->oifb_info, dev,
2075 fill_buf->oifb_dentry->d_inode, child->d_inode,
2076 NULL, name, namelen, 0);
2077 if (rc == 0 && S_ISDIR(child->d_inode->i_mode))
2078 rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
2079 osd_ios_varfid_fill);
2084 WRAP_FILLDIR_FN(do_, osd_ios_varfid_fill)
/*
 * Fill callback for the ".lustre" directory.
 *
 * Each entry is counted in oifb_items and matched by length and name
 * against osd_dl_maps[].  A matching entry is looked up under the directory
 * being iterated and scanned with the FID and flags from the table.
 */
2086 #ifdef HAVE_FILLDIR_USE_CTX
2087 static FILLDIR_TYPE do_osd_ios_dl_fill(struct dir_context *buf,
2089 static int osd_ios_dl_fill(void *buf,
2091 const char *name, int namelen,
2092 loff_t offset, __u64 ino, unsigned int d_type)
2094 struct osd_ios_filldir_buf *fill_buf =
2095 (struct osd_ios_filldir_buf *)buf;
2096 struct osd_device *dev = fill_buf->oifb_dev;
2097 const struct osd_lf_map *map;
2098 struct dentry *child;
2102 fill_buf->oifb_items++;
2104 /* skip any '.' started names */
/* Compare lengths first so that strncmp() only runs on candidates. */
2108 for (map = osd_dl_maps; map->olm_name != NULL; map++) {
2109 if (map->olm_namelen != namelen)
2112 if (strncmp(map->olm_name, name, namelen) == 0)
2116 if (map->olm_name == NULL)
2119 child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
2121 RETURN(PTR_ERR(child));
2123 rc = osd_ios_scan_one(fill_buf->oifb_info, dev,
2124 fill_buf->oifb_dentry->d_inode, child->d_inode,
2125 &map->olm_fid, name, namelen, map->olm_flags);
2130 WRAP_FILLDIR_FN(do_, osd_ios_dl_fill)
/*
 * Fill callback for the "update_log_dir" directory (see osd_lf_maps[]).
 *
 * Entries are named after a FID in bracketed DFID form.  The FID is parsed
 * from the name, skipping the leading '[', and a sane FID is passed to
 * osd_ios_scan_one() as the FID expected for the child.
 */
2132 #ifdef HAVE_FILLDIR_USE_CTX
2133 static FILLDIR_TYPE do_osd_ios_uld_fill(struct dir_context *buf,
2135 static int osd_ios_uld_fill(void *buf,
2137 const char *name, int namelen,
2138 loff_t offset, __u64 ino, unsigned int d_type)
2140 struct osd_ios_filldir_buf *fill_buf =
2141 (struct osd_ios_filldir_buf *)buf;
2142 struct osd_device *dev = fill_buf->oifb_dev;
2143 struct dentry *child;
2148 fill_buf->oifb_items++;
2150 /* skip any non-DFID format name */
2154 child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
2156 RETURN(PTR_ERR(child));
2158 /* skip the start '[' */
2159 sscanf(&name[1], SFID, RFID(&tfid));
2160 if (fid_is_sane(&tfid))
2161 rc = osd_ios_scan_one(fill_buf->oifb_info, fill_buf->oifb_dev,
2162 fill_buf->oifb_dentry->d_inode,
2163 child->d_inode, &tfid, name, namelen, 0);
2170 WRAP_FILLDIR_FN(do_, osd_ios_uld_fill)
/*
 * Fill callback for the backend root directory.
 *
 * Each entry is counted in oifb_items and matched by length and name
 * against osd_lf_maps[].  For a matching entry the child is looked up; a
 * child without an inode yields -ENOENT.  Unless the table entry has
 * OLF_NO_OI the child is scanned with the FID and flags from the table,
 * and entries with OLF_SCAN_SUBITEMS are queued for a directory scan using
 * the scanner recorded in the table.
 */
2172 #ifdef HAVE_FILLDIR_USE_CTX
2173 static FILLDIR_TYPE do_osd_ios_root_fill(struct dir_context *buf,
2175 static int osd_ios_root_fill(void *buf,
2177 const char *name, int namelen,
2178 loff_t offset, __u64 ino, unsigned int d_type)
2180 struct osd_ios_filldir_buf *fill_buf =
2181 (struct osd_ios_filldir_buf *)buf;
2182 struct osd_device *dev = fill_buf->oifb_dev;
2183 const struct osd_lf_map *map;
2184 struct dentry *child;
2188 fill_buf->oifb_items++;
2190 /* skip any '.' started names */
/* Compare lengths first so that strncmp() only runs on candidates. */
2194 for (map = osd_lf_maps; map->olm_name != NULL; map++) {
2195 if (map->olm_namelen != namelen)
2198 if (strncmp(map->olm_name, name, namelen) == 0)
2202 if (map->olm_name == NULL)
2205 child = osd_lookup_one_len(dev, name, fill_buf->oifb_dentry, namelen);
2207 RETURN(PTR_ERR(child));
2208 else if (!child->d_inode)
2209 GOTO(out_put, rc = -ENOENT);
2211 if (!(map->olm_flags & OLF_NO_OI))
2212 rc = osd_ios_scan_one(fill_buf->oifb_info, dev,
2213 fill_buf->oifb_dentry->d_inode, child->d_inode,
2214 &map->olm_fid, name, namelen, map->olm_flags);
2215 if (rc == 0 && map->olm_flags & OLF_SCAN_SUBITEMS)
2216 rc = osd_ios_new_item(dev, child, map->olm_scandir,
2224 WRAP_FILLDIR_FN(do_, osd_ios_root_fill)
/*
 * Generic directory scanner: iterate \a dentry and feed every entry to
 * \a filldir.
 *
 * The directory is opened read-only through dentry_open() on a path built
 * from \a dentry and dev->od_mnt, each with an extra reference.  The file
 * is marked FMODE_64BITHASH | FMODE_NONOTIFY and O_NOATIME before
 * iterating.  iterate_dir() is repeated while it succeeds, the callback
 * counted at least one entry, and the position has not reached
 * LDISKFS_HTREE_EOF_64BIT.
 *
 * NOTE(review): the per-iteration reset of oifb_items and the release of
 * the file are on lines not visible here - confirm against the full body.
 */
2227 osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
2228 struct dentry *dentry, filldir_t filldir)
2230 struct osd_ios_filldir_buf buf = {
2231 .ctx.actor = filldir,
2234 .oifb_dentry = dentry
2242 path.dentry = dget(dentry);
2243 path.mnt = mntget(dev->od_mnt);
2245 filp = dentry_open(&path, O_RDONLY, current_cred());
2248 RETURN(PTR_ERR(filp));
2250 filp->f_mode |= FMODE_64BITHASH | FMODE_NONOTIFY;
2251 filp->f_flags |= O_NOATIME;
2256 rc = iterate_dir(filp, &buf.ctx);
2257 } while (rc >= 0 && buf.oifb_items > 0 &&
2258 filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
/*
 * Scanner for the "ROOT" directory (see osd_lf_maps[]).
 *
 * Sets os_convert_igif under os_lock, then looks up the ".lustre" child of
 * \a dentry.  Lookup errors other than -ENOENT are returned.  A found child
 * is scanned, with the parent inode locked, against the fixed
 * LU_DOT_LUSTRE_FID.  If that scan reports -ENOENT and SF_UPGRADE is not
 * yet set, the scrub file is reset, SIF_NO_HANDLE_OLD_FID is cleared and
 * the file is stored.  If the scan succeeds, the child is queued for a
 * directory scan.
 *
 * The \a filldir argument is not referenced on the lines visible here.
 */
2265 osd_ios_ROOT_scan(struct osd_thread_info *info, struct osd_device *dev,
2266 struct dentry *dentry, filldir_t filldir)
2268 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2269 struct scrub_file *sf = &scrub->os_file;
2270 struct dentry *child;
2274 /* It is existing MDT0 device. We only allow the case of object without
2275 * LMA to happen on the MDT0, which is usually for old 1.8 MDT. Then we
2276 * can generate IGIF mode FID for the object and related OI mapping. If
2277 * it is on other MDTs, then becuase file-level backup/restore, related
2278 * OI mapping may be invalid already, we do not know which is the right
2279 * FID for the object. We only allow IGIF objects to reside on the MDT0.
2281 * XXX: For the case of object on non-MDT0 device with neither LMA nor
2282 * "fid" xattr, then something crashed. We cannot re-generate the
2283 * FID directly, instead, the OI scrub will scan the OI structure
2284 * and try to re-generate the LMA from the OI mapping. But if the
2285 * OI mapping crashed or lost also, then we have to give up under
2286 * double failure cases.
2288 spin_lock(&scrub->os_lock);
2289 scrub->os_convert_igif = 1;
2290 spin_unlock(&scrub->os_lock);
2291 child = osd_lookup_one_len_unlocked(dev, dot_lustre_name, dentry,
2292 strlen(dot_lustre_name));
2293 if (IS_ERR(child)) {
2294 if (PTR_ERR(child) != -ENOENT)
2295 RETURN(PTR_ERR(child));
2299 /* For lustre-2.x (x <= 3), the ".lustre" has NO FID-in-LMA,
2300 * so the client will get IGIF for the ".lustre" object when
2303 * From the OI scrub view, when the MDT upgrade to Lustre-2.4,
2304 * it does not know whether there are some old clients cached
2305 * the ".lustre" IGIF during the upgrading. Two choices:
2307 * 1) Generate IGIF-in-LMA and IGIF-in-OI for the ".lustre".
2308 * It will allow the old connected clients to access the
2309 * ".lustre" with cached IGIF. But it will cause others
2310 * on the MDT failed to check "fid_is_dot_lustre()".
2312 * 2) Use fixed FID {FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE, 0}
2313 * for ".lustre" in spite of whether there are some clients
2314 * cached the ".lustre" IGIF or not. It enables the check
2315 * "fid_is_dot_lustre()" on the MDT, although it will cause
2316 * that the old connected clients cannot access the ".lustre"
2317 * with the cached IGIF.
2319 * Usually, it is rare case for the old connected clients
2320 * to access the ".lustre" with cached IGIF. So we prefer
2321 * to the solution 2).
2323 inode_lock(dentry->d_inode);
2324 rc = osd_ios_scan_one(info, dev, dentry->d_inode,
2325 child->d_inode, &LU_DOT_LUSTRE_FID,
2327 strlen(dot_lustre_name), 0);
2328 inode_unlock(dentry->d_inode);
2329 if (rc == -ENOENT) {
2331 /* It is 1.8 MDT device. */
2332 if (!(sf->sf_flags & SF_UPGRADE)) {
2333 scrub_file_reset(scrub, dev->od_uuid,
2335 sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
2336 rc = scrub_file_store(info->oti_env, scrub);
2340 } else if (rc == 0) {
2341 rc = osd_ios_new_item(dev, child, osd_ios_general_scan,
/*
 * Scanner for the "OBJECTS" directory (see osd_lf_maps[]).
 *
 * Clears SIF_NO_HANDLE_OLD_FID in the scrub file if it is set and stores
 * the file.  Then looks up the ADMIN_USR and ADMIN_GRP children of
 * \a dentry in turn and scans each with a NULL FID while holding the parent
 * inode lock.  A missing entry (-ENOENT) is not treated as an error: the
 * final return maps -ENOENT to 0.
 *
 * The \a filldir argument is not referenced on the lines visible here.
 */
2350 osd_ios_OBJECTS_scan(struct osd_thread_info *info, struct osd_device *dev,
2351 struct dentry *dentry, filldir_t filldir)
2353 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2354 struct scrub_file *sf = &scrub->os_file;
2355 struct dentry *child;
2359 if (unlikely(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID)) {
2360 sf->sf_internal_flags &= ~SIF_NO_HANDLE_OLD_FID;
2361 rc = scrub_file_store(info->oti_env, scrub);
/* First admin file: ADMIN_USR. */
2366 child = osd_lookup_one_len_unlocked(dev, ADMIN_USR, dentry,
2368 if (IS_ERR(child)) {
2369 rc = PTR_ERR(child);
2371 inode_lock(dentry->d_inode);
2372 rc = osd_ios_scan_one(info, dev, dentry->d_inode,
2373 child->d_inode, NULL, ADMIN_USR,
2374 strlen(ADMIN_USR), 0);
2375 inode_unlock(dentry->d_inode);
2379 if (rc != 0 && rc != -ENOENT)
/* Second admin file: ADMIN_GRP. */
2382 child = osd_lookup_one_len_unlocked(dev, ADMIN_GRP, dentry,
2385 GOTO(out, rc = PTR_ERR(child));
2387 inode_lock(dentry->d_inode);
2388 rc = osd_ios_scan_one(info, dev, dentry->d_inode,
2389 child->d_inode, NULL, ADMIN_GRP,
2390 strlen(ADMIN_GRP), 0);
2391 inode_unlock(dentry->d_inode);
2394 RETURN(rc == -ENOENT ? 0 : rc);
/*
 * Run the initial OI scrub over the well-known local files.
 *
 * Phase 1: starting from the backend root with osd_ios_root_fill(), run the
 * current scanner, then keep popping items from dev->od_ios_list (queued by
 * osd_ios_new_item()) and run their scanner on their dentry until the list
 * is empty.  The dentry reference of each consumed item is dropped.
 *
 * Phase 2: walk osd_lf_maps[] and, for entries whose lookup under the root
 * fails with -ENOENT or yields a dentry without an inode, issue a
 * DTO_INDEX_DELETE for the entry FID.  The visible check on a zero olm_fid
 * precedes this lookup.
 *
 * Phase 3: if osd_ios_scan_one() queued index restore units, allocate a
 * buffer of INDEX_BACKUP_BUFSIZE bytes and replay every unit through
 * osd_index_restore(), freeing each unit afterwards.
 */
2397 static void osd_initial_OI_scrub(struct osd_thread_info *info,
2398 struct osd_device *dev)
2400 struct osd_ios_item *item = NULL;
2401 scandir_t scandir = osd_ios_general_scan;
2402 filldir_t filldir = osd_ios_root_fill;
2403 struct dentry *dentry = osd_sb(dev)->s_root;
2404 const struct osd_lf_map *map = osd_lf_maps;
2407 /* Lookup IGIF in OI by force for initial OI scrub. */
2408 dev->od_igif_inoi = 1;
2411 /* Don't take inode_lock here since scandir() callbacks
2412 * can call VFS functions which may manually take the
2413 * inode lock itself like iterate_dir(). Since this
2414 * is the case it is best to leave the scandir()
2415 * callbacks to managing the inode lock.
2417 scandir(info, dev, dentry, filldir);
2419 dput(item->oii_dentry);
2423 if (list_empty(&dev->od_ios_list))
2426 item = list_first_entry(&dev->od_ios_list,
2427 struct osd_ios_item, oii_list);
2428 list_del_init(&item->oii_list);
2430 LASSERT(item->oii_scandir != NULL);
2431 scandir = item->oii_scandir;
2432 filldir = item->oii_filldir;
2433 dentry = item->oii_dentry;
2436 /* There maybe the case that the object has been removed, but its OI
2437 * mapping is still in the OI file, such as the "CATALOGS" after MDT
2438 * file-level backup/restore. So here cleanup the stale OI mappings. */
2439 while (map->olm_name != NULL) {
2440 struct dentry *child;
2442 if (fid_is_zero(&map->olm_fid)) {
2447 child = osd_lookup_one_len_unlocked(dev, map->olm_name,
2448 osd_sb(dev)->s_root,
2450 if (PTR_ERR(child) == -ENOENT ||
2451 (!IS_ERR(child) && !child->d_inode))
2452 osd_scrub_refresh_mapping(info, dev, &map->olm_fid,
2453 NULL, DTO_INDEX_DELETE,
/* Replay the index restore units queued by osd_ios_scan_one(). */
2460 if (!list_empty(&dev->od_index_restore_list)) {
2463 OBD_ALLOC_LARGE(buf, INDEX_BACKUP_BUFSIZE);
2465 CERROR("%s: not enough RAM for rebuild index\n",
2468 while (!list_empty(&dev->od_index_restore_list)) {
2469 struct lustre_index_restore_unit *liru;
2471 liru = list_first_entry(&dev->od_index_restore_list,
2472 struct lustre_index_restore_unit,
2474 list_del(&liru->liru_link);
2476 osd_index_restore(info->oti_env, dev, liru,
2477 buf, INDEX_BACKUP_BUFSIZE);
2478 OBD_FREE(liru, liru->liru_len);
2482 OBD_FREE_LARGE(buf, INDEX_BACKUP_BUFSIZE);
/*
 * Map \a fid to the name of a well-known local file.
 *
 * Walks osd_lf_maps[] up to the NULL-name terminator comparing \a fid with
 * each olm_fid.  The name of a matching entry is returned only when the
 * entry has OLF_SHOW_NAME set.
 *
 * NOTE(review): the values returned for "matched but hidden" and for "no
 * match" are on lines not visible here - confirm against the full body.
 */
2488 char *osd_lf_fid2name(const struct lu_fid *fid)
2490 const struct osd_lf_map *map = osd_lf_maps;
2492 while (map->olm_name != NULL) {
2493 if (!lu_fid_eq(fid, &map->olm_fid)) {
2498 if (map->olm_flags & OLF_SHOW_NAME)
2499 return map->olm_name;
2507 /* OI scrub start/stop */
/*
 * Start the OI scrub on \a dev with the given start flags.
 *
 * A read-only device (dd_rdonly) is special-cased before anything else.
 * Otherwise scrub_start() is called with osd_scrub_main as the thread
 * function, serialized by od_otable_mutex.  If scrub_start() reports
 * -EALREADY and the existing scrub is an automatic (SF_AUTO) or partial
 * one while the caller did not ask for SS_AUTO_PARTIAL, the request is
 * merged into the existing scrub through osd_scrub_join().
 */
2509 int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
2512 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2516 if (dev->od_dt_dev.dd_rdonly)
2519 /* od_otable_mutex: prevent concurrent start/stop */
2520 mutex_lock(&dev->od_otable_mutex);
2521 rc = scrub_start(osd_scrub_main, scrub, dev, flags);
2522 if (rc == -EALREADY) {
2524 if ((scrub->os_file.sf_flags & SF_AUTO ||
2525 scrub->os_partial_scan) &&
2526 !(flags & SS_AUTO_PARTIAL))
2527 osd_scrub_join(env, dev, flags, false);
2529 mutex_unlock(&dev->od_otable_mutex);
/*
 * Stop the OI scrub on \a dev.
 *
 * Under od_otable_mutex, marks the scrub as paused (os_paused, protected by
 * os_lock).  After the mutex is released, both pending item lists
 * (os_inconsistent_items and os_stale_items) are passed to
 * osd_scrub_ois_fini().
 */
2534 void osd_scrub_stop(struct osd_device *dev)
2536 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2538 /* od_otable_mutex: prevent concurrent start/stop */
2539 mutex_lock(&dev->od_otable_mutex);
2540 spin_lock(&scrub->os_lock);
2541 scrub->os_paused = 1;
2542 spin_unlock(&scrub->os_lock);
2544 mutex_unlock(&dev->od_otable_mutex);
2546 osd_scrub_ois_fini(scrub, &scrub->os_inconsistent_items);
2547 osd_scrub_ois_fini(scrub, &scrub->os_stale_items);
2550 /* OI scrub setup/cleanup */
/* Name of the scrub state file, opened relative to the mount root. */
2552 static const char osd_scrub_name[] = "OI_scrub";
/*
 * Set up the OI scrub state for \a dev at mount time.
 *
 * Steps visible here, in order:
 *  1. Zero dev->od_scrub while preserving os_auto_scrub_interval, then
 *     initialize the run context, rwsem, spinlock and item lists.
 *  2. Open the "OI_scrub" file under the mount root and flag its inode for
 *     data journaling.  On a writable device, store an IGIF in its LMA
 *     with LMAC_NOT_IN_OI.  Cache the FID mapping and look the object up.
 *  3. Load the scrub file.  -ENOENT / -EFAULT cause a fresh
 *     scrub_file_init(); a changed device UUID resets the file with
 *     SF_INCONSISTENT; a status left at SS_SCANNING becomes SS_CRASHED; an
 *     OI count that is not a power of two is replaced by osd_oi_count.
 *  4. Derive os_pos_current, store the scrub file and initialize the OI
 *     files through osd_oi_init().
 *  5. On a writable device run osd_initial_OI_scrub(), then derive
 *     od_igif_inoi / od_check_ff from the scrub file flags.
 *  6. Auto-start a full scrub when the recorded status and flags call for
 *     it and os_auto_scrub_interval is not AS_NEVER.
 *  7. Shrink the dcache of the super block.
 *
 * The error paths undo the OI initialization and drop the scrub object.
 */
2554 int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev,
2557 struct osd_thread_info *info = osd_oti_get(env);
2558 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2559 struct lvfs_run_ctxt *ctxt = &dev->od_scrub.os_ctxt;
/* Read before the memset() below so that the setting survives it. */
2560 time64_t interval = scrub->os_auto_scrub_interval;
2561 struct scrub_file *sf = &scrub->os_file;
2562 struct super_block *sb = osd_sb(dev);
2563 struct lvfs_run_ctxt saved;
2565 struct inode *inode;
2566 struct lu_fid *fid = &info->oti_fid;
2567 struct osd_inode_id *id = &info->oti_id;
2568 struct dt_object *obj;
2573 memset(&dev->od_scrub, 0, sizeof(struct osd_scrub));
2574 OBD_SET_CTXT_MAGIC(ctxt);
2575 ctxt->pwdmnt = dev->od_mnt;
2576 ctxt->pwd = dev->od_mnt->mnt_root;
2578 init_rwsem(&scrub->os_rwsem);
2579 spin_lock_init(&scrub->os_lock);
2580 INIT_LIST_HEAD(&scrub->os_inconsistent_items);
2581 INIT_LIST_HEAD(&scrub->os_stale_items);
2582 scrub->os_name = osd_name(dev);
2583 scrub->os_auto_scrub_interval = interval;
/* Open the scrub file with the mount root as working directory. */
2585 push_ctxt(&saved, ctxt);
2586 filp = filp_open(osd_scrub_name,
2587 (dev->od_dt_dev.dd_rdonly ? O_RDONLY :
2591 pop_ctxt(&saved, ctxt);
2592 RETURN(PTR_ERR(filp));
2595 inode = file_inode(filp);
2596 ldiskfs_set_inode_flag(inode, LDISKFS_INODE_JOURNAL_DATA);
2597 if (!dev->od_dt_dev.dd_rdonly) {
2598 /* 'What the @fid is' is not important, because the object
2599 * has no OI mapping, and only is visible inside the OSD.*/
2600 lu_igif_build(fid, inode->i_ino, inode->i_generation);
2601 rc = osd_ea_fid_set(info, inode, fid, LMAC_NOT_IN_OI, 0);
2603 filp_close(filp, NULL);
2604 pop_ctxt(&saved, ctxt);
2609 osd_id_gen(id, inode->i_ino, inode->i_generation);
2610 osd_add_oi_cache(info, dev, id, fid);
2611 filp_close(filp, NULL);
2612 pop_ctxt(&saved, ctxt);
2614 obj = lu2dt(lu_object_find_slice(env, osd2lu_dev(dev), fid, NULL));
2615 if (IS_ERR_OR_NULL(obj))
2616 RETURN(obj ? PTR_ERR(obj) : -ENOENT);
2618 guid_copy(&dev->od_uuid, (guid_t *)&sb->s_uuid);
2619 scrub->os_obj = obj;
2620 rc = scrub_file_load(env, scrub);
2621 if (rc == -ENOENT || rc == -EFAULT) {
2622 scrub_file_init(scrub, dev->od_uuid);
2623 /* If the "/O" dir does not exist when mount (indicated by
2624 * osd_device::od_maybe_new), neither for the "/OI_scrub",
2625 * then it is quite probably that the device is a new one,
2626 * under such case, mark it as SIF_NO_HANDLE_OLD_FID.
2628 * For the rare case that "/O" and "OI_scrub" both lost on
2629 * an old device, it can be found and cleared later.
2631 * For the system with "SIF_NO_HANDLE_OLD_FID", we do not
2632 * need to check "filter_fid_18_23" and to convert it to
2633 * "filter_fid" for each object, and all the IGIF should
2634 * have their FID mapping in OI files already. */
2635 if (dev->od_maybe_new && rc == -ENOENT)
2636 sf->sf_internal_flags = SIF_NO_HANDLE_OLD_FID;
2638 } else if (rc < 0) {
2639 GOTO(cleanup_obj, rc);
2641 if (!guid_equal(&sf->sf_uuid, &dev->od_uuid)) {
2643 "%s: UUID has been changed from %pU to %pU\n",
2644 osd_dev2name(dev), &sf->sf_uuid, &dev->od_uuid);
2645 scrub_file_reset(scrub, dev->od_uuid, SF_INCONSISTENT);
2648 } else if (sf->sf_status == SS_SCANNING) {
2649 sf->sf_status = SS_CRASHED;
/* x & (x - 1) is non-zero unless x is zero or a power of two. */
2653 if ((sf->sf_oi_count & (sf->sf_oi_count - 1)) != 0) {
2654 LCONSOLE_WARN("%s: invalid oi count %d, set it to %d\n",
2655 osd_dev2name(dev), sf->sf_oi_count,
2657 sf->sf_oi_count = osd_oi_count;
2662 if (sf->sf_pos_last_checkpoint != 0)
2663 scrub->os_pos_current = sf->sf_pos_last_checkpoint + 1;
2665 scrub->os_pos_current = LDISKFS_FIRST_INO(sb) + 1;
2668 rc = scrub_file_store(env, scrub);
2670 GOTO(cleanup_obj, rc);
2673 /* Initialize OI files. */
2674 rc = osd_oi_init(info, dev, restored);
2676 GOTO(cleanup_obj, rc);
2678 if (!dev->od_dt_dev.dd_rdonly)
2679 osd_initial_OI_scrub(info, dev);
2681 if (sf->sf_flags & SF_UPGRADE ||
2682 !(sf->sf_internal_flags & SIF_NO_HANDLE_OLD_FID ||
2683 sf->sf_success_count > 0)) {
2684 dev->od_igif_inoi = 0;
2685 dev->od_check_ff = dev->od_is_ost;
2687 dev->od_igif_inoi = 1;
2688 dev->od_check_ff = 0;
2691 if (sf->sf_flags & SF_INCONSISTENT)
2692 /* The 'od_igif_inoi' will be set under the
2694 * 1) new created system, or
2695 * 2) restored from file-level backup, or
2696 * 3) the upgrading completed.
2698 * The 'od_igif_inoi' may be cleared by OI scrub
2699 * later if found that the system is upgrading. */
2700 dev->od_igif_inoi = 1;
2702 if (!dev->od_dt_dev.dd_rdonly &&
2703 dev->od_scrub.os_scrub.os_auto_scrub_interval != AS_NEVER &&
2704 ((sf->sf_status == SS_PAUSED) ||
2705 (sf->sf_status == SS_CRASHED &&
2706 sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
2707 SF_UPGRADE | SF_AUTO)) ||
2708 (sf->sf_status == SS_INIT &&
2709 sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT |
2711 rc = osd_scrub_start(env, dev, SS_AUTO_FULL);
2714 GOTO(cleanup_oi, rc);
2716 /* it is possible that dcache entries may keep objects after they are
2717 * deleted by OSD. While it looks safe this can cause object data to
2718 * stay until umount causing failures in tests calculating free space,
2719 * e.g. replay-ost-single. Since those dcache entries are not used
2720 * anymore let's just free them after use here */
2721 shrink_dcache_sb(sb);
2725 osd_oi_fini(info, dev);
2727 dt_object_put_nocache(env, scrub->os_obj);
2728 scrub->os_obj = NULL;
/*
 * Release the scrub state attached to @dev.
 *
 * The object-table iterator must already have been finalized (asserted).
 * If a scrub object is still attached, the scrub is stopped first and then
 * the reference on the scrub object is dropped and the pointer cleared.
 */
2733 void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev)
2735 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2737 LASSERT(dev->od_otable_it == NULL);
2739 if (scrub->os_obj != NULL) {
/* Stop the scrub first, then release the scrub object reference. */
2740 osd_scrub_stop(dev);
2741 dt_object_put_nocache(env, scrub->os_obj);
2742 scrub->os_obj = NULL;
2746 /* object table based iteration APIs */
/*
 * Create the object-table iterator for the device that owns @dt and start
 * the scrub via scrub_start() with osd_scrub_main.
 *
 * @attr packs two fields: the bits shifted down by DT_OTABLE_IT_FLAGS_SHIFT
 * are the dt_otable_it_flags, and the bits outside DT_OTABLE_IT_FLAGS_MASK
 * are the dt_otable_it_valid mask selecting which flag groups are honoured.
 *
 * Only one iterator is registered per device at a time; it is tracked in
 * dev->od_otable_it and protected by dev->od_otable_mutex.
 *
 * Returns the iterator cast to struct dt_it *, ERR_PTR(-EALREADY) when an
 * iterator is already registered, or ERR_PTR(-ENOMEM) (presumably when the
 * iterator cannot be allocated -- TODO confirm).  When scrub_start() fails
 * with an error other than -EALREADY the registration is undone.
 */
2748 static struct dt_it *osd_otable_it_init(const struct lu_env *env,
2749 struct dt_object *dt, __u32 attr)
2751 enum dt_otable_it_flags flags = attr >> DT_OTABLE_IT_FLAGS_SHIFT;
2752 enum dt_otable_it_valid valid = attr & ~DT_OTABLE_IT_FLAGS_MASK;
2753 struct osd_device *dev = osd_dev(dt->do_lu.lo_dev);
2754 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2755 struct osd_otable_it *it;
2760 /* od_otable_mutex: prevent concurrent init/fini */
2761 mutex_lock(&dev->od_otable_mutex);
2762 if (dev->od_otable_it != NULL)
2763 GOTO(out, it = ERR_PTR(-EALREADY));
2767 GOTO(out, it = ERR_PTR(-ENOMEM));
2769 dev->od_otable_it = it;
/* -1 means no entry has been consumed yet; osd_otable_it_store() tests it. */
2771 it->ooi_cache.ooc_consumer_idx = -1;
2772 if (flags & DOIF_OUTUSED)
2773 it->ooi_used_outside = 1;
2775 if (flags & DOIF_RESET)
/*
 * Translate the caller's flags into scrub start flags.  The FAILOUT and
 * DRYRUN groups are only applied when their DOIV_* valid bit is set.
 */
2778 if (valid & DOIV_ERROR_HANDLE) {
2779 if (flags & DOIF_FAILOUT)
2780 start |= SS_SET_FAILOUT;
2782 start |= SS_CLEAR_FAILOUT;
2785 if (valid & DOIV_DRYRUN) {
2786 if (flags & DOIF_DRYRUN)
2787 start |= SS_SET_DRYRUN;
2789 start |= SS_CLEAR_DRYRUN;
/* SS_AUTO_PARTIAL is always masked off for a scrub started from here. */
2792 rc = scrub_start(osd_scrub_main, scrub, dev, start & ~SS_AUTO_PARTIAL);
/*
 * -EALREADY (presumably: a scrub is already in progress): begin preloading
 * from the scrub's current position instead of from the first inode.
 */
2793 if (rc == -EALREADY) {
2794 it->ooi_cache.ooc_pos_preload = scrub->os_pos_current;
2795 } else if (rc < 0) {
2796 dev->od_otable_it = NULL;
2800 /* We have to start from the beginning. */
2801 it->ooi_cache.ooc_pos_preload =
2802 LDISKFS_FIRST_INO(osd_sb(dev)) + 1;
2808 mutex_unlock(&dev->od_otable_mutex);
2809 return (struct dt_it *)it;
/*
 * Finalize the object-table iterator @di.
 *
 * Under dev->od_otable_mutex the scrub is stopped and the device's
 * registered iterator pointer is cleared.  @di must be the iterator that is
 * currently registered on the device (asserted).
 */
2812 static void osd_otable_it_fini(const struct lu_env *env, struct dt_it *di)
2814 struct osd_otable_it *it = (struct osd_otable_it *)di;
2815 struct osd_device *dev = it->ooi_dev;
2817 /* od_otable_mutex: prevent concurrent init/fini */
2818 mutex_lock(&dev->od_otable_mutex);
2819 scrub_stop(&dev->od_scrub.os_scrub);
2820 LASSERT(dev->od_otable_it == it);
2822 dev->od_otable_it = NULL;
2823 mutex_unlock(&dev->od_otable_mutex);
/*
 * The "get" hook of osd_otable_ops.
 * NOTE(review): what it does with @key and what it returns should be
 * confirmed against the function body.
 */
2827 static int osd_otable_it_get(const struct lu_env *env,
2828 struct dt_it *di, const struct dt_key *key)
/*
 * The "put" hook of osd_otable_ops.
 * NOTE(review): confirm against the function body whether it does any work.
 */
2833 static void osd_otable_it_put(const struct lu_env *env, struct dt_it *di)
/*
 * Wait condition used by osd_otable_it_next(): decide, under scrub->os_lock,
 * whether the iterator still has to wait for the scrub.
 *
 * it->ooi_waiting is cleared when the iterator's preload position is behind
 * the scrub's current position, or the scrub itself is waiting, or the scrub
 * is not running; it->ooi_waiting is set to 1 for the remaining case
 * (presumably the else branch -- TODO confirm).
 *
 * Returns non-zero when the iterator does not need to wait.
 */
2838 osd_otable_it_wakeup(struct lustre_scrub *scrub, struct osd_otable_it *it)
2840 spin_lock(&scrub->os_lock);
2841 if (it->ooi_cache.ooc_pos_preload < scrub->os_pos_current ||
2842 scrub->os_waiting || !scrub->os_running)
2843 it->ooi_waiting = 0;
2845 it->ooi_waiting = 1;
2846 spin_unlock(&scrub->os_lock);
2848 return !it->ooi_waiting;
/*
 * Move the iterator @di to its next entry.
 *
 * A cached entry is consumed when one is available; otherwise the function
 * may sleep in wait_var_event() until the scrub advances or stops, and then
 * refills the cache through osd_otable_it_preload().
 *
 * The iterator must have been made user-ready beforehand (asserted);
 * osd_otable_it_load() sets that flag.
 *
 * NOTE(review): the return values of the individual paths should be
 * confirmed against the complete function body.
 */
2851 static int osd_otable_it_next(const struct lu_env *env, struct dt_it *di)
2853 struct osd_otable_it *it = (struct osd_otable_it *)di;
2854 struct osd_device *dev = it->ooi_dev;
2855 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2856 struct osd_otable_cache *ooc = &it->ooi_cache;
2860 LASSERT(it->ooi_user_ready);
/* Special handling when the scrub is stopped and nobody outside uses us. */
2863 if (!scrub->os_running && !it->ooi_used_outside)
/*
 * Consume one cached entry: the consumer index is advanced and masked with
 * ~OSD_OTABLE_IT_CACHE_MASK (presumably wrapping inside the cache array).
 */
2866 if (ooc->ooc_cached_items > 0) {
2867 ooc->ooc_cached_items--;
2868 ooc->ooc_consumer_idx = (ooc->ooc_consumer_idx + 1) &
2869 ~OSD_OTABLE_IT_CACHE_MASK;
/* Everything is cached already: only wait for the scrub to stop running. */
2873 if (it->ooi_all_cached) {
2874 wait_var_event(scrub, !scrub->os_running);
/*
 * The scrub is waiting and osd_scrub_has_window() reports room in the
 * cache: clear os_waiting under os_lock.
 */
2878 if (scrub->os_waiting && osd_scrub_has_window(scrub, ooc)) {
2879 spin_lock(&scrub->os_lock);
2880 scrub->os_waiting = 0;
2882 spin_unlock(&scrub->os_lock);
/*
 * The preload position has caught up with the scrub: sleep until
 * osd_otable_it_wakeup() reports that waiting is no longer needed.
 */
2885 if (it->ooi_cache.ooc_pos_preload >= scrub->os_pos_current)
2886 wait_var_event(scrub, osd_otable_it_wakeup(scrub, it));
/* Re-check the stopped/not-used-outside condition after a possible sleep. */
2888 if (!scrub->os_running && !it->ooi_used_outside)
2891 rc = osd_otable_it_preload(env, it);
/*
 * The "key" hook of osd_otable_ops.
 * NOTE(review): confirm against the function body what key, if any, is
 * returned for the current position.
 */
2898 static struct dt_key *osd_otable_it_key(const struct lu_env *env,
2899 const struct dt_it *di)
/*
 * The "key_size" hook of osd_otable_ops.
 * Returns the fixed key size, sizeof(__u64), regardless of @di.
 */
2904 static int osd_otable_it_key_size(const struct lu_env *env,
2905 const struct dt_it *di)
2907 return sizeof(__u64);
/*
 * The "rec" hook of osd_otable_ops: copy the FID of the cache slot at the
 * current consumer index into @rec, which is treated as a struct lu_fid.
 *
 * The copied FID is asserted to be sane.  @attr is not referenced by the
 * statements shown here.
 */
2910 static int osd_otable_it_rec(const struct lu_env *env, const struct dt_it *di,
2911 struct dt_rec *rec, __u32 attr)
2913 struct osd_otable_it *it = (struct osd_otable_it *)di;
2914 struct osd_otable_cache *ooc = &it->ooi_cache;
2916 *(struct lu_fid *)rec = ooc->ooc_cache[ooc->ooc_consumer_idx].oic_fid;
2918 /* Invalid FIDs have been filtered out already. */
2919 LASSERTF(fid_is_sane((struct lu_fid *)rec),
2920 "Invalid FID "DFID", p_idx = %d, c_idx = %d\n",
2921 PFID((struct lu_fid *)rec),
2922 ooc->ooc_producer_idx, ooc->ooc_consumer_idx);
/*
 * The "store" hook of osd_otable_ops: compute the position cookie (hash) of
 * the iterator.
 *
 * When the iterator is user-ready and an entry has been consumed (consumer
 * index is not -1), the cookie is the inode number of the current cache
 * slot.  The preload position is used for the remaining case (presumably
 * the else branch -- TODO confirm).
 */
2927 static __u64 osd_otable_it_store(const struct lu_env *env,
2928 const struct dt_it *di)
2930 struct osd_otable_it *it = (struct osd_otable_it *)di;
2931 struct osd_otable_cache *ooc = &it->ooi_cache;
2934 if (it->ooi_user_ready && ooc->ooc_consumer_idx != -1)
2935 hash = ooc->ooc_cache[ooc->ooc_consumer_idx].oic_lid.oii_ino;
2937 hash = ooc->ooc_pos_preload;
2942 * Set the OSD layer iteration start position as the specified hash.
/*
 * The "load" hook of osd_otable_ops.
 *
 * @hash is taken as the last checkpoint position: it is clamped to
 * OSD_OTABLE_MAX_HASH and iteration resumes at @hash + 1, but never at or
 * below LDISKFS_FIRST_INO().  The iterator is then marked user-ready, the
 * inode-table iteration parameters (block group, offset inside the group
 * and group base) are derived from the start position, and the first
 * osd_otable_it_next() call is issued.
 *
 * The position may not be changed once the iterator is user-ready, and the
 * scrub must not be in partial-scan mode (asserted).
 */
2944 static int osd_otable_it_load(const struct lu_env *env,
2945 const struct dt_it *di, __u64 hash)
2947 struct osd_otable_it *it = (struct osd_otable_it *)di;
2948 struct osd_device *dev = it->ooi_dev;
2949 struct osd_otable_cache *ooc = &it->ooi_cache;
2950 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
2951 struct osd_iit_param *param = &it->ooi_iit_param;
2955 /* Forbid to set iteration position after iteration started. */
2956 if (it->ooi_user_ready)
2959 LASSERT(!scrub->os_partial_scan);
2961 if (hash > OSD_OTABLE_MAX_HASH)
2962 hash = OSD_OTABLE_MAX_HASH;
2964 /* The hash is the last checkpoint position,
2965 * we will start from the next one. */
2966 ooc->ooc_pos_preload = hash + 1;
2967 if (ooc->ooc_pos_preload <= LDISKFS_FIRST_INO(osd_sb(dev)))
2968 ooc->ooc_pos_preload = LDISKFS_FIRST_INO(osd_sb(dev)) + 1;
2970 it->ooi_user_ready = 1;
2971 if (!scrub->os_full_speed)
/*
 * Inode numbers are 1-based: (pos - 1) / inodes-per-group is the block
 * group, (pos - 1) % inodes-per-group the offset inside that group, and
 * gbase the number of the first inode of the group.
 */
2974 memset(param, 0, sizeof(*param));
2975 param->sb = osd_sb(dev);
2976 param->start = ooc->ooc_pos_preload;
2977 param->bg = (ooc->ooc_pos_preload - 1) /
2978 LDISKFS_INODES_PER_GROUP(param->sb);
2979 param->offset = (ooc->ooc_pos_preload - 1) %
2980 LDISKFS_INODES_PER_GROUP(param->sb);
2981 param->gbase = 1 + param->bg * LDISKFS_INODES_PER_GROUP(param->sb);
2983 /* Unplug OSD layer iteration by the first next() call. */
2984 rc = osd_otable_it_next(env, (struct dt_it *)it);
/*
 * Index operations of the object-table iterator.  Every hook is wired to
 * the osd_otable_it_*() function of the same name defined in this file.
 */
2989 const struct dt_index_operations osd_otable_ops = {
2991 .init = osd_otable_it_init,
2992 .fini = osd_otable_it_fini,
2993 .get = osd_otable_it_get,
2994 .put = osd_otable_it_put,
2995 .next = osd_otable_it_next,
2996 .key = osd_otable_it_key,
2997 .key_size = osd_otable_it_key_size,
2998 .rec = osd_otable_it_rec,
2999 .store = osd_otable_it_store,
3000 .load = osd_otable_it_load,
/*
 * Print the scrub state of @dev into the seq_file @m.
 *
 * The generic scrub state is printed by scrub_dump(); the OSD-specific
 * os_lf_scanned, os_lf_repaired and os_lf_failed counters follow.  The
 * string "inconsistent" (when SP_DRYRUN is set in the scrub parameters) or
 * "repaired" (otherwise) is passed ahead of the os_lf_repaired counter,
 * presumably as the label of that counter -- TODO confirm against the
 * complete format string.
 */
3004 void osd_scrub_dump(struct seq_file *m, struct osd_device *dev)
3006 struct osd_scrub *scrub = &dev->od_scrub;
3008 scrub_dump(m, &scrub->os_scrub);
3009 seq_printf(m, "lf_scanned: %llu\n"
3011 "lf_failed: %llu\n",
3012 scrub->os_lf_scanned,
3013 scrub->os_scrub.os_file.sf_param & SP_DRYRUN ?
3014 "inconsistent" : "repaired",
3015 scrub->os_lf_repaired,
3016 scrub->os_lf_failed);
/*
 * Per-entry callback type for osd_scan_dir().
 *
 * osd_scan_dir() invokes it with @dir set to the directory being scanned
 * and @oie positioned on the entry to process (oie->oie_dirent).  The
 * callback's return value is not used by osd_scan_dir().
 */
3019 typedef int (*scan_dir_helper_t)(const struct lu_env *env,
3020 struct osd_device *dev, struct inode *dir,
3021 struct osd_it_ea *oie);
/*
 * Walk every entry of the directory @inode and call @cb for each entry
 * except "." and "..".
 *
 * Entries are read in batches with osd_ldiskfs_it_fill(), starting at file
 * position 0.  Once a batch is exhausted another one is read unless the file
 * position equals ldiskfs_get_htree_eof().  The directory iterator is
 * released with osd_it_dir_fini() before returning.
 *
 * Returns PTR_ERR() of the iterator when osd_it_dir_init() fails.
 * NOTE(review): the value returned by @cb is discarded, so a failure inside
 * the callback does not stop the walk.
 */
3023 static int osd_scan_dir(const struct lu_env *env, struct osd_device *dev,
3024 struct inode *inode, scan_dir_helper_t cb)
3026 struct osd_it_ea *oie;
3031 oie = osd_it_dir_init(env, dev, inode, LUDA_TYPE);
3033 RETURN(PTR_ERR(oie));
3035 oie->oie_file->f_pos = 0;
3036 rc = osd_ldiskfs_it_fill(env, (struct dt_it *)oie);
3042 while (oie->oie_it_dirent <= oie->oie_rd_dirent) {
3043 if (!name_is_dot_or_dotdot(oie->oie_dirent->oied_name,
3044 oie->oie_dirent->oied_namelen))
3045 cb(env, dev, inode, oie);
/*
 * Step to the next packed dirent: entries are variable-length, each one
 * occupying the header plus the name, rounded up to 8 bytes.
 */
3047 oie->oie_dirent = (void *)oie->oie_dirent +
3048 round_up(sizeof(struct osd_it_ea_dirent) +
3049 oie->oie_dirent->oied_namelen, 8);
3051 oie->oie_it_dirent++;
3052 if (oie->oie_it_dirent <= oie->oie_rd_dirent)
/* Current batch consumed: stop at end of directory, else read more. */
3055 if (oie->oie_file->f_pos ==
3056 ldiskfs_get_htree_eof(oie->oie_file))
3059 rc = osd_ldiskfs_it_fill(env, (struct dt_it *)oie);
3068 osd_it_dir_fini(env, oie, inode);
/*
 * Remove the directory entry that @oie currently points at from @dir.
 *
 * The entry is deleted with osd_obj_del_entry() inside a journal handle
 * sized for one index delete plus one attribute update; @inode is then
 * marked dirty and the handle is stopped.
 *
 * The SP_DRYRUN scrub parameter is tested before the journal is started;
 * presumably nothing is removed in dry-run mode -- TODO confirm.
 *
 * Returns PTR_ERR() of the handle when the journal cannot be started.
 */
3072 static int osd_remove_ml_file(struct osd_thread_info *info,
3073 struct osd_device *dev, struct inode *dir,
3074 struct inode *inode, struct osd_it_ea *oie)
3077 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
3078 struct dentry dentry;
3083 if (scrub->os_file.sf_param & SP_DRYRUN)
3086 th = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
3087 osd_dto_credits_noquota[DTO_INDEX_DELETE] +
3088 osd_dto_credits_noquota[DTO_ATTR_SET_BASE]);
3090 RETURN(PTR_ERR(th));
/*
 * Only d_inode and d_sb of this on-stack dentry are filled in before it is
 * handed to osd_obj_del_entry() as the parent.
 */
3092 /* Should be created by the VFS layer */
3093 dentry.d_inode = dir;
3094 dentry.d_sb = dir->i_sb;
3095 rc = osd_obj_del_entry(info, dev, &dentry, oie->oie_dirent->oied_name,
3096 oie->oie_dirent->oied_namelen, th);
3098 mark_inode_dirty(inode);
3099 ldiskfs_journal_stop(th);
/*
 * osd_scan_dir() callback for one object file (used by
 * osd_scan_ml_file_dir()): verify that the file sits where its FID says it
 * should.
 *
 * The FID is taken from the dirent; when that FID is not sane the inode is
 * opened with osd_iget_fid() instead of osd_iget() (presumably so that the
 * FID gets filled in from the inode -- TODO confirm).  From the FID the
 * expected sequence directory, sub-directory (object id masked with
 * oos_subdir_count - 1) and object name are derived and compared with the
 * three actual path components: info->oti_seq_dirent, info->oti_dir_dirent
 * and the current dirent.  On any mismatch the entry is reported and passed
 * to osd_remove_ml_file().
 *
 * NOTE(review): the RETURN(PTR_ERR(oseq)) below is reached after the inode
 * has been obtained; confirm that the inode reference is dropped on that
 * path.
 */
3103 static int osd_scan_ml_file(const struct lu_env *env, struct osd_device *dev,
3104 struct inode *dir, struct osd_it_ea *oie)
3106 struct osd_thread_info *info = osd_oti_get(env);
3107 struct osd_inode_id id;
3108 struct inode *inode;
3109 struct osd_obj_seq *oseq;
3110 struct ost_id *ostid = &info->oti_ostid;
3111 struct lu_fid *fid = &oie->oie_dirent->oied_fid;
3117 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3119 if (!fid_is_sane(fid))
3120 inode = osd_iget_fid(info, dev, &id, fid, 0);
3122 inode = osd_iget(info, dev, &id, 0);
3125 RETURN(PTR_ERR(inode));
3127 fid_to_ostid(fid, ostid);
3128 oseq = osd_seq_load(info, dev, ostid_seq(ostid));
3130 RETURN(PTR_ERR(oseq));
3132 dirn = ostid_id(ostid) & (oseq->oos_subdir_count - 1);
3133 LASSERT(oseq->oos_dirs[dirn] != NULL);
3135 osd_oid_name(name, sizeof(name), fid, ostid_id(ostid));
/*
 * Each of the three path components must match in both length and content;
 * the dirent names are compared by their recorded length.
 */
3136 if (((strlen(oseq->oos_root->d_name.name) !=
3137 info->oti_seq_dirent->oied_namelen) ||
3138 strncmp(oseq->oos_root->d_name.name,
3139 info->oti_seq_dirent->oied_name,
3140 info->oti_seq_dirent->oied_namelen) != 0) ||
3141 ((strlen(oseq->oos_dirs[dirn]->d_name.name) !=
3142 info->oti_dir_dirent->oied_namelen) ||
3143 strncmp(oseq->oos_dirs[dirn]->d_name.name,
3144 info->oti_dir_dirent->oied_name,
3145 info->oti_dir_dirent->oied_namelen) != 0) ||
3146 ((strlen(name) != oie->oie_dirent->oied_namelen) ||
3147 strncmp(oie->oie_dirent->oied_name, name,
3148 oie->oie_dirent->oied_namelen) != 0)) {
3149 CDEBUG(D_LFSCK, "%s: the file O/%s/%s/%s is corrupted\n",
3150 osd_name(dev), info->oti_seq_dirent->oied_name,
3151 info->oti_dir_dirent->oied_name,
3152 oie->oie_dirent->oied_name);
3154 rc = osd_remove_ml_file(info, dev, dir, inode, oie);
/*
 * osd_scan_dir() callback for one entry of a sequence directory (used by
 * osd_scan_ml_file_seq()).
 *
 * The entry is opened and tested with S_ISDIR(); its children are then
 * scanned with osd_scan_ml_file().  While that nested scan runs,
 * info->oti_dir_dirent points at this entry so the per-file callback can
 * compare the sub-directory name; it is reset to NULL afterwards.
 *
 * Returns PTR_ERR() of the inode when the inode cannot be obtained.
 */
3161 static int osd_scan_ml_file_dir(const struct lu_env *env,
3162 struct osd_device *dev, struct inode *dir,
3163 struct osd_it_ea *oie)
3165 struct osd_thread_info *info = osd_oti_get(env);
3166 struct inode *inode;
3167 struct osd_inode_id id;
3172 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3173 inode = osd_iget(info, dev, &id, 0);
3175 RETURN(PTR_ERR(inode));
3177 if (!S_ISDIR(inode->i_mode))
3180 info->oti_dir_dirent = oie->oie_dirent;
3181 rc = osd_scan_dir(env, dev, inode, osd_scan_ml_file);
3182 info->oti_dir_dirent = NULL;
/*
 * osd_scan_dir() callback for one entry of the OST map root directory (used
 * by osd_scan_ml_file_main()).
 *
 * The entry is opened and tested with S_ISDIR(); its children are then
 * scanned with osd_scan_ml_file_dir().  While that nested scan runs,
 * info->oti_seq_dirent points at this entry so the deeper callbacks can
 * compare and print the sequence directory name; it is reset to NULL
 * afterwards.
 *
 * Returns PTR_ERR() of the inode when the inode cannot be obtained.
 */
3189 static int osd_scan_ml_file_seq(const struct lu_env *env,
3190 struct osd_device *dev, struct inode *dir,
3191 struct osd_it_ea *oie)
3193 struct osd_thread_info *info = osd_oti_get(env);
3194 struct inode *inode;
3195 struct osd_inode_id id;
3200 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3201 inode = osd_iget(info, dev, &id, 0);
3203 RETURN(PTR_ERR(inode));
3205 if (!S_ISDIR(inode->i_mode))
3208 info->oti_seq_dirent = oie->oie_dirent;
3209 rc = osd_scan_dir(env, dev, inode, osd_scan_ml_file_dir);
3210 info->oti_seq_dirent = NULL;
/*
 * Entry point of the misplaced-object scan: walk the OST map root directory
 * of @dev with osd_scan_ml_file_seq() as the per-entry callback.
 * Returns the result of osd_scan_dir().
 */
3217 static int osd_scan_ml_file_main(const struct lu_env *env,
3218 struct osd_device *dev)
3220 return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,
3221 osd_scan_ml_file_seq);
/* Name of the per-sequence last-id file that the scan code looks up and creates. */
3224 #define LASTID "LAST_ID"
/*
 * Overwrite the value stored in the last-id file @inode.
 *
 * The known last id is converted to little-endian with cpu_to_le64() and
 * written with osd_ldiskfs_write() inside a journal handle sized for one
 * block write; the inode is then marked dirty and the handle stopped.
 *
 * Returns PTR_ERR() of the handle when the journal cannot be started.
 */
3226 static int osd_update_lastid(struct osd_device *dev, struct inode *inode,
3236 th = osd_journal_start_sb(osd_sb(dev), LDISKFS_HT_MISC,
3237 osd_dto_credits_noquota[DTO_WRITE_BLOCK]);
3239 RETURN(PTR_ERR(th));
3241 lastid = cpu_to_le64(lastid_known);
3242 rc = osd_ldiskfs_write(dev, inode, &lastid, sizeof(lastid), 0, &offset,
3244 mark_inode_dirty(inode);
3245 ldiskfs_journal_stop(th);
/*
 * Create a LAST_ID file in @dir and store @lastid_known in it.
 *
 * Within sb_start_write()/sb_end_write() and one journal handle, a regular
 * file inode (mode 0644) is created, linked into @dir under the name LASTID
 * and written with the last id value.
 *
 * NOTE(review): the RETURN(PTR_ERR(th)) below comes after sb_start_write();
 * confirm that sb_end_write() is not skipped on that path.
 * NOTE(review): @lastid_known is written as-is, whereas osd_update_lastid()
 * converts the value with cpu_to_le64() first; confirm the intended on-disk
 * byte order.
 */
3249 static int osd_create_lastid(const struct lu_env *env, struct osd_device *dev,
3250 struct inode *dir, __u64 lastid_known)
3253 struct osd_thread_info *info = osd_oti_get(env);
3254 struct dentry *d_lastid;
3255 struct inode *i_lastid;
/* Journal credits: directory entry insertion plus one data block write. */
3257 int credits = LDISKFS_DATA_TRANS_BLOCKS(dir->i_sb) +
3258 LDISKFS_INDEX_EXTRA_TRANS_BLOCKS + 3 +
3259 osd_dto_credits_noquota[DTO_WRITE_BLOCK];
3264 sb_start_write(dir->i_sb);
3265 th = osd_journal_start_sb(dir->i_sb, LDISKFS_HT_MISC, credits);
3267 RETURN(PTR_ERR(th));
3269 i_lastid = ldiskfs_create_inode(th, dir, (S_IFREG | 0644), NULL);
3270 if (IS_ERR(i_lastid))
3271 GOTO(out_stop, rc = PTR_ERR(i_lastid));
3273 unlock_new_inode(i_lastid);
3275 d_lastid = osd_child_dentry_by_inode(env, dir, LASTID, strlen(LASTID));
3276 rc = osd_ldiskfs_add_entry(info, dev, th, d_lastid, i_lastid, NULL);
3280 rc = osd_ldiskfs_write(dev, i_lastid, &lastid_known,
3281 sizeof(lastid_known), 0, &offset, th);
3284 mark_inode_dirty(i_lastid);
3286 ldiskfs_journal_stop(th);
3288 sb_end_write(dir->i_sb);
/* Error unwinding: stop a valid handle and end the write section. */
3292 if (!IS_ERR_OR_NULL(th))
3293 ldiskfs_journal_stop(th);
3294 sb_end_write(dir->i_sb);
3297 if (!IS_ERR_OR_NULL(i_lastid))
/*
 * osd_scan_dir() callback for one entry of a sequence directory (used by
 * osd_scan_lastid_seq()): locate the LAST_ID file.
 *
 * The entry is opened and tested with S_ISDIR().  An entry whose name is
 * not exactly LASTID is reported as unexpected.  For the matching entry the
 * inode is stored in info->oti_lastid_inode; osd_scan_lastid_seq() later
 * drops that reference with iput().
 *
 * Returns PTR_ERR() of the inode when the inode cannot be obtained.
 */
3302 static int osd_scan_lastid_dir(const struct lu_env *env, struct osd_device *dev,
3303 struct inode *dir, struct osd_it_ea *oie)
3305 struct osd_thread_info *info = osd_oti_get(env);
3306 struct inode *inode;
3307 struct osd_inode_id id;
3312 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3313 inode = osd_iget(info, dev, &id, 0);
3315 RETURN(PTR_ERR(inode));
3317 if (S_ISDIR(inode->i_mode))
/* The name must match LASTID in both length and content. */
3320 if (strlen(LASTID) != oie->oie_dirent->oied_namelen ||
3321 strncmp(oie->oie_dirent->oied_name, LASTID,
3322 oie->oie_dirent->oied_namelen) != 0) {
3323 CDEBUG(D_LFSCK, "%s: the file O/%s/%s is unexpected\n",
3324 osd_name(dev), info->oti_seq_dirent->oied_name,
3325 oie->oie_dirent->oied_name);
3329 info->oti_lastid_inode = inode;
/*
 * osd_scan_dir() callback for one entry of the OST map root directory (used
 * by osd_scan_last_id_main()): check, and possibly repair, the LAST_ID file
 * of one sequence directory.
 *
 * Steps shown by the code below:
 *  - open the entry and test it with S_ISDIR();
 *  - parse the entry name as a sequence number with kstrtoull(), in base 16
 *    and in base 10 (TODO confirm which base applies when), and test the
 *    result with fid_seq_is_local_storage();
 *  - scan the directory with osd_scan_lastid_dir(), which leaves the
 *    LAST_ID inode, if any, in info->oti_lastid_inode;
 *  - test the SP_DRYRUN scrub parameter (presumably no repair is done in
 *    dry-run mode -- TODO confirm);
 *  - look the sequence up in scrub->os_ls_fids[] and take its f_oid as the
 *    known last id;
 *  - if there is no LAST_ID inode, create the file with
 *    osd_create_lastid();
 *  - read the LMA xattr; if it is absent or its self FID is not
 *    [seq, oid 0, ver 0], rewrite it with that FID;
 *  - read the stored last id and, when it is lower than the known one,
 *    update it with osd_update_lastid();
 *  - finally drop the reference on the LAST_ID inode.
 */
3337 static int osd_scan_lastid_seq(const struct lu_env *env, struct osd_device *dev,
3338 struct inode *dir, struct osd_it_ea *oie)
3340 struct osd_thread_info *info = osd_oti_get(env);
3341 struct lustre_ost_attrs *lma = &info->oti_ost_attrs;
3342 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
3343 struct inode *inode;
3344 struct osd_inode_id id;
3354 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3355 inode = osd_iget(info, dev, &id, 0);
3357 RETURN(PTR_ERR(inode));
3359 if (!S_ISDIR(inode->i_mode))
3362 rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
3367 rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
3372 if (!fid_seq_is_local_storage(seq))
3375 info->oti_lastid_inode = NULL;
3376 info->oti_seq_dirent = oie->oie_dirent;
3377 rc = osd_scan_dir(env, dev, inode, osd_scan_lastid_dir);
3378 info->oti_seq_dirent = NULL;
3383 if (scrub->os_file.sf_param & SP_DRYRUN)
/* Linear search for this sequence among the recorded sequences. */
3386 for (index = 0; index < scrub->os_ls_count; index++)
3387 if (scrub->os_ls_fids[index].f_seq == seq)
3390 if (unlikely(index >= scrub->os_ls_count)) {
3392 "%s: can't find seq %llu, it's modified during scrub?\n",
3393 osd_name(dev), seq);
3397 lastid_known = scrub->os_ls_fids[index].f_oid;
3398 if (!info->oti_lastid_inode) {
3399 rc = osd_create_lastid(env, dev, dir, lastid_known);
3403 rc = osd_get_lma(info, info->oti_lastid_inode, &info->oti_obj_dentry,
/* -ENODATA (no LMA xattr yet) is tolerated; other errors are reported. */
3405 if (rc && rc != -ENODATA) {
3406 CDEBUG(D_LFSCK, "%s: failed to get the xattr %s for O/%s/%s\n",
3407 osd_name(dev), XATTR_NAME_LMA,
3408 oie->oie_dirent->oied_name, LASTID);
/* The LAST_ID file's self FID is expected to be [seq, oid 0, ver 0]. */
3412 if (rc != 0 || lma->loa_lma.lma_self_fid.f_seq != seq ||
3413 lma->loa_lma.lma_self_fid.f_oid != 0 ||
3414 lma->loa_lma.lma_self_fid.f_ver != 0) {
3415 lma->loa_lma.lma_self_fid.f_seq = seq;
3416 lma->loa_lma.lma_self_fid.f_oid = 0;
3417 lma->loa_lma.lma_self_fid.f_ver = 0;
3419 rc = __osd_xattr_set(info, info->oti_lastid_inode,
3420 XATTR_NAME_LMA, lma, sizeof(*lma),
3422 XATTR_CREATE : XATTR_REPLACE);
/* A file shorter than one last-id value cannot hold a valid value. */
3427 spin_lock(&info->oti_lastid_inode->i_lock);
3428 if (i_size_read(info->oti_lastid_inode) < sizeof(lastid)) {
3429 spin_unlock(&info->oti_lastid_inode->i_lock);
3432 spin_unlock(&info->oti_lastid_inode->i_lock);
3434 rc = osd_ldiskfs_read(info->oti_lastid_inode, &lastid,
3435 sizeof(lastid), &offset);
/*
 * NOTE(review): rc is compared with a size_t here; if rc is a signed int, a
 * negative rc would not satisfy this test -- confirm that negative values
 * are handled before this point.
 */
3439 if (rc < sizeof(lastid))
3442 lastid = le64_to_cpu(lastid);
3445 if (lastid < lastid_known)
3446 rc = osd_update_lastid(dev, info->oti_lastid_inode,
3450 if (info->oti_lastid_inode) {
3451 iput(info->oti_lastid_inode);
3452 info->oti_lastid_inode = NULL;
/*
 * Entry point of the LAST_ID scan: walk the OST map root directory of @dev
 * with osd_scan_lastid_seq() as the per-entry callback.
 * Returns the result of osd_scan_dir().
 */
3459 static int osd_scan_last_id_main(const struct lu_env *env,
3460 struct osd_device *dev)
3462 return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,
3463 osd_scan_lastid_seq);
/*
 * Directory-scan callback (scan_dir_helper_t signature) that records one
 * sequence in scrub->os_ls_fids[].
 *
 * The entry is opened and tested with S_ISDIR(); its name is parsed as a
 * sequence number with kstrtoull() in base 16 and in base 10 (TODO confirm
 * which base applies when) and tested with fid_seq_is_local_storage().  The
 * sequence is then stored in the next free slot of scrub->os_ls_fids[].
 * When the array is full it is replaced by a new one with four more slots:
 * the old contents are copied over and the old array is freed.
 *
 * NOTE(review): os_ls_count is incremented before the array is grown;
 * confirm that the count is restored if the allocation fails.
 *
 * Returns PTR_ERR() of the inode when the inode cannot be obtained.
 */
3466 static int osd_scan_O_seq(const struct lu_env *env, struct osd_device *dev,
3467 struct inode *dir, struct osd_it_ea *oie)
3469 struct osd_thread_info *info = osd_oti_get(env);
3470 struct lustre_scrub *scrub = &dev->od_scrub.os_scrub;
3471 struct inode *inode;
3472 struct osd_inode_id id;
3473 struct lu_fid *fids;
3479 osd_id_gen(&id, oie->oie_dirent->oied_ino, OSD_OII_NOGEN);
3480 inode = osd_iget(info, dev, &id, 0);
3482 RETURN(PTR_ERR(inode));
3484 if (!S_ISDIR(inode->i_mode))
3487 rc = kstrtoull(oie->oie_dirent->oied_name, 16, &seq);
3492 rc = kstrtoull(oie->oie_dirent->oied_name, 10, &seq);
3497 if (!fid_seq_is_local_storage(seq))
3500 scrub->os_ls_count++;
/* Array full: move to a new array that has room for four more entries. */
3501 if (unlikely(scrub->os_ls_count > scrub->os_ls_size)) {
3503 sizeof(struct lu_fid) * (scrub->os_ls_size + 4));
3507 memcpy(fids, scrub->os_ls_fids,
3508 sizeof(struct lu_fid) * scrub->os_ls_size);
3509 OBD_FREE(scrub->os_ls_fids,
3510 sizeof(struct lu_fid) * scrub->os_ls_size);
3512 scrub->os_ls_size += 4;
3513 scrub->os_ls_fids = fids;
3516 scrub->os_ls_fids[scrub->os_ls_count - 1].f_seq = seq;
3523 static int osd_scan_O_main(const struct lu_env *env, struct osd_device *dev)
3525 return osd_scan_dir(env, dev, dev->od_ost_map->om_root->d_inode,