4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012 Whamcloud, Inc.
26 * lustre/osd-ldiskfs/osd_scrub.c
28 * Top-level entry points into osd module
30 * The OI scrub is used for rebuilding Object Index files when restoring an MDT from
33 * The otable-based iterator scans the ldiskfs inode table to feed the upper-layer LFSCK.
35 * Author: Fan Yong <yong.fan@whamcloud.com>
39 # define EXPORT_SYMTAB
41 #define DEBUG_SUBSYSTEM S_MDS
43 #include <lustre/lustre_idl.h>
44 #include <lustre_disk.h>
45 #include <dt_object.h>
47 #include "osd_internal.h"
49 #include "osd_scrub.h"
/* Half of CFS_HZ. It is added to a tick difference before cfs_duration_sec()
 * in the run-time accounting of this file, presumably so the conversion
 * rounds to the nearest second (assumes CFS_HZ is ticks per second). */
51 #define HALF_SEC (CFS_HZ >> 1)
53 static inline struct osd_device *osd_scrub2dev(struct osd_scrub *scrub)
55 return container_of0(scrub, struct osd_device, od_scrub);
/*
 * Return the super block of the device that owns \a scrub.
 */
static inline struct super_block *osd_scrub2sb(struct osd_scrub *scrub)
{
	struct osd_device *owner = osd_scrub2dev(scrub);

	return osd_sb(owner);
}
63 static void osd_scrub_file_to_cpu(struct scrub_file *des,
64 struct scrub_file *src)
66 memcpy(des->sf_uuid, src->sf_uuid, 16);
67 des->sf_flags = le64_to_cpu(src->sf_flags);
68 des->sf_magic = le32_to_cpu(src->sf_magic);
69 des->sf_status = le16_to_cpu(src->sf_status);
70 des->sf_param = le16_to_cpu(src->sf_param);
71 des->sf_time_last_complete =
72 le64_to_cpu(src->sf_time_last_complete);
73 des->sf_time_latest_start =
74 le64_to_cpu(src->sf_time_latest_start);
75 des->sf_time_last_checkpoint =
76 le64_to_cpu(src->sf_time_last_checkpoint);
77 des->sf_pos_latest_start =
78 le64_to_cpu(src->sf_pos_latest_start);
79 des->sf_pos_last_checkpoint =
80 le64_to_cpu(src->sf_pos_last_checkpoint);
81 des->sf_pos_first_inconsistent =
82 le64_to_cpu(src->sf_pos_first_inconsistent);
83 des->sf_items_checked =
84 le64_to_cpu(src->sf_items_checked);
85 des->sf_items_updated =
86 le64_to_cpu(src->sf_items_updated);
87 des->sf_items_failed =
88 le64_to_cpu(src->sf_items_failed);
89 des->sf_items_updated_prior =
90 le64_to_cpu(src->sf_items_updated_prior);
91 des->sf_run_time = le32_to_cpu(src->sf_run_time);
92 des->sf_success_count = le32_to_cpu(src->sf_success_count);
93 des->sf_oi_count = le16_to_cpu(src->sf_oi_count);
94 memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
97 static void osd_scrub_file_to_le(struct scrub_file *des,
98 struct scrub_file *src)
100 memcpy(des->sf_uuid, src->sf_uuid, 16);
101 des->sf_flags = cpu_to_le64(src->sf_flags);
102 des->sf_magic = cpu_to_le32(src->sf_magic);
103 des->sf_status = cpu_to_le16(src->sf_status);
104 des->sf_param = cpu_to_le16(src->sf_param);
105 des->sf_time_last_complete =
106 cpu_to_le64(src->sf_time_last_complete);
107 des->sf_time_latest_start =
108 cpu_to_le64(src->sf_time_latest_start);
109 des->sf_time_last_checkpoint =
110 cpu_to_le64(src->sf_time_last_checkpoint);
111 des->sf_pos_latest_start =
112 cpu_to_le64(src->sf_pos_latest_start);
113 des->sf_pos_last_checkpoint =
114 cpu_to_le64(src->sf_pos_last_checkpoint);
115 des->sf_pos_first_inconsistent =
116 cpu_to_le64(src->sf_pos_first_inconsistent);
117 des->sf_items_checked =
118 cpu_to_le64(src->sf_items_checked);
119 des->sf_items_updated =
120 cpu_to_le64(src->sf_items_updated);
121 des->sf_items_failed =
122 cpu_to_le64(src->sf_items_failed);
123 des->sf_items_updated_prior =
124 cpu_to_le64(src->sf_items_updated_prior);
125 des->sf_run_time = cpu_to_le32(src->sf_run_time);
126 des->sf_success_count = cpu_to_le32(src->sf_success_count);
127 des->sf_oi_count = cpu_to_le16(src->sf_oi_count);
128 memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
131 static void osd_scrub_file_init(struct osd_scrub *scrub, __u8 *uuid)
133 struct scrub_file *sf = &scrub->os_file;
135 memset(sf, 0, sizeof(*sf));
136 memcpy(sf->sf_uuid, uuid, 16);
137 sf->sf_magic = SCRUB_MAGIC_V1;
138 sf->sf_status = SS_INIT;
/*
 * Reset the in-memory scrub file (scrub->os_file) so that a new scan starts
 * from scratch.
 *
 * \param scrub	scrub instance whose os_file is reset
 * \param uuid	16-byte UUID copied into sf_uuid
 * \param flags	bits OR-ed into sf_flags
 *
 * Nothing visible here writes the file back to disk.
 * NOTE(review): this listing skips two source lines between the sf_flags
 * update and the sf_time_latest_start reset; check the full file for which
 * other fields are cleared there.
 */
141 void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags)
143 struct scrub_file *sf = &scrub->os_file;
145 CDEBUG(D_LFSCK, "Reset OI scrub file, flags = "LPX64"\n", flags);
146 memcpy(sf->sf_uuid, uuid, 16);
147 sf->sf_status = SS_INIT;
/* Accumulate rather than overwrite: flags already recorded are kept. */
148 sf->sf_flags |= flags;
/* Drop the progress of any earlier run: start/checkpoint times, positions
 * and the per-run item counters. */
151 sf->sf_time_latest_start = 0;
152 sf->sf_time_last_checkpoint = 0;
153 sf->sf_pos_latest_start = 0;
154 sf->sf_pos_last_checkpoint = 0;
155 sf->sf_pos_first_inconsistent = 0;
156 sf->sf_items_checked = 0;
157 sf->sf_items_updated = 0;
158 sf->sf_items_failed = 0;
159 sf->sf_items_updated_prior = 0;
/*
 * Load the scrub file from scrub->os_inode into scrub->os_file.
 *
 * The raw image is read with osd_ldiskfs_read() into scrub->os_file_disk and
 * converted to CPU byte order with osd_scrub_file_to_cpu(). An image whose
 * magic is not SCRUB_MAGIC_V1 is reported and handled as a new scrub file;
 * a non-zero read result that did not take the first branch is reported as a
 * load failure.
 *
 * \param scrub	scrub instance to load
 *
 * NOTE(review): declarations, some branch conditions and the return
 * statements are not visible in this listing; per the comment near the end,
 * -ENOENT is returned for an empty scrub file.
 */
162 static int osd_scrub_file_load(struct osd_scrub *scrub)
/* Volume name, used only as the "%.16s" prefix of the messages below. */
165 char *name = LDISKFS_SB(osd_scrub2sb(scrub))->s_es->s_volume_name;
166 int len = sizeof(scrub->os_file_disk);
169 rc = osd_ldiskfs_read(scrub->os_inode, &scrub->os_file_disk, len, &pos);
171 struct scrub_file *sf = &scrub->os_file;
173 osd_scrub_file_to_cpu(sf, &scrub->os_file_disk);
174 if (sf->sf_magic != SCRUB_MAGIC_V1) {
/* The message ends at the newline; the format used to be "...\n,", which
 * printed a stray comma after the line break. */
175 CWARN("%.16s: invalid scrub magic 0x%x != 0x%x\n",
176 name, sf->sf_magic, SCRUB_MAGIC_V1);
177 /* Process it as new scrub file. */
182 } else if (rc != 0) {
183 CERROR("%.16s: fail to load scrub file, expected = %d, "
184 "rc = %d\n", name, len, rc);
188 /* return -ENOENT for empty scrub file case. */
/*
 * Write the in-memory scrub file (scrub->os_file) to scrub->os_inode inside
 * its own journal transaction, then re-arm the checkpoint timer.
 *
 * \param scrub	scrub instance to store
 *
 * NOTE(review): declarations, the error checks guarding the two CERROR()
 * calls and the return statements are not visible in this listing.
 */
195 int osd_scrub_file_store(struct osd_scrub *scrub)
197 struct osd_device *dev;
200 int len = sizeof(scrub->os_file_disk);
204 dev = container_of0(scrub, struct osd_device, od_scrub);
/* Journal credits requested: DTO_WRITE_BASE plus DTO_WRITE_BLOCK. */
205 credits = osd_dto_credits_noquota[DTO_WRITE_BASE] +
206 osd_dto_credits_noquota[DTO_WRITE_BLOCK];
207 jh = ldiskfs_journal_start_sb(osd_sb(dev), credits);
210 CERROR("%.16s: fail to start trans for scrub store, rc = %d\n",
211 LDISKFS_SB(osd_scrub2sb(scrub))->s_es->s_volume_name,rc);
/* Build the little-endian image in os_file_disk, then write that image. */
215 osd_scrub_file_to_le(&scrub->os_file_disk, &scrub->os_file);
216 rc = osd_ldiskfs_write_record(scrub->os_inode, &scrub->os_file_disk,
218 ldiskfs_journal_stop(jh);
220 CERROR("%.16s: fail to store scrub file, expected = %d, "
222 LDISKFS_SB(osd_scrub2sb(scrub))->s_es->s_volume_name,
/* Record when this store happened and schedule the next checkpoint
 * SCRUB_CHECKPOINT_INTERVAL seconds later. */
224 scrub->os_time_last_checkpoint = cfs_time_current();
225 scrub->os_time_next_checkpoint = scrub->os_time_last_checkpoint +
226 cfs_time_seconds(SCRUB_CHECKPOINT_INTERVAL);
/*
 * Prepare a scrub run: apply the start flags, choose the start position,
 * mark the scrub file SS_SCANNING, store it, and flag the scrub thread as
 * SVC_RUNNING. All of this is done with os_rwsem held for write.
 *
 * \param dev	device whose od_scrub is prepared
 *
 * NOTE(review): the rc declaration, the else lines, the check made on the
 * result of osd_scrub_file_store() and the return are not visible in this
 * listing.
 */
230 static int osd_scrub_prep(struct osd_device *dev)
232 struct osd_scrub *scrub = &dev->od_scrub;
233 struct ptlrpc_thread *thread = &scrub->os_thread;
234 struct scrub_file *sf = &scrub->os_file;
235 __u32 flags = scrub->os_start_flags;
239 cfs_down_write(&scrub->os_rwsem);
/* SS_SET_FAILOUT / SS_CLEAR_FAILOUT toggle the persistent SP_FAILOUT
 * parameter. */
240 if (flags & SS_SET_FAILOUT)
241 sf->sf_param |= SP_FAILOUT;
243 if (flags & SS_CLEAR_FAILOUT)
244 sf->sf_param &= ~SP_FAILOUT;
/* SS_RESET: reset the file with the device UUID; the current sf_flags are
 * passed back in, so the flag bits are unchanged by the reset. */
246 if (flags & SS_RESET)
247 osd_scrub_file_reset(scrub,
248 LDISKFS_SB(osd_sb(dev))->s_es->s_uuid, sf->sf_flags);
/* SS_AUTO selects full speed and is recorded as SF_AUTO in the file. */
250 if (flags & SS_AUTO) {
251 scrub->os_full_speed = 1;
252 sf->sf_flags |= SF_AUTO;
254 scrub->os_full_speed = 0;
/* Recreated or inconsistent OI files always select full speed. */
257 if (sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT))
258 scrub->os_full_speed = 1;
260 scrub->os_in_prior = 0;
261 scrub->os_waiting = 0;
262 scrub->os_new_checked = 0;
/* Resume just after the last checkpoint when there is one; the other
 * visible start position is the first ldiskfs inode number. */
263 if (sf->sf_pos_last_checkpoint != 0)
264 sf->sf_pos_latest_start = sf->sf_pos_last_checkpoint + 1;
266 sf->sf_pos_latest_start = LDISKFS_FIRST_INO(osd_sb(dev));
268 scrub->os_pos_current = sf->sf_pos_latest_start;
269 sf->sf_status = SS_SCANNING;
270 sf->sf_time_latest_start = cfs_time_current_sec();
271 sf->sf_time_last_checkpoint = sf->sf_time_latest_start;
272 rc = osd_scrub_file_store(scrub);
/* The thread flags are changed under os_lock; waiters on t_ctl_waitq are
 * woken afterwards. */
274 cfs_spin_lock(&scrub->os_lock);
275 thread_set_flags(thread, SVC_RUNNING);
276 cfs_spin_unlock(&scrub->os_lock);
277 cfs_waitq_broadcast(&thread->t_ctl_waitq);
279 cfs_up_write(&scrub->os_rwsem);
284 static int osd_scrub_error_handler(struct osd_device *dev,
285 struct osd_inode_id *lid, int rc)
287 struct osd_scrub *scrub = &dev->od_scrub;
288 struct scrub_file *sf = &scrub->os_file;
290 cfs_down_write(&scrub->os_rwsem);
291 scrub->os_new_checked++;
292 sf->sf_items_failed++;
293 if (sf->sf_pos_first_inconsistent == 0 ||
294 sf->sf_pos_first_inconsistent > lid->oii_ino)
295 sf->sf_pos_first_inconsistent = lid->oii_ino;
296 cfs_up_write(&scrub->os_rwsem);
297 return sf->sf_param & SP_FAILOUT ? rc : 0;
/*
 * Check the OI mapping of the FID/inode pair cached in \a oic and repair it
 * (iam_update() or iam_insert()) when it is missing or differs.
 *
 * \param info	per-thread scratch area (oti_id and oti_fid are used)
 * \param dev	device being scrubbed
 * \param oic	cached FID -> inode id pair to verify
 *
 * \retval rc	if SP_FAILOUT is set in sf_param
 * \retval 0	otherwise
 *
 * NOTE(review): the return type line, several declarations, branch
 * conditions, GOTO/labels and else lines are not visible in this listing;
 * the comments below describe the visible statements only.
 */
301 osd_scrub_check_update(struct osd_thread_info *info, struct osd_device *dev,
302 struct osd_idmap_cache *oic)
304 struct osd_scrub *scrub = &dev->od_scrub;
305 struct scrub_file *sf = &scrub->os_file;
/* lid2 (lookup result) and oi_id (packed record written later) both point
 * at info->oti_id. */
306 struct osd_inode_id *lid2 = &info->oti_id;
307 struct lu_fid *oi_fid = &info->oti_fid;
308 struct osd_inode_id *oi_id = &info->oti_id;
310 struct osd_inconsistent_item *oii = NULL;
311 struct inode *inode = NULL;
312 struct lu_fid *fid = &oic->oic_fid;
313 struct osd_inode_id *lid = &oic->oic_lid;
314 struct iam_container *bag;
315 struct iam_path_descr *ipd;
316 int ops = DTO_INDEX_UPDATE;
/* In "prior" mode oic is embedded in an osd_inconsistent_item; recover the
 * enclosing item. */
321 if (scrub->os_in_prior)
322 oii = cfs_list_entry(oic, struct osd_inconsistent_item,
325 cfs_down_write(&scrub->os_rwsem);
326 scrub->os_new_checked++;
/* Special cases (their actions are on lines not shown): an inode below the
 * start position of this run that is not a prior item, and a prior item
 * with oii_insert set. */
327 if (lid->oii_ino < sf->sf_pos_latest_start && oii == NULL)
330 if (oii != NULL && oii->oii_insert)
/* Look up what the OI currently maps this FID to. */
333 rc = osd_oi_lookup(info, dev, fid, lid2);
339 inode = osd_iget(info, dev, lid);
342 /* Someone removed the inode. */
343 if (rc == -ENOENT || rc == -ESTALE)
348 /* Prevent the inode from being unlinked during OI scrub. */
349 cfs_mutex_lock(&inode->i_mutex);
350 if (unlikely(inode->i_nlink == 0)) {
351 cfs_mutex_unlock(&inode->i_mutex);
/* Insert path: switch to DTO_INDEX_INSERT, mark the OI file holding this
 * FID in sf_oi_bitmap and flag the scrub file SF_RECREATED. */
356 ops = DTO_INDEX_INSERT;
357 idx = osd_oi_fid2idx(dev, fid);
358 if (unlikely(!ldiskfs_test_bit(idx, sf->sf_oi_bitmap)))
359 ldiskfs_set_bit(idx, sf->sf_oi_bitmap);
360 sf->sf_flags |= SF_RECREATED;
/* The looked-up id equals the cached one (body not shown). */
361 } else if (osd_id_eq(lid, lid2)) {
365 sf->sf_flags |= SF_INCONSISTENT;
/* Build the index key (big-endian FID) and the packed inode-id record;
 * this overwrites the lookup result held in info->oti_id. */
366 fid_cpu_to_be(oi_fid, fid);
367 osd_id_pack(oi_id, &oic->oic_lid);
368 jh = ldiskfs_journal_start_sb(osd_sb(dev),
369 osd_dto_credits_noquota[ops]);
372 CERROR("%.16s: fail to start trans for scrub store, rc = %d\n",
373 LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc);
377 bag = &osd_fid2oi(dev, fid)->oi_dir.od_container;
378 ipd = osd_idx_ipd_get(info->oti_env, bag);
379 if (unlikely(ipd == NULL)) {
/* The transaction is stopped before bailing out. */
380 ldiskfs_journal_stop(jh);
381 CERROR("%.16s: fail to get ipd for scrub store\n",
382 LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name);
383 GOTO(out, rc = -ENOMEM);
386 if (ops == DTO_INDEX_UPDATE)
387 rc = iam_update(jh, bag, (const struct iam_key *)oi_fid,
388 (struct iam_rec *)oi_id, ipd);
390 rc = iam_insert(jh, bag, (const struct iam_key *)oi_fid,
391 (struct iam_rec *)oi_id, ipd);
392 osd_ipd_put(info->oti_env, bag, ipd);
393 ldiskfs_journal_stop(jh);
/* Updates are counted separately for prior items. */
395 if (scrub->os_in_prior)
396 sf->sf_items_updated_prior++;
398 sf->sf_items_updated++;
/* Failure accounting: count it and remember the lowest failing inode. */
405 sf->sf_items_failed++;
406 if (sf->sf_pos_first_inconsistent == 0 ||
407 sf->sf_pos_first_inconsistent > lid->oii_ino)
408 sf->sf_pos_first_inconsistent = lid->oii_ino;
/* Release the i_mutex taken for the insert path. */
411 if (ops == DTO_INDEX_INSERT) {
412 cfs_mutex_unlock(&inode->i_mutex);
415 cfs_up_write(&scrub->os_rwsem);
/* Unlink the prior item from its list, under os_lock. */
418 LASSERT(!cfs_list_empty(&oii->oii_list));
420 cfs_spin_lock(&scrub->os_lock);
421 cfs_list_del_init(&oii->oii_list);
422 cfs_spin_unlock(&scrub->os_lock);
425 RETURN(sf->sf_param & SP_FAILOUT ? rc : 0);
/*
 * Fold the progress made since the last checkpoint into the scrub file and
 * store it, with os_rwsem held for write.
 *
 * \param scrub	scrub instance to checkpoint
 *
 * NOTE(review): the rc declaration and the return are not visible in this
 * listing.
 */
428 static int do_osd_scrub_checkpoint(struct osd_scrub *scrub)
430 struct scrub_file *sf = &scrub->os_file;
434 cfs_down_write(&scrub->os_rwsem);
/* Move the pending count into the persistent total. */
435 sf->sf_items_checked += scrub->os_new_checked;
436 scrub->os_new_checked = 0;
437 sf->sf_pos_last_checkpoint = scrub->os_pos_current;
438 sf->sf_time_last_checkpoint = cfs_time_current_sec();
/* Add the time elapsed since the previous store; HALF_SEC is added before
 * the conversion to seconds. */
439 sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
440 scrub->os_time_last_checkpoint);
441 rc = osd_scrub_file_store(scrub);
442 cfs_up_write(&scrub->os_rwsem);
/*
 * Take a checkpoint only when both conditions hold:
 * cfs_time_beforeq(os_time_next_checkpoint, now) is true, and at least one
 * item has been checked since the last checkpoint.
 *
 * \param scrub	scrub instance
 *
 * Returns the result of do_osd_scrub_checkpoint() when a checkpoint is
 * taken. NOTE(review): the fall-through return is not visible in this
 * listing.
 */
447 static inline int osd_scrub_checkpoint(struct osd_scrub *scrub)
449 if (unlikely(cfs_time_beforeq(scrub->os_time_next_checkpoint,
450 cfs_time_current()) &&
451 scrub->os_new_checked > 0))
452 return do_osd_scrub_checkpoint(scrub);
/*
 * Finish a scrub run: flag the thread SVC_STOPPING, fold pending progress
 * into the scrub file, set the final status and store the file. All of this
 * is done with os_rwsem held for write.
 *
 * \param scrub		scrub instance
 * \param result	outcome of the scan; 0 yields SS_PAUSED. The condition
 *			selecting SS_COMPLETED is on a line not shown
 *			(presumably result > 0, TODO confirm); the remaining
 *			visible status is SS_FAILED.
 */
456 static void osd_scrub_post(struct osd_scrub *scrub, int result)
458 struct scrub_file *sf = &scrub->os_file;
461 cfs_down_write(&scrub->os_rwsem);
462 cfs_spin_lock(&scrub->os_lock);
463 thread_set_flags(&scrub->os_thread, SVC_STOPPING);
464 cfs_spin_unlock(&scrub->os_lock);
/* Same folding of pending progress as a regular checkpoint. */
465 if (scrub->os_new_checked > 0) {
466 sf->sf_items_checked += scrub->os_new_checked;
467 scrub->os_new_checked = 0;
468 sf->sf_pos_last_checkpoint = scrub->os_pos_current;
470 sf->sf_time_last_checkpoint = cfs_time_current_sec();
/* Completed: clear the OI bitmap and the RECREATED/INCONSISTENT/AUTO flags,
 * stamp the completion time and count the success. */
472 sf->sf_status = SS_COMPLETED;
473 memset(sf->sf_oi_bitmap, 0, SCRUB_OI_BITMAP_SIZE);
474 sf->sf_flags &= ~(SF_RECREATED | SF_INCONSISTENT | SF_AUTO);
475 sf->sf_time_last_complete = sf->sf_time_last_checkpoint;
476 sf->sf_success_count++;
477 } else if (result == 0) {
478 sf->sf_status = SS_PAUSED;
480 sf->sf_status = SS_FAILED;
482 sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
483 scrub->os_time_last_checkpoint);
/* From here on, result holds the outcome of the store, not of the scan. */
484 result = osd_scrub_file_store(scrub);
486 CERROR("%.16s: fail to osd_scrub_post, rc = %d\n",
487 LDISKFS_SB(osd_scrub2sb(scrub))->s_es->s_volume_name,
489 cfs_up_write(&scrub->os_rwsem);
/* Positive return codes of osd_scrub_next():
 * SCRUB_NEXT_BREAK    - no further set bit in the current group's bitmap;
 * SCRUB_NEXT_CONTINUE - the current inode is to be skipped. */
494 #define SCRUB_NEXT_BREAK 1
495 #define SCRUB_NEXT_CONTINUE 2
/*
 * Pick the next item for the scrub thread to check.
 *
 * Queued inconsistent items are served first (os_in_prior is set). Otherwise
 * the next set bit in the inode bitmap of group \a bg is located, starting
 * at \a *offset, and the inode's FID is read into scrub->os_oic.
 *
 * \param info		per-thread scratch area
 * \param dev		device being scrubbed
 * \param scrub		scrub instance (os_pos_current is updated)
 * \param sb		super block of the device
 * \param bg		block group being scanned
 * \param bitmap	inode bitmap buffer of that group
 * \param gbase		inode number corresponding to offset 0 of the group
 * \param offset	in/out: offset within the group
 * \param oic		out: the cache entry to check
 *
 * Visible results: SCRUB_NEXT_BREAK when the group has no further set bit,
 * SCRUB_NEXT_CONTINUE when the inode should be skipped.
 * NOTE(review): the return type line, some declarations, branch conditions
 * and the final return are not visible in this listing.
 */
498 osd_scrub_next(struct osd_thread_info *info, struct osd_device *dev,
499 struct osd_scrub *scrub, struct super_block *sb,
500 ldiskfs_group_t bg, struct buffer_head *bitmap, __u32 gbase,
501 __u32 *offset, struct osd_idmap_cache **oic)
503 struct osd_inconsistent_item *oii;
505 struct osd_inode_id *lid;
/* Queued inconsistent items take priority over the table scan. */
509 if (!cfs_list_empty(&scrub->os_inconsistent_items)) {
510 oii = cfs_list_entry(scrub->os_inconsistent_items.next,
511 struct osd_inconsistent_item, oii_list);
512 *oic = &oii->oii_cache;
513 scrub->os_in_prior = 1;
/* Regular scan: use the scrub's own cache entry. */
517 *oic = &scrub->os_oic;
518 fid = &(*oic)->oic_fid;
519 lid = &(*oic)->oic_lid;
520 *offset = ldiskfs_find_next_bit(bitmap->b_data,
521 LDISKFS_INODES_PER_GROUP(sb), *offset);
522 if (*offset >= LDISKFS_INODES_PER_GROUP(sb)) {
/* Group exhausted: position on the first inode of the next group. */
524 scrub->os_pos_current = 1 + (bg + 1) *
525 LDISKFS_INODES_PER_GROUP(sb);
526 return SCRUB_NEXT_BREAK;
529 scrub->os_pos_current = gbase + *offset;
530 osd_id_gen(lid, scrub->os_pos_current, OSD_OII_NOGEN);
531 inode = osd_iget_fid(info, dev, lid, fid);
534 /* The inode may be removed after bitmap searching, or the
535 * file is newly created without the inode initialized yet. */
536 if (rc == -ENOENT || rc == -ESTALE)
537 rc = SCRUB_NEXT_CONTINUE;
539 CERROR("%.16s: fail to read inode, group = %u, "
540 "ino# = %u, rc = %d\n",
541 LDISKFS_SB(sb)->s_es->s_volume_name,
542 bg, scrub->os_pos_current, rc);
/* Skipped FIDs: IGIF, IDIF, the LLOG and LOCAL_FILE sequences, reserved
 * sequences, and inodes flagged I_LUSTRE_NOSCRUB. */
544 if (fid_is_igif(fid) || fid_is_idif(fid) ||
545 fid_seq(fid) == FID_SEQ_LLOG ||
546 fid_seq(fid) == FID_SEQ_LOCAL_FILE ||
547 fid_seq_is_rsvd(fid_seq(fid)) ||
548 inode->i_state & I_LUSTRE_NOSCRUB)
549 rc = SCRUB_NEXT_CONTINUE;
555 static inline int osd_scrub_has_window(struct osd_scrub *scrub,
556 struct osd_otable_cache *ooc)
558 return scrub->os_pos_current < ooc->ooc_pos_preload + SCRUB_WINDOW_SIZE;
/*
 * Body of the "OI_scrub" thread.
 *
 * It sets up an lu_env, runs osd_scrub_prep(), then walks the inode bitmaps
 * group by group from os_pos_current up to s_inodes_count. Each item returned
 * by osd_scrub_next() is checked, and checkpoints are taken through
 * osd_scrub_checkpoint(). On exit it calls osd_scrub_post(), empties the
 * inconsistent-item list and flags the thread SVC_STOPPED.
 *
 * \param args	the struct osd_device to scrub
 *
 * NOTE(review): several declarations (env, rc, max, noslot), branch
 * conditions, break/continue/GOTO statements, labels and the return are not
 * visible in this listing; the comments describe visible statements only.
 */
561 static int osd_scrub_main(void *args)
564 struct osd_thread_info *info;
565 struct osd_device *dev = (struct osd_device *)args;
566 struct osd_scrub *scrub = &dev->od_scrub;
567 struct ptlrpc_thread *thread = &scrub->os_thread;
568 cfs_list_t *list = &scrub->os_inconsistent_items;
569 struct l_wait_info lwi = { 0 };
570 struct super_block *sb = osd_sb(dev);
571 struct osd_otable_it *it = NULL;
572 struct osd_otable_cache *ooc = NULL;
578 cfs_daemonize("OI_scrub");
579 rc = lu_env_init(&env, LCT_DT_THREAD);
581 CERROR("%.16s: OI scrub, fail to init env, rc = %d\n",
582 LDISKFS_SB(sb)->s_es->s_volume_name, rc);
586 info = osd_oti_get(&env);
587 rc = osd_scrub_prep(dev);
589 CERROR("%.16s: OI scrub, fail to scrub prep, rc = %d\n",
590 LDISKFS_SB(sb)->s_es->s_volume_name, rc);
/* Not at full speed: an otable iterator must exist. Wait until it reports
 * ooi_user_ready (or the thread is no longer running), then start from its
 * preload position. */
594 if (!scrub->os_full_speed) {
595 LASSERT(dev->od_otable_it != NULL);
597 it = dev->od_otable_it;
598 ooc = &it->ooi_cache;
599 l_wait_event(thread->t_ctl_waitq,
600 it->ooi_user_ready || !thread_is_running(thread),
602 if (unlikely(!thread_is_running(thread)))
605 LASSERT(scrub->os_pos_current >= ooc->ooc_pos_preload);
606 scrub->os_pos_current = ooc->ooc_pos_preload;
609 CDEBUG(D_LFSCK, "OI scrub: flags = 0x%x, pos = %u\n",
610 scrub->os_start_flags, scrub->os_pos_current);
/* Highest position to scan: the super block's s_inodes_count. */
612 max = le32_to_cpu(LDISKFS_SB(sb)->s_es->s_inodes_count);
613 while (scrub->os_pos_current <= max) {
614 struct buffer_head *bitmap = NULL;
615 struct osd_idmap_cache *oic = NULL;
/* Positions are 1-based: split into block group, offset within the group,
 * and the position that corresponds to offset 0 (gbase). */
616 ldiskfs_group_t bg = (scrub->os_pos_current - 1) /
617 LDISKFS_INODES_PER_GROUP(sb);
618 __u32 offset = (scrub->os_pos_current - 1) %
619 LDISKFS_INODES_PER_GROUP(sb);
620 __u32 gbase = 1 + bg * LDISKFS_INODES_PER_GROUP(sb);
622 bitmap = ldiskfs_read_inode_bitmap(sb, bg);
623 if (bitmap == NULL) {
624 CERROR("%.16s: fail to read bitmap at pos = %u, "
625 "bg = %u, scrub will stop\n",
626 LDISKFS_SB(sb)->s_es->s_volume_name,
627 scrub->os_pos_current, (__u32)bg);
628 GOTO(post, rc = -EIO);
/* Walk the inodes of this group. */
631 while (offset < LDISKFS_INODES_PER_GROUP(sb)) {
632 if (unlikely(!thread_is_running(thread))) {
637 if (cfs_list_empty(list) && noslot != 0)
640 rc = osd_scrub_next(info, dev, scrub, sb, bg,
641 bitmap, gbase, &offset, &oic);
642 if (rc == SCRUB_NEXT_BREAK)
644 else if (rc == SCRUB_NEXT_CONTINUE)
/* Two visible outcomes for the item: the error handler, or the OI
 * check/update (the selecting condition is not shown). */
648 rc = osd_scrub_error_handler(dev, &oic->oic_lid,
651 rc = osd_scrub_check_update(info, dev, oic);
657 rc = osd_scrub_checkpoint(scrub);
659 CERROR("%.16s: fail to checkpoint, pos = %u, "
661 LDISKFS_SB(sb)->s_es->s_volume_name,
662 scrub->os_pos_current, rc);
/* A prior item was just handled: leave prior mode. */
667 if (scrub->os_in_prior) {
668 scrub->os_in_prior = 0;
/* Advance to the next inode of the group. */
673 scrub->os_pos_current = gbase + ++offset;
/* An iterator may have been attached after the scan started; pick it up
 * lazily. */
674 if (dev->od_otable_it != NULL) {
675 if (unlikely(it == NULL)) {
676 it = dev->od_otable_it;
677 ooc = &it->ooi_cache;
/* The iterator is waiting and the scrub is now ahead of its preload
 * position (the action taken involves t_ctl_waitq; its call line is not
 * shown). */
680 if (it->ooi_waiting &&
681 (ooc->ooc_pos_preload <
682 scrub->os_pos_current)) {
685 &thread->t_ctl_waitq);
689 if (scrub->os_full_speed || rc == SCRUB_NEXT_CONTINUE)
693 if (osd_scrub_has_window(scrub, ooc)) {
/* Wait until there is window again, an inconsistent item is queued, or the
 * thread is no longer running. */
698 scrub->os_waiting = 1;
699 l_wait_event(thread->t_ctl_waitq,
700 osd_scrub_has_window(scrub, ooc) ||
701 !cfs_list_empty(list) ||
702 !thread_is_running(thread),
704 scrub->os_waiting = 0;
706 if (osd_scrub_has_window(scrub, ooc))
/* rc becomes 1 once the position has passed max. */
713 GOTO(post, rc = (scrub->os_pos_current > max ? 1 : rc));
716 osd_scrub_post(scrub, rc);
717 CDEBUG(D_LFSCK, "OI scrub: stop, rc = %d, pos = %u\n",
718 rc, scrub->os_pos_current);
/* Unlink whatever is still queued on the inconsistent-item list. */
721 while (!cfs_list_empty(list)) {
722 struct osd_inconsistent_item *oii;
724 oii = cfs_list_entry(list->next,
725 struct osd_inconsistent_item, oii_list);
726 cfs_list_del_init(&oii->oii_list);
/* Mark the thread stopped and wake waiters, all under os_lock. */
732 cfs_spin_lock(&scrub->os_lock);
733 thread_set_flags(thread, SVC_STOPPED);
734 cfs_waitq_broadcast(&thread->t_ctl_waitq);
735 cfs_spin_unlock(&scrub->os_lock);
/*
 * Start the scrub thread with the given start flags.
 *
 * \param dev	device to scrub
 * \param flags	SS_* start flags, stored in os_start_flags for the thread
 *
 * NOTE(review): the rc declaration, the result taken when the thread is
 * already running, GOTO/labels and the return are not visible in this
 * listing. The caller osd_scrub_start() maps -EALREADY to 0.
 */
739 static int do_osd_scrub_start(struct osd_device *dev, __u32 flags)
741 struct osd_scrub *scrub = &dev->od_scrub;
742 struct ptlrpc_thread *thread = &scrub->os_thread;
743 struct l_wait_info lwi = { 0 };
748 /* os_lock: sync status between stop and scrub thread */
749 cfs_spin_lock(&scrub->os_lock);
750 if (thread_is_running(thread)) {
751 cfs_spin_unlock(&scrub->os_lock);
/* A previous run is still stopping: wait until it has fully stopped. */
753 } else if (unlikely(thread_is_stopping(thread))) {
754 cfs_spin_unlock(&scrub->os_lock);
755 l_wait_event(thread->t_ctl_waitq,
756 thread_is_stopped(thread),
760 cfs_spin_unlock(&scrub->os_lock);
/* Hand the flags to the new thread and clear the thread state before
 * creating it. */
762 scrub->os_start_flags = flags;
763 thread_set_flags(thread, 0);
764 rc = cfs_create_thread(osd_scrub_main, dev, 0);
766 CERROR("%.16s: cannot start iteration thread, rc = %d\n",
767 LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name, rc);
/* Wait until the new thread reports either running or stopped. */
771 l_wait_event(thread->t_ctl_waitq,
772 thread_is_running(thread) || thread_is_stopped(thread),
/*
 * Start an OI scrub, serialized against stop by od_otable_mutex.
 *
 * \param dev	device to scrub
 *
 * The start flags default to SS_AUTO. -EALREADY from do_osd_scrub_start()
 * is reported to the caller as 0.
 * NOTE(review): the rc declaration and the statement executed when the
 * scrub file status is SS_COMPLETED are not visible in this listing.
 */
778 int osd_scrub_start(struct osd_device *dev)
780 __u32 flags = SS_AUTO;
784 if (dev->od_scrub.os_file.sf_status == SS_COMPLETED)
787 /* od_otable_mutex: prevent concurrent start/stop */
788 cfs_mutex_lock(&dev->od_otable_mutex);
789 rc = do_osd_scrub_start(dev, flags);
790 cfs_mutex_unlock(&dev->od_otable_mutex);
792 RETURN(rc == -EALREADY ? 0 : rc);
/*
 * Ask the scrub thread to stop and wait until it has.
 *
 * \param scrub	scrub instance to stop
 *
 * Nothing is signalled when the thread is in the init or stopped state.
 * NOTE(review): the end of the l_wait_event() call and the closing brace of
 * the if block are not visible in this listing.
 */
795 static void do_osd_scrub_stop(struct osd_scrub *scrub)
797 struct ptlrpc_thread *thread = &scrub->os_thread;
798 struct l_wait_info lwi = { 0 };
800 /* os_lock: sync status between stop and scrub thread */
801 cfs_spin_lock(&scrub->os_lock);
802 if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
/* Flag the request, drop the lock, wake the thread and wait for it to
 * report SVC_STOPPED. */
803 thread_set_flags(thread, SVC_STOPPING);
804 cfs_spin_unlock(&scrub->os_lock);
805 cfs_waitq_broadcast(&thread->t_ctl_waitq);
806 l_wait_event(thread->t_ctl_waitq,
807 thread_is_stopped(thread),
809 /* Do not skip the last lock/unlock, which can guarantee that
810 * the caller cannot return until the OI scrub thread exit. */
811 cfs_spin_lock(&scrub->os_lock);
813 cfs_spin_unlock(&scrub->os_lock);
816 static void osd_scrub_stop(struct osd_device *dev)
818 /* od_otable_mutex: prevent curcurrent start/stop */
819 cfs_mutex_lock(&dev->od_otable_mutex);
820 do_osd_scrub_stop(&dev->od_scrub);
821 cfs_mutex_unlock(&dev->od_otable_mutex);
/* Name of the scrub file; passed to filp_open() in osd_scrub_setup(). */
824 static const char osd_scrub_name[] = "OI_scrub";
/*
 * Set up the OI scrub state of a device: initialize locks and lists, open
 * (creating if needed) and load the scrub file, initialize the OI files, and
 * start a scrub automatically when the recorded state calls for one.
 *
 * \param env	execution environment
 * \param dev	device to set up
 *
 * NOTE(review): several declarations (filp, inode, rc, ...), branch
 * conditions, GOTO/labels and the final return are not visible in this
 * listing; the comments describe visible statements only.
 */
826 int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev)
828 struct osd_thread_info *info = osd_oti_get(env);
829 struct osd_scrub *scrub = &dev->od_scrub;
830 struct lvfs_run_ctxt *ctxt = &scrub->os_ctxt;
831 struct scrub_file *sf = &scrub->os_file;
832 struct osd_inode_id *id = &scrub->os_oic.oic_lid;
833 struct super_block *sb = osd_sb(dev);
834 struct ldiskfs_super_block *es = LDISKFS_SB(sb)->s_es;
836 struct lvfs_run_ctxt saved;
/* Run context whose pwd is the root of the device's mount, used for the
 * filp_open() of the scrub file below. */
843 OBD_SET_CTXT_MAGIC(ctxt);
844 ctxt->pwdmnt = dev->od_mnt;
845 ctxt->pwd = dev->od_mnt->mnt_root;
848 cfs_waitq_init(&scrub->os_thread.t_ctl_waitq);
849 cfs_init_rwsem(&scrub->os_rwsem);
850 cfs_spin_lock_init(&scrub->os_lock);
851 CFS_INIT_LIST_HEAD(&scrub->os_inconsistent_items);
/* The LMD_FLG_NOSCRUB mount flag disables the automatic start at the end of
 * this function. */
852 if (get_mount_flags(dev->od_mount->lmi_sb) & LMD_FLG_NOSCRUB)
853 scrub->os_no_scrub = 1;
/* NOTE(review): the RETURN below sits between push_ctxt() and pop_ctxt();
 * if it is taken when filp_open() fails, the pushed context appears never
 * to be popped. Confirm against the full source. */
855 push_ctxt(&saved, ctxt, NULL);
856 filp = filp_open(osd_scrub_name, O_RDWR | O_CREAT, 0644);
858 RETURN(PTR_ERR(filp));
/* Keep our own reference on the scrub file inode. */
860 scrub->os_inode = igrab(filp->f_dentry->d_inode);
862 pop_ctxt(&saved, ctxt, NULL);
864 rc = osd_scrub_file_load(scrub);
/* A new scrub file is initialized with the device UUID (the selecting
 * condition is not shown). */
866 osd_scrub_file_init(scrub, es->s_uuid);
869 } else if (rc != 0) {
/* UUID mismatch between scrub file and super block: reset the file and flag
 * it SF_INCONSISTENT. */
872 if (memcmp(sf->sf_uuid, es->s_uuid, 16) != 0) {
873 osd_scrub_file_reset(scrub, es->s_uuid,SF_INCONSISTENT);
/* A file still marked SS_SCANNING was never closed by osd_scrub_post();
 * record it as SS_CRASHED. */
875 } else if (sf->sf_status == SS_SCANNING) {
876 sf->sf_status = SS_CRASHED;
881 if (sf->sf_pos_last_checkpoint != 0)
882 scrub->os_pos_current = sf->sf_pos_last_checkpoint + 1;
884 scrub->os_pos_current = LDISKFS_FIRST_INO(sb);
887 rc = osd_scrub_file_store(scrub);
892 /* Initialize OI files. */
893 rc = osd_oi_init(info, dev);
/* Probe the OI with a well-known FID (LU_DOT_LUSTRE_FID) and try to load
 * the inode it maps to. */
898 rc = __osd_oi_lookup(info, dev, &LU_DOT_LUSTRE_FID, id);
900 inode = osd_iget(info, dev, id);
903 /* It is restored from old 2.x backup. */
904 if (rc == -ENOENT || rc == -ESTALE) {
905 osd_scrub_file_reset(scrub, es->s_uuid,
907 rc = osd_scrub_file_store(scrub);
912 } else if (rc == -ENOENT) {
/* Auto-start when scrubbing is not disabled and either a crashed run had
 * RECREATED/INCONSISTENT/AUTO set, or a fresh (SS_INIT) file has
 * RECREATED/INCONSISTENT set. */
917 if (rc == 0 && !scrub->os_no_scrub &&
918 ((sf->sf_status == SS_CRASHED &&
919 sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT | SF_AUTO)) ||
920 (sf->sf_status == SS_INIT &&
921 sf->sf_flags & (SF_RECREATED | SF_INCONSISTENT))))
922 rc = osd_scrub_start(dev);
/*
 * Release what osd_scrub_setup() acquired: the scrub file inode reference
 * and the OI table.
 *
 * \param env	execution environment
 * \param dev	device being cleaned up
 *
 * The otable iterator must already be gone (asserted).
 * NOTE(review): one source line inside the os_inode branch, before iput(),
 * is not visible in this listing.
 */
927 void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev)
929 struct osd_scrub *scrub = &dev->od_scrub;
931 LASSERT(dev->od_otable_it == NULL);
933 if (scrub->os_inode != NULL) {
/* Drop the inode reference and clear the pointer. */
935 iput(scrub->os_inode);
936 scrub->os_inode = NULL;
938 if (dev->od_oi_table != NULL)
939 osd_oi_fini(osd_oti_get(env), dev);