4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2017, Intel Corporation.
26 * lustre/include/lustre_scrub.h
28 * Shared definitions and declarations for Lustre OI scrub.
30 * Author: Fan Yong <fan.yong@intel.com>
33 #ifndef _LUSTRE_SCRUB_H
34 # define _LUSTRE_SCRUB_H
36 #include <libcfs/linux/linux-uuid.h>
37 #include <dt_object.h>
38 #include <lustre_net.h>
/* OSD_OI_FID_NR_MAX is 1 << OSD_OI_FID_OID_BITS_MAX, i.e. 1024. */
40 #define OSD_OI_FID_OID_BITS_MAX 10
41 #define OSD_OI_FID_NR_MAX (1UL << OSD_OI_FID_OID_BITS_MAX)
/* OSD_OI_FID_NR_MAX / 8 = 128; used as the byte size of sf_oi_bitmap[]
 * (presumably one bit per OI file -- TODO confirm). */
42 #define SCRUB_OI_BITMAP_SIZE (OSD_OI_FID_NR_MAX >> 3)
/* PFID_STRIPE_COUNT_MASK selects the low PFID_STRIPE_IDX_BITS (16) bits. */
43 #define PFID_STRIPE_IDX_BITS 16
44 #define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
/* Version 1 scrub magic value. */
46 #define SCRUB_MAGIC_V1 0x4C5FD252
/* NOTE(review): unit is not stated here; presumably seconds -- confirm. */
47 #define SCRUB_CHECKPOINT_INTERVAL 60
/* NOTE(review): presumably the size of the scan window that os_waiting
 * refers to; unit not visible here -- confirm. */
48 #define SCRUB_WINDOW_SIZE 1024
/* Control codes for the OI scrub scanning loops; each value tells the
 * scanner how to proceed with the current object/group (see the
 * per-value comments below). */
50 enum scrub_next_status {
51 /* Exit current loop and process next group. */
54 /* Skip current object and process next bit. */
55 SCRUB_NEXT_CONTINUE = 2,
57 /* Exit all the loops. */
60 /* Wait for free cache slot. */
63 /* Simulate system crash during OI scrub. */
66 /* Simulate failure during OI scrub. */
69 /* Newly created object, no scrub on it. */
70 SCRUB_NEXT_NOSCRUB = 7,
72 /* The object has no FID-in-LMA. */
76 SCRUB_NEXT_OSTOBJ = 9,
78 /* Old OST-object: no LMA, or no FID-on-OST flags in LMA. */
79 SCRUB_NEXT_OSTOBJ_OLD = 10,
/* Bit flags for scrub local files. Every enumerator is a distinct single
 * bit, so values may be OR-ed together. The values are identical to the
 * same-named OLF_* constants defined in this header.
 * NOTE(review): the meaning of each flag is not documented here; the names
 * suggest scanning sub-items, hiding the FID, showing the name, and an
 * index encoded in the FID -- confirm against the users of these flags. */
82 enum scrub_local_file_flags {
83 SLFF_SCAN_SUBITEMS = 0x0001,
84 SLFF_HIDE_FID = 0x0002,
85 SLFF_SHOW_NAME = 0x0004,
87 SLFF_IDX_IN_FID = 0x0010,
91 /* The scrub file is newly created: for a new MDT, when upgrading from
92 * an old disk, or when re-creating the scrub file manually. */
95 /* The scrub is checking/repairing the OI files. */
98 /* The scrub checked/repaired the OI files successfully. */
101 /* The scrub failed to check/repair the OI files. */
104 /* The scrub is stopped manually, the OI files may be inconsistent. */
107 /* The scrub is paused automatically when umount. */
110 /* The scrub crashed during the scanning, should be restarted. */
115 /* OI files have been recreated, OI mappings should be re-inserted. */
116 SF_RECREATED = 0x0000000000000001ULL,
118 /* OI files are invalid and should be rebuilt ASAP. */
119 SF_INCONSISTENT = 0x0000000000000002ULL,
121 /* OI scrub is triggered automatically. */
122 SF_AUTO = 0x0000000000000004ULL,
124 /* The device is upgraded from 1.8 format. */
125 SF_UPGRADE = 0x0000000000000008ULL,
129 /* Exit when fail. */
132 /* Check only without repairing. */
137 /* Set failout flag. */
138 SS_SET_FAILOUT = 0x00000001,
140 /* Clear failout flag. */
141 SS_CLEAR_FAILOUT = 0x00000002,
143 /* Reset scrub start position. */
144 SS_RESET = 0x00000004,
146 /* Trigger full scrub automatically. */
147 SS_AUTO_FULL = 0x00000008,
149 /* Trigger partial scrub automatically. */
150 SS_AUTO_PARTIAL = 0x00000010,
152 /* Set dryrun flag. */
153 SS_SET_DRYRUN = 0x00000020,
155 /* Clear dryrun flag. */
156 SS_CLEAR_DRYRUN = 0x00000040,
160 OLF_SCAN_SUBITEMS = 0x0001,
161 OLF_HIDE_FID = 0x0002,
162 OLF_SHOW_NAME = 0x0004,
164 OLF_IDX_IN_FID = 0x0010,
165 OLF_NOT_BACKUP = 0x0020,
168 /* There is some overhead in detecting OI inconsistency automatically
169 * during normal RPC handling. We do not want to always auto detect
170 * OI inconsistency, especially when an OI scrub has just completed.
172 * The 'auto_scrub' defines the time interval (in seconds) after the
173 * last OI scrub before auto detection of OI inconsistency is enabled. */
175 /* Disable auto scrub. */
178 /* 1 second is too short interval, it is almost equal to always auto
179 * detect inconsistent OI, usually used for test. */
182 /* Enable auto detect OI inconsistency one month (60 * 60 * 24 * 30)
183 * after last OI scrub. */
184 AS_DEFAULT = 2592000LL,
188 /* 128-bit uuid for volume. */
191 /* See 'enum scrub_flags'. */
194 /* The scrub magic. */
197 /* See 'enum scrub_status'. */
200 /* See 'enum scrub_param'. */
203 /* The time for the last OI scrub completed. */
204 time64_t sf_time_last_complete;
206 /* The time when the latest OI scrub started running. */
207 time64_t sf_time_latest_start;
209 /* The time for the last OI scrub checkpoint. */
210 time64_t sf_time_last_checkpoint;
212 /* The position for the latest OI scrub started from. */
213 __u64 sf_pos_latest_start;
215 /* The position for the last OI scrub checkpoint. */
216 __u64 sf_pos_last_checkpoint;
218 /* The position of the first object that should be updated. */
219 __u64 sf_pos_first_inconsistent;
221 /* How many objects have been checked. */
222 __u64 sf_items_checked;
224 /* How many objects have been updated. */
225 __u64 sf_items_updated;
227 /* How many objects failed to be processed. */
228 __u64 sf_items_failed;
230 /* How many prior objects have been updated during scanning. */
231 __u64 sf_items_updated_prior;
233 /* How many objects marked as LDISKFS_STATE_LUSTRE_NOSCRUB. */
234 __u64 sf_items_noscrub;
236 /* How many IGIF objects. */
239 /* How long the OI scrub has run in seconds. Do NOT change
240 * to time64_t since this breaks backwards compatibility.
241 * It shouldn't take more than 136 years to complete :-)
245 /* How many OI scrubs have completed on the device. */
246 __u32 sf_success_count;
248 /* How many OI files. */
251 /* Keep the flags after scrub reset. See 'enum scrub_internal_flags' */
252 __u16 sf_internal_flags;
255 __u64 sf_reserved_2[16];
257 /* Bitmap for OI files recreated case. */
258 __u8 sf_oi_bitmap[SCRUB_OI_BITMAP_SIZE];
/* In-memory state of one OI scrub instance: the scrub file object with its
 * in-memory image and load/store buffer, lists of inconsistent and stale
 * items, checkpoint timing, progress counters and status bits. */
261 struct lustre_scrub {
262 /* Object for the scrub file. */
263 struct dt_object *os_obj;
/* NOTE(review): presumably the task of the scrub thread (see os_running);
 * confirm against the code that starts/stops the scrub. */
265 struct task_struct *os_task;
/* NOTE(review): presumably the items found inconsistent and queued for
 * the scrub to handle (see os_in_prior) -- confirm. */
266 struct list_head os_inconsistent_items;
267 /* Once an inconsistent mapping cannot be fixed, it is put into this list. */
268 struct list_head os_stale_items;
270 /* Write lock for scrub prep/update/post/checkpoint,
271 * read lock for scrub dump. */
272 struct rw_semaphore os_rwsem;
275 /* Scrub file in memory. */
276 struct scrub_file os_file;
278 /* Buffer for scrub file load/store. */
279 struct scrub_file os_file_disk;
283 /* The time of the last checkpoint, in seconds. */
284 time64_t os_time_last_checkpoint;
286 /* The time of the next checkpoint, in seconds. */
287 time64_t os_time_next_checkpoint;
289 /* How long to wait to start scrubbing. */
290 time64_t os_auto_scrub_interval;
292 /* How many objects have been checked since last checkpoint. */
293 __u64 os_new_checked;
/* NOTE(review): presumably the position currently being scanned and the
 * SS_* start flags the scrub was started with -- confirm. */
294 __u64 os_pos_current;
295 __u32 os_start_flags;
296 /* Some of these bits can be set by different threads so
297 * all updates must be protected by ->os_lock to avoid
298 * racing read-modify-write cycles causing corruption.
300 unsigned int os_in_prior:1, /* process inconsistent item
301 * found by RPC prior */
302 os_waiting:1, /* Waiting for scan window. */
303 os_full_speed:1, /* run w/o speed limit */
304 os_paused:1, /* The scrub is paused. */
308 os_running:1, /* scrub thread is running */
/* Version 1 magic value for index backups. */
313 #define INDEX_BACKUP_MAGIC_V1 0x1E41F208
/* 4096 * 4 = 16384.
 * NOTE(review): presumably a buffer size in bytes -- confirm. */
314 #define INDEX_BACKUP_BUFSIZE (4096 * 4)
/* Policy controlling whether index objects are backed up. */
316 enum lustre_index_backup_policy {
317 /* By default, do not back up the index. */
320 /* Back up the dirty index objects at umount. */
/* Header of an index backup. libh_pad pads the structure so that the
 * header stays 512-byte aligned (see the note on libh_pad). */
324 struct lustre_index_backup_header {
/* NOTE(review): presumably the FID of the index object this backup
 * belongs to -- confirm. */
329 struct lu_fid libh_owner;
330 __u64 libh_pad[60]; /* keep header 512 bytes aligned */
/* One list entry describing an index, identified by libu_fid.
 * NOTE(review): presumably one entry per index registered for backup via
 * lustre_index_register() -- confirm. */
333 struct lustre_index_backup_unit {
334 struct list_head libu_link;
335 struct lu_fid libu_fid;
/* One list entry carrying a pair of FIDs (liru_pfid, liru_cfid).
 * NOTE(review): presumably the parent and child FIDs of an index to be
 * restored, as passed to lustre_liru_new() -- confirm. */
340 struct lustre_index_restore_unit {
341 struct list_head liru_link;
342 struct lu_fid liru_pfid;
343 struct lu_fid liru_cfid;
/* OI scrub API. The implementations are not part of this header; the notes
 * below are limited to what the prototypes show and are otherwise hedged. */

/* Initialize / reset the scrub file state of @scrub for the volume @uuid.
 * NOTE(review): @flags presumably takes SF_* values -- confirm. */
349 void scrub_file_init(struct lustre_scrub *scrub, uuid_t uuid);
350 void scrub_file_reset(struct lustre_scrub *scrub, uuid_t uuid, u64 flags);
/* Load / store the scrub file of @scrub.
 * NOTE(review): the int result is presumably 0 or a negative errno. */
351 int scrub_file_load(const struct lu_env *env, struct lustre_scrub *scrub);
352 int scrub_file_store(const struct lu_env *env, struct lustre_scrub *scrub);
353 bool scrub_needs_check(struct lustre_scrub *scrub, const struct lu_fid *fid,
355 int scrub_checkpoint(const struct lu_env *env, struct lustre_scrub *scrub);
356 int scrub_thread_prep(const struct lu_env *env, struct lustre_scrub *scrub,
357 uuid_t uuid, u64 start);
358 int scrub_thread_post(const struct lu_env *env, struct lustre_scrub *scrub,
/* Start the scrub with @threadfn as the thread function and @data as its
 * argument. NOTE(review): @flags is presumably a mask of SS_* bits. */
360 int scrub_start(int (*threadfn)(void *data), struct lustre_scrub *scrub,
361 void *data, __u32 flags);
362 void scrub_stop(struct lustre_scrub *scrub);
/* Write scrub information for @scrub into the seq_file @m. */
363 void scrub_dump(struct seq_file *m, struct lustre_scrub *scrub);
/* Index backup/restore API. The implementations are not part of this
 * header; the notes below are hedged where the prototypes do not settle
 * the behavior. */

/* NOTE(review): presumably allocates a struct lustre_index_restore_unit for
 * (@pfid, @cfid, @name) and links it on @head -- confirm. */
365 int lustre_liru_new(struct list_head *head, const struct lu_fid *pfid,
366 const struct lu_fid *cfid, __u64 child,
367 const char *name, int namelen);

/* NOTE(review): presumably records the index @fid on @head (protected by
 * @lock) so that it can be backed up later -- confirm. */
369 int lustre_index_register(struct dt_device *dev, const char *devname,
370 struct list_head *head, spinlock_t *lock, int *guard,
371 const struct lu_fid *fid,
372 __u32 keysize, __u32 recsize);

/* NOTE(review): presumably processes the entries on @head; @backup
 * presumably selects whether they are actually backed up -- confirm. */
374 void lustre_index_backup(const struct lu_env *env, struct dt_device *dev,
375 const char *devname, struct list_head *head,
376 spinlock_t *lock, int *guard, bool backup);
/* NOTE(review): presumably restores the index @tgt_fid from the backup
 * @bak_fid, using @buf (@bufsize bytes) as working space -- confirm. */
377 int lustre_index_restore(const struct lu_env *env, struct dt_device *dev,
378 const struct lu_fid *parent_fid,
379 const struct lu_fid *tgt_fid,
380 const struct lu_fid *bak_fid, const char *name,
381 struct list_head *head, spinlock_t *lock,
382 char *buf, int bufsize);
/* Format @fid with the DFID_NOBRACE format followed by the literal suffix
 * ".lbx" into @buf. @len is passed to snprintf() as the buffer size, so
 * the output is bounded by it; the snprintf() result is not checked. */
384 static inline void lustre_fid2lbx(char *buf, const struct lu_fid *fid, int len)
386 snprintf(buf, len, DFID_NOBRACE".lbx", PFID(fid));
/* Return the os_name member of @scrub. */
389 static inline const char *osd_scrub2name(struct lustre_scrub *scrub)
391 return scrub->os_name;
393 #endif /* _LUSTRE_SCRUB_H */