Whamcloud - gitweb
LU-9796 kernel: improve metadata performaces for RHEL7
[fs/lustre-release.git] / lustre / obdclass / scrub.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2017, Intel Corporation.
24  */
25 /*
26  * lustre/obdclass/scrub.c
27  *
28  * The OI scrub is used for checking and (re)building Object Index files
29  * that are usually backend special. Here are some general scrub related
30  * functions that can be shared by different backends for OI scrub.
31  *
32  * Author: Fan Yong <fan.yong@intel.com>
33  */
34
35 #define DEBUG_SUBSYSTEM S_LFSCK
36
37 #include <linux/kthread.h>
38 #include <lustre_scrub.h>
39 #include <lustre_lib.h>
40
41 static inline struct dt_device *scrub_obj2dev(struct dt_object *obj)
42 {
43         return container_of0(obj->do_lu.lo_dev, struct dt_device, dd_lu_dev);
44 }
45
46 static void scrub_file_to_cpu(struct scrub_file *des, struct scrub_file *src)
47 {
48         memcpy(des->sf_uuid, src->sf_uuid, 16);
49         des->sf_flags   = le64_to_cpu(src->sf_flags);
50         des->sf_magic   = le32_to_cpu(src->sf_magic);
51         des->sf_status  = le16_to_cpu(src->sf_status);
52         des->sf_param   = le16_to_cpu(src->sf_param);
53         des->sf_time_last_complete      =
54                                 le64_to_cpu(src->sf_time_last_complete);
55         des->sf_time_latest_start       =
56                                 le64_to_cpu(src->sf_time_latest_start);
57         des->sf_time_last_checkpoint    =
58                                 le64_to_cpu(src->sf_time_last_checkpoint);
59         des->sf_pos_latest_start        =
60                                 le64_to_cpu(src->sf_pos_latest_start);
61         des->sf_pos_last_checkpoint     =
62                                 le64_to_cpu(src->sf_pos_last_checkpoint);
63         des->sf_pos_first_inconsistent  =
64                                 le64_to_cpu(src->sf_pos_first_inconsistent);
65         des->sf_items_checked           =
66                                 le64_to_cpu(src->sf_items_checked);
67         des->sf_items_updated           =
68                                 le64_to_cpu(src->sf_items_updated);
69         des->sf_items_failed            =
70                                 le64_to_cpu(src->sf_items_failed);
71         des->sf_items_updated_prior     =
72                                 le64_to_cpu(src->sf_items_updated_prior);
73         des->sf_run_time        = le32_to_cpu(src->sf_run_time);
74         des->sf_success_count   = le32_to_cpu(src->sf_success_count);
75         des->sf_oi_count        = le16_to_cpu(src->sf_oi_count);
76         des->sf_internal_flags  = le16_to_cpu(src->sf_internal_flags);
77         memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
78 }
79
80 static void scrub_file_to_le(struct scrub_file *des, struct scrub_file *src)
81 {
82         memcpy(des->sf_uuid, src->sf_uuid, 16);
83         des->sf_flags   = cpu_to_le64(src->sf_flags);
84         des->sf_magic   = cpu_to_le32(src->sf_magic);
85         des->sf_status  = cpu_to_le16(src->sf_status);
86         des->sf_param   = cpu_to_le16(src->sf_param);
87         des->sf_time_last_complete      =
88                                 cpu_to_le64(src->sf_time_last_complete);
89         des->sf_time_latest_start       =
90                                 cpu_to_le64(src->sf_time_latest_start);
91         des->sf_time_last_checkpoint    =
92                                 cpu_to_le64(src->sf_time_last_checkpoint);
93         des->sf_pos_latest_start        =
94                                 cpu_to_le64(src->sf_pos_latest_start);
95         des->sf_pos_last_checkpoint     =
96                                 cpu_to_le64(src->sf_pos_last_checkpoint);
97         des->sf_pos_first_inconsistent  =
98                                 cpu_to_le64(src->sf_pos_first_inconsistent);
99         des->sf_items_checked           =
100                                 cpu_to_le64(src->sf_items_checked);
101         des->sf_items_updated           =
102                                 cpu_to_le64(src->sf_items_updated);
103         des->sf_items_failed            =
104                                 cpu_to_le64(src->sf_items_failed);
105         des->sf_items_updated_prior     =
106                                 cpu_to_le64(src->sf_items_updated_prior);
107         des->sf_run_time        = cpu_to_le32(src->sf_run_time);
108         des->sf_success_count   = cpu_to_le32(src->sf_success_count);
109         des->sf_oi_count        = cpu_to_le16(src->sf_oi_count);
110         des->sf_internal_flags  = cpu_to_le16(src->sf_internal_flags);
111         memcpy(des->sf_oi_bitmap, src->sf_oi_bitmap, SCRUB_OI_BITMAP_SIZE);
112 }
113
114 void scrub_file_init(struct lustre_scrub *scrub, __u8 *uuid)
115 {
116         struct scrub_file *sf = &scrub->os_file;
117
118         memset(sf, 0, sizeof(*sf));
119         memcpy(sf->sf_uuid, uuid, 16);
120         sf->sf_magic = SCRUB_MAGIC_V1;
121         sf->sf_status = SS_INIT;
122 }
123 EXPORT_SYMBOL(scrub_file_init);
124
125 void scrub_file_reset(struct lustre_scrub *scrub, __u8 *uuid, __u64 flags)
126 {
127         struct scrub_file *sf = &scrub->os_file;
128
129         CDEBUG(D_LFSCK, "%s: reset OI scrub file, old flags = "
130                "%#llx, add flags = %#llx\n",
131                scrub->os_name, sf->sf_flags, flags);
132
133         memcpy(sf->sf_uuid, uuid, 16);
134         sf->sf_status = SS_INIT;
135         sf->sf_flags |= flags;
136         sf->sf_flags &= ~SF_AUTO;
137         sf->sf_run_time = 0;
138         sf->sf_time_latest_start = 0;
139         sf->sf_time_last_checkpoint = 0;
140         sf->sf_pos_latest_start = 0;
141         sf->sf_pos_last_checkpoint = 0;
142         sf->sf_pos_first_inconsistent = 0;
143         sf->sf_items_checked = 0;
144         sf->sf_items_updated = 0;
145         sf->sf_items_failed = 0;
146         sf->sf_items_noscrub = 0;
147         sf->sf_items_igif = 0;
148         if (!scrub->os_in_join)
149                 sf->sf_items_updated_prior = 0;
150 }
151 EXPORT_SYMBOL(scrub_file_reset);
152
153 int scrub_file_load(const struct lu_env *env, struct lustre_scrub *scrub)
154 {
155         struct scrub_file *sf = &scrub->os_file;
156         struct lu_buf buf = {
157                 .lb_buf = &scrub->os_file_disk,
158                 .lb_len = sizeof(scrub->os_file_disk)
159         };
160         loff_t pos = 0;
161         int rc;
162
163         rc = dt_read(env, scrub->os_obj, &buf, &pos);
164         /* failure */
165         if (rc < 0) {
166                 CERROR("%s: fail to load scrub file: rc = %d\n",
167                        scrub->os_name, rc);
168                 return rc;
169         }
170
171         /* empty */
172         if (!rc)
173                 return -ENOENT;
174
175         /* corrupted */
176         if (rc < buf.lb_len) {
177                 CDEBUG(D_LFSCK, "%s: fail to load scrub file, "
178                        "expected = %d: rc = %d\n",
179                        scrub->os_name, (int)buf.lb_len, rc);
180                 return -EFAULT;
181         }
182
183         scrub_file_to_cpu(sf, &scrub->os_file_disk);
184         if (sf->sf_magic != SCRUB_MAGIC_V1) {
185                 CDEBUG(D_LFSCK, "%s: invalid scrub magic 0x%x != 0x%x\n",
186                        scrub->os_name, sf->sf_magic, SCRUB_MAGIC_V1);
187                 return -EFAULT;
188         }
189
190         return 0;
191 }
192 EXPORT_SYMBOL(scrub_file_load);
193
194 int scrub_file_store(const struct lu_env *env, struct lustre_scrub *scrub)
195 {
196         struct scrub_file *sf = &scrub->os_file_disk;
197         struct dt_object *obj = scrub->os_obj;
198         struct dt_device *dev = scrub_obj2dev(obj);
199         struct lu_buf buf = {
200                 .lb_buf = sf,
201                 .lb_len = sizeof(*sf)
202         };
203         struct thandle *th;
204         loff_t pos = 0;
205         int rc;
206         ENTRY;
207
208         /* Skip store under rdonly mode. */
209         if (dev->dd_rdonly)
210                 RETURN(0);
211
212         scrub_file_to_le(sf, &scrub->os_file);
213         th = dt_trans_create(env, dev);
214         if (IS_ERR(th))
215                 GOTO(log, rc = PTR_ERR(th));
216
217         rc = dt_declare_record_write(env, obj, &buf, pos, th);
218         if (rc)
219                 GOTO(stop, rc);
220
221         rc = dt_trans_start_local(env, dev, th);
222         if (rc)
223                 GOTO(stop, rc);
224
225         rc = dt_record_write(env, obj, &buf, &pos, th);
226
227         GOTO(stop, rc);
228
229 stop:
230         dt_trans_stop(env, dev, th);
231
232 log:
233         if (rc)
234                 CERROR("%s: store scrub file: rc = %d\n",
235                        scrub->os_name, rc);
236         else
237                 CDEBUG(D_LFSCK, "%s: store scrub file: rc = %d\n",
238                        scrub->os_name, rc);
239
240         scrub->os_time_last_checkpoint = cfs_time_current();
241         scrub->os_time_next_checkpoint = scrub->os_time_last_checkpoint +
242                                 cfs_time_seconds(SCRUB_CHECKPOINT_INTERVAL);
243         return rc;
244 }
245 EXPORT_SYMBOL(scrub_file_store);
246
247 int scrub_checkpoint(const struct lu_env *env, struct lustre_scrub *scrub)
248 {
249         struct scrub_file *sf = &scrub->os_file;
250         int rc;
251
252         if (likely(cfs_time_before(cfs_time_current(),
253                                    scrub->os_time_next_checkpoint) ||
254                    scrub->os_new_checked == 0))
255                 return 0;
256
257         CDEBUG(D_LFSCK, "%s: OI scrub checkpoint at pos %llu\n",
258                scrub->os_name, scrub->os_pos_current);
259
260         down_write(&scrub->os_rwsem);
261         sf->sf_items_checked += scrub->os_new_checked;
262         scrub->os_new_checked = 0;
263         sf->sf_pos_last_checkpoint = scrub->os_pos_current;
264         sf->sf_time_last_checkpoint = cfs_time_current_sec();
265         sf->sf_run_time += cfs_duration_sec(cfs_time_current() + HALF_SEC -
266                                             scrub->os_time_last_checkpoint);
267         rc = scrub_file_store(env, scrub);
268         up_write(&scrub->os_rwsem);
269
270         return rc;
271 }
272 EXPORT_SYMBOL(scrub_checkpoint);
273
274 int scrub_start(int (*threadfn)(void *data), struct lustre_scrub *scrub,
275                 void *data, __u32 flags)
276 {
277         struct ptlrpc_thread *thread = &scrub->os_thread;
278         struct l_wait_info lwi = { 0 };
279         struct task_struct *task;
280         int rc;
281         ENTRY;
282
283 again:
284         /* os_lock: sync status between stop and scrub thread */
285         spin_lock(&scrub->os_lock);
286         if (thread_is_running(thread)) {
287                 spin_unlock(&scrub->os_lock);
288                 RETURN(-EALREADY);
289         }
290
291         if (unlikely(thread_is_stopping(thread))) {
292                 spin_unlock(&scrub->os_lock);
293                 l_wait_event(thread->t_ctl_waitq,
294                              thread_is_stopped(thread),
295                              &lwi);
296                 goto again;
297         }
298         spin_unlock(&scrub->os_lock);
299
300         if (scrub->os_file.sf_status == SS_COMPLETED) {
301                 if (!(flags & SS_SET_FAILOUT))
302                         flags |= SS_CLEAR_FAILOUT;
303
304                 if (!(flags & SS_SET_DRYRUN))
305                         flags |= SS_CLEAR_DRYRUN;
306
307                 flags |= SS_RESET;
308         }
309
310         scrub->os_start_flags = flags;
311         thread_set_flags(thread, 0);
312         task = kthread_run(threadfn, data, "OI_scrub");
313         if (IS_ERR(task)) {
314                 rc = PTR_ERR(task);
315                 CERROR("%s: cannot start iteration thread: rc = %d\n",
316                        scrub->os_name, rc);
317                 RETURN(rc);
318         }
319
320         l_wait_event(thread->t_ctl_waitq,
321                      thread_is_running(thread) || thread_is_stopped(thread),
322                      &lwi);
323
324         RETURN(0);
325 }
326 EXPORT_SYMBOL(scrub_start);
327
328 void scrub_stop(struct lustre_scrub *scrub)
329 {
330         struct ptlrpc_thread *thread = &scrub->os_thread;
331         struct l_wait_info lwi = { 0 };
332
333         /* os_lock: sync status between stop and scrub thread */
334         spin_lock(&scrub->os_lock);
335         if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
336                 thread_set_flags(thread, SVC_STOPPING);
337                 spin_unlock(&scrub->os_lock);
338                 wake_up_all(&thread->t_ctl_waitq);
339                 l_wait_event(thread->t_ctl_waitq,
340                              thread_is_stopped(thread),
341                              &lwi);
342                 /* Do not skip the last lock/unlock, which can guarantee that
343                  * the caller cannot return until the OI scrub thread exit. */
344                 spin_lock(&scrub->os_lock);
345         }
346         spin_unlock(&scrub->os_lock);
347 }
348 EXPORT_SYMBOL(scrub_stop);
349
/*
 * Printable names for the sf_status value; scrub_dump() indexes this table
 * directly with sf_status. The table is NULL-terminated.
 */
const char *scrub_status_names[] = {
	"init",		/* 0 */
	"scanning",	/* 1 */
	"completed",	/* 2 */
	"failed",	/* 3 */
	"stopped",	/* 4 */
	"paused",	/* 5 */
	"crashed",	/* 6 */
	NULL
};

/*
 * Printable names for the sf_flags bits: entry i names bit (1 << i).
 * The table is NULL-terminated.
 */
const char *scrub_flags_names[] = {
	"recreated",	/* bit 0 */
	"inconsistent",	/* bit 1 */
	"auto",		/* bit 2 */
	"upgrade",	/* bit 3 */
	NULL
};

/*
 * Printable names for the sf_param bits: entry i names bit (1 << i).
 * The table is NULL-terminated.
 */
const char *scrub_param_names[] = {
	"failout",	/* bit 0 */
	"dryrun",	/* bit 1 */
	NULL
};
374
375 static void scrub_bits_dump(struct seq_file *m, int bits, const char *names[],
376                             const char *prefix)
377 {
378         int flag;
379         int i;
380
381         seq_printf(m, "%s:%c", prefix, bits != 0 ? ' ' : '\n');
382
383         for (i = 0, flag = 1; bits != 0; i++, flag = 1 << i) {
384                 if (flag & bits) {
385                         bits &= ~flag;
386                         seq_printf(m, "%s%c", names[i],
387                                    bits != 0 ? ',' : '\n');
388                 }
389         }
390 }
391
392 static void scrub_time_dump(struct seq_file *m, __u64 time, const char *prefix)
393 {
394         if (time != 0)
395                 seq_printf(m, "%s: %llu seconds\n", prefix,
396                            cfs_time_current_sec() - time);
397         else
398                 seq_printf(m, "%s: N/A\n", prefix);
399 }
400
401 static void scrub_pos_dump(struct seq_file *m, __u64 pos, const char *prefix)
402 {
403         if (pos != 0)
404                 seq_printf(m, "%s: %llu\n", prefix, pos);
405         else
406                 seq_printf(m, "%s: N/A\n", prefix);
407 }
408
409 void scrub_dump(struct seq_file *m, struct lustre_scrub *scrub)
410 {
411         struct scrub_file *sf = &scrub->os_file;
412         __u64 checked;
413         __u64 speed;
414
415         down_read(&scrub->os_rwsem);
416         seq_printf(m, "name: OI_scrub\n"
417                    "magic: 0x%x\n"
418                    "oi_files: %d\n"
419                    "status: %s\n",
420                    sf->sf_magic, (int)sf->sf_oi_count,
421                    scrub_status_names[sf->sf_status]);
422
423         scrub_bits_dump(m, sf->sf_flags, scrub_flags_names, "flags");
424
425         scrub_bits_dump(m, sf->sf_param, scrub_param_names, "param");
426
427         scrub_time_dump(m, sf->sf_time_last_complete,
428                         "time_since_last_completed");
429
430         scrub_time_dump(m, sf->sf_time_latest_start,
431                         "time_since_latest_start");
432
433         scrub_time_dump(m, sf->sf_time_last_checkpoint,
434                         "time_since_last_checkpoint");
435
436         scrub_pos_dump(m, sf->sf_pos_latest_start,
437                         "latest_start_position");
438
439         scrub_pos_dump(m, sf->sf_pos_last_checkpoint,
440                         "last_checkpoint_position");
441
442         scrub_pos_dump(m, sf->sf_pos_first_inconsistent,
443                         "first_failure_position");
444
445         checked = sf->sf_items_checked + scrub->os_new_checked;
446         seq_printf(m, "checked: %llu\n"
447                    "%s: %llu\n"
448                    "failed: %llu\n"
449                    "prior_%s: %llu\n"
450                    "noscrub: %llu\n"
451                    "igif: %llu\n"
452                    "success_count: %u\n",
453                    checked,
454                    sf->sf_param & SP_DRYRUN ? "inconsistent" : "updated",
455                    sf->sf_items_updated, sf->sf_items_failed,
456                    sf->sf_param & SP_DRYRUN ? "inconsistent" : "updated",
457                    sf->sf_items_updated_prior, sf->sf_items_noscrub,
458                    sf->sf_items_igif, sf->sf_success_count);
459
460         speed = checked;
461         if (thread_is_running(&scrub->os_thread)) {
462                 cfs_duration_t duration = cfs_time_current() -
463                                           scrub->os_time_last_checkpoint;
464                 __u64 new_checked = msecs_to_jiffies(scrub->os_new_checked *
465                                                      MSEC_PER_SEC);
466                 __u32 rtime = sf->sf_run_time +
467                               cfs_duration_sec(duration + HALF_SEC);
468
469                 if (duration != 0)
470                         do_div(new_checked, duration);
471                 if (rtime != 0)
472                         do_div(speed, rtime);
473                 seq_printf(m, "run_time: %u seconds\n"
474                            "average_speed: %llu objects/sec\n"
475                            "real-time_speed: %llu objects/sec\n"
476                            "current_position: %llu\n"
477                            "scrub_in_prior: %s\n"
478                            "scrub_full_speed: %s\n"
479                            "partial_scan: %s\n",
480                            rtime, speed, new_checked, scrub->os_pos_current,
481                            scrub->os_in_prior ? "yes" : "no",
482                            scrub->os_full_speed ? "yes" : "no",
483                            scrub->os_partial_scan ? "yes" : "no");
484         } else {
485                 if (sf->sf_run_time != 0)
486                         do_div(speed, sf->sf_run_time);
487                 seq_printf(m, "run_time: %u seconds\n"
488                            "average_speed: %llu objects/sec\n"
489                            "real-time_speed: N/A\n"
490                            "current_position: N/A\n",
491                            sf->sf_run_time, speed);
492         }
493
494         up_read(&scrub->os_rwsem);
495 }
496 EXPORT_SYMBOL(scrub_dump);