Whamcloud - gitweb
2c0f369aaeee4c8b17223d48edb6f9048da78784
[fs/lustre-release.git] / lustre / utils / lustre_lfsck.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2016, Intel Corporation.
24  */
25 /*
26  * lustre/utils/lustre_lfsck.c
27  *
28  * Lustre user-space tools for LFSCK.
29  *
30  * Author: Fan Yong <yong.fan@whamcloud.com>
31  */
32
33 #include <stdio.h>
34 #include <unistd.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <getopt.h>
39 #include <sys/ioctl.h>
40 #include <time.h>
41
42 #include "obdctl.h"
43
44 #include <lustre/lustre_lfsck_user.h>
45 #include <lnet/lnetctl.h>
46 #include <lustre_ioctl.h>
47 /* Needs to be last to avoid clashes */
48 #include <libcfs/util/ioctl.h>
49 #include <libcfs/util/param.h>
50
/*
 * Long options understood by lfsck_start.  The multi-word options are listed
 * twice, once spelled with '_' and once with '-'; both spellings share the
 * same short option character.
 */
static struct option long_opt_start[] = {
        { "device",              required_argument, NULL, 'M' },
        { "all",                 no_argument,       NULL, 'A' },
        { "create_ostobj",       optional_argument, NULL, 'c' },
        { "create-ostobj",       optional_argument, NULL, 'c' },
        { "create_mdtobj",       optional_argument, NULL, 'C' },
        { "create-mdtobj",       optional_argument, NULL, 'C' },
        { "delay_create_ostobj", optional_argument, NULL, 'd' },
        { "delay-create-ostobj", optional_argument, NULL, 'd' },
        { "error",               required_argument, NULL, 'e' },
        { "help",                no_argument,       NULL, 'h' },
        { "dryrun",              optional_argument, NULL, 'n' },
        { "orphan",              no_argument,       NULL, 'o' },
        { "reset",               no_argument,       NULL, 'r' },
        { "speed",               required_argument, NULL, 's' },
        { "type",                required_argument, NULL, 't' },
        { "window_size",         required_argument, NULL, 'w' },
        { "window-size",         required_argument, NULL, 'w' },
        /* all-zero entry terminates the table for getopt_long() */
        { NULL,                  0,                 NULL,  0  }
};
71
/* Long options understood by lfsck_stop. */
static struct option long_opt_stop[] = {
        { "device", required_argument, NULL, 'M' },
        { "all",    no_argument,       NULL, 'A' },
        { "help",   no_argument,       NULL, 'h' },
        /* all-zero entry terminates the table for getopt_long() */
        { NULL,     0,                 NULL,  0  }
};
78
/* Long options understood by lfsck_query. */
static struct option long_opt_query[] = {
        { "device", required_argument, NULL, 'M' },
        { "type",   required_argument, NULL, 't' },
        { "help",   no_argument,       NULL, 'h' },
        { "wait",   no_argument,       NULL, 'w' },
        /* all-zero entry terminates the table for getopt_long() */
        { NULL,     0,                 NULL,  0  }
};
86
87 struct lfsck_type_name {
88         char            *ltn_name;
89         enum lfsck_type  ltn_type;
90 };
91
92 static struct lfsck_type_name lfsck_types_names[] = {
93         { "scrub",      LFSCK_TYPE_SCRUB },
94         { "layout",     LFSCK_TYPE_LAYOUT },
95         { "namespace",  LFSCK_TYPE_NAMESPACE },
96         { "default",    LFSCK_TYPES_DEF },
97         { "all",        LFSCK_TYPES_SUPPORTED },
98         { NULL,         0 }
99 };
100
101 static enum lfsck_type lfsck_name2type(const char *name)
102 {
103         int i;
104
105         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
106                 if (strcmp(lfsck_types_names[i].ltn_name, name) == 0)
107                         return lfsck_types_names[i].ltn_type;
108         }
109         return -1;
110 }
111
112 static const char *lfsck_type2name(__u16 type)
113 {
114         int i;
115
116         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
117                 if (type == lfsck_types_names[i].ltn_type)
118                         return lfsck_types_names[i].ltn_name;
119         }
120
121         return NULL;
122 }
123
124 static void usage_start(void)
125 {
126         fprintf(stderr, "start LFSCK\n"
127                 "usage:\n"
128                 "lfsck_start [-M | --device {MDT,OST}_device]\n"
129                 "            [-A | --all] [-c | --create_ostobj [on | off]]\n"
130                 "            [-C | --create_mdtobj [on | off]]\n"
131                 "            [-d | --delay_create_ostobj [on | off]]\n"
132                 "            [-e | --error {continue | abort}] [-h | --help]\n"
133                 "            [-n | --dryrun [on | off]] [-o | --orphan]\n"
134                 "            [-r | --reset] [-s | --speed ops_per_sec_limit]\n"
135                 "            [-t | --type check_type[,check_type...]]\n"
136                 "            [-w | --window_size size]\n"
137                 "options:\n"
138                 "-M: device to start LFSCK/scrub on\n"
139                 "-A: start LFSCK on all nodes via the specified MDT device "
140                     "(see \"-M\" option) by single LFSCK command\n"
141                 "-c: create the lost OST-object for dangling LOV EA "
142                     "(default 'off', or 'on')\n"
143                 "-C: create the lost MDT-object for dangling name entry "
144                     "(default 'off', or 'on')\n"
145                 "-d: delay create the lost OST-object for dangling LOV EA "
146                     "until orphan OST-objects handled (default 'off', or 'on')\n"
147                 "-e: error handle mode (default 'continue', or 'abort')\n"
148                 "-h: this help message\n"
149                 "-n: check with no modification (default 'off', or 'on')\n"
150                 "-o: repair orphan OST-objects\n"
151                 "-r: reset scanning to the start of the device\n"
152                 "-s: maximum items to be scanned per second "
153                     "(default '%d' = no limit)\n"
154                 "-t: check type(s) to be performed (default all)\n"
155                 "-w: window size for async requests pipeline\n",
156                 LFSCK_SPEED_NO_LIMIT);
157 }
158
/*
 * Print the lfsck_stop synopsis and option descriptions to stderr.
 * The text has no format arguments, so it is written with fputs().
 */
static void usage_stop(void)
{
        fputs("stop LFSCK\n"
              "usage:\n"
              "lfsck_stop [-M | --device {MDT,OST}_device]\n"
              "           [-A | --all] [-h | --help]\n",
              stderr);

        fputs("options:\n"
              "-M: device to stop LFSCK/scrub on\n"
              "-A: stop LFSCK on all nodes via the specified MDT device "
              "(see \"-M\" option) by single LFSCK command\n"
              "-h: this help message\n",
              stderr);
}
171
/*
 * Print the lfsck_query synopsis and option descriptions to stderr.
 *
 * The short option for --wait is 'w' (see long_opt_query and the option
 * string passed to getopt_long() in jt_lfsck_query()).
 */
static void usage_query(void)
{
        fprintf(stderr, "check the LFSCK global status\n"
                "usage:\n"
                "lfsck_query [-M | --device MDT_device] [-h | --help]\n"
                "            [-t | --type check_type[,check_type...]]\n"
                "            [-w | --wait]\n"
                "options:\n"
                "-M: device to query LFSCK on\n"
                "-t: LFSCK type(s) to be queried (default is all)\n"
                "-h: this help message\n"
                "-w: do not return until LFSCK not running\n");
}
185
186 static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg)
187 {
188         int len = strlen(arg) + 1;
189
190         if (len > MAX_OBD_NAME) {
191                 fprintf(stderr, "device name is too long. "
192                         "Valid length should be less than %d\n", MAX_OBD_NAME);
193                 return -EINVAL;
194         }
195
196         memcpy(device, arg, len);
197         data->ioc_inlbuf4 = device;
198         data->ioc_inllen4 = len;
199         data->ioc_dev = OBD_DEV_BY_DEVNAME;
200         return 0;
201 }
202
203 static int lfsck_get_dev_name(struct obd_ioctl_data *data, char *device,
204                               int types, bool multipe_devices)
205 {
206         glob_t param = { 0 };
207         char *ptr;
208         int rc;
209         int i;
210
211         rc = cfs_get_param_paths(&param, "mdd/*-MDT*");
212         if (rc) {
213                 if (multipe_devices || errno != ENOENT ||
214                     types & LFSCK_TYPE_NAMESPACE) {
215                         fprintf(stderr, "Fail to get device name: rc = %d\n."
216                                 "You can specify the device explicitly "
217                                 "via '-M' option.\n", rc);
218                         return rc;
219                 }
220
221                 rc = cfs_get_param_paths(&param, "obdfilter/*-OST*");
222                 if (rc) {
223                         fprintf(stderr, "Fail to get device name: rc = %d\n."
224                                 "You can specify the device explicitly "
225                                 "via '-M' option.\n", rc);
226                         return rc;
227                 }
228         }
229
230         if (param.gl_pathc == 1)
231                 goto pack;
232
233         if (!multipe_devices) {
234                 fprintf(stderr,
235                         "Detect multiple devices on current node. "
236                         "Please specify the device explicitly "
237                         "via '-M' option or '-A' option for all.\n");
238                 rc = -EINVAL;
239                 goto out;
240         }
241
242         ptr = strrchr(param.gl_pathv[0], '-');
243         LASSERT(ptr != NULL);
244
245         for (i = 1; i < param.gl_pathc; i++) {
246                 char *ptr2 = strrchr(param.gl_pathv[i], '-');
247
248                 LASSERT(ptr2 != NULL);
249
250                 if ((ptr - param.gl_pathv[0]) != (ptr2 - param.gl_pathv[i]) ||
251                     strncmp(param.gl_pathv[0], param.gl_pathv[i],
252                             (ptr - param.gl_pathv[0])) != 0) {
253                         fprintf(stderr,
254                                 "Detect multiple filesystems on current node. "
255                                 "Please specify the device explicitly "
256                                 "via '-M' option.\n");
257                         rc = -EINVAL;
258                         goto out;
259                 }
260         }
261
262 pack:
263         rc = lfsck_pack_dev(data, device, basename(param.gl_pathv[0]));
264
265 out:
266         cfs_free_param_data(&param);
267
268         return rc;
269 }
270
271 int jt_lfsck_start(int argc, char **argv)
272 {
273         struct obd_ioctl_data data;
274         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
275         char device[MAX_OBD_NAME];
276         struct lfsck_start start;
277         char *optstring = "Ac::C::d::e:hM:n::ors:t:w:";
278         int opt, index, rc, val, i;
279
280         memset(&data, 0, sizeof(data));
281         memset(&start, 0, sizeof(start));
282         memset(device, 0, MAX_OBD_NAME);
283         start.ls_version = LFSCK_VERSION_V1;
284         start.ls_active = LFSCK_TYPES_ALL;
285
286         /* Reset the 'optind' for the case of getopt_long() called multiple
287          * times under the same lctl. */
288         optind = 0;
289         while ((opt = getopt_long(argc, argv, optstring, long_opt_start,
290                                   &index)) != EOF) {
291                 switch (opt) {
292                 case 'A':
293                         start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
294                         break;
295                 case 'c':
296                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
297                                 start.ls_flags |= LPF_CREATE_OSTOBJ;
298                         } else if (strcmp(optarg, "off") != 0) {
299                                 fprintf(stderr, "invalid switch: -c '%s'. "
300                                         "valid switches are:\n"
301                                         "empty ('on'), or 'off' without space. "
302                                         "For example:\n"
303                                         "'-c', '-con', '-coff'\n", optarg);
304                                 return -EINVAL;
305                         }
306                         start.ls_valid |= LSV_CREATE_OSTOBJ;
307                         break;
308                 case 'C':
309                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
310                                 start.ls_flags |= LPF_CREATE_MDTOBJ;
311                         } else if (strcmp(optarg, "off") != 0) {
312                                 fprintf(stderr, "invalid switch: -C '%s'. "
313                                         "valid switches are:\n"
314                                         "empty ('on'), or 'off' without space. "
315                                         "For example:\n"
316                                         "'-C', '-Con', '-Coff'\n", optarg);
317                                 return -EINVAL;
318                         }
319                         start.ls_valid |= LSV_CREATE_MDTOBJ;
320                         break;
321                 case 'd':
322                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
323                                 start.ls_flags |= LPF_DELAY_CREATE_OSTOBJ;
324                         } else if (strcmp(optarg, "off") != 0) {
325                                 fprintf(stderr, "invalid switch: -c '%s'. "
326                                         "valid switches are:\n"
327                                         "empty ('on'), or 'off' without space. "
328                                         "For example:\n"
329                                         "'-c', '-con', '-coff'\n", optarg);
330                                 return -EINVAL;
331                         }
332                         start.ls_valid |= LSV_DELAY_CREATE_OSTOBJ;
333                         break;
334                 case 'e':
335                         if (strcmp(optarg, "abort") == 0) {
336                                 start.ls_flags |= LPF_FAILOUT;
337                         } else if (strcmp(optarg, "continue") != 0) {
338                                 fprintf(stderr, "invalid error mode: -e '%s'."
339                                         "valid modes are: "
340                                         "'continue' or 'abort'.\n", optarg);
341                                 return -EINVAL;
342                         }
343                         start.ls_valid |= LSV_ERROR_HANDLE;
344                         break;
345                 case 'h':
346                         usage_start();
347                         return 0;
348                 case 'M':
349                         rc = lfsck_pack_dev(&data, device, optarg);
350                         if (rc != 0)
351                                 return rc;
352                         break;
353                 case 'n':
354                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
355                                 start.ls_flags |= LPF_DRYRUN;
356                         } else if (strcmp(optarg, "off") != 0) {
357                                 fprintf(stderr, "invalid switch: -n '%s'. "
358                                         "valid switches are:\n"
359                                         "empty ('on'), or 'off' without space. "
360                                         "For example:\n"
361                                         "'-n', '-non', '-noff'\n", optarg);
362                                 return -EINVAL;
363                         }
364                         start.ls_valid |= LSV_DRYRUN;
365                         break;
366                 case 'o':
367                         start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST |
368                                           LPF_OST_ORPHAN;
369                         break;
370                 case 'r':
371                         start.ls_flags |= LPF_RESET;
372                         break;
373                 case 's':
374                         val = atoi(optarg);
375                         start.ls_speed_limit = val;
376                         start.ls_valid |= LSV_SPEED_LIMIT;
377                         break;
378                 case 't': {
379                         char *typename;
380
381                         if (start.ls_active == LFSCK_TYPES_ALL)
382                                 start.ls_active = 0;
383                         while ((typename = strsep(&optarg, ",")) != NULL) {
384                                 enum lfsck_type type;
385
386                                 type = lfsck_name2type(typename);
387                                 if (type == -1)
388                                         goto bad_type;
389                                 start.ls_active |= type;
390                         }
391                         break;
392 bad_type:
393                         fprintf(stderr, "invalid check type -t '%s'. "
394                                 "valid types are:\n", typename);
395                         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++)
396                                 fprintf(stderr, "%s%s", i != 0 ? "," : "",
397                                         lfsck_types_names[i].ltn_name);
398                         fprintf(stderr, "\n");
399                         return -EINVAL;
400                 }
401                 case 'w':
402                         val = atoi(optarg);
403                         if (val < 1 || val > LFSCK_ASYNC_WIN_MAX) {
404                                 fprintf(stderr,
405                                         "Invalid async window size that "
406                                         "may cause memory issues. The valid "
407                                         "range is [1 - %u].\n",
408                                         LFSCK_ASYNC_WIN_MAX);
409                                 return -EINVAL;
410                         }
411
412                         start.ls_async_windows = val;
413                         start.ls_valid |= LSV_ASYNC_WINDOWS;
414                         break;
415                 default:
416                         fprintf(stderr, "Invalid option, '-h' for help.\n");
417                         return -EINVAL;
418                 }
419         }
420
421         if (start.ls_active == LFSCK_TYPES_ALL)
422                 start.ls_active = LFSCK_TYPES_DEF;
423
424         if (data.ioc_inlbuf4 == NULL) {
425                 rc = lfsck_get_dev_name(&data, device, start.ls_active,
426                                         start.ls_flags & LPF_ALL_TGT);
427                 if (rc != 0)
428                         return rc;
429         }
430
431         data.ioc_inlbuf1 = (char *)&start;
432         data.ioc_inllen1 = sizeof(start);
433         memset(buf, 0, sizeof(rawbuf));
434         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
435         if (rc != 0) {
436                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
437                 return rc;
438         }
439
440         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_START_LFSCK, buf);
441         if (rc < 0) {
442                 perror("Fail to start LFSCK");
443                 return rc;
444         }
445
446         obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
447         printf("Started LFSCK on the device %s: scrub", device);
448         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
449                 if (start.ls_active & lfsck_types_names[i].ltn_type) {
450                         printf(" %s", lfsck_types_names[i].ltn_name);
451                         start.ls_active &= ~lfsck_types_names[i].ltn_type;
452                 }
453         }
454         if (start.ls_active != 0)
455                 printf(" unknown(0x%x)", start.ls_active);
456         printf("\n");
457
458         return 0;
459 }
460
461 int jt_lfsck_stop(int argc, char **argv)
462 {
463         struct obd_ioctl_data data;
464         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
465         char device[MAX_OBD_NAME];
466         struct lfsck_stop stop;
467         char *optstring = "AhM:";
468         int opt, index, rc;
469
470         memset(&data, 0, sizeof(data));
471         memset(&stop, 0, sizeof(stop));
472         memset(device, 0, MAX_OBD_NAME);
473
474         /* Reset the 'optind' for the case of getopt_long() called multiple
475          * times under the same lctl. */
476         optind = 0;
477         while ((opt = getopt_long(argc, argv, optstring, long_opt_stop,
478                                   &index)) != EOF) {
479                 switch (opt) {
480                 case 'A':
481                         stop.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
482                         break;
483                 case 'h':
484                         usage_stop();
485                         return 0;
486                 case 'M':
487                         rc = lfsck_pack_dev(&data, device, optarg);
488                         if (rc != 0)
489                                 return rc;
490                         break;
491                 default:
492                         fprintf(stderr, "Invalid option, '-h' for help.\n");
493                         return -EINVAL;
494                 }
495         }
496
497         if (data.ioc_inlbuf4 == NULL) {
498                 rc = lfsck_get_dev_name(&data, device, 0,
499                                         stop.ls_flags & LPF_ALL_TGT);
500                 if (rc != 0)
501                         return rc;
502         }
503
504         data.ioc_inlbuf1 = (char *)&stop;
505         data.ioc_inllen1 = sizeof(stop);
506         memset(buf, 0, sizeof(rawbuf));
507         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
508         if (rc != 0) {
509                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
510                 return rc;
511         }
512
513         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_STOP_LFSCK, buf);
514         if (rc < 0) {
515                 perror("Fail to stop LFSCK");
516                 return rc;
517         }
518
519         printf("Stopped LFSCK on the device %s.\n", device);
520         return 0;
521 }
522
523 int jt_lfsck_query(int argc, char **argv)
524 {
525         struct obd_ioctl_data data = { 0 };
526         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
527         char device[MAX_OBD_NAME] = "";
528         struct lfsck_query query = { .lu_types = LFSCK_TYPES_ALL };
529         int opt, index, rc, i;
530         enum lfsck_type type;
531
532         while ((opt = getopt_long(argc, argv, "hM:t:w", long_opt_query,
533                                   &index)) != EOF) {
534                 switch (opt) {
535                 case 'h':
536                         usage_query();
537                         return 0;
538                 case 'M':
539                         rc = lfsck_pack_dev(&data, device, optarg);
540                         if (rc != 0)
541                                 return rc;
542                         break;
543                 case 't': {
544                         char *typename;
545
546                         if (query.lu_types == LFSCK_TYPES_ALL)
547                                 query.lu_types = 0;
548                         while ((typename = strsep(&optarg, ",")) != NULL) {
549                                 type = lfsck_name2type(typename);
550                                 if (type == -1)
551                                         goto bad_type;
552                                 query.lu_types |= type;
553                         }
554                         break;
555
556 bad_type:
557                         fprintf(stderr, "invalid LFSCK type -t '%s'. "
558                                 "valid types are:\n", typename);
559                         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++)
560                                 fprintf(stderr, "%s%s", i != 0 ? "," : "",
561                                         lfsck_types_names[i].ltn_name);
562                         fprintf(stderr, "\n");
563                         return -EINVAL;
564                 }
565                 case 'w':
566                         query.lu_flags |= LPF_WAIT;
567                         break;
568                 default:
569                         fprintf(stderr, "Invalid option, '-h' for help.\n");
570                         usage_query();
571                         return -EINVAL;
572                 }
573         }
574
575         if (data.ioc_inlbuf4 == NULL) {
576                 rc = lfsck_get_dev_name(&data, device, 0, true);
577                 if (rc != 0)
578                         return rc;
579         }
580
581         data.ioc_inlbuf1 = (char *)&query;
582         data.ioc_inllen1 = sizeof(query);
583         memset(buf, 0, sizeof(rawbuf));
584         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
585         if (rc != 0) {
586                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
587                 return rc;
588         }
589
590         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_QUERY_LFSCK, buf);
591         if (rc < 0) {
592                 perror("Fail to query LFSCK");
593                 return rc;
594         }
595
596         obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
597         for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS; i++, type = 1 << i) {
598                 const char *name;
599                 int j;
600
601                 if (!(query.lu_types & type))
602                         continue;
603
604                 name = lfsck_type2name(type);
605                 for (j = 0; j <= LS_MAX; j++)
606                         printf("%s_mdts_%s: %d\n", name,
607                                lfsck_status2name(j), query.lu_mdts_count[i][j]);
608
609                 for (j = 0; j <= LS_MAX; j++)
610                         printf("%s_osts_%s: %d\n", name,
611                                lfsck_status2name(j), query.lu_osts_count[i][j]);
612
613                 printf("%s_repaired: %llu\n", name, query.lu_repaired[i]);
614         }
615
616         return 0;
617 }