Whamcloud - gitweb
94f0b2b12ffe035d6f390ff168885ab50c4b6302
[fs/lustre-release.git] / lustre / utils / lustre_lfsck.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2012, 2016, Intel Corporation.
24  */
25 /*
26  * lustre/utils/lustre_lfsck.c
27  *
28  * Lustre user-space tools for LFSCK.
29  *
30  * Author: Fan Yong <yong.fan@whamcloud.com>
31  */
32
33 #include <stdio.h>
34 #include <unistd.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <getopt.h>
39 #include <sys/ioctl.h>
40 #include <time.h>
41
42 #include "obdctl.h"
43 #include "lustreapi_internal.h"
44
45 #include <lustre/lustre_lfsck_user.h>
46 #include <lnet/lnetctl.h>
47 #include <linux/lustre_ioctl.h>
48 /* Needs to be last to avoid clashes */
49 #include <libcfs/util/ioctl.h>
50 #include <libcfs/util/param.h>
51
/*
 * Long options accepted by "lfsck_start". Each entry maps onto the short
 * option character handled in jt_lfsck_start(); underscore and dash
 * spellings of the same option share one short option.
 */
static struct option long_opt_start[] = {
        { .name = "device",              .has_arg = required_argument, .val = 'M' },
        { .name = "all",                 .has_arg = no_argument,       .val = 'A' },
        { .name = "create_ostobj",       .has_arg = optional_argument, .val = 'c' },
        { .name = "create-ostobj",       .has_arg = optional_argument, .val = 'c' },
        { .name = "create_mdtobj",       .has_arg = optional_argument, .val = 'C' },
        { .name = "create-mdtobj",       .has_arg = optional_argument, .val = 'C' },
        { .name = "delay_create_ostobj", .has_arg = optional_argument, .val = 'd' },
        { .name = "delay-create-ostobj", .has_arg = optional_argument, .val = 'd' },
        { .name = "error",               .has_arg = required_argument, .val = 'e' },
        { .name = "help",                .has_arg = no_argument,       .val = 'h' },
        { .name = "dryrun",              .has_arg = optional_argument, .val = 'n' },
        { .name = "orphan",              .has_arg = no_argument,       .val = 'o' },
        { .name = "reset",               .has_arg = no_argument,       .val = 'r' },
        { .name = "speed",               .has_arg = required_argument, .val = 's' },
        { .name = "type",                .has_arg = required_argument, .val = 't' },
        { .name = "window_size",         .has_arg = required_argument, .val = 'w' },
        { .name = "window-size",         .has_arg = required_argument, .val = 'w' },
        /* all-zero terminator required by getopt_long() */
        { .name = NULL }
};
72
/* Long options accepted by "lfsck_stop"; see jt_lfsck_stop(). */
static struct option long_opt_stop[] = {
        { .name = "device", .has_arg = required_argument, .val = 'M' },
        { .name = "all",    .has_arg = no_argument,       .val = 'A' },
        { .name = "help",   .has_arg = no_argument,       .val = 'h' },
        /* all-zero terminator required by getopt_long() */
        { .name = NULL }
};
79
/* Long options accepted by "lfsck_query"; see jt_lfsck_query(). */
static struct option long_opt_query[] = {
        { .name = "device", .has_arg = required_argument, .val = 'M' },
        { .name = "type",   .has_arg = required_argument, .val = 't' },
        { .name = "help",   .has_arg = no_argument,       .val = 'h' },
        { .name = "wait",   .has_arg = no_argument,       .val = 'w' },
        /* all-zero terminator required by getopt_long() */
        { .name = NULL }
};
87
88 struct lfsck_type_name {
89         char            *ltn_name;
90         enum lfsck_type  ltn_type;
91 };
92
93 static struct lfsck_type_name lfsck_types_names[] = {
94         { "scrub",      LFSCK_TYPE_SCRUB },
95         { "layout",     LFSCK_TYPE_LAYOUT },
96         { "namespace",  LFSCK_TYPE_NAMESPACE },
97         { "default",    LFSCK_TYPES_DEF },
98         { "all",        LFSCK_TYPES_SUPPORTED },
99         { NULL,         0 }
100 };
101
102 static enum lfsck_type lfsck_name2type(const char *name)
103 {
104         int i;
105
106         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
107                 if (strcmp(lfsck_types_names[i].ltn_name, name) == 0)
108                         return lfsck_types_names[i].ltn_type;
109         }
110         return -1;
111 }
112
113 static const char *lfsck_type2name(__u16 type)
114 {
115         int i;
116
117         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
118                 if (type == lfsck_types_names[i].ltn_type)
119                         return lfsck_types_names[i].ltn_name;
120         }
121
122         return NULL;
123 }
124
125 static void usage_start(void)
126 {
127         fprintf(stderr, "start LFSCK\n"
128                 "usage:\n"
129                 "lfsck_start [-M | --device {MDT,OST}_device]\n"
130                 "            [-A | --all] [-c | --create_ostobj [on | off]]\n"
131                 "            [-C | --create_mdtobj [on | off]]\n"
132                 "            [-d | --delay_create_ostobj [on | off]]\n"
133                 "            [-e | --error {continue | abort}] [-h | --help]\n"
134                 "            [-n | --dryrun [on | off]] [-o | --orphan]\n"
135                 "            [-r | --reset] [-s | --speed ops_per_sec_limit]\n"
136                 "            [-t | --type check_type[,check_type...]]\n"
137                 "            [-w | --window_size size]\n"
138                 "options:\n"
139                 "-M: device to start LFSCK/scrub on\n"
140                 "-A: start LFSCK on all nodes via the specified MDT device "
141                     "(see \"-M\" option) by single LFSCK command\n"
142                 "-c: create the lost OST-object for dangling LOV EA "
143                     "(default 'off', or 'on')\n"
144                 "-C: create the lost MDT-object for dangling name entry "
145                     "(default 'off', or 'on')\n"
146                 "-d: delay create the lost OST-object for dangling LOV EA "
147                     "until orphan OST-objects handled (default 'off', or 'on')\n"
148                 "-e: error handle mode (default 'continue', or 'abort')\n"
149                 "-h: this help message\n"
150                 "-n: check with no modification (default 'off', or 'on')\n"
151                 "-o: repair orphan OST-objects\n"
152                 "-r: reset scanning to the start of the device\n"
153                 "-s: maximum items to be scanned per second "
154                     "(default '%d' = no limit)\n"
155                 "-t: check type(s) to be performed (default all)\n"
156                 "-w: window size for async requests pipeline\n",
157                 LFSCK_SPEED_NO_LIMIT);
158 }
159
/* Print the "lfsck_stop" synopsis and option descriptions to stderr. */
static void usage_stop(void)
{
        static const char help_text[] =
                "stop LFSCK\n"
                "usage:\n"
                "lfsck_stop [-M | --device {MDT,OST}_device]\n"
                "           [-A | --all] [-h | --help]\n"
                "options:\n"
                "-M: device to stop LFSCK/scrub on\n"
                "-A: stop LFSCK on all nodes via the specified MDT device "
                "(see \"-M\" option) by single LFSCK command\n"
                "-h: this help message\n";

        /* The text contains no conversions, so it is written verbatim. */
        fputs(help_text, stderr);
}
172
/*
 * Print the "lfsck_query" synopsis and option descriptions to stderr.
 *
 * The short option paired with "--wait" is 'w' (see long_opt_query), so
 * the synopsis advertises "-w" rather than repeating "-t".
 */
static void usage_query(void)
{
        fprintf(stderr, "check the LFSCK global status\n"
                "usage:\n"
                "lfsck_query [-M | --device MDT_device] [-h | --help]\n"
                "            [-t | --type check_type[,check_type...]]\n"
                "            [-w | --wait]\n"
                "options:\n"
                "-M: device to query LFSCK on\n"
                "-t: LFSCK type(s) to be queried (default is all)\n"
                "-h: this help message\n"
                "-w: do not return until LFSCK not running\n");
}
186
187 static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg)
188 {
189         int len = strlen(arg) + 1;
190
191         if (len > MAX_OBD_NAME) {
192                 fprintf(stderr, "device name is too long. "
193                         "Valid length should be less than %d\n", MAX_OBD_NAME);
194                 return -EINVAL;
195         }
196
197         memcpy(device, arg, len);
198         data->ioc_inlbuf4 = device;
199         data->ioc_inllen4 = len;
200         data->ioc_dev = OBD_DEV_BY_DEVNAME;
201         return 0;
202 }
203
204 static int lfsck_get_dev_name(struct obd_ioctl_data *data, char *device,
205                               int types, bool multipe_devices)
206 {
207         glob_t param = { 0 };
208         char *ptr;
209         int rc;
210         int i;
211
212         rc = cfs_get_param_paths(&param, "mdd/*-MDT*");
213         if (rc) {
214                 if (multipe_devices || errno != ENOENT ||
215                     types & LFSCK_TYPE_NAMESPACE) {
216                         fprintf(stderr, "Fail to get device name: rc = %d\n."
217                                 "You can specify the device explicitly "
218                                 "via '-M' option.\n", rc);
219                         return rc;
220                 }
221
222                 rc = cfs_get_param_paths(&param, "obdfilter/*-OST*");
223                 if (rc) {
224                         fprintf(stderr, "Fail to get device name: rc = %d\n."
225                                 "You can specify the device explicitly "
226                                 "via '-M' option.\n", rc);
227                         return rc;
228                 }
229         }
230
231         if (param.gl_pathc == 1)
232                 goto pack;
233
234         if (!multipe_devices) {
235                 fprintf(stderr,
236                         "Detect multiple devices on current node. "
237                         "Please specify the device explicitly "
238                         "via '-M' option or '-A' option for all.\n");
239                 rc = -EINVAL;
240                 goto out;
241         }
242
243         ptr = strrchr(param.gl_pathv[0], '-');
244         if (ptr == NULL) {
245                 rc = -EINVAL;
246                 goto out;
247         }
248
249         for (i = 1; i < param.gl_pathc; i++) {
250                 char *ptr2 = strrchr(param.gl_pathv[i], '-');
251
252                 if (ptr2 == NULL) {
253                         rc = -EINVAL;
254                         goto out;
255                 }
256
257                 if ((ptr - param.gl_pathv[0]) != (ptr2 - param.gl_pathv[i]) ||
258                     strncmp(param.gl_pathv[0], param.gl_pathv[i],
259                             (ptr - param.gl_pathv[0])) != 0) {
260                         fprintf(stderr,
261                                 "Detect multiple filesystems on current node. "
262                                 "Please specify the device explicitly "
263                                 "via '-M' option.\n");
264                         rc = -EINVAL;
265                         goto out;
266                 }
267         }
268
269 pack:
270         rc = lfsck_pack_dev(data, device, basename(param.gl_pathv[0]));
271
272 out:
273         cfs_free_param_data(&param);
274
275         return rc;
276 }
277
278 int jt_lfsck_start(int argc, char **argv)
279 {
280         struct obd_ioctl_data data;
281         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
282         char device[MAX_OBD_NAME];
283         struct lfsck_start start;
284         char *optstring = "Ac::C::d::e:hM:n::ors:t:w:";
285         int opt, index, rc, val, i;
286
287         memset(&data, 0, sizeof(data));
288         memset(&start, 0, sizeof(start));
289         memset(device, 0, MAX_OBD_NAME);
290         start.ls_version = LFSCK_VERSION_V1;
291         start.ls_active = LFSCK_TYPES_ALL;
292
293         /* Reset the 'optind' for the case of getopt_long() called multiple
294          * times under the same lctl. */
295         optind = 0;
296         while ((opt = getopt_long(argc, argv, optstring, long_opt_start,
297                                   &index)) != EOF) {
298                 switch (opt) {
299                 case 'A':
300                         start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
301                         break;
302                 case 'c':
303                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
304                                 start.ls_flags |= LPF_CREATE_OSTOBJ;
305                         } else if (strcmp(optarg, "off") != 0) {
306                                 fprintf(stderr, "invalid switch: -c '%s'. "
307                                         "valid switches are:\n"
308                                         "empty ('on'), or 'off' without space. "
309                                         "For example:\n"
310                                         "'-c', '-con', '-coff'\n", optarg);
311                                 return -EINVAL;
312                         }
313                         start.ls_valid |= LSV_CREATE_OSTOBJ;
314                         break;
315                 case 'C':
316                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
317                                 start.ls_flags |= LPF_CREATE_MDTOBJ;
318                         } else if (strcmp(optarg, "off") != 0) {
319                                 fprintf(stderr, "invalid switch: -C '%s'. "
320                                         "valid switches are:\n"
321                                         "empty ('on'), or 'off' without space. "
322                                         "For example:\n"
323                                         "'-C', '-Con', '-Coff'\n", optarg);
324                                 return -EINVAL;
325                         }
326                         start.ls_valid |= LSV_CREATE_MDTOBJ;
327                         break;
328                 case 'd':
329                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
330                                 start.ls_flags |= LPF_DELAY_CREATE_OSTOBJ;
331                         } else if (strcmp(optarg, "off") != 0) {
332                                 fprintf(stderr, "invalid switch: -c '%s'. "
333                                         "valid switches are:\n"
334                                         "empty ('on'), or 'off' without space. "
335                                         "For example:\n"
336                                         "'-c', '-con', '-coff'\n", optarg);
337                                 return -EINVAL;
338                         }
339                         start.ls_valid |= LSV_DELAY_CREATE_OSTOBJ;
340                         break;
341                 case 'e':
342                         if (strcmp(optarg, "abort") == 0) {
343                                 start.ls_flags |= LPF_FAILOUT;
344                         } else if (strcmp(optarg, "continue") != 0) {
345                                 fprintf(stderr, "invalid error mode: -e '%s'."
346                                         "valid modes are: "
347                                         "'continue' or 'abort'.\n", optarg);
348                                 return -EINVAL;
349                         }
350                         start.ls_valid |= LSV_ERROR_HANDLE;
351                         break;
352                 case 'h':
353                         usage_start();
354                         return 0;
355                 case 'M':
356                         rc = lfsck_pack_dev(&data, device, optarg);
357                         if (rc != 0)
358                                 return rc;
359                         break;
360                 case 'n':
361                         if (optarg == NULL || strcmp(optarg, "on") == 0) {
362                                 start.ls_flags |= LPF_DRYRUN;
363                         } else if (strcmp(optarg, "off") != 0) {
364                                 fprintf(stderr, "invalid switch: -n '%s'. "
365                                         "valid switches are:\n"
366                                         "empty ('on'), or 'off' without space. "
367                                         "For example:\n"
368                                         "'-n', '-non', '-noff'\n", optarg);
369                                 return -EINVAL;
370                         }
371                         start.ls_valid |= LSV_DRYRUN;
372                         break;
373                 case 'o':
374                         start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST |
375                                           LPF_OST_ORPHAN;
376                         break;
377                 case 'r':
378                         start.ls_flags |= LPF_RESET;
379                         break;
380                 case 's':
381                         val = atoi(optarg);
382                         start.ls_speed_limit = val;
383                         start.ls_valid |= LSV_SPEED_LIMIT;
384                         break;
385                 case 't': {
386                         char *typename;
387
388                         if (start.ls_active == LFSCK_TYPES_ALL)
389                                 start.ls_active = 0;
390                         while ((typename = strsep(&optarg, ",")) != NULL) {
391                                 enum lfsck_type type;
392
393                                 type = lfsck_name2type(typename);
394                                 if (type == -1)
395                                         goto bad_type;
396                                 start.ls_active |= type;
397                         }
398                         break;
399 bad_type:
400                         fprintf(stderr, "invalid check type -t '%s'. "
401                                 "valid types are:\n", typename);
402                         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++)
403                                 fprintf(stderr, "%s%s", i != 0 ? "," : "",
404                                         lfsck_types_names[i].ltn_name);
405                         fprintf(stderr, "\n");
406                         return -EINVAL;
407                 }
408                 case 'w':
409                         val = atoi(optarg);
410                         if (val < 1 || val > LFSCK_ASYNC_WIN_MAX) {
411                                 fprintf(stderr,
412                                         "Invalid async window size that "
413                                         "may cause memory issues. The valid "
414                                         "range is [1 - %u].\n",
415                                         LFSCK_ASYNC_WIN_MAX);
416                                 return -EINVAL;
417                         }
418
419                         start.ls_async_windows = val;
420                         start.ls_valid |= LSV_ASYNC_WINDOWS;
421                         break;
422                 default:
423                         fprintf(stderr, "Invalid option, '-h' for help.\n");
424                         return -EINVAL;
425                 }
426         }
427
428         if (start.ls_active == LFSCK_TYPES_ALL)
429                 start.ls_active = LFSCK_TYPES_DEF;
430
431         if (data.ioc_inlbuf4 == NULL) {
432                 rc = lfsck_get_dev_name(&data, device, start.ls_active,
433                                         start.ls_flags & LPF_ALL_TGT);
434                 if (rc != 0)
435                         return rc;
436         }
437
438         data.ioc_inlbuf1 = (char *)&start;
439         data.ioc_inllen1 = sizeof(start);
440         memset(buf, 0, sizeof(rawbuf));
441         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
442         if (rc != 0) {
443                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
444                 return rc;
445         }
446
447         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_START_LFSCK, buf);
448         if (rc < 0) {
449                 perror("Fail to start LFSCK");
450                 return rc;
451         }
452
453         obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
454         printf("Started LFSCK on the device %s: scrub", device);
455         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) {
456                 if (start.ls_active & lfsck_types_names[i].ltn_type) {
457                         printf(" %s", lfsck_types_names[i].ltn_name);
458                         start.ls_active &= ~lfsck_types_names[i].ltn_type;
459                 }
460         }
461         if (start.ls_active != 0)
462                 printf(" unknown(0x%x)", start.ls_active);
463         printf("\n");
464
465         return 0;
466 }
467
468 int jt_lfsck_stop(int argc, char **argv)
469 {
470         struct obd_ioctl_data data;
471         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
472         char device[MAX_OBD_NAME];
473         struct lfsck_stop stop;
474         char *optstring = "AhM:";
475         int opt, index, rc;
476
477         memset(&data, 0, sizeof(data));
478         memset(&stop, 0, sizeof(stop));
479         memset(device, 0, MAX_OBD_NAME);
480
481         /* Reset the 'optind' for the case of getopt_long() called multiple
482          * times under the same lctl. */
483         optind = 0;
484         while ((opt = getopt_long(argc, argv, optstring, long_opt_stop,
485                                   &index)) != EOF) {
486                 switch (opt) {
487                 case 'A':
488                         stop.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
489                         break;
490                 case 'h':
491                         usage_stop();
492                         return 0;
493                 case 'M':
494                         rc = lfsck_pack_dev(&data, device, optarg);
495                         if (rc != 0)
496                                 return rc;
497                         break;
498                 default:
499                         fprintf(stderr, "Invalid option, '-h' for help.\n");
500                         return -EINVAL;
501                 }
502         }
503
504         if (data.ioc_inlbuf4 == NULL) {
505                 rc = lfsck_get_dev_name(&data, device, 0,
506                                         stop.ls_flags & LPF_ALL_TGT);
507                 if (rc != 0)
508                         return rc;
509         }
510
511         data.ioc_inlbuf1 = (char *)&stop;
512         data.ioc_inllen1 = sizeof(stop);
513         memset(buf, 0, sizeof(rawbuf));
514         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
515         if (rc != 0) {
516                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
517                 return rc;
518         }
519
520         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_STOP_LFSCK, buf);
521         if (rc < 0) {
522                 perror("Fail to stop LFSCK");
523                 return rc;
524         }
525
526         printf("Stopped LFSCK on the device %s.\n", device);
527         return 0;
528 }
529
530 int jt_lfsck_query(int argc, char **argv)
531 {
532         struct obd_ioctl_data data = { 0 };
533         char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
534         char device[MAX_OBD_NAME] = "";
535         struct lfsck_query query = { .lu_types = LFSCK_TYPES_ALL };
536         int opt, index, rc, i;
537         enum lfsck_type type;
538
539         while ((opt = getopt_long(argc, argv, "hM:t:w", long_opt_query,
540                                   &index)) != EOF) {
541                 switch (opt) {
542                 case 'h':
543                         usage_query();
544                         return 0;
545                 case 'M':
546                         rc = lfsck_pack_dev(&data, device, optarg);
547                         if (rc != 0)
548                                 return rc;
549                         break;
550                 case 't': {
551                         char *typename;
552
553                         if (query.lu_types == LFSCK_TYPES_ALL)
554                                 query.lu_types = 0;
555                         while ((typename = strsep(&optarg, ",")) != NULL) {
556                                 type = lfsck_name2type(typename);
557                                 if (type == -1)
558                                         goto bad_type;
559                                 query.lu_types |= type;
560                         }
561                         break;
562
563 bad_type:
564                         fprintf(stderr, "invalid LFSCK type -t '%s'. "
565                                 "valid types are:\n", typename);
566                         for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++)
567                                 fprintf(stderr, "%s%s", i != 0 ? "," : "",
568                                         lfsck_types_names[i].ltn_name);
569                         fprintf(stderr, "\n");
570                         return -EINVAL;
571                 }
572                 case 'w':
573                         query.lu_flags |= LPF_WAIT;
574                         break;
575                 default:
576                         fprintf(stderr, "Invalid option, '-h' for help.\n");
577                         usage_query();
578                         return -EINVAL;
579                 }
580         }
581
582         if (data.ioc_inlbuf4 == NULL) {
583                 rc = lfsck_get_dev_name(&data, device, 0, true);
584                 if (rc != 0)
585                         return rc;
586         }
587
588         data.ioc_inlbuf1 = (char *)&query;
589         data.ioc_inllen1 = sizeof(query);
590         memset(buf, 0, sizeof(rawbuf));
591         rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
592         if (rc != 0) {
593                 fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
594                 return rc;
595         }
596
597         rc = l_ioctl(OBD_DEV_ID, OBD_IOC_QUERY_LFSCK, buf);
598         if (rc < 0) {
599                 perror("Fail to query LFSCK");
600                 return rc;
601         }
602
603         obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
604         for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS; i++, type = 1 << i) {
605                 const char *name;
606                 int j;
607
608                 if (!(query.lu_types & type))
609                         continue;
610
611                 name = lfsck_type2name(type);
612                 for (j = 0; j <= LS_MAX; j++)
613                         printf("%s_mdts_%s: %d\n", name,
614                                lfsck_status2name(j), query.lu_mdts_count[i][j]);
615
616                 for (j = 0; j <= LS_MAX; j++)
617                         printf("%s_osts_%s: %d\n", name,
618                                lfsck_status2name(j), query.lu_osts_count[i][j]);
619
620                 printf("%s_repaired: %llu\n", name, query.lu_repaired[i]);
621         }
622
623         return 0;
624 }