Whamcloud - gitweb
8f24e9a89e092deca3956f2787906e074f33bba0
[fs/lustre-release.git] / lustre / utils / ll_recover_lost_found_objs.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * lustre/utils/ll_recover_lost_found_objs.c
35  *
36  * Tool for recovering objects from lost+found that might result from a
37  * Lustre OST with a corrupted directory. Running e2fsck will fix the
38  * directory, but puts all of the objects into lost+found, where they are
39  * inaccessible to Lustre.
40  *
41  * Author: Kalpak Shah <kalpak.shah@sun.com>
42  */
43
44 #ifndef _GNU_SOURCE
45 #define _GNU_SOURCE
46 #endif
47
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include <unistd.h>
51 #include <string.h>
52 #include <errno.h>
53 #include <dirent.h>
54 #include <sys/types.h>
55 #include <sys/xattr.h>
56 #include <sys/stat.h>
57
58 #include <liblustre.h>
59
#define MAX_GROUPS 64

/* Set to 1 when -v is given on the command line. */
int verbose;

/* Per-group bookkeeping; grp_info[] is indexed by group number. */
struct obd_group_info {
        /* Set to 1 once mkdir_p() has created the group directory. */
        int dir_exists;
} grp_info[MAX_GROUPS];
68
/*
 * Print the command line synopsis followed by an example invocation on
 * stderr, then terminate the process with exit status 1.
 * This function does not return.
 */
static void usage(char *progname)
{
        static const char help_text[] =
                "You need to mount the corrupted OST filesystem and "
                "provide the path for the lost+found directory as the -d "
                "option, for example:\n"
                "ll_recover_lost_found_objs -d /mnt/ost/lost+found\n";

        fprintf(stderr, "Usage: %s [-hv] -d lost+found_directory\n", progname);
        fputs(help_text, stderr);
        exit(1);
}
78
/*
 * Format a path into buf, which is size bytes long.
 *
 * func and line identify the call site and are only used in the
 * diagnostic message.
 *
 * Returns 0 when the formatted string, including its terminating NUL,
 * fits into buf.  Returns 1 when vsnprintf() reports an error or the
 * result would have been truncated; in that case the complete path is
 * printed to stderr as part of an error message.
 */
static int _ll_sprintf(char *buf, size_t size, const char *func, int line,
                      const char *format, ...)
{
        va_list ap;
        va_list ap_msg;
        int rc;

        va_start(ap, format);
        /*
         * vsnprintf() leaves ap indeterminate, so the error path needs
         * its own copy of the argument list to print the full path.
         */
        va_copy(ap_msg, ap);
        rc = vsnprintf(buf, size, format, ap);
        va_end(ap);

        if (rc < 0 || (size_t)rc >= size) {
                fprintf(stderr,
                        "error: %s:%d: path \"", func, line);
                vfprintf(stderr, format, ap_msg);
                fprintf(stderr, "\" is too long\n");
                va_end(ap_msg);
                return 1;
        }

        va_end(ap_msg);
        return 0;
}

/* Call _ll_sprintf() with the caller's function name and line number. */
#define ll_sprintf(buf, size, format, ...) \
        _ll_sprintf(buf, size, __FUNCTION__, __LINE__, format, ## __VA_ARGS__)
101
102 static int mkdir_p(const char *dest_path, const char *mount, __u64 ff_group)
103 {
104         struct stat stat_buf;
105         int retval;
106         mode_t mode = 0700;
107         char tmp_path[PATH_MAX];
108
109         if (stat(dest_path, &stat_buf) == 0)
110                 return 0;
111
112         if (grp_info[ff_group].dir_exists == 0) {
113                 if (ll_sprintf(tmp_path, PATH_MAX, "%s/O/"LPU64,
114                                mount, ff_group))
115                         return 1;
116
117                 if (stat(tmp_path, &stat_buf) != 0) {
118                         retval = mkdir(tmp_path, mode);
119                         if (retval < 0) {
120                                 fprintf(stderr, "error: creating directory %s: "
121                                         "%s\n", tmp_path, strerror(errno));
122                                 return 1;
123                         }
124                         grp_info[ff_group].dir_exists = 1;
125                 }
126         }
127
128         retval = mkdir(dest_path, mode);
129         if (retval < 0) {
130                 fprintf(stderr, "error: creating directory %s: "
131                         "%s\n", dest_path, strerror(errno));
132                 return 1;
133         }
134
135         return 0;
136 }
137
138 /* This is returning 0 for an error */
139 static __u64 read_last_id(char *file_path)
140 {
141         __u64 last_id;
142         int fd;
143         int count;
144
145         fd = open(file_path, O_RDONLY);
146         if (fd < 0) {
147                 if (errno != ENOENT)
148                         fprintf(stderr, "error: opening %s: %s\n",
149                                         file_path, strerror(errno));
150                 return 0;
151         }
152
153         count = read(fd, &last_id, sizeof(last_id));
154         if (count < 0) {
155                 fprintf(stderr, "error: reading file %s: %s\n", file_path,
156                         strerror(errno));
157                 close(fd);
158                 return 0;
159         }
160         if (count != sizeof(last_id)) {
161                 fprintf(stderr, "error: Could not read full last_id from %s\n",
162                         file_path);
163                 close(fd);
164                 return 0;
165         }
166
167         close(fd);
168         return le64_to_cpu(last_id);
169 }
170
/*
 * Maps the file type bits of a stat mode (st_mode & S_IFMT) to the
 * matching dirent d_type value.
 *
 * The table has a slot for every value the mask can produce, so a lookup
 * with (st_mode & S_IFMT) never reads past its end.  Slots without an
 * explicit initializer are zero-filled, which reads as DT_UNKNOWN on
 * systems where DT_UNKNOWN is 0 (NOTE(review): true for glibc - confirm
 * for other C libraries).
 */
static const unsigned filetype_dir_table[S_IFMT + 1] = {
        [0]       = DT_UNKNOWN,
        [S_IFIFO] = DT_FIFO,
        [S_IFCHR] = DT_CHR,
        [S_IFDIR] = DT_DIR,
        [S_IFBLK] = DT_BLK,
        [S_IFREG] = DT_REG,
        [S_IFLNK] = DT_LNK,
        [S_IFSOCK] = DT_SOCK,
#if defined(DT_DOOR) && defined(S_IFDOOR)
        [S_IFDOOR] = DT_DOOR,
#endif
};
184
185 static int traverse_lost_found(char *src_dir, const char *mount_path)
186 {
187         DIR *dir_ptr;
188         struct filter_fid parent_fid;
189         struct dirent64 *dirent;
190         __u64 ff_group, ff_objid;
191         char *file_path;
192         char dest_path[PATH_MAX];
193         char last_id_file[PATH_MAX];
194         __u64 last_id[MAX_GROUPS] = {0};
195         __u64 tmp_last_id;
196         struct stat st;
197         int obj_exists, xattr_len;
198         int len, ret = 0, error = 0;
199
200         len = strlen(src_dir);
201
202         dir_ptr = opendir(src_dir);
203         if (!dir_ptr) {
204                 fprintf(stderr, "error: opening directory: %s\n",
205                         strerror(errno));
206                 return 1;
207         }
208
209         while ((dirent = readdir64(dir_ptr)) != NULL) {
210                 if (!strcmp(dirent->d_name, ".") ||
211                     !strcmp(dirent->d_name, ".."))
212                         continue;
213
214                 src_dir[len] = 0;
215                 if ((len + strlen(dirent->d_name) + 2) > PATH_MAX) {
216                         fprintf(stderr, "error: %s/%s: path too long\n",
217                                 src_dir, dirent->d_name);
218                         break;
219                 }
220                 strcat(src_dir, "/");
221                 strcat(src_dir, dirent->d_name);
222
223                 if (dirent->d_type == DT_UNKNOWN) {
224                         ret = stat(src_dir, &st);
225                         if (ret == -1) {
226                                 fprintf(stderr,
227                                         "error: stating %s: %s\n",
228                                         src_dir, strerror(errno));
229                                 continue;
230                         }
231                         dirent->d_type = filetype_dir_table[st.st_mode &
232                                                             S_IFMT];
233                         if (dirent->d_type == DT_UNKNOWN) {
234                                 fprintf(stderr,
235                                         "error: %s of unknown type 0%o\n",
236                                         src_dir, st.st_mode);
237                                 continue;
238                         }
239                 }
240
241                 switch(dirent->d_type) {
242                 case DT_DIR:
243                 ret = traverse_lost_found(src_dir, mount_path);
244                 if (ret) {
245                         closedir(dir_ptr);
246                         return ret;
247                 }
248                 break;
249
250                 case DT_REG:
251                 file_path = src_dir;
252                 xattr_len = getxattr(file_path, "trusted.fid",
253                                      (void *)&parent_fid,
254                                      sizeof(parent_fid));
255
256                 if (xattr_len == -1 || xattr_len < sizeof(parent_fid))
257                         /*
258                          * Its very much possible that we dont find fid
259                          * on precreated files, LAST_ID
260                          */
261                         continue;
262
263                 ff_group = le64_to_cpu(parent_fid.ff_seq);
264                 if (ff_group >= FID_SEQ_OST_MAX) {
265                         fprintf(stderr, "error: invalid group "LPU64" likely"
266                                 "indicates a corrupt xattr for file %s.\n",
267                                 ff_group, file_path);
268                         continue;
269                 }
270                 ff_objid = le64_to_cpu(parent_fid.ff_objid);
271
272                 /* might need to create the parent directories for
273                    this object */
274                 if (ll_sprintf(dest_path, PATH_MAX, "%s/O/"LPU64"/d"LPU64,
275                                mount_path, ff_group, ff_objid % 32)) {
276                         closedir(dir_ptr);
277                         return 1;
278                 }
279
280                 ret = mkdir_p(dest_path, mount_path, ff_group);
281                 if (ret) {
282                         closedir(dir_ptr);
283                         return ret;
284                 }
285
286                 /*
287                  * Object ID needs to be verified against last_id.
288                  * LAST_ID file may not be present in the group directory
289                  * due to corruption. In case of any error tyr to recover
290                  * as many objects as possible by setting last_id to ~0ULL.
291                  */
292                 if (last_id[ff_group] == 0) {
293                         if (ll_sprintf(last_id_file, PATH_MAX,
294                                        "%s/O/"LPU64"/LAST_ID",
295                                        mount_path, ff_group)) {
296                                 closedir(dir_ptr);
297                                 return 1;
298                         }
299
300                         tmp_last_id = read_last_id(last_id_file);
301                         if (tmp_last_id == 0)
302                                 tmp_last_id = ~0ULL;
303                         last_id[ff_group] = tmp_last_id;
304                 }
305
306                 if (ff_objid > last_id[ff_group]) {
307                         fprintf(stderr, "error: file skipped because object ID "
308                                 "greater than LAST_ID\nFilename: %s\n"
309                                 "Group: "LPU64"\nObjectid: "LPU64"\n"
310                                 "LAST_ID: "LPU64, file_path, ff_group, ff_objid,
311                                 last_id[ff_group]);
312                         continue;
313                 }
314
315                 /* move file from lost+found to proper object
316                    directory */
317                 if (ll_sprintf(dest_path, PATH_MAX,
318                                "%s/O/"LPU64"/d"LPU64"/"LPU64, mount_path,
319                                ff_group, ff_objid % 32, ff_objid)) {
320                         closedir(dir_ptr);
321                         return 1;
322                 }
323
324                 obj_exists = 1;
325                 ret = stat(dest_path, &st);
326                 if (ret == 0) {
327                         if (st.st_size == 0)
328                                 obj_exists = 0;
329                 } else {
330                         if (errno != ENOENT)
331                                 fprintf(stderr,
332                                         "warning: stat for %s: %s\n",
333                                         dest_path, strerror(errno));
334                         obj_exists = 0;
335                 }
336
337                 if (obj_exists) {
338                         fprintf(stderr, "error: target object %s already "
339                                 "exists and will not be replaced.\n",dest_path);
340                         continue;
341                 }
342
343                 if (rename(file_path, dest_path) < 0) {
344                         fprintf(stderr, "error: rename failed for file %s: %s\n",
345                                 file_path, strerror(errno));
346                         error++;
347                         continue;
348                 }
349
350                 printf("Object %s restored.\n", dest_path);
351                 break;
352                 }
353         }
354
355         closedir(dir_ptr);
356
357         return error;
358 }
359
360 /*
361  * If LAST_ID file is not present in some group then restore it with the highest
362  * object ID found in that group. By the time we come here all possible objects
363  * have been restored.
364  */
365 static int check_last_id(const char *mount_path)
366 {
367         char lastid_path[PATH_MAX];
368         char dirname[PATH_MAX], subdirname[PATH_MAX];
369         DIR *groupdir, *subdir;
370         struct stat st;
371         struct dirent *dirent;
372         __u64 group;
373         __u64 max_objid;
374         int fd;
375         int ret;
376
377         for (group = 0; group < MAX_GROUPS; group++) {
378                 max_objid = 0;
379
380                 if (ll_sprintf(dirname, PATH_MAX, "%s/O/"LPU64,
381                                mount_path, group))
382                         return 1;
383                 if (ll_sprintf(lastid_path, PATH_MAX, "%s/LAST_ID", dirname))
384                         return 1;
385
386                 if (stat(lastid_path, &st) == 0)
387                         continue;
388
389                 groupdir = opendir(dirname);
390                 if (groupdir == NULL) {
391                         if (errno != ENOENT)
392                                 fprintf(stderr, "error: opening %s: %s\n",
393                                         dirname, strerror(errno));
394                         continue;
395                 }
396
397                 while ((dirent = readdir(groupdir)) != NULL) {
398                         if (!strcmp(dirent->d_name, ".") ||
399                             !strcmp(dirent->d_name, ".."))
400                                 continue;
401
402                         if (ll_sprintf(subdirname, PATH_MAX, "%s/%s",
403                                        dirname, dirent->d_name)) {
404                                 closedir(groupdir);
405                                 return 1;
406                         }
407                         subdir = opendir(subdirname);
408                         if (subdir == NULL) {
409                                 fprintf(stderr, "error: opening %s: %s\n",
410                                         subdirname, strerror(errno));
411                                 continue;
412                         }
413
414                         while ((dirent = readdir(subdir)) != NULL) {
415                                 __u64 objid;
416                                 char *end;
417
418                                 if (!strcmp(dirent->d_name, ".") ||
419                                     !strcmp(dirent->d_name, ".."))
420                                         continue;
421
422                                 objid = strtoull(dirent->d_name, &end, 0);
423                                 if (end == dirent->d_name || *end != 0) {
424                                         fprintf(stderr, "error: unknown object"
425                                                 "ID %s/%s\n", subdirname,
426                                                 dirent->d_name);
427                                         continue;
428                                 }
429                                 if (objid > max_objid)
430                                        max_objid = objid;
431                         }
432                         closedir(subdir);
433                 }
434                 closedir(groupdir);
435
436                 fd = open(lastid_path, O_RDWR | O_CREAT, 0700);
437                 if (fd < 0) {
438                         fprintf(stderr, "error: open \"%s\" failed: %s\n",
439                                 lastid_path, strerror(errno));
440                         return 1;
441                 }
442
443                 max_objid = cpu_to_le64(max_objid);
444                 ret = write(fd, &max_objid, sizeof(__u64));
445                 if (ret < sizeof(__u64)) {
446                         fprintf(stderr, "error: write \"%s\" failed: %s\n",
447                                 lastid_path, strerror(errno));
448                         close(fd);
449                         return 1;
450                 }
451
452                 close(fd);
453         }
454
455         return 0;
456 }
457
458 int main(int argc, char **argv)
459 {
460         char *progname;
461         struct stat stat_buf;
462         char src_dir[PATH_MAX] = "";
463         char mount_path[PATH_MAX];
464         char tmp_path[PATH_MAX];
465         int c;
466         int retval;
467
468         progname = argv[0];
469
470         while ((c = getopt(argc, argv, "d:hv")) != EOF) {
471                 switch (c) {
472                 case 'd':
473                         if (chdir(optarg)) {
474                                 fprintf(stderr, "error: chdir to %s: %s\n",
475                                         optarg, strerror(errno));
476                                 return 1;
477                         }
478                         if (getcwd(src_dir, PATH_MAX) == NULL) {
479                                 fprintf(stderr,
480                                         "error: getcwd of lost+found: %s\n",
481                                         strerror(errno));
482                                 return 1;
483                         }
484                         if (chdir("..")) {
485                                 fprintf(stderr, "error: chdir to \"..\": %s\n",
486                                         strerror(errno));
487                                 return 1;
488                         }
489                         if (getcwd(mount_path, PATH_MAX) == NULL) {
490                                 fprintf(stderr,
491                                         "error: getcwd of mount point: %s\n",
492                                         strerror(errno));
493                                 return 1;
494                         }
495                         if (!strcmp(src_dir, mount_path)) {
496                                 fprintf(stderr,
497                                         "error: root directory is detected\n");
498                                 return 1;
499                         }
500                         fprintf(stdout, "\"lost+found\" directory path: %s\n",
501                                 src_dir);
502                         break;
503                 case 'v':
504                         verbose = 1;
505                         break;
506                 case 'h':
507                         usage(progname);
508                 default:
509                         fprintf(stderr, "%s: bad option '%c'\n",
510                                 progname, c);
511                         usage(progname);
512                 }
513         }
514
515         if (src_dir[0] == 0)
516                 usage(progname);
517
518         /* Check if 'O' directory exists and create it if needed */
519         if (ll_sprintf(tmp_path, PATH_MAX, "%s/O",  mount_path))
520                 return 1;
521
522         if (stat(tmp_path, &stat_buf) != 0) {
523                 retval = mkdir(tmp_path, 0700);
524                 if (retval == -1) {
525                         fprintf(stderr, "error: creating objects directory %s:"
526                                 " %s\n", tmp_path, strerror(errno));
527                         return 1;
528                 }
529         }
530
531         retval = traverse_lost_found(src_dir, mount_path);
532         if (retval) {
533                 fprintf(stderr, "error: traversing lost+found looking for "
534                         "orphan objects.\n");
535                 return retval;
536         }
537
538         retval = check_last_id(mount_path);
539         if (retval)
540                 fprintf(stderr, "error: while checking/restoring LAST_ID.\n");
541
542         return retval;
543 }