Whamcloud - gitweb
b=23428 Fix lustre built with --enable-lu_ref
[fs/lustre-release.git] / lustre / utils / ll_recover_lost_found_objs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/utils/ll_recover_lost_found_objs.c
37  *
38  * Tool for recovering objects from lost+found that might result from a
39  * Lustre OST with a corrupted directory. Running e2fsck will fix the
40  * directory, but puts all of the objects into lost+found, where they are
41  * inaccessible to Lustre.
42  *
43  * Author: Kalpak Shah <kalpak.shah@sun.com>
44  */
45
46 #ifndef _GNU_SOURCE
47 #define _GNU_SOURCE
48 #endif
49
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/xattr.h>
#include <sys/stat.h>
59
60 #include <liblustre.h>
61
/* Number of slots in the per-group tables (grp_info[] and LAST_ID caches). */
#define MAX_GROUPS 64

/* Set to 1 by the -v command-line option. */
int verbose = 0;

/*
 * Per-group bookkeeping.  dir_exists is non-zero once the O/<group>
 * directory is known to be present, so mkdir_p() can skip probing for it.
 */
struct obd_group_info {
        int dir_exists;
};

/* Only used inside this file, hence internal linkage. */
static struct obd_group_info grp_info[MAX_GROUPS];
70
/*
 * Print the command synopsis followed by a short how-to on stderr, then
 * terminate the process with exit status 1.  This function never returns.
 */
static void usage(char *progname)
{
        static const char help_text[] =
                "You need to mount the corrupted OST filesystem and "
                "provide the path for the lost+found directory as the -d "
                "option, for example:\n"
                "ll_recover_lost_found_objs -d /mnt/ost/lost+found\n";

        fprintf(stderr, "Usage: %s [-hv] -d lost+found_directory\n", progname);
        fputs(help_text, stderr);
        exit(1);
}
80
/*
 * Format a path into buf, which has room for size bytes including the
 * terminating NUL.
 *
 * func and line identify the call site and are only used in the diagnostic.
 *
 * Returns 0 when the formatted string fit into buf.  Returns 1 when
 * formatting failed or the result would have been truncated; in that case a
 * message containing the complete would-be path is printed on stderr and the
 * contents of buf must not be relied upon.
 */
static int _ll_sprintf(char *buf, size_t size, const char *func, int line,
                       const char *format, ...)
{
        va_list ap;
        va_list ap_msg;
        int rc;

        va_start(ap, format);
        /*
         * vsnprintf() leaves ap indeterminate, so the arguments cannot be
         * walked a second time through it.  Keep an independent copy for
         * re-formatting the path in the error message.
         */
        va_copy(ap_msg, ap);
        rc = vsnprintf(buf, size, format, ap);
        va_end(ap);

        if (rc < 0 || (size_t)rc >= size) {
                fprintf(stderr,
                        "error: %s:%d: path \"", func, line);
                vfprintf(stderr, format, ap_msg);
                va_end(ap_msg);
                fprintf(stderr, "\" is too long\n");
                return 1;
        }
        va_end(ap_msg);
        return 0;
}

/* Call _ll_sprintf() with the calling function and line filled in. */
#define ll_sprintf(buf, size, format, ...) \
        _ll_sprintf(buf, size, __FUNCTION__, __LINE__, format, ## __VA_ARGS__)
103
104 static int mkdir_p(const char *dest_path, const char *mount, __u64 ff_group)
105 {
106         struct stat stat_buf;
107         int retval;
108         mode_t mode = 0700;
109         char tmp_path[PATH_MAX];
110
111         if (stat(dest_path, &stat_buf) == 0)
112                 return 0;
113
114         if (grp_info[ff_group].dir_exists == 0) {
115                 if (ll_sprintf(tmp_path, PATH_MAX, "%s/O/"LPU64,
116                                mount, ff_group))
117                         return 1;
118
119                 if (stat(tmp_path, &stat_buf) != 0) {
120                         retval = mkdir(tmp_path, mode);
121                         if (retval < 0) {
122                                 fprintf(stderr, "error: creating directory %s: "
123                                         "%s\n", tmp_path, strerror(errno));
124                                 return 1;
125                         }
126                         grp_info[ff_group].dir_exists = 1;
127                 }
128         }
129
130         retval = mkdir(dest_path, mode);
131         if (retval < 0) {
132                 fprintf(stderr, "error: creating directory %s: "
133                         "%s\n", dest_path, strerror(errno));
134                 return 1;
135         }
136
137         return 0;
138 }
139
140 /* This is returning 0 for an error */
141 static __u64 read_last_id(char *file_path)
142 {
143         __u64 last_id;
144         int fd;
145         int count;
146
147         fd = open(file_path, O_RDONLY);
148         if (fd < 0) {
149                 if (errno != ENOENT)
150                         fprintf(stderr, "error: opening %s: %s\n",
151                                         file_path, strerror(errno));
152                 return 0;
153         }
154
155         count = read(fd, &last_id, sizeof(last_id));
156         if (count < 0) {
157                 fprintf(stderr, "error: reading file %s: %s\n", file_path,
158                         strerror(errno));
159                 close(fd);
160                 return 0;
161         }
162         if (count != sizeof(last_id)) {
163                 fprintf(stderr, "error: Could not read full last_id from %s\n",
164                         file_path);
165                 close(fd);
166                 return 0;
167         }
168
169         close(fd);
170         return le64_to_cpu(last_id);
171 }
172
/*
 * Maps the file-type bits of st_mode (st_mode & S_IFMT) to the matching
 * dirent d_type value.
 *
 * The table is sized S_IFMT + 1 so that every possible masked value is a
 * valid index, not just the ones up to the largest initializer.  Slots
 * without an initializer are zero-filled (DT_UNKNOWN is 0 in glibc's
 * <dirent.h>), which callers treat as "unknown type".
 */
static const unsigned filetype_dir_table[S_IFMT + 1] = {
        [0]        = DT_UNKNOWN,
        [S_IFIFO]  = DT_FIFO,
        [S_IFCHR]  = DT_CHR,
        [S_IFDIR]  = DT_DIR,
        [S_IFBLK]  = DT_BLK,
        [S_IFREG]  = DT_REG,
        [S_IFLNK]  = DT_LNK,
        [S_IFSOCK] = DT_SOCK,
#if defined(DT_DOOR) && defined(S_IFDOOR)
        [S_IFDOOR] = DT_DOOR,
#endif
};
186
187 static int traverse_lost_found(char *src_dir, const char *mount_path)
188 {
189         DIR *dir_ptr;
190         struct filter_fid parent_fid;
191         struct dirent64 *dirent;
192         __u64 ff_group, ff_objid;
193         char *file_path;
194         char dest_path[PATH_MAX];
195         char last_id_file[PATH_MAX];
196         __u64 last_id[MAX_GROUPS] = {0};
197         __u64 tmp_last_id;
198         struct stat st;
199         int obj_exists, xattr_len;
200         int len, ret = 0, error = 0;
201
202         len = strlen(src_dir);
203
204         dir_ptr = opendir(src_dir);
205         if (!dir_ptr) {
206                 fprintf(stderr, "error: opening directory: %s\n",
207                         strerror(errno));
208                 return 1;
209         }
210
211         while ((dirent = readdir64(dir_ptr)) != NULL) {
212                 if (!strcmp(dirent->d_name, ".") ||
213                     !strcmp(dirent->d_name, ".."))
214                         continue;
215
216                 src_dir[len] = 0;
217                 if ((len + strlen(dirent->d_name) + 2) > PATH_MAX) {
218                         fprintf(stderr, "error: %s/%s: path too long\n",
219                                 src_dir, dirent->d_name);
220                         break;
221                 }
222                 strcat(src_dir, "/");
223                 strcat(src_dir, dirent->d_name);
224
225                 if (dirent->d_type == DT_UNKNOWN) {
226                         ret = stat(src_dir, &st);
227                         if (ret == -1) {
228                                 fprintf(stderr,
229                                         "error: stating %s: %s\n",
230                                         src_dir, strerror(errno));
231                                 continue;
232                         }
233                         dirent->d_type = filetype_dir_table[st.st_mode &
234                                                             S_IFMT];
235                         if (dirent->d_type == DT_UNKNOWN) {
236                                 fprintf(stderr,
237                                         "error: %s of unknown type 0%o\n",
238                                         src_dir, st.st_mode);
239                                 continue;
240                         }
241                 }
242
243                 switch(dirent->d_type) {
244                 case DT_DIR:
245                 ret = traverse_lost_found(src_dir, mount_path);
246                 if (ret) {
247                         closedir(dir_ptr);
248                         return ret;
249                 }
250                 break;
251
252                 case DT_REG:
253                 file_path = src_dir;
254                 xattr_len = getxattr(file_path, "trusted.fid",
255                                      (void *)&parent_fid,
256                                      sizeof(parent_fid));
257
258                 if (xattr_len == -1 || xattr_len < sizeof(parent_fid))
259                         /*
260                          * Its very much possible that we dont find fid
261                          * on precreated files, LAST_ID
262                          */
263                         continue;
264
265                 ff_group = le64_to_cpu(parent_fid.ff_seq);
266                 if (ff_group >= FID_SEQ_OST_MAX) {
267                         fprintf(stderr, "error: invalid group "LPU64" likely"
268                                 "indicates a corrupt xattr for file %s.\n",
269                                 ff_group, file_path);
270                         continue;
271                 }
272                 ff_objid = le64_to_cpu(parent_fid.ff_objid);
273
274                 /* might need to create the parent directories for
275                    this object */
276                 if (ll_sprintf(dest_path, PATH_MAX, "%s/O/"LPU64"/d"LPU64,
277                                mount_path, ff_group, ff_objid % 32)) {
278                         closedir(dir_ptr);
279                         return 1;
280                 }
281
282                 ret = mkdir_p(dest_path, mount_path, ff_group);
283                 if (ret) {
284                         closedir(dir_ptr);
285                         return ret;
286                 }
287
288                 /*
289                  * Object ID needs to be verified against last_id.
290                  * LAST_ID file may not be present in the group directory
291                  * due to corruption. In case of any error tyr to recover
292                  * as many objects as possible by setting last_id to ~0ULL.
293                  */
294                 if (last_id[ff_group] == 0) {
295                         if (ll_sprintf(last_id_file, PATH_MAX,
296                                        "%s/O/"LPU64"/LAST_ID",
297                                        mount_path, ff_group)) {
298                                 closedir(dir_ptr);
299                                 return 1;
300                         }
301
302                         tmp_last_id = read_last_id(last_id_file);
303                         if (tmp_last_id == 0)
304                                 tmp_last_id = ~0ULL;
305                         last_id[ff_group] = tmp_last_id;
306                 }
307
308                 if (ff_objid > last_id[ff_group]) {
309                         fprintf(stderr, "error: file skipped because object ID "
310                                 "greater than LAST_ID\nFilename: %s\n"
311                                 "Group: "LPU64"\nObjectid: "LPU64"\n"
312                                 "LAST_ID: "LPU64, file_path, ff_group, ff_objid,
313                                 last_id[ff_group]);
314                         continue;
315                 }
316
317                 /* move file from lost+found to proper object
318                    directory */
319                 if (ll_sprintf(dest_path, PATH_MAX,
320                                "%s/O/"LPU64"/d"LPU64"/"LPU64, mount_path,
321                                ff_group, ff_objid % 32, ff_objid)) {
322                         closedir(dir_ptr);
323                         return 1;
324                 }
325
326                 obj_exists = 1;
327                 ret = stat(dest_path, &st);
328                 if (ret == 0) {
329                         if (st.st_size == 0)
330                                 obj_exists = 0;
331                 } else {
332                         if (errno != ENOENT)
333                                 fprintf(stderr,
334                                         "warning: stat for %s: %s\n",
335                                         dest_path, strerror(errno));
336                         obj_exists = 0;
337                 }
338
339                 if (obj_exists) {
340                         fprintf(stderr, "error: target object %s already "
341                                 "exists and will not be replaced.\n",dest_path);
342                         continue;
343                 }
344
345                 if (rename(file_path, dest_path) < 0) {
346                         fprintf(stderr, "error: rename failed for file %s: %s\n",
347                                 file_path, strerror(errno));
348                         error++;
349                         continue;
350                 }
351
352                 printf("Object %s restored.\n", dest_path);
353                 break;
354                 }
355         }
356
357         closedir(dir_ptr);
358
359         return error;
360 }
361
362 /*
363  * If LAST_ID file is not present in some group then restore it with the highest
364  * object ID found in that group. By the time we come here all possible objects
365  * have been restored.
366  */
367 static int check_last_id(const char *mount_path)
368 {
369         char lastid_path[PATH_MAX];
370         char dirname[PATH_MAX], subdirname[PATH_MAX];
371         DIR *groupdir, *subdir;
372         struct stat st;
373         struct dirent *dirent;
374         __u64 group;
375         __u64 max_objid;
376         int fd;
377         int ret;
378
379         for (group = 0; group < MAX_GROUPS; group++) {
380                 max_objid = 0;
381
382                 if (ll_sprintf(dirname, PATH_MAX, "%s/O/"LPU64,
383                                mount_path, group))
384                         return 1;
385                 if (ll_sprintf(lastid_path, PATH_MAX, "%s/LAST_ID", dirname))
386                         return 1;
387
388                 if (stat(lastid_path, &st) == 0)
389                         continue;
390
391                 groupdir = opendir(dirname);
392                 if (groupdir == NULL) {
393                         if (errno != ENOENT)
394                                 fprintf(stderr, "error: opening %s: %s\n",
395                                         dirname, strerror(errno));
396                         continue;
397                 }
398
399                 while ((dirent = readdir(groupdir)) != NULL) {
400                         if (!strcmp(dirent->d_name, ".") ||
401                             !strcmp(dirent->d_name, ".."))
402                                 continue;
403
404                         if (ll_sprintf(subdirname, PATH_MAX, "%s/%s",
405                                        dirname, dirent->d_name)) {
406                                 closedir(groupdir);
407                                 return 1;
408                         }
409                         subdir = opendir(subdirname);
410                         if (subdir == NULL) {
411                                 fprintf(stderr, "error: opening %s: %s\n",
412                                         subdirname, strerror(errno));
413                                 continue;
414                         }
415
416                         while ((dirent = readdir(subdir)) != NULL) {
417                                 __u64 objid;
418                                 char *end;
419
420                                 if (!strcmp(dirent->d_name, ".") ||
421                                     !strcmp(dirent->d_name, ".."))
422                                         continue;
423
424                                 objid = strtoull(dirent->d_name, &end, 0);
425                                 if (end == dirent->d_name || *end != 0) {
426                                         fprintf(stderr, "error: unknown object"
427                                                 "ID %s/%s\n", subdirname,
428                                                 dirent->d_name);
429                                         continue;
430                                 }
431                                 if (objid > max_objid)
432                                        max_objid = objid;
433                         }
434                         closedir(subdir);
435                 }
436                 closedir(groupdir);
437
438                 fd = open(lastid_path, O_RDWR | O_CREAT, 0700);
439                 if (fd < 0) {
440                         fprintf(stderr, "error: open \"%s\" failed: %s\n",
441                                 lastid_path, strerror(errno));
442                         return 1;
443                 }
444
445                 max_objid = cpu_to_le64(max_objid);
446                 ret = write(fd, &max_objid, sizeof(__u64));
447                 if (ret < sizeof(__u64)) {
448                         fprintf(stderr, "error: write \"%s\" failed: %s\n",
449                                 lastid_path, strerror(errno));
450                         close(fd);
451                         return 1;
452                 }
453
454                 close(fd);
455         }
456
457         return 0;
458 }
459
460 int main(int argc, char **argv)
461 {
462         char *progname;
463         struct stat stat_buf;
464         char src_dir[PATH_MAX] = "";
465         char mount_path[PATH_MAX];
466         char tmp_path[PATH_MAX];
467         int c;
468         int retval;
469
470         progname = argv[0];
471
472         while ((c = getopt(argc, argv, "d:hv")) != EOF) {
473                 switch (c) {
474                 case 'd':
475                         if (chdir(optarg)) {
476                                 fprintf(stderr, "error: chdir to %s: %s\n",
477                                         optarg, strerror(errno));
478                                 return 1;
479                         }
480                         if (getcwd(src_dir, PATH_MAX) == NULL) {
481                                 fprintf(stderr,
482                                         "error: getcwd of lost+found: %s\n",
483                                         strerror(errno));
484                                 return 1;
485                         }
486                         if (chdir("..")) {
487                                 fprintf(stderr, "error: chdir to \"..\": %s\n",
488                                         strerror(errno));
489                                 return 1;
490                         }
491                         if (getcwd(mount_path, PATH_MAX) == NULL) {
492                                 fprintf(stderr,
493                                         "error: getcwd of mount point: %s\n",
494                                         strerror(errno));
495                                 return 1;
496                         }
497                         if (!strcmp(src_dir, mount_path)) {
498                                 fprintf(stderr,
499                                         "error: root directory is detected\n");
500                                 return 1;
501                         }
502                         fprintf(stdout, "\"lost+found\" directory path: %s\n",
503                                 src_dir);
504                         break;
505                 case 'v':
506                         verbose = 1;
507                         break;
508                 case 'h':
509                         usage(progname);
510                 default:
511                         fprintf(stderr, "%s: bad option '%c'\n",
512                                 progname, c);
513                         usage(progname);
514                 }
515         }
516
517         if (src_dir[0] == 0)
518                 usage(progname);
519
520         /* Check if 'O' directory exists and create it if needed */
521         if (ll_sprintf(tmp_path, PATH_MAX, "%s/O",  mount_path))
522                 return 1;
523
524         if (stat(tmp_path, &stat_buf) != 0) {
525                 retval = mkdir(tmp_path, 0700);
526                 if (retval == -1) {
527                         fprintf(stderr, "error: creating objects directory %s:"
528                                 " %s\n", tmp_path, strerror(errno));
529                         return 1;
530                 }
531         }
532
533         retval = traverse_lost_found(src_dir, mount_path);
534         if (retval) {
535                 fprintf(stderr, "error: traversing lost+found looking for "
536                         "orphan objects.\n");
537                 return retval;
538         }
539
540         retval = check_last_id(mount_path);
541         if (retval)
542                 fprintf(stderr, "error: while checking/restoring LAST_ID.\n");
543
544         return retval;
545 }