Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / lustre / utils / ll_recover_lost_found_objs.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/utils/ll_recover_lost_found_objs.c
37  *
38  * Tool for recovering objects from lost+found that might result from a
39  * Lustre OST with a corrupted directory. Running e2fsck will fix the
40  * directory, but puts all of the objects into lost+found, where they are
41  * inaccessible to Lustre.
42  *
43  * Author: Kalpak Shah <kalpak.shah@sun.com>
44  */
45
46 #ifndef _GNU_SOURCE
47 #define _GNU_SOURCE
48 #endif
49
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <unistd.h>
53 #include <string.h>
54 #include <errno.h>
55 #include <dirent.h>
56 #include <sys/types.h>
57 #include <sys/xattr.h>
58 #include <sys/stat.h>
59
60 #include <liblustre.h>
61
/* Number of object groups (O/<group> directories) this tool handles. */
#define MAX_GROUPS 64

/* Set to 1 when the -v option is given on the command line. */
int verbose = 0;

/*
 * Per-group bookkeeping, indexed by group number.
 * dir_exists becomes 1 once mkdir_p() has created <mount>/O/<group>.
 */
struct obd_group_info {
        int dir_exists;
} grp_info[MAX_GROUPS];
70
/*
 * Print the command-line synopsis and a usage example to stderr, then
 * terminate the process with exit status 1.  This function does not return.
 *
 * progname  program name shown in the synopsis
 */
void usage(char *progname)
{
        fprintf(stderr, "Usage: %s [-hv] -d lost+found_directory\n", progname);
        /* note the trailing blank after "and": the literals are concatenated */
        fprintf(stderr, "You need to mount the corrupted OST filesystem and "
                "provide the path for the lost+found directory as the -d "
                "option, for example:\n"
                "ll_recover_lost_found_objs -d /mnt/ost/lost+found\n");
        exit(1);
}
80
81 int mkdir_p(char *dest_path, char *mount, __u64 ff_group)
82 {
83         struct stat stat_buf;
84         char tmp_path[PATH_MAX];
85         int retval;
86         mode_t mode = 0700;
87
88         if (stat(dest_path, &stat_buf) == 0)
89                 return 0;
90
91         if (grp_info[ff_group].dir_exists == 0) {
92                 sprintf(tmp_path, "%s/O/"LPU64, mount, ff_group);
93                 if (stat(tmp_path, &stat_buf) != 0) {
94                         retval = mkdir(tmp_path, 0700);
95                         if (retval < 0) {
96                                 fprintf(stderr, "error: creating directory %s: "
97                                         "%s\n", tmp_path, strerror(errno));
98                                 return 1;
99                         }
100                         grp_info[ff_group].dir_exists = 1;
101                 }
102         }
103
104         retval = mkdir(dest_path, mode);
105         if (retval < 0)
106                 return 1;
107
108         return 0;
109 }
110
111 /* This is returning 0 for an error */
112 __u64 read_last_id(char *file_path)
113 {
114         __u64 last_id;
115         int fd;
116         int count;
117
118         fd = open(file_path, O_RDONLY);
119         if (fd < 0)
120                 return 0;
121
122         count = read(fd, &last_id, sizeof(last_id));
123         if (count < 0) {
124                 fprintf(stderr, "error: reading file %s: %s\n", file_path,
125                         strerror(errno));
126                 close(fd);
127                 return 0;
128         }
129         if (count != sizeof(last_id)) {
130                 fprintf(stderr, "error: Could not read full last_id from %s\n",
131                         file_path);
132                 close(fd);
133                 return 0;
134         }
135
136         close(fd);
137         return le64_to_cpu(last_id);
138 }
139
/*
 * Maps the file-type bits of st_mode (st_mode & S_IFMT) to the matching
 * dirent d_type constant.  The array is indexed directly by the S_IF* value.
 */
static unsigned filetype_dir_table[] = {
        [0]        = DT_UNKNOWN,
        [S_IFIFO]  = DT_FIFO,
        [S_IFCHR]  = DT_CHR,
        [S_IFDIR]  = DT_DIR,
        [S_IFBLK]  = DT_BLK,
        [S_IFREG]  = DT_REG,
        [S_IFLNK]  = DT_LNK,
        [S_IFSOCK] = DT_SOCK,
#if defined(DT_DOOR) && defined(S_IFDOOR)
        [S_IFDOOR] = DT_DOOR,
#endif
};
153
154 static int traverse_lost_found(char *src_dir, char *mount_path)
155 {
156         DIR *dir_ptr;
157         struct filter_fid trusted_fid;
158         struct dirent64 *dirent;
159         __u64 ff_group, ff_objid;
160         char file_path[PATH_MAX];
161         char dest_path[PATH_MAX];
162         char last_id_file[PATH_MAX];
163         __u64 last_id[MAX_GROUPS] = {0};
164         __u64 tmp_last_id;
165         struct stat st;
166         int obj_exists, xattr_len;
167         int len, ret = 0, error = 0;
168
169         len = strlen(src_dir);
170
171         dir_ptr = opendir(src_dir);
172         if (!dir_ptr) {
173                 fprintf(stderr, "error: opening directory: %s\n",
174                         strerror(errno));
175                 return errno;
176         }
177
178         while ((dirent = readdir64(dir_ptr)) != NULL) {
179                 if (!strcmp(dirent->d_name, ".") ||
180                     !strcmp(dirent->d_name, ".."))
181                         continue;
182
183                 src_dir[len] = 0;
184                 if ((len + dirent->d_reclen + 2) > PATH_MAX) {
185                         fprintf(stderr, "error: %s: string buffer is too small",
186                                 __FUNCTION__);
187                         break;
188                 }
189                 strcat(src_dir, "/");
190                 strcat(src_dir, dirent->d_name);
191
192                 if (dirent->d_type == DT_UNKNOWN) {
193                         struct stat st;
194
195                         ret = stat(src_dir, &st);
196                         if (ret == 0)
197                                 dirent->d_type = filetype_dir_table[st.st_mode &
198                                                                     S_IFMT];
199                 }
200
201                 switch(dirent->d_type) {
202                 case DT_DIR:
203                 ret = traverse_lost_found(src_dir, mount_path);
204                 if (ret)
205                         goto out;
206                 break;
207
208                 case DT_REG:
209                 sprintf(file_path, "%s", src_dir);
210                 xattr_len = getxattr(file_path, "trusted.fid", (void *)&trusted_fid,
211                                sizeof(trusted_fid));
212
213                 if (xattr_len < 0 || xattr_len < sizeof(trusted_fid)) {
214                         /*
215                          * Its very much possible that we dont find fid
216                          * on precreated files, LAST_ID
217                          */
218                         continue;
219                 }
220
221                 ff_group = le64_to_cpu(trusted_fid.ff_group);
222                 ff_objid = le64_to_cpu(trusted_fid.ff_objid);
223
224                 if (ff_group >= MAX_GROUPS) {
225                         fprintf(stderr, "error: invalid group "LPU64" likely"
226                                 "indicates a corrupt xattr for file %s.\n",
227                                 ff_group, file_path);
228                         continue;
229                 }
230
231                 /* might need to create the parent directories for this object */
232                 sprintf(dest_path, "%s/O/"LPU64"/d"LPU64, mount_path, ff_group,
233                         ff_objid % 32);
234
235                 ret = mkdir_p(dest_path, mount_path, ff_group);
236                 if (ret) {
237                         fprintf(stderr, "error: creating directory %s : %s\n",
238                                 dest_path, strerror(errno));
239                         goto out;
240                 }
241
242                 /*
243                  * Object ID needs to be verified against last_id.
244                  * LAST_ID file may not be present in the group directory
245                  * due to corruption. In case of any error try to recover
246                  * as many objects as possible by setting last_id to ~0ULL.
247                  */
248                 if (last_id[ff_group] == 0) {
249                         sprintf(last_id_file, "%s/O/"LPU64"/LAST_ID",
250                                 mount_path, ff_group);
251                         tmp_last_id = read_last_id(last_id_file);
252
253                         if (tmp_last_id == 0)
254                                 tmp_last_id = ~0ULL;
255                         last_id[ff_group] = tmp_last_id;
256                 }
257
258                 if (ff_objid > last_id[ff_group]) {
259                         fprintf(stderr, "error: file skipped because object ID "
260                                 "greater than LAST_ID\nFilename: %s\n"
261                                 "Group: "LPU64"\nObjectid: "LPU64"\n"
262                                 "LAST_ID: "LPU64, file_path, ff_group, ff_objid,
263                                 last_id[ff_group]);
264                         continue;
265                 }
266
267                 /* move file from lost+found to proper object directory */
268                 sprintf(dest_path, "%s/O/"LPU64"/d"LPU64"/"LPU64, mount_path,
269                         ff_group, ff_objid % 32, ff_objid);
270
271                 obj_exists = 1;
272                 ret = stat(dest_path, &st);
273                 if (ret == 0) {
274                         if (st.st_size == 0)
275                                 obj_exists = 0;
276                 } else if (ret < 0 && errno == ENOENT) {
277                         obj_exists = 0;
278                 }
279
280                 if (obj_exists) {
281                         fprintf(stderr, "error: target object %s already "
282                                 "exists and will not be replaced.\n",dest_path);
283                         continue;
284                 }
285
286                 if (rename(file_path, dest_path) < 0) {
287                         fprintf(stderr, "error: rename failed for file %s: %s\n",
288                                 file_path, strerror(errno));
289                         error++;
290                         continue;
291                 }
292
293                 printf("Object %s restored.\n", dest_path);
294                 break;
295
296                 case DT_UNKNOWN:
297                         continue;
298         }
299         }
300 out:
301         if (dir_ptr)
302                 closedir(dir_ptr);
303
304         return error;
305 }
306
307 /*
308  * If LAST_ID file is not present in some group then restore it with the highest
309  * object ID found in that group. By the time we come here all possible objects
310  * have been restored.
311  */
312 int check_last_id(char *mount_path)
313 {
314         char lastid_path[PATH_MAX];
315         char dirname[PATH_MAX], subdirname[PATH_MAX];
316         DIR *groupdir, *subdir;
317         struct stat st;
318         struct dirent *dirent;
319         unsigned long long group;
320         __u64 max_objid;
321         int fd;
322         int ret;
323
324         for (group = 0; group < MAX_GROUPS; group++) {
325                 max_objid = 0;
326                 sprintf(dirname, "%s/O/%llu", mount_path, group);
327
328                 strcpy(lastid_path, dirname);
329                 strcat(lastid_path, "/LAST_ID");
330                 if (stat(lastid_path, &st) == 0)
331                         continue;
332
333                 groupdir = opendir(dirname);
334                 if (groupdir == NULL) {
335                         if (errno != ENOENT)
336                                 fprintf(stderr, "error: opening %s: %s\n",
337                                         dirname, strerror(errno));
338                         continue;
339                 }
340
341                 while ((dirent = readdir(groupdir)) != NULL) {
342                         if (!strcmp(dirent->d_name, ".") ||
343                             !strcmp(dirent->d_name, ".."))
344                                 continue;
345
346                         sprintf(subdirname, "%s/%s", dirname, dirent->d_name);
347
348                         subdir = opendir(subdirname);
349                         if (subdir == NULL) {
350                                 fprintf(stderr, "error: opening %s: %s\n",
351                                         subdirname, strerror(errno));
352                                 continue;
353                         }
354
355                         while ((dirent = readdir(subdir)) != NULL) {
356                                 __u64 objid;
357                                 char *end;
358
359                                 if (!strcmp(dirent->d_name, ".") ||
360                                     !strcmp(dirent->d_name, ".."))
361                                         continue;
362
363                                 objid = strtoull(dirent->d_name, &end, 0);
364                                 if (end == dirent->d_name || *end != 0) {
365                                         fprintf(stderr, "error: unknown object"
366                                                 "ID %s/%s\n", subdirname,
367                                                 dirent->d_name);
368                                         continue;
369                                 }
370                                 if (objid > max_objid)
371                                        max_objid = objid;
372                         }
373                         closedir(subdir);
374                 }
375                 closedir(groupdir);
376
377                 fd = open(lastid_path, O_RDWR | O_CREAT, 0700);
378                 if (fd < 0) {
379                         fprintf(stderr, "error: open \"%s\" failed: %s\n",
380                                 lastid_path, strerror(errno));
381                         close(fd);
382                         return -errno;
383                 }
384
385                 ret = write(fd, &max_objid, sizeof(__u64));
386                 if (ret < sizeof(__u64)) {
387                         close(fd);
388                         return errno;
389                 }
390
391                 close(fd);
392         }
393
394         return 0;
395 }
396
397 int main(int argc, char **argv)
398 {
399         char *progname;
400         char *src_dir = NULL, *last_dir = NULL;
401         struct stat stat_buf;
402         char tmp_path[PATH_MAX];
403         char mount_path[PATH_MAX] = {0};
404         int c;
405         int retval;
406
407         progname = argv[0];
408
409         while ((c = getopt(argc, argv, "d:hv")) != EOF) {
410                 switch (c) {
411                 case 'd':
412                         src_dir = optarg;
413                         /* Trim last '/' */
414                         last_dir = strrchr(src_dir, '/');
415                         if (last_dir != strchr(src_dir, '/')) {
416                                 if (last_dir != NULL && (*(last_dir + 1) == '\0'))
417                                         *(last_dir) = '\0';
418                         }
419                         fprintf(stdout, "\"lost+found\" directory path: %s\n",
420                                 src_dir);
421                         break;
422                 case 'v':
423                         verbose = 1;
424                         break;
425                 case 'h':
426                         usage(progname);
427                 default:
428                         fprintf(stderr, "%s: bad option '%c'\n",
429                                 progname, c);
430                         usage(progname);
431                 }
432         }
433
434         if (src_dir == NULL)
435                 usage(progname);
436
437         last_dir = strrchr(src_dir, '/');
438         if (last_dir == NULL) {
439                 /* Current directory */
440                 strcpy(mount_path, src_dir);
441                 strcat(mount_path, "/..");
442         } else {
443                 strncpy(mount_path, src_dir, (int)(last_dir - src_dir));
444         }
445
446         /* Check if 'O' directory exists and create it if needed */
447         sprintf(tmp_path, "%s/O", mount_path);
448         if (stat(tmp_path, &stat_buf) != 0) {
449                 retval = mkdir(tmp_path, 0700);
450                 if (retval < 0)
451                         fprintf(stderr, "error: creating objects directory %s:"
452                                 " %s\n", tmp_path, strerror(errno));
453                         return errno;
454         }
455
456         memset(grp_info, 0, MAX_GROUPS * sizeof(struct obd_group_info));
457
458         retval = traverse_lost_found(src_dir, mount_path);
459         if (retval) {
460                 fprintf(stderr, "error: traversing lost+found looking for "
461                         "orphan objects.\n");
462                 return retval;
463         }
464
465         retval = check_last_id(mount_path);
466         if (retval)
467                 fprintf(stderr, "error: while checking/restoring LAST_ID.\n");
468
469         return retval;
470 }