4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lustre/utils/ll_recover_lost_found_objs.c
36 * Tool for recovering objects from lost+found that might result from a
37 * Lustre OST with a corrupted directory. Running e2fsck will fix the
38 * directory, but puts all of the objects into lost+found, where they are
39 * inaccessible to Lustre.
41 * Author: Kalpak Shah <kalpak.shah@sun.com>
54 #include <sys/types.h>
55 #include <sys/xattr.h>
58 #include <liblustre.h>
/*
 * Per-group bookkeeping record. Its members fall on lines that are not
 * visible in this excerpt; mkdir_p() reads and sets a dir_exists member.
 */
64 struct obd_group_info {
/* One record per group; mkdir_p() indexes this table by group number. */
67 struct obd_group_info grp_info[MAX_GROUPS];
/*
 * Print the command-line synopsis and a short mounting hint to stderr.
 *
 * progname: program name substituted into the usage line.
 *
 * NOTE(review): the lines after the last fprintf are not visible in this
 * excerpt, so whether the function exits or simply returns cannot be
 * confirmed from here.
 */
69 static void usage(char *progname)
71 fprintf(stderr, "Usage: %s [-hv] -d lost+found_directory\n", progname);
72 fprintf(stderr, "You need to mount the corrupted OST filesystem and "
73 "provide the path for the lost+found directory as the -d "
74 "option, for example:\n"
75 "ll_recover_lost_found_objs -d /mnt/ost/lost+found\n");
/*
 * Format a path into buf (at most size bytes, via vsnprintf) and report a
 * formatting failure or a truncated result on stderr.
 *
 * buf, size:   destination buffer and its capacity in bytes.
 * func, line:  call-site identification echoed in the diagnostic.
 * format, ...: printf-style format string and its arguments.
 *
 * The return statement is not visible in this excerpt; callers in this file
 * treat a non-zero result as failure -- TODO confirm.
 */
79 static int _ll_sprintf(char *buf, size_t size, const char *func, int line,
80 const char *format, ...)
86 rc = vsnprintf(buf, size, format, ap);
/* Anything outside 0 <= rc < size means an output error or a truncated path. */
87 if (!(rc > -1 && rc < size)) {
89 "error: %s:%d: path \"", func, line);
/*
 * NOTE(review): ap was already consumed by vsnprintf above. Unless a line not
 * shown here restarts it (va_end followed by va_start, or a va_copy), the C
 * standard leaves ap indeterminate at this point and reusing it is undefined
 * behaviour -- confirm.
 */
90 vfprintf(stderr, format, ap);
92 fprintf(stderr, "\" is too long\n");
/*
 * Call-site wrapper for _ll_sprintf() that supplies __FUNCTION__ and __LINE__
 * automatically, so the diagnostic names the caller. The ## in front of
 * __VA_ARGS__ is the GNU extension that drops the preceding comma when no
 * variadic arguments are given.
 */
99 #define ll_sprintf(buf, size, format, ...) \
100 _ll_sprintf(buf, size, __FUNCTION__, __LINE__, format, ## __VA_ARGS__)
/*
 * Ensure the object subdirectory dest_path exists, first creating the
 * per-group directory (path pattern <root>/O/<group>) when that group has not
 * been handled yet.
 *
 * dest_path: directory to create.
 * mount:     presumably the root used to build the group path; the ll_sprintf
 *            arguments are on lines not visible here -- TODO confirm.
 * ff_group:  group number, used directly as an index into grp_info[].
 *
 * Return statements are not visible in this excerpt.
 *
 * NOTE(review): no bounds check of ff_group against MAX_GROUPS is visible in
 * this function; verify that the caller guarantees it.
 */
102 static int mkdir_p(const char *dest_path, const char *mount, __u64 ff_group)
104 struct stat stat_buf;
107 char tmp_path[PATH_MAX];
/* dest_path can already be stat-ed; the action taken is on a line not shown. */
109 if (stat(dest_path, &stat_buf) == 0)
/*
 * dir_exists caches that the group directory was already checked, so the
 * stat/mkdir below is skipped once the flag has been set.
 */
112 if (grp_info[ff_group].dir_exists == 0) {
113 if (ll_sprintf(tmp_path, PATH_MAX, "%s/O/"LPU64,
/* Create the group directory only when it is missing. */
117 if (stat(tmp_path, &stat_buf) != 0) {
118 retval = mkdir(tmp_path, mode);
120 fprintf(stderr, "error: creating directory %s: "
121 "%s\n", tmp_path, strerror(errno));
124 grp_info[ff_group].dir_exists = 1;
/* Finally create the requested subdirectory itself. */
128 retval = mkdir(dest_path, mode);
130 fprintf(stderr, "error: creating directory %s: "
131 "%s\n", dest_path, strerror(errno));
/*
 * Read the 64-bit little-endian object ID stored at the start of file_path
 * and return it converted to host byte order.
 *
 * Each failure (open, read, short read) is reported on stderr. Per the
 * original note below, errors are signalled by returning 0, which means a
 * stored value of 0 cannot be told apart from a failure. The error-path
 * return statements, and any close of fd, are on lines not visible here.
 */
138 /* This is returning 0 for an error */
139 static __u64 read_last_id(char *file_path)
145 fd = open(file_path, O_RDONLY);
148 fprintf(stderr, "error: opening %s: %s\n",
149 file_path, strerror(errno));
153 count = read(fd, &last_id, sizeof(last_id));
155 fprintf(stderr, "error: reading file %s: %s\n", file_path,
/* A short read leaves last_id only partly filled, so it is rejected. */
160 if (count != sizeof(last_id)) {
161 fprintf(stderr, "error: Could not read full last_id from %s\n",
168 return le64_to_cpu(last_id);
/*
 * Lookup table that traverse_lost_found() indexes with masked st_mode bits to
 * derive a d_type value when readdir reports DT_UNKNOWN. Its entries are not
 * visible in this excerpt.
 */
171 static unsigned filetype_dir_table[] = {
/* Presumably guards a door-type entry on platforms defining both macros. */
180 #if defined(DT_DOOR) && defined(S_IFDOOR)
// Walk src_dir recursively and move object files that carry a trusted.fid
// xattr to <mount_path>/O/<group>/d<objid % 32>/<objid>.
//
// src_dir:    directory to scan. Entry names are appended to it in place with
//             strcat, so it must be a writable buffer; the length check below
//             assumes a capacity of PATH_MAX bytes.
// mount_path: root under which the O/ object tree is rebuilt.
//
// Return statements, case labels and several declarations are on lines not
// visible in this excerpt.
185 static int traverse_lost_found(char *src_dir, const char *mount_path)
188 struct filter_fid parent_fid;
189 struct dirent64 *dirent;
190 __u64 ff_group, ff_objid;
192 char dest_path[PATH_MAX];
193 char last_id_file[PATH_MAX];
// Per-group cache of LAST_ID values; 0 means not loaded yet.
194 __u64 last_id[MAX_GROUPS] = {0};
197 int obj_exists, xattr_len;
198 int len, ret = 0, error = 0;
// Length of the directory prefix, taken before any entry name is appended.
200 len = strlen(src_dir);
202 dir_ptr = opendir(src_dir);
204 fprintf(stderr, "error: opening directory: %s\n",
209 while ((dirent = readdir64(dir_ptr)) != NULL) {
210 if (!strcmp(dirent->d_name, ".") ||
211 !strcmp(dirent->d_name, ".."))
// The +2 accounts for the slash separator and the terminating NUL.
215 if ((len + strlen(dirent->d_name) + 2) > PATH_MAX) {
216 fprintf(stderr, "error: %s/%s: path too long\n",
217 src_dir, dirent->d_name);
// NOTE(review): from here on src_dir holds the full entry path. Cutting it
// back to len before the next iteration must happen on a line not shown
// here -- confirm.
220 strcat(src_dir, "/");
221 strcat(src_dir, dirent->d_name);
// When readdir gives no type, fall back to stat and map st_mode through
// filetype_dir_table.
223 if (dirent->d_type == DT_UNKNOWN) {
224 ret = stat(src_dir, &st);
227 "error: stating %s: %s\n",
228 src_dir, strerror(errno));
231 dirent->d_type = filetype_dir_table[st.st_mode &
233 if (dirent->d_type == DT_UNKNOWN) {
235 "error: %s of unknown type 0%o\n",
236 src_dir, st.st_mode);
241 switch(dirent->d_type) {
// Recurse into the entry (presumably the directory case; the case label is
// not visible).
243 ret = traverse_lost_found(src_dir, mount_path);
// Read the trusted.fid xattr of the entry.
// NOTE(review): file_path is not defined on any visible line; confirm how it
// relates to src_dir.
252 xattr_len = getxattr(file_path, "trusted.fid",
// xattr_len is an int: the -1 test runs first, so a failed getxattr never
// reaches the unsigned comparison against sizeof.
256 if (xattr_len == -1 || xattr_len < sizeof(parent_fid))
258 * Its very much possible that we dont find fid
259 * on precreated files, LAST_ID
// The xattr fields are stored little-endian; convert to host order.
263 ff_group = le64_to_cpu(parent_fid.ff_seq);
// NOTE(review): ff_group later indexes last_id[MAX_GROUPS] here and
// grp_info[] inside mkdir_p, but it is validated against FID_SEQ_OST_MAX.
// Confirm that FID_SEQ_OST_MAX does not exceed MAX_GROUPS.
264 if (ff_group >= FID_SEQ_OST_MAX) {
// NOTE(review): the literal ending in likely and the one starting with
// indicates are adjacent with no space, so the message prints
// likelyindicates.
265 fprintf(stderr, "error: invalid group "LPU64" likely"
266 "indicates a corrupt xattr for file %s.\n",
267 ff_group, file_path);
270 ff_objid = le64_to_cpu(parent_fid.ff_objid);
272 /* might need to create the parent directories for
// Objects are spread over 32 subdirectories selected by objid % 32.
274 if (ll_sprintf(dest_path, PATH_MAX, "%s/O/"LPU64"/d"LPU64,
275 mount_path, ff_group, ff_objid % 32)) {
280 ret = mkdir_p(dest_path, mount_path, ff_group);
287 * Object ID needs to be verified against last_id.
288 * LAST_ID file may not be present in the group directory
289 * due to corruption. In case of any error tyr to recover
290 * as many objects as possible by setting last_id to ~0ULL.
// A cached value of 0 means LAST_ID has not been loaded for this group.
292 if (last_id[ff_group] == 0) {
293 if (ll_sprintf(last_id_file, PATH_MAX,
294 "%s/O/"LPU64"/LAST_ID",
295 mount_path, ff_group)) {
300 tmp_last_id = read_last_id(last_id_file);
// read_last_id reports failure as 0; the replacement value is assigned on a
// line not shown here.
301 if (tmp_last_id == 0)
303 last_id[ff_group] = tmp_last_id;
// Objects numbered beyond LAST_ID are reported and skipped.
// NOTE(review): the visible format string ends with the LAST_ID value and
// has no trailing newline.
306 if (ff_objid > last_id[ff_group]) {
307 fprintf(stderr, "error: file skipped because object ID "
308 "greater than LAST_ID\nFilename: %s\n"
309 "Group: "LPU64"\nObjectid: "LPU64"\n"
310 "LAST_ID: "LPU64, file_path, ff_group, ff_objid,
315 /* move file from lost+found to proper object
317 if (ll_sprintf(dest_path, PATH_MAX,
318 "%s/O/"LPU64"/d"LPU64"/"LPU64, mount_path,
319 ff_group, ff_objid % 32, ff_objid)) {
// Probe the destination first so an existing object is never overwritten.
325 ret = stat(dest_path, &st);
332 "warning: stat for %s: %s\n",
333 dest_path, strerror(errno));
338 fprintf(stderr, "error: target object %s already "
339 "exists and will not be replaced.\n",dest_path);
// Move the orphan into its object directory.
343 if (rename(file_path, dest_path) < 0) {
344 fprintf(stderr, "error: rename failed for file %s: %s\n",
345 file_path, strerror(errno));
350 printf("Object %s restored.\n", dest_path);
361 * If LAST_ID file is not present in some group then restore it with the highest
362 * object ID found in that group. By the time we come here all possible objects
363 * have been restored.
// For each group number below MAX_GROUPS whose directory lacks a LAST_ID
// file, scan the group subdirectories for the highest numeric object name and
// write that value, as a little-endian 64-bit integer, to LAST_ID.
//
// mount_path: root of the O/ object tree (the ll_sprintf arguments that use it
//             are on a line not shown -- TODO confirm).
//
// Return statements, and any closedir or close calls, are not visible in this
// excerpt.
365 static int check_last_id(const char *mount_path)
367 char lastid_path[PATH_MAX];
368 char dirname[PATH_MAX], subdirname[PATH_MAX];
369 DIR *groupdir, *subdir;
371 struct dirent *dirent;
377 for (group = 0; group < MAX_GROUPS; group++) {
380 if (ll_sprintf(dirname, PATH_MAX, "%s/O/"LPU64,
383 if (ll_sprintf(lastid_path, PATH_MAX, "%s/LAST_ID", dirname))
// LAST_ID is already present; presumably the group is then skipped (that
// line is not shown).
386 if (stat(lastid_path, &st) == 0)
389 groupdir = opendir(dirname);
390 if (groupdir == NULL) {
392 fprintf(stderr, "error: opening %s: %s\n",
393 dirname, strerror(errno));
// Outer loop: every entry of the group directory is opened as a
// subdirectory.
397 while ((dirent = readdir(groupdir)) != NULL) {
398 if (!strcmp(dirent->d_name, ".") ||
399 !strcmp(dirent->d_name, ".."))
402 if (ll_sprintf(subdirname, PATH_MAX, "%s/%s",
403 dirname, dirent->d_name)) {
407 subdir = opendir(subdirname);
408 if (subdir == NULL) {
409 fprintf(stderr, "error: opening %s: %s\n",
410 subdirname, strerror(errno));
// Inner loop: reuses the same dirent variable as the outer loop.
414 while ((dirent = readdir(subdir)) != NULL) {
418 if (!strcmp(dirent->d_name, ".") ||
419 !strcmp(dirent->d_name, ".."))
// Base 0 lets strtoull accept decimal, 0x-prefixed hex and 0-prefixed octal
// names.
422 objid = strtoull(dirent->d_name, &end, 0);
// Reject names with no digits at all or with trailing characters.
// NOTE(review): the two adjacent literals below join without a space, so the
// message prints objectID.
423 if (end == dirent->d_name || *end != 0) {
424 fprintf(stderr, "error: unknown object"
425 "ID %s/%s\n", subdirname,
429 if (objid > max_objid)
// Create LAST_ID; mode 0700 only takes effect when the file is created.
436 fd = open(lastid_path, O_RDWR | O_CREAT, 0700);
438 fprintf(stderr, "error: open \"%s\" failed: %s\n",
439 lastid_path, strerror(errno));
// Store the value little-endian, matching what read_last_id expects.
443 max_objid = cpu_to_le64(max_objid);
444 ret = write(fd, &max_objid, sizeof(__u64));
// NOTE(review): sizeof yields size_t. If ret is a signed integer no wider
// than size_t, a -1 from write converts to a huge unsigned value and this
// test would not fire -- confirm the declared type of ret.
445 if (ret < sizeof(__u64)) {
446 fprintf(stderr, "error: write \"%s\" failed: %s\n",
447 lastid_path, strerror(errno));
// Entry point: parse the options, resolve the lost+found directory and its
// parent (used as the mount point), make sure <mount_path>/O exists, restore
// orphan objects, then rebuild any missing LAST_ID files.
//
// Option cases, chdir calls and return statements are on lines not visible
// in this excerpt.
458 int main(int argc, char **argv)
461 struct stat stat_buf;
462 char src_dir[PATH_MAX] = "";
463 char mount_path[PATH_MAX];
464 char tmp_path[PATH_MAX];
// Option string d:hv: -d takes an argument, -h and -v are plain flags.
470 while ((c = getopt(argc, argv, "d:hv")) != EOF) {
// The messages below indicate a chdir to optarg followed by a chdir to the
// parent directory; the calls themselves are on lines not shown.
474 fprintf(stderr, "error: chdir to %s: %s\n",
475 optarg, strerror(errno));
// src_dir receives the working directory reported by getcwd at this point.
478 if (getcwd(src_dir, PATH_MAX) == NULL) {
480 "error: getcwd of lost+found: %s\n",
485 fprintf(stderr, "error: chdir to \"..\": %s\n",
// mount_path receives the working directory reported after the second chdir.
489 if (getcwd(mount_path, PATH_MAX) == NULL) {
491 "error: getcwd of mount point: %s\n",
// Identical paths mean the parent step did not move anywhere, which is
// reported as the root directory.
495 if (!strcmp(src_dir, mount_path)) {
497 "error: root directory is detected\n");
500 fprintf(stdout, "\"lost+found\" directory path: %s\n",
509 fprintf(stderr, "%s: bad option '%c'\n",
518 /* Check if 'O' directory exists and create it if needed */
519 if (ll_sprintf(tmp_path, PATH_MAX, "%s/O", mount_path))
522 if (stat(tmp_path, &stat_buf) != 0) {
523 retval = mkdir(tmp_path, 0700);
525 fprintf(stderr, "error: creating objects directory %s:"
526 " %s\n", tmp_path, strerror(errno));
// src_dir is passed as a writable PATH_MAX buffer because the traversal
// appends entry names to it in place.
531 retval = traverse_lost_found(src_dir, mount_path);
533 fprintf(stderr, "error: traversing lost+found looking for "
534 "orphan objects.\n");
// Runs after the traversal so restored objects count towards the maximum.
538 retval = check_last_id(mount_path);
540 fprintf(stderr, "error: while checking/restoring LAST_ID.\n");