/* Bit flags that tune an LFSCK run; each flag occupies its own bit.
 * The lfsck_start option parser ORs them into start.ls_flags, and the
 * LFSCK bookmark tracks at least LPF_CREATE_OSTOBJ in lb_param. */
enum lfsck_param_flags {
/* Reset LFSCK iterator position to the device beginning. */
- LPF_RESET = 0x0001,
+ LPF_RESET = 0x0001,
/* Exit when fail. */
- LPF_FAILOUT = 0x0002,
+ LPF_FAILOUT = 0x0002,
/* Dryrun mode, only check without modification */
- LPF_DRYRUN = 0x0004,
+ LPF_DRYRUN = 0x0004,
/* LFSCK runs on all targets. */
- LPF_ALL_TGT = 0x0008,
+ LPF_ALL_TGT = 0x0008,
/* Broadcast the command to other MDTs. Only valid on the sponsor MDT */
- LPF_BROADCAST = 0x0010,
+ LPF_BROADCAST = 0x0010,
/* Handle orphan objects. */
- LPF_ORPHAN = 0x0020,
+ LPF_ORPHAN = 0x0020,
+
+ /* Create OST-object for dangling LOV EA. */
+ LPF_CREATE_OSTOBJ = 0x0040,
};
enum lfsck_type {
LSV_ERROR_HANDLE = 0x00000002,
LSV_DRYRUN = 0x00000004,
LSV_ASYNC_WINDOWS = 0x00000008,
+ LSV_CREATE_OSTOBJ = 0x00000010,
};
/* Arguments for starting lfsck. */
/* How many objects failed to be processed during double scan. */
__u64 ll_objs_failed_phase2;
- /* kinds of inconsistency have been repaired.
+ /* kinds of inconsistency that have been, or are to be, repaired.
* ll_objs_repaired[type - 1] is the count for the given @type. */
__u64 ll_objs_repaired[LLIT_MAX];
return rc > 0 ? 0 : rc;
}
-/* For the MDT-object with dangling reference, we need to re-create
- * the missed OST-object with the known FID/owner information. */
-static int lfsck_layout_recreate_ostobj(const struct lu_env *env,
+/* For the MDT-object with dangling reference, we need to repair the
+ * inconsistency according to the LFSCK sponsor's requirement:
+ *
+ * 1) Keep the inconsistency there and report the inconsistency case,
+ * then give the chance to the application to find related issues,
+ * and the users can make the decision about how to handle it with
+ * more human knowledge. (by default)
+ *
+ * 2) Re-create the missed OST-object with the FID/owner information. */
+static int lfsck_layout_repair_dangling(const struct lu_env *env,
struct lfsck_component *com,
struct lfsck_layout_req *llr,
- struct lu_attr *la)
+ const struct lu_attr *pla)
{
struct lfsck_thread_info *info = lfsck_env_info(env);
struct filter_fid *pfid = &info->lti_new_pfid;
struct dt_allocation_hint *hint = &info->lti_hint;
+ struct lu_attr *cla = &info->lti_la2;
struct dt_object *parent = llr->llr_parent->llo_obj;
struct dt_object *child = llr->llr_child;
struct dt_device *dev = lfsck_obj2dt_dev(child);
struct lu_buf *buf;
struct lustre_handle lh = { 0 };
int rc;
+ bool create;
ENTRY;
- CDEBUG(D_LFSCK, "Repair dangling reference for: parent "DFID
- ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u\n",
+ if (com->lc_lfsck->li_bookmark_ram.lb_param & LPF_CREATE_OSTOBJ)
+ create = true;
+ else
+ create = false;
+
+ CDEBUG(D_LFSCK, "Found dangling reference for: parent "DFID
+ ", child "DFID", OST-index %u, stripe-index %u, owner %u:%u. %s",
PFID(lfsck_dto2fid(parent)), PFID(lfsck_dto2fid(child)),
- llr->llr_ost_idx, llr->llr_lov_idx, la->la_uid, la->la_gid);
+ llr->llr_ost_idx, llr->llr_lov_idx, pla->la_uid, pla->la_gid,
+ create ? "Create the lost OST-object as required.\n" :
+ "Keep the MDT-object there by default.\n");
+
+ if (!create)
+ RETURN(1);
+
+ memset(cla, 0, sizeof(*cla));
+ cla->la_uid = pla->la_uid;
+ cla->la_gid = pla->la_gid;
+ cla->la_mode = S_IFREG | 0666;
+ cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
+ LA_ATIME | LA_MTIME | LA_CTIME;
rc = lfsck_layout_lock(env, com, parent, &lh,
MDS_INODELOCK_LAYOUT | MDS_INODELOCK_XATTR);
pfid->ff_parent.f_stripe_idx = cpu_to_le32(llr->llr_lov_idx);
buf = lfsck_buf_get(env, pfid, sizeof(struct filter_fid));
- rc = dt_declare_create(env, child, la, hint, NULL, handle);
+ rc = dt_declare_create(env, child, cla, hint, NULL, handle);
if (rc != 0)
GOTO(stop, rc);
if (unlikely(lu_object_is_dying(parent->do_lu.lo_header)))
GOTO(unlock2, rc = 1);
- rc = dt_create(env, child, la, hint, NULL, handle);
+ rc = dt_create(env, child, cla, hint, NULL, handle);
if (rc != 0)
GOTO(unlock2, rc);
switch (type) {
case LLIT_DANGLING:
- memset(cla, 0, sizeof(*cla));
- cla->la_uid = pla->la_uid;
- cla->la_gid = pla->la_gid;
- cla->la_mode = S_IFREG | 0666;
- cla->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
- LA_ATIME | LA_MTIME | LA_CTIME;
- rc = lfsck_layout_recreate_ostobj(env, com, llr, cla);
+ rc = lfsck_layout_repair_dangling(env, com, llr, pla);
break;
case LLIT_UNMATCHED_PAIR:
rc = lfsck_layout_repair_unmatched_pair(env, com, llr, pla);
memset(lr, 0, sizeof(*lr));
lr->lr_event = LE_START;
lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN |
- LSV_ASYNC_WINDOWS;
+ LSV_ASYNC_WINDOWS | LSV_CREATE_OSTOBJ;
lr->lr_speed = bk->lb_speed_limit;
lr->lr_version = bk->lb_version;
lr->lr_param = bk->lb_param;
"all_targets",
"broadcast",
"orphan",
+ "create_ostobj",
NULL
};
dirty = true;
}
+ if (start->ls_valid & LSV_CREATE_OSTOBJ) {
+ if (bk->lb_param & LPF_CREATE_OSTOBJ &&
+ !(start->ls_flags & LPF_CREATE_OSTOBJ)) {
+ bk->lb_param &= ~LPF_CREATE_OSTOBJ;
+ dirty = true;
+ } else if (!(bk->lb_param & LPF_CREATE_OSTOBJ) &&
+ start->ls_flags & LPF_CREATE_OSTOBJ) {
+ bk->lb_param |= LPF_CREATE_OSTOBJ;
+ dirty = true;
+ }
+ }
+
if (dirty) {
rc = lfsck_bookmark_store(env, lfsck);
if (rc != 0)
echo "'ls' should fail because of dangling referenced MDT-object"
ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(1) ls should fail."
- echo "Trigger layout LFSCK to find out dangling reference and fix them"
+ echo "Trigger layout LFSCK to find out dangling reference"
$START_LAYOUT || error "(2) Fail to start LFSCK for layout!"
wait_update_facet $SINGLEMDS "$LCTL get_param -n \
[ $repaired -eq 32 ] ||
error "(4) Fail to repair dangling reference: $repaired"
+ echo "'ls' should fail because it will not repair dangling by default"
+ ls -ail $DIR/$tdir > /dev/null 2>&1 && error "(5) ls should fail."
+
+ echo "Trigger layout LFSCK to repair dangling reference"
+ $START_LAYOUT -r -c || error "(6) Fail to start LFSCK for layout!"
+
+ wait_update_facet $SINGLEMDS "$LCTL get_param -n \
+ mdd.${MDT_DEV}.lfsck_layout |
+ awk '/^status/ { print \\\$2 }'" "completed" 6 || return 3
+
+ local repaired=$($SHOW_LAYOUT |
+ awk '/^repaired_dangling/ { print $2 }')
+ [ $repaired -eq 32 ] ||
+ error "(7) Fail to repair dangling reference: $repaired"
+
echo "'ls' should success after layout LFSCK repairing"
- ls -ail $DIR/$tdir > /dev/null || error "(5) ls should success."
+ ls -ail $DIR/$tdir > /dev/null || error "(8) ls should success."
}
run_test 14 "LFSCK can repair MDT-object with dangling reference"
error "(1) Expect incorrect file2 size"
echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
- $START_LAYOUT -o || error "(2) Fail to start LFSCK for layout!"
+ $START_LAYOUT -o -c || error "(2) Fail to start LFSCK for layout!"
for k in $(seq $MDSCOUNT); do
# The LFSCK status query interval is 30 seconds. For the case
do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1602
echo "Trigger layout LFSCK on all devices to find out orphan OST-object"
- $START_LAYOUT -o || error "(2) Fail to start LFSCK for layout!"
+ $START_LAYOUT -o -c || error "(2) Fail to start LFSCK for layout!"
wait_update_facet mds1 "$LCTL get_param -n \
mdd.$(facet_svc mds1).lfsck_layout |
scrub_check_flags 4 inconsistent
# run under dryrun mode
- scrub_start 5 -n on
+ scrub_start 5 --dryrun
sleep 3
scrub_check_status 6 completed
scrub_check_flags 7 inconsistent
scrub_check_repaired 9 20
# run under dryrun mode again
- scrub_start 10 -n on
+ scrub_start 10 --dryrun
sleep 3
scrub_check_status 11 completed
scrub_check_flags 12 inconsistent
scrub_check_repaired 14 20
# run under normal mode
- scrub_start 15 -n off
+ #
+ # Lustre-2.x (x <= 5) used "-n off" to disable dryrun, which does not
+ # work under Lustre-2.y (y >= 6); the test scripts must use "-noff"
+ # or "--dryrun=off" instead.
+ scrub_start 15 --dryrun=off
sleep 3
scrub_check_status 16 completed
scrub_check_flags 17 ""
scrub_check_repaired 19 20
# run under normal mode again
- scrub_start 20 -n off
+ scrub_start 20 --dryrun=off
sleep 3
scrub_check_status 21 completed
scrub_check_flags 22 ""
{"==== LFSCK ====", jt_noop, 0, "LFSCK"},
+ /* Command-table entry for lfsck_start. Keep this usage text in step
+ * with the SYNOPSIS printed by the lfsck_start help output. */
{"lfsck_start", jt_lfsck_start, 0, "start LFSCK\n"
"usage: lfsck_start <-M | --device [MDT,OST]_device>\n"
+ " [-A | --all] [-c | --create_ostobj [switch]]\n"
" [-e | --error error_handle] [-h | --help]\n"
- " [-n | --dryrun switch] [-r | --reset]\n"
- " [-s | --speed speed_limit] [-A | --all]\n"
+ " [-n | --dryrun [switch]] [-o | --orphan]\n"
+ " [-r | --reset] [-s | --speed speed_limit]\n"
" [-t | --type lfsck_type[,lfsck_type...]]\n"
- " [-w | --windows win_size] [-o | --orphan]"},
+ " [-w | --windows win_size]"},
{"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
"usage: lfsck_stop <-M | --device [MDT,OST]_device>\n"
" [-A | --all] [-h | --help]"},
#include <lnet/lnetctl.h>
/* Long-option table for the LFSCK start command. "create_ostobj" and
 * "dryrun" are declared optional_argument, so a value has to be attached
 * to the option itself (e.g. "--dryrun=off" or "-noff"); a value given as
 * a separate word is not taken as the option's argument. */
static struct option long_opt_start[] = {
- {"device", required_argument, 0, 'M'},
- {"error", required_argument, 0, 'e'},
- {"help", no_argument, 0, 'h'},
- {"dryrun", required_argument, 0, 'n'},
- {"reset", no_argument, 0, 'r'},
- {"speed", required_argument, 0, 's'},
- {"all", no_argument, 0, 'A'},
- {"type", required_argument, 0, 't'},
- {"windows", required_argument, 0, 'w'},
- {"orphan", no_argument, 0, 'o'},
- {0, 0, 0, 0}
+ {"device", required_argument, 0, 'M'},
+ {"all", no_argument, 0, 'A'},
+ {"create_ostobj", optional_argument, 0, 'c'},
+ {"error", required_argument, 0, 'e'},
+ {"help", no_argument, 0, 'h'},
+ {"dryrun", optional_argument, 0, 'n'},
+ {"orphan", no_argument, 0, 'o'},
+ {"reset", no_argument, 0, 'r'},
+ {"speed", required_argument, 0, 's'},
+ {"type", required_argument, 0, 't'},
+ {"windows", required_argument, 0, 'w'},
+ {0, 0, 0, 0 }
};
static struct option long_opt_stop[] = {
+ /* Print the lfsck_start synopsis and per-option help to stderr; the
+ * only conversion, %d, is filled with LFSCK_SPEED_NO_LIMIT. */
fprintf(stderr, "Start LFSCK.\n"
"SYNOPSIS:\n"
"lfsck_start <-M | --device [MDT,OST]_device>\n"
+ " [-A | --all] [-c | --create_ostobj [switch]]\n"
" [-e | --error error_handle] [-h | --help]\n"
- " [-n | --dryrun switch] [-r | --reset]\n"
- " [-s | --speed speed_limit] [-A | --all]\n"
+ " [-n | --dryrun [switch]] [-o | --orphan]\n"
+ " [-r | --reset] [-s | --speed speed_limit]\n"
" [-t | --type lfsck_type[,lfsck_type...]]\n"
- " [-w | --windows win_size] [-o | --orphan]\n"
+ " [-w | --windows win_size]\n"
"OPTIONS:\n"
"-M: The device to start LFSCK/scrub on.\n"
+ "-A: Start LFSCK on all MDT devices.\n"
+ "-c: create the lost OST-object for dangling LOV EA. "
+ "'off'(default) or 'on'.\n"
"-e: Error handle, 'continue'(default) or 'abort'.\n"
"-h: Help information.\n"
"-n: Check without modification. 'off'(default) or 'on'.\n"
+ "-o: handle orphan objects.\n"
"-r: Reset scanning start position to the device beginning.\n"
"-s: How many items can be scanned at most per second. "
"'%d' means no limit (default).\n"
- "-A: Start LFSCK on all MDT devices.\n"
"-t: The LFSCK type(s) to be started.\n"
- "-w: The windows size for async requests pipeline.\n"
- "-o: handle orphan objects.\n",
+ "-w: The windows size for async requests pipeline.\n",
LFSCK_SPEED_NO_LIMIT);
}
char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
char device[MAX_OBD_NAME];
struct lfsck_start start;
- char *optstring = "M:e:hn:rs:At:w:o";
+ char *optstring = "M:Ac::e:hn::ors:t:w:";
int opt, index, rc, val, i, type;
memset(&data, 0, sizeof(data));
if (rc != 0)
return rc;
break;
+ case 'A':
+ start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
+ break;
+ case 'c':
+ /* A bare "-c" or an attached "on" requests re-creating the lost
+ * OST-objects; an attached "off" leaves the flag clear. Any other
+ * value is rejected. LSV_CREATE_OSTOBJ is set in both accepted
+ * cases to mark the choice as explicitly given. */
+ if (optarg == NULL || strcmp(optarg, "on") == 0) {
+ start.ls_flags |= LPF_CREATE_OSTOBJ;
+ } else if (strcmp(optarg, "off") != 0) {
+ /* The examples must name this option (-c), not -n. */
+ fprintf(stderr, "Invalid switch: %s. "
+ "The valid switch should be: 'on' "
+ "or 'off' (default) without blank, "
+ "or empty. For example: '-con' or "
+ "'-coff' or '-c'.\n", optarg);
+ return -EINVAL;
+ }
+ start.ls_valid |= LSV_CREATE_OSTOBJ;
+ break;
case 'e':
if (strcmp(optarg, "abort") == 0) {
start.ls_flags |= LPF_FAILOUT;
usage_start();
return 0;
case 'n':
- if (strcmp(optarg, "on") == 0) {
+ if (optarg == NULL || strcmp(optarg, "on") == 0) {
start.ls_flags |= LPF_DRYRUN;
} else if (strcmp(optarg, "off") != 0) {
- fprintf(stderr, "Invalid dryrun switch: %s. "
- "The valid value shou be: 'off'"
- "(default) or 'on'\n", optarg);
+ fprintf(stderr, "Invalid switch: %s. "
+ "The valid switch should be: 'on' "
+ "or 'off' (default) without blank, "
+ "or empty. For example: '-non' or "
+ "'-noff' or '-n'.\n", optarg);
return -EINVAL;
}
start.ls_valid |= LSV_DRYRUN;
break;
+ case 'o':
+ start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST |
+ LPF_ORPHAN;
+ break;
case 'r':
start.ls_flags |= LPF_RESET;
break;
start.ls_speed_limit = val;
start.ls_valid |= LSV_SPEED_LIMIT;
break;
- case 'A':
- start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST;
- break;
case 't': {
char *str = optarg, *p, c;
start.ls_async_windows = val;
start.ls_valid |= LSV_ASYNC_WINDOWS;
break;
- case 'o':
- start.ls_flags |= LPF_ALL_TGT | LPF_BROADCAST |
- LPF_ORPHAN;
- break;
default:
fprintf(stderr, "Invalid option, '-h' for help.\n");
return -EINVAL;