From 4c4f74b0a581e139f3bf3db1a41d2b11594c039f Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Tue, 13 May 2014 21:07:02 +0800 Subject: [PATCH] LU-4887 lfsck: run all LFSCK scan types by default When running "lctl lfsck_start" run all available scan types by default (scrub, layout, and namespace currently) if none is given. If '-t' is given, run only the specified scan types. 'scrub' is always run. Rename the lfsck_type constants to use a LFSCK_TYPE_ prefix to be more descriptive. Add LFSCK_TYPE_ constants to wirecheck. Add ltn_ prefix to lfsck_type_name structure. Print the available types from the lfsck_types_names[] array in the help message instead of hard-coding them in the help message. Don't return an error if "lctl lfsck_stop" is used when stopped. Fix up lfsck_namespace /proc stats to use more descriptive names. Signed-off-by: Fan Yong Signed-off-by: Andreas Dilger Change-Id: I106483ade0bf07c2920dd95a263acdbe4e500c1e Reviewed-on: http://review.whamcloud.com/10278 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Nunez --- lustre/doc/lctl.8 | 60 ++++++---- lustre/include/lustre/lustre_lfsck_user.h | 25 ++-- lustre/include/lustre_lfsck.h | 2 +- lustre/lfsck/lfsck_layout.c | 33 ++++-- lustre/lfsck/lfsck_lib.c | 5 +- lustre/lfsck/lfsck_namespace.c | 26 ++-- lustre/mdd/mdd_lproc.c | 4 +- lustre/ofd/lproc_ofd.c | 2 +- lustre/ofd/ofd_io.c | 2 +- lustre/ptlrpc/wirehdr.c | 3 +- lustre/ptlrpc/wiretest.c | 12 +- lustre/tests/sanity-lfsck.sh | 29 ++++- lustre/tests/sanity-scrub.sh | 12 +- lustre/utils/lustre_lfsck.c | 191 ++++++++++++++---------------- lustre/utils/wirecheck.c | 8 +- lustre/utils/wirehdr.c | 3 +- lustre/utils/wiretest.c | 11 +- 17 files changed, 247 insertions(+), 181 deletions(-) diff --git a/lustre/doc/lctl.8 b/lustre/doc/lctl.8 index 466ad3c..dc71f4a 100644 --- a/lustre/doc/lctl.8 +++ b/lustre/doc/lctl.8 @@ -312,49 +312,63 @@ Start LFSCK on the specified MDT or OST device with specified parameters. 
The MDT or OST device to start LFSCK/scrub on. .TP -A, --all -Start LFSCK on all MDT devices. +Start LFSCK on all available MDT devices. .TP -c, --create_ostobj [on | off] -Create the lost OST-object for dangling LOV EA: 'off'(default) or 'on'. Under +Create the lost OST-object for dangling LOV EA: 'off' (default) or 'on'. Under default mode, when the LFSCK find some MDT-object with dangling reference, it -will report the inconsistency but without repairing. Otherwise the LFSCK will -re-create the missed OST-object. +will report the inconsistency but will not repair it. If 'on' is given, then +LFSCK will re-create the missed OST-object. .TP -e, --error -With error_handle as 'abort' LFSCK will stop if a repair is impossible. If no -value is specified, the saved value will be used if resuming from a checkpoint. -Otherwise the default behavior is to 'continue' if a repair is impossible. +With error_handle as 'abort' then if a repair is impossible LFSCK will save +the current position and stop with an error. Otherwise the default behavior is +to 'continue' if a repair is impossible. .TP -h, --help -Show this help. +Show the usage message. .TP -n, --dryrun [on | off] -Perform a trial run with no changes made. Default is 'off' +Perform a trial run with no changes made, if 'on' or no argument is given. +Default is 'off', meaning that any inconsistencies found will be repaired. .TP -o, --orphan -Handle orphan objects, such as orphan OST-objects for layout LFSCK. +Handle orphan objects, such as orphan OST-objects for layout LFSCK by +linking them under the .../.lustre/lost+found directory. .TP -r, --reset Set the current position of object iteration to the beginning of the specified -device. 
The non-specified parameters will also be reset to the default. By +default the iterator will resume the scanning from the last saved checkpoint +position, and other unspecified parameters will be the same as the prior +incomplete run. .TP -s, --speed -Set the upper limit of LFSCK processing in objects per second. If no value is -specified the saved value is used (if resuming from a check point). Otherwise -the default value of 0 is used. 0 means run as fast as possible. +Set the upper limit of LFSCK processing in objects per second to reduce load +on the servers and storage. If no value is specified the saved value is used +(if resuming from a checkpoint). Otherwise the default value of 0 is used, +which means check the filesystem as quickly as possible. .TP -t, --type -The type of LFSCK checking/repair to execute. By default, the LFSCK -component(s) which ran last time and did not finish or the component(s) -corresponding to some known system inconsistency, will be started. Anytime -LFSCK is triggered on an ldiskfs MDT or OST, the OI Scrub is executed. -Alternative types include FID-in-dirent and linkEA (namespace) and MDT-OST -inconsistency (layout). +The type of LFSCK checking/repair to execute. If no type is given and the +previous run was incomplete or internal consistency checks detected an error, +then the same types are used for the next run. Otherwise, the default is to +check all types of consistency. Any time LFSCK is triggered on an ldiskfs +MDT or OST then OI Scrub is run. Valid types are a comma-separated list of one or more of +.B scrub +to run only the local OI Scrub on ldiskfs targets, +.B namespace +for FID-in-dirent and linkEA checking on the MDT(s), +.B layout +for MDT-OST cross-reference consistency, and +.B all +to run all of the available check types. .TP -w, --window_size -The window size for async requests pipeline. +Specifies the maximum number of in-flight requests being processed at +one time. 
This controls the load placed on remote OSTs when running +.B layout +checks. By default there are at most 1024 outstanding requests. .TP .B lfsck_stop \fR<-M | --device [MDT,OST]_device> [-A | --all] [-h | --help] Stop LFSCK on the specified MDT or OST device. diff --git a/lustre/include/lustre/lustre_lfsck_user.h b/lustre/include/lustre/lustre_lfsck_user.h index 4c4654d..e116e07 100644 --- a/lustre/include/lustre/lustre_lfsck_user.h +++ b/lustre/include/lustre/lustre_lfsck_user.h @@ -58,30 +58,33 @@ enum lfsck_param_flags { }; enum lfsck_type { - /* For MDT-OST consistency check/repair. */ - LT_LAYOUT = 0x0001, + /* For MDT and OST internal OSD consistency check/repair. */ + LFSCK_TYPE_SCRUB = 0x0000, - /* For MDT-MDT consistency check/repair. */ - LT_DNE = 0x0002, + /* For MDT-OST (layout, object) consistency check/repair. */ + LFSCK_TYPE_LAYOUT = 0x0001, - /* For FID-in-dirent and linkEA consistency check/repair. */ - LT_NAMESPACE = 0x0004, + /* For MDT-MDT (remote object) consistency check/repair. */ + LFSCK_TYPE_DNE = 0x0002, + + /* For MDT (FID-in-dirent, linkEA) consistency check/repair. 
*/ + LFSCK_TYPE_NAMESPACE = 0x0004, + LFSCK_TYPES_SUPPORTED = (LFSCK_TYPE_SCRUB | LFSCK_TYPE_LAYOUT | + LFSCK_TYPE_NAMESPACE), + LFSCK_TYPES_DEF = LFSCK_TYPES_SUPPORTED, + LFSCK_TYPES_ALL = ((__u16)(~0)) }; #define LFSCK_VERSION_V1 1 #define LFSCK_VERSION_V2 2 -#define LFSCK_TYPES_ALL ((__u16)(~0)) -#define LFSCK_TYPES_DEF ((__u16)0) -#define LFSCK_TYPES_SUPPORTED (LT_LAYOUT | LT_NAMESPACE) - #define LFSCK_SPEED_NO_LIMIT 0 #define LFSCK_SPEED_LIMIT_DEF LFSCK_SPEED_NO_LIMIT #define LFSCK_ASYNC_WIN_DEFAULT 1024 #define LFSCK_ASYNC_WIN_MAX ((__u16)(~0)) enum lfsck_start_valid { - LSV_SPEED_LIMIT = 0x00000001, + LSV_SPEED_LIMIT = 0x00000001, LSV_ERROR_HANDLE = 0x00000002, LSV_DRYRUN = 0x00000004, LSV_ASYNC_WINDOWS = 0x00000008, diff --git a/lustre/include/lustre_lfsck.h b/lustre/include/lustre_lfsck.h index 5adbffe..635aa21 100644 --- a/lustre/include/lustre_lfsck.h +++ b/lustre/include/lustre_lfsck.h @@ -160,7 +160,7 @@ static inline void lfsck_pack_rfa(struct lfsck_request *lr, { memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_FID_ACCESSED; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *fid; } diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index cb39f8c..1ccc63b 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -1324,7 +1324,7 @@ static int lfsck_layout_master_query_others(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -1418,7 +1418,7 @@ static int lfsck_layout_master_notify_others(const struct lu_env *env, RETURN(-ENOMEM); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; laia->laia_com = com; laia->laia_lr = lr; laia->laia_shared = 0; @@ -2169,7 +2169,7 @@ static int 
lfsck_layout_master_conditional_destroy(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_CONDITIONAL_DESTROY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *fid; tmp = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); @@ -3854,7 +3854,7 @@ lfsck_layout_slave_query_master(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_QUERY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); @@ -3923,7 +3923,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env, lr->lr_flags = LEF_FROM_OST; lr->lr_status = result; lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; llsd->llsd_touch_gen++; spin_lock(&llsd->llsd_lock); while (!list_empty(&llsd->llsd_master_list)) { @@ -4100,7 +4100,7 @@ static int lfsck_layout_slave_check_pairs(const struct lu_env *env, lr = req_capsule_client_get(&req->rq_pill, &RMF_LFSCK_REQUEST); memset(lr, 0, sizeof(*lr)); lr->lr_event = LE_PAIRS_VERIFY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_fid = *cfid; /* OST-object itself FID. */ lr->lr_fid2 = *pfid; /* The claimed parent FID. 
*/ @@ -5550,7 +5550,7 @@ static int lfsck_layout_master_stop_notify(const struct lu_env *env, memset(lr, 0, sizeof(*lr)); lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); lr->lr_event = LE_PEER_EXIT; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; lr->lr_status = LS_CO_PAUSED; if (ltds == &lfsck->li_ost_descs) lr->lr_flags = LEF_TO_OST; @@ -5588,10 +5588,17 @@ static int lfsck_layout_slave_join(const struct lu_env *env, int rc = 0; ENTRY; - if (!lsp->lsp_index_valid || start == NULL || - !(start->ls_flags & LPF_ALL_TGT) || - !(lfsck->li_bookmark_ram.lb_param & LPF_ALL_TGT)) - RETURN(-EALREADY); + if (start == NULL || !(start->ls_flags & LPF_ORPHAN)) + RETURN(0); + + if (!lsp->lsp_index_valid) + RETURN(-EINVAL); + + /* If someone is running the LFSCK without orphan handling, + * it will not maintain the object accessing rbtree. So we + * cannot join it for orphan handling. */ + if (!llsd->llsd_rbtree_valid) + RETURN(-EBUSY); spin_unlock(&lfsck->li_lock); rc = lfsck_layout_llst_add(llsd, lsp->lsp_index); @@ -5662,7 +5669,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) init_rwsem(&com->lc_sem); atomic_set(&com->lc_ref, 1); com->lc_lfsck = lfsck; - com->lc_type = LT_LAYOUT; + com->lc_type = LFSCK_TYPE_LAYOUT; if (lfsck->li_master) { struct lfsck_layout_master_data *llmd; @@ -5930,7 +5937,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env, if (unlikely(lfsck == NULL)) RETURN(ERR_PTR(-ENXIO)); - com = lfsck_component_find(lfsck, LT_LAYOUT); + com = lfsck_component_find(lfsck, LFSCK_TYPE_LAYOUT); if (unlikely(com == NULL)) GOTO(out, rc = -ENOENT); diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index f6f1cce..c509cbd 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -2248,9 +2248,10 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, mutex_lock(&lfsck->li_mutex); spin_lock(&lfsck->li_lock); + /* no error if LFSCK is already stopped, or was never 
started */ if (thread_is_init(thread) || thread_is_stopped(thread)) { spin_unlock(&lfsck->li_lock); - GOTO(out, rc = -EALREADY); + GOTO(out, rc = 0); } if (stop != NULL) { @@ -2635,7 +2636,7 @@ unlock: spin_lock(<ds->ltd_lock); ltd->ltd_dead = 1; spin_unlock(<ds->ltd_lock); - lfsck_stop_notify(env, lfsck, ltds, ltd, LT_LAYOUT); + lfsck_stop_notify(env, lfsck, ltds, ltd, LFSCK_TYPE_LAYOUT); lfsck_tgt_put(ltd); } diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index fc9a0dc..5f4ab64 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -1174,8 +1174,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "updated_phase2: "LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" - "dirs: "LPU64"\n" - "M-linked: "LPU64"\n" + "directories: "LPU64"\n" + "multi_linked_files: "LPU64"\n" "dirent_repaired: "LPU64"\n" "linkea_repaired: "LPU64"\n" "nlinks_repaired: "LPU64"\n" @@ -1185,8 +1185,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: N/A\n" - "real-time_speed_phase1: "LPU64" items/sec\n" - "real-time_speed_phase2: N/A\n", + "real_time_speed_phase1: "LPU64" items/sec\n" + "real_time_speed_phase2: N/A\n", checked, ns->ln_objs_checked_phase2, ns->ln_items_repaired, @@ -1256,8 +1256,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "updated_phase2: "LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" - "dirs: "LPU64"\n" - "M-linked: "LPU64"\n" + "directories: "LPU64"\n" + "multi_linked_files: "LPU64"\n" "dirent_repaired: "LPU64"\n" "linkea_repaired: "LPU64"\n" "nlinks_repaired: "LPU64"\n" @@ -1267,8 +1267,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: "LPU64" objs/sec\n" - 
"real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: "LPU64" objs/sec\n" + "real_time_speed_phase1: N/A\n" + "real_time_speed_phase2: "LPU64" objs/sec\n" "current_position: "DFID"\n", ns->ln_items_checked, checked, @@ -1303,8 +1303,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "updated_phase2: "LPU64"\n" "failed_phase1: "LPU64"\n" "failed_phase2: "LPU64"\n" - "dirs: "LPU64"\n" - "M-linked: "LPU64"\n" + "directories: "LPU64"\n" + "multi_linked_files: "LPU64"\n" "dirent_repaired: "LPU64"\n" "linkea_repaired: "LPU64"\n" "nlinks_repaired: "LPU64"\n" @@ -1314,8 +1314,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com, "run_time_phase2: %u seconds\n" "average_speed_phase1: "LPU64" items/sec\n" "average_speed_phase2: "LPU64" objs/sec\n" - "real-time_speed_phase1: N/A\n" - "real-time_speed_phase2: N/A\n" + "real_time_speed_phase1: N/A\n" + "real_time_speed_phase2: N/A\n" "current_position: N/A\n", ns->ln_items_checked, ns->ln_objs_checked_phase2, @@ -1589,7 +1589,7 @@ int lfsck_namespace_setup(const struct lu_env *env, init_rwsem(&com->lc_sem); atomic_set(&com->lc_ref, 1); com->lc_lfsck = lfsck; - com->lc_type = LT_NAMESPACE; + com->lc_type = LFSCK_TYPE_NAMESPACE; com->lc_ops = &lfsck_namespace_ops; com->lc_file_size = sizeof(struct lfsck_namespace); OBD_ALLOC(com->lc_file_ram, com->lc_file_size); diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index b2a05fa..ca52771 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -262,7 +262,7 @@ static int mdd_lfsck_namespace_seq_show(struct seq_file *m, void *data) LASSERT(mdd != NULL); - return lfsck_dump(m, mdd->mdd_bottom, LT_NAMESPACE); + return lfsck_dump(m, mdd->mdd_bottom, LFSCK_TYPE_NAMESPACE); } LPROC_SEQ_FOPS_RO(mdd_lfsck_namespace); @@ -272,7 +272,7 @@ static int mdd_lfsck_layout_seq_show(struct seq_file *m, void *data) LASSERT(mdd != NULL); - return lfsck_dump(m, mdd->mdd_bottom, LT_LAYOUT); + return lfsck_dump(m, 
mdd->mdd_bottom, LFSCK_TYPE_LAYOUT); } LPROC_SEQ_FOPS_RO(mdd_lfsck_layout); diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index fcde82a..00218c4 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -535,7 +535,7 @@ static int ofd_lfsck_layout_seq_show(struct seq_file *m, void *data) struct obd_device *obd = m->private; struct ofd_device *ofd = ofd_dev(obd->obd_lu_dev); - return lfsck_dump(m, ofd->ofd_osd, LT_LAYOUT); + return lfsck_dump(m, ofd->ofd_osd, LFSCK_TYPE_LAYOUT); } LPROC_SEQ_FOPS_RO(ofd_lfsck_layout); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index a4d7c17..599e59f 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -150,7 +150,7 @@ static int ofd_inconsistency_verification_main(void *args) GOTO(out, rc = -ENOMEM); lr->lr_event = LE_PAIRS_VERIFY; - lr->lr_active = LT_LAYOUT; + lr->lr_active = LFSCK_TYPE_LAYOUT; spin_lock(&ofd->ofd_inconsistency_lock); while (1) { diff --git a/lustre/ptlrpc/wirehdr.c b/lustre/ptlrpc/wirehdr.c index dc5e090..fe76629 100644 --- a/lustre/ptlrpc/wirehdr.c +++ b/lustre/ptlrpc/wirehdr.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,4 +49,5 @@ #include #include #include +#include #include diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index 2f07137..72d0bb0 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -27,7 +27,7 @@ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. 
*/ /* * This file is part of Lustre, http://www.lustre.org/ @@ -49,7 +49,9 @@ #include #include #include +#include #include + void lustre_assert_wire_constants(void) { /* Wire protocol assertions generated by 'wirecheck' @@ -4615,6 +4617,14 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lfsck_request, lr_padding_3)); LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n", (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3)); + LASSERTF(LFSCK_TYPE_SCRUB == 0x00000000UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_SCRUB); + LASSERTF(LFSCK_TYPE_LAYOUT == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_LAYOUT); + LASSERTF(LFSCK_TYPE_DNE == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_DNE); + LASSERTF(LFSCK_TYPE_NAMESPACE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_NAMESPACE); LASSERTF(LE_LASTID_REBUILDING == 1, "found %lld\n", (long long)LE_LASTID_REBUILDING); LASSERTF(LE_LASTID_REBUILT == 2, "found %lld\n", diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index b6fad5a..e22f841 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -42,7 +42,7 @@ setupall ALWAYS_EXCEPT="$ALWAYS_EXCEPT 2c" [[ $(lustre_version_code ost1) -lt $(version_code 2.5.55) ]] && - ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19" + ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14 15 16 17 18 19 20 21" [ $(facet_fstype $SINGLEMDS) = "zfs" ] && # bug number for skipped test: LU-4970 @@ -2407,6 +2407,33 @@ test_20() { } run_test 20 "Handle the orphan with dummy LOV EA slot properly" +test_21() { + [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.59) ]] && + skip "ignore the test if MDS is older than 2.5.59" && exit 0 + + check_mount_and_prep + createmany -o $DIR/$tdir/f 100 || error "(0) Fail to create 100 files" + + echo "Start all LFSCK components by default (-s 1)" + do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -s 1 -r || 
+ error "Fail to start LFSCK" + + echo "namespace LFSCK should be in 'scanning-phase1' status" + local STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "Expect namespace 'scanning-phase1', but got '$STATUS'" + + echo "layout LFSCK should be in 'scanning-phase1' status" + STATUS=$($SHOW_LAYOUT | awk '/^status/ { print $2 }') + [ "$STATUS" == "scanning-phase1" ] || + error "Expect layout 'scanning-phase1', but got '$STATUS'" + + echo "Stop all LFSCK components by default" + do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 || + error "Fail to stop LFSCK" +} +run_test 21 "run all LFSCK components by default" + $LCTL set_param debug=-lfsck > /dev/null || true # restore MDS/OST size diff --git a/lustre/tests/sanity-scrub.sh b/lustre/tests/sanity-scrub.sh index 5ca94e2..c19e59e 100644 --- a/lustre/tests/sanity-scrub.sh +++ b/lustre/tests/sanity-scrub.sh @@ -58,6 +58,9 @@ setupall [[ $(lustre_version_code ost1) -lt $(version_code 2.4.50) ]] && ALWAYS_EXCEPT="$ALWAYS_EXCEPT 11 12 13 14" +[[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.5.59) ]] && + SCRUB_ONLY="-t scrub" + build_test_filter MDT_DEV="${FSNAME}-MDT0000" @@ -68,8 +71,10 @@ scrub_start() { local error_id=$1 local n + # use "lfsck_start -A" when we no longer need testing interop for n in $(seq $MDSCOUNT); do - do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) "$@" || + do_facet mds$n $LCTL lfsck_start -M $(facet_svc mds$n) \ + $SCRUB_ONLY "$@" || error "($error_id) Failed to start OI scrub on mds$n" done } @@ -78,6 +83,7 @@ scrub_stop() { local error_id=$1 local n + # use "lfsck_stop -A" when we no longer need testing interop for n in $(seq $MDSCOUNT); do do_facet mds$n $LCTL lfsck_stop -M $(facet_svc mds$n) || error "($error_id) Failed to stop OI scrub on mds$n" @@ -91,8 +97,8 @@ scrub_status() { osd-ldiskfs.$(facet_svc mds$n).oi_scrub } -START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV}" -START_SCRUB_ON_OST="do_facet ost1 
$LCTL lfsck_start -M ${OST_DEV}" +START_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_start -M ${MDT_DEV} $SCRUB_ONLY" +START_SCRUB_ON_OST="do_facet ost1 $LCTL lfsck_start -M ${OST_DEV} $SCRUB_ONLY" STOP_SCRUB="do_facet $SINGLEMDS $LCTL lfsck_stop -M ${MDT_DEV}" SHOW_SCRUB="do_facet $SINGLEMDS \ $LCTL get_param -n osd-ldiskfs.${MDT_DEV}.oi_scrub" diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c index b698a08..bbaed41 100644 --- a/lustre/utils/lustre_lfsck.c +++ b/lustre/utils/lustre_lfsck.c @@ -68,69 +68,69 @@ static struct option long_opt_stop[] = { }; struct lfsck_type_name { - char *name; - int namelen; - enum lfsck_type type; + char *ltn_name; + enum lfsck_type ltn_type; }; static struct lfsck_type_name lfsck_types_names[] = { - { "layout", 6, LT_LAYOUT }, - { "namespace", 9, LT_NAMESPACE}, - { 0, 0, 0 } + { "scrub", LFSCK_TYPE_SCRUB }, + { "layout", LFSCK_TYPE_LAYOUT }, +/* { "dne", LFSCK_TYPE_DNE }, */ + { "namespace", LFSCK_TYPE_NAMESPACE }, + { "default", LFSCK_TYPES_DEF }, + { "all", LFSCK_TYPES_SUPPORTED }, + { NULL, 0 } }; -static inline int lfsck_name2type(const char *name, int namelen) +static enum lfsck_type lfsck_name2type(const char *name) { - int i = 0; + int i; - while (lfsck_types_names[i].name != NULL) { - if (namelen == lfsck_types_names[i].namelen && - strncmp(lfsck_types_names[i].name, name, namelen) == 0) - return lfsck_types_names[i].type; - i++; + for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) { + if (strcmp(lfsck_types_names[i].ltn_name, name) == 0) + return lfsck_types_names[i].ltn_type; } - return 0; + return -1; } static void usage_start(void) { - fprintf(stderr, "Start LFSCK.\n" - "SYNOPSIS:\n" - "lfsck_start <-M | --device [MDT,OST]_device>\n" + fprintf(stderr, "start LFSCK\n" + "usage:\n" + "lfsck_start <-M | --device {MDT,OST}_device>\n" " [-A | --all] [-c | --create_ostobj [on | off]]\n" " [-e | --error {continue | abort}] [-h | --help]\n" " [-n | --dryrun [on | off]] [-o | --orphan]\n" - " [-r | --reset] 
[-s | --speed speed_limit]\n" - " [-t | --type lfsck_type[,lfsck_type...]]\n" + " [-r | --reset] [-s | --speed ops_per_sec_limit]\n" + " [-t | --type check_type[,check_type...]]\n" " [-w | --window_size size]\n" - "OPTIONS:\n" - "-M: The device to start LFSCK/scrub on.\n" - "-A: Start LFSCK on all MDT devices.\n" - "-c: Create the lost OST-object for dangling LOV EA: " - "'off'(default) or 'on'.\n" - "-e: Error handle, 'continue'(default) or 'abort'.\n" - "-h: Help information.\n" - "-n: Check without modification. 'off'(default) or 'on'.\n" - "-o: Handle orphan objects.\n" - "-r: Reset scanning start position to the device beginning.\n" - " The non-specified parameters will be reset as default.\n" - "-s: How many items can be scanned at most per second. " - "'%d' means no limit (default).\n" - "-t: The LFSCK type(s) to be started.\n" - "-w: The window size for async requests pipeline.\n", + "options:\n" + "-M: device to start LFSCK/scrub on\n" + "-A: start LFSCK on all MDT devices\n" + "-c: create the lost OST-object for dangling LOV EA " + "(default 'off', or 'on')\n" + "-e: error handle mode (default 'continue', or 'abort')\n" + "-h: this help message\n" + "-n: check with no modification (default 'off', or 'on')\n" + "-o: repair orphan objects\n" + "-r: reset scanning to the start of the device\n" + "-s: maximum items to be scanned per second " + "(default '%d' = no limit)\n" + "-t: check type(s) to be performed (default all)\n" + "-w: window size for async requests pipeline\n", LFSCK_SPEED_NO_LIMIT); } static void usage_stop(void) { - fprintf(stderr, "Stop LFSCK.\n" - "SYNOPSIS:\n" - "lfsck_stop <-M | --device [MDT,OST]_device>\n" - "[-A | --all] [-h | --help]\n" - "OPTIONS:\n" - "-M: The device to stop LFSCK/scrub on.\n" - "-A: Stop LFSCK on all MDT devices.\n" - "-h: Help information.\n"); + fprintf(stderr, "stop LFSCK\n" + "usage:\n" + "lfsck_stop <-M | --device {MDT,OST}_device>\n" + " [-A | --all] [-h | --help]\n" + "options:\n" + "-M: device to stop 
LFSCK/scrub on\n" + "-A: stop LFSCK on all MDT devices\n" + "-h: this help message\n"); } static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg) @@ -157,13 +157,13 @@ int jt_lfsck_start(int argc, char **argv) char device[MAX_OBD_NAME]; struct lfsck_start start; char *optstring = "Ac::e:hM:n::ors:t:w:"; - int opt, index, rc, val, i, type; + int opt, index, rc, val, i; memset(&data, 0, sizeof(data)); memset(&start, 0, sizeof(start)); memset(device, 0, MAX_OBD_NAME); start.ls_version = LFSCK_VERSION_V1; - start.ls_active = LFSCK_TYPES_DEF; + start.ls_active = LFSCK_TYPES_ALL; /* Reset the 'optind' for the case of getopt_long() called multiple * times under the same lctl. */ @@ -178,11 +178,11 @@ int jt_lfsck_start(int argc, char **argv) if (optarg == NULL || strcmp(optarg, "on") == 0) { start.ls_flags |= LPF_CREATE_OSTOBJ; } else if (strcmp(optarg, "off") != 0) { - fprintf(stderr, "Invalid switch: %s. " - "The valid switch should be: 'on' " - "or 'off' (default) without blank, " - "or empty. For example: '-non' or " - "'-noff' or '-n'.\n", optarg); + fprintf(stderr, "invalid switch: -c '%s'. " + "valid switches are:\n" + "empty ('on'), or 'off' without space. " + "For example:\n" + "'-c', '-con', '-coff'\n", optarg); return -EINVAL; } start.ls_valid |= LSV_CREATE_OSTOBJ; @@ -191,9 +191,9 @@ int jt_lfsck_start(int argc, char **argv) if (strcmp(optarg, "abort") == 0) { start.ls_flags |= LPF_FAILOUT; } else if (strcmp(optarg, "continue") != 0) { - fprintf(stderr, "Invalid error handler: %s. " - "The valid value should be: 'continue'" - "(default) or 'abort'.\n", optarg); + fprintf(stderr, "invalid error mode: -e '%s'." 
+ "valid modes are: " + "'continue' or 'abort'.\n", optarg); return -EINVAL; } start.ls_valid |= LSV_ERROR_HANDLE; @@ -210,11 +210,11 @@ int jt_lfsck_start(int argc, char **argv) if (optarg == NULL || strcmp(optarg, "on") == 0) { start.ls_flags |= LPF_DRYRUN; } else if (strcmp(optarg, "off") != 0) { - fprintf(stderr, "Invalid switch: %s. " - "The valid switch should be: 'on' " - "or 'off' (default) without blank, " - "or empty. For example: '-non' or " - "'-noff' or '-n'.\n", optarg); + fprintf(stderr, "invalid switch: -n '%s'. " + "valid switches are:\n" + "empty ('on'), or 'off' without space. " + "For example:\n" + "'-n', '-non', '-noff'\n", optarg); return -EINVAL; } start.ls_valid |= LSV_DRYRUN; @@ -232,44 +232,27 @@ int jt_lfsck_start(int argc, char **argv) start.ls_valid |= LSV_SPEED_LIMIT; break; case 't': { - char *str = optarg, *p, c; - - start.ls_active = 0; - while (*str) { - while (*str == ' ' || *str == ',') - str++; - - if (*str == 0) - break; - - p = str; - while (*p != 0 && *p != ' ' && *p != ',') - p++; - - c = *p; - *p = 0; - type = lfsck_name2type(str, strlen(str)); - if (type == 0) { - fprintf(stderr, "Invalid type (%s).\n" - "The valid value should be " - "'layout' or 'namespace'.\n", - str); - *p = c; - return -EINVAL; - } - - *p = c; - str = p; + char *typename; + + if (start.ls_active == LFSCK_TYPES_ALL) + start.ls_active = 0; + while ((typename = strsep(&optarg, ",")) != NULL) { + enum lfsck_type type; + type = lfsck_name2type(typename); + if (type == -1) + goto bad_type; start.ls_active |= type; } - if (start.ls_active == 0) { - fprintf(stderr, "Miss LFSCK type(s).\n" - "The valid value should be " - "'layout' or 'namespace'.\n"); - return -EINVAL; - } break; +bad_type: + fprintf(stderr, "invalid check type -t '%s'. " + "valid types are:\n", typename); + for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) + fprintf(stderr, "%s%s", i != 0 ? 
"," : "", + lfsck_types_names[i].ltn_name); + fprintf(stderr, "\n"); + return -EINVAL; } case 'w': val = atoi(optarg); @@ -294,6 +277,9 @@ int jt_lfsck_start(int argc, char **argv) } } + if (start.ls_active == LFSCK_TYPES_ALL) + start.ls_active = LFSCK_TYPES_DEF; + if (data.ioc_inlbuf4 == NULL) { if (lcfg_get_devname() != NULL) { rc = lfsck_pack_dev(&data, device, lcfg_get_devname()); @@ -322,22 +308,17 @@ int jt_lfsck_start(int argc, char **argv) } obd_ioctl_unpack(&data, buf, sizeof(rawbuf)); - if (start.ls_active == 0) { - printf("Started LFSCK on the device %s", device); - } else { - printf("Started LFSCK on the device %s:", device); - i = 0; - while (lfsck_types_names[i].name != NULL) { - if (start.ls_active & lfsck_types_names[i].type) { - printf(" %s", lfsck_types_names[i].name); - start.ls_active &= ~lfsck_types_names[i].type; - } - i++; + printf("Started LFSCK on the device %s: scrub", device); + for (i = 0; lfsck_types_names[i].ltn_name != NULL; i++) { + if (start.ls_active & lfsck_types_names[i].ltn_type) { + printf(" %s", lfsck_types_names[i].ltn_name); + start.ls_active &= ~lfsck_types_names[i].ltn_type; } - if (start.ls_active != 0) - printf(" unknown(0x%x)", start.ls_active); } - printf(".\n"); + if (start.ls_active != 0) + printf(" unknown(0x%x)", start.ls_active); + printf("\n"); + return 0; } diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index bf3adb7..20d2b04 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -27,7 +27,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2013, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. 
*/ /* * This file is part of Lustre, http://www.lustre.org/ @@ -39,6 +39,7 @@ #include #include #include +#include #include #define BLANK_LINE() \ @@ -2054,6 +2055,11 @@ static void check_lfsck_request(void) CHECK_MEMBER(lfsck_request, lr_padding_2); CHECK_MEMBER(lfsck_request, lr_padding_3); + CHECK_VALUE_X(LFSCK_TYPE_SCRUB); + CHECK_VALUE_X(LFSCK_TYPE_LAYOUT); + CHECK_VALUE_X(LFSCK_TYPE_DNE); + CHECK_VALUE_X(LFSCK_TYPE_NAMESPACE); + CHECK_VALUE(LE_LASTID_REBUILDING); CHECK_VALUE(LE_LASTID_REBUILT); CHECK_VALUE(LE_PHASE1_DONE); diff --git a/lustre/utils/wirehdr.c b/lustre/utils/wirehdr.c index 6bf14b7..f337dce 100644 --- a/lustre/utils/wirehdr.c +++ b/lustre/utils/wirehdr.c @@ -27,7 +27,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -37,6 +37,7 @@ #include #include #include +#include #include #undef LASSERT diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index 715e952..129ad4f 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -27,7 +27,7 @@ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. * - * Copyright (c) 2011, 2012, Intel Corporation. + * Copyright (c) 2011, 2014, Intel Corporation. 
*/ /* * This file is part of Lustre, http://www.lustre.org/ @@ -37,6 +37,7 @@ #include #include #include +#include #include #undef LASSERT @@ -4622,6 +4623,14 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct lfsck_request, lr_padding_3)); LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n", (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3)); + LASSERTF(LFSCK_TYPE_SCRUB == 0x00000000UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_SCRUB); + LASSERTF(LFSCK_TYPE_LAYOUT == 0x00000001UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_LAYOUT); + LASSERTF(LFSCK_TYPE_DNE == 0x00000002UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_DNE); + LASSERTF(LFSCK_TYPE_NAMESPACE == 0x00000004UL, "found 0x%.8xUL\n", + (unsigned)LFSCK_TYPE_NAMESPACE); LASSERTF(LE_LASTID_REBUILDING == 1, "found %lld\n", (long long)LE_LASTID_REBUILDING); LASSERTF(LE_LASTID_REBUILT == 2, "found %lld\n", -- 1.8.3.1