.TP
-h, --help
Show this help.
+.TP
+.B lfsck_query \fR<-M | --device MDT_device> [-h | --help]
+ \fR[-t | --type lfsck_type[,lfsck_type...]] [-w | --wait]
+.br
+Get the LFSCK global status via the specified MDT device.
+.TP
+ -M, --device <MDT_device>
+Specify the MDT device on which the LFSCK is running or has ever run.
+.TP
+ -t, --type <lfsck_type[,lfsck_type...]>
+Specify which kind(s) of LFSCK status to query. If no type is given, the
+status of all LFSCK types is queried. Valid types are a comma-separated list
+of one or more of: namespace, layout, all
+.TP
+ -h, --help
+Show this help.
+.TP
+ -w, --wait
+Do not return until there is no on-going LFSCK scanning on any target
+associated with this filesystem (MDT or OST).
.SS Debug
.TP
.BI debug_daemon
struct thandle *));
void tgt_register_lfsck_query(int (*query)(const struct lu_env *,
struct dt_device *,
- struct lfsck_request *));
+ struct lfsck_request *,
+ struct lfsck_reply *,
+ struct lfsck_query *));
bool req_can_reconstruct(struct ptlrpc_request *req, struct tg_reply_data *trd);
extern struct tgt_handler tgt_sec_ctx_handlers[];
struct lfsck_reply {
__u32 lr_status;
__u32 lr_padding_1;
- __u64 lr_padding_2;
+ __u64 lr_repaired; /* repaired-items sum, filled for LEF_QUERY_ALL queries */
};
enum lfsck_events {
LEF_SET_LMV_HASH = 0x00000004,
LEF_SET_LMV_ALL = 0x00000008,
LEF_RECHECK_NAME_HASH = 0x00000010,
+ LEF_QUERY_ALL = 0x00000020,
};
static inline void lustre_set_wire_obdo(const struct obd_connect_data *ocd,
# define _LUSTRE_LFSCK_USER_H
# include <lustre/lustre_user.h>
+/**
+ * state machine:
+ *
+ * LS_INIT
+ * |
+ * (lfsck|start)
+ * |
+ * v
+ * LS_SCANNING_PHASE1
+ * | ^
+ * | :
+ * | (lfsck:restart)
+ * | :
+ * v :
+ * -----------------------------------------------------------------
+ * | |^ |^ |^ |^ |^
+ * | |: |: |: |: |:
+ * v v: v: v: v: v:
+ * LS_SCANNING_PHASE2 LS_FAILED LS_STOPPED LS_PAUSED LS_CRASHED LS_PARTIAL
+ * (CO_) (CO_) (CO_)
+ * | ^ ^: ^: ^: ^: ^:
+ * | : |: |: |: |: |:
+ * | (lfsck:restart) |: |: |: |: |:
+ * v : |v |v |v |v |v
+ * -----------------------------------------------------------------
+ * |
+ * v
+ * LS_COMPLETED
+ */
+enum lfsck_status {
+ /* The lfsck file is newly created: for a new MDT, when upgrading from
+ * an old disk, or when the lfsck file is re-created manually. */
+ LS_INIT = 0,
+
+ /* The first-step system scanning. The checked items during the phase1
+ * scanning depend on the LFSCK type. */
+ LS_SCANNING_PHASE1 = 1,
+
+ /* The second-step system scanning. The checked items during the phase2
+ * scanning depend on the LFSCK type. */
+ LS_SCANNING_PHASE2 = 2,
+
+ /* The LFSCK processing has completed for all objects. */
+ LS_COMPLETED = 3,
+
+ /* The LFSCK exited automatically because of a failure; it will not
+ * restart automatically. */
+ LS_FAILED = 4,
+
+ /* The LFSCK was stopped manually; it will not restart automatically. */
+ LS_STOPPED = 5,
+
+ /* The LFSCK is paused automatically at umount and
+ * will be restarted automatically at remount. */
+ LS_PAUSED = 6,
+
+ /* The system crashed during the LFSCK; the LFSCK
+ * will be restarted automatically after recovery. */
+ LS_CRASHED = 7,
+
+ /* Some OST/MDT failed during the LFSCK, or did not join the LFSCK. */
+ LS_PARTIAL = 8,
+
+ /* The LFSCK failed because its controller failed. */
+ LS_CO_FAILED = 9,
+
+ /* The LFSCK is stopped because its controller is stopped. */
+ LS_CO_STOPPED = 10,
+
+ /* The LFSCK is paused because its controller is paused. */
+ LS_CO_PAUSED = 11,
+
+ /* Number of defined states. lfsck_reset_ltd_status() also stores this
+ * value as the reset value of a target's cached status. */
+ LS_MAX
+};
+
+/**
+ * Translate an LFSCK status value into its printable name.
+ *
+ * \param[in] status	an enum lfsck_status value
+ *
+ * \retval		the name of \a status, or "unknown" when \a status
+ *			is negative or not below LS_MAX
+ */
+static inline const char *lfsck_status2name(int status)
+{
+	static const char * const names[] = {
+		[LS_INIT]		= "init",
+		[LS_SCANNING_PHASE1]	= "scanning-phase1",
+		[LS_SCANNING_PHASE2]	= "scanning-phase2",
+		[LS_COMPLETED]		= "completed",
+		[LS_FAILED]		= "failed",
+		[LS_STOPPED]		= "stopped",
+		[LS_PAUSED]		= "paused",
+		[LS_CRASHED]		= "crashed",
+		[LS_PARTIAL]		= "partial",
+		[LS_CO_FAILED]		= "co-failed",
+		[LS_CO_STOPPED]		= "co-stopped",
+		[LS_CO_PAUSED]		= "co-paused"
+	};
+	const char *name = "unknown";
+
+	if (status >= 0 && status < LS_MAX)
+		name = names[status];
+
+	return name;
+}
+
enum lfsck_param_flags {
/* Reset LFSCK iterator position to the device beginning. */
LPF_RESET = 0x0001,
/* Create MDT-object for dangling name entry. */
LPF_CREATE_MDTOBJ = 0x0080,
+
+ /* Do not return until the LFSCK is no longer running. */
+ LPF_WAIT = 0x0100,
};
enum lfsck_type {
#define LFSCK_SPEED_LIMIT_DEF LFSCK_SPEED_NO_LIMIT
#define LFSCK_ASYNC_WIN_DEFAULT 1024
#define LFSCK_ASYNC_WIN_MAX ((__u16)(~0))
+#define LFSCK_TYPE_BITS 16
enum lfsck_start_valid {
LSV_SPEED_LIMIT = 0x00000001,
__u64 ls_padding_2;
};
+/*
+ * Argument and result buffer handed to lfsck_query() through the
+ * OBD_IOC_QUERY_LFSCK ioctl. The per-type arrays are indexed by the bit
+ * position of the LFSCK type; the count arrays are further indexed by
+ * enum lfsck_status, with one extra slot at LS_MAX for targets whose cached
+ * status has been reset.
+ */
+struct lfsck_query {
+ __u16 lu_types; /* bitmap of LFSCK types to query */
+ __u16 lu_flags; /* LPF_WAIT is honoured by lfsck_query() */
+ __u32 lu_mdts_count[LFSCK_TYPE_BITS][LS_MAX + 1]; /* MDTs per status */
+ __u32 lu_osts_count[LFSCK_TYPE_BITS][LS_MAX + 1]; /* OSTs per status */
+ __u64 lu_repaired[LFSCK_TYPE_BITS]; /* repaired items per type */
+};
+
#endif /* _LUSTRE_LFSCK_USER_H */
#define OBD_IOC_ECHO_ALLOC_SEQ _IOWR('f', 222, struct obd_ioctl_data)
#define OBD_IOC_START_LFSCK _IOWR('f', 230, OBD_IOC_DATA_TYPE)
#define OBD_IOC_STOP_LFSCK _IOW ('f', 231, OBD_IOC_DATA_TYPE)
+#define OBD_IOC_QUERY_LFSCK _IOR('f', 232, struct obd_ioctl_data)
/* lustre/lustre_user.h 240-249 */
/* LIBCFS_IOC_DEBUG_MASK 250 */
#include <lu_object.h>
#include <dt_object.h>
-/**
- * status machine:
- *
- * LS_INIT
- * |
- * (lfsck|start)
- * |
- * v
- * LS_SCANNING_PHASE1
- * | ^
- * | :
- * | (lfsck:restart)
- * | :
- * v :
- * -----------------------------------------------------------------
- * | |^ |^ |^ |^ |^
- * | |: |: |: |: |:
- * v v: v: v: v: v:
- * LS_SCANNING_PHASE2 LS_FAILED LS_STOPPED LS_PAUSED LS_CRASHED LS_PARTIAL
- * (CO_) (CO_) (CO_)
- * | ^ ^: ^: ^: ^: ^:
- * | : |: |: |: |: |:
- * | (lfsck:restart) |: |: |: |: |:
- * v : |v |v |v |v |v
- * -----------------------------------------------------------------
- * |
- * v
- * LS_COMPLETED
- */
-enum lfsck_status {
- /* The lfsck file is new created, for new MDT, upgrading from old disk,
- * or re-creating the lfsck file manually. */
- LS_INIT = 0,
-
- /* The first-step system scanning. */
- LS_SCANNING_PHASE1 = 1,
-
- /* The second-step system scanning. */
- LS_SCANNING_PHASE2 = 2,
-
- /* The LFSCK processing has completed for all objects. */
- LS_COMPLETED = 3,
-
- /* The LFSCK exited automatically for failure, will not auto restart. */
- LS_FAILED = 4,
-
- /* The LFSCK is stopped manually, will not auto restart. */
- LS_STOPPED = 5,
-
- /* LFSCK is paused automatically when umount,
- * will be restarted automatically when remount. */
- LS_PAUSED = 6,
-
- /* System crashed during the LFSCK,
- * will be restarted automatically after recovery. */
- LS_CRASHED = 7,
-
- /* Some OST/MDT failed during the LFSCK, or not join the LFSCK. */
- LS_PARTIAL = 8,
-
- /* The LFSCK is failed because its controller is failed. */
- LS_CO_FAILED = 9,
-
- /* The LFSCK is stopped because its controller is stopped. */
- LS_CO_STOPPED = 10,
-
- /* The LFSCK is paused because its controller is paused. */
- LS_CO_PAUSED = 11,
-
- LS_MAX
-};
-
struct lfsck_start_param {
struct lfsck_start *lsp_start;
__u32 lsp_index;
int lfsck_in_notify(const struct lu_env *env, struct dt_device *key,
struct lfsck_request *lr, struct thandle *th);
int lfsck_query(const struct lu_env *env, struct dt_device *key,
- struct lfsck_request *lr);
+ struct lfsck_request *req, struct lfsck_reply *rep,
+ struct lfsck_query *que);
int lfsck_get_speed(struct seq_file *m, struct dt_device *key);
int lfsck_set_speed(struct dt_device *key, int val);
memset(lr, 0, sizeof(*lr));
lr->lr_event = LE_QUERY;
lr->lr_active = com->lc_type;
+
+ memset(laia, 0, sizeof(*laia));
laia->laia_com = com;
laia->laia_lr = lr;
- laia->laia_shared = 0;
if (!list_empty(&lad->lad_mdt_phase1_list)) {
ltds = &lfsck->li_mdt_descs;
lr->lr_index = lfsck_dev_idx(lfsck);
lr->lr_active = com->lc_type;
+
+ memset(laia, 0, sizeof(*laia));
laia->laia_com = com;
laia->laia_lr = lr;
- laia->laia_shared = 0;
switch (lr->lr_event) {
case LE_START:
LASSERT(ltd != NULL);
laia->laia_ltd = ltd;
- ltd->ltd_layout_done = 0;
- ltd->ltd_synced_failures = 0;
rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
lfsck_async_interpret_common,
laia, LFSCK_NOTIFY);
struct thandle *th);
int (*lfsck_query)(const struct lu_env *env,
- struct lfsck_component *com);
+ struct lfsck_component *com,
+ struct lfsck_request *req,
+ struct lfsck_reply *rep,
+ struct lfsck_query *que, int idx);
int (*lfsck_join)(const struct lu_env *env,
struct lfsck_component *com,
struct list_head ltd_layout_phase_list;
struct list_head ltd_namespace_list;
struct list_head ltd_namespace_phase_list;
+ __u32 ltd_layout_status;
+ __u32 ltd_namespace_status;
+ __u64 ltd_layout_repaired;
+ __u64 ltd_namespace_repaired;
atomic_t ltd_ref;
__u32 ltd_index;
__u32 ltd_layout_gen;
/* list for the ost targets in phase1 scanning. */
struct list_head lad_ost_phase1_list;
- /* list for the ost targets in phase1 scanning. */
+ /* list for the ost targets in phase2 scanning. */
struct list_head lad_ost_phase2_list;
/* list for the mdt targets involve LFSCK. */
/* list for the mdt targets in phase1 scanning. */
struct list_head lad_mdt_phase1_list;
- /* list for the mdt targets in phase1 scanning. */
+ /* list for the mdt targets in phase2 scanning. */
struct list_head lad_mdt_phase2_list;
const char *lad_name;
bool unlink);
struct lfsck_component *lfsck_component_find(struct lfsck_instance *lfsck,
__u16 type);
-const char *lfsck_status2names(enum lfsck_status status);
void lfsck_component_cleanup(const struct lu_env *env,
struct lfsck_component *com);
void lfsck_instance_cleanup(const struct lu_env *env,
struct ptlrpc_request_set *set,
ptlrpc_interpterer_t interpterer,
void *args, int request);
+int lfsck_query_all(const struct lu_env *env, struct lfsck_component *com);
int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com,
struct lfsck_start_param *lsp);
int lfsck_checkpoint_generic(const struct lu_env *env,
"status: %s\n",
lo->ll_magic,
bk->lb_version,
- lfsck_status2names(lo->ll_status));
+ lfsck_status2name(lo->ll_status));
rc = lfsck_bits_dump(m, lo->ll_flags, lfsck_flags_names, "flags");
if (rc < 0)
RETURN(0);
}
+/*
+ * Add the sum of all lo->ll_objs_repaired[] slots to *count.
+ * The caller decides the starting value of *count; it is not reset here.
+ */
+static void lfsck_layout_repaired(struct lfsck_layout *lo, __u64 *count)
+{
+	__u64 total = 0;
+	int slot;
+
+	for (slot = 0; slot < LLIT_MAX; slot++)
+		total += lo->ll_objs_repaired[slot];
+
+	*count += total;
+}
+
+/**
+ * Gather the layout LFSCK status of every target plus the local MDT.
+ *
+ * lfsck_query_all() is called first to refresh the status cached in each
+ * target descriptor. Each MDT and OST descriptor is then counted under its
+ * cached status, and its repaired count is added to \a repaired. Finally the
+ * local layout LFSCK file is counted as one more MDT.
+ *
+ * \param[in] env		execution environment
+ * \param[in] com		the layout LFSCK component
+ * \param[in,out] mdts_count	per-status MDT counters, LS_MAX + 1 entries
+ * \param[in,out] osts_count	per-status OST counters, LS_MAX + 1 entries
+ * \param[in,out] repaired	running total of repaired items
+ *
+ * \retval 0			on success
+ * \retval non-zero		the error returned by lfsck_query_all()
+ */
+static int lfsck_layout_query_all(const struct lu_env *env,
+				  struct lfsck_component *com,
+				  __u32 *mdts_count, __u32 *osts_count,
+				  __u64 *repaired)
+{
+	struct lfsck_layout *lo = com->lc_file_ram;
+	struct lfsck_tgt_descs *ltds;
+	struct lfsck_tgt_desc *ltd;
+	__u32 status;
+	int idx;
+	int rc;
+	ENTRY;
+
+	rc = lfsck_query_all(env, com);
+	if (rc != 0)
+		RETURN(rc);
+
+	ltds = &com->lc_lfsck->li_mdt_descs;
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_ltd2tgt(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		/* The cached status is copied from a remote reply, so it
+		 * must not index past the last (LS_MAX) counter slot. */
+		status = ltd->ltd_layout_status;
+		if (status > LS_MAX)
+			status = LS_MAX;
+
+		mdts_count[status]++;
+		*repaired += ltd->ltd_layout_repaired;
+	}
+	up_read(&ltds->ltd_rw_sem);
+
+	ltds = &com->lc_lfsck->li_ost_descs;
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_ltd2tgt(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		status = ltd->ltd_layout_status;
+		if (status > LS_MAX)
+			status = LS_MAX;
+
+		osts_count[status]++;
+		*repaired += ltd->ltd_layout_repaired;
+	}
+	up_read(&ltds->ltd_rw_sem);
+
+	/* Account for the local MDT, which is not in li_mdt_descs. */
+	down_read(&com->lc_sem);
+	status = lo->ll_status;
+	if (status > LS_MAX)
+		status = LS_MAX;
+
+	mdts_count[status]++;
+	lfsck_layout_repaired(lo, repaired);
+	up_read(&com->lc_sem);
+
+	RETURN(0);
+}
+
+
+/*
+ * Layout LFSCK implementation of the lfsck_query operation.
+ *
+ * With a non-NULL \a que the status of all targets is collected into slot
+ * \a idx of \a que via lfsck_layout_query_all(). Otherwise only the local
+ * status is written to \a rep, together with the local repaired count when
+ * the request carries LEF_QUERY_ALL.
+ *
+ * Returns the result of lfsck_layout_query_all() in the first case and 0 in
+ * the second.
+ */
static int lfsck_layout_query(const struct lu_env *env,
- struct lfsck_component *com)
+ struct lfsck_component *com,
+ struct lfsck_request *req,
+ struct lfsck_reply *rep,
+ struct lfsck_query *que, int idx)
{
struct lfsck_layout *lo = com->lc_file_ram;
+ int rc = 0;
- return lo->ll_status;
+ if (que != NULL) {
+ /* The global query is only expected on the master instance. */
+ LASSERT(com->lc_lfsck->li_master);
+
+ rc = lfsck_layout_query_all(env, com,
+ que->lu_mdts_count[idx],
+ que->lu_osts_count[idx],
+ &que->lu_repaired[idx]);
+ } else {
+ down_read(&com->lc_sem);
+ rep->lr_status = lo->ll_status;
+ if (req->lr_flags & LEF_QUERY_ALL)
+ lfsck_layout_repaired(lo, &rep->lr_repaired);
+ up_read(&com->lc_sem);
+ }
+
+ return rc;
}
/* with lfsck::li_lock held */
static struct list_head lfsck_mdt_orphan_list;
static DEFINE_SPINLOCK(lfsck_instance_lock);
-static const char *lfsck_status_names[] = {
- [LS_INIT] = "init",
- [LS_SCANNING_PHASE1] = "scanning-phase1",
- [LS_SCANNING_PHASE2] = "scanning-phase2",
- [LS_COMPLETED] = "completed",
- [LS_FAILED] = "failed",
- [LS_STOPPED] = "stopped",
- [LS_PAUSED] = "paused",
- [LS_CRASHED] = "crashed",
- [LS_PARTIAL] = "partial",
- [LS_CO_FAILED] = "co-failed",
- [LS_CO_STOPPED] = "co-stopped",
- [LS_CO_PAUSED] = "co-paused"
-};
-
const char *lfsck_flags_names[] = {
"scanned-once",
"inconsistent",
LVLT_BY_NAMEENTRY = 1,
};
-const char *lfsck_status2names(enum lfsck_status status)
+/*
+ * Reset the status and repaired count cached in \a ltd for one LFSCK type.
+ * LFSCK_TYPE_LAYOUT resets the layout fields; any other \a type resets the
+ * namespace fields. The status is set to LS_MAX, i.e. outside the range of
+ * defined states.
+ */
+static inline void
+lfsck_reset_ltd_status(struct lfsck_tgt_desc *ltd, enum lfsck_type type)
{
- if (unlikely(status < 0 || status >= LS_MAX))
- return "unknown";
-
- return lfsck_status_names[status];
+ if (type == LFSCK_TYPE_LAYOUT) {
+ ltd->ltd_layout_status = LS_MAX;
+ ltd->ltd_layout_repaired = 0;
+ } else {
+ ltd->ltd_namespace_status = LS_MAX;
+ ltd->ltd_namespace_repaired = 0;
+ }
}
static int lfsck_tgt_descs_init(struct lfsck_tgt_descs *ltds)
}
if (rc != 0) {
+ if (lr->lr_flags & LEF_QUERY_ALL) {
+ lfsck_reset_ltd_status(ltd, com->lc_type);
+ break;
+ }
+
spin_lock(&ltds->ltd_lock);
list_del_init(phase_list);
list_del_init(list);
CDEBUG(D_LFSCK, "%s: invalid query reply for %s: "
"rc = %d\n", lfsck_lfsck2name(com->lc_lfsck),
lad->lad_name, rc);
+
+ if (lr->lr_flags & LEF_QUERY_ALL) {
+ lfsck_reset_ltd_status(ltd, com->lc_type);
+ break;
+ }
+
spin_lock(&ltds->ltd_lock);
list_del_init(phase_list);
list_del_init(list);
break;
}
+ if (lr->lr_flags & LEF_QUERY_ALL) {
+ if (com->lc_type == LFSCK_TYPE_LAYOUT) {
+ ltd->ltd_layout_status = reply->lr_status;
+ ltd->ltd_layout_repaired = reply->lr_repaired;
+ } else {
+ ltd->ltd_namespace_status = reply->lr_status;
+ ltd->ltd_namespace_repaired =
+ reply->lr_repaired;
+ }
+ break;
+ }
+
switch (reply->lr_status) {
case LS_SCANNING_PHASE1:
break;
if (ltds == &lfsck->li_ost_descs)
lr->lr_flags = LEF_TO_OST;
+ memset(laia, 0, sizeof(*laia));
laia->laia_com = com;
laia->laia_ltds = ltds;
atomic_inc(&ltd->ltd_ref);
laia->laia_ltd = ltd;
laia->laia_lr = lr;
- laia->laia_shared = 0;
rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
lfsck_async_interpret_common,
return 0;
}
+/**
+ * Send an LFSCK status query to every known target and wait for the replies.
+ *
+ * An LE_QUERY request flagged with LEF_QUERY_ALL is submitted asynchronously
+ * to each MDT in li_mdt_descs and, for the layout LFSCK only, to each OST in
+ * li_ost_descs (with LEF_TO_OST added to the request flags). The replies are
+ * handed to lfsck_async_interpret_common(). When a request cannot be
+ * submitted, the target's cached status is reset via
+ * lfsck_reset_ltd_status() and the loop moves on to the next target.
+ *
+ * \param[in] env	execution environment; supplies the request and
+ *			interpret-args buffers from the thread info
+ * \param[in] com	the LFSCK component being queried
+ *
+ * \retval -ENOMEM	the request set could not be allocated
+ * \retval other	the value returned by ptlrpc_set_wait()
+ */
+int lfsck_query_all(const struct lu_env *env, struct lfsck_component *com)
+{
+	struct lfsck_thread_info *info = lfsck_env_info(env);
+	struct lfsck_request *lr = &info->lti_lr;
+	struct lfsck_async_interpret_args *laia = &info->lti_laia;
+	struct lfsck_instance *lfsck = com->lc_lfsck;
+	struct lfsck_tgt_descs *ltds = &lfsck->li_mdt_descs;
+	struct lfsck_tgt_desc *ltd;
+	struct ptlrpc_request_set *set;
+	int idx;
+	int rc;
+	ENTRY;
+
+	memset(lr, 0, sizeof(*lr));
+	lr->lr_event = LE_QUERY;
+	lr->lr_active = com->lc_type;
+	lr->lr_flags = LEF_QUERY_ALL;
+
+	memset(laia, 0, sizeof(*laia));
+	laia->laia_com = com;
+	laia->laia_lr = lr;
+
+	set = ptlrpc_prep_set();
+	if (set == NULL)
+		RETURN(-ENOMEM);
+
+again:
+	laia->laia_ltds = ltds;
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_tgt_get(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		laia->laia_ltd = ltd;
+		/* The semaphore is released while the request is submitted
+		 * and re-taken before the bitmap walk continues. */
+		up_read(&ltds->ltd_rw_sem);
+		rc = lfsck_async_request(env, ltd->ltd_exp, lr, set,
+					 lfsck_async_interpret_common,
+					 laia, LFSCK_QUERY);
+		if (rc != 0) {
+			struct lfsck_assistant_data *lad = com->lc_data;
+
+			CDEBUG(D_LFSCK, "%s: Fail to query %s %x for stat %s: "
+			       "rc = %d\n", lfsck_lfsck2name(lfsck),
+			       (lr->lr_flags & LEF_TO_OST) ? "OST" : "MDT",
+			       ltd->ltd_index, lad->lad_name, rc);
+			lfsck_reset_ltd_status(ltd, com->lc_type);
+			/* Drop the reference taken by lfsck_tgt_get().
+			 * NOTE(review): on success the reference is presumably
+			 * released by the interpret callback - confirm. */
+			lfsck_tgt_put(ltd);
+		}
+		down_read(&ltds->ltd_rw_sem);
+	}
+	up_read(&ltds->ltd_rw_sem);
+
+	/* The layout LFSCK makes a second pass over the OSTs. */
+	if (com->lc_type == LFSCK_TYPE_LAYOUT && !(lr->lr_flags & LEF_TO_OST)) {
+		ltds = &lfsck->li_ost_descs;
+		lr->lr_flags |= LEF_TO_OST;
+		goto again;
+	}
+
+	rc = ptlrpc_set_wait(set);
+	ptlrpc_set_destroy(set);
+
+	RETURN(rc);
+}
+
int lfsck_start_assistant(const struct lu_env *env, struct lfsck_component *com,
struct lfsck_start_param *lsp)
{
lr->lr_active = LFSCK_TYPES_ALL;
lr->lr_param = stop->ls_flags;
- laia->laia_com = NULL;
+ memset(laia, 0, sizeof(*laia));
laia->laia_ltds = ltds;
laia->laia_lr = lr;
- laia->laia_result = 0;
laia->laia_shared = 1;
down_read(&ltds->ltd_rw_sem);
LSV_ASYNC_WINDOWS | LSV_CREATE_OSTOBJ |
LSV_CREATE_MDTOBJ;
- laia->laia_com = NULL;
+ memset(laia, 0, sizeof(*laia));
laia->laia_ltds = ltds;
laia->laia_lr = lr;
- laia->laia_result = 0;
laia->laia_shared = 1;
down_read(&ltds->ltd_rw_sem);
struct l_wait_info lwi = { 0 };
struct lfsck_thread_args *lta;
struct task_struct *task;
+ struct lfsck_tgt_descs *ltds;
+ struct lfsck_tgt_desc *ltd;
+ __u32 idx;
int rc = 0;
__u16 valid = 0;
__u16 flags = 0;
}
}
+	/* Reset the per-target LFSCK state cached for every MDT: progress
+	 * flags, cached status/repaired counters and list membership. */
+	ltds = &lfsck->li_mdt_descs;
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_ltd2tgt(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		ltd->ltd_layout_done = 0;
+		ltd->ltd_namespace_done = 0;
+		ltd->ltd_synced_failures = 0;
+		lfsck_reset_ltd_status(ltd, LFSCK_TYPE_NAMESPACE);
+		lfsck_reset_ltd_status(ltd, LFSCK_TYPE_LAYOUT);
+		list_del_init(&ltd->ltd_layout_phase_list);
+		list_del_init(&ltd->ltd_layout_list);
+		list_del_init(&ltd->ltd_namespace_phase_list);
+		list_del_init(&ltd->ltd_namespace_list);
+	}
+	up_read(&ltds->ltd_rw_sem);
+
+	/* Same reset for every OST; only the layout state is touched. */
+	ltds = &lfsck->li_ost_descs;
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_ltd2tgt(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		ltd->ltd_layout_done = 0;
+		ltd->ltd_synced_failures = 0;
+		lfsck_reset_ltd_status(ltd, LFSCK_TYPE_LAYOUT);
+		list_del_init(&ltd->ltd_layout_phase_list);
+		list_del_init(&ltd->ltd_layout_list);
+	}
+	up_read(&ltds->ltd_rw_sem);
+
trigger:
lfsck->li_args_dir = LUDA_64BITHASH | LUDA_VERIFY | LUDA_TYPE;
if (bk->lb_param & LPF_DRYRUN)
EXPORT_SYMBOL(lfsck_in_notify);
+/*
+ * Query the LFSCK status via the instance registered for \a key.
+ *
+ * With a non-NULL \a que (global query): LFSCK_TYPES_ALL is expanded to all
+ * supported types except scrub, each requested type is queried into its own
+ * slot of \a que, and, if LPF_WAIT is set in que->lu_flags, the query is
+ * repeated every 3 seconds while any MDT or OST is counted in a scanning
+ * phase. With a NULL \a que (single query): the component selected by
+ * req->lr_active answers into \a rep.
+ *
+ * Returns -ENXIO when no LFSCK instance is found for \a key, -ENOTSUPP when
+ * an unsupported type is requested or the component is missing, otherwise
+ * the result of the component's lfsck_query operation or of the wait.
+ */
int lfsck_query(const struct lu_env *env, struct dt_device *key,
- struct lfsck_request *lr)
+ struct lfsck_request *req, struct lfsck_reply *rep,
+ struct lfsck_query *que)
{
struct lfsck_instance *lfsck;
struct lfsck_component *com;
- int rc;
+ int i;
+ int rc = 0;
+ __u16 type;
ENTRY;
lfsck = lfsck_instance_find(key, true, false);
if (unlikely(lfsck == NULL))
RETURN(-ENXIO);
- com = lfsck_component_find(lfsck, lr->lr_active);
- if (likely(com != NULL)) {
- rc = com->lc_ops->lfsck_query(env, com);
- lfsck_component_put(env, com);
+ if (que != NULL) {
+ if (que->lu_types == LFSCK_TYPES_ALL)
+ que->lu_types =
+ LFSCK_TYPES_SUPPORTED & ~LFSCK_TYPE_SCRUB;
+
+ /* On failure lu_types is left holding only the unsupported bits. */
+ if (que->lu_types & ~LFSCK_TYPES_SUPPORTED) {
+ que->lu_types &= ~LFSCK_TYPES_SUPPORTED;
+
+ GOTO(out, rc = -ENOTSUPP);
+ }
+
+ for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS;
+ i++, type = 1 << i) {
+ if (!(que->lu_types & type))
+ continue;
+
+again:
+ com = lfsck_component_find(lfsck, type);
+ if (unlikely(com == NULL))
+ GOTO(out, rc = -ENOTSUPP);
+
+ /* Start from clean counters for this type on every pass. */
+ memset(que->lu_mdts_count[i], 0,
+ sizeof(__u32) * (LS_MAX + 1));
+ memset(que->lu_osts_count[i], 0,
+ sizeof(__u32) * (LS_MAX + 1));
+ que->lu_repaired[i] = 0;
+ rc = com->lc_ops->lfsck_query(env, com, req, rep,
+ que, i);
+ lfsck_component_put(env, com);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
+ if (!(que->lu_flags & LPF_WAIT))
+ GOTO(out, rc);
+
+ for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS;
+ i++, type = 1 << i) {
+ if (!(que->lu_types & type))
+ continue;
+
+ if (que->lu_mdts_count[i][LS_SCANNING_PHASE1] != 0 ||
+ que->lu_mdts_count[i][LS_SCANNING_PHASE2] != 0 ||
+ que->lu_osts_count[i][LS_SCANNING_PHASE1] != 0 ||
+ que->lu_osts_count[i][LS_SCANNING_PHASE2] != 0) {
+ struct l_wait_info lwi;
+
+ /* If it is required to wait, then sleep
+ * 3 seconds and try to query again. */
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(3),
+ NULL,
+ LWI_ON_SIGNAL_NOOP,
+ NULL);
+ rc = l_wait_event(lfsck->li_thread.t_ctl_waitq,
+ 0, &lwi);
+ /* "again" lies inside the query loop above: type i is
+ * queried again, that loop then continues with the
+ * remaining types, and this wait check restarts from
+ * the first type.
+ * NOTE(review): any other l_wait_event() result falls
+ * through to the next type instead of leaving the
+ * loop - confirm this is intended. */
+ if (rc == -ETIMEDOUT)
+ goto again;
+ }
+ }
} else {
- rc = -ENOTSUPP;
+ com = lfsck_component_find(lfsck, req->lr_active);
+ if (likely(com != NULL)) {
+ rc = com->lc_ops->lfsck_query(env, com, req, rep,
+ que, -1);
+ lfsck_component_put(env, com);
+ } else {
+ rc = -ENOTSUPP;
+ }
}
- lfsck_instance_put(env, lfsck);
+ GOTO(out, rc);
- RETURN(rc);
+out:
+ lfsck_instance_put(env, lfsck);
+ return rc;
}
+EXPORT_SYMBOL(lfsck_query);
int lfsck_register_namespace(const struct lu_env *env, struct dt_device *key,
struct ldlm_namespace *ns)
"status: %s\n",
ns->ln_magic,
bk->lb_version,
- lfsck_status2names(ns->ln_status));
+ lfsck_status2name(ns->ln_status));
rc = lfsck_bits_dump(m, ns->ln_flags, lfsck_flags_names, "flags");
if (rc < 0)
RETURN(0);
}
+/*
+ * Add every repair-related counter kept in \a ns to *count.
+ * The caller decides the starting value of *count; it is not reset here.
+ */
+static void lfsck_namespace_repaired(struct lfsck_namespace *ns, __u64 *count)
+{
+	__u64 total = 0;
+
+	total += ns->ln_objs_nlink_repaired;
+	total += ns->ln_dirent_repaired;
+	total += ns->ln_linkea_repaired;
+	total += ns->ln_mul_linked_repaired;
+	total += ns->ln_unmatched_pairs_repaired;
+	total += ns->ln_dangling_repaired;
+	total += ns->ln_mul_ref_repaired;
+	total += ns->ln_bad_type_repaired;
+	total += ns->ln_lost_dirent_repaired;
+	total += ns->ln_striped_dirs_disabled;
+	total += ns->ln_striped_dirs_repaired;
+	total += ns->ln_striped_shards_repaired;
+	total += ns->ln_name_hash_repaired;
+	total += ns->ln_local_lpf_moved;
+
+	*count += total;
+}
+
+/**
+ * Gather the namespace LFSCK status of every MDT, including the local one.
+ *
+ * lfsck_query_all() is called first to refresh the status cached in each
+ * MDT descriptor. Each descriptor is then counted under its cached status,
+ * and its repaired count is added to \a repaired. Finally the local
+ * namespace LFSCK file is counted as one more MDT.
+ *
+ * \param[in] env		execution environment
+ * \param[in] com		the namespace LFSCK component
+ * \param[in,out] mdts_count	per-status MDT counters, LS_MAX + 1 entries
+ * \param[in,out] repaired	running total of repaired items
+ *
+ * \retval 0			on success
+ * \retval non-zero		the error returned by lfsck_query_all()
+ */
+static int lfsck_namespace_query_all(const struct lu_env *env,
+				     struct lfsck_component *com,
+				     __u32 *mdts_count, __u64 *repaired)
+{
+	struct lfsck_namespace *ns = com->lc_file_ram;
+	struct lfsck_tgt_descs *ltds = &com->lc_lfsck->li_mdt_descs;
+	struct lfsck_tgt_desc *ltd;
+	__u32 status;
+	int idx;
+	int rc;
+	ENTRY;
+
+	rc = lfsck_query_all(env, com);
+	if (rc != 0)
+		RETURN(rc);
+
+	down_read(&ltds->ltd_rw_sem);
+	cfs_foreach_bit(ltds->ltd_tgts_bitmap, idx) {
+		ltd = lfsck_ltd2tgt(ltds, idx);
+		LASSERT(ltd != NULL);
+
+		/* The cached status is copied from a remote reply, so it
+		 * must not index past the last (LS_MAX) counter slot. */
+		status = ltd->ltd_namespace_status;
+		if (status > LS_MAX)
+			status = LS_MAX;
+
+		mdts_count[status]++;
+		*repaired += ltd->ltd_namespace_repaired;
+	}
+	up_read(&ltds->ltd_rw_sem);
+
+	/* Account for the local MDT, which is not in li_mdt_descs. */
+	down_read(&com->lc_sem);
+	status = ns->ln_status;
+	if (status > LS_MAX)
+		status = LS_MAX;
+
+	mdts_count[status]++;
+	lfsck_namespace_repaired(ns, repaired);
+	up_read(&com->lc_sem);
+
+	RETURN(0);
+}
+
+/*
+ * Namespace LFSCK implementation of the lfsck_query operation.
+ *
+ * With a non-NULL \a que the status of all MDTs is collected into slot
+ * \a idx of \a que via lfsck_namespace_query_all(). Otherwise only the local
+ * status is written to \a rep, together with the local repaired count when
+ * the request carries LEF_QUERY_ALL.
+ *
+ * Returns the result of lfsck_namespace_query_all() in the first case and 0
+ * in the second.
+ */
static int lfsck_namespace_query(const struct lu_env *env,
- struct lfsck_component *com)
+ struct lfsck_component *com,
+ struct lfsck_request *req,
+ struct lfsck_reply *rep,
+ struct lfsck_query *que, int idx)
{
struct lfsck_namespace *ns = com->lc_file_ram;
+ int rc = 0;
+
+ if (que != NULL) {
+ /* The global query is only expected on the master instance. */
+ LASSERT(com->lc_lfsck->li_master);
- return ns->ln_status;
+ rc = lfsck_namespace_query_all(env, com,
+ que->lu_mdts_count[idx],
+ &que->lu_repaired[idx]);
+ } else {
+ down_read(&com->lc_sem);
+ rep->lr_status = ns->ln_status;
+ if (req->lr_flags & LEF_QUERY_ALL)
+ lfsck_namespace_repaired(ns, &rep->lr_repaired);
+ up_read(&com->lc_sem);
+ }
+
+ return rc;
}
static struct lfsck_operations lfsck_namespace_ops = {
(struct lfsck_stop *)karg);
RETURN(rc);
}
- }
+ case OBD_IOC_QUERY_LFSCK: {
+ rc = lfsck_query(env, mdd->mdd_bottom, NULL, NULL,
+ (struct lfsck_query *)karg);
+ RETURN(rc);
+ }
+ }
/* Below ioctls use obd_ioctl_data */
if (len != sizeof(*data)) {
rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0, &stop);
break;
}
+ case OBD_IOC_QUERY_LFSCK: {
+ struct md_device *next = mdt->mdt_child;
+ struct obd_ioctl_data *data = karg;
+
+ rc = next->md_ops->mdo_iocontrol(&env, next, cmd, 0,
+ data->ioc_inlbuf1);
+ break;
+ }
case OBD_IOC_GET_OBJ_VERSION: {
struct mdt_thread_info *mti;
mti = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
{
__swab32s(&lr->lr_status);
CLASSERT(offsetof(typeof(*lr), lr_padding_1) != 0);
- CLASSERT(offsetof(typeof(*lr), lr_padding_2) != 0);
+ __swab64s(&lr->lr_repaired);
}
void lustre_swab_orphan_ent(struct lu_orphan_ent *ent)
(unsigned)LEF_SET_LMV_ALL);
LASSERTF(LEF_RECHECK_NAME_HASH == 0x00000010UL, "found 0x%.8xUL\n",
(unsigned)LEF_RECHECK_NAME_HASH);
+ LASSERTF(LEF_QUERY_ALL == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)LEF_QUERY_ALL);
/* Checks for struct lfsck_reply */
LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n",
(long long)(int)offsetof(struct lfsck_reply, lr_padding_1));
LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1));
- LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lfsck_reply, lr_padding_2));
- LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2));
+ LASSERTF((int)offsetof(struct lfsck_reply, lr_repaired) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lfsck_reply, lr_repaired));
+ LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_repaired) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_repaired));
/* Checks for struct update_params */
LASSERTF((int)sizeof(struct update_params) == 0, "found %lld\n",
+/* LFSCK query handler; NULL until tgt_register_lfsck_query() sets it. */
static int (*tgt_lfsck_query)(const struct lu_env *env,
struct dt_device *key,
- struct lfsck_request *lr) = NULL;
+ struct lfsck_request *req,
+ struct lfsck_reply *rep,
+ struct lfsck_query *que) = NULL;
+/* Store \a query in tgt_lfsck_query, replacing any previous handler. */
void tgt_register_lfsck_query(int (*query)(const struct lu_env *,
struct dt_device *,
- struct lfsck_request *))
+ struct lfsck_request *,
+ struct lfsck_reply *,
+ struct lfsck_query *))
{
tgt_lfsck_query = query;
}
if (reply == NULL)
RETURN(-ENOMEM);
- rc = tgt_lfsck_query(tsi->tsi_env, tsi->tsi_tgt->lut_bottom, request);
- reply->lr_status = rc;
+ rc = tgt_lfsck_query(tsi->tsi_env, tsi->tsi_tgt->lut_bottom,
+ request, reply, NULL);
RETURN(rc < 0 ? rc : 0);
}
error "(3) Fail to start MDT0"
}
+# Run "lfsck_query -w" for LFSCK type $1 on MDT0000 (per the lctl help, -w
+# does not return while LFSCK is still running), then require that the
+# "<type>_mdts_<status>" counter for status $2 equals $MDSCOUNT.
+# On mismatch dump the full query output and fail with error number $3.
+wait_all_targets_blocked() {
+	local lfsck_type=$1
+	local want=$2
+	local errid=$3
+	local query_cmd="$LCTL lfsck_query -t $lfsck_type -M ${FSNAME}-MDT0000"
+
+	local nr_mdts=$(do_facet mds1 \
+		"$query_cmd -w |
+		awk '/^${lfsck_type}_mdts_${want}/ { print \\\$2 }'")
+	if ! [[ $nr_mdts -eq $MDSCOUNT ]]; then
+		do_facet mds1 "$query_cmd"
+		error "($errid) only $nr_mdts of $MDSCOUNT MDTs are in ${want}"
+	fi
+}
+
+# Poll "lfsck_query" for LFSCK type $1 on MDT0000 through wait_update_facet
+# (limit $LTIME) until the "<type>_mdts_<status>" counter for status $2
+# reads $MDSCOUNT.  If that never happens, dump the full query output and
+# fail with error number $3.
+wait_all_targets() {
+	local lfsck_type=$1
+	local want=$2
+	local errid=$3
+	local query_cmd="$LCTL lfsck_query -t $lfsck_type -M ${FSNAME}-MDT0000"
+
+	if ! wait_update_facet mds1 "$query_cmd |
+		awk '/^${lfsck_type}_mdts_${want}/ { print \\\$2 }'" \
+		"$MDSCOUNT" $LTIME; then
+		do_facet mds1 "$query_cmd"
+		error "($errid) some MDTs are not in ${want}"
+	fi
+}
+
test_0() {
lfsck_prep 3 3
do_facet $SINGLEMDS $LCTL set_param fail_loc=0
$START_NAMESPACE -r -A || error "(3) Fail to start LFSCK for namespace!"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(4) unexpected status"
- }
+
+ wait_all_targets_blocked namespace completed 4
local repaired=$($SHOW_NAMESPACE |
awk '/^linkea_repaired/ { print $2 }')
-s 1 -r || error "(2) Fail to start LFSCK on all devices!"
echo "All the LFSCK targets should be in 'scanning-phase1' status."
- for k in $(seq $MDSCOUNT); do
- local STATUS=$(do_facet mds${k} $LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_namespace |
- awk '/^status/ { print $2 }')
- [ "$STATUS" == "scanning-phase1" ] ||
- error "(3) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
- done
+ wait_all_targets namespace scanning-phase1 3
echo "Stop namespace LFSCK on all targets by single lctl command."
do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
error "(4) Fail to stop LFSCK on all devices!"
echo "All the LFSCK targets should be in 'stopped' status."
- for k in $(seq $MDSCOUNT); do
- local STATUS=$(do_facet mds${k} $LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_namespace |
- awk '/^status/ { print $2 }')
- [ "$STATUS" == "stopped" ] ||
- error "(5) MDS${k} Expect 'stopped', but got '$STATUS'"
- done
+ wait_all_targets_blocked namespace stopped 5
echo "Re-start namespace LFSCK on all targets by single command (-s 0)."
do_facet mds1 $LCTL lfsck_start -M ${FSNAME}-MDT0000 -t namespace -A \
-s 0 -r || error "(6) Fail to start LFSCK on all devices!"
echo "All the LFSCK targets should be in 'completed' status."
- for k in $(seq $MDSCOUNT); do
- wait_update_facet mds${k} "$LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 8 ||
- error "(7) MDS${k} is not the expected 'completed'"
- done
+ wait_all_targets_blocked namespace completed 7
start_full_debug_logging
-s 1 -r || error "(8) Fail to start LFSCK on all devices!"
echo "All the LFSCK targets should be in 'scanning-phase1' status."
- for k in $(seq $MDSCOUNT); do
- local STATUS=$(do_facet mds${k} $LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_layout |
- awk '/^status/ { print $2 }')
- [ "$STATUS" == "scanning-phase1" ] ||
- error "(9) MDS${k} Expect 'scanning-phase1', but got '$STATUS'"
- done
+ wait_all_targets layout scanning-phase1 9
echo "Stop layout LFSCK on all targets by single lctl command."
do_facet mds1 $LCTL lfsck_stop -M ${FSNAME}-MDT0000 -A ||
error "(10) Fail to stop LFSCK on all devices!"
echo "All the LFSCK targets should be in 'stopped' status."
- for k in $(seq $MDSCOUNT); do
- local STATUS=$(do_facet mds${k} $LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_layout |
- awk '/^status/ { print $2 }')
- [ "$STATUS" == "stopped" ] ||
- error "(11) MDS${k} Expect 'stopped', but got '$STATUS'"
- done
+ wait_all_targets_blocked layout stopped 11
for k in $(seq $OSTCOUNT); do
local STATUS=$(do_facet ost${k} $LCTL get_param -n \
-s 0 -r || error "(13) Fail to start LFSCK on all devices!"
echo "All the LFSCK targets should be in 'completed' status."
- for k in $(seq $MDSCOUNT); do
- # The LFSCK status query internal is 30 seconds. For the case
- # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
- # time to guarantee the status sync up.
- wait_update_facet mds${k} "$LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_layout |
- awk '/^status/ { print \\\$2 }'" "completed" 32 ||
- error "(14) MDS${k} is not the expected 'completed'"
- done
+ wait_all_targets_blocked layout completed 14
stop_full_debug_logging
}
echo "Trigger layout LFSCK to race with the migration"
$START_LAYOUT -A -r || error "(1) Fail to start layout LFSCK!"
- for k in $(seq $MDSCOUNT); do
- # The LFSCK status query internal is 30 seconds. For the case
- # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
- # time to guarantee the status sync up.
- wait_update_facet mds${k} "$LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_layout |
- awk '/^status/ { print \\\$2 }'" "completed" $LTIME ||
- error "(2) MDS${k} is not the expected 'completed'"
- done
+ wait_all_targets_blocked layout completed 2
do_facet mds2 $LCTL set_param fail_loc=0 fail_val=0
local repaired=$($SHOW_LAYOUT |
$START_NAMESPACE -A -r ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^unmatched_pairs_repaired/ { print $2 }')
$START_NAMESPACE -A -r ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^unmatched_pairs_repaired/ { print $2 }')
$START_NAMESPACE -A -r ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^dangling_repaired/ { print $2 }')
$START_NAMESPACE -A -r -C ||
error "(9) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(10) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 10
repaired=$($SHOW_NAMESPACE |
awk '/^dangling_repaired/ { print $2 }')
$START_NAMESPACE -A -r ||
error "(7) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(8) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 8
local repaired=$($SHOW_NAMESPACE |
awk '/^multiple_referenced_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(6) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(7) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 7
local repaired=$($SHOW_NAMESPACE |
awk '/^lost_dirent_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^lost_dirent_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(6) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(7) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 7
local repaired=$($SHOW_NAMESPACE |
awk '/^lost_dirent_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(6) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(7) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 7
local repaired=$($SHOW_NAMESPACE |
awk '/^lost_dirent_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(8) Fail to start LFSCK for namespace"
- for k in $(seq $MDSCOUNT); do
- # The LFSCK status query internal is 30 seconds. For the case
- # of some LFSCK_NOTIFY RPCs failure/lost, we will wait enough
- # time to guarantee the status sync up.
- wait_update_facet mds${k} "$LCTL get_param -n \
- mdd.$(facet_svc mds${k}).lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 ||
- error "(9) MDS${k} is not the expected 'completed'"
- done
+ wait_all_targets_blocked namespace completed 9
local repaired=$(do_facet mds1 $LCTL get_param -n \
mdd.$(facet_svc mds1).lfsck_namespace |
$START_NAMESPACE -r -A ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^nlinks_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^nlinks_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(7) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(8) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 8
do_facet $SINGLEMDS $LCTL set_param fail_loc=0
local repaired=$($SHOW_NAMESPACE |
$START_NAMESPACE -r -A ||
error "(14) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(15) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 15
local repaired=$($SHOW_NAMESPACE |
awk '/^local_lost_found_moved/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(3) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(4) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 4
local repaired=$($SHOW_NAMESPACE |
awk '/^name_hash_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(3) Fail to start LFSCK for namespace"
- wait_update_facet mds2 "$LCTL get_param -n \
- mdd.$(facet_svc mds2).lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 ||
- error "(4) unexpected status"
+ wait_all_targets_blocked namespace completed 4
local repaired=$(do_facet mds2 $LCTL get_param -n \
mdd.$(facet_svc mds2).lfsck_namespace |
$START_NAMESPACE -r -A ||
error "(2) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(3) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 3
local repaired=$($SHOW_NAMESPACE |
awk '/^striped_dirs_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(5) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(6) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 6
local repaired=$($SHOW_NAMESPACE |
awk '/^striped_dirs_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(2) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(3) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 3
local repaired=$($SHOW_NAMESPACE |
awk '/^striped_shards_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(2) Fail to start LFSCK for namespace"
- wait_update_facet mds2 "$LCTL get_param -n \
- mdd.$(facet_svc mds2).lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 ||
- error "(3) unexpected status"
+ wait_all_targets_blocked namespace completed 3
local repaired=$(do_facet mds2 $LCTL get_param -n \
mdd.$(facet_svc mds2).lfsck_namespace |
$START_NAMESPACE -r -A ||
error "(2) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(3) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 3
local repaired=$($SHOW_NAMESPACE |
awk '/^striped_shards_repaired/ { print $2 }')
$START_NAMESPACE -r -A ||
error "(2) Fail to start LFSCK for namespace"
- wait_update_facet $SINGLEMDS "$LCTL get_param -n \
- mdd.${MDT_DEV}.lfsck_namespace |
- awk '/^status/ { print \\\$2 }'" "completed" 32 || {
- $SHOW_NAMESPACE
- error "(3) unexpected status"
- }
+ wait_all_targets_blocked namespace completed 3
local repaired=$($SHOW_NAMESPACE |
awk '/^dirent_repaired/ { print $2 }')
{"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n"
"usage: lfsck_stop <-M | --device [MDT,OST]_device>\n"
" [-A | --all] [-h | --help]"},
+ {"lfsck_query", jt_lfsck_query, 0, "check lfsck(s) status\n"
+ "usage: lfsck_query <-M | --device MDT_device> [-h | --help]\n"
+ " [-t | --type lfsck_type[,lfsck_type...]]\n"
+ " [-w | --wait]"},
{"==== obsolete (DANGEROUS) ====", jt_noop, 0, "obsolete (DANGEROUS)"},
/* some test scripts still use these */
{"device", required_argument, 0, 'M'},
{"all", no_argument, 0, 'A'},
{"help", no_argument, 0, 'h'},
- {0, 0, 0, 0}
+ {0, 0, 0, 0 }
+};
+
+static struct option long_opt_query[] = { /* long options passed to getopt_long() by jt_lfsck_query() */
+ {"device", required_argument, 0, 'M'}, /* -M: device name, packed by lfsck_pack_dev() */
+ {"type", required_argument, 0, 't'}, /* -t: comma-separated LFSCK type names */
+ {"help", no_argument, 0, 'h'}, /* -h: print usage_query() and return */
+ {"wait", no_argument, 0, 'w'}, /* -w: sets LPF_WAIT in the query flags */
+ {0, 0, 0, 0 } /* all-zero entry ends the table */
};
struct lfsck_type_name {
return -1;
}
+/**
+ * Look up the printable name of an LFSCK type.
+ *
+ * Scans lfsck_types_names up to its NULL-named terminator and returns the
+ * name of the first entry whose ltn_type equals \a type.
+ *
+ * \param[in] type	LFSCK type value to translate
+ *
+ * \retval		name string of the matching table entry
+ * \retval		NULL if no entry matches
+ */
+static const char *lfsck_type2name(__u16 type)
+{
+	int idx = 0;
+
+	while (lfsck_types_names[idx].ltn_name != NULL) {
+		if (lfsck_types_names[idx].ltn_type == type)
+			return lfsck_types_names[idx].ltn_name;
+		idx++;
+	}
+
+	return NULL;
+}
+
static void usage_start(void)
{
fprintf(stderr, "start LFSCK\n"
"-h: this help message\n");
}
+/**
+ * Print the lfsck_query synopsis and option summary to stderr.
+ *
+ * The short option shown for --wait is -w, matching the getopt string and
+ * the long option table used by jt_lfsck_query().
+ */
+static void usage_query(void)
+{
+	fprintf(stderr, "check the LFSCK global status\n"
+		"usage:\n"
+		"lfsck_query <-M | --device MDT_device> [-h | --help]\n"
+		" [-t | --type check_type[,check_type...]]\n"
+		" [-w | --wait]\n"
+		"options:\n"
+		"-M: device to query LFSCK on\n"
+		"-t: LFSCK type(s) to be queried (default is all)\n"
+		"-h: this help message\n"
+		"-w: do not return until LFSCK not running\n");
+}
+
static int lfsck_pack_dev(struct obd_ioctl_data *data, char *device, char *arg)
{
int len = strlen(arg) + 1;
data.ioc_inllen1 = sizeof(start);
memset(buf, 0, sizeof(rawbuf));
rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
- if (rc) {
+ if (rc != 0) {
fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
return rc;
}
data.ioc_inllen1 = sizeof(stop);
memset(buf, 0, sizeof(rawbuf));
rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
- if (rc) {
+ if (rc != 0) {
fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
return rc;
}
printf("Stopped LFSCK on the device %s.\n", device);
return 0;
}
+
+/**
+ * lctl "lfsck_query" handler: send OBD_IOC_QUERY_LFSCK to one device and
+ * print the returned per-type counters.
+ *
+ * Options (see usage_query()):
+ *   -M <device>  device to query; without it lcfg_get_devname() is used
+ *   -t <types>   comma-separated type names; default is LFSCK_TYPES_ALL
+ *   -w           set LPF_WAIT in the query flags
+ *   -h           print usage and return 0
+ *
+ * For every selected type that has a name in lfsck_types_names, one
+ * "<type>_mdts_<status>" and one "<type>_osts_<status>" line is printed per
+ * status value 0..LS_MAX, followed by "<type>_repaired".
+ *
+ * \retval 0		success, or help was printed
+ * \retval -EINVAL	unknown option, unknown type name or no device
+ * \retval other	value returned by lfsck_pack_dev(), obd_ioctl_pack()
+ *			or l_ioctl()
+ */
+int jt_lfsck_query(int argc, char **argv)
+{
+	struct obd_ioctl_data data = { 0 };
+	char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf;
+	char device[MAX_OBD_NAME] = "";
+	struct lfsck_query query = { .lu_types = LFSCK_TYPES_ALL };
+	int opt, index, rc, i;
+	enum lfsck_type type;
+
+	while ((opt = getopt_long(argc, argv, "hM:t:w", long_opt_query,
+				  &index)) != EOF) {
+		switch (opt) {
+		case 'h':
+			usage_query();
+			return 0;
+		case 'M':
+			rc = lfsck_pack_dev(&data, device, optarg);
+			if (rc != 0)
+				return rc;
+			break;
+		case 't': {
+			char *typename;
+
+			/* Drop the "all" default before accumulating the
+			 * explicitly requested types. */
+			if (query.lu_types == LFSCK_TYPES_ALL)
+				query.lu_types = 0;
+			while ((typename = strsep(&optarg, ",")) != NULL) {
+				type = lfsck_name2type(typename);
+				if (type == -1) {
+					fprintf(stderr,
+						"invalid LFSCK type -t '%s'. "
+						"valid types are:\n", typename);
+					for (i = 0;
+					     lfsck_types_names[i].ltn_name !=
+					     NULL; i++)
+						fprintf(stderr, "%s%s",
+							i != 0 ? "," : "",
+							lfsck_types_names[i].ltn_name);
+					fprintf(stderr, "\n");
+					return -EINVAL;
+				}
+				query.lu_types |= type;
+			}
+			break;
+		}
+		case 'w':
+			query.lu_flags |= LPF_WAIT;
+			break;
+		default:
+			fprintf(stderr, "Invalid option, '-h' for help.\n");
+			usage_query();
+			return -EINVAL;
+		}
+	}
+
+	/* ioc_inlbuf4 is still NULL when no -M option was given. */
+	if (data.ioc_inlbuf4 == NULL) {
+		if (lcfg_get_devname() != NULL) {
+			rc = lfsck_pack_dev(&data, device, lcfg_get_devname());
+			if (rc != 0)
+				return rc;
+		} else {
+			fprintf(stderr,
+				"Must specify device to query LFSCK.\n");
+			return -EINVAL;
+		}
+	}
+
+	data.ioc_inlbuf1 = (char *)&query;
+	data.ioc_inllen1 = sizeof(query);
+	memset(buf, 0, sizeof(rawbuf));
+	rc = obd_ioctl_pack(&data, &buf, sizeof(rawbuf));
+	if (rc != 0) {
+		fprintf(stderr, "Fail to pack ioctl data: rc = %d.\n", rc);
+		return rc;
+	}
+
+	rc = l_ioctl(OBD_DEV_ID, OBD_IOC_QUERY_LFSCK, buf);
+	if (rc < 0) {
+		perror("Fail to query LFSCK");
+		return rc;
+	}
+
+	obd_ioctl_unpack(&data, buf, sizeof(rawbuf));
+	for (i = 0, type = 1 << i; i < LFSCK_TYPE_BITS; i++, type = 1 << i) {
+		const char *name;
+		int j;
+
+		if (!(query.lu_types & type))
+			continue;
+
+		name = lfsck_type2name(type);
+		/* A selected bit with no entry in lfsck_types_names has no
+		 * printable name; handing NULL to "%s" is undefined. */
+		if (name == NULL)
+			continue;
+
+		for (j = 0; j <= LS_MAX; j++)
+			printf("%s_mdts_%s: %d\n", name,
+			       lfsck_status2name(j), query.lu_mdts_count[i][j]);
+
+		for (j = 0; j <= LS_MAX; j++)
+			printf("%s_osts_%s: %d\n", name,
+			       lfsck_status2name(j), query.lu_osts_count[i][j]);
+
+		printf("%s_repaired: %llu\n", name, query.lu_repaired[i]);
+	}
+
+	return 0;
+}
/* lustre_lfsck.c */
int jt_lfsck_start(int argc, char **argv);
int jt_lfsck_stop(int argc, char **argv);
+int jt_lfsck_query(int argc, char **argv);
#endif
CHECK_VALUE_X(LEF_SET_LMV_HASH);
CHECK_VALUE_X(LEF_SET_LMV_ALL);
CHECK_VALUE_X(LEF_RECHECK_NAME_HASH);
+ CHECK_VALUE_X(LEF_QUERY_ALL);
}
static void check_lfsck_reply(void)
CHECK_STRUCT(lfsck_reply);
CHECK_MEMBER(lfsck_reply, lr_status);
CHECK_MEMBER(lfsck_reply, lr_padding_1);
- CHECK_MEMBER(lfsck_reply, lr_padding_2);
+ CHECK_MEMBER(lfsck_reply, lr_repaired);
}
static void check_update_params(void)
(unsigned)LEF_SET_LMV_ALL);
LASSERTF(LEF_RECHECK_NAME_HASH == 0x00000010UL, "found 0x%.8xUL\n",
(unsigned)LEF_RECHECK_NAME_HASH);
+ LASSERTF(LEF_QUERY_ALL == 0x00000020UL, "found 0x%.8xUL\n",
+ (unsigned)LEF_QUERY_ALL);
/* Checks for struct lfsck_reply */
LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n",
(long long)(int)offsetof(struct lfsck_reply, lr_padding_1));
LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n",
(long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1));
- LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n",
- (long long)(int)offsetof(struct lfsck_reply, lr_padding_2));
- LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n",
- (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2));
+ LASSERTF((int)offsetof(struct lfsck_reply, lr_repaired) == 8, "found %lld\n",
+ (long long)(int)offsetof(struct lfsck_reply, lr_repaired));
+ LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_repaired) == 8, "found %lld\n",
+ (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_repaired));
/* Checks for struct update_params */
LASSERTF((int)sizeof(struct update_params) == 0, "found %lld\n",