From 46e82c4644108eac92ac7c813c16bd7dd3283a56 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Sat, 5 Apr 2014 10:29:33 +0800 Subject: [PATCH] LU-4609 ofd: auto resume LFSCK after the recovery To prevent the LFSCK from recreating objects that should instead be recreated by replayable RPCs, we postpone the automatic resuming of the LFSCK after a server restart until the recovery has finished. Such serialization also avoids races between LFSCK and recovery that could mislead the LFSCK into regarding the system as inconsistent. Another fix: during the OST recovery, a client write RPC may create a missing OST-object, which temporarily causes the real last_id to be greater than the last_id value stored in the LAST_ID file. Normally, the LAST_ID file will be synced between MDT and OST during the recovery, but we should not assume that the MDT-OST recovery will succeed. If it fails, a subsequent LFSCK on the OST may regard such a LAST_ID file as crashed, mark the OST as read-only, and rebuild the LAST_ID files. To avoid this, the LAST_ID file must be updated first, before the write RPC creates the lost object. 
Signed-off-by: Fan Yong Change-Id: I963f3c1b70c7cad0c943f2485417d2e783768bf1 Reviewed-on: http://review.whamcloud.com/10010 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger --- lustre/lfsck/lfsck_layout.c | 15 ++++++++- lustre/lfsck/lfsck_lib.c | 5 +++ lustre/mdt/mdt_handler.c | 32 +++++++++---------- lustre/ofd/ofd_dev.c | 10 ------ lustre/ofd/ofd_internal.h | 4 --- lustre/ofd/ofd_io.c | 73 ++++++++++++++++++++++++++++++++------------ lustre/ofd/ofd_obd.c | 10 ++++++ lustre/ofd/ofd_objects.c | 22 ------------- lustre/tests/sanity-lfsck.sh | 16 +++------- 9 files changed, 102 insertions(+), 85 deletions(-) diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index f4153e8..d63d5ee 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -4271,13 +4271,26 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, struct lfsck_start_param *lsp) { struct lfsck_layout_slave_data *llsd = com->lc_data; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout *lo = com->lc_file_ram; struct lfsck_start *start = lsp->lsp_start; int rc; rc = lfsck_layout_prep(env, com, start); - if (rc != 0 || !lsp->lsp_index_valid) + if (rc != 0) return rc; + if (lo->ll_flags & LF_CRASHED_LASTID && + list_empty(&llsd->llsd_master_list)) { + LASSERT(lfsck->li_out_notify != NULL); + + lfsck->li_out_notify(env, lfsck->li_out_notify_data, + LE_LASTID_REBUILDING); + } + + if (!lsp->lsp_index_valid) + return 0; + rc = lfsck_layout_llst_add(llsd, lsp->lsp_index); if (rc == 0 && start != NULL && start->ls_flags & LPF_ORPHAN) { LASSERT(!llsd->llsd_rbtree_valid); diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index 891becc..aff81bc 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -2046,6 +2046,11 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, spin_lock(&lfsck->li_lock); if (!thread_is_init(thread) && !thread_is_stopped(thread)) { rc = 
-EALREADY; + if (unlikely(start == NULL)) { + spin_unlock(&lfsck->li_lock); + GOTO(out, rc); + } + while (start->ls_active != 0) { if (!(type & start->ls_active)) { type <<= 1; diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 34d5b97..8f4c86d 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -4822,7 +4822,6 @@ static int mdt_prepare(const struct lu_env *env, struct mdt_device *mdt = mdt_dev(cdev); struct lu_device *next = &mdt->mdt_child->md_lu_dev; struct obd_device *obd = cdev->ld_obd; - struct lfsck_start_param lsp; int rc; ENTRY; @@ -4846,17 +4845,6 @@ static int mdt_prepare(const struct lu_env *env, * register the namespace to such instance. */ LASSERTF(rc == 0, "register namespace failed: rc = %d\n", rc); - lsp.lsp_start = NULL; - lsp.lsp_index_valid = 0; - rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child, - OBD_IOC_START_LFSCK, - 0, &lsp); - if (rc != 0) { - CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", - mdt_obd_name(mdt), rc); - rc = 0; - } - if (mdt->mdt_seq_site.ss_node_id == 0) { rc = mdt->mdt_child->md_ops->mdo_root_get(env, mdt->mdt_child, &mdt->mdt_md_root_fid); @@ -5757,12 +5745,22 @@ static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len, static int mdt_postrecov(const struct lu_env *env, struct mdt_device *mdt) { - struct lu_device *ld = md2lu_dev(mdt->mdt_child); - int rc; - ENTRY; + struct lu_device *ld = md2lu_dev(mdt->mdt_child); + struct lfsck_start_param lsp; + int rc; + ENTRY; - rc = ld->ld_ops->ldo_recovery_complete(env, ld); - RETURN(rc); + lsp.lsp_start = NULL; + lsp.lsp_index_valid = 0; + rc = mdt->mdt_child->md_ops->mdo_iocontrol(env, mdt->mdt_child, + OBD_IOC_START_LFSCK, + 0, &lsp); + if (rc != 0 && rc != -EALREADY) + CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", + mdt_obd_name(mdt), rc); + + rc = ld->ld_ops->ldo_recovery_complete(env, ld); + RETURN(rc); } static int mdt_obd_postrecov(struct obd_device *obd) diff --git 
a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 7f24f32..0f0d51a 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -419,7 +419,6 @@ static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev, struct ofd_device *ofd = ofd_dev(dev); struct obd_device *obd = ofd_obd(ofd); struct lu_device *next = &ofd->ofd_osd->dd_lu_dev; - struct lfsck_start_param lsp; int rc; ENTRY; @@ -446,15 +445,6 @@ static int ofd_prepare(const struct lu_env *env, struct lu_device *pdev, * register the namespace to such instance. */ LASSERTF(rc == 0, "register namespace failed: rc = %d\n", rc); - lsp.lsp_start = NULL; - lsp.lsp_index_valid = 0; - rc = lfsck_start(env, ofd->ofd_osd, &lsp); - if (rc != 0) { - CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", - obd->obd_name, rc); - rc = 0; - } - target_recovery_init(&ofd->ofd_lut, tgt_request_handle); LASSERT(obd->obd_no_conn); spin_lock(&obd->obd_dev_lock); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index 3aae6dd..4f12506 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -417,10 +417,6 @@ static inline void ofd_stats_counter_init(struct lprocfs_stats *stats) {} struct ofd_object *ofd_object_find(const struct lu_env *env, struct ofd_device *ofd, const struct lu_fid *fid); -struct ofd_object *ofd_object_find_or_create(const struct lu_env *env, - struct ofd_device *ofd, - const struct lu_fid *fid, - struct lu_attr *attr); int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo); int ofd_precreate_objects(const struct lu_env *env, struct ofd_device *ofd, obd_id id, struct ofd_seq *oseq, int nr, int sync); diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 6a81d87..96811b7 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -436,27 +436,62 @@ static int ofd_preprw_write(const struct lu_env *env, struct obd_export *exp, LASSERT(objcount == 1); if (unlikely(exp->exp_obd->obd_recovering)) { - struct ofd_thread_info *info = 
ofd_info(env); - - /* copied from ofd_precreate_object */ - /* XXX this should be consolidated to use the same code - * instead of a copy, due to the ongoing risk of bugs. */ - memset(&info->fti_attr, 0, sizeof(info->fti_attr)); - info->fti_attr.la_valid = LA_TYPE | LA_MODE; - info->fti_attr.la_mode = S_IFREG | S_ISUID | S_ISGID | 0666; - info->fti_attr.la_valid |= LA_ATIME | LA_MTIME | LA_CTIME; - /* Initialize a/c/m time so any client timestamp will always - * be newer and update the inode. ctime = 0 is also handled - * specially in osd_inode_setattr(). See LU-221, LU-1042 */ - info->fti_attr.la_atime = 0; - info->fti_attr.la_mtime = 0; - info->fti_attr.la_ctime = 0; - - fo = ofd_object_find_or_create(env, ofd, fid, &info->fti_attr); - } else { - fo = ofd_object_find(env, ofd, fid); + obd_seq seq = fid_seq(fid); + obd_id oid = fid_oid(fid); + struct ofd_seq *oseq; + + oseq = ofd_seq_load(env, ofd, seq); + if (IS_ERR(oseq)) { + CERROR("%s: Can't find FID Sequence "LPX64": rc = %d\n", + ofd_name(ofd), seq, (int)PTR_ERR(oseq)); + GOTO(out, rc = -EINVAL); + } + + if (oid > ofd_seq_last_oid(oseq)) { + int sync = 0; + int diff; + + mutex_lock(&oseq->os_create_lock); + diff = oid - ofd_seq_last_oid(oseq); + + /* Do sync create if the seq is about to used up */ + if (fid_seq_is_idif(seq) || fid_seq_is_mdt0(seq)) { + if (unlikely(oid >= IDIF_MAX_OID - 1)) + sync = 1; + } else if (fid_seq_is_norm(seq)) { + if (unlikely(oid >= + LUSTRE_DATA_SEQ_MAX_WIDTH - 1)) + sync = 1; + } else { + CERROR("%s : invalid o_seq "DOSTID"\n", + ofd_name(ofd), POSTID(&oa->o_oi)); + mutex_unlock(&oseq->os_create_lock); + ofd_seq_put(env, oseq); + GOTO(out, rc = -EINVAL); + } + + while (diff > 0) { + obd_id next_id = ofd_seq_last_oid(oseq) + 1; + int count = ofd_precreate_batch(ofd, diff); + + rc = ofd_precreate_objects(env, ofd, next_id, + oseq, count, sync); + if (rc < 0) { + mutex_unlock(&oseq->os_create_lock); + ofd_seq_put(env, oseq); + GOTO(out, rc); + } + + diff -= rc; + } + + 
mutex_unlock(&oseq->os_create_lock); + } + + ofd_seq_put(env, oseq); } + fo = ofd_object_find(env, ofd, fid); if (IS_ERR(fo)) GOTO(out, rc = PTR_ERR(fo)); LASSERT(fo != NULL); diff --git a/lustre/ofd/ofd_obd.c b/lustre/ofd/ofd_obd.c index 8ba5eca..9c62c8b 100644 --- a/lustre/ofd/ofd_obd.c +++ b/lustre/ofd/ofd_obd.c @@ -410,8 +410,18 @@ static int ofd_destroy_export(struct obd_export *exp) int ofd_postrecov(const struct lu_env *env, struct ofd_device *ofd) { struct lu_device *ldev = &ofd->ofd_dt_dev.dd_lu_dev; + struct lfsck_start_param lsp; + int rc; CDEBUG(D_HA, "%s: recovery is over\n", ofd_obd(ofd)->obd_name); + + lsp.lsp_start = NULL; + lsp.lsp_index_valid = 0; + rc = lfsck_start(env, ofd->ofd_osd, &lsp); + if (rc != 0 && rc != -EALREADY) + CWARN("%s: auto trigger paused LFSCK failed: rc = %d\n", + ofd_obd(ofd)->obd_name, rc); + return ldev->ld_ops->ldo_recovery_complete(env, ldev); } diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 2f19478..c60e18f 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -92,28 +92,6 @@ struct ofd_object *ofd_object_find(const struct lu_env *env, RETURN(fo); } -struct ofd_object *ofd_object_find_or_create(const struct lu_env *env, - struct ofd_device *ofd, - const struct lu_fid *fid, - struct lu_attr *attr) -{ - struct ofd_thread_info *info = ofd_info(env); - struct lu_object *fo_obj; - struct dt_object *dto; - - ENTRY; - - info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG); - - dto = dt_find_or_create(env, ofd->ofd_osd, fid, &info->fti_dof, attr); - if (IS_ERR(dto)) - RETURN(ERR_CAST(dto)); - - fo_obj = lu_object_locate(dto->do_lu.lo_header, - ofd->ofd_dt_dev.dd_lu_dev.ld_type); - RETURN(ofd_obj(fo_obj)); -} - int ofd_object_ff_load(const struct lu_env *env, struct ofd_object *fo) { struct ofd_thread_info *info = ofd_info(env); diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 7d51934..1c3b3df 100644 --- a/lustre/tests/sanity-lfsck.sh +++ 
b/lustre/tests/sanity-lfsck.sh @@ -607,16 +607,12 @@ test_7a() start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(5) Fail to start MDS!" - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase1" ] || - error "(6) Expect 'scanning-phase1', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 6 || { + awk '/^status/ { print \\\$2 }'" "completed" 30 || { $SHOW_NAMESPACE - error "(7) unexpected status" + error "(6) unexpected status" } } run_test 7a "non-stopped LFSCK should auto restarts after MDS remount (1)" @@ -648,16 +644,12 @@ test_7b() start $SINGLEMDS $MDT_DEVNAME $MOUNT_OPTS_SCRUB > /dev/null || error "(6) Fail to start MDS!" - STATUS=$($SHOW_NAMESPACE | awk '/^status/ { print $2 }') - [ "$STATUS" == "scanning-phase2" ] || - error "(7) Expect 'scanning-phase2', but got '$STATUS'" - do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0 wait_update_facet $SINGLEMDS "$LCTL get_param -n \ mdd.${MDT_DEV}.lfsck_namespace | - awk '/^status/ { print \\\$2 }'" "completed" 6 || { + awk '/^status/ { print \\\$2 }'" "completed" 30 || { $SHOW_NAMESPACE - error "(8) unexpected status" + error "(7) unexpected status" } } run_test 7b "non-stopped LFSCK should auto restarts after MDS remount (2)" -- 1.8.3.1