From 688d5da6a89882fdf9ebfb3e52ca869e9e959655 Mon Sep 17 00:00:00 2001 From: Yang Sheng Date: Tue, 29 Oct 2019 14:35:59 +0800 Subject: [PATCH] LU-12846 mdd: return error while delete failed Since we use a global buffer, avoid replacing the index name while iterating over the orphan directory. Also return the error code from mdd_orphan_destroy when dt_delete fails. Otherwise this will cause an infinite loop. Fixes: e1ace3751f ("LU-8514 mdd: transaction failure should be checked") Signed-off-by: Yang Sheng Change-Id: I6fc3e992333ffa61900074309223555264cfe66b Reviewed-on: https://review.whamcloud.com/36602 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Artem Blagodarenko Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/mdd/mdd_internal.h | 3 +++ lustre/mdd/mdd_orphans.c | 43 +++++++++++++++--------------------------- lustre/tests/recovery-small.sh | 34 +++++++++++++++++++++++++++++++++ lustre/tests/sanity.sh | 16 ---------------- 4 files changed, 52 insertions(+), 44 deletions(-) diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 27c2247..b26fe1b 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -176,6 +176,8 @@ struct mdd_object { struct list_head mod_users; /**< unique user opens */ }; +#define MTI_KEEP_KEY 0x01 + struct mdd_thread_info { struct lu_fid mti_fid; struct lu_fid mti_fid2; /* used for be & cpu converting */ @@ -194,6 +196,7 @@ struct mdd_thread_info { * then mti_ent::lde_name will be mti_key. 
*/ struct lu_dirent mti_ent; char mti_key[NAME_MAX + 16]; + int mti_flags; char mti_name[NAME_MAX + 1]; struct lu_buf mti_buf[4]; struct lu_buf mti_big_buf; /* biggish persistent buf */ diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c index 2652d96..9f1a10f 100644 --- a/lustre/mdd/mdd_orphans.c +++ b/lustre/mdd/mdd_orphans.c @@ -61,8 +61,9 @@ static struct dt_key *mdd_orphan_key_fill(const struct lu_env *env, char *key = mdd_env_info(env)->mti_key; LASSERT(key); - snprintf(key, sizeof(mdd_env_info(env)->mti_key), - DFID_NOBRACE, PFID(lf)); + if (!(MTI_KEEP_KEY & mdd_env_info(env)->mti_flags)) + snprintf(key, sizeof(mdd_env_info(env)->mti_key), + DFID_NOBRACE, PFID(lf)); return (struct dt_key *)key; } @@ -74,9 +75,11 @@ static struct dt_key *mdd_orphan_key_fill_20(const struct lu_env *env, char *key = mdd_env_info(env)->mti_key; LASSERT(key); - snprintf(key, sizeof(mdd_env_info(env)->mti_key), - ORPHAN_FILE_NAME_FORMAT_20, - fid_seq(lf), fid_oid(lf), fid_ver(lf), ORPH_OP_UNLINK); + if (!(MTI_KEEP_KEY & mdd_env_info(env)->mti_flags)) + snprintf(key, sizeof(mdd_env_info(env)->mti_key), + ORPHAN_FILE_NAME_FORMAT_20, + fid_seq(lf), fid_oid(lf), fid_ver(lf), + ORPH_OP_UNLINK); return (struct dt_key *)key; } @@ -291,7 +294,7 @@ static int mdd_orphan_destroy(const struct lu_env *env, struct mdd_object *obj, struct thandle *th = NULL; struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); bool orphan_exists = true; - int rc = 0; + int rc = 0, rc1 = 0; ENTRY; th = mdd_trans_create(env, mdd); @@ -323,34 +326,22 @@ static int mdd_orphan_destroy(const struct lu_env *env, struct mdd_object *obj, if (likely(obj->mod_count == 0)) { dt_write_lock(env, mdd->mdd_orphans, DT_TGT_ORPHAN); rc = dt_delete(env, mdd->mdd_orphans, key, th); - if (rc == -ENOENT) { - key = mdd_orphan_key_fill_20(env, mdd_object_fid(obj)); - rc = dt_delete(env, mdd->mdd_orphans, key, th); - } - if (rc) { - CERROR("%s: could not delete orphan "DFID": rc = %d\n", - mdd_obj_dev_name(obj), - 
PFID(mdd_object_fid(obj)), rc); - } else if (orphan_exists) { + /* We should remove object even dt_delete failed */ + if (orphan_exists) { mdo_ref_del(env, obj, th); if (S_ISDIR(mdd_object_type(obj))) { mdo_ref_del(env, obj, th); dt_ref_del(env, mdd->mdd_orphans, th); } - rc = mdo_destroy(env, obj, th); - } else { - CWARN("%s: orphan %s "DFID" doesn't exist\n", - mdd_obj_dev_name(obj), (char *)key, - PFID(mdd_object_fid(obj))); + rc1 = mdo_destroy(env, obj, th); } dt_write_unlock(env, mdd->mdd_orphans); } unlock: mdd_write_unlock(env, obj); + mdd_trans_stop(env, mdd, 0, th); - rc = mdd_trans_stop(env, mdd, 0, th); - - RETURN(rc); + RETURN(rc ? rc : rc1); } /** @@ -419,7 +410,6 @@ static int mdd_orphan_index_iterate(const struct lu_env *env, struct lu_fid fid; int key_sz = 0; int rc; - __u64 cookie; ENTRY; iops = &dor->do_index_ops->dio_it; @@ -441,6 +431,7 @@ static int mdd_orphan_index_iterate(const struct lu_env *env, GOTO(out_put, rc = -EIO); } + mdd_env_info(env)->mti_flags |= MTI_KEEP_KEY; do { if (thread->mgt_abort) break; @@ -466,16 +457,12 @@ static int mdd_orphan_index_iterate(const struct lu_env *env, } /* kill orphan object */ - cookie = iops->store(env, it); iops->put(env, it); rc = mdd_orphan_key_test_and_delete(env, mdd, &fid, (struct dt_key *)ent->lde_name); - /* after index delete reset iterator */ if (rc == 0) rc = iops->get(env, it, (const void *)""); - else - rc = iops->load(env, it, cookie); next: rc = iops->next(env, it); } while (rc == 0); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 972441c..91d0b0f 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -3001,6 +3001,40 @@ test_141() { } run_test 141 "do not lose locks on MGS restart" +test_142() { + [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && + skip "Need MDS version at least 2.11.56" + + #define OBD_FAIL_MDS_ORPHAN_DELETE 0x165 + do_facet mds1 $LCTL set_param fail_loc=0x165 + $MULTIOP $DIR/$tfile Ouc || error "multiop 
failed" + + stop mds1 + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS + + wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" || + error "MDD orphan cleanup thread not quit" +} +run_test 142 "orphan name stub can be cleaned up in startup" + +test_143() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.13.00) ] && + skip "Need MDS version at least 2.13.00" + [ $PARALLEL == "yes" ] && skip "skip parallel run" + + local mntpt=$(facet_mntpt $SINGLEMDS) + stop mds1 + mount_fstype $SINGLEMDS || error "mount as fstype $SINGLEMDS failed" + do_facet $SINGLEMDS touch $mntpt/PENDING/$tfile + unmount_fstype $SINGLEMDS + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail" + + wait_recovery_complete $SINGLEMDS || error "MDS recovery not done" + wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" || + error "MDD orphan cleanup thread not quit" +} +run_test 143 "orphan cleanup thread shouldn't be blocked even delete failed" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 8e386c7..1ff6090 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -22933,22 +22933,6 @@ test_810() { } run_test 810 "partial page writes on ZFS (LU-11663)" -test_811() { - [ $MDS1_VERSION -lt $(version_code 2.11.56) ] && - skip "Need MDS version at least 2.11.56" - - #define OBD_FAIL_MDS_ORPHAN_DELETE 0x165 - do_facet mds1 $LCTL set_param fail_loc=0x165 - $MULTIOP $DIR/$tfile Ouc || error "multiop failed" - - stop mds1 - start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS - - wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" || - error "MDD orphan cleanup thread not quit" -} -run_test 811 "orphan name stub can be cleaned up in startup" - test_812a() { [ $OST1_VERSION -lt $(version_code 2.12.51) ] && skip "OST < 2.12.51 doesn't support this fail_loc" -- 1.8.3.1