From 0cb008aa46d283c03aeae79fd3ab36c828da2174 Mon Sep 17 00:00:00 2001 From: yury Date: Wed, 22 Oct 2008 14:25:38 +0000 Subject: [PATCH] b=17323 r=johann,adilger - fixes resent rpcs for those sent by set and also fixes log_cancel resent handling; - cleanups. --- lustre/include/obd_support.h | 2 + lustre/ldlm/ldlm_lockd.c | 4 ++ lustre/obdclass/llog.c | 12 ++--- lustre/obdclass/llog_cat.c | 2 +- lustre/osc/osc_request.c | 1 + lustre/ost/ost_handler.c | 2 + lustre/ptlrpc/client.c | 6 ++- lustre/ptlrpc/llog_server.c | 118 ++++++++++++++++++++++++++---------------- lustre/tests/replay-single.sh | 19 ++++++- 9 files changed, 110 insertions(+), 56 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 78481ce..0507ce6 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -292,6 +292,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_OBD_LOGD_NET 0x602 #define OBD_FAIL_OBD_QC_CALLBACK_NET 0x603 #define OBD_FAIL_OBD_DQACQ 0x604 +#define OBD_FAIL_OBD_LLOG_SETUP 0x605 +#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 #define OBD_FAIL_TGT_REPLY_NET 0x700 #define OBD_FAIL_TGT_CONN_RACE 0x701 diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 08f841e..83b08d0 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -1630,6 +1630,8 @@ static int ldlm_callback_handler(struct ptlrpc_request *req) if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) RETURN(0); rc = llog_origin_handle_cancel(req); + if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) + RETURN(0); ldlm_callback_reply(req, rc); RETURN(0); case OBD_QC_CALLBACK: @@ -1815,6 +1817,8 @@ static int ldlm_cancel_handler(struct ptlrpc_request *req) if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) RETURN(0); rc = llog_origin_handle_cancel(req); + if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) + RETURN(0); ldlm_callback_reply(req, rc); RETURN(0); default: diff --git a/lustre/obdclass/llog.c 
b/lustre/obdclass/llog.c index 366dae8..4c63f6f 100644 --- a/lustre/obdclass/llog.c +++ b/lustre/obdclass/llog.c @@ -100,17 +100,17 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) int rc = 0; ENTRY; - CDEBUG(D_RPCTRACE, "canceling %d in log "LPX64"\n", + CDEBUG(D_RPCTRACE, "Canceling %d in log "LPX64"\n", index, loghandle->lgh_id.lgl_oid); if (index == 0) { - CERROR("cannot cancel index 0 (which is header)\n"); + CERROR("Can't cancel index 0 which is header\n"); RETURN(-EINVAL); } if (!ext2_clear_bit(index, llh->llh_bitmap)) { - CDEBUG(D_RPCTRACE, "catalog index %u already clear?\n", index); - RETURN(-EINVAL); + CDEBUG(D_RPCTRACE, "Catalog index %u already clear?\n", index); + RETURN(-ENOENT); } llh->llh_count--; @@ -120,7 +120,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) (loghandle->lgh_last_idx == (LLOG_BITMAP_BYTES * 8) - 1)) { rc = llog_destroy(loghandle); if (rc) { - CERROR("failure destroying log after last cancel: %d\n", + CERROR("Failure destroying log after last cancel: %d\n", rc); ext2_set_bit(index, llh->llh_bitmap); llh->llh_count++; @@ -132,7 +132,7 @@ int llog_cancel_rec(struct llog_handle *loghandle, int index) rc = llog_write_rec(loghandle, &llh->llh_hdr, NULL, 0, NULL, 0); if (rc) { - CERROR("failure re-writing header %d\n", rc); + CERROR("Failure re-writing header %d\n", rc); ext2_set_bit(index, llh->llh_bitmap); llh->llh_count++; } diff --git a/lustre/obdclass/llog_cat.c b/lustre/obdclass/llog_cat.c index dafcb23..73ee3c5 100644 --- a/lustre/obdclass/llog_cat.c +++ b/lustre/obdclass/llog_cat.c @@ -317,7 +317,7 @@ EXPORT_SYMBOL(llog_cat_add_rec); * Assumes caller has already pushed us into the kernel context. 
*/ int llog_cat_cancel_records(struct llog_handle *cathandle, int count, - struct llog_cookie *cookies) + struct llog_cookie *cookies) { int i, index, rc = 0; ENTRY; diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index cbebdbb9..ec49db6 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -4233,6 +4233,7 @@ struct obd_ops osc_obd_ops = { .o_register_lock_cancel_cb = osc_register_lock_cancel_cb, .o_unregister_lock_cancel_cb = osc_unregister_lock_cancel_cb, }; + int __init osc_init(void) { struct lprocfs_static_vars lvars = { 0 }; diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index a720a1d..a9ed9c5 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1631,6 +1631,8 @@ int ost_handle(struct ptlrpc_request *req) if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_NET)) RETURN(0); rc = llog_origin_handle_cancel(req); + if (OBD_FAIL_CHECK(OBD_FAIL_OBD_LOG_CANCEL_REP)) + RETURN(0); req->rq_status = rc; rc = req_capsule_server_pack(&req->rq_pill); if (rc) diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 79755d7..39d7049 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1231,7 +1231,11 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) spin_unlock(&imp->imp_lock); req->rq_waiting = 0; - if (req->rq_resend) { + + if (req->rq_timedout||req->rq_resend) { + /* This is re-sending anyways, + * let's mark req as resend. 
*/ + req->rq_resend = 1; lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); if (req->rq_bulk) { diff --git a/lustre/ptlrpc/llog_server.c b/lustre/ptlrpc/llog_server.c index 974b8b8..8259bd1 100644 --- a/lustre/ptlrpc/llog_server.c +++ b/lustre/ptlrpc/llog_server.c @@ -88,7 +88,7 @@ int llog_origin_handle_create(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - RETURN(-EINVAL); + RETURN(-ENODEV); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -103,6 +103,7 @@ int llog_origin_handle_create(struct ptlrpc_request *req) body = req_capsule_server_get(&req->rq_pill, &RMF_LLOGD_BODY); body->lgd_logid = loghandle->lgh_id; + GOTO(out_close, rc); out_close: rc2 = llog_close(loghandle); if (!rc) @@ -110,7 +111,7 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); - RETURN(rc); + return rc; } int llog_origin_handle_destroy(struct ptlrpc_request *req) @@ -136,7 +137,7 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - RETURN(-EINVAL); + RETURN(-ENODEV); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -159,14 +160,14 @@ int llog_origin_handle_destroy(struct ptlrpc_request *req) if (rc) GOTO(out_close, rc); llog_free_handle(loghandle); - + GOTO(out_close, rc); out_close: if (rc) llog_close(loghandle); out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); - RETURN(rc); + return rc; } int llog_origin_handle_next_block(struct ptlrpc_request *req) @@ -195,7 +196,7 @@ int llog_origin_handle_next_block(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - GOTO(out_free, rc = -EINVAL); + GOTO(out_free, rc = -ENODEV); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -226,18 +227,17 @@ int llog_origin_handle_next_block(struct 
ptlrpc_request *req) ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); memcpy(ptr, buf, LLOG_CHUNK_SIZE); - + GOTO(out_close, rc); out_close: rc2 = llog_close(loghandle); if (!rc) rc = rc2; - out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); out_free: OBD_FREE(buf, LLOG_CHUNK_SIZE); - RETURN(rc); + return rc; } int llog_origin_handle_prev_block(struct ptlrpc_request *req) @@ -265,7 +265,9 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req) RETURN(-ENOMEM); ctxt = llog_get_context(obd, body->lgd_ctxt_idx); - LASSERT(ctxt != NULL); + if (ctxt == NULL) + GOTO(out_free, rc = -ENODEV); + disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -295,7 +297,7 @@ int llog_origin_handle_prev_block(struct ptlrpc_request *req) ptr = req_capsule_server_get(&req->rq_pill, &RMF_EADATA); memcpy(ptr, buf, LLOG_CHUNK_SIZE); - + GOTO(out_close, rc); out_close: rc2 = llog_close(loghandle); if (!rc) @@ -304,8 +306,9 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); +out_free: OBD_FREE(buf, LLOG_CHUNK_SIZE); - RETURN(rc); + return rc; } int llog_origin_handle_read_header(struct ptlrpc_request *req) @@ -328,7 +331,8 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) ctxt = llog_get_context(obd, body->lgd_ctxt_idx); if (ctxt == NULL) - RETURN(-EINVAL); + RETURN(-ENODEV); + disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -336,7 +340,9 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) if (rc) GOTO(out_pop, rc); - /* init_handle reads the header */ + /* + * llog_init_handle() reads the llog header + */ flags = body->lgd_llh_flags; rc = llog_init_handle(loghandle, flags, NULL); if (rc) @@ -348,7 +354,7 @@ int llog_origin_handle_read_header(struct ptlrpc_request *req) hdr = req_capsule_server_get(&req->rq_pill, &RMF_LLOG_LOG_HDR); *hdr = *loghandle->lgh_hdr; - + GOTO(out_close, rc); out_close: rc2 
= llog_close(loghandle); if (!rc) @@ -356,25 +362,23 @@ out_close: out_pop: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); llog_ctxt_put(ctxt); - RETURN(rc); + return rc; } int llog_origin_handle_close(struct ptlrpc_request *req) { - int rc; - - rc = 0; - - RETURN(rc); + ENTRY; + /* Nothing to do */ + RETURN(0); } int llog_origin_handle_cancel(struct ptlrpc_request *req) { struct obd_device *obd = req->rq_export->exp_obd; + int num_cookies, rc = 0, err, i, failed = 0; struct obd_device *disk_obd; struct llog_cookie *logcookies; struct llog_ctxt *ctxt = NULL; - int num_cookies, rc = 0, err, i; struct lvfs_run_ctxt saved; struct llog_handle *cathandle; struct inode *inode; @@ -385,15 +389,13 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) num_cookies = req_capsule_get_size(&req->rq_pill, &RMF_LOGCOOKIES, RCL_CLIENT) / sizeof(*logcookies); if (logcookies == NULL || num_cookies == 0) { - DEBUG_REQ(D_HA, req, "no cookies sent"); + DEBUG_REQ(D_HA, req, "No llog cookies sent"); RETURN(-EFAULT); } ctxt = llog_get_context(obd, logcookies->lgc_subsys); - if (ctxt == NULL) { - CWARN("llog subsys not setup or already cleanup\n"); - RETURN(-ENOENT); - } + if (ctxt == NULL) + RETURN(-ENODEV); disk_obd = ctxt->loc_exp->exp_obd; push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); @@ -405,29 +407,50 @@ int llog_origin_handle_cancel(struct ptlrpc_request *req) handle = fsfilt_start_log(disk_obd, inode, FSFILT_OP_CANCEL_UNLINK, NULL, 1); if (IS_ERR(handle)) { - CERROR("fsfilt_start failed: %ld\n", PTR_ERR(handle)); + CERROR("fsfilt_start_log() failed: %ld\n", + PTR_ERR(handle)); GOTO(pop_ctxt, rc = PTR_ERR(handle)); } rc = llog_cat_cancel_records(cathandle, 1, logcookies); + /* + * Do not raise -ENOENT errors for resent rpcs. This rec already + * might be killed. + */ + if (rc == -ENOENT && + (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)) { + /* + * Do not change this message, replay-single.sh test_59b + * expects to find this in log. 
+ */ + CDEBUG(D_RPCTRACE, "RESENT cancel req %p - ignored\n", + req); + rc = 0; + } else if (rc == 0) { + CDEBUG(D_RPCTRACE, "Canceled %d llog-records\n", + num_cookies); + } + err = fsfilt_commit(disk_obd, inode, handle, 0); if (err) { - CERROR("error committing transaction: %d\n", err); + CERROR("Error committing transaction: %d\n", err); if (!rc) rc = err; + failed++; GOTO(pop_ctxt, rc); - } + } else if (rc) + failed++; } + GOTO(pop_ctxt, rc); pop_ctxt: pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); if (rc) - CERROR("cancel %d llog-records failed: %d\n", num_cookies, rc); - else - CDEBUG(D_RPCTRACE, "cancel %d llog-records\n", num_cookies); + CERROR("Cancel %d of %d llog-records failed: %d\n", + failed, num_cookies, rc); llog_ctxt_put(ctxt); - RETURN(rc); + return rc; } EXPORT_SYMBOL(llog_origin_handle_cancel); @@ -441,9 +464,10 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, char name[4][64]; int rc, i, l, remains = buf_len; char *out = buf; + ENTRY; if (ctxt == NULL || mds == NULL) - GOTO(release_ctxt, rc = -EOPNOTSUPP); + GOTO(release_ctxt, rc = -ENODEV); push_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); @@ -480,11 +504,12 @@ static int llog_catinfo_config(struct obd_device *obd, char *buf, int buf_len, if (remains <= 0) break; } + GOTO(out_pop, rc); out_pop: pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); release_ctxt: llog_ctxt_put(ctxt); - RETURN(rc); + return rc; } struct cb_data { @@ -505,6 +530,7 @@ static int llog_catinfo_cb(struct llog_handle *cat, struct llog_logid_rec *lir; int l, rc, index, count = 0; struct cb_data *cbd = (struct cb_data*)data; + ENTRY; if (cbd->init) { out = cbd->out; @@ -516,7 +542,7 @@ static int llog_catinfo_cb(struct llog_handle *cat, RETURN(-EINVAL); if (!cbd->ctxt) - RETURN(-EINVAL); + RETURN(-ENODEV); lir = (struct llog_logid_rec *)rec; logid = &lir->lid_id; @@ -546,10 +572,10 @@ static int llog_catinfo_cb(struct llog_handle *cat, CWARN("Not enough 
memory\n"); rc = -ENOMEM; } - + EXIT; out_close: llog_close(handle); - RETURN(rc); + return rc; } static int llog_catinfo_deletions(struct obd_device *obd, char *buf, @@ -564,9 +590,10 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, struct cb_data data; struct llog_ctxt *ctxt = llog_get_context(obd, LLOG_CONFIG_ORIG_CTXT); int rc; + ENTRY; if (ctxt == NULL || mds == NULL) - GOTO(release_ctxt, rc = -EOPNOTSUPP); + GOTO(release_ctxt, rc = -ENODEV); count = mds->mds_lov_desc.ld_tgt_count; size = sizeof(*idarray) * count; @@ -621,14 +648,11 @@ static int llog_catinfo_deletions(struct obd_device *obd, char *buf, out_pop: pop_ctxt(&saved, &ctxt->loc_exp->exp_obd->obd_lvfs_ctxt, NULL); out_free: - /* release semphore */ mutex_up(&obd->obd_olg.olg_cat_processing); - OBD_VFREE(idarray, size); release_ctxt: llog_ctxt_put(ctxt); - - return(rc); + return rc; } int llog_catinfo(struct ptlrpc_request *req) @@ -638,10 +662,12 @@ int llog_catinfo(struct ptlrpc_request *req) char *keyword; char *buf, *reply; int rc; + ENTRY; OBD_ALLOC(buf, LLOG_CHUNK_SIZE); if (buf == NULL) - return -ENOMEM; + RETURN(-ENOMEM); + memset(buf, 0, LLOG_CHUNK_SIZE); keyword = req_capsule_client_get(&req->rq_pill, &RMF_NAME); @@ -669,7 +695,7 @@ int llog_catinfo(struct ptlrpc_request *req) if (strlen(buf) == 0) sprintf(buf, "%s", "No log informations\n"); memcpy(reply, buf, LLOG_CHUNK_SIZE); - + EXIT; out_free: OBD_FREE(buf, LLOG_CHUNK_SIZE); return rc; diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index cff0f7d..5c4f470 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -18,8 +18,8 @@ GRANT_CHECK_LIST=${GRANT_CHECK_LIST:-""} # Skip these tests -# bug number: -ALWAYS_EXCEPT="$REPLAY_SINGLE_EXCEPT" +# bug number: 17466 +ALWAYS_EXCEPT="61d $REPLAY_SINGLE_EXCEPT" if [ "$FAILURE_MODE" = "HARD" ] && mixed_ost_devs; then CONFIG_EXCEPTIONS="0b 42 47 61a 61c" @@ -1417,6 +1417,21 @@ test_59() { } run_test 59 "test 
log_commit_thread vs filter_destroy race" +# bug 17323 +test_59b() { + mkdir -p $DIR/$tdir + createmany -o $DIR/$tdir/$tfile-%d 2000 + sync +#define OBD_FAIL_OBD_LOG_CANCEL_REP 0x606 + do_facet mds "lctl set_param fail_loc=0x606" + unlinkmany $DIR/$tdir/$tfile-%d 2000 + sleep 60 + do_facet mds "lctl set_param fail_loc=0x0" + $LCTL dk | grep -q "RESENT cancel req" || return 1 + rmdir $DIR/$tdir +} +run_test 59b "resent handle in llog_origin_handle_cancel" + # race between add unlink llog vs cat log init in post_recovery (only for b1_6) # bug 12086: should no oops and No ctxt error for this test test_60() { -- 1.8.3.1