From: ericm Date: Mon, 27 Jul 2009 15:42:31 +0000 (+0000) Subject: branch: HEAD X-Git-Tag: v1_9_230~36 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=017a292f2c6cf47e98e0bf376eb0f7a001d673d2;p=fs%2Flustre-release.git branch: HEAD fix sptlrpc subflavor switching, fix associated test. b=20084 r=wangdi r=yujian --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3ee66aa..e482567 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -320,6 +320,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_PTLRPC_LONG_BULK_UNLINK 0x510 #define OBD_FAIL_PTLRPC_HPREQ_TIMEOUT 0x511 #define OBD_FAIL_PTLRPC_HPREQ_NOTIMEOUT 0x512 +#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513 #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 diff --git a/lustre/ptlrpc/sec.c b/lustre/ptlrpc/sec.c index bcb4ea6..e97dd78 100644 --- a/lustre/ptlrpc/sec.c +++ b/lustre/ptlrpc/sec.c @@ -448,15 +448,14 @@ int sptlrpc_req_ctx_switch(struct ptlrpc_request *req, int reqmsg_size; int rc; - if (likely(oldctx->cc_sec == newctx->cc_sec)) - return 0; - LASSERT(req->rq_reqmsg); LASSERT(req->rq_reqlen); LASSERT(req->rq_replen); - CWARN("req %p: switch ctx %p -> %p, switch sec %p(%s) -> %p(%s)\n", - req, oldctx, newctx, + CWARN("req %p: switch ctx %p(%u->%s) -> %p(%u->%s), " + "switch sec %p(%s) -> %p(%s)\n", req, + oldctx, oldctx->cc_vcred.vc_uid, sec2target_str(oldctx->cc_sec), + newctx, newctx->cc_vcred.vc_uid, sec2target_str(newctx->cc_sec), oldctx->cc_sec, oldctx->cc_sec->ps_policy->sp_name, newctx->cc_sec, newctx->cc_sec->ps_policy->sp_name); @@ -528,18 +527,20 @@ int sptlrpc_req_replace_dead_ctx(struct ptlrpc_request *req) newctx = req->rq_cli_ctx; LASSERT(newctx); - if (unlikely(newctx == oldctx)) { - if (test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags)) { - /* - * still get the old ctx, usually means system busy - */ - CWARN("ctx (%p, fl %lx) doesn't switch, " - "relax a little bit\n", - newctx, newctx->cc_flags); + if (unlikely(newctx == oldctx && + test_bit(PTLRPC_CTX_DEAD_BIT, &oldctx->cc_flags))) { + /* + * still get the old dead ctx, usually means system too busy + */ + CWARN("ctx (%p, fl %lx) doesn't switch, relax a little bit\n", + newctx, newctx->cc_flags); - cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ); - } + cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, HZ); } else { + /* + * it's possible newctx == oldctx if we're switching + * subflavor with the same sec. + */ rc = sptlrpc_req_ctx_switch(req, oldctx, newctx); if (rc) { /* restore old ctx */ @@ -639,9 +640,13 @@ again: if (rc) RETURN(rc); - if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) + if (sec->ps_flvr.sf_rpc != req->rq_flvr.sf_rpc) { + CDEBUG(D_SEC, "req %p: flavor has changed %x -> %x\n", + req, req->rq_flvr.sf_rpc, sec->ps_flvr.sf_rpc); + req_off_ctx_list(req, ctx); sptlrpc_req_replace_dead_ctx(req); - + ctx = req->rq_cli_ctx; + } sptlrpc_sec_put(sec); if (cli_ctx_is_eternal(ctx)) @@ -697,11 +702,11 @@ again: } if (unlikely(test_bit(PTLRPC_CTX_DEAD_BIT, &ctx->cc_flags))) { + req_off_ctx_list(req, ctx); /* * don't switch ctx if import was deactivated */ if (req->rq_import->imp_deactive) { - req_off_ctx_list(req, ctx); req->rq_err = 1; RETURN(-EINTR); } @@ -712,18 +717,10 @@ again: CERROR("req %p: failed to replace dead ctx %p: %d\n", req, ctx, rc); req->rq_err = 1; - LASSERT(list_empty(&req->rq_ctx_chain)); RETURN(rc); } - CWARN("req %p: replace dead ctx %p => ctx %p (%u->%s)\n", - req, ctx, req->rq_cli_ctx, - req->rq_cli_ctx->cc_vcred.vc_uid, - sec2target_str(req->rq_cli_ctx->cc_sec)); - ctx = req->rq_cli_ctx; - LASSERT(list_empty(&req->rq_ctx_chain)); - goto again; } diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index d86ec52..58edd8a 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1356,6 +1356,13 @@ ptlrpc_server_handle_req_in(struct ptlrpc_service *svc) goto err_req; } + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) && + lustre_msg_get_opc(req->rq_reqmsg) == obd_fail_val) { + CERROR("drop incoming rpc opc %u, x"LPU64"\n", + obd_fail_val, req->rq_xid); + goto err_req; + } + rc = -EINVAL; if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) { CERROR("wrong packet type received (type=%u) from %s\n", diff --git a/lustre/tests/sanity-gss.sh b/lustre/tests/sanity-gss.sh index 6a6a5d1..c4d0562 100644 --- a/lustre/tests/sanity-gss.sh +++ b/lustre/tests/sanity-gss.sh @@ -867,51 +867,44 @@ run_test 100 "change security flavor on the fly under load" switch_sec_test() { - local count=$1 - local flavor0=$2 - local flavor1=$3 - local flavor2=$4 - local df_pid=0 - local wait_time=$((TIMEOUT + TIMEOUT / 4)) + local flavor0=$1 + local flavor1=$2 + local filename=$DIR/$tfile + local multiop_pid local num # - # stop gss daemon, then switch to flavor1 (which should be a gss flavor), - # and run a 'df' which should hanging, wait the request timeout and - # resend, then switch the flavor to another one. To exercise the code of - # switching ctx/sec for a resend request. + # after set to flavor0, start multop which use flavor0 rpc, and let + # server drop the reply; then switch to flavor1, the resend should be + # completed using flavor1. To exercise the code of switching ctx/sec + # for a resend request. # - echo ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 -> $flavor2..." + log ">>>>>>>>>>>>>>> Testing $flavor0 -> $flavor1 <<<<<<<<<<<<<<<<<<<" - echo "(0) set base flavor $flavor0" set_rule $FSNAME any cli2mdt $flavor0 - wait_flavor cli2mdt $flavor0 $count - df $MOUNT - if [ $? -ne 0 ]; then - error "initial df failed" - fi - - stop_gss_daemons + wait_flavor cli2mdt $flavor0 $cnt_cli2mdt + rm -f $filename || error "remove old $filename failed" + +#MDS_REINT = 36 +#define OBD_FAIL_PTLRPC_DROP_REQ_OPC 0x513 + do_facet $SINGLEMDS lctl set_param fail_val=36 + do_facet $SINGLEMDS lctl set_param fail_loc=0x513 + log "starting multiop" + multiop $filename m & + multiop_pid=$! + echo "multiop pid=$multiop_pid" sleep 1 - echo "(1) $flavor0 -> $flavor1" set_rule $FSNAME any cli2mdt $flavor1 - wait_flavor cli2mdt $flavor1 $count - df $MOUNT & - df_pid=$! - sleep 1 + wait_flavor cli2mdt $flavor1 $cnt_cli2mdt - echo "waiting $wait_time seconds for df ($df_pid)" - sleep $wait_time - num=`ps --no-headers -p $df_pid 2>/dev/null | wc -l` - [ $num -eq 1 ] || error "df already ended ($num)" - echo "process $df_pid is still hanging there... OK" + num=`ps --no-headers -p $multiop_pid 2>/dev/null | wc -l` + [ $num -eq 1 ] || error "multiop($multiop_pid) already ended ($num)" + echo "process $multiop_pid is still hanging there... OK" - echo "(2) set end flavor $flavor2" - set_rule $FSNAME any cli2mdt $flavor2 - wait_flavor cli2mdt $flavor2 $count - start_gss_daemons - wait $df_pid || error "df returned error" + do_facet $SINGLEMDS lctl set_param fail_loc=0 + log "waiting for multiop ($multiop_pid) to finish" + wait $multiop_pid || error "multiop returned error" } test_101() @@ -919,18 +912,18 @@ test_101() # started from default flavors restore_to_default_flavor - switch_sec_test $cnt_cli2mdt null krb5n null - switch_sec_test $cnt_cli2mdt null krb5a null - switch_sec_test $cnt_cli2mdt null krb5i null - switch_sec_test $cnt_cli2mdt null krb5p null - switch_sec_test $cnt_cli2mdt null krb5i plain - switch_sec_test $cnt_cli2mdt plain krb5p plain - switch_sec_test $cnt_cli2mdt plain krb5n krb5a - switch_sec_test $cnt_cli2mdt krb5a krb5i krb5p - switch_sec_test $cnt_cli2mdt krb5p krb5a krb5n - switch_sec_test $cnt_cli2mdt krb5n krb5p krb5i + switch_sec_test null plain + switch_sec_test plain krb5n + switch_sec_test krb5n krb5a + switch_sec_test krb5a krb5i + switch_sec_test krb5i krb5p + switch_sec_test krb5p null + switch_sec_test null krb5p + switch_sec_test krb5p krb5i + switch_sec_test krb5i plain + switch_sec_test plain krb5p } -run_test 101 "switch ctx as well as sec for resending request" +run_test 101 "switch ctx/sec for resending request" error_102() {