From 30be03b4dd593894687773d2a460d441d85f88a2 Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Sun, 22 Jun 2014 02:02:37 +0400 Subject: [PATCH] LU-5520 ldlm: resend AST callbacks While clients will resend client->server RPCs, servers would not resend server->client RPCs such as LDLM callbacks (blocking or completion callbacks/ASTs). This could result in clients being evicted from the server if blocking callbacks were dropped by the network (a failed router or lossy network) and the client did not cancel the requested lock in time. In order to fix this problem, this patch adds the ability to resend LDLM callbacks from the server and give the client a chance to respond within the timeout period before it is evicted: - resend BL AST within lock callback timeout period; - still do not resend CANCEL_ON_BLOCK; - regular resend for CP AST without BL AST embedded; - prolong lock callback timeout on resend; some fixes: - recovery-small test_10 to actually evict the client with dropped BL AST; - ETIMEDOUT to be returned if send limit is expired; Signed-off-by: Vitaly Fertman Change-Id: Ie65fac94ea68defffd1769cbbb0f74381c11262c Tested-by: Elena Gryaznova Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Xyratex-bug-id: MRP-417 Reviewed-on: http://review.whamcloud.com/9335 Reviewed-by: Andreas Dilger Reviewed-by: Johann Lombardi Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre_net.h | 5 +++ lustre/ldlm/ldlm_lockd.c | 48 +++++++++++++++++++--------- lustre/mdc/mdc_reint.c | 9 ++---- lustre/ptlrpc/client.c | 6 ++-- lustre/ptlrpc/niobuf.c | 7 +++-- lustre/tests/recovery-small.sh | 71 ++++++++++++++++++++++++++++++------------ lustre/tests/replay-dual.sh | 2 ++ lustre/tests/test-framework.sh | 21 ++++++++++--- 8 files changed, 119 insertions(+), 50 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 1af3a6a..e55f42a 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -716,6 +716,9 @@ enum rq_phase { typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env, struct ptlrpc_request *req, void *arg, int rc); +/** Type of request resend call-back */ +typedef void (*ptlrpc_resend_cb_t)(struct ptlrpc_request *req, + void *arg); /** * Definition of request pool structure. @@ -2026,6 +2029,8 @@ struct ptlrpc_request { struct ptlrpc_request_set *rq_set; /** Async completion handler, called when reply is received */ ptlrpc_interpterer_t rq_interpret_reply; + /** Resend handler, called when request is resend to update RPC data */ + ptlrpc_resend_cb_t rq_resend_cb; /** Async completion context */ union ptlrpc_async_args rq_async_args; diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 5d1bc0d..bfb4af1 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -637,11 +637,11 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock, } } else { - LDLM_ERROR(lock, "client (nid %s) returned %d " - "from %s AST", libcfs_nid2str(peer.nid), - (req->rq_repmsg != NULL) ? - lustre_msg_get_status(req->rq_repmsg) : 0, - ast_type); + LDLM_ERROR(lock, "client (nid %s) returned %d: rc=%d " + "from %s AST", libcfs_nid2str(peer.nid), + (req->rq_repmsg != NULL) ? + lustre_msg_get_status(req->rq_repmsg) : 0, + rc, ast_type); } ldlm_lock_cancel(lock); /* Server-side AST functions are called from ldlm_reprocess_all, @@ -705,6 +705,14 @@ static int ldlm_cb_interpret(const struct lu_env *env, RETURN(0); } +static void ldlm_update_resend(struct ptlrpc_request *req, void *data) +{ + struct ldlm_cb_async_args *ca = data; + struct ldlm_lock *lock = ca->ca_lock; + + ldlm_refresh_waiting_lock(lock, ldlm_get_enq_timeout(lock)); +} + static inline int ldlm_ast_fini(struct ptlrpc_request *req, struct ldlm_cb_set_arg *arg, struct ldlm_lock *lock, @@ -800,7 +808,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, ca->ca_lock = lock; req->rq_interpret_reply = ldlm_cb_interpret; - req->rq_no_resend = 1; lock_res_and_lock(lock); if (lock->l_granted_mode != lock->l_req_mode) { @@ -834,10 +841,16 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock, if (instant_cancel) { unlock_res_and_lock(lock); ldlm_lock_cancel(lock); + + req->rq_no_resend = 1; } else { LASSERT(lock->l_granted_mode == lock->l_req_mode); ldlm_add_waiting_lock(lock); unlock_res_and_lock(lock); + + /* Do not resend after lock callback timeout */ + req->rq_delay_limit = ldlm_get_enq_timeout(lock); + req->rq_resend_cb = ldlm_update_resend; } req->rq_send_state = LUSTRE_IMP_FULL; @@ -915,7 +928,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) ca->ca_lock = lock; req->rq_interpret_reply = ldlm_cb_interpret; - req->rq_no_resend = 1; body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ); body->lock_handle[0] = lock->l_remote_handle; @@ -983,14 +995,20 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data) * that would not only cancel the lock, but will also remove * it from waiting list */ if (ldlm_is_cancel_on_block(lock)) { - unlock_res_and_lock(lock); - ldlm_lock_cancel(lock); - instant_cancel = 1; - lock_res_and_lock(lock); - } else { - /* start the lock-timeout clock */ - ldlm_add_waiting_lock(lock); - } + unlock_res_and_lock(lock); + ldlm_lock_cancel(lock); + + instant_cancel = 1; + req->rq_no_resend = 1; + + lock_res_and_lock(lock); + } else { + /* start the lock-timeout clock */ + ldlm_add_waiting_lock(lock); + /* Do not resend after lock callback timeout */ + req->rq_delay_limit = ldlm_get_enq_timeout(lock); + req->rq_resend_cb = ldlm_update_resend; + } } unlock_res_and_lock(lock); diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 271792f..7c68524 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -119,8 +119,7 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data, if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) bits |= MDS_INODELOCK_LOOKUP; if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1)) && - !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) + (fid_is_sane(&op_data->op_fid1))) count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels, LCK_EX, bits); req = ptlrpc_request_alloc(class_exp2cliimp(exp), @@ -330,14 +329,12 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data, LASSERT(req == NULL); if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && - (fid_is_sane(&op_data->op_fid1)) && - !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) + (fid_is_sane(&op_data->op_fid1))) count = mdc_resource_get_unused(exp, &op_data->op_fid1, &cancels, LCK_EX, MDS_INODELOCK_UPDATE); if ((op_data->op_flags & MF_MDC_CANCEL_FID3) && - (fid_is_sane(&op_data->op_fid3)) && - !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET)) + (fid_is_sane(&op_data->op_fid3))) count += mdc_resource_get_unused(exp, &op_data->op_fid3, &cancels, LCK_EX, MDS_INODELOCK_FULL); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 87f1c6a..ddfc91b 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1075,9 +1075,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp, D_HA : D_ERROR, req, "IMP_CLOSED "); *status = -EIO; } else if (ptlrpc_send_limit_expired(req)) { - /* probably doesn't need to be a D_ERROR after initial testing */ - DEBUG_REQ(D_ERROR, req, "send limit expired "); - *status = -EIO; + /* probably doesn't need to be a D_ERROR after initial testing*/ + DEBUG_REQ(D_HA, req, "send limit expired "); + *status = -ETIMEDOUT; } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING && imp->imp_state == LUSTRE_IMP_CONNECTING) { /* allow CONNECT even if import is invalid */ ; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index dedb574..8a76e58 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -709,9 +709,12 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) lustre_msghdr_set_flags(request->rq_reqmsg, request->rq_import->imp_msghdr_flags); - if (request->rq_resend) - lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); + if (request->rq_resend) { + lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT); + if (request->rq_resend_cb != NULL) + request->rq_resend_cb(request, &request->rq_async_args); + } if (request->rq_memalloc) mpflag = cfs_memory_pressure_get_and_set(); diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index fe2a1df..48f3c45 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -149,21 +149,50 @@ test_9() { run_test 9 "pause bulk on OST (bug 1420)" #bug 1521 -test_10() { - do_facet client mcreate $DIR/$tfile || - { error "mcreate failed: $?"; return 1; } - drop_bl_callback "chmod 0777 $DIR/$tfile" || echo "evicted as expected" - # wait for the mds to evict the client - #echo "sleep $(($TIMEOUT*2))" - #sleep $(($TIMEOUT*2)) - do_facet client touch $DIR/$tfile || echo "touch failed, evicted" - do_facet client checkstat -v -p 0777 $DIR/$tfile || - { error "client checkstat failed: $?"; return 3; } - do_facet client "munlink $DIR/$tfile" - # allow recovery to complete - client_up || client_up || sleep $TIMEOUT +test_10a() { + local before=$(date +%s) + local evict + + do_facet client "stat $DIR > /dev/null" || + error "failed to stat $DIR: $?" + drop_bl_callback "chmod 0777 $DIR" || + error "failed to chmod $DIR: $?" + + # let the client reconnect + client_reconnect + evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + [ ! -z "$evict" ] && [[ $evict -gt $before ]] || + (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state; + error "no eviction: $evict before:$before") + + do_facet client checkstat -v -p 0777 $DIR || + error "client checkstat failed: $?" +} +run_test 10a "finish request on server after client eviction (bug 1521)" + +test_10b() { + local before=$(date +%s) + local evict + + do_facet client "stat $DIR > /dev/null" || + error "failed to stat $DIR: $?" + drop_bl_callback_once "chmod 0777 $DIR" || + error "failed to chmod $DIR: $?" + + # let the client reconnect + client_reconnect + evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') + + [ -z "$evict" ] || [[ $evict -le $before ]] || + (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state; + error "eviction happened: $evict before:$before") + + do_facet client checkstat -v -p 0777 $DIR || + error "client checkstat failed: $?" } -run_test 10 "finish request on server after client eviction (bug 1521)" +run_test 10b "re-send BL AST" #bug 2460 # wake up a thread waiting for completion after eviction @@ -177,7 +206,8 @@ test_11(){ do_facet client $MULTIOP $DIR/$tfile or || { error "multiop read failed: $?"; return 3; } - drop_bl_callback $MULTIOP $DIR/$tfile Ow || echo "evicted as expected" + drop_bl_callback_once $MULTIOP $DIR/$tfile Ow || + echo "evicted as expected" do_facet client munlink $DIR/$tfile || { error "munlink failed: $?"; return 4; } @@ -492,7 +522,7 @@ test_19c() { # let the client reconnect sleep 5 EVICT=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state | - awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }') + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') [ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened" } @@ -911,7 +941,8 @@ run_test 27 "fail LOV while using OSC's" test_28() { # bug 6086 - error adding new clients do_facet client mcreate $DIR/$tfile || return 1 - drop_bl_callback "chmod 0777 $DIR/$tfile" ||echo "evicted as expected" + drop_bl_callback_once "chmod 0777 $DIR/$tfile" || + echo "evicted as expected" #define OBD_FAIL_MDS_CLIENT_ADD 0x12f do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f" # fail once (evicted), reconnect fail (fail_loc), ok @@ -1163,10 +1194,10 @@ test_58() { # bug 11546 pid=$! sleep 1 lctl set_param fail_loc=0 - drop_bl_callback rm -f $DIR/$tfile + drop_bl_callback_once rm -f $DIR/$tfile wait $pid # the first 'df' could tigger the eviction caused by - # 'drop_bl_callback', and it's normal case. + # 'drop_bl_callback_once', and it's normal case. # but the next 'df' should return successfully. do_facet client "df $DIR" || do_facet client "df $DIR" } @@ -1969,7 +2000,7 @@ test_113() { # let the client reconnect client_reconnect EVICT=$($LCTL get_param mdc.$FSNAME-MDT*.state | - awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }') + awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }') [ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened" } diff --git a/lustre/tests/replay-dual.sh b/lustre/tests/replay-dual.sh index 2d1a511..1f1c8ab 100755 --- a/lustre/tests/replay-dual.sh +++ b/lustre/tests/replay-dual.sh @@ -445,11 +445,13 @@ test_18() { # bug 3822 - evicting client with enqueued lock do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b # hold enqueue sleep 1 #define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0 do_facet client lctl set_param fail_loc=0x80000305 # drop cb, evict cancel_lru_locks mdc usleep 500 # wait to ensure first client is one that will be evicted openfile -f O_RDONLY $MOUNT2/$tdir/$tfile wait $OPENPID + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1 do_facet $SINGLEMDS lctl debug_kernel | grep "not entering recovery" && error "client not evicted" do_facet client "lctl set_param fail_loc=0" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index dd84abb..f38f088 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -4372,13 +4372,26 @@ drop_ldlm_cancel() { return $RC } -drop_bl_callback() { +drop_bl_callback_once() { + rc=0 + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0 #define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 - RC=0 do_facet client lctl set_param fail_loc=0x80000305 - do_facet client "$@" || RC=$? + do_facet client "$@" || rc=$? do_facet client lctl set_param fail_loc=0 - return $RC + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1 + return $rc +} + +drop_bl_callback() { + rc=0 + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0 +#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 + do_facet client lctl set_param fail_loc=0x305 + do_facet client "$@" || rc=$? + do_facet client lctl set_param fail_loc=0 + do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1 + return $rc } drop_ldlm_reply() { -- 1.8.3.1