Whamcloud - gitweb
LU-5520 ldlm: resend AST callbacks 35/9335/7
authorVitaly Fertman <vitaly_fertman@xyratex.com>
Sat, 21 Jun 2014 22:02:37 +0000 (02:02 +0400)
committerOleg Drokin <oleg.drokin@intel.com>
Thu, 11 Sep 2014 19:58:10 +0000 (19:58 +0000)
While clients will resend client->server RPCs, servers would not
resend server->client RPCs such as LDLM callbacks (blocking
or completion callbacks/ASTs).  This could result in clients being
evicted from the server if blocking callbacks were dropped by the
network (a failed router or lossy network) and the client did not
cancel the requested lock in time.
In order to fix this problem, this patch adds the ability to resend
LDLM callbacks from the server and give the client a chance to
respond within the timeout period before it is evicted:

- resend BL AST within lock callback timeout period;
- still do not resend CANCEL_ON_BLOCK;
- regular resend for CP AST without BL AST embedded;
- prolong lock callback timeout on resend;

some fixes:
- recovery-small test_10 to actually evict the client
  with dropped BL AST;
- ETIMEDOUT to be returned if send limit is expired;

Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Change-Id: Ie65fac94ea68defffd1769cbbb0f74381c11262c
Tested-by: Elena Gryaznova <Elena_Gryaznova@xyratex.com>
Reviewed-by: Alexey Lyashkov <Alexey_Lyashkov@xyratex.com>
Reviewed-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Xyratex-bug-id: MRP-417
Reviewed-on: http://review.whamcloud.com/9335
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_net.h
lustre/ldlm/ldlm_lockd.c
lustre/mdc/mdc_reint.c
lustre/ptlrpc/client.c
lustre/ptlrpc/niobuf.c
lustre/tests/recovery-small.sh
lustre/tests/replay-dual.sh
lustre/tests/test-framework.sh

index 1af3a6a..e55f42a 100644 (file)
@@ -716,6 +716,9 @@ enum rq_phase {
 typedef int (*ptlrpc_interpterer_t)(const struct lu_env *env,
                                     struct ptlrpc_request *req,
                                     void *arg, int rc);
+/** Type of request resend call-back */
+typedef void (*ptlrpc_resend_cb_t)(struct ptlrpc_request *req,
+                                  void *arg);
 
 /**
  * Definition of request pool structure.
@@ -2026,6 +2029,8 @@ struct ptlrpc_request {
         struct ptlrpc_request_set *rq_set;
         /** Async completion handler, called when reply is received */
         ptlrpc_interpterer_t rq_interpret_reply;
+       /** Resend handler, called when request is resend to update RPC data */
+       ptlrpc_resend_cb_t rq_resend_cb;
         /** Async completion context */
         union ptlrpc_async_args rq_async_args;
 
index 5d1bc0d..bfb4af1 100644 (file)
@@ -637,11 +637,11 @@ static int ldlm_handle_ast_error(struct ldlm_lock *lock,
                         }
 
                 } else {
-                        LDLM_ERROR(lock, "client (nid %s) returned %d "
-                                   "from %s AST", libcfs_nid2str(peer.nid),
-                                   (req->rq_repmsg != NULL) ?
-                                   lustre_msg_get_status(req->rq_repmsg) : 0,
-                                   ast_type);
+                       LDLM_ERROR(lock, "client (nid %s) returned %d: rc=%d "
+                                  "from %s AST", libcfs_nid2str(peer.nid),
+                                  (req->rq_repmsg != NULL) ?
+                                  lustre_msg_get_status(req->rq_repmsg) : 0,
+                                  rc, ast_type);
                 }
                 ldlm_lock_cancel(lock);
                 /* Server-side AST functions are called from ldlm_reprocess_all,
@@ -705,6 +705,14 @@ static int ldlm_cb_interpret(const struct lu_env *env,
        RETURN(0);
 }
 
+static void ldlm_update_resend(struct ptlrpc_request *req, void *data)
+{
+       struct ldlm_cb_async_args *ca   = data;
+       struct ldlm_lock          *lock = ca->ca_lock;
+
+       ldlm_refresh_waiting_lock(lock, ldlm_get_enq_timeout(lock));
+}
+
 static inline int ldlm_ast_fini(struct ptlrpc_request *req,
                                struct ldlm_cb_set_arg *arg,
                                struct ldlm_lock *lock,
@@ -800,7 +808,6 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
         ca->ca_lock = lock;
 
         req->rq_interpret_reply = ldlm_cb_interpret;
-        req->rq_no_resend = 1;
 
        lock_res_and_lock(lock);
        if (lock->l_granted_mode != lock->l_req_mode) {
@@ -834,10 +841,16 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
        if (instant_cancel) {
                unlock_res_and_lock(lock);
                ldlm_lock_cancel(lock);
+
+               req->rq_no_resend = 1;
        } else {
                LASSERT(lock->l_granted_mode == lock->l_req_mode);
                ldlm_add_waiting_lock(lock);
                unlock_res_and_lock(lock);
+
+               /* Do not resend after lock callback timeout */
+               req->rq_delay_limit = ldlm_get_enq_timeout(lock);
+               req->rq_resend_cb = ldlm_update_resend;
        }
 
         req->rq_send_state = LUSTRE_IMP_FULL;
@@ -915,7 +928,6 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
         ca->ca_lock = lock;
 
         req->rq_interpret_reply = ldlm_cb_interpret;
-        req->rq_no_resend = 1;
         body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
 
         body->lock_handle[0] = lock->l_remote_handle;
@@ -983,14 +995,20 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                  * that would not only cancel the lock, but will also remove
                  * it from waiting list */
                if (ldlm_is_cancel_on_block(lock)) {
-                        unlock_res_and_lock(lock);
-                        ldlm_lock_cancel(lock);
-                        instant_cancel = 1;
-                        lock_res_and_lock(lock);
-                } else {
-                        /* start the lock-timeout clock */
-                        ldlm_add_waiting_lock(lock);
-                }
+                       unlock_res_and_lock(lock);
+                       ldlm_lock_cancel(lock);
+
+                       instant_cancel = 1;
+                       req->rq_no_resend = 1;
+
+                       lock_res_and_lock(lock);
+               } else {
+                       /* start the lock-timeout clock */
+                       ldlm_add_waiting_lock(lock);
+                       /* Do not resend after lock callback timeout */
+                       req->rq_delay_limit = ldlm_get_enq_timeout(lock);
+                       req->rq_resend_cb = ldlm_update_resend;
+               }
         }
         unlock_res_and_lock(lock);
 
index 271792f..7c68524 100644 (file)
@@ -119,8 +119,7 @@ int mdc_setattr(struct obd_export *exp, struct md_op_data *op_data,
         if (op_data->op_attr.ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID))
                 bits |= MDS_INODELOCK_LOOKUP;
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
-           (fid_is_sane(&op_data->op_fid1)) &&
-           !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+           (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX, bits);
         req = ptlrpc_request_alloc(class_exp2cliimp(exp),
@@ -330,14 +329,12 @@ int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
         LASSERT(req == NULL);
 
        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
-           (fid_is_sane(&op_data->op_fid1)) &&
-           !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+           (fid_is_sane(&op_data->op_fid1)))
                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
                                                &cancels, LCK_EX,
                                                MDS_INODELOCK_UPDATE);
        if ((op_data->op_flags & MF_MDC_CANCEL_FID3) &&
-           (fid_is_sane(&op_data->op_fid3)) &&
-           !OBD_FAIL_CHECK(OBD_FAIL_LDLM_BL_CALLBACK_NET))
+           (fid_is_sane(&op_data->op_fid3)))
                count += mdc_resource_get_unused(exp, &op_data->op_fid3,
                                                 &cancels, LCK_EX,
                                                 MDS_INODELOCK_FULL);
index 87f1c6a..ddfc91b 100644 (file)
@@ -1075,9 +1075,9 @@ static int ptlrpc_import_delay_req(struct obd_import *imp,
                          D_HA : D_ERROR, req, "IMP_CLOSED ");
                *status = -EIO;
         } else if (ptlrpc_send_limit_expired(req)) {
-                /* probably doesn't need to be a D_ERROR after initial testing */
-                DEBUG_REQ(D_ERROR, req, "send limit expired ");
-                *status = -EIO;
+               /* probably doesn't need to be a D_ERROR after initial testing*/
+               DEBUG_REQ(D_HA, req, "send limit expired ");
+               *status = -ETIMEDOUT;
        } else if (req->rq_send_state == LUSTRE_IMP_CONNECTING &&
                   imp->imp_state == LUSTRE_IMP_CONNECTING) {
                /* allow CONNECT even if import is invalid */ ;
index dedb574..8a76e58 100644 (file)
@@ -709,9 +709,12 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         lustre_msghdr_set_flags(request->rq_reqmsg,
                                 request->rq_import->imp_msghdr_flags);
 
-        if (request->rq_resend)
-                lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
+       if (request->rq_resend) {
+               lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
 
+               if (request->rq_resend_cb != NULL)
+                       request->rq_resend_cb(request, &request->rq_async_args);
+       }
         if (request->rq_memalloc)
                 mpflag = cfs_memory_pressure_get_and_set();
 
index fe2a1df..48f3c45 100755 (executable)
@@ -149,21 +149,50 @@ test_9() {
 run_test 9 "pause bulk on OST (bug 1420)"
 
 #bug 1521
-test_10() {
-       do_facet client mcreate $DIR/$tfile ||
-               { error "mcreate failed: $?"; return 1; }
-       drop_bl_callback "chmod 0777 $DIR/$tfile"  || echo "evicted as expected"
-       # wait for the mds to evict the client
-       #echo "sleep $(($TIMEOUT*2))"
-       #sleep $(($TIMEOUT*2))
-       do_facet client touch $DIR/$tfile || echo "touch failed, evicted"
-       do_facet client checkstat -v -p 0777 $DIR/$tfile ||
-               { error "client checkstat failed: $?"; return 3; }
-       do_facet client "munlink $DIR/$tfile"
-       # allow recovery to complete
-       client_up || client_up || sleep $TIMEOUT
+test_10a() {
+       local before=$(date +%s)
+       local evict
+
+       do_facet client "stat $DIR > /dev/null"  ||
+               error "failed to stat $DIR: $?"
+       drop_bl_callback "chmod 0777 $DIR" ||
+               error "failed to chmod $DIR: $?"
+
+       # let the client reconnect
+       client_reconnect
+       evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
+         awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+       [ ! -z "$evict" ] && [[ $evict -gt $before ]] ||
+               (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state;
+                   error "no eviction: $evict before:$before")
+
+       do_facet client checkstat -v -p 0777 $DIR ||
+               error "client checkstat failed: $?"
+}
+run_test 10a "finish request on server after client eviction (bug 1521)"
+
+test_10b() {
+       local before=$(date +%s)
+       local evict
+
+       do_facet client "stat $DIR > /dev/null"  ||
+               error "failed to stat $DIR: $?"
+       drop_bl_callback_once "chmod 0777 $DIR" ||
+               error "failed to chmod $DIR: $?"
+
+       # let the client reconnect
+       client_reconnect
+       evict=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
+         awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
+
+       [ -z "$evict" ] || [[ $evict -le $before ]] ||
+               (do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state;
+                   error "eviction happened: $evict before:$before")
+
+       do_facet client checkstat -v -p 0777 $DIR ||
+               error "client checkstat failed: $?"
 }
-run_test 10 "finish request on server after client eviction (bug 1521)"
+run_test 10b "re-send BL AST"
 
 #bug 2460
 # wake up a thread waiting for completion after eviction
@@ -177,7 +206,8 @@ test_11(){
 
        do_facet client $MULTIOP $DIR/$tfile or  ||
                { error "multiop read failed: $?"; return 3; }
-       drop_bl_callback $MULTIOP $DIR/$tfile Ow || echo "evicted as expected"
+       drop_bl_callback_once $MULTIOP $DIR/$tfile Ow ||
+               echo "evicted as expected"
 
        do_facet client munlink $DIR/$tfile ||
                { error "munlink failed: $?"; return 4; }
@@ -492,7 +522,7 @@ test_19c() {
        # let the client reconnect
        sleep 5
        EVICT=$(do_facet client $LCTL get_param mdc.$FSNAME-MDT*.state |
-          awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }')
+         awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
 
        [ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened"
 }
@@ -911,7 +941,8 @@ run_test 27 "fail LOV while using OSC's"
 
 test_28() {      # bug 6086 - error adding new clients
        do_facet client mcreate $DIR/$tfile       || return 1
-       drop_bl_callback "chmod 0777 $DIR/$tfile" ||echo "evicted as expected"
+       drop_bl_callback_once "chmod 0777 $DIR/$tfile" ||
+               echo "evicted as expected"
        #define OBD_FAIL_MDS_CLIENT_ADD 0x12f
        do_facet $SINGLEMDS "lctl set_param fail_loc=0x8000012f"
        # fail once (evicted), reconnect fail (fail_loc), ok
@@ -1163,10 +1194,10 @@ test_58() { # bug 11546
         pid=$!
         sleep 1
         lctl set_param fail_loc=0
-        drop_bl_callback rm -f $DIR/$tfile
+        drop_bl_callback_once rm -f $DIR/$tfile
         wait $pid
         # the first 'df' could tigger the eviction caused by
-        # 'drop_bl_callback', and it's normal case.
+        # 'drop_bl_callback_once', and it's normal case.
         # but the next 'df' should return successfully.
         do_facet client "df $DIR" || do_facet client "df $DIR"
 }
@@ -1969,7 +2000,7 @@ test_113() {
        # let the client reconnect
        client_reconnect
        EVICT=$($LCTL get_param mdc.$FSNAME-MDT*.state |
-          awk -F"[ [,]" '/EVICTED]$/ { if (mx<$4) {mx=$4;} } END { print mx }')
+         awk -F"[ [,]" '/EVICTED ]$/ { if (mx<$5) {mx=$5;} } END { print mx }')
 
        [ -z "$EVICT" ] || [[ $EVICT -le $BEFORE ]] || error "eviction happened"
 }
index 2d1a511..1f1c8ab 100755 (executable)
@@ -445,11 +445,13 @@ test_18() { # bug 3822 - evicting client with enqueued lock
        do_facet $SINGLEMDS lctl set_param fail_loc=0x8000030b  # hold enqueue
        sleep 1
        #define OBD_FAIL_LDLM_BL_CALLBACK_NET                   0x305
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
        do_facet client lctl set_param fail_loc=0x80000305  # drop cb, evict
        cancel_lru_locks mdc
        usleep 500 # wait to ensure first client is one that will be evicted
        openfile -f O_RDONLY $MOUNT2/$tdir/$tfile
        wait $OPENPID
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
        do_facet $SINGLEMDS lctl debug_kernel |
                grep "not entering recovery" && error "client not evicted"
        do_facet client "lctl set_param fail_loc=0"
index dd84abb..f38f088 100755 (executable)
@@ -4372,13 +4372,26 @@ drop_ldlm_cancel() {
        return $RC
 }
 
-drop_bl_callback() {
+drop_bl_callback_once() {
+       rc=0
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
 #define OBD_FAIL_LDLM_BL_CALLBACK_NET                  0x305
-       RC=0
        do_facet client lctl set_param fail_loc=0x80000305
-       do_facet client "$@" || RC=$?
+       do_facet client "$@" || rc=$?
        do_facet client lctl set_param fail_loc=0
-       return $RC
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
+       return $rc
+}
+
+drop_bl_callback() {
+       rc=0
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=0
+#define OBD_FAIL_LDLM_BL_CALLBACK_NET                  0x305
+       do_facet client lctl set_param fail_loc=0x305
+       do_facet client "$@" || rc=$?
+       do_facet client lctl set_param fail_loc=0
+       do_facet client lctl set_param ldlm.namespaces.*.early_lock_cancel=1
+       return $rc
 }
 
 drop_ldlm_reply() {