struct obd_import *imp = req->rq_import;
unsigned int debug_mask = D_RPCTRACE;
int rc = 0;
+ __u32 opc;
ENTRY;
spin_lock(&req->rq_lock);
req->rq_timedout = 1;
spin_unlock(&req->rq_lock);
- if (ptlrpc_console_allow(req, lustre_msg_get_opc(req->rq_reqmsg),
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ if (ptlrpc_console_allow(req, opc,
lustre_msg_get_status(req->rq_reqmsg)))
debug_mask = D_WARNING;
DEBUG_REQ(debug_mask, req, "Request sent has %s: [sent %lld/real %lld]",
rc = 1;
}
- ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
+ if (opc != OBD_PING || req->rq_xid > imp->imp_highest_replied_xid)
+ ptlrpc_fail_import(imp,
+ lustre_msg_get_conn_cnt(req->rq_reqmsg));
RETURN(rc);
}
int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
{
int rc;
+ __u32 opc;
int mpflag = 0;
struct lnet_handle_md bulk_cookie;
struct ptlrpc_connection *connection;
"Allocating new XID for resend on EINPROGRESS");
}
+ opc = lustre_msg_get_opc(request->rq_reqmsg);
+
if (request->rq_bulk != NULL) {
ptlrpc_set_bulk_mbits(request);
lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
DEBUG_REQ(D_INFO, request, "send flags=%x",
lustre_msg_get_flags(request->rq_reqmsg));
+
+ if (unlikely(opc == OBD_PING &&
+ OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND_FAIL, cfs_fail_val))) {
+ DEBUG_REQ(D_INFO, request, "Simulate delay send failure");
+ GOTO(skip_send, rc);
+ }
+
rc = ptl_send_buf(&request->rq_req_md_h,
request->rq_reqbuf, request->rq_reqdata_len,
LNET_NOACK_REQ, &request->rq_req_cbid,
if (likely(rc == 0))
GOTO(out, rc);
+skip_send:
request->rq_req_unlinked = 1;
ptlrpc_req_finished(request);
if (noreply)
}
run_test 136 "MDS to disconnect all OSPs first, then cleanup ldlm"
+# Check that one delayed OBD_PING does not make the client reconnect.
+#
+# On the first remote client, idle disconnect is turned off for OST0000 only.
+# A fail_loc (0x535) is then armed to delay the send of a ping, and the debug
+# log is polled until it shows that the delay was hit, the sender woke up,
+# more than one ping was sent and an o400 request timed out.  Finally the
+# connection_attempts counter of every import is compared with the value
+# sampled before the delay; any difference is an error.
+test_200() {
+	[[ -z $RCLIENTS ]] && skip "Need remote client"
+
+	local rcli old
+
+	rcli=$(echo $RCLIENTS | cut -d ' ' -f 1)
+
+	echo "Selected \"$rcli\" from \"$RCLIENTS\""
+
+	# We want idle disconnect enabled on all targets except OST0000. Test
+	# assumes all nodes are configured the same.
+	old=$(do_node $rcli "$LCTL get_param -n *.*.idle_timeout 2>/dev/null |
+			     head -n 1")
+	[[ -n $old ]] || error "Cannot determine current idle_timeout"
+
+	if ((old == 0)); then
+		do_node "$rcli" "$LCTL set_param *.*.idle_timeout=10"
+		do_node "$rcli" "$LCTL set_param osc.*OST0000*.idle_timeout=0"
+		stack_trap "do_node $rcli $LCTL set_param *.*.idle_timeout=$old"
+	else
+		do_node "$rcli" "$LCTL set_param osc.*OST0000*.idle_timeout=0"
+		stack_trap "do_node $rcli $LCTL set_param osc.*OST0000*.idle_timeout=$old"
+	fi
+
+	# Ensure OST0 import is idle: wait until no "idle: 0" line is reported
+	wait_update "$rcli" \
+		"$LCTL get_param osc.*OST0000*.import | \
+		 awk 'BEGIN{ count=0 } /idle: 0/ { count+=1 } \
+		      END { print count }'" \
+		"0" "30" || error "OST0000 not idle after 30s"
+
+	# Send ping to ensure import is FULL
+	do_node "$rcli" "$LCTL set_param osc.*OST0000*.ping=1" ||
+		error "OBD ping failed"
+
+	# Baseline connection_attempts for every import, in get_param order
+	declare -a conns
+
+	conns=( $(do_node "$rcli" "$LCTL get_param *.*.import" |
+		  awk '/connection_attempts:/{print $NF}' | xargs echo) )
+
+	local saved_debug
+
+	saved_debug=$(do_node "$rcli" \
+			"cat /sys/module/libcfs/parameters/libcfs_debug" \
+			2>/dev/null)
+	[[ -z $saved_debug ]] && error "Failed to get existing debug"
+	stack_trap "do_node $rcli $LCTL set_param debug=$saved_debug"
+	do_node "$rcli" "$LCTL set_param debug=+info+rpctrace"
+
+	# From lustre/obdclass/class_obd.c
+	# unsigned int ping_interval = (OBD_TIMEOUT_DEFAULT > 4) ?
+	#			       (OBD_TIMEOUT_DEFAULT / 4) : 1;
+	# Delay a ping for 3 intervals
+	local timeout delay
+
+	timeout=$(do_node "$rcli" "$LCTL get_param -n timeout")
+	# An empty value would silently evaluate to 0 in the arithmetic below
+	[[ -n $timeout ]] || error "Cannot determine obd timeout"
+	(( timeout > 4 )) && delay=$((3 * timeout / 4)) || delay=3
+
+	do_node "$rcli" "$LCTL clear"
+	log "delay ping ${delay}s"
+
+	# If the fail point is never hit it would stay armed on the remote
+	# client, so always reset it when the test exits.
+	stack_trap "do_node $rcli $LCTL set_param fail_loc=0 fail_val=0"
+
+	#define OBD_FAIL_PTLRPC_DELAY_SEND_FAIL	0x535
+	do_node "$rcli" "$LCTL set_param fail_loc=0x80000535 fail_val=${delay}"
+	# Send ping that will be delayed for ${delay} seconds
+	# This races with the pinger, but it is okay if a ping sent by pinger
+	# thread is delayed instead
+	do_node "$rcli" "$LCTL set_param osc.*OST0000*.ping=1"
+
+	local logfile="$TMP/lustre-log-${TESTNAME}.log"
+
+	# The loop appends to the log; drop any stale file left behind by an
+	# interrupted run so old messages cannot satisfy the greps below.
+	rm -f "$logfile"
+
+	local nsent expired
+	local waited=0
+	local begin=$SECONDS
+	local max_wait=$((delay + 1))
+	local reason=""
+	local pause=""
+
+	# Poll once immediately, then once per second, accumulating the debug
+	# log until every expected message has been seen or time runs out.
+	while (( waited <= max_wait )); do
+		[[ -z $pause ]] || sleep $pause
+		pause=1
+		waited=$((SECONDS - begin))
+		do_node "$rcli" "$LCTL dk" >> "$logfile"
+
+		if ! grep -q 'fail_timeout id 535 sleeping for' "$logfile"; then
+			reason="Did not hit fail_loc"
+			continue
+		fi
+
+		if ! grep -q 'cfs_fail_timeout id 535 awake' "$logfile"; then
+			reason="Delayed send did not wake"
+			continue
+		fi
+
+		nsent=$(grep -c "ptl_send_rpc.*o400->.*OST0000" "$logfile")
+		if (( nsent <= 1 )); then
+			reason="Did not send more than 1 obd ping"
+			continue
+		fi
+
+		expired=$(grep "Request sent has timed out .* o400->" "$logfile")
+		if [[ -z $expired ]]; then
+			reason="RPC did not time out"
+			continue
+		fi
+		reason=""
+		break
+	done
+
+	if [[ -n $reason ]]; then
+		cat "$logfile"
+		rm -f "$logfile"
+		error "$reason"
+	else
+		echo "${expired}"
+		rm -f "$logfile"
+	fi
+
+	# Re-sample connection_attempts and compare with the baseline
+	declare -a conns2
+
+	conns2=( $(do_node "$rcli" "$LCTL get_param *.*.import" |
+		   awk '/connection_attempts:/{print $NF}' | xargs echo) )
+
+	echo "conns: ${conns[*]}"
+
+	echo "conns2: ${conns2[*]}"
+
+	(( ${#conns[@]} != ${#conns2[@]} )) &&
+		error "Expected ${#conns[@]} imports found ${#conns2[@]}"
+
+	local i
+
+	for ((i = 0; i < ${#conns[@]}; i++)); do
+		if (( conns[i] != conns2[i] )); then
+			error "New connection attempt ${conns[i]} -> ${conns2[i]}"
+		fi
+	done
+
+	return 0
+}
+run_test 200 "Dropping one OBD_PING should not cause disconnect"
+
complete_test $SECONDS
check_and_cleanup_lustre
exit_status