struct obd_import *imp = req->rq_import;
unsigned int debug_mask = D_RPCTRACE;
int rc = 0;
+ __u32 opc;
ENTRY;
spin_lock(&req->rq_lock);
req->rq_timedout = 1;
spin_unlock(&req->rq_lock);
- if (ptlrpc_console_allow(req, lustre_msg_get_opc(req->rq_reqmsg),
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ if (ptlrpc_console_allow(req, opc,
lustre_msg_get_status(req->rq_reqmsg)))
debug_mask = D_WARNING;
DEBUG_REQ(debug_mask, req, "Request sent has %s: [sent %lld/real %lld]",
rc = 1;
}
- ptlrpc_fail_import(imp, lustre_msg_get_conn_cnt(req->rq_reqmsg));
+ if (opc != OBD_PING || req->rq_xid > imp->imp_highest_replied_xid)
+ ptlrpc_fail_import(imp,
+ lustre_msg_get_conn_cnt(req->rq_reqmsg));
RETURN(rc);
}
}
run_test 136 "MDS to disconnect all OSPs first, then cleanup ldlm"
+test_200() {
+ [[ -z $RCLIENTS ]] && skip "Need remote client"
+
+ local rcli old
+
+ rcli=$(echo "$RCLIENTS" | cut -d ' ' -f 1)
+ old=$(do_node "$rcli" "$LCTL get_param -n osc.*OST0000*.idle_timeout")
+
+ if ((old != 0)); then
+ do_node "$rcli" "$LCTL set_param osc.*OST0000*.idle_timeout=0"
+ stack_trap "do_node $rcli $LCTL set_param osc.*OST0000*.idle_timeout=$old"
+ fi
+
+ # Ensure OST0 import is idle
+ wait_update "$rcli" \
+ "$LCTL get_param osc.*OST0000*.import | \
+ awk 'BEGIN{ count=0 } /idle: 0/ { count+=1 } \
+ END { print count }'" \
+ "0" "30"
+
+ (( $? != 0 )) && error "OST0000 not idle after 30s"
+
+ # Send ping to ensure import is FULL
+ do_node "$rcli" "lctl set_param osc.*OST0000*.ping=1" ||
+ error "OBD ping failed"
+
+ declare -a conns
+
+ conns=( $(do_node "$rcli" "$LCTL get_param *.*.import" |
+ awk '/connection_attempts:/{print $NF}' | xargs echo) )
+
+ local saved_debug
+
+ saved_debug=$(do_node "$rcli" \
+ "cat /sys/module/libcfs/parameters/libcfs_debug" \
+ 2>/dev/null)
+ [[ -z $saved_debug ]] && error "Failed to get existing debug"
+ stack_trap "do_node $rcli $LCTL set_param debug=$saved_debug"
+ do_node "$rcli" "$LCTL set_param debug=+info+rpctrace"
+
+ # From lustre/obdclass/class_obd.c
+ # unsigned int ping_interval = (OBD_TIMEOUT_DEFAULT > 4) ?
+ # (OBD_TIMEOUT_DEFAULT / 4) : 1;
+ # Delay a ping for 3 intervals
+ local timeout delay ts1 ts2 elapsed
+
+ timeout=$(do_node "$rcli" "$LCTL get_param -n timeout")
+ (( timeout > 4 )) && delay=$((3 * timeout / 4)) || delay=3
+
+ do_node "$rcli" "$LCTL clear"
+ log "delay ping ${delay}s"
+
+ ts1=$(date +%s)
+
+ #define OBD_FAIL_PTLRPC_DELAY_SEND_FAIL 0x535
+ do_node "$rcli" "$LCTL set_param fail_loc=0x80000535 fail_val=${delay}"
+ # Send ping that will be delayed for ${delay} seconds
+ # This races with the pinger, but it is okay if a ping sent by pinger
+ # thread is delayed instead
+ do_node "$rcli" "lctl set_param osc.*OST0000*.ping=1"
+
+ # Make sure we hit the fail_loc
+ wait_update --quiet "$rcli" \
+ "dmesg | tail -n 10 | \
+ grep -c 'fail_timeout id 535 sleeping for'" \
+ "1" "${timeout}"
+
+ (( $? != 0 )) && error "Did not hit fail_loc"
+
+ ts2=$(date +%s)
+
+ # If we raced with pinger thread then we need to wait ${delay} seconds
+ # for the ping to fail
+ elapsed=$((ts2 - ts1))
+
+ if (( elapsed < delay )); then
+ sleep $(((delay - elapsed) + 1))
+ fi
+
+ do_node "$rcli" "$LCTL dk" > "$TMP/lustre-log-${TESTNAME}.log"
+
+ local nsent
+
+ nsent=$(grep -c "ptl_send_rpc.*o400->.*OST0000" \
+ "$TMP/lustre-log-${TESTNAME}.log")
+
+ if (( nsent <= 1)); then
+ cat "$TMP/lustre-log-${TESTNAME}.log"
+ rm -f "$TMP/lustre-log-${TESTNAME}.log"
+ error "Did not send more than 1 obd ping"
+ fi
+
+ local expired
+
+ expired=$(grep "Request sent has timed out .* o400->" \
+ "$TMP/lustre-log-${TESTNAME}.log")
+
+ if [[ -z $expired ]]; then
+ cat "$TMP/lustre-log-${TESTNAME}.log"
+ rm -f "$TMP/lustre-log-${TESTNAME}.log"
+ error "RPC did not time out"
+ else
+ echo "${expired}"
+ rm -f "$TMP/lustre-log-${TESTNAME}.log"
+ fi
+
+ declare -a conns2
+
+ conns2=( $(do_node "$rcli" "$LCTL get_param *.*.import" |
+ awk '/connection_attempts:/{print $NF}' | xargs echo) )
+
+ echo "conns: ${conns[*]}"
+
+ echo "conns2: ${conns2[*]}"
+
+ (( ${#conns[@]} != ${#conns2[@]} )) &&
+ error "Expected ${#conns[@]} imports found ${#conns2[@]}"
+
+ local i
+
+ for ((i = 0; i < ${#conns[@]}; i++)); do
+ if (( conns[i] != conns2[i] )); then
+ error "New connection attempt ${conns[i]} -> ${conns2[i]}"
+ fi
+ done
+
+ return 0
+}
+run_test 200 "Dropping one OBD_PING should not cause disconnect"
+
complete $SECONDS
check_and_cleanup_lustre
exit_status