int LNetDist(struct lnet_nid *nid, struct lnet_nid *srcnid, __u32 *order);
void LNetPrimaryNID(struct lnet_nid *nid);
bool LNetIsPeerLocal(struct lnet_nid *nid);
-int LNetPeerDiscovered(struct lnet_nid *nid);
+bool LNetPeerDiscovered(struct lnet_nid *nid);
/** @} lnet_addr */
}
EXPORT_SYMBOL(LNetPrimaryNID);
-int LNetPeerDiscovered(struct lnet_nid *nid)
+/**
+ * Report whether the peer found for \a nid is currently usable without
+ * further discovery.
+ *
+ * The peer is looked up with lnet_find_peer(); its state is sampled under
+ * lp_lock and the reference taken by the lookup is dropped before returning.
+ * This function only reports state, it does not queue the peer for
+ * discovery.
+ *
+ * \param nid	NID used to look up the peer
+ *
+ * \retval true		the peer has LNET_PEER_NO_DISCOVERY set, or has both
+ *			LNET_PEER_DISCOVERED and LNET_PEER_NIDS_UPTODATE set
+ * \retval false	no peer was found for \a nid, or none of the above
+ *			state combinations holds
+ */
+bool
+LNetPeerDiscovered(struct lnet_nid *nid)
{
- int cpt, rc;
+ bool disc = false;
+ int cpt;
struct lnet_peer *lp;
- if (nid_is_lo0(nid))
- return 1;
-
lp = lnet_find_peer(nid);
- if (!lp) {
- CDEBUG(D_NET, "No peer for NID %s, can't discover\n",
- libcfs_nidstr(nid));
- return -EHOSTUNREACH;
- }
+ /* no peer for this NID: report it as not discovered */
+ if (!lp)
+ goto out;
cpt = lnet_net_lock_current();
spin_lock(&lp->lp_lock);
- if (lp->lp_state & LNET_PEER_NO_DISCOVERY ||
- (lp->lp_state & LNET_PEER_DISCOVERED &&
- lp->lp_state & LNET_PEER_NIDS_UPTODATE))
- rc = 1;
- else if (lp->lp_state & LNET_PEER_PING_FAILED)
- rc = -EHOSTUNREACH;
- else if (lp->lp_state & LNET_PEER_DISCOVERING)
- rc = -EALREADY;
- else
- rc = -EAGAIN;
+ if (((lp->lp_state & LNET_PEER_DISCOVERED) &&
+ (lp->lp_state & LNET_PEER_NIDS_UPTODATE)) ||
+ (lp->lp_state & LNET_PEER_NO_DISCOVERY))
+ disc = true;
spin_unlock(&lp->lp_lock);
- if (rc == -EAGAIN)
- lnet_peer_queue_for_discovery(lp);
-
/* Drop refcount from lookup */
lnet_peer_decref_locked(lp);
lnet_net_unlock(cpt);
-
- CDEBUG(D_NET, "Peer NID %s is %sdiscovered: rc = %d\n",
- libcfs_nidstr(nid), rc > 0 ? "" : "not ", rc);
- return rc;
+out:
+ CDEBUG(D_NET, "Peer NID %s discovered: %d\n", libcfs_nidstr(nid),
+ disc);
+ return disc;
}
EXPORT_SYMBOL(LNetPeerDiscovered);
time64_t oic_last_attempt;
unsigned int oic_attempts;
unsigned int oic_replied;
- int oic_uptodate;
+ bool oic_uptodate;
};
/* state history */
ocd->ocd_maxmodrpcs);
}
-static inline const char *conn_uptodate2str(int status)
-{
- if (status > 0)
- return "uptodate";
- if (status == -EHOSTUNREACH)
- return "unreachable";
- if (status == -EALREADY)
- return "discovering";
- if (status == -EAGAIN)
- return "rediscover";
- return "unknown";
-}
-
static void lprocfs_import_seq_show_locked(struct seq_file *m,
struct obd_device *obd,
struct obd_import *imp)
seq_printf(m, "\n \"%s\": { connects: %u, replied: %u,"
" uptodate: %s, sec_ago: ",
nidstr, conn->oic_attempts, conn->oic_replied,
- conn_uptodate2str(conn->oic_uptodate));
+ conn->oic_uptodate ? "true" : "false");
if (conn->oic_last_attempt)
seq_printf(m, "%lld }", ktime_get_seconds() -
conn->oic_last_attempt);
imp->imp_obd->obd_name,
libcfs_nidstr(&conn->oic_conn->c_peer.nid),
conn->oic_last_attempt);
+
conn->oic_uptodate =
LNetPeerDiscovered(&conn->oic_conn->c_peer.nid);
- /* LNET ping failed, skip peer completely */
- if (conn->oic_uptodate == -EHOSTUNREACH) {
- CDEBUG(D_HA, "%s: skip NID %s as unreachable\n",
- imp->imp_obd->obd_name,
- libcfs_nidstr(&conn->oic_conn->c_peer.nid));
- continue;
- }
-
/* track least recently used conn for fallback */
if (!lru_conn ||
lru_conn->oic_last_attempt > conn->oic_last_attempt)
*/
if (conn->oic_last_attempt <= imp->imp_last_success_conn) {
tried_all = false;
- if (conn->oic_uptodate > 0) {
+ if (conn->oic_uptodate) {
imp_conn = conn;
break;
}
- CDEBUG(D_HA, "%s: skip NID %s as not ready: rc = %d\n",
+ CDEBUG(D_HA, "%s: skip NID %s as not ready\n",
imp->imp_obd->obd_name,
- libcfs_nidstr(&conn->oic_conn->c_peer.nid),
- conn->oic_uptodate);
+ libcfs_nidstr(&conn->oic_conn->c_peer.nid));
}
}
- /* all connections are unreachable ATM, get just first in list */
- if (!lru_conn)
- lru_conn = list_entry(imp->imp_conn_list.next,
- struct obd_import_conn, oic_item);
/* no ready connections or all are tried in this round */
if (!imp_conn)
RETURN(-ENODEV);
}
- /* drop request over non-uptodate peers at connection stage,
- * otherwise LNet peer discovery may pin request for much longer
- * time than its ptlrpc expire time. LU-17906
- */
- spin_lock(&imp->imp_lock);
- if (imp->imp_conn_current && imp->imp_conn_current->oic_uptodate <= 0 &&
- imp->imp_state == LUSTRE_IMP_CONNECTING) {
- spin_unlock(&imp->imp_lock);
- request->rq_sent = ktime_get_real_seconds();
- RETURN(0);
- }
- spin_unlock(&imp->imp_lock);
-
connection = imp->imp_connection;
lustre_msg_set_handle(request->rq_reqmsg,
local nid=$($LCTL list_nids | grep ${NETTYPE} | head -n1)
local net=${nid#*@}
- local mgs_nid=$(do_facet mgs $LCTL list_nids | head -1)
- local ost1_nid=$(do_facet ost1 $LCTL list_nids | head -1)
- local fake_pnid="192.168.252.112@${net}"
- local fake_nids="${fake_pnid},${fake_pnid}2"
- local fake_failover="10.252.252.113@${net},10.252.252.113@${net}2"
- local nids_and_failover="$fake_nids:$fake_failover:$ost1_nid:$mgs_nid"
+ local MGS_NID=$(do_facet mgs $LCTL list_nids | head -1)
+ local OST1_NID=$(do_facet ost1 $LCTL list_nids | head -1)
+ local FAKE_PNID="192.168.252.112@${net}"
+ local FAKE_NIDS="${FAKE_PNID},${FAKE_PNID}2"
+ local FAKE_FAILOVER="10.252.252.113@${net},10.252.252.113@${net}2"
+ local NIDS_AND_FAILOVER="$FAKE_NIDS:$FAKE_FAILOVER:$OST1_NID:$MGS_NID"
local period=0
local pid
local rc
- mount -t lustre $nids_and_failover:/lustre $MOUNT &
+ mount -t lustre $NIDS_AND_FAILOVER:/lustre $MOUNT &
pid=$!
while (( period < 30 )); do
[[ -n "$(ps -p $pid -o pid=)" ]] || break
sleep 5
period=$((period + 5))
done
- $LCTL get_param mgc.MGC${fake_pnid}.import | grep "uptodate:"
+ $LCTL get_param mgc.MGC${FAKE_PNID}.import | grep "uptodate:"
check_mount || error "check_mount failed"
umount $MOUNT
cleanup || error "cleanup failed with rc $?"
}
run_test 153b "added IPv6 NID support"
-test_153c() {
- reformat_and_config
-
- start_mds || error "MDS start failed"
- start_ost || error "OST start failed"
-
- local nid=$($LCTL list_nids | grep ${NETTYPE} | head -n1)
- local net=${nid#*@}
- local fake_pnid="192.168.252.112@${net}"
- local fake_failover="192.168.252.113@${net}:192.168.252.115@${net}"
- local nids_and_failover="$fake_pnid:$fake_failover"
- local period=0
- local pid
- local rc
-
- umount_client $MOUNT
- mount -t lustre $nids_and_failover:/lustre $MOUNT &
- pid=$!
- while (( period < 30 )); do
- [[ -n "$(ps -p $pid -o pid=)" ]] || break
- echo "waiting for mount ..."
- sleep 5
- period=$((period + 5))
- done
- $LCTL get_param mgc.MGC${fake_pnid}.import | grep "sec_ago"
- conn=$($LCTL get_param mgc.MGC${fake_pnid}.import |
- awk '/connection_attempts:/ {print $2}')
- echo "connection attempts: $conn"
- (( conn > 2)) || error "too few connection attempts"
- echo "Waiting for mount to fail"
- wait $pid
- cleanup || error "cleanup failed with rc $?"
-}
-run_test 153c "don't stuck on unreached NID"
-
test_154() {
[ "$mds1_FSTYPE" == "ldiskfs" ] || skip "ldiskfs only test"
(( $MDS1_VERSION >= $(version_code 2.15.63.1) )) ||