From ac7b37cd2bd612a64a4009ba82f1cd1d57f37434 Mon Sep 17 00:00:00 2001 From: Suhas Daftuar Date: Tue, 24 Oct 2017 16:56:07 -0400 Subject: [PATCH] Connect to an extra outbound peer if our tip is stale If our tip hasn't updated in a while, that may be because our peers are not relaying blocks to us that we would consider valid. Allow connection to an additional outbound peer in that circumstance. Also, periodically check to see if we are exceeding our target number of outbound peers, and disconnect the one which has least recently announced a new block to us (choosing the newest such peer in the case of tie). --- src/init.cpp | 2 +- src/net.cpp | 1 + src/net_processing.cpp | 99 ++++++++++++++++++++++++++++++++++++++- src/net_processing.h | 16 ++++++- src/test/test_bitcoin.cpp | 2 +- 5 files changed, 114 insertions(+), 6 deletions(-) diff --git a/src/init.cpp b/src/init.cpp index 655743488..e57ea0f43 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler) g_connman = std::unique_ptr(new CConnman(GetRand(std::numeric_limits::max()), GetRand(std::numeric_limits::max()))); CConnman& connman = *g_connman; - peerLogic.reset(new PeerLogicValidation(&connman)); + peerLogic.reset(new PeerLogicValidation(&connman, scheduler)); RegisterValidationInterface(peerLogic.get()); // sanitize comments per BIP-0014, format user agent and check total size diff --git a/src/net.cpp b/src/net.cpp index 077a51bac..5eaeaab8f 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -1701,6 +1701,7 @@ bool CConnman::GetTryNewOutboundPeer() void CConnman::SetTryNewOutboundPeer(bool flag) { m_try_another_outbound_peer = flag; + LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false"); } // Return the number of peers we have over our outbound connection limit diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 59df5d650..3aa13fff4 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -23,6 +23,7 @@ #include "primitives/transaction.h" #include "random.h" #include "reverse_iterator.h" +#include "scheduler.h" #include "tinyformat.h" #include "txmempool.h" #include "ui_interface.h" @@ -127,7 +128,6 @@ namespace { /** Number of outbound peers with m_chain_sync.m_protect. */ int g_outbound_peers_with_protect_from_disconnect = 0; - /** When our tip was last updated. */ int64_t g_last_tip_update = 0; @@ -435,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) { } } +bool TipMayBeStale(const Consensus::Params &consensusParams) +{ + AssertLockHeld(cs_main); + if (g_last_tip_update == 0) { + g_last_tip_update = GetTime(); + } + return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty(); +} + // Requires cs_main bool CanDirectFetch(const Consensus::Params &consensusParams) { @@ -772,9 +781,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus: (GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT); } -PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) { +PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) { // Initialize global variables that cannot be constructed at startup. recentRejects.reset(new CRollingBloomFilter(120000, 0.000001)); + + const Consensus::Params& consensusParams = Params().GetConsensus(); + // Stale tip checking and peer eviction are on two different timers, but we + // don't want them to get out of sync due to drift in the scheduler, so we + // combine them in one function and schedule at the quicker (peer-eviction) + // timer. + static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer"); + scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000); } void PeerLogicValidation::BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindex, const std::vector& vtxConflicted) { @@ -1424,6 +1441,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve // If this is an outbound peer, check to see if we should protect // it from the bad/lagging chain logic. if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId()); nodestate->m_chain_sync.m_protect = true; ++g_outbound_peers_with_protect_from_disconnect; } @@ -3004,6 +3022,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds) } } +void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds) +{ + // Check whether we have too many outbound peers + int extra_peers = connman->GetExtraOutboundCount(); + if (extra_peers > 0) { + // If we have more outbound peers than we target, disconnect one. + // Pick the outbound peer that least recently announced + // us a new block, with ties broken by choosing the more recent + // connection (higher node id) + NodeId worst_peer = -1; + int64_t oldest_block_announcement = std::numeric_limits::max(); + + LOCK(cs_main); + + connman->ForEachNode([&](CNode* pnode) { + // Ignore non-outbound peers, or nodes marked for disconnect already + if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return; + CNodeState *state = State(pnode->GetId()); + if (state == nullptr) return; // shouldn't be possible, but just in case + // Don't evict our protected peers + if (state->m_chain_sync.m_protect) return; + if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) { + worst_peer = pnode->GetId(); + oldest_block_announcement = state->m_last_block_announcement; + } + }); + if (worst_peer != -1) { + bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) { + // Only disconnect a peer that has been connected to us for + // some reasonable fraction of our check-frequency, to give + // it time for new information to have arrived. + // Also don't disconnect any peer we're trying to download a + // block from. + CNodeState &state = *State(pnode->GetId()); + if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) { + LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement); + pnode->fDisconnect = true; + return true; + } else { + LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight); + return false; + } + }); + if (disconnected) { + // If we disconnected an extra peer, that means we successfully + // connected to at least one peer after the last time we + // detected a stale tip. Don't try any more extra peers until + // we next detect a stale tip, to limit the load we put on the + // network from these extra connections. + connman->SetTryNewOutboundPeer(false); + } + } + } +} + +void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams) +{ + if (connman == nullptr) return; + + int64_t time_in_seconds = GetTime(); + + EvictExtraOutboundPeers(time_in_seconds); + + if (time_in_seconds > m_stale_tip_check_time) { + LOCK(cs_main); + // Check whether our tip is stale, and if so, allow using an extra + // outbound peer + if (TipMayBeStale(consensusParams)) { + LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update); + connman->SetTryNewOutboundPeer(true); + } else if (connman->GetTryNewOutboundPeer()) { + connman->SetTryNewOutboundPeer(false); + } + m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL; + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; diff --git a/src/net_processing.h b/src/net_processing.h index 656324bba..0a49972ee 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -8,6 +8,7 @@ #include "net.h" #include "validationinterface.h" +#include "consensus/params.h" /** Default for -maxorphantx, maximum number of orphan transactions kept in memory */ static const unsigned int DEFAULT_MAX_ORPHAN_TRANSACTIONS = 100; @@ -27,13 +28,19 @@ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/head static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4; /** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */ static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes +/** How frequently to check for stale tips, in seconds */ +static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; // 10 minutes +/** How frequently to check for extra outbound peers and disconnect, in seconds */ +static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45; +/** Minimum time an outbound-peer-eviction candidate must be connected for, in order to evict, in seconds */ +static constexpr int64_t MINIMUM_CONNECT_TIME = 30; class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: - CConnman* connman; + CConnman* const connman; public: - explicit PeerLogicValidation(CConnman* connman); + explicit PeerLogicValidation(CConnman* connman, CScheduler &scheduler); void BlockConnected(const std::shared_ptr& pblock, const CBlockIndex* pindexConnected, const std::vector& vtxConflicted) override; void UpdatedBlockTip(const CBlockIndex *pindexNew, const CBlockIndex *pindexFork, bool fInitialDownload) override; @@ -55,6 +62,11 @@ public: bool SendMessages(CNode* pto, std::atomic& interrupt) override; void ConsiderEviction(CNode *pto, int64_t time_in_seconds); + void CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams); + void EvictExtraOutboundPeers(int64_t time_in_seconds); + +private: + int64_t m_stale_tip_check_time; //! Next time to check for stale tip }; struct CNodeStateStats { diff --git a/src/test/test_bitcoin.cpp b/src/test/test_bitcoin.cpp index 79bc48a11..c768446d3 100644 --- a/src/test/test_bitcoin.cpp +++ b/src/test/test_bitcoin.cpp @@ -86,7 +86,7 @@ TestingSetup::TestingSetup(const std::string& chainName) : BasicTestingSetup(cha threadGroup.create_thread(&ThreadScriptCheck); g_connman = std::unique_ptr(new CConnman(0x1337, 0x1337)); // Deterministic randomness for tests. connman = g_connman.get(); - peerLogic.reset(new PeerLogicValidation(connman)); + peerLogic.reset(new PeerLogicValidation(connman, scheduler)); } TestingSetup::~TestingSetup()