diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 8551e8455..f83a20102 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -124,6 +124,9 @@ namespace { /** Number of peers from which we're downloading blocks. */ int nPeersWithValidatedDownloads = 0; + /** Number of outbound peers with m_chain_sync.m_protect. */ + int g_outbound_peers_with_protect_from_disconnect = 0; + /** Relay map, protected by cs_main. */ typedef std::map MapRelay; MapRelay mapRelay; @@ -201,6 +204,33 @@ struct CNodeState { */ bool fSupportsDesiredCmpctVersion; + /** State used to enforce CHAIN_SYNC_TIMEOUT + * Only in effect for outbound, non-manual connections, with + * m_protect == false + * Algorithm: if a peer's best known block has less work than our tip, + * set a timeout CHAIN_SYNC_TIMEOUT seconds in the future: + * - If at timeout their best known block now has more work than our tip + * when the timeout was set, then either reset the timeout or clear it + * (after comparing against our current tip's work) + * - If at timeout their best known block still has less work than our + * tip did when the timeout was set, then send a getheaders message, + * and set a shorter timeout, HEADERS_RESPONSE_TIME seconds in future. + * If their best known block is still behind when that new timeout is + * reached, disconnect. + */ + struct ChainSyncTimeoutState { + //! A timeout used for checking whether our peer has sufficiently synced + int64_t m_timeout; + //! A header with the work we require on our peer's chain + const CBlockIndex * m_work_header; + //! After timeout is reached, set to true after sending getheaders + bool m_sent_getheaders; + //! Whether this peer is protected from disconnection due to a bad/slow chain + bool m_protect; + }; + + ChainSyncTimeoutState m_chain_sync; + CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) { fCurrentlyConnected = false; nMisbehavior = 0; @@ -223,6 +253,7 @@ struct CNodeState { fHaveWitness = false; fWantsCmpctWitness = false; fSupportsDesiredCmpctVersion = false; + m_chain_sync = { 0, nullptr, false, false }; } }; @@ -502,6 +533,13 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vectorfInbound || node->m_manual_connection || node->fFeeler || node->fOneShot); +} + void PeerLogicValidation::InitializeNode(CNode *pnode) { CAddress addr = pnode->addr; std::string addrName = pnode->GetAddrName(); @@ -534,6 +572,8 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim nPreferredDownload -= state->fPreferredDownload; nPeersWithValidatedDownloads -= (state->nBlocksInFlightValidHeaders != 0); assert(nPeersWithValidatedDownloads >= 0); + g_outbound_peers_with_protect_from_disconnect -= state->m_chain_sync.m_protect; + assert(g_outbound_peers_with_protect_from_disconnect >= 0); mapNodeState.erase(nodeid); @@ -542,6 +582,7 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim assert(mapBlocksInFlight.empty()); assert(nPreferredDownload == 0); assert(nPeersWithValidatedDownloads == 0); + assert(g_outbound_peers_with_protect_from_disconnect == 0); } LogPrint(BCLog::NET, "Cleared nodestate for peer=%d\n", nodeid); } @@ -2324,6 +2365,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr assert(pindexLast); UpdateBlockAvailability(pfrom->GetId(), pindexLast->GetBlockHash()); + // From here, pindexBestKnownBlock should be guaranteed to be non-null, + // because it is set in UpdateBlockAvailability. Some nullptr checks + // are still present, however, as belt-and-suspenders. + if (nCount == MAX_HEADERS_RESULTS) { // Headers message had its maximum size; the peer may have more headers. // TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue @@ -2396,11 +2441,22 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr // chainActive.Tip()) because we won't start block download // until we have a headers chain that has at least // nMinimumChainWork, even if a peer has a chain past our tip, - if (!(pfrom->fInbound || pfrom->fWhitelisted || pfrom->m_manual_connection)) { + // as an anti-DoS measure. + if (IsOutboundDisconnectionCandidate(pfrom)) { + LogPrintf("Disconnecting outbound peer %d -- headers chain has insufficient work\n", pfrom->GetId()); pfrom->fDisconnect = true; } } } + + if (!pfrom->fDisconnect && IsOutboundDisconnectionCandidate(pfrom) && nodestate->pindexBestKnownBlock != nullptr) { + // If this is an outbound peer, check to see if we should protect + // it from the bad/lagging chain logic. + if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + nodestate->m_chain_sync.m_protect = true; + ++g_outbound_peers_with_protect_from_disconnect; + } + } } } @@ -2799,6 +2855,58 @@ bool PeerLogicValidation::ProcessMessages(CNode* pfrom, std::atomic& inter return fMoreWork; } +void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds) +{ + AssertLockHeld(cs_main); + + CNodeState &state = *State(pto->GetId()); + const CNetMsgMaker msgMaker(pto->GetSendVersion()); + + if (!state.m_chain_sync.m_protect && IsOutboundDisconnectionCandidate(pto) && state.fSyncStarted) { + // This is an outbound peer subject to disconnection if they don't + // announce a block with as much work as the current tip within + // CHAIN_SYNC_TIMEOUT + HEADERS_RESPONSE_TIME seconds (note: if + // their chain has more work than ours, we should sync to it, + // unless it's invalid, in which case we should find that out and + // disconnect from them elsewhere). + if (state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork) { + if (state.m_chain_sync.m_timeout != 0) { + state.m_chain_sync.m_timeout = 0; + state.m_chain_sync.m_work_header = nullptr; + state.m_chain_sync.m_sent_getheaders = false; + } + } else if (state.m_chain_sync.m_timeout == 0 || (state.m_chain_sync.m_work_header != nullptr && state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= state.m_chain_sync.m_work_header->nChainWork)) { + // Our best block known by this peer is behind our tip, and we're either noticing + // that for the first time, OR this peer was able to catch up to some earlier point + // where we checked against our tip. + // Either way, set a new timeout based on current tip. + state.m_chain_sync.m_timeout = time_in_seconds + CHAIN_SYNC_TIMEOUT; + state.m_chain_sync.m_work_header = chainActive.Tip(); + state.m_chain_sync.m_sent_getheaders = false; + } else if (state.m_chain_sync.m_timeout > 0 && time_in_seconds > state.m_chain_sync.m_timeout) { + // No evidence yet that our peer has synced to a chain with work equal to that + // of our tip, when we first detected it was behind. Send a single getheaders + // message to give the peer a chance to update us. + if (state.m_chain_sync.m_sent_getheaders) { + // They've run out of time to catch up! + LogPrintf("Disconnecting outbound peer %d for old chain, best known block = %s\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : ""); + pto->fDisconnect = true; + } else { + LogPrint(BCLog::NET, "sending getheaders to outbound peer=%d to verify chain work (current best known block:%s, benchmark blockhash: %s)\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : "", state.m_chain_sync.m_work_header->GetBlockHash().ToString()); + connman->PushMessage(pto, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(state.m_chain_sync.m_work_header->pprev), uint256())); + state.m_chain_sync.m_sent_getheaders = true; + constexpr int64_t HEADERS_RESPONSE_TIME = 120; // 2 minutes + // Bump the timeout to allow a response, which could clear the timeout + // (if the response shows the peer has synced), reset the timeout (if + // the peer syncs to the required work but not to our tip), or result + // in disconnect (if we advance to the timeout and pindexBestKnownBlock + // has not sufficiently progressed) + state.m_chain_sync.m_timeout = time_in_seconds + HEADERS_RESPONSE_TIME; + } + } + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; @@ -3265,6 +3373,9 @@ bool PeerLogicValidation::SendMessages(CNode* pto, std::atomic& interruptM } } + // Check that outbound peers have reasonable chains + // GetTime() is used by this anti-DoS logic so we can test this using mocktime + ConsiderEviction(pto, GetTime()); // // Message: getdata (blocks) diff --git a/src/net_processing.h b/src/net_processing.h index 79745cdd4..656324bba 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -21,6 +21,12 @@ static const unsigned int DEFAULT_BLOCK_RECONSTRUCTION_EXTRA_TXN = 100; * Timeout = base + per_header * (expected number of headers) */ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_BASE = 15 * 60 * 1000000; // 15 minutes static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/header +/** Protect at least this many outbound peers from disconnection due to slow/ + * behind headers chain. + */ +static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4; +/** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */ +static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: @@ -47,6 +53,8 @@ public: * @return True if there is more work to be done */ bool SendMessages(CNode* pto, std::atomic& interrupt) override; + + void ConsiderEviction(CNode *pto, int64_t time_in_seconds); }; struct CNodeStateStats {