Connect to an extra outbound peer if our tip is stale

If our tip hasn't updated in a while, that may be because our peers are
not relaying blocks to us that we would consider valid. Allow connection
to an additional outbound peer in that circumstance.

Also, periodically check to see if we are exceeding our target number of
outbound peers, and disconnect the one which has least recently
announced a new block to us (choosing the newest such peer in the case
of tie).
This commit is contained in:
Suhas Daftuar 2017-10-24 16:56:07 -04:00
parent db32a65897
commit ac7b37cd2b
5 changed files with 114 additions and 6 deletions

View file

@ -1270,7 +1270,7 @@ bool AppInitMain(boost::thread_group& threadGroup, CScheduler& scheduler)
g_connman = std::unique_ptr<CConnman>(new CConnman(GetRand(std::numeric_limits<uint64_t>::max()), GetRand(std::numeric_limits<uint64_t>::max())));
CConnman& connman = *g_connman;
peerLogic.reset(new PeerLogicValidation(&connman));
peerLogic.reset(new PeerLogicValidation(&connman, scheduler));
RegisterValidationInterface(peerLogic.get());
// sanitize comments per BIP-0014, format user agent and check total size

View file

@ -1701,6 +1701,7 @@ bool CConnman::GetTryNewOutboundPeer()
void CConnman::SetTryNewOutboundPeer(bool flag)
{
m_try_another_outbound_peer = flag;
LogPrint(BCLog::NET, "net: setting try another outbound peer=%s\n", flag ? "true" : "false");
}
// Return the number of peers we have over our outbound connection limit

View file

@ -23,6 +23,7 @@
#include "primitives/transaction.h"
#include "random.h"
#include "reverse_iterator.h"
#include "scheduler.h"
#include "tinyformat.h"
#include "txmempool.h"
#include "ui_interface.h"
@ -127,7 +128,6 @@ namespace {
/** Number of outbound peers with m_chain_sync.m_protect. */
int g_outbound_peers_with_protect_from_disconnect = 0;
/** When our tip was last updated. */
int64_t g_last_tip_update = 0;
@ -435,6 +435,15 @@ void MaybeSetPeerAsAnnouncingHeaderAndIDs(NodeId nodeid, CConnman* connman) {
}
}
bool TipMayBeStale(const Consensus::Params &consensusParams)
{
AssertLockHeld(cs_main);
if (g_last_tip_update == 0) {
g_last_tip_update = GetTime();
}
return g_last_tip_update < GetTime() - consensusParams.nPowTargetSpacing * 3 && mapBlocksInFlight.empty();
}
// Requires cs_main
bool CanDirectFetch(const Consensus::Params &consensusParams)
{
@ -772,9 +781,17 @@ static bool StaleBlockRequestAllowed(const CBlockIndex* pindex, const Consensus:
(GetBlockProofEquivalentTime(*pindexBestHeader, *pindex, *pindexBestHeader, consensusParams) < STALE_RELAY_AGE_LIMIT);
}
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn) : connman(connmanIn) {
PeerLogicValidation::PeerLogicValidation(CConnman* connmanIn, CScheduler &scheduler) : connman(connmanIn), m_stale_tip_check_time(0) {
// Initialize global variables that cannot be constructed at startup.
recentRejects.reset(new CRollingBloomFilter(120000, 0.000001));
const Consensus::Params& consensusParams = Params().GetConsensus();
// Stale tip checking and peer eviction are on two different timers, but we
// don't want them to get out of sync due to drift in the scheduler, so we
// combine them in one function and schedule at the quicker (peer-eviction)
// timer.
static_assert(EXTRA_PEER_CHECK_INTERVAL < STALE_CHECK_INTERVAL, "peer eviction timer should be less than stale tip check timer");
scheduler.scheduleEvery(std::bind(&PeerLogicValidation::CheckForStaleTipAndEvictPeers, this, consensusParams), EXTRA_PEER_CHECK_INTERVAL * 1000);
}
void PeerLogicValidation::BlockConnected(const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindex, const std::vector<CTransactionRef>& vtxConflicted) {
@ -1424,6 +1441,7 @@ bool static ProcessHeadersMessage(CNode *pfrom, CConnman *connman, const std::ve
// If this is an outbound peer, check to see if we should protect
// it from the bad/lagging chain logic.
if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) {
LogPrint(BCLog::NET, "Protecting outbound peer=%d from eviction\n", pfrom->GetId());
nodestate->m_chain_sync.m_protect = true;
++g_outbound_peers_with_protect_from_disconnect;
}
@ -3004,6 +3022,83 @@ void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds)
}
}
void PeerLogicValidation::EvictExtraOutboundPeers(int64_t time_in_seconds)
{
// Check whether we have too many outbound peers
int extra_peers = connman->GetExtraOutboundCount();
if (extra_peers > 0) {
// If we have more outbound peers than we target, disconnect one.
// Pick the outbound peer that least recently announced
// us a new block, with ties broken by choosing the more recent
// connection (higher node id)
NodeId worst_peer = -1;
int64_t oldest_block_announcement = std::numeric_limits<int64_t>::max();
LOCK(cs_main);
connman->ForEachNode([&](CNode* pnode) {
// Ignore non-outbound peers, or nodes marked for disconnect already
if (!IsOutboundDisconnectionCandidate(pnode) || pnode->fDisconnect) return;
CNodeState *state = State(pnode->GetId());
if (state == nullptr) return; // shouldn't be possible, but just in case
// Don't evict our protected peers
if (state->m_chain_sync.m_protect) return;
if (state->m_last_block_announcement < oldest_block_announcement || (state->m_last_block_announcement == oldest_block_announcement && pnode->GetId() > worst_peer)) {
worst_peer = pnode->GetId();
oldest_block_announcement = state->m_last_block_announcement;
}
});
if (worst_peer != -1) {
bool disconnected = connman->ForNode(worst_peer, [&](CNode *pnode) {
// Only disconnect a peer that has been connected to us for
// some reasonable fraction of our check-frequency, to give
// it time for new information to have arrived.
// Also don't disconnect any peer we're trying to download a
// block from.
CNodeState &state = *State(pnode->GetId());
if (time_in_seconds - pnode->nTimeConnected > MINIMUM_CONNECT_TIME && state.nBlocksInFlight == 0) {
LogPrint(BCLog::NET, "disconnecting extra outbound peer=%d (last block announcement received at time %d)\n", pnode->GetId(), oldest_block_announcement);
pnode->fDisconnect = true;
return true;
} else {
LogPrint(BCLog::NET, "keeping outbound peer=%d chosen for eviction (connect time: %d, blocks_in_flight: %d)\n", pnode->GetId(), pnode->nTimeConnected, state.nBlocksInFlight);
return false;
}
});
if (disconnected) {
// If we disconnected an extra peer, that means we successfully
// connected to at least one peer after the last time we
// detected a stale tip. Don't try any more extra peers until
// we next detect a stale tip, to limit the load we put on the
// network from these extra connections.
connman->SetTryNewOutboundPeer(false);
}
}
}
}
void PeerLogicValidation::CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams)
{
if (connman == nullptr) return;
int64_t time_in_seconds = GetTime();
EvictExtraOutboundPeers(time_in_seconds);
if (time_in_seconds > m_stale_tip_check_time) {
LOCK(cs_main);
// Check whether our tip is stale, and if so, allow using an extra
// outbound peer
if (TipMayBeStale(consensusParams)) {
LogPrintf("Potential stale tip detected, will try using extra outbound peer (last tip update: %d seconds ago)\n", time_in_seconds - g_last_tip_update);
connman->SetTryNewOutboundPeer(true);
} else if (connman->GetTryNewOutboundPeer()) {
connman->SetTryNewOutboundPeer(false);
}
m_stale_tip_check_time = time_in_seconds + STALE_CHECK_INTERVAL;
}
}
class CompareInvMempoolOrder
{
CTxMemPool *mp;

View file

@ -8,6 +8,7 @@
#include "net.h"
#include "validationinterface.h"
#include "consensus/params.h"
/** Default for -maxorphantx, maximum number of orphan transactions kept in memory */
static const unsigned int DEFAULT_MAX_ORPHAN_TRANSACTIONS = 100;
@ -27,13 +28,19 @@ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/head
static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4;
/** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */
static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes
/** How frequently to check for stale tips, in seconds */
static constexpr int64_t STALE_CHECK_INTERVAL = 10 * 60; // 10 minutes
/** How frequently to check for extra outbound peers and disconnect, in seconds */
static constexpr int64_t EXTRA_PEER_CHECK_INTERVAL = 45;
/** Minimum time an outbound-peer-eviction candidate must be connected for, in order to evict, in seconds */
static constexpr int64_t MINIMUM_CONNECT_TIME = 30;
class PeerLogicValidation : public CValidationInterface, public NetEventsInterface {
private:
CConnman* connman;
CConnman* const connman;
public:
explicit PeerLogicValidation(CConnman* connman);
explicit PeerLogicValidation(CConnman* connman, CScheduler &scheduler);
void BlockConnected(const std::shared_ptr<const CBlock>& pblock, const CBlockIndex* pindexConnected, const std::vector<CTransactionRef>& vtxConflicted) override;
void UpdatedBlockTip(const CBlockIndex *pindexNew, const CBlockIndex *pindexFork, bool fInitialDownload) override;
@ -55,6 +62,11 @@ public:
bool SendMessages(CNode* pto, std::atomic<bool>& interrupt) override;
void ConsiderEviction(CNode *pto, int64_t time_in_seconds);
void CheckForStaleTipAndEvictPeers(const Consensus::Params &consensusParams);
void EvictExtraOutboundPeers(int64_t time_in_seconds);
private:
int64_t m_stale_tip_check_time; //! Next time to check for stale tip
};
struct CNodeStateStats {

View file

@ -86,7 +86,7 @@ TestingSetup::TestingSetup(const std::string& chainName) : BasicTestingSetup(cha
threadGroup.create_thread(&ThreadScriptCheck);
g_connman = std::unique_ptr<CConnman>(new CConnman(0x1337, 0x1337)); // Deterministic randomness for tests.
connman = g_connman.get();
peerLogic.reset(new PeerLogicValidation(connman));
peerLogic.reset(new PeerLogicValidation(connman, scheduler));
}
TestingSetup::~TestingSetup()