contrib: Add port parsing to makeseeds.py

Allow for non-8333 nodes to appear in the internal seeds. This will
allow bitcoind to bypas a filter on 8333. This also makes it possible to
use the same tool for e.g. testnet.

As hosts with multiple nodes per IP are likely abusive, add a filter to
remove these (the ASN check will take care of them for IPv4, but not
IPv6 or onion).
This commit is contained in:
Wladimir J. van der Laan 2015-06-25 07:53:15 +02:00
parent ccd4369a23
commit 884454aebe

View file

@ -22,10 +22,11 @@ SUSPICIOUS_HOSTS = set([
import re import re
import sys import sys
import dns.resolver import dns.resolver
import collections
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):8333$") PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:8333$") PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):8333$") PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$")
PATTERN_AGENT = re.compile(r"^(\/Satoshi:0\.8\.6\/|\/Satoshi:0\.9\.(2|3|4|5)\/|\/Satoshi:0\.10\.\d{1,2}\/|\/Satoshi:0\.11\.\d{1,2}\/)$") PATTERN_AGENT = re.compile(r"^(\/Satoshi:0\.8\.6\/|\/Satoshi:0\.9\.(2|3|4|5)\/|\/Satoshi:0\.10\.\d{1,2}\/|\/Satoshi:0\.11\.\d{1,2}\/)$")
def parseline(line): def parseline(line):
@ -43,12 +44,15 @@ def parseline(line):
return None return None
else: else:
net = 'onion' net = 'onion'
sortkey = m.group(1) ipstr = sortkey = m.group(1)
port = int(m.group(2))
else: else:
net = 'ipv6' net = 'ipv6'
if m.group(1) in ['::']: # Not interested in localhost if m.group(1) in ['::']: # Not interested in localhost
return None return None
sortkey = m.group(1) # XXX parse IPv6 into number, could use name_to_ipv6 from generate-seeds ipstr = m.group(1)
sortkey = ipstr # XXX parse IPv6 into number, could use name_to_ipv6 from generate-seeds
port = int(m.group(2))
else: else:
# Do IPv4 sanity check # Do IPv4 sanity check
ip = 0 ip = 0
@ -60,6 +64,8 @@ def parseline(line):
return None return None
net = 'ipv4' net = 'ipv4'
sortkey = ip sortkey = ip
ipstr = m.group(1)
port = int(m.group(6))
# Skip bad results. # Skip bad results.
if sline[1] == 0: if sline[1] == 0:
return None return None
@ -78,7 +84,8 @@ def parseline(line):
# Construct result. # Construct result.
return { return {
'net': net, 'net': net,
'ip': m.group(1), 'ip': ipstr,
'port': port,
'ipnum': ip, 'ipnum': ip,
'uptime': uptime30, 'uptime': uptime30,
'lastsuccess': lastsuccess, 'lastsuccess': lastsuccess,
@ -89,6 +96,13 @@ def parseline(line):
'sortkey': sortkey, 'sortkey': sortkey,
} }
def filtermultiport(ips):
'''Filter out hosts with more nodes per IP'''
hist = collections.defaultdict(list)
for ip in ips:
hist[ip['sortkey']].append(ip)
return [value[0] for (key,value) in hist.items() if len(value)==1]
# Based on Greg Maxwell's seed_filter.py # Based on Greg Maxwell's seed_filter.py
def filterbyasn(ips, max_per_asn, max_total): def filterbyasn(ips, max_per_asn, max_total):
# Sift out ips by type # Sift out ips by type
@ -138,13 +152,18 @@ def main():
ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])] ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])]
# Sort by availability (and use last success as tie breaker) # Sort by availability (and use last success as tie breaker)
ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True) ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True)
# Filter out hosts with multiple bitcoin ports, these are likely abusive
ips = filtermultiport(ips)
# Look up ASNs and limit results, both per ASN and globally. # Look up ASNs and limit results, both per ASN and globally.
ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS) ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
# Sort the results by IP address (for deterministic output). # Sort the results by IP address (for deterministic output).
ips.sort(key=lambda x: (x['net'], x['sortkey'])) ips.sort(key=lambda x: (x['net'], x['sortkey']))
for ip in ips: for ip in ips:
print ip['ip'] if ip['net'] == 'ipv6':
print '[%s]:%i' % (ip['ip'], ip['port'])
else:
print '%s:%i' % (ip['ip'], ip['port'])
if __name__ == '__main__': if __name__ == '__main__':
main() main()