]>
Commit | Line | Data |
---|---|---|
9126e087 GA |
1 | #!/usr/bin/env python |
2 | # | |
be22b3da | 3 | # Generate seeds.txt from Pieter's DNS seeder |
9126e087 GA |
4 | # |
5 | ||
be22b3da PW |
# Upper bound on the number of IPv4 seeds kept by the ASN filter.
NSEEDS = 512

# At most this many IPv4 seeds are kept from any single ASN.
MAX_SEEDS_PER_ASN = 2

# Nodes reporting fewer blocks than this are skipped.
MIN_BLOCKS = 337600

# These are hosts that have been observed to be behaving strangely (e.g.
# aggressively connecting to every node).
SUSPICIOUS_HOSTS = set([
    "130.211.129.106",
    "178.63.107.226",
    "83.81.130.26",
    "88.198.17.7",
    "148.251.238.178",
    "176.9.46.6",
    "54.173.72.127",
    "54.174.10.182",
    "54.183.64.54",
    "54.194.231.211",
    "54.66.214.167",
    "54.66.220.137",
    "54.67.33.14",
    "54.77.251.214",
    "54.94.195.96",
    "54.94.200.247",
])
9126e087 GA |
21 | |
22 | import re | |
23 | import sys | |
be22b3da | 24 | import dns.resolver |
41bbc85e | 25 | import collections |
be22b3da | 26 | |
41bbc85e WL |
# "a.b.c.d:port" -- group 1 is the dotted quad, groups 2-5 the four octets,
# group 6 the port.
PATTERN_IPV4 = re.compile(r"^((\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})):(\d+)$")
# "[addr]:port" -- group 1 is the bracketed address text, group 2 the port.
PATTERN_IPV6 = re.compile(r"^\[([0-9a-z:]+)\]:(\d+)$")
# "<16 base32 chars>.onion:port" -- group 1 is the hostname, group 2 the port.
PATTERN_ONION = re.compile(r"^([abcdefghijklmnopqrstuvwxyz234567]{16}\.onion):(\d+)$")
# User agents of the form "/MagicBean:<version>/".
PATTERN_AGENT = re.compile(r"^(\/MagicBean:([0-9a-z-.]+)\/)$")
be22b3da PW |
31 | |
def _parse_address(addr):
    '''Classify an "address:port" string as IPv4, IPv6 or onion.

    Returns a tuple (net, ipstr, ipnum, sortkey, port), or None when the
    string matches none of the address patterns or fails the sanity checks.
    ipnum is the 32-bit integer value for IPv4 and None for other networks.
    '''
    m = PATTERN_IPV4.match(addr)
    if m is not None:
        # Do IPv4 sanity check: every octet must fit in a byte and the
        # address must not be 0.0.0.0.
        ip = 0
        for i in range(4):
            octet = int(m.group(i + 2))
            if octet > 255:
                return None
            ip |= octet << (8 * (3 - i))
        if ip == 0:
            return None
        return ('ipv4', m.group(1), ip, ip, int(m.group(6)))

    m = PATTERN_IPV6.match(addr)
    if m is not None:
        # Reject the all-zero address '::'.
        if m.group(1) == '::':
            return None
        # XXX parse IPv6 into number, could use name_to_ipv6 from generate-seeds
        return ('ipv6', m.group(1), None, m.group(1), int(m.group(2)))

    m = PATTERN_ONION.match(addr)
    if m is not None:
        return ('onion', m.group(1), None, m.group(1), int(m.group(2)))

    return None


def parseline(line):
    '''Parse one whitespace-separated line of seeder output.

    Returns a dict with the keys 'net', 'ip', 'port', 'ipnum', 'uptime',
    'lastsuccess', 'version', 'agent', 'service', 'blocks' and 'sortkey',
    or None when the line is too short, has no valid address, or is marked
    as a bad result.

    Raises ValueError if one of the numeric fields cannot be converted.
    '''
    sline = line.split()
    # Field 11 (the user agent) is read below, so at least 12 fields are
    # required; a shorter line would otherwise raise IndexError.
    if len(sline) < 12:
        return None

    parsed = _parse_address(sline[0])
    if parsed is None:
        return None
    net, ipstr, ip, sortkey, port = parsed

    # Skip bad results. The fields are strings, so compare with '0'
    # (a comparison with the integer 0 can never be true).
    if sline[1] == '0':
        return None

    return {
        'net': net,
        'ip': ipstr,
        'port': port,
        'ipnum': ip,
        # Uptime %, with the trailing character stripped.
        'uptime': float(sline[7][:-1]),
        # Unix timestamp of last success.
        'lastsuccess': int(sline[2]),
        # Protocol version.
        'version': int(sline[10]),
        # User agent, with the first and last character stripped.
        'agent': sline[11][1:-1],
        # Service flags (hexadecimal).
        'service': int(sline[9], 16),
        # Blocks.
        'blocks': int(sline[8]),
        'sortkey': sortkey,
    }
98 | ||
41bbc85e WL |
def filtermultiport(ips):
    '''Filter out hosts with more nodes per IP.

    Every entry whose 'sortkey' is shared with another entry is dropped.
    The surviving entries are returned in their input order, so a sort
    applied by the caller beforehand is kept (iterating a plain dict does
    not guarantee this on older Python versions).
    '''
    counts = collections.Counter(ip['sortkey'] for ip in ips)
    return [ip for ip in ips if counts[ip['sortkey']] == 1]
105 | ||
be22b3da PW |
106 | # Based on Greg Maxwell's seed_filter.py |
def _lookup_asn(ipstr):
    '''Return the ASN (as an int) for a dotted-quad IPv4 address.

    Sends a TXT query for the reversed address under origin.asn.cymru.com
    and takes the first space-separated token of the first quoted string
    in the answer. Raises whatever the resolver or the parsing raises.
    '''
    name = '.'.join(reversed(ipstr.split('.'))) + '.origin.asn.cymru.com'
    answer = dns.resolver.query(name, 'TXT').response.answer
    txt = [x.to_text() for x in answer][0]
    return int(txt.split('\"')[1].split(' ')[0])


def filterbyasn(ips, max_per_asn, max_total):
    '''Limit IPv4 entries per ASN and in total; pass other networks through.

    ips         -- list of dicts with at least the keys 'net' and 'ip'
    max_per_asn -- maximum number of IPv4 entries kept per ASN
    max_total   -- maximum number of IPv4 entries kept overall

    IPv4 entries are taken in input order. Entries whose ASN cannot be
    resolved are reported on stderr and skipped. IPv6 and onion entries
    are appended unfiltered and do not count towards max_total.
    '''
    # Sift out ips by type
    ips_ipv4 = [ip for ip in ips if ip['net'] == 'ipv4']
    ips_ipv6 = [ip for ip in ips if ip['net'] == 'ipv6']
    ips_onion = [ip for ip in ips if ip['net'] == 'onion']

    # Filter IPv4 by ASN
    result = []
    asn_count = {}
    for ip in ips_ipv4:
        if len(result) >= max_total:
            break
        try:
            asn = _lookup_asn(ip['ip'])
        except Exception:
            # Catch Exception rather than everything, so that Ctrl-C and
            # SystemExit still stop a long run of DNS lookups.
            sys.stderr.write('ERR: Could not resolve ASN for "' + ip['ip'] + '"\n')
            continue
        if asn_count.get(asn, 0) >= max_per_asn:
            continue
        asn_count[asn] = asn_count.get(asn, 0) + 1
        result.append(ip)

    # TODO: filter IPv6 by ASN

    # Add back non-IPv4
    result.extend(ips_ipv6)
    result.extend(ips_onion)
    return result
9126e087 GA |
136 | |
def main():
    '''Read seeder output from stdin and print the selected seeds.

    Each input line is parsed with parseline(), the entries are filtered
    and limited, and one "ip:port" line ("[ip]:port" for IPv6) is written
    to stdout per remaining entry.
    '''
    lines = sys.stdin.readlines()
    ips = [parseline(line) for line in lines]

    # Skip entries without a valid address.
    ips = [ip for ip in ips if ip is not None]
    # Skip entries from suspicious hosts.
    ips = [ip for ip in ips if ip['ip'] not in SUSPICIOUS_HOSTS]
    # Enforce minimal number of blocks.
    ips = [ip for ip in ips if ip['blocks'] >= MIN_BLOCKS]
    # Require service bit 1.
    ips = [ip for ip in ips if (ip['service'] & 1) == 1]
    # Require more than 50% 30-day uptime.
    ips = [ip for ip in ips if ip['uptime'] > 50]
    # Require a known and recent user agent.
    ips = [ip for ip in ips if PATTERN_AGENT.match(ip['agent'])]
    # Filter out hosts with multiple bitcoin ports, these are likely abusive.
    # This runs before the sort so that the order handed to the ASN filter
    # does not depend on the order in which this filter returns its result.
    ips = filtermultiport(ips)
    # Sort by availability (and use last success as tie breaker)
    ips.sort(key=lambda x: (x['uptime'], x['lastsuccess'], x['ip']), reverse=True)
    # Look up ASNs and limit results, both per ASN and globally.
    ips = filterbyasn(ips, MAX_SEEDS_PER_ASN, NSEEDS)
    # Sort the results by IP address (for deterministic output).
    ips.sort(key=lambda x: (x['net'], x['sortkey']))

    # A single parenthesised string prints identically on Python 2 and 3.
    for ip in ips:
        if ip['net'] == 'ipv6':
            print('[%s]:%i' % (ip['ip'], ip['port']))
        else:
            print('%s:%i' % (ip['ip'], ip['port']))


if __name__ == '__main__':
    main()