Compare commits

...

205 commits

Author SHA1 Message Date
Jack Robison
ebcc6e5086
update snapshot url in example docker-composes 2023-03-03 13:15:25 -05:00
Jack Robison
c0766f6abc faster RevertableOpStack.apply_packed_undo_ops 2023-02-13 13:09:03 -05:00
Jack Robison
7bc90c425f
fix error 2023-02-07 17:31:36 -05:00
Jack Robison
f2c494d4d6 check for scribe needing a restart due to low memory 2023-02-07 17:23:23 -05:00
Jack Robison
8147bbf3b9 remove --cache_all_claim_txos setting 2023-02-07 17:23:23 -05:00
Jack Robison
adbeeaf203 update snapshot 2023-01-06 20:32:45 -05:00
Jack Robison
f55ed56215 add comment 2023-01-06 20:32:45 -05:00
Jack Robison
d1d33c4bce feedback 2023-01-06 20:32:45 -05:00
Jack Robison
b7de08ba0b add ResumableSHA256 and HashXHistoryHasherPrefixRow column family 2023-01-06 20:32:45 -05:00
Jack Robison
405cef8d28 ResumableSHA256 2023-01-06 20:32:45 -05:00
Jack Robison
21262d2e43
fix edge case deleting an empty value 2022-12-28 13:40:19 -05:00
dependabot[bot]
75d64f9dc6 Bump protobuf from 3.17.2 to 3.18.3
Bumps [protobuf](https://github.com/protocolbuffers/protobuf) from 3.17.2 to 3.18.3.
- [Release notes](https://github.com/protocolbuffers/protobuf/releases)
- [Changelog](https://github.com/protocolbuffers/protobuf/blob/main/generate_changelog.py)
- [Commits](https://github.com/protocolbuffers/protobuf/compare/v3.17.2...v3.18.3)

---
updated-dependencies:
- dependency-name: protobuf
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-11-13 00:01:36 -05:00
Jack Robison
929448d64b collect db values for integrity check in chunks 2022-11-12 23:58:36 -05:00
Jack Robison
134aad29f1 fix issue with supports with wrong names 2022-11-12 23:58:36 -05:00
Jack Robison
dcd4d7a7a8 update docker entrypoint and example composes 2022-11-09 22:06:24 -05:00
Jack Robison
e0c26c0952 update docs 2022-11-09 22:06:24 -05:00
Jack Robison
9fa2d0b6ca batched update blocked/filtered in ES 2022-11-09 22:06:24 -05:00
Jack Robison
0877e34289 use multi_get to make status hash 2022-11-09 22:06:24 -05:00
Jack Robison
92c3d3840d fix address history 2022-11-09 22:06:24 -05:00
Jack Robison
98017e9a76 fix uncaught error in es sync 2022-11-09 22:06:24 -05:00
Jack Robison
6932c38854 logging 2022-11-09 22:06:24 -05:00
Jack Robison
ecdb5ef859 logging 2022-11-09 22:06:24 -05:00
Jack Robison
81c44f5f4e assert 2022-11-09 22:06:24 -05:00
Jack Robison
59c07e315a fix https://github.com/lbryio/hub/issues/104 2022-11-09 22:06:24 -05:00
Jack Robison
07f8ec41a1 fix including early activated claim amount during takeover due to abandon 2022-11-09 22:06:24 -05:00
Jack Robison
01e52e795a fix edge case activating claim updates 2022-11-09 22:06:24 -05:00
Jack Robison
98ec0f5e0c fix supports that activate on the block the claim is abandoned 2022-11-09 22:06:24 -05:00
Jack Robison
eb3d7a183a fix spending claims and supports on the block they activate 2022-11-09 22:06:24 -05:00
Jack Robison
93aebedd7d tests 2022-11-09 22:06:24 -05:00
Jack Robison
616daec0d9 dead code 2022-11-09 22:06:24 -05:00
Jack Robison
745462d3ae migrate 11 to 12 2022-11-09 22:06:24 -05:00
Jack Robison
0634c1e05f fix handling supports without claims 2022-11-09 22:06:24 -05:00
Jack Robison
b230b693f8 effective amount 2022-11-09 22:06:24 -05:00
Jack Robison
97a0d00bfd faster active amount as of height 2022-11-09 22:06:24 -05:00
Jack Robison
f2f0f426aa stash 2022-11-09 22:06:24 -05:00
Jack Robison
7be5905b46 refactor to use batched pending and future effective amounts 2022-11-09 22:06:24 -05:00
Jack Robison
7c9e91f91a batched _get_pending_effective_amounts 2022-11-09 22:06:24 -05:00
Jack Robison
586e9a613b logging 2022-11-09 22:06:24 -05:00
Jack Robison
e61efcd00d future effective amount index 2022-11-09 22:06:24 -05:00
Jack Robison
9311d924f7 improve logging 2022-11-09 22:06:24 -05:00
Jack Robison
8947d3cb19 increase UTXO and HashXUTXO cache sizes 2022-11-09 22:06:24 -05:00
Jack Robison
2150363108 batch spend claims and supports 2022-11-09 22:06:24 -05:00
Jack Robison
6a9a2ad40f batch spend utxos 2022-11-09 22:06:24 -05:00
Jack Robison
55eb8818ea add --db_disable_integrity_checks option to scribe 2022-11-09 22:06:24 -05:00
Jack Robison
a48564e3b2 batched db integrity checks 2022-11-09 22:06:24 -05:00
Jack Robison
606e9bb0d6 support multiple column families in one raw multi_get call 2022-11-09 22:06:24 -05:00
Jack Robison
abc5184e19 add future effective amount index
-increase lru cache size for effective amount column family to 64mb
2022-11-09 22:06:24 -05:00
Jack Robison
99ddd208db add Block.decoded_header helper property 2022-11-09 22:06:24 -05:00
Jack Robison
fc234b12e5 handle es_info being an empty file 2022-11-09 22:06:24 -05:00
Jack Robison
04d747ff99 failover support for elastic-sync-notifier and elasticsearch
deprecates herald options `elastic_host`, `elastic_port`, `elastic_notifier_host`, and `elastic_notifier_port` in favor of the single new `elastic_services` option
2022-11-09 22:06:24 -05:00
Jack Robison
4586b344ce move search_index object to HubServerService 2022-11-09 22:06:24 -05:00
Jonathan Moody
9b17822229 Address feedback. Broaden logging to more types of Exception. 2022-09-28 11:04:28 -04:00
Jonathan Moody
0f33f2bfef Retry on EADDRINUSE. 2022-09-28 11:04:28 -04:00
Jonathan Moody
bc4e3aa7fb Get addr status after subscribing to fix race. 2022-09-26 11:30:09 -04:00
Jack Robison
e60bb35ebd fix https://github.com/lbryio/hub/issues/61 2022-09-20 14:13:06 -04:00
Jack Robison
024aceda53 feedback 2022-09-14 12:28:09 -04:00
Jack Robison
6155700a68 use session TaskGroup for notification tasks 2022-09-14 12:28:09 -04:00
Jack Robison
9b3618f73e feedback 2022-09-13 13:46:15 -04:00
Jack Robison
911f483ce3 multi_put claim_short_id 2022-09-13 13:46:15 -04:00
Jack Robison
09e1aba567 use effective amount index for faster future amount calculations 2022-09-13 13:46:15 -04:00
Jack Robison
66c63f0511 fix effective amount index for supports for non existent claims 2022-09-13 13:46:15 -04:00
Jack Robison
115fd217e8 fix trying to insert an effective amount for a new claim abandoned this block 2022-09-13 13:46:15 -04:00
Jack Robison
27520c835e use effective_amount index in scribe for faster takeover calculations 2022-09-13 13:46:15 -04:00
Jack Robison
c686187e35 add migrators to build new indexes 2022-09-13 13:46:15 -04:00
Jack Robison
ee02a80a98 cleanup 2022-09-13 13:46:15 -04:00
Jack Robison
bb6166c62d expose protocol class for tests to override 2022-09-13 13:46:15 -04:00
Jack Robison
08f25a7c50 effective amount index
fixes https://github.com/lbryio/hub/issues/64
2022-09-13 13:46:15 -04:00
Jack Robison
292ad2b9b6 rename effective_amount prefix -> bid_order 2022-09-13 13:46:15 -04:00
Jack Robison
2627f02a55 add reposted_count index
fixes https://github.com/lbryio/hub/issues/63
2022-09-13 13:46:15 -04:00
Jonathan Moody
18e0571e81 Updates for review comments. Implement timeout counter
bump in different way.
2022-09-12 14:35:38 -04:00
Jonathan Moody
807e6151f2 Revert "Report API-level search timeouts in interrupt_count_metric."
This reverts commit 28c711efad.
2022-09-12 14:35:38 -04:00
Jonathan Moody
8a1f0f7f33 Report API-level search timeouts in interrupt_count_metric. 2022-09-12 14:35:38 -04:00
Jonathan Moody
3cf620ed8e Specify API-level search timeout to reduce hard timeout errors. 2022-09-12 14:35:38 -04:00
Jonathan Moody
75d53c3c6f Treat RPC response=None as a WarmingUpError. 2022-09-07 11:01:20 -04:00
Jack Robison
59043a9add
fix https://github.com/lbryio/hub/issues/89 2022-08-30 11:30:18 -04:00
Jack Robison
f7eca425eb
fix cache 2022-08-27 10:12:35 -04:00
Jack Robison
84b978278e
fix https://github.com/lbryio/hub/issues/87 2022-08-27 09:51:04 -04:00
Jack Robison
579f95f9fc
set maxlen 2022-08-26 11:38:56 -04:00
Jack Robison
99e1006cb5 add largest_address_history_cache_size setting, defaulting to 256 2022-08-26 11:05:57 -04:00
Jack Robison
4958097b66 fix attribute name 2022-08-26 11:05:57 -04:00
Jack Robison
cd9b91e1d9 use LargestValueCache for caching the largest needed full address histories 2022-08-26 11:05:57 -04:00
Jonathan Moody
fc38bda03c Strengthen --daemon_url checks using yarl.URL (what aiohttp uses). 2022-08-24 10:17:07 -04:00
Jack Robison
c53d2b6f5a increase the default history_tx_cache_size 2022-08-18 18:32:36 -04:00
Jack Robison
570bda9c8b remove redundant hashX_history_cache
-remove redundant cache that used a lot of memory
2022-08-18 18:32:36 -04:00
Jack Robison
34c5ab2e56 include blocked/filtered in the error message 2022-08-17 10:58:45 -04:00
Jack Robison
4bc2bf79eb simplify applying filtering/blocking 2022-08-17 10:58:45 -04:00
Jack Robison
db3294e6e0 include reason from the repost description in blocking/filtering errors 2022-08-17 10:58:45 -04:00
Jack Robison
93850d72eb
Update README.md 2022-08-15 14:59:13 -04:00
Jack Robison
c6cc3cbd26
fix https://github.com/lbryio/hub/issues/68 2022-08-09 15:42:57 -04:00
Jonathan Moody
23599ee1b2 Update tests to account for multi_get feature and new DBStatePrefixRow fields. 2022-08-09 14:17:54 -04:00
Jack Robison
42584ca60a
Revert "add version file"
This reverts commit d57fc7eab9.
2022-08-09 14:17:34 -04:00
Jeffrey Picard
d57fc7eab9 add version file 2022-08-09 17:12:50 +03:00
Jack Robison
4845b92248
update diagram 2022-08-01 11:52:03 -04:00
Jack Robison
9ad09c7c6d
update diagram 2022-08-01 11:50:14 -04:00
Jack Robison
35483fa0b1 increase scribe history tx lfu cache size 2022-07-31 15:38:53 -04:00
Jack Robison
bffd1b1394 fix initial height for scribe metrics 2022-07-31 15:38:53 -04:00
Jack Robison
34e3f9ecee expose cache size settings 2022-07-31 15:38:53 -04:00
Jack Robison
9df8f9c651 metrics 2022-07-31 15:38:53 -04:00
Jack Robison
0918299163 use LFU caches 2022-07-31 15:38:53 -04:00
Jack Robison
a46343c84f add LFUCache and LFUCacheWithMetrics 2022-07-31 15:38:53 -04:00
Jack Robison
eb87474b48 expose tx_cache_size setting 2022-07-17 13:56:34 -04:00
Jack Robison
fc9b0af5b6 expose resolved_url_cache_size 2022-07-17 13:56:34 -04:00
Jack Robison
a41abc870d expose merkle_cache_size setting 2022-07-17 13:56:34 -04:00
Jack Robison
78e9d7b50b
add example scripts for manually copying docker volumes 2022-07-06 16:00:38 -04:00
dependabot[bot]
a10beac943 Bump ujson from 5.2.0 to 5.4.0
Bumps [ujson](https://github.com/ultrajson/ultrajson) from 5.2.0 to 5.4.0.
- [Release notes](https://github.com/ultrajson/ultrajson/releases)
- [Commits](https://github.com/ultrajson/ultrajson/compare/5.2.0...5.4.0)

---
updated-dependencies:
- dependency-name: ujson
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-07-06 13:08:21 -04:00
Jack Robison
9e3963ba23
fix resolve error messages 2022-06-30 12:37:12 -04:00
Jack Robison
3b2b8f814c add wait_until_block testing helper to BlockchainProcessorService 2022-06-30 11:22:17 -04:00
Jack Robison
2363865e00 prune stale trending notifications during initial bulk sync 2022-06-30 11:22:17 -04:00
Jack Robison
903a44d991 add mempool time metric for scribe prometheus 2022-06-30 11:22:17 -04:00
Jack Robison
6b46f0488d fix scribe prometheus 2022-06-30 11:22:17 -04:00
Jack Robison
c3703fd13f fix using lbcd with ssl 2022-06-30 11:22:17 -04:00
Jack Robison
79b84d89a3 batched claim txo fetching for the ES claim producers 2022-06-30 11:22:17 -04:00
Jack Robison
ac01a17214 fix estimate_timestamp 2022-06-27 18:56:59 -04:00
Jack Robison
a86388f6de add resolved url lru cache 2022-06-27 18:56:59 -04:00
Jack Robison
4b90097997 increase cache sizes for Tx, TxNum, and TxHash prefixes 2022-06-27 18:56:59 -04:00
Jack Robison
0094237b97 update default ADDRESS_HISTORY_CACHE_SIZE 2022-06-27 18:56:59 -04:00
Jack Robison
028143ec7e reduce tx_cache and merkle_cache sizes 2022-06-27 18:56:59 -04:00
Jack Robison
ea7b55f1f0
fix issue with claims for the same channel, related to https://github.com/lbryio/hub/issues/51 2022-06-17 01:21:11 -04:00
Jack Robison
b069e3d824
fix https://github.com/lbryio/hub/issues/51 2022-06-17 00:53:46 -04:00
Jack Robison
2038877e4e
update example docker composes to use master 2022-06-16 18:00:44 -04:00
Jack Robison
6c0f901d33
fix updating the block height in udp pong responses 2022-06-16 15:26:54 -04:00
Jack Robison
1a5fd214b9
Update cluster_guide.md 2022-06-15 17:57:07 -04:00
Jack Robison
5512a841e1
update docker build action 2022-06-15 17:42:13 -04:00
kodxana
983955f5d0 Update docker-image.yml 2022-06-15 17:40:50 -04:00
Jack Robison
23ac3fcd89
Merge pull request #49 from lbryio/lbcd-ssl
add `--daemon_ca_path` arg to use ssl with lbcd
2022-06-15 10:13:28 -04:00
Jack Robison
9dbd8cab4b
add --daemon_ca_path arg to use ssl with lbcd
fixes https://github.com/lbryio/hub/issues/41
2022-06-14 15:25:28 -04:00
Jack Robison
6fd718f353
Merge pull request #48 from lbryio/initialize-from-snapshot
Optionally initialize rocksdb from a snapshot
2022-06-14 14:35:22 -04:00
Jack Robison
09ea58c062
update examples 2022-06-14 14:20:31 -04:00
Jack Robison
fbe68d516c
optionally initialize rocksdb from a snapshot
fixes https://github.com/lbryio/hub/issues/10
2022-06-14 13:59:53 -04:00
Jack Robison
4187afd165
Merge pull request #45 from lbryio/batched-status
Use multi_get for resolve and address statuses
2022-06-14 13:53:12 -04:00
Jack Robison
5d44018018
batched resolve using multi_get 2022-06-10 09:31:08 -04:00
Jack Robison
287de0807c
remove unused code 2022-05-31 16:42:23 -04:00
Jack Robison
e9be86229d
address_history_cache_size 2022-05-31 12:23:21 -04:00
Jack Robison
237b78ee63
use multi_get for sending history notifications and for batched subscriptions 2022-05-27 14:06:34 -04:00
Jack Robison
f1d51eae7b
add get_hashX_statuses, using multi_get 2022-05-27 14:05:43 -04:00
Jack Robison
709ea1ebcb
set constant relayfee and estimatefee values 2022-05-27 13:57:52 -05:00
Jack Robison
9f2e329d99
Merge pull request #43 from lbryio/cleanup-db
Split up the db classes used by the different hub services
2022-05-27 12:54:21 -04:00
Jack Robison
76dd9c392b
fix uncaught error returning transactions that dont yet exist 2022-05-27 11:58:09 -04:00
Jack Robison
78bd2da267
cache estimated_timestamp 2022-05-27 10:05:13 -04:00
Jack Robison
0901f67d89
split up secondary/primary db classes 2022-05-27 09:47:23 -04:00
Jack Robison
66278443c4
Merge pull request #42 from lbryio/dependabot/pip/ujson-5.2.0
Bump ujson from 5.1.0 to 5.2.0
2022-05-27 09:43:36 -04:00
dependabot[bot]
c11aba7aa4
Bump ujson from 5.1.0 to 5.2.0
Bumps [ujson](https://github.com/ultrajson/ultrajson) from 5.1.0 to 5.2.0.
- [Release notes](https://github.com/ultrajson/ultrajson/releases)
- [Commits](https://github.com/ultrajson/ultrajson/compare/5.1.0...5.2.0)

---
updated-dependencies:
- dependency-name: ujson
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-05-26 01:23:19 +00:00
Jack Robison
c43ec575ae
Update cluster_guide.md 2022-05-24 15:31:58 -04:00
Jack Robison
a7e6bcb366
fix metric 2022-05-22 12:11:24 -04:00
Jack Robison
fb98a4d7d0
Merge pull request #40 from lbryio/improve-history-cache
Improve `blockchain.address.get_history` performance
2022-05-22 00:00:38 -04:00
Jack Robison
75e9123eaf
update the history cache in place instead of clearing/rebuilding 2022-05-21 15:25:07 -04:00
Jack Robison
7263ec553e
asyncify for loop 2022-05-20 14:43:27 -04:00
Jack Robison
4466bb1451
address_subscriptions gauge and history_size histogram metrics 2022-05-20 11:58:18 -04:00
Jack Robison
e9f2b1efea
asyncio.sleep less often 2022-05-20 11:32:39 -04:00
Jack Robison
cb1ed3beb1
history_tx_info_cache metrics 2022-05-19 16:36:55 -04:00
Jack Robison
e5713dc63c
improve caching for blockchain.address.get_history 2022-05-19 12:56:17 -04:00
Jack Robison
9a6f2a6d96
update examples and deploy script 2022-05-19 10:37:38 -04:00
Jack Robison
844bdbdf60
update readme 2022-05-18 16:49:42 -04:00
Jack Robison
8d125f8d44
Update README.md 2022-05-18 16:34:29 -04:00
Jack Robison
36c1471dcf
Update README.md 2022-05-18 16:31:52 -04:00
Jack Robison
9e9c778edd
Merge pull request #38 from lbryio/rename-things
Rename package to `hub`
2022-05-18 16:18:21 -04:00
Jack Robison
87c8457144
fix dockerfile 2022-05-18 15:37:22 -04:00
Jack Robison
b873f75ff6
update entrypoint 2022-05-18 15:08:56 -04:00
Jack Robison
706971edbe
fix logging 2022-05-18 14:41:34 -04:00
Jack Robison
f8022c9c9a
move docker compose examples into docs 2022-05-18 13:42:36 -04:00
Jack Robison
4f7d2af1fa
delete unused 2022-05-18 13:42:25 -04:00
Jack Robison
a919a3a519
move search.py to herald 2022-05-18 13:34:36 -04:00
Jack Robison
37fc334c46
update docker and entrypoints 2022-05-18 13:13:29 -04:00
Jack Robison
30e295ec28
move notifier_protocol.py 2022-05-18 12:02:08 -04:00
Jack Robison
301a0ca66d
rename scribe.elasticsearch -> hub.elastic_sync 2022-05-18 10:57:08 -04:00
Jack Robison
060e423707
rename scribe.hub -> hub.herald 2022-05-18 10:56:09 -04:00
Jack Robison
0da235bceb
rename scribe.blockchain -> hub.scribe 2022-05-18 10:55:12 -04:00
Jack Robison
27e09d7aa7
rename repo to lbry-hub and module to hub 2022-05-18 10:50:20 -04:00
Jack Robison
0eeab397cf
move files to docs folder 2022-05-18 10:44:42 -04:00
Jack Robison
3b7850802a
Merge pull request #35 from lbryio/optional-address-history-status
Expose `--index_address_statuses` setting and improve first sync performance
2022-05-17 20:14:59 -04:00
Jack Robison
14b14686f4
update readme 2022-05-17 19:58:01 -04:00
Jack Robison
830ee294ef
batched catch up for the address status index 2022-05-17 19:20:31 -04:00
Jack Robison
25a8c6b558
remove unneeded labels from prometheus 2022-05-17 11:59:39 -04:00
Jack Robison
f747637688
reduce cache sizes 2022-05-17 11:34:59 -04:00
Jack Robison
bf1667b44d
fix turning address status index on and off 2022-05-17 11:34:59 -04:00
Jack Robison
51a753c4d2
fix settings 2022-05-17 11:34:59 -04:00
Jack Robison
32c21a26a9
cleanup 2022-05-17 11:34:59 -04:00
Jack Robison
708c45504a
delete unused code 2022-05-17 11:34:59 -04:00
Jack Robison
460a06ec04
reduce history_tx_info_cache size 2022-05-17 11:34:59 -04:00
Jack Robison
f91d2be91e
log if address status index is turned on 2022-05-17 11:34:59 -04:00
Jack Robison
d244136efd
add --index_address_statuses option
-scribe no longer writes address statuses nor compacts them during initial sync
-scribe will only precompute address statuses if `--index_address_statuses` is set
-combine history compaction with updating the address status
2022-05-17 11:34:59 -04:00
Jack Robison
e4ac106b98
_get_clear_mempool_ops 2022-05-17 11:34:59 -04:00
Jack Robison
869fc1698c
add tx caching layer to scribe writer to improve performance when cache_all_tx_hashes isn't on 2022-05-17 11:34:59 -04:00
Jack Robison
02922845dd
add multi_put and multi_delete api to the db class 2022-05-17 11:34:59 -04:00
Jack Robison
c5f18a4166
expose --address_history_cache_size setting for scribe 2022-05-17 11:34:59 -04:00
Jack Robison
195bc7c69d
Merge pull request #37 from lbryio/fix-resolve-duplicate-claims-in-channel
fix resolving duplicate names in a channel
2022-05-17 11:34:29 -04:00
Jack Robison
19ac0e83ad
fix https://github.com/lbryio/scribe/issues/16 2022-05-17 11:26:16 -04:00
Jack Robison
d6d758c5c1
fix es timeout 2022-05-09 11:20:15 -04:00
Jack Robison
9fa232e3a1
Merge pull request #28 from lbryio/update-settings
Improve documentation
2022-05-05 17:27:59 -04:00
Jack Robison
a00b11822a
readme 2022-05-05 17:22:14 -04:00
Jack Robison
5d0868704b
add instructions for blocking/filtering 2022-05-05 17:19:44 -04:00
Jack Robison
bda9561178
type annotations 2022-05-05 16:50:39 -04:00
Jack Robison
6f22767486
split env classes 2022-05-05 16:34:27 -04:00
Jack Robison
b230a13761
add lbcd-compose.yml 2022-05-04 16:52:07 -04:00
Jack Robison
53206a0861
update readme and docker files 2022-05-04 15:24:47 -04:00
Jack Robison
10baf47c02
lbcd volume 2022-05-04 13:00:16 -04:00
Jack Robison
7f277dda2f
add lbcd to all-in-one docker-compose 2022-05-04 13:00:16 -04:00
Jack Robison
5a0af081e6
update readme and example docker-composes 2022-05-04 13:00:16 -04:00
Jack Robison
a1e5d22570
update example docker-compose 2022-05-04 13:00:16 -04:00
Jack Robison
ca39d38dda
improve cli/env settings
-remove unused settings
-add help strings
-fix required settings
2022-05-04 13:00:16 -04:00
Jack Robison
9efddcdbf9
remove unnecessary docker-compose down in deploy script 2022-05-04 12:59:36 -04:00
Jack Robison
95495aa786
Merge pull request #30 from lbryio/get-transaction-json
fix `verbose` arg for `blockchain.transaction.get`
2022-05-02 18:07:02 -04:00
Jack Robison
9525c86a78
add fixmes for fields that aren't yet matching lbrycrd 2022-05-02 18:05:49 -04:00
Jack Robison
a9a4f87628
fix peer list 2022-05-02 17:58:15 -04:00
Jack Robison
4e8c8d4054
return transaction as json if verbose is given to blockchain.transaction.get
-fixes https://github.com/lbryio/scribe/issues/27
2022-05-02 15:20:58 -04:00
120 changed files with 6656 additions and 3680 deletions

@@ -1,16 +1,79 @@
name: Docker Image CI
name: Publish Docker Image
on:
push:
branches: [ master ]
branches:
- 'master'
- 'development'
tags:
- '*'
env:
# github.repository as <account>/<repo>
IMAGE_NAME: lbry/hub
jobs:
login:
build:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
steps:
-
name: Login to Docker Hub
uses: docker/login-action@v1
- name: Checkout repository
uses: actions/checkout@v3
# # Install the cosign tool except on PR
# # https://github.com/sigstore/cosign-installer
# - name: Install cosign
# if: github.event_name != 'pull_request'
# uses: sigstore/cosign-installer@d6a3abf1bdea83574e28d40543793018b6035605
# with:
# cosign-release: 'v1.7.1'
# Workaround: https://github.com/docker/build-push-action/issues/461
- name: Setup Docker buildx
uses: docker/setup-buildx-action@v2
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into registry ${{ env.REGISTRY }}
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Extract metadata (tags, labels) for Docker
# https://github.com/docker/metadata-action
- name: Extract Docker metadata
id: meta
uses: docker/metadata-action@v2
with:
images: ${{ env.IMAGE_NAME }}
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v3
with:
context: .
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ env.IMAGE_NAME }}:${{ github.ref_name }}
# # Sign the resulting Docker image digest except on PRs.
# # This will only write to the public Rekor transparency log when the Docker
# # repository is public to avoid leaking data. If you would like to publish
# # transparency data even for private images, pass --force to cosign below.
# # https://github.com/sigstore/cosign
# - name: Sign the published Docker image
# if: ${{ github.event_name != 'pull_request' }}
# env:
# COSIGN_EXPERIMENTAL: "true"
# # This step uses the identity token to provision an ephemeral certificate
# # against the sigstore community Fulcio instance.
# run: cosign sign ${{ steps.meta.outputs.tags }}@${{ steps.build-and-push.outputs.digest }}

@@ -35,22 +35,12 @@ USER $user
WORKDIR $projects_dir
RUN python3.9 -m pip install pip
RUN python3.9 -m pip install -e .
RUN python3.9 docker/set_build.py
RUN python3.9 scripts/set_build.py
RUN rm ~/.cache -rf
# entry point
ARG host=localhost
ARG tcp_port=50001
ARG daemon_url=http://lbry:lbry@localhost:9245/
VOLUME $db_dir
ENV TCP_PORT=$tcp_port
ENV HOST=$host
ENV DAEMON_URL=$daemon_url
ENV DB_DIRECTORY=$db_dir
ENV MAX_SESSIONS=100000
ENV MAX_SEND=1000000000000000000
ENV MAX_RECEIVE=1000000000000000000
COPY ./docker/scribe_entrypoint.sh /entrypoint.sh
COPY ./scripts/entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

@@ -1,69 +1,110 @@
## Scribe
## LBRY Hub
Scribe is a python library for building services that use the processed data from the [LBRY blockchain](https://github.com/lbryio/lbrycrd) in an ongoing manner. Scribe contains a set of three core executable services that are used together:
* `scribe` ([scribe.blockchain.service](https://github.com/lbryio/scribe/tree/master/scribe/blockchain/service.py)) - maintains a [rocksdb](https://github.com/lbryio/lbry-rocksdb) database containing the LBRY blockchain.
* `scribe-hub` ([scribe.hub.service](https://github.com/lbryio/scribe/tree/master/scribe/hub/service.py)) - an electrum server for thin-wallet clients (such as [lbry-sdk](https://github.com/lbryio/lbry-sdk)), provides an api for clients to use thin simple-payment-verification (spv) wallets and to resolve and search claims published to the LBRY blockchain.
* `scribe-elastic-sync` ([scribe.elasticsearch.service](https://github.com/lbryio/scribe/tree/master/scribe/elasticsearch/service.py)) - a utility to maintain an elasticsearch database of metadata for claims in the LBRY blockchain
This repo provides a python library, `hub`, for building services that use the processed data from the [LBRY blockchain](https://github.com/lbryio/lbrycrd) in an ongoing manner. Hub contains a set of three core executable services that are used together:
* `scribe` ([hub.scribe.service](https://github.com/lbryio/hub/tree/master/hub/service.py)) - maintains a [rocksdb](https://github.com/lbryio/lbry-rocksdb) database containing the LBRY blockchain.
* `herald` ([hub.herald.service](https://github.com/lbryio/hub/tree/master/hub/herald/service.py)) - an electrum server for thin-wallet clients (such as [lbry-sdk](https://github.com/lbryio/lbry-sdk)), provides an api for clients to use thin simple-payment-verification (spv) wallets and to resolve and search claims published to the LBRY blockchain. A drop in replacement port of herald written in go - [herald.go](https://github.com/lbryio/herald.go) is currently being worked on.
* `scribe-elastic-sync` ([hub.elastic_sync.service](https://github.com/lbryio/hub/tree/master/hub/elastic_sync/service.py)) - a utility to maintain an elasticsearch database of metadata for claims in the LBRY blockchain
Features and overview of scribe as a python library:
![](https://raw.githubusercontent.com/lbryio/hub/master/docs/diagram.png)
Features and overview of `hub` as a python library:
* Uses Python 3.7-3.9 (3.10 probably works but hasn't yet been tested)
* An interface developers may implement in order to build their own applications able to receive up-to-date blockchain data in an ongoing manner ([scribe.service.BlockchainReaderService](https://github.com/lbryio/scribe/tree/master/scribe/service.py))
* Protobuf schema for encoding and decoding metadata stored on the blockchain ([scribe.schema](https://github.com/lbryio/scribe/tree/master/scribe/schema))
* [Rocksdb 6.25.3](https://github.com/lbryio/lbry-rocksdb/) based database containing the blockchain data ([scribe.db](https://github.com/lbryio/scribe/tree/master/scribe/db))
* [A community driven performant trending algorithm](https://raw.githubusercontent.com/lbryio/scribe/master/scribe/elasticsearch/trending%20algorithm.pdf) for searching claims ([code](https://github.com/lbryio/scribe/blob/master/scribe/elasticsearch/fast_ar_trending.py))
* An interface developers may implement in order to build their own applications able to receive up-to-date blockchain data in an ongoing manner ([hub.service.BlockchainReaderService](https://github.com/lbryio/hub/tree/master/hub/service.py))
* Protobuf schema for encoding and decoding metadata stored on the blockchain ([hub.schema](https://github.com/lbryio/hub/tree/master/hub/schema))
* [Rocksdb 6.25.3](https://github.com/lbryio/lbry-rocksdb/) based database containing the blockchain data ([hub.db](https://github.com/lbryio/hub/tree/master/hub/db))
* [A community driven performant trending algorithm](https://raw.githubusercontent.com/lbryio/hub/master/docs/trending%20algorithm.pdf) for searching claims ([code](https://github.com/lbryio/hub/blob/master/hub/elastic_sync/fast_ar_trending.py))
## Installation
Scribe may be run from source, a binary, or a docker image.
Our [releases page](https://github.com/lbryio/scribe/releases) contains pre-built binaries of the latest release, pre-releases, and past releases for macOS and Debian-based Linux.
Prebuilt [docker images](https://hub.docker.com/r/lbry/scribe/latest-release) are also available.
Our [releases page](https://github.com/lbryio/hub/releases) contains pre-built binaries of the latest release, pre-releases, and past releases for macOS and Debian-based Linux.
Prebuilt [docker images](https://hub.docker.com/r/lbry/hub/tags) are also available.
### Prebuilt docker image
`docker pull lbry/scribe:latest-release`
`docker pull lbry/hub:master`
### Build your own docker image
```
git clone https://github.com/lbryio/scribe.git
cd scribe
docker build -f ./docker/Dockerfile.scribe -t lbry/scribe:development .
git clone https://github.com/lbryio/hub.git
cd hub
docker build -t lbry/hub:development .
```
### Install from source
Scribe has been tested with python 3.7-3.9. Higher versions probably work but have not yet been tested.
1. clone the scribe scribe
1. clone the scribe repo
```
git clone https://github.com/lbryio/scribe.git
cd scribe
git clone https://github.com/lbryio/hub.git
cd hub
```
2. make a virtual env
```
python3.9 -m venv scribe-venv
python3.9 -m venv hub-venv
```
3. from the virtual env, install scribe
```
source scribe-venv/bin/activate
source hub-venv/bin/activate
pip install -e .
```
That completes the installation; you should now have the commands `scribe`, `scribe-elastic-sync` and `herald`
These can also optionally be run with `python -m hub.scribe`, `python -m hub.elastic_sync`, and `python -m hub.herald`
## Usage
Scribe needs either the [lbrycrd](https://github.com/lbryio/lbrycrd) or [lbcd](https://github.com/lbryio/lbcd) blockchain daemon to be running.
### Requirements
As of block 1124663 (3/10/22) the size of the rocksdb database is 87GB and the size of the elasticsearch volume is 49GB.
Scribe needs elasticsearch and either the [lbrycrd](https://github.com/lbryio/lbrycrd) or [lbcd](https://github.com/lbryio/lbcd) blockchain daemon to be running.
With options for high performance, if you have 64gb of memory and 12 cores, everything can be run on the same machine. However, the recommended way is with elasticsearch on one instance with 8gb of memory and at least 4 cores dedicated to it and the blockchain daemon on another with 16gb of memory and at least 4 cores. Then the scribe hub services can be run on their own instance with between 16 and 32gb of memory (depending on settings) and 8 cores.
As of block 1147423 (4/21/22) the size of the scribe rocksdb database is 120GB and the size of the elasticsearch volume is 63GB.
### docker-compose
The recommended way to run a scribe hub is with docker. See [this guide](https://github.com/lbryio/hub/blob/master/docs/cluster_guide.md) for instructions.
If you have the resources to run all of the services on one machine (at least 300gb of fast storage, preferably nvme, 64gb of RAM, 12 fast cores), see [this](https://github.com/lbryio/hub/blob/master/docs/docker_examples/docker-compose.yml) docker-compose example.
### From source
To start scribe, run the following (providing your own args)
### Options
```
scribe --db_dir /your/db/path --daemon_url rpcuser:rpcpass@localhost:9245
```
#### Content blocking and filtering
For various reasons it may be desirable to block or filter content from claim search and resolve results; [here](https://github.com/lbryio/hub/blob/master/docs/blocking.md) are instructions for how to configure and use this feature as well as information about the recommended defaults.
#### Common options across `scribe`, `herald`, and `scribe-elastic-sync`:
- `--db_dir` (required) Path of the directory containing lbry-rocksdb, set from the environment with `DB_DIRECTORY`
- `--daemon_url` (required for `scribe` and `herald`) URL for rpc from lbrycrd or lbcd, in the format `<rpcuser>:<rpcpassword>@<lbrycrd rpc ip>:<lbrycrd rpc port>`.
- `--reorg_limit` Max reorg depth, defaults to 200, set from the environment with `REORG_LIMIT`.
- `--chain` Which blockchain to use - either `mainnet`, `testnet`, or `regtest` - set from the environment with `NET`
- `--max_query_workers` Size of the thread pool, set from the environment with `MAX_QUERY_WORKERS`
- `--cache_all_tx_hashes` If this flag is set, all tx hashes will be stored in memory. For `scribe`, this speeds up the rate it can apply blocks as well as process mempool. For `herald`, this will speed up syncing address histories. This setting will use 10+g of memory. It can be set from the environment with `CACHE_ALL_TX_HASHES=Yes`
- `--cache_all_claim_txos` If this flag is set, all claim txos will be indexed in memory. Set from the environment with `CACHE_ALL_CLAIM_TXOS=Yes`
- `--prometheus_port` If provided this port will be used to provide prometheus metrics, set from the environment with `PROMETHEUS_PORT`
#### Options for `scribe`
- `--db_max_open_files` This setting translates into the max_open_files option given to rocksdb. A higher number will use more memory. Defaults to 64.
- `--address_history_cache_size` The count of items in the address history cache used for processing blocks and mempool updates. A higher number will use more memory, shouldn't ever need to be higher than 10000. Defaults to 1000.
- `--index_address_statuses` Maintain an index of the statuses of address transaction histories; this makes handling notifications for transactions in a block uniformly fast at the expense of more time to process new blocks and somewhat more disk space (~10gb as of block 1161417).
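For illustration, a hypothetical `scribe` invocation combining the common options with the scribe-specific ones might look like this (paths and values are placeholders, not recommended settings):
```
scribe --db_dir /mnt/lbry-hub-db \
  --daemon_url http://lbry:lbry@127.0.0.1:9245 \
  --chain mainnet \
  --max_query_workers 2 \
  --db_max_open_files 64 \
  --index_address_statuses \
  --prometheus_port 2112
```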
#### Options for `scribe-elastic-sync`
- `--reindex` If this flag is set drop and rebuild the elasticsearch index.
#### Options for `herald`
- `--host` Interface for server to listen on, use 0.0.0.0 to listen on the external interface. Can be set from the environment with `HOST`
- `--tcp_port` Electrum TCP port to listen on for hub server. Can be set from the environment with `TCP_PORT`
- `--udp_port` UDP port to listen on for hub server. Can be set from the environment with `UDP_PORT`
- `--elastic_services` Comma separated list of items in the format `elastic_host:elastic_port/notifier_host:notifier_port`. Can be set from the environment with `ELASTIC_SERVICES`
- `--query_timeout_ms` Timeout for claim searches in elasticsearch in milliseconds. Can be set from the environment with `QUERY_TIMEOUT_MS`
- `--blocking_channel_ids` Space separated list of channel claim ids used for blocking. Claims that are reposted by these channels can't be resolved or returned in search results. Can be set from the environment with `BLOCKING_CHANNEL_IDS`.
- `--filtering_channel_ids` Space separated list of channel claim ids used for filtering. Claims that are reposted by these channels aren't returned in search results. Can be set from the environment with `FILTERING_CHANNEL_IDS`
- `--index_address_statuses` Use the address history status index; this makes handling notifications for transactions in a block uniformly fast (must be turned on in `scribe` too).
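Similarly, a hypothetical `herald` invocation using the options above (values are placeholders; `--elastic_services` uses the `elastic_host:elastic_port/notifier_host:notifier_port` format described earlier):
```
herald --db_dir /mnt/lbry-hub-db \
  --daemon_url http://lbry:lbry@127.0.0.1:9245 \
  --host 0.0.0.0 --tcp_port 50001 --udp_port 50001 \
  --elastic_services 127.0.0.1:9200/127.0.0.1:19080 \
  --query_timeout_ms 3000 \
  --index_address_statuses
```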
## Contributing

(binary file changed, not shown; 142 KiB before)

@@ -1,80 +0,0 @@
version: "3"
volumes:
lbry_rocksdb:
es01:
services:
scribe:
depends_on:
- scribe_elastic_sync
image: lbry/scribe:${SCRIBE_TAG:-latest-release}
restart: always
network_mode: host
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe
- DAEMON_URL=http://lbry:lbry@127.0.0.1:9245
- MAX_QUERY_WORKERS=2
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6
scribe_elastic_sync:
depends_on:
- es01
image: lbry/scribe:${SCRIBE_TAG:-latest-release}
restart: always
network_mode: host
ports:
- "127.0.0.1:19080:19080" # elastic notifier port
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe-elastic-sync
- MAX_QUERY_WORKERS=2
- ELASTIC_HOST=127.0.0.1
- ELASTIC_PORT=9200
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6
scribe_hub:
depends_on:
- scribe_elastic_sync
- scribe
image: lbry/scribe:${SCRIBE_TAG:-latest-release}
restart: always
network_mode: host
ports:
- "50001:50001" # electrum rpc port and udp ping port
- "2112:2112" # comment out to disable prometheus
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe-hub
- DAEMON_URL=http://lbry:lbry@127.0.0.1:9245 # used for broadcasting transactions
- MAX_QUERY_WORKERS=4 # reader threads
- MAX_SESSIONS=100000
- ELASTIC_HOST=127.0.0.1
- ELASTIC_PORT=9200
- HOST=0.0.0.0
- PROMETHEUS_PORT=2112
- TCP_PORT=50001
- ALLOW_LAN_UDP=No
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6
es01:
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.0
container_name: es01
environment:
- node.name=es01
- discovery.type=single-node
- indices.query.bool.max_clause_count=8192
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Dlog4j2.formatMsgNoLookups=true -Xms8g -Xmx8g" # no more than 32, remember to disable swap
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- "es01:/usr/share/elasticsearch/data"
ports:
- "127.0.0.1:9200:9200"

@@ -1,7 +0,0 @@
#!/bin/bash
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd "$DIR/../.." ## make sure we're in the right place. Docker Hub screws this up sometimes
echo "docker build dir: $(pwd)"
docker build --build-arg DOCKER_TAG=$DOCKER_TAG --build-arg DOCKER_COMMIT=$SOURCE_COMMIT -f $DOCKERFILE_PATH -t $IMAGE_NAME .

@@ -1,17 +0,0 @@
#!/bin/bash
# entrypoint for scribe Docker image
set -euo pipefail
if [ -z "$HUB_COMMAND" ]; then
echo "HUB_COMMAND env variable must be scribe, scribe-hub, or scribe-elastic-sync"
exit 1
fi
case "$HUB_COMMAND" in
scribe ) exec /home/lbry/.local/bin/scribe "$@" ;;
scribe-hub ) exec /home/lbry/.local/bin/scribe-hub "$@" ;;
scribe-elastic-sync ) exec /home/lbry/.local/bin/scribe-elastic-sync ;;
* ) "HUB_COMMAND env variable must be scribe, scribe-hub, or scribe-elastic-sync" && exit 1 ;;
esac

docs/blocking.md (new file)

@@ -0,0 +1,23 @@
### Claim filtering and blocking
- Filtered claims are removed from claim search results (`blockchain.claimtrie.search`), but they can still be resolved (`blockchain.claimtrie.resolve`)
- Blocked claims are not included in claim search results and cannot be resolved.
Claims that are either filtered or blocked are replaced with a corresponding error message that includes the censoring channel id in a result that would return them.
#### How to filter or block claims:
1. Make a channel (using lbry-sdk) and include the claim id of the channel in `--filtering_channel_ids` or `--blocking_channel_ids` used by `scribe-hub` **and** `scribe-elastic-sync`, depending on which you want to use the channel for. To use both blocking and filtering, make one channel for each.
2. Using lbry-sdk, repost the claim to be blocked or filtered using your corresponding channel. If the claim id you block/filter is a channel, all of the claims in that channel will be blocked/filtered.
#### Defaults
The example docker-composes in the setup guide use the following defaults:
Filtering:
- `lbry://@LBRY-TagAbuse#770bd7ecba84fd2f7607fb15aedd2b172c2e153f`
- `lbry://@LBRY-UntaggedPorn#95e5db68a3101df19763f3a5182e4b12ba393ee8`
Blocking:
- `lbry://@LBRY-DMCA#dd687b357950f6f271999971f43c785e8067c3a9`
- `lbry://@LBRY-DMCARedFlag#06871aa438032244202840ec59a469b303257cad`
- `lbry://@LBRY-OtherUSIllegal#b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6`
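For example, a minimal sketch of wiring in the default channels above (the same values the example docker-composes pass; give the same lists to both `scribe-hub`/`herald` and `scribe-elastic-sync`):
```
# environment-variable form of --filtering_channel_ids / --blocking_channel_ids
export FILTERING_CHANNEL_IDS="770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8"
export BLOCKING_CHANNEL_IDS="dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6"
```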

docs/cluster_guide.md (new file)

@@ -0,0 +1,201 @@
## Cluster environment guide
For best performance the recommended setup uses three server instances; these can be rented VPSs, self-hosted VMs (ideally not on one physical host unless the host is sufficiently powerful), or physical computers. One is a dedicated lbcd node, one an elasticsearch server, and the third runs the hub services (scribe, herald, and scribe-elastic-sync). With this configuration the lbcd and elasticsearch servers can be shared between multiple herald servers - more on that later.
Server Requirements (space requirements are at least double what's needed so it's possible to copy snapshots into place or make snapshots):
- lbcd: 2 cores, 8gb ram (slightly more may be required syncing from scratch, from a snapshot 8 is plenty), 150gb of NVMe storage
- elasticsearch: 8 cores, 9gb of ram (8gb minimum given to ES), 150gb of SSD speed storage
- hub: 8 cores, 32gb of ram, 200gb of NVMe storage
All servers are assumed to be running ubuntu 20.04 with a user named `lbry` with passwordless sudo and docker group permissions, ssh configured, ulimits set high (in `/etc/security/limits.conf`, also see [this](https://unix.stackexchange.com/questions/366352/etc-security-limits-conf-not-applied/370652#370652) if the ulimit won't apply), and docker + docker-compose installed. The server running elasticsearch should have swap disabled. The three servers need to be able to communicate with each other; they can be on a local network together or communicate over the internet. This guide will assume the three servers are on the internet.
### Setting up the lbcd instance
Log in to the lbcd instance and perform the following steps:
- Build the lbcd docker image by running
```
git clone https://github.com/lbryio/lbcd.git
cd lbcd
docker build . -t lbry/lbcd:latest
```
- Copy the following to `~/docker-compose.yml`
```
version: "3"
volumes:
lbcd:
services:
lbcd:
image: lbry/lbcd:latest
restart: always
network_mode: host
command:
- "--notls"
- "--rpcuser=lbry"
- "--rpcpass=lbry"
- "--rpclisten=127.0.0.1"
volumes:
- "lbcd:/root/.lbcd"
ports:
- "127.0.0.1:9245:9245"
- "9246:9246" # p2p port
```
- Start lbcd by running `docker-compose up -d`
- Check the progress with `docker-compose logs -f --tail 100`
### Setting up the elasticsearch instance
Log in to the elasticsearch instance and perform the following steps:
- Copy the following to `~/docker-compose.yml`
```
version: "3"
volumes:
es01:
services:
es01:
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.0
container_name: es01
environment:
- node.name=es01
- discovery.type=single-node
- indices.query.bool.max_clause_count=8192
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Dlog4j2.formatMsgNoLookups=true -Xms8g -Xmx8g" # no more than 32, remember to disable swap
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- "es01:/usr/share/elasticsearch/data"
ports:
- "127.0.0.1:9200:9200"
```
- Start elasticsearch by running `docker-compose up -d`
- Check the status with `docker-compose logs -f --tail 100`
### Setting up the hub instance
- Log in (ssh) to the hub instance and generate and print out an ssh key; this is needed to set up port forwards to the other two instances. Copy the output of the following:
```
ssh-keygen -q -t ed25519 -N '' -f ~/.ssh/id_ed25519 <<<y >/dev/null 2>&1
```
- After copying the above key, log out of the hub instance.
- Log in to the elasticsearch instance and add the copied key to `~/.ssh/authorized_keys` (see [this](https://stackoverflow.com/questions/6377009/adding-a-public-key-to-ssh-authorized-keys-does-not-log-me-in-automatically) if confused). Log out of the elasticsearch instance once done.
- Log in to the lbcd instance and add the copied key to `~/.ssh/authorized_keys`, log out when done.
- Log in to the hub instance and copy the following to `/etc/systemd/system/es-tunnel.service`, replacing `lbry` with your user and `your-elastic-ip` with your elasticsearch instance ip.
```
[Unit]
Description=Persistent SSH Tunnel for ES
After=network.target
[Service]
Restart=on-failure
RestartSec=5
ExecStart=/usr/bin/ssh -NTC -o ServerAliveInterval=60 -o ExitOnForwardFailure=yes -L 127.0.0.1:9200:127.0.0.1:9200 lbry@your-elastic-ip
User=lbry
Group=lbry
[Install]
WantedBy=multi-user.target
```
- Next, copy the following to `/etc/systemd/system/lbcd-tunnel.service` on the hub instance, replacing `lbry` with your user and `your-lbcd-ip` with your lbcd instance ip.
```
[Unit]
Description=Persistent SSH Tunnel for lbcd
After=network.target
[Service]
Restart=on-failure
RestartSec=5
ExecStart=/usr/bin/ssh -NTC -o ServerAliveInterval=60 -o ExitOnForwardFailure=yes -L 127.0.0.1:9245:127.0.0.1:9245 lbry@your-lbcd-ip
User=lbry
Group=lbry
[Install]
WantedBy=multi-user.target
```
- Verify you can ssh in to the elasticsearch and lbcd instances from the hub instance
- Enable and start the ssh port forward services on the hub instance
```
sudo systemctl enable es-tunnel.service
sudo systemctl enable lbcd-tunnel.service
sudo systemctl start es-tunnel.service
sudo systemctl start lbcd-tunnel.service
```
- Build the hub docker image on the hub instance by running the following:
```
git clone https://github.com/lbryio/hub.git
cd hub
docker build -t lbry/hub:development .
```
- Copy the following to `~/docker-compose.yml` on the hub instance
```
version: "3"
volumes:
lbry_rocksdb:
services:
scribe:
depends_on:
- scribe_elastic_sync
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe
- SNAPSHOT_URL=https://snapshots.lbry.com/hub/lbry-rocksdb.zip
command:
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--max_query_workers=2"
- "--cache_all_tx_hashes"
- "--index_address_statuses"
scribe_elastic_sync:
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "127.0.0.1:19080:19080" # elastic notifier port
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe-elastic-sync
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6
command:
- "--elastic_host=127.0.0.1"
- "--elastic_port=9200"
- "--max_query_workers=2"
herald:
depends_on:
- scribe_elastic_sync
- scribe
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "50001:50001" # electrum rpc port and udp ping port
- "2112:2112" # comment out to disable prometheus metrics
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=herald
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6
command:
- "--index_address_statuses"
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--elastic_host=127.0.0.1"
- "--elastic_port=9200"
- "--max_query_workers=4"
- "--host=0.0.0.0"
- "--max_sessions=100000"
- "--prometheus_port=2112" # comment out to disable prometheus metrics
```
- Start the hub services by running `docker-compose up -d`
- Check the status with `docker-compose logs -f --tail 100`
### Manual setup of docker volumes from snapshots
For an example of copying and configuring permissions for a hub docker volume, see [this](https://github.com/lbryio/hub/blob/master/scripts/initialize_rocksdb_snapshot_dev.sh). For an example for the elasticsearch volume, see [this](https://github.com/lbryio/hub/blob/master/scripts/initialize_es_snapshot_dev.sh). **Read these scripts before running them** to avoid overwriting the wrong volume; they are more of a guide on how to set the permissions and where files go than setup scripts.
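As a rough illustration of the kind of steps those scripts perform (a sketch only; the volume name, snapshot URL, and uid/gid below are assumptions based on the examples above, not canonical values):
```
# create the volume the compose files expect and find its path on disk
docker volume create lbry_rocksdb
VOLUME_DIR=/var/lib/docker/volumes/lbry_rocksdb/_data   # default docker local-driver path
# download and unpack the rocksdb snapshot into the volume
wget https://snapshots.lbry.com/hub/block_1312050/lbry-rocksdb.tar
sudo tar -xf lbry-rocksdb.tar -C "$VOLUME_DIR"
# fix ownership so the hub container can write to it (use the uid/gid the container runs as)
sudo chown -R 1000:1000 "$VOLUME_DIR"
```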

docs/diagram.png (new binary file, 103 KiB, not shown)

@@ -0,0 +1,99 @@
version: "3"
volumes:
lbcd:
lbry_rocksdb:
es01:
services:
scribe:
depends_on:
- lbcd
- scribe_elastic_sync
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe
- SNAPSHOT_URL=https://snapshots.lbry.com/hub/block_1312050/lbry-rocksdb.tar
command: # for full options, see `scribe --help`
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--max_query_workers=2"
- "--index_address_statuses"
scribe_elastic_sync:
depends_on:
- es01
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "127.0.0.1:19080:19080" # elastic notifier port
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe-elastic-sync
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8 d4612c256a44fc025c37a875751415299b1f8220
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6 145265bd234b7c9c28dfc6857d878cca402dda94 22335fbb132eee86d374b613875bf88bec83492f f665b89b999f411aa5def311bb2eb385778d49c8
command: # for full options, see `scribe-elastic-sync --help`
- "--max_query_workers=2"
- "--elastic_host=127.0.0.1" # elasticsearch host
- "--elastic_port=9200" # elasticsearch port
herald:
depends_on:
- lbcd
- scribe_elastic_sync
- scribe
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "50001:50001" # electrum rpc port and udp ping port
- "2112:2112" # comment out to disable prometheus metrics
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=herald
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8 d4612c256a44fc025c37a875751415299b1f8220
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6 145265bd234b7c9c28dfc6857d878cca402dda94 22335fbb132eee86d374b613875bf88bec83492f f665b89b999f411aa5def311bb2eb385778d49c8
command: # for full options, see `herald --help`
- "--index_address_statuses"
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--max_query_workers=4"
- "--host=0.0.0.0"
- "--elastic_services=127.0.0.1:9200/127.0.0.1:19080"
- "--prometheus_port=2112" # comment out to disable prometheus metrics
# - "--max_sessions=100000 # uncomment to increase the maximum number of electrum connections, defaults to 1000
# - "--allow_lan_udp" # uncomment to reply to clients on the local network
es01:
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.0
container_name: es01
environment:
- node.name=es01
- discovery.type=single-node
- indices.query.bool.max_clause_count=8192
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Dlog4j2.formatMsgNoLookups=true -Xms8g -Xmx8g" # no more than 32, remember to disable swap
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- "es01:/usr/share/elasticsearch/data"
ports:
- "127.0.0.1:9200:9200"
lbcd:
image: lbry/lbcd:latest
restart: always
network_mode: host
command:
- "--notls"
- "--listen=0.0.0.0:9246"
- "--rpclisten=127.0.0.1:9245"
- "--rpcuser=lbry"
- "--rpcpass=lbry"
volumes:
- "lbcd:/root/.lbcd"
ports:
- "9246:9246" # p2p

@@ -0,0 +1,23 @@
version: "3"
volumes:
es01:
services:
es01:
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.0
container_name: es01
environment:
- node.name=es01
- discovery.type=single-node
- indices.query.bool.max_clause_count=8192
- bootstrap.memory_lock=true
- "ES_JAVA_OPTS=-Dlog4j2.formatMsgNoLookups=true -Xms8g -Xmx8g" # no more than 32, remember to disable swap
ulimits:
memlock:
soft: -1
hard: -1
volumes:
- "es01:/usr/share/elasticsearch/data"
ports:
- "127.0.0.1:9200:9200"

@@ -0,0 +1,59 @@
version: "3"
volumes:
lbry_rocksdb:
services:
scribe:
depends_on:
- scribe_elastic_sync
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe
- SNAPSHOT_URL=https://snapshots.lbry.com/hub/block_1312050/lbry-rocksdb.tar
command:
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--max_query_workers=2"
scribe_elastic_sync:
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "127.0.0.1:19080:19080" # elastic notifier port
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=scribe-elastic-sync
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8 d4612c256a44fc025c37a875751415299b1f8220
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6 145265bd234b7c9c28dfc6857d878cca402dda94 22335fbb132eee86d374b613875bf88bec83492f f665b89b999f411aa5def311bb2eb385778d49c8
command:
- "--elastic_host=127.0.0.1"
- "--elastic_port=9200"
- "--max_query_workers=2"
herald:
depends_on:
- scribe_elastic_sync
- scribe
image: lbry/hub:${SCRIBE_TAG:-master}
restart: always
network_mode: host
ports:
- "50001:50001" # electrum rpc port and udp ping port
- "2112:2112" # comment out to disable prometheus metrics
volumes:
- "lbry_rocksdb:/database"
environment:
- HUB_COMMAND=herald
- FILTERING_CHANNEL_IDS=770bd7ecba84fd2f7607fb15aedd2b172c2e153f 95e5db68a3101df19763f3a5182e4b12ba393ee8 d4612c256a44fc025c37a875751415299b1f8220
- BLOCKING_CHANNEL_IDS=dd687b357950f6f271999971f43c785e8067c3a9 06871aa438032244202840ec59a469b303257cad b4a2528f436eca1bf3bf3e10ff3f98c57bd6c4c6 145265bd234b7c9c28dfc6857d878cca402dda94 22335fbb132eee86d374b613875bf88bec83492f f665b89b999f411aa5def311bb2eb385778d49c8
command:
- "--daemon_url=http://lbry:lbry@127.0.0.1:9245"
- "--elastic_services=127.0.0.1:9200/127.0.0.1:19080"
- "--max_query_workers=4"
- "--host=0.0.0.0"
- "--max_sessions=100000"
- "--prometheus_port=2112" # comment out to disable prometheus metrics

@@ -0,0 +1,19 @@
version: "3"
volumes:
lbcd:
services:
lbcd:
image: lbry/lbcd:latest
restart: always
network_mode: host
command:
- "--rpcuser=lbry"
- "--rpcpass=lbry"
- "--rpclisten=127.0.0.1"
volumes:
- "lbcd:/root/.lbcd"
ports:
- "127.0.0.1:9245:9245"
- "9246:9246" # p2p port

hub/common.py (new file, 1101 lines; diff not shown because it is too large)

hub/db/__init__.py (new file)

@@ -0,0 +1 @@
from .db import SecondaryDB

@@ -1,7 +1,7 @@
import typing
import enum
from typing import Optional
from scribe.error import ResolveCensoredError
from hub.error import ResolveCensoredError
@enum.unique
@@ -16,7 +16,7 @@ class DB_PREFIXES(enum.Enum):
channel_to_claim = b'J'
claim_short_id_prefix = b'F'
effective_amount = b'D'
bid_order = b'D'
claim_expiration = b'O'
claim_takeover = b'P'
@@ -48,27 +48,15 @@
touched_hashX = b'e'
hashX_status = b'f'
hashX_mempool_status = b'g'
reposted_count = b'j'
effective_amount = b'i'
future_effective_amount = b'k'
hashX_history_hash = b'l'
COLUMN_SETTINGS = {} # this is updated by the PrefixRow metaclass
CLAIM_TYPES = {
'stream': 1,
'channel': 2,
'repost': 3,
'collection': 4,
}
STREAM_TYPES = {
'video': 1,
'audio': 2,
'image': 3,
'document': 4,
'binary': 5,
'model': 6,
}
# 9/21/2020
MOST_USED_TAGS = {
"gaming",

hub/db/db.py (new file, 1432 lines; diff not shown because it is too large)

@@ -1,9 +1,10 @@
import asyncio
import struct
import typing
import rocksdb
from typing import Optional
from scribe.db.common import DB_PREFIXES, COLUMN_SETTINGS
from scribe.db.revertable import RevertableOpStack, RevertablePut, RevertableDelete
from hub.db.common import DB_PREFIXES, COLUMN_SETTINGS
from hub.db.revertable import RevertableOpStack, RevertablePut, RevertableDelete
ROW_TYPES = {}
@@ -88,6 +89,12 @@ class PrefixRow(metaclass=PrefixRowType):
if v:
return v if not deserialize_value else self.unpack_value(v)
def key_exists(self, *key_args):
key_may_exist, _ = self._db.key_may_exist((self._column_family, self.pack_key(*key_args)))
if not key_may_exist:
return False
return self._db.get((self._column_family, self.pack_key(*key_args)), fill_cache=True) is not None
def multi_get(self, key_args: typing.List[typing.Tuple], fill_cache=True, deserialize_value=True):
packed_keys = {tuple(args): self.pack_key(*args) for args in key_args}
db_result = self._db.multi_get([(self._column_family, packed_keys[tuple(args)]) for args in key_args],
@@ -101,23 +108,44 @@
handle_value(result[packed_keys[tuple(k_args)]]) for k_args in key_args
]
async def multi_get_async_gen(self, executor, key_args: typing.List[typing.Tuple], deserialize_value=True, step=1000):
packed_keys = {self.pack_key(*args): args for args in key_args}
assert len(packed_keys) == len(key_args), 'duplicate partial keys given to multi_get_dict'
db_result = await asyncio.get_event_loop().run_in_executor(
executor, self._db.multi_get, [(self._column_family, key) for key in packed_keys]
)
unpack_value = self.unpack_value
def handle_value(v):
return None if v is None else v if not deserialize_value else unpack_value(v)
for idx, (k, v) in enumerate((db_result or {}).items()):
yield (packed_keys[k[-1]], handle_value(v))
if idx % step == 0:
await asyncio.sleep(0)
def stash_multi_put(self, items):
self._op_stack.stash_ops([RevertablePut(self.pack_key(*k), self.pack_value(*v)) for k, v in items])
def stash_multi_delete(self, items):
self._op_stack.stash_ops([RevertableDelete(self.pack_key(*k), self.pack_value(*v)) for k, v in items])
def get_pending(self, *key_args, fill_cache=True, deserialize_value=True):
packed_key = self.pack_key(*key_args)
last_op = self._op_stack.get_last_op_for_key(packed_key)
if last_op:
if last_op.is_put:
return last_op.value if not deserialize_value else self.unpack_value(last_op.value)
else: # it's a delete
return
v = self._db.get((self._column_family, packed_key), fill_cache=fill_cache)
if v:
return v if not deserialize_value else self.unpack_value(v)
pending_op = self._op_stack.get_pending_op(packed_key)
if pending_op and pending_op.is_delete:
return
if pending_op:
v = pending_op.value
else:
v = self._db.get((self._column_family, packed_key), fill_cache=fill_cache)
return None if v is None else (v if not deserialize_value else self.unpack_value(v))
def stage_put(self, key_args=(), value_args=()):
self._op_stack.append_op(RevertablePut(self.pack_key(*key_args), self.pack_value(*value_args)))
def stash_put(self, key_args=(), value_args=()):
self._op_stack.stash_ops([RevertablePut(self.pack_key(*key_args), self.pack_value(*value_args))])
def stage_delete(self, key_args=(), value_args=()):
self._op_stack.append_op(RevertableDelete(self.pack_key(*key_args), self.pack_value(*value_args)))
def stash_delete(self, key_args=(), value_args=()):
self._op_stack.stash_ops([RevertableDelete(self.pack_key(*key_args), self.pack_value(*value_args))])
@classmethod
def pack_partial_key(cls, *args) -> bytes:
@ -155,13 +183,14 @@ class BasePrefixDB:
UNDO_KEY_STRUCT = struct.Struct(b'>Q32s')
PARTIAL_UNDO_KEY_STRUCT = struct.Struct(b'>Q')
def __init__(self, path, max_open_files=64, secondary_path='', max_undo_depth: int = 200, unsafe_prefixes=None):
def __init__(self, path, max_open_files=64, secondary_path='', max_undo_depth: int = 200, unsafe_prefixes=None,
enforce_integrity=True):
column_family_options = {}
for prefix in DB_PREFIXES:
settings = COLUMN_SETTINGS[prefix.value]
column_family_options[prefix.value] = rocksdb.ColumnFamilyOptions()
column_family_options[prefix.value].table_factory = rocksdb.BlockBasedTableFactory(
block_cache=rocksdb.LRUCache(settings['cache_size']),
block_cache=rocksdb.LRUCache(settings['cache_size'])
)
self.column_families: typing.Dict[bytes, 'rocksdb.ColumnFamilyHandle'] = {}
options = rocksdb.Options(
@ -178,7 +207,9 @@ class BasePrefixDB:
cf = self._db.get_column_family(prefix.value)
self.column_families[prefix.value] = cf
self._op_stack = RevertableOpStack(self.get, unsafe_prefixes=unsafe_prefixes)
self._op_stack = RevertableOpStack(
self.get, self.multi_get, unsafe_prefixes=unsafe_prefixes, enforce_integrity=enforce_integrity
)
self._max_undo_depth = max_undo_depth
def unsafe_commit(self):
@ -186,6 +217,7 @@ class BasePrefixDB:
Write staged changes to the database without keeping undo information
Changes written cannot be undone
"""
self.apply_stash()
try:
if not len(self._op_stack):
return
@ -206,6 +238,7 @@ class BasePrefixDB:
"""
Write changes for a block height to the database and keep undo information so that the changes can be reverted
"""
self.apply_stash()
undo_ops = self._op_stack.get_undo_ops()
delete_undos = []
if height > self._max_undo_depth:
@ -240,6 +273,7 @@ class BasePrefixDB:
undo_c_f = self.column_families[DB_PREFIXES.undo.value]
undo_info = self._db.get((undo_c_f, undo_key))
self._op_stack.apply_packed_undo_ops(undo_info)
self._op_stack.validate_and_apply_stashed_ops()
try:
with self._db.write_batch(sync=True) as batch:
batch_put = batch.put
@ -255,10 +289,26 @@ class BasePrefixDB:
finally:
self._op_stack.clear()
def apply_stash(self):
self._op_stack.validate_and_apply_stashed_ops()
def get(self, key: bytes, fill_cache: bool = True) -> Optional[bytes]:
cf = self.column_families[key[:1]]
return self._db.get((cf, key), fill_cache=fill_cache)
def multi_get(self, keys: typing.List[bytes], fill_cache=True):
if len(keys) == 0:
return []
get_cf = self.column_families.__getitem__
db_result = self._db.multi_get([(get_cf(k[:1]), k) for k in keys], fill_cache=fill_cache)
return list(db_result.values())
def multi_delete(self, items: typing.List[typing.Tuple[bytes, bytes]]):
self._op_stack.stash_ops([RevertableDelete(k, v) for k, v in items])
def multi_put(self, items: typing.List[typing.Tuple[bytes, bytes]]):
self._op_stack.stash_ops([RevertablePut(k, v) for k, v in items])
def iterator(self, start: bytes, column_family: 'rocksdb.ColumnFamilyHandle' = None,
iterate_lower_bound: bytes = None, iterate_upper_bound: bytes = None,
reverse: bool = False, include_key: bool = True, include_value: bool = True,
@ -276,11 +326,11 @@ class BasePrefixDB:
def try_catch_up_with_primary(self):
self._db.try_catch_up_with_primary()
def stage_raw_put(self, key: bytes, value: bytes):
self._op_stack.append_op(RevertablePut(key, value))
def stash_raw_put(self, key: bytes, value: bytes):
self._op_stack.stash_ops([RevertablePut(key, value)])
def stage_raw_delete(self, key: bytes, value: bytes):
self._op_stack.append_op(RevertableDelete(key, value))
def stash_raw_delete(self, key: bytes, value: bytes):
self._op_stack.stash_ops([RevertableDelete(key, value)])
def estimate_num_keys(self, column_family: 'rocksdb.ColumnFamilyHandle' = None):
return int(self._db.get_property(b'rocksdb.estimate-num-keys', column_family).decode())
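get_pending resolves a key by first consulting the most recent stashed op for that key (a pending delete reads as missing, a pending put returns the staged value) and only then falling back to the backing store. A dictionary-backed sketch of that lookup order, independent of RocksDB and of the project's op-stack classes:

from typing import Optional, Dict, Tuple

def get_pending(key: bytes, pending: Dict[bytes, Tuple[str, Optional[bytes]]],
                store: Dict[bytes, bytes]) -> Optional[bytes]:
    # pending maps key -> ("put", value) or ("delete", value), like get_pending_op
    op = pending.get(key)
    if op is not None:
        kind, value = op
        return None if kind == "delete" else value  # a pending delete hides the stored row
    return store.get(key)  # no pending op: read through to the database

store = {b"ik": b"old"}
pending = {b"ik": ("put", b"new")}
assert get_pending(b"ik", pending, store) == b"new"
pending[b"ik"] = ("delete", b"old")
assert get_pending(b"ik", pending, store) is None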

View file

@ -29,7 +29,7 @@ import typing
from asyncio import Event
from math import ceil, log
from scribe.common import double_sha256
from hub.common import double_sha256
class Merkle:

View file

@ -0,0 +1,67 @@
import logging
from collections import defaultdict
from hub.db.prefixes import ACTIVATED_SUPPORT_TXO_TYPE
FROM_VERSION = 10
TO_VERSION = 11
def migrate(db):
log = logging.getLogger(__name__)
prefix_db = db.prefix_db
log.info("migrating the db to version 11")
effective_amounts = defaultdict(int)
support_amounts = defaultdict(int)
log.info("deleting any existing effective amounts")
to_delete = list(prefix_db.effective_amount.iterate(deserialize_key=False, deserialize_value=False))
while to_delete:
batch, to_delete = to_delete[:100000], to_delete[100000:]
if batch:
prefix_db.multi_delete(batch)
prefix_db.unsafe_commit()
log.info("calculating claim effective amounts for the new index at block %i", db.db_height)
height = db.db_height
cnt = 0
for k, v in prefix_db.active_amount.iterate():
cnt += 1
claim_hash, activation_height, amount = k.claim_hash, k.activation_height, v.amount
if activation_height <= height:
effective_amounts[claim_hash] += amount
if k.txo_type == ACTIVATED_SUPPORT_TXO_TYPE:
support_amounts[claim_hash] += amount
if cnt % 1000000 == 0:
log.info("scanned %i amounts for %i claims", cnt, len(effective_amounts))
log.info("preparing to insert effective amounts")
effective_amounts_to_put = [
prefix_db.effective_amount.pack_item(claim_hash, effective_amount, support_amounts[claim_hash])
for claim_hash, effective_amount in effective_amounts.items()
]
log.info("inserting %i effective amounts", len(effective_amounts_to_put))
cnt = 0
while effective_amounts_to_put:
batch, effective_amounts_to_put = effective_amounts_to_put[:100000], effective_amounts_to_put[100000:]
if batch:
prefix_db.multi_put(batch)
prefix_db.unsafe_commit()
cnt += len(batch)
if cnt % 1000000 == 0:
log.info("inserted effective amounts for %i claims", cnt)
log.info("finished building the effective amount index")
db.db_version = 11
db.write_db_state()
db.prefix_db.unsafe_commit()
log.info("finished migration to version 11")

View file

@ -0,0 +1,57 @@
import logging
from collections import defaultdict
FROM_VERSION = 11
TO_VERSION = 12
def migrate(db):
log = logging.getLogger(__name__)
prefix_db = db.prefix_db
log.info("migrating the db to version 12")
effective_amounts = defaultdict(int)
log.info("deleting any existing future effective amounts")
to_delete = list(prefix_db.future_effective_amount.iterate(deserialize_key=False, deserialize_value=False))
while to_delete:
batch, to_delete = to_delete[:100000], to_delete[100000:]
if batch:
prefix_db.multi_delete(batch)
prefix_db.unsafe_commit()
log.info("calculating future claim effective amounts for the new index at block %i", db.db_height)
cnt = 0
for k, v in prefix_db.active_amount.iterate():
cnt += 1
effective_amounts[k.claim_hash] += v.amount
if cnt % 1000000 == 0:
log.info("scanned %i amounts for %i claims", cnt, len(effective_amounts))
log.info("preparing to insert future effective amounts")
effective_amounts_to_put = [
prefix_db.future_effective_amount.pack_item(claim_hash, effective_amount)
for claim_hash, effective_amount in effective_amounts.items()
]
log.info("inserting %i future effective amounts", len(effective_amounts_to_put))
cnt = 0
while effective_amounts_to_put:
batch, effective_amounts_to_put = effective_amounts_to_put[:100000], effective_amounts_to_put[100000:]
if batch:
prefix_db.multi_put(batch)
prefix_db.unsafe_commit()
cnt += len(batch)
if cnt % 1000000 == 0:
log.info("inserted effective amounts for %i claims", cnt)
log.info("finished building the effective amount index")
db.db_version = 12
db.write_db_state()
db.prefix_db.unsafe_commit()
log.info("finished migration to version 12")

View file

@ -3,9 +3,9 @@ import time
import array
import typing
from bisect import bisect_right
from scribe.common import sha256
from hub.common import sha256
if typing.TYPE_CHECKING:
from scribe.db.db import HubDB
from hub.scribe.db import PrimaryDB
FROM_VERSION = 7
TO_VERSION = 8
@ -35,7 +35,7 @@ def hashX_history(db: 'HubDB', hashX: bytes):
return history, to_delete
def hashX_status_from_history(db: 'HubDB', history: bytes) -> bytes:
def hashX_status_from_history(db: 'PrimaryDB', history: bytes) -> bytes:
tx_counts = db.tx_counts
hist_tx_nums = array.array('I')
hist_tx_nums.frombytes(history)

View file

@ -0,0 +1,26 @@
import logging
FROM_VERSION = 8
TO_VERSION = 9
def migrate(db):
log = logging.getLogger(__name__)
prefix_db = db.prefix_db
index_address_status = db._index_address_status
log.info("migrating the db to version 9")
if not index_address_status:
log.info("deleting the existing address status index")
to_delete = list(prefix_db.hashX_status.iterate(deserialize_key=False, deserialize_value=False))
while to_delete:
batch, to_delete = to_delete[:10000], to_delete[10000:]
if batch:
prefix_db.multi_delete(batch)
prefix_db.unsafe_commit()
db.db_version = 9
db.write_db_state()
db.prefix_db.unsafe_commit()
log.info("finished migration")

View file

@ -0,0 +1,48 @@
import logging
from collections import defaultdict
from hub.db.revertable import RevertablePut
FROM_VERSION = 9
TO_VERSION = 10
def migrate(db):
log = logging.getLogger(__name__)
prefix_db = db.prefix_db
log.info("migrating the db to version 10")
repost_counts = defaultdict(int)
log.info("deleting any existing repost counts")
to_delete = list(prefix_db.reposted_count.iterate(deserialize_key=False, deserialize_value=False))
while to_delete:
batch, to_delete = to_delete[:10000], to_delete[10000:]
if batch:
prefix_db.multi_delete(batch)
prefix_db.unsafe_commit()
log.info("counting reposts to build the new index")
for reposted_claim_hash in prefix_db.repost.iterate(include_key=False, deserialize_value=False):
repost_counts[reposted_claim_hash] += 1
log.info("inserting repost counts")
reposted_counts_to_put = [
prefix_db.reposted_count.pack_item(claim_hash, count)
for claim_hash, count in repost_counts.items()
]
while reposted_counts_to_put:
batch, reposted_counts_to_put = reposted_counts_to_put[:10000], reposted_counts_to_put[10000:]
if batch:
prefix_db.multi_put(batch)
prefix_db.unsafe_commit()
log.info("finished building the repost count index")
db.db_version = 10
db.write_db_state()
db.prefix_db.unsafe_commit()
log.info("finished migration to version 10")

View file

@ -3,9 +3,10 @@ import struct
import array
import base64
from typing import Union, Tuple, NamedTuple, Optional
from scribe.db.common import DB_PREFIXES
from scribe.db.interface import BasePrefixDB, ROW_TYPES, PrefixRow
from scribe.schema.url import normalize_name
from hub.common import ResumableSHA256
from hub.db.common import DB_PREFIXES
from hub.db.interface import BasePrefixDB, ROW_TYPES, PrefixRow
from hub.schema.url import normalize_name
ACTIVATED_CLAIM_TXO_TYPE = 1
ACTIVATED_SUPPORT_TXO_TYPE = 2
@ -58,7 +59,7 @@ class HashXHistoryKey(NamedTuple):
class HashXHistoryValue(NamedTuple):
hashXes: typing.List[int]
tx_nums: typing.List[int]
class BlockHashKey(NamedTuple):
@ -354,14 +355,14 @@ class ActiveAmountValue(typing.NamedTuple):
amount: int
class EffectiveAmountKey(typing.NamedTuple):
class BidOrderKey(typing.NamedTuple):
normalized_name: str
effective_amount: int
tx_num: int
position: int
class EffectiveAmountValue(typing.NamedTuple):
class BidOrderValue(typing.NamedTuple):
claim_hash: bytes
def __str__(self):
@ -420,12 +421,40 @@ class DBState(typing.NamedTuple):
tip: bytes
utxo_flush_count: int
wall_time: int
catching_up: bool
bit_fields: int
db_version: int
hist_flush_count: int
comp_flush_count: int
comp_cursor: int
es_sync_height: int
hashX_status_last_indexed_height: int
@property
def catching_up(self) -> bool:
return self.bit_fields & 1 != 0
@property
def index_address_statuses(self) -> bool:
return self.bit_fields & 2 != 0
@property
def expanded(self):
return (
self.genesis,
self.height,
self.tx_count,
self.tip,
self.utxo_flush_count,
self.wall_time,
self.catching_up,
self.index_address_statuses,
self.db_version,
self.hist_flush_count,
self.comp_flush_count,
self.comp_cursor,
self.es_sync_height,
self.hashX_status_last_indexed_height
)
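DBState now packs catching_up and index_address_statuses into one bit_fields integer: bit 0 holds catching_up, bit 1 holds index_address_statuses, and the properties above read them back with a mask. A small worked sketch of that encoding:

def pack_bit_fields(catching_up: bool, index_address_statuses: bool) -> int:
    bit_fields = 0
    bit_fields |= int(catching_up) << 0              # bit 0
    bit_fields |= int(index_address_statuses) << 1   # bit 1
    return bit_fields

bits = pack_bit_fields(True, False)
assert bits == 1
assert bits & 1 != 0         # catching_up
assert bits & 2 == 0         # index_address_statuses
bits = pack_bit_fields(True, True)
assert bits == 3 and (bits & 2) != 0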
class ActiveAmountPrefixRow(PrefixRow):
@ -895,8 +924,8 @@ def effective_amount_helper(struct_fmt):
return wrapper
class EffectiveAmountPrefixRow(PrefixRow):
prefix = DB_PREFIXES.effective_amount.value
class BidOrderPrefixRow(PrefixRow):
prefix = DB_PREFIXES.bid_order.value
key_struct = struct.Struct(b'>QLH')
value_struct = struct.Struct(b'>20s')
key_part_lambdas = [
@ -915,16 +944,16 @@ class EffectiveAmountPrefixRow(PrefixRow):
)
@classmethod
def unpack_key(cls, key: bytes) -> EffectiveAmountKey:
def unpack_key(cls, key: bytes) -> BidOrderKey:
assert key[:1] == cls.prefix
name_len = int.from_bytes(key[1:3], byteorder='big')
name = key[3:3 + name_len].decode()
ones_comp_effective_amount, tx_num, position = cls.key_struct.unpack(key[3 + name_len:])
return EffectiveAmountKey(name, 0xffffffffffffffff - ones_comp_effective_amount, tx_num, position)
return BidOrderKey(name, 0xffffffffffffffff - ones_comp_effective_amount, tx_num, position)
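The bid order rows store the effective amount ones-complemented against 0xffffffffffffffff, so under RocksDB's ascending big-endian key order the highest bid sorts first; unpack_key reverses the complement. A standalone sketch of why that works:

import struct

MAX_U64 = 0xffffffffffffffff

def pack_amount(amount: int) -> bytes:
    # store the ones complement so larger amounts produce smaller keys
    return struct.pack('>Q', MAX_U64 - amount)

def unpack_amount(key: bytes) -> int:
    return MAX_U64 - struct.unpack('>Q', key)[0]

amounts = [5, 100, 1]
keys = sorted(pack_amount(a) for a in amounts)          # ascending key order, as in RocksDB
assert [unpack_amount(k) for k in keys] == [100, 5, 1]  # highest amount first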
@classmethod
def unpack_value(cls, data: bytes) -> EffectiveAmountValue:
return EffectiveAmountValue(*super().unpack_value(data))
def unpack_value(cls, data: bytes) -> BidOrderValue:
return BidOrderValue(*super().unpack_value(data))
@classmethod
def pack_value(cls, claim_hash: bytes) -> bytes:
@ -997,6 +1026,44 @@ class RepostedPrefixRow(PrefixRow):
return cls.pack_key(reposted_claim_hash, tx_num, position), cls.pack_value(claim_hash)
class RepostedCountKey(NamedTuple):
claim_hash: bytes
class RepostedCountValue(NamedTuple):
reposted_count: int
class RepostedCountPrefixRow(PrefixRow):
prefix = DB_PREFIXES.reposted_count.value
key_struct = struct.Struct(b'>20s')
value_struct = struct.Struct(b'>L')
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>20s').pack,
]
@classmethod
def pack_key(cls, claim_hash: bytes):
return super().pack_key(claim_hash)
@classmethod
def unpack_key(cls, key: bytes) -> RepostedCountKey:
return RepostedCountKey(*super().unpack_key(key))
@classmethod
def pack_value(cls, reposted_count: int) -> bytes:
return super().pack_value(reposted_count)
@classmethod
def unpack_value(cls, data: bytes) -> RepostedCountValue:
return RepostedCountValue(*super().unpack_value(data))
@classmethod
def pack_item(cls, claim_hash: bytes, reposted_count: int):
return cls.pack_key(claim_hash), cls.pack_value(reposted_count)
class UndoKey(NamedTuple):
height: int
block_hash: bytes
@ -1104,6 +1171,7 @@ class TXNumPrefixRow(PrefixRow):
lambda: b'',
struct.Struct(b'>32s').pack
]
cache_size = 1024 * 1024 * 64
@classmethod
def pack_key(cls, tx_hash: bytes) -> bytes:
@ -1167,6 +1235,8 @@ class TXHashPrefixRow(PrefixRow):
struct.Struct(b'>L').pack
]
cache_size = 1024 * 1024 * 64
@classmethod
def pack_key(cls, tx_num: int) -> bytes:
return super().pack_key(tx_num)
@ -1196,6 +1266,7 @@ class TXPrefixRow(PrefixRow):
lambda: b'',
struct.Struct(b'>32s').pack
]
cache_size = 1024 * 1024 * 64
@classmethod
def pack_key(cls, tx_hash: bytes) -> bytes:
@ -1222,7 +1293,7 @@ class UTXOPrefixRow(PrefixRow):
prefix = DB_PREFIXES.utxo.value
key_struct = struct.Struct(b'>11sLH')
value_struct = struct.Struct(b'>Q')
cache_size = 1024 * 1024 * 64
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>11s').pack,
@ -1255,7 +1326,7 @@ class HashXUTXOPrefixRow(PrefixRow):
prefix = DB_PREFIXES.hashx_utxo.value
key_struct = struct.Struct(b'>4sLH')
value_struct = struct.Struct(b'>11s')
cache_size = 1024 * 1024 * 64
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>4s').pack,
@ -1420,7 +1491,7 @@ class SupportAmountPrefixRow(PrefixRow):
class DBStatePrefixRow(PrefixRow):
prefix = DB_PREFIXES.db_state.value
value_struct = struct.Struct(b'>32sLL32sLLBBlllL')
value_struct = struct.Struct(b'>32sLL32sLLBBlllLL')
key_struct = struct.Struct(b'')
key_part_lambdas = [
@ -1437,12 +1508,16 @@ class DBStatePrefixRow(PrefixRow):
@classmethod
def pack_value(cls, genesis: bytes, height: int, tx_count: int, tip: bytes, utxo_flush_count: int, wall_time: int,
catching_up: bool, db_version: int, hist_flush_count: int, comp_flush_count: int,
comp_cursor: int, es_sync_height: int) -> bytes:
catching_up: bool, index_address_statuses: bool, db_version: int, hist_flush_count: int,
comp_flush_count: int, comp_cursor: int, es_sync_height: int,
last_indexed_address_statuses: int) -> bytes:
bit_fields = 0
bit_fields |= int(catching_up) << 0
bit_fields |= int(index_address_statuses) << 1
return super().pack_value(
genesis, height, tx_count, tip, utxo_flush_count,
wall_time, 1 if catching_up else 0, db_version, hist_flush_count,
comp_flush_count, comp_cursor, es_sync_height
wall_time, bit_fields, db_version, hist_flush_count,
comp_flush_count, comp_cursor, es_sync_height, last_indexed_address_statuses
)
@classmethod
@ -1451,15 +1526,18 @@ class DBStatePrefixRow(PrefixRow):
# TODO: delete this after making a new snapshot - 10/20/21
# migrate in the es_sync_height if it doesn't exist
data += data[32:36]
if len(data) == 98:
data += data[32:36]
return DBState(*super().unpack_value(data))
@classmethod
def pack_item(cls, genesis: bytes, height: int, tx_count: int, tip: bytes, utxo_flush_count: int, wall_time: int,
catching_up: bool, db_version: int, hist_flush_count: int, comp_flush_count: int,
comp_cursor: int, es_sync_height: int):
catching_up: bool, index_address_statuses: bool, db_version: int, hist_flush_count: int,
comp_flush_count: int, comp_cursor: int, es_sync_height: int, last_indexed_address_statuses: int):
return cls.pack_key(), cls.pack_value(
genesis, height, tx_count, tip, utxo_flush_count, wall_time, catching_up, db_version, hist_flush_count,
comp_flush_count, comp_cursor, es_sync_height
genesis, height, tx_count, tip, utxo_flush_count, wall_time, catching_up, index_address_statuses,
db_version, hist_flush_count, comp_flush_count, comp_cursor, es_sync_height,
last_indexed_address_statuses
)
@ -1693,11 +1771,134 @@ class HashXMempoolStatusPrefixRow(PrefixRow):
return cls.pack_key(hashX), cls.pack_value(status)
class EffectiveAmountKey(NamedTuple):
claim_hash: bytes
class EffectiveAmountValue(NamedTuple):
activated_sum: int
activated_support_sum: int
class EffectiveAmountPrefixRow(PrefixRow):
prefix = DB_PREFIXES.effective_amount.value
key_struct = struct.Struct(b'>20s')
value_struct = struct.Struct(b'>QQ')
cache_size = 1024 * 1024 * 64
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>20s').pack
]
@classmethod
def pack_key(cls, claim_hash: bytes):
return super().pack_key(claim_hash)
@classmethod
def unpack_key(cls, key: bytes) -> EffectiveAmountKey:
return EffectiveAmountKey(*super().unpack_key(key))
@classmethod
def pack_value(cls, effective_amount: int, support_sum: int) -> bytes:
assert effective_amount >= support_sum
return super().pack_value(effective_amount, support_sum)
@classmethod
def unpack_value(cls, data: bytes) -> EffectiveAmountValue:
return EffectiveAmountValue(*cls.value_struct.unpack(data))
@classmethod
def pack_item(cls, claim_hash: bytes, effective_amount: int, support_sum: int):
return cls.pack_key(claim_hash), cls.pack_value(effective_amount, support_sum)
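The new effective_amount rows key on the 20-byte claim hash and store two unsigned 64-bit sums (the total activated amount and its activated-support portion), with pack_value asserting the support sum never exceeds the total. A struct-only sketch of that record layout (the real rows also prepend the prefix byte to the key):

import struct

KEY = struct.Struct('>20s')   # claim hash
VALUE = struct.Struct('>QQ')  # activated sum, activated support sum

def pack_item(claim_hash: bytes, effective_amount: int, support_sum: int):
    assert effective_amount >= support_sum
    return KEY.pack(claim_hash), VALUE.pack(effective_amount, support_sum)

k, v = pack_item(b'\xaa' * 20, 12_000_000, 2_000_000)
assert KEY.unpack(k)[0] == b'\xaa' * 20
assert VALUE.unpack(v) == (12_000_000, 2_000_000)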
class FutureEffectiveAmountKey(NamedTuple):
claim_hash: bytes
class FutureEffectiveAmountValue(NamedTuple):
future_effective_amount: int
class FutureEffectiveAmountPrefixRow(PrefixRow):
prefix = DB_PREFIXES.future_effective_amount.value
key_struct = struct.Struct(b'>20s')
value_struct = struct.Struct(b'>Q')
cache_size = 1024 * 1024 * 64
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>20s').pack
]
@classmethod
def pack_key(cls, claim_hash: bytes):
return super().pack_key(claim_hash)
@classmethod
def unpack_key(cls, key: bytes) -> FutureEffectiveAmountKey:
return FutureEffectiveAmountKey(*super().unpack_key(key))
@classmethod
def pack_value(cls, future_effective_amount: int) -> bytes:
return super().pack_value(future_effective_amount)
@classmethod
def unpack_value(cls, data: bytes) -> FutureEffectiveAmountValue:
return FutureEffectiveAmountValue(*cls.value_struct.unpack(data))
@classmethod
def pack_item(cls, claim_hash: bytes, future_effective_amount: int):
return cls.pack_key(claim_hash), cls.pack_value(future_effective_amount)
class HashXHistoryHasherKey(NamedTuple):
hashX: bytes
class HashXHistoryHasherValue(NamedTuple):
hasher: ResumableSHA256
class HashXHistoryHasherPrefixRow(PrefixRow):
prefix = DB_PREFIXES.hashX_history_hash.value
key_struct = struct.Struct(b'>11s')
value_struct = struct.Struct(b'>120s')
cache_size = 1024 * 1024 * 64
key_part_lambdas = [
lambda: b'',
struct.Struct(b'>11s').pack
]
@classmethod
def pack_key(cls, hashX: bytes):
return super().pack_key(hashX)
@classmethod
def unpack_key(cls, key: bytes) -> HashXHistoryHasherKey:
return HashXHistoryHasherKey(*super().unpack_key(key))
@classmethod
def pack_value(cls, hasher: ResumableSHA256) -> bytes:
return super().pack_value(hasher.get_state())
@classmethod
def unpack_value(cls, data: bytes) -> HashXHistoryHasherValue:
return HashXHistoryHasherValue(ResumableSHA256(*super().unpack_value(data)))
@classmethod
def pack_item(cls, hashX: bytes, hasher: ResumableSHA256):
return cls.pack_key(hashX), cls.pack_value(hasher)
class PrefixDB(BasePrefixDB):
def __init__(self, path: str, cache_mb: int = 128, reorg_limit: int = 200, max_open_files: int = 64,
secondary_path: str = '', unsafe_prefixes: Optional[typing.Set[bytes]] = None):
def __init__(self, path: str, reorg_limit: int = 200, max_open_files: int = 64,
secondary_path: str = '', unsafe_prefixes: Optional[typing.Set[bytes]] = None,
enforce_integrity: bool = True):
super().__init__(path, max_open_files=max_open_files, secondary_path=secondary_path,
max_undo_depth=reorg_limit, unsafe_prefixes=unsafe_prefixes)
max_undo_depth=reorg_limit, unsafe_prefixes=unsafe_prefixes,
enforce_integrity=enforce_integrity)
db = self._db
self.claim_to_support = ClaimToSupportPrefixRow(db, self._op_stack)
self.support_to_claim = SupportToClaimPrefixRow(db, self._op_stack)
@ -1711,9 +1912,10 @@ class PrefixDB(BasePrefixDB):
self.pending_activation = PendingActivationPrefixRow(db, self._op_stack)
self.activated = ActivatedPrefixRow(db, self._op_stack)
self.active_amount = ActiveAmountPrefixRow(db, self._op_stack)
self.effective_amount = EffectiveAmountPrefixRow(db, self._op_stack)
self.bid_order = BidOrderPrefixRow(db, self._op_stack)
self.repost = RepostPrefixRow(db, self._op_stack)
self.reposted_claim = RepostedPrefixRow(db, self._op_stack)
self.reposted_count = RepostedCountPrefixRow(db, self._op_stack)
self.undo = UndoPrefixRow(db, self._op_stack)
self.utxo = UTXOPrefixRow(db, self._op_stack)
self.hashX_utxo = HashXUTXOPrefixRow(db, self._op_stack)
@ -1734,6 +1936,9 @@ class PrefixDB(BasePrefixDB):
self.touched_hashX = TouchedHashXPrefixRow(db, self._op_stack)
self.hashX_status = HashXStatusPrefixRow(db, self._op_stack)
self.hashX_mempool_status = HashXMempoolStatusPrefixRow(db, self._op_stack)
self.effective_amount = EffectiveAmountPrefixRow(db, self._op_stack)
self.future_effective_amount = FutureEffectiveAmountPrefixRow(db, self._op_stack)
self.hashX_history_hasher = HashXHistoryHasherPrefixRow(db, self._op_stack)
def auto_decode_item(key: bytes, value: bytes) -> Union[Tuple[NamedTuple, NamedTuple], Tuple[bytes, bytes]]:

347
hub/db/revertable.py Normal file
View file

@ -0,0 +1,347 @@
import struct
import logging
from string import printable
from collections import defaultdict, deque
from typing import Tuple, Iterable, Callable, Optional, List, Deque
from hub.db.common import DB_PREFIXES
_OP_STRUCT = struct.Struct('>BLL')
log = logging.getLogger(__name__)
class RevertableOp:
__slots__ = [
'key',
'value',
]
is_put = 0
def __init__(self, key: bytes, value: bytes):
self.key = key
self.value = value
@property
def is_delete(self) -> bool:
return not self.is_put
def invert(self) -> 'RevertableOp':
raise NotImplementedError()
def pack(self) -> bytes:
"""
Serialize to bytes
"""
return struct.pack(
f'>BLL{len(self.key)}s{len(self.value)}s', int(self.is_put), len(self.key), len(self.value), self.key,
self.value
)
@classmethod
def unpack(cls, packed: bytes) -> Tuple['RevertableOp', bytes]:
"""
Deserialize from bytes
:param packed: bytes containing at least one packed revertable op
:return: tuple of the deserialized op (a put or a delete) and the remaining serialized bytes
"""
is_put, key_len, val_len = _OP_STRUCT.unpack(packed[:9])
key = packed[9:9 + key_len]
value = packed[9 + key_len:9 + key_len + val_len]
if is_put == 1:
return RevertablePut(key, value), packed[9 + key_len + val_len:]
return RevertableDelete(key, value), packed[9 + key_len + val_len:]
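Each packed op is a 9-byte header (>BLL: put flag, key length, value length) followed by the raw key and value bytes, which is what lets undo blobs concatenate many ops back to back. A struct-only round trip of that framing, mirroring pack/unpack above:

import struct

_OP_STRUCT = struct.Struct('>BLL')

def pack_op(is_put: bool, key: bytes, value: bytes) -> bytes:
    return struct.pack(f'>BLL{len(key)}s{len(value)}s', int(is_put), len(key), len(value), key, value)

def unpack_op(packed: bytes):
    is_put, key_len, val_len = _OP_STRUCT.unpack(packed[:9])
    key = packed[9:9 + key_len]
    value = packed[9 + key_len:9 + key_len + val_len]
    return bool(is_put), key, value, packed[9 + key_len + val_len:]

blob = pack_op(True, b'ikey', b'val1') + pack_op(False, b'ikey', b'val1')
first = unpack_op(blob)
assert first[:3] == (True, b'ikey', b'val1')
assert unpack_op(first[3])[:3] == (False, b'ikey', b'val1')  # the remainder holds the next op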
def __eq__(self, other: 'RevertableOp') -> bool:
return (self.is_put, self.key, self.value) == (other.is_put, other.key, other.value)
def __repr__(self) -> str:
return str(self)
def __str__(self) -> str:
from hub.db.prefixes import auto_decode_item
k, v = auto_decode_item(self.key, self.value)
key = ''.join(c if c in printable else '.' for c in str(k))
val = ''.join(c if c in printable else '.' for c in str(v))
return f"{'PUT' if self.is_put else 'DELETE'} {DB_PREFIXES(self.key[:1]).name}: {key} | {val}"
class RevertableDelete(RevertableOp):
def invert(self):
return RevertablePut(self.key, self.value)
class RevertablePut(RevertableOp):
is_put = True
def invert(self):
return RevertableDelete(self.key, self.value)
class OpStackIntegrity(Exception):
pass
class RevertableOpStack:
def __init__(self, get_fn: Callable[[bytes], Optional[bytes]],
multi_get_fn: Callable[[List[bytes]], Iterable[Optional[bytes]]], unsafe_prefixes=None,
enforce_integrity=True):
"""
This represents a sequence of revertable puts and deletes to a key-value database that checks for integrity
violations when applying the puts and deletes. The integrity checks ensure that keys that do not exist
are not deleted, and that when keys are deleted the current value is correctly known so that the delete
may be undone. When putting values, the integrity checks ensure that existing values are not overwritten
without first being deleted. Updates are performed by applying a delete op for the old value and a put op
for the new value.
:param get_fn: getter function from an object implementing `KeyValueStorage`
:param multi_get_fn: batched getter returning the stored values, in order, for a list of keys
:param unsafe_prefixes: optional set of prefixes to ignore integrity errors for, violations are still logged
:param enforce_integrity: whether to check stashed ops against the stored values before applying them
"""
self._get = get_fn
self._multi_get = multi_get_fn
# a defaultdict of verified ops ready to be applied
self._items = defaultdict(list)
# a faster deque of ops that have not yet been checked for integrity errors
self._stash: Deque[RevertableOp] = deque()
self._stashed_last_op_for_key = {}
self._unsafe_prefixes = unsafe_prefixes or set()
self._enforce_integrity = enforce_integrity
def stash_ops(self, ops: Iterable[RevertableOp]):
self._stash.extend(ops)
for op in ops:
self._stashed_last_op_for_key[op.key] = op
def validate_and_apply_stashed_ops(self):
if not self._stash:
return
ops_to_apply = []
append_op_needed = ops_to_apply.append
pop_staged_op = self._stash.popleft
unique_keys = set()
# nullify the ops that cancel against the most recent staged for a key
while self._stash:
op = pop_staged_op()
if self._items[op.key] and op.invert() == self._items[op.key][-1]:
self._items[op.key].pop() # if the new op is the inverse of the last op, we can safely null both
continue
elif self._items[op.key] and self._items[op.key][-1] == op: # duplicate of last op
continue # raise an error?
else:
append_op_needed(op)
unique_keys.add(op.key)
existing = {}
if self._enforce_integrity and unique_keys:
unique_keys = list(unique_keys)
for idx in range(0, len(unique_keys), 10000):
batch = unique_keys[idx:idx+10000]
existing.update({
k: v for k, v in zip(batch, self._multi_get(batch))
})
for op in ops_to_apply:
if op.key in self._items and len(self._items[op.key]) and self._items[op.key][-1] == op.invert():
self._items[op.key].pop()
if not self._items[op.key]:
self._items.pop(op.key)
continue
if not self._enforce_integrity:
self._items[op.key].append(op)
continue
stored_val = existing[op.key]
has_stored_val = stored_val is not None
delete_stored_op = None if not has_stored_val else RevertableDelete(op.key, stored_val)
will_delete_existing_stored = False if delete_stored_op is None else (delete_stored_op in self._items[op.key])
try:
if op.is_delete:
if has_stored_val and stored_val != op.value and not will_delete_existing_stored:
# there is a value and we're not deleting it in this op
# check that a delete for the stored value is in the stack
raise OpStackIntegrity(f"db op tries to delete with incorrect existing value {op}\nvs\n{stored_val}")
elif not has_stored_val:
raise OpStackIntegrity(f"db op tries to delete nonexistent key: {op}")
elif stored_val != op.value:
raise OpStackIntegrity(f"db op tries to delete with incorrect value: {op}")
else:
if has_stored_val and not will_delete_existing_stored:
raise OpStackIntegrity(f"db op tries to overwrite before deleting existing: {op}")
if op.key in self._items and len(self._items[op.key]) and self._items[op.key][-1].is_put:
raise OpStackIntegrity(f"db op tries to overwrite with {op} before deleting pending "
f"{self._items[op.key][-1]}")
except OpStackIntegrity as err:
if op.key[:1] in self._unsafe_prefixes:
log.debug(f"skipping over integrity error: {err}")
else:
raise err
self._items[op.key].append(op)
self._stashed_last_op_for_key.clear()
def append_op(self, op: RevertableOp):
"""
Apply a put or delete op, checking that it introduces no integrity errors
"""
inverted = op.invert()
if self._items[op.key] and inverted == self._items[op.key][-1]:
self._items[op.key].pop() # if the new op is the inverse of the last op, we can safely null both
return
elif self._items[op.key] and self._items[op.key][-1] == op: # duplicate of last op
return # raise an error?
stored_val = self._get(op.key)
has_stored_val = stored_val is not None
delete_stored_op = None if not has_stored_val else RevertableDelete(op.key, stored_val)
will_delete_existing_stored = False if delete_stored_op is None else (delete_stored_op in self._items[op.key])
try:
if op.is_put and has_stored_val and not will_delete_existing_stored:
raise OpStackIntegrity(
f"db op tries to add on top of existing key without deleting first: {op}"
)
elif op.is_delete and has_stored_val and stored_val != op.value and not will_delete_existing_stored:
# there is a value and we're not deleting it in this op
# check that a delete for the stored value is in the stack
raise OpStackIntegrity(f"db op tries to delete with incorrect existing value {op}")
elif op.is_delete and not has_stored_val:
raise OpStackIntegrity(f"db op tries to delete nonexistent key: {op}")
elif op.is_delete and stored_val != op.value:
raise OpStackIntegrity(f"db op tries to delete with incorrect value: {op}")
except OpStackIntegrity as err:
if op.key[:1] in self._unsafe_prefixes:
log.debug(f"skipping over integrity error: {err}")
else:
raise err
self._items[op.key].append(op)
def multi_put(self, ops: List[RevertablePut]):
"""
Apply a batch of put ops, checking that they introduce no integrity errors
"""
if not ops:
return
need_put = []
if not all(op.is_put for op in ops):
raise ValueError(f"list must contain only puts")
if not len(set(map(lambda op: op.key, ops))) == len(ops):
raise ValueError(f"list must contain unique keys")
for op in ops:
if self._items[op.key] and op.invert() == self._items[op.key][-1]:
self._items[op.key].pop() # if the new op is the inverse of the last op, we can safely null both
continue
elif self._items[op.key] and self._items[op.key][-1] == op: # duplicate of last op
continue # raise an error?
else:
need_put.append(op)
for op, stored_val in zip(need_put, self._multi_get(list(map(lambda item: item.key, need_put)))):
has_stored_val = stored_val is not None
delete_stored_op = None if not has_stored_val else RevertableDelete(op.key, stored_val)
will_delete_existing_stored = False if delete_stored_op is None else (delete_stored_op in self._items[op.key])
try:
if has_stored_val and not will_delete_existing_stored:
raise OpStackIntegrity(f"db op tries to overwrite before deleting existing: {op}")
except OpStackIntegrity as err:
if op.key[:1] in self._unsafe_prefixes:
log.debug(f"skipping over integrity error: {err}")
else:
raise err
self._items[op.key].append(op)
def multi_delete(self, ops: List[RevertableDelete]):
"""
Apply a batch of delete ops, checking that they introduce no integrity errors
"""
if not ops:
return
need_delete = []
if not all(op.is_delete for op in ops):
raise ValueError(f"list must contain only deletes")
if not len(set(map(lambda op: op.key, ops))) == len(ops):
raise ValueError(f"list must contain unique keys")
for op in ops:
if self._items[op.key] and op.invert() == self._items[op.key][-1]:
self._items[op.key].pop() # if the new op is the inverse of the last op, we can safely null both
continue
elif self._items[op.key] and self._items[op.key][-1] == op: # duplicate of last op
continue # raise an error?
else:
need_delete.append(op)
for op, stored_val in zip(need_delete, self._multi_get(list(map(lambda item: item.key, need_delete)))):
has_stored_val = stored_val is not None
delete_stored_op = None if not has_stored_val else RevertableDelete(op.key, stored_val)
will_delete_existing_stored = False if delete_stored_op is None else (delete_stored_op in self._items[op.key])
try:
if op.is_delete and has_stored_val and stored_val != op.value and not will_delete_existing_stored:
# there is a value and we're not deleting it in this op
# check that a delete for the stored value is in the stack
raise OpStackIntegrity(f"db op tries to delete with incorrect existing value {op}")
elif not has_stored_val:
raise OpStackIntegrity(f"db op tries to delete nonexistent key: {op}")
elif op.is_delete and stored_val != op.value:
raise OpStackIntegrity(f"db op tries to delete with incorrect value: {op}")
except OpStackIntegrity as err:
if op.key[:1] in self._unsafe_prefixes:
log.debug(f"skipping over integrity error: {err}")
else:
raise err
self._items[op.key].append(op)
def clear(self):
self._items.clear()
self._stash.clear()
self._stashed_last_op_for_key.clear()
def __len__(self):
return sum(map(len, self._items.values()))
def __iter__(self):
for key, ops in self._items.items():
for op in ops:
yield op
def __reversed__(self):
for key, ops in self._items.items():
for op in reversed(ops):
yield op
def get_undo_ops(self) -> bytes:
"""
Get the serialized bytes to undo all of the changes made by the pending ops
"""
return b''.join(op.invert().pack() for op in reversed(self))
def apply_packed_undo_ops(self, packed: bytes):
"""
Unpack and apply a sequence of undo ops from serialized undo bytes
"""
offset = 0
packed_size = len(packed)
while offset < packed_size:
is_put, key_len, val_len = _OP_STRUCT.unpack(packed[offset:offset + 9])
offset += 9
key = packed[offset:offset + key_len]
offset += key_len
value = packed[offset:offset + val_len]
offset += val_len
if is_put == 1:
op = RevertablePut(key, value)
else:
op = RevertableDelete(key, value)
self._stash.append(op)
self._stashed_last_op_for_key[op.key] = op
def get_pending_op(self, key: bytes) -> Optional[RevertableOp]:
if key in self._stashed_last_op_for_key:
return self._stashed_last_op_for_key[key]
if key in self._items and self._items[key]:
return self._items[key][-1]
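Assuming the hub package is importable, a minimal usage sketch of the stack shown above: stash a put against a dict-backed store, validate and apply it, write it out, then feed the packed undo ops back through apply_packed_undo_ops to revert. The dict store and the manual "commit" loop are stand-ins for RocksDB and the write batch:

from hub.db.revertable import RevertableOpStack, RevertablePut

store = {}  # stands in for RocksDB; values are raw bytes
stack = RevertableOpStack(store.get, lambda keys: [store.get(k) for k in keys])

stack.stash_ops([RevertablePut(b'ik', b'value-1')])
stack.validate_and_apply_stashed_ops()           # integrity checked against `store`
undo = stack.get_undo_ops()                      # packed inverse ops for this batch

for op in stack:                                 # "commit": write verified ops to the store
    store[op.key] = op.value
stack.clear()
assert store == {b'ik': b'value-1'}

stack.apply_packed_undo_ops(undo)                # stash the inverse (a delete of b'ik')
stack.validate_and_apply_stashed_ops()
for op in stack:
    if op.is_delete:
        del store[op.key]
stack.clear()
assert store == {}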

View file

@ -2,21 +2,20 @@ import os
import logging
import traceback
import argparse
from scribe.env import Env
from scribe.common import setup_logging
from scribe.elasticsearch.service import ElasticSyncService
from hub.common import setup_logging
from hub.elastic_sync.env import ElasticEnv
from hub.elastic_sync.service import ElasticSyncService
def main():
parser = argparse.ArgumentParser(
prog='scribe-elastic-sync'
)
Env.contribute_to_arg_parser(parser)
parser.add_argument('--reindex', type=bool, default=False)
ElasticEnv.contribute_to_arg_parser(parser)
args = parser.parse_args()
try:
env = Env.from_arg_parser(args)
env = ElasticEnv.from_arg_parser(args)
setup_logging(os.path.join(env.db_dir, 'scribe-elastic-sync.log'))
server = ElasticSyncService(env)
server.run(args.reindex)

230
hub/elastic_sync/db.py Normal file
View file

@ -0,0 +1,230 @@
from typing import Optional, Set, Dict, List
from concurrent.futures.thread import ThreadPoolExecutor
from hub.schema.claim import guess_stream_type
from hub.schema.result import Censor
from hub.common import hash160, STREAM_TYPES, CLAIM_TYPES, LRUCache
from hub.db import SecondaryDB
from hub.db.common import ResolveResult
class ElasticSyncDB(SecondaryDB):
def __init__(self, coin, db_dir: str, secondary_name: str, max_open_files: int = -1, reorg_limit: int = 200,
cache_all_tx_hashes: bool = False,
blocking_channel_ids: List[str] = None,
filtering_channel_ids: List[str] = None, executor: ThreadPoolExecutor = None,
index_address_status=False):
super().__init__(coin, db_dir, secondary_name, max_open_files, reorg_limit,
cache_all_tx_hashes, blocking_channel_ids, filtering_channel_ids, executor,
index_address_status)
self.block_timestamp_cache = LRUCache(1024)
def estimate_timestamp(self, height: int) -> int:
if height in self.block_timestamp_cache:
return self.block_timestamp_cache[height]
header = self.prefix_db.header.get(height, deserialize_value=False)
timestamp = int(self.coin.genesisTime + (self.coin.averageBlockOffset * height)) \
if not header else int.from_bytes(header[100:104], byteorder='little')
self.block_timestamp_cache[height] = timestamp
return timestamp
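estimate_timestamp falls back to a linear estimate, genesis time plus an average per-block offset times the height, whenever the header is not in the local db; otherwise it reads the 4-byte little-endian timestamp at offset 100 of the stored header. A small sketch of that fallback, with made-up genesisTime and averageBlockOffset values:

GENESIS_TIME = 1_466_646_592        # hypothetical coin.genesisTime
AVERAGE_BLOCK_OFFSET = 150          # hypothetical coin.averageBlockOffset, seconds per block

def estimate_timestamp(height: int, header: bytes = None) -> int:
    if header:
        return int.from_bytes(header[100:104], byteorder='little')
    return int(GENESIS_TIME + AVERAGE_BLOCK_OFFSET * height)

print(estimate_timestamp(1000))     # linear estimate when no header is stored
print(estimate_timestamp(0, b'\x00' * 100 + (1_466_646_592).to_bytes(4, 'little')))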
async def prepare_claim_metadata_batch(self, claims: Dict[bytes, ResolveResult], extras):
metadatas = {}
needed_txos = set()
for claim_hash, claim in claims.items():
reposted_claim_hash = claim.reposted_claim_hash
needed_txos.add((claim.tx_hash, claim.position))
if reposted_claim_hash:
if reposted_claim_hash not in extras:
continue
reposted_claim = extras.get(reposted_claim_hash)
if reposted_claim:
needed_txos.add((reposted_claim.tx_hash, reposted_claim.position))
metadatas.update(await self.get_claim_metadatas(list(needed_txos)))
for claim_hash, claim in claims.items():
metadata = metadatas.get((claim.tx_hash, claim.position))
if not metadata:
continue
if not metadata.is_stream or not metadata.stream.has_fee:
fee_amount = 0
else:
fee_amount = int(max(metadata.stream.fee.amount or 0, 0) * 1000)
if fee_amount >= 9223372036854775807:
continue
reposted_claim_hash = claim.reposted_claim_hash
reposted_metadata = None
if reposted_claim_hash:
if reposted_claim_hash in extras:
reposted_claim = extras[reposted_claim_hash]
reposted_metadata = metadatas.get((reposted_claim.tx_hash, reposted_claim.position))
reposted_tags = []
reposted_languages = []
reposted_has_source = False
reposted_claim_type = None
reposted_stream_type = None
reposted_media_type = None
reposted_fee_amount = None
reposted_fee_currency = None
reposted_duration = None
if reposted_metadata:
if reposted_metadata.is_stream:
meta = reposted_metadata.stream
elif reposted_metadata.is_channel:
meta = reposted_metadata.channel
elif reposted_metadata.is_collection:
meta = reposted_metadata.collection
elif reposted_metadata.is_repost:
meta = reposted_metadata.repost
else:
continue
reposted_tags = [tag for tag in meta.tags]
reposted_languages = [lang.language or 'none' for lang in meta.languages] or ['none']
reposted_has_source = False if not reposted_metadata.is_stream else reposted_metadata.stream.has_source
reposted_claim_type = CLAIM_TYPES[reposted_metadata.claim_type]
reposted_stream_type = STREAM_TYPES[guess_stream_type(reposted_metadata.stream.source.media_type)] \
if reposted_has_source else 0
reposted_media_type = reposted_metadata.stream.source.media_type if reposted_metadata.is_stream else 0
if not reposted_metadata.is_stream or not reposted_metadata.stream.has_fee:
reposted_fee_amount = 0
else:
reposted_fee_amount = int(max(reposted_metadata.stream.fee.amount or 0, 0) * 1000)
if reposted_fee_amount >= 9223372036854775807:
continue
reposted_fee_currency = None if not reposted_metadata.is_stream else reposted_metadata.stream.fee.currency
reposted_duration = None
if reposted_metadata.is_stream and \
(reposted_metadata.stream.video.duration or reposted_metadata.stream.audio.duration):
reposted_duration = reposted_metadata.stream.video.duration or reposted_metadata.stream.audio.duration
if metadata.is_stream:
meta = metadata.stream
elif metadata.is_channel:
meta = metadata.channel
elif metadata.is_collection:
meta = metadata.collection
elif metadata.is_repost:
meta = metadata.repost
else:
continue
claim_tags = [tag for tag in meta.tags]
claim_languages = [lang.language or 'none' for lang in meta.languages] or ['none']
tags = list(set(claim_tags).union(set(reposted_tags)))
languages = list(set(claim_languages).union(set(reposted_languages)))
blocking_channel = None
blocked_hash = self.blocked_streams.get(claim_hash) or self.blocked_streams.get(
reposted_claim_hash) or self.blocked_channels.get(claim_hash) or self.blocked_channels.get(
reposted_claim_hash) or self.blocked_channels.get(claim.channel_hash)
if blocked_hash:
blocking_channel, blocked_hash = blocked_hash
filtered_channel = None
filtered_hash = self.filtered_streams.get(claim_hash) or self.filtered_streams.get(
reposted_claim_hash) or self.filtered_channels.get(claim_hash) or self.filtered_channels.get(
reposted_claim_hash) or self.filtered_channels.get(claim.channel_hash)
if filtered_hash:
filtered_channel, filtered_hash = filtered_hash
value = {
'claim_id': claim_hash.hex(),
'claim_name': claim.name,
'normalized_name': claim.normalized_name,
'tx_id': claim.tx_hash[::-1].hex(),
'tx_num': claim.tx_num,
'tx_nout': claim.position,
'amount': claim.amount,
'timestamp': self.estimate_timestamp(claim.height),
'creation_timestamp': self.estimate_timestamp(claim.creation_height),
'height': claim.height,
'creation_height': claim.creation_height,
'activation_height': claim.activation_height,
'expiration_height': claim.expiration_height,
'effective_amount': claim.effective_amount,
'support_amount': claim.support_amount,
'is_controlling': bool(claim.is_controlling),
'last_take_over_height': claim.last_takeover_height,
'short_url': claim.short_url,
'canonical_url': claim.canonical_url,
'title': None if not metadata.is_stream else metadata.stream.title,
'author': None if not metadata.is_stream else metadata.stream.author,
'description': None if not metadata.is_stream else metadata.stream.description,
'claim_type': CLAIM_TYPES[metadata.claim_type],
'has_source': reposted_has_source if metadata.is_repost else (
False if not metadata.is_stream else metadata.stream.has_source),
'sd_hash': metadata.stream.source.sd_hash if metadata.is_stream and metadata.stream.has_source else None,
'stream_type': STREAM_TYPES[guess_stream_type(metadata.stream.source.media_type)]
if metadata.is_stream and metadata.stream.has_source
else reposted_stream_type if metadata.is_repost else 0,
'media_type': metadata.stream.source.media_type
if metadata.is_stream else reposted_media_type if metadata.is_repost else None,
'fee_amount': fee_amount if not metadata.is_repost else reposted_fee_amount,
'fee_currency': metadata.stream.fee.currency
if metadata.is_stream else reposted_fee_currency if metadata.is_repost else None,
'repost_count': self.get_reposted_count(claim_hash),
'reposted_claim_id': None if not reposted_claim_hash else reposted_claim_hash.hex(),
'reposted_claim_type': reposted_claim_type,
'reposted_has_source': reposted_has_source,
'channel_id': None if not metadata.is_signed else metadata.signing_channel_hash[::-1].hex(),
'public_key_id': None if not metadata.is_channel else
self.coin.P2PKH_address_from_hash160(hash160(metadata.channel.public_key_bytes)),
'signature': (metadata.signature or b'').hex() or None,
# 'signature_digest': metadata.signature,
'is_signature_valid': bool(claim.signature_valid),
'tags': tags,
'languages': languages,
'censor_type': Censor.RESOLVE if blocked_hash else Censor.SEARCH if filtered_hash else Censor.NOT_CENSORED,
'censoring_channel_id': (blocking_channel or filtered_channel or b'').hex() or None,
'censoring_claim_id': (blocked_hash or filtered_hash or b'').hex() or None,
'claims_in_channel': None if not metadata.is_channel else self.get_claims_in_channel_count(claim_hash),
'reposted_tx_id': None if not claim.reposted_tx_hash else claim.reposted_tx_hash[::-1].hex(),
'reposted_tx_position': claim.reposted_tx_position,
'reposted_height': claim.reposted_height,
'channel_tx_id': None if not claim.channel_tx_hash else claim.channel_tx_hash[::-1].hex(),
'channel_tx_position': claim.channel_tx_position,
'channel_height': claim.channel_height,
}
if metadata.is_repost and reposted_duration is not None:
value['duration'] = reposted_duration
elif metadata.is_stream and (metadata.stream.video.duration or metadata.stream.audio.duration):
value['duration'] = metadata.stream.video.duration or metadata.stream.audio.duration
if metadata.is_stream:
value['release_time'] = metadata.stream.release_time or value['creation_timestamp']
elif metadata.is_repost or metadata.is_collection:
value['release_time'] = value['creation_timestamp']
yield value
async def all_claims_producer(self, batch_size: int):
batch = []
for k in self.prefix_db.claim_to_txo.iterate(include_value=False):
batch.append(k.claim_hash)
if len(batch) == batch_size:
claims = {}
total_extras = {}
async for claim_hash, claim, extras in self._prepare_resolve_results(batch, include_extra=False,
apply_blocking=False,
apply_filtering=False):
if not claim:
self.logger.warning("wat")
continue
claims[claim_hash] = claim
total_extras[claim_hash] = claim
total_extras.update(extras)
async for claim in self.prepare_claim_metadata_batch(claims, total_extras):
if claim:
yield claim
batch.clear()
if batch:
claims = {}
total_extras = {}
async for claim_hash, claim, extras in self._prepare_resolve_results(batch, include_extra=False,
apply_blocking=False,
apply_filtering=False):
if not claim:
self.logger.warning("wat")
continue
claims[claim_hash] = claim
total_extras[claim_hash] = claim
total_extras.update(extras)
async for claim in self.prepare_claim_metadata_batch(claims, total_extras):
if claim:
yield claim

49
hub/elastic_sync/env.py Normal file
View file

@ -0,0 +1,49 @@
from hub.env import Env
class ElasticEnv(Env):
def __init__(self, db_dir=None, max_query_workers=None, chain=None, reorg_limit=None, prometheus_port=None,
cache_all_tx_hashes=None, elastic_host=None, elastic_port=None,
es_index_prefix=None, elastic_notifier_host=None, elastic_notifier_port=None,
blocking_channel_ids=None, filtering_channel_ids=None, reindex=False):
super().__init__(db_dir, max_query_workers, chain, reorg_limit, prometheus_port, cache_all_tx_hashes,
blocking_channel_ids, filtering_channel_ids)
self.elastic_host = elastic_host if elastic_host is not None else self.default('ELASTIC_HOST', 'localhost')
self.elastic_port = elastic_port if elastic_port is not None else self.integer('ELASTIC_PORT', 9200)
self.elastic_notifier_host = elastic_notifier_host if elastic_notifier_host is not None else self.default(
'ELASTIC_NOTIFIER_HOST', 'localhost')
self.elastic_notifier_port = elastic_notifier_port if elastic_notifier_port is not None else self.integer(
'ELASTIC_NOTIFIER_PORT', 19080)
self.es_index_prefix = es_index_prefix if es_index_prefix is not None else self.default('ES_INDEX_PREFIX', '')
# Filtering / Blocking
self.reindex = reindex if reindex is not None else self.boolean('REINDEX_ES', False)
@classmethod
def contribute_to_arg_parser(cls, parser):
super().contribute_to_arg_parser(parser)
parser.add_argument('--reindex', default=False, help="Drop and rebuild the elasticsearch index.",
action='store_true')
parser.add_argument('--elastic_host', default=cls.default('ELASTIC_HOST', 'localhost'), type=str,
help="Hostname or ip address of the elasticsearch instance to connect to. "
"Can be set in env with 'ELASTIC_HOST'")
parser.add_argument('--elastic_port', default=cls.integer('ELASTIC_PORT', 9200), type=int,
help="Elasticsearch port to connect to. Can be set in env with 'ELASTIC_PORT'")
parser.add_argument('--elastic_notifier_host', default=cls.default('ELASTIC_NOTIFIER_HOST', 'localhost'),
type=str, help='elasticsearch sync notifier host, defaults to localhost')
parser.add_argument('--elastic_notifier_port', default=cls.integer('ELASTIC_NOTIFIER_PORT', 19080), type=int,
help='elasticsearch sync notifier port')
parser.add_argument('--es_index_prefix', default=cls.default('ES_INDEX_PREFIX', ''), type=str)
parser.add_argument('--query_timeout_ms', type=int, default=cls.integer('QUERY_TIMEOUT_MS', 10000),
help="Elasticsearch query timeout, in ms. Can be set in env with 'QUERY_TIMEOUT_MS'")
@classmethod
def from_arg_parser(cls, args):
return cls(
db_dir=args.db_dir, elastic_host=args.elastic_host,
elastic_port=args.elastic_port, max_query_workers=args.max_query_workers, chain=args.chain,
es_index_prefix=args.es_index_prefix, reorg_limit=args.reorg_limit,
prometheus_port=args.prometheus_port, cache_all_tx_hashes=args.cache_all_tx_hashes,
blocking_channel_ids=args.blocking_channel_ids,
filtering_channel_ids=args.filtering_channel_ids, elastic_notifier_host=args.elastic_notifier_host,
elastic_notifier_port=args.elastic_notifier_port
)

View file

@ -1,3 +1,4 @@
import errno
import os
import json
import typing
@ -5,21 +6,24 @@ import asyncio
from collections import defaultdict
from elasticsearch import AsyncElasticsearch, NotFoundError
from elasticsearch.helpers import async_streaming_bulk
from scribe.schema.result import Censor
from scribe.service import BlockchainReaderService
from scribe.db.revertable import RevertableOp
from scribe.db.common import TrendingNotification, DB_PREFIXES
from scribe.elasticsearch.notifier_protocol import ElasticNotifierProtocol
from scribe.elasticsearch.search import IndexVersionMismatch, expand_query
from scribe.elasticsearch.constants import ALL_FIELDS, INDEX_DEFAULT_SETTINGS
from scribe.elasticsearch.fast_ar_trending import FAST_AR_TRENDING_SCRIPT
from hub.schema.result import Censor
from hub.service import BlockchainReaderService
from hub.common import IndexVersionMismatch, ALL_FIELDS, INDEX_DEFAULT_SETTINGS, expand_query
from hub.db.revertable import RevertableOp
from hub.db.common import TrendingNotification, DB_PREFIXES, ResolveResult
from hub.notifier_protocol import ElasticNotifierProtocol
from hub.elastic_sync.fast_ar_trending import FAST_AR_TRENDING_SCRIPT
from hub.elastic_sync.db import ElasticSyncDB
if typing.TYPE_CHECKING:
from hub.elastic_sync.env import ElasticEnv
class ElasticSyncService(BlockchainReaderService):
VERSION = 1
def __init__(self, env):
def __init__(self, env: 'ElasticEnv'):
super().__init__(env, 'lbry-elastic-writer', thread_workers=1, thread_prefix='lbry-elastic-writer')
self.env = env
# self._refresh_interval = 0.1
self._task = None
self.index = self.env.es_index_prefix + 'claims'
@ -42,10 +46,34 @@ class ElasticSyncService(BlockchainReaderService):
self._listeners: typing.List[ElasticNotifierProtocol] = []
self._force_reindex = False
async def run_es_notifier(self, synchronized: asyncio.Event):
server = await asyncio.get_event_loop().create_server(
lambda: ElasticNotifierProtocol(self._listeners), self.env.elastic_notifier_host, self.env.elastic_notifier_port
def open_db(self):
env = self.env
self.db = ElasticSyncDB(
env.coin, env.db_dir, self.secondary_name, -1, env.reorg_limit,
env.cache_all_tx_hashes, blocking_channel_ids=env.blocking_channel_ids,
filtering_channel_ids=env.filtering_channel_ids, executor=self._executor,
index_address_status=env.index_address_status
)
async def run_es_notifier(self, synchronized: asyncio.Event):
started = False
while not started:
try:
server = await asyncio.get_event_loop().create_server(
lambda: ElasticNotifierProtocol(self._listeners),
self.env.elastic_notifier_host,
self.env.elastic_notifier_port
)
started = True
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
self.log.error(f'ES notifier server failed to listen on '
f'{self.env.elastic_notifier_host}:'
f'{self.env.elastic_notifier_port:d} : {e!r}')
if isinstance(e, OSError) and e.errno == errno.EADDRINUSE:
await asyncio.sleep(3)
continue
raise
self.log.info("ES notifier server listening on TCP %s:%i", self.env.elastic_notifier_host,
self.env.elastic_notifier_port)
synchronized.set()
@ -61,7 +89,10 @@ class ElasticSyncService(BlockchainReaderService):
info = {}
if os.path.exists(self._es_info_path):
with open(self._es_info_path, 'r') as f:
info.update(json.loads(f.read()))
try:
info.update(json.loads(f.read()))
except json.decoder.JSONDecodeError:
self.log.warning('failed to parse es sync status file')
self._last_wrote_height = int(info.get('height', 0))
self._last_wrote_block_hash = info.get('block_hash', None)
@ -142,28 +173,32 @@ class ElasticSyncService(BlockchainReaderService):
return update
async def apply_filters(self, blocked_streams, blocked_channels, filtered_streams, filtered_channels):
only_channels = lambda x: {k: chan for k, (chan, repost) in x.items()}
async def batched_update_filter(items: typing.Dict[bytes, bytes], channel: bool, censor_type: int):
batches = [{}]
for k, v in items.items():
if len(batches[-1]) == 2000:
batches.append({})
batches[-1][k] = v
for batch in batches:
if batch:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(censor_type, only_channels(batch)), slices=4)
if channel:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(censor_type, only_channels(batch), True),
slices=4)
await self.sync_client.indices.refresh(self.index)
if filtered_streams:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.SEARCH, filtered_streams), slices=4)
await self.sync_client.indices.refresh(self.index)
await batched_update_filter(filtered_streams, False, Censor.SEARCH)
if filtered_channels:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.SEARCH, filtered_channels), slices=4)
await self.sync_client.indices.refresh(self.index)
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.SEARCH, filtered_channels, True), slices=4)
await self.sync_client.indices.refresh(self.index)
await batched_update_filter(filtered_channels, True, Censor.SEARCH)
if blocked_streams:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.RESOLVE, blocked_streams), slices=4)
await self.sync_client.indices.refresh(self.index)
await batched_update_filter(blocked_streams, False, Censor.RESOLVE)
if blocked_channels:
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.RESOLVE, blocked_channels), slices=4)
await self.sync_client.indices.refresh(self.index)
await self.sync_client.update_by_query(
self.index, body=self.update_filter_query(Censor.RESOLVE, blocked_channels, True), slices=4)
await self.sync_client.indices.refresh(self.index)
await batched_update_filter(blocked_channels, True, Censor.RESOLVE)
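batched_update_filter keeps each update_by_query payload bounded by splitting the censor map into chunks of 2000 entries and issuing one query per chunk. The chunking itself is just this, shown as a standalone sketch with a smaller batch size:

def chunk_dict(items: dict, batch_size: int):
    batches = [{}]
    for k, v in items.items():
        if len(batches[-1]) == batch_size:
            batches.append({})
        batches[-1][k] = v
    return [b for b in batches if b]

filtered = {f"claim{i}".encode(): f"chan{i}".encode() for i in range(5)}
assert [len(b) for b in chunk_dict(filtered, 2)] == [2, 2, 1]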
@staticmethod
def _upsert_claim_query(index, claim):
@ -207,10 +242,26 @@ class ElasticSyncService(BlockchainReaderService):
async def _claim_producer(self):
for deleted in self._deleted_claims:
yield self._delete_claim_query(self.index, deleted)
for touched in self._touched_claims:
claim = self.db.claim_producer(touched)
if claim:
yield self._upsert_claim_query(self.index, claim)
touched_claims = list(self._touched_claims)
for idx in range(0, len(touched_claims), 1000):
batch = touched_claims[idx:idx+1000]
claims = {}
total_extras = {}
async for claim_hash, claim, extras in self.db._prepare_resolve_results(batch, include_extra=False,
apply_blocking=False,
apply_filtering=False):
if not claim:
self.log.warning("cannot sync claim %s", (claim_hash or b'').hex())
continue
claims[claim_hash] = claim
total_extras[claim_hash] = claim
total_extras.update(extras)
async for claim in self.db.prepare_claim_metadata_batch(claims, total_extras):
if claim:
yield self._upsert_claim_query(self.index, claim)
for claim_hash, notifications in self._trending.items():
yield self._update_trending_query(self.index, claim_hash, notifications)
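The producer above now walks the touched claims in slices of 1000 so the resolve results prepared per iteration stay bounded. A sketch of the slicing pattern on its own (touched_claims and process stand in for the service's _touched_claims set and the ES upsert):
def batches(seq, size=1000):
    # Walk a list in fixed-size slices: seq[0:1000], seq[1000:2000], ...
    for idx in range(0, len(seq), size):
        yield seq[idx:idx + size]
# for batch in batches(list(touched_claims)):
#     process(batch)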
@ -231,6 +282,7 @@ class ElasticSyncService(BlockchainReaderService):
self._advanced = True
def unwind(self):
self.db.block_timestamp_cache.clear()
reverted_block_hash = self.db.block_hashes[-1]
super().unwind()
packed = self.db.prefix_db.undo.get(len(self.db.tx_counts), reverted_block_hash)
@ -394,22 +446,26 @@ class ElasticSyncService(BlockchainReaderService):
self.log.info("finished reindexing")
async def _sync_all_claims(self, batch_size=100000):
def load_historic_trending():
notifications = self._trending
for k, v in self.db.prefix_db.trending_notification.iterate():
notifications[k.claim_hash].append(TrendingNotification(k.height, v.previous_amount, v.new_amount))
async def all_claims_producer():
current_height = self.db.db_height
async for claim in self.db.all_claims_producer(batch_size=batch_size):
yield self._upsert_claim_query(self.index, claim)
claim_hash = bytes.fromhex(claim['claim_id'])
if claim_hash in self._trending:
yield self._update_trending_query(self.index, claim_hash, self._trending.pop(claim_hash))
self._trending.clear()
self.log.info("loading about %i historic trending updates", self.db.prefix_db.trending_notification.estimate_num_keys())
await asyncio.get_event_loop().run_in_executor(self._executor, load_historic_trending)
self.log.info("loaded historic trending updates for %i claims", len(self._trending))
self.log.info("applying trending")
for batch_height in range(0, current_height, 10000):
notifications = defaultdict(list)
for k, v in self.db.prefix_db.trending_notification.iterate(start=(batch_height,), stop=(batch_height+10000,)):
notifications[k.claim_hash].append(TrendingNotification(k.height, v.previous_amount, v.new_amount))
async for (k,), v in self.db.prefix_db.claim_to_txo.multi_get_async_gen(
self._executor, [(claim_hash,) for claim_hash in notifications]):
if not v:
notifications.pop(k)
for claim_hash, trending in notifications.items():
yield self._update_trending_query(self.index, claim_hash, trending)
self._trending.clear()
cnt = 0
success = 0
@ -424,7 +480,7 @@ class ElasticSyncService(BlockchainReaderService):
else:
success += 1
if cnt % batch_size == 0:
self.log.info(f"indexed {success} claims")
self.log.info(f"indexed {success}/{cnt} claims")
finished = True
await self.sync_client.indices.refresh(self.index)
self.log.info("indexed %i/%i claims", success, cnt)

195
hub/env.py Normal file
View file

@ -0,0 +1,195 @@
import os
import re
import resource
import logging
from collections import namedtuple
from hub.scribe.network import LBCMainNet, LBCTestNet, LBCRegTest
NetIdentity = namedtuple('NetIdentity', 'host tcp_port ssl_port nick_suffix')
SEGMENT_REGEX = re.compile("(?!-)[A-Z_\\d-]{1,63}(?<!-)$", re.IGNORECASE)
def is_valid_hostname(hostname):
if len(hostname) > 255:
return False
# strip exactly one dot from the right, if present
if hostname and hostname[-1] == ".":
hostname = hostname[:-1]
return all(SEGMENT_REGEX.match(x) for x in hostname.split("."))
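A few illustrative inputs for the validator above, assuming is_valid_hostname and SEGMENT_REGEX from this file are in scope:
assert is_valid_hostname("es01.example.com.")      # one trailing dot is stripped
assert is_valid_hostname("localhost")
assert not is_valid_hostname("-bad-.example.com")  # labels may not start or end with '-'
assert not is_valid_hostname("a" * 256)            # longer than 255 characters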
class Env:
# Peer discovery
PD_OFF, PD_SELF, PD_ON = range(3)
class Error(Exception):
pass
def __init__(self, db_dir=None, max_query_workers=None, chain=None, reorg_limit=None,
prometheus_port=None, cache_all_tx_hashes=None,
blocking_channel_ids=None, filtering_channel_ids=None, index_address_status=None):
self.logger = logging.getLogger(__name__)
self.db_dir = db_dir if db_dir is not None else self.required('DB_DIRECTORY')
self.obsolete(['UTXO_MB', 'HIST_MB', 'NETWORK'])
self.max_query_workers = max_query_workers if max_query_workers is not None else self.integer('MAX_QUERY_WORKERS', 4)
if chain == 'mainnet':
self.coin = LBCMainNet
elif chain == 'testnet':
self.coin = LBCTestNet
else:
self.coin = LBCRegTest
self.reorg_limit = reorg_limit if reorg_limit is not None else self.integer('REORG_LIMIT', self.coin.REORG_LIMIT)
self.prometheus_port = prometheus_port if prometheus_port is not None else self.integer('PROMETHEUS_PORT', 0)
self.cache_all_tx_hashes = cache_all_tx_hashes if cache_all_tx_hashes is not None else self.boolean('CACHE_ALL_TX_HASHES', False)
# Filtering / Blocking
self.blocking_channel_ids = blocking_channel_ids if blocking_channel_ids is not None else self.default(
'BLOCKING_CHANNEL_IDS', '').split(' ')
self.filtering_channel_ids = filtering_channel_ids if filtering_channel_ids is not None else self.default(
'FILTERING_CHANNEL_IDS', '').split(' ')
self.index_address_status = index_address_status if index_address_status is not None else \
self.boolean('INDEX_ADDRESS_STATUS', False)
@classmethod
def default(cls, envvar, default):
return os.environ.get(envvar, default)
@classmethod
def boolean(cls, envvar, default):
default = 'Yes' if default else ''
return bool(cls.default(envvar, default).strip())
@classmethod
def required(cls, envvar):
value = os.environ.get(envvar)
if value is None:
raise cls.Error(f'required envvar {envvar} not set')
return value
@classmethod
def string_amount(cls, envvar, default):
value = os.environ.get(envvar, default)
amount_pattern = re.compile("[0-9]{0,10}(\.[0-9]{1,8})?")
if len(value) > 0 and not amount_pattern.fullmatch(value):
raise cls.Error(f'{value} is not a valid amount for {envvar}')
return value
@classmethod
def integer(cls, envvar, default):
value = os.environ.get(envvar)
if value is None:
return default
try:
return int(value)
except Exception:
raise cls.Error(f'cannot convert envvar {envvar} value {value} to an integer')
@classmethod
def custom(cls, envvar, default, parse):
value = os.environ.get(envvar)
if value is None:
return default
try:
return parse(value)
except Exception as e:
raise cls.Error(f'cannot parse envvar {envvar} value {value}') from e
@classmethod
def obsolete(cls, envvars):
bad = [envvar for envvar in envvars if os.environ.get(envvar)]
if bad:
raise cls.Error(f'remove obsolete environment variables {bad}')
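How the env-var helpers above behave, sketched with illustrative variable names (these are examples, not settings the hub actually reads), assuming Env from this file is imported:
import os
os.environ['EXAMPLE_PORT'] = '50001'
os.environ['EXAMPLE_FLAG'] = 'yes'
assert Env.integer('EXAMPLE_PORT', 0) == 50001      # parsed as int
assert Env.integer('MISSING_PORT', 1234) == 1234    # default when unset
assert Env.boolean('EXAMPLE_FLAG', False) is True   # any non-empty value is truthy
assert Env.default('MISSING_VALUE', 'fallback') == 'fallback'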
def cs_host(self):
"""Returns the 'host' argument to pass to asyncio's create_server
call. The result can be a single host name string, a list of
host name strings, or an empty string to bind to all interfaces.
The result is used as the host argument for the hub's TCP server.
"""
host = self.host
result = [part.strip() for part in host.split(',')]
if len(result) == 1:
result = result[0]
if result == 'localhost':
# 'localhost' resolves to ::1 (ipv6) on many systems, which fails on a default
# docker setup; using 127.0.0.1 instead forces ipv4
result = '127.0.0.1'
return result
def sane_max_sessions(self):
"""Return the maximum number of sessions to permit. Normally this
is MAX_SESSIONS. However, to prevent open file exhaustion, adjust
downwards if running with a small open file rlimit."""
env_value = self.integer('MAX_SESSIONS', 1000)
nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)[0]
# We give the DB 250 files; allow ElectrumX 100 for itself
value = max(0, min(env_value, nofile_limit - 350))
if value < env_value:
self.logger.warning(f'lowered maximum sessions from {env_value:,d} to {value:,d} '
f'because your open file limit is {nofile_limit:,d}')
return value
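The headroom arithmetic above, worked through for an illustrative open file limit:
# Illustrative: MAX_SESSIONS=1000 with a soft RLIMIT_NOFILE of 1024
env_value, nofile_limit = 1000, 1024
value = max(0, min(env_value, nofile_limit - 350))   # 250 files for the DB, 100 for the server
assert value == 674   # lowered from 1000, and the warning above is logged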
def peer_discovery_enum(self):
pd = self.default('PEER_DISCOVERY', 'on').strip().lower()
if pd in ('off', ''):
return self.PD_OFF
elif pd == 'self':
return self.PD_SELF
else:
return self.PD_ON
def extract_peer_hubs(self):
peer_hubs = self.default('PEER_HUBS', '')
if not peer_hubs:
return []
return [hub.strip() for hub in peer_hubs.split(',')]
@classmethod
def contribute_to_arg_parser(cls, parser):
"""
Settings used by all services
"""
env_db_dir = cls.default('DB_DIRECTORY', None)
parser.add_argument('--db_dir', type=str, required=env_db_dir is None,
help="Path of the directory containing lbry-rocksdb. ", default=env_db_dir)
parser.add_argument('--reorg_limit', default=cls.integer('REORG_LIMIT', 200), type=int, help='Max reorg depth')
parser.add_argument('--chain', type=str, default=cls.default('NET', 'mainnet'),
help="Which chain to use, default is mainnet, others are used for testing",
choices=['mainnet', 'regtest', 'testnet'])
parser.add_argument('--max_query_workers', type=int, default=cls.integer('MAX_QUERY_WORKERS', 4),
help="Size of the thread pool. Can be set in env with 'MAX_QUERY_WORKERS'")
parser.add_argument('--cache_all_tx_hashes', action='store_true',
help="Load all tx hashes into memory. This will make address subscriptions and sync, "
"resolve, transaction fetching, and block sync all faster at the expense of higher "
"memory usage (at least 10GB more). Can be set in env with 'CACHE_ALL_TX_HASHES'.",
default=cls.boolean('CACHE_ALL_TX_HASHES', False))
parser.add_argument('--prometheus_port', type=int, default=cls.integer('PROMETHEUS_PORT', 0),
help="Port for prometheus metrics to listen on, disabled by default. "
"Can be set in env with 'PROMETHEUS_PORT'.")
parser.add_argument('--blocking_channel_ids', nargs='*',
help="Space separated list of channel claim ids used for blocking. "
"Claims that are reposted by these channels can't be resolved "
"or returned in search results. Can be set in env with 'BLOCKING_CHANNEL_IDS'",
default=cls.default('BLOCKING_CHANNEL_IDS', '').split(' '))
parser.add_argument('--filtering_channel_ids', nargs='*',
help="Space separated list of channel claim ids used for blocking. "
"Claims that are reposted by these channels aren't returned in search results. "
"Can be set in env with 'FILTERING_CHANNEL_IDS'",
default=cls.default('FILTERING_CHANNEL_IDS', '').split(' '))
parser.add_argument('--index_address_statuses', action='store_true',
help="Use precomputed address statuses, must be enabled in the reader and the writer to "
"use it. If disabled (the default), the status of an address must be calculated at "
"runtime when clients request it (address subscriptions, address history sync). "
"If enabled, scribe will maintain an index of precomputed statuses",
default=cls.boolean('INDEX_ADDRESS_STATUS', False))
@classmethod
def from_arg_parser(cls, args):
raise NotImplementedError()

View file

@ -1,4 +1,7 @@
import typing
from .base import BaseError, claim_id
if typing.TYPE_CHECKING:
from hub.db.common import ResolveResult
class UserInputError(BaseError):
@ -262,11 +265,13 @@ class ResolveTimeoutError(WalletError):
class ResolveCensoredError(WalletError):
def __init__(self, url, censor_id, censor_row):
self.url = url
def __init__(self, censor_type: str, censored_url: str, censoring_url: str, censor_id: str, reason: str,
censor_row: 'ResolveResult'):
self.url = censored_url
self.censor_id = censor_id
self.censor_row = censor_row
super().__init__(f"Resolve of '{url}' was censored by channel with claim id '{censor_id}'.")
super().__init__(f"Resolve of '{censored_url}' was {censor_type} by {censoring_url}'. Reason given: {reason}")
class KeyFeeAboveMaxAllowedError(WalletError):

View file

@ -2,21 +2,20 @@ import os
import logging
import traceback
import argparse
from scribe.env import Env
from scribe.common import setup_logging
from scribe.hub.service import HubServerService
from hub.common import setup_logging
from hub.herald.env import ServerEnv
from hub.herald.service import HubServerService
def main():
parser = argparse.ArgumentParser(
prog='scribe-hub'
prog='herald'
)
Env.contribute_to_arg_parser(parser)
ServerEnv.contribute_to_arg_parser(parser)
args = parser.parse_args()
try:
env = Env.from_arg_parser(args)
setup_logging(os.path.join(env.db_dir, 'scribe-hub.log'))
env = ServerEnv.from_arg_parser(args)
setup_logging(os.path.join(env.db_dir, 'herald.log'))
server = HubServerService(env)
server.run()
except Exception:

View file

@ -1,7 +1,7 @@
import inspect
from collections import namedtuple
from functools import lru_cache
from scribe.common import CodeMessageError
from hub.common import CodeMessageError
SignatureInfo = namedtuple('SignatureInfo', 'min_args max_args '

33
hub/herald/db.py Normal file
View file

@ -0,0 +1,33 @@
import asyncio
from typing import List
from concurrent.futures.thread import ThreadPoolExecutor
from hub.db import SecondaryDB
class HeraldDB(SecondaryDB):
def __init__(self, coin, db_dir: str, secondary_name: str, max_open_files: int = -1, reorg_limit: int = 200,
cache_all_tx_hashes: bool = False,
blocking_channel_ids: List[str] = None,
filtering_channel_ids: List[str] = None, executor: ThreadPoolExecutor = None,
index_address_status=False, merkle_cache_size=32768, tx_cache_size=32768):
super().__init__(coin, db_dir, secondary_name, max_open_files, reorg_limit,
cache_all_tx_hashes, blocking_channel_ids, filtering_channel_ids, executor,
index_address_status, merkle_cache_size, tx_cache_size)
# self.headers = None
# async def _read_headers(self):
# def get_headers():
# return [
# header for header in self.prefix_db.header.iterate(
# start=(0, ), stop=(self.db_height + 1, ), include_key=False, fill_cache=False,
# deserialize_value=False
# )
# ]
#
# headers = await asyncio.get_event_loop().run_in_executor(self._executor, get_headers)
# assert len(headers) - 1 == self.db_height, f"{len(headers)} vs {self.db_height}"
# self.headers = headers
# async def initialize_caches(self):
# await super().initialize_caches()
# await self._read_headers()

166
hub/herald/env.py Normal file
View file

@ -0,0 +1,166 @@
import re
from collections import deque
from hub.env import Env
ELASTIC_SERVICES_REGEX = re.compile("(([\d|\.]|[^,:\/])*:\d*\/([\d|\.]|[^,:\/])*:\d*,?)*")
def parse_es_services(elastic_services_arg: str):
match = ELASTIC_SERVICES_REGEX.match(elastic_services_arg)
if not match:
return []
matching = match.group()
services = [item.split('/') for item in matching.split(',') if item]
return [
((es.split(':')[0], int(es.split(':')[1])), (notifier.split(':')[0], int(notifier.split(':')[1])))
for (es, notifier) in services
]
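An illustrative parse of an ELASTIC_SERVICES string with the helper above (hosts and ports are examples):
services = parse_es_services('127.0.0.1:9200/127.0.0.1:19080,10.0.0.2:9200/10.0.0.2:19080')
assert services == [
    (('127.0.0.1', 9200), ('127.0.0.1', 19080)),
    (('10.0.0.2', 9200), ('10.0.0.2', 19080)),
]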
class ServerEnv(Env):
def __init__(self, db_dir=None, max_query_workers=None, chain=None, reorg_limit=None,
prometheus_port=None, cache_all_tx_hashes=None,
daemon_url=None, host=None, elastic_services=None, es_index_prefix=None,
tcp_port=None, udp_port=None, banner_file=None, allow_lan_udp=None, country=None,
payment_address=None, donation_address=None, max_send=None, max_receive=None, max_sessions=None,
session_timeout=None, drop_client=None, description=None, daily_fee=None,
database_query_timeout=None, blocking_channel_ids=None, filtering_channel_ids=None, peer_hubs=None,
peer_announce=None, index_address_status=None, address_history_cache_size=None, daemon_ca_path=None,
merkle_cache_size=None, resolved_url_cache_size=None, tx_cache_size=None,
history_tx_cache_size=None, largest_address_history_cache_size=None):
super().__init__(db_dir, max_query_workers, chain, reorg_limit, prometheus_port, cache_all_tx_hashes,
blocking_channel_ids, filtering_channel_ids, index_address_status)
self.daemon_url = daemon_url if daemon_url is not None else self.required('DAEMON_URL')
self.host = host if host is not None else self.default('HOST', 'localhost')
self.elastic_services = deque(parse_es_services(elastic_services or 'localhost:9200/localhost:19080'))
self.es_index_prefix = es_index_prefix if es_index_prefix is not None else self.default('ES_INDEX_PREFIX', '')
# Server stuff
self.tcp_port = tcp_port if tcp_port is not None else self.integer('TCP_PORT', None)
self.udp_port = udp_port if udp_port is not None else self.integer('UDP_PORT', self.tcp_port)
self.banner_file = banner_file if banner_file is not None else self.default('BANNER_FILE', None)
self.allow_lan_udp = allow_lan_udp if allow_lan_udp is not None else self.boolean('ALLOW_LAN_UDP', False)
self.country = country if country is not None else self.default('COUNTRY', 'US')
# Peer discovery
self.peer_discovery = self.peer_discovery_enum()
self.peer_announce = peer_announce if peer_announce is not None else self.boolean('PEER_ANNOUNCE', True)
if peer_hubs is not None:
self.peer_hubs = [p.strip("") for p in peer_hubs.split(",")]
else:
self.peer_hubs = self.extract_peer_hubs()
# The electrum client takes the empty string as unspecified
self.payment_address = payment_address if payment_address is not None else self.default('PAYMENT_ADDRESS', '')
self.donation_address = donation_address if donation_address is not None else self.default('DONATION_ADDRESS',
'')
# Server limits to help prevent DoS
self.max_send = max_send if max_send is not None else self.integer('MAX_SEND', 1000000000000000000)
self.max_receive = max_receive if max_receive is not None else self.integer('MAX_RECEIVE', 1000000000000000000)
self.max_sessions = max_sessions if max_sessions is not None else self.sane_max_sessions()
self.session_timeout = session_timeout if session_timeout is not None else self.integer('SESSION_TIMEOUT', 600)
self.drop_client = re.compile(drop_client) if drop_client is not None else self.custom("DROP_CLIENT", None, re.compile)
self.description = description if description is not None else self.default('DESCRIPTION', '')
self.daily_fee = daily_fee if daily_fee is not None else self.string_amount('DAILY_FEE', '0')
self.database_query_timeout = (database_query_timeout / 1000.0) if database_query_timeout is not None else \
(float(self.integer('QUERY_TIMEOUT_MS', 10000)) / 1000.0)
self.hashX_history_cache_size = address_history_cache_size if address_history_cache_size is not None \
else self.integer('ADDRESS_HISTORY_CACHE_SIZE', 4096)
self.largest_hashX_history_cache_size = largest_address_history_cache_size if largest_address_history_cache_size is not None \
else self.integer('LARGEST_ADDRESS_HISTORY_CACHE_SIZE', 256)
self.daemon_ca_path = daemon_ca_path if daemon_ca_path else None
self.merkle_cache_size = merkle_cache_size if merkle_cache_size is not None else self.integer('MERKLE_CACHE_SIZE', 32768)
self.resolved_url_cache_size = resolved_url_cache_size if resolved_url_cache_size is not None else self.integer(
'RESOLVED_URL_CACHE_SIZE', 32768)
self.tx_cache_size = tx_cache_size if tx_cache_size is not None else self.integer(
'TX_CACHE_SIZE', 32768)
self.history_tx_cache_size = history_tx_cache_size if history_tx_cache_size is not None else \
self.integer('HISTORY_TX_CACHE_SIZE', 4194304)
@classmethod
def contribute_to_arg_parser(cls, parser):
super().contribute_to_arg_parser(parser)
env_daemon_url = cls.default('DAEMON_URL', None)
parser.add_argument('--daemon_url', required=env_daemon_url is None,
help="URL for rpc from lbrycrd or lbcd, "
"<rpcuser>:<rpcpassword>@<lbrycrd rpc ip><lbrycrd rpc port>.",
default=env_daemon_url)
parser.add_argument('--daemon_ca_path', type=str, default='',
help='Path to the lbcd ca file, used for lbcd with ssl')
parser.add_argument('--host', type=str, default=cls.default('HOST', 'localhost'),
help="Interface for hub server to listen on, use 0.0.0.0 to listen on the external "
"interface. Can be set in env with 'HOST'")
parser.add_argument('--tcp_port', type=int, default=cls.integer('TCP_PORT', 50001),
help="Electrum TCP port to listen on for hub server. Can be set in env with 'TCP_PORT'")
parser.add_argument('--udp_port', type=int, default=cls.integer('UDP_PORT', 50001),
help="'UDP port to listen on for hub server. Can be set in env with 'UDP_PORT'")
parser.add_argument('--max_sessions', type=int, default=cls.integer('MAX_SESSIONS', 100000),
help="Maximum number of electrum clients that can be connected, defaults to 100000.")
parser.add_argument('--max_send', type=int, default=cls.integer('MAX_SEND', 1000000000000000000),
help="Maximum size of a request")
parser.add_argument('--max_receive', type=int, default=cls.integer('MAX_RECEIVE', 1000000000000000000),
help="Maximum size of a response")
parser.add_argument('--drop_client', type=str, default=cls.default('DROP_CLIENT', None),
help="Regex used for blocking clients")
parser.add_argument('--session_timeout', type=int, default=cls.integer('SESSION_TIMEOUT', 600),
help="Session inactivity timeout")
parser.add_argument('--elastic_services',
default=cls.default('ELASTIC_SERVICES', 'localhost:9200/localhost:19080'), type=str,
help="Hosts and ports for elastic search and the scribe elastic sync notifier. "
"Given as a comma separated list without spaces of items in the format "
"<elastic host>:<elastic port>/<notifier host>:<notifier port> . "
"Defaults to 'localhost:9200/localhost:19080'. "
"Can be set in env with 'ELASTIC_SERVICES'")
parser.add_argument('--es_index_prefix', default=cls.default('ES_INDEX_PREFIX', ''), type=str)
parser.add_argument('--allow_lan_udp', action='store_true',
help="Reply to clients on the local network", default=cls.boolean('ALLOW_LAN_UDP', False))
parser.add_argument('--description', default=cls.default('DESCRIPTION', None), type=str)
parser.add_argument('--banner_file', default=cls.default('BANNER_FILE', None), type=str)
parser.add_argument('--country', default=cls.default('COUNTRY', 'US'), type=str)
parser.add_argument('--payment_address', default=cls.default('PAYMENT_ADDRESS', None), type=str)
parser.add_argument('--donation_address', default=cls.default('DONATION_ADDRESS', None), type=str)
parser.add_argument('--daily_fee', default=cls.default('DAILY_FEE', '0'), type=str)
parser.add_argument('--query_timeout_ms', type=int, default=cls.integer('QUERY_TIMEOUT_MS', 10000),
help="Elasticsearch query timeout, in ms. Can be set in env with 'QUERY_TIMEOUT_MS'")
parser.add_argument('--largest_address_history_cache_size', type=int,
default=cls.integer('LARGEST_ADDRESS_HISTORY_CACHE_SIZE', 256),
help="Size of the largest value cache for address histories. "
"Can be set in the env with 'LARGEST_ADDRESS_HISTORY_CACHE_SIZE'")
parser.add_argument('--address_history_cache_size', type=int,
default=cls.integer('ADDRESS_HISTORY_CACHE_SIZE', 4096),
help="Size of the lru cache of address histories. "
"Can be set in the env with 'ADDRESS_HISTORY_CACHE_SIZE'")
parser.add_argument('--merkle_cache_size', type=int,
default=cls.integer('MERKLE_CACHE_SIZE', 32768),
help="Size of the lru cache of merkle trees for txs in blocks. "
"Can be set in the env with 'MERKLE_CACHE_SIZE'")
parser.add_argument('--resolved_url_cache_size', type=int,
default=cls.integer('RESOLVED_URL_CACHE_SIZE', 32768),
help="Size of the lru cache of resolved urls. "
"Can be set in the env with 'RESOLVED_URL_CACHE_SIZE'")
parser.add_argument('--tx_cache_size', type=int,
default=cls.integer('TX_CACHE_SIZE', 32768),
help="Size of the lru cache of transactions. "
"Can be set in the env with 'TX_CACHE_SIZE'")
parser.add_argument('--history_tx_cache_size', type=int,
default=cls.integer('HISTORY_TX_CACHE_SIZE', 524288),
help="Size of the lfu cache of txids in transaction histories for addresses. "
"Can be set in the env with 'HISTORY_TX_CACHE_SIZE'")
@classmethod
def from_arg_parser(cls, args):
return cls(
db_dir=args.db_dir, daemon_url=args.daemon_url, host=args.host, elastic_services=args.elastic_services,
max_query_workers=args.max_query_workers, chain=args.chain,
es_index_prefix=args.es_index_prefix, reorg_limit=args.reorg_limit, tcp_port=args.tcp_port,
udp_port=args.udp_port, prometheus_port=args.prometheus_port, banner_file=args.banner_file,
allow_lan_udp=args.allow_lan_udp, cache_all_tx_hashes=args.cache_all_tx_hashes,
country=args.country, payment_address=args.payment_address,
donation_address=args.donation_address, max_send=args.max_send, max_receive=args.max_receive,
max_sessions=args.max_sessions, session_timeout=args.session_timeout,
drop_client=args.drop_client, description=args.description, daily_fee=args.daily_fee,
database_query_timeout=args.query_timeout_ms, blocking_channel_ids=args.blocking_channel_ids,
filtering_channel_ids=args.filtering_channel_ids, index_address_status=args.index_address_statuses,
address_history_cache_size=args.address_history_cache_size, daemon_ca_path=args.daemon_ca_path,
merkle_cache_size=args.merkle_cache_size, resolved_url_cache_size=args.resolved_url_cache_size,
tx_cache_size=args.tx_cache_size, history_tx_cache_size=args.history_tx_cache_size,
largest_address_history_cache_size=args.largest_address_history_cache_size
)

View file

@ -6,8 +6,8 @@ import asyncio
from asyncio import Event
from functools import partial
from numbers import Number
from scribe.common import RPCError, CodeMessageError
from scribe.hub.common import Notification, Request, Response, Batch, ProtocolError
from hub.common import RPCError, CodeMessageError
from hub.herald.common import Notification, Request, Response, Batch, ProtocolError
class JSONRPC:

View file

@ -6,14 +6,14 @@ import logging
from collections import defaultdict
from prometheus_client import Histogram, Gauge
import rocksdb.errors
from scribe import PROMETHEUS_NAMESPACE
from scribe.common import HISTOGRAM_BUCKETS
from scribe.db.common import UTXO
from scribe.blockchain.transaction.deserializer import Deserializer
from hub import PROMETHEUS_NAMESPACE
from hub.common import HISTOGRAM_BUCKETS
from hub.db.common import UTXO
from hub.scribe.transaction.deserializer import Deserializer
if typing.TYPE_CHECKING:
from scribe.hub.session import SessionManager
from scribe.db import HubDB
from hub.herald.session import SessionManager
from hub.db import SecondaryDB
@attr.s(slots=True)
@ -46,7 +46,7 @@ mempool_touched_address_count_metric = Gauge(
class HubMemPool:
def __init__(self, coin, db: 'HubDB', refresh_secs=1.0):
def __init__(self, coin, db: 'SecondaryDB', refresh_secs=1.0):
self.coin = coin
self._db = db
self.logger = logging.getLogger(__name__)
@ -157,6 +157,14 @@ class HubMemPool:
result.append(MemPoolTxSummary(tx_hash, tx.fee, has_ui))
return result
def mempool_history(self, hashX: bytes) -> str:
result = ''
for tx_hash in self.touched_hashXs.get(hashX, ()):
if tx_hash not in self.txs:
continue # the tx hash for the touched address is an input that isn't in mempool anymore
result += f'{tx_hash[::-1].hex()}:{-any(_hash in self.txs for _hash, idx in self.txs[tx_hash].in_pairs):d}:'
return result
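Each entry in the string built above is a 'txid:flag:' pair, where the flag is -1 when the mempool tx spends an output of another unconfirmed tx and 0 otherwise, following the electrum status convention. An illustrative fragment:
txid = 'ab' * 32                      # illustrative reversed tx hash hex
has_unconfirmed_inputs = True
entry = f'{txid}:{-has_unconfirmed_inputs:d}:'
assert entry == 'ab' * 32 + ':-1:'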
def unordered_UTXOs(self, hashX):
"""Return an unordered list of UTXO named tuples from mempool
transactions that pay to hashX.
@ -229,12 +237,13 @@ class HubMemPool:
session = self.session_manager.sessions.get(session_id)
if session:
if session.subscribe_headers and height_changed:
asyncio.create_task(
session.send_notification('blockchain.headers.subscribe',
(self.session_manager.hsub_results[session.subscribe_headers_raw],))
session.send_notification(
'blockchain.headers.subscribe',
(self.session_manager.hsub_results[session.subscribe_headers_raw],)
)
if hashXes:
asyncio.create_task(session.send_history_notifications(*hashXes))
session.send_history_notifications(hashXes)
async def _notify_sessions(self, height, touched, new_touched):
"""Notify sessions about height changes and touched addresses."""
@ -276,7 +285,6 @@ class HubMemPool:
if session.subscribe_headers and height_changed:
sent_headers += 1
self._notification_q.put_nowait((session_id, height_changed, hashXes))
if sent_headers:
self.logger.info(f'notified {sent_headers} sessions of new block header')
if session_hashxes_to_notify:

View file

@ -1,23 +1,17 @@
import logging
import asyncio
import struct
from bisect import bisect_right
from collections import Counter, deque
from decimal import Decimal
from operator import itemgetter
from typing import Optional, List, Iterable, TYPE_CHECKING
from typing import Optional, List, TYPE_CHECKING, Deque, Tuple
from elasticsearch import AsyncElasticsearch, NotFoundError, ConnectionError
from scribe.schema.result import Censor, Outputs
from scribe.schema.tags import clean_tags
from scribe.schema.url import normalize_name
from scribe.error import TooManyClaimSearchParametersError
from scribe.common import LRUCache
from scribe.db.common import CLAIM_TYPES, STREAM_TYPES
from scribe.elasticsearch.constants import INDEX_DEFAULT_SETTINGS, REPLACEMENTS, FIELDS, TEXT_FIELDS, RANGE_FIELDS
from scribe.db.common import ResolveResult
from hub.schema.result import Censor, Outputs
from hub.common import LRUCache, IndexVersionMismatch, INDEX_DEFAULT_SETTINGS, expand_query, expand_result
from hub.db.common import ResolveResult
if TYPE_CHECKING:
from scribe.db import HubDB
from prometheus_client import Counter as PrometheusCounter
from hub.db import SecondaryDB
class ChannelResolution(str):
@ -32,19 +26,15 @@ class StreamResolution(str):
return LookupError(f'Could not find claim at "{url}".')
class IndexVersionMismatch(Exception):
def __init__(self, got_version, expected_version):
self.got_version = got_version
self.expected_version = expected_version
class SearchIndex:
VERSION = 1
def __init__(self, hub_db: 'HubDB', index_prefix: str, search_timeout=3.0, elastic_host='localhost',
elastic_port=9200):
def __init__(self, hub_db: 'SecondaryDB', index_prefix: str, search_timeout=3.0,
elastic_services: Optional[Deque[Tuple[Tuple[str, int], Tuple[str, int]]]] = None,
timeout_counter: Optional['PrometheusCounter'] = None):
self.hub_db = hub_db
self.search_timeout = search_timeout
self.timeout_counter: Optional['PrometheusCounter'] = timeout_counter
self.sync_timeout = 600 # won't hit that 99% of the time, but can hit on a fresh import
self.search_client: Optional[AsyncElasticsearch] = None
self.sync_client: Optional[AsyncElasticsearch] = None
@ -52,8 +42,8 @@ class SearchIndex:
self.logger = logging.getLogger(__name__)
self.claim_cache = LRUCache(2 ** 15)
self.search_cache = LRUCache(2 ** 17)
self._elastic_host = elastic_host
self._elastic_port = elastic_port
self._elastic_services = elastic_services
self.lost_connection = asyncio.Event()
async def get_index_version(self) -> int:
try:
@ -70,9 +60,9 @@ class SearchIndex:
async def start(self) -> bool:
if self.sync_client:
return False
hosts = [{'host': self._elastic_host, 'port': self._elastic_port}]
hosts = [{'host': self._elastic_services[0][0][0], 'port': self._elastic_services[0][0][1]}]
self.sync_client = AsyncElasticsearch(hosts, timeout=self.sync_timeout)
self.search_client = AsyncElasticsearch(hosts, timeout=self.search_timeout)
self.search_client = AsyncElasticsearch(hosts, timeout=self.search_timeout+1)
while True:
try:
await self.sync_client.cluster.health(wait_for_status='yellow')
@ -91,7 +81,7 @@ class SearchIndex:
self.logger.error("es search index has an incompatible version: %s vs %s", index_version, self.VERSION)
raise IndexVersionMismatch(index_version, self.VERSION)
await self.sync_client.indices.refresh(self.index)
return acked
return True
async def stop(self):
clients = [c for c in (self.sync_client, self.search_client) if c is not None]
@ -218,10 +208,14 @@ class SearchIndex:
reordered_hits = cache_item.result
else:
query = expand_query(**kwargs)
search_hits = deque((await self.search_client.search(
es_resp = await self.search_client.search(
query, index=self.index, track_total_hits=False,
timeout=f'{int(1000*self.search_timeout)}ms',
_source_includes=['_id', 'channel_id', 'reposted_claim_id', 'creation_height']
))['hits']['hits'])
)
search_hits = deque(es_resp['hits']['hits'])
if self.timeout_counter and es_resp['timed_out']:
self.timeout_counter.inc()
if remove_duplicates:
search_hits = self.__remove_duplicates(search_hits)
if per_channel_per_page > 0:
@ -248,7 +242,7 @@ class SearchIndex:
dropped.add(hit['_id'])
return deque(hit for hit in search_hits if hit['_id'] not in dropped)
def __search_ahead(self, search_hits: list, page_size: int, per_channel_per_page: int):
def __search_ahead(self, search_hits: deque, page_size: int, per_channel_per_page: int) -> list:
reordered_hits = []
channel_counters = Counter()
next_page_hits_maybe_check_later = deque()
@ -297,234 +291,6 @@ class SearchIndex:
return referenced_txos
def expand_query(**kwargs):
if "amount_order" in kwargs:
kwargs["limit"] = 1
kwargs["order_by"] = "effective_amount"
kwargs["offset"] = int(kwargs["amount_order"]) - 1
if 'name' in kwargs:
kwargs['name'] = normalize_name(kwargs.pop('name'))
if kwargs.get('is_controlling') is False:
kwargs.pop('is_controlling')
query = {'must': [], 'must_not': []}
collapse = None
if 'fee_currency' in kwargs and kwargs['fee_currency'] is not None:
kwargs['fee_currency'] = kwargs['fee_currency'].upper()
for key, value in kwargs.items():
key = key.replace('claim.', '')
many = key.endswith('__in') or isinstance(value, list)
if many and len(value) > 2048:
raise TooManyClaimSearchParametersError(key, 2048)
if many:
key = key.replace('__in', '')
value = list(filter(None, value))
if value is None or isinstance(value, list) and len(value) == 0:
continue
key = REPLACEMENTS.get(key, key)
if key in FIELDS:
partial_id = False
if key == 'claim_type':
if isinstance(value, str):
value = CLAIM_TYPES[value]
else:
value = [CLAIM_TYPES[claim_type] for claim_type in value]
elif key == 'stream_type':
value = [STREAM_TYPES[value]] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
if key == '_id':
if isinstance(value, Iterable):
value = [item[::-1].hex() for item in value]
else:
value = value[::-1].hex()
if not many and key in ('_id', 'claim_id', 'sd_hash') and len(value) < 20:
partial_id = True
if key in ('signature_valid', 'has_source'):
continue # handled later
if key in TEXT_FIELDS:
key += '.keyword'
ops = {'<=': 'lte', '>=': 'gte', '<': 'lt', '>': 'gt'}
if partial_id:
query['must'].append({"prefix": {key: value}})
elif key in RANGE_FIELDS and isinstance(value, str) and value[0] in ops:
operator_length = 2 if value[:2] in ops else 1
operator, value = value[:operator_length], value[operator_length:]
if key == 'fee_amount':
value = str(Decimal(value)*1000)
query['must'].append({"range": {key: {ops[operator]: value}}})
elif key in RANGE_FIELDS and isinstance(value, list) and all(v[0] in ops for v in value):
range_constraints = []
release_times = []
for v in value:
operator_length = 2 if v[:2] in ops else 1
operator, stripped_op_v = v[:operator_length], v[operator_length:]
if key == 'fee_amount':
stripped_op_v = str(Decimal(stripped_op_v)*1000)
if key == 'release_time':
release_times.append((operator, stripped_op_v))
else:
range_constraints.append((operator, stripped_op_v))
if key != 'release_time':
query['must'].append({"range": {key: {ops[operator]: v for operator, v in range_constraints}}})
else:
query['must'].append(
{"bool":
{"should": [
{"bool": {
"must_not": {
"exists": {
"field": "release_time"
}
}
}},
{"bool": {
"must": [
{"exists": {"field": "release_time"}},
{'range': {key: {ops[operator]: v for operator, v in release_times}}},
]}},
]}
}
)
elif many:
query['must'].append({"terms": {key: value}})
else:
if key == 'fee_amount':
value = str(Decimal(value)*1000)
query['must'].append({"term": {key: {"value": value}}})
elif key == 'not_channel_ids':
for channel_id in value:
query['must_not'].append({"term": {'channel_id.keyword': channel_id}})
query['must_not'].append({"term": {'_id': channel_id}})
elif key == 'channel_ids':
query['must'].append({"terms": {'channel_id.keyword': value}})
elif key == 'claim_ids':
query['must'].append({"terms": {'claim_id.keyword': value}})
elif key == 'media_types':
query['must'].append({"terms": {'media_type.keyword': value}})
elif key == 'any_languages':
query['must'].append({"terms": {'languages': clean_tags(value)}})
elif key == 'any_languages':
query['must'].append({"terms": {'languages': value}})
elif key == 'all_languages':
query['must'].extend([{"term": {'languages': tag}} for tag in value])
elif key == 'any_tags':
query['must'].append({"terms": {'tags.keyword': clean_tags(value)}})
elif key == 'all_tags':
query['must'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
elif key == 'not_tags':
query['must_not'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
elif key == 'not_claim_id':
query['must_not'].extend([{"term": {'claim_id.keyword': cid}} for cid in value])
elif key == 'limit_claims_per_channel':
collapse = ('channel_id.keyword', value)
if kwargs.get('has_channel_signature'):
query['must'].append({"exists": {"field": "signature"}})
if 'signature_valid' in kwargs:
query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
elif 'signature_valid' in kwargs:
query['must'].append(
{"bool":
{"should": [
{"bool": {"must_not": {"exists": {"field": "signature"}}}},
{"bool" : {"must" : {"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}}}
]}
}
)
if 'has_source' in kwargs:
is_stream_or_repost_terms = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}}
query['must'].append(
{"bool":
{"should": [
{"bool": # when is_stream_or_repost AND has_source
{"must": [
{"match": {"has_source": kwargs['has_source']}},
is_stream_or_repost_terms,
]
},
},
{"bool": # when not is_stream_or_repost
{"must_not": is_stream_or_repost_terms}
},
{"bool": # when reposted_claim_type wouldn't have source
{"must_not":
[
{"term": {"reposted_claim_type": CLAIM_TYPES['stream']}}
],
"must":
[
{"term": {"claim_type": CLAIM_TYPES['repost']}}
]
}
}
]}
}
)
if kwargs.get('text'):
query['must'].append(
{"simple_query_string":
{"query": kwargs["text"], "fields": [
"claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
]}})
query = {
"_source": {"excludes": ["description", "title"]},
'query': {'bool': query},
"sort": [],
}
if "limit" in kwargs:
query["size"] = kwargs["limit"]
if 'offset' in kwargs:
query["from"] = kwargs["offset"]
if 'order_by' in kwargs:
if isinstance(kwargs["order_by"], str):
kwargs["order_by"] = [kwargs["order_by"]]
for value in kwargs['order_by']:
if 'trending_group' in value:
# fixme: trending_mixed is 0 for all records on variable decay, making sort slow.
continue
is_asc = value.startswith('^')
value = value[1:] if is_asc else value
value = REPLACEMENTS.get(value, value)
if value in TEXT_FIELDS:
value += '.keyword'
query['sort'].append({value: "asc" if is_asc else "desc"})
if collapse:
query["collapse"] = {
"field": collapse[0],
"inner_hits": {
"name": collapse[0],
"size": collapse[1],
"sort": query["sort"]
}
}
return query
def expand_result(results):
inner_hits = []
expanded = []
for result in results:
if result.get("inner_hits"):
for _, inner_hit in result["inner_hits"].items():
inner_hits.extend(inner_hit["hits"]["hits"])
continue
result = result['_source']
result['claim_hash'] = bytes.fromhex(result['claim_id'])[::-1]
if result['reposted_claim_id']:
result['reposted_claim_hash'] = bytes.fromhex(result['reposted_claim_id'])[::-1]
else:
result['reposted_claim_hash'] = None
result['channel_hash'] = bytes.fromhex(result['channel_id'])[::-1] if result['channel_id'] else None
result['txo_hash'] = bytes.fromhex(result['tx_id'])[::-1] + struct.pack('<I', result['tx_nout'])
result['tx_hash'] = bytes.fromhex(result['tx_id'])[::-1]
result['reposted'] = result.pop('repost_count')
result['signature_valid'] = result.pop('is_signature_valid')
# result['normalized'] = result.pop('normalized_name')
# if result['censoring_channel_hash']:
# result['censoring_channel_hash'] = unhexlify(result['censoring_channel_hash'])[::-1]
expanded.append(result)
if inner_hits:
return expand_result(inner_hits)
return expanded
class ResultCacheItem:
__slots__ = '_result', 'lock', 'has_result'

194
hub/herald/service.py Normal file
View file

@ -0,0 +1,194 @@
import time
import typing
import asyncio
from prometheus_client import Counter
from hub import PROMETHEUS_NAMESPACE
from hub.scribe.daemon import LBCDaemon
from hub.herald.session import SessionManager
from hub.herald.mempool import HubMemPool
from hub.herald.udp import StatusServer
from hub.herald.db import HeraldDB
from hub.herald.search import SearchIndex
from hub.service import BlockchainReaderService
from hub.notifier_protocol import ElasticNotifierClientProtocol
if typing.TYPE_CHECKING:
from hub.herald.env import ServerEnv
NAMESPACE = f"{PROMETHEUS_NAMESPACE}_hub"
class HubServerService(BlockchainReaderService):
interrupt_count_metric = Counter("interrupt", "Number of interrupted queries", namespace=NAMESPACE)
def __init__(self, env: 'ServerEnv'):
super().__init__(env, 'lbry-reader', thread_workers=max(1, env.max_query_workers), thread_prefix='hub-worker')
self.env = env
self.notifications_to_send = []
self.mempool_notifications = set()
self.status_server = StatusServer()
self.daemon = LBCDaemon(env.coin, env.daemon_url, daemon_ca_path=env.daemon_ca_path) # only needed for broadcasting txs
self.mempool = HubMemPool(self.env.coin, self.db)
self.search_index = SearchIndex(
self.db, self.env.es_index_prefix, self.env.database_query_timeout,
elastic_services=self.env.elastic_services,
timeout_counter=self.interrupt_count_metric
)
self.session_manager = SessionManager(
env, self.db, self.mempool, self.daemon, self.search_index,
self.shutdown_event,
on_available_callback=self.status_server.set_available,
on_unavailable_callback=self.status_server.set_unavailable
)
self.mempool.session_manager = self.session_manager
self.es_notifications = asyncio.Queue()
self.es_notification_client = ElasticNotifierClientProtocol(
self.es_notifications, self.env.elastic_services
)
self.synchronized = asyncio.Event()
self._es_height = None
self._es_block_hash = None
def open_db(self):
env = self.env
self.db = HeraldDB(
env.coin, env.db_dir, self.secondary_name, -1, env.reorg_limit,
env.cache_all_tx_hashes, blocking_channel_ids=env.blocking_channel_ids,
filtering_channel_ids=env.filtering_channel_ids, executor=self._executor,
index_address_status=env.index_address_status, merkle_cache_size=env.merkle_cache_size,
tx_cache_size=env.tx_cache_size
)
def clear_caches(self):
self.session_manager.clear_caches()
# self.clear_search_cache()
# self.mempool.notified_mempool_txs.clear()
def clear_search_cache(self):
self.search_index.clear_caches()
def advance(self, height: int):
super().advance(height)
touched_hashXs = self.db.prefix_db.touched_hashX.get(height).touched_hashXs
self.session_manager.update_history_caches(touched_hashXs)
self.notifications_to_send.append((set(touched_hashXs), height))
def unwind(self):
self.session_manager.hashX_raw_history_cache.clear()
self.session_manager.hashX_history_cache.clear()
prev_count = self.db.tx_counts.pop()
tx_count = self.db.tx_counts[-1]
self.db.block_hashes.pop()
current_count = prev_count
for _ in range(prev_count - tx_count):
if current_count in self.session_manager.history_tx_info_cache:
self.session_manager.history_tx_info_cache.pop(current_count)
current_count -= 1
if self.db._cache_all_tx_hashes:
for _ in range(prev_count - tx_count):
tx_hash = self.db.tx_num_mapping.pop(self.db.total_transactions.pop())
if tx_hash in self.db.tx_cache:
self.db.tx_cache.pop(tx_hash)
assert len(self.db.total_transactions) == tx_count, f"{len(self.db.total_transactions)} vs {tx_count}"
self.db.merkle_cache.clear()
def _detect_changes(self):
super()._detect_changes()
start = time.perf_counter()
self.mempool_notifications.update(self.mempool.refresh())
self.mempool.mempool_process_time_metric.observe(time.perf_counter() - start)
async def poll_for_changes(self):
await super().poll_for_changes()
if self.db.db_height <= 0:
return
self.status_server.set_height(self.db.db_height, self.db.db_tip)
if self.notifications_to_send:
for (touched, height) in self.notifications_to_send:
await self.mempool.on_block(touched, height)
self.log.info("reader advanced to %i", height)
if self._es_height == self.db.db_height:
self.synchronized.set()
if self.mempool_notifications:
await self.mempool.on_mempool(
set(self.mempool.touched_hashXs), self.mempool_notifications, self.db.db_height
)
self.mempool_notifications.clear()
self.notifications_to_send.clear()
async def receive_es_notifications(self, synchronized: asyncio.Event):
synchronized.set()
try:
while True:
self._es_height, self._es_block_hash = await self.es_notifications.get()
self.clear_search_cache()
if self.last_state and self._es_block_hash == self.last_state.tip:
self.synchronized.set()
self.log.info("es and reader are in sync at block %i", self.last_state.height)
else:
self.log.info("es and reader are not yet in sync (block %s vs %s)", self._es_height,
self.db.db_height)
finally:
self.log.warning("closing es sync notification loop at %s", self._es_height)
self.es_notification_client.close()
async def failover_elastic_services(self, synchronized: asyncio.Event):
first_connect = True
if not self.es_notification_client.lost_connection.is_set():
synchronized.set()
while True:
try:
await self.es_notification_client.lost_connection.wait()
if not first_connect:
self.log.warning("lost connection to scribe-elastic-sync notifier (%s:%i)",
self.es_notification_client.host, self.es_notification_client.port)
await self.es_notification_client.connect()
first_connect = False
synchronized.set()
self.log.info("connected to es notifier on %s:%i", self.es_notification_client.host,
self.es_notification_client.port)
await self.search_index.start()
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
self.log.warning("lost connection to scribe-elastic-sync notifier")
await self.search_index.stop()
self.search_index.clear_caches()
if len(self.env.elastic_services) > 1:
self.env.elastic_services.rotate(-1)
self.log.warning("attempting to failover to %s:%i", self.es_notification_client.host,
self.es_notification_client.port)
await asyncio.sleep(1)
else:
self.log.warning("waiting 30s for scribe-elastic-sync notifier to become available (%s:%i)",
self.es_notification_client.host, self.es_notification_client.port)
await asyncio.sleep(30)
else:
self.log.info("stopping the notifier loop")
raise e
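Failover above rotates the elastic_services deque so that the next (elasticsearch, notifier) host pair becomes index 0, which is what the search index and notifier client read on reconnect. A minimal sketch of that rotation (endpoints are illustrative):
from collections import deque
services = deque([
    (('es1', 9200), ('es1', 19080)),
    (('es2', 9200), ('es2', 19080)),
])
services.rotate(-1)   # move the failed head to the back
assert services[0] == (('es2', 9200), ('es2', 19080))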
async def start_status_server(self):
if self.env.udp_port and int(self.env.udp_port):
await self.status_server.start(
0, bytes.fromhex(self.env.coin.GENESIS_HASH)[::-1], self.env.country,
self.env.host, self.env.udp_port, self.env.allow_lan_udp
)
def _iter_start_tasks(self):
yield self.start_status_server()
yield self.start_cancellable(self.receive_es_notifications)
yield self.start_cancellable(self.failover_elastic_services)
yield self.start_cancellable(self.mempool.send_notifications_forever)
yield self.start_cancellable(self.refresh_blocks_forever)
yield self.finished_initial_catch_up.wait()
self.block_count_metric.set(self.last_state.height)
yield self.start_prometheus()
yield self.start_cancellable(self.session_manager.serve, self.mempool)
def _iter_stop_tasks(self):
yield self.stop_prometheus()
yield self.status_server.stop()
yield self._stop_cancellable_tasks()
yield self.session_manager.search_index.stop()
yield self.daemon.close()

View file

@ -1,7 +1,8 @@
import os
import ssl
import sys
import math
import time
import errno
import codecs
import typing
import asyncio
@ -15,23 +16,26 @@ from contextlib import suppress
from functools import partial
from elasticsearch import ConnectionTimeout
from prometheus_client import Counter, Info, Histogram, Gauge
from scribe.schema.result import Outputs
from scribe.error import ResolveCensoredError, TooManyClaimSearchParametersError
from scribe import __version__, PROMETHEUS_NAMESPACE
from scribe.hub import PROTOCOL_MIN, PROTOCOL_MAX, HUB_PROTOCOL_VERSION
from scribe.build_info import BUILD, COMMIT_HASH, DOCKER_TAG
from scribe.elasticsearch import SearchIndex
from scribe.common import sha256, hash_to_hex_str, hex_str_to_hash, HASHX_LEN, version_string, formatted_time
from scribe.common import protocol_version, RPCError, DaemonError, TaskGroup, HISTOGRAM_BUCKETS
from scribe.hub.jsonrpc import JSONRPCAutoDetect, JSONRPCConnection, JSONRPCv2, JSONRPC
from scribe.hub.common import BatchRequest, ProtocolError, Request, Batch, Notification
from scribe.hub.framer import NewlineFramer
from hub.schema.result import Outputs
from hub.error import ResolveCensoredError, TooManyClaimSearchParametersError
from hub import __version__, PROMETHEUS_NAMESPACE
from hub.herald import PROTOCOL_MIN, PROTOCOL_MAX, HUB_PROTOCOL_VERSION
from hub.build_info import BUILD, COMMIT_HASH, DOCKER_TAG
from hub.herald.search import SearchIndex
from hub.common import sha256, hash_to_hex_str, hex_str_to_hash, HASHX_LEN, version_string, formatted_time, SIZE_BUCKETS
from hub.common import protocol_version, RPCError, DaemonError, TaskGroup, HISTOGRAM_BUCKETS, asyncify_for_loop
from hub.common import LRUCacheWithMetrics, LFUCacheWithMetrics, LargestValueCache
from hub.herald.jsonrpc import JSONRPCAutoDetect, JSONRPCConnection, JSONRPCv2, JSONRPC
from hub.herald.common import BatchRequest, ProtocolError, Request, Batch, Notification
from hub.herald.framer import NewlineFramer
if typing.TYPE_CHECKING:
from scribe.db import HubDB
from scribe.env import Env
from scribe.blockchain.daemon import LBCDaemon
from scribe.hub.mempool import HubMemPool
from hub.db import SecondaryDB
from hub.herald.env import ServerEnv
from hub.scribe.daemon import LBCDaemon
from hub.herald.mempool import HubMemPool
PYTHON_VERSION = sys.version_info.major, sys.version_info.minor
TypedDict = dict if PYTHON_VERSION < (3, 8) else typing.TypedDict
BAD_REQUEST = 1
DAEMON_ERROR = 2
@ -42,6 +46,11 @@ SignatureInfo = namedtuple('SignatureInfo', 'min_args max_args '
'required_names other_names')
class CachedAddressHistoryItem(TypedDict):
tx_hash: str
height: int
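An illustrative entry of the TypedDict above, as it appears in a cached address history (values are made up):
item: CachedAddressHistoryItem = {
    'tx_hash': '56' * 32,   # txid hex, byte-reversed from the raw tx hash
    'height': 1037000,
}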
def scripthash_to_hashX(scripthash: str) -> bytes:
try:
bin_hash = hex_str_to_hash(scripthash)
@ -128,12 +137,11 @@ class SessionManager:
session_count_metric = Gauge("session_count", "Number of connected client sessions", namespace=NAMESPACE,
labelnames=("version",))
request_count_metric = Counter("requests_count", "Number of requests received", namespace=NAMESPACE,
labelnames=("method", "version"))
labelnames=("method",))
tx_request_count_metric = Counter("requested_transaction", "Number of transactions requested", namespace=NAMESPACE)
tx_replied_count_metric = Counter("replied_transaction", "Number of transactions responded", namespace=NAMESPACE)
urls_to_resolve_count_metric = Counter("urls_to_resolve", "Number of urls to resolve", namespace=NAMESPACE)
resolved_url_count_metric = Counter("resolved_url", "Number of resolved urls", namespace=NAMESPACE)
interrupt_count_metric = Counter("interrupt", "Number of interrupted queries", namespace=NAMESPACE)
db_operational_error_metric = Counter(
"operational_error", "Number of queries that raised operational errors", namespace=NAMESPACE
)
@ -146,7 +154,6 @@ class SessionManager:
pending_query_metric = Gauge(
"pending_queries_count", "Number of pending and running sqlite queries", namespace=NAMESPACE
)
client_version_metric = Counter(
"clients", "Number of connections received per client version",
namespace=NAMESPACE, labelnames=("version",)
@ -155,6 +162,14 @@ class SessionManager:
"address_history", "Time to fetch an address history",
namespace=NAMESPACE, buckets=HISTOGRAM_BUCKETS
)
address_subscription_metric = Gauge(
"address_subscriptions", "Number of subscribed addresses",
namespace=NAMESPACE
)
address_history_size_metric = Histogram(
"history_size", "Sizes of histories for subscribed addresses",
namespace=NAMESPACE, buckets=SIZE_BUCKETS
)
notifications_in_flight_metric = Gauge(
"notifications_in_flight", "Count of notifications in flight",
namespace=NAMESPACE
@ -164,8 +179,8 @@ class SessionManager:
namespace=NAMESPACE, buckets=HISTOGRAM_BUCKETS
)
def __init__(self, env: 'Env', db: 'HubDB', mempool: 'HubMemPool',
daemon: 'LBCDaemon', shutdown_event: asyncio.Event,
def __init__(self, env: 'ServerEnv', db: 'SecondaryDB', mempool: 'HubMemPool',
daemon: 'LBCDaemon', search_index: 'SearchIndex', shutdown_event: asyncio.Event,
on_available_callback: typing.Callable[[], None], on_unavailable_callback: typing.Callable[[], None]):
env.max_send = max(350000, env.max_send)
self.env = env
@ -174,6 +189,7 @@ class SessionManager:
self.on_unavailable_callback = on_unavailable_callback
self.daemon = daemon
self.mempool = mempool
self.search_index = search_index
self.shutdown_event = shutdown_event
self.logger = logging.getLogger(__name__)
self.servers: typing.Dict[str, asyncio.AbstractServer] = {}
@ -183,32 +199,65 @@ class SessionManager:
self.cur_group = SessionGroup(0)
self.txs_sent = 0
self.start_time = time.time()
self.history_cache = {}
self.resolve_outputs_cache = {}
self.resolve_cache = {}
self.resolve_cache = LRUCacheWithMetrics(
env.resolved_url_cache_size, metric_name='resolved_url', namespace=NAMESPACE
)
self.notified_height: typing.Optional[int] = None
# Cache some idea of room to avoid recounting on each subscription
self.subs_room = 0
self.protocol_class = LBRYElectrumX
self.session_event = Event()
# Search index
self.search_index = SearchIndex(
self.db, self.env.es_index_prefix, self.env.database_query_timeout,
elastic_host=env.elastic_host, elastic_port=env.elastic_port
)
self.running = False
# hashX: List[int]
self.hashX_raw_history_cache = LFUCacheWithMetrics(env.hashX_history_cache_size, metric_name='raw_history', namespace=NAMESPACE)
# hashX: List[CachedAddressHistoryItem]
self.hashX_history_cache = LargestValueCache(env.largest_hashX_history_cache_size)
# tx_num: Tuple[txid, height]
self.history_tx_info_cache = LFUCacheWithMetrics(env.history_tx_cache_size, metric_name='history_tx', namespace=NAMESPACE)
def clear_caches(self):
self.history_cache.clear()
self.resolve_outputs_cache.clear()
self.resolve_cache.clear()
def update_history_caches(self, touched_hashXs: typing.List[bytes]):
update_history_cache = {}
for hashX in set(touched_hashXs):
history_tx_nums = None
# if the history is the raw_history_cache, update it
# TODO: use a reversed iterator for this instead of rescanning it all
if hashX in self.hashX_raw_history_cache:
self.hashX_raw_history_cache[hashX] = history_tx_nums = self.db._read_history(hashX, None)
# if it's in hashX_history_cache, prepare to update it in a batch
if hashX in self.hashX_history_cache:
full_cached = self.hashX_history_cache[hashX]
if history_tx_nums is None:
history_tx_nums = self.db._read_history(hashX, None)
new_txs = history_tx_nums[len(full_cached):]
update_history_cache[hashX] = full_cached, new_txs
if update_history_cache:
# get the set of new tx nums that were touched in all of the new histories to be cached
total_tx_nums = set()
for _, new_txs in update_history_cache.values():
total_tx_nums.update(new_txs)
total_tx_nums = list(total_tx_nums)
# collect the total new tx infos
referenced_new_txs = {
tx_num: (CachedAddressHistoryItem(
tx_hash=tx_hash[::-1].hex(), height=bisect_right(self.db.tx_counts, tx_num)
)) for tx_num, tx_hash in zip(total_tx_nums, self.db._get_tx_hashes(total_tx_nums))
}
# update the cached history lists
get_referenced = referenced_new_txs.__getitem__
for hashX, (full, new_txs) in update_history_cache.items():
append_to_full = full.append
for tx_num in new_txs:
append_to_full(get_referenced(tx_num))
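Both history caches above map a tx_num to its block height with bisect_right over the cumulative db.tx_counts array. A worked sketch with illustrative counts:
from bisect import bisect_right
tx_counts = [1, 3, 6, 10]                # cumulative tx count per height 0..3 (illustrative)
assert bisect_right(tx_counts, 4) == 2   # tx_num 4 is in the block at height 2 (txs 3..5)
assert bisect_right(tx_counts, 0) == 0   # tx_num 0 is in the genesis block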
async def _start_server(self, kind, *args, **kw_args):
loop = asyncio.get_event_loop()
if kind == 'TCP':
protocol_class = LBRYElectrumX
protocol_class = self.protocol_class
else:
raise ValueError(kind)
protocol_factory = partial(protocol_class, self, kind)
@ -216,9 +265,11 @@ class SessionManager:
host, port = args[:2]
try:
self.servers[kind] = await loop.create_server(protocol_factory, *args, **kw_args)
except OSError as e: # don't suppress CancelledError
self.logger.error(f'{kind} server failed to listen on {host}:'
f'{port:d} :{e!r}')
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
self.logger.error(f'{kind} server failed to listen on '
f'{host}:{port:d} : {e!r}')
raise
else:
self.logger.info(f'{kind} server listening on {host}:{port:d}')
@ -228,12 +279,19 @@ class SessionManager:
"""
env = self.env
host = env.cs_host()
if env.tcp_port is not None:
await self._start_server('TCP', host, env.tcp_port)
if env.ssl_port is not None:
sslc = ssl.SSLContext(ssl.PROTOCOL_TLS)
sslc.load_cert_chain(env.ssl_certfile, keyfile=env.ssl_keyfile)
await self._start_server('SSL', host, env.ssl_port, ssl=sslc)
if env.tcp_port is None:
return
started = False
while not started:
try:
await self._start_server('TCP', host, env.tcp_port)
started = True
except OSError as e:
if e.errno == errno.EADDRINUSE:
await asyncio.sleep(3)
continue
raise
async def _close_servers(self, kinds):
"""Close the servers of the given kinds (TCP etc.)."""
@ -596,25 +654,57 @@ class SessionManager:
self.txs_sent += 1
return hex_hash
async def limited_history(self, hashX):
"""A caching layer."""
if hashX not in self.history_cache:
# History DoS limit. Each element of history is about 99
# bytes when encoded as JSON. This limits resource usage
# on bloated history requests, and uses a smaller divisor
# so large requests are logged before refusing them.
limit = self.env.max_send // 97
self.history_cache[hashX] = await self.db.limited_history(hashX, limit=limit)
return self.history_cache[hashX]
async def _cached_raw_history(self, hashX: bytes, limit: typing.Optional[int] = None):
tx_nums = self.hashX_raw_history_cache.get(hashX)
if tx_nums is None:
self.hashX_raw_history_cache[hashX] = tx_nums = await self.db.read_history(hashX, limit)
return tx_nums
async def cached_confirmed_history(self, hashX: bytes,
limit: typing.Optional[int] = None) -> typing.List[CachedAddressHistoryItem]:
cached_full_history = self.hashX_history_cache.get(hashX)
# return the cached history
if cached_full_history is not None:
self.address_history_size_metric.observe(len(cached_full_history))
return cached_full_history
# return the history and update the caches
tx_nums = await self._cached_raw_history(hashX, limit)
needed_tx_infos = []
append_needed_tx_info = needed_tx_infos.append
tx_infos = {}
for cnt, tx_num in enumerate(tx_nums): # determine which tx_hashes are cached and which we need to look up
cached = self.history_tx_info_cache.get(tx_num)
if cached is not None:
tx_infos[tx_num] = cached
else:
append_needed_tx_info(tx_num)
if cnt % 1000 == 0:
await asyncio.sleep(0)
if needed_tx_infos: # request all the needed tx hashes in one batch, cache the txids and heights
for cnt, (tx_num, tx_hash) in enumerate(zip(needed_tx_infos, await self.db.get_tx_hashes(needed_tx_infos))):
hist = CachedAddressHistoryItem(tx_hash=tx_hash[::-1].hex(), height=bisect_right(self.db.tx_counts, tx_num))
tx_infos[tx_num] = self.history_tx_info_cache[tx_num] = hist
if cnt % 1000 == 0:
await asyncio.sleep(0)
# ensure the ordering of the txs
history = []
history_append = history.append
for cnt, tx_num in enumerate(tx_nums):
history_append(tx_infos[tx_num])
if cnt % 1000 == 0:
await asyncio.sleep(0)
self.hashX_history_cache[hashX] = history
self.address_history_size_metric.observe(len(history))
return history
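
The periodic await asyncio.sleep(0) calls above let a long history build yield to the event loop between chunks. A simplified stand-in for that pattern (not the asyncify_for_loop helper used elsewhere in this diff) could look like:

import asyncio

async def checkpointed(iterable, every=1000):
    # hand control back to the event loop every N items so a large
    # synchronous loop cannot starve other sessions
    for cnt, item in enumerate(iterable):
        yield item
        if cnt % every == 0:
            await asyncio.sleep(0)
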
def _notify_peer(self, peer):
notify_tasks = [
session.send_notification('blockchain.peers.subscribe', [peer])
for session in self.sessions.values() if session.subscribe_peers
]
if notify_tasks:
self.logger.info(f'notify {len(notify_tasks)} sessions of new peers')
asyncio.create_task(asyncio.wait(notify_tasks))
notify_count = 0
for session in self.sessions.values():
if session.subscribe_peers:
notify_count += 1
session.send_notification('blockchain.peers.subscribe', [peer])
if notify_count:
self.logger.info(f'notify {notify_count} sessions of new peers')
def add_session(self, session):
self.sessions[id(session)] = session
@ -627,6 +717,7 @@ class SessionManager:
def remove_session(self, session):
"""Remove a session from our sessions list if there."""
session_id = id(session)
self.address_subscription_metric.dec(len(session.hashX_subs))
for hashX in session.hashX_subs:
sessions = self.hashx_subscriptions_by_session[hashX]
sessions.remove(session_id)
@ -648,9 +739,9 @@ class LBRYElectrumX(asyncio.Protocol):
MAX_CHUNK_SIZE = 40960
session_counter = itertools.count()
RESPONSE_TIMES = Histogram("response_time", "Response times", namespace=NAMESPACE,
labelnames=("method", "version"), buckets=HISTOGRAM_BUCKETS)
labelnames=("method",), buckets=HISTOGRAM_BUCKETS)
NOTIFICATION_COUNT = Counter("notification", "Number of notifications sent (for subscriptions)",
namespace=NAMESPACE, labelnames=("method", "version"))
namespace=NAMESPACE, labelnames=("method",))
REQUEST_ERRORS_COUNT = Counter(
"request_error", "Number of requests that returned errors", namespace=NAMESPACE,
labelnames=("method", "version")
@ -698,7 +789,6 @@ class LBRYElectrumX(asyncio.Protocol):
self.kind = kind # 'RPC', 'TCP' etc.
self.coin = self.env.coin
self.anon_logs = self.env.anon_logs
self.txs_sent = 0
self.log_me = False
self.daemon_request = self.session_manager.daemon_request
@ -785,19 +875,6 @@ class LBRYElectrumX(asyncio.Protocol):
def default_framer(self):
return NewlineFramer(self.env.max_receive)
def peer_address_str(self, *, for_log=True):
"""Returns the peer's IP address and port as a human-readable
string, respecting anon logs if the output is for a log."""
if for_log and self.anon_logs:
return 'xx.xx.xx.xx:xx'
if not self._address:
return 'unknown'
ip_addr_str, port = self._address[:2]
if ':' in ip_addr_str:
return f'[{ip_addr_str}]:{port}'
else:
return f'{ip_addr_str}:{port}'
def toggle_logging(self):
self.log_me = not self.log_me
@ -811,8 +888,6 @@ class LBRYElectrumX(asyncio.Protocol):
"""Handle an incoming request. ElectrumX doesn't receive
notifications from client sessions.
"""
self.session_manager.request_count_metric.labels(method=request.method, version=self.client_version).inc()
if isinstance(request, Request):
method = request.method
if method == 'blockchain.block.get_chunk':
@ -891,6 +966,7 @@ class LBRYElectrumX(asyncio.Protocol):
raise RPCError(JSONRPC.METHOD_NOT_FOUND, f'unknown method "{method}"')
else:
raise ValueError
self.session_manager.request_count_metric.labels(method=request.method).inc()
if isinstance(request.args, dict):
return await coro(**request.args)
return await coro(*request.args)
@ -999,10 +1075,7 @@ class LBRYElectrumX(asyncio.Protocol):
'internal server error')
if isinstance(request, Request):
message = request.send_result(result)
self.RESPONSE_TIMES.labels(
method=request.method,
version=self.client_version
).observe(time.perf_counter() - start)
self.RESPONSE_TIMES.labels(method=request.method).observe(time.perf_counter() - start)
if message:
await self._send_message(message)
if isinstance(result, Exception):
@ -1029,26 +1102,29 @@ class LBRYElectrumX(asyncio.Protocol):
raise result
return result
async def send_notification(self, method, args=()) -> bool:
async def _send_notification(self, method, args=()) -> bool:
"""Send an RPC notification over the network."""
message = self.connection.send_notification(Notification(method, args))
self.NOTIFICATION_COUNT.labels(method=method, version=self.client_version).inc()
self.NOTIFICATION_COUNT.labels(method=method).inc()
try:
await self._send_message(message)
return True
except asyncio.TimeoutError:
self.logger.info("timeout sending address notification to %s", self.peer_address_str(for_log=True))
self.logger.info(f"timeout sending address notification to {self._address[0]}:{self._address[1]}")
self.abort()
return False
async def send_notifications(self, notifications) -> bool:
def send_notification(self, method, args=()):
self._task_group.add(self._send_notification(method, args))
async def _send_notifications(self, notifications) -> bool:
"""Send an RPC notification over the network."""
message, _ = self.connection.send_batch(notifications)
try:
await self._send_message(message)
return True
except asyncio.TimeoutError:
self.logger.info("timeout sending address notification to %s", self.peer_address_str(for_log=True))
self.logger.info(f"timeout sending address notification to {self._address[0]}:{self._address[1]}")
self.abort()
return False
@ -1078,7 +1154,7 @@ class LBRYElectrumX(asyncio.Protocol):
"""Return the server features dictionary."""
min_str, max_str = cls.protocol_min_max_strings()
cls.cached_server_features.update({
'hosts': env.hosts_dict(),
'hosts': {},
'pruning': None,
'server_version': cls.version,
'protocol_min': min_str,
@ -1107,35 +1183,53 @@ class LBRYElectrumX(asyncio.Protocol):
return len(self.hashX_subs)
async def get_hashX_status(self, hashX: bytes):
return await self.loop.run_in_executor(self.db._executor, self.db.get_hashX_status, hashX)
if self.env.index_address_status:
return await self.db.get_hashX_status(hashX)
history = ''.join(
f"{tx_hash[::-1].hex()}:{height:d}:"
for tx_hash, height in await self.db.limited_history(hashX, limit=None)
) + self.mempool.mempool_history(hashX)
if not history:
return
status = sha256(history.encode())
return status.hex()
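
For reference, the status returned above follows the electrum convention: the sha256 of the concatenated "txid:height:" strings of the address history (the real code reverses the stored tx_hash bytes before hex-encoding). A standalone sketch with placeholder values:

from hashlib import sha256

history = [("de" * 32, 1000), ("ad" * 32, 1001)]   # (txid hex, height) pairs, hypothetical
preimage = ''.join(f"{txid}:{height:d}:" for txid, height in history)
status = sha256(preimage.encode()).hexdigest()
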
async def send_history_notifications(self, *hashXes: typing.Iterable[bytes]):
async def get_hashX_statuses(self, hashXes: typing.List[bytes]):
if self.env.index_address_status:
return await self.db.get_hashX_statuses(hashXes)
return [await self.get_hashX_status(hashX) for hashX in hashXes]
async def _send_history_notifications(self, hashXes: typing.List[bytes]):
notifications = []
for hashX in hashXes:
start = time.perf_counter()
statuses = await self.get_hashX_statuses(hashXes)
duration = time.perf_counter() - start
self.session_manager.address_history_metric.observe(duration)
start = time.perf_counter()
scripthash_notifications = 0
address_notifications = 0
for hashX, status in zip(hashXes, statuses):
alias = self.hashX_subs[hashX]
if len(alias) == 64:
method = 'blockchain.scripthash.subscribe'
scripthash_notifications += 1
else:
method = 'blockchain.address.subscribe'
start = time.perf_counter()
status = await self.get_hashX_status(hashX)
duration = time.perf_counter() - start
self.session_manager.address_history_metric.observe(duration)
notifications.append((method, (alias, status)))
if duration > 30:
self.logger.warning("slow history notification (%s) for '%s'", duration, alias)
start = time.perf_counter()
self.session_manager.notifications_in_flight_metric.inc()
for method, args in notifications:
self.NOTIFICATION_COUNT.labels(method=method, version=self.client_version).inc()
address_notifications += 1
notifications.append(Notification(method, (alias, status)))
if scripthash_notifications:
self.NOTIFICATION_COUNT.labels(method='blockchain.scripthash.subscribe').inc(scripthash_notifications)
if address_notifications:
self.NOTIFICATION_COUNT.labels(method='blockchain.address.subscribe').inc(address_notifications)
self.session_manager.notifications_in_flight_metric.inc(len(notifications))
try:
await self.send_notifications(
Batch([Notification(method, (alias, status)) for (method, (alias, status)) in notifications])
)
await self._send_notifications(Batch(notifications))
self.session_manager.notifications_sent_metric.observe(time.perf_counter() - start)
finally:
self.session_manager.notifications_in_flight_metric.dec()
self.session_manager.notifications_in_flight_metric.dec(len(notifications))
def send_history_notifications(self, hashXes: typing.List[bytes]):
self._task_group.add(self._send_history_notifications(hashXes))
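
send_history_notifications now schedules the coroutine on self._task_group instead of awaiting it. The task group implementation is not shown in this diff; a minimal stand-in for such fire-and-forget scheduling might be:

import asyncio

class FireAndForgetTasks:
    def __init__(self):
        self._tasks = set()

    def add(self, coro):
        # keep a strong reference until the task finishes so it is not
        # garbage collected mid-flight
        task = asyncio.create_task(coro)
        self._tasks.add(task)
        task.add_done_callback(self._tasks.discard)
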
# def get_metrics_or_placeholder_for_api(self, query_name):
# """ Do not hold on to a reference to the metrics
@ -1184,7 +1278,7 @@ class LBRYElectrumX(asyncio.Protocol):
kwargs['channel_id'] = channel_claim.claim_hash.hex()
return await self.session_manager.search_index.cached_search(kwargs)
except ConnectionTimeout:
self.session_manager.interrupt_count_metric.inc()
self.session_manager.search_index.timeout_counter.inc()
raise RPCError(JSONRPC.QUERY_TIMEOUT, 'query timed out')
except TooManyClaimSearchParametersError as err:
await asyncio.sleep(2)
@ -1195,22 +1289,26 @@ class LBRYElectrumX(asyncio.Protocol):
self.session_manager.pending_query_metric.dec()
self.session_manager.executor_time_metric.observe(time.perf_counter() - start)
async def _cached_resolve_url(self, url):
if url not in self.session_manager.resolve_cache:
self.session_manager.resolve_cache[url] = await self.loop.run_in_executor(self.db._executor, self.db._resolve, url)
return self.session_manager.resolve_cache[url]
async def claimtrie_resolve(self, *urls) -> str:
sorted_urls = tuple(sorted(urls))
self.session_manager.urls_to_resolve_count_metric.inc(len(sorted_urls))
self.session_manager.urls_to_resolve_count_metric.inc(len(urls))
try:
if sorted_urls in self.session_manager.resolve_outputs_cache:
return self.session_manager.resolve_outputs_cache[sorted_urls]
rows, extra = [], []
resolved = {}
needed = defaultdict(list)
for idx, url in enumerate(urls):
cached = self.session_manager.resolve_cache.get(url)
if cached:
stream, channel, repost, reposted_channel = cached
resolved[url] = stream, channel, repost, reposted_channel
else:
needed[url].append(idx)
if needed:
resolved_needed = await self.db.batch_resolve_urls(list(needed))
for url, resolve_result in resolved_needed.items():
self.session_manager.resolve_cache[url] = resolve_result
resolved.update(resolved_needed)
for url in urls:
if url not in self.session_manager.resolve_cache:
self.session_manager.resolve_cache[url] = await self._cached_resolve_url(url)
stream, channel, repost, reposted_channel = self.session_manager.resolve_cache[url]
(stream, channel, repost, reposted_channel) = resolved[url]
if isinstance(channel, ResolveCensoredError):
rows.append(channel)
extra.append(channel.censor_row)
@ -1225,28 +1323,22 @@ class LBRYElectrumX(asyncio.Protocol):
extra.append(reposted_channel.censor_row)
elif channel and not stream:
rows.append(channel)
# print("resolved channel", channel.name.decode())
if repost:
extra.append(repost)
if reposted_channel:
extra.append(reposted_channel)
elif stream:
# print("resolved stream", stream.name.decode())
rows.append(stream)
if channel:
# print("and channel", channel.name.decode())
extra.append(channel)
if repost:
extra.append(repost)
if reposted_channel:
extra.append(reposted_channel)
await asyncio.sleep(0)
self.session_manager.resolve_outputs_cache[sorted_urls] = result = await self.loop.run_in_executor(
None, Outputs.to_base64, rows, extra
)
return result
return Outputs.to_base64(rows, extra)
finally:
self.session_manager.resolved_url_count_metric.inc(len(sorted_urls))
self.session_manager.resolved_url_count_metric.inc(len(urls))
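
The rewritten claimtrie_resolve is a look-aside cache with a single batched fetch for the misses. The general shape of that pattern, with fetch_many standing in for batch_resolve_urls, is roughly:

async def cached_batch(keys, cache, fetch_many):
    # serve what is already cached, fetch the misses in one batched call, then cache them
    resolved = {key: cache[key] for key in keys if key in cache}
    missing = [key for key in keys if key not in resolved]
    if missing:
        fetched = await fetch_many(missing)
        cache.update(fetched)
        resolved.update(fetched)
    return [resolved[key] for key in keys]
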
async def get_server_height(self):
return self.db.db_height
@ -1360,6 +1452,8 @@ class LBRYElectrumX(asyncio.Protocol):
sessions.remove(id(self))
except KeyError:
pass
else:
self.session_manager.address_subscription_metric.dec()
if not sessions:
self.hashX_subs.pop(hashX, None)
@ -1396,11 +1490,12 @@ class LBRYElectrumX(asyncio.Protocol):
address: the address to subscribe to"""
if len(addresses) > 1000:
raise RPCError(BAD_REQUEST, f'too many addresses in subscription request: {len(addresses)}')
results = []
for address in addresses:
results.append(await self.hashX_subscribe(self.address_to_hashX(address), address))
await asyncio.sleep(0)
return results
hashXes = [item async for item in asyncify_for_loop((self.address_to_hashX(address) for address in addresses), 100)]
for hashX, alias in zip(hashXes, addresses):
self.hashX_subs[hashX] = alias
self.session_manager.hashx_subscriptions_by_session[hashX].add(id(self))
self.session_manager.address_subscription_metric.inc(len(addresses))
return await self.get_hashX_statuses(hashXes)
async def address_unsubscribe(self, address):
"""Unsubscribe an address.
@ -1430,10 +1525,8 @@ class LBRYElectrumX(asyncio.Protocol):
async def confirmed_and_unconfirmed_history(self, hashX):
# Note history is ordered but unconfirmed is unordered in e-s
history = await self.session_manager.limited_history(hashX)
conf = [{'tx_hash': hash_to_hex_str(tx_hash), 'height': height}
for tx_hash, height in history]
return conf + self.unconfirmed_history(hashX)
history = await self.session_manager.cached_confirmed_history(hashX)
return history + self.unconfirmed_history(hashX)
async def scripthash_get_history(self, scripthash):
"""Return the confirmed and unconfirmed history of a scripthash."""
@ -1455,6 +1548,7 @@ class LBRYElectrumX(asyncio.Protocol):
scripthash: the SHA256 hash of the script to subscribe to"""
hashX = scripthash_to_hashX(scripthash)
self.session_manager.address_subscription_metric.inc()
return await self.hashX_subscribe(hashX, scripthash)
async def scripthash_unsubscribe(self, scripthash: str):
@ -1572,7 +1666,8 @@ class LBRYElectrumX(asyncio.Protocol):
async def relayfee(self):
"""The minimum fee a low-priority tx must pay in order to be accepted
to the daemon's memory pool."""
return await self.daemon_request('relayfee')
# return await self.daemon_request('relayfee')
return 0.00001
async def estimatefee(self, number):
"""The estimated transaction fee per kilobyte to be paid for a
@ -1580,8 +1675,9 @@ class LBRYElectrumX(asyncio.Protocol):
number: the number of blocks
"""
number = non_negative_integer(number)
return await self.daemon_request('estimatefee', number)
# number = non_negative_integer(number)
# return await self.daemon_request('estimatefee', number)
return 0.00014601
async def ping(self):
"""Serves as a connection keep-alive mechanism and for the client to
@ -1668,14 +1764,15 @@ class LBRYElectrumX(asyncio.Protocol):
verbose: passed on to the daemon
"""
assert_tx_hash(txid)
if verbose not in (True, False):
raise RPCError(BAD_REQUEST, '"verbose" must be a boolean')
verbose = bool(verbose)
tx_hash_bytes = bytes.fromhex(txid)[::-1]
raw_tx = await asyncio.get_event_loop().run_in_executor(None, self.db.get_raw_tx, tx_hash_bytes)
if raw_tx:
return raw_tx.hex()
return RPCError("No such mempool or blockchain transaction.")
if not verbose:
return raw_tx.hex()
return self.coin.transaction(raw_tx).as_dict(self.coin)
raise RPCError(BAD_REQUEST, "No such mempool or blockchain transaction.")
def _get_merkle_branch(self, tx_hashes, tx_pos):
"""Return a merkle branch to a transaction.


@ -3,8 +3,8 @@ import struct
from time import perf_counter
import logging
from typing import Optional, Tuple, NamedTuple
from scribe.schema.attrs import country_str_to_int, country_int_to_str
from scribe.common import LRUCache, is_valid_public_ipv4
from hub.schema.attrs import country_str_to_int, country_int_to_str
from hub.common import LRUCache, is_valid_public_ipv4
log = logging.getLogger(__name__)


@ -2,6 +2,7 @@ import typing
import struct
import asyncio
import logging
from typing import Deque, Tuple
log = logging.getLogger(__name__)
@ -31,52 +32,39 @@ class ElasticNotifierProtocol(asyncio.Protocol):
class ElasticNotifierClientProtocol(asyncio.Protocol):
"""notifies the reader when ES has written updates"""
def __init__(self, notifications: asyncio.Queue, host: str, port: int):
def __init__(self, notifications: asyncio.Queue, notifier_hosts: Deque[Tuple[Tuple[str, int], Tuple[str, int]]]):
assert len(notifier_hosts) > 0, 'no elastic notifier clients given'
self.notifications = notifications
self.transport: typing.Optional[asyncio.Transport] = None
self.host = host
self.port = port
self._lost_connection = asyncio.Event()
self._lost_connection.set()
self._notifier_hosts = notifier_hosts
self.lost_connection = asyncio.Event()
self.lost_connection.set()
@property
def host(self):
return self._notifier_hosts[0][1][0]
@property
def port(self):
return self._notifier_hosts[0][1][1]
async def connect(self):
if self._lost_connection.is_set():
if self.lost_connection.is_set():
await asyncio.get_event_loop().create_connection(
lambda: self, self.host, self.port
)
async def maintain_connection(self, synchronized: asyncio.Event):
first_connect = True
if not self._lost_connection.is_set():
synchronized.set()
while True:
try:
await self._lost_connection.wait()
if not first_connect:
log.warning("lost connection to scribe-elastic-sync notifier")
await self.connect()
first_connect = False
synchronized.set()
log.info("connected to es notifier")
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
log.warning("waiting 30s for scribe-elastic-sync notifier to become available (%s:%i)", self.host, self.port)
await asyncio.sleep(30)
else:
log.info("stopping the notifier loop")
raise e
def close(self):
if self.transport and not self.transport.is_closing():
self.transport.close()
def connection_made(self, transport):
self.transport = transport
self._lost_connection.clear()
self.lost_connection.clear()
def connection_lost(self, exc) -> None:
self.transport = None
self._lost_connection.set()
self.lost_connection.set()
def data_received(self, data: bytes) -> None:
try:


@ -7,13 +7,13 @@ from string import ascii_letters
from decimal import Decimal, ROUND_UP
from google.protobuf.json_format import MessageToDict
from scribe.schema.base58 import Base58, b58_encode
from scribe.error import MissingPublishedFileError, EmptyPublishedFileError
from hub.schema.base58 import Base58, b58_encode
from hub.error import MissingPublishedFileError, EmptyPublishedFileError
from scribe.schema.mime_types import guess_media_type
from scribe.schema.base import Metadata, BaseMessageList
from scribe.schema.tags import normalize_tag
from scribe.schema.types.v2.claim_pb2 import (
from hub.schema.mime_types import guess_media_type
from hub.schema.base import Metadata, BaseMessageList
from hub.schema.tags import normalize_tag
from hub.schema.types.v2.claim_pb2 import (
Fee as FeeMessage,
Location as LocationMessage,
Language as LanguageMessage


@ -8,7 +8,7 @@ from coincurve.utils import (
pem_to_der, lib as libsecp256k1, ffi as libsecp256k1_ffi
)
from coincurve.ecdsa import CDATA_SIG_LENGTH
from scribe.schema.base58 import Base58
from hub.schema.base58 import Base58
if (sys.version_info.major, sys.version_info.minor) > (3, 7):


@ -11,15 +11,15 @@ from hachoir.core.log import log as hachoir_log
from hachoir.parser import createParser as binary_file_parser
from hachoir.metadata import extractMetadata as binary_file_metadata
from scribe.schema import compat
from scribe.schema.base import Signable
from scribe.schema.mime_types import guess_media_type, guess_stream_type
from scribe.schema.attrs import (
from hub.schema import compat
from hub.schema.base import Signable
from hub.schema.mime_types import guess_media_type, guess_stream_type
from hub.schema.attrs import (
Source, Playable, Dimmensional, Fee, Image, Video, Audio,
LanguageList, LocationList, ClaimList, ClaimReference, TagList
)
from scribe.schema.types.v2.claim_pb2 import Claim as ClaimMessage
from scribe.error import InputValueIsNoneError
from hub.schema.types.v2.claim_pb2 import Claim as ClaimMessage
from hub.error import InputValueIsNoneError
hachoir_log.use_print = False


@ -3,9 +3,9 @@ from decimal import Decimal
from google.protobuf.message import DecodeError
from scribe.schema.types.v1.legacy_claim_pb2 import Claim as OldClaimMessage
from scribe.schema.types.v1.certificate_pb2 import KeyType
from scribe.schema.types.v1.fee_pb2 import Fee as FeeMessage
from hub.schema.types.v1.legacy_claim_pb2 import Claim as OldClaimMessage
from hub.schema.types.v1.certificate_pb2 import KeyType
from hub.schema.types.v1.fee_pb2 import Fee as FeeMessage
def from_old_json_schema(claim, payload: bytes):


@ -1,6 +1,6 @@
from google.protobuf.message import DecodeError
from google.protobuf.json_format import MessageToDict
from scribe.schema.types.v2.purchase_pb2 import Purchase as PurchaseMessage
from hub.schema.types.v2.purchase_pb2 import Purchase as PurchaseMessage
from .attrs import ClaimReference


@ -2,11 +2,11 @@ import base64
from typing import List, TYPE_CHECKING, Union, Optional, Dict, Set, Tuple
from itertools import chain
from scribe.error import ResolveCensoredError
from scribe.schema.types.v2.result_pb2 import Outputs as OutputsMessage
from scribe.schema.types.v2.result_pb2 import Error as ErrorMessage
from hub.error import ResolveCensoredError
from hub.schema.types.v2.result_pb2 import Outputs as OutputsMessage
from hub.schema.types.v2.result_pb2 import Error as ErrorMessage
if TYPE_CHECKING:
from scribe.db.common import ResolveResult
from hub.db.common import ResolveResult
INVALID = ErrorMessage.Code.Name(ErrorMessage.INVALID)
NOT_FOUND = ErrorMessage.Code.Name(ErrorMessage.NOT_FOUND)
BLOCKED = ErrorMessage.Code.Name(ErrorMessage.BLOCKED)


@ -1,5 +1,5 @@
from scribe.schema.base import Signable
from scribe.schema.types.v2.support_pb2 import Support as SupportMessage
from hub.schema.base import Signable
from hub.schema.types.v2.support_pb2 import Support as SupportMessage
class Support(Signable):


hub/scribe/__init__.py Normal file

@ -0,0 +1 @@
from hub.scribe.network import LBCTestNet, LBCRegTest, LBCMainNet


@ -2,20 +2,20 @@ import os
import logging
import traceback
import argparse
from scribe.env import Env
from scribe.common import setup_logging
from scribe.blockchain.service import BlockchainProcessorService
from hub.common import setup_logging
from hub.scribe.env import BlockchainEnv
from hub.scribe.service import BlockchainProcessorService
def main():
parser = argparse.ArgumentParser(
prog='scribe'
)
Env.contribute_to_arg_parser(parser)
BlockchainEnv.contribute_to_arg_parser(parser)
args = parser.parse_args()
try:
env = Env.from_arg_parser(args)
env = BlockchainEnv.from_arg_parser(args)
setup_logging(os.path.join(env.db_dir, 'scribe.log'))
block_processor = BlockchainProcessorService(env)
block_processor.run()


@ -3,12 +3,13 @@ import itertools
import json
import time
import logging
import ssl
from functools import wraps
import aiohttp
from prometheus_client import Gauge, Histogram
from scribe import PROMETHEUS_NAMESPACE
from scribe.common import LRUCacheWithMetrics, RPCError, DaemonError, WarmingUpError, WorkQueueFullError
from hub import PROMETHEUS_NAMESPACE
from hub.common import LRUCacheWithMetrics, RPCError, DaemonError, WarmingUpError, WorkQueueFullError
log = logging.getLogger(__name__)
@ -43,7 +44,7 @@ class LBCDaemon:
)
def __init__(self, coin, url, max_workqueue=10, init_retry=0.25,
max_retry=4.0):
max_retry=4.0, daemon_ca_path=None):
self.coin = coin
self.logger = logging.getLogger(__name__)
self.set_url(url)
@ -54,9 +55,15 @@ class LBCDaemon:
self.max_retry = max_retry
self._height = None
self.available_rpcs = {}
self.connector = aiohttp.TCPConnector(ssl=False)
self._block_hash_cache = LRUCacheWithMetrics(100000)
self._block_cache = LRUCacheWithMetrics(2 ** 13, metric_name='block', namespace=NAMESPACE)
ssl_context = None if not daemon_ca_path else ssl.create_default_context(
purpose=ssl.Purpose.CLIENT_AUTH, capath=daemon_ca_path
)
if ssl_context:
self.connector = aiohttp.TCPConnector(ssl_context=ssl_context)
else:
self.connector = aiohttp.TCPConnector(ssl=False)
self._block_hash_cache = LRUCacheWithMetrics(1024, metric_name='block_hash', namespace=NAMESPACE)
self._block_cache = LRUCacheWithMetrics(64, metric_name='block', namespace=NAMESPACE)
async def close(self):
if self.connector:
@ -176,6 +183,8 @@ class LBCDaemon:
start = time.perf_counter()
def processor(result):
if result is None:
raise WarmingUpError
err = result['error']
if not err:
return result['result']
@ -200,6 +209,8 @@ class LBCDaemon:
start = time.perf_counter()
def processor(result):
if result is None:
raise WarmingUpError
errs = [item['error'] for item in result if item['error']]
if any(err.get('code') == self.WARMING_UP for err in errs):
raise WarmingUpError

hub/scribe/db.py Normal file

@ -0,0 +1,128 @@
import hashlib
import asyncio
import array
import time
from typing import List
from concurrent.futures.thread import ThreadPoolExecutor
from bisect import bisect_right
from hub.common import ResumableSHA256
from hub.db import SecondaryDB
class PrimaryDB(SecondaryDB):
def __init__(self, coin, db_dir: str, reorg_limit: int = 200,
cache_all_tx_hashes: bool = False,
max_open_files: int = 64, blocking_channel_ids: List[str] = None,
filtering_channel_ids: List[str] = None, executor: ThreadPoolExecutor = None,
index_address_status=False, enforce_integrity=True):
super().__init__(coin, db_dir, '', max_open_files, reorg_limit, cache_all_tx_hashes,
blocking_channel_ids, filtering_channel_ids, executor, index_address_status,
enforce_integrity=enforce_integrity)
def _rebuild_hashX_status_index(self, start_height: int):
self.logger.warning("rebuilding the address status index...")
prefix_db = self.prefix_db
def hashX_iterator():
last_hashX = None
for k in prefix_db.hashX_history.iterate(deserialize_key=False, include_value=False):
hashX = k[1:12]
if last_hashX is None:
last_hashX = hashX
if last_hashX != hashX:
yield last_hashX
last_hashX = hashX
if last_hashX:
yield last_hashX
def hashX_status_from_history(history: bytes) -> ResumableSHA256:
tx_counts = self.tx_counts
hist_tx_nums = array.array('I')
hist_tx_nums.frombytes(history)
digest = ResumableSHA256()
digest.update(
b''.join(f'{tx_hash[::-1].hex()}:{bisect_right(tx_counts, tx_num)}:'.encode()
for tx_num, tx_hash in zip(
hist_tx_nums,
self.prefix_db.tx_hash.multi_get([(tx_num,) for tx_num in hist_tx_nums], deserialize_value=False)
))
)
return digest
start = time.perf_counter()
if start_height <= 0:
self.logger.info("loading all blockchain addresses, this will take a little while...")
hashXs = list({hashX for hashX in hashX_iterator()})
else:
self.logger.info("loading addresses since block %i...", start_height)
hashXs = set()
for touched in prefix_db.touched_hashX.iterate(start=(start_height,), stop=(self.db_height + 1,),
include_key=False):
hashXs.update(touched.touched_hashXs)
hashXs = list(hashXs)
self.logger.info(f"loaded {len(hashXs)} hashXs in {round(time.perf_counter() - start, 2)}s, "
f"now building the status index...")
op_cnt = 0
hashX_cnt = 0
for hashX in hashXs:
hashX_cnt += 1
key = prefix_db.hashX_status.pack_key(hashX)
history = b''.join(prefix_db.hashX_history.iterate(prefix=(hashX,), deserialize_value=False, include_key=False))
digester = hashX_status_from_history(history)
status = digester.digest()
existing_status = prefix_db.hashX_status.get(hashX, deserialize_value=False)
existing_digester = prefix_db.hashX_history_hasher.get(hashX)
if not existing_status:
prefix_db.stash_raw_put(key, status)
op_cnt += 1
else:
prefix_db.stash_raw_delete(key, existing_status)
prefix_db.stash_raw_put(key, status)
op_cnt += 2
if not existing_digester:
prefix_db.hashX_history_hasher.stash_put((hashX,), (digester,))
op_cnt += 1
else:
prefix_db.hashX_history_hasher.stash_delete((hashX,), existing_digester)
prefix_db.hashX_history_hasher.stash_put((hashX,), (digester,))
op_cnt += 2
if op_cnt > 100000:
prefix_db.unsafe_commit()
self.logger.info(f"wrote {hashX_cnt}/{len(hashXs)} hashXs statuses...")
op_cnt = 0
if op_cnt:
prefix_db.unsafe_commit()
self.logger.info(f"wrote {hashX_cnt}/{len(hashXs)} hashXs statuses...")
self._index_address_status = True
self.last_indexed_address_status_height = self.db_height
self.write_db_state()
self.prefix_db.unsafe_commit()
self.logger.info("finished indexing address statuses")
def rebuild_hashX_status_index(self, start_height: int):
return asyncio.get_event_loop().run_in_executor(self._executor, self._rebuild_hashX_status_index, start_height)
def apply_expiration_extension_fork(self):
# TODO: this can't be reorged
for k, v in self.prefix_db.claim_expiration.iterate():
self.prefix_db.claim_expiration.stash_delete(k, v)
self.prefix_db.claim_expiration.stash_put(
(bisect_right(self.tx_counts, k.tx_num) + self.coin.nExtendedClaimExpirationTime,
k.tx_num, k.position), v
)
self.prefix_db.unsafe_commit()
def write_db_state(self):
"""Write (UTXO) state to the batch."""
if self.db_height > 0:
existing = self.prefix_db.db_state.get()
self.prefix_db.db_state.stash_delete((), existing.expanded)
self.prefix_db.db_state.stash_put((), (
self.genesis_bytes, self.db_height, self.db_tx_count, self.db_tip,
self.utxo_flush_count, int(self.wall_time), self.catching_up, self._index_address_status, self.db_version,
self.hist_flush_count, self.hist_comp_flush_count, self.hist_comp_cursor,
self.es_sync_height, self.last_indexed_address_status_height
)
)

hub/scribe/env.py Normal file

@ -0,0 +1,64 @@
from hub.env import Env
class BlockchainEnv(Env):
def __init__(self, db_dir=None, max_query_workers=None, chain=None, reorg_limit=None,
prometheus_port=None, cache_all_tx_hashes=None, blocking_channel_ids=None, filtering_channel_ids=None,
db_max_open_files=64, daemon_url=None, hashX_history_cache_size=None,
index_address_status=None, rebuild_address_status_from_height=None,
daemon_ca_path=None, history_tx_cache_size=None,
db_disable_integrity_checks=False):
super().__init__(db_dir, max_query_workers, chain, reorg_limit, prometheus_port, cache_all_tx_hashes,
blocking_channel_ids, filtering_channel_ids, index_address_status)
self.db_max_open_files = db_max_open_files
self.daemon_url = daemon_url if daemon_url is not None else self.required('DAEMON_URL')
self.hashX_history_cache_size = hashX_history_cache_size if hashX_history_cache_size is not None \
else self.integer('ADDRESS_HISTORY_CACHE_SIZE', 4096)
self.rebuild_address_status_from_height = rebuild_address_status_from_height \
if isinstance(rebuild_address_status_from_height, int) else -1
self.daemon_ca_path = daemon_ca_path if daemon_ca_path else None
self.history_tx_cache_size = history_tx_cache_size if history_tx_cache_size is not None else \
self.integer('HISTORY_TX_CACHE_SIZE', 4194304)
self.db_disable_integrity_checks = db_disable_integrity_checks
@classmethod
def contribute_to_arg_parser(cls, parser):
super().contribute_to_arg_parser(parser)
env_daemon_url = cls.default('DAEMON_URL', None)
parser.add_argument('--daemon_url', required=env_daemon_url is None,
help="URL for rpc from lbrycrd or lbcd, "
"<rpcuser>:<rpcpassword>@<lbrycrd rpc ip><lbrycrd rpc port>.",
default=env_daemon_url)
parser.add_argument('--daemon_ca_path', type=str, default='',
help='Path to the lbcd ca file, used for lbcd with ssl')
parser.add_argument('--db_disable_integrity_checks', action='store_true',
help="Disable verifications that no db operation breaks the ability to be rewound",
default=False)
parser.add_argument('--db_max_open_files', type=int, default=64,
help='This setting translates into the max_open_files option given to rocksdb. '
'A higher number will use more memory. Defaults to 64.')
parser.add_argument('--address_history_cache_size', type=int,
default=cls.integer('ADDRESS_HISTORY_CACHE_SIZE', 4096),
help="LRU cache size for address histories, used when processing new blocks "
"and when processing mempool updates. Can be set in env with "
"'ADDRESS_HISTORY_CACHE_SIZE'")
parser.add_argument('--rebuild_address_status_from_height', type=int, default=-1,
help="Rebuild address statuses, set to 0 to reindex all address statuses or provide a "
"block height to start reindexing from. Defaults to -1 (off).")
parser.add_argument('--history_tx_cache_size', type=int,
default=cls.integer('HISTORY_TX_CACHE_SIZE', 4194304),
help="Size of the lfu cache of txids in transaction histories for addresses. "
"Can be set in the env with 'HISTORY_TX_CACHE_SIZE'")
@classmethod
def from_arg_parser(cls, args):
return cls(
db_dir=args.db_dir, daemon_url=args.daemon_url, db_max_open_files=args.db_max_open_files,
max_query_workers=args.max_query_workers, chain=args.chain, reorg_limit=args.reorg_limit,
prometheus_port=args.prometheus_port, cache_all_tx_hashes=args.cache_all_tx_hashes,
index_address_status=args.index_address_statuses,
hashX_history_cache_size=args.address_history_cache_size,
rebuild_address_status_from_height=args.rebuild_address_status_from_height,
daemon_ca_path=args.daemon_ca_path, history_tx_cache_size=args.history_tx_cache_size,
db_disable_integrity_checks=args.db_disable_integrity_checks
)
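
For context, a hypothetical programmatic use of the new BlockchainEnv (all values below are placeholders and the remaining options are left at their defaults) mirrors the scribe entry point shown earlier in this diff:

from hub.scribe.env import BlockchainEnv
from hub.scribe.service import BlockchainProcessorService

env = BlockchainEnv(
    db_dir='/tmp/scribe-db',                          # placeholder path
    daemon_url='rpcuser:rpcpass@127.0.0.1:9245',      # placeholder credentials
    max_query_workers=2,
    history_tx_cache_size=4194304,
)
service = BlockchainProcessorService(env)
# service.run()  # blocks until shutdown
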


@ -2,10 +2,10 @@ import itertools
import attr
import typing
from collections import defaultdict
from scribe.blockchain.transaction.deserializer import Deserializer
from hub.scribe.transaction.deserializer import Deserializer
if typing.TYPE_CHECKING:
from scribe.db import HubDB
from hub.scribe.db import PrimaryDB
@attr.s(slots=True)
@ -27,7 +27,7 @@ class MemPoolTxSummary:
class MemPool:
def __init__(self, coin, db: 'HubDB'):
def __init__(self, coin, db: 'PrimaryDB'):
self.coin = coin
self._db = db
self.txs = {}


@ -4,12 +4,13 @@ import typing
from typing import List
from hashlib import sha256
from decimal import Decimal
from scribe.schema.base58 import Base58
from scribe.schema.bip32 import PublicKey
from scribe.common import hash160, hash_to_hex_str, double_sha256
from scribe.blockchain.transaction import TxOutput, TxInput, Block
from scribe.blockchain.transaction.deserializer import Deserializer
from scribe.blockchain.transaction.script import OpCodes, P2PKH_script, P2SH_script, txo_script_parser
from yarl import URL
from hub.schema.base58 import Base58
from hub.schema.bip32 import PublicKey
from hub.common import hash160, hash_to_hex_str, double_sha256
from hub.scribe.transaction import TxOutput, TxInput, Block
from hub.scribe.transaction.deserializer import Deserializer
from hub.scribe.transaction.script import OpCodes, P2PKH_script, P2SH_script, txo_script_parser
HASHX_LEN = 11
@ -58,6 +59,9 @@ class LBCMainNet:
proportionalDelayFactor = 32
maxTakeoverDelay = 4032
averageBlockOffset = 160.31130145580738
genesisTime = 1466660400
@classmethod
def sanitize_url(cls, url):
# Remove surrounding ws and trailing /s
@ -69,6 +73,9 @@ class LBCMainNet:
url += f':{cls.RPC_PORT:d}'
if not url.startswith('http://') and not url.startswith('https://'):
url = 'http://' + url
obj = URL(url)
if not obj.user or not obj.password:
raise CoinError(f'unparseable <user>:<pass> in daemon URL: "{url}"')
return url + '/'
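
As an illustration of the new credential check (assuming LBCMainNet.RPC_PORT is the lbrycrd default of 9245):

from hub.scribe.network import LBCMainNet

url = LBCMainNet.sanitize_url('rpcuser:rpcpass@127.0.0.1')
# -> 'http://rpcuser:rpcpass@127.0.0.1:9245/'
# LBCMainNet.sanitize_url('127.0.0.1') would now raise CoinError (no <user>:<pass>)
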
@classmethod


@ -1,9 +1,9 @@
import asyncio
import logging
import typing
from hub.scribe.daemon import LBCDaemon, DaemonError
if typing.TYPE_CHECKING:
from scribe.blockchain.network import LBCMainNet
from scribe.blockchain.daemon import LBCDaemon
from hub.scribe.network import LBCMainNet
def chunks(items, size):
@ -42,8 +42,12 @@ class Prefetcher:
while True:
# Sleep a while if there is nothing to prefetch
await self.refill_event.wait()
if not await self._prefetch_blocks():
await asyncio.sleep(self.polling_delay)
try:
if not await self._prefetch_blocks():
await asyncio.sleep(self.polling_delay)
except DaemonError as err:
self.logger.warning("block prefetcher failed: '%s', retrying in 5 seconds", err)
await asyncio.sleep(5)
except Exception as e:
if not isinstance(e, asyncio.CancelledError):
self.logger.exception("block fetcher loop crashed")

File diff suppressed because it is too large.


@ -1,9 +1,11 @@
import sys
import functools
import typing
import time
from dataclasses import dataclass
from struct import Struct
from scribe.schema.claim import Claim
from hub.schema.claim import Claim
from hub.common import double_sha256
if (sys.version_info.major, sys.version_info.minor) > (3, 7):
cachedproperty = functools.cached_property
@ -84,6 +86,51 @@ class Tx(typing.NamedTuple):
flag: typing.Optional[int] = None
witness: typing.Optional[typing.List[typing.List[bytes]]] = None
def as_dict(self, coin):
txid = double_sha256(self.raw)[::-1].hex()
result = {
"txid": txid,
"hash": txid,
"version": self.version,
"size": len(self.raw),
"vsize": len(self.raw),
"weight": None, # FIXME: add this
"locktime": self.locktime,
"vin": [
{
"txid": txin.prev_hash[::-1].hex(),
"vout": txin.prev_idx,
"scriptSig": {
"asm": None, # FIXME: add this
"hex": txin.script.hex()
},
"sequence": txin.sequence
} for txin in self.inputs
],
"vout": [
{
"value": txo.value / 1E8,
"n": txo.nout,
"scriptPubKey": {
"asm": None, # FIXME: add this
"hex": txo.pk_script.hex(),
"reqSigs": 1, # FIXME: what if it isn't 1?
"type": "nonstandard" if (txo.is_support or txo.is_claim or txo.is_update) else "pubkeyhash" if txo.pubkey_hash else "scripthash",
"addresses": [
coin.claim_address_handler(txo)
]
}
} for txo in self.outputs
],
"hex": self.raw.hex()
}
for n, txo in enumerate(self.outputs):
if txo.is_support or txo.is_claim or txo.is_update:
result['vout'][n]["scriptPubKey"]["isclaim"] = txo.is_claim or txo.is_update
result['vout'][n]["scriptPubKey"]["issupport"] = txo.is_support
result['vout'][n]["scriptPubKey"]["subtype"] = "pubkeyhash" if txo.pubkey_hash else "scripthash"
return result
class TxInput(typing.NamedTuple):
prev_hash: bytes
@ -146,3 +193,21 @@ class Block(typing.NamedTuple):
raw: bytes
header: bytes
transactions: typing.List[Tx]
@property
def decoded_header(self):
header = self.header
version = int.from_bytes(header[:4], byteorder='little')
ts = time.gmtime(int.from_bytes(header[100:104], byteorder='little'))
timestamp = f"{ts.tm_year}-{ts.tm_mon}-{ts.tm_mday}"
bits = int.from_bytes(header[104:108], byteorder='little')
nonce = int.from_bytes(header[108:112], byteorder='little')
return {
'version': version,
'prev_block_hash': header[4:36][::-1].hex(),
'merkle_root': header[36:68][::-1].hex(),
'claim_trie_root': header[68:100][::-1].hex(),
'timestamp': timestamp,
'bits': bits,
'nonce': nonce
}


@ -1,9 +1,9 @@
from scribe.common import double_sha256
from scribe.blockchain.transaction import (
from hub.common import double_sha256
from hub.scribe.transaction import (
unpack_le_int32_from, unpack_le_int64_from, unpack_le_uint16_from,
unpack_le_uint32_from, unpack_le_uint64_from, Tx, TxInput, TxOutput
)
from scribe.blockchain.transaction.script import txo_script_parser
from hub.scribe.transaction.script import txo_script_parser
class Deserializer:

Some files were not shown because too many files have changed in this diff.