diff --git a/README.md b/README.md index 9b299c32..d215af8e 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Example: export RPC_URL="http://111.111.111.111:8546" ``` +**Note**: mev-inspect-py currently requires the RPC endpoint of a full archive node with support for Erigon traces and receipts. Geth support has been added that translates Geth traces and receipts into the Erigon format; enable it with the `--type geth` option (or by passing `geth` as the last argument to `./mev`, as shown below). Next, start all services with: @@ -65,17 +66,19 @@ On first startup, you'll need to apply database migrations with: ### Inspect a single block Inspecting block [12914944](https://twitter.com/mevalphaleak/status/1420416437575901185): +**Note**: Add `geth` at the end instead of `parity` if `RPC_URL` points to a Geth or Geth-like node. ``` -./mev inspect 12914944 +./mev inspect 12914944 parity ``` ### Inspect many blocks Inspecting blocks 12914944 to 12914954: +**Note**: Add `geth` at the end instead of `parity` if `RPC_URL` points to a Geth or Geth-like node. ``` -./mev inspect-many 12914944 12914954 +./mev inspect-many 12914944 12914954 parity ``` ### Inspect all incoming blocks diff --git a/cli.py b/cli.py index 2a78b75a..48cd0099 100644 --- a/cli.py +++ b/cli.py @@ -8,6 +8,7 @@ from mev_inspect.crud.prices import write_prices from mev_inspect.db import get_inspect_session, get_trace_session from mev_inspect.inspector import MEVInspector +from mev_inspect.utils import RPCType from mev_inspect.prices import fetch_all_supported_prices RPC_URL_ENV = "RPC_URL" @@ -24,15 +25,29 @@ def cli(): @cli.command() @click.argument("block_number", type=int) @click.option("--rpc", default=lambda: os.environ.get(RPC_URL_ENV, "")) +@click.option( + "--type", + type=click.Choice(list(map(lambda x: x.name, RPCType)), case_sensitive=False), + default=RPCType.parity.name, +) @coro -async def inspect_block_command(block_number: int, rpc: str): +async def inspect_block_command(block_number: int, rpc: str, type: str): + type_e = convert_str_to_enum(type) inspect_db_session = 
get_inspect_session() trace_db_session = get_trace_session() - inspector = MEVInspector(rpc, inspect_db_session, trace_db_session) + inspector = MEVInspector(rpc, inspect_db_session, trace_db_session, type_e) await inspector.inspect_single_block(block=block_number) +def convert_str_to_enum(type: str) -> RPCType: + if type == "parity": + return RPCType.parity + elif type == "geth": + return RPCType.geth + raise ValueError + + @cli.command() @click.argument("block_number", type=int) @click.option("--rpc", default=lambda: os.environ.get(RPC_URL_ENV, "")) @@ -41,7 +56,7 @@ async def fetch_block_command(block_number: int, rpc: str): inspect_db_session = get_inspect_session() trace_db_session = get_trace_session() - inspector = MEVInspector(rpc, inspect_db_session, trace_db_session) + inspector = MEVInspector(rpc, inspect_db_session, trace_db_session, RPCType.parity) block = await inspector.create_from_block(block_number=block_number) print(block.json()) @@ -50,6 +65,11 @@ async def fetch_block_command(block_number: int, rpc: str): @click.argument("after_block", type=int) @click.argument("before_block", type=int) @click.option("--rpc", default=lambda: os.environ.get(RPC_URL_ENV, "")) +@click.option( + "--type", + type=click.Choice(list(map(lambda x: x.name, RPCType)), case_sensitive=False), + default=RPCType.parity.name, +) @click.option( "--max-concurrency", type=int, @@ -66,14 +86,16 @@ async def inspect_many_blocks_command( rpc: str, max_concurrency: int, request_timeout: int, + type: str, ): + type_e = convert_str_to_enum(type) inspect_db_session = get_inspect_session() trace_db_session = get_trace_session() - inspector = MEVInspector( rpc, inspect_db_session, trace_db_session, + type_e, max_concurrency=max_concurrency, request_timeout=request_timeout, ) diff --git a/mev b/mev index d85015aa..186ea795 100755 --- a/mev +++ b/mev @@ -37,15 +37,17 @@ case "$1" in ;; inspect) block_number=$2 + rpc_type=$3 echo "Inspecting block $block_number" - kubectl exec -ti 
deploy/mev-inspect -- poetry run inspect-block $block_number + kubectl exec -ti deploy/mev-inspect -- poetry run inspect-block $block_number --type $rpc_type ;; inspect-many) start_block_number=$2 end_block_number=$3 + rpc_type=$4 echo "Inspecting from block $start_block_number to $end_block_number" kubectl exec -ti deploy/mev-inspect -- \ - poetry run inspect-many-blocks $start_block_number $end_block_number + poetry run inspect-many-blocks $start_block_number $end_block_number --type $rpc_type ;; test) echo "Running tests" diff --git a/mev_inspect/block.py b/mev_inspect/block.py index ab619197..0b2a7592 100644 --- a/mev_inspect/block.py +++ b/mev_inspect/block.py @@ -9,10 +9,18 @@ from mev_inspect.schemas.blocks import Block from mev_inspect.schemas.receipts import Receipt from mev_inspect.schemas.traces import Trace, TraceType -from mev_inspect.utils import hex_to_int +from mev_inspect.utils import RPCType, hex_to_int logger = logging.getLogger(__name__) +_calltype_mapping = { + "CALL": "call", + "DELEGATECALL": "delegateCall", + "CREATE": "create", + "CREATE2": "create2", + "SUICIDE": "suicide", + "REWARD": "reward", +} async def get_latest_block_number(base_provider) -> int: @@ -27,6 +35,7 @@ async def get_latest_block_number(base_provider) -> int: async def create_from_block_number( base_provider, w3: Web3, + type: RPCType, block_number: int, trace_db_session: Optional[orm.Session], ) -> Block: @@ -36,13 +45,19 @@ async def create_from_block_number( block = _find_block(trace_db_session, block_number) if block is None: - block = await _fetch_block(w3, base_provider, block_number) - return block - else: - return block + if type is RPCType.parity: + block = await _fetch_block_parity(w3, base_provider, block_number) + elif type is RPCType.geth: + block = await _fetch_block_geth(w3, base_provider, block_number) + else: + logger.error(f"RPCType not known - {type}") + raise ValueError + return block -async def _fetch_block(w3, base_provider, block_number: int, 
retries: int = 0) -> Block: +async def _fetch_block_parity( + w3, base_provider, block_number: int, retries: int = 0 +) -> Block: block_json, receipts_json, traces_json, base_fee_per_gas = await asyncio.gather( w3.eth.get_block(block_number), base_provider.make_request("eth_getBlockReceipts", [block_number]), @@ -55,24 +70,56 @@ async def _fetch_block(w3, base_provider, block_number: int, retries: int = 0) - Receipt(**receipt) for receipt in receipts_json["result"] ] traces = [Trace(**trace_json) for trace_json in traces_json["result"]] + return Block( + block_number=block_number, + block_timestamp=block_json["timestamp"], + miner=block_json["miner"], + base_fee_per_gas=base_fee_per_gas, + traces=traces, + receipts=receipts, + ) except KeyError as e: logger.warning( f"Failed to create objects from block: {block_number}: {e}, retrying: {retries + 1} / 3" ) if retries < 3: await asyncio.sleep(5) - return await _fetch_block(w3, base_provider, block_number, retries) + return await _fetch_block_parity(w3, base_provider, block_number, retries) else: raise - return Block( - block_number=block_number, - block_timestamp=block_json["timestamp"], - miner=block_json["miner"], - base_fee_per_gas=base_fee_per_gas, - traces=traces, - receipts=receipts, - ) + +async def _fetch_block_geth( + w3, base_provider, block_number: int, retries: int = 0 +) -> Block: + block_json = await asyncio.gather(w3.eth.get_block(block_number)) + + try: + # Separate calls to help with load during block tracing + traces = await geth_get_tx_traces_parity_format(base_provider, block_json[0]) + geth_tx_receipts = await geth_get_tx_receipts_async( + base_provider, block_json[0]["transactions"] + ) + receipts = geth_receipts_translator(block_json[0], geth_tx_receipts) + base_fee_per_gas = 0 # Polygon specific, TODO for other chains + + return Block( + block_number=block_number, + block_timestamp=block_json[0]["timestamp"], + miner=block_json[0]["miner"], + base_fee_per_gas=base_fee_per_gas, + traces=traces, 
+ receipts=receipts, + ) + except KeyError as e: + logger.warning( + f"Failed to create objects from block: {block_number}: {e}, retrying: {retries + 1} / 3" + ) + if retries < 3: + await asyncio.sleep(5) + return await _fetch_block_geth(w3, base_provider, block_number, retries) + else: + raise def _find_block( @@ -191,3 +238,120 @@ def get_transaction_hashes(calls: List[Trace]) -> List[str]: result.append(call.transaction_hash) return result + + +# Geth specific additions + + +async def geth_get_tx_traces_parity_format(base_provider, block_json: dict): + # print(block_json['hash'].hex()) + block_hash = block_json["hash"] + block_trace = await geth_get_tx_traces(base_provider, block_hash) + # print(block_trace) + parity_traces = [] + for idx, trace in enumerate(block_trace["result"]): + if "result" in trace: + parity_traces.extend( + unwrap_tx_trace_for_parity(block_json, idx, trace["result"]) + ) + return parity_traces + + +async def geth_get_tx_traces(base_provider, block_hash): + block_trace = await base_provider.make_request( + "debug_traceBlockByHash", [block_hash.hex(), {"tracer": "callTracer"}] + ) + return block_trace + + +def unwrap_tx_trace_for_parity( + block_json, tx_pos_in_block, tx_trace, position=[] +) -> List[Trace]: + response_list = [] + try: + if tx_trace["type"] == "STATICCALL": + return [] + action_dict = dict() + action_dict["callType"] = _calltype_mapping[tx_trace["type"]] + if action_dict["callType"] == "call": + action_dict["value"] = tx_trace["value"] + for key in ["from", "to", "gas", "input"]: + action_dict[key] = tx_trace[key] + + result_dict = dict() + for key in ["gasUsed", "output"]: + result_dict[key] = tx_trace[key] + + response_list.append( + Trace( + action=action_dict, + block_hash=str(block_json["hash"]), + block_number=int(block_json["number"]), + result=result_dict, + subtraces=len(tx_trace["calls"]) if "calls" in tx_trace.keys() else 0, + trace_address=position, + 
transaction_hash=block_json["transactions"][tx_pos_in_block].hex(), + transaction_position=tx_pos_in_block, + type=TraceType(_calltype_mapping[tx_trace["type"]]), + ) + ) + except Exception as e: + logger.warn(f"error while unwraping tx trace for parity {e}") + return [] + + if "calls" in tx_trace.keys(): + for idx, subcall in enumerate(tx_trace["calls"]): + response_list.extend( + unwrap_tx_trace_for_parity( + block_json, tx_pos_in_block, subcall, position + [idx] + ) + ) + return response_list + + +async def geth_get_tx_receipts_task(base_provider, tx): + receipt = await base_provider.make_request("eth_getTransactionReceipt", [tx.hex()]) + return receipt + + +async def geth_get_tx_receipts_async(base_provider, transactions): + geth_tx_receipts = [] + tasks = [ + asyncio.create_task(geth_get_tx_receipts_task(base_provider, tx)) + for tx in transactions + ] + geth_tx_receipts = await asyncio.gather(*tasks) + # return [json.loads(tx_receipts) for tx_receipts in geth_tx_receipts] + return geth_tx_receipts + + +def geth_receipts_translator(block_json, geth_tx_receipts) -> List[Receipt]: + json_decoded_receipts = [ + tx_receipt["result"] + if tx_receipt != None and ("result" in tx_receipt.keys()) + else None + for tx_receipt in geth_tx_receipts + ] + results = [] + for idx, tx_receipt in enumerate(json_decoded_receipts): + if tx_receipt != None: + results.append(unwrap_tx_receipt_for_parity(block_json, idx, tx_receipt)) + return results + + +def unwrap_tx_receipt_for_parity(block_json, tx_pos_in_block, tx_receipt) -> Receipt: + if tx_pos_in_block != int(tx_receipt["transactionIndex"], 16): + logger.info( + "Alert the position of transaction in block is mismatched ", + tx_pos_in_block, + tx_receipt["transactionIndex"], + ) + return Receipt( + block_number=block_json["number"], + transaction_hash=tx_receipt["transactionHash"], + transaction_index=tx_pos_in_block, + gas_used=tx_receipt["gasUsed"], + effective_gas_price=tx_receipt["effectiveGasPrice"], + 
cumulative_gas_used=tx_receipt["cumulativeGasUsed"], + to=tx_receipt["to"], + ) diff --git a/mev_inspect/geth_poa_middleware.py b/mev_inspect/geth_poa_middleware.py new file mode 100644 index 00000000..6eb103dd --- /dev/null +++ b/mev_inspect/geth_poa_middleware.py @@ -0,0 +1,101 @@ +""" +Modified asynchronous geth_poa_middleware which mirrors functionality of +https://github.com/ethereum/web3.py/blob/master/web3/middleware/geth_poa.py +""" +from typing import ( + Any, + Callable, +) + +from hexbytes import ( + HexBytes, +) + +from eth_utils.curried import ( + apply_formatter_if, + apply_formatters_to_dict, + apply_key_map, + is_null, +) +from eth_utils.toolz import ( + complement, + compose, + assoc, +) + +from web3._utils.rpc_abi import ( + RPC, +) + +from web3.types import ( + Formatters, + RPCEndpoint, + RPCResponse, +) + +from web3 import Web3 # noqa: F401 + + +async def get_geth_poa_middleware( + make_request: Callable[[RPCEndpoint, Any], RPCResponse], + request_formatters: Formatters = {}, + result_formatters: Formatters = {}, + error_formatters: Formatters = {}, +) -> RPCResponse: + async def middleware(method: RPCEndpoint, params: Any) -> RPCResponse: + if method in request_formatters: + formatter = request_formatters[method] + formatted_params = formatter(params) + response = await make_request(method, formatted_params) + else: + response = await make_request(method, params) + + if "result" in response and method in result_formatters: + formatter = result_formatters[method] + formatted_response = assoc( + response, + "result", + formatter(response["result"]), + ) + return formatted_response + elif "error" in response and method in error_formatters: + formatter = error_formatters[method] + formatted_response = assoc( + response, + "error", + formatter(response["error"]), + ) + return formatted_response + else: + return response + + return middleware + + +is_not_null = complement(is_null) + +remap_geth_poa_fields = apply_key_map( + { + "extraData": 
"proofOfAuthorityData", + } +) + +pythonic_geth_poa = apply_formatters_to_dict( + { + "proofOfAuthorityData": HexBytes, + } +) + +geth_poa_cleanup = compose(pythonic_geth_poa, remap_geth_poa_fields) + + +async def geth_poa_middleware(make_request: Callable[[RPCEndpoint, Any], Any], _: Web3): + return await get_geth_poa_middleware( + make_request=make_request, + request_formatters={}, + result_formatters={ + RPC.eth_getBlockByHash: apply_formatter_if(is_not_null, geth_poa_cleanup), + RPC.eth_getBlockByNumber: apply_formatter_if(is_not_null, geth_poa_cleanup), + }, + error_formatters={}, + ) diff --git a/mev_inspect/inspect_block.py b/mev_inspect/inspect_block.py index 3c8fffe7..104e821d 100644 --- a/mev_inspect/inspect_block.py +++ b/mev_inspect/inspect_block.py @@ -45,6 +45,7 @@ from mev_inspect.swaps import get_swaps from mev_inspect.transfers import get_transfers from mev_inspect.liquidations import get_liquidations +from mev_inspect.utils import RPCType logger = logging.getLogger(__name__) @@ -54,6 +55,7 @@ async def inspect_block( inspect_db_session: orm.Session, base_provider, w3: Web3, + type: RPCType, trace_classifier: TraceClassifier, block_number: int, trace_db_session: Optional[orm.Session], @@ -62,6 +64,7 @@ async def inspect_block( block = await create_from_block_number( base_provider, w3, + type, block_number, trace_db_session, ) diff --git a/mev_inspect/inspector.py b/mev_inspect/inspector.py index 4fb81605..cfc2df13 100644 --- a/mev_inspect/inspector.py +++ b/mev_inspect/inspector.py @@ -12,6 +12,7 @@ from mev_inspect.classifiers.trace import TraceClassifier from mev_inspect.inspect_block import inspect_block from mev_inspect.provider import get_base_provider +from mev_inspect.utils import RPCType logger = logging.getLogger(__name__) @@ -22,12 +23,14 @@ def __init__( rpc: str, inspect_db_session: orm.Session, trace_db_session: Optional[orm.Session], + type: RPCType = RPCType.parity, max_concurrency: int = 1, request_timeout: int = 300, ): 
self.inspect_db_session = inspect_db_session self.trace_db_session = trace_db_session - self.base_provider = get_base_provider(rpc, request_timeout=request_timeout) + self.base_provider = get_base_provider(rpc, request_timeout, type) + self.type = type self.w3 = Web3(self.base_provider, modules={"eth": (AsyncEth,)}, middlewares=[]) self.trace_classifier = TraceClassifier() self.max_concurrency = asyncio.Semaphore(max_concurrency) @@ -36,6 +39,7 @@ async def create_from_block(self, block_number: int): return await create_from_block_number( base_provider=self.base_provider, w3=self.w3, + type=self.type, block_number=block_number, trace_db_session=self.trace_db_session, ) @@ -45,6 +49,7 @@ async def inspect_single_block(self, block: int): self.inspect_db_session, self.base_provider, self.w3, + self.type, self.trace_classifier, block, trace_db_session=self.trace_db_session, @@ -73,6 +78,7 @@ async def safe_inspect_block(self, block_number: int): self.inspect_db_session, self.base_provider, self.w3, + self.type, self.trace_classifier, block_number, trace_db_session=self.trace_db_session, diff --git a/mev_inspect/provider.py b/mev_inspect/provider.py index 3b930ead..836d51ec 100644 --- a/mev_inspect/provider.py +++ b/mev_inspect/provider.py @@ -1,9 +1,19 @@ from web3 import Web3, AsyncHTTPProvider from mev_inspect.retry import http_retry_with_backoff_request_middleware +from mev_inspect.geth_poa_middleware import geth_poa_middleware +from mev_inspect.utils import RPCType -def get_base_provider(rpc: str, request_timeout: int = 500) -> Web3.AsyncHTTPProvider: +def get_base_provider( + rpc: str, request_timeout: int = 500, type: RPCType = RPCType.parity +) -> Web3.AsyncHTTPProvider: base_provider = AsyncHTTPProvider(rpc, request_kwargs={"timeout": request_timeout}) - base_provider.middlewares += (http_retry_with_backoff_request_middleware,) + if type is RPCType.geth: + base_provider.middlewares += ( + geth_poa_middleware, + http_retry_with_backoff_request_middleware, + ) + 
else: + base_provider.middlewares += (http_retry_with_backoff_request_middleware,) return base_provider diff --git a/mev_inspect/utils.py b/mev_inspect/utils.py index 922fadad..eb78413f 100644 --- a/mev_inspect/utils.py +++ b/mev_inspect/utils.py @@ -1,5 +1,11 @@ +from enum import Enum from hexbytes._utils import hexstr_to_bytes +class RPCType(Enum): + parity = 0 + geth = 1 + + def hex_to_int(value: str) -> int: return int.from_bytes(hexstr_to_bytes(value), byteorder="big") diff --git a/poetry.lock b/poetry.lock index 6a37a0aa..c398671e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -66,6 +66,14 @@ python-versions = ">=3.6" [package.dependencies] typing-extensions = ">=3.6.5" +[[package]] +name = "asyncio" +version = "3.4.3" +description = "reference implementation of PEP 3156" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "atomicwrites" version = "1.4.0" @@ -1125,6 +1133,12 @@ async-timeout = [ {file = "async-timeout-4.0.0.tar.gz", hash = "sha256:7d87a4e8adba8ededb52e579ce6bc8276985888913620c935094c2276fd83382"}, {file = "async_timeout-4.0.0-py3-none-any.whl", hash = "sha256:f3303dddf6cafa748a92747ab6c2ecf60e0aeca769aee4c151adfce243a05d9b"}, ] +asyncio = [ + {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, + {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, + {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, + {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, +] atomicwrites = [ {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, diff 
--git a/pyproject.toml b/pyproject.toml index 5d69c1e6..2cc9e9b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ hexbytes = "^0.2.1" click = "^8.0.1" psycopg2 = "^2.9.1" aiohttp = "^3.8.0" +asyncio = "^3.4.3" [tool.poetry.dev-dependencies] pre-commit = "^2.13.0"