diff --git a/.env.example b/.env.example index 79da477..2ee69c0 100644 --- a/.env.example +++ b/.env.example @@ -14,6 +14,9 @@ SUBTENSOR_NETWORK=finney PORT=8091 LOG_LEVEL=info +# Restrict the axon to host loopback so only a local reverse proxy can reach it (unset = raw internet publish). +# AXON_PUBLISH=127.0.0.1:9000 + # ─── Contract ────────────────────────────────────────────── # Override the default Allways Swap Manager contract address. Leave unset to # use the bundled mainnet default. Set this when running against testnet, diff --git a/allways/chain_providers/subtensor.py b/allways/chain_providers/subtensor.py index 3e4dc5e..cdfa998 100644 --- a/allways/chain_providers/subtensor.py +++ b/allways/chain_providers/subtensor.py @@ -316,7 +316,7 @@ def verify_from_proof(self, address: str, message: str, signature: str) -> bool: """Verify an sr25519 signature from the given SS58 address.""" try: keypair = Keypair(ss58_address=address) - sig_bytes = bytes.fromhex(signature) + sig_bytes = bytes.fromhex(signature[2:] if signature.startswith('0x') else signature) return keypair.verify(message.encode(), sig_bytes) except Exception as e: bt.logging.error(f'TAO verify_from_proof failed: {e}') diff --git a/allways/cli/swap_commands/miner_commands.py b/allways/cli/swap_commands/miner_commands.py index ba00553..dfcf938 100644 --- a/allways/cli/swap_commands/miner_commands.py +++ b/allways/cli/swap_commands/miner_commands.py @@ -169,13 +169,6 @@ def miner_activate(): except ContractError: pass - # Build synapse - timestamp = str(int(time.time())) - message = f'activate:{hotkey}:{timestamp}' - signature = wallet.hotkey.sign(message.encode()).hex() - - synapse = MinerActivateSynapse(hotkey=hotkey, signature=signature, message=message) - # Discover whitelisted validators from metagraph dendrite = bt.Dendrite(wallet=wallet) with loading('Discovering validators...'): @@ -185,14 +178,31 @@ def miner_activate(): console.print('[red]No validators found on metagraph[/red]\n') return 
- # Broadcast + # Broadcast, re-signing a fresh timestamp each attempt. On a 429 the request + # was rejected at the edge proxy and never reached the validator — back off + # and retry rather than surfacing a transient rate-limit as a failure. timeout = resolve_dendrite_timeout(60.0) - with loading(f'Broadcasting activation to {len(validator_axons)} validators...'): - responses = asyncio.get_event_loop().run_until_complete( - dendrite(axons=validator_axons, synapse=synapse, deserialize=False, timeout=timeout) - ) + activate_max_retries = 2 + for attempt in range(activate_max_retries + 1): + timestamp = str(int(time.time())) + message = f'activate:{hotkey}:{timestamp}' + signature = wallet.hotkey.sign(message.encode()).hex() + synapse = MinerActivateSynapse(hotkey=hotkey, signature=signature, message=message) + + with loading(f'Broadcasting activation to {len(validator_axons)} validators...'): + responses = asyncio.get_event_loop().run_until_complete( + dendrite(axons=validator_axons, synapse=synapse, deserialize=False, timeout=timeout) + ) + + info = render_and_aggregate(console, responses, label='V', context={'miner_hotkey': hotkey}) + + if info.category == 'rate_limited' and attempt < activate_max_retries: + backoff_s = 6 + with console.status(f'[yellow]Rate limited by validator(s) — retrying in {backoff_s}s...[/yellow]'): + time.sleep(backoff_s) + continue + break - info = render_and_aggregate(console, responses, label='V', context={'miner_hotkey': hotkey}) accepted = info.accepted no_response = info.no_response if accepted == 0 and info.headline: diff --git a/allways/cli/swap_commands/swap.py b/allways/cli/swap_commands/swap.py index 73ba3d6..e93c5d3 100644 --- a/allways/cli/swap_commands/swap.py +++ b/allways/cli/swap_commands/swap.py @@ -112,23 +112,36 @@ def sign_and_broadcast_confirm( to_chain=to_chain, ) - with loading(f'Broadcasting confirmation to {len(validator_axons)} validators...'): - confirm_responses = broadcast_synapse(ephemeral_wallet, 
validator_axons, confirm_synapse, timeout=60.0) - - info = render_and_aggregate( - console, - confirm_responses, - label='V', - context={ - 'from_chain': from_chain, - 'from_chain_upper': from_chain.upper(), - 'to_chain': to_chain, - 'to_chain_upper': to_chain.upper(), - 'from_address': user_from_address, - 'miner_hotkey': miner_hotkey, - 'miner_uid': miner_uid, - }, - ) + # The confirm proof is over the tx hash (not block-anchored), so the synapse + # is stable across retries. On a 429 the request was rejected at the edge + # proxy and never reached the validator — back off and re-broadcast rather + # than failing the user mid-confirm (the worst place to drop them). + confirm_max_retries = 2 + for attempt in range(confirm_max_retries + 1): + with loading(f'Broadcasting confirmation to {len(validator_axons)} validators...'): + confirm_responses = broadcast_synapse(ephemeral_wallet, validator_axons, confirm_synapse, timeout=60.0) + + info = render_and_aggregate( + console, + confirm_responses, + label='V', + context={ + 'from_chain': from_chain, + 'from_chain_upper': from_chain.upper(), + 'to_chain': to_chain, + 'to_chain_upper': to_chain.upper(), + 'from_address': user_from_address, + 'miner_hotkey': miner_hotkey, + 'miner_uid': miner_uid, + }, + ) + + if info.category == 'rate_limited' and attempt < confirm_max_retries: + backoff_s = 6 + with console.status(f'[yellow]Rate limited by validator(s) — retrying in {backoff_s}s...[/yellow]'): + time.sleep(backoff_s) + continue + break if info.accepted == 0 and info.headline: # tx_not_found is almost always propagation lag, not a real failure — @@ -253,6 +266,18 @@ def broadcast_reserve_with_retry( accepted = info.accepted if accepted == 0: + # Rate limited at the validator edge proxy (429) — transient and + # self-healing. Back off and retry automatically; an instant retry + # would just re-trip the limit (tokens refill over a few seconds). 
+ if info.category == 'rate_limited': + if attempt < max_retries: + backoff_s = 6 + with console.status(f'[yellow]Rate limited by validator(s) — retrying in {backoff_s}s...[/yellow]'): + time.sleep(backoff_s) + continue + console.print('\n[red]Still rate limited after retries — wait a minute and try again.[/red]') + return None + if info.headline: console.print(f'\n[red]{info.headline}[/red]') else: diff --git a/allways/cli/validator_rejections.py b/allways/cli/validator_rejections.py index 32d86fe..94f49e0 100644 --- a/allways/cli/validator_rejections.py +++ b/allways/cli/validator_rejections.py @@ -27,6 +27,7 @@ class RejectionInfo: queued: subset of accepted where the validator queued for confirmations. rejected: validators that responded with a rejection_reason. no_response: validators that timed out / didn't respond. + rate_limited: validators whose edge proxy returned 429 (rate limited). headline: user-facing translated message when all rejections agree; empty when validators disagreed or accepted >= 1. deterministic: True when retrying with identical inputs cannot succeed. @@ -41,6 +42,7 @@ class RejectionInfo: queued: int = 0 rejected: int = 0 no_response: int = 0 + rate_limited: int = 0 headline: str = '' deterministic: bool = False category: str = '' @@ -354,6 +356,15 @@ def render_and_aggregate( console.print(f' {label}{i}: [green]ok[/green]') continue + # A 429 from the edge proxy carries no synapse rejection_reason — the + # validator never ran. Detect it off the dendrite status so it reads as + # "rate limited" (retryable after a short wait), not a generic timeout. 
+ status_code = str(getattr(getattr(resp, 'dendrite', None), 'status_code', '') or '') + if not raw and status_code == '429': + info.rate_limited += 1 + console.print(f' {label}{i}: [yellow]rate limited[/yellow] [dim]— retry in a few seconds[/dim]') + continue + info.raw_reasons.append(raw) if not raw: info.no_response += 1 @@ -362,6 +373,15 @@ def render_and_aggregate( info.rejected += 1 console.print(f' {label}{i}: [red]no[/red] [dim]{raw}[/dim]') + # Rate-limited (429) with no accepts: transient by nature — a short backoff + # and retry clears it. Set this before the no-response early-return below so + # a pure-429 round reads as "rate limited", not a generic timeout. + if info.accepted == 0 and info.rate_limited > 0 and info.rejected == 0 and info.no_response == 0: + info.category = 'rate_limited' + info.headline = 'Rate limited by the validator edge proxy — wait a few seconds and retry.' + info.deterministic = False + return info + if info.accepted > 0 or (info.rejected == 0 and info.no_response == 0): return info diff --git a/docker-compose.vali.yml b/docker-compose.vali.yml index 2317b63..3cc172f 100644 --- a/docker-compose.vali.yml +++ b/docker-compose.vali.yml @@ -8,7 +8,8 @@ services: env_file: - .env ports: - - "${PORT}:${PORT}" + # Set AXON_PUBLISH=127.0.0.1:HOST_PORT (e.g. 127.0.0.1:9000) to bind loopback-only behind a proxy (unset = raw internet publish). + - "${AXON_PUBLISH:-0.0.0.0:${PORT}}:${PORT}" volumes: - ${WALLET_PATH}:/root/.bittensor/wallets:ro - ./data/allways:/root/.allways diff --git a/tests/test_validator_rejections.py b/tests/test_validator_rejections.py index a9a2589..6faf053 100644 --- a/tests/test_validator_rejections.py +++ b/tests/test_validator_rejections.py @@ -142,6 +142,35 @@ def test_no_response_only(): assert 'no validators responded' in info.headline.lower() +def _rate_limited_resp() -> FakeResp: + # A 429 from the edge proxy: no synapse rejection_reason (the validator never + # ran), but the dendrite records status_code 429 from the JSON error body. 
+ from types import SimpleNamespace + + resp = FakeResp(accepted=False, rejection_reason='') + resp.dendrite = SimpleNamespace(status_code='429') + return resp + + +def test_rate_limited_429_is_distinct_from_no_response(): + info = render_and_aggregate(_silent_console(), [_rate_limited_resp(), _rate_limited_resp()]) + assert info.rate_limited == 2 + assert info.no_response == 0 + assert info.category == 'rate_limited' + assert info.deterministic is False + assert 'rate limited' in info.headline.lower() + + +def test_rate_limited_mixed_with_rejection_does_not_claim_pure_headline(): + # 429 + a real rejection: must not be labeled the pure rate_limited category + # (the auto-backoff retry keys off that), but stays transient so a retry is allowed. + responses = [_rate_limited_resp(), FakeResp(accepted=False, rejection_reason='miner busy')] + info = render_and_aggregate(_silent_console(), responses, context={'miner_uid': 1}) + assert info.rate_limited == 1 + assert info.category != 'rate_limited' + assert info.deterministic is False + + def test_unmatched_falls_back_to_raw(): info = render_and_aggregate( _silent_console(),