diff --git a/packages/rules/src/lib.rs b/packages/rules/src/lib.rs index 9971fee..1e40f9f 100644 --- a/packages/rules/src/lib.rs +++ b/packages/rules/src/lib.rs @@ -4,6 +4,7 @@ pub mod vyper; pub mod soroban; pub mod optimization; pub mod solidity; +pub mod security; // Explicitly export core types to avoid ambiguity pub use rule_engine::{Rule, RuleEngine, RuleViolation, ViolationSeverity, extract_struct_fields, find_variable_usage}; @@ -17,6 +18,8 @@ pub use optimization::storage::{ PackingOpportunity, VariableInfo, }; +pub use optimization::deployment::{estimate_bytecode_size, ExcessiveContractSizeRule}; +pub use security::HardcodedAddressesRule; // Export Soroban types specifically pub use soroban::{ diff --git a/packages/rules/src/optimization/deployment/excessive_contract_size.rs b/packages/rules/src/optimization/deployment/excessive_contract_size.rs new file mode 100644 index 0000000..b8646f4 --- /dev/null +++ b/packages/rules/src/optimization/deployment/excessive_contract_size.rs @@ -0,0 +1,187 @@ +//! Detect Excessive Contract Size (Issue #307) +//! +//! Identifies contracts whose estimated bytecode size is approaching or exceeding +//! the EVM deployment limit (EIP-170: 24,576 bytes). Large contracts incur higher +//! deployment gas costs and may fail to deploy entirely. +//! +//! Since full compilation is not performed during static analysis, bytecode size is +//! approximated by counting AST tokens and applying a bytes-per-token multiplier. +//! This is a conservative heuristic — actual compiled bytecode may differ. + +use crate::rule_engine::{Rule, RuleViolation, ViolationSeverity}; +use quote::ToTokens; +use syn::Item; + +/// EVM bytecode deployment limit in bytes (EIP-170). +const EVM_BYTECODE_LIMIT: usize = 24_576; + +/// Fraction of the limit at which a warning is issued. +const WARN_FRACTION: f64 = 0.75; + +/// Conservative estimate of compiled bytes produced per source token. +/// Accounts for opcodes, operands, and ABI encoding overhead. +const ESTIMATED_BYTES_PER_TOKEN: usize = 2; + +pub struct ExcessiveContractSizeRule; + +impl Rule for ExcessiveContractSizeRule { + fn name(&self) -> &str { + "excessive-contract-size" + } + + fn description(&self) -> &str { + "Identifies contracts approaching or exceeding the 24,576-byte EVM deployment \ + limit (EIP-170). Large contracts increase deployment gas costs and may fail \ + to deploy." + } + + fn check(&self, ast: &[Item]) -> Vec { + let mut violations = Vec::new(); + + let total_tokens: usize = ast + .iter() + .map(|item| item.to_token_stream().into_iter().count()) + .sum(); + + let estimated_bytes = total_tokens * ESTIMATED_BYTES_PER_TOKEN; + let warn_limit = (EVM_BYTECODE_LIMIT as f64 * WARN_FRACTION) as usize; + + if estimated_bytes >= EVM_BYTECODE_LIMIT { + violations.push(RuleViolation { + rule_name: self.name().to_string(), + description: format!( + "Estimated contract bytecode size (~{} bytes) meets or exceeds the \ + EVM 24,576-byte deployment limit (EIP-170). The contract may fail \ + to deploy.", + estimated_bytes + ), + severity: ViolationSeverity::Critical, + line_number: 0, + column_number: 0, + variable_name: String::new(), + suggestion: "Split the contract into smaller modules, extract reusable \ + logic into separate library contracts, or remove dead code to reduce \ + bytecode size below the 24,576-byte limit." + .to_string(), + }); + } else if estimated_bytes >= warn_limit { + violations.push(RuleViolation { + rule_name: self.name().to_string(), + description: format!( + "Estimated contract bytecode size (~{} bytes) is approaching the \ + EVM 24,576-byte deployment limit (EIP-170) — {:.1}% of the limit \ + used. Future additions may cause deployment failures.", + estimated_bytes, + (estimated_bytes as f64 / EVM_BYTECODE_LIMIT as f64) * 100.0 + ), + severity: ViolationSeverity::Warning, + line_number: 0, + column_number: 0, + variable_name: String::new(), + suggestion: "Consider extracting reusable logic into library contracts \ + or helper modules to keep the contract well below the 24,576-byte \ + deployment limit." + .to_string(), + }); + } + + violations + } +} + +/// Returns an estimated bytecode size in bytes for the given AST items. +/// +/// This is exposed for use by callers that need a size estimate without +/// triggering a full rule violation (e.g. reporting dashboards). +pub fn estimate_bytecode_size(ast: &[Item]) -> usize { + let total_tokens: usize = ast + .iter() + .map(|item| item.to_token_stream().into_iter().count()) + .sum(); + total_tokens * ESTIMATED_BYTES_PER_TOKEN +} + +#[cfg(test)] +mod tests { + use super::*; + use syn::parse_file; + + fn check(code: &str) -> Vec { + let ast = parse_file(code).expect("parse failed"); + ExcessiveContractSizeRule.check(&ast.items) + } + + #[test] + fn no_violation_for_small_contract() { + let code = r#" + struct Token; + impl Token { + pub fn transfer(to: u64, amount: u64) -> bool { + amount > 0 + } + } + "#; + assert!(check(code).is_empty()); + } + + #[test] + fn estimate_size_grows_with_code() { + let small = r#"fn a() {}"#; + let large = r#" + fn a() { let x = 1; let y = 2; let z = x + y; } + fn b() { let x = 1; let y = 2; let z = x + y; } + fn c() { let x = 1; let y = 2; let z = x + y; } + fn d() { let x = 1; let y = 2; let z = x + y; } + fn e() { let x = 1; let y = 2; let z = x + y; } + "#; + + let small_ast = parse_file(small).unwrap(); + let large_ast = parse_file(large).unwrap(); + + assert!( + estimate_bytecode_size(&large_ast.items) + > estimate_bytecode_size(&small_ast.items) + ); + } + + #[test] + fn warning_threshold_is_below_critical() { + let warn_limit = (EVM_BYTECODE_LIMIT as f64 * WARN_FRACTION) as usize; + assert!(warn_limit < EVM_BYTECODE_LIMIT); + } + + #[test] + fn critical_violation_reported_for_oversized_contract() { + // Build a contract large enough to exceed the estimated limit. + // Each repetition contributes tokens, so we repeat a non-trivial block. + let block = r#" + pub fn compute_fee(amount: u64, rate: u64, base: u64, offset: u64) -> u64 { + let intermediate = amount * rate; + let adjusted = intermediate / base; + let result = adjusted + offset; + result + } + "#; + // Repeat enough times to exceed EVM_BYTECODE_LIMIT / ESTIMATED_BYTES_PER_TOKEN tokens. + let reps = (EVM_BYTECODE_LIMIT / ESTIMATED_BYTES_PER_TOKEN) / 30 + 1; + let mut code = String::new(); + for i in 0..reps { + code.push_str(&block.replace("compute_fee", &format!("compute_fee_{}", i))); + } + + let violations = check(&code); + assert!( + violations + .iter() + .any(|v| matches!(v.severity, ViolationSeverity::Critical)), + "Expected a Critical violation for an oversized contract" + ); + } + + #[test] + fn estimate_bytecode_size_returns_nonzero_for_nonempty_ast() { + let code = r#"fn foo() -> u64 { 42 }"#; + let ast = parse_file(code).unwrap(); + assert!(estimate_bytecode_size(&ast.items) > 0); + } +} diff --git a/packages/rules/src/optimization/deployment/mod.rs b/packages/rules/src/optimization/deployment/mod.rs new file mode 100644 index 0000000..2e5f30b --- /dev/null +++ b/packages/rules/src/optimization/deployment/mod.rs @@ -0,0 +1,3 @@ +pub mod excessive_contract_size; + +pub use excessive_contract_size::{estimate_bytecode_size, ExcessiveContractSizeRule}; diff --git a/packages/rules/src/optimization/mod.rs b/packages/rules/src/optimization/mod.rs index 53f6f46..c507d65 100644 --- a/packages/rules/src/optimization/mod.rs +++ b/packages/rules/src/optimization/mod.rs @@ -1,4 +1,5 @@ pub mod storage; +pub mod deployment; pub use storage::{ detect_packing_opportunities, @@ -8,3 +9,5 @@ pub use storage::{ PackingOpportunity, VariableInfo, }; + +pub use deployment::{estimate_bytecode_size, ExcessiveContractSizeRule}; diff --git a/packages/rules/src/security/configuration/hardcoded_addresses.rs b/packages/rules/src/security/configuration/hardcoded_addresses.rs new file mode 100644 index 0000000..385dc0a --- /dev/null +++ b/packages/rules/src/security/configuration/hardcoded_addresses.rs @@ -0,0 +1,200 @@ +//! Detect Hardcoded Addresses (Issue #310) +//! +//! Flags hardcoded wallet and contract addresses embedded directly in source code. +//! Hardcoded addresses reduce upgradeability and portability: rotating a key or +//! migrating a dependency requires a full redeployment rather than a configuration +//! change. +//! +//! Detection strategy: +//! - Scan every top-level AST item's token stream for 20-byte (40 hex-char) +//! Ethereum-style addresses of the form `0x[0-9a-fA-F]{40}`. +//! - Each unique address is reported once per item to avoid duplicate violations. +//! - Suggests moving addresses to constructor parameters or an on-chain +//! configuration registry. + +use crate::rule_engine::{Rule, RuleViolation, ViolationSeverity}; +use quote::ToTokens; +use regex::Regex; +use std::collections::HashSet; +use syn::Item; + +pub struct HardcodedAddressesRule; + +impl Rule for HardcodedAddressesRule { + fn name(&self) -> &str { + "hardcoded-addresses" + } + + fn description(&self) -> &str { + "Detects hardcoded wallet or contract addresses in source code. Hardcoded \ + addresses reduce upgradeability and portability, requiring a full \ + redeployment to rotate keys or update dependencies." + } + + fn check(&self, ast: &[Item]) -> Vec { + // Matches a 20-byte Ethereum address: 0x followed by exactly 40 hex chars. + // The trailing `(?![0-9a-fA-F])` negative lookahead prevents matching a + // longer hex string (e.g. a 256-bit hash) as an address. + let address_re = + Regex::new(r"(?i)0x[0-9a-f]{40}(?![0-9a-f])").expect("invalid regex"); + + let mut violations = Vec::new(); + // Track addresses seen globally to avoid duplicate violations across items. + let mut reported: HashSet = HashSet::new(); + + for item in ast { + let token_str = item.to_token_stream().to_string(); + // Deduplicate within this item too. + let mut seen_in_item: HashSet = HashSet::new(); + + for m in address_re.find_iter(&token_str) { + let addr = m.as_str().to_lowercase(); + if reported.contains(&addr) { + continue; + } + if seen_in_item.insert(addr.clone()) { + reported.insert(addr.clone()); + violations.push(RuleViolation { + rule_name: self.name().to_string(), + description: format!( + "Hardcoded address `{}` detected. Embedding addresses \ + directly in source code reduces upgradeability and \ + portability of the contract.", + addr + ), + severity: ViolationSeverity::High, + line_number: 0, + column_number: 0, + variable_name: addr.clone(), + suggestion: + "Replace the hardcoded address with a configurable \ + parameter: pass it as a constructor argument, store it in \ + an owner-controlled setter, or read it from an on-chain \ + configuration registry." + .to_string(), + }); + } + } + } + + violations + } +} + +#[cfg(test)] +mod tests { + use super::*; + use syn::parse_file; + + fn check(code: &str) -> Vec { + let ast = parse_file(code).expect("parse failed"); + HardcodedAddressesRule.check(&ast.items) + } + + #[test] + fn flags_hardcoded_address_in_const() { + let code = r#" + const TREASURY: &str = "0xAbCdEf1234567890AbCdEf1234567890AbCdEf12"; + "#; + let violations = check(code); + assert!( + !violations.is_empty(), + "Expected a violation for a hardcoded address constant" + ); + } + + #[test] + fn flags_hardcoded_address_in_function_body() { + let code = r#" + fn get_owner() -> &'static str { + "0xdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" + } + "#; + let violations = check(code); + assert!(!violations.is_empty()); + } + + #[test] + fn flags_hardcoded_address_in_let_binding() { + let code = r#" + fn setup() { + let admin = "0x1111111111111111111111111111111111111111"; + } + "#; + let violations = check(code); + assert!(!violations.is_empty()); + } + + #[test] + fn no_violation_for_short_hex_values() { + // A 32-char hex string is not a valid 20-byte address. + let code = r#" + const HASH: &str = "0xdeadbeefdeadbeefdeadbeefdeadbeef"; + "#; + assert!( + check(code).is_empty(), + "A 16-byte hex value should not be flagged as an address" + ); + } + + #[test] + fn no_violation_for_longer_hash() { + // A 64-char hex string (32-byte hash) should not match. + let code = r#" + const TX_HASH: &str = + "0xabcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890ab"; + "#; + assert!( + check(code).is_empty(), + "A 32-byte hex hash should not be flagged as a 20-byte address" + ); + } + + #[test] + fn deduplicates_same_address_across_items() { + let code = r#" + const A: &str = "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + const B: &str = "0xaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + "#; + let violations = check(code); + assert_eq!( + violations.len(), + 1, + "The same address should only be reported once" + ); + } + + #[test] + fn reports_multiple_distinct_addresses() { + let code = r#" + const ADDR_A: &str = "0x1111111111111111111111111111111111111111"; + const ADDR_B: &str = "0x2222222222222222222222222222222222222222"; + "#; + let violations = check(code); + assert_eq!(violations.len(), 2, "Two distinct addresses should each be reported"); + } + + #[test] + fn violation_severity_is_high() { + let code = r#" + const OWNER: &str = "0xffffffffffffffffffffffffffffffffffffffff"; + "#; + let violations = check(code); + assert!( + violations + .iter() + .all(|v| matches!(v.severity, ViolationSeverity::High)), + "Hardcoded address violations should have High severity" + ); + } + + #[test] + fn no_violation_for_non_address_string() { + let code = r#" + fn greet() -> &'static str { + "hello world" + } + "#; + assert!(check(code).is_empty()); + } +} diff --git a/packages/rules/src/security/configuration/mod.rs b/packages/rules/src/security/configuration/mod.rs new file mode 100644 index 0000000..29c5d28 --- /dev/null +++ b/packages/rules/src/security/configuration/mod.rs @@ -0,0 +1,3 @@ +pub mod hardcoded_addresses; + +pub use hardcoded_addresses::HardcodedAddressesRule; diff --git a/packages/rules/src/security/mod.rs b/packages/rules/src/security/mod.rs new file mode 100644 index 0000000..88246fe --- /dev/null +++ b/packages/rules/src/security/mod.rs @@ -0,0 +1,3 @@ +pub mod configuration; + +pub use configuration::HardcodedAddressesRule;