Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions crates/analysis/src/obfuscation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ pub struct AnalysisConfig<'a> {
pub iterations: usize,
/// Original bytecode (hex string with or without `0x` prefix).
pub original_bytecode: &'a str,
/// Runtime bytecode (hex string with or without `0x` prefix).
pub runtime_bytecode: &'a str,
/// Path to write the markdown report.
pub report_path: PathBuf,
/// Maximum attempts per iteration before giving up on a seed.
Expand All @@ -34,10 +36,11 @@ pub struct AnalysisConfig<'a> {

impl<'a> AnalysisConfig<'a> {
/// Create config with sensible defaults.
pub fn new(original_bytecode: &'a str, iterations: usize) -> Self {
pub fn new(original_bytecode: &'a str, runtime_bytecode: &'a str, iterations: usize) -> Self {
Self {
iterations,
original_bytecode,
runtime_bytecode,
report_path: PathBuf::from("obfuscation_analysis_report.md"),
max_attempts: 5,
}
Expand Down Expand Up @@ -370,7 +373,13 @@ pub async fn analyze_obfuscation(
obfuscation_config.preserve_unknown_opcodes = true;
obfuscation_config.transforms = passes.iter().map(|p| p.build()).collect();

match obfuscate_bytecode(config.original_bytecode, obfuscation_config).await {
match obfuscate_bytecode(
config.original_bytecode,
config.runtime_bytecode,
obfuscation_config,
)
.await
{
Ok(result) => {
let transforms_applied = result.metadata.transforms_applied.clone();
let obfuscated_bytes = hex_to_bytes(&result.obfuscated_bytecode)?;
Expand Down
14 changes: 10 additions & 4 deletions crates/cli/src/commands/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@ use async_trait::async_trait;
use azoth_analysis::obfuscation::{analyze_obfuscation, AnalysisConfig, AnalysisError};
use clap::Args;
use std::{error::Error, path::PathBuf};
const DEFAULT_BYTECODE_PATH: &str = "examples/escrow-bytecode/artifacts/deployment_bytecode.hex";
const DEFAULT_DEPLOYMENT_PATH: &str = "examples/escrow-bytecode/artifacts/deployment_bytecode.hex";
const DEFAULT_RUNTIME_PATH: &str = "examples/escrow-bytecode/artifacts/runtime_bytecode.hex";

/// Analyze how much bytecode survives obfuscation across multiple seeds.
#[derive(Args)]
pub struct AnalyzeArgs {
/// Number of obfuscated samples to generate.
pub iterations: usize,
/// Input bytecode as hex, .hex file, or binary file.
#[arg(value_name = "BYTECODE", default_value = DEFAULT_BYTECODE_PATH)]
/// Input deployment bytecode as hex, .hex file, or binary file.
#[arg(value_name = "BYTECODE", default_value = DEFAULT_DEPLOYMENT_PATH)]
pub input: String,
/// Input runtime bytecode as hex, .hex file, or binary file.
#[arg(long, value_name = "RUNTIME", default_value = DEFAULT_RUNTIME_PATH)]
pub runtime: String,
/// Where to write the markdown report (default: ./obfuscation_analysis_report.md).
#[arg(long, value_name = "PATH")]
output: Option<PathBuf>,
Expand All @@ -27,13 +31,15 @@ impl super::Command for AnalyzeArgs {
let AnalyzeArgs {
iterations,
input,
runtime,
output,
max_attempts,
} = self;

let input_hex = read_input(&input)?;
let runtime_hex = read_input(&runtime)?;

let mut config = AnalysisConfig::new(&input_hex, iterations);
let mut config = AnalysisConfig::new(&input_hex, &runtime_hex, iterations);
config.max_attempts = max_attempts;
if let Some(path) = output {
config.report_path = path;
Expand Down
10 changes: 8 additions & 2 deletions crates/cli/src/commands/cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use async_trait::async_trait;
use azoth_core::cfg_ir::{build_cfg_ir, Block, CfgIrBundle, EdgeType};
use azoth_core::decoder::decode_bytecode;
use azoth_core::detection::locate_sections;
use azoth_core::input_to_bytes;
use azoth_core::strip::strip_bytecode;
use clap::Args;
use std::error::Error;
Expand All @@ -15,8 +16,11 @@ use std::path::Path;
/// Arguments for the `cfg` subcommand.
#[derive(Args)]
pub struct CfgArgs {
/// Input bytecode as a hex string (0x...) or file path containing EVM bytecode.
/// Input deployment bytecode as a hex string (0x...) or file path containing EVM bytecode.
pub input: String,
/// Input runtime bytecode as a hex string (0x...) or file path containing EVM bytecode.
#[arg(long)]
pub runtime: String,
/// Output file for Graphviz .dot (default: stdout)
#[arg(short, long)]
output: Option<String>,
Expand All @@ -27,8 +31,10 @@ pub struct CfgArgs {
impl super::Command for CfgArgs {
async fn execute(self) -> Result<(), Box<dyn Error>> {
let is_file = !self.input.starts_with("0x") && Path::new(&self.input).is_file();
let runtime_is_file = !self.runtime.starts_with("0x") && Path::new(&self.runtime).is_file();
let (instructions, _, _, bytes) = decode_bytecode(&self.input, is_file).await?;
let sections = locate_sections(&bytes, &instructions)?;
let runtime_bytes = input_to_bytes(&self.runtime, runtime_is_file)?;
let sections = locate_sections(&bytes, &instructions, &runtime_bytes)?;
let (_clean_runtime, clean_report) = strip_bytecode(&bytes, &sections)?;
let cfg_ir = build_cfg_ir(&instructions, &sections, clean_report, &bytes)?;

Expand Down
9 changes: 7 additions & 2 deletions crates/cli/src/commands/obfuscate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ use std::path::Path;
/// Arguments for the `obfuscate` subcommand.
#[derive(Args)]
pub struct ObfuscateArgs {
/// Input bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
/// Input deployment bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
pub input: String,
/// Input runtime bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
#[arg(long)]
pub runtime: String,
/// Cryptographic seed for deterministic obfuscation
#[arg(long)]
seed: Option<String>,
Expand All @@ -42,6 +45,7 @@ impl super::Command for ObfuscateArgs {
async fn execute(self) -> Result<(), Box<dyn Error>> {
let ObfuscateArgs {
input,
runtime,
seed,
passes,
emit,
Expand All @@ -50,6 +54,7 @@ impl super::Command for ObfuscateArgs {

// Step 1: Read and normalize input
let input_bytecode = read_input(&input)?;
let runtime_bytecode = read_input(&runtime)?;

// Step 2: Build transforms from CLI args
let transforms = build_passes(&passes)?;
Expand All @@ -68,7 +73,7 @@ impl super::Command for ObfuscateArgs {
config.preserve_unknown_opcodes = true;

// Step 4: Run obfuscation pipeline
let result = match obfuscate_bytecode(&input_bytecode, config).await {
let result = match obfuscate_bytecode(&input_bytecode, &runtime_bytecode, config).await {
Ok(result) => result,
Err(e) => return Err(format!("{e}").into()),
};
Expand Down
10 changes: 8 additions & 2 deletions crates/cli/src/commands/strip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use async_trait::async_trait;
use azoth_core::decoder::decode_bytecode;
use azoth_core::detection::locate_sections;
use azoth_core::input_to_bytes;
use azoth_core::strip::strip_bytecode;
use clap::Args;
use serde_json;
Expand All @@ -14,8 +15,11 @@ use std::path::Path;
/// Arguments for the `strip` subcommand.
#[derive(Args)]
pub struct StripArgs {
/// Input bytecode as a hex string (0x...) or file path containing EVM bytecode.
/// Input deployment bytecode as a hex string (0x...) or file path containing EVM bytecode.
pub input: String,
/// Input runtime bytecode as a hex string (0x...) or file path containing EVM bytecode.
#[arg(long)]
pub runtime: String,
/// Output raw cleaned runtime hex instead of JSON report
#[arg(long)]
raw: bool,
Expand All @@ -26,8 +30,10 @@ pub struct StripArgs {
impl super::Command for StripArgs {
async fn execute(self) -> Result<(), Box<dyn Error>> {
let is_file = !self.input.starts_with("0x") && Path::new(&self.input).is_file();
let runtime_is_file = !self.runtime.starts_with("0x") && Path::new(&self.runtime).is_file();
let (instructions, _, _, bytes) = decode_bytecode(&self.input, is_file).await?;
let sections = locate_sections(&bytes, &instructions)?;
let runtime_bytes = input_to_bytes(&self.runtime, runtime_is_file)?;
let sections = locate_sections(&bytes, &instructions, &runtime_bytes)?;
let (clean_runtime, report) = strip_bytecode(&bytes, &sections)?;

if self.raw {
Expand Down
151 changes: 35 additions & 116 deletions crates/core/src/detection/sections.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,14 @@ impl Section {
}
}

/// Locates all non-overlapping, offset-ordered sections in the bytecode.
pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec<Section>, Error> {
/// Locates all non-overlapping, offset-ordered sections in the deployment bytecode.
pub fn locate_sections(
deployment_bytes: &[u8],
instructions: &[Instruction],
runtime_bytes: &[u8],
) -> Result<Vec<Section>, Error> {
let mut sections = Vec::new();
let total_len = bytes.len();
let total_len = deployment_bytes.len();

tracing::debug!(
"Processing bytecode: {} bytes, {} instructions",
Expand All @@ -53,7 +57,7 @@ pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec
);

// Pass A: Detect Auxdata (CBOR) from the end
let auxdata = detect_auxdata(bytes);
let auxdata = detect_auxdata(deployment_bytes);
let aux_offset = auxdata.map(|(offset, _)| offset).unwrap_or(total_len);
tracing::debug!("Auxdata offset: {}", aux_offset);

Expand All @@ -75,7 +79,7 @@ pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec

// Pass C: Detect Init -> Runtime split by locating the runtime bytecode inside the deployment bytecode
let (mut init_end, mut runtime_start, mut runtime_len) =
detect_init_runtime_split(instructions).unwrap_or((0, 0, aux_offset));
detect_init_runtime_split(deployment_bytes, runtime_bytes).unwrap_or((0, 0, aux_offset));

tracing::debug!(
"Initial detection: init_end={}, runtime_start={}, runtime_len={}",
Expand Down Expand Up @@ -489,128 +493,43 @@ fn detect_padding(instructions: &[Instruction], aux_offset: usize) -> Option<(us
})
}

/// Detects the Init to Runtime split using the dispatcher pattern.
/// Detects the Init to Runtime split by finding runtime bytecode within deployment bytecode.
///
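/// Uses a simple sliding window search to find the first occurrence of the leading 20 bytes of
/// the runtime bytecode within the deployment bytecode, then checks that the remainder of the
/// runtime bytecode follows immediately after. This is more reliable than heuristic-based pattern
/// matching. Runtime bytecode shorter than 20 bytes, or a first prefix match that is not followed
/// by the rest of the runtime bytecode, yields `None`.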
///
/// # Arguments
/// * `instructions` - Decoded instructions.
/// * `deployment_bytes` - The full deployment bytecode.
/// * `runtime_bytes` - The runtime bytecode to locate within deployment.
///
/// # Returns
/// Optional tuple of (init_end, runtime_start, runtime_len) if pattern is found, None otherwise.
fn detect_init_runtime_split(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
// Try the strict pattern first (for backwards compatibility)
if let Some(result) = detect_strict_deployment_pattern(instructions) {
return Some(result);
}

// Fallback: Look for any CODECOPY + RETURN pattern
if let Some(result) = detect_codecopy_return_pattern(instructions) {
return Some(result);
/// Optional tuple of (init_end, runtime_start, runtime_len) if found, None otherwise.
fn detect_init_runtime_split(
deployment_bytes: &[u8],
runtime_bytes: &[u8],
) -> Option<(usize, usize, usize)> {
if runtime_bytes.len() < 20 {
return None;
}

None
}

/// Detects the strict deployment pattern (original heuristic)
fn detect_strict_deployment_pattern(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
for i in 0..instructions.len().saturating_sub(6) {
if matches!(instructions[i].op, Opcode::PUSH(_) | Opcode::PUSH0)
&& matches!(instructions[i + 1].op, Opcode::PUSH(_) | Opcode::PUSH0)
&& matches!(instructions[i + 2].op, Opcode::PUSH0 | Opcode::PUSH(1))
&& instructions[i + 2].imm.as_deref() == Some("00")
&& instructions[i + 3].op == Opcode::CODECOPY
&& matches!(instructions[i + 4].op, Opcode::PUSH(_) | Opcode::PUSH0)
&& instructions[i + 5].op == Opcode::RETURN
{
let runtime_len = instructions[i]
.imm
.as_ref()
.and_then(|s| usize::from_str_radix(s, 16).ok())?;
let runtime_ofs = instructions[i + 1]
.imm
.as_ref()
.and_then(|s| usize::from_str_radix(s, 16).ok())?;
let init_end = instructions[i + 5].pc + 1;
let needle = &runtime_bytes[..20];
let runtime_offset = deployment_bytes
.windows(needle.len())
.position(|window| window == needle)?;

tracing::debug!(
"Found strict deployment pattern at {}: init_end={}, runtime_start={}, runtime_len={}",
i,
init_end,
runtime_ofs,
runtime_len
);
let remaining_runtime = &runtime_bytes[20..];
let remaining_deployment = &deployment_bytes[runtime_offset + 20..];

return Some((init_end, runtime_ofs, runtime_len));
if remaining_deployment.starts_with(remaining_runtime) {
// if runtime starts at offset 0, we treat it as runtime-only bytecode with no init.
if runtime_offset == 0 {
return Some((0, 0, runtime_bytes.len()));
}
}
None
}

/// Fallback: Look for CODECOPY + RETURN pattern with more flexibility
fn detect_codecopy_return_pattern(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
// Find CODECOPY instruction
let codecopy_idx = instructions
.iter()
.position(|instruction| instruction.op == Opcode::CODECOPY)?;

// Look for RETURN after CODECOPY (within reasonable distance)
let return_idx = instructions[codecopy_idx + 1..]
.iter()
.take(10) // Look within next 10 instructions
.position(|instruction| instruction.op == Opcode::RETURN)
.map(|pos| codecopy_idx + 1 + pos)?;

// Try to extract runtime parameters from PUSH instructions before CODECOPY
let mut runtime_len = None;
let mut runtime_start = None;

// Look backwards from CODECOPY for PUSH instructions
// CODECOPY stack layout: [destOffset, offset, size] where offset is where runtime starts,
// and size is how many bytes to copy. Scanning backwards, we encounter them in reverse order.
for instruction in (0..codecopy_idx).rev().take(10) {
if matches!(
instructions[instruction].op,
Opcode::PUSH(_) | Opcode::PUSH0
) && let Some(immediate) = &instructions[instruction].imm
&& let Ok(value) = usize::from_str_radix(immediate, 16)
{
if runtime_start.is_none() && value > 0 && value < 100000 {
// First reasonable value (scanning backwards) is the offset where runtime starts
runtime_start = Some(value);
} else if runtime_len.is_none() && value > 0 && value < 100000 {
// Second reasonable value is the size of the runtime code
runtime_len = Some(value);
}

if runtime_len.is_some() && runtime_start.is_some() {
break;
}
}
return Some((runtime_offset - 1, runtime_offset, runtime_bytes.len()));
}

// If we found CODECOPY + RETURN but can't extract parameters,
// make reasonable assumptions
let runtime_len = runtime_len.unwrap_or_else(|| {
// Estimate runtime length from instruction count after return
instructions.len().saturating_sub(return_idx + 1) * 2 // rough estimate
});

let runtime_start = runtime_start.unwrap_or_else(|| {
// Assume runtime starts right after the RETURN instruction
instructions[return_idx].pc + 1
});

let init_end = instructions[return_idx].pc + 1;

tracing::debug!(
"Found fallback deployment pattern: CODECOPY at {}, RETURN at {}, init_end={}, runtime_start={}, runtime_len={}",
codecopy_idx,
return_idx,
init_end,
runtime_start,
runtime_len
);

Some((init_end, runtime_start, runtime_len))
None
}

/// Detects ConstructorArgs section between Init end and Runtime start.
Expand Down Expand Up @@ -658,7 +577,7 @@ mod tests {

use super::*;

const STORAGE_BYTECODE: &str = "6080604052348015600e575f5ffd5b50603e80601a5f395ff3fe60806040525f5ffdfea2646970667358221220e8c66682f723c073c8c5ec2c0de0795c9b8b64e310482b13bc56a554d057842b64736f6c634300081e0033";
const STORAGE_BYTECODE: &str = include_str!("../../../../tests/bytecode/storage.hex");

#[test]
fn detect_auxdata_works() {
Expand Down
Loading
Loading