MiragePrivacy · g4titanx · Nov 24, 2025 · Nov 24, 2025 · Nov 24, 2025 · Nov 24, 2025
diff --git a/crates/analysis/src/obfuscation.rs b/crates/analysis/src/obfuscation.rs
@@ -26,6 +26,8 @@ pub struct AnalysisConfig<'a> {
     pub iterations: usize,
     /// Original bytecode (hex string with or without `0x` prefix).
     pub original_bytecode: &'a str,
+    /// Runtime bytecode (hex string with or without `0x` prefix).
+    pub runtime_bytecode: &'a str,
     /// Path to write the markdown report.
     pub report_path: PathBuf,
     /// Maximum attempts per iteration before giving up on a seed.
@@ -34,10 +36,11 @@ pub struct AnalysisConfig<'a> {
 
 impl<'a> AnalysisConfig<'a> {
     /// Create config with sensible defaults.
-    pub fn new(original_bytecode: &'a str, iterations: usize) -> Self {
+    pub fn new(original_bytecode: &'a str, runtime_bytecode: &'a str, iterations: usize) -> Self {
         Self {
             iterations,
             original_bytecode,
+            runtime_bytecode,
             report_path: PathBuf::from("obfuscation_analysis_report.md"),
             max_attempts: 5,
         }
@@ -370,7 +373,13 @@ pub async fn analyze_obfuscation(
             obfuscation_config.preserve_unknown_opcodes = true;
             obfuscation_config.transforms = passes.iter().map(|p| p.build()).collect();
 
-            match obfuscate_bytecode(config.original_bytecode, obfuscation_config).await {
+            match obfuscate_bytecode(
+                config.original_bytecode,
+                config.runtime_bytecode,
+                obfuscation_config,
+            )
+            .await
+            {
                 Ok(result) => {
                     let transforms_applied = result.metadata.transforms_applied.clone();
                     let obfuscated_bytes = hex_to_bytes(&result.obfuscated_bytecode)?;

diff --git a/crates/cli/src/commands/analyze.rs b/crates/cli/src/commands/analyze.rs
@@ -3,16 +3,20 @@ use async_trait::async_trait;
 use azoth_analysis::obfuscation::{analyze_obfuscation, AnalysisConfig, AnalysisError};
 use clap::Args;
 use std::{error::Error, path::PathBuf};
-const DEFAULT_BYTECODE_PATH: &str = "examples/escrow-bytecode/artifacts/deployment_bytecode.hex";
+const DEFAULT_DEPLOYMENT_PATH: &str = "examples/escrow-bytecode/artifacts/deployment_bytecode.hex";
+const DEFAULT_RUNTIME_PATH: &str = "examples/escrow-bytecode/artifacts/runtime_bytecode.hex";
 
 /// Analyze how much bytecode survives obfuscation across multiple seeds.
 #[derive(Args)]
 pub struct AnalyzeArgs {
     /// Number of obfuscated samples to generate.
     pub iterations: usize,
-    /// Input bytecode as hex, .hex file, or binary file.
-    #[arg(value_name = "BYTECODE", default_value = DEFAULT_BYTECODE_PATH)]
+    /// Input deployment bytecode as hex, .hex file, or binary file.
+    #[arg(value_name = "BYTECODE", default_value = DEFAULT_DEPLOYMENT_PATH)]
     pub input: String,
+    /// Input runtime bytecode as hex, .hex file, or binary file.
+    #[arg(long, value_name = "RUNTIME", default_value = DEFAULT_RUNTIME_PATH)]
+    pub runtime: String,
     /// Where to write the markdown report (default: ./obfuscation_analysis_report.md).
     #[arg(long, value_name = "PATH")]
     output: Option<PathBuf>,
@@ -27,13 +31,15 @@ impl super::Command for AnalyzeArgs {
         let AnalyzeArgs {
             iterations,
             input,
+            runtime,
             output,
             max_attempts,
         } = self;
 
         let input_hex = read_input(&input)?;
+        let runtime_hex = read_input(&runtime)?;
 
-        let mut config = AnalysisConfig::new(&input_hex, iterations);
+        let mut config = AnalysisConfig::new(&input_hex, &runtime_hex, iterations);
         config.max_attempts = max_attempts;
         if let Some(path) = output {
             config.report_path = path;

diff --git a/crates/cli/src/commands/cfg.rs b/crates/cli/src/commands/cfg.rs
@@ -6,6 +6,7 @@ use async_trait::async_trait;
 use azoth_core::cfg_ir::{build_cfg_ir, Block, CfgIrBundle, EdgeType};
 use azoth_core::decoder::decode_bytecode;
 use azoth_core::detection::locate_sections;
+use azoth_core::input_to_bytes;
 use azoth_core::strip::strip_bytecode;
 use clap::Args;
 use std::error::Error;
@@ -15,8 +16,11 @@ use std::path::Path;
 /// Arguments for the `cfg` subcommand.
 #[derive(Args)]
 pub struct CfgArgs {
-    /// Input bytecode as a hex string (0x...) or file path containing EVM bytecode.
+    /// Input deployment bytecode as a hex string (0x...) or file path containing EVM bytecode.
     pub input: String,
+    /// Input runtime bytecode as a hex string (0x...) or file path containing EVM bytecode.
+    #[arg(long)]
+    pub runtime: String,
     /// Output file for Graphviz .dot (default: stdout)
     #[arg(short, long)]
     output: Option<String>,
@@ -27,8 +31,10 @@ pub struct CfgArgs {
 impl super::Command for CfgArgs {
     async fn execute(self) -> Result<(), Box<dyn Error>> {
         let is_file = !self.input.starts_with("0x") && Path::new(&self.input).is_file();
+        let runtime_is_file = !self.runtime.starts_with("0x") && Path::new(&self.runtime).is_file();
         let (instructions, _, _, bytes) = decode_bytecode(&self.input, is_file).await?;
-        let sections = locate_sections(&bytes, &instructions)?;
+        let runtime_bytes = input_to_bytes(&self.runtime, runtime_is_file)?;
+        let sections = locate_sections(&bytes, &instructions, &runtime_bytes)?;
         let (_clean_runtime, clean_report) = strip_bytecode(&bytes, &sections)?;
         let cfg_ir = build_cfg_ir(&instructions, &sections, clean_report, &bytes)?;
 

diff --git a/crates/cli/src/commands/obfuscate.rs b/crates/cli/src/commands/obfuscate.rs
@@ -19,8 +19,11 @@ use std::path::Path;
 /// Arguments for the `obfuscate` subcommand.
 #[derive(Args)]
 pub struct ObfuscateArgs {
-    /// Input bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
+    /// Input deployment bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
     pub input: String,
+    /// Input runtime bytecode as a hex string, .hex file, or binary file containing EVM bytecode.
+    #[arg(long)]
+    pub runtime: String,
     /// Cryptographic seed for deterministic obfuscation
     #[arg(long)]
     seed: Option<String>,
@@ -42,6 +45,7 @@ impl super::Command for ObfuscateArgs {
     async fn execute(self) -> Result<(), Box<dyn Error>> {
         let ObfuscateArgs {
             input,
+            runtime,
             seed,
             passes,
             emit,
@@ -50,6 +54,7 @@ impl super::Command for ObfuscateArgs {
 
         // Step 1: Read and normalize input
         let input_bytecode = read_input(&input)?;
+        let runtime_bytecode = read_input(&runtime)?;
 
         // Step 2: Build transforms from CLI args
         let transforms = build_passes(&passes)?;
@@ -68,7 +73,7 @@ impl super::Command for ObfuscateArgs {
         config.preserve_unknown_opcodes = true;
 
         // Step 4: Run obfuscation pipeline
-        let result = match obfuscate_bytecode(&input_bytecode, config).await {
+        let result = match obfuscate_bytecode(&input_bytecode, &runtime_bytecode, config).await {
             Ok(result) => result,
             Err(e) => return Err(format!("{e}").into()),
         };

diff --git a/crates/cli/src/commands/strip.rs b/crates/cli/src/commands/strip.rs
@@ -5,6 +5,7 @@
 use async_trait::async_trait;
 use azoth_core::decoder::decode_bytecode;
 use azoth_core::detection::locate_sections;
+use azoth_core::input_to_bytes;
 use azoth_core::strip::strip_bytecode;
 use clap::Args;
 use serde_json;
@@ -14,8 +15,11 @@ use std::path::Path;
 /// Arguments for the `strip` subcommand.
 #[derive(Args)]
 pub struct StripArgs {
-    /// Input bytecode as a hex string (0x...) or file path containing EVM bytecode.
+    /// Input deployment bytecode as a hex string (0x...) or file path containing EVM bytecode.
     pub input: String,
+    /// Input runtime bytecode as a hex string (0x...) or file path containing EVM bytecode.
+    #[arg(long)]
+    pub runtime: String,
     /// Output raw cleaned runtime hex instead of JSON report
     #[arg(long)]
     raw: bool,
@@ -26,8 +30,10 @@ pub struct StripArgs {
 impl super::Command for StripArgs {
     async fn execute(self) -> Result<(), Box<dyn Error>> {
         let is_file = !self.input.starts_with("0x") && Path::new(&self.input).is_file();
+        let runtime_is_file = !self.runtime.starts_with("0x") && Path::new(&self.runtime).is_file();
         let (instructions, _, _, bytes) = decode_bytecode(&self.input, is_file).await?;
-        let sections = locate_sections(&bytes, &instructions)?;
+        let runtime_bytes = input_to_bytes(&self.runtime, runtime_is_file)?;
+        let sections = locate_sections(&bytes, &instructions, &runtime_bytes)?;
         let (clean_runtime, report) = strip_bytecode(&bytes, &sections)?;
 
         if self.raw {

diff --git a/crates/core/src/detection/sections.rs b/crates/core/src/detection/sections.rs
@@ -41,10 +41,14 @@ impl Section {
     }
 }
 
-/// Locates all non-overlapping, offset-ordered sections in the bytecode.
-pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec<Section>, Error> {
+/// Locates all non-overlapping, offset-ordered sections in the deployment bytecode.
+pub fn locate_sections(
+    deployment_bytes: &[u8],
+    instructions: &[Instruction],
+    runtime_bytes: &[u8],
+) -> Result<Vec<Section>, Error> {
     let mut sections = Vec::new();
-    let total_len = bytes.len();
+    let total_len = deployment_bytes.len();
 
     tracing::debug!(
         "Processing bytecode: {} bytes, {} instructions",
@@ -53,7 +57,7 @@ pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec
     );
 
     // Pass A: Detect Auxdata (CBOR) from the end
-    let auxdata = detect_auxdata(bytes);
+    let auxdata = detect_auxdata(deployment_bytes);
     let aux_offset = auxdata.map(|(offset, _)| offset).unwrap_or(total_len);
     tracing::debug!("Auxdata offset: {}", aux_offset);
 
@@ -75,7 +79,7 @@ pub fn locate_sections(bytes: &[u8], instructions: &[Instruction]) -> Result<Vec
 
     // Pass C: Detect Init -> Runtime split using dispatcher pattern
     let (mut init_end, mut runtime_start, mut runtime_len) =
-        detect_init_runtime_split(instructions).unwrap_or((0, 0, aux_offset));
+        detect_init_runtime_split(deployment_bytes, runtime_bytes).unwrap_or((0, 0, aux_offset));
 
     tracing::debug!(
         "Initial detection: init_end={}, runtime_start={}, runtime_len={}",
@@ -489,128 +493,43 @@ fn detect_padding(instructions: &[Instruction], aux_offset: usize) -> Option<(us
     })
 }
 
-/// Detects the Init to Runtime split using the dispatcher pattern.
+/// Detects the Init to Runtime split by finding runtime bytecode within deployment bytecode.
+///
+/// Uses a simple sliding window search to find where the runtime bytecode appears within
+/// the deployment bytecode. This is more reliable than heuristic-based pattern matching.
 ///
 /// # Arguments
-/// * `instructions` - Decoded instructions.
+/// * `deployment_bytes` - The full deployment bytecode.
+/// * `runtime_bytes` - The runtime bytecode to locate within deployment.
 ///
 /// # Returns
-/// Optional tuple of (init_end, runtime_start, runtime_len) if pattern is found, None otherwise.
-fn detect_init_runtime_split(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
-    // Try the strict pattern first (for backwards compatibility)
-    if let Some(result) = detect_strict_deployment_pattern(instructions) {
-        return Some(result);
-    }
-
-    // Fallback: Look for any CODECOPY + RETURN pattern
-    if let Some(result) = detect_codecopy_return_pattern(instructions) {
-        return Some(result);
+/// Optional tuple of (init_end, runtime_start, runtime_len) if found, None otherwise.
+fn detect_init_runtime_split(
+    deployment_bytes: &[u8],
+    runtime_bytes: &[u8],
+) -> Option<(usize, usize, usize)> {
+    if runtime_bytes.len() < 20 {
+        return None;
     }
 
-    None
-}
-
-/// Detects the strict deployment pattern (original heuristic)
-fn detect_strict_deployment_pattern(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
-    for i in 0..instructions.len().saturating_sub(6) {
-        if matches!(instructions[i].op, Opcode::PUSH(_) | Opcode::PUSH0)
-            && matches!(instructions[i + 1].op, Opcode::PUSH(_) | Opcode::PUSH0)
-            && matches!(instructions[i + 2].op, Opcode::PUSH0 | Opcode::PUSH(1))
-            && instructions[i + 2].imm.as_deref() == Some("00")
-            && instructions[i + 3].op == Opcode::CODECOPY
-            && matches!(instructions[i + 4].op, Opcode::PUSH(_) | Opcode::PUSH0)
-            && instructions[i + 5].op == Opcode::RETURN
-        {
-            let runtime_len = instructions[i]
-                .imm
-                .as_ref()
-                .and_then(|s| usize::from_str_radix(s, 16).ok())?;
-            let runtime_ofs = instructions[i + 1]
-                .imm
-                .as_ref()
-                .and_then(|s| usize::from_str_radix(s, 16).ok())?;
-            let init_end = instructions[i + 5].pc + 1;
+    // Match the full runtime so an earlier partial-prefix hit cannot mask the real offset.
+    let runtime_offset = deployment_bytes
+        .windows(runtime_bytes.len())
+        .position(|window| window == runtime_bytes)?;
 
-            tracing::debug!(
-                "Found strict deployment pattern at {}: init_end={}, runtime_start={}, runtime_len={}",
-                i,
-                init_end,
-                runtime_ofs,
-                runtime_len
-            );
+    let remaining_runtime = &runtime_bytes[20..];
+    let remaining_deployment = &deployment_bytes[runtime_offset + 20..];
 
-            return Some((init_end, runtime_ofs, runtime_len));
+    if remaining_deployment.starts_with(remaining_runtime) {
+        // if runtime starts at offset 0, we treat it as runtime-only bytecode with no init.
+        if runtime_offset == 0 {
+            return Some((0, 0, runtime_bytes.len()));
         }
-    }
-    None
-}
-
-/// Fallback: Look for CODECOPY + RETURN pattern with more flexibility
-fn detect_codecopy_return_pattern(instructions: &[Instruction]) -> Option<(usize, usize, usize)> {
-    // Find CODECOPY instruction
-    let codecopy_idx = instructions
-        .iter()
-        .position(|instruction| instruction.op == Opcode::CODECOPY)?;
-
-    // Look for RETURN after CODECOPY (within reasonable distance)
-    let return_idx = instructions[codecopy_idx + 1..]
-        .iter()
-        .take(10) // Look within next 10 instructions
-        .position(|instruction| instruction.op == Opcode::RETURN)
-        .map(|pos| codecopy_idx + 1 + pos)?;
 
-    // Try to extract runtime parameters from PUSH instructions before CODECOPY
-    let mut runtime_len = None;
-    let mut runtime_start = None;
-
-    // Look backwards from CODECOPY for PUSH instructions
-    // CODECOPY stack layout: [destOffset, offset, size] where offset is where runtime starts,
-    // and size is how many bytes to copy. Scanning backwards, we encounter them in reverse order.
-    for instruction in (0..codecopy_idx).rev().take(10) {
-        if matches!(
-            instructions[instruction].op,
-            Opcode::PUSH(_) | Opcode::PUSH0
-        ) && let Some(immediate) = &instructions[instruction].imm
-            && let Ok(value) = usize::from_str_radix(immediate, 16)
-        {
-            if runtime_start.is_none() && value > 0 && value < 100000 {
-                // First reasonable value (scanning backwards) is the offset where runtime starts
-                runtime_start = Some(value);
-            } else if runtime_len.is_none() && value > 0 && value < 100000 {
-                // Second reasonable value is the size of the runtime code
-                runtime_len = Some(value);
-            }
-
-            if runtime_len.is_some() && runtime_start.is_some() {
-                break;
-            }
-        }
+        return Some((runtime_offset - 1, runtime_offset, runtime_bytes.len()));
     }
 
-    // If we found CODECOPY + RETURN but can't extract parameters,
-    // make reasonable assumptions
-    let runtime_len = runtime_len.unwrap_or_else(|| {
-        // Estimate runtime length from instruction count after return
-        instructions.len().saturating_sub(return_idx + 1) * 2 // rough estimate
-    });
-
-    let runtime_start = runtime_start.unwrap_or_else(|| {
-        // Assume runtime starts right after the RETURN instruction
-        instructions[return_idx].pc + 1
-    });
-
-    let init_end = instructions[return_idx].pc + 1;
-
-    tracing::debug!(
-        "Found fallback deployment pattern: CODECOPY at {}, RETURN at {}, init_end={}, runtime_start={}, runtime_len={}",
-        codecopy_idx,
-        return_idx,
-        init_end,
-        runtime_start,
-        runtime_len
-    );
-
-    Some((init_end, runtime_start, runtime_len))
+    None
 }
 
 /// Detects ConstructorArgs section between Init end and Runtime start.
@@ -658,7 +577,7 @@ mod tests {
 
     use super::*;
 
-    const STORAGE_BYTECODE: &str = "6080604052348015600e575f5ffd5b50603e80601a5f395ff3fe60806040525f5ffdfea2646970667358221220e8c66682f723c073c8c5ec2c0de0795c9b8b64e310482b13bc56a554d057842b64736f6c634300081e0033";
+    const STORAGE_BYTECODE: &str = include_str!("../../../../tests/bytecode/storage.hex");
 
     #[test]
     fn detect_auxdata_works() {