From 907adc5b21c84c8cbf11ecc396253733cd92bc83 Mon Sep 17 00:00:00 2001
From: Ralf Anton Beier <ralf_beier@me.com>
Date: Mon, 2 Mar 2026 18:44:27 +0100
Subject: [PATCH] feat: add ARM target profiles with FPU capability threading
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `fpu: Option<FPUPrecision>` to `TargetSpec` and thread it through
the entire compilation pipeline — CLI, instruction selector, encoder,
ELF builder, and Cortex-M startup code. This prepares the pipeline for
VFP floating-point instruction selection on M4F/M7 targets.

- New target constructors: cortex_m3(), cortex_m4f(), cortex_m7dp()
- New --target CLI flag (cortex-m3, cortex-m4, cortex-m4f, cortex-m7, cortex-m7dp)
- Target-aware float error messages ("no FPU" vs "VFP not yet implemented")
- ELF e_flags: EF_ARM_ABI_FLOAT_HARD set for hard-float targets
- FPU startup: CPACR CP10+CP11 enable with DSB/ISB barriers
- ArmEncoder gains fpu field (prep for VFP encoding)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 crates/synth-backend/src/arm_backend.rs       | 11 ++-
 crates/synth-backend/src/arm_encoder.rs       | 22 ++++-
 crates/synth-backend/src/cortex_m.rs          | 40 ++++++++-
 crates/synth-backend/src/elf_builder.rs       | 19 ++++-
 crates/synth-backend/src/lib.rs               |  6 +-
 .../tests/f64_operations_test.rs              |  7 +-
 crates/synth-cli/src/main.rs                  | 79 ++++++++++++-----
 crates/synth-core/src/target.rs               | 85 ++++++++++++++++++-
 .../src/instruction_selector.rs               | 30 ++++++-
 9 files changed, 255 insertions(+), 44 deletions(-)
diff --git a/crates/synth-backend/src/arm_backend.rs b/crates/synth-backend/src/arm_backend.rs
index 5fcdbd1..5dad1e2 100644
--- a/crates/synth-backend/src/arm_backend.rs
+++ b/crates/synth-backend/src/arm_backend.rs
@@ -46,7 +46,13 @@ impl Backend for ArmBackend {
     }
 
     fn supported_targets(&self) -> Vec<TargetSpec> {
-        vec![TargetSpec::cortex_m4(), TargetSpec::cortex_m7()]
+        vec![
+            TargetSpec::cortex_m3(),
+            TargetSpec::cortex_m4(),
+            TargetSpec::cortex_m4f(),
+            TargetSpec::cortex_m7(),
+            TargetSpec::cortex_m7dp(),
+        ]
     }
 
     fn compile_module(
@@ -149,6 +155,7 @@ fn compile_wasm_to_arm(
         let db = RuleDatabase::with_standard_rules();
         let mut selector =
             InstructionSelector::with_bounds_check(db.rules().to_vec(), bounds_config);
+        selector.set_target(config.target.fpu, &config.target.triple);
         if config.num_imports > 0 {
             selector.set_num_imports(config.num_imports);
         }
@@ -181,7 +188,7 @@ fn compile_wasm_to_arm(
     let use_thumb2 = matches!(config.target.isa, IsaVariant::Thumb2 | IsaVariant::Thumb);
 
     let encoder = if use_thumb2 {
-        ArmEncoder::new_thumb2()
+        ArmEncoder::new_thumb2_with_fpu(config.target.fpu)
     } else {
         ArmEncoder::new_arm32()
     };
diff --git a/crates/synth-backend/src/arm_encoder.rs b/crates/synth-backend/src/arm_encoder.rs
index 492d2f2..8e7e591 100644
--- a/crates/synth-backend/src/arm_encoder.rs
+++ b/crates/synth-backend/src/arm_encoder.rs
@@ -3,23 +3,41 @@
 //! Generates ARM32/Thumb-2 machine code from ARM instruction structures
 
 use synth_core::Result;
+use synth_core::target::FPUPrecision;
 use synth_synthesis::{ArmOp, MemAddr, Operand2, Reg};
 
 /// ARM instruction encoding
 pub struct ArmEncoder {
     /// Use Thumb mode (vs ARM mode)
     thumb_mode: bool,
+    /// FPU capability of the target; stored but not read yet (reserved for VFP encoding)
+    #[allow(dead_code)]
+    fpu: Option<FPUPrecision>,
 }
 
 impl ArmEncoder {
     /// Create a new ARM encoder in ARM32 mode
     pub fn new_arm32() -> Self {
-        Self { thumb_mode: false }
+        Self {
+            thumb_mode: false,
+            fpu: None,
+        }
     }
 
     /// Create a new ARM encoder in Thumb-2 mode
     pub fn new_thumb2() -> Self {
-        Self { thumb_mode: true }
+        Self {
+            thumb_mode: true,
+            fpu: None,
+        }
+    }
+
+    /// Create a new Thumb-2 encoder with FPU capability
+    pub fn new_thumb2_with_fpu(fpu: Option<FPUPrecision>) -> Self {
+        Self {
+            thumb_mode: true,
+            fpu,
+        }
     }
 
     /// Encode a single ARM instruction to bytes
diff --git a/crates/synth-backend/src/cortex_m.rs b/crates/synth-backend/src/cortex_m.rs
index c5d5cee..679b05c 100644
--- a/crates/synth-backend/src/cortex_m.rs
+++ b/crates/synth-backend/src/cortex_m.rs
@@ -70,6 +70,8 @@ pub struct StartupCode {
     pub bss_start: u32,
     /// BSS section end
     pub bss_end: u32,
+    /// Enable FPU (set CPACR for CP10+CP11 full access)
+    pub enable_fpu: bool,
 }
 
 impl StartupCode {
@@ -83,22 +85,52 @@ impl StartupCode {
             data_load: 0,
             bss_start: 0,
             bss_end: 0,
+            enable_fpu: false,
         }
     }
 
     /// Generate Thumb-2 startup code (reset handler)
     ///
     /// This generates minimal startup code that:
-    /// 1. Sets up the stack pointer (already done by hardware from vector table)
-    /// 2. Initializes R11 as linear memory base (0x20000000)
-    /// 3. Calls the entry point
-    /// 4. Loops forever if entry returns
+    /// 1. Optionally enables FPU (CPACR setup for CP10+CP11)
+    /// 2. Sets up the stack pointer (already done by hardware from vector table)
+    /// 3. Initializes R11 as linear memory base (0x20000000)
+    /// 4. Calls the entry point
+    /// 5. Loops forever if entry returns
     pub fn generate_thumb(&self) -> Vec<u8> {
         let mut code = Vec::new();
 
         // Reset handler entry point
         // The stack pointer is already set by hardware from vector table[0]
 
+        // Enable FPU: set CP10+CP11 to full access in SCB->CPACR (0xE000ED88)
+        if self.enable_fpu {
+            // MOVW R0, #0xED88 (low 16 bits of CPACR address)
+            // Thumb-2 MOVW encoding: 1111 0 i 10 0 1 0 0 imm4 | 0 imm3 Rd imm8
+            // 0xED88: imm4=0xE, i=1, imm3=0b101, imm8=0x88
+            code.extend_from_slice(&[0x4E, 0xF6, 0x88, 0x50]); // MOVW R0, #0xED88
+
+            // MOVT R0, #0xE000 (high 16 bits of CPACR address)
+            // 0xE000: imm4=0xE, i=0, imm3=0, imm8=0
+            code.extend_from_slice(&[0xCE, 0xF2, 0x00, 0x00]); // MOVT R0, #0xE000
+
+            // LDR R1, [R0]
+            code.extend_from_slice(&[0xD0, 0xF8, 0x00, 0x10]); // LDR R1, [R0, #0]
+
+            // ORR R1, R1, #0x00F00000 (enable CP10+CP11 full access, CPACR bits 23:20)
+            // Thumb-2 modified immediate: 0xF0 ROR 16 => i=1, imm3=0, imm8=0x70
+            code.extend_from_slice(&[0x41, 0xF4, 0x70, 0x01]); // ORR R1, R1, #0x00F00000
+
+            // STR R1, [R0]
+            code.extend_from_slice(&[0xC0, 0xF8, 0x00, 0x10]); // STR R1, [R0, #0]
+
+            // DSB (data synchronization barrier)
+            code.extend_from_slice(&[0xBF, 0xF3, 0x4F, 0x8F]); // DSB SY
+
+            // ISB (instruction synchronization barrier)
+            code.extend_from_slice(&[0xBF, 0xF3, 0x6F, 0x8F]); // ISB SY
+        }
+
         // Initialize R11 with linear memory base address (0x20000000)
         // This is used for WASM linear memory access: LDR Rd, [R11, Raddr]
         // Thumb-2 MOVW R11, #0x0000 (low 16 bits)
diff --git a/crates/synth-backend/src/elf_builder.rs b/crates/synth-backend/src/elf_builder.rs
index 0f608ca..c7abc6c 100644
--- a/crates/synth-backend/src/elf_builder.rs
+++ b/crates/synth-backend/src/elf_builder.rs
@@ -345,6 +345,13 @@ pub struct Relocation {
     pub reloc_type: ArmRelocationType,
 }
 
+/// ARM EABI version 5 (version field of `e_flags`; says nothing about the float ABI)
+pub const EF_ARM_EABI_VER5: u32 = 0x05000000;
+/// ARM hard-float ABI flag
+pub const EF_ARM_ABI_FLOAT_HARD: u32 = 0x00000400;
+/// ARM soft-float ABI flag
+pub const EF_ARM_ABI_FLOAT_SOFT: u32 = 0x00000200;
+
 /// ELF file builder
 pub struct ElfBuilder {
     /// File class (32 or 64 bit)
@@ -357,6 +364,8 @@ pub struct ElfBuilder {
     machine: ElfMachine,
     /// Entry point address
     entry: u32,
+    /// ELF e_flags (EABI version + float ABI)
+    e_flags: u32,
     /// Sections
     sections: Vec<Section>,
     /// Symbols
@@ -376,6 +385,7 @@ impl ElfBuilder {
             elf_type: ElfType::Exec,
             machine: ElfMachine::Arm,
             entry: 0,
+            e_flags: EF_ARM_EABI_VER5,
             sections: Vec::new(),
             symbols: Vec::new(),
             program_headers: Vec::new(),
@@ -389,6 +399,11 @@ impl ElfBuilder {
         self
     }
 
+    /// Set ELF e_flags (e.g. to add hard-float ABI)
+    pub fn set_flags(&mut self, flags: u32) {
+        self.e_flags = flags;
+    }
+
     /// Set file type
     pub fn with_type(mut self, elf_type: ElfType) -> Self {
         self.elf_type = elf_type;
@@ -643,8 +658,8 @@ impl ElfBuilder {
         output[cursor..cursor + 4].copy_from_slice(&sh_offset.to_le_bytes());
         cursor += 4;
 
-        // Flags (little-endian u32) - ARM EABI version 5
-        output[cursor..cursor + 4].copy_from_slice(&0x05000000u32.to_le_bytes());
+        // Flags (little-endian u32) - ARM EABI version 5 + float ABI
+        output[cursor..cursor + 4].copy_from_slice(&self.e_flags.to_le_bytes());
         cursor += 4;
 
         // ELF header size (little-endian u16)
diff --git a/crates/synth-backend/src/lib.rs b/crates/synth-backend/src/lib.rs
index f6c1c52..9e9685b 100644
--- a/crates/synth-backend/src/lib.rs
+++ b/crates/synth-backend/src/lib.rs
@@ -12,9 +12,9 @@ pub mod mpu_allocator;
 pub mod w2c2_wrapper;
 
 pub use elf_builder::{
-    ArmRelocationType, ElfBuilder, ElfClass, ElfData, ElfMachine, ElfType, ProgramFlags,
-    ProgramHeader, ProgramType, Relocation, Section, SectionFlags, SectionType as ElfSectionType,
-    Symbol, SymbolBinding, SymbolType,
+    ArmRelocationType, EF_ARM_ABI_FLOAT_HARD, EF_ARM_ABI_FLOAT_SOFT, EF_ARM_EABI_VER5, ElfBuilder,
+    ElfClass, ElfData, ElfMachine, ElfType, ProgramFlags, ProgramHeader, ProgramType, Relocation,
+    Section, SectionFlags, SectionType as ElfSectionType, Symbol, SymbolBinding, SymbolType,
 };
 pub use linker_script::{LinkerScriptGenerator, MemoryRegion};
 pub use memory_layout::{MemoryLayout, MemoryLayoutAnalyzer, MemorySection, SectionType};
diff --git a/crates/synth-backend/tests/f64_operations_test.rs b/crates/synth-backend/tests/f64_operations_test.rs
index 48f38d0..9b13a89 100644
--- a/crates/synth-backend/tests/f64_operations_test.rs
+++ b/crates/synth-backend/tests/f64_operations_test.rs
@@ -25,8 +25,11 @@ fn assert_float_rejected(wasm_ops: Vec<WasmOp>, op_name: &str) {
     );
     let err = result.unwrap_err().to_string();
     assert!(
-        err.contains("floating-point") || err.contains("i64"),
-        "{op_name} error should mention floating-point or i64, got: {err}"
+        err.contains("no FPU")
+            || err.contains("VFP")
+            || err.contains("floating-point")
+            || err.contains("i64"),
+        "{op_name} error should mention FPU/VFP/floating-point or i64, got: {err}"
     );
 }
 
diff --git a/crates/synth-cli/src/main.rs b/crates/synth-cli/src/main.rs
index 8aadb1b..de2a9de 100644
--- a/crates/synth-cli/src/main.rs
+++ b/crates/synth-cli/src/main.rs
@@ -120,6 +120,11 @@ enum Commands {
         #[arg(long)]
         cortex_m: bool,
 
+        /// Target profile (cortex-m3, cortex-m4, cortex-m4f, cortex-m7, cortex-m7dp)
+        /// Implies --cortex-m for Cortex-M targets
+        #[arg(short, long, value_name = "TARGET")]
+        target: Option<String>,
+
         /// Disable optimization passes (use direct instruction selection)
         #[arg(long)]
         no_optimize: bool,
@@ -209,12 +214,18 @@ fn main() -> Result<()> {
             func_name,
             all_exports,
             cortex_m,
+            target,
             no_optimize,
             loom_compat,
             bounds_check,
             backend,
             verify,
         } => {
+            // Resolve target spec (--target wins). The flagless default is an Arm32 spec in the
+            // Cortex-M family, so select the Cortex-M image by Thumb-2 ISA rather than by family.
+            let target_spec = resolve_target_spec(target.as_deref(), cortex_m)?;
+            let is_cortex_m = cortex_m || target_spec.is_thumb2();
+
             compile_command(
                 input,
                 output,
@@ -222,12 +233,13 @@ fn main() -> Result<()> {
                 func_index,
                 func_name,
                 all_exports,
-                cortex_m,
+                is_cortex_m,
                 no_optimize,
                 loom_compat,
                 bounds_check,
                 &backend,
                 verify,
+                &target_spec,
             )?;
         }
         Commands::Disasm { input } => {
@@ -386,6 +398,25 @@ struct ElfFunction {
     relocations: Vec<synth_core::backend::CodeRelocation>,
 }
 
+/// Resolve CLI flags into a TargetSpec: --target wins, bare --cortex-m means cortex-m3
+fn resolve_target_spec(target: Option<&str>, cortex_m: bool) -> Result<TargetSpec> {
+    match target {
+        Some(name) => TargetSpec::from_triple(name).map_err(|e| {
+            anyhow::anyhow!(
+                "{e}. Supported: cortex-m3, cortex-m4, cortex-m4f, cortex-m7, cortex-m7dp"
+            )
+        }),
+        None if cortex_m => Ok(TargetSpec::cortex_m3()),
+        None => {
+            // Default: Arm32 ISA, no vector table (other fields still come from cortex_m4())
+            Ok(TargetSpec {
+                isa: synth_core::target::IsaVariant::Arm32,
+                ..TargetSpec::cortex_m4()
+            })
+        }
+    }
+}
+
 /// Build the backend registry with all available backends
 fn build_backend_registry() -> BackendRegistry {
     let mut registry = BackendRegistry::new();
@@ -421,6 +452,7 @@ fn compile_command(
     bounds_check: bool,
     backend_name: &str,
     verify: bool,
+    target_spec: &TargetSpec,
 ) -> Result<()> {
     // Validate backend exists
     let registry = build_backend_registry();
@@ -461,6 +493,7 @@ fn compile_command(
             bounds_check,
             backend,
             verify,
+            target_spec,
         );
     }
 
@@ -559,14 +592,7 @@ fn compile_command(
         no_optimize,
         loom_compat,
         bounds_check,
-        target: if !cortex_m {
-            TargetSpec {
-                isa: synth_core::target::IsaVariant::Arm32,
-                ..TargetSpec::cortex_m4()
-            }
-        } else {
-            TargetSpec::cortex_m4()
-        },
+        target: target_spec.clone(),
         ..CompileConfig::default()
     };
 
@@ -578,7 +604,7 @@ fn compile_command(
     info!("Encoded {} bytes of machine code", code.len());
 
     let elf_data = if cortex_m {
-        build_cortex_m_elf(&code, &func_name)?
+        build_cortex_m_elf(&code, &func_name, target_spec)?
     } else {
         build_simple_elf(&code, &func_name)?
     };
@@ -1054,6 +1080,7 @@ fn compile_all_exports(
     bounds_check: bool,
     backend: &dyn Backend,
     verify: bool,
+    target_spec: &TargetSpec,
 ) -> Result<()> {
     let path = input.context("--all-exports requires an input file")?;
 
@@ -1134,14 +1161,7 @@ fn compile_all_exports(
         loom_compat,
         bounds_check,
         num_imports: module.num_imported_funcs,
-        target: if !cortex_m {
-            TargetSpec {
-                isa: synth_core::target::IsaVariant::Arm32,
-                ..TargetSpec::cortex_m4()
-            }
-        } else {
-            TargetSpec::cortex_m4()
-        },
+        target: target_spec.clone(),
         ..CompileConfig::default()
     };
 
@@ -1198,7 +1218,7 @@ fn compile_all_exports(
         info!("Module has import calls — producing relocatable object (ET_REL)");
         build_relocatable_elf(&compiled_funcs, &module.imports)?
     } else if cortex_m {
-        build_multi_func_cortex_m_elf(&compiled_funcs, &module.memories)?
+        build_multi_func_cortex_m_elf(&compiled_funcs, &module.memories, target_spec)?
     } else {
         build_multi_func_simple_elf(&compiled_funcs)?
     };
@@ -1405,6 +1425,7 @@ fn build_relocatable_elf(funcs: &[ElfFunction], imports: &[ImportEntry]) -> Resu
 fn build_multi_func_cortex_m_elf(
     funcs: &[ElfFunction],
     memories: &[WasmMemory],
+    target: &TargetSpec,
 ) -> Result<Vec<u8>> {
     let flash_base: u32 = 0x0000_0000;
     let ram_base: u32 = 0x2000_0000;
@@ -1549,6 +1570,12 @@ fn build_multi_func_cortex_m_elf(
     let flash_size = flash_image.len() as u32;
     let mut elf_builder = ElfBuilder::new_arm32().with_entry(startup_addr | 1);
 
+    // Set hard-float ABI flag if target has FPU
+    if target.has_fpu() {
+        elf_builder
+            .set_flags(synth_backend::EF_ARM_EABI_VER5 | synth_backend::EF_ARM_ABI_FLOAT_HARD);
+    }
+
     // Calculate proper file offset for .text section
     let shstrtab_size = 1 + ".shstrtab\0.strtab\0.symtab\0.text\0".len();
     let mut strtab_size = 1 + "Reset_Handler\0Default_Handler\0Trap_Handler\0".len();
@@ -1837,7 +1864,7 @@ fn build_simple_elf(code: &[u8], func_name: &str) -> Result<Vec<u8>> {
 }
 
 /// Build a complete Cortex-M ELF with vector table and startup code
-fn build_cortex_m_elf(code: &[u8], func_name: &str) -> Result<Vec<u8>> {
+fn build_cortex_m_elf(code: &[u8], func_name: &str, target: &TargetSpec) -> Result<Vec<u8>> {
     // Memory layout for generic Cortex-M (works with QEMU/Renode)
     let flash_base: u32 = 0x0000_0000;
     let ram_base: u32 = 0x2000_0000;
@@ -1930,6 +1957,12 @@ fn build_cortex_m_elf(code: &[u8], func_name: &str) -> Result<Vec<u8>> {
     let flash_size = flash_image.len() as u32;
     let mut elf_builder = ElfBuilder::new_arm32().with_entry(startup_addr | 1); // Thumb bit
 
+    // Set hard-float ABI flag if target has FPU
+    if target.has_fpu() {
+        elf_builder
+            .set_flags(synth_backend::EF_ARM_EABI_VER5 | synth_backend::EF_ARM_ABI_FLOAT_HARD);
+    }
+
     // Add LOAD program header for the .text section
     // The offset is calculated as: ELF header (52) + program headers (32 * 1) + string tables
     // String tables: .shstrtab (~33 bytes) + .strtab (~50 bytes)
@@ -2127,7 +2160,7 @@ mod tests {
             0x1e, 0xff, 0x2f, 0xe1, // BX lr (ARM encoding)
         ];
 
-        let elf_data = build_cortex_m_elf(&code, "test_func").unwrap();
+        let elf_data = build_cortex_m_elf(&code, "test_func", &TargetSpec::cortex_m3()).unwrap();
 
         // Verify ELF magic
         assert_eq!(&elf_data[0..4], b"\x7fELF", "Invalid ELF magic");
@@ -2147,7 +2180,7 @@ mod tests {
     fn test_vector_table_structure() {
         let code = vec![0x00, 0x80, 0x80, 0xe0]; // ADD r0, r0, r1
 
-        let elf_data = build_cortex_m_elf(&code, "test").unwrap();
+        let elf_data = build_cortex_m_elf(&code, "test", &TargetSpec::cortex_m3()).unwrap();
 
         // Find .text section (it starts after ELF headers)
         // For simplicity, look for the vector table pattern
@@ -2198,7 +2231,7 @@ mod tests {
     fn test_startup_code_patching() {
         let code = vec![0x00, 0x80, 0x80, 0xe0];
 
-        let elf_data = build_cortex_m_elf(&code, "patched").unwrap();
+        let elf_data = build_cortex_m_elf(&code, "patched", &TargetSpec::cortex_m3()).unwrap();
 
         // With the new startup code layout (28 bytes with R10/R11 init):
         // - Startup: 0x80 (28 bytes)
diff --git a/crates/synth-core/src/target.rs b/crates/synth-core/src/target.rs
index 01663a1..2111558 100644
--- a/crates/synth-core/src/target.rs
+++ b/crates/synth-core/src/target.rs
@@ -290,26 +290,63 @@ pub struct TargetSpec {
     pub isa: IsaVariant,
     /// Memory protection model
     pub mem_protection: MemProtection,
+    /// Floating-point unit capability (None = soft-float only)
+    pub fpu: Option<FPUPrecision>,
 }
 
 impl TargetSpec {
-    /// Cortex-M4 with 8 MPU regions (most common embedded target)
+    /// Cortex-M3 (ARMv7-M, no FPU, 8 MPU regions)
+    pub fn cortex_m3() -> Self {
+        Self {
+            family: ArchFamily::ArmCortexM,
+            triple: "thumbv7m-none-eabi".to_string(),
+            isa: IsaVariant::Thumb2,
+            mem_protection: MemProtection::Mpu { regions: 8 },
+            fpu: None,
+        }
+    }
+
+    /// Cortex-M4 with 8 MPU regions (no FPU — M4 without F suffix)
     pub fn cortex_m4() -> Self {
+        Self {
+            family: ArchFamily::ArmCortexM,
+            triple: "thumbv7em-none-eabi".to_string(),
+            isa: IsaVariant::Thumb2,
+            mem_protection: MemProtection::Mpu { regions: 8 },
+            fpu: None,
+        }
+    }
+
+    /// Cortex-M4F with single-precision FPU
+    pub fn cortex_m4f() -> Self {
         Self {
             family: ArchFamily::ArmCortexM,
             triple: "thumbv7em-none-eabihf".to_string(),
             isa: IsaVariant::Thumb2,
             mem_protection: MemProtection::Mpu { regions: 8 },
+            fpu: Some(FPUPrecision::Single),
         }
     }
 
-    /// Cortex-M7 with 16 MPU regions
+    /// Cortex-M7 with single-precision FPU and 16 MPU regions
     pub fn cortex_m7() -> Self {
         Self {
             family: ArchFamily::ArmCortexM,
             triple: "thumbv7em-none-eabihf".to_string(),
             isa: IsaVariant::Thumb2,
             mem_protection: MemProtection::Mpu { regions: 16 },
+            fpu: Some(FPUPrecision::Single),
+        }
+    }
+
+    /// Cortex-M7 with double-precision FPU and 16 MPU regions
+    pub fn cortex_m7dp() -> Self {
+        Self {
+            family: ArchFamily::ArmCortexM,
+            triple: "thumbv7em-none-eabihf".to_string(),
+            isa: IsaVariant::Thumb2,
+            mem_protection: MemProtection::Mpu { regions: 16 },
+            fpu: Some(FPUPrecision::Double),
         }
     }
 
@@ -320,6 +357,7 @@ impl TargetSpec {
             triple: "armv7r-none-eabihf".to_string(),
             isa: IsaVariant::Arm32,
             mem_protection: MemProtection::Mpu { regions: 12 },
+            fpu: None,
         }
     }
 
@@ -330,6 +368,7 @@ impl TargetSpec {
             triple: "aarch64-none-elf".to_string(),
             isa: IsaVariant::AArch64,
             mem_protection: MemProtection::Mmu,
+            fpu: None,
         }
     }
 
@@ -342,14 +381,18 @@ impl TargetSpec {
                 extensions: "imac".to_string(),
             },
             mem_protection: MemProtection::Pmp { entries: 16 },
+            fpu: None,
         }
     }
 
     /// Parse from an LLVM triple or shorthand name
     pub fn from_triple(triple: &str) -> std::result::Result<Self, String> {
         match triple {
-            "thumbv7em-none-eabihf" | "cortex-m4" => Ok(Self::cortex_m4()),
+            "thumbv7m-none-eabi" | "cortex-m3" => Ok(Self::cortex_m3()),
+            "thumbv7em-none-eabi" | "cortex-m4" => Ok(Self::cortex_m4()),
+            "thumbv7em-none-eabihf" | "cortex-m4f" => Ok(Self::cortex_m4f()),
             "cortex-m7" => Ok(Self::cortex_m7()),
+            "cortex-m7dp" => Ok(Self::cortex_m7dp()),
             "armv7r-none-eabihf" | "cortex-r5" => Ok(Self::cortex_r5()),
             "aarch64-none-elf" | "cortex-a53" => Ok(Self::cortex_a53()),
             "riscv32imac-unknown-none-elf" | "riscv32imac" => Ok(Self::riscv32imac()),
@@ -357,6 +400,11 @@ impl TargetSpec {
         }
     }
 
+    /// Whether this target has a floating-point unit
+    pub fn has_fpu(&self) -> bool {
+        self.fpu.is_some()
+    }
+
     /// Whether this target uses Thumb-2 encoding (Cortex-M)
     pub fn is_thumb2(&self) -> bool {
         self.isa == IsaVariant::Thumb2
@@ -373,10 +421,41 @@ mod tests {
         assert_eq!(spec.family, ArchFamily::ArmCortexM);
         assert!(spec.is_thumb2());
         assert_eq!(spec.mem_protection, MemProtection::Mpu { regions: 8 });
+        assert!(!spec.has_fpu());
     }
 
     #[test]
     fn test_target_spec_unknown() {
         assert!(TargetSpec::from_triple("mips-unknown-none").is_err());
     }
+
+    #[test]
+    fn test_cortex_m3() {
+        let spec = TargetSpec::from_triple("cortex-m3").unwrap();
+        assert_eq!(spec.triple, "thumbv7m-none-eabi");
+        assert!(!spec.has_fpu());
+    }
+
+    #[test]
+    fn test_cortex_m4f() {
+        let spec = TargetSpec::from_triple("cortex-m4f").unwrap();
+        assert_eq!(spec.triple, "thumbv7em-none-eabihf");
+        assert!(spec.has_fpu());
+        assert_eq!(spec.fpu, Some(FPUPrecision::Single));
+    }
+
+    #[test]
+    fn test_cortex_m7dp() {
+        let spec = TargetSpec::from_triple("cortex-m7dp").unwrap();
+        assert!(spec.has_fpu());
+        assert_eq!(spec.fpu, Some(FPUPrecision::Double));
+        assert_eq!(spec.mem_protection, MemProtection::Mpu { regions: 16 });
+    }
+
+    #[test]
+    fn test_cortex_m7_has_fpu() {
+        let spec = TargetSpec::cortex_m7();
+        assert!(spec.has_fpu());
+        assert_eq!(spec.fpu, Some(FPUPrecision::Single));
+    }
 }
diff --git a/crates/synth-synthesis/src/instruction_selector.rs b/crates/synth-synthesis/src/instruction_selector.rs
index ebe25f3..99220d7 100644
--- a/crates/synth-synthesis/src/instruction_selector.rs
+++ b/crates/synth-synthesis/src/instruction_selector.rs
@@ -7,6 +7,7 @@ use crate::{Bindings, PatternMatcher};
 use std::collections::HashMap;
 use synth_core::Result;
 use synth_core::WasmOp;
+use synth_core::target::FPUPrecision;
 
 /// Bounds checking configuration for memory operations
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -109,6 +110,10 @@ pub struct InstructionSelector {
     bounds_check: BoundsCheckConfig,
     /// Number of imported functions (calls to func_idx < this go through Meld dispatch)
     num_imports: u32,
+    /// FPU capability of the target (None = soft-float only)
+    fpu: Option<FPUPrecision>,
+    /// Target name for error messages
+    target_name: String,
 }
 
 impl InstructionSelector {
@@ -119,6 +124,8 @@ impl InstructionSelector {
             regs: RegisterState::new(),
             bounds_check: BoundsCheckConfig::None,
             num_imports: 0,
+            fpu: None,
+            target_name: "cortex-m3".to_string(),
         }
     }
 
@@ -129,6 +136,8 @@ impl InstructionSelector {
             regs: RegisterState::new(),
             bounds_check,
             num_imports: 0,
+            fpu: None,
+            target_name: "cortex-m3".to_string(),
         }
     }
 
@@ -142,6 +151,12 @@ impl InstructionSelector {
         self.bounds_check = config;
     }
 
+    /// Set target FPU capability and name (for target-aware error messages)
+    pub fn set_target(&mut self, fpu: Option<FPUPrecision>, target_name: &str) {
+        self.fpu = fpu;
+        self.target_name = target_name.to_string();
+    }
+
     /// Select ARM instructions for a sequence of WASM operations
     pub fn select(&mut self, wasm_ops: &[WasmOp]) -> Result<Vec<ArmInstruction>> {
         let mut arm_instructions = Vec::new();
@@ -589,9 +604,18 @@ impl InstructionSelector {
             | I64TruncF64U
             | I32TruncF64S
             | I32TruncF64U) => {
-                return Err(synth_core::Error::synthesis(format!(
-                    "floating-point operation not supported (requires VFP/NEON): {op:?}"
-                )));
+                let msg = if self.fpu.is_some() {
+                    format!(
+                        "VFP encoding not yet implemented for {op:?} (target {} has FPU)",
+                        self.target_name
+                    )
+                } else {
+                    format!(
+                        "target {} has no FPU; cannot compile {op:?}",
+                        self.target_name
+                    )
+                };
+                return Err(synth_core::Error::synthesis(msg));
             }
         };
         Ok(instrs)